git.tdb.fi Git - libs/datafile.git/blob - source/parser.cpp
Support ignoring statements
[libs/datafile.git] / source / parser.cpp
1 /*
2 This file is part of libmspparser
3 Copyright © 2006  Mikko Rasa, Mikkosoft Productions
4 Distributed under the LGPL
5 */
6 #include <cctype>
7 #include <sstream>
8 #include <msp/error.h>
9 #include "parser.h"
10 #include "statement.h"
11 #include "token.h"
12
13 using namespace std;
14
15 namespace Msp {
16 namespace Parser {
17
18 Parser::Parser(istream &i, const string &s):
19         in(i),
20         src(s),
21         good(true)
22 { }
23
24 Statement Parser::parse()
25 {
26         if(!good)
27                 throw Exception("Parser is not good");
28         
29         try
30         {
31                 return parse_(0);
32         }
33         catch(const Exception &e)
34         {
35                 good=false;
36                 throw;
37         }
38 }
39
40 Statement Parser::parse_(const Token *t)
41 {
42         Statement result;
43         bool      sub=false;
44         bool      finish=false;
45         
46         while(in)
47         {
48                 Token token;
49                 if(t)
50                 {
51                         token=*t;
52                         t=0;
53                 }
54                 else
55                         token=parse_token();
56                 
57                 if(result.keyword.empty())
58                 {
59                         if(token.str.empty())
60                                 break;
61                         else if(token.type!=Token::IDENTIFIER)
62                                 throw DataError(get_location()+": Syntax error at token '"+token.str+"' (expected an identifier)");
63                         result.keyword=token.str;
64                         result.valid=true;
65                         result.source=src;
66                         result.line=in.get_line_number();
67                 }
68                 else if(sub)
69                 {
70                         if(token.str=="}")
71                         {
72                                 sub=false;
73                                 finish=true;
74                         }
75                         else
76                         {
77                                 Statement ss=parse_(&token);
78                                 result.sub.push_back(ss);
79                         }
80                 }
81                 else if(finish)
82                 {
83                         if(token.str!=";")
84                                 throw DataError(get_location()+": Syntax error at token '"+token.str+"' (Expected a ';')");
85                         break;
86                 }
87                 else if(token.str=="{")
88                         sub=true;
89                 else if(token.str==";")
90                         break;
91                 else if(token.type==Token::INTEGER)
92                         result.args.push_back(Value(Value::INTEGER, token.str));
93                 else if(token.type==Token::FLOAT)
94                         result.args.push_back(Value(Value::FLOAT, token.str));
95                 else if(token.type==Token::STRING)
96                         result.args.push_back(Value(Value::STRING, token.str));
97                 else if(token.type==Token::IDENTIFIER)
98                 {
99                         if(token.str=="true")
100                                 result.args.push_back(Value(Value::BOOLEAN, "1"));
101                         else if(token.str=="false")
102                                 result.args.push_back(Value(Value::BOOLEAN, "0"));
103                         else
104                                 result.args.push_back(Value(Value::ENUM, token.str));
105                         //result.args.push_back(resolve_identifiertoken.str);
106                 }
107                 else if(token.str=="")
108                         throw DataError(src+": Unexcepted EOF");
109                 else
110                         throw DataError(get_location()+": Syntax error");
111         }
112
113         return result;
114 }
115
116 Token Parser::parse_token()
117 {
118         int c;
119         unsigned comment=0;
120         while(in)
121         {
122                 c=in.get();
123                 int next=in.peek();
124
125                 //cout<<c<<' '<<next<<'\n';
126
127                 if(c=='/' && next=='/')
128                         comment=1;
129                 else if(c=='/' && next=='*')
130                         comment=2;
131                 else if(c=='\n' && comment==1)
132                         comment=0;
133                 else if(c=='*' && next=='/' && comment==2)
134                         comment=3;
135                 else if(comment==3)   // Skip the second character of block comment end
136                         comment=0;
137                 else if(!isspace(c) && !comment)
138                         break;
139         }
140         if(comment)
141                 throw DataError(src+": Unfinished comment");
142         
143         enum ParseState
144         {
145                 INIT,
146                 SIGN,
147                 FLOATEXPINIT,
148                 FLOATEXPSIGN,
149                 STRING,
150                 ACCEPT,
151                 ZERO,
152                 DECIMAL,
153                 HEXADECIMAL,
154                 OCTAL,
155                 FLOAT,
156                 FLOATEXP,
157                 IDENTIFIER
158         };
159
160         static Token::Type token_type[]=
161         {
162                 Token::SPECIAL,
163                 Token::SPECIAL,
164                 Token::SPECIAL,
165                 Token::SPECIAL,
166                 Token::STRING,
167                 Token::SPECIAL,
168                 Token::INTEGER,
169                 Token::INTEGER,
170                 Token::INTEGER,
171                 Token::INTEGER,
172                 Token::FLOAT,
173                 Token::FLOAT,
174                 Token::IDENTIFIER
175         };
176
177         ParseState state=INIT;
178         string     buf;
179         bool       escape=false;
180
181         while(in)
182         {
183                 if(state!=INIT)
184                         c=in.get();
185                 int next=in.peek();
186                 
187                 buf+=c;
188
189                 switch(state)
190                 {
191                 case INIT:
192                         if(c=='0')
193                                 state=ZERO;
194                         else if(c=='-' || c=='+')
195                                 state=SIGN;
196                         else if(c=='.')
197                                 state=FLOAT;
198                         else if(c=='"')
199                                 state=STRING;
200                         else if(c=='{' || c=='}' || c==';')
201                                 return Token(Token::SPECIAL, string(1, c));
202                         else if(isdigit(c))
203                                 state=DECIMAL;
204                         else if(isalpha(c))
205                                 state=IDENTIFIER;
206                         else
207                                 parse_error(c, state);
208                         break;
209                 
210                 case SIGN:
211                         if(c=='0')
212                                 state=ZERO;
213                         else if(isdigit(c))
214                                 state=DECIMAL;
215                         else if(c=='.')
216                                 state=FLOAT;
217                         else
218                                 parse_error(c, state);
219                         break;
220
221                 case ZERO:
222                         if(c=='x')
223                                 state=HEXADECIMAL;
224                         else if(isdigit(c))
225                                 state=OCTAL;
226                         else if(c=='.')
227                                 state=FLOAT;
228                         else
229                                 parse_error(c, state);
230                         break;
231
232                 case DECIMAL:
233                         if(c=='.')
234                                 state=FLOAT;
235                         else if(!isdigit(c))
236                                 parse_error(c, state);
237                         break;
238
239                 case HEXADECIMAL:
240                         if(!isxdigit(c))
241                                 parse_error(c, state);
242                         break;
243
244                 case OCTAL:
245                         if(!isodigit(c))
246                                 parse_error(c, state);
247                         break;
248
249                 case FLOAT:
250                         if(c=='e' || c=='E')
251                                 state=FLOATEXPINIT;
252                         else if(!isdigit(c))
253                                 parse_error(c, state);
254                         break;
255
256                 case FLOATEXPINIT:
257                         if(c=='+' || c=='-')
258                                 state=FLOATEXPSIGN;
259                         else if(isdigit(c))
260                                 state=FLOATEXP;
261                         else
262                                 parse_error(c, state);
263                         break;
264
265                 case FLOATEXPSIGN:
266                         if(isdigit(c))
267                                 state=FLOATEXP;
268                         else
269                                 parse_error(c, state);
270                         break;
271
272                 case FLOATEXP:
273                         if(!isdigit(c))
274                                 parse_error(c, state);
275                         break;
276
277                 case STRING:
278                         if(c=='\\')
279                                 escape=!escape;
280                         else if(c=='"' && !escape)
281                                 return Token(Token::STRING, unescape_string(buf));
282                         else
283                                 escape=false;
284                         break;
285                 
286                 case IDENTIFIER:
287                         if(!isalpha(c) && !isdigit(c) && c!='_')
288                                 parse_error(c, state);
289                         break;
290
291                 default:
292                         throw Exception(get_location()+": Internal error (bad state)");
293                 }
294
295                 if(is_delimiter(next) && state>=ACCEPT)
296                         return Token(token_type[state], buf);
297         }
298
299         return Token(Token::SPECIAL, "");
300 }
301
302 bool Parser::is_delimiter(int c)
303 {
304         return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/');
305 }
306
307 bool Parser::isodigit(int c)
308 {
309         return (c>='0' && c<='7');
310 }
311
312 string Parser::unescape_string(const string &str)
313 {
314         string   result;
315         bool     escape=false;
316         unsigned hexcape=0;
317         for(string::const_iterator i=str.begin()+1; i!=str.end()-1; ++i)
318         {
319                 if(escape)
320                 {
321                         if(*i=='n')
322                                 result+='\n';
323                         else if(*i=='t')
324                                 result+='\t';
325                         else if(*i=='\\')
326                                 result+='\\';
327                         else if(*i=='"')
328                                 result+='"';
329                         else if(*i=='x')
330                                 hexcape=0x100;
331                         else
332                                 throw DataError("Invalid escape");
333                         escape=false;
334                 }
335                 else if(hexcape)
336                 {
337                         unsigned digit=0;
338                         if(*i>='0' && *i<='9')
339                                 digit=*i-'0';
340                         else if(*i>='a' && *i<='f')
341                                 digit=*i-'a'+10;
342                         else if(*i>='A' && *i<='F')
343                                 digit=*i-'A'+10;
344                         else
345                                 throw DataError("Invalid hex digit");
346
347                         hexcape=(hexcape<<4)|digit;
348                         if(hexcape&0x10000)
349                         {
350                                 result+=hexcape&0xFF;
351                                 hexcape=0;
352                         }
353                 }
354                 else if(*i=='\\')
355                         escape=true;
356                 else
357                         result+=*i;
358         }
359
360         return result;
361 }
362
363 string Parser::get_location()
364 {
365         ostringstream ss;
366         ss<<src<<':'<<in.get_line_number();
367         return ss.str();
368 }
369
370 void Parser::parse_error(int c, int state)
371 {
372         ostringstream ss;
373         ss<<get_location()<<": Parse error at '"<<(char)c<<"' (state "<<state<<')';
374         throw DataError(ss.str());
375 }
376
377 } // namespace Parser
378 } // namespace Msp