git.tdb.fi Git - libs/datafile.git/blob - source/parser.cpp
Remove old build info
[libs/datafile.git] / source / parser.cpp
1 /*
2 This file is part of libmspparser
3 Copyright © 2006  Mikko Rasa, Mikkosoft Productions
4 Distributed under the LGPL
5 */
6 #include <cctype>
7 #include <sstream>
8 #include "error.h"
9 #include "parser.h"
10 #include "statement.h"
11 #include "token.h"
12
13 using namespace std;
14
15 namespace Msp {
16 namespace Parser {
17
18 Parser::Parser(istream &i, const string &s):
19         in(i),
20         src(s),
21         good(true)
22 { }
23
24 Statement Parser::parse()
25 {
26         if(!good)
27                 throw Exception("Parser is not good");
28
29         try
30         {
31                 return parse_(0);
32         }
33         catch(const Exception &e)
34         {
35                 good=false;
36                 throw;
37         }
38 }
39
40 Statement Parser::parse_(const Token *t)
41 {
42         Statement result;
43         bool      sub=false;
44         bool      finish=false;
45
46         while(in)
47         {
48                 Token token;
49                 if(t)
50                 {
51                         token=*t;
52                         t=0;
53                 }
54                 else
55                         token=parse_token();
56
57                 if(result.keyword.empty())
58                 {
59                         if(token.str.empty())
60                                 break;
61                         else if(token.type!=Token::IDENTIFIER)
62                                 throw ParseError(get_location()+": Syntax error at token '"+token.str+"' (expected an identifier)", src, in.get_line_number());
63                         result.keyword=token.str;
64                         result.valid=true;
65                         result.source=src;
66                         result.line=in.get_line_number();
67                 }
68                 else if(sub)
69                 {
70                         if(token.str=="}")
71                         {
72                                 sub=false;
73                                 finish=true;
74                         }
75                         else
76                         {
77                                 Statement ss=parse_(&token);
78                                 result.sub.push_back(ss);
79                         }
80                 }
81                 else if(finish)
82                 {
83                         if(token.str!=";")
84                                 throw ParseError(get_location()+": Syntax error at token '"+token.str+"' (Expected a ';')", src, in.get_line_number());
85                         break;
86                 }
87                 else if(token.str=="{")
88                         sub=true;
89                 else if(token.str==";")
90                         break;
91                 else if(token.type==Token::INTEGER)
92                         result.args.push_back(Value(Value::INTEGER, token.str));
93                 else if(token.type==Token::FLOAT)
94                         result.args.push_back(Value(Value::FLOAT, token.str));
95                 else if(token.type==Token::STRING)
96                         result.args.push_back(Value(Value::STRING, token.str));
97                 else if(token.type==Token::IDENTIFIER)
98                 {
99                         if(token.str=="true")
100                                 result.args.push_back(Value(Value::BOOLEAN, "1"));
101                         else if(token.str=="false")
102                                 result.args.push_back(Value(Value::BOOLEAN, "0"));
103                         else
104                                 result.args.push_back(Value(Value::ENUM, token.str));
105                         //result.args.push_back(resolve_identifiertoken.str);
106                 }
107                 else if(token.str=="")
108                         throw ParseError(src+": Unexcepted EOF", src, in.get_line_number());
109                 else
110                         throw ParseError(get_location()+": Syntax error", src, in.get_line_number());
111         }
112
113         return result;
114 }
115
116 Token Parser::parse_token()
117 {
118         int c=0;
119         unsigned comment=0;
120
121         // Skip over comments and whitespace
122         while(in)
123         {
124                 c=in.get();
125                 int next=in.peek();
126
127                 if(c=='/' && next=='/')
128                         comment=1;
129                 else if(c=='/' && next=='*')
130                         comment=2;
131                 else if(c=='\n' && comment==1)
132                         comment=0;
133                 else if(c=='*' && next=='/' && comment==2)
134                         comment=3;
135                 else if(comment==3)   // Skip the second character of block comment end
136                         comment=0;
137                 else if(!isspace(c) && !comment)
138                         break;
139         }
140
141         if(comment)  // Didn't hit any non-whitespace
142                 throw ParseError(src+": Unfinished comment", src, in.get_line_number());
143
144         enum ParseState
145         {
146                 INIT,
147                 SIGN,
148                 FLOATEXPINIT,
149                 FLOATEXPSIGN,
150                 STRING,
151                 ACCEPT,
152                 ZERO,
153                 DECIMAL,
154                 HEXADECIMAL,
155                 OCTAL,
156                 FLOAT,
157                 FLOATEXP,
158                 IDENTIFIER
159         };
160
161         static Token::Type token_type[]=
162         {
163                 Token::SPECIAL,
164                 Token::SPECIAL,
165                 Token::SPECIAL,
166                 Token::SPECIAL,
167                 Token::STRING,
168                 Token::SPECIAL,
169                 Token::INTEGER,
170                 Token::INTEGER,
171                 Token::INTEGER,
172                 Token::INTEGER,
173                 Token::FLOAT,
174                 Token::FLOAT,
175                 Token::IDENTIFIER
176         };
177
178         ParseState state=INIT;
179         string     buf;
180         bool       escape=false;
181
182         while(in)
183         {
184                 if(state!=INIT)
185                         c=in.get();
186                 int next=in.peek();
187
188                 buf+=c;
189
190                 switch(state)
191                 {
192                 case INIT:
193                         if(c=='0')
194                                 state=ZERO;
195                         else if(c=='-' || c=='+')
196                                 state=SIGN;
197                         else if(c=='.')
198                                 state=FLOAT;
199                         else if(c=='"')
200                                 state=STRING;
201                         else if(c=='{' || c=='}' || c==';')
202                                 return Token(Token::SPECIAL, string(1, c));
203                         else if(isdigit(c))
204                                 state=DECIMAL;
205                         else if(isalpha(c))
206                                 state=IDENTIFIER;
207                         else
208                                 parse_error(c, state);
209                         break;
210
211                 case SIGN:
212                         if(c=='0')
213                                 state=ZERO;
214                         else if(isdigit(c))
215                                 state=DECIMAL;
216                         else if(c=='.')
217                                 state=FLOAT;
218                         else
219                                 parse_error(c, state);
220                         break;
221
222                 case ZERO:
223                         if(c=='x')
224                                 state=HEXADECIMAL;
225                         else if(isdigit(c))
226                                 state=OCTAL;
227                         else if(c=='.')
228                                 state=FLOAT;
229                         else
230                                 parse_error(c, state);
231                         break;
232
233                 case DECIMAL:
234                         if(c=='.')
235                                 state=FLOAT;
236                         else if(!isdigit(c))
237                                 parse_error(c, state);
238                         break;
239
240                 case HEXADECIMAL:
241                         if(!isxdigit(c))
242                                 parse_error(c, state);
243                         break;
244
245                 case OCTAL:
246                         if(!isodigit(c))
247                                 parse_error(c, state);
248                         break;
249
250                 case FLOAT:
251                         if(c=='e' || c=='E')
252                                 state=FLOATEXPINIT;
253                         else if(!isdigit(c))
254                                 parse_error(c, state);
255                         break;
256
257                 case FLOATEXPINIT:
258                         if(c=='+' || c=='-')
259                                 state=FLOATEXPSIGN;
260                         else if(isdigit(c))
261                                 state=FLOATEXP;
262                         else
263                                 parse_error(c, state);
264                         break;
265
266                 case FLOATEXPSIGN:
267                         if(isdigit(c))
268                                 state=FLOATEXP;
269                         else
270                                 parse_error(c, state);
271                         break;
272
273                 case FLOATEXP:
274                         if(!isdigit(c))
275                                 parse_error(c, state);
276                         break;
277
278                 case STRING:
279                         if(c=='\\')
280                                 escape=!escape;
281                         else if(c=='"' && !escape)
282                                 return Token(Token::STRING, unescape_string(buf));
283                         else
284                                 escape=false;
285                         break;
286
287                 case IDENTIFIER:
288                         if(!isalpha(c) && !isdigit(c) && c!='_')
289                                 parse_error(c, state);
290                         break;
291
292                 default:
293                         throw Exception(get_location()+": Internal error (bad state)");
294                 }
295
296                 if(is_delimiter(next) && state>=ACCEPT)
297                         return Token(token_type[state], buf);
298         }
299
300         return Token(Token::SPECIAL, "");
301 }
302
303 bool Parser::is_delimiter(int c)
304 {
305         return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/');
306 }
307
308 bool Parser::isodigit(int c)
309 {
310         return (c>='0' && c<='7');
311 }
312
313 string Parser::unescape_string(const string &str)
314 {
315         string   result;
316         bool     escape=false;
317         unsigned hexcape=0;
318         for(string::const_iterator i=str.begin()+1; i!=str.end()-1; ++i)
319         {
320                 if(escape)
321                 {
322                         if(*i=='n')
323                                 result+='\n';
324                         else if(*i=='t')
325                                 result+='\t';
326                         else if(*i=='\\')
327                                 result+='\\';
328                         else if(*i=='"')
329                                 result+='"';
330                         else if(*i=='x')
331                                 hexcape=0x100;
332                         else
333                                 throw ParseError("Invalid escape", src, in.get_line_number());
334                         escape=false;
335                 }
336                 else if(hexcape)
337                 {
338                         unsigned digit=0;
339                         if(*i>='0' && *i<='9')
340                                 digit=*i-'0';
341                         else if(*i>='a' && *i<='f')
342                                 digit=*i-'a'+10;
343                         else if(*i>='A' && *i<='F')
344                                 digit=*i-'A'+10;
345                         else
346                                 throw ParseError("Invalid hex digit", src, in.get_line_number());
347
348                         hexcape=(hexcape<<4)|digit;
349                         if(hexcape&0x10000)
350                         {
351                                 result+=hexcape&0xFF;
352                                 hexcape=0;
353                         }
354                 }
355                 else if(*i=='\\')
356                         escape=true;
357                 else
358                         result+=*i;
359         }
360
361         return result;
362 }
363
364 string Parser::get_location()
365 {
366         ostringstream ss;
367         ss<<src<<':'<<in.get_line_number();
368         return ss.str();
369 }
370
371 void Parser::parse_error(int c, int state)
372 {
373         ostringstream ss;
374         ss<<get_location()<<": Parse error at '"<<(char)c<<"' (state "<<state<<')';
375         throw ParseError(ss.str(), src, in.get_line_number());
376 }
377
378 } // namespace Parser
379 } // namespace Msp