]> git.tdb.fi Git - libs/datafile.git/blob - source/parser.cpp
Rename to datafile
[libs/datafile.git] / source / parser.cpp
1 /* $Id$
2
3 This file is part of libmspdatafile
4 Copyright © 2006  Mikko Rasa, Mikkosoft Productions
5 Distributed under the LGPL
6 */
7 #include <cctype>
8 #include <sstream>
9 #include "error.h"
10 #include "parser.h"
11 #include "statement.h"
12 #include "token.h"
13
14 using namespace std;
15
16 namespace Msp {
17 namespace DataFile {
18
19 Parser::Parser(istream &i, const string &s):
20         in(i),
21         src(s),
22         good(true)
23 { }
24
25 Statement Parser::parse()
26 {
27         if(!good)
28                 throw Exception("Parser is not good");
29
30         try
31         {
32                 return parse_(0);
33         }
34         catch(const Exception &e)
35         {
36                 good=false;
37                 throw;
38         }
39 }
40
41 Statement Parser::parse_(const Token *t)
42 {
43         Statement result;
44         bool      sub=false;
45         bool      finish=false;
46
47         while(in)
48         {
49                 Token token;
50                 if(t)
51                 {
52                         token=*t;
53                         t=0;
54                 }
55                 else
56                         token=parse_token();
57
58                 if(result.keyword.empty())
59                 {
60                         if(token.str.empty())
61                                 break;
62                         else if(token.type!=Token::IDENTIFIER)
63                                 throw ParseError(get_location()+": Syntax error at token '"+token.str+"' (expected an identifier)", src, in.get_line_number());
64                         result.keyword=token.str;
65                         result.valid=true;
66                         result.source=src;
67                         result.line=in.get_line_number();
68                 }
69                 else if(sub)
70                 {
71                         if(token.str=="}")
72                         {
73                                 sub=false;
74                                 finish=true;
75                         }
76                         else
77                         {
78                                 Statement ss=parse_(&token);
79                                 result.sub.push_back(ss);
80                         }
81                 }
82                 else if(finish)
83                 {
84                         if(token.str!=";")
85                                 throw ParseError(get_location()+": Syntax error at token '"+token.str+"' (Expected a ';')", src, in.get_line_number());
86                         break;
87                 }
88                 else if(token.str=="{")
89                         sub=true;
90                 else if(token.str==";")
91                         break;
92                 else if(token.type==Token::INTEGER)
93                         result.args.push_back(Value(Value::INTEGER, token.str));
94                 else if(token.type==Token::FLOAT)
95                         result.args.push_back(Value(Value::FLOAT, token.str));
96                 else if(token.type==Token::STRING)
97                         result.args.push_back(Value(Value::STRING, token.str));
98                 else if(token.type==Token::IDENTIFIER)
99                 {
100                         if(token.str=="true")
101                                 result.args.push_back(Value(Value::BOOLEAN, "1"));
102                         else if(token.str=="false")
103                                 result.args.push_back(Value(Value::BOOLEAN, "0"));
104                         else
105                                 result.args.push_back(Value(Value::ENUM, token.str));
106                         //result.args.push_back(resolve_identifiertoken.str);
107                 }
108                 else if(token.str=="")
109                         throw ParseError(src+": Unexcepted EOF", src, in.get_line_number());
110                 else
111                         throw ParseError(get_location()+": Syntax error", src, in.get_line_number());
112         }
113
114         return result;
115 }
116
117 Token Parser::parse_token()
118 {
119         int c=0;
120         unsigned comment=0;
121
122         // Skip over comments and whitespace
123         while(in)
124         {
125                 c=in.get();
126                 int next=in.peek();
127
128                 if(c=='/' && next=='/')
129                         comment=1;
130                 else if(c=='/' && next=='*')
131                         comment=2;
132                 else if(c=='\n' && comment==1)
133                         comment=0;
134                 else if(c=='*' && next=='/' && comment==2)
135                         comment=3;
136                 else if(comment==3)   // Skip the second character of block comment end
137                         comment=0;
138                 else if(!isspace(c) && !comment)
139                         break;
140         }
141
142         if(comment)  // Didn't hit any non-whitespace
143                 throw ParseError(src+": Unfinished comment", src, in.get_line_number());
144
145         enum ParseState
146         {
147                 INIT,
148                 SIGN,
149                 FLOATEXPINIT,
150                 FLOATEXPSIGN,
151                 STRING,
152                 ACCEPT,
153                 ZERO,
154                 DECIMAL,
155                 HEXADECIMAL,
156                 OCTAL,
157                 FLOAT,
158                 FLOATEXP,
159                 IDENTIFIER
160         };
161
162         static Token::Type token_type[]=
163         {
164                 Token::SPECIAL,
165                 Token::SPECIAL,
166                 Token::SPECIAL,
167                 Token::SPECIAL,
168                 Token::STRING,
169                 Token::SPECIAL,
170                 Token::INTEGER,
171                 Token::INTEGER,
172                 Token::INTEGER,
173                 Token::INTEGER,
174                 Token::FLOAT,
175                 Token::FLOAT,
176                 Token::IDENTIFIER
177         };
178
179         ParseState state=INIT;
180         string     buf;
181         bool       escape=false;
182
183         while(in)
184         {
185                 if(state!=INIT)
186                         c=in.get();
187                 int next=in.peek();
188
189                 buf+=c;
190
191                 switch(state)
192                 {
193                 case INIT:
194                         if(c=='0')
195                                 state=ZERO;
196                         else if(c=='-' || c=='+')
197                                 state=SIGN;
198                         else if(c=='.')
199                                 state=FLOAT;
200                         else if(c=='"')
201                                 state=STRING;
202                         else if(c=='{' || c=='}' || c==';')
203                                 return Token(Token::SPECIAL, string(1, c));
204                         else if(isdigit(c))
205                                 state=DECIMAL;
206                         else if(isalpha(c))
207                                 state=IDENTIFIER;
208                         else
209                                 parse_error(c, state);
210                         break;
211
212                 case SIGN:
213                         if(c=='0')
214                                 state=ZERO;
215                         else if(isdigit(c))
216                                 state=DECIMAL;
217                         else if(c=='.')
218                                 state=FLOAT;
219                         else
220                                 parse_error(c, state);
221                         break;
222
223                 case ZERO:
224                         if(c=='x')
225                                 state=HEXADECIMAL;
226                         else if(isdigit(c))
227                                 state=OCTAL;
228                         else if(c=='.')
229                                 state=FLOAT;
230                         else
231                                 parse_error(c, state);
232                         break;
233
234                 case DECIMAL:
235                         if(c=='.')
236                                 state=FLOAT;
237                         else if(!isdigit(c))
238                                 parse_error(c, state);
239                         break;
240
241                 case HEXADECIMAL:
242                         if(!isxdigit(c))
243                                 parse_error(c, state);
244                         break;
245
246                 case OCTAL:
247                         if(!isodigit(c))
248                                 parse_error(c, state);
249                         break;
250
251                 case FLOAT:
252                         if(c=='e' || c=='E')
253                                 state=FLOATEXPINIT;
254                         else if(!isdigit(c))
255                                 parse_error(c, state);
256                         break;
257
258                 case FLOATEXPINIT:
259                         if(c=='+' || c=='-')
260                                 state=FLOATEXPSIGN;
261                         else if(isdigit(c))
262                                 state=FLOATEXP;
263                         else
264                                 parse_error(c, state);
265                         break;
266
267                 case FLOATEXPSIGN:
268                         if(isdigit(c))
269                                 state=FLOATEXP;
270                         else
271                                 parse_error(c, state);
272                         break;
273
274                 case FLOATEXP:
275                         if(!isdigit(c))
276                                 parse_error(c, state);
277                         break;
278
279                 case STRING:
280                         if(c=='\\')
281                                 escape=!escape;
282                         else if(c=='"' && !escape)
283                                 return Token(Token::STRING, unescape_string(buf));
284                         else
285                                 escape=false;
286                         break;
287
288                 case IDENTIFIER:
289                         if(!isalpha(c) && !isdigit(c) && c!='_')
290                                 parse_error(c, state);
291                         break;
292
293                 default:
294                         throw Exception(get_location()+": Internal error (bad state)");
295                 }
296
297                 if(is_delimiter(next) && state>=ACCEPT)
298                         return Token(token_type[state], buf);
299         }
300
301         return Token(Token::SPECIAL, "");
302 }
303
304 bool Parser::is_delimiter(int c)
305 {
306         return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/');
307 }
308
309 bool Parser::isodigit(int c)
310 {
311         return (c>='0' && c<='7');
312 }
313
314 string Parser::unescape_string(const string &str)
315 {
316         string   result;
317         bool     escape=false;
318         unsigned hexcape=0;
319         for(string::const_iterator i=str.begin()+1; i!=str.end()-1; ++i)
320         {
321                 if(escape)
322                 {
323                         if(*i=='n')
324                                 result+='\n';
325                         else if(*i=='t')
326                                 result+='\t';
327                         else if(*i=='\\')
328                                 result+='\\';
329                         else if(*i=='"')
330                                 result+='"';
331                         else if(*i=='x')
332                                 hexcape=0x100;
333                         else
334                                 throw ParseError("Invalid escape", src, in.get_line_number());
335                         escape=false;
336                 }
337                 else if(hexcape)
338                 {
339                         unsigned digit=0;
340                         if(*i>='0' && *i<='9')
341                                 digit=*i-'0';
342                         else if(*i>='a' && *i<='f')
343                                 digit=*i-'a'+10;
344                         else if(*i>='A' && *i<='F')
345                                 digit=*i-'A'+10;
346                         else
347                                 throw ParseError("Invalid hex digit", src, in.get_line_number());
348
349                         hexcape=(hexcape<<4)|digit;
350                         if(hexcape&0x10000)
351                         {
352                                 result+=hexcape&0xFF;
353                                 hexcape=0;
354                         }
355                 }
356                 else if(*i=='\\')
357                         escape=true;
358                 else
359                         result+=*i;
360         }
361
362         return result;
363 }
364
365 string Parser::get_location()
366 {
367         ostringstream ss;
368         ss<<src<<':'<<in.get_line_number();
369         return ss.str();
370 }
371
372 void Parser::parse_error(int c, int state)
373 {
374         ostringstream ss;
375         ss<<get_location()<<": Parse error at '"<<(char)c<<"' (state "<<state<<')';
376         throw ParseError(ss.str(), src, in.get_line_number());
377 }
378
379 } // namespace DataFile
380 } // namespace Msp