]> git.tdb.fi Git - libs/datafile.git/blob - source/parser.cpp
Type checking in value conversions
[libs/datafile.git] / source / parser.cpp
1 /*
2 This file is part of libmspparser
3 Copyright © 2006  Mikko Rasa, Mikkosoft Productions
4 Distributed under the LGPL
5 */
6 #include <cctype>
7 #include <sstream>
8 #include <msp/error.h>
9 #include "parser.h"
10 #include "statement.h"
11 #include "token.h"
12
13 using namespace std;
14
15 namespace Msp {
16 namespace Parser {
17
18 Parser::Parser(istream &i, const string &s):
19         in(i),
20         src(s),
21         good(true)
22 { }
23
24 Statement Parser::parse()
25 {
26         if(!good)
27                 throw Exception("Parser is not good");
28         
29         try
30         {
31                 return parse_(0);
32         }
33         catch(const Exception &e)
34         {
35                 good=false;
36                 throw;
37         }
38 }
39
40 Statement Parser::parse_(const Token *t)
41 {
42         Statement result;
43         bool      sub=false;
44         bool      finish=false;
45         
46         while(in)
47         {
48                 Token token;
49                 if(t)
50                 {
51                         token=*t;
52                         t=0;
53                 }
54                 else
55                         token=parse_token();
56                 
57                 if(result.keyword.empty())
58                 {
59                         if(token.str.empty())
60                                 break;
61                         else if(token.type!=Token::IDENTIFIER)
62                                 throw DataError(get_location()+": Syntax error at token '"+token.str+"' (expected an identifier)");
63                         result.keyword=token.str;
64                         result.valid=true;
65                         result.source=src;
66                         result.line=in.get_line_number();
67                 }
68                 else if(sub)
69                 {
70                         if(token.str=="}")
71                         {
72                                 sub=false;
73                                 finish=true;
74                         }
75                         else
76                         {
77                                 Statement ss=parse_(&token);
78                                 result.sub.push_back(ss);
79                         }
80                 }
81                 else if(finish)
82                 {
83                         if(token.str!=";")
84                                 throw DataError(get_location()+": Syntax error at token '"+token.str+"' (Expected a ';')");
85                         break;
86                 }
87                 else if(token.str=="{")
88                         sub=true;
89                 else if(token.str==";")
90                         break;
91                 else if(token.type==Token::INTEGER)
92                         result.args.push_back(Value(Value::INTEGER, token.str));
93                 else if(token.type==Token::FLOAT)
94                         result.args.push_back(Value(Value::FLOAT, token.str));
95                 else if(token.type==Token::STRING)
96                         result.args.push_back(Value(Value::STRING, token.str));
97                 else if(token.type==Token::IDENTIFIER)
98                 {
99                         if(token.str=="true")
100                                 result.args.push_back(Value(Value::BOOLEAN, "1"));
101                         else if(token.str=="false")
102                                 result.args.push_back(Value(Value::BOOLEAN, "0"));
103                         else
104                                 result.args.push_back(Value(Value::ENUM, token.str));
105                         //result.args.push_back(resolve_identifiertoken.str);
106                 }
107                 else if(token.str=="")
108                         throw DataError(src+": Unexcepted EOF");
109                 else
110                         throw DataError(get_location()+": Syntax error");
111         }
112
113         return result;
114 }
115
116 Token Parser::parse_token()
117 {
118         int c;
119         unsigned comment=0;
120         while(in)
121         {
122                 c=in.get();
123                 int next=in.peek();
124
125                 //cout<<c<<' '<<next<<'\n';
126
127                 if(c=='/' && next=='/')
128                         comment=1;
129                 else if(c=='/' && next=='*')
130                         comment=2;
131                 else if(c=='\n' && comment==1)
132                         comment=0;
133                 else if(c=='*' && next=='/' && comment==2)
134                         comment=3;
135                 else if(comment==3)   // Skip the second character of block comment end
136                         comment=0;
137                 else if(!isspace(c) && !comment)
138                         break;
139         }
140         if(comment)
141                 throw DataError(src+": Unfinished comment");
142         
143         enum ParseState
144         {
145                 INIT,
146                 NEGATIVE,
147                 STRING,
148                 ACCEPT,
149                 ZERO,
150                 DECIMAL,
151                 HEXADECIMAL,
152                 OCTAL,
153                 FLOAT,
154                 IDENTIFIER
155         };
156
157         static Token::Type token_type[]=
158         {
159                 Token::SPECIAL,
160                 Token::SPECIAL,
161                 Token::STRING,
162                 Token::SPECIAL,
163                 Token::INTEGER,
164                 Token::INTEGER,
165                 Token::INTEGER,
166                 Token::INTEGER,
167                 Token::FLOAT,
168                 Token::IDENTIFIER
169         };
170
171         ParseState state=INIT;
172         string     buf;
173         bool       escape=false;
174
175         while(in)
176         {
177                 if(state!=INIT)
178                         c=in.get();
179                 int next=in.peek();
180                 
181                 buf+=c;
182
183                 switch(state)
184                 {
185                 case INIT:
186                         if(c=='0')
187                                 state=ZERO;
188                         else if(c=='-')
189                                 state=NEGATIVE;
190                         else if(c=='.')
191                                 state=FLOAT;
192                         else if(c=='"')
193                                 state=STRING;
194                         else if(c=='{' || c=='}' || c==';')
195                                 return Token(Token::SPECIAL, string(1, c));
196                         else if(isdigit(c))
197                                 state=DECIMAL;
198                         else if(isalpha(c))
199                                 state=IDENTIFIER;
200                         else
201                                 parse_error(c, state);
202                         break;
203                 
204                 case NEGATIVE:
205                         if(c=='0')
206                                 state=ZERO;
207                         else if(isdigit(c))
208                                 state=DECIMAL;
209                         else if(c=='.')
210                                 state=FLOAT;
211                         else
212                                 parse_error(c, state);
213                         break;
214
215                 case ZERO:
216                         if(c=='x')
217                                 state=HEXADECIMAL;
218                         else if(isdigit(c))
219                                 state=OCTAL;
220                         else if(c=='.')
221                                 state=FLOAT;
222                         else
223                                 parse_error(c, state);
224                         break;
225
226                 case DECIMAL:
227                         if(c=='.')
228                                 state=FLOAT;
229                         else if(!isdigit(c))
230                                 parse_error(c, state);
231                         break;
232
233                 case HEXADECIMAL:
234                         if(!isxdigit(c))
235                                 parse_error(c, state);
236                         break;
237
238                 case OCTAL:
239                         if(!isodigit(c))
240                                 parse_error(c, state);
241                         break;
242
243                 case FLOAT:
244                         if(!isdigit(c))
245                                 parse_error(c, state);
246                         break;
247
248                 case STRING:
249                         if(c=='\\')
250                                 escape=!escape;
251                         else if(c=='"' && !escape)
252                                 return Token(Token::STRING, unescape_string(buf));
253                         else
254                                 escape=false;
255                         break;
256                 
257                 case IDENTIFIER:
258                         if(!isalpha(c) && !isdigit(c) && c!='_')
259                                 parse_error(c, state);
260                         break;
261
262                 default:
263                         throw Exception(get_location()+": Internal error (bad state)");
264                 }
265
266                 if(is_delimiter(next) && state>=ACCEPT)
267                         return Token(token_type[state], buf);
268         }
269
270         return Token(Token::SPECIAL, "");
271 }
272
273 bool Parser::is_delimiter(int c)
274 {
275         return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/');
276 }
277
278 bool Parser::isodigit(int c)
279 {
280         return (c>='0' && c<='7');
281 }
282
283 string Parser::unescape_string(const string &str)
284 {
285         string   result;
286         bool     escape=false;
287         unsigned hexcape=0;
288         for(string::const_iterator i=str.begin()+1; i!=str.end()-1; ++i)
289         {
290                 if(escape)
291                 {
292                         if(*i=='n')
293                                 result+='\n';
294                         else if(*i=='t')
295                                 result+='\t';
296                         else if(*i=='\\')
297                                 result+='\\';
298                         else if(*i=='"')
299                                 result+='"';
300                         else if(*i=='x')
301                                 hexcape=0x100;
302                         else
303                                 throw DataError("Invalid escape");
304                         escape=false;
305                 }
306                 else if(hexcape)
307                 {
308                         unsigned digit=0;
309                         if(*i>='0' && *i<='9')
310                                 digit=*i-'0';
311                         else if(*i>='a' && *i<='f')
312                                 digit=*i-'a'+10;
313                         else if(*i>='A' && *i<='F')
314                                 digit=*i-'A'+10;
315                         else
316                                 throw DataError("Invalid hex digit");
317
318                         hexcape=(hexcape<<4)|digit;
319                         if(hexcape&0x10000)
320                         {
321                                 result+=hexcape&0xFF;
322                                 hexcape=0;
323                         }
324                 }
325                 else if(*i=='\\')
326                         escape=true;
327                 else
328                         result+=*i;
329         }
330
331         return result;
332 }
333
334 string Parser::get_location()
335 {
336         ostringstream ss;
337         ss<<src<<':'<<in.get_line_number();
338         return ss.str();
339 }
340
341 void Parser::parse_error(int c, int state)
342 {
343         ostringstream ss;
344         ss<<get_location()<<": Parse error at '"<<c<<"' (state "<<state<<')';
345         throw DataError(ss.str());
346 }
347
348 } // namespace Parser
349 } // namespace Msp