]> git.tdb.fi Git - libs/datafile.git/blob - source/parser.cpp
ef2daa3e0ab8bd5dbb75ce8696a64c789c8417ab
[libs/datafile.git] / source / parser.cpp
1 /*
2 This file is part of libmspparser
3 Copyright © 2006 Mikko Rasa, Mikkosoft Productions
4 Distributed under the LGPL
5 */
6 #include <cctype>
7 #include <sstream>
8 #include <msp/error.h>
9 #include "parser.h"
10 #include "statement.h"
11 #include "token.h"
12
13 using namespace std;
14
15 namespace Msp {
16 namespace Parser {
17
18 Parser::Parser(istream &i, const string &s):
19         in(i),
20         src(s),
21         good(true)
22 { }
23
24 Statement Parser::parse()
25 {
26         if(!good)
27                 throw Exception("Parser is not good");
28         
29         try
30         {
31                 return parse_(0);
32         }
33         catch(const Exception &e)
34         {
35                 good=false;
36                 throw;
37         }
38 }
39
40 Statement Parser::parse_(const Token *t)
41 {
42         Statement result;
43         bool      sub=false;
44         bool      finish=false;
45         
46         while(in)
47         {
48                 Token token;
49                 if(t)
50                 {
51                         token=*t;
52                         t=0;
53                 }
54                 else
55                         token=parse_token();
56                 
57                 if(result.keyword.empty())
58                 {
59                         if(token.str.empty())
60                                 break;
61                         else if(token.type!=Token::IDENTIFIER)
62                                 throw DataError(get_location()+": Syntax error at token '"+token.str+"' (expected an identifier)");
63                         result.keyword=token.str;
64                         result.valid=true;
65                         result.source=src;
66                         result.line=in.get_line_number();
67                 }
68                 else if(sub)
69                 {
70                         if(token.str=="}")
71                         {
72                                 sub=false;
73                                 finish=true;
74                         }
75                         else
76                         {
77                                 Statement ss=parse_(&token);
78                                 result.sub.push_back(ss);
79                         }
80                 }
81                 else if(finish)
82                 {
83                         if(token.str!=";")
84                                 throw DataError(get_location()+": Syntax error at token '"+token.str+"' (Expected a ';')");
85                         break;
86                 }
87                 else if(token.str=="{")
88                         sub=true;
89                 else if(token.str==";")
90                         break;
91                 else if(token.type==Token::INTEGER)
92                         result.args.push_back(Value(Value::INTEGER, token.str));
93                 else if(token.type==Token::FLOAT)
94                         result.args.push_back(Value(Value::FLOAT, token.str));
95                 else if(token.type==Token::STRING)
96                         result.args.push_back(Value(Value::STRING, token.str));
97                 else if(token.type==Token::IDENTIFIER)
98                 {
99                         //result.args.push_back(resolve_identifiertoken.str);
100                 }
101                 else if(token.str=="")
102                         throw DataError(src+": Unexcepted EOF");
103                 else
104                         throw DataError(get_location()+": Syntax error");
105         }
106
107         return result;
108 }
109
110 Token Parser::parse_token()
111 {
112         int c;
113         unsigned comment=0;
114         while(in)
115         {
116                 c=in.get();
117                 int next=in.peek();
118
119                 //cout<<c<<' '<<next<<'\n';
120
121                 if(c=='/' && next=='/')
122                         comment=1;
123                 else if(c=='/' && next=='*')
124                         comment=2;
125                 else if(c=='\n' && comment==1)
126                         comment=0;
127                 else if(c=='*' && next=='/' && comment==2)
128                         comment=3;
129                 else if(comment==3)   // Skip the second character of block comment end
130                         comment=0;
131                 else if(!isspace(c) && !comment)
132                         break;
133         }
134         if(comment)
135                 throw DataError(src+": Unfinished comment");
136         
137         enum ParseState
138         {
139                 INIT,
140                 NEGATIVE,
141                 STRING,
142                 ACCEPT,
143                 ZERO,
144                 DECIMAL,
145                 HEXADECIMAL,
146                 OCTAL,
147                 FLOAT,
148                 IDENTIFIER
149         };
150
151         static Token::Type token_type[]=
152         {
153                 Token::SPECIAL,
154                 Token::SPECIAL,
155                 Token::STRING,
156                 Token::SPECIAL,
157                 Token::INTEGER,
158                 Token::INTEGER,
159                 Token::INTEGER,
160                 Token::INTEGER,
161                 Token::FLOAT,
162                 Token::IDENTIFIER
163         };
164
165         ParseState state=INIT;
166         string     buf;
167         bool       escape=false;
168
169         while(in)
170         {
171                 if(state!=INIT)
172                         c=in.get();
173                 int next=in.peek();
174                 
175                 buf+=c;
176
177                 switch(state)
178                 {
179                 case INIT:
180                         if(c=='0')
181                                 state=ZERO;
182                         else if(c=='-')
183                                 state=NEGATIVE;
184                         else if(c=='.')
185                                 state=FLOAT;
186                         else if(c=='"')
187                                 state=STRING;
188                         else if(c=='{' || c=='}' || c==';')
189                                 return Token(Token::SPECIAL, string(1, c));
190                         else if(isdigit(c))
191                                 state=DECIMAL;
192                         else if(isalpha(c))
193                                 state=IDENTIFIER;
194                         else
195                                 parse_error(c, state);
196                         break;
197                 
198                 case NEGATIVE:
199                         if(c=='0')
200                                 state=ZERO;
201                         else if(isdigit(c))
202                                 state=DECIMAL;
203                         else if(c=='.')
204                                 state=FLOAT;
205                         else
206                                 parse_error(c, state);
207                         break;
208
209                 case ZERO:
210                         if(c=='x')
211                                 state=HEXADECIMAL;
212                         else if(isdigit(c))
213                                 state=OCTAL;
214                         else if(c=='.')
215                                 state=FLOAT;
216                         else
217                                 parse_error(c, state);
218                         break;
219
220                 case DECIMAL:
221                         if(c=='.')
222                                 state=FLOAT;
223                         else if(!isdigit(c))
224                                 parse_error(c, state);
225                         break;
226
227                 case HEXADECIMAL:
228                         if(!isxdigit(c))
229                                 parse_error(c, state);
230                         break;
231
232                 case OCTAL:
233                         if(!isodigit(c))
234                                 parse_error(c, state);
235                         break;
236
237                 case FLOAT:
238                         if(!isdigit(c))
239                                 parse_error(c, state);
240                         break;
241
242                 case STRING:
243                         if(c=='\\')
244                                 escape=!escape;
245                         else if(c=='"' && !escape)
246                                 return Token(Token::STRING, unescape_string(buf));
247                         else
248                                 escape=false;
249                         break;
250                 
251                 case IDENTIFIER:
252                         if(!isalpha(c) && !isdigit(c) && c!='_')
253                                 parse_error(c, state);
254                         break;
255
256                 default:
257                         throw Exception(get_location()+": Internal error (bad state)");
258                 }
259
260                 if(is_delimiter(next) && state>=ACCEPT)
261                         return Token(token_type[state], buf);
262         }
263
264         return Token(Token::SPECIAL, "");
265 }
266
267 bool Parser::is_delimiter(int c)
268 {
269         return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/');
270 }
271
272 bool Parser::isodigit(int c)
273 {
274         return (c>='0' && c<='7');
275 }
276
277 string Parser::unescape_string(const string &str)
278 {
279         string   result;
280         bool     escape=false;
281         unsigned hexcape=0;
282         for(string::const_iterator i=str.begin()+1; i!=str.end()-1; ++i)
283         {
284                 if(escape)
285                 {
286                         if(*i=='n')
287                                 result+='\n';
288                         else if(*i=='t')
289                                 result+='\t';
290                         else if(*i=='\\')
291                                 result+='\\';
292                         else if(*i=='"')
293                                 result+='"';
294                         else if(*i=='x')
295                                 hexcape=0x100;
296                         else
297                                 throw DataError("Invalid escape");
298                         escape=false;
299                 }
300                 else if(hexcape)
301                 {
302                         unsigned digit=0;
303                         if(*i>='0' && *i<='9')
304                                 digit=*i-'0';
305                         else if(*i>='a' && *i<='f')
306                                 digit=*i-'a'+10;
307                         else if(*i>='A' && *i<='F')
308                                 digit=*i-'A'+10;
309                         else
310                                 throw DataError("Invalid hex digit");
311
312                         hexcape=(hexcape<<4)|digit;
313                         if(hexcape&0x10000)
314                         {
315                                 result+=hexcape&0xFF;
316                                 hexcape=0;
317                         }
318                 }
319                 else if(*i=='\\')
320                         escape=true;
321                 else
322                         result+=*i;
323         }
324
325         return result;
326 }
327
328 string Parser::get_location()
329 {
330         ostringstream ss;
331         ss<<src<<':'<<in.get_line_number();
332         return ss.str();
333 }
334
335 void Parser::parse_error(int c, int state)
336 {
337         ostringstream ss;
338         ss<<get_location()<<": Parse error at '"<<c<<"' (state "<<state<<')';
339         throw DataError(ss.str());
340 }
341
342 } // namespace Parser
343 } // namespace Msp