]> git.tdb.fi Git - libs/datafile.git/blob - source/parser.cpp
Add files
[libs/datafile.git] / source / parser.cpp
1 /*
2 This file is part of libmspparser
3 Copyright © 2006 Mikko Rasa, Mikkosoft Productions
4 Distributed under the LGPL
5 */
6 #include <cctype>
7 #include <msp/error.h>
8 #include <msp/streams/format.h>
9 #include "parser.h"
10 #include "statement.h"
11 #include "token.h"
12
13 using namespace std;
14
15 #include <iostream>
16
17 namespace Msp {
18 namespace Parser {
19
20 Parser::Parser(istream &i, const string &s):
21         in(i),
22         src(s),
23         good(true)
24 { }
25
26 Statement Parser::parse()
27 {
28         if(!good)
29                 throw Exception("Parser is not good");
30         
31         try
32         {
33                 return parse_(0);
34         }
35         catch(const Exception &e)
36         {
37                 good=false;
38                 throw;
39         }
40 }
41
42 Statement Parser::parse_(const Token *t)
43 {
44         Statement result;
45         bool      sub=false;
46         bool      finish=false;
47         
48         while(in)
49         {
50                 Token token;
51                 if(t)
52                 {
53                         token=*t;
54                         t=0;
55                 }
56                 else
57                         token=parse_token();
58                 
59                 if(result.keyword.empty())
60                 {
61                         if(token.str.empty())
62                                 break;
63                         else if(token.type!=Token::IDENTIFIER)
64                                 throw DataError(get_location()+format(": Syntax error at token '%S' (expected an identifier)", &token.str).str());
65                         result.keyword=token.str;
66                         result.valid=true;
67                         result.source=src;
68                         result.line=in.get_line_number();
69                 }
70                 else if(sub)
71                 {
72                         if(token.str=="}")
73                         {
74                                 sub=false;
75                                 finish=true;
76                         }
77                         else
78                         {
79                                 Statement ss=parse_(&token);
80                                 result.sub.push_back(ss);
81                         }
82                 }
83                 else if(finish)
84                 {
85                         if(token.str!=";")
86                                 throw DataError(get_location()+format(": Syntax error at token '%S' (Expected a ';')", &token.str).str());
87                         break;
88                 }
89                 else if(token.str=="{")
90                         sub=true;
91                 else if(token.str==";")
92                         break;
93                 else if(token.type==Token::INTEGER)
94                         result.args.push_back(Value(Value::INTEGER, token.str));
95                 else if(token.type==Token::FLOAT)
96                         result.args.push_back(Value(Value::FLOAT, token.str));
97                 else if(token.type==Token::STRING)
98                         result.args.push_back(Value(Value::STRING, token.str));
99                 else if(token.type==Token::IDENTIFIER)
100                 {
101                         //result.args.push_back(resolve_identifiertoken.str);
102                 }
103                 else if(token.str=="")
104                         throw DataError(src+": Unexcepted EOF");
105                 else
106                         throw DataError(get_location()+": Syntax error");
107         }
108
109         return result;
110 }
111
112 Token Parser::parse_token()
113 {
114         int c;
115         unsigned comment=0;
116         while(in)
117         {
118                 c=in.get();
119                 int next=in.peek();
120
121                 //cout<<c<<' '<<next<<'\n';
122
123                 if(c=='/' && next=='/')
124                         comment=1;
125                 else if(c=='/' && next=='*')
126                         comment=2;
127                 else if(c=='\n' && comment==1)
128                         comment=0;
129                 else if(c=='*' && next=='/' && comment==2)
130                         comment=3;
131                 else if(comment==3)   // Skip the second character of block comment end
132                         comment=0;
133                 else if(!isspace(c) && !comment)
134                         break;
135         }
136         if(comment)
137                 throw DataError(src+": Unfinished comment");
138         
139         enum ParseState
140         {
141                 INIT,
142                 NEGATIVE,
143                 STRING,
144                 ACCEPT,
145                 ZERO,
146                 DECIMAL,
147                 HEXADECIMAL,
148                 OCTAL,
149                 FLOAT,
150                 IDENTIFIER
151         };
152
153         static Token::Type token_type[]=
154         {
155                 Token::SPECIAL,
156                 Token::SPECIAL,
157                 Token::STRING,
158                 Token::SPECIAL,
159                 Token::INTEGER,
160                 Token::INTEGER,
161                 Token::INTEGER,
162                 Token::INTEGER,
163                 Token::FLOAT,
164                 Token::IDENTIFIER
165         };
166
167         ParseState state=INIT;
168         string     buf;
169         bool       escape=false;
170
171         while(in)
172         {
173                 if(state!=INIT)
174                         c=in.get();
175                 int next=in.peek();
176                 
177                 buf+=c;
178
179                 switch(state)
180                 {
181                 case INIT:
182                         if(c=='0')
183                                 state=ZERO;
184                         else if(c=='-')
185                                 state=NEGATIVE;
186                         else if(c=='.')
187                                 state=FLOAT;
188                         else if(c=='"')
189                                 state=STRING;
190                         else if(c=='{' || c=='}' || c==';')
191                                 return Token(Token::SPECIAL, string(1, c));
192                         else if(isdigit(c))
193                                 state=DECIMAL;
194                         else if(isalpha(c))
195                                 state=IDENTIFIER;
196                         else
197                                 parse_error(c, state);
198                         break;
199                 
200                 case NEGATIVE:
201                         if(c=='0')
202                                 state=ZERO;
203                         else if(isdigit(c))
204                                 state=DECIMAL;
205                         else if(c=='.')
206                                 state=FLOAT;
207                         else
208                                 parse_error(c, state);
209                         break;
210
211                 case ZERO:
212                         if(c=='x')
213                                 state=HEXADECIMAL;
214                         else if(isdigit(c))
215                                 state=OCTAL;
216                         else if(c=='.')
217                                 state=FLOAT;
218                         else
219                                 parse_error(c, state);
220                         break;
221
222                 case DECIMAL:
223                         if(c=='.')
224                                 state=FLOAT;
225                         else if(!isdigit(c))
226                                 parse_error(c, state);
227                         break;
228
229                 case HEXADECIMAL:
230                         if(!isxdigit(c))
231                                 parse_error(c, state);
232                         break;
233
234                 case OCTAL:
235                         if(!isodigit(c))
236                                 parse_error(c, state);
237                         break;
238
239                 case FLOAT:
240                         if(!isdigit(c))
241                                 parse_error(c, state);
242                         break;
243
244                 case STRING:
245                         if(c=='\\')
246                                 escape=!escape;
247                         else if(c=='"' && !escape)
248                                 return Token(Token::STRING, unescape_string(buf));
249                         else
250                                 escape=false;
251                         break;
252                 
253                 case IDENTIFIER:
254                         if(!isalpha(c) && !isdigit(c) && c!='_')
255                                 parse_error(c, state);
256                         break;
257
258                 default:
259                         throw Exception(get_location()+": Internal error (bad state)");
260                 }
261
262                 if(is_delimiter(next) && state>=ACCEPT)
263                         return Token(token_type[state], buf);
264         }
265
266         return Token(Token::SPECIAL, "");
267 }
268
269 bool Parser::is_delimiter(int c)
270 {
271         return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/');
272 }
273
274 bool Parser::isodigit(int c)
275 {
276         return (c>='0' && c<='7');
277 }
278
279 string Parser::unescape_string(const string &str)
280 {
281         string   result;
282         bool     escape=false;
283         unsigned hexcape=0;
284         for(string::const_iterator i=str.begin()+1; i!=str.end()-1; ++i)
285         {
286                 if(escape)
287                 {
288                         if(*i=='n')
289                                 result+='\n';
290                         else if(*i=='t')
291                                 result+='\t';
292                         else if(*i=='\\')
293                                 result+='\\';
294                         else if(*i=='"')
295                                 result+='"';
296                         else if(*i=='x')
297                                 hexcape=0x100;
298                         else
299                                 throw DataError("Invalid escape");
300                         escape=false;
301                 }
302                 else if(hexcape)
303                 {
304                         unsigned digit=0;
305                         if(*i>='0' && *i<='9')
306                                 digit=*i-'0';
307                         else if(*i>='a' && *i<='f')
308                                 digit=*i-'a'+10;
309                         else if(*i>='A' && *i<='F')
310                                 digit=*i-'A'+10;
311                         else
312                                 throw DataError("Invalid hex digit");
313
314                         hexcape=(hexcape<<4)|digit;
315                         if(hexcape&0x10000)
316                         {
317                                 result+=hexcape&0xFF;
318                                 hexcape=0;
319                         }
320                 }
321                 else if(*i=='\\')
322                         escape=true;
323                 else
324                         result+=*i;
325         }
326
327         return result;
328 }
329
330 string Parser::get_location()
331 {
332         ostringstream ss;
333         ss<<src<<':'<<in.get_line_number();
334         return ss.str();
335 }
336
337 void Parser::parse_error(int c, int state)
338 {
339         throw DataError(get_location()+format(": Parse error at '%c' (state %d)", c, state).str());
340 }
341
342 } // namespace Parser
343 } // namespace Msp