git.tdb.fi Git - libs/datafile.git/blob - source/textparser.cpp
Fix binary format
[libs/datafile.git] / source / textparser.cpp
1 /* $Id$
2
3 This file is part of libmspdatafile
4 Copyright © 2007  Mikko Rasa, Mikkosoft Productions
5 Distributed under the LGPL
6 */
7
8 #include <msp/strings/formatter.h>
9 #include <msp/strings/utils.h>
10 #include "input.h"
11 #include "textparser.h"
12 #include "token.h"
13
14 using namespace std;
15
16 namespace Msp {
17 namespace DataFile {
18
19 TextParser::TextParser(Input &i, const string &s):
20         ParserMode(i, s)
21 { }
22
23 Statement TextParser::parse()
24 {
25         return parse_statement(0);
26 }
27
28 Statement TextParser::parse_statement(const Token *t)
29 {
30         Statement result;
31         bool      sub=false;
32         bool      finish=false;
33
34         while(in)
35         {
36                 Token token;
37                 if(t)
38                 {
39                         token=*t;
40                         t=0;
41                 }
42                 else
43                         token=parse_token();
44
45                 if(result.keyword.empty())
46                 {
47                         if(token.str.empty())
48                                 break;
49                         else if(token.type!=Token::IDENTIFIER)
50                                 throw ParseError(format("%s: Syntax error at token '%s' (expected an identifier)", get_location(), token.str), src, in.get_line_number());
51                         result.keyword=token.str;
52                         result.valid=true;
53                         result.source=src;
54                         result.line=in.get_line_number();
55                 }
56                 else if(sub)
57                 {
58                         if(token.str=="}")
59                         {
60                                 sub=false;
61                                 finish=true;
62                         }
63                         else
64                         {
65                                 Statement ss=parse_statement(&token);
66                                 result.sub.push_back(ss);
67                         }
68                 }
69                 else if(finish)
70                 {
71                         if(token.str!=";")
72                                 throw ParseError(format("%s: Syntax error at token '%s' (Expected a ';')", get_location(), token.str), src, in.get_line_number());
73                         break;
74                 }
75                 else if(token.str=="{")
76                         sub=true;
77                 else if(token.str==";")
78                         break;
79                 else if(token.type==Token::INTEGER)
80                         result.args.push_back(Value(INTEGER, token.str));
81                 else if(token.type==Token::FLOAT)
82                         result.args.push_back(Value(FLOAT, token.str));
83                 else if(token.type==Token::STRING)
84                         result.args.push_back(Value(STRING, token.str));
85                 else if(token.type==Token::IDENTIFIER)
86                 {
87                         if(token.str=="true")
88                                 result.args.push_back(Value(BOOLEAN, "1"));
89                         else if(token.str=="false")
90                                 result.args.push_back(Value(BOOLEAN, "0"));
91                         else
92                                 result.args.push_back(Value(ENUM, token.str));
93                         //result.args.push_back(resolve_identifiertoken.str);
94                 }
95                 else if(token.str=="")
96                         throw ParseError(src+": Unexcepted end of input", src, in.get_line_number());
97                 else
98                         throw ParseError(get_location()+": Syntax error", src, in.get_line_number());
99         }
100
101         return result;
102 }
103
104 Token TextParser::parse_token()
105 {
106         int c=0;
107         unsigned comment=0;
108
109         // Skip over comments and whitespace
110         while(in)
111         {
112                 c=in.get();
113                 int next=in.peek();
114
115                 if(c=='/' && next=='/')
116                         comment=1;
117                 else if(c=='/' && next=='*')
118                         comment=2;
119                 else if(c=='\n' && comment==1)
120                         comment=0;
121                 else if(c=='*' && next=='/' && comment==2)
122                         comment=3;
123                 else if(comment==3)   // Skip the second character of block comment end
124                         comment=0;
125                 else if(!isspace(c) && !comment)
126                         break;
127         }
128
129         if(comment)  // Didn't hit any non-whitespace
130                 throw ParseError(src+": Unfinished comment at end of input", src, in.get_line_number());
131
132         enum ParseState
133         {
134                 INIT,
135                 SIGN,
136                 FLOATEXPINIT,
137                 FLOATEXPSIGN,
138                 STRING,
139                 ACCEPT,
140                 ZERO,
141                 DECIMAL,
142                 HEXADECIMAL,
143                 OCTAL,
144                 FLOAT,
145                 FLOATEXP,
146                 IDENTIFIER
147         };
148
149         static Token::Type token_type[]=
150         {
151                 Token::SPECIAL,
152                 Token::SPECIAL,
153                 Token::SPECIAL,
154                 Token::SPECIAL,
155                 Token::STRING,
156                 Token::SPECIAL,
157                 Token::INTEGER,
158                 Token::INTEGER,
159                 Token::INTEGER,
160                 Token::INTEGER,
161                 Token::FLOAT,
162                 Token::FLOAT,
163                 Token::IDENTIFIER
164         };
165
166         ParseState state=INIT;
167         string     buf;
168         bool       escape=false;
169
170         while(in)
171         {
172                 if(state!=INIT)
173                         c=in.get();
174                 int next=in.peek();
175
176                 buf+=c;
177
178                 switch(state)
179                 {
180                 case INIT:
181                         if(c=='0')
182                                 state=ZERO;
183                         else if(c=='-' || c=='+')
184                                 state=SIGN;
185                         else if(c=='.')
186                                 state=FLOAT;
187                         else if(c=='"')
188                                 state=STRING;
189                         else if(c=='{' || c=='}' || c==';')
190                                 return Token(Token::SPECIAL, string(1, c));
191                         else if(isdigit(c))
192                                 state=DECIMAL;
193                         else if(isalpha(c) || c=='_')
194                                 state=IDENTIFIER;
195                         else
196                                 parse_error(c, state);
197                         break;
198
199                 case SIGN:
200                         if(c=='0')
201                                 state=ZERO;
202                         else if(isdigit(c))
203                                 state=DECIMAL;
204                         else if(c=='.')
205                                 state=FLOAT;
206                         else
207                                 parse_error(c, state);
208                         break;
209
210                 case ZERO:
211                         if(c=='x')
212                                 state=HEXADECIMAL;
213                         else if(isdigit(c))
214                                 state=OCTAL;
215                         else if(c=='.')
216                                 state=FLOAT;
217                         else
218                                 parse_error(c, state);
219                         break;
220
221                 case DECIMAL:
222                         if(c=='.')
223                                 state=FLOAT;
224                         else if(!isdigit(c))
225                                 parse_error(c, state);
226                         break;
227
228                 case HEXADECIMAL:
229                         if(!isxdigit(c))
230                                 parse_error(c, state);
231                         break;
232
233                 case OCTAL:
234                         if(!isodigit(c))
235                                 parse_error(c, state);
236                         break;
237
238                 case FLOAT:
239                         if(c=='e' || c=='E')
240                                 state=FLOATEXPINIT;
241                         else if(!isdigit(c))
242                                 parse_error(c, state);
243                         break;
244
245                 case FLOATEXPINIT:
246                         if(c=='+' || c=='-')
247                                 state=FLOATEXPSIGN;
248                         else if(isdigit(c))
249                                 state=FLOATEXP;
250                         else
251                                 parse_error(c, state);
252                         break;
253
254                 case FLOATEXPSIGN:
255                         if(isdigit(c))
256                                 state=FLOATEXP;
257                         else
258                                 parse_error(c, state);
259                         break;
260
261                 case FLOATEXP:
262                         if(!isdigit(c))
263                                 parse_error(c, state);
264                         break;
265
266                 case STRING:
267                         if(c=='\\')
268                                 escape=!escape;
269                         else if(c=='"' && !escape)
270                         {
271                                 try
272                                 {
273                                         return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
274                                 }
275                                 catch(const Exception &e)
276                                 {
277                                         throw ParseError(format("%s: %s", get_location(), e.what()), src, in.get_line_number());
278                                 }
279                         }
280                         else
281                                 escape=false;
282                         break;
283
284                 case IDENTIFIER:
285                         if(!isalpha(c) && !isdigit(c) && c!='_')
286                                 parse_error(c, state);
287                         break;
288
289                 default:
290                         throw Exception(get_location()+": Internal error (bad state)");
291                 }
292
293                 if(is_delimiter(next) && state>=ACCEPT)
294                         return Token(token_type[state], buf);
295         }
296
297         return Token(Token::SPECIAL, "");
298 }
299
300 bool TextParser::is_delimiter(int c)
301 {
302         return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/');
303 }
304
305 bool TextParser::isodigit(int c)
306 {
307         return (c>='0' && c<='7');
308 }
309
310 string TextParser::get_location()
311 {
312         ostringstream ss;
313         ss<<src<<':'<<in.get_line_number();
314         return ss.str();
315 }
316
317 void TextParser::parse_error(int c, int state)
318 {
319         throw ParseError(format("%s: Parse error at '%c' (state %d)", get_location(), static_cast<char>(c), state), src, in.get_line_number());
320 }
321
322 } // namespace DataFile
323 } // namespace Msp