git.tdb.fi Git - libs/datafile.git/blob - source/textparser.cpp
Style update: add spaces around assignments
[libs/datafile.git] / source / textparser.cpp
1 /* $Id$
2
3 This file is part of libmspdatafile
4 Copyright © 2007-2008  Mikko Rasa, Mikkosoft Productions
5 Distributed under the LGPL
6 */
7
8 #include <msp/strings/formatter.h>
9 #include <msp/strings/utils.h>
10 #include "input.h"
11 #include "textparser.h"
12 #include "token.h"
13
14 using namespace std;
15
16 namespace Msp {
17 namespace DataFile {
18
19 TextParser::TextParser(Input &i, const string &s):
20         ParserMode(i, s)
21 { }
22
23 Statement TextParser::parse()
24 {
25         return parse_statement(0);
26 }
27
28 Statement TextParser::parse_statement(const Token *t)
29 {
30         Statement result;
31         bool      sub = false;
32         bool      finish = false;
33
34         while(in)
35         {
36                 Token token;
37                 if(t)
38                 {
39                         token = *t;
40                         t = 0;
41                 }
42                 else
43                         token = parse_token();
44
45                 if(result.keyword.empty())
46                 {
47                         if(token.str.empty())
48                                 break;
49                         else if(token.type!=Token::IDENTIFIER)
50                                 throw_at(ParseError(format("Syntax error at token '%s' (expected an identifier)", token.str)), get_location());
51                         result.keyword = token.str;
52                         result.valid = true;
53                         result.source = src;
54                         result.line = in.get_line_number();
55                 }
56                 else if(sub)
57                 {
58                         if(token.str=="}")
59                         {
60                                 sub = false;
61                                 finish = true;
62                         }
63                         else
64                         {
65                                 Statement ss = parse_statement(&token);
66                                 result.sub.push_back(ss);
67                         }
68                 }
69                 else if(finish)
70                 {
71                         if(token.str!=";")
72                                 throw_at(ParseError(format("Syntax error at token '%s' (Expected a ';')", token.str)), get_location());
73                         break;
74                 }
75                 else if(token.str=="{")
76                         sub = true;
77                 else if(token.str==";")
78                         break;
79                 else if(token.type==Token::INTEGER)
80                         result.args.push_back(Value(INTEGER, token.str));
81                 else if(token.type==Token::FLOAT)
82                         result.args.push_back(Value(FLOAT, token.str));
83                 else if(token.type==Token::STRING)
84                         result.args.push_back(Value(STRING, token.str));
85                 else if(token.type==Token::IDENTIFIER)
86                 {
87                         if(token.str=="true")
88                                 result.args.push_back(Value(BOOLEAN, "1"));
89                         else if(token.str=="false")
90                                 result.args.push_back(Value(BOOLEAN, "0"));
91                         else
92                                 result.args.push_back(Value(ENUM, token.str));
93                         //result.args.push_back(resolve_identifiertoken.str);
94                 }
95                 else if(token.str=="")
96                         throw_at(ParseError("Unexcepted end of input"), get_location());
97                 else
98                         throw_at(ParseError("Syntax error"), get_location());
99         }
100
101         return result;
102 }
103
104 Token TextParser::parse_token()
105 {
106         int c = 0;
107         int comment = 0;
108
109         // Skip over comments and whitespace
110         while(in && comment>=0)
111         {
112                 c = in.get();
113                 int next = in.peek();
114
115                 if(c=='/' && next=='/')
116                         comment = 1;
117                 else if(c=='/' && next=='*')
118                         comment = 2;
119                 else if(c=='\n' && comment==1)
120                         comment = 0;
121                 else if(c=='*' && next=='/' && comment==2)
122                         comment = 3;
123                 else if(comment==3)   // Skip the second character of block comment end
124                         comment = 0;
125                 else if(!isspace(c) && !comment)
126                         comment = -1;
127         }
128
129         if(comment>0)  // EOF while in comment
130                 throw_at(ParseError("Unfinished comment at end of input"), get_location());
131         else if(comment==0)  // Didn't hit any non-whitespace
132                 return Token(Token::SPECIAL, "");
133
134         enum ParseState
135         {
136                 INIT,
137                 SIGN,
138                 FLOATEXPINIT,
139                 FLOATEXPSIGN,
140                 STRING,
141                 ACCEPT,
142                 ZERO,
143                 DECIMAL,
144                 HEXADECIMAL,
145                 OCTAL,
146                 FLOAT,
147                 FLOATEXP,
148                 IDENTIFIER
149         };
150
151         static Token::Type token_type[]=
152         {
153                 Token::SPECIAL,
154                 Token::SPECIAL,
155                 Token::SPECIAL,
156                 Token::SPECIAL,
157                 Token::STRING,
158                 Token::SPECIAL,
159                 Token::INTEGER,
160                 Token::INTEGER,
161                 Token::INTEGER,
162                 Token::INTEGER,
163                 Token::FLOAT,
164                 Token::FLOAT,
165                 Token::IDENTIFIER
166         };
167
168         ParseState state = INIT;
169         string     buf;
170         bool       escape = false;
171
172         while(in || state==INIT)
173         {
174                 if(state!=INIT)
175                         c = in.get();
176                 int next = in.peek();
177
178                 buf += c;
179
180                 switch(state)
181                 {
182                 case INIT:
183                         if(c=='0')
184                                 state = ZERO;
185                         else if(c=='-' || c=='+')
186                                 state = SIGN;
187                         else if(c=='.')
188                                 state = FLOAT;
189                         else if(c=='"')
190                                 state = STRING;
191                         else if(c=='{' || c=='}' || c==';')
192                                 return Token(Token::SPECIAL, string(1, c));
193                         else if(isdigit(c))
194                                 state = DECIMAL;
195                         else if(isalpha(c) || c=='_')
196                                 state = IDENTIFIER;
197                         else
198                                 parse_error(c, "0-9A-Za-z_.\"{};+-");
199                         break;
200
201                 case SIGN:
202                         if(c=='0')
203                                 state = ZERO;
204                         else if(isdigit(c))
205                                 state = DECIMAL;
206                         else if(c=='.')
207                                 state = FLOAT;
208                         else
209                                 parse_error(c, "0-9.");
210                         break;
211
212                 case ZERO:
213                         if(c=='x')
214                                 state = HEXADECIMAL;
215                         else if(isdigit(c))
216                                 state = OCTAL;
217                         else if(c=='.')
218                                 state = FLOAT;
219                         else
220                                 parse_error(c, "0-9A-Fa-f.");
221                         break;
222
223                 case DECIMAL:
224                         if(c=='.')
225                                 state = FLOAT;
226                         else if(!isdigit(c))
227                                 parse_error(c, "0-9.");
228                         break;
229
230                 case HEXADECIMAL:
231                         if(!isxdigit(c))
232                                 parse_error(c, "0-9A-Fa-f");
233                         break;
234
235                 case OCTAL:
236                         if(!isodigit(c))
237                                 parse_error(c, "0-7");
238                         break;
239
240                 case FLOAT:
241                         if(c=='e' || c=='E')
242                                 state = FLOATEXPINIT;
243                         else if(!isdigit(c))
244                                 parse_error(c, "0-9Ee");
245                         break;
246
247                 case FLOATEXPINIT:
248                         if(c=='+' || c=='-')
249                                 state = FLOATEXPSIGN;
250                         else if(isdigit(c))
251                                 state = FLOATEXP;
252                         else
253                                 parse_error(c, "0-9+-");
254                         break;
255
256                 case FLOATEXPSIGN:
257                         if(isdigit(c))
258                                 state = FLOATEXP;
259                         else
260                                 parse_error(c, "0-9");
261                         break;
262
263                 case FLOATEXP:
264                         if(!isdigit(c))
265                                 parse_error(c, "0-9");
266                         break;
267
268                 case STRING:
269                         if(c=='\\')
270                                 escape = !escape;
271                         else if(c=='"' && !escape)
272                         {
273                                 try
274                                 {
275                                         return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
276                                 }
277                                 catch(Exception &e)
278                                 {
279                                         e.at(get_location());
280                                         throw;
281                                 }
282                         }
283                         else
284                                 escape = false;
285                         break;
286
287                 case IDENTIFIER:
288                         if(!isalpha(c) && !isdigit(c) && c!='_')
289                                 parse_error(c, "0-9A-Za-z_");
290                         break;
291
292                 default:
293                         throw_at(InvalidState("Internal error (bad state)"), get_location());
294                 }
295
296                 if(is_delimiter(next) && state>=ACCEPT)
297                         return Token(token_type[state], buf);
298         }
299
300         return Token(Token::SPECIAL, "");
301 }
302
303 bool TextParser::is_delimiter(int c)
304 {
305         return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/');
306 }
307
308 bool TextParser::isodigit(int c)
309 {
310         return (c>='0' && c<='7');
311 }
312
313 string TextParser::get_location()
314 {
315         ostringstream ss;
316         ss<<src<<':'<<in.get_line_number();
317         return ss.str();
318 }
319
320 void TextParser::parse_error(int c, const char *e)
321 {
322         throw_at(ParseError(format("Parse error at '%c', expected one of \"%s\"", static_cast<char>(c), e)), get_location());
323 }
324
325 } // namespace DataFile
326 } // namespace Msp