]> git.tdb.fi Git - libs/datafile.git/blob - source/textparser.cpp
Fix creating Symbols from arbitrary types
[libs/datafile.git] / source / textparser.cpp
1 /* $Id$
2
3 This file is part of libmspdatafile
4 Copyright © 2007-2008, 2010  Mikko Rasa, Mikkosoft Productions
5 Distributed under the LGPL
6 */
7
8 #include <msp/strings/formatter.h>
9 #include <msp/strings/utils.h>
10 #include "input.h"
11 #include "textparser.h"
12 #include "token.h"
13
14 using namespace std;
15
16 namespace Msp {
17 namespace DataFile {
18
19 TextParser::TextParser(Input &i, const string &s):
20         ParserMode(i, s)
21 { }
22
23 Statement TextParser::parse()
24 {
25         return parse_statement(0);
26 }
27
28 Statement TextParser::parse_statement(const Token *t)
29 {
30         Statement result;
31         bool      sub = false;
32         bool      finish = false;
33
34         while(in)
35         {
36                 Token token;
37                 if(t)
38                 {
39                         token = *t;
40                         t = 0;
41                 }
42                 else
43                         token = parse_token();
44
45                 if(result.keyword.empty())
46                 {
47                         if(token.str.empty())
48                                 break;
49                         else if(token.type!=Token::IDENTIFIER)
50                                 throw_at(ParseError(format("Syntax error at token '%s' (expected an identifier)", token.str)), get_location());
51                         result.keyword = token.str;
52                         result.valid = true;
53                         result.source = src;
54                         result.line = in.get_line_number();
55                 }
56                 else if(sub)
57                 {
58                         if(token.str=="}")
59                         {
60                                 sub = false;
61                                 finish = true;
62                         }
63                         else
64                         {
65                                 Statement ss = parse_statement(&token);
66                                 result.sub.push_back(ss);
67                         }
68                 }
69                 else if(finish)
70                 {
71                         if(token.str!=";")
72                                 throw_at(ParseError(format("Syntax error at token '%s' (Expected a ';')", token.str)), get_location());
73                         break;
74                 }
75                 else if(token.str=="{")
76                         sub = true;
77                 else if(token.str==";")
78                         break;
79                 else if(token.type==Token::INTEGER)
80                         result.append(lexical_cast<IntType::Store>(token.str));
81                 else if(token.type==Token::FLOAT)
82                         result.append(lexical_cast<FloatType::Store>(token.str));
83                 else if(token.type==Token::STRING)
84                         result.append(token.str);
85                 else if(token.type==Token::IDENTIFIER)
86                 {
87                         if(token.str=="true")
88                                 result.append(true);
89                         else if(token.str=="false")
90                                 result.append(false);
91                         else
92                                 result.append(Symbol(token.str));
93                 }
94                 else if(token.str=="")
95                         throw_at(ParseError("Unexcepted end of input"), get_location());
96                 else
97                         throw_at(ParseError("Syntax error"), get_location());
98         }
99
100         return result;
101 }
102
103 Token TextParser::parse_token()
104 {
105         int c = 0;
106         int comment = 0;
107
108         // Skip over comments and whitespace
109         while(in && comment>=0)
110         {
111                 c = in.get();
112                 int next = in.peek();
113
114                 if(c=='/' && next=='/')
115                         comment = 1;
116                 else if(c=='/' && next=='*')
117                         comment = 2;
118                 else if(c=='\n' && comment==1)
119                         comment = 0;
120                 else if(c=='*' && next=='/' && comment==2)
121                         comment = 3;
122                 else if(comment==3)   // Skip the second character of block comment end
123                         comment = 0;
124                 else if(c!=-1 && !isspace(c) && !comment)
125                         comment = -1;
126         }
127
128         if(comment>0)  // EOF while in comment
129                 throw_at(ParseError("Unfinished comment at end of input"), get_location());
130         else if(comment==0)  // Didn't hit any non-whitespace
131                 return Token(Token::SPECIAL, "");
132
133         enum ParseState
134         {
135                 INIT,
136                 SIGN,
137                 FLOATEXPINIT,
138                 FLOATEXPSIGN,
139                 STRING,
140                 ACCEPT,
141                 ZERO,
142                 DECIMAL,
143                 HEXADECIMAL,
144                 OCTAL,
145                 FLOAT,
146                 FLOATEXP,
147                 STRING_END,
148                 IDENTIFIER
149         };
150
151         static Token::Type token_type[]=
152         {
153                 Token::SPECIAL,
154                 Token::SPECIAL,
155                 Token::SPECIAL,
156                 Token::SPECIAL,
157                 Token::SPECIAL,
158                 Token::SPECIAL,
159                 Token::INTEGER,
160                 Token::INTEGER,
161                 Token::INTEGER,
162                 Token::INTEGER,
163                 Token::FLOAT,
164                 Token::FLOAT,
165                 Token::STRING,
166                 Token::IDENTIFIER
167         };
168
169         ParseState state = INIT;
170         string     buf;
171         bool       escape = false;
172
173         while(in || state==INIT)
174         {
175                 if(state!=INIT)
176                         c = in.get();
177                 int next = in.peek();
178
179                 buf += c;
180
181                 switch(state)
182                 {
183                 case INIT:
184                         if(c=='0')
185                                 state = ZERO;
186                         else if(c=='-' || c=='+')
187                                 state = SIGN;
188                         else if(c=='.')
189                                 state = FLOAT;
190                         else if(c=='"')
191                                 state = STRING;
192                         else if(c=='{' || c=='}' || c==';')
193                                 return Token(Token::SPECIAL, string(1, c));
194                         else if(isdigit(c))
195                                 state = DECIMAL;
196                         else if(isalpha(c) || c=='_' || c=='\\')
197                                 state = IDENTIFIER;
198                         else
199                                 parse_error(c, "0-9A-Za-z_\\.\"{};+-");
200                         break;
201
202                 case SIGN:
203                         if(c=='0')
204                                 state = ZERO;
205                         else if(isdigit(c))
206                                 state = DECIMAL;
207                         else if(c=='.')
208                                 state = FLOAT;
209                         else
210                                 parse_error(c, "0-9.");
211                         break;
212
213                 case ZERO:
214                         if(c=='x')
215                                 state = HEXADECIMAL;
216                         else if(isdigit(c))
217                                 state = OCTAL;
218                         else if(c=='.')
219                                 state = FLOAT;
220                         else
221                                 parse_error(c, "0-9A-Fa-f.");
222                         break;
223
224                 case DECIMAL:
225                         if(c=='.')
226                                 state = FLOAT;
227                         else if(!isdigit(c))
228                                 parse_error(c, "0-9.");
229                         break;
230
231                 case HEXADECIMAL:
232                         if(!isxdigit(c))
233                                 parse_error(c, "0-9A-Fa-f");
234                         break;
235
236                 case OCTAL:
237                         if(!isodigit(c))
238                                 parse_error(c, "0-7");
239                         break;
240
241                 case FLOAT:
242                         if(c=='e' || c=='E')
243                                 state = FLOATEXPINIT;
244                         else if(!isdigit(c))
245                                 parse_error(c, "0-9Ee");
246                         break;
247
248                 case FLOATEXPINIT:
249                         if(c=='+' || c=='-')
250                                 state = FLOATEXPSIGN;
251                         else if(isdigit(c))
252                                 state = FLOATEXP;
253                         else
254                                 parse_error(c, "0-9+-");
255                         break;
256
257                 case FLOATEXPSIGN:
258                         if(isdigit(c))
259                                 state = FLOATEXP;
260                         else
261                                 parse_error(c, "0-9");
262                         break;
263
264                 case FLOATEXP:
265                         if(!isdigit(c))
266                                 parse_error(c, "0-9");
267                         break;
268
269                 case STRING:
270                         if(c=='\\')
271                                 escape = !escape;
272                         else if(c=='"' && !escape)
273                                 state = STRING_END;
274                         else
275                                 escape = false;
276                         break;
277
278                 case IDENTIFIER:
279                         if(!isalpha(c) && !isdigit(c) && c!='_' && c!='-' && c!='/')
280                                 parse_error(c, "0-9A-Za-z_/-");
281                         break;
282
283                 case STRING_END:
284                         throw_at(ParseError("Garbage after string"), get_location());
285
286                 default:
287                         throw_at(InvalidState("Internal error (bad state)"), get_location());
288                 }
289
290                 if(is_delimiter(next) && state>=ACCEPT)
291                 {
292                         if(state==IDENTIFIER && buf[0]=='\\')
293                                 return Token(Token::IDENTIFIER, buf.substr(1));
294                         else if(state==STRING_END)
295                         {
296                                 try
297                                 {
298                                         return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
299                                 }
300                                 catch(Exception &e)
301                                 {
302                                         e.at(get_location());
303                                         throw;
304                                 }
305                         }
306                         else
307                                 return Token(token_type[state], buf);
308                 }
309         }
310
311         return Token(Token::SPECIAL, "");
312 }
313
314 bool TextParser::is_delimiter(int c)
315 {
316         return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/');
317 }
318
319 bool TextParser::isodigit(int c)
320 {
321         return (c>='0' && c<='7');
322 }
323
324 string TextParser::get_location()
325 {
326         ostringstream ss;
327         ss<<src<<':'<<in.get_line_number();
328         return ss.str();
329 }
330
331 void TextParser::parse_error(int c, const char *e)
332 {
333         throw_at(ParseError(format("Parse error at '%c', expected one of \"%s\"", static_cast<char>(c), e)), get_location());
334 }
335
336 } // namespace DataFile
337 } // namespace Msp