]> git.tdb.fi Git - libs/datafile.git/blob - source/textparser.cpp
Update formatter.h -> format.h
[libs/datafile.git] / source / textparser.cpp
1 #include <msp/strings/format.h>
2 #include <msp/strings/utils.h>
3 #include "input.h"
4 #include "textparser.h"
5 #include "token.h"
6
7 using namespace std;
8
9 namespace Msp {
10 namespace DataFile {
11
12 TextParser::TextParser(Input &i, const string &s):
13         ParserMode(i, s)
14 { }
15
16 Statement TextParser::parse()
17 {
18         return parse_statement(0);
19 }
20
21 Statement TextParser::parse_statement(const Token *t)
22 {
23         Statement result;
24         bool      sub = false;
25         bool      finish = false;
26
27         while(in)
28         {
29                 Token token;
30                 if(t)
31                 {
32                         token = *t;
33                         t = 0;
34                 }
35                 else
36                         token = parse_token();
37
38                 if(result.keyword.empty())
39                 {
40                         if(token.str.empty())
41                                 break;
42                         else if(token.type!=Token::IDENTIFIER)
43                                 throw_at(ParseError(format("Syntax error at token '%s' (expected an identifier)", token.str)), get_location());
44                         result.keyword = token.str;
45                         result.valid = true;
46                         result.source = src;
47                         result.line = in.get_line_number();
48                 }
49                 else if(sub)
50                 {
51                         if(token.str=="}")
52                         {
53                                 sub = false;
54                                 finish = true;
55                         }
56                         else
57                         {
58                                 Statement ss = parse_statement(&token);
59                                 result.sub.push_back(ss);
60                         }
61                 }
62                 else if(finish)
63                 {
64                         if(token.str!=";")
65                                 throw_at(ParseError(format("Syntax error at token '%s' (Expected a ';')", token.str)), get_location());
66                         break;
67                 }
68                 else if(token.str=="{")
69                         sub = true;
70                 else if(token.str==";")
71                         break;
72                 else if(token.type==Token::INTEGER)
73                         result.append(lexical_cast<IntType::Store>(token.str));
74                 else if(token.type==Token::FLOAT)
75                         result.append(lexical_cast<FloatType::Store>(token.str));
76                 else if(token.type==Token::STRING)
77                         result.append(token.str);
78                 else if(token.type==Token::IDENTIFIER)
79                 {
80                         if(token.str=="true")
81                                 result.append(true);
82                         else if(token.str=="false")
83                                 result.append(false);
84                         else
85                                 result.append(Symbol(token.str));
86                 }
87                 else if(token.str=="")
88                         throw_at(ParseError("Unexcepted end of input"), get_location());
89                 else
90                         throw_at(ParseError("Syntax error"), get_location());
91         }
92
93         return result;
94 }
95
96 Token TextParser::parse_token()
97 {
98         int c = 0;
99         int comment = 0;
100
101         // Skip over comments and whitespace
102         while(in && comment>=0)
103         {
104                 c = in.get();
105                 int next = in.peek();
106
107                 if(c=='/' && next=='/')
108                         comment = 1;
109                 else if(c=='/' && next=='*')
110                         comment = 2;
111                 else if(c=='\n' && comment==1)
112                         comment = 0;
113                 else if(c=='*' && next=='/' && comment==2)
114                         comment = 3;
115                 else if(comment==3)   // Skip the second character of block comment end
116                         comment = 0;
117                 else if(c!=-1 && !isspace(c) && !comment)
118                         comment = -1;
119         }
120
121         if(comment>0)  // EOF while in comment
122                 throw_at(ParseError("Unfinished comment at end of input"), get_location());
123         else if(comment==0)  // Didn't hit any non-whitespace
124                 return Token(Token::SPECIAL, "");
125
126         enum ParseState
127         {
128                 INIT,
129                 SIGN,
130                 FLOATEXPINIT,
131                 FLOATEXPSIGN,
132                 STRING,
133                 ACCEPT,
134                 ZERO,
135                 DECIMAL,
136                 HEXADECIMAL,
137                 OCTAL,
138                 FLOAT,
139                 FLOATEXP,
140                 STRING_END,
141                 IDENTIFIER
142         };
143
144         static Token::Type token_type[]=
145         {
146                 Token::SPECIAL,
147                 Token::SPECIAL,
148                 Token::SPECIAL,
149                 Token::SPECIAL,
150                 Token::SPECIAL,
151                 Token::SPECIAL,
152                 Token::INTEGER,
153                 Token::INTEGER,
154                 Token::INTEGER,
155                 Token::INTEGER,
156                 Token::FLOAT,
157                 Token::FLOAT,
158                 Token::STRING,
159                 Token::IDENTIFIER
160         };
161
162         ParseState state = INIT;
163         string     buf;
164         bool       escape = false;
165
166         while(in || state==INIT)
167         {
168                 if(state!=INIT)
169                         c = in.get();
170                 int next = in.peek();
171
172                 buf += c;
173
174                 switch(state)
175                 {
176                 case INIT:
177                         if(c=='0')
178                                 state = ZERO;
179                         else if(c=='-' || c=='+')
180                                 state = SIGN;
181                         else if(c=='.')
182                                 state = FLOAT;
183                         else if(c=='"')
184                                 state = STRING;
185                         else if(c=='{' || c=='}' || c==';')
186                                 return Token(Token::SPECIAL, string(1, c));
187                         else if(isdigit(c))
188                                 state = DECIMAL;
189                         else if(isalpha(c) || c=='_' || c=='\\')
190                                 state = IDENTIFIER;
191                         else
192                                 parse_error(c, "0-9A-Za-z_\\.\"{};+-");
193                         break;
194
195                 case SIGN:
196                         if(c=='0')
197                                 state = ZERO;
198                         else if(isdigit(c))
199                                 state = DECIMAL;
200                         else if(c=='.')
201                                 state = FLOAT;
202                         else
203                                 parse_error(c, "0-9.");
204                         break;
205
206                 case ZERO:
207                         if(c=='x')
208                                 state = HEXADECIMAL;
209                         else if(isdigit(c))
210                                 state = OCTAL;
211                         else if(c=='.')
212                                 state = FLOAT;
213                         else
214                                 parse_error(c, "0-9A-Fa-f.");
215                         break;
216
217                 case DECIMAL:
218                         if(c=='.')
219                                 state = FLOAT;
220                         else if(!isdigit(c))
221                                 parse_error(c, "0-9.");
222                         break;
223
224                 case HEXADECIMAL:
225                         if(!isxdigit(c))
226                                 parse_error(c, "0-9A-Fa-f");
227                         break;
228
229                 case OCTAL:
230                         if(!isodigit(c))
231                                 parse_error(c, "0-7");
232                         break;
233
234                 case FLOAT:
235                         if(c=='e' || c=='E')
236                                 state = FLOATEXPINIT;
237                         else if(!isdigit(c))
238                                 parse_error(c, "0-9Ee");
239                         break;
240
241                 case FLOATEXPINIT:
242                         if(c=='+' || c=='-')
243                                 state = FLOATEXPSIGN;
244                         else if(isdigit(c))
245                                 state = FLOATEXP;
246                         else
247                                 parse_error(c, "0-9+-");
248                         break;
249
250                 case FLOATEXPSIGN:
251                         if(isdigit(c))
252                                 state = FLOATEXP;
253                         else
254                                 parse_error(c, "0-9");
255                         break;
256
257                 case FLOATEXP:
258                         if(!isdigit(c))
259                                 parse_error(c, "0-9");
260                         break;
261
262                 case STRING:
263                         if(c=='\\')
264                                 escape = !escape;
265                         else if(c=='"' && !escape)
266                                 state = STRING_END;
267                         else
268                                 escape = false;
269                         break;
270
271                 case IDENTIFIER:
272                         if(!isalpha(c) && !isdigit(c) && c!='_' && c!='-' && c!='/')
273                                 parse_error(c, "0-9A-Za-z_/-");
274                         break;
275
276                 case STRING_END:
277                         throw_at(ParseError("Garbage after string"), get_location());
278
279                 default:
280                         throw_at(InvalidState("Internal error (bad state)"), get_location());
281                 }
282
283                 if(is_delimiter(next) && state>=ACCEPT)
284                 {
285                         if(state==IDENTIFIER && buf[0]=='\\')
286                                 return Token(Token::IDENTIFIER, buf.substr(1));
287                         else if(state==STRING_END)
288                         {
289                                 try
290                                 {
291                                         return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
292                                 }
293                                 catch(Exception &e)
294                                 {
295                                         e.at(get_location());
296                                         throw;
297                                 }
298                         }
299                         else
300                                 return Token(token_type[state], buf);
301                 }
302         }
303
304         return Token(Token::SPECIAL, "");
305 }
306
307 bool TextParser::is_delimiter(int c)
308 {
309         return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/');
310 }
311
312 bool TextParser::isodigit(int c)
313 {
314         return (c>='0' && c<='7');
315 }
316
317 string TextParser::get_location()
318 {
319         ostringstream ss;
320         ss<<src<<':'<<in.get_line_number();
321         return ss.str();
322 }
323
324 void TextParser::parse_error(int c, const char *e)
325 {
326         throw_at(ParseError(format("Parse error at '%c', expected one of \"%s\"", static_cast<char>(c), e)), get_location());
327 }
328
329 } // namespace DataFile
330 } // namespace Msp