]> git.tdb.fi Git - libs/datafile.git/blob - source/textparser.cpp
05521a90456a617e2e82b1fb664225d512a22af4
[libs/datafile.git] / source / textparser.cpp
1 #include <msp/strings/format.h>
2 #include <msp/strings/utils.h>
3 #include "input.h"
4 #include "textparser.h"
5 #include "token.h"
6
7 using namespace std;
8
9 namespace Msp {
10 namespace DataFile {
11
12 class parse_error: public runtime_error
13 {
14 public:
15         parse_error(const std::string &t):
16                 runtime_error(t.empty() ? "at end of input" : format("after '%s'", t))
17         { }
18
19         virtual ~parse_error() throw() { }
20 };
21
22
23 class syntax_error: public runtime_error
24 {
25 public:
26         syntax_error(const std::string &t):
27                 runtime_error(t.empty() ? "at end of input" : format("at '%s'", t))
28         { }
29
30         virtual ~syntax_error() throw() { }
31 };
32
33
34 TextParser::TextParser(Input &i, const string &s):
35         ParserMode(i, s)
36 { }
37
38 Statement TextParser::parse()
39 {
40         return parse_statement(0);
41 }
42
43 Statement TextParser::parse_statement(const Token *t)
44 {
45         Statement result;
46         bool sub = false;
47         bool finish = false;
48
49         while(in)
50         {
51                 Token token;
52                 if(t)
53                 {
54                         token = *t;
55                         t = 0;
56                 }
57                 else
58                         token = parse_token();
59
60                 if(result.keyword.empty())
61                 {
62                         if(token.str.empty())
63                                 break;
64                         else if(token.type!=Token::IDENTIFIER)
65                                 throw syntax_error(token.str);
66                         result.keyword = token.str;
67                         result.valid = true;
68                         result.source = src;
69                         result.line = in.get_line_number();
70                 }
71                 else if(sub)
72                 {
73                         if(token.str=="}")
74                         {
75                                 sub = false;
76                                 finish = true;
77                         }
78                         else
79                         {
80                                 Statement ss = parse_statement(&token);
81                                 result.sub.push_back(ss);
82                         }
83                 }
84                 else if(finish)
85                 {
86                         if(token.str!=";")
87                                 throw syntax_error(token.str);
88                         break;
89                 }
90                 else if(token.str=="{")
91                         sub = true;
92                 else if(token.str==";")
93                         break;
94                 else if(token.type==Token::INTEGER)
95                         result.append(lexical_cast<IntType::Store>(token.str));
96                 else if(token.type==Token::FLOAT)
97                         result.append(lexical_cast<FloatType::Store>(token.str));
98                 else if(token.type==Token::STRING)
99                         result.append(token.str);
100                 else if(token.type==Token::IDENTIFIER)
101                 {
102                         if(token.str=="true")
103                                 result.append(true);
104                         else if(token.str=="false")
105                                 result.append(false);
106                         else
107                                 result.append(Symbol(token.str));
108                 }
109                 else
110                         throw syntax_error(token.str);
111         }
112
113         return result;
114 }
115
116 Token TextParser::parse_token()
117 {
118         int c = 0;
119         int comment = 0;
120
121         // Skip over comments and whitespace
122         while(in && comment>=0)
123         {
124                 c = in.get();
125                 int next = in.peek();
126
127                 if(c=='/' && next=='/' && !comment)
128                         comment = 1;
129                 else if(c=='/' && next=='*' && !comment)
130                         comment = 2;
131                 else if(c=='\n' && comment==1)
132                         comment = 0;
133                 else if(c=='*' && next=='/' && comment==2)
134                         comment = 3;
135                 else if(comment==3)   // Skip the second character of block comment end
136                         comment = 0;
137                 else if(c!=-1 && !isspace(c) && !comment)
138                         comment = -1;
139         }
140
141         if(comment>0)  // EOF while in comment
142                 throw parse_error(string());
143         else if(comment==0)  // Didn't hit any non-whitespace
144                 return Token(Token::SPECIAL, "");
145
146         enum ParseState
147         {
148                 INIT,
149                 SIGN,
150                 FLOATEXPINIT,
151                 FLOATEXPSIGN,
152                 STRING,
153                 ACCEPT,
154                 ZERO,
155                 DECIMAL,
156                 HEXADECIMAL,
157                 OCTAL,
158                 FLOAT,
159                 FLOATEXP,
160                 STRING_END,
161                 IDENTIFIER
162         };
163
164         static Token::Type token_type[]=
165         {
166                 Token::SPECIAL,
167                 Token::SPECIAL,
168                 Token::SPECIAL,
169                 Token::SPECIAL,
170                 Token::SPECIAL,
171                 Token::SPECIAL,
172                 Token::INTEGER,
173                 Token::INTEGER,
174                 Token::INTEGER,
175                 Token::INTEGER,
176                 Token::FLOAT,
177                 Token::FLOAT,
178                 Token::STRING,
179                 Token::IDENTIFIER
180         };
181
182         ParseState state = INIT;
183         string buf;
184         bool escape = false;
185
186         while(in || state==INIT)
187         {
188                 if(state!=INIT)
189                         c = in.get();
190                 int next = in.peek();
191
192                 buf += c;
193
194                 switch(state)
195                 {
196                 case INIT:
197                         if(c=='0')
198                                 state = ZERO;
199                         else if(c=='-' || c=='+')
200                                 state = SIGN;
201                         else if(c=='.')
202                                 state = FLOAT;
203                         else if(c=='"')
204                                 state = STRING;
205                         else if(c=='{' || c=='}' || c==';')
206                                 return Token(Token::SPECIAL, string(1, c));
207                         else if(isdigit(c))
208                                 state = DECIMAL;
209                         else if(isalpha(c) || c=='_' || c=='\\')
210                                 state = IDENTIFIER;
211                         else
212                                 throw parse_error(buf);
213                         break;
214
215                 case SIGN:
216                         if(c=='0')
217                                 state = ZERO;
218                         else if(isdigit(c))
219                                 state = DECIMAL;
220                         else if(c=='.')
221                                 state = FLOAT;
222                         else
223                                 throw parse_error(buf);
224                         break;
225
226                 case ZERO:
227                         if(c=='x')
228                                 state = HEXADECIMAL;
229                         else if(isdigit(c))
230                                 state = OCTAL;
231                         else if(c=='.')
232                                 state = FLOAT;
233                         else
234                                 throw parse_error(buf);
235                         break;
236
237                 case DECIMAL:
238                         if(c=='.')
239                                 state = FLOAT;
240                         else if(c=='e' || c=='E')
241                                 state = FLOATEXPINIT;
242                         else if(!isdigit(c))
243                                 throw parse_error(buf);
244                         break;
245
246                 case HEXADECIMAL:
247                         if(!isxdigit(c))
248                                 throw parse_error(buf);
249                         break;
250
251                 case OCTAL:
252                         if(!isodigit(c))
253                                 throw parse_error(buf);
254                         break;
255
256                 case FLOAT:
257                         if(c=='e' || c=='E')
258                                 state = FLOATEXPINIT;
259                         else if(!isdigit(c))
260                                 throw parse_error(buf);
261                         break;
262
263                 case FLOATEXPINIT:
264                         if(c=='+' || c=='-')
265                                 state = FLOATEXPSIGN;
266                         else if(isdigit(c))
267                                 state = FLOATEXP;
268                         else
269                                 throw parse_error(buf);
270                         break;
271
272                 case FLOATEXPSIGN:
273                         if(isdigit(c))
274                                 state = FLOATEXP;
275                         else
276                                 throw parse_error(buf);
277                         break;
278
279                 case FLOATEXP:
280                         if(!isdigit(c))
281                                 throw parse_error(buf);
282                         break;
283
284                 case STRING:
285                         if(c=='\\')
286                                 escape = !escape;
287                         else if(c=='"' && !escape)
288                                 state = STRING_END;
289                         else
290                                 escape = false;
291                         break;
292
293                 case IDENTIFIER:
294                         if(!isalpha(c) && !isdigit(c) && c!='_' && c!='-' && c!='/')
295                                 throw parse_error(buf);
296                         break;
297
298                 case STRING_END:
299                         throw parse_error(buf);
300
301                 default:
302                         throw logic_error("bad parser state");
303                 }
304
305                 if(is_delimiter(next) && state>=ACCEPT)
306                 {
307                         if(state==IDENTIFIER && buf[0]=='\\')
308                                 return Token(Token::IDENTIFIER, buf.substr(1));
309                         else if(state==STRING_END)
310                                 return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
311                         else
312                                 return Token(token_type[state], buf);
313                 }
314         }
315
316         return Token(Token::SPECIAL, "");
317 }
318
319 bool TextParser::is_delimiter(int c)
320 {
321         return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/');
322 }
323
324 bool TextParser::isodigit(int c)
325 {
326         return (c>='0' && c<='7');
327 }
328
329 } // namespace DataFile
330 } // namespace Msp