]> git.tdb.fi Git - libs/datafile.git/blob - source/textparser.cpp
c87a32df666924e163ee83e8d53f06d7d181b2e3
[libs/datafile.git] / source / textparser.cpp
1 #include <msp/strings/format.h>
2 #include <msp/strings/utils.h>
3 #include "except.h"
4 #include "input.h"
5 #include "textparser.h"
6 #include "token.h"
7
8 using namespace std;
9
10 namespace Msp {
11 namespace DataFile {
12
13 TextParser::TextParser(Input &i, const string &s):
14         ParserMode(i, s)
15 { }
16
17 Statement TextParser::parse()
18 {
19         return parse_statement(0);
20 }
21
22 Statement TextParser::parse_statement(const Token *t)
23 {
24         Statement result;
25         bool sub = false;
26         bool finish = false;
27
28         while(in)
29         {
30                 Token token;
31                 if(t)
32                 {
33                         token = *t;
34                         t = 0;
35                 }
36                 else
37                         token = parse_token();
38
39                 if(result.keyword.empty())
40                 {
41                         if(token.str.empty())
42                                 break;
43                         else if(token.type!=Token::IDENTIFIER)
44                                 throw syntax_error(token.str);
45                         result.keyword = token.str;
46                         result.valid = true;
47                         result.source = src;
48                         result.line = in.get_line_number();
49                 }
50                 else if(sub)
51                 {
52                         if(token.str=="}")
53                         {
54                                 sub = false;
55                                 finish = true;
56                         }
57                         else
58                         {
59                                 Statement ss = parse_statement(&token);
60                                 result.sub.push_back(ss);
61                         }
62                 }
63                 else if(finish)
64                 {
65                         if(token.str!=";")
66                                 throw syntax_error(token.str);
67                         break;
68                 }
69                 else if(token.str=="{")
70                         sub = true;
71                 else if(token.str==";")
72                         break;
73                 else if(token.type==Token::INTEGER)
74                         result.append(lexical_cast<IntType::Store>(token.str));
75                 else if(token.type==Token::FLOAT)
76                         result.append(lexical_cast<FloatType::Store>(token.str));
77                 else if(token.type==Token::STRING)
78                         result.append(token.str);
79                 else if(token.type==Token::IDENTIFIER)
80                 {
81                         if(token.str=="true")
82                                 result.append(true);
83                         else if(token.str=="false")
84                                 result.append(false);
85                         else
86                                 result.append(Symbol(token.str));
87                 }
88                 else
89                         throw syntax_error(token.str);
90         }
91
92         return result;
93 }
94
95 Token TextParser::parse_token()
96 {
97         int c = 0;
98         int comment = 0;
99
100         // Skip over comments and whitespace
101         while(in && comment>=0)
102         {
103                 c = in.get();
104                 int next = in.peek();
105
106                 if(c=='/' && next=='/' && !comment)
107                         comment = 1;
108                 else if(c=='/' && next=='*' && !comment)
109                         comment = 2;
110                 else if(c=='\n' && comment==1)
111                         comment = 0;
112                 else if(c=='*' && next=='/' && comment==2)
113                         comment = 3;
114                 else if(comment==3)   // Skip the second character of block comment end
115                         comment = 0;
116                 else if(c!=-1 && !isspace(c) && !comment)
117                         comment = -1;
118         }
119
120         if(comment>0)  // EOF while in comment
121                 throw parse_error(string());
122         else if(comment==0)  // Didn't hit any non-whitespace
123                 return Token(Token::SPECIAL, "");
124
125         enum ParseState
126         {
127                 INIT,
128                 SIGN,
129                 FLOATEXPINIT,
130                 FLOATEXPSIGN,
131                 STRING,
132                 ACCEPT,
133                 ZERO,
134                 DECIMAL,
135                 HEXADECIMAL,
136                 OCTAL,
137                 FLOAT,
138                 FLOATEXP,
139                 STRING_END,
140                 IDENTIFIER
141         };
142
143         static Token::Type token_type[]=
144         {
145                 Token::SPECIAL,
146                 Token::SPECIAL,
147                 Token::SPECIAL,
148                 Token::SPECIAL,
149                 Token::SPECIAL,
150                 Token::SPECIAL,
151                 Token::INTEGER,
152                 Token::INTEGER,
153                 Token::INTEGER,
154                 Token::INTEGER,
155                 Token::FLOAT,
156                 Token::FLOAT,
157                 Token::STRING,
158                 Token::IDENTIFIER
159         };
160
161         ParseState state = INIT;
162         string buf;
163         bool escape = false;
164
165         while(in || state==INIT)
166         {
167                 if(state!=INIT)
168                         c = in.get();
169                 int next = in.peek();
170
171                 buf += c;
172
173                 switch(state)
174                 {
175                 case INIT:
176                         if(c=='0')
177                                 state = ZERO;
178                         else if(c=='-' || c=='+')
179                                 state = SIGN;
180                         else if(c=='.')
181                                 state = FLOAT;
182                         else if(c=='"')
183                                 state = STRING;
184                         else if(c=='{' || c=='}' || c==';')
185                                 return Token(Token::SPECIAL, string(1, c));
186                         else if(isdigit(c))
187                                 state = DECIMAL;
188                         else if(isalpha(c) || c=='_' || c=='\\')
189                                 state = IDENTIFIER;
190                         else
191                                 throw parse_error(buf);
192                         break;
193
194                 case SIGN:
195                         if(c=='0')
196                                 state = ZERO;
197                         else if(isdigit(c))
198                                 state = DECIMAL;
199                         else if(c=='.')
200                                 state = FLOAT;
201                         else
202                                 throw parse_error(buf);
203                         break;
204
205                 case ZERO:
206                         if(c=='x')
207                                 state = HEXADECIMAL;
208                         else if(isdigit(c))
209                                 state = OCTAL;
210                         else if(c=='.')
211                                 state = FLOAT;
212                         else
213                                 throw parse_error(buf);
214                         break;
215
216                 case DECIMAL:
217                         if(c=='.')
218                                 state = FLOAT;
219                         else if(c=='e' || c=='E')
220                                 state = FLOATEXPINIT;
221                         else if(!isdigit(c))
222                                 throw parse_error(buf);
223                         break;
224
225                 case HEXADECIMAL:
226                         if(!isxdigit(c))
227                                 throw parse_error(buf);
228                         break;
229
230                 case OCTAL:
231                         if(!isodigit(c))
232                                 throw parse_error(buf);
233                         break;
234
235                 case FLOAT:
236                         if(c=='e' || c=='E')
237                                 state = FLOATEXPINIT;
238                         else if(!isdigit(c))
239                                 throw parse_error(buf);
240                         break;
241
242                 case FLOATEXPINIT:
243                         if(c=='+' || c=='-')
244                                 state = FLOATEXPSIGN;
245                         else if(isdigit(c))
246                                 state = FLOATEXP;
247                         else
248                                 throw parse_error(buf);
249                         break;
250
251                 case FLOATEXPSIGN:
252                         if(isdigit(c))
253                                 state = FLOATEXP;
254                         else
255                                 throw parse_error(buf);
256                         break;
257
258                 case FLOATEXP:
259                         if(!isdigit(c))
260                                 throw parse_error(buf);
261                         break;
262
263                 case STRING:
264                         if(c=='\\')
265                                 escape = !escape;
266                         else if(c=='"' && !escape)
267                                 state = STRING_END;
268                         else
269                                 escape = false;
270                         break;
271
272                 case IDENTIFIER:
273                         if(!isalpha(c) && !isdigit(c) && c!='_' && c!='-' && c!='/')
274                                 throw parse_error(buf);
275                         break;
276
277                 case STRING_END:
278                         throw parse_error(buf);
279
280                 default:
281                         throw logic_error("bad parser state");
282                 }
283
284                 if(is_delimiter(next) && state>=ACCEPT)
285                 {
286                         if(state==IDENTIFIER && buf[0]=='\\')
287                                 return Token(Token::IDENTIFIER, buf.substr(1));
288                         else if(state==STRING_END)
289                                 return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
290                         else
291                                 return Token(token_type[state], buf);
292                 }
293         }
294
295         return Token(Token::SPECIAL, "");
296 }
297
298 bool TextParser::is_delimiter(int c)
299 {
300         return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/');
301 }
302
303 bool TextParser::isodigit(int c)
304 {
305         return (c>='0' && c<='7');
306 }
307
308 } // namespace DataFile
309 } // namespace Msp