]> git.tdb.fi Git - libs/datafile.git/blob - source/textparser.cpp
Cosmetic changes
[libs/datafile.git] / source / textparser.cpp
1 #include <msp/strings/format.h>
2 #include <msp/strings/utils.h>
3 #include "except.h"
4 #include "input.h"
5 #include "textparser.h"
6 #include "token.h"
7
8 using namespace std;
9
10 namespace Msp {
11 namespace DataFile {
12
13 TextParser::TextParser(Input &i, const string &s):
14         ParserMode(i, s)
15 { }
16
17 Statement TextParser::parse()
18 {
19         return parse_statement(0);
20 }
21
22 Statement TextParser::parse_statement(const Token *t)
23 {
24         Statement result;
25         unsigned sub = 0;
26
27         while(in)
28         {
29                 Token token;
30                 if(t)
31                 {
32                         token = *t;
33                         t = 0;
34                 }
35                 else
36                         token = parse_token();
37
38                 if(result.keyword.empty())
39                 {
40                         if(token.str.empty())
41                                 break;
42                         else if(token.type!=Token::IDENTIFIER)
43                                 throw syntax_error(token.str);
44                         result.keyword = token.str;
45                         result.valid = true;
46                         result.source = src;
47                         result.line = in.get_line_number();
48                 }
49                 else if(sub==1)
50                 {
51                         if(token.str=="}")
52                                 sub = 2;
53                         else
54                         {
55                                 Statement ss = parse_statement(&token);
56                                 result.sub.push_back(ss);
57                         }
58                 }
59                 else if(sub==2)
60                 {
61                         if(token.str!=";")
62                                 throw syntax_error(token.str);
63                         break;
64                 }
65                 else if(token.str=="{")
66                         sub = 1;
67                 else if(token.str==";")
68                         break;
69                 else if(token.type!=Token::SPECIAL)
70                         result.append_from_token(token);
71                 else
72                         throw syntax_error(token.str);
73         }
74
75         return result;
76 }
77
78 Token TextParser::parse_token()
79 {
80         int c = 0;
81         int comment = 0;
82
83         // Skip over comments and whitespace
84         while(in && comment>=0)
85         {
86                 c = in.get();
87                 int next = in.peek();
88
89                 if(c=='/' && next=='/' && !comment)
90                         comment = 1;
91                 else if(c=='/' && next=='*' && !comment)
92                         comment = 2;
93                 else if(c=='\n' && comment==1)
94                         comment = 0;
95                 else if(c=='*' && next=='/' && comment==2)
96                         comment = 3;
97                 else if(comment==3)   // Skip the second character of block comment end
98                         comment = 0;
99                 else if(c!=-1 && !isspace(c) && !comment)
100                         comment = -1;
101         }
102
103         if(comment>0)  // EOF while in comment
104                 throw parse_error(string());
105         else if(comment==0)  // Didn't hit any non-whitespace
106                 return Token(Token::SPECIAL, "");
107
108         enum ParseState
109         {
110                 INIT,
111                 SIGN,
112                 FLOATEXPINIT,
113                 FLOATEXPSIGN,
114                 STRING,
115                 STRING_ESCAPE,
116                 ACCEPT,
117                 ZERO,
118                 DECIMAL,
119                 HEXADECIMAL,
120                 OCTAL,
121                 FLOAT,
122                 FLOATEXP,
123                 STRING_END,
124                 IDENTIFIER
125         };
126
127         static Token::Type token_type[]=
128         {
129                 Token::SPECIAL,
130                 Token::SPECIAL,
131                 Token::SPECIAL,
132                 Token::SPECIAL,
133                 Token::SPECIAL,
134                 Token::SPECIAL,
135                 Token::SPECIAL,
136                 Token::INTEGER,
137                 Token::INTEGER,
138                 Token::INTEGER,
139                 Token::INTEGER,
140                 Token::FLOAT,
141                 Token::FLOAT,
142                 Token::STRING,
143                 Token::IDENTIFIER
144         };
145
146         ParseState state = INIT;
147         string buf;
148
149         while(in || state==INIT)
150         {
151                 if(state!=INIT)
152                         c = in.get();
153                 int next = in.peek();
154
155                 buf += c;
156
157                 switch(state)
158                 {
159                 case INIT:
160                         if(c=='0')
161                                 state = ZERO;
162                         else if(c=='-' || c=='+')
163                                 state = SIGN;
164                         else if(c=='.')
165                                 state = FLOAT;
166                         else if(c=='"')
167                                 state = STRING;
168                         else if(c=='{' || c=='}' || c==';')
169                                 return Token(Token::SPECIAL, string(1, c));
170                         else if(isdigit(c))
171                                 state = DECIMAL;
172                         else if(isalpha(c) || c=='_' || c=='\\')
173                                 state = IDENTIFIER;
174                         else
175                                 throw parse_error(buf);
176                         break;
177
178                 case SIGN:
179                         if(c=='0')
180                                 state = ZERO;
181                         else if(isdigit(c))
182                                 state = DECIMAL;
183                         else if(c=='.')
184                                 state = FLOAT;
185                         else
186                                 throw parse_error(buf);
187                         break;
188
189                 case ZERO:
190                         if(c=='x')
191                                 state = HEXADECIMAL;
192                         else if(isdigit(c))
193                                 state = OCTAL;
194                         else if(c=='.')
195                                 state = FLOAT;
196                         else
197                                 throw parse_error(buf);
198                         break;
199
200                 case DECIMAL:
201                         if(c=='.')
202                                 state = FLOAT;
203                         else if(c=='e' || c=='E')
204                                 state = FLOATEXPINIT;
205                         else if(!isdigit(c))
206                                 throw parse_error(buf);
207                         break;
208
209                 case HEXADECIMAL:
210                         if(!isxdigit(c))
211                                 throw parse_error(buf);
212                         break;
213
214                 case OCTAL:
215                         if(!isodigit(c))
216                                 throw parse_error(buf);
217                         break;
218
219                 case FLOAT:
220                         if(c=='e' || c=='E')
221                                 state = FLOATEXPINIT;
222                         else if(!isdigit(c))
223                                 throw parse_error(buf);
224                         break;
225
226                 case FLOATEXPINIT:
227                         if(c=='+' || c=='-')
228                                 state = FLOATEXPSIGN;
229                         else if(isdigit(c))
230                                 state = FLOATEXP;
231                         else
232                                 throw parse_error(buf);
233                         break;
234
235                 case FLOATEXPSIGN:
236                         if(isdigit(c))
237                                 state = FLOATEXP;
238                         else
239                                 throw parse_error(buf);
240                         break;
241
242                 case FLOATEXP:
243                         if(!isdigit(c))
244                                 throw parse_error(buf);
245                         break;
246
247                 case STRING:
248                         if(c=='\\')
249                                 state = STRING_ESCAPE;
250                         else if(c=='"')
251                                 state = STRING_END;
252                         break;
253
254                 case STRING_ESCAPE:
255                         state = STRING;
256                         break;
257
258                 case IDENTIFIER:
259                         if(!isalpha(c) && !isdigit(c) && c!='_' && c!='-' && c!='/')
260                                 throw parse_error(buf);
261                         break;
262
263                 case STRING_END:
264                         throw parse_error(buf);
265
266                 default:
267                         throw logic_error("bad parser state");
268                 }
269
270                 if(is_delimiter(next) && state>=ACCEPT)
271                 {
272                         if(state==IDENTIFIER && buf[0]=='\\')
273                                 return Token(Token::IDENTIFIER, buf.substr(1));
274                         else if(state==STRING_END)
275                                 return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
276                         else
277                                 return Token(token_type[state], buf);
278                 }
279         }
280
281         return Token(Token::SPECIAL, "");
282 }
283
284 bool TextParser::is_delimiter(int c)
285 {
286         return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/');
287 }
288
289 bool TextParser::isodigit(int c)
290 {
291         return (c>='0' && c<='7');
292 }
293
294 } // namespace DataFile
295 } // namespace Msp