1 #include <msp/strings/format.h>
2 #include <msp/strings/utils.h>
5 #include "textparser.h"
// Constructor taking a reference to the Input to read from and a string.
// The trailing ':' introduces a member initializer list; that list and the
// constructor body are outside this extract, so what the string is used for
// cannot be stated from here.
13 TextParser::TextParser(Input &i, const string &s):
// Parses and returns one Statement by delegating to parse_statement() with a
// null token pointer (presumably meaning that no token has been read ahead --
// verify against parse_statement).
17 Statement TextParser::parse()
19 return parse_statement(0);
// Builds a Statement (keyword, line number, arguments and sub-statements)
// from tokens obtained through parse_token(). It calls itself recursively
// for nested statements. The parameter t is an optional token pointer; the
// recursive call below passes the address of the current token.
// Throws syntax_error carrying the offending token's text.
// NOTE(review): only part of the body is visible in this extract; the
// conditions guarding several of the lines below are not shown.
22 Statement TextParser::parse_statement(const Token *t)
36 token = parse_token();
// No keyword recorded yet: this token is a candidate for the keyword.
38 if(result.keyword.empty())
// A keyword must be an IDENTIFIER token (an earlier branch of this if-chain
// is outside this extract).
42 else if(token.type!=Token::IDENTIFIER)
43 throw syntax_error(token.str);
44 result.keyword = token.str;
// Remember the input line number the statement was read at.
47 result.line = in.get_line_number();
// Parse a nested statement, handing over the current token, and store it as
// a sub-statement of the result.
55 Statement ss = parse_statement(&token);
56 result.sub.push_back(ss);
62 throw syntax_error(token.str);
65 else if(token.str=="{")
67 else if(token.str==";")
// Any token that is not SPECIAL is appended to the statement through
// append_from_token().
69 else if(token.type!=Token::SPECIAL)
70 result.append_from_token(token);
72 throw syntax_error(token.str);
// Reads the next Token from the input. The visible flow first skips comments
// and whitespace, then feeds characters through a state machine (ParseState)
// until the token is complete.
// Throws parse_error on the malformed-input paths shown below, and
// logic_error("bad parser state") from within the state handling
// (presumably the default case of the state switch -- TODO confirm).
// NOTE(review): this extract shows only part of the body; the statements
// between the visible lines, including most state transitions, are not
// documented here.
78 Token TextParser::parse_token()
83 // Skip over comments and whitespace
// Judging by the tests below, `comment` appears to be 0 outside a comment,
// 1 inside a // comment, 2 inside a /* */ comment, 3 while consuming the
// closing '/', and negative once a token character has been found. The
// assignments themselves are outside this extract -- TODO confirm.
84 while(in && comment>=0)
89 if(c=='/' && next=='/' && !comment)
91 else if(c=='/' && next=='*' && !comment)
93 else if(c=='\n' && comment==1)
95 else if(c=='*' && next=='/' && comment==2)
97 else if(comment==3) // Skip the second character of block comment end
// A character that is neither end-of-input (-1) nor whitespace, seen outside
// any comment.
99 else if(c!=-1 && !isspace(c) && !comment)
103 if(comment>0) // EOF while in comment
104 throw parse_error(string());
105 else if(comment==0) // Didn't hit any non-whitespace
106 return Token(Token::SPECIAL, "");
// Table indexed by parser state (see the final return below) giving the
// Token::Type reported for a token that ends in that state.
129 static Token::Type token_type[]=
150 ParseState state = INIT;
// Keep going while the input is good; the state==INIT term lets the loop
// body run at least once even if the input is no longer good.
153 while(in || state==INIT)
// Look at the following character without consuming it; it is used below to
// decide whether the token ends here.
157 int next = in.peek();
166 else if(c=='-' || c=='+')
173 state = STRING_BASE64;
// Braces and semicolons are returned immediately as one-character SPECIAL
// tokens.
174 else if(c=='{' || c=='}' || c==';')
175 return Token(Token::SPECIAL, string(1, c));
178 else if(isalpha(c) || c=='_' || c=='\\')
181 throw parse_error(buf);
192 throw parse_error(buf);
203 throw parse_error(buf);
209 else if(c=='e' || c=='E')
210 state = FLOATEXPINIT;
212 throw parse_error(buf);
217 throw parse_error(buf);
222 throw parse_error(buf);
227 state = FLOATEXPINIT;
229 throw parse_error(buf);
234 state = FLOATEXPSIGN;
238 throw parse_error(buf);
245 throw parse_error(buf);
250 throw parse_error(buf);
255 state = STRING_ESCAPE;
266 state = STRING_BASE64_PAD;
// Characters rejected here are those that are not alphanumeric, '+' or '/'.
267 else if(!isalnum(c) && c!='+' && c!='/')
268 throw parse_error(buf);
271 case STRING_BASE64_PAD:
273 throw parse_error(buf);
// Characters rejected here are those that are not letters, digits, '_', '-'
// or '/'.
277 if(!isalpha(c) && !isdigit(c) && c!='_' && c!='-' && c!='/')
278 throw parse_error(buf);
282 throw parse_error(buf);
285 throw logic_error("bad parser state");
// The token is complete once the upcoming character is a delimiter and the
// current state is an accepting one (state>=ACCEPT).
288 if(is_delimiter(next) && state>=ACCEPT)
// An identifier written with a leading backslash is returned without it.
290 if(state==IDENTIFIER && buf[0]=='\\')
291 return Token(Token::IDENTIFIER, buf.substr(1));
// A finished string: drop the first and last character of the buffer
// (presumably the enclosing quotes) and resolve escapes with c_unescape().
292 else if(state==STRING_END)
293 return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
// Base64 strings are decoded and returned as STRING tokens.
294 else if(state==STRING_BASE64 || state==STRING_BASE64_PAD)
295 return Token(Token::STRING, base64_decode(buf));
// Every other accepting state maps to its token type through the table.
297 return Token(token_type[state], buf);
// Empty SPECIAL token; presumably reached when the loop ends without
// completing a token -- TODO confirm against the full source.
301 return Token(Token::SPECIAL, "");
// Returns true if c can end a token: any whitespace character, '{', '}',
// ';' or '/'. parse_token() passes the peeked next character to this check.
304 bool TextParser::is_delimiter(int c)
306 return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/');
// Returns true if c is an octal digit, i.e. a character in the range
// '0'..'7'.
309 bool TextParser::isodigit(int c)
311 return (c>='0' && c<='7');
// Decodes base64 text in `data` and returns the resulting bytes as a string.
// Throws invalid_argument("TextParser::base64_decode") on one of the paths
// below (presumably for a character outside the base64 alphabet -- the
// guarding condition is outside this extract).
// NOTE(review): the digit-value assignments and the condition that triggers
// byte output are not visible here.
314 string TextParser::base64_decode(const string &data)
// Every 4 input characters carry 3 bytes, so reserve 3/4 of the input size.
317 bin.reserve(data.size()*3/4);
320 for(string::const_iterator i=data.begin(); i!=data.end(); ++i)
// Classify the character by range: upper-case letters, lower-case letters,
// then digits.
323 if(*i>='A' && *i<='Z')
325 else if(*i>='a' && *i<='z')
327 else if(*i>='0' && *i<='9')
336 throw invalid_argument("TextParser::base64_decode");
// Shift the 6-bit value d into the low end of the accumulator.
338 accum = (accum<<6)|d;
// Append the top 8 of the a_bits accumulated bits as one output byte.
343 bin += (accum>>(a_bits-8))&0xFF;
351 } // namespace DataFile