#include <msp/strings/format.h>
#include <msp/strings/utils.h>
+#include "except.h"
#include "input.h"
#include "textparser.h"
#include "token.h"
// Parses one statement from the input and returns it.
// No pre-read token is handed over, so parse_statement() obtains its first
// token on its own.
Statement TextParser::parse()
{
// The patch only replaces the integer literal 0 with nullptr as the null
// token pointer; the call is otherwise the same.
- return parse_statement(0);
+ return parse_statement(nullptr);
}
// Builds a single Statement by consuming tokens until the input stream is
// exhausted or the loop is left with break.
//
// Parameter t: optional token that was already read by the caller. When it is
// non-null it is used as the first token of this statement and then cleared,
// so every later iteration reads from parse_token().
//
// Returns the assembled Statement (keyword, validity flag, source, line
// number, arguments and nested sub-statements).
//
// NOTE(review): this is a patch view with missing context lines. The
// condition that opens the keyword branch below and the closing brace of the
// function are not visible here.
Statement TextParser::parse_statement(const Token *t)
{
Statement result;
// The two booleans of the old code are folded into one counter:
//   0 - no sub-block has been opened,
//   1 - inside a brace-delimited sub-block,
//   2 - the closing brace was seen and a semicolon must follow.
- bool sub = false;
- bool finish = false;
+ unsigned sub = 0;
while(in)
{
if(t)
{
// Use the token supplied by the caller exactly once.
token = *t;
- t = 0;
+ t = nullptr;
}
else
token = parse_token();
// Keyword branch (its opening condition is outside this view): a token with
// an empty string ends the loop, anything other than an identifier is
// rejected, and otherwise the statement header fields are filled in.
if(token.str.empty())
break;
else if(token.type!=Token::IDENTIFIER)
// The patch replaces the formatted ParseError plus location with a
// syntax_error built from the offending token text (presumably declared in
// the newly included except.h - confirm).
- throw_at(ParseError(format("Syntax error at token '%s' (expected an identifier)", token.str)), get_location());
+ throw syntax_error(token.str);
result.keyword = token.str;
result.valid = true;
result.source = src;
result.line = in.get_line_number();
}
- else if(sub)
+ else if(sub==1)
{
// Inside a sub-block: a closing brace moves on to the expect-semicolon
// phase; any other token starts a nested statement, parsed recursively with
// the current token handed over as its first token.
if(token.str=="}")
- {
- sub = false;
- finish = true;
- }
+ sub = 2;
else
{
Statement ss = parse_statement(&token);
result.sub.push_back(ss);
}
}
- else if(finish)
+ else if(sub==2)
{
// After the closing brace only a semicolon is accepted; it ends the statement.
if(token.str!=";")
- throw_at(ParseError(format("Syntax error at token '%s' (Expected a ';')", token.str)), get_location());
+ throw syntax_error(token.str);
break;
}
else if(token.str=="{")
- sub = true;
+ sub = 1;
else if(token.str==";")
break;
// The removed lines converted each token type to an argument inline. The
// added lines hand every token that is not SPECIAL to
// Statement::append_from_token instead, and treat the remaining SPECIAL
// tokens as syntax errors.
- else if(token.type==Token::INTEGER)
- result.append(lexical_cast<IntType::Store>(token.str));
- else if(token.type==Token::FLOAT)
- result.append(lexical_cast<FloatType::Store>(token.str));
- else if(token.type==Token::STRING)
- result.append(token.str);
- else if(token.type==Token::IDENTIFIER)
- {
- if(token.str=="true")
- result.append(true);
- else if(token.str=="false")
- result.append(false);
- else
- result.append(Symbol(token.str));
- }
- else if(token.str=="")
- throw_at(ParseError("Unexcepted end of input"), get_location());
+ else if(token.type!=Token::SPECIAL)
+ result.append_from_token(token);
else
- throw_at(ParseError("Syntax error"), get_location());
+ throw syntax_error(token.str);
}
return result;
// Fragment of a comment-skipping loop; the enclosing function header and the
// start of the loop are not visible in this view (presumably parse_token -
// confirm).
//
// The comment variable tracks the current mode: 1 is set for a line comment
// opened by two slashes, 2 for a block comment opened by slash-star, and 0
// means not inside a comment.
c = in.get();
int next = in.peek();
// The added guards make a comment opener count only while comment is 0, so an
// opener sequence that appears inside an existing comment no longer switches
// the mode.
- if(c=='/' && next=='/')
+ if(c=='/' && next=='/' && !comment)
comment = 1;
- else if(c=='/' && next=='*')
+ else if(c=='/' && next=='*' && !comment)
comment = 2;
// A newline terminates a line comment.
else if(c=='\n' && comment==1)
comment = 0;
}
// After the loop: a positive mode means input ended inside a comment, which
// is now reported with a parse_error built from an empty string. A mode of 0
// yields an empty SPECIAL token.
// NOTE(review): the remaining case (comment below 0) is handled by code
// outside this view.
if(comment>0) // EOF while in comment
- throw_at(ParseError("Unfinished comment at end of input"), get_location());
+ throw parse_error(string());
else if(comment==0) // Didn't hit any non-whitespace
return Token(Token::SPECIAL, "");
// Tail of the tokenizer state enumeration; its beginning is outside this view.
// The patch adds three states: STRING_ESCAPE and STRING_BASE64 ahead of
// ACCEPT, and STRING_BASE64_PAD between STRING_END and IDENTIFIER.
// Enumerator order matters because the tokenizer tests state>=ACCEPT before
// it returns a finished token.
// NOTE(review): STRING_BASE64 is listed before ACCEPT here, so a token still
// in that state would not pass a state>=ACCEPT test - confirm against the
// full file whether unpadded base64 tokens are meant to be accepted.
FLOATEXPINIT,
FLOATEXPSIGN,
STRING,
+ STRING_ESCAPE,
+ STRING_BASE64,
ACCEPT,
ZERO,
DECIMAL,
FLOAT,
FLOATEXP,
STRING_END,
+ STRING_BASE64_PAD,
IDENTIFIER
};
// Tail of a table of Token types; its declaration is outside this view.
// The tokenizer indexes a table named token_type by the current state when it
// returns a finished token, so this is presumably that table with one entry
// per state - confirm.
// The patch adds two SPECIAL entries and one STRING entry, which matches the
// number and relative placement of the three states added to the enumeration.
// NOTE(review): verify entry-by-entry alignment with the enumeration in the
// full file; this view does not show every row.
Token::SPECIAL,
Token::SPECIAL,
Token::SPECIAL,
+ Token::SPECIAL,
+ Token::SPECIAL,
Token::INTEGER,
Token::INTEGER,
Token::INTEGER,
Token::FLOAT,
Token::FLOAT,
Token::STRING,
+ Token::STRING,
Token::IDENTIFIER
};
// Fragment of the character-driven tokenizer state machine. Many context
// lines are missing from this view (the switch header, several case labels
// and some branch bodies), so only the visible transitions are described.
//
// Throughout this fragment the patch replaces calls of the form
// parse_error(c, expected-set) with throw parse_error(buf): the error now
// carries the text accumulated so far rather than the offending character
// and a list of expected characters. parse_error is presumably an exception
// type from the project error header - confirm.
ParseState state = INIT;
string buf;
// The escape flag is dropped; escapes are tracked by the STRING_ESCAPE state.
- bool escape = false;
while(in || state==INIT)
{
// Visible transitions of the initial state: a double quote starts a string,
// an equals sign starts a base64 string (added by the patch), a brace or a
// semicolon is returned at once as a one-character SPECIAL token, and a
// letter, underscore or backslash starts an identifier.
state = FLOAT;
else if(c=='"')
state = STRING;
+ else if(c=='=')
+ state = STRING_BASE64;
else if(c=='{' || c=='}' || c==';')
return Token(Token::SPECIAL, string(1, c));
else if(isdigit(c))
else if(isalpha(c) || c=='_' || c=='\\')
state = IDENTIFIER;
else
- parse_error(c, "0-9A-Za-z_\\.\"{};+-");
+ throw parse_error(buf);
break;
case SIGN:
else if(c=='.')
state = FLOAT;
else
- parse_error(c, "0-9.");
+ throw parse_error(buf);
break;
case ZERO:
else if(c=='.')
state = FLOAT;
else
- parse_error(c, "0-9A-Fa-f.");
+ throw parse_error(buf);
break;
case DECIMAL:
// A decimal point switches to the fractional part. The patch additionally
// lets an exponent marker follow the integer digits directly.
if(c=='.')
state = FLOAT;
+ else if(c=='e' || c=='E')
+ state = FLOATEXPINIT;
else if(!isdigit(c))
- parse_error(c, "0-9.");
+ throw parse_error(buf);
break;
case HEXADECIMAL:
if(!isxdigit(c))
- parse_error(c, "0-9A-Fa-f");
+ throw parse_error(buf);
break;
case OCTAL:
if(!isodigit(c))
- parse_error(c, "0-7");
+ throw parse_error(buf);
break;
case FLOAT:
if(c=='e' || c=='E')
state = FLOATEXPINIT;
else if(!isdigit(c))
- parse_error(c, "0-9Ee");
+ throw parse_error(buf);
break;
case FLOATEXPINIT:
else if(isdigit(c))
state = FLOATEXP;
else
- parse_error(c, "0-9+-");
+ throw parse_error(buf);
break;
case FLOATEXPSIGN:
if(isdigit(c))
state = FLOATEXP;
else
- parse_error(c, "0-9");
+ throw parse_error(buf);
break;
case FLOATEXP:
if(!isdigit(c))
- parse_error(c, "0-9");
+ throw parse_error(buf);
break;
case STRING:
// New behaviour: a backslash enters STRING_ESCAPE and a double quote ends the
// string. The old code toggled a boolean flag for the same purpose.
if(c=='\\')
- escape = !escape;
- else if(c=='"' && !escape)
+ state = STRING_ESCAPE;
+ else if(c=='"')
state = STRING_END;
- else
- escape = false;
+ break;
+
// Whatever character follows a backslash is taken as part of the string and
// the machine returns to STRING, so an escaped double quote does not end it.
+ case STRING_ESCAPE:
+ state = STRING;
+ break;
+
// Base64 body: an equals sign begins the padding; every other character must
// be alphanumeric, a plus sign or a slash.
+ case STRING_BASE64:
+ if(c=='=')
+ state = STRING_BASE64_PAD;
+ else if(!isalnum(c) && c!='+' && c!='/')
+ throw parse_error(buf);
+ break;
+
// Once padding has started, only further equals signs are allowed.
+ case STRING_BASE64_PAD:
+ if(c!='=')
+ throw parse_error(buf);
break;
case IDENTIFIER:
if(!isalpha(c) && !isdigit(c) && c!='_' && c!='-' && c!='/')
- parse_error(c, "0-9A-Za-z_/-");
+ throw parse_error(buf);
break;
// Any character processed while already in STRING_END is rejected.
case STRING_END:
- throw_at(ParseError("Garbage after string"), get_location());
+ throw parse_error(buf);
// An unknown state is an internal bug, now reported as logic_error.
default:
- throw_at(InvalidState("Internal error (bad state)"), get_location());
+ throw logic_error("bad parser state");
}
// A token is complete when the upcoming character is a delimiter and the
// current state is ACCEPT or later in the enumeration.
if(is_delimiter(next) && state>=ACCEPT)
// An identifier written with a leading backslash is returned without it.
if(state==IDENTIFIER && buf[0]=='\\')
return Token(Token::IDENTIFIER, buf.substr(1));
else if(state==STRING_END)
// The surrounding quotes are stripped and the escapes are decoded. The old
// try and catch wrapper that attached a location to the exception is removed.
- {
- try
- {
- return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
- }
- catch(Exception &e)
- {
- e.at(get_location());
- throw;
- }
- }
+ return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
// Base64 tokens are decoded from the whole buffer; base64_decode skips equals
// signs itself.
// NOTE(review): in the visible part of the state enumeration STRING_BASE64
// comes before ACCEPT, so if the state>=ACCEPT test above guards this chain
// the STRING_BASE64 half of this condition cannot be true - confirm.
+ else if(state==STRING_BASE64 || state==STRING_BASE64_PAD)
+ return Token(Token::STRING, base64_decode(buf));
else
return Token(token_type[state], buf);
}
return (c>='0' && c<='7');
}
// This region of the patch removes the get_location() and parse_error()
// helpers (lines marked with a minus) and adds base64_decode() (lines marked
// with a plus); the removed and added lines are interleaved below.
//
// base64_decode converts base64 text into the raw bytes it encodes.
// Parameter data: the base64 text. Equals signs are skipped wherever they
// occur (presumably this covers both a leading marker and trailing padding
// of a base64 token - confirm against the tokenizer).
// Returns the decoded bytes as a string.
// Throws invalid_argument when data holds a character that is neither part
// of the base64 alphabet nor an equals sign.
-string TextParser::get_location()
+string TextParser::base64_decode(const string &data)
{
- ostringstream ss;
- ss<<src<<':'<<in.get_line_number();
- return ss.str();
-}
+ string bin;
// Four input characters encode at most three bytes, so this reserves enough
// space up front.
+ bin.reserve(data.size()*3/4);
// accum collects decoded bits; a_bits counts how many of its low bits have
// not been written out yet.
+ unsigned accum = 0;
+ unsigned a_bits = 0;
+ for(char c: data)
+ {
// Map the character to its 6-bit value: uppercase letters are 0 to 25,
// lowercase letters 26 to 51, digits 52 to 61, plus is 62 and slash is 63.
+ unsigned d;
+ if(c>='A' && c<='Z')
+ d = c-'A';
+ else if(c>='a' && c<='z')
+ d = 26+(c-'a');
+ else if(c>='0' && c<='9')
+ d = 52+(c-'0');
+ else if(c=='+')
+ d = 62;
+ else if(c=='/')
+ d = 63;
+ else if(c=='=')
+ continue;
+ else
+ throw invalid_argument("TextParser::base64_decode");
-void TextParser::parse_error(int c, const char *e)
-{
- throw_at(ParseError(format("Parse error at '%c', expected one of \"%s\"", static_cast<char>(c), e)), get_location());
// Shift the new 6 bits in; once at least 8 bits are pending, emit the oldest
// 8 of them as one output byte.
+ accum = (accum<<6)|d;
+ a_bits += 6;
+
+ if(a_bits>=8)
+ {
+ bin += (accum>>(a_bits-8))&0xFF;
+ a_bits -= 8;
+ }
+ }
+
// Any pending bits left over (fewer than 8) are dropped.
+ return bin;
}
} // namespace DataFile