-/* $Id$
-
-This file is part of libmspdatafile
-Copyright © 2007 Mikko Rasa, Mikkosoft Productions
-Distributed under the LGPL
-*/
-
-#include <msp/strings/formatter.h>
+#include <msp/strings/format.h>
#include <msp/strings/utils.h>
+#include "except.h"
#include "input.h"
#include "textparser.h"
#include "token.h"
// Parses and returns the next statement from the input.
// Simply delegates to parse_statement() without a pre-read token.
Statement TextParser::parse()
{
- return parse_statement(0);
+ return parse_statement(nullptr);
}
// Assembles one statement (keyword, arguments, optional { } sub-block) from
// successive tokens.
//
// t: optional token that has already been read; when non-null it is used as
//    the first token instead of calling parse_token().
// Returns the statement built so far.  If the first token has empty text
// (parse_token() reports end of input that way), the loop stops before
// keyword, valid, source or line are assigned.
// Throws the exception produced by syntax_error() when a token appears where
// it is not allowed.
Statement TextParser::parse_statement(const Token *t)
{
Statement result;
- bool sub=false;
- bool finish=false;
// sub (new code): 0 = no sub-block seen, 1 = inside "{ ... }",
// 2 = "}" has been read and only ";" may follow.
+ unsigned sub = 0;
while(in)
{
Token token;
// Use the caller-supplied token once, then read fresh tokens from input.
if(t)
{
- token=*t;
- t=0;
+ token = *t;
+ t = nullptr;
}
else
- token=parse_token();
+ token = parse_token();
// No keyword yet: the first token must be an identifier naming the statement.
if(result.keyword.empty())
{
if(token.str.empty())
break;
else if(token.type!=Token::IDENTIFIER)
- throw ParseError(format("%s: Syntax error at token '%s' (expected an identifier)", get_location(), token.str), src, in.get_line_number());
- result.keyword=token.str;
- result.valid=true;
- result.source=src;
- result.line=in.get_line_number();
+ throw syntax_error(token.str);
+ result.keyword = token.str;
+ result.valid = true;
+ result.source = src;
+ result.line = in.get_line_number();
}
- else if(sub)
+ else if(sub==1)
{
// Inside a sub-block: "}" closes it; any other token becomes the first
// token of a nested statement, parsed recursively.
if(token.str=="}")
- {
- sub=false;
- finish=true;
- }
+ sub = 2;
else
{
- Statement ss=parse_statement(&token);
+ Statement ss = parse_statement(&token);
result.sub.push_back(ss);
}
}
- else if(finish)
+ else if(sub==2)
{
// After the closing "}" the statement must be terminated with ";".
if(token.str!=";")
- throw ParseError(format("%s: Syntax error at token '%s' (Expected a ';')", get_location(), token.str), src, in.get_line_number());
+ throw syntax_error(token.str);
break;
}
else if(token.str=="{")
- sub=true;
+ sub = 1;
else if(token.str==";")
break;
- else if(token.type==Token::INTEGER)
- result.args.push_back(Value(INTEGER, token.str));
- else if(token.type==Token::FLOAT)
- result.args.push_back(Value(FLOAT, token.str));
- else if(token.type==Token::STRING)
- result.args.push_back(Value(STRING, token.str));
- else if(token.type==Token::IDENTIFIER)
- {
- if(token.str=="true")
- result.args.push_back(Value(BOOLEAN, "1"));
- else if(token.str=="false")
- result.args.push_back(Value(BOOLEAN, "0"));
- else
- result.args.push_back(Value(ENUM, token.str));
- //result.args.push_back(resolve_identifiertoken.str);
- }
- else if(token.str=="")
- throw ParseError(src+": Unexcepted end of input", src, in.get_line_number());
// New code: every non-SPECIAL token is handed to result.append_from_token();
// a SPECIAL token reaching this point is rejected.
+ else if(token.type!=Token::SPECIAL)
+ result.append_from_token(token);
else
- throw ParseError(get_location()+": Syntax error", src, in.get_line_number());
+ throw syntax_error(token.str);
}
return result;
// Reads the next token from the input.
//
// Works in two phases: first whitespace and // or /* */ comments are skipped,
// then a character-driven state machine collects the token text in buf.
// Returns a SPECIAL token with empty text when only whitespace/comments
// remained, or when the state machine loop ends without accepting a token.
// Throws the exception produced by parse_error() on an unexpected character
// or when input ends inside a comment.
Token TextParser::parse_token()
{
- int c=0;
- unsigned comment=0;
+ int c = 0;
+ int comment = 0;
// Skip over comments and whitespace
// comment (new code): 0 = outside any comment, 1 = inside a // comment,
// 2 = inside a /* */ comment, 3 = '*' of the closing */ was just read,
// -1 = a significant character was found and is held in c (ends the loop).
- while(in)
+ while(in && comment>=0)
{
- c=in.get();
- int next=in.peek();
+ c = in.get();
+ int next = in.peek();
- if(c=='/' && next=='/')
- comment=1;
- else if(c=='/' && next=='*')
- comment=2;
+ if(c=='/' && next=='/' && !comment)
+ comment = 1;
+ else if(c=='/' && next=='*' && !comment)
+ comment = 2;
else if(c=='\n' && comment==1)
- comment=0;
+ comment = 0;
else if(c=='*' && next=='/' && comment==2)
- comment=3;
+ comment = 3;
else if(comment==3) // Skip the second character of block comment end
- comment=0;
- else if(!isspace(c) && !comment)
- break;
+ comment = 0;
+ else if(c!=-1 && !isspace(c) && !comment)
+ comment = -1;
}
- if(comment) // Didn't hit any non-whitespace
- throw ParseError(src+": Unfinished comment at end of input", src, in.get_line_number());
+ if(comment>0) // EOF while in comment
+ throw parse_error(string());
+ else if(comment==0) // Didn't hit any non-whitespace
+ return Token(Token::SPECIAL, "");
// States of the token state machine.  The check near the end of the loop
// (state>=ACCEPT) treats every state listed from ACCEPT onwards as one in
// which the token may end.  Only part of the enumerator list is visible in
// this excerpt (INIT, SIGN and HEXADECIMAL are used below but not shown).
enum ParseState
{
FLOATEXPINIT,
FLOATEXPSIGN,
STRING,
+ STRING_ESCAPE,
+ STRING_BASE64,
ACCEPT,
ZERO,
DECIMAL,
OCTAL,
FLOAT,
FLOATEXP,
+ STRING_END,
+ STRING_BASE64_PAD,
IDENTIFIER
};
// Token type table, indexed by ParseState (see token_type[state] below), so
// its entries have to stay in the same order as the enumerators above.
// The declaration line itself is not visible in this excerpt.
Token::SPECIAL,
Token::SPECIAL,
Token::SPECIAL,
- Token::STRING,
+ Token::SPECIAL,
+ Token::SPECIAL,
+ Token::SPECIAL,
Token::SPECIAL,
Token::INTEGER,
Token::INTEGER,
Token::INTEGER,
Token::FLOAT,
Token::FLOAT,
+ Token::STRING,
+ Token::STRING,
Token::IDENTIFIER
};
- ParseState state=INIT;
- string buf;
- bool escape=false;
+ ParseState state = INIT;
+ string buf;
- while(in)
+ while(in || state==INIT)
{
// On the first pass (state==INIT) c still holds the character found by the
// skip loop above, so a new character is read only on later passes.
if(state!=INIT)
- c=in.get();
- int next=in.peek();
+ c = in.get();
+ int next = in.peek();
- buf+=c;
+ buf += c;
switch(state)
{
case INIT:
// The first character selects the kind of token being read.
if(c=='0')
- state=ZERO;
+ state = ZERO;
else if(c=='-' || c=='+')
- state=SIGN;
+ state = SIGN;
else if(c=='.')
- state=FLOAT;
+ state = FLOAT;
else if(c=='"')
- state=STRING;
+ state = STRING;
+ else if(c=='=')
+ state = STRING_BASE64;
else if(c=='{' || c=='}' || c==';')
return Token(Token::SPECIAL, string(1, c));
else if(isdigit(c))
- state=DECIMAL;
- else if(isalpha(c) || c=='_')
- state=IDENTIFIER;
+ state = DECIMAL;
+ else if(isalpha(c) || c=='_' || c=='\\')
+ state = IDENTIFIER;
else
- parse_error(c, state);
+ throw parse_error(buf);
break;
case SIGN:
if(c=='0')
- state=ZERO;
+ state = ZERO;
else if(isdigit(c))
- state=DECIMAL;
+ state = DECIMAL;
else if(c=='.')
- state=FLOAT;
+ state = FLOAT;
else
- parse_error(c, state);
+ throw parse_error(buf);
break;
case ZERO:
if(c=='x')
- state=HEXADECIMAL;
+ state = HEXADECIMAL;
else if(isdigit(c))
- state=OCTAL;
+ state = OCTAL;
else if(c=='.')
- state=FLOAT;
+ state = FLOAT;
else
- parse_error(c, state);
+ throw parse_error(buf);
break;
case DECIMAL:
if(c=='.')
- state=FLOAT;
+ state = FLOAT;
+ else if(c=='e' || c=='E')
+ state = FLOATEXPINIT;
else if(!isdigit(c))
- parse_error(c, state);
+ throw parse_error(buf);
break;
case HEXADECIMAL:
if(!isxdigit(c))
- parse_error(c, state);
+ throw parse_error(buf);
break;
case OCTAL:
if(!isodigit(c))
- parse_error(c, state);
+ throw parse_error(buf);
break;
case FLOAT:
if(c=='e' || c=='E')
- state=FLOATEXPINIT;
+ state = FLOATEXPINIT;
else if(!isdigit(c))
- parse_error(c, state);
+ throw parse_error(buf);
break;
case FLOATEXPINIT:
if(c=='+' || c=='-')
- state=FLOATEXPSIGN;
+ state = FLOATEXPSIGN;
else if(isdigit(c))
- state=FLOATEXP;
+ state = FLOATEXP;
else
- parse_error(c, state);
+ throw parse_error(buf);
break;
case FLOATEXPSIGN:
if(isdigit(c))
- state=FLOATEXP;
+ state = FLOATEXP;
else
- parse_error(c, state);
+ throw parse_error(buf);
break;
case FLOATEXP:
if(!isdigit(c))
- parse_error(c, state);
+ throw parse_error(buf);
break;
case STRING:
if(c=='\\')
- escape=!escape;
- else if(c=='"' && !escape)
- {
- try
- {
- return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
- }
- catch(const Exception &e)
- {
- throw ParseError(format("%s: %s", get_location(), e.what()), src, in.get_line_number());
- }
- }
- else
- escape=false;
+ state = STRING_ESCAPE;
+ else if(c=='"')
+ state = STRING_END;
+ break;
+
+ case STRING_ESCAPE:
// The character following a backslash is taken as-is (it is already in
// buf); go back to normal string scanning.
+ state = STRING;
+ break;
+
+ case STRING_BASE64:
+ if(c=='=')
+ state = STRING_BASE64_PAD;
+ else if(!isalnum(c) && c!='+' && c!='/')
+ throw parse_error(buf);
+ break;
+
+ case STRING_BASE64_PAD:
+ if(c!='=')
+ throw parse_error(buf);
break;
case IDENTIFIER:
- if(!isalpha(c) && !isdigit(c) && c!='_')
- parse_error(c, state);
+ if(!isalpha(c) && !isdigit(c) && c!='_' && c!='-' && c!='/')
+ throw parse_error(buf);
break;
+ case STRING_END:
// Getting here means another character was read after the closing quote,
// i.e. the quote was not followed by a delimiter.
+ throw parse_error(buf);
+
default:
- throw Exception(get_location()+": Internal error (bad state)");
+ throw logic_error("bad parser state");
}
// The token ends once the upcoming character is a delimiter and the machine
// is in an accepting state.
if(is_delimiter(next) && state>=ACCEPT)
- return Token(token_type[state], buf);
+ {
// A leading backslash on an identifier is dropped from the token text.
+ if(state==IDENTIFIER && buf[0]=='\\')
+ return Token(Token::IDENTIFIER, buf.substr(1));
// Strip the surrounding quotes before unescaping.
+ else if(state==STRING_END)
+ return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
// NOTE(review): STRING_BASE64 appears before ACCEPT in the visible enum, so
// the state==STRING_BASE64 half of this test looks unreachable inside the
// state>=ACCEPT branch - confirm against the full enumerator list.
+ else if(state==STRING_BASE64 || state==STRING_BASE64_PAD)
+ return Token(Token::STRING, base64_decode(buf));
+ else
+ return Token(token_type[state], buf);
+ }
}
return Token(Token::SPECIAL, "");
return (c>='0' && c<='7');
}
-string TextParser::get_location()
// Decodes base64 text into raw bytes.
//
// data: characters from the alphabet A-Z, a-z, 0-9, '+' and '/'; every '='
//       is skipped wherever it occurs.  parse_token() passes its whole
//       buffer, including the leading '=' marker, which is skipped the same
//       way.
// Returns the decoded bytes as a string.
// Throws invalid_argument for any other character.
+string TextParser::base64_decode(const string &data)
{
- ostringstream ss;
- ss<<src<<':'<<in.get_line_number();
- return ss.str();
-}
+ string bin;
// Four input characters yield at most three output bytes.
+ bin.reserve(data.size()*3/4);
// accum collects 6 bits per input character; a_bits counts how many of its
// low bits have not yet been emitted as output.
+ unsigned accum = 0;
+ unsigned a_bits = 0;
+ for(char c: data)
+ {
// Map the character to its 6-bit value d.
+ unsigned d;
+ if(c>='A' && c<='Z')
+ d = c-'A';
+ else if(c>='a' && c<='z')
+ d = 26+(c-'a');
+ else if(c>='0' && c<='9')
+ d = 52+(c-'0');
+ else if(c=='+')
+ d = 62;
+ else if(c=='/')
+ d = 63;
+ else if(c=='=')
+ continue;
+ else
+ throw invalid_argument("TextParser::base64_decode");
-void TextParser::parse_error(int c, int state)
-{
- throw ParseError(format("%s: Parse error at '%c' (state %d)", get_location(), static_cast<char>(c), state), src, in.get_line_number());
+ accum = (accum<<6)|d;
+ a_bits += 6;
+
// Emit the oldest 8 pending bits as soon as a full byte is available.
// accum is never masked: only its low a_bits bits are read back, so stale
// higher bits are harmless.
+ if(a_bits>=8)
+ {
+ bin += (accum>>(a_bits-8))&0xFF;
+ a_bits -= 8;
+ }
+ }
+
// Fewer than 8 leftover bits at the end are discarded.
+ return bin;
}
} // namespace DataFile