X-Git-Url: http://git.tdb.fi/?p=libs%2Fdatafile.git;a=blobdiff_plain;f=source%2Fparser.cpp;h=82ac0069b95d4f22e3d0de1ab0c0b66b04c76423;hp=8fccec2f94c208d93f966ec88f4279eeb133b081;hb=27630d44298cb67e075c166f4421288cc8ca117e;hpb=c4930d8d15a5a248ca921e0ed3f9bca8aa18b322 diff --git a/source/parser.cpp b/source/parser.cpp index 8fccec2..82ac006 100644 --- a/source/parser.cpp +++ b/source/parser.cpp @@ -6,10 +6,10 @@ Distributed under the LGPL */ #include #include -#include "error.h" +#include "binaryparser.h" #include "parser.h" #include "statement.h" -#include "token.h" +#include "textparser.h" using namespace std; @@ -19,9 +19,15 @@ namespace DataFile { Parser::Parser(istream &i, const string &s): in(i), src(s), - good(true) + good(true), + mode(new TextParser(in, src)) { } +Parser::~Parser() +{ + delete mode; +} + Statement Parser::parse() { if(!good) @@ -29,351 +35,28 @@ Statement Parser::parse() try { - return parse_(0); - } - catch(const Exception &e) - { - good=false; - throw; - } -} - -Statement Parser::parse_(const Token *t) -{ - Statement result; - bool sub=false; - bool finish=false; - - while(in) - { - Token token; - if(t) - { - token=*t; - t=0; - } - else - token=parse_token(); - - if(result.keyword.empty()) - { - if(token.str.empty()) - break; - else if(token.type!=Token::IDENTIFIER) - throw ParseError(get_location()+": Syntax error at token '"+token.str+"' (expected an identifier)", src, in.get_line_number()); - result.keyword=token.str; - result.valid=true; - result.source=src; - result.line=in.get_line_number(); - } - else if(sub) + while(1) { - if(token.str=="}") + Statement st=mode->parse(); + if(st.keyword=="__bin") { - sub=false; - finish=true; + delete mode; + mode=new BinaryParser(in, src); } - else + else if(st.keyword=="__text") { - Statement ss=parse_(&token); - result.sub.push_back(ss); + delete mode; + mode=new TextParser(in, src); } - } - else if(finish) - { - if(token.str!=";") - throw ParseError(get_location()+": Syntax error at token '"+token.str+"' (Expected a ';')", src, in.get_line_number()); - break; - } - else if(token.str=="{") - sub=true; - else if(token.str==";") - break; - else if(token.type==Token::INTEGER) - result.args.push_back(Value(Value::INTEGER, token.str)); - else if(token.type==Token::FLOAT) - result.args.push_back(Value(Value::FLOAT, token.str)); - else if(token.type==Token::STRING) - result.args.push_back(Value(Value::STRING, token.str)); - else if(token.type==Token::IDENTIFIER) - { - if(token.str=="true") - result.args.push_back(Value(Value::BOOLEAN, "1")); - else if(token.str=="false") - result.args.push_back(Value(Value::BOOLEAN, "0")); else - result.args.push_back(Value(Value::ENUM, token.str)); - //result.args.push_back(resolve_identifiertoken.str); + return st; } - else if(token.str=="") - throw ParseError(src+": Unexcepted EOF", src, in.get_line_number()); - else - throw ParseError(get_location()+": Syntax error", src, in.get_line_number()); } - - return result; -} - -Token Parser::parse_token() -{ - int c=0; - unsigned comment=0; - - // Skip over comments and whitespace - while(in) - { - c=in.get(); - int next=in.peek(); - - if(c=='/' && next=='/') - comment=1; - else if(c=='/' && next=='*') - comment=2; - else if(c=='\n' && comment==1) - comment=0; - else if(c=='*' && next=='/' && comment==2) - comment=3; - else if(comment==3) // Skip the second character of block comment end - comment=0; - else if(!isspace(c) && !comment) - break; - } - - if(comment) // Didn't hit any non-whitespace - throw ParseError(src+": Unfinished comment", src, in.get_line_number()); - - enum ParseState - { - INIT, - SIGN, - FLOATEXPINIT, - FLOATEXPSIGN, - STRING, - ACCEPT, - ZERO, - DECIMAL, - HEXADECIMAL, - OCTAL, - FLOAT, - FLOATEXP, - IDENTIFIER - }; - - static Token::Type token_type[]= - { - Token::SPECIAL, - Token::SPECIAL, - Token::SPECIAL, - Token::SPECIAL, - Token::STRING, - Token::SPECIAL, - Token::INTEGER, - Token::INTEGER, - Token::INTEGER, - Token::INTEGER, - Token::FLOAT, - Token::FLOAT, - Token::IDENTIFIER - }; - - ParseState state=INIT; - string buf; - bool escape=false; - - while(in) - { - if(state!=INIT) - c=in.get(); - int next=in.peek(); - - buf+=c; - - switch(state) - { - case INIT: - if(c=='0') - state=ZERO; - else if(c=='-' || c=='+') - state=SIGN; - else if(c=='.') - state=FLOAT; - else if(c=='"') - state=STRING; - else if(c=='{' || c=='}' || c==';') - return Token(Token::SPECIAL, string(1, c)); - else if(isdigit(c)) - state=DECIMAL; - else if(isalpha(c)) - state=IDENTIFIER; - else - parse_error(c, state); - break; - - case SIGN: - if(c=='0') - state=ZERO; - else if(isdigit(c)) - state=DECIMAL; - else if(c=='.') - state=FLOAT; - else - parse_error(c, state); - break; - - case ZERO: - if(c=='x') - state=HEXADECIMAL; - else if(isdigit(c)) - state=OCTAL; - else if(c=='.') - state=FLOAT; - else - parse_error(c, state); - break; - - case DECIMAL: - if(c=='.') - state=FLOAT; - else if(!isdigit(c)) - parse_error(c, state); - break; - - case HEXADECIMAL: - if(!isxdigit(c)) - parse_error(c, state); - break; - - case OCTAL: - if(!isodigit(c)) - parse_error(c, state); - break; - - case FLOAT: - if(c=='e' || c=='E') - state=FLOATEXPINIT; - else if(!isdigit(c)) - parse_error(c, state); - break; - - case FLOATEXPINIT: - if(c=='+' || c=='-') - state=FLOATEXPSIGN; - else if(isdigit(c)) - state=FLOATEXP; - else - parse_error(c, state); - break; - - case FLOATEXPSIGN: - if(isdigit(c)) - state=FLOATEXP; - else - parse_error(c, state); - break; - - case FLOATEXP: - if(!isdigit(c)) - parse_error(c, state); - break; - - case STRING: - if(c=='\\') - escape=!escape; - else if(c=='"' && !escape) - return Token(Token::STRING, unescape_string(buf)); - else - escape=false; - break; - - case IDENTIFIER: - if(!isalpha(c) && !isdigit(c) && c!='_') - parse_error(c, state); - break; - - default: - throw Exception(get_location()+": Internal error (bad state)"); - } - - if(is_delimiter(next) && state>=ACCEPT) - return Token(token_type[state], buf); - } - - return Token(Token::SPECIAL, ""); -} - -bool Parser::is_delimiter(int c) -{ - return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/'); -} - -bool Parser::isodigit(int c) -{ - return (c>='0' && c<='7'); -} - -string Parser::unescape_string(const string &str) -{ - string result; - bool escape=false; - unsigned hexcape=0; - for(string::const_iterator i=str.begin()+1; i!=str.end()-1; ++i) + catch(const Exception &e) { - if(escape) - { - if(*i=='n') - result+='\n'; - else if(*i=='t') - result+='\t'; - else if(*i=='\\') - result+='\\'; - else if(*i=='"') - result+='"'; - else if(*i=='x') - hexcape=0x100; - else - throw ParseError("Invalid escape", src, in.get_line_number()); - escape=false; - } - else if(hexcape) - { - unsigned digit=0; - if(*i>='0' && *i<='9') - digit=*i-'0'; - else if(*i>='a' && *i<='f') - digit=*i-'a'+10; - else if(*i>='A' && *i<='F') - digit=*i-'A'+10; - else - throw ParseError("Invalid hex digit", src, in.get_line_number()); - - hexcape=(hexcape<<4)|digit; - if(hexcape&0x10000) - { - result+=hexcape&0xFF; - hexcape=0; - } - } - else if(*i=='\\') - escape=true; - else - result+=*i; + good=false; + throw; } - - return result; -} - -string Parser::get_location() -{ - ostringstream ss; - ss<