X-Git-Url: http://git.tdb.fi/?a=blobdiff_plain;f=source%2Ftextparser.cpp;h=b8515a9b487fa2749d78c5c41a734c2f00c1b70f;hb=256b44a5009467171af53316141277027bcc0ba4;hp=225425b4e23166eea05fe0697f16974c327ab1c0;hpb=6653c7d83dbe1fe81a541a125be8bb808b234eb7;p=libs%2Fdatafile.git diff --git a/source/textparser.cpp b/source/textparser.cpp index 225425b..b8515a9 100644 --- a/source/textparser.cpp +++ b/source/textparser.cpp @@ -1,5 +1,6 @@ #include #include +#include "except.h" #include "input.h" #include "textparser.h" #include "token.h" @@ -9,33 +10,11 @@ using namespace std; namespace Msp { namespace DataFile { -class parse_error: public runtime_error -{ -public: - parse_error(const std::string &t): - runtime_error(t.empty() ? "at end of input" : format("after '%s'", t)) - { } - - virtual ~parse_error() throw() { } -}; - - -class syntax_error: public runtime_error -{ -public: - syntax_error(const std::string &t): - runtime_error(t.empty() ? "at end of input" : format("at '%s'", t)) - { } - - virtual ~syntax_error() throw() { } -}; - - TextParser::TextParser(Input &i, const string &s): ParserMode(i, s) { } -Statement TextParser::parse(bool) +Statement TextParser::parse() { return parse_statement(0); } @@ -43,8 +22,7 @@ Statement TextParser::parse(bool) Statement TextParser::parse_statement(const Token *t) { Statement result; - bool sub = false; - bool finish = false; + unsigned sub = 0; while(in) { @@ -68,44 +46,28 @@ Statement TextParser::parse_statement(const Token *t) result.source = src; result.line = in.get_line_number(); } - else if(sub) + else if(sub==1) { if(token.str=="}") - { - sub = false; - finish = true; - } + sub = 2; else { Statement ss = parse_statement(&token); result.sub.push_back(ss); } } - else if(finish) + else if(sub==2) { if(token.str!=";") throw syntax_error(token.str); break; } else if(token.str=="{") - sub = true; + sub = 1; else if(token.str==";") break; - else if(token.type==Token::INTEGER) - result.append(lexical_cast(token.str)); - else if(token.type==Token::FLOAT) - result.append(lexical_cast(token.str)); - else if(token.type==Token::STRING) - result.append(token.str); - else if(token.type==Token::IDENTIFIER) - { - if(token.str=="true") - result.append(true); - else if(token.str=="false") - result.append(false); - else - result.append(Symbol(token.str)); - } + else if(token.type!=Token::SPECIAL) + result.append_from_token(token); else throw syntax_error(token.str); } @@ -124,9 +86,9 @@ Token TextParser::parse_token() c = in.get(); int next = in.peek(); - if(c=='/' && next=='/') + if(c=='/' && next=='/' && !comment) comment = 1; - else if(c=='/' && next=='*') + else if(c=='/' && next=='*' && !comment) comment = 2; else if(c=='\n' && comment==1) comment = 0; @@ -150,6 +112,8 @@ Token TextParser::parse_token() FLOATEXPINIT, FLOATEXPSIGN, STRING, + STRING_ESCAPE, + STRING_BASE64, ACCEPT, ZERO, DECIMAL, @@ -158,6 +122,7 @@ Token TextParser::parse_token() FLOAT, FLOATEXP, STRING_END, + STRING_BASE64_PAD, IDENTIFIER }; @@ -169,6 +134,8 @@ Token TextParser::parse_token() Token::SPECIAL, Token::SPECIAL, Token::SPECIAL, + Token::SPECIAL, + Token::SPECIAL, Token::INTEGER, Token::INTEGER, Token::INTEGER, @@ -176,12 +143,12 @@ Token TextParser::parse_token() Token::FLOAT, Token::FLOAT, Token::STRING, + Token::STRING, Token::IDENTIFIER }; ParseState state = INIT; string buf; - bool escape = false; while(in || state==INIT) { @@ -202,6 +169,8 @@ Token TextParser::parse_token() state = FLOAT; else if(c=='"') state = STRING; + else if(c=='=') + state = STRING_BASE64; else if(c=='{' || c=='}' || c==';') return Token(Token::SPECIAL, string(1, c)); else if(isdigit(c)) @@ -283,11 +252,25 @@ Token TextParser::parse_token() case STRING: if(c=='\\') - escape = !escape; - else if(c=='"' && !escape) + state = STRING_ESCAPE; + else if(c=='"') state = STRING_END; - else - escape = false; + break; + + case STRING_ESCAPE: + state = STRING; + break; + + case STRING_BASE64: + if(c=='=') + state = STRING_BASE64_PAD; + else if(!isalnum(c) && c!='+' && c!='/') + throw parse_error(buf); + break; + + case STRING_BASE64_PAD: + if(c!='=') + throw parse_error(buf); break; case IDENTIFIER: @@ -308,6 +291,8 @@ Token TextParser::parse_token() return Token(Token::IDENTIFIER, buf.substr(1)); else if(state==STRING_END) return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2))); + else if(state==STRING_BASE64 || state==STRING_BASE64_PAD) + return Token(Token::STRING, base64_decode(buf)); else return Token(token_type[state], buf); } @@ -326,5 +311,42 @@ bool TextParser::isodigit(int c) return (c>='0' && c<='7'); } +string TextParser::base64_decode(const string &data) +{ + string bin; + bin.reserve(data.size()*3/4); + unsigned accum = 0; + unsigned a_bits = 0; + for(char c: data) + { + unsigned d; + if(c>='A' && c<='Z') + d = c-'A'; + else if(c>='a' && c<='z') + d = 26+(c-'a'); + else if(c>='0' && c<='9') + d = 52+(c-'0'); + else if(c=='+') + d = 62; + else if(c=='/') + d = 63; + else if(c=='=') + continue; + else + throw invalid_argument("TextParser::base64_decode"); + + accum = (accum<<6)|d; + a_bits += 6; + + if(a_bits>=8) + { + bin += (accum>>(a_bits-8))&0xFF; + a_bits -= 8; + } + } + + return bin; +} + } // namespace DataFile } // namespace Msp