X-Git-Url: http://git.tdb.fi/?a=blobdiff_plain;f=source%2Ftextparser.cpp;h=225425b4e23166eea05fe0697f16974c327ab1c0;hb=6653c7d83dbe1fe81a541a125be8bb808b234eb7;hp=d8e422ee7ff4d4d710a764a62383955f377305f5;hpb=cbd0ddd6ee033e46646bfb85d19232c816ea1eda;p=libs%2Fdatafile.git diff --git a/source/textparser.cpp b/source/textparser.cpp index d8e422e..225425b 100644 --- a/source/textparser.cpp +++ b/source/textparser.cpp @@ -1,11 +1,4 @@ -/* $Id$ - -This file is part of libmspdatafile -Copyright © 2007-2008 Mikko Rasa, Mikkosoft Productions -Distributed under the LGPL -*/ - -#include +#include #include #include "input.h" #include "textparser.h" @@ -16,11 +9,33 @@ using namespace std; namespace Msp { namespace DataFile { +class parse_error: public runtime_error +{ +public: + parse_error(const std::string &t): + runtime_error(t.empty() ? "at end of input" : format("after '%s'", t)) + { } + + virtual ~parse_error() throw() { } +}; + + +class syntax_error: public runtime_error +{ +public: + syntax_error(const std::string &t): + runtime_error(t.empty() ? "at end of input" : format("at '%s'", t)) + { } + + virtual ~syntax_error() throw() { } +}; + + TextParser::TextParser(Input &i, const string &s): ParserMode(i, s) { } -Statement TextParser::parse() +Statement TextParser::parse(bool) { return parse_statement(0); } @@ -28,8 +43,8 @@ Statement TextParser::parse() Statement TextParser::parse_statement(const Token *t) { Statement result; - bool sub = false; - bool finish = false; + bool sub = false; + bool finish = false; while(in) { @@ -47,7 +62,7 @@ Statement TextParser::parse_statement(const Token *t) if(token.str.empty()) break; else if(token.type!=Token::IDENTIFIER) - throw_at(ParseError(format("Syntax error at token '%s' (expected an identifier)", token.str)), get_location()); + throw syntax_error(token.str); result.keyword = token.str; result.valid = true; result.source = src; @@ -69,7 +84,7 @@ Statement TextParser::parse_statement(const Token *t) else if(finish) { if(token.str!=";") - throw_at(ParseError(format("Syntax error at token '%s' (Expected a ';')", token.str)), get_location()); + throw syntax_error(token.str); break; } else if(token.str=="{") @@ -77,25 +92,22 @@ Statement TextParser::parse_statement(const Token *t) else if(token.str==";") break; else if(token.type==Token::INTEGER) - result.args.push_back(Value(INTEGER, token.str)); + result.append(lexical_cast(token.str)); else if(token.type==Token::FLOAT) - result.args.push_back(Value(FLOAT, token.str)); + result.append(lexical_cast(token.str)); else if(token.type==Token::STRING) - result.args.push_back(Value(STRING, token.str)); + result.append(token.str); else if(token.type==Token::IDENTIFIER) { if(token.str=="true") - result.args.push_back(Value(BOOLEAN, "1")); + result.append(true); else if(token.str=="false") - result.args.push_back(Value(BOOLEAN, "0")); + result.append(false); else - result.args.push_back(Value(ENUM, token.str)); - //result.args.push_back(resolve_identifiertoken.str); + result.append(Symbol(token.str)); } - else if(token.str=="") - throw_at(ParseError("Unexcepted end of input"), get_location()); else - throw_at(ParseError("Syntax error"), get_location()); + throw syntax_error(token.str); } return result; @@ -122,12 +134,12 @@ Token TextParser::parse_token() comment = 3; else if(comment==3) // Skip the second character of block comment end comment = 0; - else if(!isspace(c) && !comment) + else if(c!=-1 && !isspace(c) && !comment) comment = -1; } if(comment>0) // EOF while in comment - throw_at(ParseError("Unfinished comment at end of input"), get_location()); + throw parse_error(string()); else if(comment==0) // Didn't hit any non-whitespace return Token(Token::SPECIAL, ""); @@ -145,6 +157,7 @@ Token TextParser::parse_token() OCTAL, FLOAT, FLOATEXP, + STRING_END, IDENTIFIER }; @@ -154,7 +167,7 @@ Token TextParser::parse_token() Token::SPECIAL, Token::SPECIAL, Token::SPECIAL, - Token::STRING, + Token::SPECIAL, Token::SPECIAL, Token::INTEGER, Token::INTEGER, @@ -162,12 +175,13 @@ Token TextParser::parse_token() Token::INTEGER, Token::FLOAT, Token::FLOAT, + Token::STRING, Token::IDENTIFIER }; ParseState state = INIT; - string buf; - bool escape = false; + string buf; + bool escape = false; while(in || state==INIT) { @@ -192,10 +206,10 @@ Token TextParser::parse_token() return Token(Token::SPECIAL, string(1, c)); else if(isdigit(c)) state = DECIMAL; - else if(isalpha(c) || c=='_') + else if(isalpha(c) || c=='_' || c=='\\') state = IDENTIFIER; else - parse_error(c, "0-9A-Za-z_.\"{};+-"); + throw parse_error(buf); break; case SIGN: @@ -206,7 +220,7 @@ Token TextParser::parse_token() else if(c=='.') state = FLOAT; else - parse_error(c, "0-9."); + throw parse_error(buf); break; case ZERO: @@ -217,31 +231,33 @@ Token TextParser::parse_token() else if(c=='.') state = FLOAT; else - parse_error(c, "0-9A-Fa-f."); + throw parse_error(buf); break; case DECIMAL: if(c=='.') state = FLOAT; + else if(c=='e' || c=='E') + state = FLOATEXPINIT; else if(!isdigit(c)) - parse_error(c, "0-9."); + throw parse_error(buf); break; case HEXADECIMAL: if(!isxdigit(c)) - parse_error(c, "0-9A-Fa-f"); + throw parse_error(buf); break; case OCTAL: if(!isodigit(c)) - parse_error(c, "0-7"); + throw parse_error(buf); break; case FLOAT: if(c=='e' || c=='E') state = FLOATEXPINIT; else if(!isdigit(c)) - parse_error(c, "0-9Ee"); + throw parse_error(buf); break; case FLOATEXPINIT: @@ -250,51 +266,51 @@ Token TextParser::parse_token() else if(isdigit(c)) state = FLOATEXP; else - parse_error(c, "0-9+-"); + throw parse_error(buf); break; case FLOATEXPSIGN: if(isdigit(c)) state = FLOATEXP; else - parse_error(c, "0-9"); + throw parse_error(buf); break; case FLOATEXP: if(!isdigit(c)) - parse_error(c, "0-9"); + throw parse_error(buf); break; case STRING: if(c=='\\') escape = !escape; else if(c=='"' && !escape) - { - try - { - return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2))); - } - catch(Exception &e) - { - e.at(get_location()); - throw; - } - } + state = STRING_END; else escape = false; break; case IDENTIFIER: - if(!isalpha(c) && !isdigit(c) && c!='_') - parse_error(c, "0-9A-Za-z_"); + if(!isalpha(c) && !isdigit(c) && c!='_' && c!='-' && c!='/') + throw parse_error(buf); break; + case STRING_END: + throw parse_error(buf); + default: - throw_at(InvalidState("Internal error (bad state)"), get_location()); + throw logic_error("bad parser state"); } if(is_delimiter(next) && state>=ACCEPT) - return Token(token_type[state], buf); + { + if(state==IDENTIFIER && buf[0]=='\\') + return Token(Token::IDENTIFIER, buf.substr(1)); + else if(state==STRING_END) + return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2))); + else + return Token(token_type[state], buf); + } } return Token(Token::SPECIAL, ""); @@ -310,17 +326,5 @@ bool TextParser::isodigit(int c) return (c>='0' && c<='7'); } -string TextParser::get_location() -{ - ostringstream ss; - ss<(c), e)), get_location()); -} - } // namespace DataFile } // namespace Msp