X-Git-Url: http://git.tdb.fi/?a=blobdiff_plain;f=source%2Ftextparser.cpp;h=0f17d3f3cc79808a7659e291e406f6b7bb814137;hb=256f7238bc60d6dcc31a564988f5cc02a60c4537;hp=298b7cac363ebd2b75a712b2ea41d18dd17b3801;hpb=27630d44298cb67e075c166f4421288cc8ca117e;p=libs%2Fdatafile.git diff --git a/source/textparser.cpp b/source/textparser.cpp index 298b7ca..0f17d3f 100644 --- a/source/textparser.cpp +++ b/source/textparser.cpp @@ -1,11 +1,12 @@ /* $Id$ This file is part of libmspdatafile -Copyright © 2007 Mikko Rasa, Mikkosoft Productions +Copyright © 2007-2008 Mikko Rasa, Mikkosoft Productions Distributed under the LGPL */ #include +#include #include "input.h" #include "textparser.h" #include "token.h" @@ -46,7 +47,7 @@ Statement TextParser::parse_statement(const Token *t) if(token.str.empty()) break; else if(token.type!=Token::IDENTIFIER) - throw ParseError(format("%s: Syntax error at token '%s' (expected an identifier)", get_location(), token.str), src, in.get_line_number()); + throw_at(ParseError(format("Syntax error at token '%s' (expected an identifier)", token.str)), get_location()); result.keyword=token.str; result.valid=true; result.source=src; @@ -68,7 +69,7 @@ Statement TextParser::parse_statement(const Token *t) else if(finish) { if(token.str!=";") - throw ParseError(format("%s: Syntax error at token '%s' (Expected a ';')", get_location(), token.str), src, in.get_line_number()); + throw_at(ParseError(format("Syntax error at token '%s' (Expected a ';')", token.str)), get_location()); break; } else if(token.str=="{") @@ -92,9 +93,9 @@ Statement TextParser::parse_statement(const Token *t) //result.args.push_back(resolve_identifiertoken.str); } else if(token.str=="") - throw ParseError(src+": Unexcepted end of input", src, in.get_line_number()); + throw_at(ParseError("Unexcepted end of input"), get_location()); else - throw ParseError(get_location()+": Syntax error", src, in.get_line_number()); + throw_at(ParseError("Syntax error"), get_location()); } return result; @@ -103,10 +104,10 @@ Statement TextParser::parse_statement(const Token *t) Token TextParser::parse_token() { int c=0; - unsigned comment=0; + int comment=0; // Skip over comments and whitespace - while(in) + while(in && comment>=0) { c=in.get(); int next=in.peek(); @@ -122,11 +123,13 @@ Token TextParser::parse_token() else if(comment==3) // Skip the second character of block comment end comment=0; else if(!isspace(c) && !comment) - break; + comment=-1; } - if(comment) // Didn't hit any non-whitespace - throw ParseError(src+": Unfinished comment at end of input", src, in.get_line_number()); + if(comment>0) // EOF while in comment + throw_at(ParseError("Unfinished comment at end of input"), get_location()); + else if(comment==0) // Didn't hit any non-whitespace + return Token(Token::SPECIAL, ""); enum ParseState { @@ -166,7 +169,7 @@ Token TextParser::parse_token() string buf; bool escape=false; - while(in) + while(in || state==INIT) { if(state!=INIT) c=in.get(); @@ -192,7 +195,7 @@ Token TextParser::parse_token() else if(isalpha(c) || c=='_') state=IDENTIFIER; else - parse_error(c, state); + parse_error(c, "0-9A-Za-z_.\"{};+-"); break; case SIGN: @@ -203,7 +206,7 @@ Token TextParser::parse_token() else if(c=='.') state=FLOAT; else - parse_error(c, state); + parse_error(c, "0-9."); break; case ZERO: @@ -214,31 +217,31 @@ Token TextParser::parse_token() else if(c=='.') state=FLOAT; else - parse_error(c, state); + parse_error(c, "0-9A-Fa-f."); break; case DECIMAL: if(c=='.') state=FLOAT; else if(!isdigit(c)) - parse_error(c, state); + parse_error(c, "0-9."); break; case HEXADECIMAL: if(!isxdigit(c)) - parse_error(c, state); + parse_error(c, "0-9A-Fa-f"); break; case OCTAL: if(!isodigit(c)) - parse_error(c, state); + parse_error(c, "0-7"); break; case FLOAT: if(c=='e' || c=='E') state=FLOATEXPINIT; else if(!isdigit(c)) - parse_error(c, state); + parse_error(c, "0-9Ee"); break; case FLOATEXPINIT: @@ -247,37 +250,47 @@ Token TextParser::parse_token() else if(isdigit(c)) state=FLOATEXP; else - parse_error(c, state); + parse_error(c, "0-9+-"); break; case FLOATEXPSIGN: if(isdigit(c)) state=FLOATEXP; else - parse_error(c, state); + parse_error(c, "0-9"); break; case FLOATEXP: if(!isdigit(c)) - parse_error(c, state); + parse_error(c, "0-9"); break; case STRING: if(c=='\\') escape=!escape; else if(c=='"' && !escape) - return Token(Token::STRING, unescape_string(buf)); + { + try + { + return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2))); + } + catch(Exception &e) + { + e.at(get_location()); + throw; + } + } else escape=false; break; case IDENTIFIER: if(!isalpha(c) && !isdigit(c) && c!='_') - parse_error(c, state); + parse_error(c, "0-9A-Za-z_"); break; default: - throw Exception(get_location()+": Internal error (bad state)"); + throw_at(InvalidState("Internal error (bad state)"), get_location()); } if(is_delimiter(next) && state>=ACCEPT) @@ -297,57 +310,6 @@ bool TextParser::isodigit(int c) return (c>='0' && c<='7'); } -string TextParser::unescape_string(const string &str) -{ - string result; - bool escape=false; - unsigned hexcape=0; - for(string::const_iterator i=str.begin()+1; i!=str.end()-1; ++i) - { - if(escape) - { - if(*i=='n') - result+='\n'; - else if(*i=='t') - result+='\t'; - else if(*i=='\\') - result+='\\'; - else if(*i=='"') - result+='"'; - else if(*i=='x') - hexcape=0x100; - else - throw ParseError(format("%s: Invalid escape sequence '\\%c'", get_location(), *i), src, in.get_line_number()); - escape=false; - } - else if(hexcape) - { - unsigned digit=0; - if(*i>='0' && *i<='9') - digit=*i-'0'; - else if(*i>='a' && *i<='f') - digit=*i-'a'+10; - else if(*i>='A' && *i<='F') - digit=*i-'A'+10; - else - throw ParseError(get_location()+": Invalid hex digit", src, in.get_line_number()); - - hexcape=(hexcape<<4)|digit; - if(hexcape&0x10000) - { - result+=hexcape&0xFF; - hexcape=0; - } - } - else if(*i=='\\') - escape=true; - else - result+=*i; - } - - return result; -} - string TextParser::get_location() { ostringstream ss; @@ -355,9 +317,9 @@ string TextParser::get_location() return ss.str(); } -void TextParser::parse_error(int c, int state) +void TextParser::parse_error(int c, const char *e) { - throw ParseError(format("%s: Parse error at '%c' (state %d)", get_location(), static_cast(c), state), src, in.get_line_number()); + throw_at(ParseError(format("Parse error at '%c', expected one of \"%s\"", static_cast(c), e)), get_location()); } } // namespace DataFile