X-Git-Url: http://git.tdb.fi/?a=blobdiff_plain;f=source%2Ftextparser.cpp;h=05521a90456a617e2e82b1fb664225d512a22af4;hb=29fafaa2c570b0cf92f41eeb534cfb65a841a892;hp=298b7cac363ebd2b75a712b2ea41d18dd17b3801;hpb=27630d44298cb67e075c166f4421288cc8ca117e;p=libs%2Fdatafile.git diff --git a/source/textparser.cpp b/source/textparser.cpp index 298b7ca..05521a9 100644 --- a/source/textparser.cpp +++ b/source/textparser.cpp @@ -1,11 +1,5 @@ -/* $Id$ - -This file is part of libmspdatafile -Copyright © 2007 Mikko Rasa, Mikkosoft Productions -Distributed under the LGPL -*/ - -#include +#include +#include #include "input.h" #include "textparser.h" #include "token.h" @@ -15,6 +9,28 @@ using namespace std; namespace Msp { namespace DataFile { +class parse_error: public runtime_error +{ +public: + parse_error(const std::string &t): + runtime_error(t.empty() ? "at end of input" : format("after '%s'", t)) + { } + + virtual ~parse_error() throw() { } +}; + + +class syntax_error: public runtime_error +{ +public: + syntax_error(const std::string &t): + runtime_error(t.empty() ? "at end of input" : format("at '%s'", t)) + { } + + virtual ~syntax_error() throw() { } +}; + + TextParser::TextParser(Input &i, const string &s): ParserMode(i, s) { } @@ -27,74 +43,71 @@ Statement TextParser::parse() Statement TextParser::parse_statement(const Token *t) { Statement result; - bool sub=false; - bool finish=false; + bool sub = false; + bool finish = false; while(in) { Token token; if(t) { - token=*t; - t=0; + token = *t; + t = 0; } else - token=parse_token(); + token = parse_token(); if(result.keyword.empty()) { if(token.str.empty()) break; else if(token.type!=Token::IDENTIFIER) - throw ParseError(format("%s: Syntax error at token '%s' (expected an identifier)", get_location(), token.str), src, in.get_line_number()); - result.keyword=token.str; - result.valid=true; - result.source=src; - result.line=in.get_line_number(); + throw syntax_error(token.str); + result.keyword = token.str; + result.valid = true; + result.source = src; + result.line = in.get_line_number(); } else if(sub) { if(token.str=="}") { - sub=false; - finish=true; + sub = false; + finish = true; } else { - Statement ss=parse_statement(&token); + Statement ss = parse_statement(&token); result.sub.push_back(ss); } } else if(finish) { if(token.str!=";") - throw ParseError(format("%s: Syntax error at token '%s' (Expected a ';')", get_location(), token.str), src, in.get_line_number()); + throw syntax_error(token.str); break; } else if(token.str=="{") - sub=true; + sub = true; else if(token.str==";") break; else if(token.type==Token::INTEGER) - result.args.push_back(Value(INTEGER, token.str)); + result.append(lexical_cast(token.str)); else if(token.type==Token::FLOAT) - result.args.push_back(Value(FLOAT, token.str)); + result.append(lexical_cast(token.str)); else if(token.type==Token::STRING) - result.args.push_back(Value(STRING, token.str)); + result.append(token.str); else if(token.type==Token::IDENTIFIER) { if(token.str=="true") - result.args.push_back(Value(BOOLEAN, "1")); + result.append(true); else if(token.str=="false") - result.args.push_back(Value(BOOLEAN, "0")); + result.append(false); else - result.args.push_back(Value(ENUM, token.str)); - //result.args.push_back(resolve_identifiertoken.str); + result.append(Symbol(token.str)); } - else if(token.str=="") - throw ParseError(src+": Unexcepted end of input", src, in.get_line_number()); else - throw ParseError(get_location()+": Syntax error", src, in.get_line_number()); + throw syntax_error(token.str); } return result; @@ -102,31 +115,33 @@ Statement TextParser::parse_statement(const Token *t) Token TextParser::parse_token() { - int c=0; - unsigned comment=0; + int c = 0; + int comment = 0; // Skip over comments and whitespace - while(in) + while(in && comment>=0) { - c=in.get(); - int next=in.peek(); + c = in.get(); + int next = in.peek(); - if(c=='/' && next=='/') - comment=1; - else if(c=='/' && next=='*') - comment=2; + if(c=='/' && next=='/' && !comment) + comment = 1; + else if(c=='/' && next=='*' && !comment) + comment = 2; else if(c=='\n' && comment==1) - comment=0; + comment = 0; else if(c=='*' && next=='/' && comment==2) - comment=3; + comment = 3; else if(comment==3) // Skip the second character of block comment end - comment=0; - else if(!isspace(c) && !comment) - break; + comment = 0; + else if(c!=-1 && !isspace(c) && !comment) + comment = -1; } - if(comment) // Didn't hit any non-whitespace - throw ParseError(src+": Unfinished comment at end of input", src, in.get_line_number()); + if(comment>0) // EOF while in comment + throw parse_error(string()); + else if(comment==0) // Didn't hit any non-whitespace + return Token(Token::SPECIAL, ""); enum ParseState { @@ -142,6 +157,7 @@ Token TextParser::parse_token() OCTAL, FLOAT, FLOATEXP, + STRING_END, IDENTIFIER }; @@ -151,7 +167,7 @@ Token TextParser::parse_token() Token::SPECIAL, Token::SPECIAL, Token::SPECIAL, - Token::STRING, + Token::SPECIAL, Token::SPECIAL, Token::INTEGER, Token::INTEGER, @@ -159,129 +175,142 @@ Token TextParser::parse_token() Token::INTEGER, Token::FLOAT, Token::FLOAT, + Token::STRING, Token::IDENTIFIER }; - ParseState state=INIT; - string buf; - bool escape=false; + ParseState state = INIT; + string buf; + bool escape = false; - while(in) + while(in || state==INIT) { if(state!=INIT) - c=in.get(); - int next=in.peek(); + c = in.get(); + int next = in.peek(); - buf+=c; + buf += c; switch(state) { case INIT: if(c=='0') - state=ZERO; + state = ZERO; else if(c=='-' || c=='+') - state=SIGN; + state = SIGN; else if(c=='.') - state=FLOAT; + state = FLOAT; else if(c=='"') - state=STRING; + state = STRING; else if(c=='{' || c=='}' || c==';') return Token(Token::SPECIAL, string(1, c)); else if(isdigit(c)) - state=DECIMAL; - else if(isalpha(c) || c=='_') - state=IDENTIFIER; + state = DECIMAL; + else if(isalpha(c) || c=='_' || c=='\\') + state = IDENTIFIER; else - parse_error(c, state); + throw parse_error(buf); break; case SIGN: if(c=='0') - state=ZERO; + state = ZERO; else if(isdigit(c)) - state=DECIMAL; + state = DECIMAL; else if(c=='.') - state=FLOAT; + state = FLOAT; else - parse_error(c, state); + throw parse_error(buf); break; case ZERO: if(c=='x') - state=HEXADECIMAL; + state = HEXADECIMAL; else if(isdigit(c)) - state=OCTAL; + state = OCTAL; else if(c=='.') - state=FLOAT; + state = FLOAT; else - parse_error(c, state); + throw parse_error(buf); break; case DECIMAL: if(c=='.') - state=FLOAT; + state = FLOAT; + else if(c=='e' || c=='E') + state = FLOATEXPINIT; else if(!isdigit(c)) - parse_error(c, state); + throw parse_error(buf); break; case HEXADECIMAL: if(!isxdigit(c)) - parse_error(c, state); + throw parse_error(buf); break; case OCTAL: if(!isodigit(c)) - parse_error(c, state); + throw parse_error(buf); break; case FLOAT: if(c=='e' || c=='E') - state=FLOATEXPINIT; + state = FLOATEXPINIT; else if(!isdigit(c)) - parse_error(c, state); + throw parse_error(buf); break; case FLOATEXPINIT: if(c=='+' || c=='-') - state=FLOATEXPSIGN; + state = FLOATEXPSIGN; else if(isdigit(c)) - state=FLOATEXP; + state = FLOATEXP; else - parse_error(c, state); + throw parse_error(buf); break; case FLOATEXPSIGN: if(isdigit(c)) - state=FLOATEXP; + state = FLOATEXP; else - parse_error(c, state); + throw parse_error(buf); break; case FLOATEXP: if(!isdigit(c)) - parse_error(c, state); + throw parse_error(buf); break; case STRING: if(c=='\\') - escape=!escape; + escape = !escape; else if(c=='"' && !escape) - return Token(Token::STRING, unescape_string(buf)); + state = STRING_END; else - escape=false; + escape = false; break; case IDENTIFIER: - if(!isalpha(c) && !isdigit(c) && c!='_') - parse_error(c, state); + if(!isalpha(c) && !isdigit(c) && c!='_' && c!='-' && c!='/') + throw parse_error(buf); break; + case STRING_END: + throw parse_error(buf); + default: - throw Exception(get_location()+": Internal error (bad state)"); + throw logic_error("bad parser state"); } if(is_delimiter(next) && state>=ACCEPT) - return Token(token_type[state], buf); + { + if(state==IDENTIFIER && buf[0]=='\\') + return Token(Token::IDENTIFIER, buf.substr(1)); + else if(state==STRING_END) + return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2))); + else + return Token(token_type[state], buf); + } } return Token(Token::SPECIAL, ""); @@ -297,68 +326,5 @@ bool TextParser::isodigit(int c) return (c>='0' && c<='7'); } -string TextParser::unescape_string(const string &str) -{ - string result; - bool escape=false; - unsigned hexcape=0; - for(string::const_iterator i=str.begin()+1; i!=str.end()-1; ++i) - { - if(escape) - { - if(*i=='n') - result+='\n'; - else if(*i=='t') - result+='\t'; - else if(*i=='\\') - result+='\\'; - else if(*i=='"') - result+='"'; - else if(*i=='x') - hexcape=0x100; - else - throw ParseError(format("%s: Invalid escape sequence '\\%c'", get_location(), *i), src, in.get_line_number()); - escape=false; - } - else if(hexcape) - { - unsigned digit=0; - if(*i>='0' && *i<='9') - digit=*i-'0'; - else if(*i>='a' && *i<='f') - digit=*i-'a'+10; - else if(*i>='A' && *i<='F') - digit=*i-'A'+10; - else - throw ParseError(get_location()+": Invalid hex digit", src, in.get_line_number()); - - hexcape=(hexcape<<4)|digit; - if(hexcape&0x10000) - { - result+=hexcape&0xFF; - hexcape=0; - } - } - else if(*i=='\\') - escape=true; - else - result+=*i; - } - - return result; -} - -string TextParser::get_location() -{ - ostringstream ss; - ss<(c), state), src, in.get_line_number()); -} - } // namespace DataFile } // namespace Msp