-/* $Id$
-
-This file is part of libmspdatafile
-Copyright © 2007 Mikko Rasa, Mikkosoft Productions
-Distributed under the LGPL
-*/
-
-#include <msp/strings/formatter.h>
+#include <msp/strings/format.h>
#include <msp/strings/utils.h>
#include "input.h"
#include "textparser.h"
namespace Msp {
namespace DataFile {
+class parse_error: public runtime_error // Exception type thrown from TextParser::parse_token in this file
+{
+public:
+ parse_error(const std::string &t): // t: text to quote in the message; an empty string selects the end-of-input wording
+ runtime_error(t.empty() ? "at end of input" : format("after '%s'", t)) // what() is "at end of input" or "after '<t>'"
+ { }
+
+ virtual ~parse_error() throw() { } // Declared non-throwing via the throw() specification
+};
+
+
+class syntax_error: public runtime_error // Exception type thrown from TextParser::parse_statement for an unexpected token
+{
+public:
+ syntax_error(const std::string &t): // t: string of the offending token; an empty string selects the end-of-input wording
+ runtime_error(t.empty() ? "at end of input" : format("at '%s'", t)) // what() is "at end of input" or "at '<t>'"
+ { }
+
+ virtual ~syntax_error() throw() { } // Declared non-throwing via the throw() specification
+};
+
+
// Constructs the text-mode parser; both arguments are forwarded unchanged to
// the ParserMode base constructor and no other initialisation is done here.
// NOTE(review): i and s presumably back the `in` and `src` members used by the
// parsing methods -- confirm against ParserMode.
TextParser::TextParser(Input &i, const string &s):
ParserMode(i, s)
{ }
Statement TextParser::parse_statement(const Token *t)
{
Statement result;
- bool sub=false;
- bool finish=false;
+ bool sub = false;
+ bool finish = false;
while(in)
{
Token token;
if(t)
{
- token=*t;
- t=0;
+ token = *t;
+ t = 0;
}
else
- token=parse_token();
+ token = parse_token();
if(result.keyword.empty())
{
if(token.str.empty())
break;
else if(token.type!=Token::IDENTIFIER)
- throw ParseError(format("%s: Syntax error at token '%s' (expected an identifier)", get_location(), token.str), src, in.get_line_number());
- result.keyword=token.str;
- result.valid=true;
- result.source=src;
- result.line=in.get_line_number();
+ throw syntax_error(token.str);
+ result.keyword = token.str;
+ result.valid = true;
+ result.source = src;
+ result.line = in.get_line_number();
}
else if(sub)
{
if(token.str=="}")
{
- sub=false;
- finish=true;
+ sub = false;
+ finish = true;
}
else
{
- Statement ss=parse_statement(&token);
+ Statement ss = parse_statement(&token);
result.sub.push_back(ss);
}
}
else if(finish)
{
if(token.str!=";")
- throw ParseError(format("%s: Syntax error at token '%s' (Expected a ';')", get_location(), token.str), src, in.get_line_number());
+ throw syntax_error(token.str);
break;
}
else if(token.str=="{")
- sub=true;
+ sub = true;
else if(token.str==";")
break;
else if(token.type==Token::INTEGER)
- result.args.push_back(Value(INTEGER, token.str));
+ result.append(lexical_cast<IntType::Store>(token.str));
else if(token.type==Token::FLOAT)
- result.args.push_back(Value(FLOAT, token.str));
+ result.append(lexical_cast<FloatType::Store>(token.str));
else if(token.type==Token::STRING)
- result.args.push_back(Value(STRING, token.str));
+ result.append(token.str);
else if(token.type==Token::IDENTIFIER)
{
if(token.str=="true")
- result.args.push_back(Value(BOOLEAN, "1"));
+ result.append(true);
else if(token.str=="false")
- result.args.push_back(Value(BOOLEAN, "0"));
+ result.append(false);
else
- result.args.push_back(Value(ENUM, token.str));
- //result.args.push_back(resolve_identifiertoken.str);
+ result.append(Symbol(token.str));
}
- else if(token.str=="")
- throw ParseError(src+": Unexcepted end of input", src, in.get_line_number());
else
- throw ParseError(get_location()+": Syntax error", src, in.get_line_number());
+ throw syntax_error(token.str);
}
return result;
Token TextParser::parse_token()
{
- int c=0;
- unsigned comment=0;
+ int c = 0;
+ int comment = 0;
// Skip over comments and whitespace
- while(in)
+ while(in && comment>=0)
{
- c=in.get();
- int next=in.peek();
+ c = in.get();
+ int next = in.peek();
if(c=='/' && next=='/')
- comment=1;
+ comment = 1;
else if(c=='/' && next=='*')
- comment=2;
+ comment = 2;
else if(c=='\n' && comment==1)
- comment=0;
+ comment = 0;
else if(c=='*' && next=='/' && comment==2)
- comment=3;
+ comment = 3;
else if(comment==3) // Skip the second character of block comment end
- comment=0;
- else if(!isspace(c) && !comment)
- break;
+ comment = 0;
+ else if(c!=-1 && !isspace(c) && !comment)
+ comment = -1;
}
- if(comment) // Didn't hit any non-whitespace
- throw ParseError(src+": Unfinished comment at end of input", src, in.get_line_number());
+ if(comment>0) // EOF while in comment
+ throw parse_error(string());
+ else if(comment==0) // Didn't hit any non-whitespace
+ return Token(Token::SPECIAL, "");
enum ParseState
{
OCTAL,
FLOAT,
FLOATEXP,
+ STRING_END,
IDENTIFIER
};
Token::SPECIAL,
Token::SPECIAL,
Token::SPECIAL,
- Token::STRING,
+ Token::SPECIAL,
Token::SPECIAL,
Token::INTEGER,
Token::INTEGER,
Token::INTEGER,
Token::FLOAT,
Token::FLOAT,
+ Token::STRING,
Token::IDENTIFIER
};
- ParseState state=INIT;
- string buf;
- bool escape=false;
+ ParseState state = INIT;
+ string buf;
+ bool escape = false;
- while(in)
+ while(in || state==INIT)
{
if(state!=INIT)
- c=in.get();
- int next=in.peek();
+ c = in.get();
+ int next = in.peek();
- buf+=c;
+ buf += c;
switch(state)
{
case INIT:
if(c=='0')
- state=ZERO;
+ state = ZERO;
else if(c=='-' || c=='+')
- state=SIGN;
+ state = SIGN;
else if(c=='.')
- state=FLOAT;
+ state = FLOAT;
else if(c=='"')
- state=STRING;
+ state = STRING;
else if(c=='{' || c=='}' || c==';')
return Token(Token::SPECIAL, string(1, c));
else if(isdigit(c))
- state=DECIMAL;
- else if(isalpha(c) || c=='_')
- state=IDENTIFIER;
+ state = DECIMAL;
+ else if(isalpha(c) || c=='_' || c=='\\')
+ state = IDENTIFIER;
else
- parse_error(c, state);
+ throw parse_error(buf);
break;
case SIGN:
if(c=='0')
- state=ZERO;
+ state = ZERO;
else if(isdigit(c))
- state=DECIMAL;
+ state = DECIMAL;
else if(c=='.')
- state=FLOAT;
+ state = FLOAT;
else
- parse_error(c, state);
+ throw parse_error(buf);
break;
case ZERO:
if(c=='x')
- state=HEXADECIMAL;
+ state = HEXADECIMAL;
else if(isdigit(c))
- state=OCTAL;
+ state = OCTAL;
else if(c=='.')
- state=FLOAT;
+ state = FLOAT;
else
- parse_error(c, state);
+ throw parse_error(buf);
break;
case DECIMAL:
if(c=='.')
- state=FLOAT;
+ state = FLOAT;
else if(!isdigit(c))
- parse_error(c, state);
+ throw parse_error(buf);
break;
case HEXADECIMAL:
if(!isxdigit(c))
- parse_error(c, state);
+ throw parse_error(buf);
break;
case OCTAL:
if(!isodigit(c))
- parse_error(c, state);
+ throw parse_error(buf);
break;
case FLOAT:
if(c=='e' || c=='E')
- state=FLOATEXPINIT;
+ state = FLOATEXPINIT;
else if(!isdigit(c))
- parse_error(c, state);
+ throw parse_error(buf);
break;
case FLOATEXPINIT:
if(c=='+' || c=='-')
- state=FLOATEXPSIGN;
+ state = FLOATEXPSIGN;
else if(isdigit(c))
- state=FLOATEXP;
+ state = FLOATEXP;
else
- parse_error(c, state);
+ throw parse_error(buf);
break;
case FLOATEXPSIGN:
if(isdigit(c))
- state=FLOATEXP;
+ state = FLOATEXP;
else
- parse_error(c, state);
+ throw parse_error(buf);
break;
case FLOATEXP:
if(!isdigit(c))
- parse_error(c, state);
+ throw parse_error(buf);
break;
case STRING:
if(c=='\\')
- escape=!escape;
+ escape = !escape;
else if(c=='"' && !escape)
- {
- try
- {
- return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
- }
- catch(const Exception &e)
- {
- throw ParseError(format("%s: %s", get_location(), e.what()), src, in.get_line_number());
- }
- }
+ state = STRING_END;
else
- escape=false;
+ escape = false;
break;
case IDENTIFIER:
- if(!isalpha(c) && !isdigit(c) && c!='_')
- parse_error(c, state);
+ if(!isalpha(c) && !isdigit(c) && c!='_' && c!='-' && c!='/')
+ throw parse_error(buf);
break;
+ case STRING_END:
+ throw parse_error(buf);
+
default:
- throw Exception(get_location()+": Internal error (bad state)");
+ throw logic_error("bad parser state");
}
if(is_delimiter(next) && state>=ACCEPT)
- return Token(token_type[state], buf);
+ {
+ if(state==IDENTIFIER && buf[0]=='\\')
+ return Token(Token::IDENTIFIER, buf.substr(1));
+ else if(state==STRING_END)
+ return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
+ else
+ return Token(token_type[state], buf);
+ }
}
return Token(Token::SPECIAL, "");
return (c>='0' && c<='7');
}
-string TextParser::get_location()
-{
- ostringstream ss;
- ss<<src<<':'<<in.get_line_number();
- return ss.str();
-}
-
-void TextParser::parse_error(int c, int state)
-{
- throw ParseError(format("%s: Parse error at '%c' (state %d)", get_location(), static_cast<char>(c), state), src, in.get_line_number());
-}
-
} // namespace DataFile
} // namespace Msp