+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2007 Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#include <msp/strings/formatter.h>
+#include "input.h"
+#include "textparser.h"
+#include "token.h"
+
+using namespace std;
+
+namespace Msp {
+namespace DataFile {
+
+/**
+Constructs a text-format parser.  Both arguments are passed on unchanged to
+the ParserMode base class; nothing else is initialised here.
+
+@param   i  Input object, forwarded to ParserMode
+@param   s  String forwarded to ParserMode (presumably the name of the source
+            being parsed - confirm against ParserMode)
+*/
+TextParser::TextParser(Input &i, const string &s): ParserMode(i, s)
+{
+}
+
+/**
+Parses the next statement from the input.
+
+@return  Whatever parse_statement produces when no token has been read in
+         advance
+*/
+Statement TextParser::parse()
+{
+	// A null token pointer makes parse_statement read every token itself
+	const Token *const no_pending_token=0;
+	return parse_statement(no_pending_token);
+}
+
+/**
+Parses a single statement: a keyword, any number of arguments, an optional
+brace-delimited block of sub-statements and a terminating ';'.  A block must
+be followed by ';' as well.
+
+The structural tokens '{', '}' and ';' and the end-of-input marker are only
+recognised when the token type is Token::SPECIAL, so string literals with the
+same contents (including the empty string) are treated as ordinary values.
+
+@param   t  Token that has already been read and must be used as the first
+            token of the statement, or null to read all tokens from the input
+
+@return  The parsed statement.  If the end-of-input marker is met before any
+         keyword, the returned statement is left default-constructed.
+
+@throws  ParseError if the statement does not start with an identifier, if a
+         block is not followed by ';', if an unrecognised token is met, or if
+         the end-of-input marker is met after the keyword (including inside a
+         sub-statement block)
+*/
+Statement TextParser::parse_statement(const Token *t)
+{
+	Statement result;
+	bool sub=false;      // Inside the { } block of this statement
+	bool finish=false;   // Block has been closed; only ';' may follow
+
+	while(in)
+	{
+		Token token;
+		if(t)
+		{
+			// Use the token supplied by the caller exactly once
+			token=*t;
+			t=0;
+		}
+		else
+			token=parse_token();
+
+		/* parse_token reports punctuation and end of input as SPECIAL tokens.
+		Checking the type keeps string literals such as ";" or "" from being
+		mistaken for them. */
+		const bool special=(token.type==Token::SPECIAL);
+		const bool at_end=(special && token.str.empty());
+
+		if(result.keyword.empty())
+		{
+			if(at_end)
+				break;
+			else if(token.type!=Token::IDENTIFIER)
+				throw ParseError(format("%s: Syntax error at token '%s' (expected an identifier)", get_location(), token.str), src, in.get_line_number());
+			result.keyword=token.str;
+			result.valid=true;
+			result.source=src;
+			result.line=in.get_line_number();
+		}
+		else if(at_end)
+		{
+			/* The statement has started but was not terminated.  This also
+			covers input ending inside a sub-statement block, which would
+			otherwise be accepted silently. */
+			throw ParseError(src+": Unexpected end of input", src, in.get_line_number());
+		}
+		else if(sub)
+		{
+			if(special && token.str=="}")
+			{
+				sub=false;
+				finish=true;
+			}
+			else
+			{
+				// Anything else starts a nested statement; hand over the token already read
+				Statement ss=parse_statement(&token);
+				result.sub.push_back(ss);
+			}
+		}
+		else if(finish)
+		{
+			if(!special || token.str!=";")
+				throw ParseError(format("%s: Syntax error at token '%s' (Expected a ';')", get_location(), token.str), src, in.get_line_number());
+			break;
+		}
+		else if(special && token.str=="{")
+			sub=true;
+		else if(special && token.str==";")
+			break;
+		else if(token.type==Token::INTEGER)
+			result.args.push_back(Value(INTEGER, token.str));
+		else if(token.type==Token::FLOAT)
+			result.args.push_back(Value(FLOAT, token.str));
+		else if(token.type==Token::STRING)
+			result.args.push_back(Value(STRING, token.str));
+		else if(token.type==Token::IDENTIFIER)
+		{
+			// The identifiers true and false are booleans; all others are stored as enum values
+			if(token.str=="true")
+				result.args.push_back(Value(BOOLEAN, "1"));
+			else if(token.str=="false")
+				result.args.push_back(Value(BOOLEAN, "0"));
+			else
+				result.args.push_back(Value(ENUM, token.str));
+		}
+		else
+			throw ParseError(get_location()+": Syntax error", src, in.get_line_number());
+	}
+
+	return result;
+}
+
+/**
+Reads the next token from the input.
+
+Leading whitespace and comments are skipped first.  Both "//" line comments
+and C-style block comments are recognised; a comment opener appearing inside
+another comment has no effect.  The token itself is then recognised with a
+small state machine.
+
+@return  The token read.  The punctuation characters '{', '}' and ';' are
+         returned as SPECIAL tokens, string literals as STRING tokens with
+         escapes already resolved.  A SPECIAL token with an empty string is
+         returned if the input runs out before a token is completed.
+
+@throws  ParseError if the input ends inside a comment or a character does not
+         fit the token being read; Exception if the state machine reaches a
+         state it has no handler for
+*/
+Token TextParser::parse_token()
+{
+	enum CommentState
+	{
+		NO_COMMENT,
+		LINE_COMMENT,
+		BLOCK_OPENING,   // "/" of a block comment opener seen, "*" still to be consumed
+		BLOCK_COMMENT,
+		BLOCK_CLOSING    // "*" of a block comment closer seen, "/" still to be consumed
+	};
+
+	int c=0;
+	CommentState comment=NO_COMMENT;
+
+	// Skip over comments and whitespace
+	while(in)
+	{
+		c=in.get();
+		int next=in.peek();
+
+		/* The second character of a two-character comment delimiter is
+		swallowed without being looked at, so it can't take part in another
+		delimiter. */
+		if(comment==BLOCK_OPENING)
+			comment=BLOCK_COMMENT;
+		else if(comment==BLOCK_CLOSING)
+			comment=NO_COMMENT;
+		// Comments may only start outside of other comments
+		else if(comment==NO_COMMENT && c=='/' && next=='/')
+			comment=LINE_COMMENT;
+		else if(comment==NO_COMMENT && c=='/' && next=='*')
+			comment=BLOCK_OPENING;
+		else if(comment==LINE_COMMENT && c=='\n')
+			comment=NO_COMMENT;
+		else if(comment==BLOCK_COMMENT && c=='*' && next=='/')
+			comment=BLOCK_CLOSING;
+		else if(comment==NO_COMMENT && !isspace(c))
+			break;
+	}
+
+	if(comment!=NO_COMMENT)   // Input ran out while still inside a comment
+		throw ParseError(src+": Unfinished comment at end of input", src, in.get_line_number());
+
+	/* The order of these states is significant: a token may only end in a state
+	that is greater than or equal to ACCEPT, and the values index token_type
+	below. */
+	enum ParseState
+	{
+		INIT,
+		SIGN,
+		FLOATEXPINIT,
+		FLOATEXPSIGN,
+		STRING,
+		ACCEPT,
+		ZERO,
+		DECIMAL,
+		HEXADECIMAL,
+		OCTAL,
+		FLOAT,
+		FLOATEXP,
+		IDENTIFIER
+	};
+
+	// Token type produced when a token ends in the corresponding ParseState
+	static const Token::Type token_type[]=
+	{
+		Token::SPECIAL,
+		Token::SPECIAL,
+		Token::SPECIAL,
+		Token::SPECIAL,
+		Token::STRING,
+		Token::SPECIAL,
+		Token::INTEGER,
+		Token::INTEGER,
+		Token::INTEGER,
+		Token::INTEGER,
+		Token::FLOAT,
+		Token::FLOAT,
+		Token::IDENTIFIER
+	};
+
+	ParseState state=INIT;
+	string buf;
+	bool escape=false;
+
+	while(in)
+	{
+		// The first character of the token was already read by the skipping loop
+		if(state!=INIT)
+			c=in.get();
+		int next=in.peek();
+
+		buf+=c;
+
+		switch(state)
+		{
+		case INIT:
+			if(c=='0')
+				state=ZERO;
+			else if(c=='-' || c=='+')
+				state=SIGN;
+			else if(c=='.')
+				state=FLOAT;
+			else if(c=='"')
+				state=STRING;
+			else if(c=='{' || c=='}' || c==';')
+				return Token(Token::SPECIAL, string(1, c));
+			else if(isdigit(c))
+				state=DECIMAL;
+			else if(isalpha(c) || c=='_')
+				state=IDENTIFIER;
+			else
+				parse_error(c, state);
+			break;
+
+		case SIGN:
+			if(c=='0')
+				state=ZERO;
+			else if(isdigit(c))
+				state=DECIMAL;
+			else if(c=='.')
+				state=FLOAT;
+			else
+				parse_error(c, state);
+			break;
+
+		case ZERO:
+			// A leading zero selects hexadecimal (0x...) or octal, unless a decimal point follows
+			if(c=='x')
+				state=HEXADECIMAL;
+			else if(isdigit(c))
+				state=OCTAL;
+			else if(c=='.')
+				state=FLOAT;
+			else
+				parse_error(c, state);
+			break;
+
+		case DECIMAL:
+			if(c=='.')
+				state=FLOAT;
+			else if(!isdigit(c))
+				parse_error(c, state);
+			break;
+
+		case HEXADECIMAL:
+			if(!isxdigit(c))
+				parse_error(c, state);
+			break;
+
+		case OCTAL:
+			if(!isodigit(c))
+				parse_error(c, state);
+			break;
+
+		case FLOAT:
+			if(c=='e' || c=='E')
+				state=FLOATEXPINIT;
+			else if(!isdigit(c))
+				parse_error(c, state);
+			break;
+
+		case FLOATEXPINIT:
+			if(c=='+' || c=='-')
+				state=FLOATEXPSIGN;
+			else if(isdigit(c))
+				state=FLOATEXP;
+			else
+				parse_error(c, state);
+			break;
+
+		case FLOATEXPSIGN:
+			if(isdigit(c))
+				state=FLOATEXP;
+			else
+				parse_error(c, state);
+			break;
+
+		case FLOATEXP:
+			if(!isdigit(c))
+				parse_error(c, state);
+			break;
+
+		case STRING:
+			// A quote only ends the string if it is not preceded by an unescaped backslash
+			if(c=='\\')
+				escape=!escape;
+			else if(c=='"' && !escape)
+				return Token(Token::STRING, unescape_string(buf));
+			else
+				escape=false;
+			break;
+
+		case IDENTIFIER:
+			if(!isalpha(c) && !isdigit(c) && c!='_')
+				parse_error(c, state);
+			break;
+
+		default:
+			throw Exception(get_location()+": Internal error (bad state)");
+		}
+
+		// The token is complete when an accepting state is followed by a delimiter
+		if(is_delimiter(next) && state>=ACCEPT)
+			return Token(token_type[state], buf);
+	}
+
+	return Token(Token::SPECIAL, "");
+}
+
+/**
+Tells whether a character can follow a token and thereby end it.
+
+@param   c  Character code to test
+
+@return  true for whitespace, '{', '}', ';' and '/'; false otherwise
+*/
+bool TextParser::is_delimiter(int c)
+{
+	switch(c)
+	{
+	case '{':
+	case '}':
+	case ';':
+	case '/':
+		return true;
+	default:
+		return isspace(c)!=0;
+	}
+}
+
+/**
+Tells whether a character is an octal digit.
+
+@param   c  Character code to test
+
+@return  true if c lies between '0' and '7' inclusive
+*/
+bool TextParser::isodigit(int c)
+{
+	if(c<'0')
+		return false;
+	return c<='7';
+}
+
+/**
+Resolves the escape sequences of a string literal.
+
+The first and last character of the input are taken to be the enclosing
+delimiters and are dropped.  Recognised escapes are \n, \t, \\, \" and \x
+followed by exactly two hexadecimal digits.
+
+@param   str  String literal including its delimiters
+
+@return  Contents of the literal with escapes replaced by the characters they
+         stand for; an empty string if str is too short to contain anything
+         between delimiters
+
+@throws  ParseError on an unknown escape sequence, an invalid hex digit, or an
+         escape sequence that is cut short by the end of the literal
+*/
+string TextParser::unescape_string(const string &str)
+{
+	// Forming begin()+1 and end()-1 requires both delimiters to be present
+	if(str.size()<2)
+		return string();
+
+	string result;
+	bool escape=false;
+	/* Accumulator for \x escapes.  The marker bit starts at 0x100 and is shifted
+	left by each digit, reaching 0x10000 once two digits have been read.  Zero
+	means no hex escape is in progress. */
+	unsigned hexcape=0;
+	for(string::const_iterator i=str.begin()+1; i!=str.end()-1; ++i)
+	{
+		if(escape)
+		{
+			if(*i=='n')
+				result+='\n';
+			else if(*i=='t')
+				result+='\t';
+			else if(*i=='\\')
+				result+='\\';
+			else if(*i=='"')
+				result+='"';
+			else if(*i=='x')
+				hexcape=0x100;
+			else
+				throw ParseError(format("%s: Invalid escape sequence '\\%c'", get_location(), *i), src, in.get_line_number());
+			escape=false;
+		}
+		else if(hexcape)
+		{
+			unsigned digit=0;
+			if(*i>='0' && *i<='9')
+				digit=*i-'0';
+			else if(*i>='a' && *i<='f')
+				digit=*i-'a'+10;
+			else if(*i>='A' && *i<='F')
+				digit=*i-'A'+10;
+			else
+				throw ParseError(get_location()+": Invalid hex digit", src, in.get_line_number());
+
+			hexcape=(hexcape<<4)|digit;
+			if(hexcape&0x10000)
+			{
+				result+=static_cast<char>(hexcape&0xFF);
+				hexcape=0;
+			}
+		}
+		else if(*i=='\\')
+			escape=true;
+		else
+			result+=*i;
+	}
+
+	// Don't silently drop a backslash or \x escape that the end of the literal cut short
+	if(escape || hexcape)
+		throw ParseError(get_location()+": Unfinished escape sequence", src, in.get_line_number());
+
+	return result;
+}
+
+/**
+Builds a description of the current position for use in error messages.
+
+@return  src and the current line number of the input, separated by a colon
+*/
+string TextParser::get_location()
+{
+	ostringstream location;
+	location<<src;
+	location<<':';
+	location<<in.get_line_number();
+	return location.str();
+}
+
+/**
+Reports a character that the tokenizer can't accept.  Never returns normally.
+
+@param   c      The offending character
+@param   state  Tokenizer state in which the character was met; included in
+                the message as a number
+
+@throws  ParseError always
+*/
+void TextParser::parse_error(int c, int state)
+{
+	const char offending=static_cast<char>(c);
+	const string message=format("%s: Parse error at '%c' (state %d)", get_location(), offending, state);
+	throw ParseError(message, src, in.get_line_number());
+}
+
+} // namespace DataFile
+} // namespace Msp