-/* $Id$
-
-This file is part of libmspdatafile
-Copyright © 2006 Mikko Rasa, Mikkosoft Productions
-Distributed under the LGPL
-*/
-#include <cctype>
-#include <sstream>
-#include "error.h"
+#include <stdexcept>
+#include <msp/strings/format.h>
+#include "binaryparser.h"
+#include "dataerror.h"
#include "parser.h"
#include "statement.h"
-#include "token.h"
+#include "textparser.h"
using namespace std;
namespace Msp {
namespace DataFile {
-Parser::Parser(istream &i, const string &s):
+// Binds the parser to the input object i.  The name s is stored both as the
+// main source name and as the current source name, the parser is marked good,
+// and a TextParser is created as the initial mode parser.
+Parser::Parser(IO::Base &i, const string &s):
in(i),
+ main_src(s),
src(s),
- good(true)
+ good(true),
+ mode(new TextParser(in, src))
{ }
-Statement Parser::parse()
+// Deletes the mode parser object currently held in mode.
+Parser::~Parser()
{
- if(!good)
- throw Exception("Parser is not good");
-
- try
- {
- return parse_(0);
- }
- catch(const Exception &e)
- {
- good=false;
- throw;
- }
+ delete mode;
}
-Statement Parser::parse_(const Token *t)
+/* Returns the next statement from the input.  The control statements __bin,
+__text and __src are consumed here and never returned: the first two replace
+the mode parser with a BinaryParser or a TextParser, and __src changes the
+current source name (an empty name restores the main source name).
+
+Throws logic_error if an earlier call has already failed.  Any exception
+raised while parsing marks the parser as not good; a data_error is rethrown
+unchanged and every other exception is wrapped in a data_error carrying the
+current source name and line number. */
+Statement Parser::parse()
{
- Statement result;
- bool sub=false;
- bool finish=false;
+ if(!good)
+ throw logic_error("Parser::parse() !good");
- while(in)
+ try
{
- Token token;
- if(t)
+ // Loop until a statement that is not a control statement is found
+ while(1)
{
- token=*t;
- t=0;
- }
- else
- token=parse_token();
-
- if(result.keyword.empty())
- {
- if(token.str.empty())
- break;
- else if(token.type!=Token::IDENTIFIER)
- throw ParseError(get_location()+": Syntax error at token '"+token.str+"' (expected an identifier)", src, in.get_line_number());
- result.keyword=token.str;
- result.valid=true;
- result.source=src;
- result.line=in.get_line_number();
- }
- else if(sub)
- {
- if(token.str=="}")
+ Statement st = mode->parse();
+ if(st.keyword=="__bin")
{
- sub=false;
- finish=true;
+ delete mode;
+ // Clear the pointer so the destructor does not delete it again if
+ // creating the new mode parser throws
+ mode = 0;
+ mode = new BinaryParser(in, src);
}
- else
+ else if(st.keyword=="__text")
{
- Statement ss=parse_(&token);
- result.sub.push_back(ss);
+ delete mode;
+ // Same precaution as for __bin above
+ mode = 0;
+ mode = new TextParser(in, src);
+ }
+ else if(st.keyword=="__src")
+ {
+ // Malformed input may omit the argument; report it instead of
+ // indexing an empty argument list
+ if(st.args.empty())
+ throw runtime_error("__src requires a source name argument");
+ string s = st.args[0].get<string>();
+ if(s.empty())
+ src = main_src;
+ else
+ src = format("%s[%s]", main_src, s);
}
- }
- else if(finish)
- {
- if(token.str!=";")
- throw ParseError(get_location()+": Syntax error at token '"+token.str+"' (Expected a ';')", src, in.get_line_number());
- break;
- }
- else if(token.str=="{")
- sub=true;
- else if(token.str==";")
- break;
- else if(token.type==Token::INTEGER)
- result.args.push_back(Value(Value::INTEGER, token.str));
- else if(token.type==Token::FLOAT)
- result.args.push_back(Value(Value::FLOAT, token.str));
- else if(token.type==Token::STRING)
- result.args.push_back(Value(Value::STRING, token.str));
- else if(token.type==Token::IDENTIFIER)
- {
- if(token.str=="true")
- result.args.push_back(Value(Value::BOOLEAN, "1"));
- else if(token.str=="false")
- result.args.push_back(Value(Value::BOOLEAN, "0"));
- else
- result.args.push_back(Value(Value::ENUM, token.str));
- //result.args.push_back(resolve_identifiertoken.str);
- }
- else if(token.str=="")
- throw ParseError(src+": Unexcepted EOF", src, in.get_line_number());
- else
- throw ParseError(get_location()+": Syntax error", src, in.get_line_number());
- }
-
- return result;
-}
-
-Token Parser::parse_token()
-{
- int c=0;
- unsigned comment=0;
-
- // Skip over comments and whitespace
- while(in)
- {
- c=in.get();
- int next=in.peek();
-
- if(c=='/' && next=='/')
- comment=1;
- else if(c=='/' && next=='*')
- comment=2;
- else if(c=='\n' && comment==1)
- comment=0;
- else if(c=='*' && next=='/' && comment==2)
- comment=3;
- else if(comment==3) // Skip the second character of block comment end
- comment=0;
- else if(!isspace(c) && !comment)
- break;
- }
-
- if(comment) // Didn't hit any non-whitespace
- throw ParseError(src+": Unfinished comment", src, in.get_line_number());
-
- enum ParseState
- {
- INIT,
- SIGN,
- FLOATEXPINIT,
- FLOATEXPSIGN,
- STRING,
- ACCEPT,
- ZERO,
- DECIMAL,
- HEXADECIMAL,
- OCTAL,
- FLOAT,
- FLOATEXP,
- IDENTIFIER
- };
-
- static Token::Type token_type[]=
- {
- Token::SPECIAL,
- Token::SPECIAL,
- Token::SPECIAL,
- Token::SPECIAL,
- Token::STRING,
- Token::SPECIAL,
- Token::INTEGER,
- Token::INTEGER,
- Token::INTEGER,
- Token::INTEGER,
- Token::FLOAT,
- Token::FLOAT,
- Token::IDENTIFIER
- };
-
- ParseState state=INIT;
- string buf;
- bool escape=false;
-
- while(in)
- {
- if(state!=INIT)
- c=in.get();
- int next=in.peek();
-
- buf+=c;
-
- switch(state)
- {
- case INIT:
- if(c=='0')
- state=ZERO;
- else if(c=='-' || c=='+')
- state=SIGN;
- else if(c=='.')
- state=FLOAT;
- else if(c=='"')
- state=STRING;
- else if(c=='{' || c=='}' || c==';')
- return Token(Token::SPECIAL, string(1, c));
- else if(isdigit(c))
- state=DECIMAL;
- else if(isalpha(c))
- state=IDENTIFIER;
- else
- parse_error(c, state);
- break;
-
- case SIGN:
- if(c=='0')
- state=ZERO;
- else if(isdigit(c))
- state=DECIMAL;
- else if(c=='.')
- state=FLOAT;
- else
- parse_error(c, state);
- break;
-
- case ZERO:
- if(c=='x')
- state=HEXADECIMAL;
- else if(isdigit(c))
- state=OCTAL;
- else if(c=='.')
- state=FLOAT;
- else
- parse_error(c, state);
- break;
-
- case DECIMAL:
- if(c=='.')
- state=FLOAT;
- else if(!isdigit(c))
- parse_error(c, state);
- break;
-
- case HEXADECIMAL:
- if(!isxdigit(c))
- parse_error(c, state);
- break;
-
- case OCTAL:
- if(!isodigit(c))
- parse_error(c, state);
- break;
-
- case FLOAT:
- if(c=='e' || c=='E')
- state=FLOATEXPINIT;
- else if(!isdigit(c))
- parse_error(c, state);
- break;
-
- case FLOATEXPINIT:
- if(c=='+' || c=='-')
- state=FLOATEXPSIGN;
- else if(isdigit(c))
- state=FLOATEXP;
- else
- parse_error(c, state);
- break;
-
- case FLOATEXPSIGN:
- if(isdigit(c))
- state=FLOATEXP;
- else
- parse_error(c, state);
- break;
-
- case FLOATEXP:
- if(!isdigit(c))
- parse_error(c, state);
- break;
-
- case STRING:
- if(c=='\\')
- escape=!escape;
- else if(c=='"' && !escape)
- return Token(Token::STRING, unescape_string(buf));
else
- escape=false;
- break;
-
- case IDENTIFIER:
- if(!isalpha(c) && !isdigit(c) && c!='_')
- parse_error(c, state);
- break;
-
- default:
- throw Exception(get_location()+": Internal error (bad state)");
+ return st;
}
-
- if(is_delimiter(next) && state>=ACCEPT)
- return Token(token_type[state], buf);
}
-
- return Token(Token::SPECIAL, "");
-}
-
-bool Parser::is_delimiter(int c)
-{
- return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/');
-}
-
-bool Parser::isodigit(int c)
-{
- return (c>='0' && c<='7');
-}
-
-string Parser::unescape_string(const string &str)
-{
- string result;
- bool escape=false;
- unsigned hexcape=0;
- for(string::const_iterator i=str.begin()+1; i!=str.end()-1; ++i)
+ catch(const exception &e)
{
- if(escape)
- {
- if(*i=='n')
- result+='\n';
- else if(*i=='t')
- result+='\t';
- else if(*i=='\\')
- result+='\\';
- else if(*i=='"')
- result+='"';
- else if(*i=='x')
- hexcape=0x100;
- else
- throw ParseError("Invalid escape", src, in.get_line_number());
- escape=false;
- }
- else if(hexcape)
- {
- unsigned digit=0;
- if(*i>='0' && *i<='9')
- digit=*i-'0';
- else if(*i>='a' && *i<='f')
- digit=*i-'a'+10;
- else if(*i>='A' && *i<='F')
- digit=*i-'A'+10;
- else
- throw ParseError("Invalid hex digit", src, in.get_line_number());
-
- hexcape=(hexcape<<4)|digit;
- if(hexcape&0x10000)
- {
- result+=hexcape&0xFF;
- hexcape=0;
- }
- }
- else if(*i=='\\')
- escape=true;
+ // Once any error has occurred, further parse() calls are refused
+ good = false;
+ // Pass data_error through unchanged; wrap anything else with the
+ // current source name and line number
+ if(dynamic_cast<const data_error *>(&e))
+ throw;
else
- result+=*i;
+ throw data_error(src, in.get_line_number(), e);
}
-
- return result;
-}
-
-string Parser::get_location()
-{
- ostringstream ss;
- ss<<src<<':'<<in.get_line_number();
- return ss.str();
-}
-
-void Parser::parse_error(int c, int state)
-{
- ostringstream ss;
- ss<<get_location()<<": Parse error at '"<<(char)c<<"' (state "<<state<<')';
- throw ParseError(ss.str(), src, in.get_line_number());
}
} // namespace DataFile