git.tdb.fi Git - libs/datafile.git/blobdiff - source/parser.cpp
Add binary data format
[libs/datafile.git] / source / parser.cpp
index 8fccec2f94c208d93f966ec88f4279eeb133b081..82ac0069b95d4f22e3d0de1ab0c0b66b04c76423 100644 (file)
@@ -6,10 +6,10 @@ Distributed under the LGPL
 */
 #include <cctype>
 #include <sstream>
-#include "error.h"
+#include "binaryparser.h"
 #include "parser.h"
 #include "statement.h"
-#include "token.h"
+#include "textparser.h"
 
 using namespace std;
 
@@ -19,9 +19,15 @@ namespace DataFile {
 Parser::Parser(istream &i, const string &s):
        in(i),
        src(s),
-       good(true)
+       good(true),
+       mode(new TextParser(in, src))
 { }
 
+Parser::~Parser()
+{
+       delete mode;
+}
+
 Statement Parser::parse()
 {
        if(!good)
@@ -29,351 +35,28 @@ Statement Parser::parse()
 
        try
        {
-               return parse_(0);
-       }
-       catch(const Exception &e)
-       {
-               good=false;
-               throw;
-       }
-}
-
-Statement Parser::parse_(const Token *t)
-{
-       Statement result;
-       bool      sub=false;
-       bool      finish=false;
-
-       while(in)
-       {
-               Token token;
-               if(t)
-               {
-                       token=*t;
-                       t=0;
-               }
-               else
-                       token=parse_token();
-
-               if(result.keyword.empty())
-               {
-                       if(token.str.empty())
-                               break;
-                       else if(token.type!=Token::IDENTIFIER)
-                               throw ParseError(get_location()+": Syntax error at token '"+token.str+"' (expected an identifier)", src, in.get_line_number());
-                       result.keyword=token.str;
-                       result.valid=true;
-                       result.source=src;
-                       result.line=in.get_line_number();
-               }
-               else if(sub)
+               while(1)
                {
-                       if(token.str=="}")
+                       Statement st=mode->parse();
+                       if(st.keyword=="__bin")
                        {
-                               sub=false;
-                               finish=true;
+                               delete mode;
+                               mode=new BinaryParser(in, src);
                        }
-                       else
+                       else if(st.keyword=="__text")
                        {
-                               Statement ss=parse_(&token);
-                               result.sub.push_back(ss);
+                               delete mode;
+                               mode=new TextParser(in, src);
                        }
-               }
-               else if(finish)
-               {
-                       if(token.str!=";")
-                               throw ParseError(get_location()+": Syntax error at token '"+token.str+"' (Expected a ';')", src, in.get_line_number());
-                       break;
-               }
-               else if(token.str=="{")
-                       sub=true;
-               else if(token.str==";")
-                       break;
-               else if(token.type==Token::INTEGER)
-                       result.args.push_back(Value(Value::INTEGER, token.str));
-               else if(token.type==Token::FLOAT)
-                       result.args.push_back(Value(Value::FLOAT, token.str));
-               else if(token.type==Token::STRING)
-                       result.args.push_back(Value(Value::STRING, token.str));
-               else if(token.type==Token::IDENTIFIER)
-               {
-                       if(token.str=="true")
-                               result.args.push_back(Value(Value::BOOLEAN, "1"));
-                       else if(token.str=="false")
-                               result.args.push_back(Value(Value::BOOLEAN, "0"));
                        else
-                               result.args.push_back(Value(Value::ENUM, token.str));
-                       //result.args.push_back(resolve_identifiertoken.str);
+                               return st;
                }
-               else if(token.str=="")
-                       throw ParseError(src+": Unexcepted EOF", src, in.get_line_number());
-               else
-                       throw ParseError(get_location()+": Syntax error", src, in.get_line_number());
        }
-
-       return result;
-}
-
-Token Parser::parse_token()
-{
-       int c=0;
-       unsigned comment=0;
-
-       // Skip over comments and whitespace
-       while(in)
-       {
-               c=in.get();
-               int next=in.peek();
-
-               if(c=='/' && next=='/')
-                       comment=1;
-               else if(c=='/' && next=='*')
-                       comment=2;
-               else if(c=='\n' && comment==1)
-                       comment=0;
-               else if(c=='*' && next=='/' && comment==2)
-                       comment=3;
-               else if(comment==3)   // Skip the second character of block comment end
-                       comment=0;
-               else if(!isspace(c) && !comment)
-                       break;
-       }
-
-       if(comment)  // Didn't hit any non-whitespace
-               throw ParseError(src+": Unfinished comment", src, in.get_line_number());
-
-       enum ParseState
-       {
-               INIT,
-               SIGN,
-               FLOATEXPINIT,
-               FLOATEXPSIGN,
-               STRING,
-               ACCEPT,
-               ZERO,
-               DECIMAL,
-               HEXADECIMAL,
-               OCTAL,
-               FLOAT,
-               FLOATEXP,
-               IDENTIFIER
-       };
-
-       static Token::Type token_type[]=
-       {
-               Token::SPECIAL,
-               Token::SPECIAL,
-               Token::SPECIAL,
-               Token::SPECIAL,
-               Token::STRING,
-               Token::SPECIAL,
-               Token::INTEGER,
-               Token::INTEGER,
-               Token::INTEGER,
-               Token::INTEGER,
-               Token::FLOAT,
-               Token::FLOAT,
-               Token::IDENTIFIER
-       };
-
-       ParseState state=INIT;
-       string     buf;
-       bool       escape=false;
-
-       while(in)
-       {
-               if(state!=INIT)
-                       c=in.get();
-               int next=in.peek();
-
-               buf+=c;
-
-               switch(state)
-               {
-               case INIT:
-                       if(c=='0')
-                               state=ZERO;
-                       else if(c=='-' || c=='+')
-                               state=SIGN;
-                       else if(c=='.')
-                               state=FLOAT;
-                       else if(c=='"')
-                               state=STRING;
-                       else if(c=='{' || c=='}' || c==';')
-                               return Token(Token::SPECIAL, string(1, c));
-                       else if(isdigit(c))
-                               state=DECIMAL;
-                       else if(isalpha(c))
-                               state=IDENTIFIER;
-                       else
-                               parse_error(c, state);
-                       break;
-
-               case SIGN:
-                       if(c=='0')
-                               state=ZERO;
-                       else if(isdigit(c))
-                               state=DECIMAL;
-                       else if(c=='.')
-                               state=FLOAT;
-                       else
-                               parse_error(c, state);
-                       break;
-
-               case ZERO:
-                       if(c=='x')
-                               state=HEXADECIMAL;
-                       else if(isdigit(c))
-                               state=OCTAL;
-                       else if(c=='.')
-                               state=FLOAT;
-                       else
-                               parse_error(c, state);
-                       break;
-
-               case DECIMAL:
-                       if(c=='.')
-                               state=FLOAT;
-                       else if(!isdigit(c))
-                               parse_error(c, state);
-                       break;
-
-               case HEXADECIMAL:
-                       if(!isxdigit(c))
-                               parse_error(c, state);
-                       break;
-
-               case OCTAL:
-                       if(!isodigit(c))
-                               parse_error(c, state);
-                       break;
-
-               case FLOAT:
-                       if(c=='e' || c=='E')
-                               state=FLOATEXPINIT;
-                       else if(!isdigit(c))
-                               parse_error(c, state);
-                       break;
-
-               case FLOATEXPINIT:
-                       if(c=='+' || c=='-')
-                               state=FLOATEXPSIGN;
-                       else if(isdigit(c))
-                               state=FLOATEXP;
-                       else
-                               parse_error(c, state);
-                       break;
-
-               case FLOATEXPSIGN:
-                       if(isdigit(c))
-                               state=FLOATEXP;
-                       else
-                               parse_error(c, state);
-                       break;
-
-               case FLOATEXP:
-                       if(!isdigit(c))
-                               parse_error(c, state);
-                       break;
-
-               case STRING:
-                       if(c=='\\')
-                               escape=!escape;
-                       else if(c=='"' && !escape)
-                               return Token(Token::STRING, unescape_string(buf));
-                       else
-                               escape=false;
-                       break;
-
-               case IDENTIFIER:
-                       if(!isalpha(c) && !isdigit(c) && c!='_')
-                               parse_error(c, state);
-                       break;
-
-               default:
-                       throw Exception(get_location()+": Internal error (bad state)");
-               }
-
-               if(is_delimiter(next) && state>=ACCEPT)
-                       return Token(token_type[state], buf);
-       }
-
-       return Token(Token::SPECIAL, "");
-}
-
-bool Parser::is_delimiter(int c)
-{
-       return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/');
-}
-
-bool Parser::isodigit(int c)
-{
-       return (c>='0' && c<='7');
-}
-
-string Parser::unescape_string(const string &str)
-{
-       string   result;
-       bool     escape=false;
-       unsigned hexcape=0;
-       for(string::const_iterator i=str.begin()+1; i!=str.end()-1; ++i)
+       catch(const Exception &e)
        {
-               if(escape)
-               {
-                       if(*i=='n')
-                               result+='\n';
-                       else if(*i=='t')
-                               result+='\t';
-                       else if(*i=='\\')
-                               result+='\\';
-                       else if(*i=='"')
-                               result+='"';
-                       else if(*i=='x')
-                               hexcape=0x100;
-                       else
-                               throw ParseError("Invalid escape", src, in.get_line_number());
-                       escape=false;
-               }
-               else if(hexcape)
-               {
-                       unsigned digit=0;
-                       if(*i>='0' && *i<='9')
-                               digit=*i-'0';
-                       else if(*i>='a' && *i<='f')
-                               digit=*i-'a'+10;
-                       else if(*i>='A' && *i<='F')
-                               digit=*i-'A'+10;
-                       else
-                               throw ParseError("Invalid hex digit", src, in.get_line_number());
-
-                       hexcape=(hexcape<<4)|digit;
-                       if(hexcape&0x10000)
-                       {
-                               result+=hexcape&0xFF;
-                               hexcape=0;
-                       }
-               }
-               else if(*i=='\\')
-                       escape=true;
-               else
-                       result+=*i;
+               good=false;
+               throw;
        }
-
-       return result;
-}
-
-string Parser::get_location()
-{
-       ostringstream ss;
-       ss<<src<<':'<<in.get_line_number();
-       return ss.str();
-}
-
-void Parser::parse_error(int c, int state)
-{
-       ostringstream ss;
-       ss<<get_location()<<": Parse error at '"<<(char)c<<"' (state "<<state<<')';
-       throw ParseError(ss.str(), src, in.get_line_number());
 }
 
 } // namespace DataFile