git.tdb.fi Git - libs/datafile.git/blobdiff - source/parser.cpp
Move all exception classes to a common header
[libs/datafile.git] / source / parser.cpp
index c47969bb2f3f30c5e194f534fbae6ff72884a3ad..5972e2d98f832d312afa4373bfe66d4204d10f8f 100644 (file)
-/*
-This file is part of libmspparser
-Copyright © 2006  Mikko Rasa, Mikkosoft Productions
-Distributed under the LGPL
-*/
-#include <cctype>
-#include <sstream>
-#include "error.h"
+#include <msp/strings/format.h>
+#include "binaryparser.h"
+#include "except.h"
 #include "parser.h"
 #include "statement.h"
-#include "token.h"
+#include "textparser.h"
 
 using namespace std;
 
 namespace Msp {
-namespace Parser {
+namespace DataFile {
 
-Parser::Parser(istream &i, const string &s):
+Parser::Parser(IO::Base &i, const string &s):
        in(i),
+       main_src(s),
        src(s),
-       good(true)
+       good(true),
+       mode(new TextParser(in, src))
 { }
 
-Statement Parser::parse()
+Parser::~Parser()
 {
-       if(!good)
-               throw Exception("Parser is not good");
-
-       try
-       {
-               return parse_(0);
-       }
-       catch(const Exception &e)
-       {
-               good=false;
-               throw;
-       }
+       delete mode;
 }
 
-Statement Parser::parse_(const Token *t)
+Statement Parser::parse(bool raw)
 {
-       Statement result;
-       bool      sub=false;
-       bool      finish=false;
+       if(!good)
+               throw logic_error("Parser::parse() !good");
 
-       while(in)
+       try
        {
-               Token token;
-               if(t)
+               while(1)
                {
-                       token=*t;
-                       t=0;
-               }
-               else
-                       token=parse_token();
-
-               if(result.keyword.empty())
-               {
-                       if(token.str.empty())
-                               break;
-                       else if(token.type!=Token::IDENTIFIER)
-                               throw ParseError(get_location()+": Syntax error at token '"+token.str+"' (expected an identifier)", src, in.get_line_number());
-                       result.keyword=token.str;
-                       result.valid=true;
-                       result.source=src;
-                       result.line=in.get_line_number();
-               }
-               else if(sub)
-               {
-                       if(token.str=="}")
-                       {
-                               sub=false;
-                               finish=true;
-                       }
-                       else
+                       Statement st = mode->parse();
+                       if(!st.keyword.compare(0, 2, "__"))
                        {
-                               Statement ss=parse_(&token);
-                               result.sub.push_back(ss);
+                               st.control = true;
+                               process_control_statement(st);
                        }
+
+                       if(raw || !st.control)
+                               return st;
+                       else if(!good)  // This will occur with an __end statement
+                               return Statement();
                }
-               else if(finish)
-               {
-                       if(token.str!=";")
-                               throw ParseError(get_location()+": Syntax error at token '"+token.str+"' (Expected a ';')", src, in.get_line_number());
-                       break;
-               }
-               else if(token.str=="{")
-                       sub=true;
-               else if(token.str==";")
-                       break;
-               else if(token.type==Token::INTEGER)
-                       result.args.push_back(Value(Value::INTEGER, token.str));
-               else if(token.type==Token::FLOAT)
-                       result.args.push_back(Value(Value::FLOAT, token.str));
-               else if(token.type==Token::STRING)
-                       result.args.push_back(Value(Value::STRING, token.str));
-               else if(token.type==Token::IDENTIFIER)
-               {
-                       if(token.str=="true")
-                               result.args.push_back(Value(Value::BOOLEAN, "1"));
-                       else if(token.str=="false")
-                               result.args.push_back(Value(Value::BOOLEAN, "0"));
-                       else
-                               result.args.push_back(Value(Value::ENUM, token.str));
-                       //result.args.push_back(resolve_identifiertoken.str);
-               }
-               else if(token.str=="")
-                       throw ParseError(src+": Unexcepted EOF", src, in.get_line_number());
+       }
+       catch(const exception &e)
+       {
+               good = false;
+               if(dynamic_cast<const data_error *>(&e))
+                       throw;
                else
-                       throw ParseError(get_location()+": Syntax error", src, in.get_line_number());
+                       throw data_error(src, in.get_line_number(), e);
        }
-
-       return result;
 }
 
-Token Parser::parse_token()
+void Parser::process_control_statement(const Statement &st)
 {
-       int c=0;
-       unsigned comment=0;
-
-       // Skip over comments and whitespace
-       while(in)
+       if(st.keyword=="__bin")
        {
-               c=in.get();
-               int next=in.peek();
+               delete mode;
+               mode = new BinaryParser(in, src);
 
-               if(c=='/' && next=='/')
-                       comment=1;
-               else if(c=='/' && next=='*')
-                       comment=2;
-               else if(c=='\n' && comment==1)
-                       comment=0;
-               else if(c=='*' && next=='/' && comment==2)
-                       comment=3;
-               else if(comment==3)   // Skip the second character of block comment end
-                       comment=0;
-               else if(!isspace(c) && !comment)
-                       break;
+               while(in.peek()=='\n')
+                       in.get();
        }
-
-       if(comment)  // Didn't hit any non-whitespace
-               throw ParseError(src+": Unfinished comment", src, in.get_line_number());
-
-       enum ParseState
-       {
-               INIT,
-               SIGN,
-               FLOATEXPINIT,
-               FLOATEXPSIGN,
-               STRING,
-               ACCEPT,
-               ZERO,
-               DECIMAL,
-               HEXADECIMAL,
-               OCTAL,
-               FLOAT,
-               FLOATEXP,
-               IDENTIFIER
-       };
-
-       static Token::Type token_type[]=
+       else if(st.keyword=="__text")
        {
-               Token::SPECIAL,
-               Token::SPECIAL,
-               Token::SPECIAL,
-               Token::SPECIAL,
-               Token::STRING,
-               Token::SPECIAL,
-               Token::INTEGER,
-               Token::INTEGER,
-               Token::INTEGER,
-               Token::INTEGER,
-               Token::FLOAT,
-               Token::FLOAT,
-               Token::IDENTIFIER
-       };
-
-       ParseState state=INIT;
-       string     buf;
-       bool       escape=false;
-
-       while(in)
+               delete mode;
+               mode = new TextParser(in, src);
+       }
+       else if(st.keyword=="__z")
+               in.set_decompress();
+       else if(st.keyword=="__src")
        {
-               if(state!=INIT)
-                       c=in.get();
-               int next=in.peek();
-
-               buf+=c;
-
-               switch(state)
-               {
-               case INIT:
-                       if(c=='0')
-                               state=ZERO;
-                       else if(c=='-' || c=='+')
-                               state=SIGN;
-                       else if(c=='.')
-                               state=FLOAT;
-                       else if(c=='"')
-                               state=STRING;
-                       else if(c=='{' || c=='}' || c==';')
-                               return Token(Token::SPECIAL, string(1, c));
-                       else if(isdigit(c))
-                               state=DECIMAL;
-                       else if(isalpha(c))
-                               state=IDENTIFIER;
-                       else
-                               parse_error(c, state);
-                       break;
-
-               case SIGN:
-                       if(c=='0')
-                               state=ZERO;
-                       else if(isdigit(c))
-                               state=DECIMAL;
-                       else if(c=='.')
-                               state=FLOAT;
-                       else
-                               parse_error(c, state);
-                       break;
-
-               case ZERO:
-                       if(c=='x')
-                               state=HEXADECIMAL;
-                       else if(isdigit(c))
-                               state=OCTAL;
-                       else if(c=='.')
-                               state=FLOAT;
-                       else
-                               parse_error(c, state);
-                       break;
-
-               case DECIMAL:
-                       if(c=='.')
-                               state=FLOAT;
-                       else if(!isdigit(c))
-                               parse_error(c, state);
-                       break;
-
-               case HEXADECIMAL:
-                       if(!isxdigit(c))
-                               parse_error(c, state);
-                       break;
-
-               case OCTAL:
-                       if(!isodigit(c))
-                               parse_error(c, state);
-                       break;
-
-               case FLOAT:
-                       if(c=='e' || c=='E')
-                               state=FLOATEXPINIT;
-                       else if(!isdigit(c))
-                               parse_error(c, state);
-                       break;
-
-               case FLOATEXPINIT:
-                       if(c=='+' || c=='-')
-                               state=FLOATEXPSIGN;
-                       else if(isdigit(c))
-                               state=FLOATEXP;
-                       else
-                               parse_error(c, state);
-                       break;
-
-               case FLOATEXPSIGN:
-                       if(isdigit(c))
-                               state=FLOATEXP;
-                       else
-                               parse_error(c, state);
-                       break;
-
-               case FLOATEXP:
-                       if(!isdigit(c))
-                               parse_error(c, state);
-                       break;
-
-               case STRING:
-                       if(c=='\\')
-                               escape=!escape;
-                       else if(c=='"' && !escape)
-                               return Token(Token::STRING, unescape_string(buf));
-                       else
-                               escape=false;
-                       break;
-
-               case IDENTIFIER:
-                       if(!isalpha(c) && !isdigit(c) && c!='_')
-                               parse_error(c, state);
-                       break;
-
-               default:
-                       throw Exception(get_location()+": Internal error (bad state)");
-               }
-
-               if(is_delimiter(next) && state>=ACCEPT)
-                       return Token(token_type[state], buf);
+               string s = st.args[0].get<string>();
+               if(s.empty())
+                       src = main_src;
+               else
+                       src = format("%s[%s]", main_src, s);
        }
-
-       return Token(Token::SPECIAL, "");
+       else if(st.keyword=="__end")
+               good = false;
+       else
+               mode->process_control_statement(st);
 }
 
-bool Parser::is_delimiter(int c)
+const StatementKey *Parser::peek(unsigned level)
 {
-       return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/');
-}
-
-bool Parser::isodigit(int c)
-{
-       return (c>='0' && c<='7');
-}
-
-string Parser::unescape_string(const string &str)
-{
-       string   result;
-       bool     escape=false;
-       unsigned hexcape=0;
-       for(string::const_iterator i=str.begin()+1; i!=str.end()-1; ++i)
+       while(good)
        {
-               if(escape)
-               {
-                       if(*i=='n')
-                               result+='\n';
-                       else if(*i=='t')
-                               result+='\t';
-                       else if(*i=='\\')
-                               result+='\\';
-                       else if(*i=='"')
-                               result+='"';
-                       else if(*i=='x')
-                               hexcape=0x100;
-                       else
-                               throw ParseError("Invalid escape", src, in.get_line_number());
-                       escape=false;
-               }
-               else if(hexcape)
-               {
-                       unsigned digit=0;
-                       if(*i>='0' && *i<='9')
-                               digit=*i-'0';
-                       else if(*i>='a' && *i<='f')
-                               digit=*i-'a'+10;
-                       else if(*i>='A' && *i<='F')
-                               digit=*i-'A'+10;
-                       else
-                               throw ParseError("Invalid hex digit", src, in.get_line_number());
-
-                       hexcape=(hexcape<<4)|digit;
-                       if(hexcape&0x10000)
-                       {
-                               result+=hexcape&0xFF;
-                               hexcape=0;
-                       }
-               }
-               else if(*i=='\\')
-                       escape=true;
+               const StatementKey *key = mode->peek(level);
+               if(key && !key->keyword.compare(0, 2, "__"))
+                       process_control_statement(mode->parse());
                else
-                       result+=*i;
+                       return key;
        }
 
-       return result;
-}
-
-string Parser::get_location()
-{
-       ostringstream ss;
-       ss<<src<<':'<<in.get_line_number();
-       return ss.str();
+       return 0;
 }
 
-void Parser::parse_error(int c, int state)
+bool Parser::parse_and_load(unsigned level, Loader &ldr, const LoaderAction &act)
 {
-       ostringstream ss;
-       ss<<get_location()<<": Parse error at '"<<(char)c<<"' (state "<<state<<')';
-       throw ParseError(ss.str(), src, in.get_line_number());
+       // Peek first to get any control statements processed
+       peek(level);
+       return mode->parse_and_load(level, ldr, act);
 }
 
-} // namespace Parser
+} // namespace DataFile
 } // namespace Msp