git.tdb.fi Git - libs/datafile.git/blobdiff - source/textparser.cpp
Move token-to-argument conversion to Statement
[libs/datafile.git] / source / textparser.cpp
index f7c830d0030ebcf5428c3c0abf06c63ba043ce60..7dc4bde7e46ed659368498798bf0b6b3b62fa246 100644 (file)
@@ -1,12 +1,6 @@
-/* $Id$
-
-This file is part of libmspdatafile
-Copyright © 2007  Mikko Rasa, Mikkosoft Productions
-Distributed under the LGPL
-*/
-
-#include <msp/strings/formatter.h>
+#include <msp/strings/format.h>
 #include <msp/strings/utils.h>
+#include "except.h"
 #include "input.h"
 #include "textparser.h"
 #include "token.h"
@@ -28,74 +22,54 @@ Statement TextParser::parse()
 Statement TextParser::parse_statement(const Token *t)
 {
        Statement result;
-       bool      sub=false;
-       bool      finish=false;
+       unsigned sub = 0;
 
        while(in)
        {
                Token token;
                if(t)
                {
-                       token=*t;
-                       t=0;
+                       token = *t;
+                       t = 0;
                }
                else
-                       token=parse_token();
+                       token = parse_token();
 
                if(result.keyword.empty())
                {
                        if(token.str.empty())
                                break;
                        else if(token.type!=Token::IDENTIFIER)
-                               throw ParseError(format("%s: Syntax error at token '%s' (expected an identifier)", get_location(), token.str), src, in.get_line_number());
-                       result.keyword=token.str;
-                       result.valid=true;
-                       result.source=src;
-                       result.line=in.get_line_number();
+                               throw syntax_error(token.str);
+                       result.keyword = token.str;
+                       result.valid = true;
+                       result.source = src;
+                       result.line = in.get_line_number();
                }
-               else if(sub)
+               else if(sub==1)
                {
                        if(token.str=="}")
-                       {
-                               sub=false;
-                               finish=true;
-                       }
+                               sub = 2;
                        else
                        {
-                               Statement ss=parse_statement(&token);
+                               Statement ss = parse_statement(&token);
                                result.sub.push_back(ss);
                        }
                }
-               else if(finish)
+               else if(sub==2)
                {
                        if(token.str!=";")
-                               throw ParseError(format("%s: Syntax error at token '%s' (Expected a ';')", get_location(), token.str), src, in.get_line_number());
+                               throw syntax_error(token.str);
                        break;
                }
                else if(token.str=="{")
-                       sub=true;
+                       sub = 1;
                else if(token.str==";")
                        break;
-               else if(token.type==Token::INTEGER)
-                       result.args.push_back(Value(INTEGER, token.str));
-               else if(token.type==Token::FLOAT)
-                       result.args.push_back(Value(FLOAT, token.str));
-               else if(token.type==Token::STRING)
-                       result.args.push_back(Value(STRING, token.str));
-               else if(token.type==Token::IDENTIFIER)
-               {
-                       if(token.str=="true")
-                               result.args.push_back(Value(BOOLEAN, "1"));
-                       else if(token.str=="false")
-                               result.args.push_back(Value(BOOLEAN, "0"));
-                       else
-                               result.args.push_back(Value(ENUM, token.str));
-                       //result.args.push_back(resolve_identifiertoken.str);
-               }
-               else if(token.str=="")
-                       throw ParseError(src+": Unexcepted end of input", src, in.get_line_number());
+               else if(token.type!=Token::SPECIAL)
+                       result.append_from_token(token);
                else
-                       throw ParseError(get_location()+": Syntax error", src, in.get_line_number());
+                       throw syntax_error(token.str);
        }
 
        return result;
@@ -103,31 +77,31 @@ Statement TextParser::parse_statement(const Token *t)
 
 Token TextParser::parse_token()
 {
-       int c=0;
-       int comment=0;
+       int c = 0;
+       int comment = 0;
 
        // Skip over comments and whitespace
        while(in && comment>=0)
        {
-               c=in.get();
-               int next=in.peek();
+               c = in.get();
+               int next = in.peek();
 
-               if(c=='/' && next=='/')
-                       comment=1;
-               else if(c=='/' && next=='*')
-                       comment=2;
+               if(c=='/' && next=='/' && !comment)
+                       comment = 1;
+               else if(c=='/' && next=='*' && !comment)
+                       comment = 2;
                else if(c=='\n' && comment==1)
-                       comment=0;
+                       comment = 0;
                else if(c=='*' && next=='/' && comment==2)
-                       comment=3;
+                       comment = 3;
                else if(comment==3)   // Skip the second character of block comment end
-                       comment=0;
-               else if(!isspace(c) && !comment)
-                       comment=-1;
+                       comment = 0;
+               else if(c!=-1 && !isspace(c) && !comment)
+                       comment = -1;
        }
 
        if(comment>0)  // EOF while in comment
-               throw ParseError(src+": Unfinished comment at end of input", src, in.get_line_number());
+               throw parse_error(string());
        else if(comment==0)  // Didn't hit any non-whitespace
                return Token(Token::SPECIAL, "");
 
@@ -138,6 +112,7 @@ Token TextParser::parse_token()
                FLOATEXPINIT,
                FLOATEXPSIGN,
                STRING,
+               STRING_ESCAPE,
                ACCEPT,
                ZERO,
                DECIMAL,
@@ -145,6 +120,7 @@ Token TextParser::parse_token()
                OCTAL,
                FLOAT,
                FLOATEXP,
+               STRING_END,
                IDENTIFIER
        };
 
@@ -154,7 +130,8 @@ Token TextParser::parse_token()
                Token::SPECIAL,
                Token::SPECIAL,
                Token::SPECIAL,
-               Token::STRING,
+               Token::SPECIAL,
+               Token::SPECIAL,
                Token::SPECIAL,
                Token::INTEGER,
                Token::INTEGER,
@@ -162,138 +139,143 @@ Token TextParser::parse_token()
                Token::INTEGER,
                Token::FLOAT,
                Token::FLOAT,
+               Token::STRING,
                Token::IDENTIFIER
        };
 
-       ParseState state=INIT;
-       string     buf;
-       bool       escape=false;
+       ParseState state = INIT;
+       string buf;
 
        while(in || state==INIT)
        {
                if(state!=INIT)
-                       c=in.get();
-               int next=in.peek();
+                       c = in.get();
+               int next = in.peek();
 
-               buf+=c;
+               buf += c;
 
                switch(state)
                {
                case INIT:
                        if(c=='0')
-                               state=ZERO;
+                               state = ZERO;
                        else if(c=='-' || c=='+')
-                               state=SIGN;
+                               state = SIGN;
                        else if(c=='.')
-                               state=FLOAT;
+                               state = FLOAT;
                        else if(c=='"')
-                               state=STRING;
+                               state = STRING;
                        else if(c=='{' || c=='}' || c==';')
                                return Token(Token::SPECIAL, string(1, c));
                        else if(isdigit(c))
-                               state=DECIMAL;
-                       else if(isalpha(c) || c=='_')
-                               state=IDENTIFIER;
+                               state = DECIMAL;
+                       else if(isalpha(c) || c=='_' || c=='\\')
+                               state = IDENTIFIER;
                        else
-                               parse_error(c, state);
+                               throw parse_error(buf);
                        break;
 
                case SIGN:
                        if(c=='0')
-                               state=ZERO;
+                               state = ZERO;
                        else if(isdigit(c))
-                               state=DECIMAL;
+                               state = DECIMAL;
                        else if(c=='.')
-                               state=FLOAT;
+                               state = FLOAT;
                        else
-                               parse_error(c, state);
+                               throw parse_error(buf);
                        break;
 
                case ZERO:
                        if(c=='x')
-                               state=HEXADECIMAL;
+                               state = HEXADECIMAL;
                        else if(isdigit(c))
-                               state=OCTAL;
+                               state = OCTAL;
                        else if(c=='.')
-                               state=FLOAT;
+                               state = FLOAT;
                        else
-                               parse_error(c, state);
+                               throw parse_error(buf);
                        break;
 
                case DECIMAL:
                        if(c=='.')
-                               state=FLOAT;
+                               state = FLOAT;
+                       else if(c=='e' || c=='E')
+                               state = FLOATEXPINIT;
                        else if(!isdigit(c))
-                               parse_error(c, state);
+                               throw parse_error(buf);
                        break;
 
                case HEXADECIMAL:
                        if(!isxdigit(c))
-                               parse_error(c, state);
+                               throw parse_error(buf);
                        break;
 
                case OCTAL:
                        if(!isodigit(c))
-                               parse_error(c, state);
+                               throw parse_error(buf);
                        break;
 
                case FLOAT:
                        if(c=='e' || c=='E')
-                               state=FLOATEXPINIT;
+                               state = FLOATEXPINIT;
                        else if(!isdigit(c))
-                               parse_error(c, state);
+                               throw parse_error(buf);
                        break;
 
                case FLOATEXPINIT:
                        if(c=='+' || c=='-')
-                               state=FLOATEXPSIGN;
+                               state = FLOATEXPSIGN;
                        else if(isdigit(c))
-                               state=FLOATEXP;
+                               state = FLOATEXP;
                        else
-                               parse_error(c, state);
+                               throw parse_error(buf);
                        break;
 
                case FLOATEXPSIGN:
                        if(isdigit(c))
-                               state=FLOATEXP;
+                               state = FLOATEXP;
                        else
-                               parse_error(c, state);
+                               throw parse_error(buf);
                        break;
 
                case FLOATEXP:
                        if(!isdigit(c))
-                               parse_error(c, state);
+                               throw parse_error(buf);
                        break;
 
                case STRING:
                        if(c=='\\')
-                               escape=!escape;
-                       else if(c=='"' && !escape)
-                       {
-                               try
-                               {
-                                       return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
-                               }
-                               catch(const Exception &e)
-                               {
-                                       throw ParseError(format("%s: %s", get_location(), e.what()), src, in.get_line_number());
-                               }
-                       }
-                       else
-                               escape=false;
+                               state = STRING_ESCAPE;
+                       else if(c=='"')
+                               state = STRING_END;
+                       break;
+
+               case STRING_ESCAPE:
+                       state = STRING;
                        break;
 
                case IDENTIFIER:
-                       if(!isalpha(c) && !isdigit(c) && c!='_')
-                               parse_error(c, state);
+                       if(!isalpha(c) && !isdigit(c) && c!='_' && c!='-' && c!='/')
+                               throw parse_error(buf);
                        break;
 
+               case STRING_END:
+                       throw parse_error(buf);
+
                default:
-                       throw Exception(get_location()+": Internal error (bad state)");
+                       throw logic_error("bad parser state");
                }
 
                if(is_delimiter(next) && state>=ACCEPT)
-                       return Token(token_type[state], buf);
+               {
+                       if(state==IDENTIFIER && buf[0]=='\\')
+                               return Token(Token::IDENTIFIER, buf.substr(1));
+                       else if(state==STRING_END)
+                               return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
+                       else
+                               return Token(token_type[state], buf);
+               }
        }
 
        return Token(Token::SPECIAL, "");
@@ -309,17 +291,5 @@ bool TextParser::isodigit(int c)
        return (c>='0' && c<='7');
 }
 
-string TextParser::get_location()
-{
-       ostringstream ss;
-       ss<<src<<':'<<in.get_line_number();
-       return ss.str();
-}
-
-void TextParser::parse_error(int c, int state)
-{
-       throw ParseError(format("%s: Parse error at '%c' (state %d)", get_location(), static_cast<char>(c), state), src, in.get_line_number());
-}
-
 } // namespace DataFile
 } // namespace Msp