git.tdb.fi Git - libs/datafile.git/blobdiff - source/textparser.cpp
Move token-to-argument conversion to Statement
[libs/datafile.git] / source / textparser.cpp
index d8e422ee7ff4d4d710a764a62383955f377305f5..7dc4bde7e46ed659368498798bf0b6b3b62fa246 100644 (file)
@@ -1,12 +1,6 @@
-/* $Id$
-
-This file is part of libmspdatafile
-Copyright © 2007-2008  Mikko Rasa, Mikkosoft Productions
-Distributed under the LGPL
-*/
-
-#include <msp/strings/formatter.h>
+#include <msp/strings/format.h>
 #include <msp/strings/utils.h>
+#include "except.h"
 #include "input.h"
 #include "textparser.h"
 #include "token.h"
@@ -28,8 +22,7 @@ Statement TextParser::parse()
 Statement TextParser::parse_statement(const Token *t)
 {
        Statement result;
-       bool      sub = false;
-       bool      finish = false;
+       unsigned sub = 0;
 
        while(in)
        {
@@ -47,55 +40,36 @@ Statement TextParser::parse_statement(const Token *t)
                        if(token.str.empty())
                                break;
                        else if(token.type!=Token::IDENTIFIER)
-                               throw_at(ParseError(format("Syntax error at token '%s' (expected an identifier)", token.str)), get_location());
+                               throw syntax_error(token.str);
                        result.keyword = token.str;
                        result.valid = true;
                        result.source = src;
                        result.line = in.get_line_number();
                }
-               else if(sub)
+               else if(sub==1)
                {
                        if(token.str=="}")
-                       {
-                               sub = false;
-                               finish = true;
-                       }
+                               sub = 2;
                        else
                        {
                                Statement ss = parse_statement(&token);
                                result.sub.push_back(ss);
                        }
                }
-               else if(finish)
+               else if(sub==2)
                {
                        if(token.str!=";")
-                               throw_at(ParseError(format("Syntax error at token '%s' (Expected a ';')", token.str)), get_location());
+                               throw syntax_error(token.str);
                        break;
                }
                else if(token.str=="{")
-                       sub = true;
+                       sub = 1;
                else if(token.str==";")
                        break;
-               else if(token.type==Token::INTEGER)
-                       result.args.push_back(Value(INTEGER, token.str));
-               else if(token.type==Token::FLOAT)
-                       result.args.push_back(Value(FLOAT, token.str));
-               else if(token.type==Token::STRING)
-                       result.args.push_back(Value(STRING, token.str));
-               else if(token.type==Token::IDENTIFIER)
-               {
-                       if(token.str=="true")
-                               result.args.push_back(Value(BOOLEAN, "1"));
-                       else if(token.str=="false")
-                               result.args.push_back(Value(BOOLEAN, "0"));
-                       else
-                               result.args.push_back(Value(ENUM, token.str));
-                       //result.args.push_back(resolve_identifiertoken.str);
-               }
-               else if(token.str=="")
-                       throw_at(ParseError("Unexcepted end of input"), get_location());
+               else if(token.type!=Token::SPECIAL)
+                       result.append_from_token(token);
                else
-                       throw_at(ParseError("Syntax error"), get_location());
+                       throw syntax_error(token.str);
        }
 
        return result;
@@ -112,9 +86,9 @@ Token TextParser::parse_token()
                c = in.get();
                int next = in.peek();
 
-               if(c=='/' && next=='/')
+               if(c=='/' && next=='/' && !comment)
                        comment = 1;
-               else if(c=='/' && next=='*')
+               else if(c=='/' && next=='*' && !comment)
                        comment = 2;
                else if(c=='\n' && comment==1)
                        comment = 0;
@@ -122,12 +96,12 @@ Token TextParser::parse_token()
                        comment = 3;
                else if(comment==3)   // Skip the second character of block comment end
                        comment = 0;
-               else if(!isspace(c) && !comment)
+               else if(c!=-1 && !isspace(c) && !comment)
                        comment = -1;
        }
 
        if(comment>0)  // EOF while in comment
-               throw_at(ParseError("Unfinished comment at end of input"), get_location());
+               throw parse_error(string());
        else if(comment==0)  // Didn't hit any non-whitespace
                return Token(Token::SPECIAL, "");
 
@@ -138,6 +112,7 @@ Token TextParser::parse_token()
                FLOATEXPINIT,
                FLOATEXPSIGN,
                STRING,
+               STRING_ESCAPE,
                ACCEPT,
                ZERO,
                DECIMAL,
@@ -145,6 +120,7 @@ Token TextParser::parse_token()
                OCTAL,
                FLOAT,
                FLOATEXP,
+               STRING_END,
                IDENTIFIER
        };
 
@@ -154,7 +130,8 @@ Token TextParser::parse_token()
                Token::SPECIAL,
                Token::SPECIAL,
                Token::SPECIAL,
-               Token::STRING,
+               Token::SPECIAL,
+               Token::SPECIAL,
                Token::SPECIAL,
                Token::INTEGER,
                Token::INTEGER,
@@ -162,12 +139,12 @@ Token TextParser::parse_token()
                Token::INTEGER,
                Token::FLOAT,
                Token::FLOAT,
+               Token::STRING,
                Token::IDENTIFIER
        };
 
        ParseState state = INIT;
-       string     buf;
-       bool       escape = false;
+       string buf;
 
        while(in || state==INIT)
        {
@@ -192,10 +169,10 @@ Token TextParser::parse_token()
                                return Token(Token::SPECIAL, string(1, c));
                        else if(isdigit(c))
                                state = DECIMAL;
-                       else if(isalpha(c) || c=='_')
+                       else if(isalpha(c) || c=='_' || c=='\\')
                                state = IDENTIFIER;
                        else
-                               parse_error(c, "0-9A-Za-z_.\"{};+-");
+                               throw parse_error(buf);
                        break;
 
                case SIGN:
@@ -206,7 +183,7 @@ Token TextParser::parse_token()
                        else if(c=='.')
                                state = FLOAT;
                        else
-                               parse_error(c, "0-9.");
+                               throw parse_error(buf);
                        break;
 
                case ZERO:
@@ -217,31 +194,33 @@ Token TextParser::parse_token()
                        else if(c=='.')
                                state = FLOAT;
                        else
-                               parse_error(c, "0-9A-Fa-f.");
+                               throw parse_error(buf);
                        break;
 
                case DECIMAL:
                        if(c=='.')
                                state = FLOAT;
+                       else if(c=='e' || c=='E')
+                               state = FLOATEXPINIT;
                        else if(!isdigit(c))
-                               parse_error(c, "0-9.");
+                               throw parse_error(buf);
                        break;
 
                case HEXADECIMAL:
                        if(!isxdigit(c))
-                               parse_error(c, "0-9A-Fa-f");
+                               throw parse_error(buf);
                        break;
 
                case OCTAL:
                        if(!isodigit(c))
-                               parse_error(c, "0-7");
+                               throw parse_error(buf);
                        break;
 
                case FLOAT:
                        if(c=='e' || c=='E')
                                state = FLOATEXPINIT;
                        else if(!isdigit(c))
-                               parse_error(c, "0-9Ee");
+                               throw parse_error(buf);
                        break;
 
                case FLOATEXPINIT:
@@ -250,51 +229,53 @@ Token TextParser::parse_token()
                        else if(isdigit(c))
                                state = FLOATEXP;
                        else
-                               parse_error(c, "0-9+-");
+                               throw parse_error(buf);
                        break;
 
                case FLOATEXPSIGN:
                        if(isdigit(c))
                                state = FLOATEXP;
                        else
-                               parse_error(c, "0-9");
+                               throw parse_error(buf);
                        break;
 
                case FLOATEXP:
                        if(!isdigit(c))
-                               parse_error(c, "0-9");
+                               throw parse_error(buf);
                        break;
 
                case STRING:
                        if(c=='\\')
-                               escape = !escape;
-                       else if(c=='"' && !escape)
-                       {
-                               try
-                               {
-                                       return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
-                               }
-                               catch(Exception &e)
-                               {
-                                       e.at(get_location());
-                                       throw;
-                               }
-                       }
-                       else
-                               escape = false;
+                               state = STRING_ESCAPE;
+                       else if(c=='"')
+                               state = STRING_END;
+                       break;
+
+               case STRING_ESCAPE:
+                       state = STRING;
                        break;
 
                case IDENTIFIER:
-                       if(!isalpha(c) && !isdigit(c) && c!='_')
-                               parse_error(c, "0-9A-Za-z_");
+                       if(!isalpha(c) && !isdigit(c) && c!='_' && c!='-' && c!='/')
+                               throw parse_error(buf);
                        break;
 
+               case STRING_END:
+                       throw parse_error(buf);
+
                default:
-                       throw_at(InvalidState("Internal error (bad state)"), get_location());
+                       throw logic_error("bad parser state");
                }
 
                if(is_delimiter(next) && state>=ACCEPT)
-                       return Token(token_type[state], buf);
+               {
+                       if(state==IDENTIFIER && buf[0]=='\\')
+                               return Token(Token::IDENTIFIER, buf.substr(1));
+                       else if(state==STRING_END)
+                               return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
+                       else
+                               return Token(token_type[state], buf);
+               }
        }
 
        return Token(Token::SPECIAL, "");
@@ -310,17 +291,5 @@ bool TextParser::isodigit(int c)
        return (c>='0' && c<='7');
 }
 
-string TextParser::get_location()
-{
-       ostringstream ss;
-       ss<<src<<':'<<in.get_line_number();
-       return ss.str();
-}
-
-void TextParser::parse_error(int c, const char *e)
-{
-       throw_at(ParseError(format("Parse error at '%c', expected one of \"%s\"", static_cast<char>(c), e)), get_location());
-}
-
 } // namespace DataFile
 } // namespace Msp