git.tdb.fi Git - libs/datafile.git/blobdiff - source/textparser.cpp
Recognize floating-point literals with an exponent but no decimal point
[libs/datafile.git] / source / textparser.cpp
index d8e422ee7ff4d4d710a764a62383955f377305f5..9bed08ac9e0ea5a154208b24cda76dc75925267b 100644 (file)
@@ -1,11 +1,4 @@
-/* $Id$
-
-This file is part of libmspdatafile
-Copyright © 2007-2008  Mikko Rasa, Mikkosoft Productions
-Distributed under the LGPL
-*/
-
-#include <msp/strings/formatter.h>
+#include <msp/strings/format.h>
 #include <msp/strings/utils.h>
 #include "input.h"
 #include "textparser.h"
@@ -16,6 +9,28 @@ using namespace std;
 namespace Msp {
 namespace DataFile {
 
+class parse_error: public runtime_error  // Tokenizer failure; parse_token() throws it with its token buffer, or an empty string at EOF inside a comment
+{
+public:
+       parse_error(const std::string &t):  // t: text the error follows; an empty t selects the end-of-input message
+               runtime_error(t.empty() ? "at end of input" : format("after '%s'", t))
+       { }
+
+       virtual ~parse_error() throw() { }  // Empty exception specification; presumably needed to match runtime_error's destructor
+};
+
+
+class syntax_error: public runtime_error  // Statement-level failure; parse_statement() throws it with the text of the unexpected token
+{
+public:
+       syntax_error(const std::string &t):  // t: text of the offending token; an empty t selects the end-of-input message
+               runtime_error(t.empty() ? "at end of input" : format("at '%s'", t))
+       { }
+
+       virtual ~syntax_error() throw() { }  // Empty exception specification; presumably needed to match runtime_error's destructor
+};
+
+
 TextParser::TextParser(Input &i, const string &s):
        ParserMode(i, s)
 { }
@@ -28,8 +43,8 @@ Statement TextParser::parse()
 Statement TextParser::parse_statement(const Token *t)
 {
        Statement result;
-       bool      sub = false;
-       bool      finish = false;
+       bool sub = false;
+       bool finish = false;
 
        while(in)
        {
@@ -47,7 +62,7 @@ Statement TextParser::parse_statement(const Token *t)
                        if(token.str.empty())
                                break;
                        else if(token.type!=Token::IDENTIFIER)
-                               throw_at(ParseError(format("Syntax error at token '%s' (expected an identifier)", token.str)), get_location());
+                               throw syntax_error(token.str);
                        result.keyword = token.str;
                        result.valid = true;
                        result.source = src;
@@ -69,7 +84,7 @@ Statement TextParser::parse_statement(const Token *t)
                else if(finish)
                {
                        if(token.str!=";")
-                               throw_at(ParseError(format("Syntax error at token '%s' (Expected a ';')", token.str)), get_location());
+                               throw syntax_error(token.str);
                        break;
                }
                else if(token.str=="{")
@@ -77,25 +92,22 @@ Statement TextParser::parse_statement(const Token *t)
                else if(token.str==";")
                        break;
                else if(token.type==Token::INTEGER)
-                       result.args.push_back(Value(INTEGER, token.str));
+                       result.append(lexical_cast<IntType::Store>(token.str));
                else if(token.type==Token::FLOAT)
-                       result.args.push_back(Value(FLOAT, token.str));
+                       result.append(lexical_cast<FloatType::Store>(token.str));
                else if(token.type==Token::STRING)
-                       result.args.push_back(Value(STRING, token.str));
+                       result.append(token.str);
                else if(token.type==Token::IDENTIFIER)
                {
                        if(token.str=="true")
-                               result.args.push_back(Value(BOOLEAN, "1"));
+                               result.append(true);
                        else if(token.str=="false")
-                               result.args.push_back(Value(BOOLEAN, "0"));
+                               result.append(false);
                        else
-                               result.args.push_back(Value(ENUM, token.str));
-                       //result.args.push_back(resolve_identifiertoken.str);
+                               result.append(Symbol(token.str));
                }
-               else if(token.str=="")
-                       throw_at(ParseError("Unexcepted end of input"), get_location());
                else
-                       throw_at(ParseError("Syntax error"), get_location());
+                       throw syntax_error(token.str);
        }
 
        return result;
@@ -122,12 +134,12 @@ Token TextParser::parse_token()
                        comment = 3;
                else if(comment==3)   // Skip the second character of block comment end
                        comment = 0;
-               else if(!isspace(c) && !comment)
+               else if(c!=-1 && !isspace(c) && !comment)
                        comment = -1;
        }
 
        if(comment>0)  // EOF while in comment
-               throw_at(ParseError("Unfinished comment at end of input"), get_location());
+               throw parse_error(string());
        else if(comment==0)  // Didn't hit any non-whitespace
                return Token(Token::SPECIAL, "");
 
@@ -145,6 +157,7 @@ Token TextParser::parse_token()
                OCTAL,
                FLOAT,
                FLOATEXP,
+               STRING_END,
                IDENTIFIER
        };
 
@@ -154,7 +167,7 @@ Token TextParser::parse_token()
                Token::SPECIAL,
                Token::SPECIAL,
                Token::SPECIAL,
-               Token::STRING,
+               Token::SPECIAL,
                Token::SPECIAL,
                Token::INTEGER,
                Token::INTEGER,
@@ -162,12 +175,13 @@ Token TextParser::parse_token()
                Token::INTEGER,
                Token::FLOAT,
                Token::FLOAT,
+               Token::STRING,
                Token::IDENTIFIER
        };
 
        ParseState state = INIT;
-       string     buf;
-       bool       escape = false;
+       string buf;
+       bool escape = false;
 
        while(in || state==INIT)
        {
@@ -192,10 +206,10 @@ Token TextParser::parse_token()
                                return Token(Token::SPECIAL, string(1, c));
                        else if(isdigit(c))
                                state = DECIMAL;
-                       else if(isalpha(c) || c=='_')
+                       else if(isalpha(c) || c=='_' || c=='\\')
                                state = IDENTIFIER;
                        else
-                               parse_error(c, "0-9A-Za-z_.\"{};+-");
+                               throw parse_error(buf);
                        break;
 
                case SIGN:
@@ -206,7 +220,7 @@ Token TextParser::parse_token()
                        else if(c=='.')
                                state = FLOAT;
                        else
-                               parse_error(c, "0-9.");
+                               throw parse_error(buf);
                        break;
 
                case ZERO:
@@ -217,31 +231,33 @@ Token TextParser::parse_token()
                        else if(c=='.')
                                state = FLOAT;
                        else
-                               parse_error(c, "0-9A-Fa-f.");
+                               throw parse_error(buf);
                        break;
 
                case DECIMAL:
                        if(c=='.')
                                state = FLOAT;
+                       else if(c=='e' || c=='E')
+                               state = FLOATEXPINIT;
                        else if(!isdigit(c))
-                               parse_error(c, "0-9.");
+                               throw parse_error(buf);
                        break;
 
                case HEXADECIMAL:
                        if(!isxdigit(c))
-                               parse_error(c, "0-9A-Fa-f");
+                               throw parse_error(buf);
                        break;
 
                case OCTAL:
                        if(!isodigit(c))
-                               parse_error(c, "0-7");
+                               throw parse_error(buf);
                        break;
 
                case FLOAT:
                        if(c=='e' || c=='E')
                                state = FLOATEXPINIT;
                        else if(!isdigit(c))
-                               parse_error(c, "0-9Ee");
+                               throw parse_error(buf);
                        break;
 
                case FLOATEXPINIT:
@@ -250,51 +266,51 @@ Token TextParser::parse_token()
                        else if(isdigit(c))
                                state = FLOATEXP;
                        else
-                               parse_error(c, "0-9+-");
+                               throw parse_error(buf);
                        break;
 
                case FLOATEXPSIGN:
                        if(isdigit(c))
                                state = FLOATEXP;
                        else
-                               parse_error(c, "0-9");
+                               throw parse_error(buf);
                        break;
 
                case FLOATEXP:
                        if(!isdigit(c))
-                               parse_error(c, "0-9");
+                               throw parse_error(buf);
                        break;
 
                case STRING:
                        if(c=='\\')
                                escape = !escape;
                        else if(c=='"' && !escape)
-                       {
-                               try
-                               {
-                                       return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
-                               }
-                               catch(Exception &e)
-                               {
-                                       e.at(get_location());
-                                       throw;
-                               }
-                       }
+                               state = STRING_END;
                        else
                                escape = false;
                        break;
 
                case IDENTIFIER:
-                       if(!isalpha(c) && !isdigit(c) && c!='_')
-                               parse_error(c, "0-9A-Za-z_");
+                       if(!isalpha(c) && !isdigit(c) && c!='_' && c!='-' && c!='/')
+                               throw parse_error(buf);
                        break;
 
+               case STRING_END:
+                       throw parse_error(buf);
+
                default:
-                       throw_at(InvalidState("Internal error (bad state)"), get_location());
+                       throw logic_error("bad parser state");
                }
 
                if(is_delimiter(next) && state>=ACCEPT)
-                       return Token(token_type[state], buf);
+               {
+                       if(state==IDENTIFIER && buf[0]=='\\')
+                               return Token(Token::IDENTIFIER, buf.substr(1));
+                       else if(state==STRING_END)
+                               return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
+                       else
+                               return Token(token_type[state], buf);
+               }
        }
 
        return Token(Token::SPECIAL, "");
@@ -310,17 +326,5 @@ bool TextParser::isodigit(int c)
        return (c>='0' && c<='7');
 }
 
-string TextParser::get_location()
-{
-       ostringstream ss;
-       ss<<src<<':'<<in.get_line_number();
-       return ss.str();
-}
-
-void TextParser::parse_error(int c, const char *e)
-{
-       throw_at(ParseError(format("Parse error at '%c', expected one of \"%s\"", static_cast<char>(c), e)), get_location());
-}
-
 } // namespace DataFile
 } // namespace Msp