Move token-to-argument conversion to Statement

[libs/datafile.git] / source / textparser.cpp
diff --git a/source/textparser.cpp b/source/textparser.cpp

index d8e422ee7ff4d4d710a764a62383955f377305f5..7dc4bde7e46ed659368498798bf0b6b3b62fa246 100644 (file)
--- a/source/textparser.cpp
+++ b/source/textparser.cpp
@@ -1,12 +1,6 @@
-/* $Id$
-
-This file is part of libmspdatafile
-Copyright © 2007-2008  Mikko Rasa, Mikkosoft Productions
-Distributed under the LGPL
-*/
-
-#include <msp/strings/formatter.h>
+#include <msp/strings/format.h>
  #include <msp/strings/utils.h>
+#include "except.h"
  #include "input.h"
  #include "textparser.h"
  #include "token.h"
@@ -28,8 +22,7 @@ Statement TextParser::parse()
  Statement TextParser::parse_statement(const Token *t)
  {
         Statement result;
-       bool      sub = false;
-       bool      finish = false;
+       unsigned sub = 0;
  
         while(in)
         {
@@ -47,55 +40,36 @@ Statement TextParser::parse_statement(const Token *t)
                         if(token.str.empty())
                                 break;
                         else if(token.type!=Token::IDENTIFIER)
-                               throw_at(ParseError(format("Syntax error at token '%s' (expected an identifier)", token.str)), get_location());
+                               throw syntax_error(token.str);
                         result.keyword = token.str;
                         result.valid = true;
                         result.source = src;
                         result.line = in.get_line_number();
                 }
-               else if(sub)
+               else if(sub==1)
                 {
                         if(token.str=="}")
-                       {
-                               sub = false;
-                               finish = true;
-                       }
+                               sub = 2;
                         else
                         {
                                 Statement ss = parse_statement(&token);
                                 result.sub.push_back(ss);
                         }
                 }
-               else if(finish)
+               else if(sub==2)
                 {
                         if(token.str!=";")
-                               throw_at(ParseError(format("Syntax error at token '%s' (Expected a ';')", token.str)), get_location());
+                               throw syntax_error(token.str);
                         break;
                 }
                 else if(token.str=="{")
-                       sub = true;
+                       sub = 1;
                 else if(token.str==";")
                         break;
-               else if(token.type==Token::INTEGER)
-                       result.args.push_back(Value(INTEGER, token.str));
-               else if(token.type==Token::FLOAT)
-                       result.args.push_back(Value(FLOAT, token.str));
-               else if(token.type==Token::STRING)
-                       result.args.push_back(Value(STRING, token.str));
-               else if(token.type==Token::IDENTIFIER)
-               {
-                       if(token.str=="true")
-                               result.args.push_back(Value(BOOLEAN, "1"));
-                       else if(token.str=="false")
-                               result.args.push_back(Value(BOOLEAN, "0"));
-                       else
-                               result.args.push_back(Value(ENUM, token.str));
-                       //result.args.push_back(resolve_identifiertoken.str);
-               }
-               else if(token.str=="")
-                       throw_at(ParseError("Unexcepted end of input"), get_location());
+               else if(token.type!=Token::SPECIAL)
+                       result.append_from_token(token);
                 else
-                       throw_at(ParseError("Syntax error"), get_location());
+                       throw syntax_error(token.str);
         }
  
         return result;
@@ -112,9 +86,9 @@ Token TextParser::parse_token()
                 c = in.get();
                 int next = in.peek();
  
-               if(c=='/' && next=='/')
+               if(c=='/' && next=='/' && !comment)
                         comment = 1;
-               else if(c=='/' && next=='*')
+               else if(c=='/' && next=='*' && !comment)
                         comment = 2;
                 else if(c=='\n' && comment==1)
                         comment = 0;
@@ -122,12 +96,12 @@ Token TextParser::parse_token()
                         comment = 3;
                 else if(comment==3)   // Skip the second character of block comment end
                         comment = 0;
-               else if(!isspace(c) && !comment)
+               else if(c!=-1 && !isspace(c) && !comment)
                         comment = -1;
         }
  
         if(comment>0)  // EOF while in comment
-               throw_at(ParseError("Unfinished comment at end of input"), get_location());
+               throw parse_error(string());
         else if(comment==0)  // Didn't hit any non-whitespace
                 return Token(Token::SPECIAL, "");
  
@@ -138,6 +112,7 @@ Token TextParser::parse_token()
                 FLOATEXPINIT,
                 FLOATEXPSIGN,
                 STRING,
+               STRING_ESCAPE,
                 ACCEPT,
                 ZERO,
                 DECIMAL,
@@ -145,6 +120,7 @@ Token TextParser::parse_token()
                 OCTAL,
                 FLOAT,
                 FLOATEXP,
+               STRING_END,
                 IDENTIFIER
         };
  
@@ -154,7 +130,8 @@ Token TextParser::parse_token()
                 Token::SPECIAL,
                 Token::SPECIAL,
                 Token::SPECIAL,
-               Token::STRING,
+               Token::SPECIAL,
+               Token::SPECIAL,
                 Token::SPECIAL,
                 Token::INTEGER,
                 Token::INTEGER,
@@ -162,12 +139,12 @@ Token TextParser::parse_token()
                 Token::INTEGER,
                 Token::FLOAT,
                 Token::FLOAT,
+               Token::STRING,
                 Token::IDENTIFIER
         };
  
         ParseState state = INIT;
-       string     buf;
-       bool       escape = false;
+       string buf;
  
         while(in || state==INIT)
         {
@@ -192,10 +169,10 @@ Token TextParser::parse_token()
                                 return Token(Token::SPECIAL, string(1, c));
                         else if(isdigit(c))
                                 state = DECIMAL;
-                       else if(isalpha(c) || c=='_')
+                       else if(isalpha(c) || c=='_' || c=='\\')
                                 state = IDENTIFIER;
                         else
-                               parse_error(c, "0-9A-Za-z_.\"{};+-");
+                               throw parse_error(buf);
                         break;
  
                 case SIGN:
@@ -206,7 +183,7 @@ Token TextParser::parse_token()
                         else if(c=='.')
                                 state = FLOAT;
                         else
-                               parse_error(c, "0-9.");
+                               throw parse_error(buf);
                         break;
  
                 case ZERO:
@@ -217,31 +194,33 @@ Token TextParser::parse_token()
                         else if(c=='.')
                                 state = FLOAT;
                         else
-                               parse_error(c, "0-9A-Fa-f.");
+                               throw parse_error(buf);
                         break;
  
                 case DECIMAL:
                         if(c=='.')
                                 state = FLOAT;
+                       else if(c=='e' || c=='E')
+                               state = FLOATEXPINIT;
                         else if(!isdigit(c))
-                               parse_error(c, "0-9.");
+                               throw parse_error(buf);
                         break;
  
                 case HEXADECIMAL:
                         if(!isxdigit(c))
-                               parse_error(c, "0-9A-Fa-f");
+                               throw parse_error(buf);
                         break;
  
                 case OCTAL:
                         if(!isodigit(c))
-                               parse_error(c, "0-7");
+                               throw parse_error(buf);
                         break;
  
                 case FLOAT:
                         if(c=='e' || c=='E')
                                 state = FLOATEXPINIT;
                         else if(!isdigit(c))
-                               parse_error(c, "0-9Ee");
+                               throw parse_error(buf);
                         break;
  
                 case FLOATEXPINIT:
@@ -250,51 +229,53 @@ Token TextParser::parse_token()
                         else if(isdigit(c))
                                 state = FLOATEXP;
                         else
-                               parse_error(c, "0-9+-");
+                               throw parse_error(buf);
                         break;
  
                 case FLOATEXPSIGN:
                         if(isdigit(c))
                                 state = FLOATEXP;
                         else
-                               parse_error(c, "0-9");
+                               throw parse_error(buf);
                         break;
  
                 case FLOATEXP:
                         if(!isdigit(c))
-                               parse_error(c, "0-9");
+                               throw parse_error(buf);
                         break;
  
                 case STRING:
                         if(c=='\\')
-                               escape = !escape;
-                       else if(c=='"' && !escape)
-                       {
-                               try
-                               {
-                                       return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
-                               }
-                               catch(Exception &e)
-                               {
-                                       e.at(get_location());
-                                       throw;
-                               }
-                       }
-                       else
-                               escape = false;
+                               state = STRING_ESCAPE;
+                       else if(c=='"')
+                               state = STRING_END;
+                       break;
+
+               case STRING_ESCAPE:
+                       state = STRING;
                         break;
  
                 case IDENTIFIER:
-                       if(!isalpha(c) && !isdigit(c) && c!='_')
-                               parse_error(c, "0-9A-Za-z_");
+                       if(!isalpha(c) && !isdigit(c) && c!='_' && c!='-' && c!='/')
+                               throw parse_error(buf);
                         break;
  
+               case STRING_END:
+                       throw parse_error(buf);
+
                 default:
-                       throw_at(InvalidState("Internal error (bad state)"), get_location());
+                       throw logic_error("bad parser state");
                 }
  
                 if(is_delimiter(next) && state>=ACCEPT)
-                       return Token(token_type[state], buf);
+               {
+                       if(state==IDENTIFIER && buf[0]=='\\')
+                               return Token(Token::IDENTIFIER, buf.substr(1));
+                       else if(state==STRING_END)
+                               return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
+                       else
+                               return Token(token_type[state], buf);
+               }
         }
  
         return Token(Token::SPECIAL, "");
@@ -310,17 +291,5 @@ bool TextParser::isodigit(int c)
         return (c>='0' && c<='7');
  }
  
-string TextParser::get_location()
-{
-       ostringstream ss;
-       ss<<src<<':'<<in.get_line_number();
-       return ss.str();
-}
-
-void TextParser::parse_error(int c, const char *e)
-{
-       throw_at(ParseError(format("Parse error at '%c', expected one of \"%s\"", static_cast<char>(c), e)), get_location());
-}
-
  } // namespace DataFile
  } // namespace Msp