git.tdb.fi Git - libs/datafile.git/blobdiff - source/textparser.cpp
Style update: add spaces around assignments
[libs/datafile.git] / source / textparser.cpp
index 298b7cac363ebd2b75a712b2ea41d18dd17b3801..d8e422ee7ff4d4d710a764a62383955f377305f5 100644 (file)
@@ -1,11 +1,12 @@
 /* $Id$
 
 This file is part of libmspdatafile
-Copyright © 2007  Mikko Rasa, Mikkosoft Productions
+Copyright © 2007-2008  Mikko Rasa, Mikkosoft Productions
 Distributed under the LGPL
 */
 
 #include <msp/strings/formatter.h>
+#include <msp/strings/utils.h>
 #include "input.h"
 #include "textparser.h"
 #include "token.h"
@@ -27,52 +28,52 @@ Statement TextParser::parse()
 Statement TextParser::parse_statement(const Token *t)
 {
        Statement result;
-       bool      sub=false;
-       bool      finish=false;
+       bool      sub = false;
+       bool      finish = false;
 
        while(in)
        {
                Token token;
                if(t)
                {
-                       token=*t;
-                       t=0;
+                       token = *t;
+                       t = 0;
                }
                else
-                       token=parse_token();
+                       token = parse_token();
 
                if(result.keyword.empty())
                {
                        if(token.str.empty())
                                break;
                        else if(token.type!=Token::IDENTIFIER)
-                               throw ParseError(format("%s: Syntax error at token '%s' (expected an identifier)", get_location(), token.str), src, in.get_line_number());
-                       result.keyword=token.str;
-                       result.valid=true;
-                       result.source=src;
-                       result.line=in.get_line_number();
+                               throw_at(ParseError(format("Syntax error at token '%s' (expected an identifier)", token.str)), get_location());
+                       result.keyword = token.str;
+                       result.valid = true;
+                       result.source = src;
+                       result.line = in.get_line_number();
                }
                else if(sub)
                {
                        if(token.str=="}")
                        {
-                               sub=false;
-                               finish=true;
+                               sub = false;
+                               finish = true;
                        }
                        else
                        {
-                               Statement ss=parse_statement(&token);
+                               Statement ss = parse_statement(&token);
                                result.sub.push_back(ss);
                        }
                }
                else if(finish)
                {
                        if(token.str!=";")
-                               throw ParseError(format("%s: Syntax error at token '%s' (Expected a ';')", get_location(), token.str), src, in.get_line_number());
+                               throw_at(ParseError(format("Syntax error at token '%s' (Expected a ';')", token.str)), get_location());
                        break;
                }
                else if(token.str=="{")
-                       sub=true;
+                       sub = true;
                else if(token.str==";")
                        break;
                else if(token.type==Token::INTEGER)
@@ -92,9 +93,9 @@ Statement TextParser::parse_statement(const Token *t)
                        //result.args.push_back(resolve_identifiertoken.str);
                }
                else if(token.str=="")
-                       throw ParseError(src+": Unexcepted end of input", src, in.get_line_number());
+                       throw_at(ParseError("Unexcepted end of input"), get_location());
                else
-                       throw ParseError(get_location()+": Syntax error", src, in.get_line_number());
+                       throw_at(ParseError("Syntax error"), get_location());
        }
 
        return result;
@@ -102,31 +103,33 @@ Statement TextParser::parse_statement(const Token *t)
 
 Token TextParser::parse_token()
 {
-       int c=0;
-       unsigned comment=0;
+       int c = 0;
+       int comment = 0;
 
        // Skip over comments and whitespace
-       while(in)
+       while(in && comment>=0)
        {
-               c=in.get();
-               int next=in.peek();
+               c = in.get();
+               int next = in.peek();
 
                if(c=='/' && next=='/')
-                       comment=1;
+                       comment = 1;
                else if(c=='/' && next=='*')
-                       comment=2;
+                       comment = 2;
                else if(c=='\n' && comment==1)
-                       comment=0;
+                       comment = 0;
                else if(c=='*' && next=='/' && comment==2)
-                       comment=3;
+                       comment = 3;
                else if(comment==3)   // Skip the second character of block comment end
-                       comment=0;
+                       comment = 0;
                else if(!isspace(c) && !comment)
-                       break;
+                       comment = -1;
        }
 
-       if(comment)  // Didn't hit any non-whitespace
-               throw ParseError(src+": Unfinished comment at end of input", src, in.get_line_number());
+       if(comment>0)  // EOF while in comment
+               throw_at(ParseError("Unfinished comment at end of input"), get_location());
+       else if(comment==0)  // Didn't hit any non-whitespace
+               return Token(Token::SPECIAL, "");
 
        enum ParseState
        {
@@ -162,122 +165,132 @@ Token TextParser::parse_token()
                Token::IDENTIFIER
        };
 
-       ParseState state=INIT;
+       ParseState state = INIT;
        string     buf;
-       bool       escape=false;
+       bool       escape = false;
 
-       while(in)
+       while(in || state==INIT)
        {
                if(state!=INIT)
-                       c=in.get();
-               int next=in.peek();
+                       c = in.get();
+               int next = in.peek();
 
-               buf+=c;
+               buf += c;
 
                switch(state)
                {
                case INIT:
                        if(c=='0')
-                               state=ZERO;
+                               state = ZERO;
                        else if(c=='-' || c=='+')
-                               state=SIGN;
+                               state = SIGN;
                        else if(c=='.')
-                               state=FLOAT;
+                               state = FLOAT;
                        else if(c=='"')
-                               state=STRING;
+                               state = STRING;
                        else if(c=='{' || c=='}' || c==';')
                                return Token(Token::SPECIAL, string(1, c));
                        else if(isdigit(c))
-                               state=DECIMAL;
+                               state = DECIMAL;
                        else if(isalpha(c) || c=='_')
-                               state=IDENTIFIER;
+                               state = IDENTIFIER;
                        else
-                               parse_error(c, state);
+                               parse_error(c, "0-9A-Za-z_.\"{};+-");
                        break;
 
                case SIGN:
                        if(c=='0')
-                               state=ZERO;
+                               state = ZERO;
                        else if(isdigit(c))
-                               state=DECIMAL;
+                               state = DECIMAL;
                        else if(c=='.')
-                               state=FLOAT;
+                               state = FLOAT;
                        else
-                               parse_error(c, state);
+                               parse_error(c, "0-9.");
                        break;
 
                case ZERO:
                        if(c=='x')
-                               state=HEXADECIMAL;
+                               state = HEXADECIMAL;
                        else if(isdigit(c))
-                               state=OCTAL;
+                               state = OCTAL;
                        else if(c=='.')
-                               state=FLOAT;
+                               state = FLOAT;
                        else
-                               parse_error(c, state);
+                               parse_error(c, "0-9A-Fa-f.");
                        break;
 
                case DECIMAL:
                        if(c=='.')
-                               state=FLOAT;
+                               state = FLOAT;
                        else if(!isdigit(c))
-                               parse_error(c, state);
+                               parse_error(c, "0-9.");
                        break;
 
                case HEXADECIMAL:
                        if(!isxdigit(c))
-                               parse_error(c, state);
+                               parse_error(c, "0-9A-Fa-f");
                        break;
 
                case OCTAL:
                        if(!isodigit(c))
-                               parse_error(c, state);
+                               parse_error(c, "0-7");
                        break;
 
                case FLOAT:
                        if(c=='e' || c=='E')
-                               state=FLOATEXPINIT;
+                               state = FLOATEXPINIT;
                        else if(!isdigit(c))
-                               parse_error(c, state);
+                               parse_error(c, "0-9Ee");
                        break;
 
                case FLOATEXPINIT:
                        if(c=='+' || c=='-')
-                               state=FLOATEXPSIGN;
+                               state = FLOATEXPSIGN;
                        else if(isdigit(c))
-                               state=FLOATEXP;
+                               state = FLOATEXP;
                        else
-                               parse_error(c, state);
+                               parse_error(c, "0-9+-");
                        break;
 
                case FLOATEXPSIGN:
                        if(isdigit(c))
-                               state=FLOATEXP;
+                               state = FLOATEXP;
                        else
-                               parse_error(c, state);
+                               parse_error(c, "0-9");
                        break;
 
                case FLOATEXP:
                        if(!isdigit(c))
-                               parse_error(c, state);
+                               parse_error(c, "0-9");
                        break;
 
                case STRING:
                        if(c=='\\')
-                               escape=!escape;
+                               escape = !escape;
                        else if(c=='"' && !escape)
-                               return Token(Token::STRING, unescape_string(buf));
+                       {
+                               try
+                               {
+                                       return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
+                               }
+                               catch(Exception &e)
+                               {
+                                       e.at(get_location());
+                                       throw;
+                               }
+                       }
                        else
-                               escape=false;
+                               escape = false;
                        break;
 
                case IDENTIFIER:
                        if(!isalpha(c) && !isdigit(c) && c!='_')
-                               parse_error(c, state);
+                               parse_error(c, "0-9A-Za-z_");
                        break;
 
                default:
-                       throw Exception(get_location()+": Internal error (bad state)");
+                       throw_at(InvalidState("Internal error (bad state)"), get_location());
                }
 
                if(is_delimiter(next) && state>=ACCEPT)
@@ -297,57 +310,6 @@ bool TextParser::isodigit(int c)
        return (c>='0' && c<='7');
 }
 
-string TextParser::unescape_string(const string &str)
-{
-       string   result;
-       bool     escape=false;
-       unsigned hexcape=0;
-       for(string::const_iterator i=str.begin()+1; i!=str.end()-1; ++i)
-       {
-               if(escape)
-               {
-                       if(*i=='n')
-                               result+='\n';
-                       else if(*i=='t')
-                               result+='\t';
-                       else if(*i=='\\')
-                               result+='\\';
-                       else if(*i=='"')
-                               result+='"';
-                       else if(*i=='x')
-                               hexcape=0x100;
-                       else
-                               throw ParseError(format("%s: Invalid escape sequence '\\%c'", get_location(), *i), src, in.get_line_number());
-                       escape=false;
-               }
-               else if(hexcape)
-               {
-                       unsigned digit=0;
-                       if(*i>='0' && *i<='9')
-                               digit=*i-'0';
-                       else if(*i>='a' && *i<='f')
-                               digit=*i-'a'+10;
-                       else if(*i>='A' && *i<='F')
-                               digit=*i-'A'+10;
-                       else
-                               throw ParseError(get_location()+": Invalid hex digit", src, in.get_line_number());
-
-                       hexcape=(hexcape<<4)|digit;
-                       if(hexcape&0x10000)
-                       {
-                               result+=hexcape&0xFF;
-                               hexcape=0;
-                       }
-               }
-               else if(*i=='\\')
-                       escape=true;
-               else
-                       result+=*i;
-       }
-
-       return result;
-}
-
 string TextParser::get_location()
 {
        ostringstream ss;
@@ -355,9 +317,9 @@ string TextParser::get_location()
        return ss.str();
 }
 
-void TextParser::parse_error(int c, int state)
+void TextParser::parse_error(int c, const char *e)
 {
-       throw ParseError(format("%s: Parse error at '%c' (state %d)", get_location(), static_cast<char>(c), state), src, in.get_line_number());
+       throw_at(ParseError(format("Parse error at '%c', expected one of \"%s\"", static_cast<char>(c), e)), get_location());
 }
 
 } // namespace DataFile