git.tdb.fi Git - libs/datafile.git/blobdiff - source/textparser.cpp
Cosmetic changes
[libs/datafile.git] / source / textparser.cpp
index 225425b4e23166eea05fe0697f16974c327ab1c0..c7b13e93c11e927178929289cf1ebf2164add537 100644 (file)
@@ -1,5 +1,6 @@
 #include <msp/strings/format.h>
 #include <msp/strings/utils.h>
+#include "except.h"
 #include "input.h"
 #include "textparser.h"
 #include "token.h"
@@ -9,42 +10,19 @@ using namespace std;
 namespace Msp {
 namespace DataFile {
 
-class parse_error: public runtime_error
-{
-public:
-       parse_error(const std::string &t):
-               runtime_error(t.empty() ? "at end of input" : format("after '%s'", t))
-       { }
-
-       virtual ~parse_error() throw() { }
-};
-
-
-class syntax_error: public runtime_error
-{
-public:
-       syntax_error(const std::string &t):
-               runtime_error(t.empty() ? "at end of input" : format("at '%s'", t))
-       { }
-
-       virtual ~syntax_error() throw() { }
-};
-
-
 TextParser::TextParser(Input &i, const string &s):
        ParserMode(i, s)
 { }
 
-Statement TextParser::parse(bool)
+Statement TextParser::parse() // Returns the Statement produced by parse_statement when no token has been read in advance.
 {
-       return parse_statement(0);
+       return parse_statement(nullptr); // nullptr: parse_statement obtains its first token from parse_token() itself
 }
 
 Statement TextParser::parse_statement(const Token *t)
 {
        Statement result;
-       bool sub = false;
-       bool finish = false;
+       unsigned sub = 0;
 
        while(in)
        {
@@ -52,7 +30,7 @@ Statement TextParser::parse_statement(const Token *t)
                if(t)
                {
                        token = *t;
-                       t = 0;
+                       t = nullptr;
                }
                else
                        token = parse_token();
@@ -68,44 +46,28 @@ Statement TextParser::parse_statement(const Token *t)
                        result.source = src;
                        result.line = in.get_line_number();
                }
-               else if(sub)
+               else if(sub==1)
                {
                        if(token.str=="}")
-                       {
-                               sub = false;
-                               finish = true;
-                       }
+                               sub = 2;
                        else
                        {
                                Statement ss = parse_statement(&token);
                                result.sub.push_back(ss);
                        }
                }
-               else if(finish)
+               else if(sub==2)
                {
                        if(token.str!=";")
                                throw syntax_error(token.str);
                        break;
                }
                else if(token.str=="{")
-                       sub = true;
+                       sub = 1;
                else if(token.str==";")
                        break;
-               else if(token.type==Token::INTEGER)
-                       result.append(lexical_cast<IntType::Store>(token.str));
-               else if(token.type==Token::FLOAT)
-                       result.append(lexical_cast<FloatType::Store>(token.str));
-               else if(token.type==Token::STRING)
-                       result.append(token.str);
-               else if(token.type==Token::IDENTIFIER)
-               {
-                       if(token.str=="true")
-                               result.append(true);
-                       else if(token.str=="false")
-                               result.append(false);
-                       else
-                               result.append(Symbol(token.str));
-               }
+               else if(token.type!=Token::SPECIAL)
+                       result.append_from_token(token);
                else
                        throw syntax_error(token.str);
        }
@@ -124,9 +86,9 @@ Token TextParser::parse_token()
                c = in.get();
                int next = in.peek();
 
-               if(c=='/' && next=='/')
+               if(c=='/' && next=='/' && !comment)
                        comment = 1;
-               else if(c=='/' && next=='*')
+               else if(c=='/' && next=='*' && !comment)
                        comment = 2;
                else if(c=='\n' && comment==1)
                        comment = 0;
@@ -150,6 +112,8 @@ Token TextParser::parse_token()
                FLOATEXPINIT,
                FLOATEXPSIGN,
                STRING,
+               STRING_ESCAPE,
+               STRING_BASE64,
                ACCEPT,
                ZERO,
                DECIMAL,
@@ -158,6 +122,7 @@ Token TextParser::parse_token()
                FLOAT,
                FLOATEXP,
                STRING_END,
+               STRING_BASE64_PAD,
                IDENTIFIER
        };
 
@@ -169,6 +134,8 @@ Token TextParser::parse_token()
                Token::SPECIAL,
                Token::SPECIAL,
                Token::SPECIAL,
+               Token::SPECIAL,
+               Token::SPECIAL,
                Token::INTEGER,
                Token::INTEGER,
                Token::INTEGER,
@@ -176,12 +143,12 @@ Token TextParser::parse_token()
                Token::FLOAT,
                Token::FLOAT,
                Token::STRING,
+               Token::STRING,
                Token::IDENTIFIER
        };
 
        ParseState state = INIT;
        string buf;
-       bool escape = false;
 
        while(in || state==INIT)
        {
@@ -202,6 +169,8 @@ Token TextParser::parse_token()
                                state = FLOAT;
                        else if(c=='"')
                                state = STRING;
+                       else if(c=='=')
+                               state = STRING_BASE64;
                        else if(c=='{' || c=='}' || c==';')
                                return Token(Token::SPECIAL, string(1, c));
                        else if(isdigit(c))
@@ -283,11 +252,25 @@ Token TextParser::parse_token()
 
                case STRING:
                        if(c=='\\')
-                               escape = !escape;
-                       else if(c=='"' && !escape)
+                               state = STRING_ESCAPE;
+                       else if(c=='"')
                                state = STRING_END;
-                       else
-                               escape = false;
+                       break;
+
+               case STRING_ESCAPE:
+                       state = STRING;
+                       break;
+
+               case STRING_BASE64:
+                       if(c=='=')
+                               state = STRING_BASE64_PAD;
+                       else if(!isalnum(c) && c!='+' && c!='/')
+                               throw parse_error(buf);
+                       break;
+
+               case STRING_BASE64_PAD:
+                       if(c!='=')
+                               throw parse_error(buf);
                        break;
 
                case IDENTIFIER:
@@ -308,6 +291,8 @@ Token TextParser::parse_token()
                                return Token(Token::IDENTIFIER, buf.substr(1));
                        else if(state==STRING_END)
                                return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
+                       else if(state==STRING_BASE64 || state==STRING_BASE64_PAD)
+                               return Token(Token::STRING, base64_decode(buf));
                        else
                                return Token(token_type[state], buf);
                }
@@ -326,5 +311,42 @@ bool TextParser::isodigit(int c)
        return (c>='0' && c<='7');
 }
 
+string TextParser::base64_decode(const string &data) // Decodes base64 text in data to raw bytes; throws invalid_argument on a character that is neither in the alphabet nor '='.
+{
+       string bin;
+       bin.reserve(data.size()*3/4); // Each input character contributes 6 bits, so output is at most 3/4 of the input length
+       unsigned accum = 0; // Bit accumulator; each decoded 6-bit group is shifted in at the low end
+       unsigned a_bits = 0; // Count of bits in accum that have not been written to bin yet
+       for(char c: data)
+       {
+               unsigned d; // 6-bit value of the current character
+               if(c>='A' && c<='Z')
+                       d = c-'A'; // 'A'..'Z' -> 0..25
+               else if(c>='a' && c<='z')
+                       d = 26+(c-'a'); // 'a'..'z' -> 26..51
+               else if(c>='0' && c<='9')
+                       d = 52+(c-'0'); // '0'..'9' -> 52..61
+               else if(c=='+')
+                       d = 62;
+               else if(c=='/')
+                       d = 63;
+               else if(c=='=')
+                       continue; // '=' adds no bits and is skipped at any position; presumably this also covers a leading '=' marker left in the token buffer by parse_token (TODO confirm)
+               else
+                       throw invalid_argument("TextParser::base64_decode");
+
+               accum = (accum<<6)|d;
+               a_bits += 6;
+
+               if(a_bits>=8)
+               {
+                       bin += (accum>>(a_bits-8))&0xFF; // Emit the oldest 8 pending bits; accum is never cleared, so the mask drops already-emitted higher bits
+                       a_bits -= 8;
+               }
+       }
+
+       return bin; // Any leftover bits (fewer than 8) are discarded
+}
+
 } // namespace DataFile
 } // namespace Msp