git.tdb.fi Git - libs/datafile.git/commitdiff
Add support for base64-encoded strings in text format
author Mikko Rasa <tdb@tdb.fi>
Mon, 12 Apr 2021 21:53:03 +0000 (00:53 +0300)
committer Mikko Rasa <tdb@tdb.fi>
Mon, 12 Apr 2021 21:53:49 +0000 (00:53 +0300)
They're much more efficient, both space- and decoding-wise, than using C
escapes to encode binary data.

source/textparser.cpp
source/textparser.h

index 7dc4bde7e46ed659368498798bf0b6b3b62fa246..7ea0600f8565a5fc743fc0d44c10bf76db3af0f9 100644 (file)
@@ -113,6 +113,7 @@ Token TextParser::parse_token()
                FLOATEXPSIGN,
                STRING,
                STRING_ESCAPE,
+               STRING_BASE64,
                ACCEPT,
                ZERO,
                DECIMAL,
@@ -121,6 +122,7 @@ Token TextParser::parse_token()
                FLOAT,
                FLOATEXP,
                STRING_END,
+               STRING_BASE64_PAD,
                IDENTIFIER
        };
 
@@ -133,6 +135,7 @@ Token TextParser::parse_token()
                Token::SPECIAL,
                Token::SPECIAL,
                Token::SPECIAL,
+               Token::SPECIAL,
                Token::INTEGER,
                Token::INTEGER,
                Token::INTEGER,
@@ -140,6 +143,7 @@ Token TextParser::parse_token()
                Token::FLOAT,
                Token::FLOAT,
                Token::STRING,
+               Token::STRING,
                Token::IDENTIFIER
        };
 
@@ -165,6 +169,8 @@ Token TextParser::parse_token()
                                state = FLOAT;
                        else if(c=='"')
                                state = STRING;
+                       else if(c=='=')
+                               state = STRING_BASE64;
                        else if(c=='{' || c=='}' || c==';')
                                return Token(Token::SPECIAL, string(1, c));
                        else if(isdigit(c))
@@ -255,6 +261,18 @@ Token TextParser::parse_token()
                        state = STRING;
                        break;
 
+               case STRING_BASE64:
+                       if(c=='=')
+                               state = STRING_BASE64_PAD;
+                       else if(!isalnum(c) && c!='+' && c!='/')
+                               throw parse_error(buf);
+                       break;
+
+               case STRING_BASE64_PAD:
+                       if(c!='=')
+                               throw parse_error(buf);
+                       break;
+
                case IDENTIFIER:
                        if(!isalpha(c) && !isdigit(c) && c!='_' && c!='-' && c!='/')
                                throw parse_error(buf);
@@ -273,6 +291,8 @@ Token TextParser::parse_token()
                                return Token(Token::IDENTIFIER, buf.substr(1));
                        else if(state==STRING_END)
                                return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
+                       else if(state==STRING_BASE64 || state==STRING_BASE64_PAD)
+                               return Token(Token::STRING, base64_decode(buf));
                        else
                                return Token(token_type[state], buf);
                }
@@ -291,5 +311,41 @@ bool TextParser::isodigit(int c)
        return (c>='0' && c<='7');
 }
 
+string TextParser::base64_decode(const string &data)
+{
+       string bin;
+       unsigned accum = 0;
+       unsigned a_bits = 0;
+       for(string::const_iterator i=data.begin(); i!=data.end(); ++i)
+       {
+               unsigned d;
+               if(*i>='A' && *i<='Z')
+                       d = *i-'A';
+               else if(*i>='a' && *i<='z')
+                       d = 26+(*i-'a');
+               else if(*i>='0' && *i<='9')
+                       d = 52+(*i-'0');
+               else if(*i=='+')
+                       d = 62;
+               else if(*i=='/')
+                       d = 63;
+               else if(*i=='=')
+                       continue;
+               else
+                       throw invalid_argument("TextParser::base64_decode");
+
+               accum = (accum<<6)|d;
+               a_bits += 6;
+
+               if(a_bits>=8)
+               {
+                       bin += (accum>>(a_bits-8))&0xFF;
+                       a_bits -= 8;
+               }
+       }
+
+       return bin;
+}
+
 } // namespace DataFile
 } // namespace Msp
index e8c89227ddc7300b7e745faca33a3f08c0089b8b..1eec9c6f0e8a97fe4a8adcf015277b95e0cd937f 100644 (file)
@@ -19,6 +19,7 @@ protected:
        Token parse_token();
        bool is_delimiter(int);
        bool isodigit(int);
+       static std::string base64_decode(const std::string &);
 };
 
 } // namespace DataFile