From 242191b162cd16cc1e67e38069eff3147827a8b6 Mon Sep 17 00:00:00 2001 From: Mikko Rasa Date: Tue, 13 Apr 2021 00:53:03 +0300 Subject: [PATCH] Add support for base64-encoded strings in text format They're much more efficient both space and decoding wise than using C escapes to encode binary data. --- source/textparser.cpp | 56 +++++++++++++++++++++++++++++++++++++++++++ source/textparser.h | 1 + 2 files changed, 57 insertions(+) diff --git a/source/textparser.cpp b/source/textparser.cpp index 7dc4bde..7ea0600 100644 --- a/source/textparser.cpp +++ b/source/textparser.cpp @@ -113,6 +113,7 @@ Token TextParser::parse_token() FLOATEXPSIGN, STRING, STRING_ESCAPE, + STRING_BASE64, ACCEPT, ZERO, DECIMAL, @@ -121,6 +122,7 @@ Token TextParser::parse_token() FLOAT, FLOATEXP, STRING_END, + STRING_BASE64_PAD, IDENTIFIER }; @@ -133,6 +135,7 @@ Token TextParser::parse_token() Token::SPECIAL, Token::SPECIAL, Token::SPECIAL, + Token::SPECIAL, Token::INTEGER, Token::INTEGER, Token::INTEGER, @@ -140,6 +143,7 @@ Token TextParser::parse_token() Token::FLOAT, Token::FLOAT, Token::STRING, + Token::STRING, Token::IDENTIFIER }; @@ -165,6 +169,8 @@ Token TextParser::parse_token() state = FLOAT; else if(c=='"') state = STRING; + else if(c=='=') + state = STRING_BASE64; else if(c=='{' || c=='}' || c==';') return Token(Token::SPECIAL, string(1, c)); else if(isdigit(c)) @@ -255,6 +261,18 @@ Token TextParser::parse_token() state = STRING; break; + case STRING_BASE64: + if(c=='=') + state = STRING_BASE64_PAD; + else if(!isalnum(c) && c!='+' && c!='/') + throw parse_error(buf); + break; + + case STRING_BASE64_PAD: + if(c!='=') + throw parse_error(buf); + break; + case IDENTIFIER: if(!isalpha(c) && !isdigit(c) && c!='_' && c!='-' && c!='/') throw parse_error(buf); @@ -273,6 +291,8 @@ Token TextParser::parse_token() return Token(Token::IDENTIFIER, buf.substr(1)); else if(state==STRING_END) return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2))); + else if(state==STRING_BASE64 || 
state==STRING_BASE64_PAD) + return Token(Token::STRING, base64_decode(buf)); else return Token(token_type[state], buf); } @@ -291,5 +311,41 @@ bool TextParser::isodigit(int c) return (c>='0' && c<='7'); } +string TextParser::base64_decode(const string &data) /* Decodes the base64 text in data into raw bytes and returns them. Accepts A-Z, a-z, 0-9, '+' and '/'; skips every '='; throws invalid_argument on any other character. Trailing bits that do not fill a whole byte are dropped. */ +{ + string bin; + unsigned accum = 0; /* bit accumulator; only the low a_bits bits are pending output */ + unsigned a_bits = 0; /* number of decoded bits not yet emitted */ + for(string::const_iterator i=data.begin(); i!=data.end(); ++i) + { + unsigned d; /* 6-bit value of the current character */ + if(*i>='A' && *i<='Z') + d = *i-'A'; + else if(*i>='a' && *i<='z') + d = 26+(*i-'a'); + else if(*i>='0' && *i<='9') + d = 52+(*i-'0'); + else if(*i=='+') + d = 62; + else if(*i=='/') + d = 63; + else if(*i=='=') + continue; /* '=' carries no data wherever it appears. NOTE(review): presumably this also skips the leading '=' marker that parse_token leaves in buf; confirm against the caller. */ + else + throw invalid_argument("TextParser::base64_decode"); + + accum = (accum<<6)|d; /* older bits are never cleared; the 0xFF mask below ignores them */ + a_bits += 6; + + if(a_bits>=8) /* a full byte is available: emit the 8 oldest pending bits */ + { + bin += (accum>>(a_bits-8))&0xFF; + a_bits -= 8; + } + } + + return bin; +} + } // namespace DataFile } // namespace Msp diff --git a/source/textparser.h b/source/textparser.h index e8c8922..1eec9c6 100644 --- a/source/textparser.h +++ b/source/textparser.h @@ -19,6 +19,7 @@ protected: Token parse_token(); bool is_delimiter(int); bool isodigit(int); + static std::string base64_decode(const std::string &); }; } // namespace DataFile -- 2.43.0