X-Git-Url: http://git.tdb.fi/?a=blobdiff_plain;f=source%2Ftextparser.cpp;h=b8515a9b487fa2749d78c5c41a734c2f00c1b70f;hb=256b44a5009467171af53316141277027bcc0ba4;hp=00192ccc147ce24f4c181a751a741d8fc1481b3d;hpb=e5d760ccfaaa01884be2424b62e47a24466e0c4b;p=libs%2Fdatafile.git

diff --git a/source/textparser.cpp b/source/textparser.cpp
index 00192cc..b8515a9 100644
--- a/source/textparser.cpp
+++ b/source/textparser.cpp
@@ -1,12 +1,6 @@
-/* $Id$
-
-This file is part of libmspdatafile
-Copyright © 2007-2008, 2010  Mikko Rasa, Mikkosoft Productions
-Distributed under the LGPL
-*/
-
-#include <msp/strings/formatter.h>
+#include <msp/strings/format.h>
 #include <msp/strings/utils.h>
+#include "except.h"
 #include "input.h"
 #include "textparser.h"
 #include "token.h"
@@ -28,8 +22,7 @@ Statement TextParser::parse()
 Statement TextParser::parse_statement(const Token *t)
 {
 	Statement result;
-	bool      sub = false;
-	bool      finish = false;
+	unsigned sub = 0;
 
 	while(in)
 	{
@@ -47,54 +40,36 @@ Statement TextParser::parse_statement(const Token *t)
 			if(token.str.empty())
 				break;
 			else if(token.type!=Token::IDENTIFIER)
-				throw_at(ParseError(format("Syntax error at token '%s' (expected an identifier)", token.str)), get_location());
+				throw syntax_error(token.str);
 			result.keyword = token.str;
 			result.valid = true;
 			result.source = src;
 			result.line = in.get_line_number();
 		}
-		else if(sub)
+		else if(sub==1)
 		{
 			if(token.str=="}")
-			{
-				sub = false;
-				finish = true;
-			}
+				sub = 2;
 			else
 			{
 				Statement ss = parse_statement(&token);
 				result.sub.push_back(ss);
 			}
 		}
-		else if(finish)
+		else if(sub==2)
 		{
 			if(token.str!=";")
-				throw_at(ParseError(format("Syntax error at token '%s' (Expected a ';')", token.str)), get_location());
+				throw syntax_error(token.str);
 			break;
 		}
 		else if(token.str=="{")
-			sub = true;
+			sub = 1;
 		else if(token.str==";")
 			break;
-		else if(token.type==Token::INTEGER)
-			result.append(lexical_cast<IntType::Store>(token.str));
-		else if(token.type==Token::FLOAT)
-			result.append(lexical_cast<FloatType::Store>(token.str));
-		else if(token.type==Token::STRING)
-			result.append(token.str);
-		else if(token.type==Token::IDENTIFIER)
-		{
-			if(token.str=="true")
-				result.append(true);
-			else if(token.str=="false")
-				result.append(false);
-			else
-				result.append(Symbol(token.str));
-		}
-		else if(token.str=="")
-			throw_at(ParseError("Unexcepted end of input"), get_location());
+		else if(token.type!=Token::SPECIAL)
+			result.append_from_token(token);
 		else
-			throw_at(ParseError("Syntax error"), get_location());
+			throw syntax_error(token.str);
 	}
 
 	return result;
@@ -111,9 +86,9 @@ Token TextParser::parse_token()
 		c = in.get();
 		int next = in.peek();
 
-		if(c=='/' && next=='/')
+		if(c=='/' && next=='/' && !comment)
 			comment = 1;
-		else if(c=='/' && next=='*')
+		else if(c=='/' && next=='*' && !comment)
 			comment = 2;
 		else if(c=='\n' && comment==1)
 			comment = 0;
@@ -126,7 +101,7 @@ Token TextParser::parse_token()
 	}
 
 	if(comment>0)  // EOF while in comment
-		throw_at(ParseError("Unfinished comment at end of input"), get_location());
+		throw parse_error(string());
 	else if(comment==0)  // Didn't hit any non-whitespace
 		return Token(Token::SPECIAL, "");
 
@@ -137,6 +112,8 @@ Token TextParser::parse_token()
 		FLOATEXPINIT,
 		FLOATEXPSIGN,
 		STRING,
+		STRING_ESCAPE,
+		STRING_BASE64,
 		ACCEPT,
 		ZERO,
 		DECIMAL,
@@ -145,6 +122,7 @@ Token TextParser::parse_token()
 		FLOAT,
 		FLOATEXP,
 		STRING_END,
+		STRING_BASE64_PAD,
 		IDENTIFIER
 	};
 
@@ -156,6 +134,8 @@ Token TextParser::parse_token()
 		Token::SPECIAL,
 		Token::SPECIAL,
 		Token::SPECIAL,
+		Token::SPECIAL,
+		Token::SPECIAL,
 		Token::INTEGER,
 		Token::INTEGER,
 		Token::INTEGER,
@@ -163,12 +143,12 @@ Token TextParser::parse_token()
 		Token::FLOAT,
 		Token::FLOAT,
 		Token::STRING,
+		Token::STRING,
 		Token::IDENTIFIER
 	};
 
 	ParseState state = INIT;
-	string     buf;
-	bool       escape = false;
+	string buf;
 
 	while(in || state==INIT)
 	{
@@ -189,6 +169,8 @@ Token TextParser::parse_token()
 				state = FLOAT;
 			else if(c=='"')
 				state = STRING;
+			else if(c=='=')
+				state = STRING_BASE64;
 			else if(c=='{' || c=='}' || c==';')
 				return Token(Token::SPECIAL, string(1, c));
 			else if(isdigit(c))
@@ -196,7 +178,7 @@ Token TextParser::parse_token()
 			else if(isalpha(c) || c=='_' || c=='\\')
 				state = IDENTIFIER;
 			else
-				parse_error(c, "0-9A-Za-z_\\.\"{};+-");
+				throw parse_error(buf);
 			break;
 
 		case SIGN:
@@ -207,7 +189,7 @@ Token TextParser::parse_token()
 			else if(c=='.')
 				state = FLOAT;
 			else
-				parse_error(c, "0-9.");
+				throw parse_error(buf);
 			break;
 
 		case ZERO:
@@ -218,31 +200,33 @@ Token TextParser::parse_token()
 			else if(c=='.')
 				state = FLOAT;
 			else
-				parse_error(c, "0-9A-Fa-f.");
+				throw parse_error(buf);
 			break;
 
 		case DECIMAL:
 			if(c=='.')
 				state = FLOAT;
+			else if(c=='e' || c=='E')
+				state = FLOATEXPINIT;
 			else if(!isdigit(c))
-				parse_error(c, "0-9.");
+				throw parse_error(buf);
 			break;
 
 		case HEXADECIMAL:
 			if(!isxdigit(c))
-				parse_error(c, "0-9A-Fa-f");
+				throw parse_error(buf);
 			break;
 
 		case OCTAL:
 			if(!isodigit(c))
-				parse_error(c, "0-7");
+				throw parse_error(buf);
 			break;
 
 		case FLOAT:
 			if(c=='e' || c=='E')
 				state = FLOATEXPINIT;
 			else if(!isdigit(c))
-				parse_error(c, "0-9Ee");
+				throw parse_error(buf);
 			break;
 
 		case FLOATEXPINIT:
@@ -251,40 +235,54 @@ Token TextParser::parse_token()
 			else if(isdigit(c))
 				state = FLOATEXP;
 			else
-				parse_error(c, "0-9+-");
+				throw parse_error(buf);
 			break;
 
 		case FLOATEXPSIGN:
 			if(isdigit(c))
 				state = FLOATEXP;
 			else
-				parse_error(c, "0-9");
+				throw parse_error(buf);
 			break;
 
 		case FLOATEXP:
 			if(!isdigit(c))
-				parse_error(c, "0-9");
+				throw parse_error(buf);
 			break;
 
 		case STRING:
 			if(c=='\\')
-				escape = !escape;
-			else if(c=='"' && !escape)
+				state = STRING_ESCAPE;
+			else if(c=='"')
 				state = STRING_END;
-			else
-				escape = false;
+			break;
+
+		case STRING_ESCAPE:
+			state = STRING;
+			break;
+
+		case STRING_BASE64:
+			if(c=='=')
+				state = STRING_BASE64_PAD;
+			else if(!isalnum(c) && c!='+' && c!='/')
+				throw parse_error(buf);
+			break;
+
+		case STRING_BASE64_PAD:
+			if(c!='=')
+				throw parse_error(buf);
 			break;
 
 		case IDENTIFIER:
 			if(!isalpha(c) && !isdigit(c) && c!='_' && c!='-' && c!='/')
-				parse_error(c, "0-9A-Za-z_/-");
+				throw parse_error(buf);
 			break;
 
 		case STRING_END:
-			throw_at(ParseError("Garbage after string"), get_location());
+			throw parse_error(buf);
 
 		default:
-			throw_at(InvalidState("Internal error (bad state)"), get_location());
+			throw logic_error("bad parser state");
 		}
 
 		if(is_delimiter(next) && state>=ACCEPT)
@@ -292,17 +290,9 @@ Token TextParser::parse_token()
 			if(state==IDENTIFIER && buf[0]=='\\')
 				return Token(Token::IDENTIFIER, buf.substr(1));
 			else if(state==STRING_END)
-			{
-				try
-				{
-					return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
-				}
-				catch(Exception &e)
-				{
-					e.at(get_location());
-					throw;
-				}
-			}
+				return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
+			else if(state==STRING_BASE64 || state==STRING_BASE64_PAD)
+				return Token(Token::STRING, base64_decode(buf));
 			else
 				return Token(token_type[state], buf);
 		}
@@ -321,16 +311,41 @@ bool TextParser::isodigit(int c)
 	return (c>='0' && c<='7');
 }
 
-string TextParser::get_location()
+string TextParser::base64_decode(const string &data)  // Decodes base64 text in data to raw bytes; '=' is skipped, any other character outside the alphabet throws invalid_argument
 {
-	ostringstream ss;
-	ss<<src<<':'<<in.get_line_number();
-	return ss.str();
-}
+	string bin;  // decoded output bytes
+	bin.reserve(data.size()*3/4);  // every 4 input characters yield at most 3 bytes
+	unsigned accum = 0;  // bit accumulator; the newest 6-bit digit sits in the low bits
+	unsigned a_bits = 0;  // number of bits in accum that have not been emitted yet
+	for(char c: data)
+	{
+		unsigned d;  // 6-bit value of the current character
+		if(c>='A' && c<='Z')
+			d = c-'A';  // 'A'..'Z' map to 0..25
+		else if(c>='a' && c<='z')
+			d = 26+(c-'a');  // 'a'..'z' map to 26..51
+		else if(c>='0' && c<='9')
+			d = 52+(c-'0');  // '0'..'9' map to 52..61
+		else if(c=='+')
+			d = 62;
+		else if(c=='/')
+			d = 63;
+		else if(c=='=')
+			continue;  // '=' carries no data bits, wherever it appears in the input
+		else
+			throw invalid_argument("TextParser::base64_decode");
 
-void TextParser::parse_error(int c, const char *e)
-{
-	throw_at(ParseError(format("Parse error at '%c', expected one of \"%s\"", static_cast<char>(c), e)), get_location());
+		accum = (accum<<6)|d;  // shift in 6 new bits; older high bits are masked off when a byte is extracted
+		a_bits += 6;
+
+		if(a_bits>=8)  // at least one whole byte is pending
+		{
+			bin += (accum>>(a_bits-8))&0xFF;  // emit the oldest 8 pending bits as one byte
+			a_bits -= 8;
+		}
+	}
+
+	return bin;  // leftover bits (fewer than 8) at end of input are dropped
 }
 
 } // namespace DataFile