X-Git-Url: http://git.tdb.fi/?a=blobdiff_plain;f=source%2Ftextparser.cpp;h=05521a90456a617e2e82b1fb664225d512a22af4;hb=29fafaa2c570b0cf92f41eeb534cfb65a841a892;hp=298b7cac363ebd2b75a712b2ea41d18dd17b3801;hpb=27630d44298cb67e075c166f4421288cc8ca117e;p=libs%2Fdatafile.git

diff --git a/source/textparser.cpp b/source/textparser.cpp
index 298b7ca..05521a9 100644
--- a/source/textparser.cpp
+++ b/source/textparser.cpp
@@ -1,11 +1,5 @@
-/* $Id$
-
-This file is part of libmspdatafile
-Copyright © 2007  Mikko Rasa, Mikkosoft Productions
-Distributed under the LGPL
-*/
-
-#include <msp/strings/formatter.h>
+#include <msp/strings/format.h>
+#include <msp/strings/utils.h>
 #include "input.h"
 #include "textparser.h"
 #include "token.h"
@@ -15,6 +9,28 @@ using namespace std;
 namespace Msp {
 namespace DataFile {
 
+class parse_error: public runtime_error
+{
+public:
+	parse_error(const std::string &t):
+		runtime_error(t.empty() ? "at end of input" : format("after '%s'", t))
+	{ }
+
+	virtual ~parse_error() throw() { }
+};
+
+
+class syntax_error: public runtime_error
+{
+public:
+	syntax_error(const std::string &t):
+		runtime_error(t.empty() ? "at end of input" : format("at '%s'", t))
+	{ }
+
+	virtual ~syntax_error() throw() { }
+};
+
+
 TextParser::TextParser(Input &i, const string &s):
 	ParserMode(i, s)
 { }
@@ -27,74 +43,71 @@ Statement TextParser::parse()
 Statement TextParser::parse_statement(const Token *t)
 {
 	Statement result;
-	bool      sub=false;
-	bool      finish=false;
+	bool sub = false;
+	bool finish = false;
 
 	while(in)
 	{
 		Token token;
 		if(t)
 		{
-			token=*t;
-			t=0;
+			token = *t;
+			t = 0;
 		}
 		else
-			token=parse_token();
+			token = parse_token();
 
 		if(result.keyword.empty())
 		{
 			if(token.str.empty())
 				break;
 			else if(token.type!=Token::IDENTIFIER)
-				throw ParseError(format("%s: Syntax error at token '%s' (expected an identifier)", get_location(), token.str), src, in.get_line_number());
-			result.keyword=token.str;
-			result.valid=true;
-			result.source=src;
-			result.line=in.get_line_number();
+				throw syntax_error(token.str);
+			result.keyword = token.str;
+			result.valid = true;
+			result.source = src;
+			result.line = in.get_line_number();
 		}
 		else if(sub)
 		{
 			if(token.str=="}")
 			{
-				sub=false;
-				finish=true;
+				sub = false;
+				finish = true;
 			}
 			else
 			{
-				Statement ss=parse_statement(&token);
+				Statement ss = parse_statement(&token);
 				result.sub.push_back(ss);
 			}
 		}
 		else if(finish)
 		{
 			if(token.str!=";")
-				throw ParseError(format("%s: Syntax error at token '%s' (Expected a ';')", get_location(), token.str), src, in.get_line_number());
+				throw syntax_error(token.str);
 			break;
 		}
 		else if(token.str=="{")
-			sub=true;
+			sub = true;
 		else if(token.str==";")
 			break;
 		else if(token.type==Token::INTEGER)
-			result.args.push_back(Value(INTEGER, token.str));
+			result.append(lexical_cast<IntType::Store>(token.str));
 		else if(token.type==Token::FLOAT)
-			result.args.push_back(Value(FLOAT, token.str));
+			result.append(lexical_cast<FloatType::Store>(token.str));
 		else if(token.type==Token::STRING)
-			result.args.push_back(Value(STRING, token.str));
+			result.append(token.str);
 		else if(token.type==Token::IDENTIFIER)
 		{
 			if(token.str=="true")
-				result.args.push_back(Value(BOOLEAN, "1"));
+				result.append(true);
 			else if(token.str=="false")
-				result.args.push_back(Value(BOOLEAN, "0"));
+				result.append(false);
 			else
-				result.args.push_back(Value(ENUM, token.str));
-			//result.args.push_back(resolve_identifiertoken.str);
+				result.append(Symbol(token.str));
 		}
-		else if(token.str=="")
-			throw ParseError(src+": Unexcepted end of input", src, in.get_line_number());
 		else
-			throw ParseError(get_location()+": Syntax error", src, in.get_line_number());
+			throw syntax_error(token.str);
 	}
 
 	return result;
@@ -102,31 +115,33 @@ Statement TextParser::parse_statement(const Token *t)
 
 Token TextParser::parse_token()
 {
-	int c=0;
-	unsigned comment=0;
+	int c = 0;
+	int comment = 0;
 
 	// Skip over comments and whitespace
-	while(in)
+	while(in && comment>=0)
 	{
-		c=in.get();
-		int next=in.peek();
+		c = in.get();
+		int next = in.peek();
 
-		if(c=='/' && next=='/')
-			comment=1;
-		else if(c=='/' && next=='*')
-			comment=2;
+		if(c=='/' && next=='/' && !comment)
+			comment = 1;
+		else if(c=='/' && next=='*' && !comment)
+			comment = 2;
 		else if(c=='\n' && comment==1)
-			comment=0;
+			comment = 0;
 		else if(c=='*' && next=='/' && comment==2)
-			comment=3;
+			comment = 3;
 		else if(comment==3)   // Skip the second character of block comment end
-			comment=0;
-		else if(!isspace(c) && !comment)
-			break;
+			comment = 0;
+		else if(c!=-1 && !isspace(c) && !comment)
+			comment = -1;
 	}
 
-	if(comment)  // Didn't hit any non-whitespace
-		throw ParseError(src+": Unfinished comment at end of input", src, in.get_line_number());
+	if(comment>0)  // EOF while in comment
+		throw parse_error(string());
+	else if(comment==0)  // Didn't hit any non-whitespace
+		return Token(Token::SPECIAL, "");
 
 	enum ParseState
 	{
@@ -142,6 +157,7 @@ Token TextParser::parse_token()
 		OCTAL,
 		FLOAT,
 		FLOATEXP,
+		STRING_END,
 		IDENTIFIER
 	};
 
@@ -151,7 +167,7 @@ Token TextParser::parse_token()
 		Token::SPECIAL,
 		Token::SPECIAL,
 		Token::SPECIAL,
-		Token::STRING,
+		Token::SPECIAL,
 		Token::SPECIAL,
 		Token::INTEGER,
 		Token::INTEGER,
@@ -159,129 +175,142 @@ Token TextParser::parse_token()
 		Token::INTEGER,
 		Token::FLOAT,
 		Token::FLOAT,
+		Token::STRING,
 		Token::IDENTIFIER
 	};
 
-	ParseState state=INIT;
-	string     buf;
-	bool       escape=false;
+	ParseState state = INIT;
+	string buf;
+	bool escape = false;
 
-	while(in)
+	while(in || state==INIT)
 	{
 		if(state!=INIT)
-			c=in.get();
-		int next=in.peek();
+			c = in.get();
+		int next = in.peek();
 
-		buf+=c;
+		buf += c;
 
 		switch(state)
 		{
 		case INIT:
 			if(c=='0')
-				state=ZERO;
+				state = ZERO;
 			else if(c=='-' || c=='+')
-				state=SIGN;
+				state = SIGN;
 			else if(c=='.')
-				state=FLOAT;
+				state = FLOAT;
 			else if(c=='"')
-				state=STRING;
+				state = STRING;
 			else if(c=='{' || c=='}' || c==';')
 				return Token(Token::SPECIAL, string(1, c));
 			else if(isdigit(c))
-				state=DECIMAL;
-			else if(isalpha(c) || c=='_')
-				state=IDENTIFIER;
+				state = DECIMAL;
+			else if(isalpha(c) || c=='_' || c=='\\')
+				state = IDENTIFIER;
 			else
-				parse_error(c, state);
+				throw parse_error(buf);
 			break;
 
 		case SIGN:
 			if(c=='0')
-				state=ZERO;
+				state = ZERO;
 			else if(isdigit(c))
-				state=DECIMAL;
+				state = DECIMAL;
 			else if(c=='.')
-				state=FLOAT;
+				state = FLOAT;
 			else
-				parse_error(c, state);
+				throw parse_error(buf);
 			break;
 
 		case ZERO:
 			if(c=='x')
-				state=HEXADECIMAL;
+				state = HEXADECIMAL;
 			else if(isdigit(c))
-				state=OCTAL;
+				state = OCTAL;
 			else if(c=='.')
-				state=FLOAT;
+				state = FLOAT;
 			else
-				parse_error(c, state);
+				throw parse_error(buf);
 			break;
 
 		case DECIMAL:
 			if(c=='.')
-				state=FLOAT;
+				state = FLOAT;
+			else if(c=='e' || c=='E')
+				state = FLOATEXPINIT;
 			else if(!isdigit(c))
-				parse_error(c, state);
+				throw parse_error(buf);
 			break;
 
 		case HEXADECIMAL:
 			if(!isxdigit(c))
-				parse_error(c, state);
+				throw parse_error(buf);
 			break;
 
 		case OCTAL:
 			if(!isodigit(c))
-				parse_error(c, state);
+				throw parse_error(buf);
 			break;
 
 		case FLOAT:
 			if(c=='e' || c=='E')
-				state=FLOATEXPINIT;
+				state = FLOATEXPINIT;
 			else if(!isdigit(c))
-				parse_error(c, state);
+				throw parse_error(buf);
 			break;
 
 		case FLOATEXPINIT:
 			if(c=='+' || c=='-')
-				state=FLOATEXPSIGN;
+				state = FLOATEXPSIGN;
 			else if(isdigit(c))
-				state=FLOATEXP;
+				state = FLOATEXP;
 			else
-				parse_error(c, state);
+				throw parse_error(buf);
 			break;
 
 		case FLOATEXPSIGN:
 			if(isdigit(c))
-				state=FLOATEXP;
+				state = FLOATEXP;
 			else
-				parse_error(c, state);
+				throw parse_error(buf);
 			break;
 
 		case FLOATEXP:
 			if(!isdigit(c))
-				parse_error(c, state);
+				throw parse_error(buf);
 			break;
 
 		case STRING:
 			if(c=='\\')
-				escape=!escape;
+				escape = !escape;
 			else if(c=='"' && !escape)
-				return Token(Token::STRING, unescape_string(buf));
+				state = STRING_END;
 			else
-				escape=false;
+				escape = false;
 			break;
 
 		case IDENTIFIER:
-			if(!isalpha(c) && !isdigit(c) && c!='_')
-				parse_error(c, state);
+			if(!isalpha(c) && !isdigit(c) && c!='_' && c!='-' && c!='/')
+				throw parse_error(buf);
 			break;
 
+		case STRING_END:
+			throw parse_error(buf);
+
 		default:
-			throw Exception(get_location()+": Internal error (bad state)");
+			throw logic_error("bad parser state");
 		}
 
 		if(is_delimiter(next) && state>=ACCEPT)
-			return Token(token_type[state], buf);
+		{
+			if(state==IDENTIFIER && buf[0]=='\\')
+				return Token(Token::IDENTIFIER, buf.substr(1));
+			else if(state==STRING_END)
+				return Token(Token::STRING, c_unescape(buf.substr(1, buf.size()-2)));
+			else
+				return Token(token_type[state], buf);
+		}
 	}
 
 	return Token(Token::SPECIAL, "");
@@ -297,68 +326,5 @@ bool TextParser::isodigit(int c)
 	return (c>='0' && c<='7');
 }
 
-string TextParser::unescape_string(const string &str)
-{
-	string   result;
-	bool     escape=false;
-	unsigned hexcape=0;
-	for(string::const_iterator i=str.begin()+1; i!=str.end()-1; ++i)
-	{
-		if(escape)
-		{
-			if(*i=='n')
-				result+='\n';
-			else if(*i=='t')
-				result+='\t';
-			else if(*i=='\\')
-				result+='\\';
-			else if(*i=='"')
-				result+='"';
-			else if(*i=='x')
-				hexcape=0x100;
-			else
-				throw ParseError(format("%s: Invalid escape sequence '\\%c'", get_location(), *i), src, in.get_line_number());
-			escape=false;
-		}
-		else if(hexcape)
-		{
-			unsigned digit=0;
-			if(*i>='0' && *i<='9')
-				digit=*i-'0';
-			else if(*i>='a' && *i<='f')
-				digit=*i-'a'+10;
-			else if(*i>='A' && *i<='F')
-				digit=*i-'A'+10;
-			else
-				throw ParseError(get_location()+": Invalid hex digit", src, in.get_line_number());
-
-			hexcape=(hexcape<<4)|digit;
-			if(hexcape&0x10000)
-			{
-				result+=hexcape&0xFF;
-				hexcape=0;
-			}
-		}
-		else if(*i=='\\')
-			escape=true;
-		else
-			result+=*i;
-	}
-
-	return result;
-}
-
-string TextParser::get_location()
-{
-	ostringstream ss;
-	ss<<src<<':'<<in.get_line_number();
-	return ss.str();
-}
-
-void TextParser::parse_error(int c, int state)
-{
-	throw ParseError(format("%s: Parse error at '%c' (state %d)", get_location(), static_cast<char>(c), state), src, in.get_line_number());
-}
-
 } // namespace DataFile
 } // namespace Msp