description "Mikkosoft Productions datafile library";
require "mspcore";
+ require "mspstrings";
library "mspdatafile"
{
install true;
install_headers "msp/datafile";
};
+
+ program "mspdatatool"
+ {
+ source "tool.cpp";
+ install true;
+ build_info
+ {
+ libpath ".";
+ library "mspdatafile";
+ };
+ };
};
--- /dev/null
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2006 Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#ifndef MSP_DATAFILE_BINARYDICT_H_
+#define MSP_DATAFILE_BINARYDICT_H_
+
+#include <string>
+
+namespace Msp {
+namespace DataFile {
+
+/* One statement signature of the binary format: the keyword together with a
+string of argument type codes.  Entries are ordered by keyword first and by
+argument codes second so they can serve as std::map keys. */
+struct DictEntry
+{
+	std::string keyword;
+	std::string args;
+
+	DictEntry() { }
+	DictEntry(const std::string &k, const std::string &a): keyword(k), args(a) { }
+
+	bool operator<(const DictEntry &o) const
+	{
+		// Keywords decide unless they are equal; then the args break the tie
+		if(keyword!=o.keyword)
+			return keyword<o.keyword;
+		return args<o.args;
+	}
+};
+
+} // namespace DataFile
+} // namespace Msp
+
+#endif
--- /dev/null
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2007 Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#include <sys/param.h>
+#include <msp/strings/formatter.h>
+#include "binaryparser.h"
+#include "input.h"
+
+using namespace std;
+
+namespace Msp {
+namespace DataFile {
+
+BinaryParser::BinaryParser(Input &i, const string &s):
+ ParserMode(i, s),
+ first(true)
+{
+ dict[1]=DictEntry("__kw", "iss");
+ dict[2]=DictEntry("__enum", "is");
+}
+
+/* Returns the next user-visible statement.  Dictionary statements (__kw and
+__enum) are consumed here: they update the keyword and enum tables and are
+never returned to the caller.  Throws TypeError if a dictionary statement has
+the wrong number of arguments. */
+Statement BinaryParser::parse()
+{
+	while(1)
+	{
+		Statement st=parse_statement();
+		if(st.keyword=="__kw")
+		{
+			if(st.args.size()!=3)
+				throw TypeError(src+": Keyword definition must have three arguments");
+
+			// __kw <id> <keyword> <argument type codes>
+			const unsigned id=st.args[0].get<unsigned>();
+			const string &kw=st.args[1].get<const string &>();
+			const string &args=st.args[2].get<const string &>();
+			dict[id]=DictEntry(kw, args);
+		}
+		else if(st.keyword=="__enum")
+		{
+			if(st.args.size()!=2)
+				throw TypeError(src+": Enum definition must have two arguments");
+
+			// __enum <id> <name>
+			const unsigned id=st.args[0].get<unsigned>();
+			enums[id]=st.args[1].get<const std::string &>();
+		}
+		else
+			return st;
+	}
+}
+
+/* Reads one raw statement: a statement ID, the arguments dictated by the
+dictionary entry for that ID, a sub-statement count and then that many
+sub-statements.  Returns a default-constructed (invalid) Statement if the
+input is exhausted after reading the ID.  Throws ParseError for an ID that is
+not in the dictionary or for an unrecognised argument type code. */
+Statement BinaryParser::parse_statement()
+{
+	// Skip newlines in front of the very first statement only
+	while(first && in.peek()=='\n')
+		in.get();
+	first=false;
+
+	unsigned id=parse_int();
+	if(!in)
+		return Statement();
+
+	Dictionary::const_iterator i=dict.find(id);
+	if(i==dict.end())
+		throw ParseError(format("%s: Unknown statement ID %d", src, id), src, 0);
+	const DictEntry &de=i->second;
+
+	Statement result;
+	result.keyword=de.keyword;
+	result.source=src;
+
+	for(unsigned j=0; j<de.args.size(); ++j)
+	{
+		switch(de.args[j])
+		{
+		case 'i':
+			result.args.push_back(parse_int());
+			break;
+		case 'f':
+			result.args.push_back(parse_float());
+			break;
+		case 's':
+			result.args.push_back(parse_string());
+			break;
+		case 'b':
+			result.args.push_back(parse_bool());
+			break;
+		case 'e':
+			result.args.push_back(parse_enum());
+			break;
+		default:
+			/* Skipping an unknown code would leave its bytes in the stream and
+			misalign everything read afterwards, so fail loudly instead. */
+			throw ParseError(format("%s: Unknown argument type '%c'", src, de.args[j]), src, 0);
+		}
+	}
+
+	unsigned nsub=parse_int();
+	for(unsigned j=0; j<nsub; ++j)
+		result.sub.push_back(parse());
+
+	result.valid=true;
+
+	return result;
+}
+
+/* Reads a variable-length signed integer.  Each byte carries seven payload
+bits, most significant group first; bit 0x80 is set on every byte except the
+last.  The topmost payload bit of the first byte is the sign bit, so the value
+is sign-extended from the number of bits actually read.  Returns 0 if the
+input was already unusable and no byte could be read. */
+long long BinaryParser::parse_int()
+{
+	/* Accumulate as unsigned: left-shifting a signed value into or past the
+	sign bit is undefined, while unsigned shifts simply drop the high bits. */
+	unsigned long long result=0;
+	unsigned bits=0;
+
+	while(in)
+	{
+		int c=in.get();
+
+		result=(result<<7) | (c&0x7F);
+		bits+=7;
+
+		if(!(c&0x80))
+			break;
+	}
+
+	// Nothing was read; there is no sign bit to extend from
+	if(!bits)
+		return 0;
+
+	/* Sign-extend from the top bit read.  With 64 or more bits the value
+	already fills the whole word, and shifting by bits-1 would be undefined. */
+	if(bits<64)
+	{
+		const unsigned long long mask=1ULL<<(bits-1);
+		result=(result^mask)-mask;
+	}
+
+	return static_cast<long long>(result);
+}
+
+/* Reads sizeof(float) bytes and reinterprets them as a float.  On a
+little-endian host the bytes are stored in reverse order, mirroring
+BinaryWriter::write_float which emits them most significant byte first. */
+float BinaryParser::parse_float()
+{
+	union
+	{
+		float f;
+		char d[sizeof(float)];
+	};
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+	/* Start at sizeof(float): the post-decrement in the condition makes the
+	first index used sizeof(float)-1 and the last one 0, filling every byte. */
+	for(unsigned i=sizeof(float); i--;)
+		d[i]=in.get();
+#else
+	for(unsigned i=0; i<sizeof(float); ++i)
+		d[i]=in.get();
+#endif
+
+	return f;
+}
+
+/* Reads a single byte; any non-zero value counts as true. */
+bool BinaryParser::parse_bool()
+{
+	return in.get()!=0;
+}
+
+/* Reads a length-prefixed string: a variable-length integer giving the byte
+count, followed by that many raw bytes.
+NOTE(review): the length comes straight from the input and is not validated;
+a negative or corrupt value turns into a huge unsigned count here - confirm
+whether callers can feed untrusted data. */
+string BinaryParser::parse_string()
+{
+	const unsigned len=parse_int();
+
+	string result;
+	result.reserve(len);
+	for(unsigned left=len; left>0; --left)
+		result+=in.get();
+
+	return result;
+}
+
+/* Reads an enum ID and returns the name registered for it by an earlier
+__enum statement.  Throws KeyError if the ID has not been defined. */
+string BinaryParser::parse_enum()
+{
+	const unsigned id=parse_int();
+
+	EnumMap::const_iterator found=enums.find(id);
+	if(found!=enums.end())
+		return found->second;
+
+	throw KeyError("Unknown enum");
+}
+
+} // namespace DataFile
+} // namespace Msp
--- /dev/null
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2007 Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#ifndef MSP_DATAFILE_BINARYPARSER_H_
+#define MSP_DATAFILE_BINARYPARSER_H_
+
+#include <map>
+#include "binarydict.h"
+#include "parsermode.h"
+
+namespace Msp {
+namespace DataFile {
+
+/* Parser mode for the binary statement encoding.  Statements are identified
+by numeric IDs which the stream itself defines through __kw statements; enum
+values are likewise referred to by IDs defined through __enum statements. */
+class BinaryParser: public ParserMode
+{
+private:
+ // Statement ID -> keyword and argument type codes
+ typedef std::map<unsigned, DictEntry> Dictionary;
+ // Enum ID -> enum name
+ typedef std::map<unsigned, std::string> EnumMap;
+
+ Dictionary dict;
+ EnumMap enums;
+ // True until the first statement has been read; see parse_statement
+ bool first;
+
+public:
+ BinaryParser(Input &i, const std::string &s);
+ // Returns the next statement that is not a __kw or __enum definition
+ Statement parse();
+private:
+ // Reads one statement, including dictionary definitions
+ Statement parse_statement();
+ // Readers for the individual argument encodings
+ long long parse_int();
+ float parse_float();
+ std::string parse_string();
+ bool parse_bool();
+ std::string parse_enum();
+};
+
+} // namespace DataFile
+} // namespace Msp
+
+#endif
--- /dev/null
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2006 Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#include "binarywriter.h"
+#include "statement.h"
+
+using namespace std;
+
+namespace Msp {
+namespace DataFile {
+
+/* Creates a writer on stream o.  The two built-in statements get the fixed
+IDs that BinaryParser pre-registers (__kw is 1, __enum is 2); IDs for user
+statements are handed out starting from 3 and enum IDs from 1. */
+BinaryWriter::BinaryWriter(ostream &o):
+	WriterMode(o),
+	next_st_id(3),
+	next_enum_id(1)
+{
+	dict[DictEntry("__kw", "iss")]=1;
+	// Must not share an ID with __kw, or every enum definition is read back as a keyword definition
+	dict[DictEntry("__enum", "is")]=2;
+}
+
+/* Writes a statement and its sub-statements.  Any keyword or enum definitions
+the statement tree needs are emitted first, so that they precede the data that
+refers to them. */
+void BinaryWriter::write(const Statement &st)
+{
+ collect_keywords(st);
+ write_(st);
+}
+
+/* Emits the encoded form of st: its statement ID, each argument in the
+encoding of its type, the number of sub-statements and then the
+sub-statements themselves.  The dictionary entry for st (and for everything
+below it) must already exist; throws InvalidParameterValue otherwise. */
+void BinaryWriter::write_(const Statement &st)
+{
+	Dictionary::iterator i=dict.find(create_entry(st));
+	if(i==dict.end())
+		throw InvalidParameterValue("Unknown statement");
+
+	write_int(i->second);
+	for(ValueArray::const_iterator j=st.args.begin(); j!=st.args.end(); ++j)
+		switch(j->get_type())
+		{
+		case INTEGER: write_int   (j->get<long long>()); break;
+		case STRING:  write_string(j->get<const string &>()); break;
+		case BOOLEAN: write_int   (j->get<bool>()); break;
+		case FLOAT:   write_float (j->get<float>()); break;
+		case ENUM:    write_enum  (j->get_raw()); break;
+		}
+
+	write_int(st.sub.size());
+	/* The whole tree was registered by collect_keywords before the top-level
+	statement was written, so recurse directly rather than through write(),
+	which would walk each subtree again for nothing. */
+	for(list<Statement>::const_iterator j=st.sub.begin(); j!=st.sub.end(); ++j)
+		write_(*j);
+}
+
+/* Builds the dictionary key for a statement: its keyword plus one type code
+per argument ('i', 'f', 's', 'b' or 'e', indexed by the argument's type).
+Throws InvalidParameterValue for a type value beyond the known codes. */
+DictEntry BinaryWriter::create_entry(const Statement &st)
+{
+	static const char types[]="ifsbe";
+
+	string codes;
+	ValueArray::const_iterator arg=st.args.begin();
+	while(arg!=st.args.end())
+	{
+		const int type=arg->get_type();
+		if(type>4)
+			throw InvalidParameterValue("Invalid argument type");
+		codes+=types[type];
+		++arg;
+	}
+
+	return DictEntry(st.keyword, codes);
+}
+
+/* Walks st and all of its sub-statements, and for every statement signature
+or enum value seen for the first time writes a __kw or __enum definition and
+records the ID assigned to it. */
+void BinaryWriter::collect_keywords(const Statement &st)
+{
+	const DictEntry entry=create_entry(st);
+
+	if(dict.find(entry)==dict.end())
+	{
+		Statement definition;
+		definition.keyword="__kw";
+		definition.args.push_back(next_st_id);
+		definition.args.push_back(entry.keyword);
+		definition.args.push_back(entry.args);
+		write_(definition);
+
+		dict.insert(Dictionary::value_type(entry, next_st_id++));
+	}
+
+	for(ValueArray::const_iterator arg=st.args.begin(); arg!=st.args.end(); ++arg)
+	{
+		if(arg->get_type()!=ENUM)
+			continue;
+		if(enums.find(arg->get_raw())!=enums.end())
+			continue;
+
+		Statement definition;
+		definition.keyword="__enum";
+		definition.args.push_back(next_enum_id);
+		definition.args.push_back(arg->get_raw());
+		write_(definition);
+
+		enums[arg->get_raw()]=next_enum_id++;
+	}
+
+	for(list<Statement>::const_iterator child=st.sub.begin(); child!=st.sub.end(); ++child)
+		collect_keywords(*child);
+}
+
+/* Writes n as a variable-length signed integer: groups of seven bits, most
+significant group first, with bit 0x80 set on every byte except the last.
+BinaryParser::parse_int sign-extends from the top bit of the first group, so
+enough groups are written for that bit to carry the sign of n. */
+void BinaryWriter::write_int(long long n)
+{
+	/* i groups are enough once everything from bit i*7-1 upwards is pure sign,
+	i.e. the shifted value is 0 (non-negative) or -1 (negative).  Ten groups
+	cover 70 bits and therefore always suffice; the cap also keeps the shift
+	count below the width of long long. */
+	unsigned i=1;
+	for(; i<10; ++i)
+	{
+		const long long top=n>>(i*7-1);
+		if(top==0 || top==-1)
+			break;
+	}
+
+	for(; i--;)
+		out.put((n>>(i*7) & 0x7F) | (i?0x80:0));
+}
+
+/* Writes a string as its byte count followed by the raw bytes. */
+void BinaryWriter::write_string(const string &s)
+{
+	const string::size_type length=s.size();
+	write_int(length);
+	out.write(s.data(), length);
+}
+
+/* Writes the bytes of f's in-memory representation.  On a little-endian host
+they are emitted from the highest address down, otherwise in memory order.
+NOTE(review): BYTE_ORDER and LITTLE_ENDIAN are not defined by any header this
+file includes directly (binaryparser.cpp includes <sys/param.h> for them).  If
+neither macro is defined the condition compares 0 with 0 and the reversing
+branch is always taken - confirm the macros are visible here. */
+void BinaryWriter::write_float(float f)
+{
+	union
+	{
+		float value;
+		char bytes[sizeof(float)];
+	};
+
+	value=f;
+#if BYTE_ORDER == LITTLE_ENDIAN
+	for(unsigned left=sizeof(float); left>0; --left)
+		out.put(bytes[left-1]);
+#else
+	for(unsigned pos=0; pos!=sizeof(float); ++pos)
+		out.put(bytes[pos]);
+#endif
+}
+
+/* Writes the ID previously assigned to enum name e.  Throws
+InvalidParameterValue if no ID has been assigned to it. */
+void BinaryWriter::write_enum(const string &e)
+{
+	EnumMap::const_iterator found=enums.find(e);
+	if(found!=enums.end())
+	{
+		write_int(found->second);
+		return;
+	}
+
+	throw InvalidParameterValue("Unknown enum");
+}
+
+} // namespace DataFile
+} // namespace Msp
--- /dev/null
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2006 Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#ifndef MSP_DATAFILE_BINARYWRITER_H_
+#define MSP_DATAFILE_BINARYWRITER_H_
+
+#include <map>
+#include "binarydict.h"
+#include "writermode.h"
+
+namespace Msp {
+namespace DataFile {
+
+/* Writer mode producing the binary statement encoding.  Keeps track of which
+statement signatures and enum names have already been given an ID, and emits
+a definition the first time each one is needed. */
+class BinaryWriter: public WriterMode
+{
+private:
+ // Keyword and argument type codes -> statement ID
+ typedef std::map<DictEntry, unsigned> Dictionary;
+ // Enum name -> enum ID
+ typedef std::map<std::string, unsigned> EnumMap;
+
+ Dictionary dict;
+ // ID to give to the next new statement signature
+ unsigned next_st_id;
+ EnumMap enums;
+ // ID to give to the next new enum name
+ unsigned next_enum_id;
+
+public:
+ BinaryWriter(std::ostream &o);
+ // Writes st after emitting any definitions it requires
+ void write(const Statement &st);
+private:
+ // Writes st assuming all definitions have been emitted already
+ void write_(const Statement &st);
+ // Builds the dictionary key (keyword + argument type codes) for st
+ DictEntry create_entry(const Statement &st);
+ // Emits __kw / __enum definitions for anything in st not seen before
+ void collect_keywords(const Statement &st);
+ // Writers for the individual argument encodings
+ void write_int(long long n);
+ void write_string(const std::string &s);
+ void write_float(float f);
+ void write_enum(const std::string &e);
+};
+
+} // namespace DataFile
+} // namespace Msp
+
+#endif
*/
#include <cctype>
#include <sstream>
-#include "error.h"
+#include "binaryparser.h"
#include "parser.h"
#include "statement.h"
-#include "token.h"
+#include "textparser.h"
using namespace std;
Parser::Parser(istream &i, const string &s):
in(i),
src(s),
- good(true)
+ good(true),
+ mode(new TextParser(in, src))
{ }
+Parser::~Parser()
+{
+ delete mode;
+}
+
Statement Parser::parse()
{
if(!good)
try
{
- return parse_(0);
- }
- catch(const Exception &e)
- {
- good=false;
- throw;
- }
-}
-
-Statement Parser::parse_(const Token *t)
-{
- Statement result;
- bool sub=false;
- bool finish=false;
-
- while(in)
- {
- Token token;
- if(t)
- {
- token=*t;
- t=0;
- }
- else
- token=parse_token();
-
- if(result.keyword.empty())
- {
- if(token.str.empty())
- break;
- else if(token.type!=Token::IDENTIFIER)
- throw ParseError(get_location()+": Syntax error at token '"+token.str+"' (expected an identifier)", src, in.get_line_number());
- result.keyword=token.str;
- result.valid=true;
- result.source=src;
- result.line=in.get_line_number();
- }
- else if(sub)
+ while(1)
{
- if(token.str=="}")
+ Statement st=mode->parse();
+ if(st.keyword=="__bin")
{
- sub=false;
- finish=true;
+ delete mode;
+ mode=new BinaryParser(in, src);
}
- else
+ else if(st.keyword=="__text")
{
- Statement ss=parse_(&token);
- result.sub.push_back(ss);
+ delete mode;
+ mode=new TextParser(in, src);
}
- }
- else if(finish)
- {
- if(token.str!=";")
- throw ParseError(get_location()+": Syntax error at token '"+token.str+"' (Expected a ';')", src, in.get_line_number());
- break;
- }
- else if(token.str=="{")
- sub=true;
- else if(token.str==";")
- break;
- else if(token.type==Token::INTEGER)
- result.args.push_back(Value(Value::INTEGER, token.str));
- else if(token.type==Token::FLOAT)
- result.args.push_back(Value(Value::FLOAT, token.str));
- else if(token.type==Token::STRING)
- result.args.push_back(Value(Value::STRING, token.str));
- else if(token.type==Token::IDENTIFIER)
- {
- if(token.str=="true")
- result.args.push_back(Value(Value::BOOLEAN, "1"));
- else if(token.str=="false")
- result.args.push_back(Value(Value::BOOLEAN, "0"));
else
- result.args.push_back(Value(Value::ENUM, token.str));
- //result.args.push_back(resolve_identifiertoken.str);
+ return st;
}
- else if(token.str=="")
- throw ParseError(src+": Unexcepted EOF", src, in.get_line_number());
- else
- throw ParseError(get_location()+": Syntax error", src, in.get_line_number());
}
-
- return result;
-}
-
-Token Parser::parse_token()
-{
- int c=0;
- unsigned comment=0;
-
- // Skip over comments and whitespace
- while(in)
- {
- c=in.get();
- int next=in.peek();
-
- if(c=='/' && next=='/')
- comment=1;
- else if(c=='/' && next=='*')
- comment=2;
- else if(c=='\n' && comment==1)
- comment=0;
- else if(c=='*' && next=='/' && comment==2)
- comment=3;
- else if(comment==3) // Skip the second character of block comment end
- comment=0;
- else if(!isspace(c) && !comment)
- break;
- }
-
- if(comment) // Didn't hit any non-whitespace
- throw ParseError(src+": Unfinished comment", src, in.get_line_number());
-
- enum ParseState
- {
- INIT,
- SIGN,
- FLOATEXPINIT,
- FLOATEXPSIGN,
- STRING,
- ACCEPT,
- ZERO,
- DECIMAL,
- HEXADECIMAL,
- OCTAL,
- FLOAT,
- FLOATEXP,
- IDENTIFIER
- };
-
- static Token::Type token_type[]=
- {
- Token::SPECIAL,
- Token::SPECIAL,
- Token::SPECIAL,
- Token::SPECIAL,
- Token::STRING,
- Token::SPECIAL,
- Token::INTEGER,
- Token::INTEGER,
- Token::INTEGER,
- Token::INTEGER,
- Token::FLOAT,
- Token::FLOAT,
- Token::IDENTIFIER
- };
-
- ParseState state=INIT;
- string buf;
- bool escape=false;
-
- while(in)
- {
- if(state!=INIT)
- c=in.get();
- int next=in.peek();
-
- buf+=c;
-
- switch(state)
- {
- case INIT:
- if(c=='0')
- state=ZERO;
- else if(c=='-' || c=='+')
- state=SIGN;
- else if(c=='.')
- state=FLOAT;
- else if(c=='"')
- state=STRING;
- else if(c=='{' || c=='}' || c==';')
- return Token(Token::SPECIAL, string(1, c));
- else if(isdigit(c))
- state=DECIMAL;
- else if(isalpha(c))
- state=IDENTIFIER;
- else
- parse_error(c, state);
- break;
-
- case SIGN:
- if(c=='0')
- state=ZERO;
- else if(isdigit(c))
- state=DECIMAL;
- else if(c=='.')
- state=FLOAT;
- else
- parse_error(c, state);
- break;
-
- case ZERO:
- if(c=='x')
- state=HEXADECIMAL;
- else if(isdigit(c))
- state=OCTAL;
- else if(c=='.')
- state=FLOAT;
- else
- parse_error(c, state);
- break;
-
- case DECIMAL:
- if(c=='.')
- state=FLOAT;
- else if(!isdigit(c))
- parse_error(c, state);
- break;
-
- case HEXADECIMAL:
- if(!isxdigit(c))
- parse_error(c, state);
- break;
-
- case OCTAL:
- if(!isodigit(c))
- parse_error(c, state);
- break;
-
- case FLOAT:
- if(c=='e' || c=='E')
- state=FLOATEXPINIT;
- else if(!isdigit(c))
- parse_error(c, state);
- break;
-
- case FLOATEXPINIT:
- if(c=='+' || c=='-')
- state=FLOATEXPSIGN;
- else if(isdigit(c))
- state=FLOATEXP;
- else
- parse_error(c, state);
- break;
-
- case FLOATEXPSIGN:
- if(isdigit(c))
- state=FLOATEXP;
- else
- parse_error(c, state);
- break;
-
- case FLOATEXP:
- if(!isdigit(c))
- parse_error(c, state);
- break;
-
- case STRING:
- if(c=='\\')
- escape=!escape;
- else if(c=='"' && !escape)
- return Token(Token::STRING, unescape_string(buf));
- else
- escape=false;
- break;
-
- case IDENTIFIER:
- if(!isalpha(c) && !isdigit(c) && c!='_')
- parse_error(c, state);
- break;
-
- default:
- throw Exception(get_location()+": Internal error (bad state)");
- }
-
- if(is_delimiter(next) && state>=ACCEPT)
- return Token(token_type[state], buf);
- }
-
- return Token(Token::SPECIAL, "");
-}
-
-bool Parser::is_delimiter(int c)
-{
- return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/');
-}
-
-bool Parser::isodigit(int c)
-{
- return (c>='0' && c<='7');
-}
-
-string Parser::unescape_string(const string &str)
-{
- string result;
- bool escape=false;
- unsigned hexcape=0;
- for(string::const_iterator i=str.begin()+1; i!=str.end()-1; ++i)
+ catch(const Exception &e)
{
- if(escape)
- {
- if(*i=='n')
- result+='\n';
- else if(*i=='t')
- result+='\t';
- else if(*i=='\\')
- result+='\\';
- else if(*i=='"')
- result+='"';
- else if(*i=='x')
- hexcape=0x100;
- else
- throw ParseError("Invalid escape", src, in.get_line_number());
- escape=false;
- }
- else if(hexcape)
- {
- unsigned digit=0;
- if(*i>='0' && *i<='9')
- digit=*i-'0';
- else if(*i>='a' && *i<='f')
- digit=*i-'a'+10;
- else if(*i>='A' && *i<='F')
- digit=*i-'A'+10;
- else
- throw ParseError("Invalid hex digit", src, in.get_line_number());
-
- hexcape=(hexcape<<4)|digit;
- if(hexcape&0x10000)
- {
- result+=hexcape&0xFF;
- hexcape=0;
- }
- }
- else if(*i=='\\')
- escape=true;
- else
- result+=*i;
+ good=false;
+ throw;
}
-
- return result;
-}
-
-string Parser::get_location()
-{
- ostringstream ss;
- ss<<src<<':'<<in.get_line_number();
- return ss.str();
-}
-
-void Parser::parse_error(int c, int state)
-{
- ostringstream ss;
- ss<<get_location()<<": Parse error at '"<<(char)c<<"' (state "<<state<<')';
- throw ParseError(ss.str(), src, in.get_line_number());
}
} // namespace DataFile
/* $Id$
This file is part of libmspdatafile
-Copyright © 2006 Mikko Rasa, Mikkosoft Productions
+Copyright © 2006-2007 Mikko Rasa, Mikkosoft Productions
Distributed under the LGPL
*/
+
#ifndef MSP_DATAFILE_PARSER_H_
#define MSP_DATAFILE_PARSER_H_
namespace Msp {
namespace DataFile {
+class ParserMode;
class Statement;
struct Token;
{
public:
Parser(std::istream &, const std::string &);
+ ~Parser();
+
Statement parse();
operator bool() const { return in; }
private:
Input in;
std::string src;
bool good;
-
- Statement parse_(const Token *);
- Token parse_token();
- bool is_delimiter(int);
- bool isodigit(int);
- std::string unescape_string(const std::string &);
- std::string get_location();
- void parse_error(int, int);
+ ParserMode *mode;
};
} // namespace DataFile
--- /dev/null
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2007 Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#ifndef MSP_DATAFILE_PARSERMODE_H_
+#define MSP_DATAFILE_PARSERMODE_H_
+
+#include "statement.h"
+
+namespace Msp {
+namespace DataFile {
+
+class Input;
+
+/* Abstract base for the interchangeable parsing back-ends.  A mode reads
+statements from an Input it does not own; src is the name of the source used
+in error messages. */
+class ParserMode
+{
+protected:
+	Input &in;
+	std::string src;
+
+	ParserMode(Input &i, const std::string &s): in(i), src(s) { }
+public:
+	virtual ~ParserMode() { }
+
+	// Parses and returns the next statement from the input
+	virtual Statement parse() =0;
+};
+
+} // namespace DataFile
+} // namespace Msp
+
+#endif
--- /dev/null
+/* $Id: statement.h 19 2007-08-21 14:11:23Z tdb $
+
+This file is part of libmspdatafile
+Copyright © 2006 Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#include <msp/strings/formatter.h>
+#include "statement.h"
+
+namespace Msp {
+namespace DataFile {
+
+/* Returns the source name, followed by ":<line>" when a non-zero line number
+is known for the statement. */
+std::string Statement::get_location() const
+{
+	if(!line)
+		return source;
+
+	std::string location=source;
+	location+=format(":%d", line);
+	return location;
+}
+
+} // namespace DataFile
+} // namespace Msp
#define MSP_DATAFILE_STATEMENT_H_
#include <list>
-#include <sstream>
-#include <vector>
#include "value.h"
namespace Msp {
std::list<Statement> sub;
Statement(): valid(false), line(0) { }
- std::string get_location() const
- { std::ostringstream ss; ss<<source<<':'<<line; return ss.str(); }
+ std::string get_location() const;
};
} // namespace DataFile
--- /dev/null
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2007 Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#include <msp/strings/formatter.h>
+#include "input.h"
+#include "textparser.h"
+#include "token.h"
+
+using namespace std;
+
+namespace Msp {
+namespace DataFile {
+
+/* Binds the parser to input i; s is the source name used in error messages. */
+TextParser::TextParser(Input &i, const string &s):
+ ParserMode(i, s)
+{ }
+
+/* Parses the next statement, starting from a fresh token (no lookahead). */
+Statement TextParser::parse()
+{
+ return parse_statement(0);
+}
+
+/* Parses one statement of the form
+    keyword arg... ;
+or
+    keyword arg... { sub-statements } ;
+If t is non-null it is used as the first token instead of reading one; this
+is how a sub-statement receives the token its parent already consumed.
+Returns a statement with valid==false if the input ends before a keyword is
+seen.  Throws ParseError on malformed input. */
+Statement TextParser::parse_statement(const Token *t)
+{
+	Statement result;
+	bool sub=false;      // inside the { } block
+	bool finish=false;   // block closed, only ';' may follow
+
+	while(in)
+	{
+		Token token;
+		if(t)
+		{
+			token=*t;
+			t=0;
+		}
+		else
+			token=parse_token();
+
+		if(result.keyword.empty())
+		{
+			// An empty token means end of input
+			if(token.str.empty())
+				break;
+			else if(token.type!=Token::IDENTIFIER)
+				throw ParseError(format("%s: Syntax error at token '%s' (expected an identifier)", get_location(), token.str), src, in.get_line_number());
+			result.keyword=token.str;
+			result.valid=true;
+			result.source=src;
+			result.line=in.get_line_number();
+		}
+		else if(sub)
+		{
+			if(token.str=="}")
+			{
+				sub=false;
+				finish=true;
+			}
+			else
+			{
+				// The token read here is the first token of the sub-statement
+				Statement ss=parse_statement(&token);
+				result.sub.push_back(ss);
+			}
+		}
+		else if(finish)
+		{
+			if(token.str!=";")
+				throw ParseError(format("%s: Syntax error at token '%s' (Expected a ';')", get_location(), token.str), src, in.get_line_number());
+			break;
+		}
+		else if(token.str=="{")
+			sub=true;
+		else if(token.str==";")
+			break;
+		else if(token.type==Token::INTEGER)
+			result.args.push_back(Value(INTEGER, token.str));
+		else if(token.type==Token::FLOAT)
+			result.args.push_back(Value(FLOAT, token.str));
+		else if(token.type==Token::STRING)
+			result.args.push_back(Value(STRING, token.str));
+		else if(token.type==Token::IDENTIFIER)
+		{
+			// true and false are booleans; any other bare word is an enum value
+			if(token.str=="true")
+				result.args.push_back(Value(BOOLEAN, "1"));
+			else if(token.str=="false")
+				result.args.push_back(Value(BOOLEAN, "0"));
+			else
+				result.args.push_back(Value(ENUM, token.str));
+		}
+		else if(token.str=="")
+			throw ParseError(src+": Unexpected end of input", src, in.get_line_number());
+		else
+			throw ParseError(get_location()+": Syntax error", src, in.get_line_number());
+	}
+
+	return result;
+}
+
+/* Reads the next token from the input.  Whitespace, // line comments and
+block comments are skipped first; the token itself is then recognised by a
+small state machine.  Returns a SPECIAL token with an empty string when the
+input runs out, and throws ParseError if the input ends inside a comment or a
+character does not fit the current state. */
+Token TextParser::parse_token()
+{
+ int c=0;
+ // 0 = not in a comment, 1 = line comment, 2 = block comment,
+ // 3 = just saw the '*' of a block comment terminator
+ unsigned comment=0;
+
+ // Skip over comments and whitespace
+ while(in)
+ {
+ c=in.get();
+ int next=in.peek();
+
+ if(c=='/' && next=='/')
+ comment=1;
+ else if(c=='/' && next=='*')
+ comment=2;
+ else if(c=='\n' && comment==1)
+ comment=0;
+ else if(c=='*' && next=='/' && comment==2)
+ comment=3;
+ else if(comment==3) // Skip the second character of block comment end
+ comment=0;
+ else if(!isspace(c) && !comment)
+ break;
+ }
+
+ if(comment) // Didn't hit any non-whitespace
+ throw ParseError(src+": Unfinished comment at end of input", src, in.get_line_number());
+
+ // States listed before ACCEPT cannot end a token; those after it can
+ enum ParseState
+ {
+ INIT,
+ SIGN,
+ FLOATEXPINIT,
+ FLOATEXPSIGN,
+ STRING,
+ ACCEPT,
+ ZERO,
+ DECIMAL,
+ HEXADECIMAL,
+ OCTAL,
+ FLOAT,
+ FLOATEXP,
+ IDENTIFIER
+ };
+
+ // Token type produced when a token ends in the state of the same index
+ static Token::Type token_type[]=
+ {
+ Token::SPECIAL,
+ Token::SPECIAL,
+ Token::SPECIAL,
+ Token::SPECIAL,
+ Token::STRING,
+ Token::SPECIAL,
+ Token::INTEGER,
+ Token::INTEGER,
+ Token::INTEGER,
+ Token::INTEGER,
+ Token::FLOAT,
+ Token::FLOAT,
+ Token::IDENTIFIER
+ };
+
+ ParseState state=INIT;
+ string buf;
+ bool escape=false;
+
+ while(in)
+ {
+ // The first character was already read by the skipping loop above
+ if(state!=INIT)
+ c=in.get();
+ int next=in.peek();
+
+ buf+=c;
+
+ switch(state)
+ {
+ case INIT:
+ if(c=='0')
+ state=ZERO;
+ else if(c=='-' || c=='+')
+ state=SIGN;
+ else if(c=='.')
+ state=FLOAT;
+ else if(c=='"')
+ state=STRING;
+ else if(c=='{' || c=='}' || c==';')
+ return Token(Token::SPECIAL, string(1, c));
+ else if(isdigit(c))
+ state=DECIMAL;
+ else if(isalpha(c) || c=='_')
+ state=IDENTIFIER;
+ else
+ parse_error(c, state);
+ break;
+
+ case SIGN:
+ if(c=='0')
+ state=ZERO;
+ else if(isdigit(c))
+ state=DECIMAL;
+ else if(c=='.')
+ state=FLOAT;
+ else
+ parse_error(c, state);
+ break;
+
+ case ZERO:
+ if(c=='x')
+ state=HEXADECIMAL;
+ else if(isdigit(c))
+ state=OCTAL;
+ else if(c=='.')
+ state=FLOAT;
+ else
+ parse_error(c, state);
+ break;
+
+ case DECIMAL:
+ if(c=='.')
+ state=FLOAT;
+ else if(!isdigit(c))
+ parse_error(c, state);
+ break;
+
+ case HEXADECIMAL:
+ if(!isxdigit(c))
+ parse_error(c, state);
+ break;
+
+ case OCTAL:
+ if(!isodigit(c))
+ parse_error(c, state);
+ break;
+
+ case FLOAT:
+ if(c=='e' || c=='E')
+ state=FLOATEXPINIT;
+ else if(!isdigit(c))
+ parse_error(c, state);
+ break;
+
+ case FLOATEXPINIT:
+ if(c=='+' || c=='-')
+ state=FLOATEXPSIGN;
+ else if(isdigit(c))
+ state=FLOATEXP;
+ else
+ parse_error(c, state);
+ break;
+
+ case FLOATEXPSIGN:
+ if(isdigit(c))
+ state=FLOATEXP;
+ else
+ parse_error(c, state);
+ break;
+
+ case FLOATEXP:
+ if(!isdigit(c))
+ parse_error(c, state);
+ break;
+
+ case STRING:
+ // A quote ends the string unless the preceding backslash escapes it
+ if(c=='\\')
+ escape=!escape;
+ else if(c=='"' && !escape)
+ return Token(Token::STRING, unescape_string(buf));
+ else
+ escape=false;
+ break;
+
+ case IDENTIFIER:
+ if(!isalpha(c) && !isdigit(c) && c!='_')
+ parse_error(c, state);
+ break;
+
+ default:
+ throw Exception(get_location()+": Internal error (bad state)");
+ }
+
+ // The token ends when a delimiter follows and the state may end a token
+ if(is_delimiter(next) && state>=ACCEPT)
+ return Token(token_type[state], buf);
+ }
+
+ return Token(Token::SPECIAL, "");
+}
+
+/* Tells whether c ends the token before it: whitespace, a brace, a semicolon
+or a slash (the possible start of a comment). */
+bool TextParser::is_delimiter(int c)
+{
+	switch(c)
+	{
+	case '{':
+	case '}':
+	case ';':
+	case '/':
+		return true;
+	default:
+		return isspace(c)!=0;
+	}
+}
+
+/* Tells whether c is an octal digit ('0' through '7'). */
+bool TextParser::isodigit(int c)
+{
+	return !(c<'0' || c>'7');
+}
+
+/* Converts the text of a string token, including its surrounding quotes,
+into the string it denotes.  Recognised escapes are \n, \t, \\, \" and \x
+followed by two hexadecimal digits.  Throws ParseError for any other escape
+or for a non-hexadecimal character inside a \x escape. */
+string TextParser::unescape_string(const string &str)
+{
+	string result;
+	bool escape=false;
+	// Zero when idle; otherwise a marker bit above the digits gathered so far
+	unsigned hexcape=0;
+
+	// Leave out the opening and closing quote
+	const string::const_iterator last=str.end()-1;
+	for(string::const_iterator i=str.begin()+1; i!=last; ++i)
+	{
+		const char c=*i;
+
+		if(escape)
+		{
+			switch(c)
+			{
+			case 'n':  result+='\n'; break;
+			case 't':  result+='\t'; break;
+			case '\\': result+='\\'; break;
+			case '"':  result+='"'; break;
+			case 'x':  hexcape=0x100; break;
+			default:
+				throw ParseError(format("%s: Invalid escape sequence '\\%c'", get_location(), c), src, in.get_line_number());
+			}
+			escape=false;
+		}
+		else if(hexcape)
+		{
+			unsigned digit=0;
+			if(c>='0' && c<='9')
+				digit=c-'0';
+			else if(c>='a' && c<='f')
+				digit=c-'a'+10;
+			else if(c>='A' && c<='F')
+				digit=c-'A'+10;
+			else
+				throw ParseError(get_location()+": Invalid hex digit", src, in.get_line_number());
+
+			// After two digits the marker bit has moved up to 0x10000
+			hexcape=(hexcape<<4)|digit;
+			if(hexcape&0x10000)
+			{
+				result+=hexcape&0xFF;
+				hexcape=0;
+			}
+		}
+		else if(c=='\\')
+			escape=true;
+		else
+			result+=c;
+	}
+
+	return result;
+}
+
+/* Returns "<source>:<line>" for the current input position, for use as a
+prefix in error messages.  Built with format(), like the other messages in
+this file, so it does not depend on <sstream> being pulled in indirectly. */
+string TextParser::get_location()
+{
+	return format("%s:%d", src, in.get_line_number());
+}
+
+/* Throws a ParseError that reports the offending character c and the numeric
+tokenizer state in which it was met.  Never returns. */
+void TextParser::parse_error(int c, int state)
+{
+	const char offending=static_cast<char>(c);
+	const string message=format("%s: Parse error at '%c' (state %d)", get_location(), offending, state);
+	throw ParseError(message, src, in.get_line_number());
+}
+
+} // namespace DataFile
+} // namespace Msp
--- /dev/null
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2007 Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#ifndef MSP_DATAFILE_TEXTPARSER_H_
+#define MSP_DATAFILE_TEXTPARSER_H_
+
+#include "parsermode.h"
+
+namespace Msp {
+namespace DataFile {
+
+class Token;
+
+/* Parser mode for the human-readable text syntax. */
+class TextParser: public ParserMode
+{
+public:
+ TextParser(Input &, const std::string &);
+ // Parses and returns the next statement
+ virtual Statement parse();
+protected:
+ // Parses a statement, optionally starting from an already-read token
+ Statement parse_statement(const Token *);
+ // Reads the next token, skipping whitespace and comments
+ Token parse_token();
+ bool is_delimiter(int);
+ bool isodigit(int);
+ // Strips the quotes from a string token and resolves its escape sequences
+ std::string unescape_string(const std::string &);
+ // Returns "<source>:<line>" for the current input position
+ std::string get_location();
+ // Throws a ParseError for a character (first argument) met in a tokenizer state (second)
+ void parse_error(int, int);
+};
+
+} // namespace DataFile
+} // namespace Msp
+
+#endif
--- /dev/null
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2006 Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#include "statement.h"
+#include "textwriter.h"
+
+using namespace std;
+
+namespace Msp {
+namespace DataFile {
+
+/* Creates a text-mode writer that emits to stream o. */
+TextWriter::TextWriter(ostream &o):
+ WriterMode(o)
+{ }
+
+/* Writes st as a top-level statement, i.e. at indentation level 0. */
+void TextWriter::write(const Statement &st)
+{
+ write_(st, 0);
+}
+
+/* Returns str with the characters that cannot appear verbatim inside a quoted
+string replaced by the escape sequences TextParser::unescape_string reads. */
+static string escape_string(const string &str)
+{
+	string result;
+	result.reserve(str.size());
+	for(string::const_iterator i=str.begin(); i!=str.end(); ++i)
+	{
+		switch(*i)
+		{
+		case '"':  result+="\\\""; break;
+		case '\\': result+="\\\\"; break;
+		case '\n': result+="\\n"; break;
+		case '\t': result+="\\t"; break;
+		default:   result+=*i;
+		}
+	}
+	return result;
+}
+
+/* Writes st in text syntax, indented by level tabs: the keyword, the
+arguments separated by spaces, then the sub-statements inside braces one level
+deeper if there are any, and a terminating semicolon. */
+void TextWriter::write_(const Statement &st, unsigned level)
+{
+	string indent(level, '\t');
+
+	out<<indent<<st.keyword;
+	for(ValueArray::const_iterator i=st.args.begin(); i!=st.args.end(); ++i)
+	{
+		out<<' ';
+		if(i->get_type()==STRING)
+			// Escape so that quotes and backslashes in the value survive a round trip
+			out<<'\"'<<escape_string(i->get_raw())<<'\"';
+		else if(i->get_type()==BOOLEAN)
+			out<<(i->get<bool>() ? "true" : "false");
+		else
+			out<<i->get_raw();
+	}
+	if(!st.sub.empty())
+	{
+		out<<'\n'<<indent<<"{\n";
+		for(list<Statement>::const_iterator i=st.sub.begin(); i!=st.sub.end(); ++i)
+			write_(*i, level+1);
+		out<<indent<<'}';
+	}
+	out<<";\n";
+}
+
+} // namespace DataFile
+} // namespace Msp
--- /dev/null
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2006 Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#ifndef MSP_DATAFILE_TEXTWRITER_H_
+#define MSP_DATAFILE_TEXTWRITER_H_
+
+#include "writermode.h"
+
+namespace Msp {
+namespace DataFile {
+
+/* Writer mode producing the human-readable text syntax. */
+class TextWriter: public WriterMode
+{
+public:
+ TextWriter(std::ostream &o);
+ // Writes st and its sub-statements as text
+ void write(const Statement &st);
+private:
+ // Writes st indented by the given number of tabs
+ void write_(const Statement &st, unsigned);
+};
+
+} // namespace DataFile
+} // namespace Msp
+
+#endif
#ifndef MSP_DATAFILE_VALUE_H_
#define MSP_DATAFILE_VALUE_H_
-#include <sstream>
-#include <string>
#include <vector>
+#include <msp/strings/lexicalcast.h>
#include "error.h"
namespace Msp {
namespace DataFile {
+enum Type
+{
+ INTEGER,
+ FLOAT,
+ STRING,
+ BOOLEAN,
+ ENUM
+};
+
+template<typename T> struct TypeResolver { static const Type type=ENUM; };
+template<> struct TypeResolver<short> { static const Type type=INTEGER; };
+template<> struct TypeResolver<unsigned short> { static const Type type=INTEGER; };
+template<> struct TypeResolver<int> { static const Type type=INTEGER; };
+template<> struct TypeResolver<unsigned> { static const Type type=INTEGER; };
+template<> struct TypeResolver<long> { static const Type type=INTEGER; };
+template<> struct TypeResolver<unsigned long> { static const Type type=INTEGER; };
+template<> struct TypeResolver<long long> { static const Type type=INTEGER; };
+template<> struct TypeResolver<unsigned long long> { static const Type type=INTEGER; };
+template<> struct TypeResolver<float> { static const Type type=FLOAT; };
+template<> struct TypeResolver<double> { static const Type type=FLOAT; };
+template<> struct TypeResolver<bool> { static const Type type=BOOLEAN; };
+template<> struct TypeResolver<std::string> { static const Type type=STRING; };
+template<typename T> struct TypeResolver<const T> { static const Type type=TypeResolver<T>::type; };
+template<typename T> struct TypeResolver<T &> { static const Type type=TypeResolver<T>::type; };
+template<typename T> struct TypeResolver<const T &> { static const Type type=TypeResolver<T>::type; };
+
class Value
{
public:
- enum Type
- {
- INTEGER,
- FLOAT,
- STRING,
- BOOLEAN,
- ENUM
- };
-
Value(Type t, const std::string &d): type(t), data(d) { }
+
+ template<typename T>
+ Value(T d): type(TypeResolver<T>::type), data(lexical_cast(d)) { }
+
template<typename T>
T get() const;
+
+ Type get_type() const { return type; }
+ const std::string &get_raw() const { return data; }
private:
Type type;
std::string data;
};
-typedef std::vector<Value> ValueArray;
-
-template<typename T> struct TypeResolver { static const Value::Type type=Value::ENUM; };
-
-template<> struct TypeResolver<short> { static const Value::Type type=Value::INTEGER; };
-template<> struct TypeResolver<unsigned short> { static const Value::Type type=Value::INTEGER; };
-template<> struct TypeResolver<int> { static const Value::Type type=Value::INTEGER; };
-template<> struct TypeResolver<unsigned> { static const Value::Type type=Value::INTEGER; };
-template<> struct TypeResolver<long> { static const Value::Type type=Value::INTEGER; };
-template<> struct TypeResolver<unsigned long> { static const Value::Type type=Value::INTEGER; };
-template<> struct TypeResolver<float> { static const Value::Type type=Value::FLOAT; };
-template<> struct TypeResolver<double> { static const Value::Type type=Value::FLOAT; };
-template<> struct TypeResolver<bool> { static const Value::Type type=Value::BOOLEAN; };
-template<Value::Type T> inline bool check_type(Value::Type) { return false; }
+typedef std::vector<Value> ValueArray;
-template<> inline bool check_type<Value::INTEGER>(Value::Type t) { return t==Value::INTEGER; }
-template<> inline bool check_type<Value::FLOAT>(Value::Type t) { return t==Value::INTEGER || t==Value::FLOAT; }
-template<> inline bool check_type<Value::BOOLEAN>(Value::Type t) { return t==Value::BOOLEAN; }
-template<> inline bool check_type<Value::STRING>(Value::Type t) { return t==Value::STRING; }
-template<> inline bool check_type<Value::ENUM>(Value::Type t) { return t==Value::ENUM; }
+template<Type T> inline bool check_type(Type t) { return t==T; }
+template<> inline bool check_type<FLOAT>(Type t) { return t==INTEGER || t==FLOAT; }
template<typename T>
inline T Value::get() const
inline std::string Value::get<std::string>() const
{
if(type!=STRING)
- throw TypeError("Value is not a string");
+ throw TypeError("Type mismatch");
return data;
}
inline const std::string &Value::get<const std::string&>() const
{
if(type!=STRING)
- throw TypeError("Value is not a string");
+ throw TypeError("Type mismatch");
return data;
}
--- /dev/null
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2006 Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#include "binarywriter.h"
+#include "statement.h"
+#include "textwriter.h"
+#include "writer.h"
+
+using namespace std;
+
+namespace Msp {
+namespace DataFile {
+
+/* Creates a writer on stream o.  Output starts out in text mode. */
+Writer::Writer(ostream &o):
+ out(o),
+ mode(new TextWriter(out)),
+ binary(false)
+{ }
+
+/* Writes st using whichever mode (text or binary) is currently active. */
+void Writer::write(const Statement &st)
+{
+ mode->write(st);
+}
+
+/* Switches between binary (b==true) and text output.  Does nothing if the
+requested mode is already active.  Otherwise a __bin or __text marker
+statement is written in the current mode, telling the parser to switch, and
+the mode object is replaced.  If anything throws, the writer is left in its
+previous mode. */
+void Writer::set_binary(bool b)
+{
+	if(b==binary)
+		return;
+
+	Statement st;
+	if(b)
+		st.keyword="__bin";
+	else
+		st.keyword="__text";
+
+	/* Create the replacement before touching the current mode, so a failed
+	allocation cannot leave mode pointing at a deleted object. */
+	WriterMode *new_mode;
+	if(b)
+		new_mode=new BinaryWriter(out);
+	else
+		new_mode=new TextWriter(out);
+
+	try
+	{
+		mode->write(st);
+	}
+	catch(...)
+	{
+		delete new_mode;
+		throw;
+	}
+
+	delete mode;
+	mode=new_mode;
+	binary=b;
+}
+
+} // namespace DataFile
+} // namespace Msp
--- /dev/null
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2006 Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#ifndef MSP_DATAFILE_WRITER_H_
+#define MSP_DATAFILE_WRITER_H_
+
+#include <map>
+#include <ostream>
+#include "binarydict.h"
+
+namespace Msp {
+namespace DataFile {
+
+class Statement;
+class WriterMode;
+
+/* Front end for writing statements to a stream, in either text or binary
+form.  The actual encoding is delegated to a WriterMode object.
+NOTE(review): mode is allocated with new but no destructor is declared here,
+so nothing visible releases it; copying a Writer would also share the
+pointer.  Looks like a leak - confirm and add a destructor if so. */
+class Writer
+{
+private:
+ std::ostream &out;
+ // The active encoder
+ WriterMode *mode;
+ // Whether the active encoder is the binary one
+ bool binary;
+
+public:
+ Writer(std::ostream &);
+ // Writes a statement in the current mode
+ void write(const Statement &);
+ // Selects binary (true) or text (false) output
+ void set_binary(bool);
+};
+
+} // namespace DataFile
+} // namespace Msp
+
+#endif
--- /dev/null
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2006 Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#ifndef MSP_DATAFILE_WRITERMODE_H_
+#define MSP_DATAFILE_WRITERMODE_H_
+
+#include <ostream>
+
+namespace Msp {
+namespace DataFile {
+
+class Statement;
+
+/* Abstract base for the interchangeable output encoders.  A mode writes to a
+stream it holds by reference. */
+class WriterMode
+{
+protected:
+ std::ostream &out;
+
+ WriterMode(std::ostream &o): out(o) { }
+public:
+ virtual ~WriterMode() { }
+
+ // Writes st, including its sub-statements, to the stream
+ virtual void write(const Statement &st) =0;
+};
+
+} // namespace DataFile
+} // namespace Msp
+
+#endif
--- /dev/null
+/* $Id$ */
+#include <fstream>
+#include <iostream>
+#include <msp/core/application.h>
+#include <msp/core/getopt.h>
+#include "source/parser.h"
+#include "source/statement.h"
+#include "source/writer.h"
+
+using namespace std;
+using namespace Msp;
+
+/* Command-line tool that reads a datafile and writes it back out, optionally
+in binary form. */
+class DataTool: public Application
+{
+private:
+ // Input file name (first non-option argument)
+ string in_fn;
+ // Output file name; "-" selects standard output
+ string out_fn;
+ // Set by the -b / --binary option
+ bool binary;
+public:
+ DataTool(int argc, char **argv);
+ int main();
+
+ static Application::RegApp<DataTool> reg;
+};
+
+
+/* Parses the command line.  Options: -o/--output <file> (default "-", meaning
+standard output) and -b/--binary.  The first remaining argument is the input
+file name; throws UsageError if there is none. */
+DataTool::DataTool(int argc, char **argv):
+	out_fn("-"),
+	// Must have a defined value when -b is not given; main() reads it unconditionally
+	binary(false)
+{
+	GetOpt getopt;
+	getopt.add_option('o', "output", out_fn, GetOpt::REQUIRED_ARG);
+	getopt.add_option('b', "binary", binary, GetOpt::NO_ARG);
+	getopt(argc, argv);
+
+	const vector<string> &args=getopt.get_args();
+	if(args.empty())
+		throw UsageError("Must give input filename");
+
+	in_fn=args[0];
+}
+
+/* Copies every valid statement from the input file to the output, switching
+the writer to binary mode first if requested.  Returns 1 if either file cannot
+be opened and 0 otherwise. */
+int DataTool::main()
+{
+	ifstream in(in_fn.c_str());
+	if(!in)
+	{
+		cerr<<"Couldn't open input file\n";
+		return 1;
+	}
+
+	/* The file stream lives on the stack so it is closed on every exit path,
+	including an exception thrown by the parser or the writer. */
+	ofstream out_file;
+	ostream *out=&cout;
+	if(out_fn!="-")
+	{
+		out_file.open(out_fn.c_str());
+		if(!out_file)
+		{
+			cerr<<"Couldn't open output file\n";
+			return 1;
+		}
+		out=&out_file;
+	}
+
+	DataFile::Parser parser(in, in_fn);
+	DataFile::Writer writer(*out);
+	if(binary)
+		writer.set_binary(true);
+
+	while(parser)
+	{
+		DataFile::Statement st=parser.parse();
+		if(st.valid)
+			writer.write(st);
+	}
+
+	return 0;
+}
+
+Application::RegApp<DataTool> DataTool::reg;