From: Mikko Rasa Date: Mon, 17 Sep 2007 13:58:50 +0000 (+0000) Subject: Add binary data format X-Git-Tag: 1.0~22 X-Git-Url: http://git.tdb.fi/?p=libs%2Fdatafile.git;a=commitdiff_plain;h=27630d44298cb67e075c166f4421288cc8ca117e Add binary data format Add writing support Add a tool for conversion and validation Some refactoring --- diff --git a/Build b/Build index 3662195..b400475 100644 --- a/Build +++ b/Build @@ -6,6 +6,7 @@ package "mspdatafile" description "Mikkosoft Productions datafile library"; require "mspcore"; + require "mspstrings"; library "mspdatafile" { @@ -13,4 +14,15 @@ package "mspdatafile" install true; install_headers "msp/datafile"; }; + + program "mspdatatool" + { + source "tool.cpp"; + install true; + build_info + { + libpath "."; + library "mspdatafile"; + }; + }; }; diff --git a/source/binarydict.h b/source/binarydict.h new file mode 100644 index 0000000..3b19561 --- /dev/null +++ b/source/binarydict.h @@ -0,0 +1,31 @@ +/* $Id$ + +This file is part of libmspdatafile +Copyright © 2006 Mikko Rasa, Mikkosoft Productions +Distributed under the LGPL +*/ + +#ifndef MSP_DATAFILE_BINARYDICT_H_ +#define MSP_DATAFILE_BINARYDICT_H_ + +#include + +namespace Msp { +namespace DataFile { + +struct DictEntry +{ + std::string keyword; + std::string args; + + DictEntry() { } + DictEntry(const std::string &k, const std::string &a): keyword(k), args(a) { } + + bool operator<(const DictEntry &o) const + { return keyword +#include +#include "binaryparser.h" +#include "input.h" + +using namespace std; + +namespace Msp { +namespace DataFile { + +BinaryParser::BinaryParser(Input &i, const string &s): + ParserMode(i, s), + first(true) +{ + dict[1]=DictEntry("__kw", "iss"); + dict[2]=DictEntry("__enum", "is"); +} + +Statement BinaryParser::parse() +{ + while(1) + { + Statement st=parse_statement(); + if(st.keyword=="__kw") + { + if(st.args.size()!=3) + throw TypeError(src+": Keyword definition must have three arguments"); + + const unsigned id=st.args[0].get(); + const string &kw=st.args[1].get(); + const string &args=st.args[2].get(); + dict[id]=DictEntry(kw, args); + } + else if(st.keyword=="__enum") + { + if(st.args.size()!=2) + throw TypeError(src+": Enum definition must have three arguments"); + + const unsigned id=st.args[0].get(); + enums[id]=st.args[1].get(); + } + else + return st; + } +} + +Statement BinaryParser::parse_statement() +{ + while(first && in.peek()=='\n') + in.get(); + first=false; + + unsigned id=parse_int(); + if(!in) + return Statement(); + + Dictionary::const_iterator i=dict.find(id); + if(i==dict.end()) + throw ParseError(format("%s: Unknown statement ID %d", src, id), src, 0); + const DictEntry &de=i->second; + + Statement result; + result.keyword=de.keyword; + result.source=src; + + for(unsigned j=0; jsecond; +} + +} // namespace DataFile +} // namespace Msp diff --git a/source/binaryparser.h b/source/binaryparser.h new file mode 100644 index 0000000..560703c --- /dev/null +++ b/source/binaryparser.h @@ -0,0 +1,43 @@ +/* $Id$ + +This file is part of libmspdatafile +Copyright © 2007 Mikko Rasa, Mikkosoft Productions +Distributed under the LGPL +*/ + +#ifndef MSP_DATAFILE_BINARYPARSER_H_ +#define MSP_DATAFILE_BINARYPARSER_H_ + +#include +#include "binarydict.h" +#include "parsermode.h" + +namespace Msp { +namespace DataFile { + +class BinaryParser: public ParserMode +{ +private: + typedef std::map Dictionary; + typedef std::map EnumMap; + + Dictionary dict; + EnumMap enums; + bool first; + +public: + BinaryParser(Input &i, const std::string &s); + Statement parse(); +private: + Statement parse_statement(); + long long parse_int(); + float parse_float(); + std::string parse_string(); + bool parse_bool(); + std::string parse_enum(); +}; + +} // namespace DataFile +} // namespace Msp + +#endif diff --git a/source/binarywriter.cpp b/source/binarywriter.cpp new file mode 100644 index 0000000..e89def3 --- /dev/null +++ b/source/binarywriter.cpp @@ -0,0 +1,141 @@ +/* $Id$ + +This file is part of libmspdatafile +Copyright © 2006 Mikko Rasa, Mikkosoft Productions +Distributed under the LGPL +*/ + +#include "binarywriter.h" +#include "statement.h" + +using namespace std; + +namespace Msp { +namespace DataFile { + +BinaryWriter::BinaryWriter(ostream &o): + WriterMode(o), + next_st_id(3), + next_enum_id(1) +{ + dict[DictEntry("__kw", "iss")]=1; + dict[DictEntry("__enum", "is")]=1; +} + +void BinaryWriter::write(const Statement &st) +{ + collect_keywords(st); + write_(st); +} + +void BinaryWriter::write_(const Statement &st) +{ + Dictionary::iterator i=dict.find(create_entry(st)); + if(i==dict.end()) + throw InvalidParameterValue("Unknown statement"); + + write_int(i->second); + for(ValueArray::const_iterator j=st.args.begin(); j!=st.args.end(); ++j) + switch(j->get_type()) + { + case INTEGER: write_int (j->get()); break; + case STRING: write_string(j->get()); break; + case BOOLEAN: write_int (j->get()); break; + case FLOAT: write_float (j->get()); break; + case ENUM: write_enum (j->get_raw()); break; + } + + write_int(st.sub.size()); + for(list::const_iterator j=st.sub.begin(); j!=st.sub.end(); ++j) + write(*j); +} + +DictEntry BinaryWriter::create_entry(const Statement &st) +{ + static const char types[]="ifsbe"; + + string args; + for(ValueArray::const_iterator i=st.args.begin(); i!=st.args.end(); ++i) + { + if(i->get_type()>=5) + throw InvalidParameterValue("Invalid argument type"); + args+=types[i->get_type()]; + } + + return DictEntry(st.keyword, args); +} + +void BinaryWriter::collect_keywords(const Statement &st) +{ + DictEntry de=create_entry(st); + + if(!dict.count(de)) + { + Statement kst; + kst.keyword="__kw"; + kst.args.push_back(next_st_id); + kst.args.push_back(de.keyword); + kst.args.push_back(de.args); + write_(kst); + + dict.insert(Dictionary::value_type(de, next_st_id++)).first; + } + + for(ValueArray::const_iterator i=st.args.begin(); i!=st.args.end(); ++i) + if(i->get_type()==ENUM && !enums.count(i->get_raw())) + { + Statement est; + est.keyword="__enum"; + est.args.push_back(next_enum_id); + est.args.push_back(i->get_raw()); + write_(est); + + enums[i->get_raw()]=next_enum_id++; + } + + for(list::const_iterator i=st.sub.begin(); i!=st.sub.end(); ++i) + collect_keywords(*i); +} + +void BinaryWriter::write_int(long long n) +{ + unsigned i=1; + for(; n>>(i*7); ++i); + for(; i--;) + out.put(n>>(i*7) & 0x7F | (i?0x80:0)); +} + +void BinaryWriter::write_string(const string &s) +{ + write_int(s.size()); + out.write(s.data(), s.size()); +} + +void BinaryWriter::write_float(float f) +{ + union + { + float v; + char d[sizeof(float)]; + }; + + v=f; +#if BYTE_ORDER == LITTLE_ENDIAN + for(unsigned i=sizeof(float); i--;) + out.put(d[i]); +#else + for(unsigned i=0; isecond); +} + +} // namespace DataFile +} // namespace Msp diff --git a/source/binarywriter.h b/source/binarywriter.h new file mode 100644 index 0000000..cf8f9fc --- /dev/null +++ b/source/binarywriter.h @@ -0,0 +1,45 @@ +/* $Id$ + +This file is part of libmspdatafile +Copyright © 2006 Mikko Rasa, Mikkosoft Productions +Distributed under the LGPL +*/ + +#ifndef MSP_DATAFILE_BINARYWRITER_H_ +#define MSP_DATAFILE_BINARYWRITER_H_ + +#include +#include "binarydict.h" +#include "writermode.h" + +namespace Msp { +namespace DataFile { + +class BinaryWriter: public WriterMode +{ +private: + typedef std::map Dictionary; + typedef std::map EnumMap; + + Dictionary dict; + unsigned next_st_id; + EnumMap enums; + unsigned next_enum_id; + +public: + BinaryWriter(std::ostream &o); + void write(const Statement &st); +private: + void write_(const Statement &st); + DictEntry create_entry(const Statement &st); + void collect_keywords(const Statement &st); + void write_int(long long n); + void write_string(const std::string &s); + void write_float(float f); + void write_enum(const std::string &e); +}; + +} // namespace DataFile +} // namespace Msp + +#endif diff --git a/source/parser.cpp b/source/parser.cpp index 8fccec2..82ac006 100644 --- a/source/parser.cpp +++ b/source/parser.cpp @@ -6,10 +6,10 @@ Distributed under the LGPL */ #include #include -#include "error.h" +#include "binaryparser.h" #include "parser.h" #include "statement.h" -#include "token.h" +#include "textparser.h" using namespace std; @@ -19,9 +19,15 @@ namespace DataFile { Parser::Parser(istream &i, const string &s): in(i), src(s), - good(true) + good(true), + mode(new TextParser(in, src)) { } +Parser::~Parser() +{ + delete mode; +} + Statement Parser::parse() { if(!good) @@ -29,351 +35,28 @@ Statement Parser::parse() try { - return parse_(0); - } - catch(const Exception &e) - { - good=false; - throw; - } -} - -Statement Parser::parse_(const Token *t) -{ - Statement result; - bool sub=false; - bool finish=false; - - while(in) - { - Token token; - if(t) - { - token=*t; - t=0; - } - else - token=parse_token(); - - if(result.keyword.empty()) - { - if(token.str.empty()) - break; - else if(token.type!=Token::IDENTIFIER) - throw ParseError(get_location()+": Syntax error at token '"+token.str+"' (expected an identifier)", src, in.get_line_number()); - result.keyword=token.str; - result.valid=true; - result.source=src; - result.line=in.get_line_number(); - } - else if(sub) + while(1) { - if(token.str=="}") + Statement st=mode->parse(); + if(st.keyword=="__bin") { - sub=false; - finish=true; + delete mode; + mode=new BinaryParser(in, src); } - else + else if(st.keyword=="__text") { - Statement ss=parse_(&token); - result.sub.push_back(ss); + delete mode; + mode=new TextParser(in, src); } - } - else if(finish) - { - if(token.str!=";") - throw ParseError(get_location()+": Syntax error at token '"+token.str+"' (Expected a ';')", src, in.get_line_number()); - break; - } - else if(token.str=="{") - sub=true; - else if(token.str==";") - break; - else if(token.type==Token::INTEGER) - result.args.push_back(Value(Value::INTEGER, token.str)); - else if(token.type==Token::FLOAT) - result.args.push_back(Value(Value::FLOAT, token.str)); - else if(token.type==Token::STRING) - result.args.push_back(Value(Value::STRING, token.str)); - else if(token.type==Token::IDENTIFIER) - { - if(token.str=="true") - result.args.push_back(Value(Value::BOOLEAN, "1")); - else if(token.str=="false") - result.args.push_back(Value(Value::BOOLEAN, "0")); else - result.args.push_back(Value(Value::ENUM, token.str)); - //result.args.push_back(resolve_identifiertoken.str); + return st; } - else if(token.str=="") - throw ParseError(src+": Unexcepted EOF", src, in.get_line_number()); - else - throw ParseError(get_location()+": Syntax error", src, in.get_line_number()); } - - return result; -} - -Token Parser::parse_token() -{ - int c=0; - unsigned comment=0; - - // Skip over comments and whitespace - while(in) - { - c=in.get(); - int next=in.peek(); - - if(c=='/' && next=='/') - comment=1; - else if(c=='/' && next=='*') - comment=2; - else if(c=='\n' && comment==1) - comment=0; - else if(c=='*' && next=='/' && comment==2) - comment=3; - else if(comment==3) // Skip the second character of block comment end - comment=0; - else if(!isspace(c) && !comment) - break; - } - - if(comment) // Didn't hit any non-whitespace - throw ParseError(src+": Unfinished comment", src, in.get_line_number()); - - enum ParseState - { - INIT, - SIGN, - FLOATEXPINIT, - FLOATEXPSIGN, - STRING, - ACCEPT, - ZERO, - DECIMAL, - HEXADECIMAL, - OCTAL, - FLOAT, - FLOATEXP, - IDENTIFIER - }; - - static Token::Type token_type[]= - { - Token::SPECIAL, - Token::SPECIAL, - Token::SPECIAL, - Token::SPECIAL, - Token::STRING, - Token::SPECIAL, - Token::INTEGER, - Token::INTEGER, - Token::INTEGER, - Token::INTEGER, - Token::FLOAT, - Token::FLOAT, - Token::IDENTIFIER - }; - - ParseState state=INIT; - string buf; - bool escape=false; - - while(in) - { - if(state!=INIT) - c=in.get(); - int next=in.peek(); - - buf+=c; - - switch(state) - { - case INIT: - if(c=='0') - state=ZERO; - else if(c=='-' || c=='+') - state=SIGN; - else if(c=='.') - state=FLOAT; - else if(c=='"') - state=STRING; - else if(c=='{' || c=='}' || c==';') - return Token(Token::SPECIAL, string(1, c)); - else if(isdigit(c)) - state=DECIMAL; - else if(isalpha(c)) - state=IDENTIFIER; - else - parse_error(c, state); - break; - - case SIGN: - if(c=='0') - state=ZERO; - else if(isdigit(c)) - state=DECIMAL; - else if(c=='.') - state=FLOAT; - else - parse_error(c, state); - break; - - case ZERO: - if(c=='x') - state=HEXADECIMAL; - else if(isdigit(c)) - state=OCTAL; - else if(c=='.') - state=FLOAT; - else - parse_error(c, state); - break; - - case DECIMAL: - if(c=='.') - state=FLOAT; - else if(!isdigit(c)) - parse_error(c, state); - break; - - case HEXADECIMAL: - if(!isxdigit(c)) - parse_error(c, state); - break; - - case OCTAL: - if(!isodigit(c)) - parse_error(c, state); - break; - - case FLOAT: - if(c=='e' || c=='E') - state=FLOATEXPINIT; - else if(!isdigit(c)) - parse_error(c, state); - break; - - case FLOATEXPINIT: - if(c=='+' || c=='-') - state=FLOATEXPSIGN; - else if(isdigit(c)) - state=FLOATEXP; - else - parse_error(c, state); - break; - - case FLOATEXPSIGN: - if(isdigit(c)) - state=FLOATEXP; - else - parse_error(c, state); - break; - - case FLOATEXP: - if(!isdigit(c)) - parse_error(c, state); - break; - - case STRING: - if(c=='\\') - escape=!escape; - else if(c=='"' && !escape) - return Token(Token::STRING, unescape_string(buf)); - else - escape=false; - break; - - case IDENTIFIER: - if(!isalpha(c) && !isdigit(c) && c!='_') - parse_error(c, state); - break; - - default: - throw Exception(get_location()+": Internal error (bad state)"); - } - - if(is_delimiter(next) && state>=ACCEPT) - return Token(token_type[state], buf); - } - - return Token(Token::SPECIAL, ""); -} - -bool Parser::is_delimiter(int c) -{ - return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/'); -} - -bool Parser::isodigit(int c) -{ - return (c>='0' && c<='7'); -} - -string Parser::unescape_string(const string &str) -{ - string result; - bool escape=false; - unsigned hexcape=0; - for(string::const_iterator i=str.begin()+1; i!=str.end()-1; ++i) + catch(const Exception &e) { - if(escape) - { - if(*i=='n') - result+='\n'; - else if(*i=='t') - result+='\t'; - else if(*i=='\\') - result+='\\'; - else if(*i=='"') - result+='"'; - else if(*i=='x') - hexcape=0x100; - else - throw ParseError("Invalid escape", src, in.get_line_number()); - escape=false; - } - else if(hexcape) - { - unsigned digit=0; - if(*i>='0' && *i<='9') - digit=*i-'0'; - else if(*i>='a' && *i<='f') - digit=*i-'a'+10; - else if(*i>='A' && *i<='F') - digit=*i-'A'+10; - else - throw ParseError("Invalid hex digit", src, in.get_line_number()); - - hexcape=(hexcape<<4)|digit; - if(hexcape&0x10000) - { - result+=hexcape&0xFF; - hexcape=0; - } - } - else if(*i=='\\') - escape=true; - else - result+=*i; + good=false; + throw; } - - return result; -} - -string Parser::get_location() -{ - ostringstream ss; - ss< +#include "statement.h" + +namespace Msp { +namespace DataFile { + +std::string Statement::get_location() const +{ + std::string result=source; + if(line) + result+=format(":%d", line); + return result; +} + +} // namespace DataFile +} // namespace Msp diff --git a/source/statement.h b/source/statement.h index 6b3c54b..f2fa6e6 100644 --- a/source/statement.h +++ b/source/statement.h @@ -8,8 +8,6 @@ Distributed under the LGPL #define MSP_DATAFILE_STATEMENT_H_ #include -#include -#include #include "value.h" namespace Msp { @@ -26,8 +24,7 @@ public: std::list sub; Statement(): valid(false), line(0) { } - std::string get_location() const - { std::ostringstream ss; ss< +#include "input.h" +#include "textparser.h" +#include "token.h" + +using namespace std; + +namespace Msp { +namespace DataFile { + +TextParser::TextParser(Input &i, const string &s): + ParserMode(i, s) +{ } + +Statement TextParser::parse() +{ + return parse_statement(0); +} + +Statement TextParser::parse_statement(const Token *t) +{ + Statement result; + bool sub=false; + bool finish=false; + + while(in) + { + Token token; + if(t) + { + token=*t; + t=0; + } + else + token=parse_token(); + + if(result.keyword.empty()) + { + if(token.str.empty()) + break; + else if(token.type!=Token::IDENTIFIER) + throw ParseError(format("%s: Syntax error at token '%s' (expected an identifier)", get_location(), token.str), src, in.get_line_number()); + result.keyword=token.str; + result.valid=true; + result.source=src; + result.line=in.get_line_number(); + } + else if(sub) + { + if(token.str=="}") + { + sub=false; + finish=true; + } + else + { + Statement ss=parse_statement(&token); + result.sub.push_back(ss); + } + } + else if(finish) + { + if(token.str!=";") + throw ParseError(format("%s: Syntax error at token '%s' (Expected a ';')", get_location(), token.str), src, in.get_line_number()); + break; + } + else if(token.str=="{") + sub=true; + else if(token.str==";") + break; + else if(token.type==Token::INTEGER) + result.args.push_back(Value(INTEGER, token.str)); + else if(token.type==Token::FLOAT) + result.args.push_back(Value(FLOAT, token.str)); + else if(token.type==Token::STRING) + result.args.push_back(Value(STRING, token.str)); + else if(token.type==Token::IDENTIFIER) + { + if(token.str=="true") + result.args.push_back(Value(BOOLEAN, "1")); + else if(token.str=="false") + result.args.push_back(Value(BOOLEAN, "0")); + else + result.args.push_back(Value(ENUM, token.str)); + //result.args.push_back(resolve_identifiertoken.str); + } + else if(token.str=="") + throw ParseError(src+": Unexcepted end of input", src, in.get_line_number()); + else + throw ParseError(get_location()+": Syntax error", src, in.get_line_number()); + } + + return result; +} + +Token TextParser::parse_token() +{ + int c=0; + unsigned comment=0; + + // Skip over comments and whitespace + while(in) + { + c=in.get(); + int next=in.peek(); + + if(c=='/' && next=='/') + comment=1; + else if(c=='/' && next=='*') + comment=2; + else if(c=='\n' && comment==1) + comment=0; + else if(c=='*' && next=='/' && comment==2) + comment=3; + else if(comment==3) // Skip the second character of block comment end + comment=0; + else if(!isspace(c) && !comment) + break; + } + + if(comment) // Didn't hit any non-whitespace + throw ParseError(src+": Unfinished comment at end of input", src, in.get_line_number()); + + enum ParseState + { + INIT, + SIGN, + FLOATEXPINIT, + FLOATEXPSIGN, + STRING, + ACCEPT, + ZERO, + DECIMAL, + HEXADECIMAL, + OCTAL, + FLOAT, + FLOATEXP, + IDENTIFIER + }; + + static Token::Type token_type[]= + { + Token::SPECIAL, + Token::SPECIAL, + Token::SPECIAL, + Token::SPECIAL, + Token::STRING, + Token::SPECIAL, + Token::INTEGER, + Token::INTEGER, + Token::INTEGER, + Token::INTEGER, + Token::FLOAT, + Token::FLOAT, + Token::IDENTIFIER + }; + + ParseState state=INIT; + string buf; + bool escape=false; + + while(in) + { + if(state!=INIT) + c=in.get(); + int next=in.peek(); + + buf+=c; + + switch(state) + { + case INIT: + if(c=='0') + state=ZERO; + else if(c=='-' || c=='+') + state=SIGN; + else if(c=='.') + state=FLOAT; + else if(c=='"') + state=STRING; + else if(c=='{' || c=='}' || c==';') + return Token(Token::SPECIAL, string(1, c)); + else if(isdigit(c)) + state=DECIMAL; + else if(isalpha(c) || c=='_') + state=IDENTIFIER; + else + parse_error(c, state); + break; + + case SIGN: + if(c=='0') + state=ZERO; + else if(isdigit(c)) + state=DECIMAL; + else if(c=='.') + state=FLOAT; + else + parse_error(c, state); + break; + + case ZERO: + if(c=='x') + state=HEXADECIMAL; + else if(isdigit(c)) + state=OCTAL; + else if(c=='.') + state=FLOAT; + else + parse_error(c, state); + break; + + case DECIMAL: + if(c=='.') + state=FLOAT; + else if(!isdigit(c)) + parse_error(c, state); + break; + + case HEXADECIMAL: + if(!isxdigit(c)) + parse_error(c, state); + break; + + case OCTAL: + if(!isodigit(c)) + parse_error(c, state); + break; + + case FLOAT: + if(c=='e' || c=='E') + state=FLOATEXPINIT; + else if(!isdigit(c)) + parse_error(c, state); + break; + + case FLOATEXPINIT: + if(c=='+' || c=='-') + state=FLOATEXPSIGN; + else if(isdigit(c)) + state=FLOATEXP; + else + parse_error(c, state); + break; + + case FLOATEXPSIGN: + if(isdigit(c)) + state=FLOATEXP; + else + parse_error(c, state); + break; + + case FLOATEXP: + if(!isdigit(c)) + parse_error(c, state); + break; + + case STRING: + if(c=='\\') + escape=!escape; + else if(c=='"' && !escape) + return Token(Token::STRING, unescape_string(buf)); + else + escape=false; + break; + + case IDENTIFIER: + if(!isalpha(c) && !isdigit(c) && c!='_') + parse_error(c, state); + break; + + default: + throw Exception(get_location()+": Internal error (bad state)"); + } + + if(is_delimiter(next) && state>=ACCEPT) + return Token(token_type[state], buf); + } + + return Token(Token::SPECIAL, ""); +} + +bool TextParser::is_delimiter(int c) +{ + return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/'); +} + +bool TextParser::isodigit(int c) +{ + return (c>='0' && c<='7'); +} + +string TextParser::unescape_string(const string &str) +{ + string result; + bool escape=false; + unsigned hexcape=0; + for(string::const_iterator i=str.begin()+1; i!=str.end()-1; ++i) + { + if(escape) + { + if(*i=='n') + result+='\n'; + else if(*i=='t') + result+='\t'; + else if(*i=='\\') + result+='\\'; + else if(*i=='"') + result+='"'; + else if(*i=='x') + hexcape=0x100; + else + throw ParseError(format("%s: Invalid escape sequence '\\%c'", get_location(), *i), src, in.get_line_number()); + escape=false; + } + else if(hexcape) + { + unsigned digit=0; + if(*i>='0' && *i<='9') + digit=*i-'0'; + else if(*i>='a' && *i<='f') + digit=*i-'a'+10; + else if(*i>='A' && *i<='F') + digit=*i-'A'+10; + else + throw ParseError(get_location()+": Invalid hex digit", src, in.get_line_number()); + + hexcape=(hexcape<<4)|digit; + if(hexcape&0x10000) + { + result+=hexcape&0xFF; + hexcape=0; + } + } + else if(*i=='\\') + escape=true; + else + result+=*i; + } + + return result; +} + +string TextParser::get_location() +{ + ostringstream ss; + ss<(c), state), src, in.get_line_number()); +} + +} // namespace DataFile +} // namespace Msp diff --git a/source/textparser.h b/source/textparser.h new file mode 100644 index 0000000..9dfcf6d --- /dev/null +++ b/source/textparser.h @@ -0,0 +1,36 @@ +/* $Id$ + +This file is part of libmspdatafile +Copyright © 2007 Mikko Rasa, Mikkosoft Productions +Distributed under the LGPL +*/ + +#ifndef MSP_DATAFILE_TEXTPARSER_H_ +#define MSP_DATAFILE_TEXTPARSER_H_ + +#include "parsermode.h" + +namespace Msp { +namespace DataFile { + +class Token; + +class TextParser: public ParserMode +{ +public: + TextParser(Input &, const std::string &); + virtual Statement parse(); +protected: + Statement parse_statement(const Token *); + Token parse_token(); + bool is_delimiter(int); + bool isodigit(int); + std::string unescape_string(const std::string &); + std::string get_location(); + void parse_error(int, int); +}; + +} // namespace DataFile +} // namespace Msp + +#endif diff --git a/source/textwriter.cpp b/source/textwriter.cpp new file mode 100644 index 0000000..68111c0 --- /dev/null +++ b/source/textwriter.cpp @@ -0,0 +1,51 @@ +/* $Id$ + +This file is part of libmspdatafile +Copyright © 2006 Mikko Rasa, Mikkosoft Productions +Distributed under the LGPL +*/ + +#include "statement.h" +#include "textwriter.h" + +using namespace std; + +namespace Msp { +namespace DataFile { + +TextWriter::TextWriter(ostream &o): + WriterMode(o) +{ } + +void TextWriter::write(const Statement &st) +{ + write_(st, 0); +} + +void TextWriter::write_(const Statement &st, unsigned level) +{ + string indent(level, '\t'); + + out<get_type()==STRING) + out<<'\"'<get_raw()<<'\"'; + else if(i->get_type()==BOOLEAN) + out<<(i->get() ? "true" : "false"); + else + out<get_raw(); + } + if(!st.sub.empty()) + { + out<<'\n'<::const_iterator i=st.sub.begin(); i!=st.sub.end(); ++i) + write_(*i, level+1); + out< -#include #include +#include #include "error.h" namespace Msp { namespace DataFile { +enum Type +{ + INTEGER, + FLOAT, + STRING, + BOOLEAN, + ENUM +}; + +template struct TypeResolver { static const Type type=ENUM; }; +template<> struct TypeResolver { static const Type type=INTEGER; }; +template<> struct TypeResolver { static const Type type=INTEGER; }; +template<> struct TypeResolver { static const Type type=INTEGER; }; +template<> struct TypeResolver { static const Type type=INTEGER; }; +template<> struct TypeResolver { static const Type type=INTEGER; }; +template<> struct TypeResolver { static const Type type=INTEGER; }; +template<> struct TypeResolver { static const Type type=INTEGER; }; +template<> struct TypeResolver { static const Type type=INTEGER; }; +template<> struct TypeResolver { static const Type type=FLOAT; }; +template<> struct TypeResolver { static const Type type=FLOAT; }; +template<> struct TypeResolver { static const Type type=BOOLEAN; }; +template<> struct TypeResolver { static const Type type=STRING; }; +template struct TypeResolver { static const Type type=TypeResolver::type; }; +template struct TypeResolver { static const Type type=TypeResolver::type; }; +template struct TypeResolver { static const Type type=TypeResolver::type; }; + class Value { public: - enum Type - { - INTEGER, - FLOAT, - STRING, - BOOLEAN, - ENUM - }; - Value(Type t, const std::string &d): type(t), data(d) { } + + template + Value(T d): type(TypeResolver::type), data(lexical_cast(d)) { } + template T get() const; + + Type get_type() const { return type; } + const std::string &get_raw() const { return data; } private: Type type; std::string data; }; -typedef std::vector ValueArray; - -template struct TypeResolver { static const Value::Type type=Value::ENUM; }; - -template<> struct TypeResolver { static const Value::Type type=Value::INTEGER; }; -template<> struct TypeResolver { static const Value::Type type=Value::INTEGER; }; -template<> struct TypeResolver { static const Value::Type type=Value::INTEGER; }; -template<> struct TypeResolver { static const Value::Type type=Value::INTEGER; }; -template<> struct TypeResolver { static const Value::Type type=Value::INTEGER; }; -template<> struct TypeResolver { static const Value::Type type=Value::INTEGER; }; -template<> struct TypeResolver { static const Value::Type type=Value::FLOAT; }; -template<> struct TypeResolver { static const Value::Type type=Value::FLOAT; }; -template<> struct TypeResolver { static const Value::Type type=Value::BOOLEAN; }; -template inline bool check_type(Value::Type) { return false; } +typedef std::vector ValueArray; -template<> inline bool check_type(Value::Type t) { return t==Value::INTEGER; } -template<> inline bool check_type(Value::Type t) { return t==Value::INTEGER || t==Value::FLOAT; } -template<> inline bool check_type(Value::Type t) { return t==Value::BOOLEAN; } -template<> inline bool check_type(Value::Type t) { return t==Value::STRING; } -template<> inline bool check_type(Value::Type t) { return t==Value::ENUM; } +template inline bool check_type(Type t) { return t==T; } +template<> inline bool check_type(Type t) { return t==INTEGER || t==FLOAT; } template inline T Value::get() const @@ -76,7 +83,7 @@ template<> inline std::string Value::get() const { if(type!=STRING) - throw TypeError("Value is not a string"); + throw TypeError("Type mismatch"); return data; } @@ -84,7 +91,7 @@ template<> inline const std::string &Value::get() const { if(type!=STRING) - throw TypeError("Value is not a string"); + throw TypeError("Type mismatch"); return data; } diff --git a/source/writer.cpp b/source/writer.cpp new file mode 100644 index 0000000..cdd78b6 --- /dev/null +++ b/source/writer.cpp @@ -0,0 +1,52 @@ +/* $Id$ + +This file is part of libmspdatafile +Copyright © 2006 Mikko Rasa, Mikkosoft Productions +Distributed under the LGPL +*/ + +#include "binarywriter.h" +#include "statement.h" +#include "textwriter.h" +#include "writer.h" + +using namespace std; + +namespace Msp { +namespace DataFile { + +Writer::Writer(ostream &o): + out(o), + mode(new TextWriter(out)), + binary(false) +{ } + +void Writer::write(const Statement &st) +{ + mode->write(st); +} + +void Writer::set_binary(bool b) +{ + if(b==binary) + return; + + binary=b; + + Statement st; + if(binary) + st.keyword="__bin"; + else + st.keyword="__text"; + + mode->write(st); + + delete mode; + if(binary) + mode=new BinaryWriter(out); + else + mode=new TextWriter(out); +} + +} // namespace DataFile +} // namespace Msp diff --git a/source/writer.h b/source/writer.h new file mode 100644 index 0000000..756e19a --- /dev/null +++ b/source/writer.h @@ -0,0 +1,37 @@ +/* $Id$ + +This file is part of libmspdatafile +Copyright © 2006 Mikko Rasa, Mikkosoft Productions +Distributed under the LGPL +*/ + +#ifndef MSP_DATAFILE_WRITER_H_ +#define MSP_DATAFILE_WRITER_H_ + +#include +#include +#include "binarydict.h" + +namespace Msp { +namespace DataFile { + +class Statement; +class WriterMode; + +class Writer +{ +private: + std::ostream &out; + WriterMode *mode; + bool binary; + +public: + Writer(std::ostream &); + void write(const Statement &); + void set_binary(bool); +}; + +} // namespace DataFile +} // namespace Msp + +#endif diff --git a/source/writermode.h b/source/writermode.h new file mode 100644 index 0000000..e5853ce --- /dev/null +++ b/source/writermode.h @@ -0,0 +1,33 @@ +/* $Id$ + +This file is part of libmspdatafile +Copyright © 2006 Mikko Rasa, Mikkosoft Productions +Distributed under the LGPL +*/ + +#ifndef MSP_DATAFILE_WRITERMODE_H_ +#define MSP_DATAFILE_WRITERMODE_H_ + +#include + +namespace Msp { +namespace DataFile { + +class Statement; + +class WriterMode +{ +protected: + std::ostream &out; + + WriterMode(std::ostream &o): out(o) { } +public: + virtual ~WriterMode() { } + + virtual void write(const Statement &st) =0; +}; + +} // namespace DataFile +} // namespace Msp + +#endif diff --git a/tool.cpp b/tool.cpp new file mode 100644 index 0000000..533abb0 --- /dev/null +++ b/tool.cpp @@ -0,0 +1,75 @@ +/* $Id$ */ +#include +#include +#include +#include +#include "source/parser.h" +#include "source/statement.h" +#include "source/writer.h" + +using namespace std; +using namespace Msp; + +class DataTool: public Application +{ +private: + string in_fn; + string out_fn; + bool binary; +public: + DataTool(int argc, char **argv); + int main(); + + static Application::RegApp reg; +}; + + +DataTool::DataTool(int argc, char **argv): + out_fn("-") +{ + GetOpt getopt; + getopt.add_option('o', "output", out_fn, GetOpt::REQUIRED_ARG); + getopt.add_option('b', "binary", binary, GetOpt::NO_ARG); + getopt(argc, argv); + + const vector &args=getopt.get_args(); + if(args.empty()) + throw UsageError("Must give input filename"); + + in_fn=args[0]; +} + +int DataTool::main() +{ + ifstream in(in_fn.c_str()); + if(!in) + { + cerr<<"Couldn't open input file\n"; + return 1; + } + + ostream *out; + if(out_fn=="-") + out=&cout; + else + out=new ofstream(out_fn.c_str()); + + DataFile::Parser parser(in, in_fn); + DataFile::Writer writer(*out); + if(binary) + writer.set_binary(true); + + while(parser) + { + DataFile::Statement st=parser.parse(); + if(st.valid) + writer.write(st); + } + + if(out!=&cout) + delete out; + + return 0; +} + +Application::RegApp DataTool::reg;