]> git.tdb.fi Git - libs/datafile.git/commitdiff
Add binary data format
authorMikko Rasa <tdb@tdb.fi>
Mon, 17 Sep 2007 13:58:50 +0000 (13:58 +0000)
committerMikko Rasa <tdb@tdb.fi>
Mon, 17 Sep 2007 13:58:50 +0000 (13:58 +0000)
Add writing support
Add a tool for conversion and validation
Some refactoring

20 files changed:
Build
source/binarydict.h [new file with mode: 0644]
source/binaryparser.cpp [new file with mode: 0644]
source/binaryparser.h [new file with mode: 0644]
source/binarywriter.cpp [new file with mode: 0644]
source/binarywriter.h [new file with mode: 0644]
source/parser.cpp
source/parser.h
source/parsermode.h [new file with mode: 0644]
source/statement.cpp [new file with mode: 0644]
source/statement.h
source/textparser.cpp [new file with mode: 0644]
source/textparser.h [new file with mode: 0644]
source/textwriter.cpp [new file with mode: 0644]
source/textwriter.h [new file with mode: 0644]
source/value.h
source/writer.cpp [new file with mode: 0644]
source/writer.h [new file with mode: 0644]
source/writermode.h [new file with mode: 0644]
tool.cpp [new file with mode: 0644]

diff --git a/Build b/Build
index 36621950cda7c0c62ed7d6fad928636db64efd04..b400475a0c3f58136111ede50a854bfa613181ec 100644 (file)
--- a/Build
+++ b/Build
@@ -6,6 +6,7 @@ package "mspdatafile"
        description "Mikkosoft Productions datafile library";
 
        require "mspcore";
+       require "mspstrings";
 
        library "mspdatafile"
        {
@@ -13,4 +14,15 @@ package "mspdatafile"
                install true;
                install_headers "msp/datafile";
        };
+
+       program "mspdatatool"
+       {
+               source "tool.cpp";
+               install true;
+               build_info
+               {
+                       libpath ".";
+                       library "mspdatafile";
+               };
+       };
 };
diff --git a/source/binarydict.h b/source/binarydict.h
new file mode 100644 (file)
index 0000000..3b19561
--- /dev/null
@@ -0,0 +1,31 @@
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2006  Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#ifndef MSP_DATAFILE_BINARYDICT_H_
+#define MSP_DATAFILE_BINARYDICT_H_
+
+#include <string>
+
+namespace Msp {
+namespace DataFile {
+
+// One record of the binary format's statement dictionary: a statement
+// keyword paired with its argument signature.
+struct DictEntry
+{
+       std::string keyword;
+       // One character per argument.  BinaryParser::parse_statement decodes
+       // 'i', 'f', 's', 'b' and 'e'.
+       std::string args;
+
+       DictEntry() { }
+       DictEntry(const std::string &k, const std::string &a): keyword(k), args(a) { }
+
+       // Orders by keyword first, then by args.  BinaryWriter uses DictEntry
+       // as a std::map key, which requires this ordering.
+       bool operator<(const DictEntry &o) const
+       { return keyword<o.keyword || (keyword==o.keyword && args<o.args); }
+};
+
+} // namespace DataFile
+} // namespace Msp
+
+#endif
diff --git a/source/binaryparser.cpp b/source/binaryparser.cpp
new file mode 100644 (file)
index 0000000..298770a
--- /dev/null
@@ -0,0 +1,170 @@
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2007  Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#include <sys/param.h>
+#include <msp/strings/formatter.h>
+#include "binaryparser.h"
+#include "input.h"
+
+using namespace std;
+
+namespace Msp {
+namespace DataFile {
+
+// Creates a binary parser reading from i.  s is the source name that is
+// stored in parsed statements and used in error messages.
+BinaryParser::BinaryParser(Input &i, const string &s):
+       ParserMode(i, s),
+       first(true)
+{
+       // Statement IDs 1 and 2 are predefined so that the keyword and enum
+       // definition statements themselves can be decoded.
+       dict[1]=DictEntry("__kw", "iss");
+       dict[2]=DictEntry("__enum", "is");
+}
+
+// Returns the next ordinary statement from the stream.  Keyword ("__kw") and
+// enum ("__enum") definition statements met on the way are recorded in the
+// dictionaries and consumed here instead of being returned.  Whatever else
+// parse_statement produces, including a default-constructed statement, is
+// returned unchanged.
+// Throws TypeError if a definition statement has the wrong argument count.
+Statement BinaryParser::parse()
+{
+       while(1)
+       {
+               Statement st=parse_statement();
+               if(st.keyword=="__kw")
+               {
+                       if(st.args.size()!=3)
+                               throw TypeError(src+": Keyword definition must have three arguments");
+
+                       // Arguments: statement ID, keyword, argument signature
+                       const unsigned id=st.args[0].get<unsigned>();
+                       const string &kw=st.args[1].get<const string &>();
+                       const string &args=st.args[2].get<const string &>();
+                       dict[id]=DictEntry(kw, args);
+               }
+               else if(st.keyword=="__enum")
+               {
+                       // The message previously said "three", contradicting the check
+                       if(st.args.size()!=2)
+                               throw TypeError(src+": Enum definition must have two arguments");
+
+                       // Arguments: enum ID, enum name
+                       const unsigned id=st.args[0].get<unsigned>();
+                       enums[id]=st.args[1].get<const std::string &>();
+               }
+               else
+                       return st;
+       }
+}
+
+// Decodes a single statement: a statement ID, the arguments described by the
+// dictionary entry for that ID, a sub-statement count and then that many
+// sub-statements.  Returns a default-constructed Statement if the stream is
+// bad after reading the ID.
+// Throws ParseError if the ID is not in the dictionary.
+Statement BinaryParser::parse_statement()
+{
+       // Skip newlines in front of the very first statement only.
+       // NOTE(review): presumably these are left over from the text-mode
+       // statement that switched the parser to binary -- confirm.
+       while(first && in.peek()=='\n')
+               in.get();
+       first=false;
+
+       unsigned id=parse_int();
+       if(!in)
+               return Statement();
+
+       Dictionary::const_iterator i=dict.find(id);
+       if(i==dict.end())
+               throw ParseError(format("%s: Unknown statement ID %d", src, id), src, 0);
+       const DictEntry &de=i->second;
+
+       Statement result;
+       result.keyword=de.keyword;
+       result.source=src;
+
+       // Decode one argument per signature character.  Characters other than
+       // the five handled below are skipped without reading anything.
+       for(unsigned j=0; j<de.args.size(); ++j)
+       {
+               switch(de.args[j])
+               {
+               case 'i':
+                       result.args.push_back(parse_int());
+                       break;
+               case 'f':
+                       result.args.push_back(parse_float());
+                       break;
+               case 's':
+                       result.args.push_back(parse_string());
+                       break;
+               case 'b':
+                       result.args.push_back(parse_bool());
+                       break;
+               case 'e':
+                       result.args.push_back(parse_enum());
+                       break;
+               }
+       }
+
+       // Sub-statements go through parse() so that definition statements
+       // interleaved with them are processed as well.
+       unsigned nsub=parse_int();
+       for(unsigned j=0; j<nsub; ++j)
+               result.sub.push_back(parse());
+
+       result.valid=true;
+
+       return result;
+}
+
+// Reads a variable-length integer: a sequence of bytes carrying 7 payload
+// bits each, most significant group first, with the high bit (0x80) set on
+// every byte except the last.  The result is sign-extended from the topmost
+// payload bit.  Returns 0 if the stream is already bad on entry.
+long long BinaryParser::parse_int()
+{
+       long long result=0;
+       unsigned bits=0;
+
+       while(in)
+       {
+               int c=in.get();
+
+               result=(result<<7) | (c&0x7F);
+               bits+=7;
+
+               if(!(c&0x80))
+                       break;
+       }
+
+       // Nothing was read, or the payload already fills the whole result:
+       // there is no sign bit to extend and shifting by bits-1 would be
+       // undefined.
+       if(bits==0 || bits>=sizeof(result)*8)
+               return result;
+
+       // The mask has to be computed as long long; shifting a plain int 1
+       // is undefined as soon as bits-1 reaches the width of int.
+       const long long mask=1LL<<(bits-1);
+       result=(result^mask)-mask;
+
+       return result;
+}
+
+// Reads a float stored as sizeof(float) raw bytes, most significant byte
+// first, and returns it in host byte order.
+float BinaryParser::parse_float()
+{
+       // Overlay used to assemble the float from its individual bytes
+       union
+       {
+               float f;
+               char d[sizeof(float)];
+       };
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+       // Fill from the last byte down to d[0].  The loop must start at
+       // sizeof(float), as in BinaryWriter::write_float; starting one lower
+       // left the top byte unread and uninitialized.
+       for(unsigned i=sizeof(float); i--;)
+               d[i]=in.get();
+#else
+       for(unsigned i=0; i<sizeof(float); ++i)
+               d[i]=in.get();
+#endif
+
+       return f;
+}
+
+// Reads one value with in.get() and converts it to bool, so any non-zero
+// value yields true.
+bool BinaryParser::parse_bool()
+{
+       return in.get();
+}
+
+// Reads a string stored as a length (see parse_int) followed by that many
+// bytes.
+// NOTE(review): the length comes straight from the stream and the loop does
+// not check the stream state, so a corrupt length is not detected here.
+string BinaryParser::parse_string()
+{
+       unsigned len=parse_int();
+       string result;
+       result.reserve(len);
+       for(unsigned i=0; i<len; ++i)
+               result+=in.get();
+       return result;
+}
+
+// Reads an enum ID and returns the name registered for it by an earlier
+// "__enum" definition.
+// Throws KeyError if the ID has not been defined.
+string BinaryParser::parse_enum()
+{
+       unsigned id=parse_int();
+       EnumMap::iterator i=enums.find(id);
+       if(i==enums.end())
+               throw KeyError("Unknown enum");
+       return i->second;
+}
+
+} // namespace DataFile
+} // namespace Msp
diff --git a/source/binaryparser.h b/source/binaryparser.h
new file mode 100644 (file)
index 0000000..560703c
--- /dev/null
@@ -0,0 +1,43 @@
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2007  Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#ifndef MSP_DATAFILE_BINARYPARSER_H_
+#define MSP_DATAFILE_BINARYPARSER_H_
+
+#include <map>
+#include "binarydict.h"
+#include "parsermode.h"
+
+namespace Msp {
+namespace DataFile {
+
+// ParserMode implementation that decodes the binary data format.
+class BinaryParser: public ParserMode
+{
+private:
+       typedef std::map<unsigned, DictEntry> Dictionary;
+       typedef std::map<unsigned, std::string> EnumMap;
+
+       // Statement ID -> keyword and argument signature
+       Dictionary dict;
+       // Enum ID -> enum name
+       EnumMap enums;
+       // True until the first statement has been started
+       bool first;
+
+public:
+       BinaryParser(Input &i, const std::string &s);
+       // Returns the next statement that is not a dictionary or enum definition
+       Statement parse();
+private:
+       // Decodes one statement, definition statements included
+       Statement parse_statement();
+       // Decoders for the individual argument types
+       long long parse_int();
+       float parse_float();
+       std::string parse_string();
+       bool parse_bool();
+       std::string parse_enum();
+};
+
+} // namespace DataFile
+} // namespace Msp
+
+#endif
diff --git a/source/binarywriter.cpp b/source/binarywriter.cpp
new file mode 100644 (file)
index 0000000..e89def3
--- /dev/null
@@ -0,0 +1,141 @@
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2006  Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#include "binarywriter.h"
+#include "statement.h"
+
+using namespace std;
+
+namespace Msp {
+namespace DataFile {
+
+// Creates a binary writer emitting to o.  User statement IDs start at 3 and
+// enum IDs at 1.
+BinaryWriter::BinaryWriter(ostream &o):
+       WriterMode(o),
+       next_st_id(3),
+       next_enum_id(1)
+{
+       // Predefined IDs of the definition statements.  These must match the
+       // ones BinaryParser starts with: 1 for "__kw" and 2 for "__enum".
+       // "__enum" was previously registered as 1, which made the parser decode
+       // enum definitions as keyword definitions.
+       dict[DictEntry("__kw", "iss")]=1;
+       dict[DictEntry("__enum", "is")]=2;
+}
+
+// Writes st and its sub-statements.  Definitions for keywords and enums not
+// emitted before are written first, then the statement itself.
+void BinaryWriter::write(const Statement &st)
+{
+       collect_keywords(st);
+       write_(st);
+}
+
+// Encodes one statement: its dictionary ID, each argument according to its
+// type, the number of sub-statements and then the sub-statements themselves.
+// The statement's keyword/signature and any enum values must already be
+// registered (see collect_keywords).
+// Throws InvalidParameterValue if the statement is not in the dictionary.
+void BinaryWriter::write_(const Statement &st)
+{
+       Dictionary::iterator i=dict.find(create_entry(st));
+       if(i==dict.end())
+               throw InvalidParameterValue("Unknown statement");
+
+       write_int(i->second);
+       for(ValueArray::const_iterator j=st.args.begin(); j!=st.args.end(); ++j)
+               switch(j->get_type())
+               {
+               case INTEGER: write_int   (j->get<long long>()); break;
+               case STRING:  write_string(j->get<const string &>()); break;
+               case BOOLEAN: write_int   (j->get<bool>()); break;
+               case FLOAT:   write_float (j->get<float>()); break;
+               case ENUM:    write_enum  (j->get_raw()); break;
+               }
+
+       write_int(st.sub.size());
+       // collect_keywords has already walked the whole subtree, so the
+       // sub-statements are encoded directly.  Going through write() would
+       // re-scan every subtree once per nesting level without emitting
+       // anything new.
+       for(list<Statement>::const_iterator j=st.sub.begin(); j!=st.sub.end(); ++j)
+               write_(*j);
+}
+
+// Builds the dictionary key for st: its keyword plus a signature string with
+// one type character per argument.
+// Throws InvalidParameterValue if an argument's type value is 5 or greater.
+DictEntry BinaryWriter::create_entry(const Statement &st)
+{
+       // Indexed by the numeric value of the argument type.
+       // NOTE(review): assumes the type enumeration in value.h is ordered
+       // INTEGER, FLOAT, STRING, BOOLEAN, ENUM -- confirm.
+       static const char types[]="ifsbe";
+
+       string args;
+       for(ValueArray::const_iterator i=st.args.begin(); i!=st.args.end(); ++i)
+       {
+               if(i->get_type()>=5)
+                       throw InvalidParameterValue("Invalid argument type");
+               args+=types[i->get_type()];
+       }
+
+       return DictEntry(st.keyword, args);
+}
+
+// Walks st and all of its sub-statements.  For every keyword/signature pair
+// and every enum value not seen before, writes a "__kw" or "__enum"
+// definition statement to the output and registers the newly assigned ID.
+void BinaryWriter::collect_keywords(const Statement &st)
+{
+       DictEntry de=create_entry(st);
+
+       if(!dict.count(de))
+       {
+               Statement kst;
+               kst.keyword="__kw";
+               kst.args.push_back(next_st_id);
+               kst.args.push_back(de.keyword);
+               kst.args.push_back(de.args);
+               write_(kst);
+
+               // Register only after the definition has been written
+               dict.insert(Dictionary::value_type(de, next_st_id++));
+       }
+
+       for(ValueArray::const_iterator i=st.args.begin(); i!=st.args.end(); ++i)
+               if(i->get_type()==ENUM && !enums.count(i->get_raw()))
+               {
+                       Statement est;
+                       est.keyword="__enum";
+                       est.args.push_back(next_enum_id);
+                       est.args.push_back(i->get_raw());
+                       write_(est);
+
+                       enums[i->get_raw()]=next_enum_id++;
+               }
+
+       for(list<Statement>::const_iterator i=st.sub.begin(); i!=st.sub.end(); ++i)
+               collect_keywords(*i);
+}
+
+// Writes n as a variable-length integer: 7 payload bits per byte, most
+// significant group first, with the high bit (0x80) set on every byte except
+// the last.  Enough groups are written that the topmost payload bit equals
+// the sign of n, because BinaryParser::parse_int sign-extends from that bit.
+void BinaryWriter::write_int(long long n)
+{
+       // Upper bound on the group count, which also keeps every shift below
+       // the width of n.
+       const unsigned max_groups=(sizeof(n)*8+6)/7;
+
+       // Find the smallest group count for which everything from the top
+       // payload bit upwards is pure sign (all zeros or all ones).  The old
+       // loop ignored the sign bit, so 64 was written as a single byte and
+       // read back as -64, and it never terminated cleanly for negative n.
+       unsigned i=1;
+       for(; i<max_groups; ++i)
+       {
+               const long long rest=n>>(i*7-1);
+               if(rest==0 || rest==-1)
+                       break;
+       }
+
+       for(; i--;)
+               out.put(((n>>(i*7)) & 0x7F) | (i?0x80:0));
+}
+
+// Writes s as its length (see write_int) followed by its raw bytes.
+void BinaryWriter::write_string(const string &s)
+{
+       write_int(s.size());
+       out.write(s.data(), s.size());
+}
+
+// Writes f as sizeof(float) raw bytes, most significant byte first.
+// NOTE(review): this file does not include <sys/param.h> itself (the parser
+// does).  If BYTE_ORDER and LITTLE_ENDIAN are not defined through another
+// header, the #if below compares 0 == 0 and always takes the first branch --
+// confirm.
+void BinaryWriter::write_float(float f)
+{
+       // Overlay used to access the individual bytes of the float
+       union
+       {
+               float v;
+               char d[sizeof(float)];
+       };
+
+       v=f;
+#if BYTE_ORDER == LITTLE_ENDIAN
+       // Emit the bytes in reverse memory order, from the last one down to d[0]
+       for(unsigned i=sizeof(float); i--;)
+               out.put(d[i]);
+#else
+       for(unsigned i=0; i<sizeof(float); ++i)
+               out.put(d[i]);
+#endif
+}
+
+// Writes the ID previously assigned to enum value e.
+// Throws InvalidParameterValue if e has not been registered.
+void BinaryWriter::write_enum(const string &e)
+{
+       EnumMap::const_iterator i=enums.find(e);
+       if(i==enums.end())
+               throw InvalidParameterValue("Unknown enum");
+       write_int(i->second);
+}
+
+} // namespace DataFile
+} // namespace Msp
diff --git a/source/binarywriter.h b/source/binarywriter.h
new file mode 100644 (file)
index 0000000..cf8f9fc
--- /dev/null
@@ -0,0 +1,45 @@
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2006  Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#ifndef MSP_DATAFILE_BINARYWRITER_H_
+#define MSP_DATAFILE_BINARYWRITER_H_
+
+#include <map>
+#include "binarydict.h"
+#include "writermode.h"
+
+namespace Msp {
+namespace DataFile {
+
+// WriterMode implementation that encodes statements in the binary data format.
+class BinaryWriter: public WriterMode
+{
+private:
+       typedef std::map<DictEntry, unsigned> Dictionary;
+       typedef std::map<std::string, unsigned> EnumMap;
+
+       // Keyword and argument signature -> statement ID
+       Dictionary dict;
+       // ID to assign to the next new dictionary entry
+       unsigned next_st_id;
+       // Enum name -> enum ID
+       EnumMap enums;
+       // ID to assign to the next new enum value
+       unsigned next_enum_id;
+
+public:
+       BinaryWriter(std::ostream &o);
+       // Writes a statement, preceded by any definitions it needs
+       void write(const Statement &st);
+private:
+       // Encodes a statement whose definitions have already been written
+       void write_(const Statement &st);
+       // Builds the dictionary key for a statement
+       DictEntry create_entry(const Statement &st);
+       // Emits definitions for keywords and enums not seen before
+       void collect_keywords(const Statement &st);
+       // Encoders for the individual argument types
+       void write_int(long long n);
+       void write_string(const std::string &s);
+       void write_float(float f);
+       void write_enum(const std::string &e);
+};
+
+} // namespace DataFile
+} // namespace Msp
+
+#endif
index 8fccec2f94c208d93f966ec88f4279eeb133b081..82ac0069b95d4f22e3d0de1ab0c0b66b04c76423 100644 (file)
@@ -6,10 +6,10 @@ Distributed under the LGPL
 */
 #include <cctype>
 #include <sstream>
-#include "error.h"
+#include "binaryparser.h"
 #include "parser.h"
 #include "statement.h"
-#include "token.h"
+#include "textparser.h"
 
 using namespace std;
 
@@ -19,9 +19,15 @@ namespace DataFile {
 Parser::Parser(istream &i, const string &s):
        in(i),
        src(s),
-       good(true)
+       good(true),
+       mode(new TextParser(in, src))
 { }
 
+Parser::~Parser()
+{
+       delete mode;
+}
+
 Statement Parser::parse()
 {
        if(!good)
@@ -29,351 +35,28 @@ Statement Parser::parse()
 
        try
        {
-               return parse_(0);
-       }
-       catch(const Exception &e)
-       {
-               good=false;
-               throw;
-       }
-}
-
-Statement Parser::parse_(const Token *t)
-{
-       Statement result;
-       bool      sub=false;
-       bool      finish=false;
-
-       while(in)
-       {
-               Token token;
-               if(t)
-               {
-                       token=*t;
-                       t=0;
-               }
-               else
-                       token=parse_token();
-
-               if(result.keyword.empty())
-               {
-                       if(token.str.empty())
-                               break;
-                       else if(token.type!=Token::IDENTIFIER)
-                               throw ParseError(get_location()+": Syntax error at token '"+token.str+"' (expected an identifier)", src, in.get_line_number());
-                       result.keyword=token.str;
-                       result.valid=true;
-                       result.source=src;
-                       result.line=in.get_line_number();
-               }
-               else if(sub)
+               while(1)
                {
-                       if(token.str=="}")
+                       Statement st=mode->parse();
+                       if(st.keyword=="__bin")
                        {
-                               sub=false;
-                               finish=true;
+                               delete mode;
+                               mode=new BinaryParser(in, src);
                        }
-                       else
+                       else if(st.keyword=="__text")
                        {
-                               Statement ss=parse_(&token);
-                               result.sub.push_back(ss);
+                               delete mode;
+                               mode=new TextParser(in, src);
                        }
-               }
-               else if(finish)
-               {
-                       if(token.str!=";")
-                               throw ParseError(get_location()+": Syntax error at token '"+token.str+"' (Expected a ';')", src, in.get_line_number());
-                       break;
-               }
-               else if(token.str=="{")
-                       sub=true;
-               else if(token.str==";")
-                       break;
-               else if(token.type==Token::INTEGER)
-                       result.args.push_back(Value(Value::INTEGER, token.str));
-               else if(token.type==Token::FLOAT)
-                       result.args.push_back(Value(Value::FLOAT, token.str));
-               else if(token.type==Token::STRING)
-                       result.args.push_back(Value(Value::STRING, token.str));
-               else if(token.type==Token::IDENTIFIER)
-               {
-                       if(token.str=="true")
-                               result.args.push_back(Value(Value::BOOLEAN, "1"));
-                       else if(token.str=="false")
-                               result.args.push_back(Value(Value::BOOLEAN, "0"));
                        else
-                               result.args.push_back(Value(Value::ENUM, token.str));
-                       //result.args.push_back(resolve_identifiertoken.str);
+                               return st;
                }
-               else if(token.str=="")
-                       throw ParseError(src+": Unexcepted EOF", src, in.get_line_number());
-               else
-                       throw ParseError(get_location()+": Syntax error", src, in.get_line_number());
        }
-
-       return result;
-}
-
-Token Parser::parse_token()
-{
-       int c=0;
-       unsigned comment=0;
-
-       // Skip over comments and whitespace
-       while(in)
-       {
-               c=in.get();
-               int next=in.peek();
-
-               if(c=='/' && next=='/')
-                       comment=1;
-               else if(c=='/' && next=='*')
-                       comment=2;
-               else if(c=='\n' && comment==1)
-                       comment=0;
-               else if(c=='*' && next=='/' && comment==2)
-                       comment=3;
-               else if(comment==3)   // Skip the second character of block comment end
-                       comment=0;
-               else if(!isspace(c) && !comment)
-                       break;
-       }
-
-       if(comment)  // Didn't hit any non-whitespace
-               throw ParseError(src+": Unfinished comment", src, in.get_line_number());
-
-       enum ParseState
-       {
-               INIT,
-               SIGN,
-               FLOATEXPINIT,
-               FLOATEXPSIGN,
-               STRING,
-               ACCEPT,
-               ZERO,
-               DECIMAL,
-               HEXADECIMAL,
-               OCTAL,
-               FLOAT,
-               FLOATEXP,
-               IDENTIFIER
-       };
-
-       static Token::Type token_type[]=
-       {
-               Token::SPECIAL,
-               Token::SPECIAL,
-               Token::SPECIAL,
-               Token::SPECIAL,
-               Token::STRING,
-               Token::SPECIAL,
-               Token::INTEGER,
-               Token::INTEGER,
-               Token::INTEGER,
-               Token::INTEGER,
-               Token::FLOAT,
-               Token::FLOAT,
-               Token::IDENTIFIER
-       };
-
-       ParseState state=INIT;
-       string     buf;
-       bool       escape=false;
-
-       while(in)
-       {
-               if(state!=INIT)
-                       c=in.get();
-               int next=in.peek();
-
-               buf+=c;
-
-               switch(state)
-               {
-               case INIT:
-                       if(c=='0')
-                               state=ZERO;
-                       else if(c=='-' || c=='+')
-                               state=SIGN;
-                       else if(c=='.')
-                               state=FLOAT;
-                       else if(c=='"')
-                               state=STRING;
-                       else if(c=='{' || c=='}' || c==';')
-                               return Token(Token::SPECIAL, string(1, c));
-                       else if(isdigit(c))
-                               state=DECIMAL;
-                       else if(isalpha(c))
-                               state=IDENTIFIER;
-                       else
-                               parse_error(c, state);
-                       break;
-
-               case SIGN:
-                       if(c=='0')
-                               state=ZERO;
-                       else if(isdigit(c))
-                               state=DECIMAL;
-                       else if(c=='.')
-                               state=FLOAT;
-                       else
-                               parse_error(c, state);
-                       break;
-
-               case ZERO:
-                       if(c=='x')
-                               state=HEXADECIMAL;
-                       else if(isdigit(c))
-                               state=OCTAL;
-                       else if(c=='.')
-                               state=FLOAT;
-                       else
-                               parse_error(c, state);
-                       break;
-
-               case DECIMAL:
-                       if(c=='.')
-                               state=FLOAT;
-                       else if(!isdigit(c))
-                               parse_error(c, state);
-                       break;
-
-               case HEXADECIMAL:
-                       if(!isxdigit(c))
-                               parse_error(c, state);
-                       break;
-
-               case OCTAL:
-                       if(!isodigit(c))
-                               parse_error(c, state);
-                       break;
-
-               case FLOAT:
-                       if(c=='e' || c=='E')
-                               state=FLOATEXPINIT;
-                       else if(!isdigit(c))
-                               parse_error(c, state);
-                       break;
-
-               case FLOATEXPINIT:
-                       if(c=='+' || c=='-')
-                               state=FLOATEXPSIGN;
-                       else if(isdigit(c))
-                               state=FLOATEXP;
-                       else
-                               parse_error(c, state);
-                       break;
-
-               case FLOATEXPSIGN:
-                       if(isdigit(c))
-                               state=FLOATEXP;
-                       else
-                               parse_error(c, state);
-                       break;
-
-               case FLOATEXP:
-                       if(!isdigit(c))
-                               parse_error(c, state);
-                       break;
-
-               case STRING:
-                       if(c=='\\')
-                               escape=!escape;
-                       else if(c=='"' && !escape)
-                               return Token(Token::STRING, unescape_string(buf));
-                       else
-                               escape=false;
-                       break;
-
-               case IDENTIFIER:
-                       if(!isalpha(c) && !isdigit(c) && c!='_')
-                               parse_error(c, state);
-                       break;
-
-               default:
-                       throw Exception(get_location()+": Internal error (bad state)");
-               }
-
-               if(is_delimiter(next) && state>=ACCEPT)
-                       return Token(token_type[state], buf);
-       }
-
-       return Token(Token::SPECIAL, "");
-}
-
-bool Parser::is_delimiter(int c)
-{
-       return (isspace(c) || c=='{' || c=='}' || c==';' || c=='/');
-}
-
-bool Parser::isodigit(int c)
-{
-       return (c>='0' && c<='7');
-}
-
-string Parser::unescape_string(const string &str)
-{
-       string   result;
-       bool     escape=false;
-       unsigned hexcape=0;
-       for(string::const_iterator i=str.begin()+1; i!=str.end()-1; ++i)
+       catch(const Exception &e)
        {
-               if(escape)
-               {
-                       if(*i=='n')
-                               result+='\n';
-                       else if(*i=='t')
-                               result+='\t';
-                       else if(*i=='\\')
-                               result+='\\';
-                       else if(*i=='"')
-                               result+='"';
-                       else if(*i=='x')
-                               hexcape=0x100;
-                       else
-                               throw ParseError("Invalid escape", src, in.get_line_number());
-                       escape=false;
-               }
-               else if(hexcape)
-               {
-                       unsigned digit=0;
-                       if(*i>='0' && *i<='9')
-                               digit=*i-'0';
-                       else if(*i>='a' && *i<='f')
-                               digit=*i-'a'+10;
-                       else if(*i>='A' && *i<='F')
-                               digit=*i-'A'+10;
-                       else
-                               throw ParseError("Invalid hex digit", src, in.get_line_number());
-
-                       hexcape=(hexcape<<4)|digit;
-                       if(hexcape&0x10000)
-                       {
-                               result+=hexcape&0xFF;
-                               hexcape=0;
-                       }
-               }
-               else if(*i=='\\')
-                       escape=true;
-               else
-                       result+=*i;
+               good=false;
+               throw;
        }
-
-       return result;
-}
-
-string Parser::get_location()
-{
-       ostringstream ss;
-       ss<<src<<':'<<in.get_line_number();
-       return ss.str();
-}
-
-void Parser::parse_error(int c, int state)
-{
-       ostringstream ss;
-       ss<<get_location()<<": Parse error at '"<<(char)c<<"' (state "<<state<<')';
-       throw ParseError(ss.str(), src, in.get_line_number());
 }
 
 } // namespace DataFile
index e127c5b3653fbfd02e77bbec7c344e5061ed113e..5bec123ae74f7aeee3485dbbeeb226d0bad928b7 100644 (file)
@@ -1,9 +1,10 @@
 /* $Id$
 
 This file is part of libmspdatafile
-Copyright © 2006  Mikko Rasa, Mikkosoft Productions
+Copyright © 2006-2007  Mikko Rasa, Mikkosoft Productions
 Distributed under the LGPL
 */
+
 #ifndef MSP_DATAFILE_PARSER_H_
 #define MSP_DATAFILE_PARSER_H_
 
@@ -14,6 +15,7 @@ Distributed under the LGPL
 namespace Msp {
 namespace DataFile {
 
+class ParserMode;
 class Statement;
 struct Token;
 
@@ -21,20 +23,15 @@ class Parser
 {
 public:
        Parser(std::istream &, const std::string &);
+       ~Parser();
+
        Statement parse();
        operator bool() const { return in; }
 private:
        Input       in;
        std::string src;
        bool        good;
-
-       Statement parse_(const Token *);
-       Token parse_token();
-       bool  is_delimiter(int);
-       bool  isodigit(int);
-       std::string unescape_string(const std::string &);
-       std::string get_location();
-       void  parse_error(int, int);
+       ParserMode  *mode;
 };
 
 } // namespace DataFile
diff --git a/source/parsermode.h b/source/parsermode.h
new file mode 100644 (file)
index 0000000..824525d
--- /dev/null
@@ -0,0 +1,34 @@
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2007  Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#ifndef MSP_DATAFILE_PARSERMODE_H_
+#define MSP_DATAFILE_PARSERMODE_H_
+
+#include "statement.h"
+
+namespace Msp {
+namespace DataFile {
+
+class Input;
+
+// Abstract base for the format-specific parsers that Parser switches between
+// (TextParser and BinaryParser).
+class ParserMode
+{
+protected:
+       // Input to read from; held by reference, not owned
+       Input &in;
+       // Name of the data source
+       std::string src;
+
+       ParserMode(Input &i, const std::string &s): in(i), src(s) { }
+public:
+       virtual ~ParserMode() { }
+
+       // Parses and returns the next statement from the input
+       virtual Statement parse() =0;
+};
+
+} // namespace DataFile
+} // namespace Msp
+
+#endif
diff --git a/source/statement.cpp b/source/statement.cpp
new file mode 100644 (file)
index 0000000..2df8b4c
--- /dev/null
@@ -0,0 +1,23 @@
+/* $Id: statement.h 19 2007-08-21 14:11:23Z tdb $
+
+This file is part of libmspdatafile
+Copyright © 2006  Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#include <msp/strings/formatter.h>
+#include "statement.h"
+
+namespace Msp {
+namespace DataFile {
+
+// Returns the source name, followed by ":<line>" when the line number is
+// non-zero.
+std::string Statement::get_location() const
+{
+       std::string result=source;
+       if(line)
+               result+=format(":%d", line);
+       return result;
+}
+
+} // namespace DataFile
+} // namespace Msp
index 6b3c54b8b7989d386fff3c3cb98ef3354f5433ad..f2fa6e629bf7a1d50676bbe25b4f3096e87310dc 100644 (file)
@@ -8,8 +8,6 @@ Distributed under the LGPL
 #define MSP_DATAFILE_STATEMENT_H_
 
 #include <list>
-#include <sstream>
-#include <vector>
 #include "value.h"
 
 namespace Msp {
@@ -26,8 +24,7 @@ public:
        std::list<Statement> sub;
 
        Statement(): valid(false), line(0) { }
-       std::string get_location() const
-       { std::ostringstream ss; ss<<source<<':'<<line; return ss.str(); }
+       std::string get_location() const;
 };
 
 } // namespace DataFile
diff --git a/source/textparser.cpp b/source/textparser.cpp
new file mode 100644 (file)
index 0000000..298b7ca
--- /dev/null
@@ -0,0 +1,364 @@
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2007  Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#include <msp/strings/formatter.h>
+#include "input.h"
+#include "textparser.h"
+#include "token.h"
+
+using namespace std;
+
+namespace Msp {
+namespace DataFile {
+
+/** Passes the input object and the source name on to the ParserMode base class. */
+TextParser::TextParser(Input &i, const string &s):
+       ParserMode(i, s)
+{ }
+
+/** Parses a statement by calling parse_statement without a pre-read token. */
+Statement TextParser::parse()
+{
+       return parse_statement(0);
+}
+
+/**
+Parses a single statement: a keyword, any number of arguments and an optional
+brace-enclosed block of sub-statements, terminated by a semicolon.
+
+If t is non-null, the token it points to is used as the first token instead
+of reading one from the input.  This is how a token that was already read
+while processing an enclosing block is handed over to the sub-statement.
+
+Returns a statement with valid==false if an empty token is encountered before
+any keyword.  Throws ParseError on malformed input, including input that runs
+out while arguments or a block are being read.
+*/
+Statement TextParser::parse_statement(const Token *t)
+{
+       Statement result;
+       bool      sub=false;     // Inside the { } block of this statement
+       bool      finish=false;  // Block has been closed, only ';' may follow
+
+       while(in)
+       {
+               Token token;
+               if(t)
+               {
+                       // Use the token supplied by the caller exactly once
+                       token=*t;
+                       t=0;
+               }
+               else
+                       token=parse_token();
+
+               if(result.keyword.empty())
+               {
+                       if(token.str.empty())
+                               break;
+                       else if(token.type!=Token::IDENTIFIER)
+                               throw ParseError(format("%s: Syntax error at token '%s' (expected an identifier)", get_location(), token.str), src, in.get_line_number());
+                       result.keyword=token.str;
+                       result.valid=true;
+                       result.source=src;
+                       result.line=in.get_line_number();
+               }
+               else if(sub)
+               {
+                       if(token.str=="}")
+                       {
+                               sub=false;
+                               finish=true;
+                       }
+                       else if(token.type==Token::SPECIAL && token.str.empty())
+                       {
+                               // The input ended before the block was closed.  Recursing
+                               // here would only produce an invalid sub-statement and let
+                               // the truncated block pass silently.
+                               throw ParseError(src+": Unexpected end of input", src, in.get_line_number());
+                       }
+                       else
+                       {
+                               Statement ss=parse_statement(&token);
+                               result.sub.push_back(ss);
+                       }
+               }
+               else if(finish)
+               {
+                       if(token.str!=";")
+                               throw ParseError(format("%s: Syntax error at token '%s' (Expected a ';')", get_location(), token.str), src, in.get_line_number());
+                       break;
+               }
+               else if(token.str=="{")
+                       sub=true;
+               else if(token.str==";")
+                       break;
+               else if(token.type==Token::INTEGER)
+                       result.args.push_back(Value(INTEGER, token.str));
+               else if(token.type==Token::FLOAT)
+                       result.args.push_back(Value(FLOAT, token.str));
+               else if(token.type==Token::STRING)
+                       result.args.push_back(Value(STRING, token.str));
+               else if(token.type==Token::IDENTIFIER)
+               {
+                       // Bare words are booleans if they spell true/false, enum values otherwise
+                       if(token.str=="true")
+                               result.args.push_back(Value(BOOLEAN, "1"));
+                       else if(token.str=="false")
+                               result.args.push_back(Value(BOOLEAN, "0"));
+                       else
+                               result.args.push_back(Value(ENUM, token.str));
+               }
+               else if(token.str=="")
+                       throw ParseError(src+": Unexpected end of input", src, in.get_line_number());
+               else
+                       throw ParseError(get_location()+": Syntax error", src, in.get_line_number());
+       }
+
+       return result;
+}
+
+/**
+Reads the next token from the input.
+
+Whitespace and comments (// line comments and block comments) are skipped
+first.  The token itself is recognized with a state machine; states from
+ACCEPT onwards are ones in which the characters read so far form a complete
+token, which is returned as soon as the following character is a delimiter.
+
+Returns a SPECIAL token with an empty string when the input runs out.
+Throws ParseError on an unterminated block comment or an unexpected character.
+*/
+Token TextParser::parse_token()
+{
+       int c=0;
+       unsigned comment=0;  // 0 = not in a comment, 1 = line comment, 2 = block comment
+
+       // Skip over comments and whitespace
+       while(in)
+       {
+               c=in.get();
+               int next=in.peek();
+
+               if(comment==1)
+               {
+                       // A line comment ends at the newline; anything else in it is ignored
+                       if(c=='\n')
+                               comment=0;
+               }
+               else if(comment==2)
+               {
+                       // Only the closing sequence is significant inside a block comment
+                       if(c=='*' && next=='/')
+                       {
+                               in.get();  // Consume the '/' so it can't start another comment
+                               comment=0;
+                       }
+               }
+               else if(c=='/' && next=='/')
+                       comment=1;
+               else if(c=='/' && next=='*')
+               {
+                       in.get();  // Consume the '*' so that "/*/" doesn't count as a whole comment
+                       comment=2;
+               }
+               else if(!isspace(c))
+                       break;
+       }
+
+       // A line comment may be terminated by the end of input, a block comment may not
+       if(comment==2)
+               throw ParseError(src+": Unfinished comment at end of input", src, in.get_line_number());
+
+       // The order matters: token_type below is indexed by state, and every
+       // state from ACCEPT onwards is an accepting one.
+       enum ParseState
+       {
+               INIT,
+               SIGN,
+               FLOATEXPINIT,
+               FLOATEXPSIGN,
+               STRING,
+               ACCEPT,
+               ZERO,
+               DECIMAL,
+               HEXADECIMAL,
+               OCTAL,
+               FLOAT,
+               FLOATEXP,
+               IDENTIFIER
+       };
+
+       static const Token::Type token_type[]=
+       {
+               Token::SPECIAL,
+               Token::SPECIAL,
+               Token::SPECIAL,
+               Token::SPECIAL,
+               Token::STRING,
+               Token::SPECIAL,
+               Token::INTEGER,
+               Token::INTEGER,
+               Token::INTEGER,
+               Token::INTEGER,
+               Token::FLOAT,
+               Token::FLOAT,
+               Token::IDENTIFIER
+       };
+
+       ParseState state=INIT;
+       string     buf;
+       bool       escape=false;
+
+       // NOTE(review): if the input runs out in the middle of a token (including
+       // an unterminated string), the partial token is dropped and the empty
+       // end-of-input token is returned.  Confirm whether that should be an error.
+       while(in)
+       {
+               // In the INIT state the character left over from the skipping loop is used
+               if(state!=INIT)
+                       c=in.get();
+               int next=in.peek();
+
+               buf+=c;
+
+               switch(state)
+               {
+               case INIT:
+                       if(c=='0')
+                               state=ZERO;
+                       else if(c=='-' || c=='+')
+                               state=SIGN;
+                       else if(c=='.')
+                               state=FLOAT;
+                       else if(c=='"')
+                               state=STRING;
+                       else if(c=='{' || c=='}' || c==';')
+                               return Token(Token::SPECIAL, string(1, c));
+                       else if(isdigit(c))
+                               state=DECIMAL;
+                       else if(isalpha(c) || c=='_')
+                               state=IDENTIFIER;
+                       else
+                               parse_error(c, state);
+                       break;
+
+               case SIGN:
+                       if(c=='0')
+                               state=ZERO;
+                       else if(isdigit(c))
+                               state=DECIMAL;
+                       else if(c=='.')
+                               state=FLOAT;
+                       else
+                               parse_error(c, state);
+                       break;
+
+               case ZERO:
+                       if(c=='x')
+                               state=HEXADECIMAL;
+                       else if(isodigit(c))  // 8 and 9 are not valid in an octal literal
+                               state=OCTAL;
+                       else if(c=='.')
+                               state=FLOAT;
+                       else
+                               parse_error(c, state);
+                       break;
+
+               case DECIMAL:
+                       if(c=='.')
+                               state=FLOAT;
+                       else if(!isdigit(c))
+                               parse_error(c, state);
+                       break;
+
+               case HEXADECIMAL:
+                       if(!isxdigit(c))
+                               parse_error(c, state);
+                       break;
+
+               case OCTAL:
+                       if(!isodigit(c))
+                               parse_error(c, state);
+                       break;
+
+               case FLOAT:
+                       if(c=='e' || c=='E')
+                               state=FLOATEXPINIT;
+                       else if(!isdigit(c))
+                               parse_error(c, state);
+                       break;
+
+               case FLOATEXPINIT:
+                       if(c=='+' || c=='-')
+                               state=FLOATEXPSIGN;
+                       else if(isdigit(c))
+                               state=FLOATEXP;
+                       else
+                               parse_error(c, state);
+                       break;
+
+               case FLOATEXPSIGN:
+                       if(isdigit(c))
+                               state=FLOATEXP;
+                       else
+                               parse_error(c, state);
+                       break;
+
+               case FLOATEXP:
+                       if(!isdigit(c))
+                               parse_error(c, state);
+                       break;
+
+               case STRING:
+                       // Track backslashes so that an escaped quote doesn't end the string
+                       if(c=='\\')
+                               escape=!escape;
+                       else if(c=='"' && !escape)
+                               return Token(Token::STRING, unescape_string(buf));
+                       else
+                               escape=false;
+                       break;
+
+               case IDENTIFIER:
+                       if(!isalpha(c) && !isdigit(c) && c!='_')
+                               parse_error(c, state);
+                       break;
+
+               default:
+                       throw Exception(get_location()+": Internal error (bad state)");
+               }
+
+               if(is_delimiter(next) && state>=ACCEPT)
+                       return Token(token_type[state], buf);
+       }
+
+       return Token(Token::SPECIAL, "");
+}
+
+/** Tells whether a character terminates a token: whitespace, '{', '}', ';' or '/'. */
+bool TextParser::is_delimiter(int c)
+{
+       switch(c)
+       {
+       case '{':
+       case '}':
+       case ';':
+       case '/':
+               return true;
+       default:
+               return isspace(c)!=0;
+       }
+}
+
+/** Tells whether a character is an octal digit, i.e. in the range '0' to '7'. */
+bool TextParser::isodigit(int c)
+{
+       return !(c<'0' || c>'7');
+}
+
+/**
+Converts a raw string token into its value.  The first and last character of
+str (the enclosing quotes) are dropped and the escape sequences \n, \t, \\,
+\" and \xHH (exactly two hex digits) are decoded.
+
+Throws ParseError on an unknown escape sequence, an invalid hex digit, an
+escape sequence that is cut short by the end of the string, or a token too
+short to contain both quotes.
+*/
+string TextParser::unescape_string(const string &str)
+{
+       // The iterator arithmetic below needs both enclosing quotes to be present
+       if(str.size()<2)
+               throw ParseError(get_location()+": Malformed string token", src, in.get_line_number());
+
+       string   result;
+       bool     escape=false;
+       // Zero when not in a hex escape.  Otherwise holds a marker bit that gets
+       // shifted to 0x10000 once two digits have been collected below it.
+       unsigned hexcape=0;
+       for(string::const_iterator i=str.begin()+1; i!=str.end()-1; ++i)
+       {
+               if(escape)
+               {
+                       if(*i=='n')
+                               result+='\n';
+                       else if(*i=='t')
+                               result+='\t';
+                       else if(*i=='\\')
+                               result+='\\';
+                       else if(*i=='"')
+                               result+='"';
+                       else if(*i=='x')
+                               hexcape=0x100;
+                       else
+                               throw ParseError(format("%s: Invalid escape sequence '\\%c'", get_location(), *i), src, in.get_line_number());
+                       escape=false;
+               }
+               else if(hexcape)
+               {
+                       unsigned digit=0;
+                       if(*i>='0' && *i<='9')
+                               digit=*i-'0';
+                       else if(*i>='a' && *i<='f')
+                               digit=*i-'a'+10;
+                       else if(*i>='A' && *i<='F')
+                               digit=*i-'A'+10;
+                       else
+                               throw ParseError(get_location()+": Invalid hex digit", src, in.get_line_number());
+
+                       hexcape=(hexcape<<4)|digit;
+                       if(hexcape&0x10000)
+                       {
+                               result+=static_cast<char>(hexcape&0xFF);
+                               hexcape=0;
+                       }
+               }
+               else if(*i=='\\')
+                       escape=true;
+               else
+                       result+=*i;
+       }
+
+       // An escape that was started but not completed would otherwise vanish silently
+       if(escape || hexcape)
+               throw ParseError(get_location()+": Incomplete escape sequence", src, in.get_line_number());
+
+       return result;
+}
+
+/** Returns the current position in the input as "source:line". */
+string TextParser::get_location()
+{
+       return format("%s:%d", src, in.get_line_number());
+}
+
+/**
+Throws a ParseError reporting the character that could not be handled and the
+numeric parser state it was encountered in.  Never returns normally.
+*/
+void TextParser::parse_error(int c, int state)
+{
+       const char   offender=static_cast<char>(c);
+       const string message=format("%s: Parse error at '%c' (state %d)", get_location(), offender, state);
+       throw ParseError(message, src, in.get_line_number());
+}
+
+} // namespace DataFile
+} // namespace Msp
diff --git a/source/textparser.h b/source/textparser.h
new file mode 100644 (file)
index 0000000..9dfcf6d
--- /dev/null
@@ -0,0 +1,36 @@
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2007  Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#ifndef MSP_DATAFILE_TEXTPARSER_H_
+#define MSP_DATAFILE_TEXTPARSER_H_
+
+#include "parsermode.h"
+
+namespace Msp {
+namespace DataFile {
+
+class Token;
+
+/**
+Parser backend for the text format.  Reads characters from the Input and
+assembles them into tokens and Statements.
+*/
+class TextParser: public ParserMode
+{
+public:
+       TextParser(Input &, const std::string &);
+       virtual Statement parse();
+protected:
+       // Parses one statement; a non-null pointer supplies an already read first token
+       Statement parse_statement(const Token *);
+       // Skips whitespace and comments, then reads the next token
+       Token parse_token();
+       // Tells whether a character ends a token: whitespace, '{', '}', ';' or '/'
+       bool  is_delimiter(int);
+       // Tells whether a character is an octal digit ('0' to '7')
+       bool  isodigit(int);
+       // Strips the enclosing quotes of a string token and decodes escape sequences
+       std::string unescape_string(const std::string &);
+       // Returns the current position as "source:line"
+       std::string get_location();
+       // Throws a ParseError naming the offending character and the parser state
+       void  parse_error(int, int);
+};
+
+} // namespace DataFile
+} // namespace Msp
+
+#endif
diff --git a/source/textwriter.cpp b/source/textwriter.cpp
new file mode 100644 (file)
index 0000000..68111c0
--- /dev/null
@@ -0,0 +1,51 @@
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2006  Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#include "statement.h"
+#include "textwriter.h"
+
+using namespace std;
+
+namespace Msp {
+namespace DataFile {
+
+/** Passes the output stream on to the WriterMode base class. */
+TextWriter::TextWriter(ostream &o):
+       WriterMode(o)
+{ }
+
+/** Writes a statement and its sub-statements, starting at indentation level zero. */
+void TextWriter::write(const Statement &st)
+{
+       write_(st, 0);
+}
+
+/**
+Returns a copy of str in which backslashes, double quotes, newlines and tabs
+are replaced with the escape sequences \\, \", \n and \t, so that the result
+can be placed between double quotes without ending the string early.
+*/
+static string escape_text_string(const string &str)
+{
+       string result;
+       result.reserve(str.size());
+       for(string::const_iterator i=str.begin(); i!=str.end(); ++i)
+       {
+               switch(*i)
+               {
+               case '\\': result+="\\\\"; break;
+               case '"':  result+="\\\""; break;
+               case '\n': result+="\\n"; break;
+               case '\t': result+="\\t"; break;
+               default:   result+=*i;
+               }
+       }
+       return result;
+}
+
+/**
+Writes a statement in text form, indented with level tabs.  The keyword is
+followed by the arguments separated by spaces; strings are quoted and escaped
+and booleans are written as true/false.  Sub-statements, if any, are written
+recursively inside braces at one level deeper.  The statement is terminated
+with a semicolon and a newline.
+*/
+void TextWriter::write_(const Statement &st, unsigned level)
+{
+       string indent(level, '\t');
+
+       out<<indent<<st.keyword;
+       for(ValueArray::const_iterator i=st.args.begin(); i!=st.args.end(); ++i)
+       {
+               out<<' ';
+               if(i->get_type()==STRING)
+                       // Without escaping, a quote or backslash in the value would corrupt the output
+                       out<<'"'<<escape_text_string(i->get_raw())<<'"';
+               else if(i->get_type()==BOOLEAN)
+                       out<<(i->get<bool>() ? "true" : "false");
+               else
+                       out<<i->get_raw();
+       }
+       if(!st.sub.empty())
+       {
+               out<<'\n'<<indent<<"{\n";
+               for(list<Statement>::const_iterator i=st.sub.begin(); i!=st.sub.end(); ++i)
+                       write_(*i, level+1);
+               out<<indent<<'}';
+       }
+       out<<";\n";
+}
+
+} // namespace DataFile
+} // namespace Msp
diff --git a/source/textwriter.h b/source/textwriter.h
new file mode 100644 (file)
index 0000000..5c83b7a
--- /dev/null
@@ -0,0 +1,28 @@
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2006  Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#ifndef MSP_DATAFILE_TEXTWRITER_H_
+#define MSP_DATAFILE_TEXTWRITER_H_
+
+#include "writermode.h"
+
+namespace Msp {
+namespace DataFile {
+
+/** Writer backend that outputs statements in the text format. */
+class TextWriter: public WriterMode
+{
+public:
+       TextWriter(std::ostream &o);
+       // Writes a statement at the top indentation level
+       void write(const Statement &st);
+private:
+       // Writes a statement at the given indentation level, recursing into sub-statements
+       void write_(const Statement &st, unsigned);
+};
+
+} // namespace DataFile
+} // namespace Msp
+
+#endif
index f809c8fe4d3001cfba4660f45b9e9556a15d3ab8..fda83e494db884a228036fafdf3f6ba5c3ec9439 100644 (file)
@@ -7,54 +7,61 @@ Distributed under the LGPL
 #ifndef MSP_DATAFILE_VALUE_H_
 #define MSP_DATAFILE_VALUE_H_
 
-#include <sstream>
-#include <string>
 #include <vector>
+#include <msp/strings/lexicalcast.h>
 #include "error.h"
 
 namespace Msp {
 namespace DataFile {
 
+enum Type
+{
+       INTEGER,
+       FLOAT,
+       STRING,
+       BOOLEAN,
+       ENUM
+};
+
+template<typename T> struct TypeResolver       { static const Type type=ENUM; };
+template<> struct TypeResolver<short>          { static const Type type=INTEGER; };
+template<> struct TypeResolver<unsigned short> { static const Type type=INTEGER; };
+template<> struct TypeResolver<int>            { static const Type type=INTEGER; };
+template<> struct TypeResolver<unsigned>       { static const Type type=INTEGER; };
+template<> struct TypeResolver<long>           { static const Type type=INTEGER; };
+template<> struct TypeResolver<unsigned long>  { static const Type type=INTEGER; };
+template<> struct TypeResolver<long long>      { static const Type type=INTEGER; };
+template<> struct TypeResolver<unsigned long long> { static const Type type=INTEGER; };
+template<> struct TypeResolver<float>          { static const Type type=FLOAT; };
+template<> struct TypeResolver<double>         { static const Type type=FLOAT; };
+template<> struct TypeResolver<bool>           { static const Type type=BOOLEAN; };
+template<> struct TypeResolver<std::string>    { static const Type type=STRING; };
+template<typename T> struct TypeResolver<const T>   { static const Type type=TypeResolver<T>::type; };
+template<typename T> struct TypeResolver<T &>       { static const Type type=TypeResolver<T>::type; };
+template<typename T> struct TypeResolver<const T &> { static const Type type=TypeResolver<T>::type; };
+
 class Value
 {
 public:
-       enum Type
-       {
-               INTEGER,
-               FLOAT,
-               STRING,
-               BOOLEAN,
-               ENUM
-       };
-
        Value(Type t, const std::string &d): type(t), data(d) { }
+
+       template<typename T>
+       Value(T d): type(TypeResolver<T>::type), data(lexical_cast(d)) { }
+
        template<typename T>
        T get() const;
+
+       Type get_type() const { return type; }
+       const std::string &get_raw() const { return data; }
 private:
        Type type;
        std::string data;
 };
-typedef std::vector<Value> ValueArray;
-
-template<typename T> struct TypeResolver { static const Value::Type type=Value::ENUM; };
-
-template<> struct TypeResolver<short>          { static const Value::Type type=Value::INTEGER; };
-template<> struct TypeResolver<unsigned short> { static const Value::Type type=Value::INTEGER; };
-template<> struct TypeResolver<int>            { static const Value::Type type=Value::INTEGER; };
-template<> struct TypeResolver<unsigned>       { static const Value::Type type=Value::INTEGER; };
-template<> struct TypeResolver<long>           { static const Value::Type type=Value::INTEGER; };
-template<> struct TypeResolver<unsigned long>  { static const Value::Type type=Value::INTEGER; };
-template<> struct TypeResolver<float>          { static const Value::Type type=Value::FLOAT; };
-template<> struct TypeResolver<double>         { static const Value::Type type=Value::FLOAT; };
-template<> struct TypeResolver<bool>           { static const Value::Type type=Value::BOOLEAN; };
 
-template<Value::Type T> inline bool check_type(Value::Type) { return false; }
+typedef std::vector<Value> ValueArray;
 
-template<> inline bool check_type<Value::INTEGER>(Value::Type t) { return t==Value::INTEGER; }
-template<> inline bool check_type<Value::FLOAT>(Value::Type t)   { return t==Value::INTEGER || t==Value::FLOAT; }
-template<> inline bool check_type<Value::BOOLEAN>(Value::Type t) { return t==Value::BOOLEAN; }
-template<> inline bool check_type<Value::STRING>(Value::Type t)  { return t==Value::STRING; }
-template<> inline bool check_type<Value::ENUM>(Value::Type t)    { return t==Value::ENUM; }
+template<Type T> inline bool check_type(Type t)  { return t==T; }
+template<> inline bool check_type<FLOAT>(Type t) { return t==INTEGER || t==FLOAT; }
 
 template<typename T>
 inline T Value::get() const
@@ -76,7 +83,7 @@ template<>
 inline std::string Value::get<std::string>() const
 {
        if(type!=STRING)
-               throw TypeError("Value is not a string");
+               throw TypeError("Type mismatch");
        return data;
 }
 
@@ -84,7 +91,7 @@ template<>
 inline const std::string &Value::get<const std::string&>() const
 {
        if(type!=STRING)
-               throw TypeError("Value is not a string");
+               throw TypeError("Type mismatch");
        return data;
 }
 
diff --git a/source/writer.cpp b/source/writer.cpp
new file mode 100644 (file)
index 0000000..cdd78b6
--- /dev/null
@@ -0,0 +1,52 @@
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2006  Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#include "binarywriter.h"
+#include "statement.h"
+#include "textwriter.h"
+#include "writer.h"
+
+using namespace std;
+
+namespace Msp {
+namespace DataFile {
+
+/** Creates a writer for the given stream.  Output starts in text mode with a TextWriter backend. */
+Writer::Writer(ostream &o):
+       out(o),
+       mode(new TextWriter(out)),
+       binary(false)
+{ }
+
+/** Writes a statement using the currently active backend. */
+void Writer::write(const Statement &st)
+{
+       mode->write(st);
+}
+
+/**
+Switches between text and binary output.  Does nothing if the requested mode
+is already active.  Otherwise a __bin or __text statement is written with the
+backend that is active before the switch, after which the backend is replaced.
+
+The writer's state is only changed once the new backend has been created, so
+an exception from writing the marker or from allocation leaves the writer
+usable in its previous mode.
+*/
+void Writer::set_binary(bool b)
+{
+       if(b==binary)
+               return;
+
+       // Announce the switch in the format that is in effect up to this point
+       Statement st;
+       st.keyword=(b ? "__bin" : "__text");
+       mode->write(st);
+
+       // Create the replacement first; deleting the old backend before this
+       // could leave mode dangling if the allocation throws
+       WriterMode *new_mode;
+       if(b)
+               new_mode=new BinaryWriter(out);
+       else
+               new_mode=new TextWriter(out);
+
+       delete mode;
+       mode=new_mode;
+       binary=b;
+}
+
+} // namespace DataFile
+} // namespace Msp
diff --git a/source/writer.h b/source/writer.h
new file mode 100644 (file)
index 0000000..756e19a
--- /dev/null
@@ -0,0 +1,37 @@
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2006  Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#ifndef MSP_DATAFILE_WRITER_H_
+#define MSP_DATAFILE_WRITER_H_
+
+#include <map>
+#include <ostream>
+#include "binarydict.h"
+
+namespace Msp {
+namespace DataFile {
+
+class Statement;
+class WriterMode;
+
+/**
+Front end for writing statements to an output stream.  Formatting is
+delegated to a WriterMode object, which is replaced when set_binary switches
+between text and binary output.
+
+NOTE(review): no destructor is declared here although mode points to an
+object allocated with new, and copying is not disabled.  Confirm where the
+backend is supposed to be released.
+*/
+class Writer
+{
+private:
+       std::ostream &out;
+       // Currently active backend
+       WriterMode *mode;
+       // True when the active backend is the binary one
+       bool binary;
+
+public:
+       Writer(std::ostream &);
+       // Writes a statement with the active backend
+       void write(const Statement &);
+       // Selects binary (true) or text (false) output
+       void set_binary(bool);
+};
+
+} // namespace DataFile
+} // namespace Msp
+
+#endif
diff --git a/source/writermode.h b/source/writermode.h
new file mode 100644 (file)
index 0000000..e5853ce
--- /dev/null
@@ -0,0 +1,33 @@
+/* $Id$
+
+This file is part of libmspdatafile
+Copyright © 2006  Mikko Rasa, Mikkosoft Productions
+Distributed under the LGPL
+*/
+
+#ifndef MSP_DATAFILE_WRITERMODE_H_
+#define MSP_DATAFILE_WRITERMODE_H_
+
+#include <ostream>
+
+namespace Msp {
+namespace DataFile {
+
+class Statement;
+
+/**
+Base class for writer backends.  Stores the output stream given to the
+constructor and declares the write() operation that concrete backends
+implement.
+*/
+class WriterMode
+{
+protected:
+       // Stored as a reference; this class does not delete it
+       std::ostream &out;
+
+       WriterMode(std::ostream &o): out(o) { }
+public:
+       virtual ~WriterMode() { }
+
+       // Writes a statement to the output stream; implemented by the backends
+       virtual void write(const Statement &st) =0;
+};
+
+} // namespace DataFile
+} // namespace Msp
+
+#endif
diff --git a/tool.cpp b/tool.cpp
new file mode 100644 (file)
index 0000000..533abb0
--- /dev/null
+++ b/tool.cpp
@@ -0,0 +1,75 @@
+/* $Id$ */
+#include <fstream>
+#include <iostream>
+#include <msp/core/application.h>
+#include <msp/core/getopt.h>
+#include "source/parser.h"
+#include "source/statement.h"
+#include "source/writer.h"
+
+using namespace std;
+using namespace Msp;
+
+/**
+Command line tool that parses a datafile and writes its statements back out,
+optionally in binary format.
+*/
+class DataTool: public Application
+{
+private:
+       // Input filename, taken from the first non-option argument
+       string in_fn;
+       // Output filename from -o/--output; "-" selects standard output
+       string out_fn;
+       // Target of the -b/--binary option; requests binary output
+       bool binary;
+public:
+       DataTool(int argc, char **argv);
+       int main();
+
+       static Application::RegApp<DataTool> reg;
+};
+
+
+/**
+Processes the command line.  Recognized options are -o/--output for the
+output filename (default "-") and -b/--binary for binary output.  The first
+non-option argument is taken as the input filename.
+
+Throws UsageError if no input filename is given.
+*/
+DataTool::DataTool(int argc, char **argv):
+       out_fn("-"),
+       // Must have a defined value for when -b is not given; main() reads it
+       binary(false)
+{
+       GetOpt getopt;
+       getopt.add_option('o', "output", out_fn, GetOpt::REQUIRED_ARG);
+       getopt.add_option('b', "binary", binary, GetOpt::NO_ARG);
+       getopt(argc, argv);
+
+       const vector<string> &args=getopt.get_args();
+       if(args.empty())
+               throw UsageError("Must give input filename");
+
+       in_fn=args[0];
+}
+
+/**
+Reads statements from the input file and writes every valid one to the
+output, which is standard output if the output filename is "-".
+
+Returns 0 on success and 1 if the input or output file can't be opened.
+*/
+int DataTool::main()
+{
+       ifstream in(in_fn.c_str());
+       if(!in)
+       {
+               cerr<<"Couldn't open input file\n";
+               return 1;
+       }
+
+       // The file stream is a local object so that it gets closed on every exit
+       // path, including an exception from the parser or the writer
+       ofstream out_file;
+       ostream *out=&cout;
+       if(out_fn!="-")
+       {
+               out_file.open(out_fn.c_str());
+               if(!out_file)
+               {
+                       cerr<<"Couldn't open output file\n";
+                       return 1;
+               }
+               out=&out_file;
+       }
+
+       DataFile::Parser parser(in, in_fn);
+       DataFile::Writer writer(*out);
+       if(binary)
+               writer.set_binary(true);
+
+       while(parser)
+       {
+               DataFile::Statement st=parser.parse();
+               if(st.valid)
+                       writer.write(st);
+       }
+
+       return 0;
+}
+
+Application::RegApp<DataTool> DataTool::reg;