]> git.tdb.fi Git - libs/datafile.git/commitdiff
More efficient way of loading binary files
authorMikko Rasa <tdb@tdb.fi>
Mon, 5 Aug 2013 11:56:26 +0000 (14:56 +0300)
committerMikko Rasa <tdb@tdb.fi>
Mon, 5 Aug 2013 11:59:29 +0000 (14:59 +0300)
The Variant array for storing arguments in Statement is too slow.  Bypass
it by passing args directly to the action in a sort of ad-hoc struct.

13 files changed:
source/argumentstore.cpp [new file with mode: 0644]
source/argumentstore.h [new file with mode: 0644]
source/binaryparser.cpp
source/binaryparser.h
source/loader.cpp
source/loader.h
source/loaderaction.h
source/parser.cpp
source/parser.h
source/parsermode.h
source/statement.cpp
source/statement.h
source/type.h

diff --git a/source/argumentstore.cpp b/source/argumentstore.cpp
new file mode 100644 (file)
index 0000000..49bdbad
--- /dev/null
@@ -0,0 +1,37 @@
+#include "argumentstore.h"
+
+namespace Msp {
+namespace DataFile {
+
+ArgumentStore::ArgumentStore(const StatementInfo &n):
+       info(n),
+       store(new char[info.args_size])
+{
+       for(unsigned i=0; i<info.key.signature.size(); ++i)
+               switch(info.key.signature[i])
+               {
+               case StringType::signature:
+                       new(store+info.arg_offsets[i]) StringType::Store;
+                       break;
+               case SymbolType::signature:
+                       new(store+info.arg_offsets[i]) SymbolType::Store;
+                       break;
+               }
+}
+
+ArgumentStore::~ArgumentStore()
+{
+       for(unsigned i=0; i<info.key.signature.size(); ++i)
+               switch(info.key.signature[i])
+               {
+               case StringType::signature:
+                       reinterpret_cast<StringType::Store *>(store+info.arg_offsets[i])->~basic_string();
+                       break;
+               case SymbolType::signature:
+                       reinterpret_cast<SymbolType::Store *>(store+info.arg_offsets[i])->~Symbol();
+                       break;
+               }
+}
+
+} // namespace DataFile
+} // namespace Msp
diff --git a/source/argumentstore.h b/source/argumentstore.h
new file mode 100644 (file)
index 0000000..c2d5d76
--- /dev/null
@@ -0,0 +1,56 @@
+#ifndef MSP_DATAFILE_ARGUMENTSTORE_H_
+#define MSP_DATAFILE_ARGUMENTSTORE_H_
+
+#include "statement.h"
+
+namespace Msp {
+namespace DataFile {
+
+class ArgumentStore
+{
+private:
+       const StatementInfo &info;
+       char *store;
+
+public:
+       ArgumentStore(const StatementInfo &);
+       ~ArgumentStore();
+
+       const StatementInfo &get_info() const { return info; }
+
+       template<typename T>
+       void set(unsigned i, const T &v)
+       {
+               *reinterpret_cast<typename TypeInfo<T>::Store *>(store+info.arg_offsets[i]) = v;
+       }
+
+       template<typename T>
+       typename TypeInfo<T>::Load get(unsigned i) const
+       {
+               return extract<typename TypeInfo<T>::Store>(store+info.arg_offsets[i], info.key.signature[i]);
+       }
+
+private:
+       template<typename T>
+       T extract(const char *, char) const;
+};
+
+/* Generic case: the slot is read back as exactly T and the signature character
+is ignored. */
+template<typename T>
+inline T ArgumentStore::extract(const char *p, char) const
+{
+       const T *slot = reinterpret_cast<const T *>(p);
+       return *slot;
+}
+
+/* Float loads also accept a slot that was stored as an integer; the signature
+character tells which of the two types the slot really holds. */
+template<>
+inline FloatType::Store ArgumentStore::extract<FloatType::Store>(const char *p, char s) const
+{
+       if(s!=IntType::signature)
+               return *reinterpret_cast<const FloatType::Store *>(p);
+
+       return *reinterpret_cast<const IntType::Store *>(p);
+}
+
+} // namespace DataFile
+} // namespace Msp
+
+#endif
index 098a5fd4f96c936cad7aae6bfba31b0627c357ef..051133f79a32cecb540c021f87b6930f5e168566 100644 (file)
@@ -2,9 +2,11 @@
 #include <sys/param.h>
 #include <msp/core/maputils.h>
 #include <msp/strings/format.h>
+#include "argumentstore.h"
 #include "binaryparser.h"
 #include "binfloat.h"
 #include "input.h"
+#include "loaderaction.h"
 
 using namespace std;
 
@@ -21,31 +23,48 @@ public:
        virtual ~bad_definition() throw() { }
 };
 
+/* Thrown by BinaryParser::peek when the requested level is deeper than the
+current substatement nesting. */
+class nesting_error: public logic_error
+{
+public:
+       nesting_error(const std::string &w): logic_error(w) { }
+       virtual ~nesting_error() throw() { }
+};
+
 
 BinaryParser::BinaryParser(Input &i, const string &s):
        ParserMode(i, s),
-       float_precision(32)
+       float_precision(32),
+       cur_info(0)
 {
-       dict[-1] = StatementKey("__kwd", "iss");
-       dict[-2] = StatementKey("__str", "is");
-       dict[-3] = StatementKey("__flt", "i");
+       dict[-1] = StatementInfo("__kwd", "iss");
+       dict[-2] = StatementInfo("__str", "is");
+       dict[-3] = StatementInfo("__flt", "i");
 }
 
 Statement BinaryParser::parse()
 {
-       int id = parse_int();
-       if(!in)
-               return Statement();
+       const StatementKey *key;
+       if(cur_info)
+               key = &cur_info->key;
+       else
+       {
+               int id = parse_int();
+               if(!in)
+                       return Statement();
 
-       const StatementKey &key = get_item(dict, id);
+               key = &get_item(dict, id).key;
+       }
 
        Statement result;
-       result.keyword = key.keyword;
+       result.keyword = key->keyword;
        result.source = src;
 
-       for(unsigned j=0; j<key.signature.size(); ++j)
+       for(unsigned j=0; j<key->signature.size(); ++j)
        {
-               switch(key.signature[j])
+               switch(key->signature[j])
                {
                case IntType::signature:
                        result.args.push_back(parse_int());
@@ -65,11 +84,15 @@ Statement BinaryParser::parse()
                }
        }
 
+       if(!sub_remaining.empty())
+               --sub_remaining.back();
+
        unsigned nsub = parse_int();
        for(unsigned j = 0; j<nsub; ++j)
                result.sub.push_back(parse());
 
        result.valid = true;
+       cur_info = 0;
 
        return result;
 }
@@ -89,7 +112,7 @@ void BinaryParser::process_control_statement(const Statement &st)
                                if(!valid_signatures[j])
                                        throw bad_definition("__kwd");
 
-               dict[id] = StatementKey(kw, args);
+               dict[id] = StatementInfo(kw, args);
        }
        else if(st.keyword=="__str")
        {
@@ -103,6 +126,73 @@ void BinaryParser::process_control_statement(const Statement &st)
                float_precision = st.args[0].get<unsigned>();
 }
 
+const StatementKey *BinaryParser::peek(unsigned level)
+{
+       if(level>sub_remaining.size())
+               throw nesting_error("bad level");
+       while(level<sub_remaining.size())
+       {
+               // Discard any substatements that haven't been parsed yet
+               for(unsigned i=sub_remaining.back(); i-->0; )
+                       parse();
+               sub_remaining.pop_back();
+               cur_info = 0;
+       }
+
+       if(!sub_remaining.empty() && sub_remaining.back()==0)
+       {
+               // No more substatements on this level
+               cur_info = 0;
+               return 0;
+       }
+
+       if(cur_info)
+               return &cur_info->key;
+
+       int id = parse_int();
+       if(!in)
+               return 0;
+
+       cur_info = &get_item(dict, id);
+       return &cur_info->key;
+}
+
+bool BinaryParser::parse_and_load(unsigned level, Loader &ldr, const LoaderAction &act)
+{
+       if(!cur_info && !peek(level))
+               return false;
+
+       ArgumentStore args(*cur_info);
+       for(unsigned i=0; i<cur_info->key.signature.size(); ++i)
+               switch(cur_info->key.signature[i])
+               {
+               case IntType::signature:
+                       args.set(i, parse_int());
+                       break;
+               case FloatType::signature:
+                       args.set(i, parse_float());
+                       break;
+               case BoolType::signature:
+                       args.set(i, parse_bool());
+                       break;
+               case StringType::signature:
+                       args.set(i, parse_string());
+                       break;
+               case SymbolType::signature:
+                       args.set(i, parse_symbol());
+                       break;
+               }
+
+       if(!sub_remaining.empty())
+               --sub_remaining.back();
+       sub_remaining.push_back(parse_int());
+       cur_info = 0;
+
+       act.execute(ldr, args);
+
+       return true;
+}
+
 IntType::Store BinaryParser::parse_int()
 {
        IntType::Store result = 0;
index 255209a971bed7fdd5e7149e7a9dd20a8fb30cd4..dfb0de995c7092dd60c9ebc19d6d636b408de84b 100644 (file)
@@ -14,18 +14,24 @@ Parses data in binary format.
 class BinaryParser: public ParserMode
 {
 private:
-       typedef std::map<int, StatementKey> Dictionary;
+       typedef std::map<int, StatementInfo> Dictionary;
        typedef std::map<unsigned, std::string> StringMap;
 
        Dictionary dict;
        StringMap strings;
        unsigned float_precision;
+       StatementInfo *cur_info;
+       std::vector<unsigned> sub_remaining;
 
 public:
        BinaryParser(Input &i, const std::string &s);
 
        virtual Statement parse();
        virtual void process_control_statement(const Statement &);
+
+       virtual const StatementKey *peek(unsigned);
+       virtual bool parse_and_load(unsigned, Loader &, const LoaderAction &);
+
 private:
        IntType::Store parse_int();
        FloatType::Store parse_float();
index c2de4011232c93da747bfdead7df18af72c6087d..e1a0d4abd7bcd80adfa4040c2a9bdf43ffb43ee7 100644 (file)
@@ -71,6 +71,7 @@ public:
 
 Loader::Loader():
        cur_st(0),
+       direct(false),
        check_sub_loads(false)
 { }
 
@@ -84,9 +85,15 @@ void Loader::load(Parser &p)
 {
        while(p)
        {
-               Statement st = p.parse();
-               if(st.valid)
-                       load_statement(st);
+               if(p.peek(0))
+                       load_direct(p, 0);
+               else
+               {
+                       // Parse in raw mode so we can peek immediately after a mode change
+                       Statement st = p.parse(true);
+                       if(st.valid && !st.control)
+                               load_statement(st);
+               }
        }
        finish();
 }
@@ -98,6 +105,29 @@ void Loader::load(const Statement &st)
        finish();
 }
 
+void Loader::load_direct(Parser &p, unsigned l)
+{
+       SetForScope<Parser *> set_parser(cur_parser, &p);
+       SetForScope<unsigned> set_level(cur_level, l);
+
+       while(p)
+       {
+               const StatementKey *key = p.peek(l);
+               if(!key)
+                       break;
+
+               LoaderAction *act = find_action(*key);
+               if(act)
+               {
+                       SetFlag set_direct(direct);
+                       if(!p.parse_and_load(l, *this, *act))
+                               throw logic_error("direct load failed");
+               }
+               else
+                       load_statement(p.parse());
+       }
+}
+
 void Loader::load_statement(const Statement &st)
 {
        SetForScope<const Statement *> set_cst(cur_st, &st);
@@ -134,11 +164,18 @@ void Loader::load_statement(const Statement &st)
 
 void Loader::load_sub_with(Loader &ldr)
 {
-       if(!cur_st)
+       if(direct)
+       {
+               ldr.load_direct(*cur_parser, cur_level+1);
+               ldr.finish();
+       }
+       else if(cur_st)
+       {
+               ldr.load(*cur_st);
+               sub_loaded = true;
+       }
+       else
                throw logic_error("no current statement");
-
-       ldr.load(*cur_st);
-       sub_loaded = true;
 }
 
 void Loader::add(const string &kwd, LoaderAction *act)
index bb1292cc7b3c2740adc14a6c596bde54c4a59678..8a1103d11f14d4c251033a20a1189a2a886599e8 100644 (file)
@@ -38,8 +38,11 @@ private:
        typedef std::map<StatementKey, LoaderAction *> ActionMap;
 
        ActionMap actions;
+       Parser *cur_parser;
+       unsigned cur_level;
        const Statement *cur_st;
        bool sub_loaded;
+       bool direct;
        std::list<Loader *> aux_loaders;
 protected:
        bool check_sub_loads;
@@ -55,6 +58,9 @@ private:
        /** Loads data from a statement. */
        void load(const Statement &st);
 
+       /** Loads statements from a parser, feeding them directly to actions. */
+       void load_direct(Parser &, unsigned);
+
        /** Processes a single statement */
        void load_statement(const Statement &st);
 
index afca01e9ba3b478d95bd320e550f0cf7fbf08fff..82fd3b437902a3494167ae295c7f065184424a5b 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef MSP_DATAFILE_LOADERACTION_H_
 #define MSP_DATAFILE_LOADERACTION_H_
 
+#include "argumentstore.h"
 #include "statement.h"
 
 namespace Msp {
@@ -21,6 +22,8 @@ public:
        /** Called to process a statement. */
        virtual void execute(Loader &, const Statement &) const = 0;
 
+       virtual void execute(Loader &, const ArgumentStore &) const = 0;
+
        virtual std::string get_signature() const = 0;
 };
 
@@ -44,6 +47,11 @@ public:
                (dynamic_cast<L &>(l).*func)();
        };
 
+       /** Direct-load counterpart of the Statement overload.  The function
+       takes no arguments, so the store is not read. */
+       virtual void execute(Loader &l, const ArgumentStore &) const
+       {
+               L &target = dynamic_cast<L &>(l);
+               (target.*func)();
+       }
+
        virtual std::string get_signature() const
        { return std::string(); }
 };
@@ -68,6 +76,11 @@ public:
                (dynamic_cast<L &>(l).*func)(st.args[0].get<A0>());
        }
 
+       /** Calls the function with argument 0 read from the store as A0. */
+       virtual void execute(Loader &l, const ArgumentStore &as) const
+       {
+               L &target = dynamic_cast<L &>(l);
+               (target.*func)(as.get<A0>(0));
+       }
+
        virtual std::string get_signature() const
        { return std::string(1, TypeInfo<A0>::signature); }
 };
@@ -96,6 +109,16 @@ public:
                (dynamic_cast<L &>(l).*func)(values);
        }
 
+       virtual void execute(Loader &l, const ArgumentStore &as) const
+       {
+               std::vector<A0> values;
+               unsigned n_args = as.get_info().key.signature.size();
+               values.reserve(n_args);
+               for(unsigned i=0; i<n_args; ++i)
+                       values.push_back(as.get<A0>(i));
+               (dynamic_cast<L &>(l).*func)(values);
+       }
+
        virtual std::string get_signature() const
        {
                std::string result;
@@ -125,6 +148,11 @@ public:
                (dynamic_cast<L &>(l).*func)(st);
        }
 
+       /** This action passes the whole Statement to its function, which an
+       ArgumentStore cannot provide, so this overload always throws
+       std::logic_error. */
+       virtual void execute(Loader &, const ArgumentStore &) const
+       { throw std::logic_error("incompatible format"); }
+
        virtual std::string get_signature() const
        { return "*"; }
 };
@@ -146,6 +174,11 @@ public:
                (dynamic_cast<L &>(l).*func)(st.args[0].get<A0>(), st.args[1].get<A1>());
        }
 
+       /** Calls the function with arguments 0 and 1 read from the store. */
+       virtual void execute(Loader &l, const ArgumentStore &as) const
+       {
+               L &target = dynamic_cast<L &>(l);
+               (target.*func)(as.get<A0>(0), as.get<A1>(1));
+       }
+
        virtual std::string get_signature() const
        {
                std::string result;
@@ -172,6 +205,11 @@ public:
                (dynamic_cast<L &>(l).*func)(st.args[0].get<A0>(), st.args[1].get<A1>(), st.args[2].get<A2>());
        }
 
+       /** Calls the function with arguments 0 to 2 read from the store. */
+       virtual void execute(Loader &l, const ArgumentStore &as) const
+       {
+               L &target = dynamic_cast<L &>(l);
+               (target.*func)(as.get<A0>(0), as.get<A1>(1), as.get<A2>(2));
+       }
+
        virtual std::string get_signature() const
        {
                std::string result;
@@ -199,6 +237,11 @@ public:
                (dynamic_cast<L &>(l).*func)(st.args[0].get<A0>(), st.args[1].get<A1>(), st.args[2].get<A2>(), st.args[3].get<A3>());
        }
 
+       /** Calls the function with arguments 0 to 3 read from the store. */
+       virtual void execute(Loader &l, const ArgumentStore &as) const
+       {
+               L &target = dynamic_cast<L &>(l);
+               (target.*func)(as.get<A0>(0), as.get<A1>(1), as.get<A2>(2), as.get<A3>(3));
+       }
+
        virtual std::string get_signature() const
        {
                std::string result;
@@ -227,6 +270,11 @@ public:
                (dynamic_cast<L &>(l).*func)(st.args[0].get<A0>(), st.args[1].get<A1>(), st.args[2].get<A2>(), st.args[3].get<A3>(), st.args[4].get<A4>());
        }
 
+       /** Calls the function with arguments 0 to 4 read from the store. */
+       virtual void execute(Loader &l, const ArgumentStore &as) const
+       {
+               L &target = dynamic_cast<L &>(l);
+               (target.*func)(as.get<A0>(0), as.get<A1>(1), as.get<A2>(2), as.get<A3>(3), as.get<A4>(4));
+       }
+
        virtual std::string get_signature() const
        {
                std::string result;
@@ -256,6 +304,11 @@ public:
                dynamic_cast<typename L::Loader &>(l).get_object().*ptr0 = st.args[0].get<T0>();
        }
 
+       /** Assigns argument 0 from the store to the member of the loaded object
+       designated by ptr0. */
+       virtual void execute(Loader &l, const ArgumentStore &as) const
+       {
+               typename L::Loader &ldr = dynamic_cast<typename L::Loader &>(l);
+               ldr.get_object().*ptr0 = as.get<T0>(0);
+       }
+
        virtual std::string get_signature() const
        { return std::string(1, TypeInfo<T0>::signature); }
 };
@@ -278,6 +331,12 @@ public:
                ldr.get_object().*ptr0 = &ldr.get_collection().template get<T0>(st.args[0].get<std::string>());
        }
 
+       /** Reads argument 0 as a string, gets the T0 of that name from the
+       loader's collection and stores its address in the member designated by
+       ptr0. */
+       virtual void execute(Loader &l, const ArgumentStore &as) const
+       {
+               typename L::Loader &target = dynamic_cast<typename L::Loader &>(l);
+               target.get_object().*ptr0 =
+                       &target.get_collection().template get<T0>(as.get<std::string>(0));
+       }
+
        virtual std::string get_signature() const
        { return std::string(1, TypeInfo<std::string>::signature); }
 };
@@ -302,6 +361,12 @@ public:
                dynamic_cast<typename L::Loader &>(l).get_object().*ptr1 = st.args[1].get<T1>();
        }
 
+       /** Assigns arguments 0 and 1 from the store to the members of the loaded
+       object designated by ptr0 and ptr1, in that order. */
+       virtual void execute(Loader &l, const ArgumentStore &as) const
+       {
+               typename L::Loader &ldr = dynamic_cast<typename L::Loader &>(l);
+               ldr.get_object().*ptr0 = as.get<T0>(0);
+               ldr.get_object().*ptr1 = as.get<T1>(1);
+       }
+
        virtual std::string get_signature() const
        {
                std::string result;
index d298eca44765ca134ae8b65f191c31296830ca92..9ad70d8742884bfdfa40d5bebbc467b9e289afe7 100644 (file)
@@ -86,5 +86,24 @@ void Parser::process_control_statement(const Statement &st)
                mode->process_control_statement(st);
 }
 
+const StatementKey *Parser::peek(unsigned level)
+{
+       while(1)
+       {
+               const StatementKey *key = mode->peek(level);
+               if(key && !key->keyword.compare(0, 2, "__"))
+                       process_control_statement(mode->parse());
+               else
+                       return key;
+       }
+}
+
+/* Forwards to the current mode's parse_and_load and returns its result. */
+bool Parser::parse_and_load(unsigned level, Loader &ldr, const LoaderAction &act)
+{
+       // Peek first to get any control statements processed; the key itself is
+       // not needed here
+       peek(level);
+
+       const bool loaded = mode->parse_and_load(level, ldr, act);
+       return loaded;
+}
+
 } // namespace DataFile
 } // namespace Msp
index 0989bd9d2d1fe378920c469c845f71435d799b28..243db1feb5fd186aa8eda14e93ad9a71bc5f113c 100644 (file)
@@ -7,8 +7,11 @@
 namespace Msp {
 namespace DataFile {
 
+class Loader;
+class LoaderAction;
 class ParserMode;
 class Statement;
+class StatementKey;
 
 /**
 Frontend for loading datafiles.  Handles switching between text and binary
@@ -28,18 +31,25 @@ public:
        Parser(IO::Base &i, const std::string &s);
        ~Parser();
 
-       /**
-       Reads a statement from the input.  If the end of input was reached, an empty
-       invalid statement will be returned.  If an error occurs, the parser will be
-       marked as bad and no more statements may be read, even if the exception was
-       caught.
-       */
+       /** Reads a statement from the input.  If the end of input was reached, an
+       empty invalid statement will be returned.  If an error occurs, the parser
+       will be marked as bad and no more statements may be read, even if the
+       exception was caught. */
        Statement parse(bool raw = false);
 
 private:
        void process_control_statement(const Statement &);
 
 public:
+       /** Returns a key for the next statement, consisting of its keyword and
+       signature.  Not supported in all modes. */
+       const StatementKey *peek(unsigned);
+
+       /** Parses a statement and feeds its arguments to an action.  The action
+       must be appropriate for the statement.  Use peek() to determine the
+       statement's signature. */
+       bool parse_and_load(unsigned, Loader &, const LoaderAction &);
+
        operator bool() const { return good && in; }
 };
 
index 620e9a91acb7f325535b6b0c87fe30545d409cc1..491647e4b09a0b9c307d4f4aa32a6078403ed9a5 100644 (file)
@@ -7,6 +7,8 @@ namespace Msp {
 namespace DataFile {
 
 class Input;
+class Loader;
+class LoaderAction;
 
 /**
 Base class for parse modes.
@@ -23,6 +25,9 @@ public:
 
        virtual Statement parse() = 0;
        virtual void process_control_statement(const Statement &) { }
+
+       /** Default for modes that do not support peeking: always returns null. */
+       virtual const StatementKey *peek(unsigned)
+       {
+               return 0;
+       }
+       /** Default for modes that do not support direct loading: does nothing
+       and returns false. */
+       virtual bool parse_and_load(unsigned, Loader &, const LoaderAction &)
+       {
+               return false;
+       }
 };
 
 } // namespace DataFile
index 29961a963fcfa97ce6d8461c0a384a64ce465bcd..3b6f7165b0ef1fa50874c255b12cd987c2563b22 100644 (file)
@@ -1,5 +1,6 @@
 #include <msp/strings/format.h>
 #include "statement.h"
+#include "type.h"
 
 using namespace std;
 
@@ -33,5 +34,38 @@ string Statement::get_signature() const
        return result;
 }
 
+
+/* Creates an info with a default-constructed key and no argument storage. */
+StatementInfo::StatementInfo(): args_size(0)
+{ }
+
+StatementInfo::StatementInfo(const string &k, const string &s):
+       key(k, s),
+       args_size(0)
+{
+       for(string::const_iterator i=key.signature.begin(); i!=key.signature.end(); ++i)
+       {
+               arg_offsets.push_back(args_size);
+               switch(*i)
+               {
+               case IntType::signature:
+                       args_size += sizeof(IntType::Store);
+                       break;
+               case FloatType::signature:
+                       args_size += sizeof(FloatType::Store);
+                       break;
+               case BoolType::signature:
+                       args_size += sizeof(BoolType::Store);
+                       break;
+               case StringType::signature:
+                       args_size += sizeof(StringType::Store);
+                       break;
+               case SymbolType::signature:
+                       args_size += sizeof(SymbolType::Store);
+                       break;
+               }
+       }
+}
+
 } // namespace DataFile
 } // namespace Msp
index 80b7b15af8da82566f3526d1494e82a3a787c2e4..184ef9955cbc78af80231eabda307bb45605be54 100644 (file)
@@ -49,6 +49,16 @@ struct StatementKey
        { return keyword<o.keyword || (keyword==o.keyword && signature<o.signature); }
 };
 
+/**
+A statement key together with the layout its arguments occupy in an
+ArgumentStore buffer.
+*/
+struct StatementInfo
+{
+       StatementKey key;
+       /// Total number of bytes needed to store all arguments.
+       unsigned args_size;
+       /// Byte offset of each argument, indexed like key.signature.
+       std::vector<unsigned> arg_offsets;
+
+       StatementInfo();
+       StatementInfo(const std::string &keyword, const std::string &signature);
+};
+
 } // namespace DataFile
 } // namespace Msp
 
index 17545bae23ef0922669583344097a65c3e3e04cd..f40657eb14a07003a295cb979ae4813523183d17 100644 (file)
@@ -11,6 +11,8 @@ struct Symbol
 {
        std::string name;
 
+       /** Creates a symbol with an empty name. */
+       Symbol(): name() { }
+
        template<typename T>
        Symbol(const T &n): name(lexical_cast<std::string>(n)) { }