From 3b78eeb8b92dc3524d6a0456b4daf0a0f3dbf813 Mon Sep 17 00:00:00 2001 From: Mikko Rasa Date: Mon, 5 Aug 2013 14:56:26 +0300 Subject: [PATCH] More efficient way of loading binary files The Variant array for storing arguments in Statement is too slow. Bypass it by passing args directly to the action in a sort of ad-hoc struct. --- source/argumentstore.cpp | 37 +++++++++++++ source/argumentstore.h | 56 +++++++++++++++++++ source/binaryparser.cpp | 114 ++++++++++++++++++++++++++++++++++----- source/binaryparser.h | 8 ++- source/loader.cpp | 51 +++++++++++++++--- source/loader.h | 6 +++ source/loaderaction.h | 65 ++++++++++++++++++++++ source/parser.cpp | 19 +++++++ source/parser.h | 22 +++++--- source/parsermode.h | 5 ++ source/statement.cpp | 34 ++++++++++++ source/statement.h | 10 ++++ source/type.h | 2 + 13 files changed, 403 insertions(+), 26 deletions(-) create mode 100644 source/argumentstore.cpp create mode 100644 source/argumentstore.h diff --git a/source/argumentstore.cpp b/source/argumentstore.cpp new file mode 100644 index 0000000..49bdbad --- /dev/null +++ b/source/argumentstore.cpp @@ -0,0 +1,37 @@ +#include "argumentstore.h" + +namespace Msp { +namespace DataFile { + +ArgumentStore::ArgumentStore(const StatementInfo &n): + info(n), + store(new char[info.args_size]) +{ + for(unsigned i=0; i(store+info.arg_offsets[i])->~basic_string(); + break; + case SymbolType::signature: + reinterpret_cast(store+info.arg_offsets[i])->~Symbol(); + break; + } +} + +} // namespace DataFile +} // namespace Msp diff --git a/source/argumentstore.h b/source/argumentstore.h new file mode 100644 index 0000000..c2d5d76 --- /dev/null +++ b/source/argumentstore.h @@ -0,0 +1,56 @@ +#ifndef MSP_DATAFILE_ARGUMENTSTORE_H_ +#define MSP_DATAFILE_ARGUMENTSTORE_H_ + +#include "statement.h" + +namespace Msp { +namespace DataFile { + +class ArgumentStore +{ +private: + const StatementInfo &info; + char *store; + +public: + ArgumentStore(const StatementInfo &); + ~ArgumentStore(); + + const 
StatementInfo &get_info() const { return info; } + + template + void set(unsigned i, const T &v) + { + *reinterpret_cast::Store *>(store+info.arg_offsets[i]) = v; + } + + template + typename TypeInfo::Load get(unsigned i) const + { + return extract::Store>(store+info.arg_offsets[i], info.key.signature[i]); + } + +private: + template + T extract(const char *, char) const; +}; + +template +inline T ArgumentStore::extract(const char *p, char) const +{ + return *reinterpret_cast(p); +} + +template<> +inline FloatType::Store ArgumentStore::extract(const char *p, char s) const +{ + if(s==IntType::signature) + return *reinterpret_cast(p); + else + return *reinterpret_cast(p); +} + +} // namespace DataFile +} // namespace Msp + +#endif diff --git a/source/binaryparser.cpp b/source/binaryparser.cpp index 098a5fd..051133f 100644 --- a/source/binaryparser.cpp +++ b/source/binaryparser.cpp @@ -2,9 +2,11 @@ #include #include #include +#include "argumentstore.h" #include "binaryparser.h" #include "binfloat.h" #include "input.h" +#include "loaderaction.h" using namespace std; @@ -21,31 +23,48 @@ public: virtual ~bad_definition() throw() { } }; +class nesting_error: public logic_error +{ +public: + nesting_error(const std::string &w): + logic_error(w) + { } + + virtual ~nesting_error() throw() { } +}; + BinaryParser::BinaryParser(Input &i, const string &s): ParserMode(i, s), - float_precision(32) + float_precision(32), + cur_info(0) { - dict[-1] = StatementKey("__kwd", "iss"); - dict[-2] = StatementKey("__str", "is"); - dict[-3] = StatementKey("__flt", "i"); + dict[-1] = StatementInfo("__kwd", "iss"); + dict[-2] = StatementInfo("__str", "is"); + dict[-3] = StatementInfo("__flt", "i"); } Statement BinaryParser::parse() { - int id = parse_int(); - if(!in) - return Statement(); + const StatementKey *key; + if(cur_info) + key = &cur_info->key; + else + { + int id = parse_int(); + if(!in) + return Statement(); - const StatementKey &key = get_item(dict, id); + key = &get_item(dict, 
id).key; + } Statement result; - result.keyword = key.keyword; + result.keyword = key->keyword; result.source = src; - for(unsigned j=0; jsignature.size(); ++j) { - switch(key.signature[j]) + switch(key->signature[j]) { case IntType::signature: result.args.push_back(parse_int()); @@ -65,11 +84,15 @@ Statement BinaryParser::parse() } } + if(!sub_remaining.empty()) + --sub_remaining.back(); + unsigned nsub = parse_int(); for(unsigned j = 0; j(); } +const StatementKey *BinaryParser::peek(unsigned level) +{ + if(level>sub_remaining.size()) + throw nesting_error("bad level"); + while(level0; ) + parse(); + sub_remaining.pop_back(); + cur_info = 0; + } + + if(!sub_remaining.empty() && sub_remaining.back()==0) + { + // No more substatements on this level + cur_info = 0; + return 0; + } + + if(cur_info) + return &cur_info->key; + + int id = parse_int(); + if(!in) + return 0; + + cur_info = &get_item(dict, id); + return &cur_info->key; +} + +bool BinaryParser::parse_and_load(unsigned level, Loader &ldr, const LoaderAction &act) +{ + if(!cur_info && !peek(level)) + return false; + + ArgumentStore args(*cur_info); + for(unsigned i=0; ikey.signature.size(); ++i) + switch(cur_info->key.signature[i]) + { + case IntType::signature: + args.set(i, parse_int()); + break; + case FloatType::signature: + args.set(i, parse_float()); + break; + case BoolType::signature: + args.set(i, parse_bool()); + break; + case StringType::signature: + args.set(i, parse_string()); + break; + case SymbolType::signature: + args.set(i, parse_symbol()); + break; + } + + if(!sub_remaining.empty()) + --sub_remaining.back(); + sub_remaining.push_back(parse_int()); + cur_info = 0; + + act.execute(ldr, args); + + return true; +} + IntType::Store BinaryParser::parse_int() { IntType::Store result = 0; diff --git a/source/binaryparser.h b/source/binaryparser.h index 255209a..dfb0de9 100644 --- a/source/binaryparser.h +++ b/source/binaryparser.h @@ -14,18 +14,24 @@ Parses data in binary format. 
class BinaryParser: public ParserMode { private: - typedef std::map Dictionary; + typedef std::map Dictionary; typedef std::map StringMap; Dictionary dict; StringMap strings; unsigned float_precision; + StatementInfo *cur_info; + std::vector sub_remaining; public: BinaryParser(Input &i, const std::string &s); virtual Statement parse(); virtual void process_control_statement(const Statement &); + + virtual const StatementKey *peek(unsigned); + virtual bool parse_and_load(unsigned, Loader &, const LoaderAction &); + private: IntType::Store parse_int(); FloatType::Store parse_float(); diff --git a/source/loader.cpp b/source/loader.cpp index c2de401..e1a0d4a 100644 --- a/source/loader.cpp +++ b/source/loader.cpp @@ -71,6 +71,7 @@ public: Loader::Loader(): cur_st(0), + direct(false), check_sub_loads(false) { } @@ -84,9 +85,15 @@ void Loader::load(Parser &p) { while(p) { - Statement st = p.parse(); - if(st.valid) - load_statement(st); + if(p.peek(0)) + load_direct(p, 0); + else + { + // Parse in raw mode so we can peek immediately after a mode change + Statement st = p.parse(true); + if(st.valid && !st.control) + load_statement(st); + } } finish(); } @@ -98,6 +105,29 @@ void Loader::load(const Statement &st) finish(); } +void Loader::load_direct(Parser &p, unsigned l) +{ + SetForScope set_parser(cur_parser, &p); + SetForScope set_level(cur_level, l); + + while(p) + { + const StatementKey *key = p.peek(l); + if(!key) + break; + + LoaderAction *act = find_action(*key); + if(act) + { + SetFlag set_direct(direct); + if(!p.parse_and_load(l, *this, *act)) + throw logic_error("direct load failed"); + } + else + load_statement(p.parse()); + } +} + void Loader::load_statement(const Statement &st) { SetForScope set_cst(cur_st, &st); @@ -134,11 +164,18 @@ void Loader::load_statement(const Statement &st) void Loader::load_sub_with(Loader &ldr) { - if(!cur_st) + if(direct) + { + ldr.load_direct(*cur_parser, cur_level+1); + ldr.finish(); + } + else if(cur_st) + { + ldr.load(*cur_st); 
+ sub_loaded = true; + } + else throw logic_error("no current statement"); - - ldr.load(*cur_st); - sub_loaded = true; } void Loader::add(const string &kwd, LoaderAction *act) diff --git a/source/loader.h b/source/loader.h index bb1292c..8a1103d 100644 --- a/source/loader.h +++ b/source/loader.h @@ -38,8 +38,11 @@ private: typedef std::map ActionMap; ActionMap actions; + Parser *cur_parser; + unsigned cur_level; const Statement *cur_st; bool sub_loaded; + bool direct; std::list aux_loaders; protected: bool check_sub_loads; @@ -55,6 +58,9 @@ private: /** Loads data from a statement. */ void load(const Statement &st); + /** Loads statements from a parser, feeding them directly to actions. */ + void load_direct(Parser &, unsigned); + /** Processes a single statement */ void load_statement(const Statement &st); diff --git a/source/loaderaction.h b/source/loaderaction.h index afca01e..82fd3b4 100644 --- a/source/loaderaction.h +++ b/source/loaderaction.h @@ -1,6 +1,7 @@ #ifndef MSP_DATAFILE_LOADERACTION_H_ #define MSP_DATAFILE_LOADERACTION_H_ +#include "argumentstore.h" #include "statement.h" namespace Msp { @@ -21,6 +22,8 @@ public: /** Called to process a statement. 
*/ virtual void execute(Loader &, const Statement &) const = 0; + virtual void execute(Loader &, const ArgumentStore &) const = 0; + virtual std::string get_signature() const = 0; }; @@ -44,6 +47,11 @@ public: (dynamic_cast(l).*func)(); }; + virtual void execute(Loader &l, const ArgumentStore &) const + { + (dynamic_cast(l).*func)(); + }; + virtual std::string get_signature() const { return std::string(); } }; @@ -68,6 +76,11 @@ public: (dynamic_cast(l).*func)(st.args[0].get()); } + virtual void execute(Loader &l, const ArgumentStore &as) const + { + (dynamic_cast(l).*func)(as.get(0)); + } + virtual std::string get_signature() const { return std::string(1, TypeInfo::signature); } }; @@ -96,6 +109,16 @@ public: (dynamic_cast(l).*func)(values); } + virtual void execute(Loader &l, const ArgumentStore &as) const + { + std::vector values; + unsigned n_args = as.get_info().key.signature.size(); + values.reserve(n_args); + for(unsigned i=0; i(i)); + (dynamic_cast(l).*func)(values); + } + virtual std::string get_signature() const { std::string result; @@ -125,6 +148,11 @@ public: (dynamic_cast(l).*func)(st); } + virtual void execute(Loader &, const ArgumentStore &) const + { + throw std::logic_error("incompatible format"); + } + virtual std::string get_signature() const { return "*"; } }; @@ -146,6 +174,11 @@ public: (dynamic_cast(l).*func)(st.args[0].get(), st.args[1].get()); } + virtual void execute(Loader &l, const ArgumentStore &as) const + { + (dynamic_cast(l).*func)(as.get(0), as.get(1)); + } + virtual std::string get_signature() const { std::string result; @@ -172,6 +205,11 @@ public: (dynamic_cast(l).*func)(st.args[0].get(), st.args[1].get(), st.args[2].get()); } + virtual void execute(Loader &l, const ArgumentStore &as) const + { + (dynamic_cast(l).*func)(as.get(0), as.get(1), as.get(2)); + } + virtual std::string get_signature() const { std::string result; @@ -199,6 +237,11 @@ public: (dynamic_cast(l).*func)(st.args[0].get(), st.args[1].get(), st.args[2].get(), 
st.args[3].get()); } + virtual void execute(Loader &l, const ArgumentStore &as) const + { + (dynamic_cast(l).*func)(as.get(0), as.get(1), as.get(2), as.get(3)); + } + virtual std::string get_signature() const { std::string result; @@ -227,6 +270,11 @@ public: (dynamic_cast(l).*func)(st.args[0].get(), st.args[1].get(), st.args[2].get(), st.args[3].get(), st.args[4].get()); } + virtual void execute(Loader &l, const ArgumentStore &as) const + { + (dynamic_cast(l).*func)(as.get(0), as.get(1), as.get(2), as.get(3), as.get(4)); + } + virtual std::string get_signature() const { std::string result; @@ -256,6 +304,11 @@ public: dynamic_cast(l).get_object().*ptr0 = st.args[0].get(); } + virtual void execute(Loader &l, const ArgumentStore &as) const + { + dynamic_cast(l).get_object().*ptr0 = as.get(0); + } + virtual std::string get_signature() const { return std::string(1, TypeInfo::signature); } }; @@ -278,6 +331,12 @@ public: ldr.get_object().*ptr0 = &ldr.get_collection().template get(st.args[0].get()); } + virtual void execute(Loader &l, const ArgumentStore &as) const + { + typename L::Loader &ldr = dynamic_cast(l); + ldr.get_object().*ptr0 = &ldr.get_collection().template get(as.get(0)); + } + virtual std::string get_signature() const { return std::string(1, TypeInfo::signature); } }; @@ -302,6 +361,12 @@ public: dynamic_cast(l).get_object().*ptr1 = st.args[1].get(); } + virtual void execute(Loader &l, const ArgumentStore &as) const + { + dynamic_cast(l).get_object().*ptr0 = as.get(0); + dynamic_cast(l).get_object().*ptr1 = as.get(1); + } + virtual std::string get_signature() const { std::string result; diff --git a/source/parser.cpp b/source/parser.cpp index d298eca..9ad70d8 100644 --- a/source/parser.cpp +++ b/source/parser.cpp @@ -86,5 +86,24 @@ void Parser::process_control_statement(const Statement &st) mode->process_control_statement(st); } +const StatementKey *Parser::peek(unsigned level) +{ + while(1) + { + const StatementKey *key = mode->peek(level); + if(key && 
!key->keyword.compare(0, 2, "__")) + process_control_statement(mode->parse()); + else + return key; + } +} + +bool Parser::parse_and_load(unsigned level, Loader &ldr, const LoaderAction &act) +{ + // Peek first to get any control statements processed + peek(level); + return mode->parse_and_load(level, ldr, act); +} + } // namespace DataFile } // namespace Msp diff --git a/source/parser.h b/source/parser.h index 0989bd9..243db1f 100644 --- a/source/parser.h +++ b/source/parser.h @@ -7,8 +7,11 @@ namespace Msp { namespace DataFile { +class Loader; +class LoaderAction; class ParserMode; class Statement; +class StatementKey; /** Frontend for loading datafiles. Handles switching between text and binary @@ -28,18 +31,25 @@ public: Parser(IO::Base &i, const std::string &s); ~Parser(); - /** - Reads a statement from the input. If the end of input was reached, an empty - invalid statement will be returned. If an error occurs, the parser will be - marked as bad and no more statements may be read, even if the exception was - caught. - */ + /** Reads a statement from the input. If the end of input was reached, an + empty invalid statement will be returned. If an error occurs, the parser + will be marked as bad and no more statements may be read, even if the + exception was caught. */ Statement parse(bool raw = false); private: void process_control_statement(const Statement &); public: + /** Returns a key for the next statement, consisting of its keyword and + signature. Not supported in all modes. */ + const StatementKey *peek(unsigned); + + /** Parses a statement and feeds its arguments to an action. The action + must be appropriate for the statement. Use peek() to determine the + statement's signature. 
*/ + bool parse_and_load(unsigned, Loader &, const LoaderAction &); + operator bool() const { return good && in; } }; diff --git a/source/parsermode.h b/source/parsermode.h index 620e9a9..491647e 100644 --- a/source/parsermode.h +++ b/source/parsermode.h @@ -7,6 +7,8 @@ namespace Msp { namespace DataFile { class Input; +class Loader; +class LoaderAction; /** Base class for parse modes. @@ -23,6 +25,9 @@ public: virtual Statement parse() = 0; virtual void process_control_statement(const Statement &) { } + + virtual const StatementKey *peek(unsigned) { return 0; } + virtual bool parse_and_load(unsigned, Loader &, const LoaderAction &) { return false; } }; } // namespace DataFile diff --git a/source/statement.cpp b/source/statement.cpp index 29961a9..3b6f716 100644 --- a/source/statement.cpp +++ b/source/statement.cpp @@ -1,5 +1,6 @@ #include #include "statement.h" +#include "type.h" using namespace std; @@ -33,5 +34,38 @@ string Statement::get_signature() const return result; } + +StatementInfo::StatementInfo(): + args_size(0) +{ } + +StatementInfo::StatementInfo(const string &k, const string &s): + key(k, s), + args_size(0) +{ + for(string::const_iterator i=key.signature.begin(); i!=key.signature.end(); ++i) + { + arg_offsets.push_back(args_size); + switch(*i) + { + case IntType::signature: + args_size += sizeof(IntType::Store); + break; + case FloatType::signature: + args_size += sizeof(FloatType::Store); + break; + case BoolType::signature: + args_size += sizeof(BoolType::Store); + break; + case StringType::signature: + args_size += sizeof(StringType::Store); + break; + case SymbolType::signature: + args_size += sizeof(SymbolType::Store); + break; + } + } +} + } // namespace DataFile } // namespace Msp diff --git a/source/statement.h b/source/statement.h index 80b7b15..184ef99 100644 --- a/source/statement.h +++ b/source/statement.h @@ -49,6 +49,16 @@ struct StatementKey { return keyword arg_offsets; + + StatementInfo(); + StatementInfo(const std::string &, const 
std::string &); +}; + } // namespace DataFile } // namespace Msp diff --git a/source/type.h b/source/type.h index 17545ba..f40657e 100644 --- a/source/type.h +++ b/source/type.h @@ -11,6 +11,8 @@ struct Symbol { std::string name; + Symbol() { } + template Symbol(const T &n): name(lexical_cast(n)) { } -- 2.45.2