From 57d8b8c6f07808efdd1b9647d12447c177ce1e7d Mon Sep 17 00:00:00 2001 From: Mikko Rasa Date: Sat, 29 Sep 2012 02:34:47 +0300 Subject: [PATCH] Add an intelligent packed collection class --- source/collection.h | 3 + source/packcollection.cpp | 118 +++++++++++++++++++++++++++++ source/packcollection.h | 153 ++++++++++++++++++++++++++++++++++++++ tool/packer.cpp | 140 ++++++++++++++++++++++++++++++++++ tool/packer.h | 37 +++++++++ tool/tool.cpp | 18 ++++- tool/tool.h | 2 + 7 files changed, 470 insertions(+), 1 deletion(-) create mode 100644 source/packcollection.cpp create mode 100644 source/packcollection.h create mode 100644 tool/packer.cpp create mode 100644 tool/packer.h diff --git a/source/collection.h b/source/collection.h index c83b437..f79e567 100644 --- a/source/collection.h +++ b/source/collection.h @@ -42,6 +42,9 @@ method for details. Collections also support a notion of "future objects". These are objects which are known to be possible to load, but loading them is deferred to the first time they are requested. + +Other classes are available to provide refined ways of loading objects from +files. See DirectoryCollection and PackCollection. */ class Collection { diff --git a/source/packcollection.cpp b/source/packcollection.cpp new file mode 100644 index 0000000..6d348db --- /dev/null +++ b/source/packcollection.cpp @@ -0,0 +1,118 @@ +#include +#include "packcollection.h" + +using namespace std; + +namespace Msp { +namespace DataFile { + +void PackCollection::add_pack_file(const string &fn) +{ + packs.push_back(Pack(fn)); + Pack &pack = packs.back(); + load(pack, fn); + + ObjectMap pack_objs; + pack.collect_objects(pack_objs); + for(ObjectMap::const_iterator i=pack_objs.begin(); i!=pack_objs.end(); ++i) + { + if(i->second->get_keyword().empty()) + add_future(i->first); + else + add_future_with_keyword(i->first, i->second->get_keyword()); + } + + objects.insert(pack_objs.begin(), pack_objs.end()); +} + + +PackCollection::Pack::Pack(const string &fn): + filename(fn), + base_offset(0) +{ } + +void PackCollection::Pack::collect_objects(ObjectMap &objs) const +{ + for(list::const_iterator i=files.begin(); i!=files.end(); ++i) + i->collect_objects(objs); +} + + +PackCollection::File::File(const Pack &p, const string &fn): + pack(p), + filename(fn), + offset(0), + length(0), + collection(false), + loaded(false) +{ } + +RefPtr PackCollection::File::open() const +{ + RefPtr io_file = new IO::BufferedFile(pack.get_filename()); + io_file->seek(pack.get_base_offset()+offset, IO::S_BEG); + return io_file; +} + +string PackCollection::File::get_full_name() const +{ + return format("%s/%s", pack.get_filename(), filename); +} + +void PackCollection::File::set_loaded() +{ + loaded = true; +} + +void PackCollection::File::collect_objects(ObjectMap &objs) const +{ + for(list::const_iterator i=objects.begin(); i!=objects.end(); ++i) + objs[i->get_name()] = &*i; +} + + +PackCollection::Object::Object(File &f, const string &n, const string &k): + file(f), + name(n), + keyword(k) +{ } + + +PackCollection::Pack::Loader::Loader(Pack &p): + ObjectLoader(p) +{ + add("file", &Loader::file); + add("base_offset", &Pack::base_offset); +} + +void PackCollection::Pack::Loader::file(const string &fn) +{ + obj.files.push_back(File(obj, fn)); + load_sub(obj.files.back()); +} + + +PackCollection::File::Loader::Loader(File &f): + ObjectLoader(f) +{ + add("object", &Loader::object); + add("slice", &File::offset, &File::length); +} + +void PackCollection::File::Loader::finish() +{ + if(!obj.collection) + { + PackCollection::Object ob(obj, obj.filename, string()); + obj.objects.push_back(ob); + } +} + +void PackCollection::File::Loader::object(const string &name, const string &kwd) +{ + obj.objects.push_back(PackCollection::Object(obj, name, kwd)); + obj.collection = true; +} + +} // namespace DataFile +} // namespace Msp diff --git a/source/packcollection.h b/source/packcollection.h new file mode 100644 index 0000000..946271a --- /dev/null +++ b/source/packcollection.h @@ -0,0 +1,153 @@ +#ifndef MSP_DATAFILE_PACKCOLLECTION_H_ +#define MSP_DATAFILE_PACKCOLLECTION_H_ + +#include "collection.h" +#include "objectloader.h" + +namespace Msp { +namespace DataFile { + +/** +A collection class that loads data from pack files. As opposed to plain +collection files, pack files are composed from a number of logical files. They +also contain a directory with a list of objects contained in the pack and which +logical files they are in. This allows the pack to be loaded in a piecewise +manner instead of all at once. + +It's possible for a pack file to contain plain collection files as well. When +an object from such a file is requested, the entire sub-collection it is stored +in is loaded. +*/ +class PackCollection: public Collection +{ +private: + class File; + struct Object; + + typedef std::map ObjectMap; + + class Pack + { + public: + class Loader: public ObjectLoader + { + public: + Loader(Pack &); + private: + void file(const std::string &); + }; + + private: + std::string filename; + unsigned base_offset; + std::list files; + + public: + Pack(const std::string &); + + const std::string &get_filename() const { return filename; } + unsigned get_base_offset() const { return base_offset; } + + void collect_objects(ObjectMap &) const; + }; + + class File + { + public: + class Loader: public ObjectLoader + { + public: + Loader(File &); + private: + virtual void finish(); + void object(const std::string &, const std::string &); + }; + + private: + const Pack &pack; + std::string filename; + unsigned offset; + unsigned length; + bool collection; + std::list objects; + bool loaded; + + public: + File(const Pack &, const std::string &); + + RefPtr open() const; + const std::string &get_filename() const { return filename; } + std::string get_full_name() const; + bool is_collection() const { return collection; } + + void set_loaded(); + bool is_loaded() const { return loaded; } + + void collect_objects(ObjectMap &) const; + }; + + class Object + { + private: + File &file; + std::string name; + std::string keyword; + + public: + Object(File &, const std::string &, const std::string &); + + File &get_file() const { return file; } + const std::string &get_name() const { return name; } + const std::string &get_keyword() const { return keyword; } + }; + + std::list packs; + ObjectMap objects; + +public: + /** Adds a pack file to the collection. The directory is read immediately, + and packed objects are loaded as they are needed. */ + void add_pack_file(const std::string &); + +protected: + template + CollectionItemType &add_type() + { + return Collection::add_type().creator(&PackCollection::create); + } + +private: + template + T *create(const std::string &name) + { + ObjectMap::iterator i = objects.find(name); + if(i==objects.end()) + return 0; + + File &file = i->second->get_file(); + if(file.is_loaded()) + return 0; + file.set_loaded(); + + RefPtr in = file.open(); + Parser parser(*in, file.get_full_name()); + if(file.is_collection()) + { + Loader ldr(*this); + ldr.load(parser); + return 0; + } + else + { + RefPtr item = new T; + ItemLoader ldr(*item, *this); + ldr.load(parser); + return item.release(); + } + } +}; + +} // namespace DataFile +} // namespace Msp + +#endif diff --git a/tool/packer.cpp b/tool/packer.cpp new file mode 100644 index 0000000..51b7d8f --- /dev/null +++ b/tool/packer.cpp @@ -0,0 +1,140 @@ +#include +#include +#include +#include +#include +#include +#include "packer.h" +#include "tool.h" + +using namespace std; +using namespace Msp; + +Packer::Packer(DataTool &t): + tool(t), + tmp_file(tempfile()), + tmp_buf(new IO::Buffered(*tmp_file)), + dir_alloc(0) +{ } + +IO::File *Packer::tempfile() +{ + for(unsigned i=0;; ++i) + { + try + { + std::string filename = format("/tmp/mspdatatool.%d", i); + /*filename.reserve(25); + filename.append("/tmp/mspdatatool."); + for(unsigned i=0; i<8; ++i) + filename.append(rand());*/ + IO::File *file = new IO::File(filename, IO::M_RDWR, IO::File::C_NEW); + FS::unlink(filename); + return file; + } + catch(const IO::file_already_exists &) + { + continue; + } + } +} + +Packer::~Packer() +{ + delete tmp_buf; + delete tmp_file; +} + +void Packer::pack_file(const string &fn) +{ + if(!tmp_file) + throw logic_error("Packer::pack_file"); + + unsigned offset = tmp_file->tell(); + + IO::BufferedFile in(fn); + DataFile::Parser parser(in, fn); + + DataFile::Writer *writer = tool.create_writer(*tmp_buf); + + bool collection = FS::extpart(fn)==".mdc"; + std::list objects; + while(parser) + { + DataFile::Statement st = parser.parse(true); + if(st.valid) + { + bool sys = !st.keyword.compare(0, 2, "__"); + if(collection && !sys) + { + if(st.get_signature()=="s") + { + Object obj; + obj.name = st.args[0].get(); + obj.keyword = st.keyword; + objects.push_back(obj); + } + else + collection = false; + } + if(!sys || st.keyword=="__src") + writer->write(st); + } + } + writer->write(DataFile::Statement("__end")); + delete writer; + + tmp_buf->flush(); + unsigned length = tmp_file->tell()-offset; + + DataFile::Statement st("file"); + st.append(FS::basename(fn)); + st.sub.push_back((DataFile::Statement("slice"), offset, length)); + if(collection) + { + for(list::const_iterator i=objects.begin(); i!=objects.end(); ++i) + st.sub.push_back((DataFile::Statement("object"), i->name, i->keyword)); + dir_alloc += objects.size()*100; + } + directory.push_back(st); + dir_alloc += 100; +} + +void Packer::create_pack(const string &fn) +{ + vector dir_buffer(dir_alloc); + IO::Memory mem(&dir_buffer[0], dir_buffer.size(), IO::M_WRITE); + + unsigned base_offset = 0; + while(1) + { + mem.seek(0, IO::S_BEG); + + DataFile::Writer *writer = tool.create_writer(mem); + + for(list::const_iterator i=directory.begin(); i!=directory.end(); ++i) + writer->write(*i); + if(base_offset==0) + base_offset = mem.tell(); + writer->write((DataFile::Statement("base_offset"), base_offset)); + writer->write(DataFile::Statement("__end")); + delete writer; + + unsigned dir_size = mem.tell(); + if(dir_size<=base_offset) + break; + base_offset = dir_size; + } + + IO::File out(fn, IO::M_WRITE); + out.write(&dir_buffer[0], base_offset); + tmp_file->seek(0, IO::S_BEG); + while(!tmp_file->eof()) + { + char buf[16384]; + unsigned len = tmp_file->read(buf, sizeof(buf)); + if(!len) + break; + out.write(buf, len); + } +} diff --git a/tool/packer.h b/tool/packer.h new file mode 100644 index 0000000..351b45d --- /dev/null +++ b/tool/packer.h @@ -0,0 +1,37 @@ +#ifndef PACKER_H_ +#define PACKER_H_ + +#include +#include +#include +#include + +class DataTool; + +class Packer +{ +private: + struct Object + { + std::string name; + std::string keyword; + }; + + DataTool &tool; + Msp::IO::File *tmp_file; + Msp::IO::Buffered *tmp_buf; + std::list directory; + unsigned dir_alloc; + +public: + Packer(DataTool &); +private: + static Msp::IO::File *tempfile(); +public: + ~Packer(); + + void pack_file(const std::string &); + void create_pack(const std::string &); +}; + +#endif diff --git a/tool/tool.cpp b/tool/tool.cpp index 5469106..81c6be9 100644 --- a/tool/tool.cpp +++ b/tool/tool.cpp @@ -5,6 +5,7 @@ #include #include #include "compiler.h" +#include "packer.h" #include "tool.h" using namespace std; @@ -16,6 +17,7 @@ DataTool::DataTool(int argc, char **argv): compile(false), float_size(0), compress(false), + pack(false), debug(false) { GetOpt getopt; @@ -24,17 +26,23 @@ DataTool::DataTool(int argc, char **argv): getopt.add_option('f', "float-size", float_size, GetOpt::REQUIRED_ARG); getopt.add_option('g', "debug", debug, GetOpt::NO_ARG); getopt.add_option('o', "output", out_fn, GetOpt::REQUIRED_ARG); + getopt.add_option('p', "pack", pack, GetOpt::NO_ARG); getopt.add_option('z', "compress", compress, GetOpt::NO_ARG); getopt(argc, argv); in_fns = getopt.get_args(); if(in_fns.empty()) in_fns.push_back("-"); + + if(pack && out_fn=="-") + throw usage_error("Can't write pack to stdout"); } int DataTool::main() { - if(compile) + if(pack) + do_pack(); + else if(compile) do_compile(); else do_transfer(); @@ -84,6 +92,14 @@ void DataTool::do_compile() delete out; } +void DataTool::do_pack() +{ + Packer packer(*this); + for(vector::const_iterator i=in_fns.begin(); i!=in_fns.end(); ++i) + packer.pack_file(*i); + packer.create_pack(out_fn); +} + IO::Base *DataTool::open_output(const string &fn) { if(fn=="-") diff --git a/tool/tool.h b/tool/tool.h index c2a1d7b..a682d01 100644 --- a/tool/tool.h +++ b/tool/tool.h @@ -14,6 +14,7 @@ private: bool compile; unsigned float_size; bool compress; + bool pack; bool debug; public: @@ -23,6 +24,7 @@ public: private: void do_transfer(); void do_compile(); + void do_pack(); Msp::IO::Base *open_output(const std::string &); Msp::IO::Base *open_input(const std::string &); public: -- 2.45.2