]> git.tdb.fi Git - libs/datafile.git/commitdiff
Add an intelligent packed collection class
authorMikko Rasa <tdb@tdb.fi>
Fri, 28 Sep 2012 23:34:47 +0000 (02:34 +0300)
committerMikko Rasa <tdb@tdb.fi>
Fri, 28 Sep 2012 23:34:47 +0000 (02:34 +0300)
source/collection.h
source/packcollection.cpp [new file with mode: 0644]
source/packcollection.h [new file with mode: 0644]
tool/packer.cpp [new file with mode: 0644]
tool/packer.h [new file with mode: 0644]
tool/tool.cpp
tool/tool.h

index c83b4376d20b2b6d4854b29dfc0fbda9c0c286b5..f79e567d5c0e4e5fc1a5d6f8c2c9e481265a6d27 100644 (file)
@@ -42,6 +42,9 @@ method for details.
 Collections also support a notion of "future objects".  These are objects which
 are known to be possible to load, but loading them is deferred to the first
 time they are requested.
+
+Other classes are available to provide refined ways of loading objects from
+files.  See DirectoryCollection and PackCollection.
 */
 class Collection
 {
diff --git a/source/packcollection.cpp b/source/packcollection.cpp
new file mode 100644 (file)
index 0000000..6d348db
--- /dev/null
@@ -0,0 +1,118 @@
+#include <msp/strings/format.h>
+#include "packcollection.h"
+
+using namespace std;
+
+namespace Msp {
+namespace DataFile {
+
+void PackCollection::add_pack_file(const string &fn)
+{
+       packs.push_back(Pack(fn));
+       Pack &pack = packs.back();
+       load(pack, fn);
+
+       ObjectMap pack_objs;
+       pack.collect_objects(pack_objs);
+       for(ObjectMap::const_iterator i=pack_objs.begin(); i!=pack_objs.end(); ++i)
+       {
+               if(i->second->get_keyword().empty())
+                       add_future(i->first);
+               else
+                       add_future_with_keyword(i->first, i->second->get_keyword());
+       }
+
+       objects.insert(pack_objs.begin(), pack_objs.end());
+}
+
+
+/* Creates a pack record for the file pack_fn, with no files listed and a base
+offset of zero. */
+PackCollection::Pack::Pack(const string &pack_fn):
+       filename(pack_fn),
+       base_offset(0)
+{
+}
+
+/* Adds the directory entries of every file in this pack to objs. */
+void PackCollection::Pack::collect_objects(ObjectMap &objs) const
+{
+       list<File>::const_iterator cur = files.begin();
+       while(cur!=files.end())
+       {
+               cur->collect_objects(objs);
+               ++cur;
+       }
+}
+
+
+/* Creates a record for the logical file named file_fn inside the pack owner.
+The slice is empty and the file is neither a collection nor loaded until the
+directory loader or set_loaded says otherwise. */
+PackCollection::File::File(const Pack &owner, const string &file_fn):
+       pack(owner),
+       filename(file_fn),
+       offset(0),
+       length(0),
+       collection(false),
+       loaded(false)
+{
+}
+
+/* Opens the pack file and positions it at the start of this logical file, which
+is the pack's base offset plus the file's own offset.  The length of the slice
+is not enforced here. */
+RefPtr<IO::Base> PackCollection::File::open() const
+{
+       const unsigned start = pack.get_base_offset()+offset;
+       RefPtr<IO::BufferedFile> source = new IO::BufferedFile(pack.get_filename());
+       source->seek(start, IO::S_BEG);
+       return source;
+}
+
+/* Returns the name of the pack file and the name of this logical file, joined
+with a slash. */
+string PackCollection::File::get_full_name() const
+{
+       const string &pack_fn = pack.get_filename();
+       return format("%s/%s", pack_fn, filename);
+}
+
+/* Marks this file as loaded.  The flag is never cleared by this class. */
+void PackCollection::File::set_loaded()
+{
+       loaded = true;
+}
+
+/* Stores a pointer to each object of this file in objs, keyed by object name.
+An existing entry with the same name is overwritten. */
+void PackCollection::File::collect_objects(ObjectMap &objs) const
+{
+       list<Object>::const_iterator cur = objects.begin();
+       while(cur!=objects.end())
+       {
+               const Object &entry = *cur;
+               objs[entry.get_name()] = &entry;
+               ++cur;
+       }
+}
+
+
+/* Creates a directory entry for the object obj_name with keyword kwd, stored in
+the logical file owner. */
+PackCollection::Object::Object(File &owner, const string &obj_name, const string &kwd):
+       file(owner),
+       name(obj_name),
+       keyword(kwd)
+{
+}
+
+
+/* Sets up loading of a pack directory: "file" statements are handled by
+Loader::file and "base_offset" is stored directly in the pack. */
+PackCollection::Pack::Loader::Loader(Pack &target):
+       ObjectLoader<Pack>(target)
+{
+       add("file", &Loader::file);
+       add("base_offset", &Pack::base_offset);
+}
+
+/* Handles a "file" statement: appends a record for the logical file fn to the
+pack and loads the statement's sub-statements into it. */
+void PackCollection::Pack::Loader::file(const string &fn)
+{
+       obj.files.push_back(File(obj, fn));
+       // Load into the list element so its address stays valid for its objects
+       File &added = obj.files.back();
+       load_sub(added);
+}
+
+
+/* Sets up loading of a file record: "object" statements are handled by
+Loader::object and "slice" stores the file's offset and length. */
+PackCollection::File::Loader::Loader(File &target):
+       ObjectLoader<File>(target)
+{
+       add("object", &Loader::object);
+       add("slice", &File::offset, &File::length);
+}
+
+/* If no "object" statements were seen, the file is not a collection; it is then
+given a single object named after the file, with an empty keyword. */
+void PackCollection::File::Loader::finish()
+{
+       if(obj.collection)
+               return;
+
+       obj.objects.push_back(PackCollection::Object(obj, obj.filename, string()));
+}
+
+/* Handles an "object" statement: records the object with its name and keyword,
+and flags the file as a collection. */
+void PackCollection::File::Loader::object(const string &name, const string &kwd)
+{
+       const PackCollection::Object entry(obj, name, kwd);
+       obj.objects.push_back(entry);
+       obj.collection = true;
+}
+
+} // namespace DataFile
+} // namespace Msp
diff --git a/source/packcollection.h b/source/packcollection.h
new file mode 100644 (file)
index 0000000..946271a
--- /dev/null
@@ -0,0 +1,153 @@
+#ifndef MSP_DATAFILE_PACKCOLLECTION_H_
+#define MSP_DATAFILE_PACKCOLLECTION_H_
+
+#include "collection.h"
+#include "objectloader.h"
+
+namespace Msp {
+namespace DataFile {
+
+/**
+A collection class that loads data from pack files.  As opposed to plain
+collection files, pack files are composed from a number of logical files.  They
+also contain a directory with a list of objects contained in the pack and which
+logical files they are in.  This allows the pack to be loaded in a piecewise
+manner instead of all at once.
+
+It's possible for a pack file to contain plain collection files as well.  When
+an object from such a file is requested, the entire sub-collection it is stored
+in is loaded.
+*/
+class PackCollection: public Collection
+{
+private:
+       class File;
+       struct Object;
+
+       typedef std::map<std::string, const Object *> ObjectMap;
+
+       class Pack
+       {
+       public:
+               class Loader: public ObjectLoader<Pack>
+               {
+               public:
+                       Loader(Pack &);
+               private:
+                       void file(const std::string &);
+               };
+
+       private:
+               std::string filename;
+               unsigned base_offset;
+               std::list<File> files;
+
+       public:
+               Pack(const std::string &);
+
+               const std::string &get_filename() const { return filename; }
+               unsigned get_base_offset() const { return base_offset; }
+
+               void collect_objects(ObjectMap &) const;
+       };
+
+       class File
+       {
+       public:
+               class Loader: public ObjectLoader<File>
+               {
+               public:
+                       Loader(File &);
+               private:
+                       virtual void finish();
+                       void object(const std::string &, const std::string &);
+               };
+
+       private:
+               const Pack &pack;
+               std::string filename;
+               unsigned offset;
+               unsigned length;
+               bool collection;
+               std::list<Object> objects;
+               bool loaded;
+
+       public:
+               File(const Pack &, const std::string &);
+
+               RefPtr<IO::Base> open() const;
+               const std::string &get_filename() const { return filename; }
+               std::string get_full_name() const;
+               bool is_collection() const { return collection; }
+
+               void set_loaded();
+               bool is_loaded() const { return loaded; }
+
+               void collect_objects(ObjectMap &) const;
+       };
+
+       class Object
+       {
+       private:
+               File &file;
+               std::string name;
+               std::string keyword;
+
+       public:
+               Object(File &, const std::string &, const std::string &);
+
+               File &get_file() const { return file; }
+               const std::string &get_name() const { return name; }
+               const std::string &get_keyword() const { return keyword; }
+       };
+
+       std::list<Pack> packs;
+       ObjectMap objects;
+
+public:
+       /** Adds a pack file to the collection.  The directory is read immediately,
+       and packed objects are loaded as they are needed. */
+       void add_pack_file(const std::string &);
+
+protected:
+       template<typename T>
+       CollectionItemType<T> &add_type()
+       {
+               return Collection::add_type<T>().creator(&PackCollection::create<T>);
+       }
+
+private:
+       template<typename T>
+       T *create(const std::string &name)
+       {
+               ObjectMap::iterator i = objects.find(name);
+               if(i==objects.end())
+                       return 0;
+
+               File &file = i->second->get_file();
+               if(file.is_loaded())
+                       return 0;
+               file.set_loaded();
+
+               RefPtr<IO::Base> in = file.open();
+               Parser parser(*in, file.get_full_name());
+               if(file.is_collection())
+               {
+                       Loader ldr(*this);
+                       ldr.load(parser);
+                       return 0;
+               }
+               else
+               {
+                       RefPtr<T> item = new T;
+                       ItemLoader<T> ldr(*item, *this);
+                       ldr.load(parser);
+                       return item.release();
+               }
+       }
+};
+
+} // namespace DataFile
+} // namespace Msp
+
+#endif
diff --git a/tool/packer.cpp b/tool/packer.cpp
new file mode 100644 (file)
index 0000000..51b7d8f
--- /dev/null
@@ -0,0 +1,140 @@
+#include <msp/datafile/parser.h>
+#include <msp/datafile/statement.h>
+#include <msp/datafile/writer.h>
+#include <msp/fs/utils.h>
+#include <msp/io/memory.h>
+#include <msp/strings/format.h>
+#include "packer.h"
+#include "tool.h"
+
+using namespace std;
+using namespace Msp;
+
+/* Creates a packer that obtains its writers from owner.  A temporary file is
+created for the packed data, along with a buffer on top of it. */
+Packer::Packer(DataTool &owner):
+       tool(owner),
+       tmp_file(tempfile()),
+       tmp_buf(new IO::Buffered(*tmp_file)),
+       dir_alloc(0)
+{
+}
+
+IO::File *Packer::tempfile()
+{
+       for(unsigned i=0;; ++i)
+       {
+               try
+               {
+                       std::string filename = format("/tmp/mspdatatool.%d", i);
+                       /*filename.reserve(25);
+                       filename.append("/tmp/mspdatatool.");
+                       for(unsigned i=0; i<8; ++i)
+                               filename.append(rand());*/
+                       IO::File *file = new IO::File(filename, IO::M_RDWR, IO::File::C_NEW);
+                       FS::unlink(filename);
+                       return file;
+               }
+               catch(const IO::file_already_exists &)
+               {
+                       continue;
+               }
+       }
+}
+
+/* Releases the temporary file.  The buffer was constructed on top of the file,
+so it is deleted first. */
+Packer::~Packer()
+{
+       delete tmp_buf;
+       delete tmp_file;
+}
+
+void Packer::pack_file(const string &fn)
+{
+       if(!tmp_file)
+               throw logic_error("Packer::pack_file");
+
+       unsigned offset = tmp_file->tell();
+
+       IO::BufferedFile in(fn);
+       DataFile::Parser parser(in, fn);
+
+       DataFile::Writer *writer = tool.create_writer(*tmp_buf);
+
+       bool collection = FS::extpart(fn)==".mdc";
+       std::list<Object> objects;
+       while(parser)
+       {
+               DataFile::Statement st = parser.parse(true);
+               if(st.valid)
+               {
+                       bool sys = !st.keyword.compare(0, 2, "__");
+                       if(collection && !sys)
+                       {
+                               if(st.get_signature()=="s")
+                               {
+                                       Object obj;
+                                       obj.name = st.args[0].get<string>();
+                                       obj.keyword = st.keyword;
+                                       objects.push_back(obj);
+                               }
+                               else
+                                       collection = false;
+                       }
+                       if(!sys || st.keyword=="__src")
+                               writer->write(st);
+               }
+       }
+       writer->write(DataFile::Statement("__end"));
+       delete writer;
+
+       tmp_buf->flush();
+       unsigned length = tmp_file->tell()-offset;
+
+       DataFile::Statement st("file");
+       st.append(FS::basename(fn));
+       st.sub.push_back((DataFile::Statement("slice"), offset, length));
+       if(collection)
+       {
+               for(list<Object>::const_iterator i=objects.begin(); i!=objects.end(); ++i)
+                       st.sub.push_back((DataFile::Statement("object"), i->name, i->keyword));
+               dir_alloc += objects.size()*100;
+       }
+       directory.push_back(st);
+       dir_alloc += 100;
+}
+
+void Packer::create_pack(const string &fn)
+{
+       vector<char> dir_buffer(dir_alloc);
+       IO::Memory mem(&dir_buffer[0], dir_buffer.size(), IO::M_WRITE);
+
+       unsigned base_offset = 0;
+       while(1)
+       {
+               mem.seek(0, IO::S_BEG);
+
+               DataFile::Writer *writer = tool.create_writer(mem);
+
+               for(list<DataFile::Statement>::const_iterator i=directory.begin(); i!=directory.end(); ++i)
+                       writer->write(*i);
+               if(base_offset==0)
+                       base_offset = mem.tell();
+               writer->write((DataFile::Statement("base_offset"), base_offset));
+               writer->write(DataFile::Statement("__end"));
+               delete writer;
+
+               unsigned dir_size = mem.tell();
+               if(dir_size<=base_offset)
+                       break;
+               base_offset = dir_size;
+       }
+
+       IO::File out(fn, IO::M_WRITE);
+       out.write(&dir_buffer[0], base_offset);
+       tmp_file->seek(0, IO::S_BEG);
+       while(!tmp_file->eof())
+       {
+               char buf[16384];
+               unsigned len = tmp_file->read(buf, sizeof(buf));
+               if(!len)
+                       break;
+               out.write(buf, len);
+       }
+}
diff --git a/tool/packer.h b/tool/packer.h
new file mode 100644 (file)
index 0000000..351b45d
--- /dev/null
@@ -0,0 +1,37 @@
+#ifndef PACKER_H_
+#define PACKER_H_
+
+#include <string>
+#include <msp/datafile/statement.h>
+#include <msp/io/buffered.h>
+#include <msp/io/file.h>
+
+class DataTool;
+
+class Packer
+{
+private:
+       struct Object
+       {
+               std::string name;
+               std::string keyword;
+       };
+
+       DataTool &tool;
+       Msp::IO::File *tmp_file;
+       Msp::IO::Buffered *tmp_buf;
+       std::list<Msp::DataFile::Statement> directory;
+       unsigned dir_alloc;
+
+public:
+       Packer(DataTool &);
+private:
+       static Msp::IO::File *tempfile();
+public:
+       ~Packer();
+
+       void pack_file(const std::string &);
+       void create_pack(const std::string &);
+};
+
+#endif
index 5469106f50322b7497ea9626c5a4b2400187aa1c..81c6be9b831b8bd74a80258337cd5d692f0d4734 100644 (file)
@@ -5,6 +5,7 @@
 #include <msp/datafile/parser.h>
 #include <msp/datafile/statement.h>
 #include "compiler.h"
+#include "packer.h"
 #include "tool.h"
 
 using namespace std;
@@ -16,6 +17,7 @@ DataTool::DataTool(int argc, char **argv):
        compile(false),
        float_size(0),
        compress(false),
+       pack(false),
        debug(false)
 {
        GetOpt getopt;
@@ -24,17 +26,23 @@ DataTool::DataTool(int argc, char **argv):
        getopt.add_option('f', "float-size", float_size, GetOpt::REQUIRED_ARG);
        getopt.add_option('g', "debug", debug, GetOpt::NO_ARG);
        getopt.add_option('o', "output", out_fn, GetOpt::REQUIRED_ARG);
+       getopt.add_option('p', "pack", pack, GetOpt::NO_ARG);
        getopt.add_option('z', "compress", compress, GetOpt::NO_ARG);
        getopt(argc, argv);
 
        in_fns = getopt.get_args();
        if(in_fns.empty())
                in_fns.push_back("-");
+
+       if(pack && out_fn=="-")
+               throw usage_error("Can't write pack to stdout");
 }
 
 int DataTool::main()
 {
-       if(compile)
+       if(pack)
+               do_pack();
+       else if(compile)
                do_compile();
        else
                do_transfer();
@@ -84,6 +92,14 @@ void DataTool::do_compile()
        delete out;
 }
 
+/* Packs all input files into a single pack file written to out_fn.
+NOTE(review): each input name is handed to Packer::pack_file as is, including
+the "-" placeholder used when no inputs were given - confirm whether standard
+input is meant to be supported here. */
+void DataTool::do_pack()
+{
+       Packer packer(*this);
+
+       vector<string>::const_iterator fn = in_fns.begin();
+       while(fn!=in_fns.end())
+       {
+               packer.pack_file(*fn);
+               ++fn;
+       }
+
+       packer.create_pack(out_fn);
+}
+
 IO::Base *DataTool::open_output(const string &fn)
 {
        if(fn=="-")
index c2a1d7bb98d4eec3d1637c9b084940518f3707b1..a682d01781502bdd6a2fd452b3d16683cddfe87b 100644 (file)
@@ -14,6 +14,7 @@ private:
        bool compile;
        unsigned float_size;
        bool compress;
+       bool pack;
        bool debug;
 
 public:
@@ -23,6 +24,7 @@ public:
 private:
        void do_transfer();
        void do_compile();
+       void do_pack();
        Msp::IO::Base *open_output(const std::string &);
        Msp::IO::Base *open_input(const std::string &);
 public: