From: Mikko Rasa Date: Thu, 2 Aug 2012 08:11:40 +0000 (+0300) Subject: Use custom encoding for floats in binary format X-Git-Url: http://git.tdb.fi/?p=libs%2Fdatafile.git;a=commitdiff_plain;h=19179a622c1de88de5ed7047643eec79f285bf2a Use custom encoding for floats in binary format This makes the binary format fully machine-independent, and provides control over the precision of floating point values. --- diff --git a/source/binaryparser.cpp b/source/binaryparser.cpp index 81a86ea..fa99a7e 100644 --- a/source/binaryparser.cpp +++ b/source/binaryparser.cpp @@ -1,7 +1,9 @@ +#include #include #include #include #include "binaryparser.h" +#include "binfloat.h" #include "input.h" using namespace std; @@ -22,10 +24,12 @@ public: BinaryParser::BinaryParser(Input &i, const string &s): ParserMode(i, s), - first(true) + first(true), + float_precision(32) { dict[-1] = DictEntry("__kwd", "iss"); dict[-2] = DictEntry("__str", "is"); + dict[-3] = DictEntry("__flt", "i"); } Statement BinaryParser::parse() @@ -51,6 +55,8 @@ Statement BinaryParser::parse() const unsigned id = st.args[0].get(); strings[id] = st.args[1].get(); } + else if(st.keyword=="__flt") + float_precision = st.args[0].get(); else return st; } @@ -127,21 +133,47 @@ IntType::Store BinaryParser::parse_int() FloatType::Store BinaryParser::parse_float() { - union + UInt64 encoded = 0; + for(unsigned i=0; i::is_iec559) + return bf.compose_iec559(); + else + { + /* Put the float together with arithmetic since we don't know its + internal layout */ + FloatType::Store f = 0; + if(bf.infinity) + { + if(numeric_limits::has_infinity) + f = numeric_limits::infinity(); + else + f = numeric_limits::max(); + } + else + { + for(unsigned i=0; i<64; ++i) + { + f /= 2; + if(bf.mantissa&1) + f += 1; + bf.mantissa >>= 1; + } + for(int i=0; ibf.exponent; --i) + f /= 2; + } + if(bf.sign) + f = -f; + return f; + } } BoolType::Store BinaryParser::parse_bool() diff --git a/source/binaryparser.h b/source/binaryparser.h index b10d7b8..b4fec6b 100644 --- a/source/binaryparser.h +++ b/source/binaryparser.h @@ -21,6 +21,7 @@ private: Dictionary dict; StringMap strings; bool first; + unsigned float_precision; public: BinaryParser(Input &i, const std::string &s); diff --git a/source/binarywriter.cpp b/source/binarywriter.cpp index 9c9c893..ffdc040 100644 --- a/source/binarywriter.cpp +++ b/source/binarywriter.cpp @@ -1,5 +1,7 @@ +#include #include #include "binarywriter.h" +#include "binfloat.h" #include "statement.h" using namespace std; @@ -10,10 +12,23 @@ namespace DataFile { BinaryWriter::BinaryWriter(IO::Base &o): WriterMode(o), next_kwd_id(1), - next_str_id(1) + next_str_id(1), + float_precision(32) { dict[DictEntry("__kwd", "iss")] = -1; dict[DictEntry("__str", "is")] = -2; + dict[DictEntry("__flt", "i")] = -3; +} + +void BinaryWriter::set_float_precision(unsigned fp) +{ + if(fp<16 || fp>64 || fp%8) + throw invalid_argument("BinaryWriter::set_float_precision"); + float_precision = fp; + Statement fst; + fst.keyword = "__flt"; + fst.args.push_back(float_precision); + write_(fst); } void BinaryWriter::write(const Statement &st) @@ -116,20 +131,44 @@ void BinaryWriter::write_string(const StringType::Store &s) void BinaryWriter::write_float(FloatType::Store f) { - union + BinFloat bf; + + if(numeric_limits::is_iec559) + bf = BinFloat::explode_iec559(f); + else { - float v; - char d[sizeof(float)]; - }; - - v = f; -#if BYTE_ORDER == LITTLE_ENDIAN - for(unsigned i = sizeof(float); i--;) - out.put(d[i]); -#else - for(unsigned i = 0; if)) + bf.infinity = true; + else if(f!=0) + { + for(; f<1; f*=2) + --bf.exponent; + for(; f>=2; f/=2) + ++bf.exponent; + for(unsigned i=0; i<64; ++i) + { + bf.mantissa <<= 1; + if(f>=1) + { + bf.mantissa |= 1; + f -= 1; + } + f *= 2; + } + } + } + + UInt64 encoded = bf.compose(float_precision); + for(unsigned i=float_precision/8; i--; ) + out.put((encoded>>(i*8))&0xFF); } void BinaryWriter::write_symbol(const SymbolType::Store &s) diff --git a/source/binarywriter.h b/source/binarywriter.h index 1a11ae5..2f67bbd 100644 --- a/source/binarywriter.h +++ b/source/binarywriter.h @@ -22,10 +22,12 @@ private: unsigned next_kwd_id; StringMap strings; unsigned next_str_id; + unsigned float_precision; public: BinaryWriter(IO::Base &o); + virtual void set_float_precision(unsigned); virtual void write(const Statement &st); private: void write_(const Statement &st); diff --git a/source/binfloat.cpp b/source/binfloat.cpp new file mode 100644 index 0000000..fa4be73 --- /dev/null +++ b/source/binfloat.cpp @@ -0,0 +1,72 @@ +#include +#include "binfloat.h" + +using namespace std; + + +namespace Msp { +namespace DataFile { + +BinFloat BinFloat::explode(UInt64 value, const Bits &bits) +{ + UInt64 mantissa_mask = (UInt64(1)<>bits.mantissa)&exponent_mask; + bf.sign = value>>(bits.mantissa+bits.exponent); + bf.infinity = (bf.exponent==exponent_mask); + + if(bf.exponent==0 || bf.infinity) + // Zeroes and infinities have zero mantissa + bf.mantissa = 0; + else + { + // Extract mantissa, add the implied one and align it to high bits + bf.mantissa = (value&mantissa_mask) | (UInt64(1)<>1; + + return bf; +} + +UInt64 BinFloat::compose(const Bits &bits) +{ + UInt64 mantissa_mask = (UInt64(1)<>1); + // Shift down and round the mantissa + UInt64 rounded_mantissa = ((mantissa>>(62-bits.mantissa))+1)>>1; + // If the integer part is greater than one, we need to use a higher exponent + if((rounded_mantissa>>bits.mantissa)>1) + ++biased_exponent; + + if(biased_exponent>=exponent_mask || infinity) + // Overflow, return infinity + return UInt64(sign< + union Conversion + { + T f; + typename MatchingInt::UnsignedType i; + }; + + bool sign; + bool infinity; + int exponent; + UInt64 mantissa; + + static BinFloat explode(UInt64, const Bits &); + + template + static BinFloat explode_iec559(T v) + { + Conversion c; + c.f = v; + return explode(c.i, sizeof(T)*CHAR_BIT); + } + + UInt64 compose(const Bits &); + + template + T compose_iec559() + { + Conversion c; + c.i = compose(sizeof(T)*CHAR_BIT); + return c.f; + } +}; + +} // namespace DataFile +} // namespace Msp + +#endif diff --git a/source/textwriter.cpp b/source/textwriter.cpp index 0e5b674..65f4870 100644 --- a/source/textwriter.cpp +++ b/source/textwriter.cpp @@ -9,9 +9,15 @@ namespace Msp { namespace DataFile { TextWriter::TextWriter(IO::Base &o): - WriterMode(o) + WriterMode(o), + float_format("%#.7g") { } +void TextWriter::set_float_precision(unsigned fp) +{ + float_format = format("%%#.%dg", fp/4-1); +} + void TextWriter::write(const Statement &st) { write_(st, 0); @@ -32,7 +38,7 @@ void TextWriter::write_(const Statement &st, unsigned level) else if(i->get_signature()==IntType::signature) out.write(lexical_cast(i->get())); else if(i->get_signature()==FloatType::signature) - out.write(format("%15g", (i->get()))); + out.write(format(float_format, i->get())); else if(i->get_signature()==SymbolType::signature) { string name = i->get().name; diff --git a/source/textwriter.h b/source/textwriter.h index ad127de..a0ed1b7 100644 --- a/source/textwriter.h +++ b/source/textwriter.h @@ -8,9 +8,13 @@ namespace DataFile { class TextWriter: public WriterMode { +private: + std::string float_format; + public: TextWriter(IO::Base &o); + virtual void set_float_precision(unsigned); virtual void write(const Statement &st); private: void write_(const Statement &st, unsigned); diff --git a/source/writer.cpp b/source/writer.cpp index c6a1e12..6091409 100644 --- a/source/writer.cpp +++ b/source/writer.cpp @@ -46,5 +46,10 @@ void Writer::set_binary(bool b) mode = new TextWriter(out); } +void Writer::set_float_precision(unsigned fp) +{ + mode->set_float_precision(fp); +} + } // namespace DataFile } // namespace Msp diff --git a/source/writer.h b/source/writer.h index 159bf95..c8d75e3 100644 --- a/source/writer.h +++ b/source/writer.h @@ -38,6 +38,11 @@ public: @param b true for binary mode, false for text */ void set_binary(bool b); + + /** Sets the precision of floating point numbers in bits. Depending on the + mode not all values may be valid, but any value between 16 and 64 that is + divisible by 8 is guaranteed to work. */ + void set_float_precision(unsigned); }; } // namespace DataFile diff --git a/source/writermode.h b/source/writermode.h index 3778db6..21df83f 100644 --- a/source/writermode.h +++ b/source/writermode.h @@ -17,6 +17,7 @@ protected: public: virtual ~WriterMode() { } + virtual void set_float_precision(unsigned) = 0; virtual void write(const Statement &st) = 0; }; diff --git a/tool/tool.cpp b/tool/tool.cpp index 4e66087..5f9febe 100644 --- a/tool/tool.cpp +++ b/tool/tool.cpp @@ -15,11 +15,13 @@ DataTool::DataTool(int argc, char **argv): in_fn("-"), out_fn("-"), binary(false), - compile(false) + compile(false), + float_size(0) { GetOpt getopt; getopt.add_option('b', "binary", binary, GetOpt::NO_ARG); getopt.add_option('c', "compile", compile, GetOpt::NO_ARG); + getopt.add_option('f', "float-size", float_size, GetOpt::REQUIRED_ARG); getopt.add_option('o', "output", out_fn, GetOpt::REQUIRED_ARG); getopt(argc, argv); @@ -49,6 +51,8 @@ int DataTool::main() DataFile::Writer writer(out_buf); if(binary) writer.set_binary(true); + if(float_size) + writer.set_float_precision(float_size); if(compile) { diff --git a/tool/tool.h b/tool/tool.h index a43fbfb..43c82e1 100644 --- a/tool/tool.h +++ b/tool/tool.h @@ -11,6 +11,7 @@ private: std::string out_fn; bool binary; bool compile; + unsigned float_size; public: DataTool(int argc, char **argv);