git.tdb.fi Git - libs/datafile.git/commitdiff
Use custom encoding for floats in binary format
author Mikko Rasa <tdb@tdb.fi>
Thu, 2 Aug 2012 08:11:40 +0000 (11:11 +0300)
committer Mikko Rasa <tdb@tdb.fi>
Thu, 2 Aug 2012 08:35:21 +0000 (11:35 +0300)
This makes the binary format fully machine-independent, and provides
control over the precision of floating point values.

13 files changed:
source/binaryparser.cpp
source/binaryparser.h
source/binarywriter.cpp
source/binarywriter.h
source/binfloat.cpp [new file with mode: 0644]
source/binfloat.h [new file with mode: 0644]
source/textwriter.cpp
source/textwriter.h
source/writer.cpp
source/writer.h
source/writermode.h
tool/tool.cpp
tool/tool.h

index 81a86ea9a3b66b7ae9c9a6ba0a3dfff4f134ff17..fa99a7e47708ba3c1d4991c16d605d9ae77c7b28 100644 (file)
@@ -1,7 +1,9 @@
+#include <limits>
 #include <sys/param.h>
 #include <msp/core/maputils.h>
 #include <msp/strings/format.h>
 #include "binaryparser.h"
+#include "binfloat.h"
 #include "input.h"
 
 using namespace std;
@@ -22,10 +24,12 @@ public:
 
 BinaryParser::BinaryParser(Input &i, const string &s):
        ParserMode(i, s),
-       first(true)
+       first(true),
+       float_precision(32)
 {
        dict[-1] = DictEntry("__kwd", "iss");
        dict[-2] = DictEntry("__str", "is");
+       dict[-3] = DictEntry("__flt", "i");
 }
 
 Statement BinaryParser::parse()
@@ -51,6 +55,8 @@ Statement BinaryParser::parse()
                        const unsigned id = st.args[0].get<unsigned>();
                        strings[id] = st.args[1].get<const string &>();
                }
+               else if(st.keyword=="__flt")
+                       float_precision = st.args[0].get<unsigned>();
                else
                        return st;
        }
@@ -127,21 +133,47 @@ IntType::Store BinaryParser::parse_int()
 
 FloatType::Store BinaryParser::parse_float()
 {
-       union
+       UInt64 encoded = 0;
+       for(unsigned i=0; i<float_precision; i+=8)
        {
-               float f;
-               char d[sizeof(float)];
-       };
-
-#if BYTE_ORDER == LITTLE_ENDIAN
-       for(unsigned i = sizeof(float); i--;)
-               d[i] = in.get();
-#else
-       for(unsigned i = 0; i<sizeof(float); ++i)
-               d[i] = in.get();
-#endif
-
-       return f;
+               int c = in.get();
+               encoded = (encoded<<8) | (c&0xFF);
+       }
+
+       BinFloat bf = BinFloat::explode(encoded, float_precision);
+
+       if(numeric_limits<FloatType::Store>::is_iec559)
+               return bf.compose_iec559<FloatType::Store>();
+       else
+       {
+               /* Put the float together with arithmetic since we don't know its
+               internal layout */
+               FloatType::Store f = 0;
+               if(bf.infinity)
+               {
+                       if(numeric_limits<FloatType::Store>::has_infinity)
+                               f = numeric_limits<FloatType::Store>::infinity();
+                       else
+                               f = numeric_limits<FloatType::Store>::max();
+               }
+               else
+               {
+                       for(unsigned i=0; i<64; ++i)
+                       {
+                               f /= 2;
+                               if(bf.mantissa&1)
+                                       f += 1;
+                               bf.mantissa >>= 1;
+                       }
+                       for(int i=0; i<bf.exponent; ++i)
+                               f *= 2;
+                       for(int i=0; i>bf.exponent; --i)
+                               f /= 2;
+               }
+               if(bf.sign)
+                       f = -f;
+               return f;
+       }
 }
 
 BoolType::Store BinaryParser::parse_bool()
index b10d7b8791916a8e5ce37257c88da1b33603f6bd..b4fec6b4c9e681f6f635a7e348868d65097eddeb 100644 (file)
@@ -21,6 +21,7 @@ private:
        Dictionary dict;
        StringMap strings;
        bool first;
+       unsigned float_precision;
 
 public:
        BinaryParser(Input &i, const std::string &s);
index 9c9c893e890354bc1198c0cfdd4603ede61b3270..ffdc040f115565bbb18239ee9f2c14a137507fde 100644 (file)
@@ -1,5 +1,7 @@
+#include <limits>
 #include <msp/core/maputils.h>
 #include "binarywriter.h"
+#include "binfloat.h"
 #include "statement.h"
 
 using namespace std;
@@ -10,10 +12,23 @@ namespace DataFile {
 BinaryWriter::BinaryWriter(IO::Base &o):
        WriterMode(o),
        next_kwd_id(1),
-       next_str_id(1)
+       next_str_id(1),
+       float_precision(32)
 {
        dict[DictEntry("__kwd", "iss")] = -1;
        dict[DictEntry("__str", "is")] = -2;
+       dict[DictEntry("__flt", "i")] = -3;
+}
+
+void BinaryWriter::set_float_precision(unsigned fp)
+{
+       if(fp<16 || fp>64 || fp%8)
+               throw invalid_argument("BinaryWriter::set_float_precision");
+       float_precision = fp;
+       Statement fst;
+       fst.keyword = "__flt";
+       fst.args.push_back(float_precision);
+       write_(fst);
 }
 
 void BinaryWriter::write(const Statement &st)
@@ -116,20 +131,44 @@ void BinaryWriter::write_string(const StringType::Store &s)
 
 void BinaryWriter::write_float(FloatType::Store f)
 {
-       union
+       BinFloat bf;
+
+       if(numeric_limits<FloatType::Store>::is_iec559)
+               bf = BinFloat::explode_iec559(f);
+       else
        {
-               float v;
-               char d[sizeof(float)];
-       };
-
-       v = f;
-#if BYTE_ORDER == LITTLE_ENDIAN
-       for(unsigned i = sizeof(float); i--;)
-               out.put(d[i]);
-#else
-       for(unsigned i = 0; i<sizeof(float); ++i)
-               out.put(d[i]);
-#endif
+               /* The structure of the float is unknown, so we must use arithmetic to
+               reduce it to components. */
+               bf.sign = f<0;
+               bf.exponent = 0;
+               bf.mantissa = 0;
+
+               if(f<0)
+                       f = -f;
+               if(!(f+f>f))
+                       bf.infinity = true;
+               else if(f!=0)
+               {
+                       for(; f<1; f*=2)
+                               --bf.exponent;
+                       for(; f>=2; f/=2)
+                               ++bf.exponent;
+                       for(unsigned i=0; i<64; ++i)
+                       {
+                               bf.mantissa <<= 1;
+                               if(f>=1)
+                               {
+                                       bf.mantissa |= 1;
+                                       f -= 1;
+                               }
+                               f *= 2;
+                       }
+               }
+       }
+
+       UInt64 encoded = bf.compose(float_precision);
+       for(unsigned i=float_precision/8; i--; )
+               out.put((encoded>>(i*8))&0xFF);
 }
 
 void BinaryWriter::write_symbol(const SymbolType::Store &s)
index 1a11ae5df9d4f17e4080c678b8a7f10d75f16fd2..2f67bbd24e43bad9403865a8488098ed82d703a9 100644 (file)
@@ -22,10 +22,12 @@ private:
        unsigned next_kwd_id;
        StringMap strings;
        unsigned next_str_id;
+       unsigned float_precision;
 
 public:
        BinaryWriter(IO::Base &o);
 
+       virtual void set_float_precision(unsigned);
        virtual void write(const Statement &st);
 private:
        void write_(const Statement &st);
diff --git a/source/binfloat.cpp b/source/binfloat.cpp
new file mode 100644 (file)
index 0000000..fa4be73
--- /dev/null
@@ -0,0 +1,72 @@
+#include <cmath>
+#include "binfloat.h"
+
+using namespace std;
+
+
+namespace Msp {
+namespace DataFile {
+
+BinFloat BinFloat::explode(UInt64 value, const Bits &bits)
+{
+       UInt64 mantissa_mask = (UInt64(1)<<bits.mantissa)-1;
+       int exponent_mask = (1<<bits.exponent)-1;
+
+       BinFloat bf;
+       // Extract biased exponent and sign
+       bf.exponent = (value>>bits.mantissa)&exponent_mask;
+       bf.sign = value>>(bits.mantissa+bits.exponent);
+       bf.infinity = (bf.exponent==exponent_mask);
+
+       if(bf.exponent==0 || bf.infinity)
+               // Zeroes and infinities have zero mantissa
+               bf.mantissa = 0;
+       else
+       {
+               // Extract mantissa, add the implied one and align it to high bits
+               bf.mantissa = (value&mantissa_mask) | (UInt64(1)<<bits.mantissa);
+               bf.mantissa <<= 63-bits.mantissa;
+       }
+
+       // Unbias the exponent
+       bf.exponent -= exponent_mask>>1;
+
+       return bf;
+}
+
+UInt64 BinFloat::compose(const Bits &bits)
+{
+       UInt64 mantissa_mask = (UInt64(1)<<bits.mantissa)-1;
+       int exponent_mask = (1<<bits.exponent)-1;
+
+       int biased_exponent = exponent+(exponent_mask>>1);
+       // Shift down and round the mantissa
+       UInt64 rounded_mantissa = ((mantissa>>(62-bits.mantissa))+1)>>1;
+       // If the integer part is greater than one, we need to use a higher exponent
+       if((rounded_mantissa>>bits.mantissa)>1)
+               ++biased_exponent;
+
+       if(biased_exponent>=exponent_mask || infinity)
+               // Overflow, return infinity
+               return UInt64(sign<<bits.exponent | exponent_mask)<<bits.mantissa;
+       else if(biased_exponent<=0 || !mantissa)
+               // Underflow, return zero
+               return 0;
+       else
+       {
+               UInt64 value = rounded_mantissa&mantissa_mask;
+               value |= UInt64(biased_exponent)<<bits.mantissa;
+               value |= UInt64(sign)<<(bits.mantissa+bits.exponent);
+               return value;
+       }
+}
+
+
+// exponent = log_2(bits)*3-7; log() is the natural logarithm, so 4.3281 stands in for 3/ln(2)
+BinFloat::Bits::Bits(unsigned bits):
+       exponent(log(bits)*4.3281-7),
+       mantissa(bits-exponent-1)
+{ }
+
+} // namespace DataFile
+} // namespace Msp
diff --git a/source/binfloat.h b/source/binfloat.h
new file mode 100644 (file)
index 0000000..d16570f
--- /dev/null
@@ -0,0 +1,63 @@
+#ifndef MSP_DATAFILE_BINFLOAT_H_
+#define MSP_DATAFILE_BINFLOAT_H_
+
+#include "type.h"
+
+namespace Msp {
+namespace DataFile {
+
+/**
+Facilitates splitting floating-point numbers into parts and putting them back
+together.  Supports arbitrary sizes up to 64 bits.  The 16, 32 and 64 bit
+formats exactly match those defined by ISO/IEC 60559:2011.
+
+The exponent is stored in an unbiased form.  The mantissa is stored with the
+integer part included, aligned to the high bits of a 64-bit integer.
+*/
+struct BinFloat
+{
+       struct Bits
+       {
+               unsigned exponent;
+               unsigned mantissa;
+
+               Bits(unsigned);
+       };
+
+       template<typename T>
+       union Conversion
+       {
+               T f;
+               typename MatchingInt<T>::UnsignedType i;
+       };
+
+       bool sign;
+       bool infinity;
+       int exponent;
+       UInt64 mantissa;
+
+       static BinFloat explode(UInt64, const Bits &);
+
+       template<typename T>
+       static BinFloat explode_iec559(T v)
+       {
+               Conversion<T> c;
+               c.f = v;
+               return explode(c.i, sizeof(T)*CHAR_BIT);
+       }
+
+       UInt64 compose(const Bits &);
+
+       template<typename T>
+       T compose_iec559()
+       {
+               Conversion<T> c;
+               c.i = compose(sizeof(T)*CHAR_BIT);
+               return c.f;
+       }
+};
+
+} // namespace DataFile
+} // namespace Msp
+
+#endif
index 0e5b6742653d80599809d645b77e1f91b9f60675..65f4870ea6e466816ee25d9dd40dbc550d00faa8 100644 (file)
@@ -9,9 +9,15 @@ namespace Msp {
 namespace DataFile {
 
 TextWriter::TextWriter(IO::Base &o):
-       WriterMode(o)
+       WriterMode(o),
+       float_format("%#.7g")
 { }
 
+void TextWriter::set_float_precision(unsigned fp)
+{
+       float_format = format("%%#.%dg", fp/4-1);
+}
+
 void TextWriter::write(const Statement &st)
 {
        write_(st, 0);
@@ -32,7 +38,7 @@ void TextWriter::write_(const Statement &st, unsigned level)
                else if(i->get_signature()==IntType::signature)
                        out.write(lexical_cast(i->get<IntType::Store>()));
                else if(i->get_signature()==FloatType::signature)
-                       out.write(format("%15g", (i->get<FloatType::Store>())));
+                       out.write(format(float_format, i->get<FloatType::Store>()));
                else if(i->get_signature()==SymbolType::signature)
                {
                        string name = i->get<SymbolType::Store>().name;
index ad127debcb0813e98230502b8acfb224178341f8..a0ed1b79888e882a86b083d0a037c6126887ba78 100644 (file)
@@ -8,9 +8,13 @@ namespace DataFile {
 
 class TextWriter: public WriterMode
 {
+private:
+       std::string float_format;
+
 public:
        TextWriter(IO::Base &o);
 
+       virtual void set_float_precision(unsigned);
        virtual void write(const Statement &st);
 private:
        void write_(const Statement &st, unsigned);
index c6a1e1259c2d861a32e4e6a963fe398a8001900a..60914090f742acaa97b90f1f1f9c58fd03756523 100644 (file)
@@ -46,5 +46,10 @@ void Writer::set_binary(bool b)
                mode = new TextWriter(out);
 }
 
+void Writer::set_float_precision(unsigned fp)
+{
+       mode->set_float_precision(fp);
+}
+
 } // namespace DataFile
 } // namespace Msp
index 159bf9552f2ca68c80f740c4f88b222b7a736dd1..c8d75e32dcd643bb09e3ec281331800691c99e02 100644 (file)
@@ -38,6 +38,11 @@ public:
        @param  b  true for binary mode, false for text
        */
        void set_binary(bool b);
+
+       /** Sets the precision of floating point numbers in bits.  Depending on the
+       mode, not all values may be valid, but any value between 16 and 64 that is
+       divisible by 8 is guaranteed to work. */
+       void set_float_precision(unsigned);
 };
 
 } // namespace DataFile
index 3778db6c8f753c4ea36e3560c471538ea0c37fd7..21df83fd428db75f360dde92728c15896f8aebbe 100644 (file)
@@ -17,6 +17,7 @@ protected:
 public:
        virtual ~WriterMode() { }
 
+       virtual void set_float_precision(unsigned) = 0;
        virtual void write(const Statement &st) = 0;
 };
 
index 4e6608766a8e7cbacb47900e337ba0c80436e6f6..5f9febeedda26d87a749551de4035d7ee6e07a30 100644 (file)
@@ -15,11 +15,13 @@ DataTool::DataTool(int argc, char **argv):
        in_fn("-"),
        out_fn("-"),
        binary(false),
-       compile(false)
+       compile(false),
+       float_size(0)
 {
        GetOpt getopt;
        getopt.add_option('b', "binary", binary, GetOpt::NO_ARG);
        getopt.add_option('c', "compile", compile, GetOpt::NO_ARG);
+       getopt.add_option('f', "float-size", float_size, GetOpt::REQUIRED_ARG);
        getopt.add_option('o', "output", out_fn, GetOpt::REQUIRED_ARG);
        getopt(argc, argv);
 
@@ -49,6 +51,8 @@ int DataTool::main()
                DataFile::Writer writer(out_buf);
                if(binary)
                        writer.set_binary(true);
+               if(float_size)
+                       writer.set_float_precision(float_size);
 
                if(compile)
                {
index a43fbfbf02d30958fb54f3ea3a853803e4a84922..43c82e121740b01689231f08a4919bb310065b5a 100644 (file)
@@ -11,6 +11,7 @@ private:
        std::string out_fn;
        bool binary;
        bool compile;
+       unsigned float_size;
 
 public:
        DataTool(int argc, char **argv);