From: Mikko Rasa Date: Tue, 9 Dec 2008 09:12:30 +0000 (+0000) Subject: Rewrite lexical_cast to use internal conversion routines X-Git-Tag: strings-1.1~9 X-Git-Url: http://git.tdb.fi/?a=commitdiff_plain;h=f5aa787e1a715867a8024816ccd58e9a4c7e23a4;p=libs%2Fcore.git Rewrite lexical_cast to use internal conversion routines Add getters to Fmt --- diff --git a/source/fmt.cpp b/source/fmt.cpp index fdd73f3..0c1531d 100644 --- a/source/fmt.cpp +++ b/source/fmt.cpp @@ -1,9 +1,10 @@ /* $Id$ This file is part of libmspstrings -Copyright © 2006-2007 Mikko Rasa +Copyright © 2006-2008 Mikko Rasa Distributed under the LGPL */ + #include #include "fmt.h" @@ -22,10 +23,11 @@ Fmt &Fmt::reset() fillc=' '; base=DEC; sbase=false; - fmode=EXP; + fmode=AUTOFLT; spoint=false; align=RIGHT; ucase=false; + type=STR; return *this; } @@ -94,27 +96,36 @@ void Fmt::parse(const char *f) } } - if(*f=='x' || *f=='X') + type=NUM; + if(*f=='d' || *f=='u') + base=DEC; + else if(*f=='x' || *f=='X') base=HEX; else if(*f=='o') base=OCT; + else if(*f=='b') + base=BIN; else if(*f=='e' || *f=='E') fmode=SCI; else if(*f=='f' || *f=='F') fmode=FIXED; else if(*f=='g' || *f=='G') - fmode=EXP; - else if(*f=='p') + fmode=AUTOFLT; + else if(*f=='p' || *f=='P') { base=HEX; sbase=true; } - else if(*f=='d' || *f=='i' || *f=='u' || *f=='c' || *f=='s') - ; + else if(*f=='c') + type=CHAR; + else if(*f=='s') + type=STR; + else if(*f=='i') + base=AUTOBASE; else throw InvalidParameterValue("Invalid conversion specifier"); - if(*f=='E' || *f=='F' || *f=='G' || *f=='X') + if(*f=='E' || *f=='F' || *f=='G' || *f=='X' || *f=='P') ucase=true; ++f; diff --git a/source/fmt.h b/source/fmt.h index c4b46b5..7dc605f 100644 --- a/source/fmt.h +++ b/source/fmt.h @@ -1,7 +1,7 @@ /* $Id$ This file is part of libmspstrings -Copyright © 2006-2007 Mikko Rasa +Copyright © 2006-2008 Mikko Rasa Distributed under the LGPL */ @@ -23,43 +23,39 @@ chaining calls to the various setter functions, or with a mixture of both. Since type information for conversions is acquired through templates, the meaning of the conversion specifier character is reduced to only specifying -what the conversion should look like. In particular, the d, i, u, c and s -conversions are no-ops. +what the conversion should look like. Of special note is the s conversion, +which will result in a default conversion for any data type. Size modifiers +are not supported and there is no difference between signed and unsigned +conversions. + +Some new conversions are supported: + + b/B Binary integer conversion + P Uppercase pointer conversion (like %#X) */ class Fmt { public: - Fmt() { reset(); } - Fmt(const char *f) { reset(); parse(f); } - Fmt(const std::string &f) { reset(); parse(f.c_str()); } - Fmt &width(unsigned w) { wd=w; return *this; } - Fmt &precision(unsigned p) { prec=p; return *this; } - Fmt &showpos(bool s=true) { spos=s; return *this; } - Fmt &fill(wchar_t f) { fillc=f; return *this; } - Fmt &fixed() { fmode=FIXED; return *this; } - Fmt &scientific() { fmode=SCI; return *this; } - Fmt &showpoint(bool s=true) { spoint=s; return *this; } - Fmt &showbase(bool s=true) { sbase=s; return *this; } - Fmt &left() { align=LEFT; return *this; } - Fmt &right() { align=RIGHT; return *this; } - Fmt &dec() { base=DEC; return *this; } - Fmt &hex() { base=HEX; return *this; } - Fmt &oct() { base=OCT; return *this; } - Fmt &uppercase(bool u=true) { ucase=u; return *this; } - Fmt &reset(); - void apply(std::ostream &) const; -private: + enum Type + { + NUM, + CHAR, + STR + }; + enum Base { - DEC, - HEX, - OCT + AUTOBASE = 0, + DEC = 10, + HEX = 16, + OCT = 8, + BIN = 2 }; enum FloatMode { FIXED, - EXP, + AUTOFLT, SCI }; @@ -69,6 +65,7 @@ private: RIGHT }; +private: unsigned wd; unsigned prec; bool spos; @@ -79,7 +76,47 @@ private: bool spoint; Align align; bool ucase; + Type type; + +public: + Fmt() { reset(); } + Fmt(const char *f) { reset(); parse(f); } + Fmt(const std::string &f) { reset(); parse(f.c_str()); } + + Fmt &width(unsigned w) { wd=w; return *this; } + Fmt &precision(unsigned p) { prec=p; return *this; } + Fmt &showpos(bool s=true) { spos=s; return *this; } + Fmt &fill(wchar_t f) { fillc=f; return *this; } + Fmt &fixed() { fmode=FIXED; return *this; } + Fmt &scientific() { fmode=SCI; return *this; } + Fmt &showpoint(bool s=true) { spoint=s; return *this; } + Fmt &showbase(bool s=true) { sbase=s; return *this; } + Fmt &left() { align=LEFT; return *this; } + Fmt &right() { align=RIGHT; return *this; } + Fmt &dec() { base=DEC; return *this; } + Fmt &hex() { base=HEX; return *this; } + Fmt &oct() { base=OCT; return *this; } + Fmt &bin() { base=BIN; return *this; } + Fmt &uppercase(bool u=true) { ucase=u; return *this; } + Fmt &numeric() { type=NUM; return *this; } + Fmt &character() { type=CHAR; return *this; } + Fmt &string() { type=STR; return *this; } + Fmt &reset(); + unsigned get_width() const { return wd; } + unsigned get_precision() const { return prec; } + bool get_showpos() const { return spos; } + wchar_t get_fill() const { return fillc; } + Base get_base() const { return base; } + bool get_showbase() const { return sbase; } + FloatMode get_floatmode() const { return fmode; } + bool get_showpoint() const { return spoint; } + Align get_align() const { return align; } + bool get_uppercase() const { return ucase; } + Type get_type() const { return type; } + + void apply(std::ostream &) const; +private: void parse(const char *); }; diff --git a/source/lexicalcast.cpp b/source/lexicalcast.cpp new file mode 100644 index 0000000..687a37b --- /dev/null +++ b/source/lexicalcast.cpp @@ -0,0 +1,619 @@ +/* $Id$ + +This file is part of libmspstrings +Copyright © 2006-2008 Mikko Rasa +Distributed under the LGPL +*/ + +#include +#include +#include "lexicalcast.h" + +using namespace std; + +namespace { + +using namespace Msp; + +template +struct IsSigned +{ enum { result=!(static_cast(-1)>0) }; }; + +templatesizeof(unsigned long))> +struct Temporary +{ typedef unsigned long Type; }; + +template +struct Temporary +{ +#ifdef WIN32 + typedef __int64 Type; +#else + typedef unsigned long long Type; +#endif +}; + +/* Helper to avoid warnings about an unsigned type never being < 0 */ +template::result> +struct IsNegative +{ static bool eval(T v) { return v<0; } }; + +template +struct IsNegative +{ static bool eval(T) { return false; } }; + +/* Helper to avoid errors about ambiguous function calls since there are no +overloads of abs for unsigned types */ +template::result> +struct Absolute +{ static T eval(T v) { return v<0 ? -v : v; } }; + +template +struct Absolute +{ static T eval(T v) { return v; } }; + + +/*** Integer conversions ***/ + +const char udigits[]="0123456789ABCDEF"; +const char ldigits[]="0123456789abcdef"; + +template +char *int_to_str(T v, const Fmt &f, char *end) +{ + if(f.get_type()==Fmt::CHAR) + { + *--end=v; + return end; + } + + char *ptr=end; + + // Find out the base to use + unsigned base=f.get_base(); + if(!base) + base=10; + + // Format the number, starting from the least significant digit + const char *digits=(f.get_uppercase() ? udigits : ldigits); + if(v) + { + typename Temporary::Type w=Absolute::eval(v); + while(w) + { + *--ptr=digits[w%base]; + w/=base; + } + } + else + *--ptr=digits[0]; + + char sign=(IsNegative::eval(v) ? '-' : f.get_showpos() ? '+' : 0); + if(f.get_fill()=='0') + { + /* Zero-fill, taking base/sign size into account. The expression is a + bit ugly, but saves having to write code for creating the prefix both + ways. */ + unsigned pfxsize=((f.get_showbase() && base!=10) ? base==8 ? 1 : 2 : 0) + (sign!=0); + for(unsigned i=(end-ptr)+pfxsize; i +string int_to_str(T v, const Fmt &f) +{ + unsigned size=max(f.get_width(), max(f.get_precision(), sizeof(T)*8+3)); + char *buf=new char[size]; + string result(int_to_str(v, f, buf+size), buf+size); + delete[] buf; + return result; +} + +template +T str_to_int(const std::string &s, const Fmt &f) +{ + if(s.empty()) + throw LexicalError("Empty input in integer conversion"); + + std::string::const_iterator i=s.begin(); + + // See if the input starts with a sign + bool neg=false; + if(*i=='-') + { + if(!IsSigned::result) + throw LexicalError("Negative sign in unsigned integer conversion"); + neg=true; + ++i; + } + else if(*i=='+') + ++i; + + // Must have some digits to convert + if(i==s.end()) + throw LexicalError("Missing digits in integer conversion"); + + T base=f.get_base(); + if(!base && i!=s.end()) + { + // Automatic base detection requested, figure it out + if(*i=='0' && ++i!=s.end()) + { + if(*i=='x' || *i=='X') + { + base=16; + ++i; + } + else if(*i=='b' || *i=='B') + { + base=2; + ++i; + } + else + base=8; + } + else + base=10; + } + + // Parse the digits + T result=0; + for(; i!=s.end(); ++i) + { + T digit=base; + if(*i>='0' && *i<='9') + digit=*i-'0'; + else if(*i>='A' && *i<='F') + digit=*i-'A'+10; + else if(*i>='a' && *i<='f') + digit=*i-'a'+10; + if(digit>=base) + throw LexicalError("Invalid digit in integer conversion"); + T next=result*base+digit; + if(next/base!=result) + throw LexicalError("Overflow in integer conversion"); + result=next; + } + + if(neg) + result=-result; + + return result; +} + + +/*** Boolean conversions ***/ + +string bool_to_str(bool b, const Fmt &f) +{ + if(f.get_type()==Fmt::STR) + return b ? "true" : "false"; + else + return b ? "1" : "0"; +} + +bool str_to_bool(const string &s) +{ + if(s=="1" || s=="true" || s=="yes" || s=="on") + return true; + else if(s=="0" || s=="false" || s=="no" || s=="off") + return true; + throw LexicalError("Invalid input in boolean conversion"); +} + + +/*** Floating-point conversions ***/ + +template +string flt_to_str(T v, const Fmt &f) +{ + if(f.get_type()==Fmt::CHAR) + throw LexicalError("Character format in floating-point conversion"); + + Fmt::FloatMode mode=f.get_floatmode(); + long double w=abs(v); + char sign=(v<0 ? '-' : f.get_showpos() ? '+' : 0); + + // Handle infinity and not-a-number as special cases + if(!(w+w>w) && w!=0) + { + string result; + if(sign) + result+=sign; + if(!(w>=0)) + result+=(f.get_uppercase() ? "NAN" : "nan"); + else + result+=(f.get_uppercase() ? "INF" : "inf"); + if(result.size()=10) + { + long double div=1; + while(div*10=static_cast(digits)) + { + point=1; + showexp=true; + } + else + { + point=max(exp, 0)+1; + if(exp<0) + digits+=-exp; + } + } + + // Apply rounding + w+=5.0l/pow(10.0l, digits); + if(w>10) + { + // Rounding bumped us to the next exponent, deal with it + w/=10; + if(mode==Fmt::AUTOFLT && exp+1==static_cast(digits)) + { + point=1; + showexp=true; + } + if(!showexp) + { + ++digits; + ++point; + } + else + ++exp; + } + + // Create a buffer and start from the end + unsigned size=max(f.get_width(), digits+8); + char *buf=new char[size]; + char *end=buf+size; + char *ptr=end; + + // Format exponent + if(showexp) + { + ptr=int_to_str(exp, Fmt().showpos().fill('0').width(3), ptr); + *--ptr=(f.get_uppercase() ? 'E' : 'e'); + } + + // Format mantissa left-to-right + char *eptr=ptr; + ptr-=digits+(point(i)>=-exp) + { + int digit=static_cast(w); + *mptr++='0'+digit; + w=(w-digit)*10; + } + else + *mptr++='0'; + } + + if(f.get_showpoint()) + { + // Radix point requested but not displayed yet, add it + if(digits<=point) + *mptr++='.'; + } + else if(mode==Fmt::AUTOFLT && digits>point) + { + // Remove trailing zeroes from fraction and a lone radix point + while(mptr[-1]=='0') + --mptr; + if(mptr[-1]=='.') + --mptr; + if(mptr!=eptr) + { + while(mptr!=ptr) + *--eptr=*--mptr; + ptr=eptr; + } + } + + // Add filling and sign + if(f.get_fill()=='0') + { + unsigned pfxlen=(sign!=0); + while(end-ptr+pfxlen +T str_to_flt(const string &s, const Fmt &) +{ + if(s.empty()) + throw LexicalError("Empty input in floating-point conversion"); + + std::string::const_iterator i=s.begin(); + + // See if the input starts with a sign + bool neg=false; + if(*i=='-') + { + neg=true; + ++i; + } + else if(*i=='+') + ++i; + + // Must have some digits to convert + if(i==s.end()) + throw LexicalError("Missing digits in floating-point conversion"); + + long double v=0; + int exp=0; + + // Parse mantissa + bool point_seen=false; + for(; i!=s.end(); ++i) + { + if(*i=='.') + { + if(point_seen) + throw LexicalError("Extra point in floating-point conversion"); + point_seen=true; + } + else if(*i>='0' && *i<='9') + { + v=v*10+(*i-'0'); + if(point_seen) + --exp; + } + else if(*i=='e' || *i=='E') + { + // We have an exponent + ++i; + + exp+=str_to_int(string(i, s.end()), Fmt()); + // str_to_int has eaten the rest of the input or thrown + break; + } + else + throw LexicalError("Invalid digit in floating-point conversion"); + } + + // Scale and negate the result as needed + while(exp>0) + { + v*=10; + --exp; + } + while(exp<0) + { + v/=10; + ++exp; + } + + if(neg) + v=-v; + + return v; +} + + +/*** String conversions ***/ + +string str_to_str(const string &s, const Fmt &f) +{ + if(f.get_type()==Fmt::NUM) + throw LexicalError("Numeric format in string conversion"); + return s; +} + +} + +namespace Msp { + +void LexicalConverter::result(const string &s) +{ + if(s.size()(p), c.get_fmt())); } + + +/*** operator>> ***/ + +void operator>>(const LexicalConverter &c, char &v) +{ + if(c.get_fmt().get_type()==Fmt::NUM) + v=str_to_int(c.get(), c.get_fmt()); + else + { + const std::string &s=c.get(); + if(s.empty()) + throw LexicalError("Empty input in character conversion"); + if(s.size()>1) + throw LexicalError("Extra input in character conversion"); + v=s[0]; + } +} + +void operator>>(const LexicalConverter &c, signed char &v) +{ v=str_to_int(c.get(), c.get_fmt()); } + +void operator>>(const LexicalConverter &c, short &v) +{ v=str_to_int(c.get(), c.get_fmt()); } + +void operator>>(const LexicalConverter &c, int &v) +{ v=str_to_int(c.get(), c.get_fmt()); } + +void operator>>(const LexicalConverter &c, long &v) +{ v=str_to_int(c.get(), c.get_fmt()); } + +void operator>>(const LexicalConverter &c, unsigned char &v) +{ v=str_to_int(c.get(), c.get_fmt()); } + +void operator>>(const LexicalConverter &c, unsigned short &v) +{ v=str_to_int(c.get(), c.get_fmt()); } + +void operator>>(const LexicalConverter &c, unsigned int &v) +{ v=str_to_int(c.get(), c.get_fmt()); } + +void operator>>(const LexicalConverter &c, unsigned long &v) +{ v=str_to_int(c.get(), c.get_fmt()); } + +#ifdef __GNUC__ +void operator>>(const LexicalConverter &c, long long &v) +{ v=str_to_int(c.get(), c.get_fmt()); } + +void operator>>(const LexicalConverter &c, unsigned long long &v) +{ v=str_to_int(c.get(), c.get_fmt()); } +#endif + +void operator>>(const LexicalConverter &c, bool &v) +{ v=str_to_bool(c.get()); } + +void operator>>(const LexicalConverter &c, float &v) +{ v=str_to_flt(c.get(), c.get_fmt()); } + +void operator>>(const LexicalConverter &c, double &v) +{ v=str_to_flt(c.get(), c.get_fmt()); } + +void operator>>(const LexicalConverter &c, long double &v) +{ v=str_to_flt(c.get(), c.get_fmt()); } + +void operator>>(const LexicalConverter &c, string &s) +{ s=str_to_str(c.get(), c.get_fmt()); } + +} // namespace Msp diff --git a/source/lexicalcast.h b/source/lexicalcast.h index a2bb439..42393fe 100644 --- a/source/lexicalcast.h +++ b/source/lexicalcast.h @@ -1,7 +1,7 @@ /* $Id$ This file is part of libmspstrings -Copyright © 2006-2007 Mikko Rasa +Copyright © 2006-2008 Mikko Rasa Distributed under the LGPL */ @@ -15,39 +15,110 @@ Distributed under the LGPL namespace Msp { +/** +Thrown for errors in lexical conversions +*/ class LexicalError: public Exception { public: LexicalError(const std::string &w_): Exception(w_) { } }; -template -T lexical_cast(const std::string &s) +/** +Helper class for lexical_cast to facilitate operator overloading. +*/ +class LexicalConverter { - std::istringstream ss(s); - ss.setf(std::ios_base::fmtflags(0), std::ios_base::skipws); +private: + Fmt fmt; + std::string buf; + +public: + LexicalConverter(const Fmt &f): fmt(f) { } + LexicalConverter(const std::string &s, const Fmt &f): fmt(f), buf(s) { } + + const Fmt &get_fmt() const { return fmt; } + const std::string &get() const { return buf; } + void result(const std::string &); +}; + +void operator<<(LexicalConverter &, char); +void operator<<(LexicalConverter &, signed char); +void operator<<(LexicalConverter &, short); +void operator<<(LexicalConverter &, int); +void operator<<(LexicalConverter &, long); +void operator<<(LexicalConverter &, unsigned char); +void operator<<(LexicalConverter &, unsigned short); +void operator<<(LexicalConverter &, unsigned); +void operator<<(LexicalConverter &, unsigned long); +#ifdef __GNUC__ +void operator<<(LexicalConverter &, long long); +void operator<<(LexicalConverter &, unsigned long long); +#endif +void operator<<(LexicalConverter &, bool); +void operator<<(LexicalConverter &, float); +void operator<<(LexicalConverter &, double); +void operator<<(LexicalConverter &, long double); +void operator<<(LexicalConverter &, const std::string &); +void operator<<(LexicalConverter &, const char *); +void operator<<(LexicalConverter &, const void *); + +void operator>>(const LexicalConverter &, char &); +void operator>>(const LexicalConverter &, signed char &); +void operator>>(const LexicalConverter &, short &); +void operator>>(const LexicalConverter &, int &); +void operator>>(const LexicalConverter &, long &); +void operator>>(const LexicalConverter &, unsigned char &); +void operator>>(const LexicalConverter &, unsigned short &); +void operator>>(const LexicalConverter &, unsigned int &); +void operator>>(const LexicalConverter &, unsigned long &); +#ifdef __GNUC__ +void operator>>(const LexicalConverter &, long long &); +void operator>>(const LexicalConverter &, unsigned long long &); +#endif +void operator>>(const LexicalConverter &, bool &); +void operator>>(const LexicalConverter &, float &); +void operator>>(const LexicalConverter &, double &); +void operator>>(const LexicalConverter &, long double &); +void operator>>(const LexicalConverter &, std::string &); - T tmp; - ss>>tmp; +// Generic operators using stringstream +template +void operator<<(LexicalConverter &c, const T &v) +{ + std::ostringstream ss; + ss< +void operator>>(const LexicalConverter &c, T &v) +{ + std::istringstream ss(c.get()); + ss.setf(std::ios_base::fmtflags(0), std::ios_base::skipws); + ss>>v; if(ss.fail() || !ss.eof()) throw LexicalError("Conversion failure"); - - return tmp; } -template<> -inline std::string lexical_cast(const std::string &s) +// The main interface to the lexical conversion machinery + +template +inline T lexical_cast(const std::string &s, const Fmt &f=Fmt()) { - return s; + LexicalConverter conv(s, f); + T result; + conv>>result; + return result; } template -std::string lexical_cast(const T &v, const Fmt &f=Fmt()) +inline std::string lexical_cast(const T &v, const Fmt &f=Fmt()) { - std::ostringstream ss; - ss<