source/strings/lexicalcast.cpp

   1 #include <cmath>
   2 #include <limits>
   3 #include <msp/core/inttypes.h>
   4 #include "format.h"
   5 #include "lexicalcast.h"
   6
   7 using namespace std;
   8
   9 namespace {
  10
  11 using namespace Msp;
  12
  13 template<typename T>
  14 struct IsSigned
  15 { enum { result = !(static_cast<T>(-1)>0) }; };
  16
  17 template<typename T, bool f = (sizeof(T)>sizeof(unsigned long))>
  18 struct Temporary
  19 { typedef unsigned long Type; };
  20
  21 template<typename T>
  22 struct Temporary<T, true>
  23 {
  24         typedef UInt64 Type;
  25 };
  26
  27 /* Helper to avoid warnings about an unsigned type never being < 0 */
  28 template<typename T, bool f = IsSigned<T>::result>
  29 struct IsNegative
  30 { static bool eval(T v) { return v<0; } };
  31
  32 template<typename T>
  33 struct IsNegative<T, false>
  34 { static bool eval(T) { return false; } };
  35
  36 /* Helper to avoid errors about ambiguous function calls since there are no
  37 overloads of abs for unsigned types */
  38 template<typename T, bool f = IsSigned<T>::result>
  39 struct Absolute
  40 { static T eval(T v) { return v<0 ? -v : v; } };
  41
  42 template<typename T>
  43 struct Absolute<T, false>
  44 { static T eval(T v) { return v; } };
  45
  46
  47 /*** Integer conversions ***/
  48
  49 const char udigits[] = "0123456789ABCDEF";
  50 const char ldigits[] = "0123456789abcdef";
  51
  52 template<typename T>
  53 char *int_to_str(T v, const Fmt &f, char *end)
  54 {
  55         if(f.get_type()==Fmt::CHAR)
  56         {
  57                 *--end = v;
  58                 return end;
  59         }
  60
  61         char *ptr = end;
  62
  63         // Find out the base to use
  64         unsigned base = f.get_base();
  65         if(!base)
  66                 base = 10;
  67
  68         // Format the number, starting from the least significant digit
  69         const char *digits = (f.get_uppercase() ? udigits : ldigits);
  70         if(v)
  71         {
  72                 typename Temporary<T>::Type w = Absolute<T>::eval(v);
  73                 while(w)
  74                 {
  75                         *--ptr = digits[w%base];
  76                         w /= base;
  77                 }
  78         }
  79         else
  80                 *--ptr = digits[0];
  81
  82         char sign = (IsNegative<T>::eval(v) ? '-' : f.get_showpos() ? '+' : 0);
  83         if(f.get_fill()=='0')
  84         {
  85                 /* Zero-fill, taking base/sign size into account.  The expression is a
  86                 bit ugly, but saves having to write code for creating the prefix both
  87                 ways. */
  88                 unsigned pfxsize = ((f.get_showbase() && base!=10) ? base==8 ? 1 : 2 : 0) + (sign!=0);
  89                 for(unsigned i=(end-ptr)+pfxsize; i<f.get_width(); ++i)
  90                         *--ptr = '0';
  91         }
  92
  93         if(f.get_showbase() && v!=0)
  94         {
  95                 // Add base indicator
  96                 if(base==2)
  97                         *--ptr = (f.get_uppercase() ? 'B' : 'b');
  98                 else if(base==16)
  99                         *--ptr = (f.get_uppercase() ? 'X' : 'x');
 100                 if(base!=10)
 101                         *--ptr = '0';
 102         }
 103
 104         if(sign)
 105                 *--ptr = sign;
 106
 107         return ptr;
 108 }
 109
 110 template<typename T>
 111 string int_to_str(T v, const Fmt &f)
 112 {
 113         unsigned size = max(f.get_width(), max<unsigned>(f.get_precision(), sizeof(T)*8+3));
 114         char *buf = new char[size];
 115         string result(int_to_str(v, f, buf+size), buf+size);
 116         delete[] buf;
 117         return result;
 118 }
 119
 120 template<typename T>
 121 T str_to_int(const std::string &s, const Fmt &f)
 122 {
 123         if(s.empty())
 124                 throw lexical_error("conversion of '' to integer");
 125
 126         std::string::const_iterator i = s.begin();
 127
 128         // See if the input starts with a sign
 129         bool neg = false;
 130         if(*i=='-')
 131         {
 132                 if(!IsSigned<T>::result)
 133                         throw lexical_error(format("conversion of '%s' to unsigned integer", s));
 134                 neg = true;
 135                 ++i;
 136         }
 137         else if(*i=='+')
 138                 ++i;
 139
 140         // Must have some digits to convert
 141         if(i==s.end())
 142                 throw lexical_error(format("conversion of '%s' to integer", s));
 143
 144         T base = f.get_base();
 145         if(!base && i!=s.end())
 146         {
 147                 // Automatic base detection requested, figure it out
 148                 if(*i=='0' && ++i!=s.end())
 149                 {
 150                         if(*i=='x' || *i=='X')
 151                         {
 152                                 base = 16;
 153                                 ++i;
 154                         }
 155                         else if(*i=='b' || *i=='B')
 156                         {
 157                                 base = 2;
 158                                 ++i;
 159                         }
 160                         else
 161                                 base = 8;
 162                 }
 163                 else
 164                         base = 10;
 165         }
 166
 167         // Parse the digits
 168         T result = 0;
 169         for(; i!=s.end(); ++i)
 170         {
 171                 T digit = base;
 172                 if(*i>='0' && *i<='9')
 173                         digit = *i-'0';
 174                 else if(*i>='A' && *i<='F')
 175                         digit = *i-'A'+10;
 176                 else if(*i>='a' && *i<='f')
 177                         digit = *i-'a'+10;
 178                 if(digit>=base)
 179                         throw lexical_error(format("conversion of '%s' to integer (base-%d)", s, base));
 180                 T next = result*base+digit;
 181                 if(next/base!=result)
 182                         throw lexical_error(format("conversion of '%s' to %d-bit integer", s, sizeof(T)*8));
 183                 result = next;
 184         }
 185
 186         if(neg)
 187                 result = -result;
 188
 189         return result;
 190 }
 191
 192
 193 /*** Boolean conversions ***/
 194
 195 string bool_to_str(bool b, const Fmt &f)
 196 {
 197         if(f.get_type()==Fmt::STR)
 198                 return b ? "true" : "false";
 199         else
 200                 return b ? "1" : "0";
 201 }
 202
 203 bool str_to_bool(const string &s)
 204 {
 205         if(s.empty())
 206                 throw lexical_error("conversion of '' to boolean");
 207
 208         if(s=="1" || s=="true" || s=="yes" || s=="on")
 209                 return true;
 210         else if(s=="0" || s=="false" || s=="no" || s=="off")
 211                 return false;
 212
 213         throw lexical_error(format("conversion of '%s' to boolean", s));
 214 }
 215
 216
 217 /*** Floating-point conversions ***/
 218
 219 template<typename T>
 220 string flt_to_str(T v, const Fmt &f)
 221 {
 222         if(f.get_type()==Fmt::CHAR)
 223                 throw format_mismatch("floating-point conversion with character format");
 224
 225         Fmt::FloatMode mode = f.get_floatmode();
 226         long double w = abs(v);
 227         char sign = (v<0 ? '-' : f.get_showpos() ? '+' : 0);
 228
 229         // Handle infinity and not-a-number as special cases
 230         if(!(w+w>w) && w!=0)
 231         {
 232                 string result;
 233                 if(sign)
 234                         result += sign;
 235                 if(!(w>=0))
 236                         result += (f.get_uppercase() ? "NAN" : "nan");
 237                 else
 238                         result += (f.get_uppercase() ? "INF" : "inf");
 239                 if(result.size()<f.get_width())
 240                         result = string(f.get_width()-result.size(), ' ')+result;
 241                 return result;
 242         }
 243
 244         /* Find out the base-10 exponent.  Building up the multiplier / divisor
 245         first helps with accuracy in some cases. */
 246         int exp = 0;
 247         if(w>=10)
 248         {
 249                 long double div = 1;
 250                 while(div*10<w)
 251                 {
 252                         ++exp;
 253                         div *= 10;
 254                 }
 255                 w /= div;
 256         }
 257         else if(mode!=Fmt::FIXED && w<1 && w!=0)
 258         {
 259                 long double mul = 1;
 260                 while(w*mul<1)
 261                 {
 262                         --exp;
 263                         mul *= 10;
 264                 }
 265                 w *= mul;
 266         }
 267
 268         // Decide how to format the number
 269         unsigned digits;
 270         unsigned point = 1;
 271         bool showexp = false;
 272         if(mode==Fmt::FIXED)
 273         {
 274                 point = exp+1;
 275                 digits = point+f.get_precision();
 276         }
 277         else if(mode==Fmt::SCI)
 278         {
 279                 digits = f.get_precision()+1;
 280                 showexp = true;
 281         }
 282         else
 283         {
 284                 digits = max(f.get_precision(), 1U);
 285                 if(exp<-4 || exp>=static_cast<int>(digits))
 286                 {
 287                         point = 1;
 288                         showexp = true;
 289                 }
 290                 else
 291                 {
 292                         point = max(exp, 0)+1;
 293                         if(exp<0)
 294                                 digits += -exp;
 295                 }
 296         }
 297
 298         // Apply rounding
 299         w += 5.0l/pow(10.0l, static_cast<long double>(digits));
 300         if(w>10)
 301         {
 302                 // Rounding bumped us to the next exponent, deal with it
 303                 w /= 10;
 304                 if(mode==Fmt::AUTOFLT && exp+1==static_cast<int>(digits))
 305                 {
 306                         point = 1;
 307                         showexp = true;
 308                 }
 309                 if(!showexp)
 310                 {
 311                         ++digits;
 312                         ++point;
 313                 }
 314                 else
 315                         ++exp;
 316         }
 317
 318         // Create a buffer and start from the end
 319         unsigned size = max(f.get_width(), digits+8);
 320         char *buf = new char[size];
 321         char *end = buf+size;
 322         char *ptr = end;
 323
 324         // Format exponent
 325         if(showexp)
 326         {
 327                 ptr = int_to_str(exp, Fmt().showpos().fill('0').width(3), ptr);
 328                 *--ptr = (f.get_uppercase() ? 'E' : 'e');
 329         }
 330
 331         // Format mantissa left-to-right
 332         char *eptr = ptr;
 333         ptr -= digits+(point<digits || f.get_showpoint());
 334         char *mptr = ptr;
 335         for(unsigned i=0; i<digits; ++i)
 336         {
 337                 if(i==point)
 338                         *mptr++ = '.';
 339                 if(showexp || static_cast<int>(i)>=-exp)
 340                 {
 341                         int digit = static_cast<int>(w);
 342                         *mptr++ = '0'+digit;
 343                         w = (w-digit)*10;
 344                 }
 345                 else
 346                         *mptr++ = '0';
 347         }
 348
 349         if(f.get_showpoint())
 350         {
 351                 // Radix point requested but not displayed yet, add it
 352                 if(digits<=point)
 353                         *mptr++ = '.';
 354         }
 355         else if(mode==Fmt::AUTOFLT && digits>point)
 356         {
 357                 // Remove trailing zeroes from fraction and a lone radix point
 358                 while(mptr[-1]=='0')
 359                         --mptr;
 360                 if(mptr[-1]=='.')
 361                         --mptr;
 362                 if(mptr!=eptr)
 363                 {
 364                         while(mptr!=ptr)
 365                                 *--eptr = *--mptr;
 366                         ptr = eptr;
 367                 }
 368         }
 369
 370         // Add filling and sign
 371         if(f.get_fill()=='0')
 372         {
 373                 unsigned pfxlen = (sign!=0);
 374                 while(end-ptr+pfxlen<f.get_width())
 375                         *--ptr = '0';
 376         }
 377         if(sign)
 378                 *--ptr = sign;
 379
 380         string result(ptr, end);
 381         delete[] buf;
 382         return result;
 383 }
 384
 385 template<typename T>
 386 T str_to_flt(const string &s, const Fmt &)
 387 {
 388         if(s.empty())
 389                 throw lexical_error("conversion of '' to floating-point");
 390
 391         std::string::const_iterator i = s.begin();
 392
 393         // See if the input starts with a sign
 394         bool neg = false;
 395         if(*i=='-')
 396         {
 397                 neg = true;
 398                 ++i;
 399         }
 400         else if(*i=='+')
 401                 ++i;
 402
 403         // Must have some digits to convert
 404         if(i==s.end())
 405                 throw lexical_error(format("conversion of '%s' to floating-point", s));
 406
 407         long double v = 0;
 408         int exp = 0;
 409
 410         // Parse mantissa
 411         bool point_seen = false;
 412         for(; i!=s.end(); ++i)
 413         {
 414                 if(*i=='.')
 415                 {
 416                         if(point_seen)
 417                                 throw lexical_error(format("conversion of '%s' to floating-point", s));
 418                         point_seen = true;
 419                 }
 420                 else if(*i>='0' && *i<='9')
 421                 {
 422                         v = v*10+(*i-'0');
 423                         if(point_seen)
 424                                 --exp;
 425                 }
 426                 else if(*i=='e' || *i=='E')
 427                 {
 428                         // We have an exponent
 429                         ++i;
 430
 431                         exp += str_to_int<int>(string(i, s.end()), Fmt());
 432                         // str_to_int has eaten the rest of the input or thrown
 433                         break;
 434                 }
 435                 else
 436                         throw lexical_error(format("conversion of '%s' to floating-point", s));
 437         }
 438
 439         // Scale and negate the result as needed
 440         while(exp>0)
 441         {
 442                 v *= 10;
 443                 --exp;
 444         }
 445         while(exp<0)
 446         {
 447                 v /= 10;
 448                 ++exp;
 449         }
 450
 451         if(neg)
 452                 v = -v;
 453
 454         return v;
 455 }
 456
 457
 458 /*** String conversions ***/
 459
 460 string str_to_str(const string &s, const Fmt &f)
 461 {
 462         if(f.get_type()==Fmt::NUM)
 463                 throw format_mismatch("string conversion with numeric format");
 464         return s;
 465 }
 466
 467 }
 468
 469 namespace Msp {
 470
 471 void LexicalConverter::result(const string &s)
 472 {
 473         if(s.size()<fmt.get_width())
 474         {
 475                 if(fmt.get_align()==Fmt::RIGHT)
 476                         buf = string(fmt.get_width()-s.size(), fmt.get_fill())+s;
 477                 else
 478                         buf = s+string(fmt.get_width()-s.size(), fmt.get_fill());
 479         }
 480         else
 481                 buf = s;
 482 }
 483
 484
 485 /*** operator<< ***/
 486
 487 void operator<<(LexicalConverter &c, char v)
 488 {
 489         Fmt::Type type = c.get_fmt().get_type();
 490         if(type==Fmt::NUM)
 491                 c.result(int_to_str(v, c.get_fmt()));
 492         else
 493                 c.result(string(1, v));
 494 }
 495
 496 void operator<<(LexicalConverter &c, signed char v)
 497 { c.result(int_to_str(v, c.get_fmt())); }
 498
 499 void operator<<(LexicalConverter &c, short v)
 500 { c.result(int_to_str(v, c.get_fmt())); }
 501
 502 void operator<<(LexicalConverter &c, int v)
 503 { c.result(int_to_str(v, c.get_fmt())); }
 504
 505 void operator<<(LexicalConverter &c, long v)
 506 { c.result(int_to_str(v, c.get_fmt())); }
 507
 508 void operator<<(LexicalConverter &c, unsigned char v)
 509 { c.result(int_to_str(v, c.get_fmt())); }
 510
 511 void operator<<(LexicalConverter &c, unsigned short v)
 512 { c.result(int_to_str(v, c.get_fmt())); }
 513
 514 void operator<<(LexicalConverter &c, unsigned v)
 515 { c.result(int_to_str(v, c.get_fmt())); }
 516
 517 void operator<<(LexicalConverter &c, unsigned long v)
 518 { c.result(int_to_str(v, c.get_fmt())); }
 519
 520 #ifdef __GNUC__
 521 void operator<<(LexicalConverter &c, long long v)
 522 { c.result(int_to_str(v, c.get_fmt())); }
 523
 524 void operator<<(LexicalConverter &c, unsigned long long v)
 525 { c.result(int_to_str(v, c.get_fmt())); }
 526 #endif
 527
 528 void operator<<(LexicalConverter &c, bool v)
 529 { c.result(bool_to_str(v, c.get_fmt())); }
 530
 531 void operator<<(LexicalConverter &c, float v)
 532 { c.result(flt_to_str(v, c.get_fmt())); }
 533
 534 void operator<<(LexicalConverter &c, double v)
 535 { c.result(flt_to_str(v, c.get_fmt())); }
 536
 537 void operator<<(LexicalConverter &c, long double v)
 538 { c.result(flt_to_str(v, c.get_fmt())); }
 539
 540 void operator<<(LexicalConverter &c, const string &s)
 541 { c.result(str_to_str(s, c.get_fmt())); }
 542
 543 void operator<<(LexicalConverter &c, const char *s)
 544 { c.result(str_to_str(s, c.get_fmt())); }
 545
 546 void operator<<(LexicalConverter &c, const void *p)
 547 { c.result(int_to_str(reinterpret_cast<unsigned long>(p), c.get_fmt())); }
 548
 549
 550 /*** operator>> ***/
 551
 552 void operator>>(const LexicalConverter &c, char &v)
 553 {
 554         if(c.get_fmt().get_type()==Fmt::NUM)
 555                 v = str_to_int<char>(c.get(), c.get_fmt());
 556         else
 557         {
 558                 const std::string &s = c.get();
 559                 if(s.empty())
 560                         throw lexical_error("conversion of '' to character");
 561                 if(s.size()>1)
 562                         throw lexical_error(format("conversion of '%s' to character", s));
 563                 v = s[0];
 564         }
 565 }
 566
 567 void operator>>(const LexicalConverter &c, signed char &v)
 568 { v = str_to_int<signed char>(c.get(), c.get_fmt()); }
 569
 570 void operator>>(const LexicalConverter &c, short &v)
 571 { v = str_to_int<short>(c.get(), c.get_fmt()); }
 572
 573 void operator>>(const LexicalConverter &c, int &v)
 574 { v = str_to_int<int>(c.get(), c.get_fmt()); }
 575
 576 void operator>>(const LexicalConverter &c, long &v)
 577 { v = str_to_int<long>(c.get(), c.get_fmt()); }
 578
 579 void operator>>(const LexicalConverter &c, unsigned char &v)
 580 { v = str_to_int<unsigned char>(c.get(), c.get_fmt()); }
 581
 582 void operator>>(const LexicalConverter &c, unsigned short &v)
 583 { v = str_to_int<unsigned short>(c.get(), c.get_fmt()); }
 584
 585 void operator>>(const LexicalConverter &c, unsigned int &v)
 586 { v = str_to_int<unsigned int>(c.get(), c.get_fmt()); }
 587
 588 void operator>>(const LexicalConverter &c, unsigned long &v)
 589 { v = str_to_int<unsigned long>(c.get(), c.get_fmt()); }
 590
 591 #ifdef __GNUC__
 592 void operator>>(const LexicalConverter &c, long long &v)
 593 { v = str_to_int<long long>(c.get(), c.get_fmt()); }
 594
 595 void operator>>(const LexicalConverter &c, unsigned long long &v)
 596 { v = str_to_int<unsigned long long>(c.get(), c.get_fmt()); }
 597 #endif
 598
 599 void operator>>(const LexicalConverter &c, bool &v)
 600 { v = str_to_bool(c.get()); }
 601
 602 void operator>>(const LexicalConverter &c, float &v)
 603 { v = str_to_flt<float>(c.get(), c.get_fmt()); }
 604
 605 void operator>>(const LexicalConverter &c, double &v)
 606 { v = str_to_flt<double>(c.get(), c.get_fmt()); }
 607
 608 void operator>>(const LexicalConverter &c, long double &v)
 609 { v = str_to_flt<long double>(c.get(), c.get_fmt()); }
 610
 611 void operator>>(const LexicalConverter &c, string &s)
 612 { s = str_to_str(c.get(), c.get_fmt()); }
 613
 614 } // namespace Msp