source/strings/lexicalcast.cpp

   1 #include <cmath>
   2 #include <limits>
   3 #include <msp/core/inttypes.h>
   4 #include "format.h"
   5 #include "lexicalcast.h"
   6
   7 using namespace std;
   8
   9 namespace {
  10
  11 using namespace Msp;
  12
  13 /* Helper to avoid warnings about an unsigned type never being < 0 */
  14 template<typename T, bool f = is_signed<T>::value>
  15 struct IsNegative
  16 { static bool eval(T v) { return v<0; } };
  17
  18 template<typename T>
  19 struct IsNegative<T, false>
  20 { static bool eval(T) { return false; } };
  21
  22 /* Helper to avoid errors about ambiguous function calls since there are no
  23 overloads of abs for unsigned types */
  24 template<typename T, bool f = is_signed<T>::value>
  25 struct Absolute
  26 { static T eval(T v) { return v<0 ? -v : v; } };
  27
  28 template<typename T>
  29 struct Absolute<T, false>
  30 { static T eval(T v) { return v; } };
  31
  32
  33 /*** Integer conversions ***/
  34
  35 const char udigits[] = "0123456789ABCDEF";
  36 const char ldigits[] = "0123456789abcdef";
  37
  38 template<typename T>
  39 char *int_to_str(T v, const Fmt &f, char *end)
  40 {
  41         if(f.get_type()==Fmt::CHAR)
  42         {
  43                 *--end = v;
  44                 return end;
  45         }
  46
  47         char *ptr = end;
  48
  49         // Find out the base to use
  50         unsigned base = f.get_base();
  51         if(!base)
  52                 base = 10;
  53
  54         // Format the number, starting from the least significant digit
  55         const char *digits = (f.get_uppercase() ? udigits : ldigits);
  56         if(v)
  57         {
  58                 typename std::make_unsigned<T>::type w = Absolute<T>::eval(v);
  59                 while(w)
  60                 {
  61                         *--ptr = digits[w%base];
  62                         w /= base;
  63                 }
  64         }
  65         else
  66                 *--ptr = digits[0];
  67
  68         char sign = (IsNegative<T>::eval(v) ? '-' : f.get_showpos() ? '+' : 0);
  69         if(f.get_fill()=='0')
  70         {
  71                 /* Zero-fill, taking base/sign size into account.  The expression is a
  72                 bit ugly, but saves having to write code for creating the prefix both
  73                 ways. */
  74                 unsigned pfxsize = ((f.get_showbase() && base!=10) ? base==8 ? 1 : 2 : 0) + (sign!=0);
  75                 for(unsigned i=(end-ptr)+pfxsize; i<f.get_width(); ++i)
  76                         *--ptr = '0';
  77         }
  78
  79         if(f.get_showbase() && v!=0)
  80         {
  81                 // Add base indicator
  82                 if(base==2)
  83                         *--ptr = (f.get_uppercase() ? 'B' : 'b');
  84                 else if(base==16)
  85                         *--ptr = (f.get_uppercase() ? 'X' : 'x');
  86                 if(base!=10)
  87                         *--ptr = '0';
  88         }
  89
  90         if(sign)
  91                 *--ptr = sign;
  92
  93         return ptr;
  94 }
  95
  96 template<typename T>
  97 string int_to_str(T v, const Fmt &f)
  98 {
  99         unsigned size = max(f.get_width(), max<unsigned>(f.get_precision(), sizeof(T)*8+3));
 100         char *buf = new char[size];
 101         string result(int_to_str(v, f, buf+size), buf+size);
 102         delete[] buf;
 103         return result;
 104 }
 105
 106 template<typename T>
 107 T str_to_int(const string &s, const Fmt &f)
 108 {
 109         if(s.empty())
 110                 throw lexical_error("conversion of '' to integer");
 111
 112         auto i = s.begin();
 113
 114         // See if the input starts with a sign
 115         bool neg = false;
 116         if(*i=='-')
 117         {
 118                 if(is_unsigned<T>::value)
 119                         throw lexical_error(format("conversion of '%s' to unsigned integer", s));
 120                 neg = true;
 121                 ++i;
 122         }
 123         else if(*i=='+')
 124                 ++i;
 125
 126         // Must have some digits to convert
 127         if(i==s.end())
 128                 throw lexical_error(format("conversion of '%s' to integer", s));
 129
 130         T base = f.get_base();
 131         if(!base && i!=s.end())
 132         {
 133                 // Automatic base detection requested, figure it out
 134                 if(*i=='0' && ++i!=s.end())
 135                 {
 136                         if(*i=='x' || *i=='X')
 137                         {
 138                                 base = 16;
 139                                 ++i;
 140                         }
 141                         else if(*i=='b' || *i=='B')
 142                         {
 143                                 base = 2;
 144                                 ++i;
 145                         }
 146                         else
 147                                 base = 8;
 148                 }
 149                 else
 150                         base = 10;
 151         }
 152
 153         // Parse the digits
 154         T result = 0;
 155         for(; i!=s.end(); ++i)
 156         {
 157                 T digit = base;
 158                 if(*i>='0' && *i<='9')
 159                         digit = *i-'0';
 160                 else if(*i>='A' && *i<='F')
 161                         digit = *i-'A'+10;
 162                 else if(*i>='a' && *i<='f')
 163                         digit = *i-'a'+10;
 164                 if(digit>=base)
 165                         throw lexical_error(format("conversion of '%s' to integer (base-%d)", s, base));
 166                 T next = result*base+digit;
 167                 if(next/base!=result)
 168                         throw lexical_error(format("conversion of '%s' to %d-bit integer", s, sizeof(T)*8));
 169                 result = next;
 170         }
 171
 172         if(neg)
 173                 result = -result;
 174
 175         return result;
 176 }
 177
 178
 179 /*** Boolean conversions ***/
 180
 181 string bool_to_str(bool b, const Fmt &f)
 182 {
 183         if(f.get_type()==Fmt::STR)
 184                 return b ? "true" : "false";
 185         else
 186                 return b ? "1" : "0";
 187 }
 188
 189 bool str_to_bool(const string &s)
 190 {
 191         if(s.empty())
 192                 throw lexical_error("conversion of '' to boolean");
 193
 194         if(s=="1" || s=="true" || s=="yes" || s=="on")
 195                 return true;
 196         else if(s=="0" || s=="false" || s=="no" || s=="off")
 197                 return false;
 198
 199         throw lexical_error(format("conversion of '%s' to boolean", s));
 200 }
 201
 202
 203 /*** Floating-point conversions ***/
 204
 205 template<typename T>
 206 string flt_to_str(T v, const Fmt &f)
 207 {
 208         if(f.get_type()==Fmt::CHAR)
 209                 throw format_mismatch("floating-point conversion with character format");
 210
 211         Fmt::FloatMode mode = f.get_floatmode();
 212         long double w = abs(v);
 213         char sign = (v<0 ? '-' : f.get_showpos() ? '+' : 0);
 214
 215         // Handle infinity and not-a-number as special cases
 216         if(!(w+w>w) && w!=0)
 217         {
 218                 string result;
 219                 if(sign)
 220                         result += sign;
 221                 if(!(w>=0))
 222                         result += (f.get_uppercase() ? "NAN" : "nan");
 223                 else
 224                         result += (f.get_uppercase() ? "INF" : "inf");
 225                 if(result.size()<f.get_width())
 226                         result = string(f.get_width()-result.size(), ' ')+result;
 227                 return result;
 228         }
 229
 230         /* Find out the base-10 exponent.  Building up the multiplier / divisor
 231         first helps with accuracy in some cases. */
 232         int exp = 0;
 233         if(w>=10)
 234         {
 235                 long double div = 1;
 236                 while(div*10<=w)
 237                 {
 238                         ++exp;
 239                         div *= 10;
 240                 }
 241                 w /= div;
 242         }
 243         else if(mode!=Fmt::FIXED && w<1 && w!=0)
 244         {
 245                 long double mul = 1;
 246                 while(w*mul<1)
 247                 {
 248                         --exp;
 249                         mul *= 10;
 250                 }
 251                 w *= mul;
 252         }
 253
 254         // Decide how to format the number
 255         unsigned digits;
 256         unsigned leading_zeroes = 0;
 257         unsigned point = 1;
 258         bool showexp = false;
 259         if(mode==Fmt::FIXED)
 260         {
 261                 point = exp+1;
 262                 digits = point+f.get_precision();
 263         }
 264         else if(mode==Fmt::SCI)
 265         {
 266                 digits = f.get_precision()+1;
 267                 showexp = true;
 268         }
 269         else
 270         {
 271                 digits = max(f.get_precision(), 1U);
 272                 if(exp<-4 || exp>=static_cast<int>(digits))
 273                 {
 274                         point = 1;
 275                         showexp = true;
 276                 }
 277                 else
 278                 {
 279                         if(exp<0)
 280                                 leading_zeroes = -exp;
 281                         else
 282                                 point = exp+1;
 283                 }
 284         }
 285
 286         // Apply rounding
 287         w += 5.0l/pow(10.0l, static_cast<long double>(digits));
 288         if(w>=10)
 289         {
 290                 // Rounding bumped us to the next exponent, deal with it
 291                 w /= 10;
 292                 if(mode==Fmt::AUTOFLT && exp+1==static_cast<int>(digits))
 293                 {
 294                         point = 1;
 295                         showexp = true;
 296                 }
 297                 if(!showexp)
 298                 {
 299                         if(mode==Fmt::FIXED)
 300                                 ++digits;
 301                         if(leading_zeroes)
 302                                 --leading_zeroes;
 303                         else
 304                                 ++point;
 305                 }
 306                 else
 307                         ++exp;
 308         }
 309
 310         digits += leading_zeroes;
 311
 312         // Create a buffer and start from the end
 313         unsigned size = max(f.get_width(), digits+8);
 314         char *buf = new char[size];
 315         char *end = buf+size;
 316         char *ptr = end;
 317
 318         // Format exponent
 319         if(showexp)
 320         {
 321                 ptr = int_to_str(exp, Fmt().showpos().fill('0').width(3), ptr);
 322                 *--ptr = (f.get_uppercase() ? 'E' : 'e');
 323         }
 324
 325         // Format mantissa left-to-right
 326         char *eptr = ptr;
 327         ptr -= digits+(point<digits || f.get_showpoint());
 328         char *mptr = ptr;
 329         for(unsigned i=0; i<digits; ++i)
 330         {
 331                 if(i==point)
 332                         *mptr++ = '.';
 333                 if(!leading_zeroes)
 334                 {
 335                         int digit = static_cast<int>(w);
 336                         *mptr++ = '0'+digit;
 337                         w = (w-digit)*10;
 338                 }
 339                 else
 340                 {
 341                         *mptr++ = '0';
 342                         --leading_zeroes;
 343                 }
 344         }
 345
 346         if(f.get_showpoint())
 347         {
 348                 // Radix point requested but not displayed yet, add it
 349                 if(digits<=point)
 350                         *mptr++ = '.';
 351         }
 352         else if(mode==Fmt::AUTOFLT && digits>point)
 353         {
 354                 // Remove trailing zeroes from fraction and a lone radix point
 355                 while(mptr[-1]=='0')
 356                         --mptr;
 357                 if(mptr[-1]=='.')
 358                         --mptr;
 359                 if(mptr!=eptr)
 360                 {
 361                         while(mptr!=ptr)
 362                                 *--eptr = *--mptr;
 363                         ptr = eptr;
 364                 }
 365         }
 366
 367         // Add filling and sign
 368         if(f.get_fill()=='0')
 369         {
 370                 unsigned pfxlen = (sign!=0);
 371                 while(end-ptr+pfxlen<f.get_width())
 372                         *--ptr = '0';
 373         }
 374         if(sign)
 375                 *--ptr = sign;
 376
 377         string result(ptr, end);
 378         delete[] buf;
 379         return result;
 380 }
 381
 382 template<typename T>
 383 T str_to_flt(const string &s, const Fmt &)
 384 {
 385         if(s.empty())
 386                 throw lexical_error("conversion of '' to floating-point");
 387
 388         auto i = s.begin();
 389
 390         // See if the input starts with a sign
 391         bool neg = false;
 392         if(*i=='-')
 393         {
 394                 neg = true;
 395                 ++i;
 396         }
 397         else if(*i=='+')
 398                 ++i;
 399
 400         // Must have some digits to convert
 401         if(i==s.end())
 402                 throw lexical_error(format("conversion of '%s' to floating-point", s));
 403
 404         long double v = 0;
 405         int exp = 0;
 406
 407         // Parse mantissa
 408         bool point_seen = false;
 409         for(; i!=s.end(); ++i)
 410         {
 411                 if(*i=='.')
 412                 {
 413                         if(point_seen)
 414                                 throw lexical_error(format("conversion of '%s' to floating-point", s));
 415                         point_seen = true;
 416                 }
 417                 else if(*i>='0' && *i<='9')
 418                 {
 419                         v = v*10+(*i-'0');
 420                         if(point_seen)
 421                                 --exp;
 422                 }
 423                 else if(*i=='e' || *i=='E')
 424                 {
 425                         // We have an exponent
 426                         ++i;
 427
 428                         exp += str_to_int<int>(string(i, s.end()), Fmt());
 429                         // str_to_int has eaten the rest of the input or thrown
 430                         break;
 431                 }
 432                 else
 433                         throw lexical_error(format("conversion of '%s' to floating-point", s));
 434         }
 435
 436         // Scale and negate the result as needed
 437         while(exp>0)
 438         {
 439                 v *= 10;
 440                 --exp;
 441         }
 442         while(exp<0)
 443         {
 444                 v /= 10;
 445                 ++exp;
 446         }
 447
 448         if(neg)
 449                 v = -v;
 450
 451         return v;
 452 }
 453
 454
 455 /*** String conversions ***/
 456
 457 string str_to_str(const string &s, const Fmt &f)
 458 {
 459         if(f.get_type()==Fmt::NUM)
 460                 throw format_mismatch("string conversion with numeric format");
 461         return s;
 462 }
 463
 464 }
 465
 466 namespace Msp {
 467
 468 void LexicalConverter::result(const string &s)
 469 {
 470         filled = true;
 471         if(s.size()<fmt.get_width())
 472         {
 473                 if(fmt.get_align()==Fmt::RIGHT)
 474                         buf = string(fmt.get_width()-s.size(), fmt.get_fill())+s;
 475                 else
 476                         buf = s+string(fmt.get_width()-s.size(), fmt.get_fill());
 477         }
 478         else
 479                 buf = s;
 480 }
 481
 482 const string &LexicalConverter::get() const
 483 {
 484         if(!filled)
 485                 throw lexical_error("conversion not performed");
 486         return buf;
 487 }
 488
 489
 490 /*** operator<< ***/
 491
 492 void operator<<(LexicalConverter &c, char v)
 493 {
 494         Fmt::Type type = c.get_fmt().get_type();
 495         if(type==Fmt::NUM)
 496                 c.result(int_to_str(v, c.get_fmt()));
 497         else
 498                 c.result(string(1, v));
 499 }
 500
 501 void operator<<(LexicalConverter &c, signed char v)
 502 { c.result(int_to_str(v, c.get_fmt())); }
 503
 504 void operator<<(LexicalConverter &c, short v)
 505 { c.result(int_to_str(v, c.get_fmt())); }
 506
 507 void operator<<(LexicalConverter &c, int v)
 508 { c.result(int_to_str(v, c.get_fmt())); }
 509
 510 void operator<<(LexicalConverter &c, long v)
 511 { c.result(int_to_str(v, c.get_fmt())); }
 512
 513 void operator<<(LexicalConverter &c, unsigned char v)
 514 { c.result(int_to_str(v, c.get_fmt())); }
 515
 516 void operator<<(LexicalConverter &c, unsigned short v)
 517 { c.result(int_to_str(v, c.get_fmt())); }
 518
 519 void operator<<(LexicalConverter &c, unsigned v)
 520 { c.result(int_to_str(v, c.get_fmt())); }
 521
 522 void operator<<(LexicalConverter &c, unsigned long v)
 523 { c.result(int_to_str(v, c.get_fmt())); }
 524
 525 #ifdef __GNUC__
 526 void operator<<(LexicalConverter &c, long long v)
 527 { c.result(int_to_str(v, c.get_fmt())); }
 528
 529 void operator<<(LexicalConverter &c, unsigned long long v)
 530 { c.result(int_to_str(v, c.get_fmt())); }
 531 #endif
 532
 533 void operator<<(LexicalConverter &c, bool v)
 534 { c.result(bool_to_str(v, c.get_fmt())); }
 535
 536 void operator<<(LexicalConverter &c, float v)
 537 { c.result(flt_to_str(v, c.get_fmt())); }
 538
 539 void operator<<(LexicalConverter &c, double v)
 540 { c.result(flt_to_str(v, c.get_fmt())); }
 541
 542 void operator<<(LexicalConverter &c, long double v)
 543 { c.result(flt_to_str(v, c.get_fmt())); }
 544
 545 void operator<<(LexicalConverter &c, const string &s)
 546 { c.result(str_to_str(s, c.get_fmt())); }
 547
 548 void operator<<(LexicalConverter &c, const char *s)
 549 { c.result(str_to_str(s, c.get_fmt())); }
 550
 551 void operator<<(LexicalConverter &c, const void *p)
 552 { c.result(int_to_str(reinterpret_cast<IntPtr>(p), c.get_fmt())); }
 553
 554
 555 /*** operator>> ***/
 556
 557 void operator>>(const LexicalConverter &c, char &v)
 558 {
 559         if(c.get_fmt().get_type()==Fmt::NUM)
 560                 v = str_to_int<char>(c.get(), c.get_fmt());
 561         else
 562         {
 563                 const string &s = c.get();
 564                 if(s.empty())
 565                         throw lexical_error("conversion of '' to character");
 566                 if(s.size()>1)
 567                         throw lexical_error(format("conversion of '%s' to character", s));
 568                 v = s[0];
 569         }
 570 }
 571
 572 void operator>>(const LexicalConverter &c, signed char &v)
 573 { v = str_to_int<signed char>(c.get(), c.get_fmt()); }
 574
 575 void operator>>(const LexicalConverter &c, short &v)
 576 { v = str_to_int<short>(c.get(), c.get_fmt()); }
 577
 578 void operator>>(const LexicalConverter &c, int &v)
 579 { v = str_to_int<int>(c.get(), c.get_fmt()); }
 580
 581 void operator>>(const LexicalConverter &c, long &v)
 582 { v = str_to_int<long>(c.get(), c.get_fmt()); }
 583
 584 void operator>>(const LexicalConverter &c, unsigned char &v)
 585 { v = str_to_int<unsigned char>(c.get(), c.get_fmt()); }
 586
 587 void operator>>(const LexicalConverter &c, unsigned short &v)
 588 { v = str_to_int<unsigned short>(c.get(), c.get_fmt()); }
 589
 590 void operator>>(const LexicalConverter &c, unsigned int &v)
 591 { v = str_to_int<unsigned int>(c.get(), c.get_fmt()); }
 592
 593 void operator>>(const LexicalConverter &c, unsigned long &v)
 594 { v = str_to_int<unsigned long>(c.get(), c.get_fmt()); }
 595
 596 #ifdef __GNUC__
 597 void operator>>(const LexicalConverter &c, long long &v)
 598 { v = str_to_int<long long>(c.get(), c.get_fmt()); }
 599
 600 void operator>>(const LexicalConverter &c, unsigned long long &v)
 601 { v = str_to_int<unsigned long long>(c.get(), c.get_fmt()); }
 602 #endif
 603
 604 void operator>>(const LexicalConverter &c, bool &v)
 605 { v = str_to_bool(c.get()); }
 606
 607 void operator>>(const LexicalConverter &c, float &v)
 608 { v = str_to_flt<float>(c.get(), c.get_fmt()); }
 609
 610 void operator>>(const LexicalConverter &c, double &v)
 611 { v = str_to_flt<double>(c.get(), c.get_fmt()); }
 612
 613 void operator>>(const LexicalConverter &c, long double &v)
 614 { v = str_to_flt<long double>(c.get(), c.get_fmt()); }
 615
 616 void operator>>(const LexicalConverter &c, string &s)
 617 { s = str_to_str(c.get(), c.get_fmt()); }
 618
 619 } // namespace Msp