source/strings/lexicalcast.cpp

   1 #include <cmath>
   2 #include <cstdint>
   3 #include <limits>
   4 #include "format.h"
   5 #include "lexicalcast.h"
   6
   7 using namespace std;
   8
   9 namespace {
  10
  11 using namespace Msp;
  12
  13 /* Helper to avoid warnings about an unsigned type never being < 0 */
  14 template<typename T, bool f = is_signed<T>::value>
  15 struct IsNegative
  16 { static bool eval(T v) { return v<0; } };
  17
  18 template<typename T>
  19 struct IsNegative<T, false>
  20 { static bool eval(T) { return false; } };
  21
  22 template<typename T, bool f = is_signed<T>::value>
  23 struct Negate
  24 { static T eval(T v) { return -v; } };
  25
  26 template<typename T>
  27 struct Negate<T, false>
  28 { static T eval(T v) { return (~v)+1; } };
  29
  30 /* Helper to avoid errors about ambiguous function calls since there are no
  31 overloads of abs for unsigned types */
  32 template<typename T, bool f = is_signed<T>::value>
  33 struct Absolute
  34 { static T eval(T v) { return v<0 ? -v : v; } };
  35
  36 template<typename T>
  37 struct Absolute<T, false>
  38 { static T eval(T v) { return v; } };
  39
  40
  41 /*** Integer conversions ***/
  42
  43 const char udigits[] = "0123456789ABCDEF";
  44 const char ldigits[] = "0123456789abcdef";
  45
  46 template<typename T>
  47 char *int_to_str(T v, const Fmt &f, char *end)
  48 {
  49         if(f.get_type()==Fmt::CHAR)
  50         {
  51                 *--end = v;
  52                 return end;
  53         }
  54
  55         char *ptr = end;
  56
  57         // Find out the base to use
  58         unsigned base = f.get_base();
  59         if(!base)
  60                 base = 10;
  61
  62         // Format the number, starting from the least significant digit
  63         const char *digits = (f.get_uppercase() ? udigits : ldigits);
  64         if(v)
  65         {
  66                 typename std::make_unsigned<T>::type w = Absolute<T>::eval(v);
  67                 while(w)
  68                 {
  69                         *--ptr = digits[w%base];
  70                         w /= base;
  71                 }
  72         }
  73         else
  74                 *--ptr = digits[0];
  75
  76         char sign = (IsNegative<T>::eval(v) ? '-' : f.get_showpos() ? '+' : 0);
  77         if(f.get_fill()=='0')
  78         {
  79                 /* Zero-fill, taking base/sign size into account.  The expression is a
  80                 bit ugly, but saves having to write code for creating the prefix both
  81                 ways. */
  82                 unsigned pfxsize = ((f.get_showbase() && base!=10) ? base==8 ? 1 : 2 : 0) + (sign!=0);
  83                 for(unsigned i=(end-ptr)+pfxsize; i<f.get_width(); ++i)
  84                         *--ptr = '0';
  85         }
  86
  87         if(f.get_showbase() && v!=0)
  88         {
  89                 // Add base indicator
  90                 if(base==2)
  91                         *--ptr = (f.get_uppercase() ? 'B' : 'b');
  92                 else if(base==16)
  93                         *--ptr = (f.get_uppercase() ? 'X' : 'x');
  94                 if(base!=10)
  95                         *--ptr = '0';
  96         }
  97
  98         if(sign)
  99                 *--ptr = sign;
 100
 101         return ptr;
 102 }
 103
 104 template<typename T>
 105 string int_to_str(T v, const Fmt &f)
 106 {
 107         unsigned size = max(f.get_width(), max<unsigned>(f.get_precision(), sizeof(T)*8+3));
 108         char *buf = new char[size];
 109         string result(int_to_str(v, f, buf+size), buf+size);
 110         delete[] buf;
 111         return result;
 112 }
 113
 114 template<typename T>
 115 T str_to_int(const string &s, const Fmt &f)
 116 {
 117         if(s.empty())
 118                 throw lexical_error("conversion of '' to integer");
 119
 120         auto i = s.begin();
 121
 122         // See if the input starts with a sign
 123         bool neg = false;
 124         if(*i=='-')
 125         {
 126                 if(is_unsigned<T>::value)
 127                         throw lexical_error(format("conversion of '%s' to unsigned integer", s));
 128                 neg = true;
 129                 ++i;
 130         }
 131         else if(*i=='+')
 132                 ++i;
 133
 134         // Must have some digits to convert
 135         if(i==s.end())
 136                 throw lexical_error(format("conversion of '%s' to integer", s));
 137
 138         T base = f.get_base();
 139         if(!base && i!=s.end())
 140         {
 141                 // Automatic base detection requested, figure it out
 142                 if(*i=='0' && ++i!=s.end())
 143                 {
 144                         if(*i=='x' || *i=='X')
 145                         {
 146                                 base = 16;
 147                                 ++i;
 148                         }
 149                         else if(*i=='b' || *i=='B')
 150                         {
 151                                 base = 2;
 152                                 ++i;
 153                         }
 154                         else
 155                                 base = 8;
 156                 }
 157                 else
 158                         base = 10;
 159         }
 160
 161         // Parse the digits
 162         T result = 0;
 163         for(; i!=s.end(); ++i)
 164         {
 165                 T digit = base;
 166                 if(*i>='0' && *i<='9')
 167                         digit = *i-'0';
 168                 else if(*i>='A' && *i<='F')
 169                         digit = *i-'A'+10;
 170                 else if(*i>='a' && *i<='f')
 171                         digit = *i-'a'+10;
 172                 if(digit>=base)
 173                         throw lexical_error(format("conversion of '%s' to integer (base-%d)", s, base));
 174                 T next = result*base+digit;
 175                 if(next/base!=result)
 176                         throw lexical_error(format("conversion of '%s' to %d-bit integer", s, sizeof(T)*8));
 177                 result = next;
 178         }
 179
 180         if(neg)
 181                 result = Negate<T>::eval(result);
 182
 183         return result;
 184 }
 185
 186
 187 /*** Boolean conversions ***/
 188
 189 string bool_to_str(bool b, const Fmt &f)
 190 {
 191         if(f.get_type()==Fmt::STR)
 192                 return b ? "true" : "false";
 193         else
 194                 return b ? "1" : "0";
 195 }
 196
 197 bool str_to_bool(const string &s)
 198 {
 199         if(s.empty())
 200                 throw lexical_error("conversion of '' to boolean");
 201
 202         if(s=="1" || s=="true" || s=="yes" || s=="on")
 203                 return true;
 204         else if(s=="0" || s=="false" || s=="no" || s=="off")
 205                 return false;
 206
 207         throw lexical_error(format("conversion of '%s' to boolean", s));
 208 }
 209
 210
 211 /*** Floating-point conversions ***/
 212
 213 template<typename T>
 214 string flt_to_str(T v, const Fmt &f)
 215 {
 216         if(f.get_type()==Fmt::CHAR)
 217                 throw format_mismatch("floating-point conversion with character format");
 218
 219         Fmt::FloatMode mode = f.get_floatmode();
 220         long double w = abs(v);
 221         char sign = (v<0 ? '-' : f.get_showpos() ? '+' : 0);
 222
 223         // Handle infinity and not-a-number as special cases
 224         if(!(w+w>w) && w!=0)
 225         {
 226                 string result;
 227                 if(sign)
 228                         result += sign;
 229                 if(!(w>=0))
 230                         result += (f.get_uppercase() ? "NAN" : "nan");
 231                 else
 232                         result += (f.get_uppercase() ? "INF" : "inf");
 233                 if(result.size()<f.get_width())
 234                         result = string(f.get_width()-result.size(), ' ')+result;
 235                 return result;
 236         }
 237
 238         /* Find out the base-10 exponent.  Building up the multiplier / divisor
 239         first helps with accuracy in some cases. */
 240         int exp = 0;
 241         if(w>=10)
 242         {
 243                 long double div = 1;
 244                 while(div*10<=w)
 245                 {
 246                         ++exp;
 247                         div *= 10;
 248                 }
 249                 w /= div;
 250         }
 251         else if(mode!=Fmt::FIXED && w<1 && w!=0)
 252         {
 253                 long double mul = 1;
 254                 while(w*mul<1)
 255                 {
 256                         --exp;
 257                         mul *= 10;
 258                 }
 259                 w *= mul;
 260         }
 261
 262         // Decide how to format the number
 263         unsigned digits;
 264         unsigned leading_zeroes = 0;
 265         unsigned point = 1;
 266         bool showexp = false;
 267         if(mode==Fmt::FIXED)
 268         {
 269                 point = exp+1;
 270                 digits = point+f.get_precision();
 271         }
 272         else if(mode==Fmt::SCI)
 273         {
 274                 digits = f.get_precision()+1;
 275                 showexp = true;
 276         }
 277         else
 278         {
 279                 digits = max(f.get_precision(), 1U);
 280                 if(exp<-4 || exp>=static_cast<int>(digits))
 281                 {
 282                         point = 1;
 283                         showexp = true;
 284                 }
 285                 else
 286                 {
 287                         if(exp<0)
 288                                 leading_zeroes = -exp;
 289                         else
 290                                 point = exp+1;
 291                 }
 292         }
 293
 294         // Apply rounding
 295         w += 5.0l/pow(10.0l, static_cast<long double>(digits));
 296         if(w>=10)
 297         {
 298                 // Rounding bumped us to the next exponent, deal with it
 299                 w /= 10;
 300                 if(mode==Fmt::AUTOFLT && exp+1==static_cast<int>(digits))
 301                 {
 302                         point = 1;
 303                         showexp = true;
 304                 }
 305                 if(!showexp)
 306                 {
 307                         if(mode==Fmt::FIXED)
 308                                 ++digits;
 309                         if(leading_zeroes)
 310                                 --leading_zeroes;
 311                         else
 312                                 ++point;
 313                 }
 314                 else
 315                         ++exp;
 316         }
 317
 318         digits += leading_zeroes;
 319
 320         // Create a buffer and start from the end
 321         unsigned size = max(f.get_width(), digits+8);
 322         char *buf = new char[size];
 323         char *end = buf+size;
 324         char *ptr = end;
 325
 326         // Format exponent
 327         if(showexp)
 328         {
 329                 ptr = int_to_str(exp, Fmt().showpos().fill('0').width(3), ptr);
 330                 *--ptr = (f.get_uppercase() ? 'E' : 'e');
 331         }
 332
 333         // Format mantissa left-to-right
 334         char *eptr = ptr;
 335         ptr -= digits+(point<digits || f.get_showpoint());
 336         char *mptr = ptr;
 337         for(unsigned i=0; i<digits; ++i)
 338         {
 339                 if(i==point)
 340                         *mptr++ = '.';
 341                 if(!leading_zeroes)
 342                 {
 343                         int digit = static_cast<int>(w);
 344                         *mptr++ = '0'+digit;
 345                         w = (w-digit)*10;
 346                 }
 347                 else
 348                 {
 349                         *mptr++ = '0';
 350                         --leading_zeroes;
 351                 }
 352         }
 353
 354         if(f.get_showpoint())
 355         {
 356                 // Radix point requested but not displayed yet, add it
 357                 if(digits<=point)
 358                         *mptr++ = '.';
 359         }
 360         else if(mode==Fmt::AUTOFLT && digits>point)
 361         {
 362                 // Remove trailing zeroes from fraction and a lone radix point
 363                 while(mptr[-1]=='0')
 364                         --mptr;
 365                 if(mptr[-1]=='.')
 366                         --mptr;
 367                 if(mptr!=eptr)
 368                 {
 369                         while(mptr!=ptr)
 370                                 *--eptr = *--mptr;
 371                         ptr = eptr;
 372                 }
 373         }
 374
 375         // Add filling and sign
 376         if(f.get_fill()=='0')
 377         {
 378                 unsigned pfxlen = (sign!=0);
 379                 while(end-ptr+pfxlen<f.get_width())
 380                         *--ptr = '0';
 381         }
 382         if(sign)
 383                 *--ptr = sign;
 384
 385         string result(ptr, end);
 386         delete[] buf;
 387         return result;
 388 }
 389
 390 template<typename T>
 391 T str_to_flt(const string &s, const Fmt &)
 392 {
 393         if(s.empty())
 394                 throw lexical_error("conversion of '' to floating-point");
 395
 396         auto i = s.begin();
 397
 398         // See if the input starts with a sign
 399         bool neg = false;
 400         if(*i=='-')
 401         {
 402                 neg = true;
 403                 ++i;
 404         }
 405         else if(*i=='+')
 406                 ++i;
 407
 408         // Must have some digits to convert
 409         if(i==s.end())
 410                 throw lexical_error(format("conversion of '%s' to floating-point", s));
 411
 412         long double v = 0;
 413         int exp = 0;
 414
 415         // Parse mantissa
 416         bool point_seen = false;
 417         for(; i!=s.end(); ++i)
 418         {
 419                 if(*i=='.')
 420                 {
 421                         if(point_seen)
 422                                 throw lexical_error(format("conversion of '%s' to floating-point", s));
 423                         point_seen = true;
 424                 }
 425                 else if(*i>='0' && *i<='9')
 426                 {
 427                         v = v*10+(*i-'0');
 428                         if(point_seen)
 429                                 --exp;
 430                 }
 431                 else if(*i=='e' || *i=='E')
 432                 {
 433                         // We have an exponent
 434                         ++i;
 435
 436                         exp += str_to_int<int>(string(i, s.end()), Fmt());
 437                         // str_to_int has eaten the rest of the input or thrown
 438                         break;
 439                 }
 440                 else
 441                         throw lexical_error(format("conversion of '%s' to floating-point", s));
 442         }
 443
 444         // Scale and negate the result as needed
 445         while(exp>0)
 446         {
 447                 v *= 10;
 448                 --exp;
 449         }
 450         while(exp<0)
 451         {
 452                 v /= 10;
 453                 ++exp;
 454         }
 455
 456         if(neg)
 457                 v = -v;
 458
 459         return v;
 460 }
 461
 462
 463 /*** String conversions ***/
 464
 465 string str_to_str(const string &s, const Fmt &f)
 466 {
 467         if(f.get_type()==Fmt::NUM)
 468                 throw format_mismatch("string conversion with numeric format");
 469         return s;
 470 }
 471
 472 }
 473
 474 namespace Msp {
 475
 476 void LexicalConverter::result(const string &s)
 477 {
 478         filled = true;
 479         if(s.size()<fmt.get_width())
 480         {
 481                 if(fmt.get_align()==Fmt::RIGHT)
 482                         buf = string(fmt.get_width()-s.size(), fmt.get_fill())+s;
 483                 else
 484                         buf = s+string(fmt.get_width()-s.size(), fmt.get_fill());
 485         }
 486         else
 487                 buf = s;
 488 }
 489
 490 const string &LexicalConverter::get() const
 491 {
 492         if(!filled)
 493                 throw lexical_error("conversion not performed");
 494         return buf;
 495 }
 496
 497
 498 /*** operator<< ***/
 499
 500 void operator<<(LexicalConverter &c, char v)
 501 {
 502         Fmt::Type type = c.get_fmt().get_type();
 503         if(type==Fmt::NUM)
 504                 c.result(int_to_str(v, c.get_fmt()));
 505         else
 506                 c.result(string(1, v));
 507 }
 508
 509 void operator<<(LexicalConverter &c, signed char v)
 510 { c.result(int_to_str(v, c.get_fmt())); }
 511
 512 void operator<<(LexicalConverter &c, short v)
 513 { c.result(int_to_str(v, c.get_fmt())); }
 514
 515 void operator<<(LexicalConverter &c, int v)
 516 { c.result(int_to_str(v, c.get_fmt())); }
 517
 518 void operator<<(LexicalConverter &c, long v)
 519 { c.result(int_to_str(v, c.get_fmt())); }
 520
 521 void operator<<(LexicalConverter &c, unsigned char v)
 522 { c.result(int_to_str(v, c.get_fmt())); }
 523
 524 void operator<<(LexicalConverter &c, unsigned short v)
 525 { c.result(int_to_str(v, c.get_fmt())); }
 526
 527 void operator<<(LexicalConverter &c, unsigned v)
 528 { c.result(int_to_str(v, c.get_fmt())); }
 529
 530 void operator<<(LexicalConverter &c, unsigned long v)
 531 { c.result(int_to_str(v, c.get_fmt())); }
 532
 533 #ifdef __GNUC__
 534 void operator<<(LexicalConverter &c, long long v)
 535 { c.result(int_to_str(v, c.get_fmt())); }
 536
 537 void operator<<(LexicalConverter &c, unsigned long long v)
 538 { c.result(int_to_str(v, c.get_fmt())); }
 539 #endif
 540
 541 void operator<<(LexicalConverter &c, bool v)
 542 { c.result(bool_to_str(v, c.get_fmt())); }
 543
 544 void operator<<(LexicalConverter &c, float v)
 545 { c.result(flt_to_str(v, c.get_fmt())); }
 546
 547 void operator<<(LexicalConverter &c, double v)
 548 { c.result(flt_to_str(v, c.get_fmt())); }
 549
 550 void operator<<(LexicalConverter &c, long double v)
 551 { c.result(flt_to_str(v, c.get_fmt())); }
 552
 553 void operator<<(LexicalConverter &c, const string &s)
 554 { c.result(str_to_str(s, c.get_fmt())); }
 555
 556 void operator<<(LexicalConverter &c, const char *s)
 557 { c.result(str_to_str(s, c.get_fmt())); }
 558
 559 void operator<<(LexicalConverter &c, const void *p)
 560 { c.result(int_to_str(reinterpret_cast<intptr_t>(p), c.get_fmt())); }
 561
 562
 563 /*** operator>> ***/
 564
 565 void operator>>(const LexicalConverter &c, char &v)
 566 {
 567         if(c.get_fmt().get_type()==Fmt::NUM)
 568                 v = str_to_int<char>(c.get(), c.get_fmt());
 569         else
 570         {
 571                 const string &s = c.get();
 572                 if(s.empty())
 573                         throw lexical_error("conversion of '' to character");
 574                 if(s.size()>1)
 575                         throw lexical_error(format("conversion of '%s' to character", s));
 576                 v = s[0];
 577         }
 578 }
 579
 580 void operator>>(const LexicalConverter &c, signed char &v)
 581 { v = str_to_int<signed char>(c.get(), c.get_fmt()); }
 582
 583 void operator>>(const LexicalConverter &c, short &v)
 584 { v = str_to_int<short>(c.get(), c.get_fmt()); }
 585
 586 void operator>>(const LexicalConverter &c, int &v)
 587 { v = str_to_int<int>(c.get(), c.get_fmt()); }
 588
 589 void operator>>(const LexicalConverter &c, long &v)
 590 { v = str_to_int<long>(c.get(), c.get_fmt()); }
 591
 592 void operator>>(const LexicalConverter &c, unsigned char &v)
 593 { v = str_to_int<unsigned char>(c.get(), c.get_fmt()); }
 594
 595 void operator>>(const LexicalConverter &c, unsigned short &v)
 596 { v = str_to_int<unsigned short>(c.get(), c.get_fmt()); }
 597
 598 void operator>>(const LexicalConverter &c, unsigned int &v)
 599 { v = str_to_int<unsigned int>(c.get(), c.get_fmt()); }
 600
 601 void operator>>(const LexicalConverter &c, unsigned long &v)
 602 { v = str_to_int<unsigned long>(c.get(), c.get_fmt()); }
 603
 604 #ifdef __GNUC__
 605 void operator>>(const LexicalConverter &c, long long &v)
 606 { v = str_to_int<long long>(c.get(), c.get_fmt()); }
 607
 608 void operator>>(const LexicalConverter &c, unsigned long long &v)
 609 { v = str_to_int<unsigned long long>(c.get(), c.get_fmt()); }
 610 #endif
 611
 612 void operator>>(const LexicalConverter &c, bool &v)
 613 { v = str_to_bool(c.get()); }
 614
 615 void operator>>(const LexicalConverter &c, float &v)
 616 { v = str_to_flt<float>(c.get(), c.get_fmt()); }
 617
 618 void operator>>(const LexicalConverter &c, double &v)
 619 { v = str_to_flt<double>(c.get(), c.get_fmt()); }
 620
 621 void operator>>(const LexicalConverter &c, long double &v)
 622 { v = str_to_flt<long double>(c.get(), c.get_fmt()); }
 623
 624 void operator>>(const LexicalConverter &c, string &s)
 625 { s = str_to_str(c.get(), c.get_fmt()); }
 626
 627 } // namespace Msp