source/strings/lexicalcast.cpp

   1 /* $Id$
   2
   3 This file is part of libmspstrings
   4 Copyright © 2006-2008 Mikko Rasa
   5 Distributed under the LGPL
   6 */
   7
   8 #include <cmath>
   9 #include <limits>
  10 #include "lexicalcast.h"
  11
  12 using namespace std;
  13
  14 namespace {
  15
  16 using namespace Msp;
  17
  18 template<typename T>
  19 struct IsSigned
  20 { enum { result = !(static_cast<T>(-1)>0) }; };
  21
  22 template<typename T, bool f = (sizeof(T)>sizeof(unsigned long))>
  23 struct Temporary
  24 { typedef unsigned long Type; };
  25
  26 template<typename T>
  27 struct Temporary<T, true>
  28 {
  29 #ifdef WIN32
  30         typedef __int64 Type;
  31 #else
  32         typedef unsigned long long Type;
  33 #endif
  34 };
  35
  36 /* Helper to avoid warnings about an unsigned type never being < 0 */
  37 template<typename T, bool f = IsSigned<T>::result>
  38 struct IsNegative
  39 { static bool eval(T v) { return v<0; } };
  40
  41 template<typename T>
  42 struct IsNegative<T, false>
  43 { static bool eval(T) { return false; } };
  44
  45 /* Helper to avoid errors about ambiguous function calls since there are no
  46 overloads of abs for unsigned types */
  47 template<typename T, bool f = IsSigned<T>::result>
  48 struct Absolute
  49 { static T eval(T v) { return v<0 ? -v : v; } };
  50
  51 template<typename T>
  52 struct Absolute<T, false>
  53 { static T eval(T v) { return v; } };
  54
  55
  56 /*** Integer conversions ***/
  57
  58 const char udigits[] = "0123456789ABCDEF";
  59 const char ldigits[] = "0123456789abcdef";
  60
  61 template<typename T>
  62 char *int_to_str(T v, const Fmt &f, char *end)
  63 {
  64         if(f.get_type()==Fmt::CHAR)
  65         {
  66                 *--end = v;
  67                 return end;
  68         }
  69
  70         char *ptr = end;
  71
  72         // Find out the base to use
  73         unsigned base = f.get_base();
  74         if(!base)
  75                 base = 10;
  76
  77         // Format the number, starting from the least significant digit
  78         const char *digits = (f.get_uppercase() ? udigits : ldigits);
  79         if(v)
  80         {
  81                 typename Temporary<T>::Type w = Absolute<T>::eval(v);
  82                 while(w)
  83                 {
  84                         *--ptr = digits[w%base];
  85                         w /= base;
  86                 }
  87         }
  88         else
  89                 *--ptr = digits[0];
  90
  91         char sign = (IsNegative<T>::eval(v) ? '-' : f.get_showpos() ? '+' : 0);
  92         if(f.get_fill()=='0')
  93         {
  94                 /* Zero-fill, taking base/sign size into account.  The expression is a
  95                 bit ugly, but saves having to write code for creating the prefix both
  96                 ways. */
  97                 unsigned pfxsize = ((f.get_showbase() && base!=10) ? base==8 ? 1 : 2 : 0) + (sign!=0);
  98                 for(unsigned i=(end-ptr)+pfxsize; i<f.get_width(); ++i)
  99                         *--ptr = '0';
 100         }
 101
 102         if(f.get_showbase() && v!=0)
 103         {
 104                 // Add base indicator
 105                 if(base==2)
 106                         *--ptr = (f.get_uppercase() ? 'B' : 'b');
 107                 else if(base==16)
 108                         *--ptr = (f.get_uppercase() ? 'X' : 'x');
 109                 if(base!=10)
 110                         *--ptr = '0';
 111         }
 112
 113         if(sign)
 114                 *--ptr = sign;
 115
 116         return ptr;
 117 }
 118
 119 template<typename T>
 120 string int_to_str(T v, const Fmt &f)
 121 {
 122         unsigned size = max(f.get_width(), max<unsigned>(f.get_precision(), sizeof(T)*8+3));
 123         char *buf = new char[size];
 124         string result(int_to_str(v, f, buf+size), buf+size);
 125         delete[] buf;
 126         return result;
 127 }
 128
 129 template<typename T>
 130 T str_to_int(const std::string &s, const Fmt &f)
 131 {
 132         if(s.empty())
 133                 throw LexicalError("Empty input in integer conversion");
 134
 135         std::string::const_iterator i = s.begin();
 136
 137         // See if the input starts with a sign
 138         bool neg = false;
 139         if(*i=='-')
 140         {
 141                 if(!IsSigned<T>::result)
 142                         throw LexicalError("Negative sign in unsigned integer conversion");
 143                 neg = true;
 144                 ++i;
 145         }
 146         else if(*i=='+')
 147                 ++i;
 148
 149         // Must have some digits to convert
 150         if(i==s.end())
 151                 throw LexicalError("Missing digits in integer conversion");
 152
 153         T base = f.get_base();
 154         if(!base && i!=s.end())
 155         {
 156                 // Automatic base detection requested, figure it out
 157                 if(*i=='0' && ++i!=s.end())
 158                 {
 159                         if(*i=='x' || *i=='X')
 160                         {
 161                                 base = 16;
 162                                 ++i;
 163                         }
 164                         else if(*i=='b' || *i=='B')
 165                         {
 166                                 base = 2;
 167                                 ++i;
 168                         }
 169                         else
 170                                 base = 8;
 171                 }
 172                 else
 173                         base = 10;
 174         }
 175
 176         // Parse the digits
 177         T result = 0;
 178         for(; i!=s.end(); ++i)
 179         {
 180                 T digit = base;
 181                 if(*i>='0' && *i<='9')
 182                         digit = *i-'0';
 183                 else if(*i>='A' && *i<='F')
 184                         digit = *i-'A'+10;
 185                 else if(*i>='a' && *i<='f')
 186                         digit = *i-'a'+10;
 187                 if(digit>=base)
 188                         throw LexicalError("Invalid digit in integer conversion");
 189                 T next = result*base+digit;
 190                 if(next/base!=result)
 191                         throw LexicalError("Overflow in integer conversion");
 192                 result = next;
 193         }
 194
 195         if(neg)
 196                 result = -result;
 197
 198         return result;
 199 }
 200
 201
 202 /*** Boolean conversions ***/
 203
 204 string bool_to_str(bool b, const Fmt &f)
 205 {
 206         if(f.get_type()==Fmt::STR)
 207                 return b ? "true" : "false";
 208         else
 209                 return b ? "1" : "0";
 210 }
 211
 212 bool str_to_bool(const string &s)
 213 {
 214         if(s.empty())
 215                 throw LexicalError("Empty input in boolean conversion");
 216
 217         if(s=="1" || s=="true" || s=="yes" || s=="on")
 218                 return true;
 219         else if(s=="0" || s=="false" || s=="no" || s=="off")
 220                 return false;
 221
 222         throw LexicalError("Invalid input in boolean conversion");
 223 }
 224
 225
 226 /*** Floating-point conversions ***/
 227
 228 template<typename T>
 229 string flt_to_str(T v, const Fmt &f)
 230 {
 231         if(f.get_type()==Fmt::CHAR)
 232                 throw LexicalError("Character format in floating-point conversion");
 233
 234         Fmt::FloatMode mode = f.get_floatmode();
 235         long double w = abs(v);
 236         char sign = (v<0 ? '-' : f.get_showpos() ? '+' : 0);
 237
 238         // Handle infinity and not-a-number as special cases
 239         if(!(w+w>w) && w!=0)
 240         {
 241                 string result;
 242                 if(sign)
 243                         result += sign;
 244                 if(!(w>=0))
 245                         result += (f.get_uppercase() ? "NAN" : "nan");
 246                 else
 247                         result += (f.get_uppercase() ? "INF" : "inf");
 248                 if(result.size()<f.get_width())
 249                         result = string(f.get_width()-result.size(), ' ')+result;
 250                 return result;
 251         }
 252
 253         /* Find out the base-10 exponent.  Building up the multiplier / divisor
 254         first helps with accuracy in some cases. */
 255         int exp = 0;
 256         if(w>=10)
 257         {
 258                 long double div = 1;
 259                 while(div*10<w)
 260                 {
 261                         ++exp;
 262                         div *= 10;
 263                 }
 264                 w /= div;
 265         }
 266         else if(mode!=Fmt::FIXED && w<1 && w!=0)
 267         {
 268                 long double mul = 1;
 269                 while(w*mul<1)
 270                 {
 271                         --exp;
 272                         mul *= 10;
 273                 }
 274                 w *= mul;
 275         }
 276
 277         // Decide how to format the number
 278         unsigned digits;
 279         unsigned point = 1;
 280         bool showexp = false;
 281         if(mode==Fmt::FIXED)
 282         {
 283                 point = exp+1;
 284                 digits = point+f.get_precision();
 285         }
 286         else if(mode==Fmt::SCI)
 287         {
 288                 digits = f.get_precision()+1;
 289                 showexp = true;
 290         }
 291         else
 292         {
 293                 digits = max(f.get_precision(), 1U);
 294                 if(exp<-4 || exp>=static_cast<int>(digits))
 295                 {
 296                         point = 1;
 297                         showexp = true;
 298                 }
 299                 else
 300                 {
 301                         point = max(exp, 0)+1;
 302                         if(exp<0)
 303                                 digits += -exp;
 304                 }
 305         }
 306
 307         // Apply rounding
 308         w += 5.0l/pow(10.0l, static_cast<long double>(digits));
 309         if(w>10)
 310         {
 311                 // Rounding bumped us to the next exponent, deal with it
 312                 w /= 10;
 313                 if(mode==Fmt::AUTOFLT && exp+1==static_cast<int>(digits))
 314                 {
 315                         point = 1;
 316                         showexp = true;
 317                 }
 318                 if(!showexp)
 319                 {
 320                         ++digits;
 321                         ++point;
 322                 }
 323                 else
 324                         ++exp;
 325         }
 326
 327         // Create a buffer and start from the end
 328         unsigned size = max(f.get_width(), digits+8);
 329         char *buf = new char[size];
 330         char *end = buf+size;
 331         char *ptr = end;
 332
 333         // Format exponent
 334         if(showexp)
 335         {
 336                 ptr = int_to_str(exp, Fmt().showpos().fill('0').width(3), ptr);
 337                 *--ptr = (f.get_uppercase() ? 'E' : 'e');
 338         }
 339
 340         // Format mantissa left-to-right
 341         char *eptr = ptr;
 342         ptr -= digits+(point<digits || f.get_showpoint());
 343         char *mptr = ptr;
 344         for(unsigned i=0; i<digits; ++i)
 345         {
 346                 if(i==point)
 347                         *mptr++ = '.';
 348                 if(showexp || static_cast<int>(i)>=-exp)
 349                 {
 350                         int digit = static_cast<int>(w);
 351                         *mptr++ = '0'+digit;
 352                         w = (w-digit)*10;
 353                 }
 354                 else
 355                         *mptr++ = '0';
 356         }
 357
 358         if(f.get_showpoint())
 359         {
 360                 // Radix point requested but not displayed yet, add it
 361                 if(digits<=point)
 362                         *mptr++ = '.';
 363         }
 364         else if(mode==Fmt::AUTOFLT && digits>point)
 365         {
 366                 // Remove trailing zeroes from fraction and a lone radix point
 367                 while(mptr[-1]=='0')
 368                         --mptr;
 369                 if(mptr[-1]=='.')
 370                         --mptr;
 371                 if(mptr!=eptr)
 372                 {
 373                         while(mptr!=ptr)
 374                                 *--eptr = *--mptr;
 375                         ptr = eptr;
 376                 }
 377         }
 378
 379         // Add filling and sign
 380         if(f.get_fill()=='0')
 381         {
 382                 unsigned pfxlen = (sign!=0);
 383                 while(end-ptr+pfxlen<f.get_width())
 384                         *--ptr = '0';
 385         }
 386         if(sign)
 387                 *--ptr = sign;
 388
 389         string result(ptr, end);
 390         delete[] buf;
 391         return result;
 392 }
 393
 394 template<typename T>
 395 T str_to_flt(const string &s, const Fmt &)
 396 {
 397         if(s.empty())
 398                 throw LexicalError("Empty input in floating-point conversion");
 399
 400         std::string::const_iterator i = s.begin();
 401
 402         // See if the input starts with a sign
 403         bool neg = false;
 404         if(*i=='-')
 405         {
 406                 neg = true;
 407                 ++i;
 408         }
 409         else if(*i=='+')
 410                 ++i;
 411
 412         // Must have some digits to convert
 413         if(i==s.end())
 414                 throw LexicalError("Missing digits in floating-point conversion");
 415
 416         long double v = 0;
 417         int exp = 0;
 418
 419         // Parse mantissa
 420         bool point_seen = false;
 421         for(; i!=s.end(); ++i)
 422         {
 423                 if(*i=='.')
 424                 {
 425                         if(point_seen)
 426                                 throw LexicalError("Extra point in floating-point conversion");
 427                         point_seen = true;
 428                 }
 429                 else if(*i>='0' && *i<='9')
 430                 {
 431                         v = v*10+(*i-'0');
 432                         if(point_seen)
 433                                 --exp;
 434                 }
 435                 else if(*i=='e' || *i=='E')
 436                 {
 437                         // We have an exponent
 438                         ++i;
 439
 440                         exp += str_to_int<int>(string(i, s.end()), Fmt());
 441                         // str_to_int has eaten the rest of the input or thrown
 442                         break;
 443                 }
 444                 else
 445                         throw LexicalError("Invalid digit in floating-point conversion");
 446         }
 447
 448         // Scale and negate the result as needed
 449         while(exp>0)
 450         {
 451                 v *= 10;
 452                 --exp;
 453         }
 454         while(exp<0)
 455         {
 456                 v /= 10;
 457                 ++exp;
 458         }
 459
 460         if(neg)
 461                 v = -v;
 462
 463         return v;
 464 }
 465
 466
 467 /*** String conversions ***/
 468
 469 string str_to_str(const string &s, const Fmt &f)
 470 {
 471         if(f.get_type()==Fmt::NUM)
 472                 throw LexicalError("Numeric format in string conversion");
 473         return s;
 474 }
 475
 476 }
 477
 478 namespace Msp {
 479
 480 void LexicalConverter::result(const string &s)
 481 {
 482         if(s.size()<fmt.get_width())
 483         {
 484                 if(fmt.get_align()==Fmt::RIGHT)
 485                         buf = string(fmt.get_width()-s.size(), fmt.get_fill())+s;
 486                 else
 487                         buf = s+string(fmt.get_width()-s.size(), fmt.get_fill());
 488         }
 489         else
 490                 buf = s;
 491 }
 492
 493
 494 /*** operator<< ***/
 495
 496 void operator<<(LexicalConverter &c, char v)
 497 {
 498         Fmt::Type type = c.get_fmt().get_type();
 499         if(type==Fmt::NUM)
 500                 c.result(int_to_str(v, c.get_fmt()));
 501         else
 502                 c.result(string(1, v));
 503 }
 504
 505 void operator<<(LexicalConverter &c, signed char v)
 506 { c.result(int_to_str(v, c.get_fmt())); }
 507
 508 void operator<<(LexicalConverter &c, short v)
 509 { c.result(int_to_str(v, c.get_fmt())); }
 510
 511 void operator<<(LexicalConverter &c, int v)
 512 { c.result(int_to_str(v, c.get_fmt())); }
 513
 514 void operator<<(LexicalConverter &c, long v)
 515 { c.result(int_to_str(v, c.get_fmt())); }
 516
 517 void operator<<(LexicalConverter &c, unsigned char v)
 518 { c.result(int_to_str(v, c.get_fmt())); }
 519
 520 void operator<<(LexicalConverter &c, unsigned short v)
 521 { c.result(int_to_str(v, c.get_fmt())); }
 522
 523 void operator<<(LexicalConverter &c, unsigned v)
 524 { c.result(int_to_str(v, c.get_fmt())); }
 525
 526 void operator<<(LexicalConverter &c, unsigned long v)
 527 { c.result(int_to_str(v, c.get_fmt())); }
 528
 529 #ifdef __GNUC__
 530 void operator<<(LexicalConverter &c, long long v)
 531 { c.result(int_to_str(v, c.get_fmt())); }
 532
 533 void operator<<(LexicalConverter &c, unsigned long long v)
 534 { c.result(int_to_str(v, c.get_fmt())); }
 535 #endif
 536
 537 void operator<<(LexicalConverter &c, bool v)
 538 { c.result(bool_to_str(v, c.get_fmt())); }
 539
 540 void operator<<(LexicalConverter &c, float v)
 541 { c.result(flt_to_str(v, c.get_fmt())); }
 542
 543 void operator<<(LexicalConverter &c, double v)
 544 { c.result(flt_to_str(v, c.get_fmt())); }
 545
 546 void operator<<(LexicalConverter &c, long double v)
 547 { c.result(flt_to_str(v, c.get_fmt())); }
 548
 549 void operator<<(LexicalConverter &c, const string &s)
 550 { c.result(str_to_str(s, c.get_fmt())); }
 551
 552 void operator<<(LexicalConverter &c, const char *s)
 553 { c.result(str_to_str(s, c.get_fmt())); }
 554
 555 void operator<<(LexicalConverter &c, const void *p)
 556 { c.result(int_to_str(reinterpret_cast<unsigned long>(p), c.get_fmt())); }
 557
 558
 559 /*** operator>> ***/
 560
 561 void operator>>(const LexicalConverter &c, char &v)
 562 {
 563         if(c.get_fmt().get_type()==Fmt::NUM)
 564                 v = str_to_int<char>(c.get(), c.get_fmt());
 565         else
 566         {
 567                 const std::string &s = c.get();
 568                 if(s.empty())
 569                         throw LexicalError("Empty input in character conversion");
 570                 if(s.size()>1)
 571                         throw LexicalError("Extra input in character conversion");
 572                 v = s[0];
 573         }
 574 }
 575
 576 void operator>>(const LexicalConverter &c, signed char &v)
 577 { v = str_to_int<signed char>(c.get(), c.get_fmt()); }
 578
 579 void operator>>(const LexicalConverter &c, short &v)
 580 { v = str_to_int<short>(c.get(), c.get_fmt()); }
 581
 582 void operator>>(const LexicalConverter &c, int &v)
 583 { v = str_to_int<int>(c.get(), c.get_fmt()); }
 584
 585 void operator>>(const LexicalConverter &c, long &v)
 586 { v = str_to_int<long>(c.get(), c.get_fmt()); }
 587
 588 void operator>>(const LexicalConverter &c, unsigned char &v)
 589 { v = str_to_int<unsigned char>(c.get(), c.get_fmt()); }
 590
 591 void operator>>(const LexicalConverter &c, unsigned short &v)
 592 { v = str_to_int<unsigned short>(c.get(), c.get_fmt()); }
 593
 594 void operator>>(const LexicalConverter &c, unsigned int &v)
 595 { v = str_to_int<unsigned int>(c.get(), c.get_fmt()); }
 596
 597 void operator>>(const LexicalConverter &c, unsigned long &v)
 598 { v = str_to_int<unsigned long>(c.get(), c.get_fmt()); }
 599
 600 #ifdef __GNUC__
 601 void operator>>(const LexicalConverter &c, long long &v)
 602 { v = str_to_int<long long>(c.get(), c.get_fmt()); }
 603
 604 void operator>>(const LexicalConverter &c, unsigned long long &v)
 605 { v = str_to_int<unsigned long long>(c.get(), c.get_fmt()); }
 606 #endif
 607
 608 void operator>>(const LexicalConverter &c, bool &v)
 609 { v = str_to_bool(c.get()); }
 610
 611 void operator>>(const LexicalConverter &c, float &v)
 612 { v = str_to_flt<float>(c.get(), c.get_fmt()); }
 613
 614 void operator>>(const LexicalConverter &c, double &v)
 615 { v = str_to_flt<double>(c.get(), c.get_fmt()); }
 616
 617 void operator>>(const LexicalConverter &c, long double &v)
 618 { v = str_to_flt<long double>(c.get(), c.get_fmt()); }
 619
 620 void operator>>(const LexicalConverter &c, string &s)
 621 { s = str_to_str(c.get(), c.get_fmt()); }
 622
 623 } // namespace Msp