source/strings/lexicalcast.cpp

   1 /* $Id$
   2
   3 This file is part of libmspstrings
   4 Copyright © 2006-2008 Mikko Rasa
   5 Distributed under the LGPL
   6 */
   7
   8 #include <cmath>
   9 #include <limits>
  10 #include "format.h"
  11 #include "lexicalcast.h"
  12
  13 using namespace std;
  14
  15 namespace {
  16
  17 using namespace Msp;
  18
  /* Compile-time test for whether an integer type can represent negative
  values.  result is 1 for signed types and 0 for unsigned ones. */
  template<typename T>
  struct IsSigned
  {
          // -1 converted to an unsigned type wraps around to a positive value
          enum { result = (static_cast<T>(-1)>0 ? 0 : 1) };
  };
  22
  /* Selects the unsigned integer type used as scratch space when generating
  digits for a value of type T.  Types no larger than unsigned long use
  unsigned long; larger ones use the platform's 64-bit unsigned type. */
  template<typename T, bool f = (sizeof(T)>sizeof(unsigned long))>
  struct Temporary
  { typedef unsigned long Type; };

  template<typename T>
  struct Temporary<T, true>
  {
  #ifdef WIN32
          /* Must be unsigned, like on every other platform.  A signed temporary
          turns values of 2^63 and above negative, which in turn produces
          negative remainders when extracting digits. */
          typedef unsigned __int64 Type;
  #else
          typedef unsigned long long Type;
  #endif
  };
  36
  37 /* Helper to avoid warnings about an unsigned type never being < 0 */
  38 template<typename T, bool f = IsSigned<T>::result>
  39 struct IsNegative
  40 { static bool eval(T v) { return v<0; } };
  41
  42 template<typename T>
  43 struct IsNegative<T, false>
  44 { static bool eval(T) { return false; } };
  45
  46 /* Helper to avoid errors about ambiguous function calls since there are no
  47 overloads of abs for unsigned types */
  48 template<typename T, bool f = IsSigned<T>::result>
  49 struct Absolute
  50 { static T eval(T v) { return v<0 ? -v : v; } };
  51
  52 template<typename T>
  53 struct Absolute<T, false>
  54 { static T eval(T v) { return v; } };
  55
  56
  57 /*** Integer conversions ***/
  58
  // Digit characters indexed by digit value, in uppercase and lowercase
  // variants.  Sixteen digits plus the terminating null.
  const char udigits[17] = "0123456789ABCDEF";
  const char ldigits[17] = "0123456789abcdef";
  61
  62 template<typename T>
  63 char *int_to_str(T v, const Fmt &f, char *end)
  64 {
  65         if(f.get_type()==Fmt::CHAR)
  66         {
  67                 *--end = v;
  68                 return end;
  69         }
  70
  71         char *ptr = end;
  72
  73         // Find out the base to use
  74         unsigned base = f.get_base();
  75         if(!base)
  76                 base = 10;
  77
  78         // Format the number, starting from the least significant digit
  79         const char *digits = (f.get_uppercase() ? udigits : ldigits);
  80         if(v)
  81         {
  82                 typename Temporary<T>::Type w = Absolute<T>::eval(v);
  83                 while(w)
  84                 {
  85                         *--ptr = digits[w%base];
  86                         w /= base;
  87                 }
  88         }
  89         else
  90                 *--ptr = digits[0];
  91
  92         char sign = (IsNegative<T>::eval(v) ? '-' : f.get_showpos() ? '+' : 0);
  93         if(f.get_fill()=='0')
  94         {
  95                 /* Zero-fill, taking base/sign size into account.  The expression is a
  96                 bit ugly, but saves having to write code for creating the prefix both
  97                 ways. */
  98                 unsigned pfxsize = ((f.get_showbase() && base!=10) ? base==8 ? 1 : 2 : 0) + (sign!=0);
  99                 for(unsigned i=(end-ptr)+pfxsize; i<f.get_width(); ++i)
 100                         *--ptr = '0';
 101         }
 102
 103         if(f.get_showbase() && v!=0)
 104         {
 105                 // Add base indicator
 106                 if(base==2)
 107                         *--ptr = (f.get_uppercase() ? 'B' : 'b');
 108                 else if(base==16)
 109                         *--ptr = (f.get_uppercase() ? 'X' : 'x');
 110                 if(base!=10)
 111                         *--ptr = '0';
 112         }
 113
 114         if(sign)
 115                 *--ptr = sign;
 116
 117         return ptr;
 118 }
 119
 120 template<typename T>
 121 string int_to_str(T v, const Fmt &f)
 122 {
 123         unsigned size = max(f.get_width(), max<unsigned>(f.get_precision(), sizeof(T)*8+3));
 124         char *buf = new char[size];
 125         string result(int_to_str(v, f, buf+size), buf+size);
 126         delete[] buf;
 127         return result;
 128 }
 129
 130 template<typename T>
 131 T str_to_int(const std::string &s, const Fmt &f)
 132 {
 133         if(s.empty())
 134                 throw lexical_error("conversion of '' to integer");
 135
 136         std::string::const_iterator i = s.begin();
 137
 138         // See if the input starts with a sign
 139         bool neg = false;
 140         if(*i=='-')
 141         {
 142                 if(!IsSigned<T>::result)
 143                         throw lexical_error(format("conversion of '%s' to unsigned integer", s));
 144                 neg = true;
 145                 ++i;
 146         }
 147         else if(*i=='+')
 148                 ++i;
 149
 150         // Must have some digits to convert
 151         if(i==s.end())
 152                 throw lexical_error(format("conversion of '%s' to integer", s));
 153
 154         T base = f.get_base();
 155         if(!base && i!=s.end())
 156         {
 157                 // Automatic base detection requested, figure it out
 158                 if(*i=='0' && ++i!=s.end())
 159                 {
 160                         if(*i=='x' || *i=='X')
 161                         {
 162                                 base = 16;
 163                                 ++i;
 164                         }
 165                         else if(*i=='b' || *i=='B')
 166                         {
 167                                 base = 2;
 168                                 ++i;
 169                         }
 170                         else
 171                                 base = 8;
 172                 }
 173                 else
 174                         base = 10;
 175         }
 176
 177         // Parse the digits
 178         T result = 0;
 179         for(; i!=s.end(); ++i)
 180         {
 181                 T digit = base;
 182                 if(*i>='0' && *i<='9')
 183                         digit = *i-'0';
 184                 else if(*i>='A' && *i<='F')
 185                         digit = *i-'A'+10;
 186                 else if(*i>='a' && *i<='f')
 187                         digit = *i-'a'+10;
 188                 if(digit>=base)
 189                         throw lexical_error(format("conversion of '%s' to integer (base-%d)", s, base));
 190                 T next = result*base+digit;
 191                 if(next/base!=result)
 192                         throw lexical_error(format("conversion of '%s' to %d-bit integer", s, sizeof(T)*8));
 193                 result = next;
 194         }
 195
 196         if(neg)
 197                 result = -result;
 198
 199         return result;
 200 }
 201
 202
 203 /*** Boolean conversions ***/
 204
 205 string bool_to_str(bool b, const Fmt &f)
 206 {
 207         if(f.get_type()==Fmt::STR)
 208                 return b ? "true" : "false";
 209         else
 210                 return b ? "1" : "0";
 211 }
 212
 213 bool str_to_bool(const string &s)
 214 {
 215         if(s.empty())
 216                 throw lexical_error("conversion of '' to boolean");
 217
 218         if(s=="1" || s=="true" || s=="yes" || s=="on")
 219                 return true;
 220         else if(s=="0" || s=="false" || s=="no" || s=="off")
 221                 return false;
 222
 223         throw lexical_error(format("conversion of '%s' to boolean", s));
 224 }
 225
 226
 227 /*** Floating-point conversions ***/
 228
 229 template<typename T>
 230 string flt_to_str(T v, const Fmt &f)
 231 {
 232         if(f.get_type()==Fmt::CHAR)
 233                 throw format_mismatch("floating-point conversion with character format");
 234
 235         Fmt::FloatMode mode = f.get_floatmode();
 236         long double w = abs(v);
 237         char sign = (v<0 ? '-' : f.get_showpos() ? '+' : 0);
 238
 239         // Handle infinity and not-a-number as special cases
 240         if(!(w+w>w) && w!=0)
 241         {
 242                 string result;
 243                 if(sign)
 244                         result += sign;
 245                 if(!(w>=0))
 246                         result += (f.get_uppercase() ? "NAN" : "nan");
 247                 else
 248                         result += (f.get_uppercase() ? "INF" : "inf");
 249                 if(result.size()<f.get_width())
 250                         result = string(f.get_width()-result.size(), ' ')+result;
 251                 return result;
 252         }
 253
 254         /* Find out the base-10 exponent.  Building up the multiplier / divisor
 255         first helps with accuracy in some cases. */
 256         int exp = 0;
 257         if(w>=10)
 258         {
 259                 long double div = 1;
 260                 while(div*10<w)
 261                 {
 262                         ++exp;
 263                         div *= 10;
 264                 }
 265                 w /= div;
 266         }
 267         else if(mode!=Fmt::FIXED && w<1 && w!=0)
 268         {
 269                 long double mul = 1;
 270                 while(w*mul<1)
 271                 {
 272                         --exp;
 273                         mul *= 10;
 274                 }
 275                 w *= mul;
 276         }
 277
 278         // Decide how to format the number
 279         unsigned digits;
 280         unsigned point = 1;
 281         bool showexp = false;
 282         if(mode==Fmt::FIXED)
 283         {
 284                 point = exp+1;
 285                 digits = point+f.get_precision();
 286         }
 287         else if(mode==Fmt::SCI)
 288         {
 289                 digits = f.get_precision()+1;
 290                 showexp = true;
 291         }
 292         else
 293         {
 294                 digits = max(f.get_precision(), 1U);
 295                 if(exp<-4 || exp>=static_cast<int>(digits))
 296                 {
 297                         point = 1;
 298                         showexp = true;
 299                 }
 300                 else
 301                 {
 302                         point = max(exp, 0)+1;
 303                         if(exp<0)
 304                                 digits += -exp;
 305                 }
 306         }
 307
 308         // Apply rounding
 309         w += 5.0l/pow(10.0l, static_cast<long double>(digits));
 310         if(w>10)
 311         {
 312                 // Rounding bumped us to the next exponent, deal with it
 313                 w /= 10;
 314                 if(mode==Fmt::AUTOFLT && exp+1==static_cast<int>(digits))
 315                 {
 316                         point = 1;
 317                         showexp = true;
 318                 }
 319                 if(!showexp)
 320                 {
 321                         ++digits;
 322                         ++point;
 323                 }
 324                 else
 325                         ++exp;
 326         }
 327
 328         // Create a buffer and start from the end
 329         unsigned size = max(f.get_width(), digits+8);
 330         char *buf = new char[size];
 331         char *end = buf+size;
 332         char *ptr = end;
 333
 334         // Format exponent
 335         if(showexp)
 336         {
 337                 ptr = int_to_str(exp, Fmt().showpos().fill('0').width(3), ptr);
 338                 *--ptr = (f.get_uppercase() ? 'E' : 'e');
 339         }
 340
 341         // Format mantissa left-to-right
 342         char *eptr = ptr;
 343         ptr -= digits+(point<digits || f.get_showpoint());
 344         char *mptr = ptr;
 345         for(unsigned i=0; i<digits; ++i)
 346         {
 347                 if(i==point)
 348                         *mptr++ = '.';
 349                 if(showexp || static_cast<int>(i)>=-exp)
 350                 {
 351                         int digit = static_cast<int>(w);
 352                         *mptr++ = '0'+digit;
 353                         w = (w-digit)*10;
 354                 }
 355                 else
 356                         *mptr++ = '0';
 357         }
 358
 359         if(f.get_showpoint())
 360         {
 361                 // Radix point requested but not displayed yet, add it
 362                 if(digits<=point)
 363                         *mptr++ = '.';
 364         }
 365         else if(mode==Fmt::AUTOFLT && digits>point)
 366         {
 367                 // Remove trailing zeroes from fraction and a lone radix point
 368                 while(mptr[-1]=='0')
 369                         --mptr;
 370                 if(mptr[-1]=='.')
 371                         --mptr;
 372                 if(mptr!=eptr)
 373                 {
 374                         while(mptr!=ptr)
 375                                 *--eptr = *--mptr;
 376                         ptr = eptr;
 377                 }
 378         }
 379
 380         // Add filling and sign
 381         if(f.get_fill()=='0')
 382         {
 383                 unsigned pfxlen = (sign!=0);
 384                 while(end-ptr+pfxlen<f.get_width())
 385                         *--ptr = '0';
 386         }
 387         if(sign)
 388                 *--ptr = sign;
 389
 390         string result(ptr, end);
 391         delete[] buf;
 392         return result;
 393 }
 394
 395 template<typename T>
 396 T str_to_flt(const string &s, const Fmt &)
 397 {
 398         if(s.empty())
 399                 throw lexical_error("conversion of '' to floating-point");
 400
 401         std::string::const_iterator i = s.begin();
 402
 403         // See if the input starts with a sign
 404         bool neg = false;
 405         if(*i=='-')
 406         {
 407                 neg = true;
 408                 ++i;
 409         }
 410         else if(*i=='+')
 411                 ++i;
 412
 413         // Must have some digits to convert
 414         if(i==s.end())
 415                 throw lexical_error(format("conversion of '%s' to floating-point", s));
 416
 417         long double v = 0;
 418         int exp = 0;
 419
 420         // Parse mantissa
 421         bool point_seen = false;
 422         for(; i!=s.end(); ++i)
 423         {
 424                 if(*i=='.')
 425                 {
 426                         if(point_seen)
 427                                 throw lexical_error(format("conversion of '%s' to floating-point", s));
 428                         point_seen = true;
 429                 }
 430                 else if(*i>='0' && *i<='9')
 431                 {
 432                         v = v*10+(*i-'0');
 433                         if(point_seen)
 434                                 --exp;
 435                 }
 436                 else if(*i=='e' || *i=='E')
 437                 {
 438                         // We have an exponent
 439                         ++i;
 440
 441                         exp += str_to_int<int>(string(i, s.end()), Fmt());
 442                         // str_to_int has eaten the rest of the input or thrown
 443                         break;
 444                 }
 445                 else
 446                         throw lexical_error(format("conversion of '%s' to floating-point", s));
 447         }
 448
 449         // Scale and negate the result as needed
 450         while(exp>0)
 451         {
 452                 v *= 10;
 453                 --exp;
 454         }
 455         while(exp<0)
 456         {
 457                 v /= 10;
 458                 ++exp;
 459         }
 460
 461         if(neg)
 462                 v = -v;
 463
 464         return v;
 465 }
 466
 467
 468 /*** String conversions ***/
 469
 470 string str_to_str(const string &s, const Fmt &f)
 471 {
 472         if(f.get_type()==Fmt::NUM)
 473                 throw format_mismatch("string conversion with numeric format");
 474         return s;
 475 }
 476
 477 }
 478
 479 namespace Msp {
 480
 481 void LexicalConverter::result(const string &s)
 482 {
 483         if(s.size()<fmt.get_width())
 484         {
 485                 if(fmt.get_align()==Fmt::RIGHT)
 486                         buf = string(fmt.get_width()-s.size(), fmt.get_fill())+s;
 487                 else
 488                         buf = s+string(fmt.get_width()-s.size(), fmt.get_fill());
 489         }
 490         else
 491                 buf = s;
 492 }
 493
 494
 495 /*** operator<< ***/
 496
 497 void operator<<(LexicalConverter &c, char v)
 498 {
 499         Fmt::Type type = c.get_fmt().get_type();
 500         if(type==Fmt::NUM)
 501                 c.result(int_to_str(v, c.get_fmt()));
 502         else
 503                 c.result(string(1, v));
 504 }
 505
 506 void operator<<(LexicalConverter &c, signed char v)
 507 { c.result(int_to_str(v, c.get_fmt())); }
 508
 509 void operator<<(LexicalConverter &c, short v)
 510 { c.result(int_to_str(v, c.get_fmt())); }
 511
 512 void operator<<(LexicalConverter &c, int v)
 513 { c.result(int_to_str(v, c.get_fmt())); }
 514
 515 void operator<<(LexicalConverter &c, long v)
 516 { c.result(int_to_str(v, c.get_fmt())); }
 517
 518 void operator<<(LexicalConverter &c, unsigned char v)
 519 { c.result(int_to_str(v, c.get_fmt())); }
 520
 521 void operator<<(LexicalConverter &c, unsigned short v)
 522 { c.result(int_to_str(v, c.get_fmt())); }
 523
 524 void operator<<(LexicalConverter &c, unsigned v)
 525 { c.result(int_to_str(v, c.get_fmt())); }
 526
 527 void operator<<(LexicalConverter &c, unsigned long v)
 528 { c.result(int_to_str(v, c.get_fmt())); }
 529
 530 #ifdef __GNUC__
 531 void operator<<(LexicalConverter &c, long long v)
 532 { c.result(int_to_str(v, c.get_fmt())); }
 533
 534 void operator<<(LexicalConverter &c, unsigned long long v)
 535 { c.result(int_to_str(v, c.get_fmt())); }
 536 #endif
 537
 538 void operator<<(LexicalConverter &c, bool v)
 539 { c.result(bool_to_str(v, c.get_fmt())); }
 540
 541 void operator<<(LexicalConverter &c, float v)
 542 { c.result(flt_to_str(v, c.get_fmt())); }
 543
 544 void operator<<(LexicalConverter &c, double v)
 545 { c.result(flt_to_str(v, c.get_fmt())); }
 546
 547 void operator<<(LexicalConverter &c, long double v)
 548 { c.result(flt_to_str(v, c.get_fmt())); }
 549
 550 void operator<<(LexicalConverter &c, const string &s)
 551 { c.result(str_to_str(s, c.get_fmt())); }
 552
 553 void operator<<(LexicalConverter &c, const char *s)
 554 { c.result(str_to_str(s, c.get_fmt())); }
 555
 556 void operator<<(LexicalConverter &c, const void *p)
 557 { c.result(int_to_str(reinterpret_cast<unsigned long>(p), c.get_fmt())); }
 558
 559
 560 /*** operator>> ***/
 561
 562 void operator>>(const LexicalConverter &c, char &v)
 563 {
 564         if(c.get_fmt().get_type()==Fmt::NUM)
 565                 v = str_to_int<char>(c.get(), c.get_fmt());
 566         else
 567         {
 568                 const std::string &s = c.get();
 569                 if(s.empty())
 570                         throw lexical_error("conversion of '' to character");
 571                 if(s.size()>1)
 572                         throw lexical_error(format("conversion of '%s' to character", s));
 573                 v = s[0];
 574         }
 575 }
 576
 577 void operator>>(const LexicalConverter &c, signed char &v)
 578 { v = str_to_int<signed char>(c.get(), c.get_fmt()); }
 579
 580 void operator>>(const LexicalConverter &c, short &v)
 581 { v = str_to_int<short>(c.get(), c.get_fmt()); }
 582
 583 void operator>>(const LexicalConverter &c, int &v)
 584 { v = str_to_int<int>(c.get(), c.get_fmt()); }
 585
 586 void operator>>(const LexicalConverter &c, long &v)
 587 { v = str_to_int<long>(c.get(), c.get_fmt()); }
 588
 589 void operator>>(const LexicalConverter &c, unsigned char &v)
 590 { v = str_to_int<unsigned char>(c.get(), c.get_fmt()); }
 591
 592 void operator>>(const LexicalConverter &c, unsigned short &v)
 593 { v = str_to_int<unsigned short>(c.get(), c.get_fmt()); }
 594
 595 void operator>>(const LexicalConverter &c, unsigned int &v)
 596 { v = str_to_int<unsigned int>(c.get(), c.get_fmt()); }
 597
 598 void operator>>(const LexicalConverter &c, unsigned long &v)
 599 { v = str_to_int<unsigned long>(c.get(), c.get_fmt()); }
 600
 601 #ifdef __GNUC__
 602 void operator>>(const LexicalConverter &c, long long &v)
 603 { v = str_to_int<long long>(c.get(), c.get_fmt()); }
 604
 605 void operator>>(const LexicalConverter &c, unsigned long long &v)
 606 { v = str_to_int<unsigned long long>(c.get(), c.get_fmt()); }
 607 #endif
 608
 609 void operator>>(const LexicalConverter &c, bool &v)
 610 { v = str_to_bool(c.get()); }
 611
 612 void operator>>(const LexicalConverter &c, float &v)
 613 { v = str_to_flt<float>(c.get(), c.get_fmt()); }
 614
 615 void operator>>(const LexicalConverter &c, double &v)
 616 { v = str_to_flt<double>(c.get(), c.get_fmt()); }
 617
 618 void operator>>(const LexicalConverter &c, long double &v)
 619 { v = str_to_flt<long double>(c.get(), c.get_fmt()); }
 620
 621 void operator>>(const LexicalConverter &c, string &s)
 622 { s = str_to_str(c.get(), c.get_fmt()); }
 623
 624 } // namespace Msp