lib/strtod.c

   1 /* Copyright (C) 1991-1992, 1997, 1999, 2003, 2006, 2008-2021 Free Software
   2    Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16
  17 #if ! defined USE_LONG_DOUBLE
  18 # include <config.h>
  19 #endif
  20
  21 /* Specification.  */
  22 #include <stdlib.h>
  23
  24 #include <ctype.h>      /* isspace() */
  25 #include <errno.h>
  26 #include <float.h>      /* {DBL,LDBL}_{MIN,MAX} */
  27 #include <limits.h>     /* LONG_{MIN,MAX} */
  28 #include <locale.h>     /* localeconv() */
  29 #include <math.h>       /* NAN */
  30 #include <stdbool.h>
  31 #include <stdio.h>      /* sprintf() */
  32 #include <string.h>     /* strdup() */
  33 #if HAVE_NL_LANGINFO
  34 # include <langinfo.h>
  35 #endif
  36
  37 #include "c-ctype.h"
  38
  39 #undef MIN
  40 #undef MAX
  41 #ifdef USE_LONG_DOUBLE
  42 # define STRTOD strtold
  43 # define LDEXP ldexpl
  44 # if defined __hpux && defined __hppa
  45    /* We cannot call strtold on HP-UX/hppa, because its return type is a struct,
  46       not a 'long double'.  */
  47 #  define HAVE_UNDERLYING_STRTOD 0
  48 # elif STRTOLD_HAS_UNDERFLOW_BUG
  49    /* strtold would not set errno=ERANGE upon underflow.  */
  50 #  define HAVE_UNDERLYING_STRTOD 0
  51 # else
  52 #  define HAVE_UNDERLYING_STRTOD HAVE_STRTOLD
  53 # endif
  54 # define DOUBLE long double
  55 # define MIN LDBL_MIN
  56 # define MAX LDBL_MAX
  57 # define L_(literal) literal##L
  58 #else
  59 # define STRTOD strtod
  60 # define LDEXP ldexp
  61 # define HAVE_UNDERLYING_STRTOD 1
  62 # define DOUBLE double
  63 # define MIN DBL_MIN
  64 # define MAX DBL_MAX
  65 # define L_(literal) literal
  66 #endif
  67
  68 #if (defined USE_LONG_DOUBLE ? HAVE_LDEXPM_IN_LIBC : HAVE_LDEXP_IN_LIBC)
  69 # define USE_LDEXP 1
  70 #else
  71 # define USE_LDEXP 0
  72 #endif
  73
  74 /* Return true if C is a space in the current locale, avoiding
  75    problems with signed char and isspace.  */
  76 static bool
  77 locale_isspace (char c)
  78 {
  79   unsigned char uc = c;
  80   return isspace (uc) != 0;
  81 }
  82
  83 /* Determine the decimal-point character according to the current locale.  */
  84 static char
  85 decimal_point_char (void)
  86 {
  87   const char *point;
  88   /* Determine it in a multithread-safe way.  We know nl_langinfo is
  89      multithread-safe on glibc systems and Mac OS X systems, but is not required
  90      to be multithread-safe by POSIX.  sprintf(), however, is multithread-safe.
  91      localeconv() is rarely multithread-safe.  */
  92 #if HAVE_NL_LANGINFO && (__GLIBC__ || defined __UCLIBC__ || (defined __APPLE__ && defined __MACH__))
  93   point = nl_langinfo (RADIXCHAR);
  94 #elif 1
  95   char pointbuf[5];
  96   sprintf (pointbuf, "%#.0f", 1.0);
  97   point = &pointbuf[1];
  98 #else
  99   point = localeconv () -> decimal_point;
 100 #endif
 101   /* The decimal point is always a single byte: either '.' or ','.  */
 102   return (point[0] != '\0' ? point[0] : '.');
 103 }
 104
 105 #if !USE_LDEXP
 106  #undef LDEXP
 107  #define LDEXP dummy_ldexp
 108  /* A dummy definition that will never be invoked.  */
 109  static DOUBLE LDEXP (DOUBLE x _GL_UNUSED, int exponent _GL_UNUSED)
 110  {
 111    abort ();
 112    return L_(0.0);
 113  }
 114 #endif
 115
 116 /* Return X * BASE**EXPONENT.  Return an extreme value and set errno
 117    to ERANGE if underflow or overflow occurs.  */
 118 static DOUBLE
 119 scale_radix_exp (DOUBLE x, int radix, long int exponent)
 120 {
 121   /* If RADIX == 10, this code is neither precise nor fast; it is
 122      merely a straightforward and relatively portable approximation.
 123      If N == 2, this code is precise on a radix-2 implementation,
 124      albeit perhaps not fast if ldexp is not in libc.  */
 125
 126   long int e = exponent;
 127
 128   if (USE_LDEXP && radix == 2)
 129     return LDEXP (x, e < INT_MIN ? INT_MIN : INT_MAX < e ? INT_MAX : e);
 130   else
 131     {
 132       DOUBLE r = x;
 133
 134       if (r != 0)
 135         {
 136           if (e < 0)
 137             {
 138               while (e++ != 0)
 139                 {
 140                   r /= radix;
 141                   if (r == 0 && x != 0)
 142                     {
 143                       errno = ERANGE;
 144                       break;
 145                     }
 146                 }
 147             }
 148           else
 149             {
 150               while (e-- != 0)
 151                 {
 152                   if (r < -MAX / radix)
 153                     {
 154                       errno = ERANGE;
 155                       return -HUGE_VAL;
 156                     }
 157                   else if (MAX / radix < r)
 158                     {
 159                       errno = ERANGE;
 160                       return HUGE_VAL;
 161                     }
 162                   else
 163                     r *= radix;
 164                 }
 165             }
 166         }
 167
 168       return r;
 169     }
 170 }
 171
 172 /* Parse a number at NPTR; this is a bit like strtol (NPTR, ENDPTR)
 173    except there are no leading spaces or signs or "0x", and ENDPTR is
 174    nonnull.  The number uses a base BASE (either 10 or 16) fraction, a
 175    radix RADIX (either 10 or 2) exponent, and exponent character
 176    EXPCHAR.  BASE is RADIX**RADIX_MULTIPLIER.  */
 177 static DOUBLE
 178 parse_number (const char *nptr,
 179               int base, int radix, int radix_multiplier, char radixchar,
 180               char expchar,
 181               char **endptr)
 182 {
 183   const char *s = nptr;
 184   const char *digits_start;
 185   const char *digits_end;
 186   const char *radixchar_ptr;
 187   long int exponent;
 188   DOUBLE num;
 189
 190   /* First, determine the start and end of the digit sequence.  */
 191   digits_start = s;
 192   radixchar_ptr = NULL;
 193   for (;; ++s)
 194     {
 195       if (base == 16 ? c_isxdigit (*s) : c_isdigit (*s))
 196         ;
 197       else if (radixchar_ptr == NULL && *s == radixchar)
 198         {
 199           /* Record that we have found the decimal point.  */
 200           radixchar_ptr = s;
 201         }
 202       else
 203         /* Any other character terminates the digit sequence.  */
 204         break;
 205     }
 206   digits_end = s;
 207   /* Now radixchar_ptr == NULL or
 208      digits_start <= radixchar_ptr < digits_end.  */
 209
 210   if (false)
 211     { /* Unoptimized.  */
 212       exponent =
 213         (radixchar_ptr != NULL
 214          ? - (long int) (digits_end - radixchar_ptr - 1)
 215          : 0);
 216     }
 217   else
 218     { /* Remove trailing zero digits.  This reduces rounding errors for
 219          inputs such as 1.0000000000 or 10000000000e-10.  */
 220       while (digits_end > digits_start)
 221         {
 222           if (digits_end - 1 == radixchar_ptr || *(digits_end - 1) == '0')
 223             digits_end--;
 224           else
 225             break;
 226         }
 227       exponent =
 228         (radixchar_ptr != NULL
 229          ? (digits_end > radixchar_ptr
 230             ? - (long int) (digits_end - radixchar_ptr - 1)
 231             : (long int) (radixchar_ptr - digits_end))
 232          : (long int) (s - digits_end));
 233     }
 234
 235   /* Then, convert the digit sequence to a number.  */
 236   {
 237     const char *dp;
 238     num = 0;
 239     for (dp = digits_start; dp < digits_end; dp++)
 240       if (dp != radixchar_ptr)
 241         {
 242           int digit;
 243
 244           /* Make sure that multiplication by BASE will not overflow.  */
 245           if (!(num <= MAX / base))
 246             {
 247               /* The value of the digit and all subsequent digits don't matter,
 248                  since we have already gotten as many digits as can be
 249                  represented in a 'DOUBLE'.  This doesn't necessarily mean that
 250                  the result will overflow: The exponent may reduce it to within
 251                  range.  */
 252               exponent +=
 253                 (digits_end - dp)
 254                 - (radixchar_ptr >= dp && radixchar_ptr < digits_end ? 1 : 0);
 255               break;
 256             }
 257
 258           /* Eat the next digit.  */
 259           if (c_isdigit (*dp))
 260             digit = *dp - '0';
 261           else if (base == 16 && c_isxdigit (*dp))
 262             digit = c_tolower (*dp) - ('a' - 10);
 263           else
 264             abort ();
 265           num = num * base + digit;
 266         }
 267   }
 268
 269   exponent = exponent * radix_multiplier;
 270
 271   /* Finally, parse the exponent.  */
 272   if (c_tolower (*s) == expchar && ! locale_isspace (s[1]))
 273     {
 274       /* Add any given exponent to the implicit one.  */
 275       int saved_errno = errno;
 276       char *end;
 277       long int value = strtol (s + 1, &end, 10);
 278       errno = saved_errno;
 279
 280       if (s + 1 != end)
 281         {
 282           /* Skip past the exponent, and add in the implicit exponent,
 283              resulting in an extreme value on overflow.  */
 284           s = end;
 285           exponent =
 286             (exponent < 0
 287              ? (value < LONG_MIN - exponent ? LONG_MIN : exponent + value)
 288              : (LONG_MAX - exponent < value ? LONG_MAX : exponent + value));
 289         }
 290     }
 291
 292   *endptr = (char *) s;
 293   return scale_radix_exp (num, radix, exponent);
 294 }
 295
 296 /* HP cc on HP-UX 10.20 has a bug with the constant expression -0.0.
 297    ICC 10.0 has a bug when optimizing the expression -zero.
 298    The expression -MIN * MIN does not work when cross-compiling
 299    to PowerPC on Mac OS X 10.5.  */
 300 static DOUBLE
 301 minus_zero (void)
 302 {
 303 #if defined __hpux || defined __sgi || defined __ICC
 304   return -MIN * MIN;
 305 #else
 306   return -0.0;
 307 #endif
 308 }
 309
 310 /* Convert NPTR to a DOUBLE.  If ENDPTR is not NULL, a pointer to the
 311    character after the last one used in the number is put in *ENDPTR.  */
 312 DOUBLE
 313 STRTOD (const char *nptr, char **endptr)
 314 #if HAVE_UNDERLYING_STRTOD
 315 # ifdef USE_LONG_DOUBLE
 316 #  undef strtold
 317 # else
 318 #  undef strtod
 319 # endif
 320 #else
 321 # undef STRTOD
 322 # define STRTOD(NPTR,ENDPTR) \
 323    parse_number (NPTR, 10, 10, 1, radixchar, 'e', ENDPTR)
 324 #endif
 325 /* From here on, STRTOD refers to the underlying implementation.  It needs
 326    to handle only finite unsigned decimal numbers with non-null ENDPTR.  */
 327 {
 328   char radixchar;
 329   bool negative = false;
 330
 331   /* The number so far.  */
 332   DOUBLE num;
 333
 334   const char *s = nptr;
 335   const char *end;
 336   char *endbuf;
 337   int saved_errno = errno;
 338
 339   radixchar = decimal_point_char ();
 340
 341   /* Eat whitespace.  */
 342   while (locale_isspace (*s))
 343     ++s;
 344
 345   /* Get the sign.  */
 346   negative = *s == '-';
 347   if (*s == '-' || *s == '+')
 348     ++s;
 349
 350   num = STRTOD (s, &endbuf);
 351   end = endbuf;
 352
 353   if (c_isdigit (s[*s == radixchar]))
 354     {
 355       /* If a hex float was converted incorrectly, do it ourselves.
 356          If the string starts with "0x" but does not contain digits,
 357          consume the "0" ourselves.  If a hex float is followed by a
 358          'p' but no exponent, then adjust the end pointer.  */
 359       if (*s == '0' && c_tolower (s[1]) == 'x')
 360         {
 361           if (! c_isxdigit (s[2 + (s[2] == radixchar)]))
 362             {
 363               end = s + 1;
 364
 365               /* strtod() on z/OS returns ERANGE for "0x".  */
 366               errno = saved_errno;
 367             }
 368           else if (end <= s + 2)
 369             {
 370               num = parse_number (s + 2, 16, 2, 4, radixchar, 'p', &endbuf);
 371               end = endbuf;
 372             }
 373           else
 374             {
 375               const char *p = s + 2;
 376               while (p < end && c_tolower (*p) != 'p')
 377                 p++;
 378               if (p < end && ! c_isdigit (p[1 + (p[1] == '-' || p[1] == '+')]))
 379                 {
 380                   char *dup = strdup (s);
 381                   errno = saved_errno;
 382                   if (!dup)
 383                     {
 384                       /* Not really our day, is it.  Rounding errors are
 385                          better than outright failure.  */
 386                       num =
 387                         parse_number (s + 2, 16, 2, 4, radixchar, 'p', &endbuf);
 388                     }
 389                   else
 390                     {
 391                       dup[p - s] = '\0';
 392                       num = STRTOD (dup, &endbuf);
 393                       saved_errno = errno;
 394                       free (dup);
 395                       errno = saved_errno;
 396                     }
 397                   end = p;
 398                 }
 399             }
 400         }
 401       else
 402         {
 403           /* If "1e 1" was misparsed as 10.0 instead of 1.0, re-do the
 404              underlying STRTOD on a copy of the original string
 405              truncated to avoid the bug.  */
 406           const char *e = s + 1;
 407           while (e < end && c_tolower (*e) != 'e')
 408             e++;
 409           if (e < end && ! c_isdigit (e[1 + (e[1] == '-' || e[1] == '+')]))
 410             {
 411               char *dup = strdup (s);
 412               errno = saved_errno;
 413               if (!dup)
 414                 {
 415                   /* Not really our day, is it.  Rounding errors are
 416                      better than outright failure.  */
 417                   num = parse_number (s, 10, 10, 1, radixchar, 'e', &endbuf);
 418                 }
 419               else
 420                 {
 421                   dup[e - s] = '\0';
 422                   num = STRTOD (dup, &endbuf);
 423                   saved_errno = errno;
 424                   free (dup);
 425                   errno = saved_errno;
 426                 }
 427               end = e;
 428             }
 429         }
 430
 431       s = end;
 432     }
 433
 434   /* Check for infinities and NaNs.  */
 435   else if (c_tolower (*s) == 'i'
 436            && c_tolower (s[1]) == 'n'
 437            && c_tolower (s[2]) == 'f')
 438     {
 439       s += 3;
 440       if (c_tolower (*s) == 'i'
 441           && c_tolower (s[1]) == 'n'
 442           && c_tolower (s[2]) == 'i'
 443           && c_tolower (s[3]) == 't'
 444           && c_tolower (s[4]) == 'y')
 445         s += 5;
 446       num = HUGE_VAL;
 447       errno = saved_errno;
 448     }
 449   else if (c_tolower (*s) == 'n'
 450            && c_tolower (s[1]) == 'a'
 451            && c_tolower (s[2]) == 'n')
 452     {
 453       s += 3;
 454       if (*s == '(')
 455         {
 456           const char *p = s + 1;
 457           while (c_isalnum (*p))
 458             p++;
 459           if (*p == ')')
 460             s = p + 1;
 461         }
 462
 463       /* If the underlying implementation misparsed the NaN, assume
 464          its result is incorrect, and return a NaN.  Normally it's
 465          better to use the underlying implementation's result, since a
 466          nice implementation populates the bits of the NaN according
 467          to interpreting n-char-sequence as a hexadecimal number.  */
 468       if (s != end || num == num)
 469         num = NAN;
 470       errno = saved_errno;
 471     }
 472   else
 473     {
 474       /* No conversion could be performed.  */
 475       errno = EINVAL;
 476       s = nptr;
 477     }
 478
 479   if (endptr != NULL)
 480     *endptr = (char *) s;
 481   /* Special case -0.0, since at least ICC miscompiles negation.  We
 482      can't use copysign(), as that drags in -lm on some platforms.  */
 483   if (!num && negative)
 484     return minus_zero ();
 485   return negative ? -num : num;
 486 }