float.c

   1 /* float.c     floating-point constant support for the Netwide Assembler
   2  *
   3  * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
   4  * Julian Hall. All rights reserved. The software is
   5  * redistributable under the license given in the file "LICENSE"
   6  * distributed in the NASM archive.
   7  *
   8  * initial version 13/ix/96 by Simon Tatham
   9  */
  10
  11 #include "compiler.h"
  12
  13 #include <ctype.h>
  14 #include <stdio.h>
  15 #include <stdlib.h>
  16 #include <string.h>
  17 #include <inttypes.h>
  18
  19 #include "nasm.h"
  20 #include "float.h"
  21
  22 /*
  23  * -----------------
  24  *  local variables
  25  * -----------------
  26  */
  27 static efunc error;             /* error reporter; installed by float_const() */
  28 static bool daz = false;        /* denormals as zero; set by float_option() */
  29 static enum float_round rc = FLOAT_RC_NEAR;     /* rounding control; set by float_option() */
  30
  31 /*
  32  * -----------
  33  *  constants
  34  * -----------
  35  */
  36
  37 /* A "limb" is like a digit, but bigger */
  38 typedef uint32_t fp_limb;       /* one limb of a multi-limb mantissa */
  39 typedef uint64_t fp_2limb;      /* holds a full limb-by-limb product */
  40
  41 #define LIMB_BITS       32
  42 #define LIMB_BYTES      (LIMB_BITS/8)
  43 #define LIMB_TOP_BIT    ((fp_limb)1 << (LIMB_BITS-1))   /* most significant bit of a limb */
  44 #define LIMB_MASK       ((fp_limb)(~0))                 /* every bit of a limb set */
  45 #define LIMB_ALL_BYTES  ((fp_limb)0x01010101)           /* 0x01 in each byte of a limb */
  46 #define LIMB_BYTE(x)    ((x)*LIMB_ALL_BYTES)            /* limb with each byte equal to x */
  47
  48 /* 112 bits + 64 bits for accuracy + 16 bits for rounding (= 6 limbs of 32 bits) */
  49 #define MANT_LIMBS 6
  50
  51 /* 52 digits fit in 176 bits because 10^53 > 2^176 > 10^52 */
  52 #define MANT_DIGITS 52
  53
  54 /*
      * Debug-print helpers: MANT_FMT prints one whole mantissa and SOME_ARG(a,i)
      * supplies the matching arguments a[i] .. a[i+5].
  54  * the format and the argument list depend on MANT_LIMBS */
  55 #define MANT_FMT "%08x_%08x_%08x_%08x_%08x_%08x"
  56 #define MANT_ARG SOME_ARG(mant, 0)
  57
  58 #define SOME_ARG(a,i) (a)[(i)+0], (a)[(i)+1], (a)[(i)+2], (a)[(i)+3],   \
  59         (a)[(i)+4], (a)[(i)+5]
  60
  61 /*
  62  * ---------------------------------------------------------------------------
  63  *  emit a printf()-like debug message... but only if DEBUG_FLOAT was defined
  64  * ---------------------------------------------------------------------------
      *
      *  The complete printf() argument list is passed as ONE parenthesized
      *  macro argument, i.e. dprintf((fmt, args...)).  Without DEBUG_FLOAT the
      *  call expands to an empty statement and the arguments are discarded.
  65  */
  66
  67 #ifdef DEBUG_FLOAT
  68 #define dprintf(x) printf x
  69 #else                           /* !DEBUG_FLOAT */
  70 #define dprintf(x) do { } while (0)
  71 #endif                          /* DEBUG_FLOAT */
  72
  73 /*
  74  * ---------------------------------------------------------------------------
  75  *  multiply
  76  * ---------------------------------------------------------------------------
  77  */
  78 static int float_multiply(fp_limb *to, fp_limb *from)
  79 {
  80     fp_2limb temp[MANT_LIMBS * 2];
  81     int i, j;
  82
  83     /*
  84      * guaranteed that top bit of 'from' is set -- so we only have
  85      * to worry about _one_ bit shift to the left
  86      */
  87     dprintf(("%s=" MANT_FMT "\n", "mul1", SOME_ARG(to, 0)));
  88     dprintf(("%s=" MANT_FMT "\n", "mul2", SOME_ARG(from, 0)));
  89
  90     memset(temp, 0, sizeof temp);
  91
  92     for (i = 0; i < MANT_LIMBS; i++) {
  93         for (j = 0; j < MANT_LIMBS; j++) {
  94             fp_2limb n;
  95             n = (fp_2limb) to[i] * (fp_2limb) from[j];
  96             temp[i + j] += n >> LIMB_BITS;
  97             temp[i + j + 1] += (fp_limb)n;
  98         }
  99     }
 100
 101     for (i = MANT_LIMBS * 2; --i;) {
 102         temp[i - 1] += temp[i] >> LIMB_BITS;
 103         temp[i] &= LIMB_MASK;
 104     }
 105
 106     dprintf(("%s=" MANT_FMT "_" MANT_FMT "\n", "temp", SOME_ARG(temp, 0),
 107              SOME_ARG(temp, MANT_LIMBS)));
 108
 109     if (temp[0] & LIMB_TOP_BIT) {
 110         for (i = 0; i < MANT_LIMBS; i++) {
 111             to[i] = temp[i] & LIMB_MASK;
 112         }
 113         dprintf(("%s=" MANT_FMT " (%i)\n", "prod", SOME_ARG(to, 0), 0));
 114         return 0;
 115     } else {
 116         for (i = 0; i < MANT_LIMBS; i++) {
 117             to[i] = (temp[i] << 1) + !!(temp[i + 1] & LIMB_TOP_BIT);
 118         }
 119         dprintf(("%s=" MANT_FMT " (%i)\n", "prod", SOME_ARG(to, 0), -1));
 120         return -1;
 121     }
 122 }
 123
 124 /*
 125  * ---------------------------------------------------------------------------
 126  *  read an exponent; returns INT32_MAX on error
 127  * ---------------------------------------------------------------------------
 128  */
 129 static int32_t read_exponent(const char *string, int32_t max)
 130 {
 131     int32_t i = 0;
 132     bool neg = false;
 133
 134     if (*string == '+') {
 135         string++;
 136     } else if (*string == '-') {
 137         neg = true;
 138         string++;
 139     }
 140     while (*string) {
 141         if (*string >= '0' && *string <= '9') {
 142             i = (i * 10) + (*string - '0');
 143
 144             /*
 145              * To ensure that underflows and overflows are
 146              * handled properly we must avoid wraparounds of
 147              * the signed integer value that is used to hold
 148              * the exponent. Therefore we cap the exponent at
 149              * +/-5000, which is slightly more/less than
 150              * what's required for normal and denormal numbers
 151              * in single, double, and extended precision, but
 152              * sufficient to avoid signed integer wraparound.
 153              */
 154             if (i > max)
 155                 i = max;
 156         } else if (*string == '_') {
 157             /* do nothing */
 158         } else {
 159             error(ERR_NONFATAL|ERR_PASS1,
 160                   "invalid character in floating-point constant %s: '%c'",
 161                   "exponent", *string);
 162             return INT32_MAX;
 163         }
 164         string++;
 165     }
 166
 167     return neg ? -i : i;
 168 }
 169
 170 /*
 171  * ---------------------------------------------------------------------------
 172  *  convert
 173  * ---------------------------------------------------------------------------
      *
      *  Parse the decimal constant in 'string' -- digits with at most one
      *  '.', optionally followed by 'E' or 'e' and a decimal exponent, with
      *  '_' ignored everywhere -- into a binary mantissa and exponent.
      *
      *  On success returns true with 'mant' (MANT_LIMBS limbs, most
      *  significant first) holding either zero or a fraction whose top bit
      *  is set, and *exponent holding the power of two it must be multiplied
      *  by to give the value.  On a malformed constant the problem is
      *  reported through error() and false is returned.
      *
      *  Only the first MANT_DIGITS significant digits are kept; further
      *  digits produce one warning and otherwise only scale the value.
 174  */
 175 static bool ieee_flconvert(const char *string, fp_limb *mant,
 176                            int32_t * exponent)
 177 {
 178     char digits[MANT_DIGITS];
 179     char *p, *q, *r;
 180     fp_limb mult[MANT_LIMBS], bit;
 181     fp_limb *m;
 182     int32_t tenpwr, twopwr;
 183     int32_t extratwos;
 184     bool started, seendot, warned;
 185
 186     warned = false;
 187     p = digits;
 188     tenpwr = 0;
 189     started = seendot = false;
 190
         /* Collect the significand digits, stopping at the exponent marker */
 191     while (*string && *string != 'E' && *string != 'e') {
 192         if (*string == '.') {
 193             if (!seendot) {
 194                 seendot = true;
 195             } else {
 196                 error(ERR_NONFATAL|ERR_PASS1,
 197                       "too many periods in floating-point constant");
 198                 return false;
 199             }
 200         } else if (*string >= '0' && *string <= '9') {
 201             if (*string == '0' && !started) {
                     /* leading zero: not stored, but after the point it scales */
 202                 if (seendot) {
 203                     tenpwr--;
 204                 }
 205             } else {
 206                 started = true;
 207                 if (p < digits + sizeof(digits)) {
 208                     *p++ = *string - '0';
 209                 } else {
 210                     if (!warned) {
 211                         error(ERR_WARNING|ERR_WARN_FL_TOOLONG|ERR_PASS1,
 212                               "floating-point constant significand contains "
 213                               "more than %i digits", MANT_DIGITS);
 214                         warned = true;
 215                     }
 216                 }
 217                 if (!seendot) {
 218                     tenpwr++;
 219                 }
 220             }
 221         } else if (*string == '_') {
 222             /* do nothing */
 223         } else {
 224             error(ERR_NONFATAL|ERR_PASS1,
 225                   "invalid character in floating-point constant %s: '%c'",
 226                   "significand", *string);
 227             return false;
 228         }
 229         string++;
 230     }
 231
 232     if (*string) {
 233         int32_t e;
 234
 235         string++;               /* eat the E */
 236         e = read_exponent(string, 5000);
 237         if (e == INT32_MAX)
 238             return false;
 239         tenpwr += e;
 240     }
 241
 242     /*
 243      * At this point, the memory interval [digits,p) contains a
 244      * series of decimal digits zzzzzzz, such that our number X
 245      * satisfies X = 0.zzzzzzz * 10^tenpwr.
 246      */
 247     q = digits;
 248     dprintf(("X = 0."));
 249     while (q < p) {
 250         dprintf(("%c", *q + '0'));
 251         q++;
 252     }
 253     dprintf((" * 10^%i\n", tenpwr));
 254
 255     /*
 256      * Now convert [digits,p) to our internal representation.
      *
      * Each pass doubles the decimal fraction in place; the carry out of
      * its first digit is the next binary digit.  Zero bits before the
      * first one bit are not stored and are counted in twopwr instead.
 257      */
 258     bit = LIMB_TOP_BIT;
 259     for (m = mant; m < mant + MANT_LIMBS; m++) {
 260         *m = 0;
 261     }
 262     m = mant;
 263     q = digits;
 264     started = false;
 265     twopwr = 0;
 266     while (m < mant + MANT_LIMBS) {
 267         fp_limb carry = 0;
 268         while (p > q && !p[-1]) {       /* drop trailing zero digits */
 269             p--;
 270         }
 271         if (p <= q) {                   /* nothing left: fraction is exact */
 272             break;
 273         }
 274         for (r = p; r-- > q;) {
 275             int32_t i;
 276             i = 2 * *r + carry;
 277             if (i >= 10) {
 278                 carry = 1;
 279                 i -= 10;
 280             } else {
 281                 carry = 0;
 282             }
 283             *r = i;
 284         }
 285         if (carry) {
 286             *m |= bit;
 287             started = true;
 288         }
 289         if (started) {
 290             if (bit == 1) {
 291                 bit = LIMB_TOP_BIT;
 292                 m++;
 293             } else {
 294                 bit >>= 1;
 295             }
 296         } else {
 297             twopwr--;
 298         }
 299     }
 300     twopwr += tenpwr;
 301
 302     /*
 303      * At this point, the 'mant' array contains the first frac-
 304      * tional places of a base-2^32 real number (one place per
      * limb) which when multiplied by 2^twopwr and 5^tenpwr
 305      * gives X.
 306      */
 307     dprintf(("X = " MANT_FMT " * 2^%i * 5^%i\n", MANT_ARG, twopwr,
 308              tenpwr));
 309
 310     /*
 311      * Now multiply 'mant' by 5^tenpwr.
      *
      * 'mult' starts as 5 or 1/5, stored as a fraction with its top bit
      * set; 'extratwos' is the power of two that this storage leaves out.
 312      */
 313     if (tenpwr < 0) {           /* mult = 5^-1 = 0.2 */
 314         for (m = mult; m < mult + MANT_LIMBS - 1; m++) {
 315             *m = LIMB_BYTE(0xcc);
 316         }
 317         mult[MANT_LIMBS - 1] = LIMB_BYTE(0xcc)+1;
 318         extratwos = -2;
 319         tenpwr = -tenpwr;
 320
 321         /*
 322          * If tenpwr was 1000...000b, then it becomes 1000...000b. See
 323          * the "ANSI C" comment below for more details on that case.
 324          *
 325          * Because we already truncated tenpwr to +5000...-5000 inside
 326          * the exponent parsing code, this shouldn't happen though.
 327          */
 328     } else if (tenpwr > 0) {    /* mult = 5^+1 = 5.0 */
 329         mult[0] = (fp_limb)5 << (LIMB_BITS-3); /* 0xA000... */
 330         for (m = mult + 1; m < mult + MANT_LIMBS; m++) {
 331             *m = 0;
 332         }
 333         extratwos = 3;
 334     } else {
 335         extratwos = 0;
 336     }
         /* Square-and-multiply over the bits of tenpwr, lowest bit first */
 337     while (tenpwr) {
 338         dprintf(("loop=" MANT_FMT " * 2^%i * 5^%i (%i)\n", MANT_ARG,
 339                  twopwr, tenpwr, extratwos));
 340         if (tenpwr & 1) {
 341             dprintf(("mant*mult\n"));
 342             twopwr += extratwos + float_multiply(mant, mult);
 343         }
 344         dprintf(("mult*mult\n"));
 345         extratwos = extratwos * 2 + float_multiply(mult, mult);
 346         tenpwr >>= 1;
 347
 348         /*
 349          * In ANSI C, the result of right-shifting a signed integer is
 350          * considered implementation-specific. To ensure that the loop
 351          * terminates even if tenpwr was 1000...000b to begin with, we
 352          * manually clear the MSB, in case a 1 was shifted in.
 353          *
 354          * Because we already truncated tenpwr to +5000...-5000 inside
 355          * the exponent parsing code, this shouldn't matter; neverthe-
 356          * less it is the right thing to do here.
 357          */
 358         tenpwr &= (uint32_t) - 1 >> 1;
 359     }
 360
 361     /*
 362      * At this point, the 'mant' array contains the first frac-
 363      * tional places of a base-2^32 real number in [0.5,1) that
 364      * when multiplied by 2^twopwr gives X. Or it contains zero
 365      * of course. We are done.
 366      */
 367     *exponent = twopwr;
 368     return true;
 369 }
 370
 371 /*
 372  * ---------------------------------------------------------------------------
 373  *  operations of specific bits
 374  * ---------------------------------------------------------------------------
 375  */
 376
 377 /* Set a bit, using *bigendian* bit numbering (0 = MSB) */
 378 static void set_bit(fp_limb *mant, int bit)
 379 {
 380     mant[bit/LIMB_BITS] |= LIMB_TOP_BIT >> (bit & (LIMB_BITS-1));
 381 }
 382
 383 /* Test a single bit */
 384 static int test_bit(const fp_limb *mant, int bit)
 385 {
 386     return (mant[bit/LIMB_BITS] >> (~bit & (LIMB_BITS-1))) & 1;
 387 }
 388
 389 /* Report if the mantissa value is all zero */
 390 static bool is_zero(const fp_limb *mant)
 391 {
 392     int i;
 393
 394     for (i = 0; i < MANT_LIMBS; i++)
 395         if (mant[i])
 396             return false;
 397
 398     return true;
 399 }
 400
 401 /*
 402  * ---------------------------------------------------------------------------
 403  *  round a mantissa off after i words
 404  * ---------------------------------------------------------------------------
 405  */
 406
 407 #define ROUND_COLLECT_BITS                      \
 408     do {                                        \
 409         m = mant[i] & (2*bit-1);                \
 410         for (j = i+1; j < MANT_LIMBS; j++)      \
 411             m = m | mant[j];                    \
 412     } while (0)
 413
 414 #define ROUND_ABS_DOWN                          \
 415     do {                                        \
 416         mant[i] &= ~(bit-1);                    \
 417         for (j = i+1; j < MANT_LIMBS; j++)      \
 418             mant[j] = 0;                        \
 419         return false;                           \
 420     } while (0)
 421
 422 #define ROUND_ABS_UP                            \
 423     do {                                        \
 424         mant[i] = (mant[i] & ~(bit-1)) + bit;   \
 425         for (j = i+1; j < MANT_LIMBS; j++)      \
 426             mant[j] = 0;                        \
 427         while (i > 0 && !mant[i])               \
 428             ++mant[--i];                        \
 429         return !mant[0];                        \
 430     } while (0)
 431
 432 static bool ieee_round(bool minus, fp_limb *mant, int bits)
 433 {
 434     fp_limb m = 0;
 435     int32_t j;
 436     int i = bits / LIMB_BITS;
 437     int p = bits % LIMB_BITS;
 438     fp_limb bit = LIMB_TOP_BIT >> p;
 439
 440     if (rc == FLOAT_RC_NEAR) {
 441         if (mant[i] & bit) {
 442             mant[i] &= ~bit;
 443             ROUND_COLLECT_BITS;
 444             mant[i] |= bit;
 445             if (m) {
 446                 ROUND_ABS_UP;
 447             } else {
 448                 if (test_bit(mant, bits-1)) {
 449                     ROUND_ABS_UP;
 450                 } else {
 451                     ROUND_ABS_DOWN;
 452                 }
 453             }
 454         } else {
 455             ROUND_ABS_DOWN;
 456         }
 457     } else if (rc == FLOAT_RC_ZERO ||
 458                rc == (minus ? FLOAT_RC_UP : FLOAT_RC_DOWN)) {
 459         ROUND_ABS_DOWN;
 460     } else {
 461         /* rc == (minus ? FLOAT_RC_DOWN : FLOAT_RC_UP) */
 462         /* Round toward +/- infinity */
 463         ROUND_COLLECT_BITS;
 464         if (m) {
 465             ROUND_ABS_UP;
 466         } else {
 467             ROUND_ABS_DOWN;
 468         }
 469     }
 470     return false;
 471 }
 472
 473 /* Returns a value >= 16 if not a valid hex digit */
 474 static unsigned int hexval(char c)
 475 {
 476     unsigned int v = (unsigned char) c;
 477
 478     if (v >= '0' && v <= '9')
 479         return v - '0';
 480     else
 481         return (v|0x20) - 'a' + 10;
 482 }
 483
 484 /* Handle floating-point numbers with radix 2^bits and binary exponent */
 485 static bool ieee_flconvert_bin(const char *string, int bits,
 486                                fp_limb *mant, int32_t *exponent)
 487 {
 488     static const int log2tbl[16] =
 489         { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 };
 490     fp_limb mult[MANT_LIMBS + 1], *mp;
 491     int ms;
 492     int32_t twopwr;
 493     bool seendot, seendigit;
 494     unsigned char c;
 495     int radix = 1 << bits;
 496     fp_limb v;
 497
 498     twopwr = 0;
 499     seendot = seendigit = false;
 500     ms = 0;
 501     mp = NULL;
 502
 503     memset(mult, 0, sizeof mult);
 504
 505     while ((c = *string++) != '\0') {
 506         if (c == '.') {
 507             if (!seendot)
 508                 seendot = true;
 509             else {
 510                 error(ERR_NONFATAL|ERR_PASS1,
 511                       "too many periods in floating-point constant");
 512                 return false;
 513             }
 514         } else if ((v = hexval(c)) < (unsigned int)radix) {
 515             if (!seendigit && v) {
 516                 int l = log2tbl[v];
 517
 518                 seendigit = true;
 519                 mp = mult;
 520                 ms = (LIMB_BITS-1)-l;
 521
 522                 twopwr = seendot ? twopwr-bits+l : l+1-bits;
 523             }
 524
 525             if (seendigit) {
 526                 if (ms <= 0) {
 527                     *mp |= v >> -ms;
 528                     mp++;
 529                     if (mp > &mult[MANT_LIMBS])
 530                         mp = &mult[MANT_LIMBS]; /* Guard slot */
 531                     ms += LIMB_BITS;
 532                 }
 533                 *mp |= v << ms;
 534                 ms -= bits;
 535
 536                 if (!seendot)
 537                     twopwr += bits;
 538             } else {
 539                 if (seendot)
 540                     twopwr -= bits;
 541             }
 542         } else if (c == 'p' || c == 'P') {
 543             int32_t e;
 544             e = read_exponent(string, 20000);
 545             if (e == INT32_MAX)
 546                 return false;
 547             twopwr += e;
 548             break;
 549         } else if (c == '_') {
 550             /* ignore */
 551         } else {
 552             error(ERR_NONFATAL|ERR_PASS1,
 553                   "floating-point constant: `%c' is invalid character", c);
 554             return false;
 555         }
 556     }
 557
 558     if (!seendigit) {
 559         memset(mant, 0, sizeof mult); /* Zero */
 560         *exponent = 0;
 561     } else {
 562         memcpy(mant, mult, sizeof mult);
 563         *exponent = twopwr;
 564     }
 565
 566     return true;
 567 }
 568
 569 /*
 570  * Shift a mantissa to the right by i bits.
 571  */
 572 static void ieee_shr(fp_limb *mant, int i)
 573 {
 574     fp_limb n, m;
 575     int j = 0;
 576     int sr, sl, offs;
 577
 578     sr = i % LIMB_BITS; sl = LIMB_BITS-sr;
 579     offs = i/LIMB_BITS;
 580
 581     if (sr == 0) {
 582         if (offs)
 583             for (j = MANT_LIMBS-1; j >= offs; j--)
 584                 mant[j] = mant[j-offs];
 585     } else {
 586         n = mant[MANT_LIMBS-1-offs] >> sr;
 587         for (j = MANT_LIMBS-1; j > offs; j--) {
 588             m = mant[j-offs-1];
 589             mant[j] = (m << sl) | n;
 590             n = m >> sr;
 591         }
 592         mant[j--] = n;
 593     }
 594     while (j >= 0)
 595         mant[j--] = 0;
 596 }
 597
 598 /* Produce standard IEEE formats, with implicit or explicit integer
 599    bit; this makes the following assumptions:
 600
 601    - the sign bit is the MSB, followed by the exponent,
 602      followed by the integer bit if present.
 603    - the sign bit plus exponent fit in 16 bits.
 604    - the exponent bias is 2^(n-1)-1 for an n-bit exponent */
 605
 606 struct ieee_format {
 607     int bytes;                  /* Size of the encoded value in bytes */
 608     int mantissa;               /* Fractional bits in the mantissa */
 609     int explicit;               /* 1 if the integer bit is stored, else 0 */
 610     int exponent;               /* Bits in the exponent */
 611 };
 612
 613 /*
 614  * The 16- and 128-bit formats are expected to be in IEEE 754r.
 615  * AMD SSE5 uses the 16-bit format.
 616  *
 617  * The 32- and 64-bit formats are the original IEEE 754 formats.
 618  *
 619  * The 80-bit format is x87-specific, but widely used.
 620  *
 621  * The 8-bit format appears to be the consensus 8-bit floating-point
 622  * format.  It is apparently used in graphics applications.
      *
      * Initializer order: { bytes, mantissa, explicit, exponent }.  In each
      * entry 1 (sign) + exponent + explicit + mantissa equals 8 * bytes.
 623  */
 624 static const struct ieee_format ieee_8   = {  1,   3, 0,  4 };
 625 static const struct ieee_format ieee_16  = {  2,  10, 0,  5 };
 626 static const struct ieee_format ieee_32  = {  4,  23, 0,  8 };
 627 static const struct ieee_format ieee_64  = {  8,  52, 0, 11 };
 628 static const struct ieee_format ieee_80  = { 10,  63, 1, 15 };
 629 static const struct ieee_format ieee_128 = { 16, 112, 0, 15 };
 630
 631 /* Types of values we can generate */
 632 enum floats {
 633     FL_ZERO,                    /* all mantissa and exponent bits zero */
 634     FL_DENORMAL,                /* below the normal range; may still underflow to zero */
 635     FL_NORMAL,                  /* ordinary value with a biased exponent */
 636     FL_INFINITY,                /* exponent all ones; also used on overflow */
 637     FL_QNAN,                    /* quiet NaN; also the result of a parse failure */
 638     FL_SNAN                     /* signalling NaN */
 639 };
 640
     /*
      * Convert the textual constant 'str' into the binary format described by
      * 'fmt' and store fmt->bytes bytes at 'result', least significant byte
      * first.  A negative 's' sets the sign bit.
      *
      * 'str' is either a special token starting with '_', whose third
      * character selects quiet NaN ('n'/'q'), signalling NaN ('s') or
      * infinity ('i'), or a number.  Numbers starting "0x"/"0h" or "$" are
      * hexadecimal, "0o"/"0q" octal, "0b"/"0y" binary, and "0d"/"0t" or
      * anything else decimal.  A number that fails to parse produces a quiet
      * NaN (the parser has already reported the error).
      *
      * Overflow, underflow and denormal results are reported as warnings
      * through error().  Always returns 1.
      */
 641 static int to_float(const char *str, int s, uint8_t * result,
 642                     const struct ieee_format *fmt)
 643 {
 644     fp_limb mant[MANT_LIMBS];
 645     int32_t exponent = 0;
 646     int32_t expmax = 1 << (fmt->exponent - 1);
         /* limb and mask of the integer ("one") bit in the encoded layout */
 647     fp_limb one_mask = LIMB_TOP_BIT >>
 648         ((fmt->exponent+fmt->explicit) % LIMB_BITS);
 649     int one_pos = (fmt->exponent+fmt->explicit)/LIMB_BITS;
 650     int i;
 651     int shift;
 652     enum floats type;
 653     bool ok;
 654     bool minus = s < 0;
 655     int bits = fmt->bytes * 8;
 656
 657     if (str[0] == '_') {
 658         /* Special tokens */
 659
 660         switch (str[2]) {
 661         case 'n':              /* __nan__ */
 662         case 'N':
 663         case 'q':              /* __qnan__ */
 664         case 'Q':
 665             type = FL_QNAN;
 666             break;
 667         case 's':              /* __snan__ */
 668         case 'S':
 669             type = FL_SNAN;
 670             break;
 671         case 'i':              /* __infinity__ */
 672         case 'I':
 673             type = FL_INFINITY;
 674             break;
 675         default:
 676             error(ERR_NONFATAL|ERR_PASS1,
 677                   "internal error: unknown FP constant token `%s'\n", str);
 678             type = FL_QNAN;
 679             break;
 680         }
 681     } else {
             /* Pick the parser from the radix prefix, if there is one */
 682         if (str[0] == '0') {
 683             switch (str[1]) {
 684             case 'x': case 'X':
 685             case 'h': case 'H':
 686                 ok = ieee_flconvert_bin(str+2, 4, mant, &exponent);
 687                 break;
 688             case 'o': case 'O':
 689             case 'q': case 'Q':
 690                 ok = ieee_flconvert_bin(str+2, 3, mant, &exponent);
 691                 break;
 692             case 'b': case 'B':
 693             case 'y': case 'Y':
 694                 ok = ieee_flconvert_bin(str+2, 1, mant, &exponent);
 695                 break;
 696             case 'd': case 'D':
 697             case 't': case 'T':
 698                 ok = ieee_flconvert(str+2, mant, &exponent);
 699                 break;
 700             default:
 701                 /* Leading zero was just a zero? */
 702                 ok = ieee_flconvert(str, mant, &exponent);
 703                 break;
 704             }
 705         } else if (str[0] == '$') {
 706             ok = ieee_flconvert_bin(str+1, 4, mant, &exponent);
 707         } else {
 708             ok = ieee_flconvert(str, mant, &exponent);
 709         }
 710
 711         if (!ok) {
 712             type = FL_QNAN;
 713         } else if (mant[0] & LIMB_TOP_BIT) {
 714             /*
 715              * Non-zero.
              *
              * The parsers return a mantissa in [0.5,1); decrementing the
              * exponent re-expresses it for a mantissa in [1,2).
 716              */
 717             exponent--;
 718             if (exponent >= 2 - expmax && exponent <= expmax) {
 719                 type = FL_NORMAL;
 720             } else if (exponent > 0) {
 721                 if (pass0 == 1)
 722                     error(ERR_WARNING|ERR_WARN_FL_OVERFLOW|ERR_PASS1,
 723                           "overflow in floating-point constant");
 724                 type = FL_INFINITY;
 725             } else {
 726                 /* underflow or denormal; the denormal code handles
 727                    actual underflow. */
 728                 type = FL_DENORMAL;
 729             }
 730         } else {
 731             /* Zero */
 732             type = FL_ZERO;
 733         }
 734     }
 735
 736     switch (type) {
 737     case FL_ZERO:
 738     zero:
 739         memset(mant, 0, sizeof mant);
 740         break;
 741
 742     case FL_DENORMAL:
 743     {
             /* Shift the leading one below the integer-bit position by the
                amount the exponent falls short of the normal range */
 744         shift = -(exponent + expmax - 2 - fmt->exponent)
 745             + fmt->explicit;
 746         ieee_shr(mant, shift);
 747         ieee_round(minus, mant, bits);
 748         if (mant[one_pos] & one_mask) {
 749             /* One's position is set, we rounded up into normal range */
 750             exponent = 1;
 751             if (!fmt->explicit)
 752                 mant[one_pos] &= ~one_mask;     /* drop the integer bit: implicit in this format */
 753             mant[0] |= exponent << (LIMB_BITS-1 - fmt->exponent);
 754         } else {
 755             if (daz || is_zero(mant)) {
 756                 /* Flush denormals to zero */
 757                 error(ERR_WARNING|ERR_WARN_FL_UNDERFLOW|ERR_PASS1,
 758                       "underflow in floating-point constant");
 759                 goto zero;
 760             } else {
 761                 error(ERR_WARNING|ERR_WARN_FL_DENORM|ERR_PASS1,
 762                       "denormal floating-point constant");
 763             }
 764         }
 765         break;
 766     }
 767
 768     case FL_NORMAL:
 769         exponent += expmax - 1;         /* apply the exponent bias */
             /* make room for the sign and exponent fields above the mantissa */
 770         ieee_shr(mant, fmt->exponent+fmt->explicit);
 771         ieee_round(minus, mant, bits);
 772         /* did we scale up by one? */
 773         if (test_bit(mant, fmt->exponent+fmt->explicit-1)) {
 774             ieee_shr(mant, 1);
 775             exponent++;
 776             if (exponent >= (expmax << 1)-1) {
 777                     error(ERR_WARNING|ERR_WARN_FL_OVERFLOW|ERR_PASS1,
 778                           "overflow in floating-point constant");
 779                 type = FL_INFINITY;
 780                 goto overflow;
 781             }
 782         }
 783
 784         if (!fmt->explicit)
 785             mant[one_pos] &= ~one_mask; /* drop the integer bit: implicit in this format */
 786         mant[0] |= exponent << (LIMB_BITS-1 - fmt->exponent);
 787         break;
 788
 789     case FL_INFINITY:
 790     case FL_QNAN:
 791     case FL_SNAN:
 792     overflow:
             /* exponent field all ones; the mantissa tells the cases apart */
 793         memset(mant, 0, sizeof mant);
 794         mant[0] = (((fp_limb)1 << fmt->exponent)-1)
 795             << (LIMB_BITS-1 - fmt->exponent);
 796         if (fmt->explicit)
 797             mant[one_pos] |= one_mask;
 798         if (type == FL_QNAN)
 799             set_bit(mant, fmt->exponent+fmt->explicit+1);   /* top fraction bit */
 800         else if (type == FL_SNAN)
 801             set_bit(mant, fmt->exponent+fmt->explicit+fmt->mantissa);   /* lowest fraction bit */
 802         break;
 803     }
 804
 805     mant[0] |= minus ? LIMB_TOP_BIT : 0;
 806
         /* emit the bytes least significant first */
 807     for (i = fmt->bytes - 1; i >= 0; i--)
 808         *result++ = mant[i/LIMB_BYTES] >> (((LIMB_BYTES-1)-(i%LIMB_BYTES))*8);
 809
 810     return 1;                   /* success */
 811 }
 812
 813 int float_const(const char *number, int sign, uint8_t * result,
 814                 int bytes, efunc err)
 815 {
 816     error = err;
 817
 818     switch (bytes) {
 819     case 1:
 820         return to_float(number, sign, result, &ieee_8);
 821     case 2:
 822         return to_float(number, sign, result, &ieee_16);
 823     case 4:
 824         return to_float(number, sign, result, &ieee_32);
 825     case 8:
 826         return to_float(number, sign, result, &ieee_64);
 827     case 10:
 828         return to_float(number, sign, result, &ieee_80);
 829     case 16:
 830         return to_float(number, sign, result, &ieee_128);
 831     default:
 832         error(ERR_PANIC, "strange value %d passed to float_const", bytes);
 833         return 0;
 834     }
 835 }
 836
 837 /* Set floating-point options */
 838 int float_option(const char *option)
 839 {
 840     if (!nasm_stricmp(option, "daz")) {
 841         daz = true;
 842         return 0;
 843     } else if (!nasm_stricmp(option, "nodaz")) {
 844         daz = false;
 845         return 0;
 846     } else if (!nasm_stricmp(option, "near")) {
 847         rc = FLOAT_RC_NEAR;
 848         return 0;
 849     } else if (!nasm_stricmp(option, "down")) {
 850         rc = FLOAT_RC_DOWN;
 851         return 0;
 852     } else if (!nasm_stricmp(option, "up")) {
 853         rc = FLOAT_RC_UP;
 854         return 0;
 855     } else if (!nasm_stricmp(option, "zero")) {
 856         rc = FLOAT_RC_ZERO;
 857         return 0;
 858     } else if (!nasm_stricmp(option, "default")) {
 859         rc = FLOAT_RC_NEAR;
 860         daz = false;
 861         return 0;
 862     } else {
 863         return -1;              /* Unknown option */
 864     }
 865 }