third-party/mruby/mrbgems/mruby-sprintf/src/sprintf.c

   1 /*
   2 ** sprintf.c - Kernel.#sprintf
   3 **
   4 ** See Copyright Notice in mruby.h
   5 */
   6
   7 #include <mruby.h>
   8 #include <limits.h>
   9 #include <string.h>
  10 #include <mruby/string.h>
  11 #include <mruby/hash.h>
  12 #include <mruby/numeric.h>
  13 #ifndef MRB_WITHOUT_FLOAT
  14 #include <math.h>
  15 #endif
  16 #include <ctype.h>
  17
/* Digit-count estimate for an N-bit quantity: N * 146/485 + 1, where the
   fraction 146/485 approximates log10(2). */
#define BIT_DIGITS(N)   (((N)*146)/485 + 1)  /* log10(2) =~ 146/485 */
/* Modulus applied to the shift count inside EXTENDSIGN. */
#define BITSPERDIG MRB_INT_BIT
/* Evaluates to (~0U << n) shifted right by (n*l) % BITSPERDIG, masked down
   to the low n bits.  remove_sign_bits() ORs the result into the leading
   octal digit, passing n = 3 and l = the digit count of the string. */
#define EXTENDSIGN(n, l) (((~0U << (n)) >> (((n)*(l)) % BITSPERDIG)) & ~(~0U << (n)))

/* Formatting engine called by mrb_f_sprintf() with the format string and
   the remaining arguments. */
mrb_value mrb_str_format(mrb_state *, mrb_int, const mrb_value *, mrb_value);
#ifndef MRB_WITHOUT_FLOAT
/* Forward declaration of a helper that is only compiled when floats are
   enabled. */
static void fmt_setup(char*,size_t,int,int,mrb_int,mrb_int);
#endif
  26
  27 static char*
  28 remove_sign_bits(char *str, int base)
  29 {
  30   char *t;
  31
  32   t = str;
  33   if (base == 16) {
  34     while (*t == 'f') {
  35       t++;
  36     }
  37   }
  38   else if (base == 8) {
  39     *t |= EXTENDSIGN(3, strlen(t));
  40     while (*t == '7') {
  41       t++;
  42     }
  43   }
  44   else if (base == 2) {
  45     while (*t == '1') {
  46       t++;
  47     }
  48   }
  49
  50   return t;
  51 }
  52
  53 static char
  54 sign_bits(int base, const char *p)
  55 {
  56   char c;
  57
  58   switch (base) {
  59   case 16:
  60     if (*p == 'X') c = 'F';
  61     else c = 'f';
  62     break;
  63   case 8:
  64     c = '7'; break;
  65   case 2:
  66     c = '1'; break;
  67   default:
  68     c = '.'; break;
  69   }
  70   return c;
  71 }
  72
  73 static mrb_value
  74 mrb_fix2binstr(mrb_state *mrb, mrb_value x, int base)
  75 {
  76   char buf[66], *b = buf + sizeof buf;
  77   mrb_int num = mrb_fixnum(x);
  78   uint64_t val = (uint64_t)num;
  79   char d;
  80
  81   if (base != 2) {
  82     mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid radix %d", base);
  83   }
  84   if (val == 0) {
  85     return mrb_str_new_lit(mrb, "0");
  86   }
  87   *--b = '\0';
  88   do {
  89     *--b = mrb_digitmap[(int)(val % base)];
  90   } while (val /= base);
  91
  92   if (num < 0) {
  93     b = remove_sign_bits(b, base);
  94     switch (base) {
  95     case 16: d = 'f'; break;
  96     case 8:  d = '7'; break;
  97     case 2:  d = '1'; break;
  98     default: d = 0;   break;
  99     }
 100
 101     if (d && *b != d) {
 102       *--b = d;
 103     }
 104   }
 105
 106   return mrb_str_new_cstr(mrb, b);
 107 }
 108
/* Bit flags collected while one format specification is parsed. */
#define FNONE  0    /* nothing seen yet */
#define FSHARP 1    /* '#' flag */
#define FMINUS 2    /* '-' flag; also set when a '*' width is negative */
#define FPLUS  4    /* '+' flag */
#define FZERO  8    /* '0' flag */
#define FSPACE 16   /* ' ' flag */
#define FWIDTH 32   /* a width was given */
#define FPREC  64   /* a precision is in effect (cleared again for a negative '*' precision) */
#define FPREC0 128  /* a '.' was seen; used for the "given twice"/"after precision" checks */

/*
 * Makes room for l more bytes in the output.  bsiz is doubled until
 * l < bsiz - blen; ArgumentError "too big specifier" is raised once bsiz
 * exceeds MRB_INT_MAX/2.  Afterwards `result` is resized to bsiz and `buf`
 * is re-read from it -- this happens on every invocation, whether or not
 * bsiz changed.  Relies on mrb, result, buf, bsiz and blen being in scope
 * at the point of use.  The argument is evaluated more than once.
 */
#define CHECK(l) do {\
  while ((l) >= bsiz - blen) {\
    if (bsiz > MRB_INT_MAX/2) mrb_raise(mrb, E_ARGUMENT_ERROR, "too big specifier"); \
    bsiz*=2;\
  }\
  mrb_str_resize(mrb, result, bsiz);\
  buf = RSTRING_PTR(result);\
} while (0)

/* Appends l bytes starting at s to the output and advances blen by l. */
#define PUSH(s, l) do { \
  CHECK(l);\
  memcpy(&buf[blen], s, l);\
  blen += (mrb_int)(l);\
} while (0)

/* Appends l copies of the byte c to the output and advances blen by l. */
#define FILL(c, l) do { \
  CHECK(l);\
  memset(&buf[blen], c, l);\
  blen += (l);\
} while (0)
 139
 140 static void
 141 check_next_arg(mrb_state *mrb, int posarg, int nextarg)
 142 {
 143   switch (posarg) {
 144   case -1:
 145     mrb_raisef(mrb, E_ARGUMENT_ERROR, "unnumbered(%d) mixed with numbered", nextarg);
 146     break;
 147   case -2:
 148     mrb_raisef(mrb, E_ARGUMENT_ERROR, "unnumbered(%d) mixed with named", nextarg);
 149     break;
 150   default:
 151     break;
 152   }
 153 }
 154
 155 static void
 156 check_pos_arg(mrb_state *mrb, int posarg, mrb_int n)
 157 {
 158   if (posarg > 0) {
 159     mrb_raisef(mrb, E_ARGUMENT_ERROR, "numbered(%i) after unnumbered(%d)",
 160                n, posarg);
 161   }
 162   if (posarg == -2) {
 163     mrb_raisef(mrb, E_ARGUMENT_ERROR, "numbered(%i) after named", n);
 164   }
 165   if (n < 1) {
 166     mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid index - %i$", n);
 167   }
 168 }
 169
 170 static void
 171 check_name_arg(mrb_state *mrb, int posarg, const char *name, size_t len)
 172 {
 173   if (posarg > 0) {
 174     mrb_raisef(mrb, E_ARGUMENT_ERROR, "named%l after unnumbered(%d)",
 175                name, len, posarg);
 176   }
 177   if (posarg == -1) {
 178     mrb_raisef(mrb, E_ARGUMENT_ERROR, "named%l after numbered", name, len);
 179   }
 180 }
 181
/*
 * Argument-fetching helpers.  They expand inside the formatting function
 * and rely on its locals (mrb, argc, argv, posarg, nextarg, nextvalue,
 * hash, p, end, t, n) being in scope.
 */

/* Takes the next unnumbered argument: validates the mode with
   check_next_arg(), stores the consumed index in posarg and advances
   nextarg. */
#define GETNEXTARG() (\
  check_next_arg(mrb, posarg, nextarg),\
  (posarg = nextarg++, GETNTHARG(posarg)))

/* Value for the current conversion: nextvalue when a numbered or named
   reference already supplied one, the next unnumbered argument otherwise. */
#define GETARG() (!mrb_undef_p(nextvalue) ? nextvalue : GETNEXTARG())

/* Fetches argument n for an "n$" reference and marks the format as using
   numbered arguments (posarg = -1). */
#define GETPOSARG(n) (\
  check_pos_arg(mrb, posarg, n),\
  (posarg = -1, GETNTHARG(n)))

/* argv[nth], or ArgumentError "too few arguments" when nth >= argc.
   Note that nth is expanded without parentheses. */
#define GETNTHARG(nth) \
  ((nth >= argc) ? (mrb_raise(mrb, E_ARGUMENT_ERROR, "too few arguments"), mrb_undef_value()) : argv[nth])

/* Looks id up in the hash returned by get_hash() and marks the format as
   using named arguments (posarg = -2).  Yields the undef value when the
   key is missing. */
#define GETNAMEARG(id, name, len) (\
  check_name_arg(mrb, posarg, name, len),\
  (posarg = -2, mrb_hash_fetch(mrb, get_hash(mrb, &hash, argc, argv), id, mrb_undef_value())))

/*
 * Accumulates the run of decimal digits at p into n.  Raises ArgumentError
 * "<val> too big" (val is stringified) before n could exceed MRB_INT_MAX,
 * and "malformed format string" when the format ends right after the
 * digits.  This expands to a bare for-statement followed by an
 * if-statement; it is not wrapped in do { } while (0).
 */
#define GETNUM(n, val) \
  for (; p < end && ISDIGIT(*p); p++) {\
    if (n > (MRB_INT_MAX - (*p - '0'))/10) {\
      mrb_raise(mrb, E_ARGUMENT_ERROR, #val " too big"); \
    } \
    n = 10 * n + (*p - '0'); \
  } \
  if (p >= end) { \
    mrb_raise(mrb, E_ARGUMENT_ERROR, "malformed format string - %*[0-9]"); \
  }

/*
 * Handles a '*' at p: remembers its position in t, steps past it and reads
 * an optional number into n.  When a '$' follows, argument n is fetched and
 * p is left on the '$'; otherwise the next unnumbered argument is taken and
 * p is moved back to the '*'.  The argument is converted with mrb_int() and
 * stored in num.
 * NOTE(review): GETNUM receives the literal token `val`, which is not a
 * parameter of this macro, so its overflow message always reads
 * "val too big".
 */
#define GETASTER(num) do { \
  mrb_value tmp_v; \
  t = p++; \
  n = 0; \
  GETNUM(n, val); \
  if (*p == '$') { \
    tmp_v = GETPOSARG(n); \
  } \
  else { \
    tmp_v = GETNEXTARG(); \
    p = t; \
  } \
  num = mrb_int(mrb, tmp_v); \
} while (0)
 224
 225 static mrb_value
 226 get_hash(mrb_state *mrb, mrb_value *hash, mrb_int argc, const mrb_value *argv)
 227 {
 228   mrb_value tmp;
 229
 230   if (!mrb_undef_p(*hash)) return *hash;
 231   if (argc != 2) {
 232     mrb_raise(mrb, E_ARGUMENT_ERROR, "one hash required");
 233   }
 234   tmp = mrb_check_hash_type(mrb, argv[1]);
 235   if (mrb_nil_p(tmp)) {
 236     mrb_raise(mrb, E_ARGUMENT_ERROR, "one hash required");
 237   }
 238   return (*hash = tmp);
 239 }
 240
 241 /*
 242  *  call-seq:
 243  *     format(format_string [, arguments...] )   -> string
 244  *     sprintf(format_string [, arguments...] )  -> string
 245  *
 246  *  Returns the string resulting from applying <i>format_string</i> to
 247  *  any additional arguments.  Within the format string, any characters
 248  *  other than format sequences are copied to the result.
 249  *
  250  *  The syntax of a format sequence is as follows.
 251  *
 252  *    %[flags][width][.precision]type
 253  *
 254  *  A format
 255  *  sequence consists of a percent sign, followed by optional flags,
 256  *  width, and precision indicators, then terminated with a field type
 257  *  character.  The field type controls how the corresponding
 258  *  <code>sprintf</code> argument is to be interpreted, while the flags
 259  *  modify that interpretation.
 260  *
 261  *  The field type characters are:
 262  *
 263  *      Field |  Integer Format
 264  *      ------+--------------------------------------------------------------
 265  *        b   | Convert argument as a binary number.
 266  *            | Negative numbers will be displayed as a two's complement
 267  *            | prefixed with '..1'.
 268  *        B   | Equivalent to 'b', but uses an uppercase 0B for prefix
 269  *            | in the alternative format by #.
 270  *        d   | Convert argument as a decimal number.
 271  *        i   | Identical to 'd'.
 272  *        o   | Convert argument as an octal number.
 273  *            | Negative numbers will be displayed as a two's complement
 274  *            | prefixed with '..7'.
 275  *        u   | Identical to 'd'.
 276  *        x   | Convert argument as a hexadecimal number.
 277  *            | Negative numbers will be displayed as a two's complement
 278  *            | prefixed with '..f' (representing an infinite string of
 279  *            | leading 'ff's).
 280  *        X   | Equivalent to 'x', but uses uppercase letters.
 281  *
 282  *      Field |  Float Format
 283  *      ------+--------------------------------------------------------------
 284  *        e   | Convert floating point argument into exponential notation
 285  *            | with one digit before the decimal point as [-]d.dddddde[+-]dd.
 286  *            | The precision specifies the number of digits after the decimal
 287  *            | point (defaulting to six).
 288  *        E   | Equivalent to 'e', but uses an uppercase E to indicate
 289  *            | the exponent.
 290  *        f   | Convert floating point argument as [-]ddd.dddddd,
 291  *            | where the precision specifies the number of digits after
 292  *            | the decimal point.
 293  *        g   | Convert a floating point number using exponential form
 294  *            | if the exponent is less than -4 or greater than or
 295  *            | equal to the precision, or in dd.dddd form otherwise.
 296  *            | The precision specifies the number of significant digits.
 297  *        G   | Equivalent to 'g', but use an uppercase 'E' in exponent form.
 298  *        a   | Convert floating point argument as [-]0xh.hhhhp[+-]dd,
  299  *            | which consists of an optional sign, "0x", the fraction part
  300  *            | as hexadecimal, "p", and the exponent part as decimal.
 301  *        A   | Equivalent to 'a', but use uppercase 'X' and 'P'.
 302  *
 303  *      Field |  Other Format
 304  *      ------+--------------------------------------------------------------
 305  *        c   | Argument is the numeric code for a single character or
 306  *            | a single character string itself.
  307  *        p   | The value of argument.inspect.
 308  *        s   | Argument is a string to be substituted.  If the format
 309  *            | sequence contains a precision, at most that many characters
 310  *            | will be copied.
 311  *        %   | A percent sign itself will be displayed.  No argument taken.
 312  *
  313  *  The flags modify the behavior of the formats.
 314  *  The flag characters are:
 315  *
 316  *    Flag     | Applies to    | Meaning
 317  *    ---------+---------------+-----------------------------------------
 318  *    space    | bBdiouxX      | Leave a space at the start of
 319  *             | aAeEfgG       | non-negative numbers.
 320  *             | (numeric fmt) | For 'o', 'x', 'X', 'b' and 'B', use
 321  *             |               | a minus sign with absolute value for
 322  *             |               | negative values.
 323  *    ---------+---------------+-----------------------------------------
 324  *    (digit)$ | all           | Specifies the absolute argument number
 325  *             |               | for this field.  Absolute and relative
 326  *             |               | argument numbers cannot be mixed in a
 327  *             |               | sprintf string.
 328  *    ---------+---------------+-----------------------------------------
 329  *     #       | bBoxX         | Use an alternative format.
 330  *             | aAeEfgG       | For the conversions 'o', increase the precision
 331  *             |               | until the first digit will be '0' if
 332  *             |               | it is not formatted as complements.
 333  *             |               | For the conversions 'x', 'X', 'b' and 'B'
 334  *             |               | on non-zero, prefix the result with "0x",
 335  *             |               | "0X", "0b" and "0B", respectively.
 336  *             |               | For 'a', 'A', 'e', 'E', 'f', 'g', and 'G',
 337  *             |               | force a decimal point to be added,
 338  *             |               | even if no digits follow.
 339  *             |               | For 'g' and 'G', do not remove trailing zeros.
 340  *    ---------+---------------+-----------------------------------------
 341  *    +        | bBdiouxX      | Add a leading plus sign to non-negative
 342  *             | aAeEfgG       | numbers.
 343  *             | (numeric fmt) | For 'o', 'x', 'X', 'b' and 'B', use
 344  *             |               | a minus sign with absolute value for
 345  *             |               | negative values.
 346  *    ---------+---------------+-----------------------------------------
 347  *    -        | all           | Left-justify the result of this conversion.
 348  *    ---------+---------------+-----------------------------------------
 349  *    0 (zero) | bBdiouxX      | Pad with zeros, not spaces.
 350  *             | aAeEfgG       | For 'o', 'x', 'X', 'b' and 'B', radix-1
 351  *             | (numeric fmt) | is used for negative numbers formatted as
 352  *             |               | complements.
 353  *    ---------+---------------+-----------------------------------------
 354  *    *        | all           | Use the next argument as the field width.
 355  *             |               | If negative, left-justify the result. If the
 356  *             |               | asterisk is followed by a number and a dollar
 357  *             |               | sign, use the indicated argument as the width.
 358  *
 359  *  Examples of flags:
 360  *
 361  *   # '+' and space flag specifies the sign of non-negative numbers.
 362  *   sprintf("%d", 123)  #=> "123"
 363  *   sprintf("%+d", 123) #=> "+123"
 364  *   sprintf("% d", 123) #=> " 123"
 365  *
 366  *   # '#' flag for 'o' increases number of digits to show '0'.
 367  *   # '+' and space flag changes format of negative numbers.
 368  *   sprintf("%o", 123)   #=> "173"
 369  *   sprintf("%#o", 123)  #=> "0173"
 370  *   sprintf("%+o", -123) #=> "-173"
 371  *   sprintf("%o", -123)  #=> "..7605"
 372  *   sprintf("%#o", -123) #=> "..7605"
 373  *
 374  *   # '#' flag for 'x' add a prefix '0x' for non-zero numbers.
 375  *   # '+' and space flag disables complements for negative numbers.
 376  *   sprintf("%x", 123)   #=> "7b"
 377  *   sprintf("%#x", 123)  #=> "0x7b"
 378  *   sprintf("%+x", -123) #=> "-7b"
 379  *   sprintf("%x", -123)  #=> "..f85"
 380  *   sprintf("%#x", -123) #=> "0x..f85"
 381  *   sprintf("%#x", 0)    #=> "0"
 382  *
 383  *   # '#' for 'X' uses the prefix '0X'.
 384  *   sprintf("%X", 123)  #=> "7B"
 385  *   sprintf("%#X", 123) #=> "0X7B"
 386  *
 387  *   # '#' flag for 'b' add a prefix '0b' for non-zero numbers.
 388  *   # '+' and space flag disables complements for negative numbers.
 389  *   sprintf("%b", 123)   #=> "1111011"
 390  *   sprintf("%#b", 123)  #=> "0b1111011"
 391  *   sprintf("%+b", -123) #=> "-1111011"
 392  *   sprintf("%b", -123)  #=> "..10000101"
 393  *   sprintf("%#b", -123) #=> "0b..10000101"
 394  *   sprintf("%#b", 0)    #=> "0"
 395  *
 396  *   # '#' for 'B' uses the prefix '0B'.
 397  *   sprintf("%B", 123)  #=> "1111011"
 398  *   sprintf("%#B", 123) #=> "0B1111011"
 399  *
 400  *   # '#' for 'e' forces to show the decimal point.
 401  *   sprintf("%.0e", 1)  #=> "1e+00"
 402  *   sprintf("%#.0e", 1) #=> "1.e+00"
 403  *
 404  *   # '#' for 'f' forces to show the decimal point.
 405  *   sprintf("%.0f", 1234)  #=> "1234"
 406  *   sprintf("%#.0f", 1234) #=> "1234."
 407  *
 408  *   # '#' for 'g' forces to show the decimal point.
 409  *   # It also disables stripping lowest zeros.
 410  *   sprintf("%g", 123.4)   #=> "123.4"
 411  *   sprintf("%#g", 123.4)  #=> "123.400"
 412  *   sprintf("%g", 123456)  #=> "123456"
 413  *   sprintf("%#g", 123456) #=> "123456."
 414  *
 415  *  The field width is an optional integer, followed optionally by a
 416  *  period and a precision.  The width specifies the minimum number of
 417  *  characters that will be written to the result for this field.
 418  *
 419  *  Examples of width:
 420  *
 421  *   # padding is done by spaces,       width=20
 422  *   # 0 or radix-1.             <------------------>
 423  *   sprintf("%20d", 123)   #=> "                 123"
 424  *   sprintf("%+20d", 123)  #=> "                +123"
 425  *   sprintf("%020d", 123)  #=> "00000000000000000123"
 426  *   sprintf("%+020d", 123) #=> "+0000000000000000123"
 427  *   sprintf("% 020d", 123) #=> " 0000000000000000123"
 428  *   sprintf("%-20d", 123)  #=> "123                 "
 429  *   sprintf("%-+20d", 123) #=> "+123                "
 430  *   sprintf("%- 20d", 123) #=> " 123                "
 431  *   sprintf("%020x", -123) #=> "..ffffffffffffffff85"
 432  *
 433  *  For
 434  *  numeric fields, the precision controls the number of decimal places
 435  *  displayed.  For string fields, the precision determines the maximum
 436  *  number of characters to be copied from the string.  (Thus, the format
 437  *  sequence <code>%10.10s</code> will always contribute exactly ten
 438  *  characters to the result.)
 439  *
 440  *  Examples of precisions:
 441  *
 442  *   # precision for 'd', 'o', 'x' and 'b' is
 443  *   # minimum number of digits               <------>
 444  *   sprintf("%20.8d", 123)  #=> "            00000123"
 445  *   sprintf("%20.8o", 123)  #=> "            00000173"
 446  *   sprintf("%20.8x", 123)  #=> "            0000007b"
 447  *   sprintf("%20.8b", 123)  #=> "            01111011"
 448  *   sprintf("%20.8d", -123) #=> "           -00000123"
 449  *   sprintf("%20.8o", -123) #=> "            ..777605"
 450  *   sprintf("%20.8x", -123) #=> "            ..ffff85"
 451  *   sprintf("%20.8b", -11)  #=> "            ..110101"
 452  *
 453  *   # "0x" and "0b" for '#x' and '#b' is not counted for
 454  *   # precision but "0" for '#o' is counted.  <------>
 455  *   sprintf("%#20.8d", 123)  #=> "            00000123"
 456  *   sprintf("%#20.8o", 123)  #=> "            00000173"
 457  *   sprintf("%#20.8x", 123)  #=> "          0x0000007b"
 458  *   sprintf("%#20.8b", 123)  #=> "          0b01111011"
 459  *   sprintf("%#20.8d", -123) #=> "           -00000123"
 460  *   sprintf("%#20.8o", -123) #=> "            ..777605"
 461  *   sprintf("%#20.8x", -123) #=> "          0x..ffff85"
 462  *   sprintf("%#20.8b", -11)  #=> "          0b..110101"
 463  *
 464  *   # precision for 'e' is number of
 465  *   # digits after the decimal point           <------>
 466  *   sprintf("%20.8e", 1234.56789) #=> "      1.23456789e+03"
 467  *
 468  *   # precision for 'f' is number of
 469  *   # digits after the decimal point               <------>
 470  *   sprintf("%20.8f", 1234.56789) #=> "       1234.56789000"
 471  *
 472  *   # precision for 'g' is number of
 473  *   # significant digits                          <------->
 474  *   sprintf("%20.8g", 1234.56789) #=> "           1234.5679"
 475  *
 476  *   #                                         <------->
 477  *   sprintf("%20.8g", 123456789)  #=> "       1.2345679e+08"
 478  *
 479  *   # precision for 's' is
 480  *   # maximum number of characters                    <------>
 481  *   sprintf("%20.8s", "string test") #=> "            string t"
 482  *
 483  *  Examples:
 484  *
 485  *     sprintf("%d %04x", 123, 123)               #=> "123 007b"
 486  *     sprintf("%08b '%4s'", 123, 123)            #=> "01111011 ' 123'"
 487  *     sprintf("%1$*2$s %2$d %1$s", "hello", 8)   #=> "   hello 8 hello"
 488  *     sprintf("%1$*2$s %2$d", "hello", -8)       #=> "hello    -8"
 489  *     sprintf("%+g:% g:%-g", 1.23, 1.23, 1.23)   #=> "+1.23: 1.23:1.23"
 490  *     sprintf("%u", -123)                        #=> "-123"
 491  *
 492  *  For more complex formatting, Ruby supports a reference by name.
 493  *  %<name>s style uses format style, but %{name} style doesn't.
 494  *
  495  *  Examples:
 496  *    sprintf("%<foo>d : %<bar>f", { :foo => 1, :bar => 2 })
 497  *      #=> 1 : 2.000000
 498  *    sprintf("%{foo}f", { :foo => 1 })
 499  *      # => "1f"
 500  */
 501
 502 mrb_value
 503 mrb_f_sprintf(mrb_state *mrb, mrb_value obj)
 504 {
 505   mrb_int argc;
 506   mrb_value *argv;
 507
 508   mrb_get_args(mrb, "*", &argv, &argc);
 509
 510   if (argc <= 0) {
 511     mrb_raise(mrb, E_ARGUMENT_ERROR, "too few arguments");
 512     return mrb_nil_value();
 513   }
 514   else {
 515     return mrb_str_format(mrb, argc - 1, argv + 1, argv[0]);
 516   }
 517 }
 518
 519 static int
 520 mrb_int2str(char *buf, size_t len, mrb_int n)
 521 {
 522 #ifdef MRB_DISABLE_STDIO
 523   char *bufend = buf + len;
 524   char *p = bufend - 1;
 525
 526   if (len < 1) return -1;
 527
 528   *p -- = '\0';
 529   len --;
 530
 531   if (n < 0) {
 532     if (len < 1) return -1;
 533
 534     *p -- = '-';
 535     len --;
 536     n = -n;
 537   }
 538
 539   if (n > 0) {
 540     for (; n > 0; len --, n /= 10) {
 541       if (len < 1) return -1;
 542
 543       *p -- = '0' + (n % 10);
 544     }
 545     p ++;
 546   }
 547   else if (len > 0) {
 548     *p = '0';
 549     len --;
 550   }
 551   else {
 552     return -1;
 553   }
 554
 555   memmove(buf, p, bufend - p);
 556
 557   return bufend - p - 1;
 558 #else
 559   return snprintf(buf, len, "%" MRB_PRId, n);
 560 #endif /* MRB_DISABLE_STDIO */
 561 }
 562
 563 mrb_value
 564 mrb_str_format(mrb_state *mrb, mrb_int argc, const mrb_value *argv, mrb_value fmt)
 565 {
 566   const char *p, *end;
 567   char *buf;
 568   mrb_int blen;
 569   mrb_int bsiz;
 570   mrb_value result;
 571   mrb_int n;
 572   mrb_int width;
 573   mrb_int prec;
 574   int nextarg = 1;
 575   int posarg = 0;
 576   mrb_value nextvalue;
 577   mrb_value str;
 578   mrb_value hash = mrb_undef_value();
 579
 580 #define CHECK_FOR_WIDTH(f)                                                  \
 581   if ((f) & FWIDTH) {                                                       \
 582     mrb_raise(mrb, E_ARGUMENT_ERROR, "width given twice");         \
 583   }                                                                         \
 584   if ((f) & FPREC0) {                                                       \
 585     mrb_raise(mrb, E_ARGUMENT_ERROR, "width after precision");     \
 586   }
 587 #define CHECK_FOR_FLAGS(f)                                                  \
 588   if ((f) & FWIDTH) {                                                       \
 589     mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after width");          \
 590   }                                                                         \
 591   if ((f) & FPREC0) {                                                       \
 592     mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after precision");      \
 593   }
 594
 595   ++argc;
 596   --argv;
 597   mrb_to_str(mrb, fmt);
 598   p = RSTRING_PTR(fmt);
 599   end = p + RSTRING_LEN(fmt);
 600   blen = 0;
 601   bsiz = 120;
 602   result = mrb_str_new_capa(mrb, bsiz);
 603   buf = RSTRING_PTR(result);
 604   memset(buf, 0, bsiz);
 605
 606   for (; p < end; p++) {
 607     const char *t;
 608     mrb_sym id = 0;
 609     int flags = FNONE;
 610
 611     for (t = p; t < end && *t != '%'; t++) ;
 612     if (t + 1 == end) ++t;
 613     PUSH(p, t - p);
 614     if (t >= end)
 615       goto sprint_exit; /* end of fmt string */
 616
 617     p = t + 1;    /* skip '%' */
 618
 619     width = prec = -1;
 620     nextvalue = mrb_undef_value();
 621
 622 retry:
 623     switch (*p) {
 624       default:
 625         mrb_raisef(mrb, E_ARGUMENT_ERROR, "malformed format string - %%%c", *p);
 626         break;
 627
 628       case ' ':
 629         CHECK_FOR_FLAGS(flags);
 630         flags |= FSPACE;
 631         p++;
 632         goto retry;
 633
 634       case '#':
 635         CHECK_FOR_FLAGS(flags);
 636         flags |= FSHARP;
 637         p++;
 638         goto retry;
 639
 640       case '+':
 641         CHECK_FOR_FLAGS(flags);
 642         flags |= FPLUS;
 643         p++;
 644         goto retry;
 645
 646       case '-':
 647         CHECK_FOR_FLAGS(flags);
 648         flags |= FMINUS;
 649         p++;
 650         goto retry;
 651
 652       case '0':
 653         CHECK_FOR_FLAGS(flags);
 654         flags |= FZERO;
 655         p++;
 656         goto retry;
 657
 658       case '1': case '2': case '3': case '4':
 659       case '5': case '6': case '7': case '8': case '9':
 660         n = 0;
 661         GETNUM(n, width);
 662         if (*p == '$') {
 663           if (!mrb_undef_p(nextvalue)) {
 664             mrb_raisef(mrb, E_ARGUMENT_ERROR, "value given twice - %i$", n);
 665           }
 666           nextvalue = GETPOSARG(n);
 667           p++;
 668           goto retry;
 669         }
 670         CHECK_FOR_WIDTH(flags);
 671         width = n;
 672         flags |= FWIDTH;
 673         goto retry;
 674
 675       case '<':
 676       case '{': {
 677         const char *start = p;
 678         char term = (*p == '<') ? '>' : '}';
 679         mrb_value symname;
 680
 681         for (; p < end && *p != term; )
 682           p++;
 683         if (id) {
 684           mrb_raisef(mrb, E_ARGUMENT_ERROR, "name%l after <%n>",
 685                      start, p - start + 1, id);
 686         }
 687         symname = mrb_str_new(mrb, start + 1, p - start - 1);
 688         id = mrb_intern_str(mrb, symname);
 689         nextvalue = GETNAMEARG(mrb_symbol_value(id), start, p - start + 1);
 690         if (mrb_undef_p(nextvalue)) {
 691           mrb_raisef(mrb, E_KEY_ERROR, "key%l not found", start, p - start + 1);
 692         }
 693         if (term == '}') goto format_s;
 694         p++;
 695         goto retry;
 696       }
 697
 698       case '*':
 699         CHECK_FOR_WIDTH(flags);
 700         flags |= FWIDTH;
 701         GETASTER(width);
 702         if (width < 0) {
 703           flags |= FMINUS;
 704           width = -width;
 705         }
 706         p++;
 707         goto retry;
 708
 709       case '.':
 710         if (flags & FPREC0) {
 711           mrb_raise(mrb, E_ARGUMENT_ERROR, "precision given twice");
 712         }
 713         flags |= FPREC|FPREC0;
 714
 715         prec = 0;
 716         p++;
 717         if (*p == '*') {
 718           GETASTER(prec);
 719           if (prec < 0) {  /* ignore negative precision */
 720             flags &= ~FPREC;
 721           }
 722           p++;
 723           goto retry;
 724         }
 725
 726         GETNUM(prec, precision);
 727         goto retry;
 728
 729       case '\n':
 730       case '\0':
 731         p--;
 732         /* fallthrough */
 733       case '%':
 734         if (flags != FNONE) {
 735           mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid format character - %");
 736         }
 737         PUSH("%", 1);
 738         break;
 739
 740       case 'c': {
 741         mrb_value val = GETARG();
 742         mrb_value tmp;
 743         char *c;
 744
 745         tmp = mrb_check_string_type(mrb, val);
 746         if (!mrb_nil_p(tmp)) {
 747           if (RSTRING_LEN(tmp) != 1) {
 748             mrb_raise(mrb, E_ARGUMENT_ERROR, "%c requires a character");
 749           }
 750         }
 751         else if (mrb_fixnum_p(val)) {
 752           mrb_int n = mrb_fixnum(val);
 753 #ifndef MRB_UTF8_STRING
 754           char buf[1];
 755
 756           buf[0] = (char)n&0xff;
 757           tmp = mrb_str_new(mrb, buf, 1);
 758 #else
 759           if (n < 0x80) {
 760             char buf[1];
 761
 762             buf[0] = (char)n;
 763             tmp = mrb_str_new(mrb, buf, 1);
 764           }
 765           else {
 766             tmp = mrb_funcall(mrb, val, "chr", 0);
 767             mrb_check_type(mrb, tmp, MRB_TT_STRING);
 768           }
 769 #endif
 770         }
 771         else {
 772           mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid character");
 773         }
 774         c = RSTRING_PTR(tmp);
 775         n = RSTRING_LEN(tmp);
 776         if (!(flags & FWIDTH)) {
 777           PUSH(c, n);
 778         }
 779         else if ((flags & FMINUS)) {
 780           PUSH(c, n);
 781           if (width>0) FILL(' ', width-1);
 782         }
 783         else {
 784           if (width>0) FILL(' ', width-1);
 785           PUSH(c, n);
 786         }
 787       }
 788       break;
 789
 790       case 's':
 791       case 'p':
 792   format_s:
 793       {
 794         mrb_value arg = GETARG();
 795         mrb_int len;
 796         mrb_int slen;
 797
 798         if (*p == 'p') arg = mrb_inspect(mrb, arg);
 799         str = mrb_obj_as_string(mrb, arg);
 800         len = RSTRING_LEN(str);
 801         if (RSTRING(result)->flags & MRB_STR_EMBED) {
 802           mrb_int tmp_n = len;
 803           RSTRING(result)->flags &= ~MRB_STR_EMBED_LEN_MASK;
 804           RSTRING(result)->flags |= tmp_n << MRB_STR_EMBED_LEN_SHIFT;
 805         }
 806         else {
 807           RSTRING(result)->as.heap.len = blen;
 808         }
 809         if (flags&(FPREC|FWIDTH)) {
 810           slen = RSTRING_LEN(str);
 811           if (slen < 0) {
 812             mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid mbstring sequence");
 813           }
 814           if ((flags&FPREC) && (prec < slen)) {
 815             char *p = RSTRING_PTR(str) + prec;
 816             slen = prec;
 817             len = (mrb_int)(p - RSTRING_PTR(str));
 818           }
 819           /* need to adjust multi-byte string pos */
 820           if ((flags&FWIDTH) && (width > slen)) {
 821             width -= (int)slen;
 822             if (!(flags&FMINUS)) {
 823               FILL(' ', width);
 824             }
 825             PUSH(RSTRING_PTR(str), len);
 826             if (flags&FMINUS) {
 827               FILL(' ', width);
 828             }
 829             break;
 830           }
 831         }
 832         PUSH(RSTRING_PTR(str), len);
 833       }
 834       break;
 835
 836       case 'd':
 837       case 'i':
 838       case 'o':
 839       case 'x':
 840       case 'X':
 841       case 'b':
 842       case 'B':
 843       case 'u': {
 844         mrb_value val = GETARG();
 845         char nbuf[69], *s;
 846         const char *prefix = NULL;
 847         int sign = 0, dots = 0;
 848         char sc = 0;
 849         mrb_int v = 0;
 850         int base;
 851         mrb_int len;
 852
 853         if (flags & FSHARP) {
 854           switch (*p) {
 855             case 'o': prefix = "0"; break;
 856             case 'x': prefix = "0x"; break;
 857             case 'X': prefix = "0X"; break;
 858             case 'b': prefix = "0b"; break;
 859             case 'B': prefix = "0B"; break;
 860             default: break;
 861           }
 862         }
 863
 864   bin_retry:
 865         switch (mrb_type(val)) {
 866 #ifndef MRB_WITHOUT_FLOAT
 867           case MRB_TT_FLOAT:
 868             val = mrb_flo_to_fixnum(mrb, val);
 869             if (mrb_fixnum_p(val)) goto bin_retry;
 870             break;
 871 #endif
 872           case MRB_TT_STRING:
 873             val = mrb_str_to_inum(mrb, val, 0, TRUE);
 874             goto bin_retry;
 875           case MRB_TT_FIXNUM:
 876             v = mrb_fixnum(val);
 877             break;
 878           default:
 879             val = mrb_Integer(mrb, val);
 880             goto bin_retry;
 881         }
 882
 883         switch (*p) {
 884           case 'o':
 885             base = 8; break;
 886           case 'x':
 887           case 'X':
 888             base = 16; break;
 889           case 'b':
 890           case 'B':
 891             base = 2; break;
 892           case 'u':
 893           case 'd':
 894           case 'i':
 895             sign = 1;
 896             /* fall through */
 897           default:
 898             base = 10; break;
 899         }
 900
 901         if (sign) {
 902           if (v >= 0) {
 903             if (flags & FPLUS) {
 904               sc = '+';
 905               width--;
 906             }
 907             else if (flags & FSPACE) {
 908               sc = ' ';
 909               width--;
 910             }
 911           }
 912           else {
 913             sc = '-';
 914             width--;
 915           }
 916           mrb_assert(base == 10);
 917           mrb_int2str(nbuf, sizeof(nbuf)-1, v);
 918           s = nbuf;
 919           if (v < 0) s++;       /* skip minus sign */
 920         }
 921         else {
 922           s = nbuf;
 923           if (v < 0) {
 924             dots = 1;
 925             val = mrb_fix2binstr(mrb, mrb_fixnum_value(v), base);
 926           }
 927           else {
 928             val = mrb_fixnum_to_str(mrb, mrb_fixnum_value(v), base);
 929           }
 930           strncpy(++s, RSTRING_PTR(val), sizeof(nbuf)-2);
 931           if (v < 0) {
 932             char d;
 933
 934             s = remove_sign_bits(s, base);
 935             switch (base) {
 936               case 16: d = 'f'; break;
 937               case 8:  d = '7'; break;
 938               case 2:  d = '1'; break;
 939               default: d = 0; break;
 940             }
 941
 942             if (d && *s != d) {
 943               *--s = d;
 944             }
 945           }
 946         }
 947         {
 948           size_t size;
 949           size = strlen(s);
 950           /* PARANOID: assert(size <= MRB_INT_MAX) */
 951           len = (mrb_int)size;
 952         }
 953
 954         if (*p == 'X') {
 955           char *pp = s;
 956           int c;
 957           while ((c = (int)(unsigned char)*pp) != 0) {
 958             *pp = toupper(c);
 959             pp++;
 960           }
 961         }
 962
 963         if (prefix && !prefix[1]) { /* octal */
 964           if (dots) {
 965             prefix = NULL;
 966           }
 967           else if (len == 1 && *s == '0') {
 968             len = 0;
 969             if (flags & FPREC) prec--;
 970           }
 971           else if ((flags & FPREC) && (prec > len)) {
 972             prefix = NULL;
 973           }
 974         }
 975         else if (len == 1 && *s == '0') {
 976           prefix = NULL;
 977         }
 978
 979         if (prefix) {
 980           size_t size;
 981           size = strlen(prefix);
 982           /* PARANOID: assert(size <= MRB_INT_MAX).
 983            *  this check is absolutely paranoid. */
 984           width -= (mrb_int)size;
 985         }
 986
 987         if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
 988           prec = width;
 989           width = 0;
 990         }
 991         else {
 992           if (prec < len) {
 993             if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
 994             prec = len;
 995           }
 996           width -= prec;
 997         }
 998
 999         if (!(flags&FMINUS) && width > 0) {
1000           FILL(' ', width);
1001           width = 0;
1002         }
1003
1004         if (sc) PUSH(&sc, 1);
1005
1006         if (prefix) {
1007           int plen = (int)strlen(prefix);
1008           PUSH(prefix, plen);
1009         }
1010         if (dots) {
1011           prec -= 2;
1012           width -= 2;
1013           PUSH("..", 2);
1014         }
1015
1016         if (prec > len) {
1017           CHECK(prec - len);
1018           if ((flags & (FMINUS|FPREC)) != FMINUS) {
1019             char c = '0';
1020             FILL(c, prec - len);
1021           } else if (v < 0) {
1022             char c = sign_bits(base, p);
1023             FILL(c, prec - len);
1024           }
1025         }
1026         PUSH(s, len);
1027         if (width > 0) {
1028           FILL(' ', width);
1029         }
1030       }
1031       break;
1032
1033 #ifndef MRB_WITHOUT_FLOAT
1034       case 'f':
1035       case 'g':
1036       case 'G':
1037       case 'e':
1038       case 'E':
1039       case 'a':
1040       case 'A': {
1041         mrb_value val = GETARG();
1042         double fval;
1043         mrb_int need = 6;
1044         char fbuf[64];
1045
1046         fval = mrb_float(mrb_Float(mrb, val));
1047         if (!isfinite(fval)) {
1048           const char *expr;
1049           const mrb_int elen = 3;
1050           char sign = '\0';
1051
1052           if (isnan(fval)) {
1053             expr = "NaN";
1054           }
1055           else {
1056             expr = "Inf";
1057           }
1058           need = elen;
1059           if (!isnan(fval) && fval < 0.0)
1060             sign = '-';
1061           else if (flags & (FPLUS|FSPACE))
1062             sign = (flags & FPLUS) ? '+' : ' ';
1063           if (sign)
1064             ++need;
1065           if ((flags & FWIDTH) && need < width)
1066             need = width;
1067
1068           if (need < 0) {
1069             mrb_raise(mrb, E_ARGUMENT_ERROR, "width too big");
1070           }
1071           FILL(' ', need);
1072           if (flags & FMINUS) {
1073             if (sign)
1074               buf[blen - need--] = sign;
1075             memcpy(&buf[blen - need], expr, elen);
1076           }
1077           else {
1078             if (sign)
1079               buf[blen - elen - 1] = sign;
1080             memcpy(&buf[blen - elen], expr, elen);
1081           }
1082           break;
1083         }
1084
1085         need = 0;
1086         if (*p != 'e' && *p != 'E') {
1087           int i;
1088           frexp(fval, &i);
1089           if (i > 0)
1090             need = BIT_DIGITS(i);
1091         }
1092         if (need > MRB_INT_MAX - ((flags&FPREC) ? prec : 6)) {
1093         too_big_width:
1094           mrb_raise(mrb, E_ARGUMENT_ERROR,
1095                     (width > prec ? "width too big" : "prec too big"));
1096         }
1097         need += (flags&FPREC) ? prec : 6;
1098         if ((flags&FWIDTH) && need < width)
1099           need = width;
1100         if (need > MRB_INT_MAX - 20) {
1101           goto too_big_width;
1102         }
1103         need += 20;
1104
1105         CHECK(need);
1106         fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec);
1107         n = mrb_float_to_cstr(mrb, &buf[blen], need, fbuf, fval);
1108         if (n < 0 || n >= need) {
1109           mrb_raise(mrb, E_RUNTIME_ERROR, "formatting error");
1110         }
1111         blen += n;
1112       }
1113       break;
1114 #endif
1115     }
1116     flags = FNONE;
1117   }
1118
1119   sprint_exit:
1120 #if 0
1121   /* XXX - We cannot validate the number of arguments if (digit)$ style used.
1122    */
1123   if (posarg >= 0 && nextarg < argc) {
1124     const char *mesg = "too many arguments for format string";
1125     if (mrb_test(ruby_debug)) mrb_raise(mrb, E_ARGUMENT_ERROR, mesg);
1126     if (mrb_test(ruby_verbose)) mrb_warn(mrb, "%s", mesg);
1127   }
1128 #endif
1129   mrb_str_resize(mrb, result, blen);
1130
1131   return result;
1132 }
1133
1134 #ifndef MRB_WITHOUT_FLOAT
/*
 * fmt_setup - assemble a printf-style conversion specification in buf.
 *
 * Writes, in this order: '%', one character for each flag bit set in
 * flags ('#' for FSHARP, '+' for FPLUS, '-' for FMINUS, '0' for FZERO,
 * ' ' for FSPACE), the width when FWIDTH is set, '.' followed by prec
 * when FPREC is set, the conversion character c, and a terminating NUL.
 *
 *   buf    destination buffer for the specification string
 *   size   capacity of buf in bytes; used only to derive the limit that
 *          is handed to mrb_int2str
 *   c      conversion character stored after flags/width/precision
 *   flags  bitmask tested against FSHARP, FPLUS, FMINUS, FZERO, FSPACE,
 *          FWIDTH and FPREC
 *   width  field width, emitted only when FWIDTH is set
 *   prec   precision, emitted only when FPREC is set
 *
 * NOTE(review): the single-character stores are not checked against
 * size, so the caller has to supply a buffer big enough for the longest
 * specification that can be produced.  The value returned by
 * mrb_int2str is used as the distance to advance buf -- presumably the
 * number of characters it wrote; confirm against its definition.
 */
1135 static void
1136 fmt_setup(char *buf, size_t size, int c, int flags, mrb_int width, mrb_int prec)
1137 {
1138   char *end = buf + size;  /* one past the end of buf; only bounds the mrb_int2str calls */
1139   int n;
1140
1141   *buf++ = '%';
1142   if (flags & FSHARP) *buf++ = '#';
1143   if (flags & FPLUS)  *buf++ = '+';
1144   if (flags & FMINUS) *buf++ = '-';
1145   if (flags & FZERO)  *buf++ = '0';
1146   if (flags & FSPACE) *buf++ = ' ';
1147
1148   if (flags & FWIDTH) {
1149     n = mrb_int2str(buf, end - buf, width);  /* remaining capacity is passed as the limit */
1150     buf += n;  /* n is not validated before being used as an offset */
1151   }
1152
1153   if (flags & FPREC) {
1154     *buf ++ = '.';
1155     n = mrb_int2str(buf, end - buf, prec);
1156     buf += n;
1157   }
1158
1159   *buf++ = c;  /* int c is narrowed to char by the store */
1160   *buf = '\0';
1161 }
1162 #endif