src/expr.c

   1 /* expr -- evaluate expressions.
   2    Copyright (C) 86, 1991-1997, 1999-2008 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  16
  17 /* Author: Mike Parker.
  18    Modified for arbitrary-precision calculation by James Youngman.
  19
  20    This program evaluates expressions.  Each token (operator, operand,
  21    parenthesis) of the expression must be a separate argument.  The
  22    parser used is a reasonably general one, though any incarnation of
  23    it is language-specific.  It is especially nice for expressions.
  24
  25    No parse tree is needed; a new node is evaluated immediately.
  26    One function can handle multiple operators all of equal precedence,
  27    provided they all associate ((x op x) op x).
  28
  29    Define EVAL_TRACE to print an evaluation trace.  */
  30
  31 #include <config.h>
  32 #include <stdio.h>
  33 #include <sys/types.h>
  34 #include "system.h"
  35
  36 #include <assert.h>
  37 #include <regex.h>
  38 #if HAVE_GMP
  39 #include <gmp.h>
  40 #endif
  41 #include "error.h"
  42 #include "quotearg.h"
  43 #include "strnumcmp.h"
  44 #include "xstrtol.h"
  45
  46 /* The official name of this program (e.g., no `g' prefix).  */
  47 #define PROGRAM_NAME "expr"
  48
  49 #define AUTHORS proper_name ("Mike Parker"), proper_name ("James Youngman")
  50
  /* Exit statuses used for errors.  (Statuses 0 and 1 are produced by
     main from the value of the expression itself.)  */
  enum
  {
    /* The expression is malformed: it does not conform to our grammar,
       which is an extension of the POSIX grammar for expressions.  */
    EXPR_INVALID = 2,

    /* Some internal failure happened, such as arithmetic overflow or
       storage exhaustion.  */
    EXPR_FAILURE = 3
  };
  63
  /* Discriminator for the kinds of value a VALUE can hold.
     Elsewhere in this file a value is called "arithmetic" when it is
     either integer or mp_integer.  Values only ever become mp_integer
     when GNU MP is available, but the enumerator itself is always
     defined so that code can test for it unconditionally.  */
  typedef enum valtype
  {
    integer,                    /* Native signed long.  */
    mp_integer,                 /* GNU MP arbitrary-precision integer.  */
    string                      /* NUL-terminated character string.  */
  } TYPE;
  75
  76 /* A value is.... */
  77 struct valinfo
  78 {
  79   TYPE type;                    /* Which kind. */
  80   union
  81   {                             /* The value itself. */
  82     /* We could use intmax_t but that would integrate less well with GMP,
  83        since GMP has mpz_set_si but no intmax_t equivalent. */
  84     signed long int i;
  85 #if HAVE_GMP
  86     mpz_t z;
  87 #endif
  88     char *s;
  89   } u;
  90 };
  91 typedef struct valinfo VALUE;
  92
  93 /* The arguments given to the program, minus the program name.  */
  94 static char **args;
  95
  96 static VALUE *eval (bool);
  97 static bool nomoreargs (void);
  98 static bool null (VALUE *v);
  99 static void printv (VALUE *v);
 100
 /* How arithmetic is carried out.

    --bignum selects MP_ALWAYS: every integer result is created as a
    bignum (string indexing excepted).  --no-bignum selects MP_NEVER:
    only native types are used.

    Without either option the mode is MP_AUTO when GMP is available
    and MP_NEVER when it is not.  Most functions will process a bignum
    if they are handed one, but never promote a native integer to a
    bignum while the mode is MP_NEVER.  */
 enum arithmetic_mode
 {
   MP_NEVER,                    /* Never use bignums.  */
 #if HAVE_GMP
   MP_ALWAYS,                   /* Always use bignums.  */
   MP_AUTO,                     /* Switch if a result would otherwise overflow.  */
 #endif
 };

 #if HAVE_GMP
 static enum arithmetic_mode mode = MP_AUTO;
 #else
 static enum arithmetic_mode mode = MP_NEVER;
 #endif
 127
 128
 129 void
 130 usage (int status)
 131 {
 132   if (status != EXIT_SUCCESS)
 133     fprintf (stderr, _("Try `%s --help' for more information.\n"),
 134              program_name);
 135   else
 136     {
 137       printf (_("\
 138 Usage: %s EXPRESSION\n\
 139   or:  %s OPTION\n\
 140 "),
 141               program_name, program_name);
 142       putchar ('\n');
 143       fputs (_("\
 144       --bignum     always use arbitrary-precision arithmetic\n\
 145       --no-bignum  always use single-precision arithmetic\n"),
 146                stdout);
 147       fputs (HELP_OPTION_DESCRIPTION, stdout);
 148       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 149       fputs (_("\
 150 \n\
 151 Print the value of EXPRESSION to standard output.  A blank line below\n\
 152 separates increasing precedence groups.  EXPRESSION may be:\n\
 153 \n\
 154   ARG1 | ARG2       ARG1 if it is neither null nor 0, otherwise ARG2\n\
 155 \n\
 156   ARG1 & ARG2       ARG1 if neither argument is null or 0, otherwise 0\n\
 157 "), stdout);
 158       fputs (_("\
 159 \n\
 160   ARG1 < ARG2       ARG1 is less than ARG2\n\
 161   ARG1 <= ARG2      ARG1 is less than or equal to ARG2\n\
 162   ARG1 = ARG2       ARG1 is equal to ARG2\n\
 163   ARG1 != ARG2      ARG1 is unequal to ARG2\n\
 164   ARG1 >= ARG2      ARG1 is greater than or equal to ARG2\n\
 165   ARG1 > ARG2       ARG1 is greater than ARG2\n\
 166 "), stdout);
 167       fputs (_("\
 168 \n\
 169   ARG1 + ARG2       arithmetic sum of ARG1 and ARG2\n\
 170   ARG1 - ARG2       arithmetic difference of ARG1 and ARG2\n\
 171 "), stdout);
 172       /* Tell xgettext that the "% A" below is not a printf-style
 173          format string:  xgettext:no-c-format */
 174       fputs (_("\
 175 \n\
 176   ARG1 * ARG2       arithmetic product of ARG1 and ARG2\n\
 177   ARG1 / ARG2       arithmetic quotient of ARG1 divided by ARG2\n\
 178   ARG1 % ARG2       arithmetic remainder of ARG1 divided by ARG2\n\
 179 "), stdout);
 180       fputs (_("\
 181 \n\
 182   STRING : REGEXP   anchored pattern match of REGEXP in STRING\n\
 183 \n\
 184   match STRING REGEXP        same as STRING : REGEXP\n\
 185   substr STRING POS LENGTH   substring of STRING, POS counted from 1\n\
 186   index STRING CHARS         index in STRING where any CHARS is found, or 0\n\
 187   length STRING              length of STRING\n\
 188 "), stdout);
 189       fputs (_("\
 190   + TOKEN                    interpret TOKEN as a string, even if it is a\n\
 191                                keyword like `match' or an operator like `/'\n\
 192 \n\
 193   ( EXPRESSION )             value of EXPRESSION\n\
 194 "), stdout);
 195       fputs (_("\
 196 \n\
 197 Beware that many operators need to be escaped or quoted for shells.\n\
 198 Comparisons are arithmetic if both ARGs are numbers, else lexicographical.\n\
 199 Pattern matches return the string matched between \\( and \\) or null; if\n\
 200 \\( and \\) are not used, they return the number of characters matched or 0.\n\
 201 "), stdout);
 202       fputs (_("\
 203 \n\
 204 Exit status is 0 if EXPRESSION is neither null nor 0, 1 if EXPRESSION is null\n\
 205 or 0, 2 if EXPRESSION is syntactically invalid, and 3 if an error occurred.\n\
 206 "), stdout);
 207       emit_bug_reporting_address ();
 208     }
 209   exit (status);
 210 }
 211
 212 /* Report a syntax error and exit.  */
 213 static void
 214 syntax_error (void)
 215 {
 216   error (EXPR_INVALID, 0, _("syntax error"));
 217 }
 218
 219 /* Report an integer overflow for operation OP and exit.  */
 220 static void
 221 integer_overflow (char op)
 222 {
 223   error (EXPR_FAILURE, 0,
 224          _("arithmetic operation %c produced an out of range value, "
 225            "but arbitrary-precision arithmetic is not available"), op);
 226 }
 227
 228 static void die (int exit_status, int errno_val, char const *msg)
 229   ATTRIBUTE_NORETURN;
 230 static void
 231 die (int exit_status, int errno_val, char const *msg)
 232 {
 233   assert (exit_status != 0);
 234   error (exit_status, errno_val, "%s", msg);
 235   abort (); /* notreached */
 236 }
 237
 238 static void
 239 string_too_long (void)
 240 {
 241   die (EXPR_FAILURE, ERANGE, _("string too long"));
 242 }
 243
 /* getopt_long return codes for the long-only options.  They start
    just above CHAR_MAX so they cannot clash with any single-character
    option code.  */
 enum
 {
   USE_BIGNUM = CHAR_MAX + 1,
   NO_USE_BIGNUM = CHAR_MAX + 2
 };
 249
 250 static struct option const long_options[] =
 251 {
 252   {"bignum", no_argument, NULL, USE_BIGNUM},
 253   {"no-bignum", no_argument, NULL, NO_USE_BIGNUM},
 254   {GETOPT_HELP_OPTION_DECL},
 255   {GETOPT_VERSION_OPTION_DECL},
 256   {NULL, 0, NULL, 0}
 257 };
 258
 259 int
 260 main (int argc, char **argv)
 261 {
 262   VALUE *v;
 263   int c;
 264
 265   initialize_main (&argc, &argv);
 266   set_program_name (argv[0]);
 267   setlocale (LC_ALL, "");
 268   bindtextdomain (PACKAGE, LOCALEDIR);
 269   textdomain (PACKAGE);
 270
 271   initialize_exit_failure (EXPR_FAILURE);
 272   atexit (close_stdout);
 273
 274   /* The argument -0 should not result in an error message. */
 275   opterr = 0;
 276
 277   while ((c = getopt_long (argc, argv, "+", long_options, NULL)) != -1)
 278     {
 279       /* "expr -0" should interpret the -0 as an integer argument.
 280          arguments like --foo should also be interpreted as a string
 281          argument to be "evaluated".
 282        */
 283       if ('?' == c)
 284         {
 285           --optind;
 286           break;
 287         }
 288       else
 289         switch (c)
 290           {
 291           case USE_BIGNUM:
 292 #if HAVE_GMP
 293             mode = MP_ALWAYS;
 294 #else
 295             error (EXPR_FAILURE, 0,
 296                    _("arbitrary-precision support is not available"));
 297 #endif
 298             break;
 299
 300           case NO_USE_BIGNUM:
 301             mode = MP_NEVER;
 302             break;
 303
 304             case_GETOPT_HELP_CHAR;
 305
 306             case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
 307           }
 308     }
 309
 310   if (argc <= optind)
 311     {
 312       error (0, 0, _("missing operand"));
 313       usage (EXPR_INVALID);
 314     }
 315
 316   args = argv + optind;
 317
 318   v = eval (true);
 319   if (!nomoreargs ())
 320     syntax_error ();
 321   printv (v);
 322
 323   exit (null (v));
 324 }
 325
 326 /* Return a VALUE for I.  */
 327
 328 static VALUE *
 329 int_value (long int i)
 330 {
 331   VALUE *v = xmalloc (sizeof *v);
 332 #if HAVE_GMP
 333   if (mode == MP_ALWAYS)
 334     {
 335       /* all integer values are handled as bignums. */
 336       mpz_init_set_si (v->u.z, i);
 337       v->type = mp_integer;
 338       return v;
 339     }
 340 #endif
 341
 342   v->type = integer;
 343   v->u.i = i;
 344   return v;
 345 }
 346
 347 /* Return a VALUE for S.  */
 348
 349 static VALUE *
 350 str_value (char const *s)
 351 {
 352   VALUE *v = xmalloc (sizeof *v);
 353   v->type = string;
 354   v->u.s = xstrdup (s);
 355   return v;
 356 }
 357
 358
 359 static VALUE *
 360 substr_value (char const *s, size_t len, size_t pos, size_t nchars_wanted)
 361 {
 362   if (pos >= len)
 363     return str_value ("");
 364   else
 365     {
 366       VALUE *v = xmalloc (sizeof *v);
 367       size_t vlen = MIN (nchars_wanted, len - pos + 1);
 368       char *vlim;
 369       v->type = string;
 370       v->u.s = xmalloc (vlen + 1);
 371       vlim = mempcpy (v->u.s, s + pos, vlen);
 372       *vlim = '\0';
 373       return v;
 374     }
 375 }
 376
 377
 378 /* Free VALUE V, including structure components.  */
 379
 380 static void
 381 freev (VALUE *v)
 382 {
 383   if (v->type == string)
 384     {
 385       free (v->u.s);
 386     }
 387   else if (v->type == mp_integer)
 388     {
 389       assert (mode != MP_NEVER);
 390 #if HAVE_GMP
 391       mpz_clear (v->u.z);
 392 #endif
 393     }
 394   free (v);
 395 }
 396
 397 /* Print VALUE V.  */
 398
 399 static void
 400 printv (VALUE *v)
 401 {
 402   switch (v->type)
 403     {
 404     case integer:
 405       printf ("%ld\n", v->u.i);
 406       break;
 407     case string:
 408       puts (v->u.s);
 409       break;
 410 #if HAVE_GMP
 411     case mp_integer:
 412       mpz_out_str (stdout, 10, v->u.z);
 413       putchar ('\n');
 414       break;
 415 #endif
 416     default:
 417       abort ();
 418     }
 419
 420 }
 421
 422 /* Return true if V is a null-string or zero-number.  */
 423
 424 static bool
 425 null (VALUE *v)
 426 {
 427   switch (v->type)
 428     {
 429     case integer:
 430       return v->u.i == 0;
 431 #if HAVE_GMP
 432     case mp_integer:
 433       return mpz_sgn (v->u.z) == 0;
 434 #endif
 435     case string:
 436       {
 437         char const *cp = v->u.s;
 438         if (*cp == '\0')
 439           return true;
 440
 441         cp += (*cp == '-');
 442
 443         do
 444           {
 445             if (*cp != '0')
 446               return false;
 447           }
 448         while (*++cp);
 449
 450         return true;
 451       }
 452     default:
 453       abort ();
 454     }
 455 }
 456
 /* Return true if CP has the form of a decimal integer: an optional
    leading '-' followed by one or more digits and nothing else.  */

 static bool
 looks_like_integer (char const *cp)
 {
   if (*cp == '-')
     cp++;

   /* An empty digit sequence does not qualify.  */
   if (*cp == '\0')
     return false;

   for (; *cp != '\0'; cp++)
     {
       if (! ISDIGIT (*cp))
         return false;
     }

   return true;
 }
 471
 472 /* Coerce V to a string value (can't fail).  */
 473
 474 static void
 475 tostring (VALUE *v)
 476 {
 477   char buf[INT_BUFSIZE_BOUND (long int)];
 478
 479   switch (v->type)
 480     {
 481     case integer:
 482       snprintf (buf, sizeof buf, "%ld", v->u.i);
 483       v->u.s = xstrdup (buf);
 484       v->type = string;
 485       break;
 486 #if HAVE_GMP
 487     case mp_integer:
 488       {
 489         char *s = mpz_get_str (NULL, 10, v->u.z);
 490         if (!s)
 491           {
 492             xalloc_die ();
 493           }
 494         mpz_clear (v->u.z);
 495         v->u.s = s;
 496         v->type = string;
 497       }
 498       break;
 499 #endif
 500     case string:
 501       break;
 502     default:
 503       abort ();
 504     }
 505 }
 506
 507 /* Coerce V to an arithmetic value.
 508    Return true on success, false on failure.  */
 509
 510 static bool
 511 toarith (VALUE *v)
 512 {
 513   switch (v->type)
 514     {
 515     case integer:
 516     case mp_integer:
 517       return true;
 518
 519     case string:
 520       {
 521         long int value;
 522
 523         if (! looks_like_integer (v->u.s))
 524           return false;
 525         if (xstrtol (v->u.s, NULL, 10, &value, NULL) != LONGINT_OK)
 526           {
 527 #if HAVE_GMP
 528             if (mode != MP_NEVER)
 529               {
 530                 char *s = v->u.s;
 531                 if (mpz_init_set_str (v->u.z, s, 10))
 532                   abort ();  /* Bug in looks_like_integer, perhaps. */
 533                 v->type = mp_integer;
 534                 free (s);
 535               }
 536             else
 537               {
 538                 error (EXPR_FAILURE, ERANGE, "%s", v->u.s);
 539               }
 540 #else
 541             error (EXPR_FAILURE, ERANGE, "%s", v->u.s);
 542 #endif
 543           }
 544         else
 545           {
 546             free (v->u.s);
 547             v->u.i = value;
 548             v->type = integer;
 549           }
 550         return true;
 551       }
 552     default:
 553       abort ();
 554     }
 555 }
 556
 557 /* Extract a size_t value from a positive arithmetic value, V.
 558    The extracted value is stored in *VAL. */
 559 static bool
 560 getsize (const VALUE *v, size_t *val, bool *negative)
 561 {
 562   if (v->type == integer)
 563     {
 564       if (v->u.i < 0)
 565         {
 566           *negative = true;
 567           return false;
 568         }
 569       else
 570         {
 571           *negative = false;
 572           *val = v->u.i;
 573           return true;
 574         }
 575     }
 576   else if (v->type == mp_integer)
 577     {
 578 #if HAVE_GMP
 579       if (mpz_sgn (v->u.z) < 0)
 580         {
 581           *negative = true;
 582           return false;
 583         }
 584       else if (mpz_fits_ulong_p (v->u.z))
 585         {
 586           unsigned long ul;
 587           ul = mpz_get_ui (v->u.z);
 588           *val = ul;
 589           return true;
 590         }
 591       else
 592         {
 593           *negative = false;
 594           return false;
 595         }
 596 #else
 597       abort ();
 598 #endif
 599
 600     }
 601   else
 602     {
 603       abort ();                 /* should not pass a string. */
 604     }
 605 }
 606
 607
 608
 609 /* Return true and advance if the next token matches STR exactly.
 610    STR must not be NULL.  */
 611
 612 static bool
 613 nextarg (char const *str)
 614 {
 615   if (*args == NULL)
 616     return false;
 617   else
 618     {
 619       bool r = STREQ (*args, str);
 620       args += r;
 621       return r;
 622     }
 623 }
 624
 625 /* Return true if there no more tokens.  */
 626
 627 static bool
 628 nomoreargs (void)
 629 {
 630   return *args == 0;
 631 }
 632
 #ifdef EVAL_TRACE
 /* Debugging aid, compiled only when EVAL_TRACE is defined: print FXN
    (the name of the evaluation function being entered) followed by
    every token that has not been consumed yet, on one line of standard
    output.  */

 static void
 trace (char const *fxn)
 {
   char **a;

   printf ("%s:", fxn);
   for (a = args; *a; a++)
     printf (" %s", *a);
   putchar ('\n');
 }
 #endif
 648
 649 /* Do the : operator.
 650    SV is the VALUE for the lhs (the string),
 651    PV is the VALUE for the rhs (the pattern).  */
 652
 653 static VALUE *
 654 docolon (VALUE *sv, VALUE *pv)
 655 {
 656   VALUE *v IF_LINT (= NULL);
 657   const char *errmsg;
 658   struct re_pattern_buffer re_buffer;
 659   char fastmap[UCHAR_MAX + 1];
 660   struct re_registers re_regs;
 661   regoff_t matchlen;
 662
 663   tostring (sv);
 664   tostring (pv);
 665
 666   re_regs.num_regs = 0;
 667   re_regs.start = NULL;
 668   re_regs.end = NULL;
 669
 670   re_buffer.buffer = NULL;
 671   re_buffer.allocated = 0;
 672   re_buffer.fastmap = fastmap;
 673   re_buffer.translate = NULL;
 674   re_syntax_options =
 675     RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
 676   errmsg = re_compile_pattern (pv->u.s, strlen (pv->u.s), &re_buffer);
 677   if (errmsg)
 678     error (EXPR_INVALID, 0, "%s", errmsg);
 679   re_buffer.newline_anchor = 0;
 680
 681   matchlen = re_match (&re_buffer, sv->u.s, strlen (sv->u.s), 0, &re_regs);
 682   if (0 <= matchlen)
 683     {
 684       /* Were \(...\) used? */
 685       if (re_buffer.re_nsub > 0)
 686         {
 687           sv->u.s[re_regs.end[1]] = '\0';
 688           v = str_value (sv->u.s + re_regs.start[1]);
 689         }
 690       else
 691         v = int_value (matchlen);
 692     }
 693   else if (matchlen == -1)
 694     {
 695       /* Match failed -- return the right kind of null.  */
 696       if (re_buffer.re_nsub > 0)
 697         v = str_value ("");
 698       else
 699         v = int_value (0);
 700     }
 701   else
 702     error (EXPR_FAILURE,
 703            (matchlen == -2 ? errno : EOVERFLOW),
 704            _("error in regular expression matcher"));
 705
 706   if (0 < re_regs.num_regs)
 707     {
 708       free (re_regs.start);
 709       free (re_regs.end);
 710     }
 711   re_buffer.fastmap = NULL;
 712   regfree (&re_buffer);
 713   return v;
 714 }
 715
 716 /* Handle bare operands and ( expr ) syntax.  */
 717
 718 static VALUE *
 719 eval7 (bool evaluate)
 720 {
 721   VALUE *v;
 722
 723 #ifdef EVAL_TRACE
 724   trace ("eval7");
 725 #endif
 726   if (nomoreargs ())
 727     syntax_error ();
 728
 729   if (nextarg ("("))
 730     {
 731       v = eval (evaluate);
 732       if (!nextarg (")"))
 733         syntax_error ();
 734       return v;
 735     }
 736
 737   if (nextarg (")"))
 738     syntax_error ();
 739
 740   return str_value (*args++);
 741 }
 742
 743 /* Handle match, substr, index, and length keywords, and quoting "+".  */
 744
 745 static VALUE *
 746 eval6 (bool evaluate)
 747 {
 748   VALUE *l;
 749   VALUE *r;
 750   VALUE *v;
 751   VALUE *i1;
 752   VALUE *i2;
 753
 754 #ifdef EVAL_TRACE
 755   trace ("eval6");
 756 #endif
 757   if (nextarg ("+"))
 758     {
 759       if (nomoreargs ())
 760         syntax_error ();
 761       return str_value (*args++);
 762     }
 763   else if (nextarg ("length"))
 764     {
 765       r = eval6 (evaluate);
 766       tostring (r);
 767       v = int_value (strlen (r->u.s));
 768       freev (r);
 769       return v;
 770     }
 771   else if (nextarg ("match"))
 772     {
 773       l = eval6 (evaluate);
 774       r = eval6 (evaluate);
 775       if (evaluate)
 776         {
 777           v = docolon (l, r);
 778           freev (l);
 779         }
 780       else
 781         v = l;
 782       freev (r);
 783       return v;
 784     }
 785   else if (nextarg ("index"))
 786     {
 787       size_t pos, len;
 788
 789       l = eval6 (evaluate);
 790       r = eval6 (evaluate);
 791       tostring (l);
 792       tostring (r);
 793       pos = strcspn (l->u.s, r->u.s);
 794       len = strlen (l->u.s);
 795       if (pos == len)
 796         {
 797           v = int_value (0);
 798         }
 799       else
 800         {
 801           if (pos < LONG_MAX)
 802             {
 803               v = int_value (pos + 1);
 804             }
 805           else
 806             {
 807 #if HAVE_GMP
 808               if (mode != MP_NEVER
 809                   && pos < ULONG_MAX)
 810                 {
 811                   v = xmalloc (sizeof *v);
 812                   mpz_init_set_ui (v->u.z, pos+1);
 813                   v->type = mp_integer;
 814                 }
 815               else
 816 #endif
 817                 {
 818                   string_too_long ();
 819                 }
 820             }
 821         }
 822       freev (l);
 823       freev (r);
 824       return v;
 825     }
 826   else if (nextarg ("substr"))
 827     {
 828       size_t llen;
 829       l = eval6 (evaluate);
 830       i1 = eval6 (evaluate);
 831       i2 = eval6 (evaluate);
 832       tostring (l);
 833       llen = strlen (l->u.s);
 834
 835       if (!toarith (i1) || !toarith (i2))
 836         v = str_value ("");
 837       else
 838         {
 839           size_t pos, len;
 840           bool negative = false;
 841
 842           if (getsize (i1, &pos, &negative))
 843             if (getsize (i2, &len, &negative))
 844               if (pos == 0 || len == 0)
 845                 v = str_value ("");
 846               else
 847                 v = substr_value (l->u.s, llen, pos-1, len);
 848             else
 849               if (negative)
 850                 v = str_value ("");
 851               else
 852                 die (EXPR_FAILURE, ERANGE, _("string offset is too large"));
 853           else
 854             if (negative)
 855               v = str_value ("");
 856             else
 857               die (EXPR_FAILURE, ERANGE, _("substring length too large"));
 858         }
 859       freev (l);
 860       freev (i1);
 861       freev (i2);
 862       return v;
 863     }
 864   else
 865     return eval7 (evaluate);
 866 }
 867
 868 /* Handle : operator (pattern matching).
 869    Calls docolon to do the real work.  */
 870
 871 static VALUE *
 872 eval5 (bool evaluate)
 873 {
 874   VALUE *l;
 875   VALUE *r;
 876   VALUE *v;
 877
 878 #ifdef EVAL_TRACE
 879   trace ("eval5");
 880 #endif
 881   l = eval6 (evaluate);
 882   while (1)
 883     {
 884       if (nextarg (":"))
 885         {
 886           r = eval6 (evaluate);
 887           if (evaluate)
 888             {
 889               v = docolon (l, r);
 890               freev (l);
 891               l = v;
 892             }
 893           freev (r);
 894         }
 895       else
 896         return l;
 897     }
 898 }
 899
 900
 #if HAVE_GMP
 /* Convert X in place from a native integer to a bignum of the same
    value.  A value that is already a bignum is left alone.  X must be
    arithmetic.

    The type tag is updated together with the payload: without that,
    printv and freev would keep treating the union as a native long,
    printing garbage and never releasing the bignum.  */
 static void
 promote (VALUE *x)
 {
   if (x->type == integer)
     {
       /* Read the native value first; initializing u.z overwrites the
          union slot it lives in.  */
       long int native = x->u.i;

       mpz_init_set_si (x->u.z, native);
       x->type = mp_integer;
     }
 }
 #endif
 909
 910 /* L = L * R.  Both L and R are arithmetic. */
 911 static void
 912 domult (VALUE *l, VALUE *r)
 913 {
 914   if (l->type == integer && r->type == integer)
 915     {
 916       long int val = 0;
 917       val = l->u.i * r->u.i;
 918       if (! (l->u.i == 0 || r->u.i == 0
 919              || ((val < 0) == ((l->u.i < 0) ^ (r->u.i < 0))
 920                  && val / l->u.i == r->u.i)))
 921         {
 922           /* Result would (did) overflow.  Handle with MP if available. */
 923           if (mode != MP_NEVER)
 924             {
 925 #if HAVE_GMP
 926               mpz_init_set_si (l->u.z, l->u.i);
 927               mpz_mul_si (l->u.z, l->u.z, r->u.i); /* L*=R */
 928               l->type = mp_integer;
 929 #endif
 930             }
 931           else
 932             {
 933               integer_overflow ('*');
 934             }
 935         }
 936       else
 937         {
 938           l->u.i = val;
 939         }
 940     }
 941   else
 942     {
 943       /* At least one operand is already mp_integer, so promote the other. */
 944 #if HAVE_GMP
 945       /* We could use mpz_mul_si here if R is not already mp_integer,
 946          but for the moment we'll try to minimise code paths. */
 947       if (l->type == integer)
 948         mpz_init_set_si (l->u.z, l->u.i);
 949       if (r->type == integer)
 950         mpz_init_set_si (r->u.z, r->u.i);
 951       l->type = r->type = mp_integer;
 952       mpz_mul (l->u.z, l->u.z, r->u.z); /* L*=R */
 953 #else
 954       abort ();
 955 #endif
 956     }
 957 }
 958
 959 /* L = L / R or (if WANT_MODULUS) L = L % R */
 960 static void
 961 dodivide (VALUE *l, VALUE *r, bool want_modulus)
 962 {
 963   if (r->type == integer && r->u.i == 0)
 964     error (EXPR_INVALID, 0, _("division by zero"));
 965 #if HAVE_GMP
 966   if (r->type == mp_integer && mpz_sgn (r->u.z) == 0)
 967     error (EXPR_INVALID, 0, _("division by zero"));
 968 #endif
 969   if (l->type == integer && r->type == integer)
 970     {
 971       if (l->u.i < - INT_MAX && r->u.i == -1)
 972         {
 973           /* Some x86-style hosts raise an exception for
 974              INT_MIN / -1 and INT_MIN % -1, so handle these
 975              problematic cases specially.  */
 976           if (want_modulus)
 977             {
 978               /* X mod -1 is zero for all negative X.
 979                  Although strictly this is implementation-defined,
 980                  we don't want to coredump, so we avoid the calculation. */
 981               l->u.i = 0;
 982               return;
 983             }
 984           else
 985             {
 986               if (mode != MP_NEVER)
 987                 {
 988 #if HAVE_GMP
 989                   /* Handle the case by promoting. */
 990                   mpz_init_set_si (l->u.z, l->u.i);
 991                   l->type = mp_integer;
 992 #endif
 993                 }
 994               else
 995                 {
 996                   integer_overflow ('/');
 997                 }
 998             }
 999         }
1000       else
1001         {
1002           l->u.i = want_modulus ? l->u.i % r->u.i : l->u.i / r->u.i;
1003           return;
1004         }
1005     }
1006   /* If we get to here, at least one operand is mp_integer
1007      and R is not 0. */
1008 #if HAVE_GMP
1009   {
1010     int sign_l, sign_r;
1011     promote (l);
1012     promote (r);
1013     sign_l = mpz_sgn (l->u.z);
1014     sign_r = mpz_sgn (r->u.z);
1015
1016     if (!want_modulus)
1017       {
1018         if (!sign_l)
1019           {
1020             mpz_set_si (l->u.z, 0);
1021           }
1022         else if (sign_l < 0 || sign_r < 0)
1023           {
1024             /* At least one operand is negative.  For integer arithmetic,
1025                it's platform-dependent if the operation rounds up or down.
1026                We mirror what the implementation does. */
1027             switch ((3*sign_l) / (2*sign_r))
1028               {
1029               case  2:          /* round toward +inf. */
1030               case -1:          /* round toward +inf. */
1031                 mpz_cdiv_q (l->u.z, l->u.z, r->u.z);
1032                 break;
1033               case -2:          /* round toward -inf. */
1034               case  1:          /* round toward -inf */
1035                 mpz_fdiv_q (l->u.z, l->u.z, r->u.z);
1036                 break;
1037               default:
1038                 abort ();
1039               }
1040           }
1041         else
1042           {
1043             /* Both operands positive.  Round toward -inf. */
1044             mpz_fdiv_q (l->u.z, l->u.z, r->u.z);
1045           }
1046       }
1047     else
1048       {
1049         mpz_mod (l->u.z, l->u.z, r->u.z); /* L = L % R */
1050
1051         /* If either operand is negative, it's platform-dependent if
1052            the remainer is positive or negative.  We mirror what the
1053            implementation does. */
1054         if (sign_l % sign_r < 0)
1055           mpz_neg (l->u.z, l->u.z); /* L = (-L) */
1056       }
1057   }
1058 #else
1059   abort ();
1060 #endif
1061 }
1062
1063
1064 /* Handle *, /, % operators.  */
1065
1066 static VALUE *
1067 eval4 (bool evaluate)
1068 {
1069   VALUE *l;
1070   VALUE *r;
1071   enum { multiply, divide, mod } fxn;
1072
1073 #ifdef EVAL_TRACE
1074   trace ("eval4");
1075 #endif
1076   l = eval5 (evaluate);
1077   while (1)
1078     {
1079       if (nextarg ("*"))
1080         fxn = multiply;
1081       else if (nextarg ("/"))
1082         fxn = divide;
1083       else if (nextarg ("%"))
1084         fxn = mod;
1085       else
1086         return l;
1087       r = eval5 (evaluate);
1088       if (evaluate)
1089         {
1090           if (!toarith (l) || !toarith (r))
1091             error (EXPR_INVALID, 0, _("non-numeric argument"));
1092           switch (fxn)
1093             {
1094             case multiply:
1095               domult (l, r);
1096               break;
1097             case divide:
1098             case mod:
1099               dodivide (l, r, fxn==mod);
1100               break;
1101             }
1102         }
1103       freev (r);
1104     }
1105 }
1106
1107 /* L = L + R, or L = L - R */
1108 static void
1109 doadd (VALUE *l, VALUE *r, bool add)
1110 {
1111   long int val = 0;
1112
1113   if (!toarith (l) || !toarith (r))
1114     error (EXPR_INVALID, 0, _("non-numeric argument"));
1115   if (l->type == integer && r->type == integer)
1116     {
1117       if (add)
1118         {
1119           val = l->u.i + r->u.i;
1120           if ((val < l->u.i) == (r->u.i < 0))
1121             {
1122               l->u.i = val;
1123               return;
1124             }
1125         }
1126       else
1127         {
1128           val = l->u.i - r->u.i;
1129           if ((l->u.i < val) == (r->u.i < 0))
1130             {
1131               l->u.i = val;
1132               return;
1133             }
1134         }
1135     }
1136   /* If we get to here, either the operation overflowed or at least
1137      one operand is an mp_integer. */
1138   if (mode != MP_NEVER)
1139     {
1140 #if HAVE_GMP
1141       promote (l);
1142       promote (r);
1143       if (add)
1144         mpz_add (l->u.z, l->u.z, r->u.z);
1145       else
1146         mpz_sub (l->u.z, l->u.z, r->u.z);
1147 #endif
1148     }
1149   else
1150     {
1151       integer_overflow ('-');
1152     }
1153 }
1154
1155
1156
1157 /* Handle +, - operators.  */
1158
1159 static VALUE *
1160 eval3 (bool evaluate)
1161 {
1162   VALUE *l;
1163   VALUE *r;
1164   bool add;
1165
1166 #ifdef EVAL_TRACE
1167   trace ("eval3");
1168 #endif
1169   l = eval4 (evaluate);
1170   while (1)
1171     {
1172       if (nextarg ("+"))
1173         add = true;
1174       else if (nextarg ("-"))
1175         add = false;
1176       else
1177         return l;
1178       r = eval4 (evaluate);
1179       if (evaluate)
1180         {
1181           doadd (l, r, add);
1182         }
1183       freev (r);
1184     }
1185 }
1186
1187 /* Handle comparisons.  */
1188
1189 static VALUE *
1190 eval2 (bool evaluate)
1191 {
1192   VALUE *l;
1193
1194 #ifdef EVAL_TRACE
1195   trace ("eval2");
1196 #endif
1197   l = eval3 (evaluate);
1198   while (1)
1199     {
1200       VALUE *r;
1201       enum
1202         {
1203           less_than, less_equal, equal, not_equal, greater_equal, greater_than
1204         } fxn;
1205       bool val = false;
1206
1207       if (nextarg ("<"))
1208         fxn = less_than;
1209       else if (nextarg ("<="))
1210         fxn = less_equal;
1211       else if (nextarg ("=") || nextarg ("=="))
1212         fxn = equal;
1213       else if (nextarg ("!="))
1214         fxn = not_equal;
1215       else if (nextarg (">="))
1216         fxn = greater_equal;
1217       else if (nextarg (">"))
1218         fxn = greater_than;
1219       else
1220         return l;
1221       r = eval3 (evaluate);
1222
1223       if (evaluate)
1224         {
1225           int cmp;
1226           tostring (l);
1227           tostring (r);
1228
1229           if (looks_like_integer (l->u.s) && looks_like_integer (r->u.s))
1230             cmp = strintcmp (l->u.s, r->u.s);
1231           else
1232             {
1233               errno = 0;
1234               cmp = strcoll (l->u.s, r->u.s);
1235
1236               if (errno)
1237                 {
1238                   error (0, errno, _("string comparison failed"));
1239                   error (0, 0, _("set LC_ALL='C' to work around the problem"));
1240                   error (EXPR_INVALID, 0,
1241                          _("the strings compared were %s and %s"),
1242                          quotearg_n_style (0, locale_quoting_style, l->u.s),
1243                          quotearg_n_style (1, locale_quoting_style, r->u.s));
1244                 }
1245             }
1246
1247           switch (fxn)
1248             {
1249             case less_than:     val = (cmp <  0); break;
1250             case less_equal:    val = (cmp <= 0); break;
1251             case equal:         val = (cmp == 0); break;
1252             case not_equal:     val = (cmp != 0); break;
1253             case greater_equal: val = (cmp >= 0); break;
1254             case greater_than:  val = (cmp >  0); break;
1255             default: abort ();
1256             }
1257         }
1258
1259       freev (l);
1260       freev (r);
1261       l = int_value (val);
1262     }
1263 }
1264
1265 /* Handle &.  */
1266
1267 static VALUE *
1268 eval1 (bool evaluate)
1269 {
1270   VALUE *l;
1271   VALUE *r;
1272
1273 #ifdef EVAL_TRACE
1274   trace ("eval1");
1275 #endif
1276   l = eval2 (evaluate);
1277   while (1)
1278     {
1279       if (nextarg ("&"))
1280         {
1281           r = eval2 (evaluate & ~ null (l));
1282           if (null (l) || null (r))
1283             {
1284               freev (l);
1285               freev (r);
1286               l = int_value (0);
1287             }
1288           else
1289             freev (r);
1290         }
1291       else
1292         return l;
1293     }
1294 }
1295
1296 /* Handle |.  */
1297
1298 static VALUE *
1299 eval (bool evaluate)
1300 {
1301   VALUE *l;
1302   VALUE *r;
1303
1304 #ifdef EVAL_TRACE
1305   trace ("eval");
1306 #endif
1307   l = eval1 (evaluate);
1308   while (1)
1309     {
1310       if (nextarg ("|"))
1311         {
1312           r = eval1 (evaluate & null (l));
1313           if (null (l))
1314             {
1315               freev (l);
1316               l = r;
1317               if (null (l))
1318                 {
1319                   freev (l);
1320                   l = int_value (0);
1321                 }
1322             }
1323           else
1324             freev (r);
1325         }
1326       else
1327         return l;
1328     }
1329 }