src/expr.c

   1 /* expr -- evaluate expressions.
   2    Copyright (C) 86, 1991-1997, 1999-2008 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  16
  17 /* Author: Mike Parker.
  18    Modified for arbitrary-precision calculation by James Youngman.
  19
  20    This program evaluates expressions.  Each token (operator, operand,
  21    parenthesis) of the expression must be a separate argument.  The
  22    parser used is a reasonably general one, though any incarnation of
  23    it is language-specific.  It is especially nice for expressions.
  24
  25    No parse tree is needed; a new node is evaluated immediately.
  26    One function can handle multiple operators all of equal precedence,
  27    provided they all associate ((x op x) op x).
  28
  29    Define EVAL_TRACE to print an evaluation trace.  */
  30
  31 #include <config.h>
  32 #include <stdio.h>
  33 #include <sys/types.h>
  34 #include "system.h"
  35
  36 #include <assert.h>
  37 #include <regex.h>
  38 #if HAVE_GMP
  39 #include <gmp.h>
  40 #endif
  41 #include "error.h"
  42 #include "quotearg.h"
  43 #include "strnumcmp.h"
  44 #include "xstrtol.h"
  45
  46 /* The official name of this program (e.g., no `g' prefix).  */
  47 #define PROGRAM_NAME "expr"
  48
  49 #define AUTHORS proper_name ("Mike Parker"), proper_name ("James Youngman")
  50
  51 /* Exit statuses.  */
  52 enum
  53   {
  54     /* Invalid expression: e.g., its form does not conform to the
  55        grammar for expressions.  Our grammar is an extension of the
  56        POSIX grammar.  */
  57     EXPR_INVALID = 2,
  58
  59     /* An internal error occurred, e.g., arithmetic overflow, storage
  60        exhaustion.  */
  61     EXPR_FAILURE
  62   };
  63
  64 /* The kinds of value we can have.
  65    In the comments below, a variable is described as "arithmetic" if
  66    it is either integer or mp_integer.   Variables are of type mp_integer
  67    only if GNU MP is available, but the type designator is always defined. */
  68 enum valtype
  69 {
  70   integer,
  71   mp_integer,
  72   string
  73 };
  74 typedef enum valtype TYPE;
  75
  76 /* A value is.... */
  77 struct valinfo
  78 {
  79   TYPE type;                    /* Which kind. */
  80   union
  81   {                             /* The value itself. */
  82     /* We could use intmax_t but that would integrate less well with GMP,
  83        since GMP has mpz_set_si but no intmax_t equivalent. */
  84     signed long int i;
  85 #if HAVE_GMP
  86     mpz_t z;
  87 #endif
  88     char *s;
  89   } u;
  90 };
  91 typedef struct valinfo VALUE;
  92
  93 /* The arguments given to the program, minus the program name.  */
  94 static char **args;
  95
  96 static VALUE *eval (bool);
  97 static bool nomoreargs (void);
  98 static bool null (VALUE *v);
  99 static void printv (VALUE *v);
 100
 101 /* Arithmetic is done in one of three modes.
 102
 103    The --bignum option forces all arithmetic to use bignums other than
 104    string indexing (mode==MP_ALWAYS).  The --no-bignum option forces
 105    all arithmetic to use native types rather than bignums
 106    (mode==MP_NEVER).
 107
 108    The default mode is MP_AUTO if GMP is available and MP_NEVER if
 109    not.  Most functions will process a bignum if one is found, but
 110    will not convert a native integer to a string if the mode is
 111    MP_NEVER. */
 112 enum arithmetic_mode
 113   {
 114     MP_NEVER,                   /* Never use bignums */
 115 #if HAVE_GMP
 116     MP_ALWAYS,                  /* Always use bignums. */
 117     MP_AUTO,                    /* Switch if result would otherwise overflow */
 118 #endif
 119   };
 120 static enum arithmetic_mode mode =
 121 #if HAVE_GMP
 122   MP_AUTO
 123 #else
 124   MP_NEVER
 125 #endif
 126   ;
 127
 128
 129 void
 130 usage (int status)
 131 {
 132   if (status != EXIT_SUCCESS)
 133     fprintf (stderr, _("Try `%s --help' for more information.\n"),
 134              program_name);
 135   else
 136     {
 137       printf (_("\
 138 Usage: %s EXPRESSION\n\
 139   or:  %s OPTION\n\
 140 "),
 141               program_name, program_name);
 142       putchar ('\n');
 143       fputs (_("\
 144       --bignum     always use arbitrary-precision arithmetic\n\
 145       --no-bignum  always use single-precision arithmetic\n"),
 146                stdout);
 147       fputs (HELP_OPTION_DESCRIPTION, stdout);
 148       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 149       fputs (_("\
 150 \n\
 151 Print the value of EXPRESSION to standard output.  A blank line below\n\
 152 separates increasing precedence groups.  EXPRESSION may be:\n\
 153 \n\
 154   ARG1 | ARG2       ARG1 if it is neither null nor 0, otherwise ARG2\n\
 155 \n\
 156   ARG1 & ARG2       ARG1 if neither argument is null or 0, otherwise 0\n\
 157 "), stdout);
 158       fputs (_("\
 159 \n\
 160   ARG1 < ARG2       ARG1 is less than ARG2\n\
 161   ARG1 <= ARG2      ARG1 is less than or equal to ARG2\n\
 162   ARG1 = ARG2       ARG1 is equal to ARG2\n\
 163   ARG1 != ARG2      ARG1 is unequal to ARG2\n\
 164   ARG1 >= ARG2      ARG1 is greater than or equal to ARG2\n\
 165   ARG1 > ARG2       ARG1 is greater than ARG2\n\
 166 "), stdout);
 167       fputs (_("\
 168 \n\
 169   ARG1 + ARG2       arithmetic sum of ARG1 and ARG2\n\
 170   ARG1 - ARG2       arithmetic difference of ARG1 and ARG2\n\
 171 "), stdout);
 172       /* Tell xgettext that the "% A" below is not a printf-style
 173          format string:  xgettext:no-c-format */
 174       fputs (_("\
 175 \n\
 176   ARG1 * ARG2       arithmetic product of ARG1 and ARG2\n\
 177   ARG1 / ARG2       arithmetic quotient of ARG1 divided by ARG2\n\
 178   ARG1 % ARG2       arithmetic remainder of ARG1 divided by ARG2\n\
 179 "), stdout);
 180       fputs (_("\
 181 \n\
 182   STRING : REGEXP   anchored pattern match of REGEXP in STRING\n\
 183 \n\
 184   match STRING REGEXP        same as STRING : REGEXP\n\
 185   substr STRING POS LENGTH   substring of STRING, POS counted from 1\n\
 186   index STRING CHARS         index in STRING where any CHARS is found, or 0\n\
 187   length STRING              length of STRING\n\
 188 "), stdout);
 189       fputs (_("\
 190   + TOKEN                    interpret TOKEN as a string, even if it is a\n\
 191                                keyword like `match' or an operator like `/'\n\
 192 \n\
 193   ( EXPRESSION )             value of EXPRESSION\n\
 194 "), stdout);
 195       fputs (_("\
 196 \n\
 197 Beware that many operators need to be escaped or quoted for shells.\n\
 198 Comparisons are arithmetic if both ARGs are numbers, else lexicographical.\n\
 199 Pattern matches return the string matched between \\( and \\) or null; if\n\
 200 \\( and \\) are not used, they return the number of characters matched or 0.\n\
 201 "), stdout);
 202       fputs (_("\
 203 \n\
 204 Exit status is 0 if EXPRESSION is neither null nor 0, 1 if EXPRESSION is null\n\
 205 or 0, 2 if EXPRESSION is syntactically invalid, and 3 if an error occurred.\n\
 206 "), stdout);
 207       emit_bug_reporting_address ();
 208     }
 209   exit (status);
 210 }
 211
 212 /* Report a syntax error and exit.  */
 213 static void
 214 syntax_error (void)
 215 {
 216   error (EXPR_INVALID, 0, _("syntax error"));
 217 }
 218
 219 /* Report an integer overflow for operation OP and exit.  */
 220 static void
 221 integer_overflow (char op)
 222 {
 223   error (EXPR_FAILURE, 0,
 224          _("arithmetic operation %c produced an out of range value, "
 225            "but arbitrary-precision arithmetic is not available"), op);
 226 }
 227
 228 static void die (int exit_status, int errno_val, char const *msg)
 229   ATTRIBUTE_NORETURN;
 230 static void
 231 die (int exit_status, int errno_val, char const *msg)
 232 {
 233   assert (exit_status != 0);
 234   error (exit_status, errno_val, "%s", msg);
 235   abort (); /* notreached */
 236 }
 237
 238 static void
 239 string_too_long (void)
 240 {
 241   die (EXPR_FAILURE, ERANGE, _("string too long"));
 242 }
 243
 244 enum
 245 {
 246   USE_BIGNUM = CHAR_MAX + 1,
 247   NO_USE_BIGNUM
 248 };
 249
 250 static struct option const long_options[] =
 251 {
 252   {"bignum", no_argument, NULL, USE_BIGNUM},
 253   {"no-bignum", no_argument, NULL, NO_USE_BIGNUM},
 254   {GETOPT_HELP_OPTION_DECL},
 255   {GETOPT_VERSION_OPTION_DECL},
 256   {NULL, 0, NULL, 0}
 257 };
 258
 259 int
 260 main (int argc, char **argv)
 261 {
 262   VALUE *v;
 263   int c;
 264
 265   initialize_main (&argc, &argv);
 266   set_program_name (argv[0]);
 267   setlocale (LC_ALL, "");
 268   bindtextdomain (PACKAGE, LOCALEDIR);
 269   textdomain (PACKAGE);
 270
 271   initialize_exit_failure (EXPR_FAILURE);
 272   atexit (close_stdout);
 273
 274   /* The argument -0 should not result in an error message. */
 275   opterr = 0;
 276
 277   while ((c = getopt_long (argc, argv, "+", long_options, NULL)) != -1)
 278     {
 279       /* "expr -0" should interpret the -0 as an integer argument.
 280          arguments like --foo should also be interpreted as a string
 281          argument to be "evaluated".
 282        */
 283       if ('?' == c)
 284         {
 285           --optind;
 286           break;
 287         }
 288       else
 289         switch (c)
 290           {
 291           case USE_BIGNUM:
 292 #if HAVE_GMP
 293             mode = MP_ALWAYS;
 294 #else
 295             error (0, 0, _("arbitrary-precision support is not available"));
 296 #endif
 297             break;
 298
 299           case NO_USE_BIGNUM:
 300             mode = MP_NEVER;
 301             break;
 302
 303             case_GETOPT_HELP_CHAR;
 304
 305             case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
 306           }
 307     }
 308
 309   if (argc <= optind)
 310     {
 311       error (0, 0, _("missing operand"));
 312       usage (EXPR_INVALID);
 313     }
 314
 315   args = argv + optind;
 316
 317   v = eval (true);
 318   if (!nomoreargs ())
 319     syntax_error ();
 320   printv (v);
 321
 322   exit (null (v));
 323 }
 324
 325 /* Return a VALUE for I.  */
 326
 327 static VALUE *
 328 int_value (long int i)
 329 {
 330   VALUE *v = xmalloc (sizeof *v);
 331 #if HAVE_GMP
 332   if (mode == MP_ALWAYS)
 333     {
 334       /* all integer values are handled as bignums. */
 335       mpz_init_set_si (v->u.z, i);
 336       v->type = mp_integer;
 337       return v;
 338     }
 339 #endif
 340
 341   v->type = integer;
 342   v->u.i = i;
 343   return v;
 344 }
 345
 346 /* Return a VALUE for S.  */
 347
 348 static VALUE *
 349 str_value (char const *s)
 350 {
 351   VALUE *v = xmalloc (sizeof *v);
 352   v->type = string;
 353   v->u.s = xstrdup (s);
 354   return v;
 355 }
 356
 357
 358 static VALUE *
 359 substr_value (char const *s, size_t len, size_t pos, size_t nchars_wanted)
 360 {
 361   if (pos >= len)
 362     return str_value ("");
 363   else
 364     {
 365       VALUE *v = xmalloc (sizeof *v);
 366       size_t vlen = MIN (nchars_wanted, len - pos + 1);
 367       char *vlim;
 368       v->type = string;
 369       v->u.s = xmalloc (vlen + 1);
 370       vlim = mempcpy (v->u.s, s + pos, vlen);
 371       *vlim = '\0';
 372       return v;
 373     }
 374 }
 375
 376
 377 /* Free VALUE V, including structure components.  */
 378
 379 static void
 380 freev (VALUE *v)
 381 {
 382   if (v->type == string)
 383     {
 384       free (v->u.s);
 385     }
 386   else if (v->type == mp_integer)
 387     {
 388       assert (mode != MP_NEVER);
 389 #if HAVE_GMP
 390       mpz_clear (v->u.z);
 391 #endif
 392     }
 393   free (v);
 394 }
 395
 396 /* Print VALUE V.  */
 397
 398 static void
 399 printv (VALUE *v)
 400 {
 401   switch (v->type)
 402     {
 403     case integer:
 404       printf ("%ld\n", v->u.i);
 405       break;
 406     case string:
 407       puts (v->u.s);
 408       break;
 409 #if HAVE_GMP
 410     case mp_integer:
 411       mpz_out_str (stdout, 10, v->u.z);
 412       putchar ('\n');
 413       break;
 414 #endif
 415     default:
 416       abort ();
 417     }
 418
 419 }
 420
 421 /* Return true if V is a null-string or zero-number.  */
 422
 423 static bool
 424 null (VALUE *v)
 425 {
 426   switch (v->type)
 427     {
 428     case integer:
 429       return v->u.i == 0;
 430 #if HAVE_GMP
 431     case mp_integer:
 432       return mpz_sgn (v->u.z) == 0;
 433 #endif
 434     case string:
 435       {
 436         char const *cp = v->u.s;
 437         if (*cp == '\0')
 438           return true;
 439
 440         cp += (*cp == '-');
 441
 442         do
 443           {
 444             if (*cp != '0')
 445               return false;
 446           }
 447         while (*++cp);
 448
 449         return true;
 450       }
 451     default:
 452       abort ();
 453     }
 454 }
 455
 456 /* Return true if CP takes the form of an integer.  */
 457
 458 static bool
 459 looks_like_integer (char const *cp)
 460 {
 461   cp += (*cp == '-');
 462
 463   do
 464     if (! ISDIGIT (*cp))
 465       return false;
 466   while (*++cp);
 467
 468   return true;
 469 }
 470
 471 /* Coerce V to a string value (can't fail).  */
 472
 473 static void
 474 tostring (VALUE *v)
 475 {
 476   char buf[INT_BUFSIZE_BOUND (long int)];
 477
 478   switch (v->type)
 479     {
 480     case integer:
 481       snprintf (buf, sizeof buf, "%ld", v->u.i);
 482       v->u.s = xstrdup (buf);
 483       v->type = string;
 484       break;
 485 #if HAVE_GMP
 486     case mp_integer:
 487       {
 488         char *s = mpz_get_str (NULL, 10, v->u.z);
 489         if (!s)
 490           {
 491             xalloc_die ();
 492           }
 493         mpz_clear (v->u.z);
 494         v->u.s = s;
 495         v->type = string;
 496       }
 497       break;
 498 #endif
 499     case string:
 500       break;
 501     default:
 502       abort ();
 503     }
 504 }
 505
 506 /* Coerce V to an arithmetic value.
 507    Return true on success, false on failure.  */
 508
 509 static bool
 510 toarith (VALUE *v)
 511 {
 512   switch (v->type)
 513     {
 514     case integer:
 515     case mp_integer:
 516       return true;
 517
 518     case string:
 519       {
 520         long int value;
 521
 522         if (! looks_like_integer (v->u.s))
 523           return false;
 524         if (xstrtol (v->u.s, NULL, 10, &value, NULL) != LONGINT_OK)
 525           {
 526 #if HAVE_GMP
 527             if (mode != MP_NEVER)
 528               {
 529                 char *s = v->u.s;
 530                 if (mpz_init_set_str (v->u.z, s, 10))
 531                   abort ();  /* Bug in looks_like_integer, perhaps. */
 532                 v->type = mp_integer;
 533                 free (s);
 534               }
 535             else
 536               {
 537                 error (EXPR_FAILURE, ERANGE, "%s", v->u.s);
 538               }
 539 #else
 540             error (EXPR_FAILURE, ERANGE, "%s", v->u.s);
 541 #endif
 542           }
 543         else
 544           {
 545             free (v->u.s);
 546             v->u.i = value;
 547             v->type = integer;
 548           }
 549         return true;
 550       }
 551     default:
 552       abort ();
 553     }
 554 }
 555
 556 /* Extract a size_t value from a positive arithmetic value, V.
 557    The extracted value is stored in *VAL. */
 558 static bool
 559 getsize (const VALUE *v, size_t *val, bool *negative)
 560 {
 561   if (v->type == integer)
 562     {
 563       if (v->u.i < 0)
 564         {
 565           *negative = true;
 566           return false;
 567         }
 568       else
 569         {
 570           *negative = false;
 571           *val = v->u.i;
 572           return true;
 573         }
 574     }
 575   else if (v->type == mp_integer)
 576     {
 577 #if HAVE_GMP
 578       if (mpz_sgn (v->u.z) < 0)
 579         {
 580           *negative = true;
 581           return false;
 582         }
 583       else if (mpz_fits_ulong_p (v->u.z))
 584         {
 585           unsigned long ul;
 586           ul = mpz_get_ui (v->u.z);
 587           *val = ul;
 588           return true;
 589         }
 590       else
 591         {
 592           *negative = false;
 593           return false;
 594         }
 595 #else
 596       abort ();
 597 #endif
 598
 599     }
 600   else
 601     {
 602       abort ();                 /* should not pass a string. */
 603     }
 604 }
 605
 606
 607
 608 /* Return true and advance if the next token matches STR exactly.
 609    STR must not be NULL.  */
 610
 611 static bool
 612 nextarg (char const *str)
 613 {
 614   if (*args == NULL)
 615     return false;
 616   else
 617     {
 618       bool r = STREQ (*args, str);
 619       args += r;
 620       return r;
 621     }
 622 }
 623
 624 /* Return true if there no more tokens.  */
 625
 626 static bool
 627 nomoreargs (void)
 628 {
 629   return *args == 0;
 630 }
 631
 632 #ifdef EVAL_TRACE
 633 /* Print evaluation trace and args remaining.  */
 634
 635 static void
 636 trace (fxn)
 637      char *fxn;
 638 {
 639   char **a;
 640
 641   printf ("%s:", fxn);
 642   for (a = args; *a; a++)
 643     printf (" %s", *a);
 644   putchar ('\n');
 645 }
 646 #endif
 647
 648 /* Do the : operator.
 649    SV is the VALUE for the lhs (the string),
 650    PV is the VALUE for the rhs (the pattern).  */
 651
 652 static VALUE *
 653 docolon (VALUE *sv, VALUE *pv)
 654 {
 655   VALUE *v IF_LINT (= NULL);
 656   const char *errmsg;
 657   struct re_pattern_buffer re_buffer;
 658   char fastmap[UCHAR_MAX + 1];
 659   struct re_registers re_regs;
 660   regoff_t matchlen;
 661
 662   tostring (sv);
 663   tostring (pv);
 664
 665   re_regs.num_regs = 0;
 666   re_regs.start = NULL;
 667   re_regs.end = NULL;
 668
 669   re_buffer.buffer = NULL;
 670   re_buffer.allocated = 0;
 671   re_buffer.fastmap = fastmap;
 672   re_buffer.translate = NULL;
 673   re_syntax_options =
 674     RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
 675   errmsg = re_compile_pattern (pv->u.s, strlen (pv->u.s), &re_buffer);
 676   if (errmsg)
 677     error (EXPR_INVALID, 0, "%s", errmsg);
 678   re_buffer.newline_anchor = 0;
 679
 680   matchlen = re_match (&re_buffer, sv->u.s, strlen (sv->u.s), 0, &re_regs);
 681   if (0 <= matchlen)
 682     {
 683       /* Were \(...\) used? */
 684       if (re_buffer.re_nsub > 0)
 685         {
 686           sv->u.s[re_regs.end[1]] = '\0';
 687           v = str_value (sv->u.s + re_regs.start[1]);
 688         }
 689       else
 690         v = int_value (matchlen);
 691     }
 692   else if (matchlen == -1)
 693     {
 694       /* Match failed -- return the right kind of null.  */
 695       if (re_buffer.re_nsub > 0)
 696         v = str_value ("");
 697       else
 698         v = int_value (0);
 699     }
 700   else
 701     error (EXPR_FAILURE,
 702            (matchlen == -2 ? errno : EOVERFLOW),
 703            _("error in regular expression matcher"));
 704
 705   if (0 < re_regs.num_regs)
 706     {
 707       free (re_regs.start);
 708       free (re_regs.end);
 709     }
 710   re_buffer.fastmap = NULL;
 711   regfree (&re_buffer);
 712   return v;
 713 }
 714
 715 /* Handle bare operands and ( expr ) syntax.  */
 716
 717 static VALUE *
 718 eval7 (bool evaluate)
 719 {
 720   VALUE *v;
 721
 722 #ifdef EVAL_TRACE
 723   trace ("eval7");
 724 #endif
 725   if (nomoreargs ())
 726     syntax_error ();
 727
 728   if (nextarg ("("))
 729     {
 730       v = eval (evaluate);
 731       if (!nextarg (")"))
 732         syntax_error ();
 733       return v;
 734     }
 735
 736   if (nextarg (")"))
 737     syntax_error ();
 738
 739   return str_value (*args++);
 740 }
 741
 742 /* Handle match, substr, index, and length keywords, and quoting "+".  */
 743
 744 static VALUE *
 745 eval6 (bool evaluate)
 746 {
 747   VALUE *l;
 748   VALUE *r;
 749   VALUE *v;
 750   VALUE *i1;
 751   VALUE *i2;
 752
 753 #ifdef EVAL_TRACE
 754   trace ("eval6");
 755 #endif
 756   if (nextarg ("+"))
 757     {
 758       if (nomoreargs ())
 759         syntax_error ();
 760       return str_value (*args++);
 761     }
 762   else if (nextarg ("length"))
 763     {
 764       r = eval6 (evaluate);
 765       tostring (r);
 766       v = int_value (strlen (r->u.s));
 767       freev (r);
 768       return v;
 769     }
 770   else if (nextarg ("match"))
 771     {
 772       l = eval6 (evaluate);
 773       r = eval6 (evaluate);
 774       if (evaluate)
 775         {
 776           v = docolon (l, r);
 777           freev (l);
 778         }
 779       else
 780         v = l;
 781       freev (r);
 782       return v;
 783     }
 784   else if (nextarg ("index"))
 785     {
 786       size_t pos, len;
 787
 788       l = eval6 (evaluate);
 789       r = eval6 (evaluate);
 790       tostring (l);
 791       tostring (r);
 792       pos = strcspn (l->u.s, r->u.s);
 793       len = strlen (l->u.s);
 794       if (pos == len)
 795         {
 796           v = int_value (0);
 797         }
 798       else
 799         {
 800           if (pos < LONG_MAX)
 801             {
 802               v = int_value (pos + 1);
 803             }
 804           else
 805             {
 806 #if HAVE_GMP
 807               if (mode != MP_NEVER
 808                   && pos < ULONG_MAX)
 809                 {
 810                   v = xmalloc (sizeof *v);
 811                   mpz_init_set_ui (v->u.z, pos+1);
 812                   v->type = mp_integer;
 813                 }
 814               else
 815 #endif
 816                 {
 817                   string_too_long ();
 818                 }
 819             }
 820         }
 821       freev (l);
 822       freev (r);
 823       return v;
 824     }
 825   else if (nextarg ("substr"))
 826     {
 827       size_t llen;
 828       l = eval6 (evaluate);
 829       i1 = eval6 (evaluate);
 830       i2 = eval6 (evaluate);
 831       tostring (l);
 832       llen = strlen (l->u.s);
 833
 834       if (!toarith (i1) || !toarith (i2))
 835         v = str_value ("");
 836       else
 837         {
 838           size_t pos, len;
 839           bool negative = false;
 840
 841           if (getsize (i1, &pos, &negative))
 842             if (getsize (i2, &len, &negative))
 843               if (pos == 0 || len == 0)
 844                 v = str_value ("");
 845               else
 846                 v = substr_value (l->u.s, llen, pos-1, len);
 847             else
 848               if (negative)
 849                 v = str_value ("");
 850               else
 851                 die (EXPR_FAILURE, ERANGE, _("string offset is too large"));
 852           else
 853             if (negative)
 854               v = str_value ("");
 855             else
 856               die (EXPR_FAILURE, ERANGE, _("substring length too large"));
 857         }
 858       freev (l);
 859       freev (i1);
 860       freev (i2);
 861       return v;
 862     }
 863   else
 864     return eval7 (evaluate);
 865 }
 866
 867 /* Handle : operator (pattern matching).
 868    Calls docolon to do the real work.  */
 869
 870 static VALUE *
 871 eval5 (bool evaluate)
 872 {
 873   VALUE *l;
 874   VALUE *r;
 875   VALUE *v;
 876
 877 #ifdef EVAL_TRACE
 878   trace ("eval5");
 879 #endif
 880   l = eval6 (evaluate);
 881   while (1)
 882     {
 883       if (nextarg (":"))
 884         {
 885           r = eval6 (evaluate);
 886           if (evaluate)
 887             {
 888               v = docolon (l, r);
 889               freev (l);
 890               l = v;
 891             }
 892           freev (r);
 893         }
 894       else
 895         return l;
 896     }
 897 }
 898
 899
 900 #if HAVE_GMP
 901 static void
 902 promote (VALUE *x)
 903 {
 904   if (x->type == integer)
 905     mpz_init_set_si (x->u.z, x->u.i);
 906 }
 907 #endif
 908
 909 /* L = L * R.  Both L and R are arithmetic. */
 910 static void
 911 domult (VALUE *l, VALUE *r)
 912 {
 913   if (l->type == integer && r->type == integer)
 914     {
 915       long int val = 0;
 916       val = l->u.i * r->u.i;
 917       if (! (l->u.i == 0 || r->u.i == 0
 918              || ((val < 0) == ((l->u.i < 0) ^ (r->u.i < 0))
 919                  && val / l->u.i == r->u.i)))
 920         {
 921           /* Result would (did) overflow.  Handle with MP if available. */
 922           if (mode != MP_NEVER)
 923             {
 924 #if HAVE_GMP
 925               mpz_init_set_si (l->u.z, l->u.i);
 926               mpz_mul_si (l->u.z, l->u.z, r->u.i); /* L*=R */
 927               l->type = mp_integer;
 928 #endif
 929             }
 930           else
 931             {
 932               integer_overflow ('*');
 933             }
 934         }
 935       else
 936         {
 937           l->u.i = val;
 938         }
 939     }
 940   else
 941     {
 942       /* At least one operand is already mp_integer, so promote the other. */
 943 #if HAVE_GMP
 944       /* We could use mpz_mul_si here if R is not already mp_integer,
 945          but for the moment we'll try to minimise code paths. */
 946       if (l->type == integer)
 947         mpz_init_set_si (l->u.z, l->u.i);
 948       if (r->type == integer)
 949         mpz_init_set_si (r->u.z, r->u.i);
 950       l->type = r->type = mp_integer;
 951       mpz_mul (l->u.z, l->u.z, r->u.z); /* L*=R */
 952 #else
 953       abort ();
 954 #endif
 955     }
 956 }
 957
 958 /* L = L / R or (if WANT_MODULUS) L = L % R */
 959 static void
 960 dodivide (VALUE *l, VALUE *r, bool want_modulus)
 961 {
 962   if (r->type == integer && r->u.i == 0)
 963     error (EXPR_INVALID, 0, _("division by zero"));
 964 #if HAVE_GMP
 965   if (r->type == mp_integer && mpz_sgn (r->u.z) == 0)
 966     error (EXPR_INVALID, 0, _("division by zero"));
 967 #endif
 968   if (l->type == integer && r->type == integer)
 969     {
 970       if (l->u.i < - INT_MAX && r->u.i == -1)
 971         {
 972           /* Some x86-style hosts raise an exception for
 973              INT_MIN / -1 and INT_MIN % -1, so handle these
 974              problematic cases specially.  */
 975           if (want_modulus)
 976             {
 977               /* X mod -1 is zero for all negative X.
 978                  Although strictly this is implementation-defined,
 979                  we don't want to coredump, so we avoid the calculation. */
 980               l->u.i = 0;
 981               return;
 982             }
 983           else
 984             {
 985               if (mode != MP_NEVER)
 986                 {
 987 #if HAVE_GMP
 988                   /* Handle the case by promoting. */
 989                   mpz_init_set_si (l->u.z, l->u.i);
 990                   l->type = mp_integer;
 991 #endif
 992                 }
 993               else
 994                 {
 995                   integer_overflow ('/');
 996                 }
 997             }
 998         }
 999       else
1000         {
1001           l->u.i = want_modulus ? l->u.i % r->u.i : l->u.i / r->u.i;
1002           return;
1003         }
1004     }
1005   /* If we get to here, at least one operand is mp_integer
1006      and R is not 0. */
1007 #if HAVE_GMP
1008   {
1009     int sign_l, sign_r;
1010     promote (l);
1011     promote (r);
1012     sign_l = mpz_sgn (l->u.z);
1013     sign_r = mpz_sgn (r->u.z);
1014
1015     if (!want_modulus)
1016       {
1017         if (!sign_l)
1018           {
1019             mpz_set_si (l->u.z, 0);
1020           }
1021         else if (sign_l < 0 || sign_r < 0)
1022           {
1023             /* At least one operand is negative.  For integer arithmetic,
1024                it's platform-dependent if the operation rounds up or down.
1025                We mirror what the implementation does. */
1026             switch ((3*sign_l) / (2*sign_r))
1027               {
1028               case  2:          /* round toward +inf. */
1029               case -1:          /* round toward +inf. */
1030                 mpz_cdiv_q (l->u.z, l->u.z, r->u.z);
1031                 break;
1032               case -2:          /* round toward -inf. */
1033               case  1:          /* round toward -inf */
1034                 mpz_fdiv_q (l->u.z, l->u.z, r->u.z);
1035                 break;
1036               default:
1037                 abort ();
1038               }
1039           }
1040         else
1041           {
1042             /* Both operands positive.  Round toward -inf. */
1043             mpz_fdiv_q (l->u.z, l->u.z, r->u.z);
1044           }
1045       }
1046     else
1047       {
1048         mpz_mod (l->u.z, l->u.z, r->u.z); /* L = L % R */
1049
1050         /* If either operand is negative, it's platform-dependent if
1051            the remainer is positive or negative.  We mirror what the
1052            implementation does. */
1053         if (sign_l % sign_r < 0)
1054           mpz_neg (l->u.z, l->u.z); /* L = (-L) */
1055       }
1056   }
1057 #else
1058   abort ();
1059 #endif
1060 }
1061
1062
1063 /* Handle *, /, % operators.  */
1064
1065 static VALUE *
1066 eval4 (bool evaluate)
1067 {
1068   VALUE *l;
1069   VALUE *r;
1070   enum { multiply, divide, mod } fxn;
1071
1072 #ifdef EVAL_TRACE
1073   trace ("eval4");
1074 #endif
1075   l = eval5 (evaluate);
1076   while (1)
1077     {
1078       if (nextarg ("*"))
1079         fxn = multiply;
1080       else if (nextarg ("/"))
1081         fxn = divide;
1082       else if (nextarg ("%"))
1083         fxn = mod;
1084       else
1085         return l;
1086       r = eval5 (evaluate);
1087       if (evaluate)
1088         {
1089           if (!toarith (l) || !toarith (r))
1090             error (EXPR_INVALID, 0, _("non-numeric argument"));
1091           switch (fxn)
1092             {
1093             case multiply:
1094               domult (l, r);
1095               break;
1096             case divide:
1097             case mod:
1098               dodivide (l, r, fxn==mod);
1099               break;
1100             }
1101         }
1102       freev (r);
1103     }
1104 }
1105
1106 /* L = L + R, or L = L - R */
1107 static void
1108 doadd (VALUE *l, VALUE *r, bool add)
1109 {
1110   long int val = 0;
1111
1112   if (!toarith (l) || !toarith (r))
1113     error (EXPR_INVALID, 0, _("non-numeric argument"));
1114   if (l->type == integer && r->type == integer)
1115     {
1116       if (add)
1117         {
1118           val = l->u.i + r->u.i;
1119           if ((val < l->u.i) == (r->u.i < 0))
1120             {
1121               l->u.i = val;
1122               return;
1123             }
1124         }
1125       else
1126         {
1127           val = l->u.i - r->u.i;
1128           if ((l->u.i < val) == (r->u.i < 0))
1129             {
1130               l->u.i = val;
1131               return;
1132             }
1133         }
1134     }
1135   /* If we get to here, either the operation overflowed or at least
1136      one operand is an mp_integer. */
1137   if (mode != MP_NEVER)
1138     {
1139 #if HAVE_GMP
1140       promote (l);
1141       promote (r);
1142       if (add)
1143         mpz_add (l->u.z, l->u.z, r->u.z);
1144       else
1145         mpz_sub (l->u.z, l->u.z, r->u.z);
1146 #endif
1147     }
1148   else
1149     {
1150       integer_overflow ('-');
1151     }
1152 }
1153
1154
1155
1156 /* Handle +, - operators.  */
1157
1158 static VALUE *
1159 eval3 (bool evaluate)
1160 {
1161   VALUE *l;
1162   VALUE *r;
1163   bool add;
1164
1165 #ifdef EVAL_TRACE
1166   trace ("eval3");
1167 #endif
1168   l = eval4 (evaluate);
1169   while (1)
1170     {
1171       if (nextarg ("+"))
1172         add = true;
1173       else if (nextarg ("-"))
1174         add = false;
1175       else
1176         return l;
1177       r = eval4 (evaluate);
1178       if (evaluate)
1179         {
1180           doadd (l, r, add);
1181         }
1182       freev (r);
1183     }
1184 }
1185
1186 /* Handle comparisons.  */
1187
1188 static VALUE *
1189 eval2 (bool evaluate)
1190 {
1191   VALUE *l;
1192
1193 #ifdef EVAL_TRACE
1194   trace ("eval2");
1195 #endif
1196   l = eval3 (evaluate);
1197   while (1)
1198     {
1199       VALUE *r;
1200       enum
1201         {
1202           less_than, less_equal, equal, not_equal, greater_equal, greater_than
1203         } fxn;
1204       bool val = false;
1205
1206       if (nextarg ("<"))
1207         fxn = less_than;
1208       else if (nextarg ("<="))
1209         fxn = less_equal;
1210       else if (nextarg ("=") || nextarg ("=="))
1211         fxn = equal;
1212       else if (nextarg ("!="))
1213         fxn = not_equal;
1214       else if (nextarg (">="))
1215         fxn = greater_equal;
1216       else if (nextarg (">"))
1217         fxn = greater_than;
1218       else
1219         return l;
1220       r = eval3 (evaluate);
1221
1222       if (evaluate)
1223         {
1224           int cmp;
1225           tostring (l);
1226           tostring (r);
1227
1228           if (looks_like_integer (l->u.s) && looks_like_integer (r->u.s))
1229             cmp = strintcmp (l->u.s, r->u.s);
1230           else
1231             {
1232               errno = 0;
1233               cmp = strcoll (l->u.s, r->u.s);
1234
1235               if (errno)
1236                 {
1237                   error (0, errno, _("string comparison failed"));
1238                   error (0, 0, _("set LC_ALL='C' to work around the problem"));
1239                   error (EXPR_INVALID, 0,
1240                          _("the strings compared were %s and %s"),
1241                          quotearg_n_style (0, locale_quoting_style, l->u.s),
1242                          quotearg_n_style (1, locale_quoting_style, r->u.s));
1243                 }
1244             }
1245
1246           switch (fxn)
1247             {
1248             case less_than:     val = (cmp <  0); break;
1249             case less_equal:    val = (cmp <= 0); break;
1250             case equal:         val = (cmp == 0); break;
1251             case not_equal:     val = (cmp != 0); break;
1252             case greater_equal: val = (cmp >= 0); break;
1253             case greater_than:  val = (cmp >  0); break;
1254             default: abort ();
1255             }
1256         }
1257
1258       freev (l);
1259       freev (r);
1260       l = int_value (val);
1261     }
1262 }
1263
1264 /* Handle &.  */
1265
1266 static VALUE *
1267 eval1 (bool evaluate)
1268 {
1269   VALUE *l;
1270   VALUE *r;
1271
1272 #ifdef EVAL_TRACE
1273   trace ("eval1");
1274 #endif
1275   l = eval2 (evaluate);
1276   while (1)
1277     {
1278       if (nextarg ("&"))
1279         {
1280           r = eval2 (evaluate & ~ null (l));
1281           if (null (l) || null (r))
1282             {
1283               freev (l);
1284               freev (r);
1285               l = int_value (0);
1286             }
1287           else
1288             freev (r);
1289         }
1290       else
1291         return l;
1292     }
1293 }
1294
1295 /* Handle |.  */
1296
1297 static VALUE *
1298 eval (bool evaluate)
1299 {
1300   VALUE *l;
1301   VALUE *r;
1302
1303 #ifdef EVAL_TRACE
1304   trace ("eval");
1305 #endif
1306   l = eval1 (evaluate);
1307   while (1)
1308     {
1309       if (nextarg ("|"))
1310         {
1311           r = eval1 (evaluate & null (l));
1312           if (null (l))
1313             {
1314               freev (l);
1315               l = r;
1316               if (null (l))
1317                 {
1318                   freev (l);
1319                   l = int_value (0);
1320                 }
1321             }
1322           else
1323             freev (r);
1324         }
1325       else
1326         return l;
1327     }
1328 }