1 /* expr -- evaluate expressions.
2 Copyright (C) 86, 1991-1997, 1999-2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Author: Mike Parker.
18 Modified for arbitrary-precision calculation by James Youngman.
20 This program evaluates expressions. Each token (operator, operand,
21 parenthesis) of the expression must be a seperate argument. The
22 parser used is a reasonably general one, though any incarnation of
23 it is language-specific. It is especially nice for expressions.
25 No parse tree is needed; a new node is evaluated immediately.
26 One function can handle multiple operators all of equal precedence,
27 provided they all associate ((x op x) op x).
29 Define EVAL_TRACE to print an evaluation trace. */
33 #include <sys/types.h>
43 #include "strnumcmp.h"
46 /* The official name of this program (e.g., no `g' prefix). */
47 #define PROGRAM_NAME "expr"
49 #define AUTHORS proper_name ("Mike Parker"), proper_name ("James Youngman")
54 /* Invalid expression: e.g., its form does not conform to the
55 grammar for expressions. Our grammar is an extension of the
59 /* An internal error occurred, e.g., arithmetic overflow, storage
64 /* The kinds of value we can have.
65 In the comments below, a variable is described as "arithmetic" if
66 it is either integer or mp_integer. Variables are of type mp_integer
67 only if GNU MP is available, but the type designator is always defined. */
74 typedef enum valtype TYPE;
79 TYPE type; /* Which kind. */
81 { /* The value itself. */
82 /* We could use intmax_t but that would integrate less well with GMP,
83 since GMP has mpz_set_si but no intmax_t equivalent. */
91 typedef struct valinfo VALUE;
93 /* The arguments given to the program, minus the program name. */
96 static VALUE *eval (bool);
97 static bool nomoreargs (void);
98 static bool null (VALUE *v);
99 static void printv (VALUE *v);
101 /* Arithmetic is done in one of three modes.
103 The --bignum option forces all arithmetic to use bignums other than
104 string indexing (mode==MP_ALWAYS). The --no-bignum option forces
105 all arithmetic to use native types rather than bignums
108 The default mode is MP_AUTO if GMP is available and MP_NEVER if
109 not. Most functions will process a bignum if one is found, but
110 will not convert a native integer to a string if the mode is
114 MP_NEVER, /* Never use bignums */
116 MP_ALWAYS, /* Always use bignums. */
117 MP_AUTO, /* Switch if result would otherwise overflow */
120 static enum arithmetic_mode mode =
132 if (status != EXIT_SUCCESS)
133 fprintf (stderr, _("Try `%s --help' for more information.\n"),
138 Usage: %s EXPRESSION\n\
141 program_name, program_name);
144 --bignum always use arbitrary-precision arithmetic\n\
145 --no-bignum always use single-precision arithmetic\n"),
147 fputs (HELP_OPTION_DESCRIPTION, stdout);
148 fputs (VERSION_OPTION_DESCRIPTION, stdout);
151 Print the value of EXPRESSION to standard output. A blank line below\n\
152 separates increasing precedence groups. EXPRESSION may be:\n\
154 ARG1 | ARG2 ARG1 if it is neither null nor 0, otherwise ARG2\n\
156 ARG1 & ARG2 ARG1 if neither argument is null or 0, otherwise 0\n\
160 ARG1 < ARG2 ARG1 is less than ARG2\n\
161 ARG1 <= ARG2 ARG1 is less than or equal to ARG2\n\
162 ARG1 = ARG2 ARG1 is equal to ARG2\n\
163 ARG1 != ARG2 ARG1 is unequal to ARG2\n\
164 ARG1 >= ARG2 ARG1 is greater than or equal to ARG2\n\
165 ARG1 > ARG2 ARG1 is greater than ARG2\n\
169 ARG1 + ARG2 arithmetic sum of ARG1 and ARG2\n\
170 ARG1 - ARG2 arithmetic difference of ARG1 and ARG2\n\
172 /* Tell xgettext that the "% A" below is not a printf-style
173 format string: xgettext:no-c-format */
176 ARG1 * ARG2 arithmetic product of ARG1 and ARG2\n\
177 ARG1 / ARG2 arithmetic quotient of ARG1 divided by ARG2\n\
178 ARG1 % ARG2 arithmetic remainder of ARG1 divided by ARG2\n\
182 STRING : REGEXP anchored pattern match of REGEXP in STRING\n\
184 match STRING REGEXP same as STRING : REGEXP\n\
185 substr STRING POS LENGTH substring of STRING, POS counted from 1\n\
186 index STRING CHARS index in STRING where any CHARS is found, or 0\n\
187 length STRING length of STRING\n\
190 + TOKEN interpret TOKEN as a string, even if it is a\n\
191 keyword like `match' or an operator like `/'\n\
193 ( EXPRESSION ) value of EXPRESSION\n\
197 Beware that many operators need to be escaped or quoted for shells.\n\
198 Comparisons are arithmetic if both ARGs are numbers, else lexicographical.\n\
199 Pattern matches return the string matched between \\( and \\) or null; if\n\
200 \\( and \\) are not used, they return the number of characters matched or 0.\n\
204 Exit status is 0 if EXPRESSION is neither null nor 0, 1 if EXPRESSION is null\n\
205 or 0, 2 if EXPRESSION is syntactically invalid, and 3 if an error occurred.\n\
207 emit_bug_reporting_address ();
212 /* Report a syntax error and exit. */
216 error (EXPR_INVALID, 0, _("syntax error"));
219 /* Report an integer overflow for operation OP and exit. */
221 integer_overflow (char op)
223 error (EXPR_FAILURE, 0,
224 _("arithmetic operation %c produced an out of range value, "
225 "but arbitrary-precision arithmetic is not available"), op);
228 static void die (int exit_status, int errno_val, char const *msg)
231 die (int exit_status, int errno_val, char const *msg)
233 assert (exit_status != 0);
234 error (exit_status, errno_val, "%s", msg);
235 abort (); /* notreached */
239 string_too_long (void)
241 die (EXPR_FAILURE, ERANGE, _("string too long"));
246 USE_BIGNUM = CHAR_MAX + 1,
250 static struct option const long_options[] =
252 {"bignum", no_argument, NULL, USE_BIGNUM},
253 {"no-bignum", no_argument, NULL, NO_USE_BIGNUM},
254 {GETOPT_HELP_OPTION_DECL},
255 {GETOPT_VERSION_OPTION_DECL},
260 main (int argc, char **argv)
265 initialize_main (&argc, &argv);
266 set_program_name (argv[0]);
267 setlocale (LC_ALL, "");
268 bindtextdomain (PACKAGE, LOCALEDIR);
269 textdomain (PACKAGE);
271 initialize_exit_failure (EXPR_FAILURE);
272 atexit (close_stdout);
274 /* The argument -0 should not result in an error message. */
277 while ((c = getopt_long (argc, argv, "+", long_options, NULL)) != -1)
279 /* "expr -0" should interpret the -0 as an integer argument.
280 arguments like --foo should also be interpreted as a string
281 argument to be "evaluated".
295 error (EXPR_FAILURE, 0,
296 _("arbitrary-precision support is not available"));
304 case_GETOPT_HELP_CHAR;
306 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
312 error (0, 0, _("missing operand"));
313 usage (EXPR_INVALID);
316 args = argv + optind;
326 /* Return a VALUE for I. */
329 int_value (long int i)
331 VALUE *v = xmalloc (sizeof *v);
333 if (mode == MP_ALWAYS)
335 /* all integer values are handled as bignums. */
336 mpz_init_set_si (v->u.z, i);
337 v->type = mp_integer;
347 /* Return a VALUE for S. */
350 str_value (char const *s)
352 VALUE *v = xmalloc (sizeof *v);
354 v->u.s = xstrdup (s);
360 substr_value (char const *s, size_t len, size_t pos, size_t nchars_wanted)
363 return str_value ("");
366 VALUE *v = xmalloc (sizeof *v);
367 size_t vlen = MIN (nchars_wanted, len - pos + 1);
370 v->u.s = xmalloc (vlen + 1);
371 vlim = mempcpy (v->u.s, s + pos, vlen);
378 /* Free VALUE V, including structure components. */
383 if (v->type == string)
387 else if (v->type == mp_integer)
389 assert (mode != MP_NEVER);
405 printf ("%ld\n", v->u.i);
412 mpz_out_str (stdout, 10, v->u.z);
422 /* Return true if V is a null-string or zero-number. */
433 return mpz_sgn (v->u.z) == 0;
437 char const *cp = v->u.s;
457 /* Return true if CP takes the form of an integer. */
460 looks_like_integer (char const *cp)
472 /* Coerce V to a string value (can't fail). */
477 char buf[INT_BUFSIZE_BOUND (long int)];
482 snprintf (buf, sizeof buf, "%ld", v->u.i);
483 v->u.s = xstrdup (buf);
489 char *s = mpz_get_str (NULL, 10, v->u.z);
507 /* Coerce V to an arithmetic value.
508 Return true on success, false on failure. */
523 if (! looks_like_integer (v->u.s))
525 if (xstrtol (v->u.s, NULL, 10, &value, NULL) != LONGINT_OK)
528 if (mode != MP_NEVER)
531 if (mpz_init_set_str (v->u.z, s, 10))
532 abort (); /* Bug in looks_like_integer, perhaps. */
533 v->type = mp_integer;
538 error (EXPR_FAILURE, ERANGE, "%s", v->u.s);
541 error (EXPR_FAILURE, ERANGE, "%s", v->u.s);
557 /* Extract a size_t value from a positive arithmetic value, V.
558 The extracted value is stored in *VAL. */
560 getsize (const VALUE *v, size_t *val, bool *negative)
562 if (v->type == integer)
576 else if (v->type == mp_integer)
579 if (mpz_sgn (v->u.z) < 0)
584 else if (mpz_fits_ulong_p (v->u.z))
587 ul = mpz_get_ui (v->u.z);
603 abort (); /* should not pass a string. */
609 /* Return true and advance if the next token matches STR exactly.
610 STR must not be NULL. */
613 nextarg (char const *str)
619 bool r = STREQ (*args, str);
625 /* Return true if there no more tokens. */
634 /* Print evaluation trace and args remaining. */
643 for (a = args; *a; a++)
649 /* Do the : operator.
650 SV is the VALUE for the lhs (the string),
651 PV is the VALUE for the rhs (the pattern). */
654 docolon (VALUE *sv, VALUE *pv)
656 VALUE *v IF_LINT (= NULL);
658 struct re_pattern_buffer re_buffer;
659 char fastmap[UCHAR_MAX + 1];
660 struct re_registers re_regs;
666 re_regs.num_regs = 0;
667 re_regs.start = NULL;
670 re_buffer.buffer = NULL;
671 re_buffer.allocated = 0;
672 re_buffer.fastmap = fastmap;
673 re_buffer.translate = NULL;
675 RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
676 errmsg = re_compile_pattern (pv->u.s, strlen (pv->u.s), &re_buffer);
678 error (EXPR_INVALID, 0, "%s", errmsg);
679 re_buffer.newline_anchor = 0;
681 matchlen = re_match (&re_buffer, sv->u.s, strlen (sv->u.s), 0, &re_regs);
684 /* Were \(...\) used? */
685 if (re_buffer.re_nsub > 0)
687 sv->u.s[re_regs.end[1]] = '\0';
688 v = str_value (sv->u.s + re_regs.start[1]);
691 v = int_value (matchlen);
693 else if (matchlen == -1)
695 /* Match failed -- return the right kind of null. */
696 if (re_buffer.re_nsub > 0)
703 (matchlen == -2 ? errno : EOVERFLOW),
704 _("error in regular expression matcher"));
706 if (0 < re_regs.num_regs)
708 free (re_regs.start);
711 re_buffer.fastmap = NULL;
712 regfree (&re_buffer);
716 /* Handle bare operands and ( expr ) syntax. */
719 eval7 (bool evaluate)
740 return str_value (*args++);
743 /* Handle match, substr, index, and length keywords, and quoting "+". */
746 eval6 (bool evaluate)
761 return str_value (*args++);
763 else if (nextarg ("length"))
765 r = eval6 (evaluate);
767 v = int_value (strlen (r->u.s));
771 else if (nextarg ("match"))
773 l = eval6 (evaluate);
774 r = eval6 (evaluate);
785 else if (nextarg ("index"))
789 l = eval6 (evaluate);
790 r = eval6 (evaluate);
793 pos = strcspn (l->u.s, r->u.s);
794 len = strlen (l->u.s);
803 v = int_value (pos + 1);
811 v = xmalloc (sizeof *v);
812 mpz_init_set_ui (v->u.z, pos+1);
813 v->type = mp_integer;
826 else if (nextarg ("substr"))
829 l = eval6 (evaluate);
830 i1 = eval6 (evaluate);
831 i2 = eval6 (evaluate);
833 llen = strlen (l->u.s);
835 if (!toarith (i1) || !toarith (i2))
840 bool negative = false;
842 if (getsize (i1, &pos, &negative))
843 if (getsize (i2, &len, &negative))
844 if (pos == 0 || len == 0)
847 v = substr_value (l->u.s, llen, pos-1, len);
852 die (EXPR_FAILURE, ERANGE, _("string offset is too large"));
857 die (EXPR_FAILURE, ERANGE, _("substring length too large"));
865 return eval7 (evaluate);
868 /* Handle : operator (pattern matching).
869 Calls docolon to do the real work. */
872 eval5 (bool evaluate)
881 l = eval6 (evaluate);
886 r = eval6 (evaluate);
905 if (x->type == integer)
906 mpz_init_set_si (x->u.z, x->u.i);
910 /* L = L * R. Both L and R are arithmetic. */
912 domult (VALUE *l, VALUE *r)
914 if (l->type == integer && r->type == integer)
917 val = l->u.i * r->u.i;
918 if (! (l->u.i == 0 || r->u.i == 0
919 || ((val < 0) == ((l->u.i < 0) ^ (r->u.i < 0))
920 && val / l->u.i == r->u.i)))
922 /* Result would (did) overflow. Handle with MP if available. */
923 if (mode != MP_NEVER)
926 mpz_init_set_si (l->u.z, l->u.i);
927 mpz_mul_si (l->u.z, l->u.z, r->u.i); /* L*=R */
928 l->type = mp_integer;
933 integer_overflow ('*');
943 /* At least one operand is already mp_integer, so promote the other. */
945 /* We could use mpz_mul_si here if R is not already mp_integer,
946 but for the moment we'll try to minimise code paths. */
947 if (l->type == integer)
948 mpz_init_set_si (l->u.z, l->u.i);
949 if (r->type == integer)
950 mpz_init_set_si (r->u.z, r->u.i);
951 l->type = r->type = mp_integer;
952 mpz_mul (l->u.z, l->u.z, r->u.z); /* L*=R */
959 /* L = L / R or (if WANT_MODULUS) L = L % R */
961 dodivide (VALUE *l, VALUE *r, bool want_modulus)
963 if (r->type == integer && r->u.i == 0)
964 error (EXPR_INVALID, 0, _("division by zero"));
966 if (r->type == mp_integer && mpz_sgn (r->u.z) == 0)
967 error (EXPR_INVALID, 0, _("division by zero"));
969 if (l->type == integer && r->type == integer)
971 if (l->u.i < - INT_MAX && r->u.i == -1)
973 /* Some x86-style hosts raise an exception for
974 INT_MIN / -1 and INT_MIN % -1, so handle these
975 problematic cases specially. */
978 /* X mod -1 is zero for all negative X.
979 Although strictly this is implementation-defined,
980 we don't want to coredump, so we avoid the calculation. */
986 if (mode != MP_NEVER)
989 /* Handle the case by promoting. */
990 mpz_init_set_si (l->u.z, l->u.i);
991 l->type = mp_integer;
996 integer_overflow ('/');
1002 l->u.i = want_modulus ? l->u.i % r->u.i : l->u.i / r->u.i;
1006 /* If we get to here, at least one operand is mp_integer
1013 sign_l = mpz_sgn (l->u.z);
1014 sign_r = mpz_sgn (r->u.z);
1020 mpz_set_si (l->u.z, 0);
1022 else if (sign_l < 0 || sign_r < 0)
1024 /* At least one operand is negative. For integer arithmetic,
1025 it's platform-dependent if the operation rounds up or down.
1026 We mirror what the implementation does. */
1027 switch ((3*sign_l) / (2*sign_r))
1029 case 2: /* round toward +inf. */
1030 case -1: /* round toward +inf. */
1031 mpz_cdiv_q (l->u.z, l->u.z, r->u.z);
1033 case -2: /* round toward -inf. */
1034 case 1: /* round toward -inf */
1035 mpz_fdiv_q (l->u.z, l->u.z, r->u.z);
1043 /* Both operands positive. Round toward -inf. */
1044 mpz_fdiv_q (l->u.z, l->u.z, r->u.z);
1049 mpz_mod (l->u.z, l->u.z, r->u.z); /* L = L % R */
1051 /* If either operand is negative, it's platform-dependent if
1052 the remainer is positive or negative. We mirror what the
1053 implementation does. */
1054 if (sign_l % sign_r < 0)
1055 mpz_neg (l->u.z, l->u.z); /* L = (-L) */
1064 /* Handle *, /, % operators. */
1067 eval4 (bool evaluate)
1071 enum { multiply, divide, mod } fxn;
1076 l = eval5 (evaluate);
1081 else if (nextarg ("/"))
1083 else if (nextarg ("%"))
1087 r = eval5 (evaluate);
1090 if (!toarith (l) || !toarith (r))
1091 error (EXPR_INVALID, 0, _("non-numeric argument"));
1099 dodivide (l, r, fxn==mod);
1107 /* L = L + R, or L = L - R */
1109 doadd (VALUE *l, VALUE *r, bool add)
1113 if (!toarith (l) || !toarith (r))
1114 error (EXPR_INVALID, 0, _("non-numeric argument"));
1115 if (l->type == integer && r->type == integer)
1119 val = l->u.i + r->u.i;
1120 if ((val < l->u.i) == (r->u.i < 0))
1128 val = l->u.i - r->u.i;
1129 if ((l->u.i < val) == (r->u.i < 0))
1136 /* If we get to here, either the operation overflowed or at least
1137 one operand is an mp_integer. */
1138 if (mode != MP_NEVER)
1144 mpz_add (l->u.z, l->u.z, r->u.z);
1146 mpz_sub (l->u.z, l->u.z, r->u.z);
1151 integer_overflow ('-');
1157 /* Handle +, - operators. */
1160 eval3 (bool evaluate)
1169 l = eval4 (evaluate);
1174 else if (nextarg ("-"))
1178 r = eval4 (evaluate);
1187 /* Handle comparisons. */
1190 eval2 (bool evaluate)
1197 l = eval3 (evaluate);
1203 less_than, less_equal, equal, not_equal, greater_equal, greater_than
1209 else if (nextarg ("<="))
1211 else if (nextarg ("=") || nextarg ("=="))
1213 else if (nextarg ("!="))
1215 else if (nextarg (">="))
1216 fxn = greater_equal;
1217 else if (nextarg (">"))
1221 r = eval3 (evaluate);
1229 if (looks_like_integer (l->u.s) && looks_like_integer (r->u.s))
1230 cmp = strintcmp (l->u.s, r->u.s);
1234 cmp = strcoll (l->u.s, r->u.s);
1238 error (0, errno, _("string comparison failed"));
1239 error (0, 0, _("set LC_ALL='C' to work around the problem"));
1240 error (EXPR_INVALID, 0,
1241 _("the strings compared were %s and %s"),
1242 quotearg_n_style (0, locale_quoting_style, l->u.s),
1243 quotearg_n_style (1, locale_quoting_style, r->u.s));
1249 case less_than: val = (cmp < 0); break;
1250 case less_equal: val = (cmp <= 0); break;
1251 case equal: val = (cmp == 0); break;
1252 case not_equal: val = (cmp != 0); break;
1253 case greater_equal: val = (cmp >= 0); break;
1254 case greater_than: val = (cmp > 0); break;
1261 l = int_value (val);
1268 eval1 (bool evaluate)
1276 l = eval2 (evaluate);
1281 r = eval2 (evaluate & ~ null (l));
1282 if (null (l) || null (r))
1299 eval (bool evaluate)
1307 l = eval1 (evaluate);
1312 r = eval1 (evaluate & null (l));