Bump to version 1.22.1
[platform/upstream/busybox.git] / editors / awk.c
index d69720d..d0e3781 100644 (file)
@@ -7,12 +7,45 @@
  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
  */
 
+//config:config AWK
+//config:      bool "awk"
+//config:      default y
+//config:      help
+//config:        Awk is used as a pattern scanning and processing language. This is
+//config:        the BusyBox implementation of that programming language.
+//config:
+//config:config FEATURE_AWK_LIBM
+//config:      bool "Enable math functions (requires libm)"
+//config:      default y
+//config:      depends on AWK
+//config:      help
+//config:        Enable math functions of the Awk programming language.
+//config:        NOTE: This will require libm to be present for linking.
+//config:
+//config:config FEATURE_AWK_GNU_EXTENSIONS
+//config:      bool "Enable a few GNU extensions"
+//config:      default y
+//config:      depends on AWK
+//config:      help
+//config:        Enable a few features from gawk:
+//config:        * command line option -e AWK_PROGRAM
+//config:        * simultaneous use of -f and -e on the command line.
+//config:          This enables the use of awk library files.
+//config:          Ex: awk -f mylib.awk -e '{print myfunction($1);}' ...
+
+//applet:IF_AWK(APPLET_NOEXEC(awk, awk, BB_DIR_USR_BIN, BB_SUID_DROP, awk))
+
+//kbuild:lib-$(CONFIG_AWK) += awk.o
+
 //usage:#define awk_trivial_usage
 //usage:       "[OPTIONS] [AWK_PROGRAM] [FILE]..."
 //usage:#define awk_full_usage "\n\n"
 //usage:       "       -v VAR=VAL      Set variable"
 //usage:     "\n       -F SEP          Use SEP as field separator"
 //usage:     "\n       -f FILE         Read program from FILE"
+//usage:       IF_FEATURE_AWK_GNU_EXTENSIONS(
+//usage:     "\n       -e AWK_PROGRAM"
+//usage:       )
 
 #include "libbb.h"
 #include "xregex.h"
 #endif
 
 
+#define OPTSTR_AWK \
+       "F:v:f:" \
+       IF_FEATURE_AWK_GNU_EXTENSIONS("e:") \
+       "W:" /* option string for getopt32(); the letter order here defines the OPTBIT_* order below */
+#define OPTCOMPLSTR_AWK \
+       "v::f::" \
+       IF_FEATURE_AWK_GNU_EXTENSIONS("e::") /* for opt_complementary: -v, -f (and -e) may be given multiple times */
+enum { /* bit positions (OPTBIT_*) and bit masks (OPT_*) for testing the getopt32() result */
+       OPTBIT_F,       /* define field separator */
+       OPTBIT_v,       /* define variable */
+       OPTBIT_f,       /* pull in awk program from file */
+       IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e,) /* -e AWK_PROGRAM */
+       OPTBIT_W,       /* -W ignored */
+       OPT_F = 1 << OPTBIT_F,
+       OPT_v = 1 << OPTBIT_v,
+       OPT_f = 1 << OPTBIT_f,
+       OPT_e = IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e)) + 0, /* "+ 0" leaves OPT_e == 0 if the IF_ macro expands to nothing */
+       OPT_W = 1 << OPTBIT_W
+};
 
 #define        MAXVARFMT       240
 #define        MINNVBLOCK      64
@@ -155,7 +207,7 @@ typedef struct tsplitter_s {
 
 /* simple token classes */
 /* Order and hex values are very important!!!  See next_token() */
-#define        TC_SEQSTART      1                              /* ( */
+#define        TC_SEQSTART     1                       /* ( */
 #define        TC_SEQTERM      (1 << 1)                /* ) */
 #define        TC_REGEXP       (1 << 2)                /* /.../ */
 #define        TC_OUTRDR       (1 << 3)                /* | > >> */
@@ -190,7 +242,7 @@ typedef struct tsplitter_s {
 
 /* combined token classes */
 #define        TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
-#define        TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
+//#define      TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
 #define        TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
 
@@ -1812,6 +1864,18 @@ static void handle_special(var *v)
                is_f0_split = FALSE;
 
        } else if (v == intvar[FS]) {
+               /*
+                * The POSIX-2008 standard says that changing FS should have no effect on the
+                * current input line, but only on the next one. The language is:
+                *
+                * > Before the first reference to a field in the record is evaluated, the record
+                * > shall be split into fields, according to the rules in Regular Expressions,
+                * > using the value of FS that was current at the time the record was read.
+                *
+                * So, split up current line before assignment to FS:
+                */
+               split_f0();
+
                mk_splitter(getvar_s(v), &fsplitter);
 
        } else if (v == intvar[RS]) {
@@ -2003,8 +2067,8 @@ static int fmt_num(char *b, int size, const char *format, double n, int int_as_i
        char c;
        const char *s = format;
 
-       if (int_as_int && n == (int)n) {
-               r = snprintf(b, size, "%d", (int)n);
+       if (int_as_int && n == (long long)n) {
+               r = snprintf(b, size, "%lld", (long long)n);
        } else {
                do { c = *s; } while (c && *++s);
                if (strchr("diouxX", c)) {
@@ -2649,7 +2713,8 @@ static var *evaluate(node *op, var *res)
                        var *vbeg, *v;
                        const char *sv_progname;
 
-                       if (!op->r.f->body.first)
+                       /* The body might be empty, but the args still have to be evaluated */
+                       if (!op->r.n->info && !op->r.f->body.first)
                                syntax_error(EMSG_UNDEF_FUNC);
 
                        vbeg = v = nvalloc(op->r.f->nargs + 1);
@@ -2720,7 +2785,7 @@ static var *evaluate(node *op, var *res)
 
                        switch (opn) {
                        case F_in:
-                               R_d = (int)L_d;
+                               R_d = (long long)L_d;
                                break;
 
                        case F_rn:
@@ -2771,8 +2836,16 @@ static var *evaluate(node *op, var *res)
                                break;
 
                        case F_le:
-                               if (!op1)
+                               debug_printf_eval("length: L.s:'%s'\n", L.s);
+                               if (!op1) {
                                        L.s = getvar_s(intvar[F0]);
+                                       debug_printf_eval("length: L.s='%s'\n", L.s);
+                               }
+                               else if (L.v->type & VF_ARRAY) {
+                                       R_d = L.v->x.array->nel;
+                                       debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel);
+                                       break;
+                               }
                                R_d = strlen(L.s);
                                break;
 
@@ -2918,7 +2991,7 @@ static var *evaluate(node *op, var *res)
                        case '%':
                                if (R_d == 0)
                                        syntax_error(EMSG_DIV_BY_ZERO);
-                               L_d -= (int)(L_d / R_d) * R_d;
+                               L_d -= (long long)(L_d / R_d) * R_d;
                                break;
                        }
                        debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
@@ -3066,6 +3139,9 @@ int awk_main(int argc, char **argv)
        char *opt_F;
        llist_t *list_v = NULL;
        llist_t *list_f = NULL;
+#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
+       llist_t *list_e = NULL;
+#endif
        int i, j;
        var *v;
        var tv;
@@ -3124,47 +3200,51 @@ int awk_main(int argc, char **argv)
                        *s1 = '=';
                }
        }
-       opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
-       opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, NULL);
+       opt_complementary = OPTCOMPLSTR_AWK;
+       opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
        argv += optind;
        argc -= optind;
-       if (opt & 0x1) { /* -F */
+       if (opt & OPT_W)
+               bb_error_msg("warning: option -W is ignored");
+       if (opt & OPT_F) {
                unescape_string_in_place(opt_F);
                setvar_s(intvar[FS], opt_F);
        }
-       while (list_v) { /* -v */
+       while (list_v) {
                if (!is_assignment(llist_pop(&list_v)))
                        bb_show_usage();
        }
-       if (list_f) { /* -f */
-               do {
-                       char *s = NULL;
-                       FILE *from_file;
-
-                       g_progname = llist_pop(&list_f);
-                       from_file = xfopen_stdin(g_progname);
-                       /* one byte is reserved for some trick in next_token */
-                       for (i = j = 1; j > 0; i += j) {
-                               s = xrealloc(s, i + 4096);
-                               j = fread(s + i, 1, 4094, from_file);
-                       }
-                       s[i] = '\0';
-                       fclose(from_file);
-                       parse_program(s + 1);
-                       free(s);
-               } while (list_f);
-               argc++;
-       } else { // no -f: take program from 1st parameter
-               if (!argc)
+       while (list_f) {
+               char *s = NULL;
+               FILE *from_file;
+
+               g_progname = llist_pop(&list_f);
+               from_file = xfopen_stdin(g_progname);
+               /* one byte is reserved for some trick in next_token */
+               for (i = j = 1; j > 0; i += j) {
+                       s = xrealloc(s, i + 4096);
+                       j = fread(s + i, 1, 4094, from_file);
+               }
+               s[i] = '\0';
+               fclose(from_file);
+               parse_program(s + 1);
+               free(s);
+       }
+       g_progname = "cmd. line";
+#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
+       while (list_e) {
+               parse_program(llist_pop(&list_e));
+       }
+#endif
+       if (!(opt & (OPT_f | OPT_e))) {
+               if (!*argv)
                        bb_show_usage();
-               g_progname = "cmd. line";
                parse_program(*argv++);
+               argc--;
        }
-       if (opt & 0x8) // -W
-               bb_error_msg("warning: option -W is ignored");
 
        /* fill in ARGV array */
-       setvar_i(intvar[ARGC], argc);
+       setvar_i(intvar[ARGC], argc + 1);
        setari_u(intvar[ARGV], 0, "awk");
        i = 0;
        while (*argv)