2 * array.c - routines for awk arrays.
6 * Copyright (C) 1986, 1988, 1989, 1991-2014 the Free Software Foundation, Inc.
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 3 of the License, or
14 * (at your option) any later version.
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
28 extern FILE *output_fp;
29 extern NODE **fmt_list; /* declared in eval.c */
31 static size_t SUBSEPlen;
33 static char indent_char[] = " ";
35 static NODE **null_lookup(NODE *symbol, NODE *subs);
36 static NODE **null_dump(NODE *symbol, NODE *subs);
37 static afunc_t null_array_func[] = {
53 static afunc_t *array_types[MAX_ATYPE];
54 static int num_array_types = 0;
56 /* array func to index mapping */
57 #define AFUNC(F) (F ## _ind)
59 /* register_array_func --- add routines to handle arrays */
62 register_array_func(afunc_t *afunc)
64 if (afunc && num_array_types < MAX_ATYPE) {
65 if (afunc != str_array_func && ! afunc[AFUNC(atypeof)])
67 array_types[num_array_types++] = afunc;
68 if (afunc[AFUNC(ainit)]) /* execute init routine if any */
69 (void) (*afunc[AFUNC(ainit)])(NULL, NULL);
76 /* array_init --- register all builtin array types */
81 (void) register_array_func(str_array_func); /* the default */
83 (void) register_array_func(int_array_func);
84 (void) register_array_func(cint_array_func);
89 /* make_array --- create an array node */
96 memset(array, '\0', sizeof(NODE));
97 array->type = Node_var_array;
98 array->array_funcs = null_array_func;
99 /* vname, flags, and parent_array not set here */
105 /* null_array --- force symbol to be an empty typeless array */
108 null_array(NODE *symbol)
110 symbol->type = Node_var_array;
111 symbol->array_funcs = null_array_func;
112 symbol->buckets = NULL;
113 symbol->table_size = symbol->array_size = 0;
114 symbol->array_capacity = 0;
117 assert(symbol->xarray == NULL);
119 /* vname, parent_array not (re)initialized */
123 /* null_lookup --- assign type to an empty array. */
126 null_lookup(NODE *symbol, NODE *subs)
129 afunc_t *afunc = NULL;
131 assert(symbol->table_size == 0);
134 * Check which array type wants to accept this sub; traverse
135 * array type list in reverse order.
137 for (i = num_array_types - 1; i >= 1; i--) {
138 afunc = array_types[i];
139 if (afunc[AFUNC(atypeof)](symbol, subs) != NULL)
142 if (i == 0 || afunc == NULL)
143 afunc = array_types[0]; /* default is str_array_func */
144 symbol->array_funcs = afunc;
146 /* We have the right type of array; install the subscript */
147 return symbol->alookup(symbol, subs);
150 /* null_length --- default function for array length interface */
153 null_length(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
160 /* null_afunc --- default function for array interface */
163 null_afunc(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED)
168 /* null_dump --- dump function for an empty array */
171 null_dump(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
173 fprintf(output_fp, "array `%s' is empty\n", array_vname(symbol));
178 /* assoc_copy --- duplicate input array "symbol" */
181 assoc_copy(NODE *symbol, NODE *newsymb)
183 assert(newsymb->vname != NULL);
185 assoc_clear(newsymb);
186 (void) symbol->acopy(symbol, newsymb);
187 newsymb->array_funcs = symbol->array_funcs;
188 newsymb->flags = symbol->flags;
193 /* assoc_dump --- dump array */
196 assoc_dump(NODE *symbol, NODE *ndump)
199 (void) symbol->adump(symbol, ndump);
203 /* make_aname --- construct a 'vname' for a (sub)array */
206 make_aname(const NODE *symbol)
208 static char *aname = NULL;
210 static size_t max_alen;
213 if (symbol->parent_array != NULL) {
216 (void) make_aname(symbol->parent_array);
217 slen = strlen(symbol->vname); /* subscript in parent array */
218 if (alen + slen + 4 > max_alen) { /* sizeof("[\"\"]") = 4 */
219 max_alen = alen + slen + 4 + SLEN;
220 erealloc(aname, char *, (max_alen + 1) * sizeof(char *), "make_aname");
222 alen += sprintf(aname + alen, "[\"%s\"]", symbol->vname);
224 alen = strlen(symbol->vname);
226 max_alen = alen + SLEN;
227 emalloc(aname, char *, (max_alen + 1) * sizeof(char *), "make_aname");
228 } else if (alen > max_alen) {
229 max_alen = alen + SLEN;
230 erealloc(aname, char *, (max_alen + 1) * sizeof(char *), "make_aname");
232 memcpy(aname, symbol->vname, alen + 1);
240 * array_vname --- print the name of the array
242 * Returns a pointer to a statically maintained dynamically allocated string.
243 * It's appropriate for printing the name once; if the caller wants
244 * to save it, they have to make a copy.
248 array_vname(const NODE *symbol)
250 static char *message = NULL;
251 static size_t msglen = 0;
255 const NODE *save_symbol = symbol;
256 const char *from = _("from %s");
259 if (symbol->type != Node_array_ref
260 || symbol->orig_array->type != Node_var_array
262 if (symbol->type != Node_var_array || symbol->parent_array == NULL)
263 return symbol->vname;
264 return make_aname(symbol);
267 /* First, we have to compute the length of the string: */
271 while (symbol->type == Node_array_ref) {
272 len += strlen(symbol->vname);
274 symbol = symbol->prev_array;
277 /* Get the (sub)array name */
278 if (symbol->parent_array == NULL)
279 aname = symbol->vname;
281 aname = make_aname(symbol);
282 len += strlen(aname);
284 * Each node contributes by strlen(from) minus the length
285 * of "%s" in the translation (which is at least 2)
286 * plus 2 for ", " or ")\0"; this adds up to strlen(from).
288 len += n * strlen(from);
290 /* (Re)allocate memory: */
291 if (message == NULL) {
292 emalloc(message, char *, len, "array_vname");
294 } else if (len > msglen) {
295 erealloc(message, char *, len, "array_vname");
298 current buffer can hold new name */
300 /* We're ready to print: */
301 symbol = save_symbol;
304 * Ancient systems have sprintf() returning char *, not int.
305 * If you have one of those, use sprintf(..); s += strlen(s) instead.
308 s += sprintf(s, "%s (", symbol->vname);
310 symbol = symbol->prev_array;
311 if (symbol->type != Node_array_ref)
313 s += sprintf(s, from, symbol->vname);
314 s += sprintf(s, ", ");
316 s += sprintf(s, from, aname);
324 * force_array --- proceed to the actual Node_var_array,
325 * change Node_var_new to an array.
326 * If canfatal and type isn't good, die fatally,
327 * otherwise return the final actual value.
331 force_array(NODE *symbol, bool canfatal)
333 NODE *save_symbol = symbol;
334 bool isparam = false;
336 if (symbol->type == Node_param_list) {
337 save_symbol = symbol = GET_PARAM(symbol->param_cnt);
339 if (symbol->type == Node_array_ref)
340 symbol = symbol->orig_array;
343 switch (symbol->type) {
345 symbol->xarray = NULL; /* make sure union is as it should be */
347 symbol->parent_array = NULL; /* main array has no parent */
354 /* notably Node_var but catches also e.g. a[1] = "x"; a[1][1] = "y" */
356 if (symbol->type == Node_val)
357 fatal(_("attempt to use a scalar value as array"));
359 fatal(_("attempt to use scalar parameter `%s' as an array"),
362 fatal(_("attempt to use scalar `%s' as an array"),
372 /* set_SUBSEP --- update SUBSEP related variables when SUBSEP assigned to */
377 SUBSEP_node->var_value = force_string(SUBSEP_node->var_value);
378 SUBSEP = SUBSEP_node->var_value->stptr;
379 SUBSEPlen = SUBSEP_node->var_value->stlen;
383 /* concat_exp --- concatenate expression list into a single string */
386 concat_exp(int nargs, bool do_subsep)
388 /* do_subsep is false for Op_concat */
393 size_t subseplen = 0;
395 extern NODE **args_array;
401 subseplen = SUBSEPlen;
404 for (i = 1; i <= nargs; i++) {
406 if (r->type == Node_var_array) {
408 DEREF(args_array[i]); /* avoid memory leak */
409 fatal(_("attempt to use array `%s' in a scalar context"), array_vname(r));
415 len += (nargs - 1) * subseplen;
417 emalloc(str, char *, len + 2, "concat_exp");
419 r = args_array[nargs];
420 memcpy(str, r->stptr, r->stlen);
423 for (i = nargs - 1; i > 0; i--) {
426 else if (subseplen > 0) {
427 memcpy(s, SUBSEP, subseplen);
431 memcpy(s, r->stptr, r->stlen);
436 return make_str_node(str, len, ALREADY_MALLOCED);
441 * adjust_fcall_stack: remove subarray(s) of symbol[] from
442 * function call stack.
446 adjust_fcall_stack(NODE *symbol, int nsubs)
453 * Solve the nasty problem of disappearing subarray arguments:
455 * function f(c, d) { delete c; .. use non-existent array d .. }
456 * BEGIN { a[0][0] = 1; f(a, a[0]); .. }
458 * The fix is to convert 'd' to a local empty array; This has
459 * to be done before clearing the parent array to avoid referring to
460 * already free-ed memory.
462 * Similar situations exist for builtins accepting more than
463 * one array argument: split, patsplit, asort and asorti. For example:
465 * BEGIN { a[0][0] = 1; split("abc", a, "", a[0]) }
467 * These cases do not involve the function call stack, and are
468 * handled individually in their respective routines.
471 func = frame_ptr->func_node;
472 if (func == NULL) /* in main */
474 pcount = func->param_cnt;
475 sp = frame_ptr->stack;
477 for (; pcount > 0; pcount--) {
479 if (r->type != Node_array_ref
480 || r->orig_array->type != Node_var_array)
486 && symbol->parent_array != NULL
490 * 'symbol' is a subarray, and 'r' is the same subarray:
492 * function f(c, d) { delete c[0]; .. }
493 * BEGIN { a[0][0] = 1; f(a, a[0]); .. }
495 * But excludes cases like (nsubs = 0):
497 * function f(c, d) { delete c; ..}
498 * BEGIN { a[0][0] = 1; f(a[0], a[0]); ...}
502 r->parent_array = NULL;
507 for (n = n->parent_array; n != NULL; n = n->parent_array) {
508 assert(n->type == Node_var_array);
511 * 'r' is a subarray of 'symbol':
513 * function f(c, d) { delete c; .. use d as array .. }
514 * BEGIN { a[0][0] = 1; f(a, a[0]); .. }
516 * BEGIN { a[0][0][0][0] = 1; f(a[0], a[0][0][0]); .. }
520 r->parent_array = NULL;
528 /* do_delete --- perform `delete array[s]' */
532 * `nsubs' is no of subscripts
536 do_delete(NODE *symbol, int nsubs)
541 assert(symbol->type == Node_var_array);
542 subs = val = NULL; /* silence the compiler */
545 * The force_string() call is needed to make sure that
546 * the string subscript is reasonable. For example, with it:
548 * $ ./gawk --posix 'BEGIN { CONVFMT="%ld"; delete a[1.233]}'
549 * gawk: cmd. line:1: fatal: `%l' is not permitted in POSIX awk formats
551 * Without it, the code does not fail.
554 #define free_subs(n) do { \
555 NODE *s = PEEK(n - 1); \
556 if (s->type == Node_val) { \
557 (void) force_string(s); /* may have side effects. */ \
565 adjust_fcall_stack(symbol, 0); /* fix function call stack; See above. */
570 /* NB: subscripts are in reverse order on stack */
572 for (i = nsubs; i > 0; i--) {
574 if (subs->type != Node_val) {
576 fatal(_("attempt to use array `%s' in a scalar context"), array_vname(subs));
579 val = in_array(symbol, subs);
582 subs = force_string(subs);
583 lintwarn(_("delete: index `%s' not in array `%s'"),
584 subs->stptr, array_vname(symbol));
586 /* avoid memory leak, free all subs */
592 if (val->type != Node_var_array) {
593 /* e.g.: a[1] = 1; delete a[1][1] */
596 subs = force_string(subs);
597 fatal(_("attempt to use scalar `%s[\"%.*s\"]' as an array"),
607 if (val->type == Node_var_array) {
608 adjust_fcall_stack(val, nsubs); /* fix function call stack; See above. */
610 /* cleared a sub-array, free Node_var_array */
616 (void) assoc_remove(symbol, subs);
623 /* do_delete_loop --- simulate ``for (iggy in foo) delete foo[iggy]'' */
626 * The primary hassle here is that `iggy' needs to have some arbitrary
627 * array index put in it before we can clear the array, we can't
628 * just replace the loop with `delete foo'.
632 do_delete_loop(NODE *symbol, NODE **lhs)
637 akind.flags = AINDEX|ADELETE; /* need a single index */
638 list = symbol->alist(symbol, & akind);
640 if (assoc_empty(symbol))
647 /* blast the array in one shot */
648 adjust_fcall_stack(symbol, 0);
653 /* value_info --- print scalar node info */
662 if (n == Nnull_string || n == Null_field) {
663 fprintf(output_fp, "<(null)>");
667 if ((n->flags & (STRING|STRCUR)) != 0) {
668 fprintf(output_fp, "<");
669 fprintf(output_fp, "\"%.*s\"", PREC_STR, n->stptr);
670 if ((n->flags & (NUMBER|NUMCUR)) != 0) {
673 fprintf(output_fp, ":%s",
674 mpg_fmt("%.*R*g", PREC_NUM, ROUND_MODE, n->mpg_numbr));
675 else if (is_mpg_integer(n))
676 fprintf(output_fp, ":%s", mpg_fmt("%Zd", n->mpg_i));
679 fprintf(output_fp, ":%.*g", PREC_NUM, n->numbr);
681 fprintf(output_fp, ">");
685 fprintf(output_fp, "<%s>",
686 mpg_fmt("%.*R*g", PREC_NUM, ROUND_MODE, n->mpg_numbr));
687 else if (is_mpg_integer(n))
688 fprintf(output_fp, "<%s>", mpg_fmt("%Zd", n->mpg_i));
691 fprintf(output_fp, "<%.*g>", PREC_NUM, n->numbr);
694 fprintf(output_fp, ":%s", flags2str(n->flags));
696 if ((n->flags & FIELD) == 0)
697 fprintf(output_fp, ":%ld", n->valref);
699 fprintf(output_fp, ":");
701 if ((n->flags & (STRING|STRCUR)) == STRCUR) {
702 fprintf(output_fp, "][");
703 fprintf(output_fp, "stfmt=%d, ", n->stfmt);
704 fprintf(output_fp, "CONVFMT=\"%s\"", n->stfmt <= -1 ? "%ld"
705 : fmt_list[n->stfmt]->stptr);
714 indent(int indent_level)
717 for (i = 0; i < indent_level; i++)
718 fprintf(output_fp, "%s", indent_char);
721 /* assoc_info --- print index, value info */
724 assoc_info(NODE *subs, NODE *val, NODE *ndump, const char *aname)
726 int indent_level = ndump->alevel;
729 indent(indent_level);
730 fprintf(output_fp, "I: [%s:", aname);
731 if ((subs->flags & (MPFN|MPZN|INTIND)) == INTIND)
732 fprintf(output_fp, "<%ld>", (long) subs->numbr);
735 fprintf(output_fp, "]\n");
737 indent(indent_level);
738 if (val->type == Node_val) {
739 fprintf(output_fp, "V: [scalar: ");
742 fprintf(output_fp, "V: [");
745 assoc_dump(val, ndump);
748 indent(indent_level);
750 fprintf(output_fp, "]\n");
754 /* do_adump --- dump an array: interface to assoc_dump */
764 * depth < 0, no index and value info.
765 * = 0, main array index and value info; does not descend into sub-arrays.
766 * > 0, descends into 'depth' sub-arrays, and prints index and value info.
771 depth = get_number_si(tmp);
774 symbol = POP_PARAM();
775 if (symbol->type != Node_var_array)
776 fatal(_("adump: first argument not an array"));
778 ndump.type = Node_dump_array;
779 ndump.adepth = depth;
781 assoc_dump(symbol, & ndump);
782 return make_number((AWKNUM) 0);
786 /* asort_actual --- do the actual work to sort the input array */
789 asort_actual(int nargs, sort_context_t ctxt)
791 NODE *array, *dest = NULL, *result;
793 NODE **list = NULL, **ptr, **lhs;
794 unsigned long num_elems, i;
795 const char *sort_str;
797 if (nargs == 3) /* 3rd optional arg */
800 s = dupnode(Nnull_string); /* "" => default sorting */
804 if (s->stlen == 0) { /* default sorting */
806 sort_str = "@val_type_asc";
808 sort_str = "@ind_str_asc";
811 if (nargs >= 2) { /* 2nd optional arg */
813 if (dest->type != Node_var_array) {
814 fatal(ctxt == ASORT ?
815 _("asort: second argument not an array") :
816 _("asorti: second argument not an array"));
821 if (array->type != Node_var_array) {
822 fatal(ctxt == ASORT ?
823 _("asort: first argument not an array") :
824 _("asorti: first argument not an array"));
828 for (r = dest->parent_array; r != NULL; r = r->parent_array) {
830 fatal(ctxt == ASORT ?
831 _("asort: cannot use a subarray of first arg for second arg") :
832 _("asorti: cannot use a subarray of first arg for second arg"));
834 for (r = array->parent_array; r != NULL; r = r->parent_array) {
836 fatal(ctxt == ASORT ?
837 _("asort: cannot use a subarray of second arg for first arg") :
838 _("asorti: cannot use a subarray of second arg for first arg"));
842 /* sorting happens inside assoc_list */
843 list = assoc_list(array, sort_str, ctxt);
846 num_elems = assoc_length(array);
847 if (num_elems == 0 || list == NULL) {
848 /* source array is empty */
849 if (dest != NULL && dest != array)
853 return make_number((AWKNUM) 0);
857 * Must not assoc_clear() the source array before constructing
858 * the output array. assoc_list() does not duplicate array values
859 * which are needed for asort().
862 if (dest != NULL && dest != array) {
866 /* use 'result' as a temporary destination array */
867 result = make_array();
868 result->vname = array->vname;
869 result->parent_array = array->parent_array;
872 if (ctxt == ASORTI) {
873 /* We want the indices of the source array. */
875 for (i = 1, ptr = list; i <= num_elems; i++, ptr += 2) {
876 subs = make_number(i);
877 lhs = assoc_lookup(result, subs);
880 if (result->astore != NULL)
881 (*result->astore)(result, subs);
885 /* We want the values of the source array. */
887 for (i = 1, ptr = list; i <= num_elems; i++) {
888 subs = make_number(i);
890 /* free index node */
897 if (r->type == Node_val) {
898 lhs = assoc_lookup(result, subs);
904 subs = force_string(subs);
905 arr->vname = subs->stptr;
907 subs->flags &= ~STRCUR;
908 arr->parent_array = array; /* actual parent, not the temporary one. */
909 lhs = assoc_lookup(result, subs);
911 *lhs = assoc_copy(r, arr);
913 if (result->astore != NULL)
914 (*result->astore)(result, subs);
921 if (result != dest) {
922 /* dest == NULL or dest == array */
924 *array = *result; /* copy result into array */
928 dest != NULL and dest != array */
930 return make_number((AWKNUM) num_elems);
933 /* do_asort --- sort array by value */
938 return asort_actual(nargs, ASORT);
941 /* do_asorti --- sort array by index */
946 return asort_actual(nargs, ASORTI);
951 * cmp_strings --- compare two strings; logic similar to cmp_nodes() in eval.c
952 * except the extra case-sensitive comparison when the case-insensitive
957 cmp_strings(const NODE *n1, const NODE *n2)
970 return len2 == 0 ? 0 : -1;
974 /* len1 > 0 && len2 > 0 */
975 lmin = len1 < len2 ? len1 : len2;
978 const unsigned char *cp1 = (const unsigned char *) s1;
979 const unsigned char *cp2 = (const unsigned char *) s2;
982 if (gawk_mb_cur_max > 1) {
983 ret = strncasecmpmbs((const unsigned char *) cp1,
984 (const unsigned char *) cp2, lmin);
987 for (ret = 0; lmin-- > 0 && ret == 0; cp1++, cp2++)
988 ret = casetable[*cp1] - casetable[*cp2];
992 * If case insensitive result is "they're the same",
993 * use case sensitive comparison to force distinct order.
997 ret = memcmp(s1, s2, lmin);
998 if (ret != 0 || len1 == len2)
1000 return (len1 < len2) ? -1 : 1;
1003 /* sort_up_index_string --- qsort comparison function; ascending index strings. */
1006 sort_up_index_string(const void *p1, const void *p2)
1008 const NODE *t1, *t2;
1010 /* Array indices are strings */
1011 t1 = *((const NODE *const *) p1);
1012 t2 = *((const NODE *const *) p2);
1013 return cmp_strings(t1, t2);
1017 /* sort_down_index_str --- qsort comparison function; descending index strings. */
1020 sort_down_index_string(const void *p1, const void *p2)
1023 * Negation versus transposed arguments: when all keys are
1024 * distinct, as with array indices here, either method will
1025 * transform an ascending sort into a descending one. But if
1026 * there are equal keys--such as when IGNORECASE is honored--
1027 * that get disambiguated into a determisitc order, negation
1028 * will reverse those but transposed arguments would retain
1029 * their relative order within the rest of the reversed sort.
1031 return -sort_up_index_string(p1, p2);
1035 /* sort_up_index_number --- qsort comparison function; ascending index numbers. */
1038 sort_up_index_number(const void *p1, const void *p2)
1040 const NODE *t1, *t2;
1043 t1 = *((const NODE *const *) p1);
1044 t2 = *((const NODE *const *) p2);
1046 ret = cmp_numbers(t1, t2);
1050 /* break a tie with the index string itself */
1051 t1 = force_string((NODE *) t1);
1052 t2 = force_string((NODE *) t2);
1053 return cmp_strings(t1, t2);
1056 /* sort_down_index_number --- qsort comparison function; descending index numbers */
1059 sort_down_index_number(const void *p1, const void *p2)
1061 return -sort_up_index_number(p1, p2);
1065 /* sort_up_value_string --- qsort comparison function; ascending value string */
1068 sort_up_value_string(const void *p1, const void *p2)
1070 const NODE *t1, *t2;
1072 t1 = *((const NODE *const *) p1 + 1);
1073 t2 = *((const NODE *const *) p2 + 1);
1075 if (t1->type == Node_var_array) {
1076 /* return 0 if t2 is a sub-array too, else return 1 */
1077 return (t2->type != Node_var_array);
1079 if (t2->type == Node_var_array)
1080 return -1; /* t1 (scalar) < t2 (sub-array) */
1082 /* t1 and t2 both have string values */
1083 return cmp_strings(t1, t2);
1087 /* sort_down_value_string --- qsort comparison function; descending value string */
1090 sort_down_value_string(const void *p1, const void *p2)
1092 return -sort_up_value_string(p1, p2);
1096 /* sort_up_value_number --- qsort comparison function; ascending value number */
1099 sort_up_value_number(const void *p1, const void *p2)
1104 t1 = *((NODE *const *) p1 + 1);
1105 t2 = *((NODE *const *) p2 + 1);
1107 if (t1->type == Node_var_array) {
1108 /* return 0 if t2 is a sub-array too, else return 1 */
1109 return (t2->type != Node_var_array);
1111 if (t2->type == Node_var_array)
1112 return -1; /* t1 (scalar) < t2 (sub-array) */
1114 ret = cmp_numbers(t1, t2);
1119 * Use string value to guarantee same sort order on all
1120 * versions of qsort().
1122 t1 = force_string(t1);
1123 t2 = force_string(t2);
1124 return cmp_strings(t1, t2);
1128 /* sort_down_value_number --- qsort comparison function; descending value number */
1131 sort_down_value_number(const void *p1, const void *p2)
1133 return -sort_up_value_number(p1, p2);
1137 /* sort_up_value_type --- qsort comparison function; ascending value type */
1140 sort_up_value_type(const void *p1, const void *p2)
1144 /* we want to compare the element values */
1145 n1 = *((NODE *const *) p1 + 1);
1146 n2 = *((NODE *const *) p2 + 1);
1148 /* 1. Arrays vs. scalar, scalar is less than array */
1149 if (n1->type == Node_var_array) {
1150 /* return 0 if n2 is a sub-array too, else return 1 */
1151 return (n2->type != Node_var_array);
1153 if (n2->type == Node_var_array) {
1154 return -1; /* n1 (scalar) < n2 (sub-array) */
1158 /* 2. Resolve MAYBE_NUM, so that have only NUMBER or STRING */
1159 if ((n1->flags & MAYBE_NUM) != 0)
1160 (void) force_number(n1);
1161 if ((n2->flags & MAYBE_NUM) != 0)
1162 (void) force_number(n2);
1164 /* 2.5. Resolve INTIND, so that is STRING, and not NUMBER */
1165 if ((n1->flags & INTIND) != 0)
1166 (void) force_string(n1);
1167 if ((n2->flags & INTIND) != 0)
1168 (void) force_string(n2);
1170 if ((n1->flags & NUMBER) != 0 && (n2->flags & NUMBER) != 0) {
1171 return cmp_numbers(n1, n2);
1174 /* 3. All numbers are less than all strings. This is aribitrary. */
1175 if ((n1->flags & NUMBER) != 0 && (n2->flags & STRING) != 0) {
1177 } else if ((n1->flags & STRING) != 0 && (n2->flags & NUMBER) != 0) {
1181 /* 4. Two strings */
1182 return cmp_strings(n1, n2);
1185 /* sort_down_value_type --- qsort comparison function; descending value type */
1188 sort_down_value_type(const void *p1, const void *p2)
1190 return -sort_up_value_type(p1, p2);
1193 /* sort_user_func --- user defined qsort comparison function */
1196 sort_user_func(const void *p1, const void *p2)
1198 NODE *idx1, *idx2, *val1, *val2, *r;
1202 idx1 = *((NODE *const *) p1);
1203 idx2 = *((NODE *const *) p2);
1204 val1 = *((NODE *const *) p1 + 1);
1205 val2 = *((NODE *const *) p2 + 1);
1207 code = TOP()->code_ptr; /* comparison function call instructions */
1209 /* setup 4 arguments to comp_func() */
1212 if (val1->type == Node_val)
1218 if (val2->type == Node_val)
1222 /* execute the comparison function */
1223 (void) (*interpret)(code);
1225 /* return value of the comparison function */
1229 * mpfr_sgn(mpz_sgn): Returns a positive value if op > 0,
1230 * zero if op = 0, and a negative value if op < 0.
1232 if (is_mpg_float(r))
1233 ret = mpfr_sgn(r->mpg_numbr);
1234 else if (is_mpg_integer(r))
1235 ret = mpz_sgn(r->mpg_i);
1238 ret = (r->numbr < 0.0) ? -1 : (r->numbr > 0.0);
1244 /* assoc_list -- construct, and optionally sort, a list of array elements */
1247 assoc_list(NODE *symbol, const char *sort_str, sort_context_t sort_ctxt)
1249 typedef int (*qsort_compfunc)(const void *, const void *);
1251 static const struct qsort_funcs {
1253 qsort_compfunc comp_func;
1256 { "@ind_str_asc", sort_up_index_string, AINDEX|AISTR|AASC },
1257 { "@ind_num_asc", sort_up_index_number, AINDEX|AINUM|AASC },
1258 { "@val_str_asc", sort_up_value_string, AVALUE|AVSTR|AASC },
1259 { "@val_num_asc", sort_up_value_number, AVALUE|AVNUM|AASC },
1260 { "@ind_str_desc", sort_down_index_string, AINDEX|AISTR|ADESC },
1261 { "@ind_num_desc", sort_down_index_number, AINDEX|AINUM|ADESC },
1262 { "@val_str_desc", sort_down_value_string, AVALUE|AVSTR|ADESC },
1263 { "@val_num_desc", sort_down_value_number, AVALUE|AVNUM|ADESC },
1264 { "@val_type_asc", sort_up_value_type, AVALUE|AASC },
1265 { "@val_type_desc", sort_down_value_type, AVALUE|ADESC },
1266 { "@unsorted", 0, AINDEX },
1270 * N.B.: AASC and ADESC are hints to the specific array types.
1271 * See cint_list() in cint_array.c.
1276 unsigned long num_elems, j;
1278 qsort_compfunc cmp_func = 0;
1279 INSTRUCTION *code = NULL;
1282 assoc_kind_t assoc_kind = ANONE;
1286 for (qi = 0, j = sizeof(sort_funcs)/sizeof(sort_funcs[0]); qi < j; qi++) {
1287 if (strcmp(sort_funcs[qi].name, sort_str) == 0)
1292 cmp_func = sort_funcs[qi].comp_func;
1293 assoc_kind = sort_funcs[qi].kind;
1295 if (symbol->array_funcs != cint_array_func)
1296 assoc_kind &= ~(AASC|ADESC);
1298 if (sort_ctxt != SORTED_IN || (assoc_kind & AVALUE) != 0) {
1299 /* need index and value pair in the list */
1301 assoc_kind |= (AINDEX|AVALUE);
1305 } else { /* unrecognized */
1309 for (sp = sort_str; *sp != '\0' && ! isspace((unsigned char) *sp); sp++)
1312 /* empty string or string with space(s) not valid as function name */
1313 if (sp == sort_str || *sp != '\0')
1314 fatal(_("`%s' is invalid as a function name"), sort_str);
1316 f = lookup(sort_str);
1317 if (f == NULL || f->type != Node_func)
1318 fatal(_("sort comparison function `%s' is not defined"), sort_str);
1320 cmp_func = sort_user_func;
1322 /* need index and value pair in the list */
1323 assoc_kind |= (AVALUE|AINDEX);
1326 /* make function call instructions */
1327 code = bcalloc(Op_func_call, 2, 0);
1328 code->func_body = f;
1329 code->func_name = NULL; /* not needed, func_body already assigned */
1330 (code + 1)->expr_count = 4; /* function takes 4 arguments */
1331 code->nexti = bcalloc(Op_stop, 1, 0);
1334 * make non-redirected getline, exit, `next' and `nextfile' fatal in
1335 * callback function by setting currule in interpret()
1339 save_rule = currule; /* save current rule */
1345 akind.flags = (unsigned int) assoc_kind; /* kludge */
1346 list = symbol->alist(symbol, & akind);
1347 assoc_kind = (assoc_kind_t) akind.flags; /* symbol->alist can modify it */
1349 if (list == NULL || ! cmp_func || (assoc_kind & (AASC|ADESC)) != 0)
1350 return list; /* empty list or unsorted, or list already sorted */
1352 num_elems = assoc_length(symbol);
1354 qsort(list, num_elems, elem_size * sizeof(NODE *), cmp_func); /* shazzam! */
1356 if (cmp_func == sort_user_func) {
1358 currule = save_rule; /* restore current rule */
1359 bcfree(code->nexti); /* Op_stop */
1360 bcfree(code); /* Op_func_call */
1363 if (sort_ctxt == SORTED_IN && (assoc_kind & (AINDEX|AVALUE)) == (AINDEX|AVALUE)) {
1364 /* relocate all index nodes to the first half of the list. */
1365 for (j = 1; j < num_elems; j++)
1366 list[j] = list[2 * j];
1368 /* give back extra memory */
1370 erealloc(list, NODE **, num_elems * sizeof(NODE *), "assoc_list");