2 * int_array.c - routines for arrays of integer indices.
6 * Copyright (C) 1986, 1988, 1989, 1991-2013 the Free Software Foundation, Inc.
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 3 of the License, or
14 * (at your option) any later version.
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
28 extern FILE *output_fp;
29 extern void indent(int indent_level);
30 extern NODE **is_integer(NODE *symbol, NODE *subs);
32 static size_t INT_CHAIN_MAX = 2;
34 static NODE **int_array_init(NODE *symbol, NODE *subs);
35 static NODE **int_lookup(NODE *symbol, NODE *subs);
36 static NODE **int_exists(NODE *symbol, NODE *subs);
37 static NODE **int_clear(NODE *symbol, NODE *subs);
38 static NODE **int_remove(NODE *symbol, NODE *subs);
39 static NODE **int_list(NODE *symbol, NODE *t);
40 static NODE **int_copy(NODE *symbol, NODE *newsymb);
41 static NODE **int_dump(NODE *symbol, NODE *ndump);
43 static uint32_t int_hash(uint32_t k, uint32_t hsize);
44 static inline NODE **int_find(NODE *symbol, long k, uint32_t hash1);
45 static NODE **int_insert(NODE *symbol, long k, uint32_t hash1);
46 static void grow_int_table(NODE *symbol);
48 afunc_t int_array_func[] = {
63 /* int_array_init --- array initialization routine */
66 int_array_init(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
68 if (symbol == NULL) { /* first time */
71 /* check relevant environment variables */
72 if ((newval = getenv_long("INT_CHAIN_MAX")) > 0)
73 INT_CHAIN_MAX = newval;
77 return (NODE **) ! NULL;
80 /* is_integer --- check if subscript is an integer */
83 is_integer(NODE *symbol, NODE *subs)
88 if (subs == Nnull_string || do_mpfr)
91 if ((subs->flags & NUMINT) != 0)
92 return (NODE **) ! NULL;
94 if ((subs->flags & NUMBER) != 0) {
96 if (d <= INT32_MAX && d >= INT32_MIN && d == (int32_t) d) {
97 subs->flags |= NUMINT;
98 return (NODE **) ! NULL;
103 /* a[3]=1; print "3" in a -- true
104 * a[3]=1; print "+3" in a -- false
105 * a[3]=1; print "03" in a -- false
106 * a[-3]=1; print "-3" in a -- true
109 if ((subs->flags & (STRING|STRCUR)) != 0) {
110 char *cp = subs->stptr, *cpend, *ptr;
112 size_t len = subs->stlen;
114 if (len == 0 || (! isdigit((unsigned char) *cp) && *cp != '-'))
117 ((*cp == '0') /* "00", "011" .. */
118 || (*cp == '-' && *(cp + 1) == '0') /* "-0", "-011" .. */
122 if (len == 1 && *cp != '-') { /* single digit */
123 subs->numbr = (long) (*cp - '0');
124 if ((subs->flags & MAYBE_NUM) != 0) {
125 subs->flags &= ~MAYBE_NUM;
126 subs->flags |= NUMBER;
128 subs->flags |= (NUMCUR|NUMINT);
129 return (NODE **) ! NULL;
137 l = strtol(cp, & ptr, 10);
139 if (errno != 0 || ptr != cpend)
142 if ((subs->flags & MAYBE_NUM) != 0) {
143 subs->flags &= ~MAYBE_NUM;
144 subs->flags |= NUMBER;
146 subs->flags |= NUMCUR;
147 if (l <= INT32_MAX && l >= INT32_MIN) {
148 subs->flags |= NUMINT;
149 return (NODE **) ! NULL;
157 * int_lookup --- Find SYMBOL[SUBS] in the assoc array. Install it with value ""
158 * if it isn't there. Returns a pointer ala get_lhs to where its value is stored.
162 int_lookup(NODE *symbol, NODE *subs)
171 * N.B: symbol->table_size is the total # of non-integers (symbol->xarray)
172 * and integer elements. Also, symbol->xarray must have at least one
173 * item in it, and can not exist if there are no integer elements.
174 * In that case, symbol->xarray is promoted to 'symbol' (See int_remove).
178 if (! is_integer(symbol, subs)) {
181 xn = symbol->xarray = make_array();
182 xn->vname = symbol->vname; /* shallow copy */
184 } else if ((lhs = xn->aexists(xn, subs)) != NULL)
186 symbol->table_size++;
187 return assoc_lookup(xn, subs);
191 if (symbol->buckets == NULL)
192 grow_int_table(symbol);
194 hash1 = int_hash(k, symbol->array_size);
195 if ((lhs = int_find(symbol, k, hash1)) != NULL)
198 /* It's not there, install it */
200 symbol->table_size++;
202 /* first see if we would need to grow the array, before installing */
203 size = symbol->table_size;
204 if ((xn = symbol->xarray) != NULL)
205 size -= xn->table_size;
207 if ((symbol->flags & ARRAYMAXED) == 0
208 && (size / symbol->array_size) > INT_CHAIN_MAX) {
209 grow_int_table(symbol);
210 /* have to recompute hash value for new size */
211 hash1 = int_hash(k, symbol->array_size);
214 return int_insert(symbol, k, hash1);
219 * int_exists --- test whether the array element symbol[subs] exists or not,
220 * return pointer to value if it does.
224 int_exists(NODE *symbol, NODE *subs)
229 if (! is_integer(symbol, subs)) {
230 NODE *xn = symbol->xarray;
233 return xn->aexists(xn, subs);
235 if (symbol->buckets == NULL)
239 hash1 = int_hash(k, symbol->array_size);
240 return int_find(symbol, k, hash1);
243 /* int_clear --- flush all the values in symbol[] */
246 int_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
253 if (symbol->xarray != NULL) {
254 NODE *xn = symbol->xarray;
257 symbol->xarray = NULL;
260 for (i = 0; i < symbol->array_size; i++) {
261 for (b = symbol->buckets[i]; b != NULL; b = next) {
263 for (j = 0; j < b->aicount; j++) {
265 if (r->type == Node_var_array) {
266 assoc_clear(r); /* recursively clear all sub-arrays */
274 symbol->buckets[i] = NULL;
276 if (symbol->buckets != NULL)
277 efree(symbol->buckets);
278 symbol->ainit(symbol, NULL); /* re-initialize symbol */
283 /* int_remove --- If SUBS is already in the table, remove it. */
286 int_remove(NODE *symbol, NODE *subs)
289 BUCKET *b, *prev = NULL;
292 NODE *xn = symbol->xarray;
294 if (symbol->table_size == 0 || symbol->buckets == NULL)
297 if (! is_integer(symbol, subs)) {
298 if (xn == NULL || xn->aremove(xn, subs) == NULL)
300 if (xn->table_size == 0) {
302 symbol->xarray = NULL;
304 symbol->table_size--;
305 assert(symbol->table_size > 0);
306 return (NODE **) ! NULL;
310 hash1 = int_hash(k, symbol->array_size);
312 for (b = symbol->buckets[hash1]; b != NULL; prev = b, b = b->ainext) {
313 for (i = 0; i < b->aicount; i++) {
314 if (k != b->ainum[i])
318 if (i == 0 && b->aicount == 2) {
319 /* removing the 1st item; move 2nd item from position 1 to 0 */
321 b->ainum[0] = b->ainum[1];
322 b->aivalue[0] = b->aivalue[1];
324 removing the only item or the 2nd item */
330 if (b == NULL) /* item not in array */
336 if (b->aicount == 0) {
339 prev->ainext = b->ainext;
341 symbol->buckets[hash1] = b->ainext;
345 } else if (b != symbol->buckets[hash1]) {
346 BUCKET *head = symbol->buckets[hash1];
348 assert(b->aicount == 1);
349 /* move the last element from head to bucket to make it full. */
350 i = --head->aicount; /* head has one less element */
351 b->ainum[1] = head->ainum[i];
352 b->aivalue[1] = head->aivalue[i];
353 b->aicount++; /* bucket has one more element */
355 /* head is now empty; delete head */
356 symbol->buckets[hash1] = head->ainext;
362 symbol->table_size--;
363 if (xn == NULL && symbol->table_size == 0) {
364 efree(symbol->buckets);
365 symbol->ainit(symbol, NULL); /* re-initialize array 'symbol' */
366 } else if (xn != NULL && symbol->table_size == xn->table_size) {
367 /* promote xn (str_array) to symbol */
368 xn->flags &= ~XARRAY;
369 xn->parent_array = symbol->parent_array;
370 efree(symbol->buckets);
375 return (NODE **) ! NULL; /* return success */
379 /* int_copy --- duplicate input array "symbol" */
382 int_copy(NODE *symbol, NODE *newsymb)
384 BUCKET **old, **new, **pnew;
385 BUCKET *chain, *newchain;
387 unsigned long i, cursize;
389 assert(symbol->buckets != NULL);
391 /* find the current hash size */
392 cursize = symbol->array_size;
394 /* allocate new table */
395 emalloc(new, BUCKET **, cursize * sizeof(BUCKET *), "int_copy");
396 memset(new, '\0', cursize * sizeof(BUCKET *));
398 old = symbol->buckets;
400 for (i = 0; i < cursize; i++) {
401 for (chain = old[i], pnew = & new[i]; chain != NULL;
402 chain = chain->ainext
405 newchain->aicount = chain->aicount;
406 newchain->ainext = NULL;
407 for (j = 0; j < chain->aicount; j++) {
411 * copy the corresponding key and
412 * value from the original input list
414 newchain->ainum[j] = chain->ainum[j];
416 oldval = chain->aivalue[j];
417 if (oldval->type == Node_val)
418 newchain->aivalue[j] = dupnode(oldval);
422 r->vname = estrdup(oldval->vname, strlen(oldval->vname));
423 r->parent_array = newsymb;
424 newchain->aivalue[j] = assoc_copy(oldval, r);
429 newchain->ainext = NULL;
430 pnew = & newchain->ainext;
434 if (symbol->xarray != NULL) {
438 n->vname = newsymb->vname; /* shallow copy */
439 (void) xn->acopy(xn, n);
442 newsymb->xarray = NULL;
444 newsymb->table_size = symbol->table_size;
445 newsymb->buckets = new;
446 newsymb->array_size = cursize;
447 newsymb->flags = symbol->flags;
453 /* int_list --- return a list of array items */
456 int_list(NODE *symbol, NODE *t)
459 unsigned long num_elems, list_size, i, k = 0;
462 int j, elem_size = 1;
464 static char buf[100];
465 assoc_kind_t assoc_kind;
467 if (symbol->table_size == 0)
470 assoc_kind = (assoc_kind_t) t->flags;
471 num_elems = symbol->table_size;
472 if ((assoc_kind & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE))
475 if ((assoc_kind & (AINDEX|AVALUE)) == (AINDEX|AVALUE))
477 list_size = elem_size * num_elems;
479 if (symbol->xarray != NULL) {
481 list = xn->alist(xn, t);
482 assert(list != NULL);
483 if (num_elems == 1 || num_elems == xn->table_size)
485 erealloc(list, NODE **, list_size * sizeof(NODE *), "int_list");
486 k = elem_size * xn->table_size;
488 emalloc(list, NODE **, list_size * sizeof(NODE *), "int_list");
492 for (i = 0; i < symbol->array_size; i++) {
493 for (b = symbol->buckets[i]; b != NULL; b = b->ainext) {
494 for (j = 0; j < b->aicount; j++) {
497 if ((assoc_kind & AISTR) != 0) {
498 sprintf(buf, "%ld", num);
499 subs = make_string(buf, strlen(buf));
501 subs->flags |= (NUMCUR|NUMINT);
503 subs = make_number((AWKNUM) num);
504 subs->flags |= (INTIND|NUMINT);
509 if ((assoc_kind & AVALUE) != 0) {
511 if (r->type == Node_val) {
512 if ((assoc_kind & AVNUM) != 0)
513 (void) force_number(r);
514 else if ((assoc_kind & AVSTR) != 0)
529 /* int_kilobytes --- calculate memory consumption of the assoc array */
532 int_kilobytes(NODE *symbol)
534 unsigned long i, bucket_cnt = 0;
537 extern AWKNUM str_kilobytes(NODE *symbol);
539 for (i = 0; i < symbol->array_size; i++) {
540 for (b = symbol->buckets[i]; b != NULL; b = b->ainext)
543 kb = (((AWKNUM) bucket_cnt) * sizeof (BUCKET) +
544 ((AWKNUM) symbol->array_size) * sizeof (BUCKET *)) / 1024.0;
546 if (symbol->xarray != NULL)
547 kb += str_kilobytes(symbol->xarray);
553 /* int_dump --- dump array info */
556 int_dump(NODE *symbol, NODE *ndump)
563 unsigned long str_size = 0, int_size = 0;
565 size_t j, bucket_cnt;
566 static size_t hash_dist[HCNT + 1];
568 indent_level = ndump->alevel;
570 if (symbol->xarray != NULL) {
572 str_size = xn->table_size;
574 int_size = symbol->table_size - str_size;
576 if ((symbol->flags & XARRAY) == 0)
577 fprintf(output_fp, "%s `%s'\n",
578 (symbol->parent_array == NULL) ? "array" : "sub-array",
579 array_vname(symbol));
582 indent(indent_level);
583 fprintf(output_fp, "array_func: int_array_func\n");
584 if (symbol->flags != 0) {
585 indent(indent_level);
586 fprintf(output_fp, "flags: %s\n", flags2str(symbol->flags));
588 indent(indent_level);
589 fprintf(output_fp, "INT_CHAIN_MAX: %lu\n", (unsigned long) INT_CHAIN_MAX);
590 indent(indent_level);
591 fprintf(output_fp, "array_size: %lu (int)\n", (unsigned long) symbol->array_size);
592 indent(indent_level);
593 fprintf(output_fp, "table_size: %lu (total), %lu (int), %lu (str)\n",
594 (unsigned long) symbol->table_size, int_size, str_size);
595 indent(indent_level);
596 fprintf(output_fp, "Avg # of items per chain (int): %.2g\n",
597 ((AWKNUM) int_size) / symbol->array_size);
599 indent(indent_level);
600 fprintf(output_fp, "memory: %.2g kB (total)\n", int_kilobytes(symbol));
602 /* hash value distribution */
604 memset(hash_dist, '\0', (HCNT + 1) * sizeof(size_t));
605 for (i = 0; i < symbol->array_size; i++) {
607 for (b = symbol->buckets[i]; b != NULL; b = b->ainext)
608 bucket_cnt += b->aicount;
609 if (bucket_cnt >= HCNT)
611 hash_dist[bucket_cnt]++;
614 indent(indent_level);
615 fprintf(output_fp, "Hash distribution:\n");
617 for (j = 0; j <= HCNT; j++) {
618 if (hash_dist[j] > 0) {
619 indent(indent_level);
621 fprintf(output_fp, "[>=%lu]:%lu\n",
622 (unsigned long) HCNT, (unsigned long) hash_dist[j]);
624 fprintf(output_fp, "[%lu]:%lu\n",
625 (unsigned long) j, (unsigned long) hash_dist[j]);
632 if (ndump->adepth >= 0) {
636 fprintf(output_fp, "\n");
638 aname = make_aname(symbol);
639 subs = make_number((AWKNUM) 0);
640 subs->flags |= (INTIND|NUMINT);
642 for (i = 0; i < symbol->array_size; i++) {
643 for (b = symbol->buckets[i]; b != NULL; b = b->ainext) {
644 for (j = 0; j < b->aicount; j++) {
645 subs->numbr = b->ainum[j];
646 assoc_info(subs, b->aivalue[j], ndump, aname);
654 fprintf(output_fp, "\n");
655 xn->adump(xn, ndump);
664 /* int_hash --- calculate the hash function of the integer subs */
667 int_hash(uint32_t k, uint32_t hsize)
671 * Code snippet copied from:
672 * Hash functions (http://www.azillionmonkeys.com/qed/hash.html).
673 * Copyright 2004-2008 by Paul Hsieh. Licenced under LGPL 2.1.
676 /* This is the final mixing function used by Paul Hsieh in SuperFastHash. */
690 /* int_find --- locate symbol[subs] */
692 static inline NODE **
693 int_find(NODE *symbol, long k, uint32_t hash1)
698 assert(symbol->buckets != NULL);
699 for (b = symbol->buckets[hash1]; b != NULL; b = b->ainext) {
700 for (i = 0; i < b->aicount; i++) {
701 if (b->ainum[i] == k)
702 return (b->aivalue + i);
709 /* int_insert --- install subs in the assoc array */
712 int_insert(NODE *symbol, long k, uint32_t hash1)
717 b = symbol->buckets[hash1];
719 /* Only the first bucket in the chain can be partially full, but is never empty. */
721 if (b == NULL || (i = b->aicount) == 2) {
724 b->ainext = symbol->buckets[hash1];
725 symbol->buckets[hash1] = b;
730 b->aivalue[i] = dupnode(Nnull_string);
732 return & b->aivalue[i];
736 /* grow_int_table --- grow the hash table */
739 grow_int_table(NODE *symbol)
742 BUCKET *chain, *next;
744 unsigned long oldsize, newsize, k;
747 * This is an array of primes. We grow the table by an order of
748 * magnitude each time (not just doubling) so that growing is a
749 * rare operation. We expect, on average, that it won't happen
750 * more than twice. The final size is also chosen to be small
751 * enough so that MS-DOG mallocs can handle it. When things are
752 * very large (> 8K), we just double more or less, instead of
753 * just jumping from 8K to 64K.
756 static const unsigned long sizes[] = {
757 13, 127, 1021, 8191, 16381, 32749, 65497,
758 131101, 262147, 524309, 1048583, 2097169,
759 4194319, 8388617, 16777259, 33554467,
760 67108879, 134217757, 268435459, 536870923,
764 /* find next biggest hash size */
765 newsize = oldsize = symbol->array_size;
767 for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) {
768 if (oldsize < sizes[i]) {
773 if (newsize == oldsize) { /* table already at max (!) */
774 symbol->flags |= ARRAYMAXED;
778 /* allocate new table */
779 emalloc(new, BUCKET **, newsize * sizeof(BUCKET *), "grow_int_table");
780 memset(new, '\0', newsize * sizeof(BUCKET *));
782 old = symbol->buckets;
783 symbol->buckets = new;
784 symbol->array_size = newsize;
786 /* brand new hash table */
788 return; /* DO NOT initialize symbol->table_size */
790 /* old hash table there, move stuff to new, free old */
791 /* note that symbol->table_size does not change if an old array. */
793 for (k = 0; k < oldsize; k++) {
795 for (chain = old[k]; chain != NULL; chain = next) {
796 for (i = 0; i < chain->aicount; i++) {
797 num = chain->ainum[i];
798 *int_insert(symbol, num, int_hash(num, newsize)) = chain->aivalue[i];
800 next = chain->ainext;