src/bucket.c

   1 /* bucket.c - The routines for playing with hash buckets. */
   2
   3 /* This file is part of GDBM, the GNU data base manager.
   4    Copyright (C) 1990, 1991, 1993, 2007, 2011 Free Software Foundation,
   5    Inc.
   6
   7    GDBM is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3, or (at your option)
  10    any later version.
  11
  12    GDBM is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with GDBM. If not, see <http://www.gnu.org/licenses/>.   */
  19
  20 /* Include system configuration before all else. */
  21 #include "autoconf.h"
  22
  23 #include "gdbmdefs.h"
  24
  25
  26 /* Initializing a new hash buckets sets all bucket entries to -1 hash value. */
  27 void
  28 _gdbm_new_bucket (GDBM_FILE dbf, hash_bucket *bucket, int bits)
  29 {
  30   int index;
  31
  32   /* Initialize the avail block. */
  33   bucket->av_count = 0;
  34
  35   /* Set the information fields first. */
  36   bucket->bucket_bits = bits;
  37   bucket->count = 0;
  38
  39   /* Initialize all bucket elements. */
  40   for (index = 0; index < dbf->header->bucket_elems; index++)
  41     bucket->h_table[index].hash_value = -1;
  42 }
  43
  44
  45
  46 /* Find a bucket for DBF that is pointed to by the bucket directory from
  47    location DIR_INDEX.   The bucket cache is first checked to see if it
  48    is already in memory.  If not, a bucket may be tossed to read the new
  49    bucket.  In any case, the requested bucket is make the "current" bucket
  50    and dbf->bucket points to the correct bucket. */
  51
  52 void
  53 _gdbm_get_bucket (GDBM_FILE dbf, int dir_index)
  54 {
  55   int rc;
  56   off_t bucket_adr;     /* The address of the correct hash bucket.  */
  57   off_t file_pos;       /* The return address for lseek. */
  58   int   index;          /* Loop index. */
  59
  60   /* Initial set up. */
  61   dbf->bucket_dir = dir_index;
  62   bucket_adr = dbf->dir [dir_index];
  63
  64   if (dbf->bucket_cache == NULL)
  65     {
  66       if(_gdbm_init_cache(dbf, DEFAULT_CACHESIZE) == -1)
  67         _gdbm_fatal(dbf, _("couldn't init cache"));
  68     }
  69
  70   /* Is that one is not already current, we must find it. */
  71   if (dbf->cache_entry->ca_adr != bucket_adr)
  72     {
  73       /* Look in the cache. */
  74       for (index = 0; index < dbf->cache_size; index++)
  75         {
  76           if (dbf->bucket_cache[index].ca_adr == bucket_adr)
  77             {
  78               dbf->bucket = dbf->bucket_cache[index].ca_bucket;
  79               dbf->cache_entry = &dbf->bucket_cache[index];
  80               return;
  81             }
  82         }
  83
  84       /* It is not in the cache, read it from the disk. */
  85       dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
  86       if (dbf->bucket_cache[dbf->last_read].ca_changed)
  87         _gdbm_write_bucket (dbf, &dbf->bucket_cache[dbf->last_read]);
  88       dbf->bucket_cache[dbf->last_read].ca_adr = bucket_adr;
  89       dbf->bucket = dbf->bucket_cache[dbf->last_read].ca_bucket;
  90       dbf->cache_entry = &dbf->bucket_cache[dbf->last_read];
  91       dbf->cache_entry->ca_data.elem_loc = -1;
  92       dbf->cache_entry->ca_changed = FALSE;
  93
  94       /* Read the bucket. */
  95       file_pos = __lseek (dbf, bucket_adr, SEEK_SET);
  96       if (file_pos != bucket_adr)
  97         _gdbm_fatal (dbf, _("lseek error"));
  98
  99       rc = _gdbm_full_read (dbf, dbf->bucket, dbf->header->bucket_size);
 100       if (rc)
 101         _gdbm_fatal (dbf, gdbm_strerror (rc));
 102     }
 103
 104   return;
 105 }
 106
 107
 108 /* Split the current bucket.  This includes moving all items in the bucket to
 109    a new bucket.  This doesn't require any disk reads because all hash values
 110    are stored in the buckets.  Splitting the current bucket may require
 111    doubling the size of the hash directory.  */
 112 void
 113 _gdbm_split_bucket (GDBM_FILE dbf, int next_insert)
 114 {
 115   hash_bucket *bucket[2];       /* Pointers to the new buckets. */
 116
 117   int          new_bits;        /* The number of bits for the new buckets. */
 118   int          cache_0;         /* Location in the cache for the buckets. */
 119   int          cache_1;
 120   off_t        adr_0;           /* File address of the new bucket 0. */
 121   off_t        adr_1;           /* File address of the new bucket 1. */
 122   avail_elem   old_bucket;      /* Avail Struct for the old bucket. */
 123
 124   off_t        dir_start0;      /* Used in updating the directory. */
 125   off_t        dir_start1;
 126   off_t        dir_end;
 127
 128   off_t       *new_dir;         /* Pointer to the new directory. */
 129   off_t        dir_adr;         /* Address of the new directory. */
 130   int          dir_size;        /* Size of the new directory. */
 131   off_t        old_adr[31];     /* Address of the old directories. */
 132   int          old_size[31];    /* Size of the old directories. */
 133   int          old_count;       /* Number of old directories. */
 134
 135   int          index;           /* Used in array indexing. */
 136   int          index1;          /* Used in array indexing. */
 137   int          elem_loc;        /* Location in new bucket to put element. */
 138   bucket_element *old_el;       /* Pointer into the old bucket. */
 139   int          select;          /* Used to index bucket during movement. */
 140
 141
 142   /* No directories are yet old. */
 143   old_count = 0;
 144
 145   if (dbf->bucket_cache == NULL)
 146     {
 147       if(_gdbm_init_cache(dbf, DEFAULT_CACHESIZE) == -1)
 148         _gdbm_fatal(dbf, _("couldn't init cache"));
 149     }
 150
 151   while (dbf->bucket->count == dbf->header->bucket_elems)
 152     {
 153       /* Initialize the "new" buckets in the cache. */
 154       do
 155         {
 156           dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
 157           cache_0 = dbf->last_read;
 158         }
 159       while (dbf->bucket_cache[cache_0].ca_bucket == dbf->bucket);
 160       bucket[0] = dbf->bucket_cache[cache_0].ca_bucket;
 161       if (dbf->bucket_cache[cache_0].ca_changed)
 162         _gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_0]);
 163       do
 164         {
 165           dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
 166           cache_1 = dbf->last_read;
 167         }
 168       while (dbf->bucket_cache[cache_1].ca_bucket == dbf->bucket);
 169       bucket[1] = dbf->bucket_cache[cache_1].ca_bucket;
 170       if (dbf->bucket_cache[cache_1].ca_changed)
 171         _gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_1]);
 172       new_bits = dbf->bucket->bucket_bits+1;
 173       _gdbm_new_bucket (dbf, bucket[0], new_bits);
 174       _gdbm_new_bucket (dbf, bucket[1], new_bits);
 175       adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size);
 176       dbf->bucket_cache[cache_0].ca_adr = adr_0;
 177       adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size);
 178       dbf->bucket_cache[cache_1].ca_adr = adr_1;
 179
 180
 181
 182       /* Double the directory size if necessary. */
 183       if (dbf->header->dir_bits == dbf->bucket->bucket_bits)
 184         {
 185           dir_size = dbf->header->dir_size * 2;
 186           dir_adr  = _gdbm_alloc (dbf, dir_size);
 187           new_dir  = (off_t *) malloc (dir_size);
 188           if (new_dir == NULL) _gdbm_fatal (dbf, _("malloc error"));
 189           for (index = 0;
 190                 index < dbf->header->dir_size/sizeof (off_t); index++)
 191             {
 192               new_dir[2*index]   = dbf->dir[index];
 193               new_dir[2*index+1] = dbf->dir[index];
 194             }
 195
 196           /* Update header. */
 197           old_adr[old_count] = dbf->header->dir;
 198           dbf->header->dir = dir_adr;
 199           old_size[old_count] = dbf->header->dir_size;
 200           dbf->header->dir_size = dir_size;
 201           dbf->header->dir_bits = new_bits;
 202           old_count++;
 203
 204           /* Now update dbf.  */
 205           dbf->header_changed = TRUE;
 206           dbf->bucket_dir *= 2;
 207           free (dbf->dir);
 208           dbf->dir = new_dir;
 209         }
 210
 211       /* Copy all elements in dbf->bucket into the new buckets. */
 212       for (index = 0; index < dbf->header->bucket_elems; index++)
 213         {
 214           old_el = & (dbf->bucket->h_table[index]);
 215           select = (old_el->hash_value >> (31-new_bits)) & 1;
 216           elem_loc = old_el->hash_value % dbf->header->bucket_elems;
 217           while (bucket[select]->h_table[elem_loc].hash_value != -1)
 218             elem_loc = (elem_loc + 1) % dbf->header->bucket_elems;
 219           bucket[select]->h_table[elem_loc] = *old_el;
 220           bucket[select]->count += 1;
 221         }
 222
 223       /* Allocate avail space for the bucket[1]. */
 224       bucket[1]->bucket_avail[0].av_adr
 225         = _gdbm_alloc (dbf, dbf->header->block_size);
 226       bucket[1]->bucket_avail[0].av_size = dbf->header->block_size;
 227       bucket[1]->av_count = 1;
 228
 229       /* Copy the avail elements in dbf->bucket to bucket[0]. */
 230       bucket[0]->av_count = dbf->bucket->av_count;
 231       index = 0;
 232       index1 = 0;
 233       if (bucket[0]->av_count == BUCKET_AVAIL)
 234         {
 235           /* The avail is full, move the first one to bucket[1]. */
 236           _gdbm_put_av_elem (dbf->bucket->bucket_avail[0],
 237                              bucket[1]->bucket_avail,
 238                              &bucket[1]->av_count, FALSE);
 239           index = 1;
 240           bucket[0]->av_count --;
 241         }
 242       for (; index < dbf->bucket->av_count; index++)
 243         {
 244           bucket[0]->bucket_avail[index1++] = dbf->bucket->bucket_avail[index];
 245         }
 246
 247       /* Update the directory.  We have new file addresses for both buckets. */
 248       dir_start1 = (dbf->bucket_dir >> (dbf->header->dir_bits - new_bits)) | 1;
 249       dir_end = (dir_start1 + 1) << (dbf->header->dir_bits - new_bits);
 250       dir_start1 = dir_start1 << (dbf->header->dir_bits - new_bits);
 251       dir_start0 = dir_start1 - (dir_end - dir_start1);
 252       for (index = dir_start0; index < dir_start1; index++)
 253         dbf->dir[index] = adr_0;
 254       for (index = dir_start1; index < dir_end; index++)
 255         dbf->dir[index] = adr_1;
 256
 257
 258       /* Set changed flags. */
 259       dbf->bucket_cache[cache_0].ca_changed = TRUE;
 260       dbf->bucket_cache[cache_1].ca_changed = TRUE;
 261       dbf->bucket_changed = TRUE;
 262       dbf->directory_changed = TRUE;
 263       dbf->second_changed = TRUE;
 264
 265       /* Update the cache! */
 266       dbf->bucket_dir = next_insert >> (31-dbf->header->dir_bits);
 267
 268       /* Invalidate old cache entry. */
 269       old_bucket.av_adr  = dbf->cache_entry->ca_adr;
 270       old_bucket.av_size = dbf->header->bucket_size;
 271       dbf->cache_entry->ca_adr = 0;
 272       dbf->cache_entry->ca_changed = FALSE;
 273
 274       /* Set dbf->bucket to the proper bucket. */
 275       if (dbf->dir[dbf->bucket_dir] == adr_0)
 276         {
 277           dbf->bucket = bucket[0];
 278           dbf->cache_entry = &dbf->bucket_cache[cache_0];
 279           _gdbm_put_av_elem (old_bucket,
 280                              bucket[1]->bucket_avail,
 281                              &bucket[1]->av_count, FALSE);
 282         }
 283       else
 284         {
 285           dbf->bucket = bucket[1];
 286           dbf->cache_entry = &dbf->bucket_cache[cache_1];
 287           _gdbm_put_av_elem (old_bucket,
 288                              bucket[0]->bucket_avail,
 289                              &bucket[0]->av_count, FALSE);
 290         }
 291
 292     }
 293
 294   /* Get rid of old directories. */
 295   for (index = 0; index < old_count; index++)
 296     _gdbm_free (dbf, old_adr[index], old_size[index]);
 297 }
 298
 299
 300 /* The only place where a bucket is written.  CA_ENTRY is the
 301    cache entry containing the bucket to be written. */
 302
 303 void
 304 _gdbm_write_bucket (GDBM_FILE dbf, cache_elem *ca_entry)
 305 {
 306   int rc;
 307   off_t file_pos;       /* The return value for lseek. */
 308
 309   file_pos = __lseek (dbf, ca_entry->ca_adr, SEEK_SET);
 310   if (file_pos != ca_entry->ca_adr)
 311     _gdbm_fatal (dbf, _("lseek error"));
 312   rc = _gdbm_full_write (dbf, ca_entry->ca_bucket, dbf->header->bucket_size);
 313   if (rc)
 314     _gdbm_fatal (dbf, gdbm_strerror (rc));
 315
 316   ca_entry->ca_changed = FALSE;
 317   ca_entry->ca_data.hash_val = -1;
 318   ca_entry->ca_data.elem_loc = -1;
 319 }