1 /* bucket.c - The routines for playing with hash buckets. */
3 /* This file is part of GDBM, the GNU data base manager.
4 Copyright (C) 1990, 1991, 1993, 2007, 2011 Free Software Foundation,
7 GDBM is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GDBM is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GDBM. If not, see <http://www.gnu.org/licenses/>. */
20 /* Include system configuration before all else. */
26 /* Initializing a new hash buckets sets all bucket entries to -1 hash value. */
28 _gdbm_new_bucket (GDBM_FILE dbf, hash_bucket *bucket, int bits)
32 /* Initialize the avail block. */
35 /* Set the information fields first. */
36 bucket->bucket_bits = bits;
39 /* Initialize all bucket elements. */
40 for (index = 0; index < dbf->header->bucket_elems; index++)
41 bucket->h_table[index].hash_value = -1;
46 /* Find a bucket for DBF that is pointed to by the bucket directory from
47 location DIR_INDEX. The bucket cache is first checked to see if it
48 is already in memory. If not, a bucket may be tossed to read the new
49 bucket. In any case, the requested bucket is make the "current" bucket
50 and dbf->bucket points to the correct bucket. */
53 _gdbm_get_bucket (GDBM_FILE dbf, int dir_index)
56 off_t bucket_adr; /* The address of the correct hash bucket. */
57 off_t file_pos; /* The return address for lseek. */
58 int index; /* Loop index. */
61 dbf->bucket_dir = dir_index;
62 bucket_adr = dbf->dir [dir_index];
64 if (dbf->bucket_cache == NULL)
66 if(_gdbm_init_cache(dbf, DEFAULT_CACHESIZE) == -1)
67 _gdbm_fatal(dbf, _("couldn't init cache"));
70 /* Is that one is not already current, we must find it. */
71 if (dbf->cache_entry->ca_adr != bucket_adr)
73 /* Look in the cache. */
74 for (index = 0; index < dbf->cache_size; index++)
76 if (dbf->bucket_cache[index].ca_adr == bucket_adr)
78 dbf->bucket = dbf->bucket_cache[index].ca_bucket;
79 dbf->cache_entry = &dbf->bucket_cache[index];
84 /* It is not in the cache, read it from the disk. */
85 dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
86 if (dbf->bucket_cache[dbf->last_read].ca_changed)
87 _gdbm_write_bucket (dbf, &dbf->bucket_cache[dbf->last_read]);
88 dbf->bucket_cache[dbf->last_read].ca_adr = bucket_adr;
89 dbf->bucket = dbf->bucket_cache[dbf->last_read].ca_bucket;
90 dbf->cache_entry = &dbf->bucket_cache[dbf->last_read];
91 dbf->cache_entry->ca_data.elem_loc = -1;
92 dbf->cache_entry->ca_changed = FALSE;
94 /* Read the bucket. */
95 file_pos = __lseek (dbf, bucket_adr, SEEK_SET);
96 if (file_pos != bucket_adr)
97 _gdbm_fatal (dbf, _("lseek error"));
99 rc = _gdbm_full_read (dbf, dbf->bucket, dbf->header->bucket_size);
101 _gdbm_fatal (dbf, gdbm_strerror (rc));
108 /* Split the current bucket. This includes moving all items in the bucket to
109 a new bucket. This doesn't require any disk reads because all hash values
110 are stored in the buckets. Splitting the current bucket may require
111 doubling the size of the hash directory. */
113 _gdbm_split_bucket (GDBM_FILE dbf, int next_insert)
115 hash_bucket *bucket[2]; /* Pointers to the new buckets. */
117 int new_bits; /* The number of bits for the new buckets. */
118 int cache_0; /* Location in the cache for the buckets. */
120 off_t adr_0; /* File address of the new bucket 0. */
121 off_t adr_1; /* File address of the new bucket 1. */
122 avail_elem old_bucket; /* Avail Struct for the old bucket. */
124 off_t dir_start0; /* Used in updating the directory. */
128 off_t *new_dir; /* Pointer to the new directory. */
129 off_t dir_adr; /* Address of the new directory. */
130 int dir_size; /* Size of the new directory. */
131 off_t old_adr[31]; /* Address of the old directories. */
132 int old_size[31]; /* Size of the old directories. */
133 int old_count; /* Number of old directories. */
135 int index; /* Used in array indexing. */
136 int index1; /* Used in array indexing. */
137 int elem_loc; /* Location in new bucket to put element. */
138 bucket_element *old_el; /* Pointer into the old bucket. */
139 int select; /* Used to index bucket during movement. */
142 /* No directories are yet old. */
145 if (dbf->bucket_cache == NULL)
147 if(_gdbm_init_cache(dbf, DEFAULT_CACHESIZE) == -1)
148 _gdbm_fatal(dbf, _("couldn't init cache"));
151 while (dbf->bucket->count == dbf->header->bucket_elems)
153 /* Initialize the "new" buckets in the cache. */
156 dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
157 cache_0 = dbf->last_read;
159 while (dbf->bucket_cache[cache_0].ca_bucket == dbf->bucket);
160 bucket[0] = dbf->bucket_cache[cache_0].ca_bucket;
161 if (dbf->bucket_cache[cache_0].ca_changed)
162 _gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_0]);
165 dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
166 cache_1 = dbf->last_read;
168 while (dbf->bucket_cache[cache_1].ca_bucket == dbf->bucket);
169 bucket[1] = dbf->bucket_cache[cache_1].ca_bucket;
170 if (dbf->bucket_cache[cache_1].ca_changed)
171 _gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_1]);
172 new_bits = dbf->bucket->bucket_bits+1;
173 _gdbm_new_bucket (dbf, bucket[0], new_bits);
174 _gdbm_new_bucket (dbf, bucket[1], new_bits);
175 adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size);
176 dbf->bucket_cache[cache_0].ca_adr = adr_0;
177 adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size);
178 dbf->bucket_cache[cache_1].ca_adr = adr_1;
182 /* Double the directory size if necessary. */
183 if (dbf->header->dir_bits == dbf->bucket->bucket_bits)
185 dir_size = dbf->header->dir_size * 2;
186 dir_adr = _gdbm_alloc (dbf, dir_size);
187 new_dir = (off_t *) malloc (dir_size);
188 if (new_dir == NULL) _gdbm_fatal (dbf, _("malloc error"));
190 index < dbf->header->dir_size/sizeof (off_t); index++)
192 new_dir[2*index] = dbf->dir[index];
193 new_dir[2*index+1] = dbf->dir[index];
197 old_adr[old_count] = dbf->header->dir;
198 dbf->header->dir = dir_adr;
199 old_size[old_count] = dbf->header->dir_size;
200 dbf->header->dir_size = dir_size;
201 dbf->header->dir_bits = new_bits;
204 /* Now update dbf. */
205 dbf->header_changed = TRUE;
206 dbf->bucket_dir *= 2;
211 /* Copy all elements in dbf->bucket into the new buckets. */
212 for (index = 0; index < dbf->header->bucket_elems; index++)
214 old_el = & (dbf->bucket->h_table[index]);
215 select = (old_el->hash_value >> (31-new_bits)) & 1;
216 elem_loc = old_el->hash_value % dbf->header->bucket_elems;
217 while (bucket[select]->h_table[elem_loc].hash_value != -1)
218 elem_loc = (elem_loc + 1) % dbf->header->bucket_elems;
219 bucket[select]->h_table[elem_loc] = *old_el;
220 bucket[select]->count += 1;
223 /* Allocate avail space for the bucket[1]. */
224 bucket[1]->bucket_avail[0].av_adr
225 = _gdbm_alloc (dbf, dbf->header->block_size);
226 bucket[1]->bucket_avail[0].av_size = dbf->header->block_size;
227 bucket[1]->av_count = 1;
229 /* Copy the avail elements in dbf->bucket to bucket[0]. */
230 bucket[0]->av_count = dbf->bucket->av_count;
233 if (bucket[0]->av_count == BUCKET_AVAIL)
235 /* The avail is full, move the first one to bucket[1]. */
236 _gdbm_put_av_elem (dbf->bucket->bucket_avail[0],
237 bucket[1]->bucket_avail,
238 &bucket[1]->av_count, FALSE);
240 bucket[0]->av_count --;
242 for (; index < dbf->bucket->av_count; index++)
244 bucket[0]->bucket_avail[index1++] = dbf->bucket->bucket_avail[index];
247 /* Update the directory. We have new file addresses for both buckets. */
248 dir_start1 = (dbf->bucket_dir >> (dbf->header->dir_bits - new_bits)) | 1;
249 dir_end = (dir_start1 + 1) << (dbf->header->dir_bits - new_bits);
250 dir_start1 = dir_start1 << (dbf->header->dir_bits - new_bits);
251 dir_start0 = dir_start1 - (dir_end - dir_start1);
252 for (index = dir_start0; index < dir_start1; index++)
253 dbf->dir[index] = adr_0;
254 for (index = dir_start1; index < dir_end; index++)
255 dbf->dir[index] = adr_1;
258 /* Set changed flags. */
259 dbf->bucket_cache[cache_0].ca_changed = TRUE;
260 dbf->bucket_cache[cache_1].ca_changed = TRUE;
261 dbf->bucket_changed = TRUE;
262 dbf->directory_changed = TRUE;
263 dbf->second_changed = TRUE;
265 /* Update the cache! */
266 dbf->bucket_dir = next_insert >> (31-dbf->header->dir_bits);
268 /* Invalidate old cache entry. */
269 old_bucket.av_adr = dbf->cache_entry->ca_adr;
270 old_bucket.av_size = dbf->header->bucket_size;
271 dbf->cache_entry->ca_adr = 0;
272 dbf->cache_entry->ca_changed = FALSE;
274 /* Set dbf->bucket to the proper bucket. */
275 if (dbf->dir[dbf->bucket_dir] == adr_0)
277 dbf->bucket = bucket[0];
278 dbf->cache_entry = &dbf->bucket_cache[cache_0];
279 _gdbm_put_av_elem (old_bucket,
280 bucket[1]->bucket_avail,
281 &bucket[1]->av_count, FALSE);
285 dbf->bucket = bucket[1];
286 dbf->cache_entry = &dbf->bucket_cache[cache_1];
287 _gdbm_put_av_elem (old_bucket,
288 bucket[0]->bucket_avail,
289 &bucket[0]->av_count, FALSE);
294 /* Get rid of old directories. */
295 for (index = 0; index < old_count; index++)
296 _gdbm_free (dbf, old_adr[index], old_size[index]);
300 /* The only place where a bucket is written. CA_ENTRY is the
301 cache entry containing the bucket to be written. */
304 _gdbm_write_bucket (GDBM_FILE dbf, cache_elem *ca_entry)
307 off_t file_pos; /* The return value for lseek. */
309 file_pos = __lseek (dbf, ca_entry->ca_adr, SEEK_SET);
310 if (file_pos != ca_entry->ca_adr)
311 _gdbm_fatal (dbf, _("lseek error"));
312 rc = _gdbm_full_write (dbf, ca_entry->ca_bucket, dbf->header->bucket_size);
314 _gdbm_fatal (dbf, gdbm_strerror (rc));
316 ca_entry->ca_changed = FALSE;
317 ca_entry->ca_data.hash_val = -1;
318 ca_entry->ca_data.elem_loc = -1;