1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * libdatrie - Double-Array Trie Library
4 * Copyright (C) 2006 Theppitak Karoonboonyanan <theppitak@gmail.com>
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 * tail.c - trie tail for keeping suffixes
24 * Author: Theppitak Karoonboonyanan <theppitak@gmail.com>
29 #ifndef _MSC_VER /* for SIZE_MAX */
35 #include "trie-private.h"
36 #include "fileutils.h"
38 /*----------------------------------*
39 * INTERNAL TYPES DECLARATIONS *
40 *----------------------------------*/
42 /*-----------------------------------*
43 * PRIVATE METHODS DECLARATIONS *
44 *-----------------------------------*/
46 static TrieIndex tail_alloc_block (Tail *t);
47 static void tail_free_block (Tail *t, TrieIndex block);
49 /* ==================== BEGIN IMPLEMENTATION PART ==================== */
51 /*------------------------------------*
52 * INTERNAL TYPES IMPLEMENTATIONS *
53 *------------------------------------*/
55 /*------------------------------*
56 * PRIVATE DATA DEFINITONS *
57 *------------------------------*/
71 /*-----------------------------*
72 * METHODS IMPLEMENTAIONS *
73 *-----------------------------*/
75 #define TAIL_SIGNATURE 0xDFFCDFFC
76 #define TAIL_START_BLOCKNO 1
80 * INT32: pointer to first free slot
81 * INT32: number of tail blocks
84 * INT32: pointer to next free block (-1 for allocated blocks)
85 * INT32: data for the key
87 * BYTES[length]: suffix string (no terminating '\0')
91 * @brief Create a new tail object
93 * Create a new empty tail object.
100 t = (Tail *) malloc (sizeof (Tail));
112 * @brief Read tail data from file
114 * @param file : the file to read
116 * @return a pointer to the openned tail data, NULL on failure
118 * Read tail data from the opened file, starting from the current
119 * file pointer until the end of tail data block. On return, the
120 * file pointer is left at the position after the read block.
123 tail_fread (FILE *file)
130 /* check signature */
131 save_pos = ftell (file);
132 if (!file_read_int32 (file, (int32 *) &sig) || TAIL_SIGNATURE != sig)
135 t = (Tail *) malloc (sizeof (Tail));
139 if (!file_read_int32 (file, &t->first_free) ||
140 !file_read_int32 (file, &t->num_tails))
142 goto exit_tail_created;
144 if (t->num_tails > SIZE_MAX / sizeof (TailBlock))
145 goto exit_tail_created;
146 t->tails = (TailBlock *) malloc (t->num_tails * sizeof (TailBlock));
147 if (UNLIKELY (!t->tails))
148 goto exit_tail_created;
149 for (i = 0; i < t->num_tails; i++) {
152 if (!file_read_int32 (file, &t->tails[i].next_free) ||
153 !file_read_int32 (file, &t->tails[i].data) ||
154 !file_read_int16 (file, &length))
159 t->tails[i].suffix = (TrieChar *) malloc (length + 1);
160 if (UNLIKELY (!t->tails[i].suffix))
163 if (!file_read_chars (file, (char *)t->tails[i].suffix, length)) {
164 free (t->tails[i].suffix);
168 t->tails[i].suffix[length] = '\0';
175 free (t->tails[--i].suffix);
181 fseek (file, save_pos, SEEK_SET);
186 * @brief Free tail data
188 * @param t : the tail data
190 * @return 0 on success, non-zero on failure
192 * Free the given tail data.
200 for (i = 0; i < t->num_tails; i++)
201 if (t->tails[i].suffix)
202 free (t->tails[i].suffix);
209 * @brief Write tail data
211 * @param t : the tail data
212 * @param file : the file to write to
214 * @return 0 on success, non-zero on failure
216 * Write tail data to the given @a file, starting from the current file
217 * pointer. On return, the file pointer is left after the tail data block.
220 tail_fwrite (const Tail *t, FILE *file)
224 if (!file_write_int32 (file, TAIL_SIGNATURE) ||
225 !file_write_int32 (file, t->first_free) ||
226 !file_write_int32 (file, t->num_tails))
230 for (i = 0; i < t->num_tails; i++) {
233 if (!file_write_int32 (file, t->tails[i].next_free) ||
234 !file_write_int32 (file, t->tails[i].data))
239 length = t->tails[i].suffix ? strlen ((const char *)t->tails[i].suffix)
241 if (!file_write_int16 (file, length))
244 !file_write_chars (file, (char *)t->tails[i].suffix, length))
257 * @param t : the tail data
258 * @param index : the index of the suffix
260 * @return pointer to the indexed suffix string.
262 * Get suffix from tail with given @a index. The returned string is a pointer
263 * to internal storage, which should be accessed read-only by the caller.
264 * No need to free() it.
267 tail_get_suffix (const Tail *t, TrieIndex index)
269 index -= TAIL_START_BLOCKNO;
270 return LIKELY (index < t->num_tails) ? t->tails[index].suffix : NULL;
274 * @brief Set suffix of existing entry
276 * @param t : the tail data
277 * @param index : the index of the suffix
278 * @param suffix : the new suffix
280 * Set suffix of existing entry of given @a index in tail.
283 tail_set_suffix (Tail *t, TrieIndex index, const TrieChar *suffix)
285 index -= TAIL_START_BLOCKNO;
286 if (LIKELY (index < t->num_tails)) {
287 /* suffix and t->tails[index].suffix may overlap;
288 * so, dup it before it's overwritten
290 TrieChar *tmp = NULL;
292 tmp = (TrieChar *) strdup ((const char *)suffix);
296 if (t->tails[index].suffix)
297 free (t->tails[index].suffix);
298 t->tails[index].suffix = tmp;
306 * @brief Add a new suffix
308 * @param t : the tail data
309 * @param suffix : the new suffix
311 * @return the index of the newly added suffix,
312 * or TRIE_INDEX_ERROR on failure.
314 * Add a new suffix entry to tail.
317 tail_add_suffix (Tail *t, const TrieChar *suffix)
321 new_block = tail_alloc_block (t);
322 if (UNLIKELY (TRIE_INDEX_ERROR == new_block))
323 return TRIE_INDEX_ERROR;
325 tail_set_suffix (t, new_block, suffix);
331 tail_alloc_block (Tail *t)
335 if (0 != t->first_free) {
336 block = t->first_free;
337 t->first_free = t->tails[block].next_free;
341 block = t->num_tails;
343 new_block = realloc (t->tails, (t->num_tails + 1) * sizeof (TailBlock));
344 if (UNLIKELY (!new_block))
345 return TRIE_INDEX_ERROR;
347 t->tails = (TailBlock *) new_block;
350 t->tails[block].next_free = -1;
351 t->tails[block].data = TRIE_DATA_ERROR;
352 t->tails[block].suffix = NULL;
354 return block + TAIL_START_BLOCKNO;
358 tail_free_block (Tail *t, TrieIndex block)
362 block -= TAIL_START_BLOCKNO;
364 if (block >= t->num_tails)
367 t->tails[block].data = TRIE_DATA_ERROR;
368 if (NULL != t->tails[block].suffix) {
369 free (t->tails[block].suffix);
370 t->tails[block].suffix = NULL;
373 /* find insertion point */
375 for (i = t->first_free; i != 0 && i < block; i = t->tails[i].next_free)
378 /* insert free block between j and i */
379 t->tails[block].next_free = i;
381 t->tails[j].next_free = block;
383 t->first_free = block;
387 * @brief Get data associated to suffix entry
389 * @param t : the tail data
390 * @param index : the index of the suffix
392 * @return the data associated to the suffix entry
394 * Get data associated to suffix entry @a index in tail data.
397 tail_get_data (const Tail *t, TrieIndex index)
399 index -= TAIL_START_BLOCKNO;
400 return LIKELY (index < t->num_tails)
401 ? t->tails[index].data : TRIE_DATA_ERROR;
405 * @brief Set data associated to suffix entry
407 * @param t : the tail data
408 * @param index : the index of the suffix
409 * @param data : the data to set
411 * @return boolean indicating success
413 * Set data associated to suffix entry @a index in tail data.
416 tail_set_data (Tail *t, TrieIndex index, TrieData data)
418 index -= TAIL_START_BLOCKNO;
419 if (LIKELY (index < t->num_tails)) {
420 t->tails[index].data = data;
427 * @brief Delete suffix entry
429 * @param t : the tail data
430 * @param index : the index of the suffix to delete
432 * Delete suffix entry from the tail data.
435 tail_delete (Tail *t, TrieIndex index)
437 tail_free_block (t, index);
441 * @brief Walk in tail with a string
443 * @param t : the tail data
444 * @param s : the tail data index
445 * @param suffix_idx : pointer to current character index in suffix
446 * @param str : the string to use in walking
447 * @param len : total characters in @a str to walk
449 * @return total number of characters successfully walked
451 * Walk in the tail data @a t at entry @a s, from given character position
452 * @a *suffix_idx, using @a len characters of given string @a str. On return,
453 * @a *suffix_idx is updated to the position after the last successful walk,
454 * and the function returns the total number of character succesfully walked.
457 tail_walk_str (const Tail *t,
463 const TrieChar *suffix;
467 suffix = tail_get_suffix (t, s);
468 if (UNLIKELY (!suffix))
471 i = 0; j = *suffix_idx;
473 if (str[i] != suffix[j])
476 /* stop and stay at null-terminator */
486 * @brief Walk in tail with a character
488 * @param t : the tail data
489 * @param s : the tail data index
490 * @param suffix_idx : pointer to current character index in suffix
491 * @param c : the character to use in walking
493 * @return boolean indicating success
495 * Walk in the tail data @a t at entry @a s, from given character position
496 * @a *suffix_idx, using given character @a c. If the walk is successful,
497 * it returns TRUE, and @a *suffix_idx is updated to the next character.
498 * Otherwise, it returns FALSE, and @a *suffix_idx is left unchanged.
501 tail_walk_char (const Tail *t,
506 const TrieChar *suffix;
507 TrieChar suffix_char;
509 suffix = tail_get_suffix (t, s);
510 if (UNLIKELY (!suffix))
513 suffix_char = suffix[*suffix_idx];
514 if (suffix_char == c) {
515 if (0 != suffix_char)