1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * libdatrie - Double-Array Trie Library
4 * Copyright (C) 2006 Theppitak Karoonboonyanan <theppitak@gmail.com>
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 * alpha-map.c - map between character codes and trie alphabet
24 * Author: Theppitak Karoonboonyanan <theppitak@gmail.com>
33 #include "alpha-map.h"
34 #include "alpha-map-private.h"
35 #include "trie-private.h"
36 #include "fileutils.h"
39 * @brief Alphabet string length
41 * @param str : the array of null-terminated AlphaChar string to measure
43 * @return the total characters in @a str.
46 alpha_char_strlen (const AlphaChar *str)
50 for (p = str; *p; p++)
56 * @brief Compare alphabet strings
58 * @param str1, str2 : the arrays of null-terminated AlphaChar strings
61 * @return negative if @a str1 < @a str2;
62 * 0 if @a str1 == @a str2;
63 * positive if @a str1 > @a str2
65 * Available since: 0.2.7
68 alpha_char_strcmp (const AlphaChar *str1, const AlphaChar *str2)
70 while (*str1 && *str1 == *str2) {
80 /*------------------------------*
81 * PRIVATE DATA DEFINITONS *
82 *------------------------------*/
84 typedef struct _AlphaRange {
85 struct _AlphaRange *next;
92 AlphaRange *first_range;
95 /* alpha-to-trie map */
96 AlphaChar alpha_begin;
99 TrieIndex *alpha_to_trie_map;
101 /* trie-to-alpha map */
103 AlphaChar *trie_to_alpha_map;
106 /*-----------------------------------*
107 * PRIVATE METHODS DECLARATIONS *
108 *-----------------------------------*/
109 static int alpha_map_get_total_ranges (const AlphaMap *alpha_map);
110 static int alpha_map_add_range_only (AlphaMap *alpha_map,
111 AlphaChar begin, AlphaChar end);
112 static int alpha_map_recalc_work_area (AlphaMap *alpha_map);
114 /*-----------------------------*
115 * METHODS IMPLEMENTAIONS *
116 *-----------------------------*/
118 #define ALPHAMAP_SIGNATURE 0xD9FCD9FC
122 * - INT32: total ranges
125 * - INT32: range begin
130 * @brief Create new alphabet map
132 * @return a pointer to the newly created alphabet map, NULL on failure
134 * Create a new empty alphabet map. The map contents can then be added with
135 * alpha_map_add_range().
137 * The created object must be freed with alpha_map_free().
144 alpha_map = (AlphaMap *) malloc (sizeof (AlphaMap));
145 if (UNLIKELY (!alpha_map))
148 alpha_map->first_range = NULL;
151 alpha_map->alpha_begin = 0;
152 alpha_map->alpha_end = 0;
153 alpha_map->alpha_map_sz = 0;
154 alpha_map->alpha_to_trie_map = NULL;
156 alpha_map->trie_map_sz = 0;
157 alpha_map->trie_to_alpha_map = NULL;
163 * @brief Create a clone of alphabet map
165 * @param a_map : the source alphabet map to clone
167 * @return a pointer to the alphabet map clone, NULL on failure
169 * The created object must be freed with alpha_map_free().
172 alpha_map_clone (const AlphaMap *a_map)
177 alpha_map = alpha_map_new ();
178 if (UNLIKELY (!alpha_map))
181 for (range = a_map->first_range; range; range = range->next) {
182 if (alpha_map_add_range_only (alpha_map, range->begin, range->end) != 0)
183 goto exit_map_created;
186 if (alpha_map_recalc_work_area (alpha_map) != 0)
187 goto exit_map_created;
192 alpha_map_free (alpha_map);
197 * @brief Free an alphabet map object
199 * @param alpha_map : the alphabet map object to free
201 * Destruct the @a alpha_map and free its allocated memory.
204 alpha_map_free (AlphaMap *alpha_map)
208 p = alpha_map->first_range;
216 if (alpha_map->alpha_to_trie_map)
217 free (alpha_map->alpha_to_trie_map);
218 if (alpha_map->trie_to_alpha_map)
219 free (alpha_map->trie_to_alpha_map);
225 alpha_map_fread_bin (FILE *file)
232 /* check signature */
233 save_pos = ftell (file);
234 if (!file_read_int32 (file, (int32 *) &sig) || ALPHAMAP_SIGNATURE != sig)
237 alpha_map = alpha_map_new ();
238 if (UNLIKELY (!alpha_map))
241 /* read number of ranges */
242 if (!file_read_int32 (file, &total))
243 goto exit_map_created;
245 /* read character ranges */
246 for (i = 0; i < total; i++) {
249 if (!file_read_int32 (file, &b) || !file_read_int32 (file, &e))
250 goto exit_map_created;
251 alpha_map_add_range_only (alpha_map, b, e);
255 if (UNLIKELY (alpha_map_recalc_work_area (alpha_map) != 0))
256 goto exit_map_created;
261 alpha_map_free (alpha_map);
263 fseek (file, save_pos, SEEK_SET);
268 alpha_map_get_total_ranges (const AlphaMap *alpha_map)
273 for (n = 0, range = alpha_map->first_range; range; range = range->next) {
281 alpha_map_fwrite_bin (const AlphaMap *alpha_map, FILE *file)
285 if (!file_write_int32 (file, ALPHAMAP_SIGNATURE) ||
286 !file_write_int32 (file, alpha_map_get_total_ranges (alpha_map)))
291 for (range = alpha_map->first_range; range; range = range->next) {
292 if (!file_write_int32 (file, range->begin) ||
293 !file_write_int32 (file, range->end))
303 alpha_map_add_range_only (AlphaMap *alpha_map, AlphaChar begin, AlphaChar end)
305 AlphaRange *q, *r, *begin_node, *end_node;
310 begin_node = end_node = 0;
312 /* Skip first ranges till 'begin' is covered */
313 for (q = 0, r = alpha_map->first_range;
314 r && r->begin <= begin;
317 if (begin <= r->end) {
318 /* 'r' covers 'begin' -> take 'r' as beginning point */
322 if (r->end + 1 == begin) {
323 /* 'begin' is next to 'r'-end
324 * -> extend 'r'-end to cover 'begin'
331 if (!begin_node && r && r->begin <= end + 1) {
332 /* ['begin', 'end'] overlaps into 'r'-begin
333 * or 'r' is next to 'end' if r->begin == end + 1
334 * -> extend 'r'-begin to include the range
339 /* Run upto the first range that exceeds 'end' */
340 while (r && r->begin <= end + 1) {
342 /* 'r' covers 'end' -> take 'r' as ending point */
344 } else if (r != begin_node) {
345 /* ['begin', 'end'] covers the whole 'r' -> remove 'r' */
351 alpha_map->first_range = r->next;
353 r = alpha_map->first_range;
360 if (!end_node && q && begin <= q->end) {
361 /* ['begin', 'end'] overlaps 'q' at the end
362 * -> extend 'q'-end to include the range
368 if (begin_node && end_node) {
369 if (begin_node != end_node) {
370 /* Merge begin_node and end_node ranges together */
371 assert (begin_node->next == end_node);
372 begin_node->end = end_node->end;
373 begin_node->next = end_node->next;
376 } else if (!begin_node && !end_node) {
377 /* ['begin', 'end'] overlaps with none of the ranges
378 * -> insert a new range
380 AlphaRange *range = (AlphaRange *) malloc (sizeof (AlphaRange));
382 if (UNLIKELY (!range))
385 range->begin = begin;
388 /* insert it between 'q' and 'r' */
392 alpha_map->first_range = range;
401 alpha_map_recalc_work_area (AlphaMap *alpha_map)
405 /* free old existing map */
406 if (alpha_map->alpha_to_trie_map) {
407 free (alpha_map->alpha_to_trie_map);
408 alpha_map->alpha_to_trie_map = NULL;
410 if (alpha_map->trie_to_alpha_map) {
411 free (alpha_map->trie_to_alpha_map);
412 alpha_map->trie_to_alpha_map = NULL;
415 range = alpha_map->first_range;
417 const AlphaChar alpha_begin = range->begin;
420 TrieIndex trie_last = 0;
423 /* reconstruct alpha-to-trie map */
424 alpha_map->alpha_begin = alpha_begin;
425 while (range->next) {
428 alpha_map->alpha_end = range->end;
429 alpha_map->alpha_map_sz = n_cells = range->end - alpha_begin + 1;
430 alpha_map->alpha_to_trie_map
431 = (TrieIndex *) malloc (n_cells * sizeof (TrieIndex));
432 if (UNLIKELY (!alpha_map->alpha_to_trie_map))
433 goto error_alpha_map_not_created;
434 for (i = 0; i < n_cells; i++) {
435 alpha_map->alpha_to_trie_map[i] = TRIE_INDEX_MAX;
437 for (range = alpha_map->first_range; range; range = range->next) {
438 for (a = range->begin; a <= range->end; a++) {
439 alpha_map->alpha_to_trie_map[a - alpha_begin] = ++trie_last;
443 /* reconstruct trie-to-alpha map */
444 alpha_map->trie_map_sz = n_cells = trie_last + 1;
445 alpha_map->trie_to_alpha_map
446 = (AlphaChar *) malloc (n_cells * sizeof (AlphaChar));
447 if (UNLIKELY (!alpha_map->trie_to_alpha_map))
448 goto error_alpha_map_created;
449 alpha_map->trie_to_alpha_map[0] = 0;
451 for (range = alpha_map->first_range; range; range = range->next) {
452 for (a = range->begin; a <= range->end; a++) {
453 alpha_map->trie_to_alpha_map[tc++] = a;
460 error_alpha_map_created:
461 free (alpha_map->alpha_to_trie_map);
462 alpha_map->alpha_to_trie_map = NULL;
463 error_alpha_map_not_created:
468 * @brief Add a range to alphabet map
470 * @param alpha_map : the alphabet map object
471 * @param begin : the first character of the range
472 * @param end : the last character of the range
474 * @return 0 on success, non-zero on failure
476 * Add a range of character codes from @a begin to @a end to the
480 alpha_map_add_range (AlphaMap *alpha_map, AlphaChar begin, AlphaChar end)
482 int res = alpha_map_add_range_only (alpha_map, begin, end);
485 return alpha_map_recalc_work_area (alpha_map);
489 alpha_map_char_to_trie (const AlphaMap *alpha_map, AlphaChar ac)
491 TrieIndex alpha_begin;
493 if (UNLIKELY (0 == ac))
496 if (UNLIKELY (!alpha_map->alpha_to_trie_map))
497 return TRIE_INDEX_MAX;
499 alpha_begin = alpha_map->alpha_begin;
500 if (alpha_begin <= ac && ac <= alpha_map->alpha_end)
502 return alpha_map->alpha_to_trie_map[ac - alpha_begin];
505 return TRIE_INDEX_MAX;
509 alpha_map_trie_to_char (const AlphaMap *alpha_map, TrieChar tc)
511 if (tc < alpha_map->trie_map_sz)
512 return alpha_map->trie_to_alpha_map[tc];
514 return ALPHA_CHAR_ERROR;
518 alpha_map_char_to_trie_str (const AlphaMap *alpha_map, const AlphaChar *str)
520 TrieChar *trie_str, *p;
522 trie_str = (TrieChar *) malloc (alpha_char_strlen (str) + 1);
523 if (UNLIKELY (!trie_str))
526 for (p = trie_str; *str; p++, str++) {
527 TrieIndex tc = alpha_map_char_to_trie (alpha_map, *str);
528 if (TRIE_INDEX_MAX == tc)
529 goto error_str_allocated;
542 alpha_map_trie_to_char_str (const AlphaMap *alpha_map, const TrieChar *str)
544 AlphaChar *alpha_str, *p;
546 alpha_str = (AlphaChar *) malloc ((strlen ((const char *)str) + 1)
547 * sizeof (AlphaChar));
548 if (UNLIKELY (!alpha_str))
551 for (p = alpha_str; *str; p++, str++) {
552 *p = (AlphaChar) alpha_map_trie_to_char (alpha_map, *str);