From 97fab079a7b1d5fbe23b5673e15d1a7f8033d5dd Mon Sep 17 00:00:00 2001 From: DongHun Kwak Date: Wed, 11 Oct 2017 17:34:33 +0900 Subject: [PATCH] Imported Upstream version 0.2.8 Change-Id: Idd951c52270d7d824bae04e38c5f2254b7733753 Signed-off-by: DongHun Kwak --- ChangeLog | 110 +++++++++++++++++++++++++++++++++++++++++++++ NEWS | 10 +++++ configure.ac | 4 +- datrie/alpha-map-private.h | 2 +- datrie/alpha-map.c | 21 +++++++-- datrie/alpha-map.h | 19 ++++++++ datrie/darray.c | 14 +++--- datrie/trie.c | 73 +++++++++++++++++------------- datrie/trie.h | 40 +++++++++++++++++ datrie/triedefs.h | 7 +-- tests/Makefile.am | 8 ++++ tests/test_iterator.c | 1 + tests/test_nonalpha.c | 92 +++++++++++++++++++++++++++++++++++++ tests/test_walk.c | 34 +++++++------- tests/utils.c | 80 ++++++++++++++++----------------- 15 files changed, 410 insertions(+), 105 deletions(-) create mode 100644 tests/test_nonalpha.c diff --git a/ChangeLog b/ChangeLog index 50fb04f..b37fa3d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,113 @@ +2014-01-10 Theppitak Karoonboonyanan + + * configure.ac: + - Bump library revision to reflect code changes. + + * NEWS, configure.ac: + === Version 0.2.8 === + +2014-01-09 Theppitak Karoonboonyanan + + Improve documentation. + + * datrie/triedefs.h: + - Refine descriptions of data types. + * datrie/trie.c (trie_iterator_new): + - Fix typo on trie_root() mentioning. + * datrie/trie.c (trie_store, trie_store_if_absent): + - Adjust wording. + * datrie/alpha-map.h, datrie/trie.h: + - Add detailed description of AlphaMap and Trie types. + +2014-01-08 Theppitak Karoonboonyanan + + Clarify message in test_nonalpha. + + * tests/test_nonalpha.c (main): + - Clarify message on false key duplication. + +2014-01-08 Theppitak Karoonboonyanan + + Add test on keys with non-alphabet input chars. + + * tests/Makefile.am, +tests/test_nonalpha.c: + - Add test to ensure that operations on keys with non-alphabet + input chars fail. + +2014-01-08 Theppitak Karoonboonyanan + + Fail trie operations on non-alphabet inputs. + + alpha_map_char_to_trie() tried to return TRIE_CHAR_MAX to indicate + out-of-range error. But this value is indeed valid in trie operations. + Doing so could allow false key duplication when different non-alphabet + chars and TRIE_CHAR_MAX itself were all mapped to TRIE_CHAR_MAX. + So, let's fail all trie operations on non-alphabet input chars. + + * datrie/alpha-map-private.h, datrie/alpha-map.c + (alpha_map_char_to_trie): + - Make alpha_map_char_to_trie return TrieIndex type, using + TRIE_INDEX_MAX to indicate out-of-range error. + This allows TRIE_CHAR_MAX to be returned as a valid output. + * datrie/alpha-map.c (alpha_map_char_to_trie_str): + - Fail if alpha_map_char_to_trie() returns error code. + * datrie/trie.c (trie_retrieve, trie_store_conditionally, trie_delete, + trie_state_walk, trie_state_is_walkable): + - Check return value from alpha_map_char_to_trie() and return + failure status on error. + - Also cast TrieIndex return values to TrieChar on function calls. + + Thanks Naoki Youshinaga for the suggestion. + +2014-01-07 Theppitak Karoonboonyanan + + Check for NULL result from AlphaMap string funcs. + + * datrie/trie.c (trie_store_conditionally): + - Return failure on NULL alpha_map_char_to_trie_str(). + +2014-01-07 Theppitak Karoonboonyanan + + Return NULL on allocation errors in AlphaMap funcs. + + * datrie/alpha-map.c + (alpha_map_char_to_trie_str, alpha_map_trie_to_char_str): + - Return NULL on malloc() error. + +2014-01-03 Theppitak Karoonboonyanan + + Fix edge case with TRIE_CHAR_MAX as TrieChar. + + The trie input char with value TRIE_CHAR_MAX (255), was always + skipped by double-array algorithms. Let's include it. + + * datrie/darray.c (da_has_children, da_output_symbols, + da_relocate_base, da_first_separate, da_next_separate): + - Include the last char in trie char iterations. + + * datrie/darray.c (da_first_separate, da_next_separate): + - Declare characters as TrieIndex type instead of TrieChar, + to prevent infinite loop due to unsigned char overflow. + + Thanks Naoki Youshinaga for the report, test case, and analysis. + +2013-10-25 Theppitak Karoonboonyanan + + Fix comiler warnings in tests. + + * tests/test_walk.c (main): + - Remove unused var i; + - Remove extra printf() args. + * tests/test_iterator.c: + - Add missing #include for free(). + * tests/test_walk.c (walk_dict), tests/utils.c (dict_src): + - Cast string literals to (AlphaChar *) to fix signedness + differences. + +2013-10-25 Theppitak Karoonboonyanan + + * configure.ac: Post-release version suffix added. + 2013-10-22 Theppitak Karoonboonyanan * NEWS, configure.ac: diff --git a/NEWS b/NEWS index 5570568..bc3b9e5 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,15 @@ libdatrie +0.2.8 (2014-01-10) +===== +- Fix compiler warnings in test suites. +- Fix edge-case error on alphabet set of size 255. + (Thanks Naoki Youshinaga for the report, test case, and analysis.) +- Fail trie operations on non-alphabet inputs, rather than silently allowing + them to sneak in as false keys. + (Thanks Naoki Youshinaga for the suggestion.) +- Improved documentation. + 0.2.7.1 (2013-10-22) ======= - Bump library versioning to reflect API addition. diff --git a/configure.ac b/configure.ac index ec56c43..82d83d5 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ # Process this file with autoconf to produce a configure script. AC_PREREQ(2.59) -AC_INIT(libdatrie, 0.2.7.1, thep@linux.thai.net) +AC_INIT(libdatrie, 0.2.8, thep@linux.thai.net) AC_CONFIG_SRCDIR([datrie/trie.h]) AC_CONFIG_HEADER([config.h]) AC_CONFIG_MACRO_DIR([m4]) @@ -14,7 +14,7 @@ AM_INIT_AUTOMAKE(dist-xz no-dist-gzip) # Interfaces added: CURRENT++ REVISION=0 AGE++ # Interfaces changed/removed: CURRENT++ REVISION=0 AGE=0 LT_CURRENT=4 -LT_REVISION=0 +LT_REVISION=1 LT_AGE=3 AC_SUBST(LT_CURRENT) AC_SUBST(LT_REVISION) diff --git a/datrie/alpha-map-private.h b/datrie/alpha-map-private.h index d3aceb4..0848ad7 100644 --- a/datrie/alpha-map-private.h +++ b/datrie/alpha-map-private.h @@ -34,7 +34,7 @@ AlphaMap * alpha_map_fread_bin (FILE *file); int alpha_map_fwrite_bin (const AlphaMap *alpha_map, FILE *file); -TrieChar alpha_map_char_to_trie (const AlphaMap *alpha_map, +TrieIndex alpha_map_char_to_trie (const AlphaMap *alpha_map, AlphaChar ac); AlphaChar alpha_map_trie_to_char (const AlphaMap *alpha_map, diff --git a/datrie/alpha-map.c b/datrie/alpha-map.c index f25604f..1a8fae9 100644 --- a/datrie/alpha-map.c +++ b/datrie/alpha-map.c @@ -369,10 +369,10 @@ alpha_map_add_range (AlphaMap *alpha_map, AlphaChar begin, AlphaChar end) return 0; } -TrieChar +TrieIndex alpha_map_char_to_trie (const AlphaMap *alpha_map, AlphaChar ac) { - TrieChar alpha_begin; + TrieIndex alpha_begin; AlphaRange *range; if (0 == ac) @@ -386,7 +386,7 @@ alpha_map_char_to_trie (const AlphaMap *alpha_map, AlphaChar ac) alpha_begin += range->end - range->begin + 1; } - return TRIE_CHAR_MAX; + return TRIE_INDEX_MAX; } AlphaChar @@ -415,12 +415,22 @@ alpha_map_char_to_trie_str (const AlphaMap *alpha_map, const AlphaChar *str) TrieChar *trie_str, *p; trie_str = (TrieChar *) malloc (alpha_char_strlen (str) + 1); + if (!trie_str) + return NULL; + for (p = trie_str; *str; p++, str++) { - *p = alpha_map_char_to_trie (alpha_map, *str); + TrieIndex tc = alpha_map_char_to_trie (alpha_map, *str); + if (TRIE_INDEX_MAX == tc) + goto error_str_allocated; + *p = (TrieChar) tc; } *p = 0; return trie_str; + +error_str_allocated: + free (trie_str); + return NULL; } AlphaChar * @@ -430,6 +440,9 @@ alpha_map_trie_to_char_str (const AlphaMap *alpha_map, const TrieChar *str) alpha_str = (AlphaChar *) malloc ((strlen ((const char *)str) + 1) * sizeof (AlphaChar)); + if (!alpha_str) + return NULL; + for (p = alpha_str; *str; p++, str++) { *p = (AlphaChar) alpha_map_trie_to_char (alpha_map, *str); } diff --git a/datrie/alpha-map.h b/datrie/alpha-map.h index 7246160..8c34ad5 100644 --- a/datrie/alpha-map.h +++ b/datrie/alpha-map.h @@ -39,6 +39,25 @@ extern "C" { /** * @file alpha-map.h * @brief AlphaMap data type and functions + * + * AlphaMap is a mapping between AlphaChar and TrieChar. AlphaChar is the + * alphabet character used in words of a target language, while TrieChar + * is a small integer with packed range of values and is actually used in + * trie state transition calculations. + * + * Since double-array trie relies on sparse state transition table, + * a small set of input characters can make the table small, i.e. with + * small number of columns. But in real life, alphabet characters can be + * of non-continuous range of values. The unused slots between them can + * waste the space in the table, and can increase the chance of unused + * array cells. + * + * AlphaMap is thus defined for mapping between non-continuous ranges of + * values of AlphaChar and packed and continuous range of Triechar. + * + * In this implementation, TrieChar is defined as a single-byte integer, + * which means the largest AlphaChar set that is supported is of 255 + * values, as the special value of 0 is reserved for null-termination code. */ /** diff --git a/datrie/darray.c b/datrie/darray.c index 8d976a6..c07712a 100644 --- a/datrie/darray.c +++ b/datrie/darray.c @@ -499,7 +499,7 @@ da_has_children (const DArray *d, return FALSE; max_c = MIN_VAL (TRIE_CHAR_MAX, d->num_cells - base); - for (c = 0; c < max_c; c++) { + for (c = 0; c <= max_c; c++) { if (da_get_check (d, base + c) == s) return TRUE; } @@ -519,7 +519,7 @@ da_output_symbols (const DArray *d, base = da_get_base (d, s); max_c = MIN_VAL (TRIE_CHAR_MAX, d->num_cells - base); - for (c = 0; c < max_c; c++) { + for (c = 0; c <= max_c; c++) { if (da_get_check (d, base + c) == s) symbols_add_fast (syms, (TrieChar) c); } @@ -617,7 +617,7 @@ da_relocate_base (DArray *d, TrieIndex c, max_c; max_c = MIN_VAL (TRIE_CHAR_MAX, d->num_cells - old_next_base); - for (c = 0; c < max_c; c++) { + for (c = 0; c <= max_c; c++) { if (da_get_check (d, old_next_base + c) == old_next) da_set_check (d, old_next_base + c, new_next); } @@ -765,11 +765,11 @@ TrieIndex da_first_separate (DArray *d, TrieIndex root, TrieString *keybuff) { TrieIndex base; - TrieChar c, max_c; + TrieIndex c, max_c; while ((base = da_get_base (d, root)) >= 0) { max_c = MIN_VAL (TRIE_CHAR_MAX, d->num_cells - base); - for (c = 0; c < max_c; c++) { + for (c = 0; c <= max_c; c++) { if (da_get_check (d, base + c) == root) break; } @@ -811,7 +811,7 @@ da_next_separate (DArray *d, TrieIndex root, TrieIndex sep, TrieString *keybuff) { TrieIndex parent; TrieIndex base; - TrieChar c, max_c; + TrieIndex c, max_c; while (sep != root) { parent = da_get_check (d, sep); @@ -822,7 +822,7 @@ da_next_separate (DArray *d, TrieIndex root, TrieIndex sep, TrieString *keybuff) /* find next sibling of sep */ max_c = MIN_VAL (TRIE_CHAR_MAX, d->num_cells - base); - while (++c < max_c) { + while (++c <= max_c) { if (da_get_check (d, base + c) == parent) { trie_string_append_char (keybuff, c); return da_first_separate (d, base + c, keybuff); diff --git a/datrie/trie.c b/datrie/trie.c index ae3ff25..17699d3 100644 --- a/datrie/trie.c +++ b/datrie/trie.c @@ -333,11 +333,11 @@ trie_retrieve (const Trie *trie, const AlphaChar *key, TrieData *o_data) /* walk through branches */ s = da_get_root (trie->da); for (p = key; !trie_da_is_separate (trie->da, s); p++) { - if (!da_walk (trie->da, &s, - alpha_map_char_to_trie (trie->alpha_map, *p))) - { + TrieIndex tc = alpha_map_char_to_trie (trie->alpha_map, *p); + if (TRIE_INDEX_MAX == tc) + return FALSE; + if (!da_walk (trie->da, &s, (TrieChar) tc)) return FALSE; - } if (0 == *p) break; } @@ -346,11 +346,11 @@ trie_retrieve (const Trie *trie, const AlphaChar *key, TrieData *o_data) s = trie_da_get_tail_index (trie->da, s); suffix_idx = 0; for ( ; ; p++) { - if (!tail_walk_char (trie->tail, s, &suffix_idx, - alpha_map_char_to_trie (trie->alpha_map, *p))) - { + TrieIndex tc = alpha_map_char_to_trie (trie->alpha_map, *p); + if (TRIE_INDEX_MAX == tc) + return FALSE; + if (!tail_walk_char (trie->tail, s, &suffix_idx, (TrieChar) tc)) return FALSE; - } if (0 == *p) break; } @@ -368,7 +368,7 @@ trie_retrieve (const Trie *trie, const AlphaChar *key, TrieData *o_data) * @param key : the key for the entry to retrieve * @param data : the data associated to the entry * - * @return boolean value indicating the success of the process + * @return boolean value indicating the success of the operation * * Store a @a data for the given @a key in @a trie. If @a key does not * exist in @a trie, it will be appended. If it does, its current data will @@ -387,7 +387,7 @@ trie_store (Trie *trie, const AlphaChar *key, TrieData data) * @param key : the key for the entry to retrieve * @param data : the data associated to the entry * - * @return boolean value indicating the success of the process + * @return boolean value indicating the success of the operation * * Store a @a data for the given @a key in @a trie. If @a key does not * exist in @a trie, it will be appended. If it does, the function will @@ -417,13 +417,16 @@ trie_store_conditionally (Trie *trie, /* walk through branches */ s = da_get_root (trie->da); for (p = key; !trie_da_is_separate (trie->da, s); p++) { - if (!da_walk (trie->da, &s, - alpha_map_char_to_trie (trie->alpha_map, *p))) - { + TrieIndex tc = alpha_map_char_to_trie (trie->alpha_map, *p); + if (TRIE_INDEX_MAX == tc) + return FALSE; + if (!da_walk (trie->da, &s, (TrieChar) tc)) { TrieChar *key_str; Bool res; key_str = alpha_map_char_to_trie_str (trie->alpha_map, p); + if (!key_str) + return FALSE; res = trie_branch_in_branch (trie, s, key_str, data); free (key_str); @@ -438,13 +441,16 @@ trie_store_conditionally (Trie *trie, t = trie_da_get_tail_index (trie->da, s); suffix_idx = 0; for ( ; ; p++) { - if (!tail_walk_char (trie->tail, t, &suffix_idx, - alpha_map_char_to_trie (trie->alpha_map, *p))) - { + TrieIndex tc = alpha_map_char_to_trie (trie->alpha_map, *p); + if (TRIE_INDEX_MAX == tc) + return FALSE; + if (!tail_walk_char (trie->tail, t, &suffix_idx, (TrieChar) tc)) { TrieChar *tail_str; Bool res; tail_str = alpha_map_char_to_trie_str (trie->alpha_map, sep); + if (!tail_str) + return FALSE; res = trie_branch_in_tail (trie, s, tail_str, data); free (tail_str); @@ -547,11 +553,11 @@ trie_delete (Trie *trie, const AlphaChar *key) /* walk through branches */ s = da_get_root (trie->da); for (p = key; !trie_da_is_separate (trie->da, s); p++) { - if (!da_walk (trie->da, &s, - alpha_map_char_to_trie (trie->alpha_map, *p))) - { + TrieIndex tc = alpha_map_char_to_trie (trie->alpha_map, *p); + if (TRIE_INDEX_MAX == tc) + return FALSE; + if (!da_walk (trie->da, &s, (TrieChar) tc)) return FALSE; - } if (0 == *p) break; } @@ -560,11 +566,11 @@ trie_delete (Trie *trie, const AlphaChar *key) t = trie_da_get_tail_index (trie->da, s); suffix_idx = 0; for ( ; ; p++) { - if (!tail_walk_char (trie->tail, t, &suffix_idx, - alpha_map_char_to_trie (trie->alpha_map, *p))) - { + TrieIndex tc = alpha_map_char_to_trie (trie->alpha_map, *p); + if (TRIE_INDEX_MAX == tc) + return FALSE; + if (!tail_walk_char (trie->tail, t, &suffix_idx, (TrieChar) tc)) return FALSE; - } if (0 == *p) break; } @@ -742,12 +748,14 @@ trie_state_rewind (TrieState *s) Bool trie_state_walk (TrieState *s, AlphaChar c) { - TrieChar tc = alpha_map_char_to_trie (s->trie->alpha_map, c); + TrieIndex tc = alpha_map_char_to_trie (s->trie->alpha_map, c); + if (TRIE_INDEX_MAX == tc) + return FALSE; if (!s->is_suffix) { Bool ret; - ret = da_walk (s->trie->da, &s->index, tc); + ret = da_walk (s->trie->da, &s->index, (TrieChar) tc); if (ret && trie_da_is_separate (s->trie->da, s->index)) { s->index = trie_da_get_tail_index (s->trie->da, s->index); @@ -757,7 +765,8 @@ trie_state_walk (TrieState *s, AlphaChar c) return ret; } else { - return tail_walk_char (s->trie->tail, s->index, &s->suffix_idx, tc); + return tail_walk_char (s->trie->tail, s->index, &s->suffix_idx, + (TrieChar) tc); } } @@ -774,13 +783,15 @@ trie_state_walk (TrieState *s, AlphaChar c) Bool trie_state_is_walkable (const TrieState *s, AlphaChar c) { - TrieChar tc = alpha_map_char_to_trie (s->trie->alpha_map, c); + TrieIndex tc = alpha_map_char_to_trie (s->trie->alpha_map, c); + if (TRIE_INDEX_MAX == tc) + return FALSE; if (!s->is_suffix) - return da_is_walkable (s->trie->da, s->index, tc); + return da_is_walkable (s->trie->da, s->index, (TrieChar) tc); else return tail_is_walkable_char (s->trie->tail, s->index, s->suffix_idx, - tc); + (TrieChar) tc); } /** @@ -878,7 +889,7 @@ trie_state_get_data (const TrieState *s) * Create a new trie iterator for iterating entries of a sub-trie rooted at * state @a s. * - * Use it with the result of trie_get_root() to iterate the whole trie. + * Use it with the result of trie_root() to iterate the whole trie. * * The created object must be freed with trie_iterator_free(). * diff --git a/datrie/trie.h b/datrie/trie.h index 16a2af6..2624ae5 100644 --- a/datrie/trie.h +++ b/datrie/trie.h @@ -37,6 +37,46 @@ extern "C" { /** * @file trie.h * @brief Trie data type and functions + * + * Trie is a kind of digital search tree, an efficient indexing method with + * O(1) time complexity for searching. Comparably as efficient as hashing, + * trie also provides flexibility on incremental matching and key spelling + * manipulation. This makes it ideal for lexical analyzers, as well as + * spelling dictionaries. + * + * This library is an implementation of double-array structure for representing + * trie, as proposed by Junichi Aoe. The details of the implementation can be + * found at http://linux.thai.net/~thep/datrie/datrie.html + * + * A Trie is associated with an AlphaMap, a map between actual alphabet + * characters and the raw character used to walk through trie. + * You can define the alphabet set by adding ranges of character codes + * to it before associating it to a trie. And the keys to be added to the trie + * must be only in such ranges. + * + * A new Trie can be created in memory using trie_new(), saved to file using + * trie_save(), and loaded later with trie_new_from_file(). + * It can even be embeded in another file using trie_fwrite() and read back + * using trie_fread(). + * After use, Trie objects must be freed using trie_free(). + * + * Operations on trie include: + * + * - Add/delete entries with trie_store() and trie_delete() + * - Retrieve entries with trie_retrieve() + * - Walk through trie stepwise with TrieState and its functions + * (trie_root(), trie_state_walk(), trie_state_rewind(), + * trie_state_clone(), trie_state_copy(), + * trie_state_is_walkable(), trie_state_walkable_chars(), + * trie_state_is_single(), trie_state_get_data(). + * And do not forget to free TrieState objects with trie_state_free() + * after use.) + * - Enumerate all keys using trie_enumerate() + * - Iterate entries using TrieIterator and its functions + * (trie_iterator_new(), trie_iterator_next(), trie_iterator_get_key(), + * trie_iterator_get_data(). + * And do not forget to free TrieIterator objects with trie_iterator_free() + * after use.) */ /** diff --git a/datrie/triedefs.h b/datrie/triedefs.h index 01a21cf..1579e2e 100644 --- a/datrie/triedefs.h +++ b/datrie/triedefs.h @@ -35,7 +35,7 @@ */ /** - * @brief Trie character type for alphabet + * @brief Alphabet character type for use as input/output strings of trie keys */ typedef uint32 AlphaChar; @@ -45,7 +45,8 @@ typedef uint32 AlphaChar; #define ALPHA_CHAR_ERROR (~(AlphaChar)0) /** - * @brief Trie character type for key + * @brief Raw character type mapped into packed set from AlphaChar, + * for use in actual trie transition calculations */ typedef unsigned char TrieChar; /** @@ -55,7 +56,7 @@ typedef unsigned char TrieChar; #define TRIE_CHAR_MAX 255 /** - * @brief Type of Trie index + * @brief Type of index into Trie double-array and tail structures */ typedef int32 TrieIndex; /** diff --git a/tests/Makefile.am b/tests/Makefile.am index 410f36b..9720c3d 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -7,6 +7,7 @@ TESTS = \ test_iterator \ test_store-retrieve \ test_file \ + test_nonalpha \ $(NULL) check_PROGRAMS = \ @@ -14,6 +15,7 @@ check_PROGRAMS = \ test_iterator \ test_store-retrieve \ test_file \ + test_nonalpha \ $(NULL) noinst_HEADERS = \ @@ -44,3 +46,9 @@ test_file_SOURCES = \ $(NULL) test_file_LDADD = $(top_builddir)/datrie/libdatrie.la +test_nonalpha_SOURCES = \ + test_nonalpha.c \ + utils.c \ + $(NULL) +test_nonalpha_LDADD = $(top_builddir)/datrie/libdatrie.la + diff --git a/tests/test_iterator.c b/tests/test_iterator.c index 21ab7ff..03fc6ed 100644 --- a/tests/test_iterator.c +++ b/tests/test_iterator.c @@ -27,6 +27,7 @@ #include #include "utils.h" #include +#include int main () diff --git a/tests/test_nonalpha.c b/tests/test_nonalpha.c new file mode 100644 index 0000000..06f2f54 --- /dev/null +++ b/tests/test_nonalpha.c @@ -0,0 +1,92 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * libdatrie - Double-Array Trie Library + * Copyright (C) 2014 Theppitak Karoonboonyanan + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * test_nonalpha.c - Test for datrie behaviors on non-alphabet inputs + * Created: 2014-01-06 + * Author: Theppitak Karoonboonyanan + */ + +#include +#include "utils.h" +#include + +const AlphaChar *nonalpha_src[] = { + (AlphaChar *)L"a6acus", + (AlphaChar *)L"a5acus", + NULL +}; + +int +main () +{ + Trie *test_trie; + DictRec *dict_p; + const AlphaChar **nonalpha_key; + TrieData trie_data; + Bool is_fail; + + msg_step ("Preparing trie"); + test_trie = en_trie_new (); + if (!test_trie) { + fprintf (stderr, "Fail to create test trie\n"); + goto err_trie_not_created; + } + + /* store */ + msg_step ("Adding data to trie"); + for (dict_p = dict_src; dict_p->key; dict_p++) { + if (!trie_store (test_trie, dict_p->key, dict_p->data)) { + printf ("Failed to add key '%ls', data %d.\n", + dict_p->key, dict_p->data); + goto err_trie_created; + } + } + + /* test storing keys with non-alphabet chars */ + is_fail = FALSE; + for (nonalpha_key = nonalpha_src; *nonalpha_key; nonalpha_key++) { + if (trie_retrieve (test_trie, *nonalpha_key, &trie_data)) { + printf ("False duplication on key '%ls', with existing data %d.\n", + *nonalpha_key, trie_data); + is_fail = TRUE; + } + if (trie_store (test_trie, *nonalpha_key, TRIE_DATA_UNREAD)) { + printf ("Wrongly added key '%ls' containing non-alphanet char\n", + *nonalpha_key); + is_fail = TRUE; + } + } + + if (is_fail) + goto err_trie_created; + + trie_free (test_trie); + return 0; + +err_trie_created: + trie_free (test_trie); +err_trie_not_created: + return 1; +} + +/* +vi:ts=4:ai:expandtab +*/ diff --git a/tests/test_walk.c b/tests/test_walk.c index 4a2783f..45f1119 100644 --- a/tests/test_walk.c +++ b/tests/test_walk.c @@ -45,13 +45,13 @@ * */ DictRec walk_dict[] = { - {L"pool", TRIE_DATA_UNREAD}, - {L"prize", TRIE_DATA_UNREAD}, - {L"preview", TRIE_DATA_UNREAD}, - {L"prepare", TRIE_DATA_UNREAD}, - {L"produce", TRIE_DATA_UNREAD}, - {L"progress", TRIE_DATA_UNREAD}, - {NULL, TRIE_DATA_ERROR}, + {(AlphaChar *)L"pool", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"prize", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"preview", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"prepare", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"produce", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"progress", TRIE_DATA_UNREAD}, + {(AlphaChar *)NULL, TRIE_DATA_ERROR}, }; static Bool @@ -88,7 +88,7 @@ main () DictRec *dict_p; TrieState *s, *t, *u; AlphaChar walkables[ALPHABET_SIZE]; - int n, i; + int n; Bool is_failed; TrieData data; @@ -151,11 +151,11 @@ main () is_failed = TRUE; } if (!is_walkables_include (L'o', walkables, n)) { - printf ("Walkable chars do not include 'o'\n", n); + printf ("Walkable chars do not include 'o'\n"); is_failed = TRUE; } if (!is_walkables_include (L'r', walkables, n)) { - printf ("Walkable chars do not include 'r'\n", n); + printf ("Walkable chars do not include 'r'\n"); is_failed = TRUE; } if (is_failed) { @@ -228,15 +228,15 @@ main () is_failed = TRUE; } if (!is_walkables_include (L'e', walkables, n)) { - printf ("Walkable chars do not include 'e'\n", n); + printf ("Walkable chars do not include 'e'\n"); is_failed = TRUE; } if (!is_walkables_include (L'i', walkables, n)) { - printf ("Walkable chars do not include 'i'\n", n); + printf ("Walkable chars do not include 'i'\n"); is_failed = TRUE; } if (!is_walkables_include (L'o', walkables, n)) { - printf ("Walkable chars do not include 'o'\n", n); + printf ("Walkable chars do not include 'o'\n"); is_failed = TRUE; } if (is_failed) { @@ -305,11 +305,11 @@ main () is_failed = TRUE; } if (!is_walkables_include (L'p', walkables, n)) { - printf ("Walkable chars do not include 'p'\n", n); + printf ("Walkable chars do not include 'p'\n"); is_failed = TRUE; } if (!is_walkables_include (L'v', walkables, n)) { - printf ("Walkable chars do not include 'v'\n", n); + printf ("Walkable chars do not include 'v'\n"); is_failed = TRUE; } if (is_failed) { @@ -422,11 +422,11 @@ main () is_failed = TRUE; } if (!is_walkables_include (L'd', walkables, n)) { - printf ("Walkable chars do not include 'd'\n", n); + printf ("Walkable chars do not include 'd'\n"); is_failed = TRUE; } if (!is_walkables_include (L'g', walkables, n)) { - printf ("Walkable chars do not include 'g'\n", n); + printf ("Walkable chars do not include 'g'\n"); is_failed = TRUE; } if (is_failed) { diff --git a/tests/utils.c b/tests/utils.c index eeb0468..fc421ea 100644 --- a/tests/utils.c +++ b/tests/utils.c @@ -86,46 +86,46 @@ err_map_not_created: * Dict source for testing * *---------------------------*/ DictRec dict_src[] = { - {L"a", TRIE_DATA_UNREAD}, - {L"abacus", TRIE_DATA_UNREAD}, - {L"abandon", TRIE_DATA_UNREAD}, - {L"accident", TRIE_DATA_UNREAD}, - {L"accredit", TRIE_DATA_UNREAD}, - {L"algorithm", TRIE_DATA_UNREAD}, - {L"ammonia", TRIE_DATA_UNREAD}, - {L"angel", TRIE_DATA_UNREAD}, - {L"angle", TRIE_DATA_UNREAD}, - {L"azure", TRIE_DATA_UNREAD}, - {L"bat", TRIE_DATA_UNREAD}, - {L"bet", TRIE_DATA_UNREAD}, - {L"best", TRIE_DATA_UNREAD}, - {L"home", TRIE_DATA_UNREAD}, - {L"house", TRIE_DATA_UNREAD}, - {L"hut", TRIE_DATA_UNREAD}, - {L"king", TRIE_DATA_UNREAD}, - {L"kite", TRIE_DATA_UNREAD}, - {L"name", TRIE_DATA_UNREAD}, - {L"net", TRIE_DATA_UNREAD}, - {L"network", TRIE_DATA_UNREAD}, - {L"nut", TRIE_DATA_UNREAD}, - {L"nutshell", TRIE_DATA_UNREAD}, - {L"quality", TRIE_DATA_UNREAD}, - {L"quantum", TRIE_DATA_UNREAD}, - {L"quantity", TRIE_DATA_UNREAD}, - {L"quartz", TRIE_DATA_UNREAD}, - {L"quick", TRIE_DATA_UNREAD}, - {L"quiz", TRIE_DATA_UNREAD}, - {L"run", TRIE_DATA_UNREAD}, - {L"tape", TRIE_DATA_UNREAD}, - {L"test", TRIE_DATA_UNREAD}, - {L"what", TRIE_DATA_UNREAD}, - {L"when", TRIE_DATA_UNREAD}, - {L"where", TRIE_DATA_UNREAD}, - {L"which", TRIE_DATA_UNREAD}, - {L"who", TRIE_DATA_UNREAD}, - {L"why", TRIE_DATA_UNREAD}, - {L"zebra", TRIE_DATA_UNREAD}, - {NULL, TRIE_DATA_ERROR}, + {(AlphaChar *)L"a", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"abacus", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"abandon", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"accident", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"accredit", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"algorithm", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"ammonia", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"angel", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"angle", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"azure", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"bat", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"bet", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"best", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"home", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"house", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"hut", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"king", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"kite", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"name", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"net", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"network", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"nut", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"nutshell", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"quality", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"quantum", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"quantity", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"quartz", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"quick", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"quiz", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"run", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"tape", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"test", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"what", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"when", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"where", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"which", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"who", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"why", TRIE_DATA_UNREAD}, + {(AlphaChar *)L"zebra", TRIE_DATA_UNREAD}, + {(AlphaChar *)NULL, TRIE_DATA_ERROR}, }; int -- 2.7.4