+2015-10-20 Theppitak Karoonboonyanan <theppitak@gmail.com>
+
+ * configure.ac:
+ - Bump library revision to reflect code changes.
+
+ * NEWS, configure.ac:
+ === Version 0.2.10 ===
+
+2015-10-13 Theppitak Karoonboonyanan <theppitak@gmail.com>
+
+ Optimize AlphaMap mapping.
+
+ alpha_map_char_to_trie() is called everywhere before trie state
+ transition. It's an important bottleneck.
+
+ We won't change the persistent AlphaMap structure, but will add
+ pre-calculated lookup tables for use at run-time.
+
+ * datrie/alpha-map.c (struct _AlphaMap):
+ - Add members for alpha-to-trie and trie-to-alpha lookup tables.
+ * datrie/alpha-map.c (alpha_map_new, alpha_map_free):
+ - Initialize & free the tables properly.
+ * datrie/alpha-map.c (alpha_map_add_range -> alpha_map_add_range_only
+ + alpha_map_recalc_work_area):
+ - Split alpha_map_add_range() API into two parts: adding the range
+ as usual and recalculate the lookup tables.
+ * datrie/alpha-map.c (alpha_map_clone, alpha_map_fread_bin):
+ - Call alpha_map_add_range_only() repeatedly before calling
+ alpha_map_recalc_work_area() once.
+ * datrie/alpha-map.c (alpha_map_char_to_trie, alpha_map_trie_to_char):
+ - Look up the pre-calculated tables instead of calculating on
+ every call.
+
+ This appears to save time by 14.6% for total alpha_char_to_trie()
+ calls and even lower its bottleneck rank by 1 rank on a libthai
+ test case. It reduces 0.2% run time of the total libthai test case.
+
+ Note that the time saved would be even more in case of multiple
+ uncontinuous alphabet ranges, at the expense of more memory used.
+
+2015-08-18 Theppitak Karoonboonyanan <theppitak@gmail.com>
+
+ Fix doxygen version checking.
+
+ * configure.ac:
+ - Correctly compare doxygen versions. Simple expr comparison
+ didn't work with version 1.8.10.
+
+ Thanks Petr Gajdos <pgajdos@suse.cz> for the patch.
+
+2015-06-24 Theppitak Karoonboonyanan <theppitak@gmail.com>
+
+ * datrie/tail.c (tail_set_suffix):
+ - Catch strdup() failure.
+
+2015-06-24 Theppitak Karoonboonyanan <theppitak@gmail.com>
+
+ * configure.ac: Post-release version suffix added.
+
2015-05-03 Theppitak Karoonboonyanan <theppitak@gmail.com>
* NEWS, configure.ac:
# Process this file with autoconf to produce a configure script.
AC_PREREQ(2.59)
-AC_INIT(libdatrie, 0.2.9, theppitak@gmail.com)
+AC_INIT(libdatrie, 0.2.10, theppitak@gmail.com)
AC_CONFIG_SRCDIR([datrie/trie.h])
AC_CONFIG_HEADER([config.h])
AC_CONFIG_MACRO_DIR([m4])
# Interfaces added: CURRENT++ REVISION=0 AGE++
# Interfaces changed/removed: CURRENT++ REVISION=0 AGE=0
LT_CURRENT=4
-LT_REVISION=2
+LT_REVISION=3
LT_AGE=3
AC_SUBST(LT_CURRENT)
AC_SUBST(LT_REVISION)
else
AC_MSG_CHECKING([doxygen >= $DOXYGEN_REQ_VER])
DOXYGEN_VER=$($DOXYGEN --version)
- if expr $DOXYGEN_VER \< $DOXYGEN_REQ_VER > /dev/null; then
- AC_MSG_RESULT([$DOXYGEN_VER, no, documentation disabled])
- enable_doxygen_doc="no"
- else
- AC_MSG_RESULT([$DOXYGEN_VER, yes])
- fi
+ AX_COMPARE_VERSION([$DOXYGEN_VER],[ge],[DOXYGEN_REQ_VER],
+ [AC_MSG_RESULT([$DOXYGEN_VER, yes])],
+ [AC_MSG_RESULT([$DOXYGEN_VER, no, documentation disabled]); enable_doxygen_doc="no"])
fi
fi
struct _AlphaMap {
AlphaRange *first_range;
+
+ /* work area */
+ /* alpha-to-trie map */
+ AlphaChar alpha_begin;
+ AlphaChar alpha_end;
+ int alpha_map_sz;
+ TrieIndex *alpha_to_trie_map;
+
+ /* trie-to-alpha map */
+ int trie_map_sz;
+ AlphaChar *trie_to_alpha_map;
};
/*-----------------------------------*
* PRIVATE METHODS DECLARATIONS *
*-----------------------------------*/
static int alpha_map_get_total_ranges (const AlphaMap *alpha_map);
+static int alpha_map_add_range_only (AlphaMap *alpha_map,
+ AlphaChar begin, AlphaChar end);
+static int alpha_map_recalc_work_area (AlphaMap *alpha_map);
/*-----------------------------*
* METHODS IMPLEMENTAIONS *
alpha_map->first_range = NULL;
+ /* work area */
+ alpha_map->alpha_begin = 0;
+ alpha_map->alpha_end = 0;
+ alpha_map->alpha_map_sz = 0;
+ alpha_map->alpha_to_trie_map = NULL;
+
+ alpha_map->trie_map_sz = 0;
+ alpha_map->trie_to_alpha_map = NULL;
+
return alpha_map;
}
return NULL;
for (range = a_map->first_range; range; range = range->next) {
- if (alpha_map_add_range (alpha_map, range->begin, range->end) != 0) {
- alpha_map_free (alpha_map);
- return NULL;
- }
+ if (alpha_map_add_range_only (alpha_map, range->begin, range->end) != 0)
+ goto exit_map_created;
}
+ if (alpha_map_recalc_work_area (alpha_map) != 0)
+ goto exit_map_created;
+
return alpha_map;
+
+exit_map_created:
+ alpha_map_free (alpha_map);
+ return NULL;
}
/**
p = q;
}
+ /* work area */
+ if (alpha_map->alpha_to_trie_map)
+ free (alpha_map->alpha_to_trie_map);
+ if (alpha_map->trie_to_alpha_map)
+ free (alpha_map->trie_to_alpha_map);
+
free (alpha_map);
}
if (!file_read_int32 (file, &b) || !file_read_int32 (file, &e))
goto exit_map_created;
- alpha_map_add_range (alpha_map, b, e);
+ alpha_map_add_range_only (alpha_map, b, e);
}
+ /* work area */
+ if (UNLIKELY (alpha_map_recalc_work_area (alpha_map) != 0))
+ goto exit_map_created;
+
return alpha_map;
exit_map_created:
return 0;
}
-/**
- * @brief Add a range to alphabet map
- *
- * @param alpha_map : the alphabet map object
- * @param begin : the first character of the range
- * @param end : the last character of the range
- *
- * @return 0 on success, non-zero on failure
- *
- * Add a range of character codes from @a begin to @a end to the
- * alphabet set.
- */
-int
-alpha_map_add_range (AlphaMap *alpha_map, AlphaChar begin, AlphaChar end)
+static int
+alpha_map_add_range_only (AlphaMap *alpha_map, AlphaChar begin, AlphaChar end)
{
AlphaRange *q, *r, *begin_node, *end_node;
return 0;
}
+static int
+alpha_map_recalc_work_area (AlphaMap *alpha_map)
+{
+ AlphaRange *range;
+
+ /* free old existing map */
+ if (alpha_map->alpha_to_trie_map) {
+ free (alpha_map->alpha_to_trie_map);
+ alpha_map->alpha_to_trie_map = NULL;
+ }
+ if (alpha_map->trie_to_alpha_map) {
+ free (alpha_map->trie_to_alpha_map);
+ alpha_map->trie_to_alpha_map = NULL;
+ }
+
+ range = alpha_map->first_range;
+ if (range) {
+ const AlphaChar alpha_begin = range->begin;
+ int n_cells, i;
+ AlphaChar a;
+ TrieChar trie_last = 0;
+ TrieChar tc;
+
+ /* reconstruct alpha-to-trie map */
+ alpha_map->alpha_begin = alpha_begin;
+ while (range->next) {
+ range = range->next;
+ }
+ alpha_map->alpha_end = range->end;
+ alpha_map->alpha_map_sz = n_cells = range->end - alpha_begin + 1;
+ alpha_map->alpha_to_trie_map
+ = (TrieIndex *) malloc (n_cells * sizeof (TrieIndex));
+ if (UNLIKELY (!alpha_map->alpha_to_trie_map))
+ goto error_alpha_map_not_created;
+ for (i = 0; i < n_cells; i++) {
+ alpha_map->alpha_to_trie_map[i] = TRIE_INDEX_MAX;
+ }
+ for (range = alpha_map->first_range; range; range = range->next) {
+ for (a = range->begin; a <= range->end; a++) {
+ alpha_map->alpha_to_trie_map[a - alpha_begin] = ++trie_last;
+ }
+ }
+
+ /* reconstruct trie-to-alpha map */
+ alpha_map->trie_map_sz = n_cells = trie_last + 1;
+ alpha_map->trie_to_alpha_map
+ = (AlphaChar *) malloc (n_cells * sizeof (AlphaChar));
+ if (UNLIKELY (!alpha_map->trie_to_alpha_map))
+ goto error_alpha_map_created;
+ alpha_map->trie_to_alpha_map[0] = 0;
+ tc = 1;
+ for (range = alpha_map->first_range; range; range = range->next) {
+ for (a = range->begin; a <= range->end; a++) {
+ alpha_map->trie_to_alpha_map[tc++] = a;
+ }
+ }
+ }
+
+ return 0;
+
+error_alpha_map_created:
+ free (alpha_map->alpha_to_trie_map);
+ alpha_map->alpha_to_trie_map = NULL;
+error_alpha_map_not_created:
+ return -1;
+}
+
+/**
+ * @brief Add a range to alphabet map
+ *
+ * @param alpha_map : the alphabet map object
+ * @param begin : the first character of the range
+ * @param end : the last character of the range
+ *
+ * @return 0 on success, non-zero on failure
+ *
+ * Add a range of character codes from @a begin to @a end to the
+ * alphabet set.
+ */
+int
+alpha_map_add_range (AlphaMap *alpha_map, AlphaChar begin, AlphaChar end)
+{
+ int res = alpha_map_add_range_only (alpha_map, begin, end);
+ if (res != 0)
+ return res;
+ return alpha_map_recalc_work_area (alpha_map);
+}
+
TrieIndex
alpha_map_char_to_trie (const AlphaMap *alpha_map, AlphaChar ac)
{
TrieIndex alpha_begin;
- AlphaRange *range;
if (UNLIKELY (0 == ac))
return 0;
- alpha_begin = 1;
- for (range = alpha_map->first_range; range; range = range->next) {
- if (range->begin <= ac && ac <= range->end)
- return alpha_begin + (ac - range->begin);
+ if (UNLIKELY (!alpha_map->alpha_to_trie_map))
+ return TRIE_INDEX_MAX;
- alpha_begin += range->end - range->begin + 1;
+ alpha_begin = alpha_map->alpha_begin;
+ if (alpha_begin <= ac && ac <= alpha_map->alpha_end)
+ {
+ return alpha_map->alpha_to_trie_map[ac - alpha_begin];
}
return TRIE_INDEX_MAX;
AlphaChar
alpha_map_trie_to_char (const AlphaMap *alpha_map, TrieChar tc)
{
- TrieChar alpha_begin;
- AlphaRange *range;
-
- if (UNLIKELY (0 == tc))
- return 0;
-
- alpha_begin = 1;
- for (range = alpha_map->first_range; range; range = range->next) {
- if (tc <= alpha_begin + (range->end - range->begin))
- return range->begin + (tc - alpha_begin);
-
- alpha_begin += range->end - range->begin + 1;
- }
+ if (tc < alpha_map->trie_map_sz)
+ return alpha_map->trie_to_alpha_map[tc];
return ALPHA_CHAR_ERROR;
}