1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * trietool.c - Trie manipulation tool
5 * Author: Theppitak Karoonboonyanan <theppitak@gmail.com>
14 #if defined(HAVE_LOCALE_CHARSET)
15 # include <localcharset.h>
16 #elif defined (HAVE_LANGINFO_CODESET)
17 # include <langinfo.h>
18 # define locale_charset() nl_langinfo(CODESET)
25 #include <datrie/trie.h>
27 /* iconv encoding name for AlphaChar string */
28 #define ALPHA_ENC "UCS-4LE"
/*
 * Number of elements in array a, computed from sizeof.  Only meaningful when
 * a is a real array object; a pointer argument would give a wrong count.
 */
30 #define N_ELEMENTS(a) (sizeof(a)/sizeof((a)[0]))
/* Trie name; combined with the path to form the ".tri" and ".abm" file names. */
34 const char *trie_name;
/* iconv descriptor used to convert input text to ALPHA_ENC. */
35 iconv_t to_alpha_conv;
/* iconv descriptor used to convert ALPHA_ENC text to the locale codeset. */
36 iconv_t from_alpha_conv;
/* Character-set conversion helpers */
40 static void init_conv (ProgEnv *env);
41 static size_t conv_to_alpha (ProgEnv *env,
45 static size_t conv_from_alpha (ProgEnv *env,
49 static void close_conv (ProgEnv *env);
/* Trie set-up and tear-down */
51 static int prepare_trie (ProgEnv *env);
52 static int close_trie (ProgEnv *env);
/* Command-line decoding */
54 static int decode_switch (int argc, char *argv[], ProgEnv *env);
55 static int decode_command (int argc, char *argv[], ProgEnv *env);
/* Handlers for the individual commands */
57 static int command_add (int argc, char *argv[], ProgEnv *env);
58 static int command_add_list (int argc, char *argv[], ProgEnv *env);
59 static int command_delete (int argc, char *argv[], ProgEnv *env);
60 static int command_delete_list (int argc, char *argv[], ProgEnv *env);
61 static int command_query (int argc, char *argv[], ProgEnv *env);
62 static int command_list (int argc, char *argv[], ProgEnv *env);
/* Miscellaneous helpers */
64 static void usage (const char *prog_name, int exit_status);
66 static char *string_trim (char *s);
/*
 * Program entry point.
 *
 * Decodes the option switches, takes the argument at the resulting index as
 * the trie name, prepares the trie, passes the remaining arguments to the
 * command decoder and finally closes the trie.
 */
69 main (int argc, char *argv[])
/* i is the argv index at which switch decoding stopped */
79 i = decode_switch (argc, argv, &env);
81 usage (argv[0], EXIT_FAILURE);
/* the argument at index i names the trie; everything after it is commands */
83 env.trie_name = argv[i++];
85 if (prepare_trie (&env) != 0)
88 ret = decode_command (argc - i, argv + i, &env);
90 if (close_trie (&env) != 0)
/*
 * Open the two iconv descriptors stored in env: one converting from the
 * locale's codeset to ALPHA_ENC and one converting in the opposite direction.
 *
 * NOTE(review): no check of the iconv_open() results is visible here;
 * iconv_open() returns (iconv_t) -1 on failure.
 * NOTE(review): prev_locale receives the return value of
 * setlocale (LC_CTYPE, ""), which names the locale just selected rather than
 * the one in effect beforehand, so the second setlocale() call does not look
 * like it restores the earlier locale -- confirm the intent.
 */
99 init_conv (ProgEnv *env)
101 const char *prev_locale;
102 const char *locale_codeset;
/* switch LC_CTYPE to the environment's locale so its codeset can be queried */
104 prev_locale = setlocale (LC_CTYPE, "");
105 locale_codeset = locale_charset();
106 setlocale (LC_CTYPE, prev_locale);
/* one descriptor per conversion direction */
108 env->to_alpha_conv = iconv_open (ALPHA_ENC, locale_codeset);
109 env->from_alpha_conv = iconv_open (locale_codeset, ALPHA_ENC);
/*
 * Convert the string in, using env->to_alpha_conv, into the AlphaChar buffer
 * out, which has room for out_size elements.
 *
 * iconv() writes its output bytes straight into out; the loop further down
 * then rebuilds AlphaChar values from those bytes in place.
 *
 * NOTE(review): the return statements are not visible in this excerpt, so the
 * exact return value is not documented here.
 */
113 conv_to_alpha (ProgEnv *env, const char *in, AlphaChar *out, size_t out_size)
115 char *in_p = (char *) in;
116 char *out_p = (char *) out;
/* both counters are expressed in bytes, as iconv() expects */
117 size_t in_left = strlen (in);
118 size_t out_left = out_size * sizeof (AlphaChar);
120 const unsigned char *byte_p;
/* the byte-wise decoding below consumes 4 bytes per AlphaChar */
122 assert (sizeof (AlphaChar) == 4);
124 /* convert to UCS-4LE */
125 res = iconv (env->to_alpha_conv, (char **) &in_p, &in_left,
/* (size_t) -1 is iconv()'s error indication */
128 if (res == (size_t) -1)
131 /* convert UCS-4LE to AlphaChar string */
/* stop when out is full or fewer than 4 converted bytes remain before out_p */
133 for (byte_p = (const unsigned char *) out;
134 res < out_size && byte_p + 3 < (unsigned char*) out_p;
137 out[res++] = byte_p[0]
142 if (res < out_size) {
/*
 * Convert the zero-terminated AlphaChar string in to the locale codeset using
 * env->from_alpha_conv, with out as the destination buffer.
 *
 * NOTE(review): the loop below casts away the const on in and overwrites the
 * caller's AlphaChar data in place with its little-endian byte form, so the
 * input string is modified despite the const-qualified parameter -- confirm
 * that every caller tolerates this.
 * NOTE(review): how out_size is applied and what is returned are not visible
 * in this excerpt.
 */
150 conv_from_alpha (ProgEnv *env, const AlphaChar *in, char *out, size_t out_size)
/* input length in bytes, excluding the terminating zero element */
152 size_t in_left = alpha_char_strlen (in) * sizeof (AlphaChar);
155 assert (sizeof (AlphaChar) == 4);
157 /* convert AlphaChar to UCS-4LE */
158 for (res = 0; in[res]; res++) {
/* split the value into bytes, least significant byte first */
161 b[0] = in[res] & 0xff;
162 b[1] = (in[res] >> 8) & 0xff;
163 b[2] = (in[res] >> 16) & 0xff;
164 b[3] = (in[res] >> 24) & 0xff;
/* store the bytes back over the same element of the input */
166 memcpy ((char *) &in[res], b, 4);
169 /* convert UCS-4LE to locale codeset */
170 res = iconv (env->from_alpha_conv, (char **) &in, &in_left,
/* Close both iconv descriptors held in env. */
178 close_conv (ProgEnv *env)
180 iconv_close (env->to_alpha_conv);
181 iconv_close (env->from_alpha_conv);
/*
 * Set up env->trie.
 *
 * Builds the file name "<path>/<trie_name>.tri" and passes it to
 * trie_new_from_file().  The later part opens "<path>/<trie_name>.abm", reads
 * it line by line as "[b,e]" ranges in hexadecimal, adds each range to a new
 * alphabet map, and creates a new trie from that map.
 *
 * NOTE(review): the condition that links the two parts is not visible in this
 * excerpt; presumably the map is only read when loading the .tri file fails.
 * NOTE(review): the snprintf() results are not checked for truncation, and no
 * fclose() of the map file is visible here.
 */
185 prepare_trie (ProgEnv *env)
/* file name of the stored trie */
189 snprintf (buff, sizeof (buff),
190 "%s/%s.tri", env->path, env->trie_name);
191 env->trie = trie_new_from_file (buff);
/* file name of the alphabet map; buff is reused */
197 snprintf (buff, sizeof (buff),
198 "%s/%s.abm", env->path, env->trie_name);
199 sbm = fopen (buff, "r");
201 fprintf (stderr, "Cannot open alphabet map file %s\n", buff);
205 alpha_map = alpha_map_new ();
/* buff now serves as the line buffer for the map file */
207 while (fgets (buff, sizeof (buff), sbm)) {
212 * where: b = begin char, e = end char; both in hex values
214 if (sscanf (buff, " [ %x , %x ] ", &b, &e) != 2)
217 fprintf (stderr, "Range begin (%x) > range end (%x)\n", b, e);
/* record the parsed range in the alphabet map */
221 alpha_map_add_range (alpha_map, b, e);
224 env->trie = trie_new (alpha_map);
/* the map is released right after the trie has been created from it */
226 alpha_map_free (alpha_map);
/*
 * Save the trie to "<path>/<trie_name>.tri" if trie_is_dirty() reports it as
 * dirty, printing a message to stderr when saving fails, and free the trie.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
234 close_trie (ProgEnv *env)
236 if (trie_is_dirty (env->trie)) {
239 snprintf (path, sizeof (path),
240 "%s/%s.tri", env->path, env->trie_name);
241 if (trie_save (env->trie, path) != 0) {
242 fprintf (stderr, "Cannot save trie to %s\n", path);
247 trie_free (env->trie);
/*
 * Decode the leading option switches of the command line.
 *
 * Scans argv from index 1 for as long as the arguments start with '-' and
 * recognises -h/--help, -V/--version, -p/--path and "--"; anything else is
 * reported on stderr as an unknown option.
 *
 * NOTE(review): for -p/--path the directory is read from argv[++opt_idx]
 * with no check against argc, so when the switch is the last argument
 * env->path is set to argv[argc], a null pointer -- confirm and guard.
 * NOTE(review): -h/--help calls usage() with EXIT_FAILURE although the help
 * was explicitly requested -- confirm the intended exit status.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
252 decode_switch (int argc, char *argv[], ProgEnv *env)
256 for (opt_idx = 1; opt_idx < argc && *argv[opt_idx] == '-'; opt_idx++) {
257 if (strcmp (argv[opt_idx], "-h") == 0 ||
258 strcmp (argv[opt_idx], "--help") == 0)
260 usage (argv[0], EXIT_FAILURE);
261 } else if (strcmp (argv[opt_idx], "-V") == 0 ||
262 strcmp (argv[opt_idx], "--version") == 0)
264 printf ("%s\n", VERSION);
266 } else if (strcmp (argv[opt_idx], "-p") == 0 ||
267 strcmp (argv[opt_idx], "--path") == 0)
/* the argument following the switch is taken as the trie directory */
269 env->path = argv[++opt_idx];
270 } else if (strcmp (argv[opt_idx], "--") == 0) {
274 fprintf (stderr, "Unknown option: %s\n", argv[opt_idx]);
/*
 * Decode and run the commands found in argv.
 *
 * Each recognised command name (add, add-list, delete, delete-list, query,
 * list) is dispatched to its handler together with the remaining arguments,
 * and the handler's return value is added to opt_idx -- presumably the number
 * of arguments the handler consumed.  Any other word is reported on stderr as
 * an unknown command.
 *
 * NOTE(review): the statement between each comparison and its handler call,
 * and the return statement, are not visible in this excerpt.
 */
283 decode_command (int argc, char *argv[], ProgEnv *env)
287 for (opt_idx = 0; opt_idx < argc; opt_idx++) {
288 if (strcmp (argv[opt_idx], "add") == 0) {
290 opt_idx += command_add (argc - opt_idx, argv + opt_idx, env);
291 } else if (strcmp (argv[opt_idx], "add-list") == 0) {
293 opt_idx += command_add_list (argc - opt_idx, argv + opt_idx, env);
294 } else if (strcmp (argv[opt_idx], "delete") == 0) {
296 opt_idx += command_delete (argc - opt_idx, argv + opt_idx, env);
297 } else if (strcmp (argv[opt_idx], "delete-list") == 0) {
299 opt_idx += command_delete_list (argc - opt_idx, argv + opt_idx, env);
300 } else if (strcmp (argv[opt_idx], "query") == 0) {
302 opt_idx += command_query (argc - opt_idx, argv + opt_idx, env);
303 } else if (strcmp (argv[opt_idx], "list") == 0) {
305 opt_idx += command_list (argc - opt_idx, argv + opt_idx, env);
307 fprintf (stderr, "Unknown command: %s\n", argv[opt_idx]);
/*
 * Handler for "add": consumes the arguments as WORD DATA pairs and stores
 * each pair in env->trie, reporting failures on stderr.
 *
 * NOTE(review): the result of conv_to_alpha() is not checked before the key
 * is stored, and atoi() gives no indication of malformed DATA.
 */
316 command_add (int argc, char *argv[], ProgEnv *env)
321 while (opt_idx < argc) {
323 AlphaChar key_alpha[256];
326 key = argv[opt_idx++];
/* a word without a following DATA argument gets TRIE_DATA_ERROR as its data */
327 data = (opt_idx < argc) ? atoi (argv[opt_idx++]) : TRIE_DATA_ERROR;
329 conv_to_alpha (env, key, key_alpha, N_ELEMENTS (key_alpha));
330 if (!trie_store (env->trie, key_alpha, data)) {
331 fprintf (stderr, "Failed to add entry '%s' with data %d\n",
/*
 * Handler for "add-list": reads a word list file and stores every entry in
 * env->trie.
 *
 * Accepts an optional "-e ENC" / "--encoding ENC" pair ahead of the file
 * name.  When a converter for ENC is opened it temporarily replaces
 * env->to_alpha_conv; the previous descriptor is kept in saved_conv and put
 * back at the end.  Each line is trimmed, split into key and data at the
 * first tab or comma, and stored; lines without data get TRIE_DATA_ERROR.
 *
 * NOTE(review): argv[0] is read before any visible check that argc > 0.
 * NOTE(review): the "requires encoding name" message has no trailing "\n",
 * unlike the other messages in this function.
 * NOTE(review): isspace() is called with a plain char; a negative value other
 * than EOF is undefined behaviour, so a cast to unsigned char looks needed.
 * NOTE(review): the result of conv_to_alpha() is not checked.
 */
340 command_add_list (int argc, char *argv[], ProgEnv *env)
342 const char *enc_name, *input_name;
/* remember the current converter so it can be reinstated later */
350 saved_conv = env->to_alpha_conv;
351 if (strcmp (argv[0], "-e") == 0 ||
352 strcmp (argv[0], "--encoding") == 0)
354 if (++opt_idx >= argc) {
355 fprintf (stderr, "add-list option \"%s\" requires encoding name",
359 enc_name = argv[opt_idx++];
361 if (opt_idx >= argc) {
362 fprintf (stderr, "add-list requires input word list file name\n");
365 input_name = argv[opt_idx++];
/* converter from the list file's encoding to ALPHA_ENC */
368 iconv_t conv = iconv_open (ALPHA_ENC, enc_name);
369 if ((iconv_t) -1 == conv) {
371 "Conversion from \"%s\" to \"%s\" is not supported.\n",
372 enc_name, ALPHA_ENC);
376 env->to_alpha_conv = conv;
379 input = fopen (input_name, "r");
381 fprintf (stderr, "add-list: Cannot open input file \"%s\"\n",
383 goto exit_iconv_openned;
386 while (fgets (line, sizeof line, input)) {
388 AlphaChar key_alpha[256];
391 key = string_trim (line);
393 /* find key boundary */
394 for (data = key; *data && !strchr ("\t,", *data); ++data)
396 /* mark key ending and find data begin */
399 while (isspace (*data))
/* an empty data field is stored as TRIE_DATA_ERROR */
403 data_val = ('\0' != *data) ? atoi (data) : TRIE_DATA_ERROR;
406 conv_to_alpha (env, key, key_alpha, N_ELEMENTS (key_alpha));
407 if (!trie_store (env->trie, key_alpha, data_val))
408 fprintf (stderr, "Failed to add key '%s' with data %d.\n",
/* drop the temporary converter and reinstate the saved one */
417 iconv_close (env->to_alpha_conv);
418 env->to_alpha_conv = saved_conv;
/*
 * Handler for "delete": treats every remaining argument as a word and
 * deletes it from env->trie, reporting words that could not be deleted on
 * stderr.
 *
 * NOTE(review): the result of conv_to_alpha() is not checked.
 */
425 command_delete (int argc, char *argv[], ProgEnv *env)
429 for (opt_idx = 0; opt_idx < argc; opt_idx++) {
430 AlphaChar key_alpha[256];
432 conv_to_alpha (env, argv[opt_idx], key_alpha, N_ELEMENTS (key_alpha));
433 if (!trie_delete (env->trie, key_alpha)) {
434 fprintf (stderr, "No entry '%s'. Not deleted.\n", argv[opt_idx]);
/*
 * Handler for "delete-list": reads a word list file and deletes every listed
 * word from env->trie.
 *
 * Accepts an optional "-e ENC" / "--encoding ENC" pair ahead of the file
 * name.  When a converter for ENC is opened it temporarily replaces
 * env->to_alpha_conv; the previous descriptor is kept in saved_conv and put
 * back at the end.  Each line is trimmed and used as the key to delete.
 *
 * NOTE(review): argv[0] is read before any visible check that argc > 0.
 * NOTE(review): the "requires encoding name" message has no trailing "\n",
 * unlike the other messages in this function.
 * NOTE(review): the result of conv_to_alpha() is not checked.
 */
442 command_delete_list (int argc, char *argv[], ProgEnv *env)
444 const char *enc_name, *input_name;
/* remember the current converter so it can be reinstated later */
452 saved_conv = env->to_alpha_conv;
453 if (strcmp (argv[0], "-e") == 0 ||
454 strcmp (argv[0], "--encoding") == 0)
456 if (++opt_idx >= argc) {
457 fprintf (stderr, "delete-list option \"%s\" requires encoding name",
461 enc_name = argv[opt_idx++];
463 if (opt_idx >= argc) {
464 fprintf (stderr, "delete-list requires input word list file name\n");
467 input_name = argv[opt_idx++];
/* converter from the list file's encoding to ALPHA_ENC */
470 iconv_t conv = iconv_open (ALPHA_ENC, enc_name);
471 if ((iconv_t) -1 == conv) {
473 "Conversion from \"%s\" to \"%s\" is not supported.\n",
474 enc_name, ALPHA_ENC);
478 env->to_alpha_conv = conv;
481 input = fopen (input_name, "r");
483 fprintf (stderr, "delete-list: Cannot open input file \"%s\"\n",
485 goto exit_iconv_openned;
488 while (fgets (line, sizeof line, input)) {
/* surrounding white space is not part of the key */
491 p = string_trim (line);
493 AlphaChar key_alpha[256];
495 conv_to_alpha (env, p, key_alpha, N_ELEMENTS (key_alpha));
496 if (!trie_delete (env->trie, key_alpha)) {
497 fprintf (stderr, "No entry '%s'. Not deleted.\n", p);
/* drop the temporary converter and reinstate the saved one */
506 iconv_close (env->to_alpha_conv);
507 env->to_alpha_conv = saved_conv;
/*
 * Handler for "query": looks up argv[0] in env->trie and prints its data on
 * stdout, or a "not found" message on stderr.
 *
 * NOTE(review): the result of conv_to_alpha() is not checked before the
 * lookup.
 */
514 command_query (int argc, char *argv[], ProgEnv *env)
516 AlphaChar key_alpha[256];
520 fprintf (stderr, "query: No key specified.\n");
524 conv_to_alpha (env, argv[0], key_alpha, N_ELEMENTS (key_alpha));
525 if (trie_retrieve (env->trie, key_alpha, &data)) {
526 printf ("%d\n", data);
528 fprintf (stderr, "query: Key '%s' not found.\n", argv[0]);
/*
 * Callback handed to trie_enumerate() by command_list: converts key to the
 * locale codeset and prints "key<TAB>data" on stdout.  user_data carries the
 * ProgEnv pointer.
 *
 * NOTE(review): the result of conv_from_alpha() is not checked before
 * key_locale is printed.
 */
535 list_enum_func (const AlphaChar *key, TrieData key_data, void *user_data)
537 ProgEnv *env = (ProgEnv *) user_data;
538 char key_locale[1024];
540 conv_from_alpha (env, key, key_locale, N_ELEMENTS (key_locale));
541 printf ("%s\t%d\n", key_locale, key_data);
/*
 * Handler for "list": enumerates env->trie, calling list_enum_func for the
 * entries with env passed through as user data.
 */
546 command_list (int argc, char *argv[], ProgEnv *env)
548 trie_enumerate (env->trie, list_enum_func, (void *) env);
/*
 * Print the program description, the usage line and the option and command
 * summary on stdout, with prog_name as the program name.
 *
 * NOTE(review): how exit_status is used is not visible in this excerpt;
 * presumably the function exits with it.
 */
554 usage (const char *prog_name, int exit_status)
556 printf ("%s - double-array trie manipulator\n", prog_name);
557 printf ("Usage: %s [OPTION]... TRIE CMD ARG ...\n", prog_name);
560 " -p, --path DIR set trie directory to DIR [default=.]\n"
561 " -h, --help display this help and exit\n"
562 " -V, --version output version information and exit\n"
565 " add WORD DATA ...\n"
566 " Add WORD with DATA to trie\n"
567 " add-list [OPTION] LISTFILE\n"
568 " Add words and data listed in LISTFILE to trie\n"
570 " -e, --encoding ENC specify character encoding of LISTFILE\n"
572 " Delete WORD from trie\n"
573 " delete-list [OPTION] LISTFILE\n"
574 " Delete words listed in LISTFILE from trie\n"
576 " -e, --encoding ENC specify character encoding of LISTFILE\n"
578 " Query WORD data from trie\n"
580 " List all words in trie\n"
587 string_trim (char *s)
591 /* skip leading white spaces */
592 while (*s && isspace (*s))
595 /* trim trailing white spaces */
596 p = s + strlen (s) - 1;