1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * trietool.c - Trie manipulation tool
5 * Author: Theppitak Karoonboonyanan <theppitak@gmail.com>
14 #if defined(HAVE_LOCALE_CHARSET)
15 # include <localcharset.h>
16 #elif defined (HAVE_LANGINFO_CODESET)
17 # include <langinfo.h>
18 # define locale_charset() nl_langinfo(CODESET)
24 #include <datrie/trie.h>
26 /* iconv encoding name for AlphaChar string */
27 #define ALPHA_ENC "UCS-4LE"
/* Element count of the array a; only meaningful for a real array, not a pointer */
29 #define N_ELEMENTS(a) (sizeof(a)/sizeof((a)[0]))
/* Base name of the trie; full_path() appends ".tri" / ".abm" to it */
33 const char *trie_name;
/* iconv descriptor producing ALPHA_ENC; add-list/delete-list swap it temporarily */
34 iconv_t to_alpha_conv;
/* iconv descriptor converting from ALPHA_ENC to the locale codeset */
35 iconv_t from_alpha_conv;
/* Character-set conversion helpers built on the iconv descriptors in ProgEnv */
39 static void init_conv (ProgEnv *env);
40 static size_t conv_to_alpha (ProgEnv *env,
44 static size_t conv_from_alpha (ProgEnv *env,
48 static void close_conv (ProgEnv *env);
/* Trie set-up and tear-down; main() treats a non-zero result as failure */
50 static int prepare_trie (ProgEnv *env);
51 static int close_trie (ProgEnv *env);
/* Command-line decoding */
53 static int decode_switch (int argc, char *argv[], ProgEnv *env);
54 static int decode_command (int argc, char *argv[], ProgEnv *env);
/* One handler per command word; decode_command() adds each result to its argument index */
56 static int command_add (int argc, char *argv[], ProgEnv *env);
57 static int command_add_list (int argc, char *argv[], ProgEnv *env);
58 static int command_delete (int argc, char *argv[], ProgEnv *env);
59 static int command_delete_list (int argc, char *argv[], ProgEnv *env);
60 static int command_query (int argc, char *argv[], ProgEnv *env);
61 static int command_list (int argc, char *argv[], ProgEnv *env);
/* Help text */
63 static void usage (const char *prog_name, int exit_status);
/* String utility used on lines read from list files */
65 static char *string_trim (char *s);
/*
 * main - program entry point.
 *
 * Visible flow: decode_switch() handles the leading options, the next
 * argument becomes env.trie_name, prepare_trie() sets up the trie,
 * decode_command() runs on the remaining arguments, and close_trie()
 * is called afterwards.
 *
 * NOTE(review): the declarations, the condition guarding usage() and the
 * failure branches are not visible in this excerpt.
 */
68 main (int argc, char *argv[])
/* i is used below as an index into argv; presumably the first non-option argument -- confirm */
78 i = decode_switch (argc, argv, &env);
80 usage (argv[0], EXIT_FAILURE);
/* the argument at index i names the trie; i then moves on to the command words */
82 env.trie_name = argv[i++];
84 if (prepare_trie (&env) != 0)
/* only the arguments after the trie name are handed to the command decoder */
87 ret = decode_command (argc - i, argv + i, &env);
89 if (close_trie (&env) != 0)
98 init_conv (ProgEnv *env)
100 const char *prev_locale;
101 const char *locale_codeset;
103 prev_locale = setlocale (LC_CTYPE, "");
104 locale_codeset = locale_charset();
105 setlocale (LC_CTYPE, prev_locale);
107 env->to_alpha_conv = iconv_open (ALPHA_ENC, locale_codeset);
108 env->from_alpha_conv = iconv_open (locale_codeset, ALPHA_ENC);
/*
 * conv_to_alpha - convert the NUL-terminated string in to AlphaChar values
 * stored in out, using env->to_alpha_conv.
 *
 * out_size is the capacity of out counted in AlphaChar elements; it is
 * multiplied by sizeof (AlphaChar) to get the byte budget given to iconv().
 * iconv() writes its output bytes straight into out, then the loop
 * rebuilds one AlphaChar from each group of four bytes, in place.
 *
 * NOTE(review): the statements that return a value are not visible in
 * this excerpt; the callers shown in this file ignore the result.
 */
112 conv_to_alpha (ProgEnv *env, const char *in, AlphaChar *out, size_t out_size)
/* iconv() takes non-const cursors, hence the casts */
114 char *in_p = (char *) in;
115 char *out_p = (char *) out;
116 size_t in_left = strlen (in);
117 size_t out_left = out_size * sizeof (AlphaChar);
119 const unsigned char *byte_p;
/* the byte-wise decoding below relies on four bytes per AlphaChar */
121 assert (sizeof (AlphaChar) == 4);
123 /* convert to UCS-4LE */
124 res = iconv (env->to_alpha_conv, (char **) &in_p, &in_left,
/* (size_t) -1 is how iconv() reports failure */
127 if (res == (size_t) -1)
130 /* convert UCS-4LE to AlphaChar string */
/* walk the bytes iconv() produced; out_p marks the end of what was written */
132 for (byte_p = (const unsigned char *) out;
133 res < out_size && byte_p + 3 < (unsigned char*) out_p;
136 out[res++] = byte_p[0]
/*
 * NOTE(review): the guarded statement is not visible; if it stores the
 * terminator, a conversion that fills all out_size elements leaves out
 * unterminated -- confirm.
 */
141 if (res < out_size) {
/*
 * conv_from_alpha - convert the zero-terminated AlphaChar string in to the
 * locale codeset, writing into out via env->from_alpha_conv.
 *
 * The input is first rewritten, element by element, into explicit
 * little-endian byte order so that it matches ALPHA_ENC, and is then
 * passed to iconv().
 *
 * NOTE(review): the rewrite happens in the caller's buffer, through a cast
 * that drops the const qualifier of in; the input is modified in place.
 * NOTE(review): how out_size is applied and what is returned are not
 * visible in this excerpt.
 */
149 conv_from_alpha (ProgEnv *env, const AlphaChar *in, char *out, size_t out_size)
/* input length in bytes, as iconv() expects */
151 size_t in_left = alpha_char_strlen (in) * sizeof (AlphaChar);
/* the byte splitting below relies on four bytes per AlphaChar */
154 assert (sizeof (AlphaChar) == 4);
156 /* convert AlphaChar to UCS-4LE */
157 for (res = 0; in[res]; res++) {
/* least significant byte first */
160 b[0] = in[res] & 0xff;
161 b[1] = (in[res] >> 8) & 0xff;
162 b[2] = (in[res] >> 16) & 0xff;
163 b[3] = (in[res] >> 24) & 0xff;
/* overwrite the element with its little-endian byte image (writes through const) */
165 memcpy ((char *) &in[res], b, 4);
168 /* convert UCS-4LE to locale codeset */
169 res = iconv (env->from_alpha_conv, (char **) &in, &in_left,
177 close_conv (ProgEnv *env)
179 iconv_close (env->to_alpha_conv);
180 iconv_close (env->from_alpha_conv);
/*
 * full_path - build the file name "<path>/<name><ext>".
 *
 * path, name and ext must be NUL-terminated strings; ext is appended
 * verbatim, so it carries its own leading dot (e.g. ".tri").
 *
 * Returns a newly allocated string that the caller must free(), or NULL
 * when the allocation fails.
 */
static char *
full_path (const char *path, const char *name, const char *ext)
{
    /* one extra byte for the '/' separator, one for the terminating NUL */
    size_t  full_size = strlen (path) + strlen (name) + strlen (ext) + 2;
    char   *full_path_buff = (char *) malloc (full_size);

    if (!full_path_buff)
        return NULL;

    /* bounded write: never runs past the buffer sized above */
    snprintf (full_path_buff, full_size, "%s/%s%s", path, name, ext);
    return full_path_buff;
}
/*
 * prepare_trie - set up env->trie.
 *
 * Visible steps: try to load "<path>/<name>.tri" with
 * trie_new_from_file(); open "<path>/<name>.abm", read character ranges
 * from it into a fresh alphabet map, and create a new trie from that map.
 * Presumably the second part runs only when the .tri file could not be
 * loaded -- the branching is not visible in this excerpt, confirm.
 *
 * main() treats a non-zero result as failure.
 *
 * NOTE(review): full_path() returns allocated memory; the matching free()
 * calls are not visible here -- confirm.
 */
193 prepare_trie (ProgEnv *env)
198 path_name = full_path (env->path, env->trie_name, ".tri");
199 env->trie = trie_new_from_file (path_name);
/* alphabet map file: same directory and base name, ".abm" extension */
206 path_name = full_path (env->path, env->trie_name, ".abm");
207 sbm = fopen (path_name, "r");
209 fprintf (stderr, "Cannot open alphabet map file %s\n", path_name);
215 alpha_map = alpha_map_new ();
/* one range per input line */
217 while (fgets (buff, sizeof (buff), sbm)) {
222 * where: b = begin char, e = end char; both in hex values
/* both bounds must parse; the action taken for other lines is not visible here */
224 if (sscanf (buff, " [ %x , %x ] ", &b, &e) != 2)
227 fprintf (stderr, "Range begin (%x) > range end (%x)\n", b, e);
231 alpha_map_add_range (alpha_map, b, e);
234 env->trie = trie_new (alpha_map);
/* the map is released right after trie_new(); presumably the trie keeps its own copy -- confirm */
236 alpha_map_free (alpha_map);
/*
 * close_trie - write the trie back if needed and release it.
 *
 * When trie_is_dirty() reports pending changes, the trie is saved to
 * "<path>/<name>.tri"; a failed save is reported on stderr.  The trie is
 * then released with trie_free().
 *
 * main() treats a non-zero result as failure; the return statements and
 * the release of the path buffer are not visible in this excerpt.
 */
244 close_trie (ProgEnv *env)
246 if (trie_is_dirty (env->trie)) {
/* allocated by full_path() */
247 char *path = full_path (env->path, env->trie_name, ".tri");
248 if (trie_save (env->trie, path) != 0) {
249 fprintf (stderr, "Cannot save trie to %s\n", path);
256 trie_free (env->trie);
/*
 * decode_switch - handle the options at the start of the command line.
 *
 * Scans argv from index 1 for as long as the arguments start with '-'.
 * Recognised: -h/--help, -V/--version, -p/--path DIR and "--"; anything
 * else is reported as an unknown option.
 *
 * main() uses the result as the index of the trie name in argv; the
 * return statement itself is not visible in this excerpt.
 */
261 decode_switch (int argc, char *argv[], ProgEnv *env)
265 for (opt_idx = 1; opt_idx < argc && *argv[opt_idx] == '-'; opt_idx++) {
266 if (strcmp (argv[opt_idx], "-h") == 0 ||
267 strcmp (argv[opt_idx], "--help") == 0)
/* NOTE(review): an explicit help request is passed a failure status -- confirm this is intended */
269 usage (argv[0], EXIT_FAILURE);
270 } else if (strcmp (argv[opt_idx], "-V") == 0 ||
271 strcmp (argv[opt_idx], "--version") == 0)
273 printf ("%s\n", VERSION);
275 } else if (strcmp (argv[opt_idx], "-p") == 0 ||
276 strcmp (argv[opt_idx], "--path") == 0)
/* NOTE(review): takes the next argument with no visible check that one exists */
278 env->path = argv[++opt_idx];
279 } else if (strcmp (argv[opt_idx], "--") == 0) {
283 fprintf (stderr, "Unknown option: %s\n", argv[opt_idx]);
/*
 * decode_command - run the commands named on the command line.
 *
 * Walks argv and dispatches on the command word: add, add-list, delete,
 * delete-list, query, list.  Each handler is given the arguments from the
 * current position onwards and its result is added to the index,
 * presumably the number of arguments it consumed -- confirm.  Any other
 * word is reported as an unknown command.
 *
 * NOTE(review): the lines between each test and its handler call are not
 * visible here, so whether the index is moved past the command word
 * before the call cannot be seen; nor can the return value.
 */
292 decode_command (int argc, char *argv[], ProgEnv *env)
296 for (opt_idx = 0; opt_idx < argc; opt_idx++) {
297 if (strcmp (argv[opt_idx], "add") == 0) {
299 opt_idx += command_add (argc - opt_idx, argv + opt_idx, env);
300 } else if (strcmp (argv[opt_idx], "add-list") == 0) {
302 opt_idx += command_add_list (argc - opt_idx, argv + opt_idx, env);
303 } else if (strcmp (argv[opt_idx], "delete") == 0) {
305 opt_idx += command_delete (argc - opt_idx, argv + opt_idx, env);
306 } else if (strcmp (argv[opt_idx], "delete-list") == 0) {
308 opt_idx += command_delete_list (argc - opt_idx, argv + opt_idx, env);
309 } else if (strcmp (argv[opt_idx], "query") == 0) {
311 opt_idx += command_query (argc - opt_idx, argv + opt_idx, env);
312 } else if (strcmp (argv[opt_idx], "list") == 0) {
314 opt_idx += command_list (argc - opt_idx, argv + opt_idx, env);
316 fprintf (stderr, "Unknown command: %s\n", argv[opt_idx]);
/*
 * command_add - "add" command: store key/data pairs given as arguments.
 *
 * Arguments are consumed two at a time: a key, then its data value parsed
 * with atoi().  A key with no following argument gets TRIE_DATA_ERROR as
 * its data.  Each key is converted with conv_to_alpha() and stored with
 * trie_store(); a failed store is reported on stderr.
 *
 * The result is added to the caller's argument index; the return statement
 * is not visible in this excerpt.
 */
325 command_add (int argc, char *argv[], ProgEnv *env)
330 while (opt_idx < argc) {
/* converted keys are limited to this many AlphaChar elements */
332 AlphaChar key_alpha[256];
335 key = argv[opt_idx++];
336 data = (opt_idx < argc) ? atoi (argv[opt_idx++]) : TRIE_DATA_ERROR;
/* NOTE(review): the conversion result is not checked before key_alpha is used */
338 conv_to_alpha (env, key, key_alpha, N_ELEMENTS (key_alpha));
339 if (!trie_store (env->trie, key_alpha, data)) {
340 fprintf (stderr, "Failed to add entry '%s' with data %d\n",
/*
 * command_add_list - "add-list" command: add the entries listed in a file.
 *
 * Arguments: an optional "-e ENC" / "--encoding ENC" pair, then the list
 * file name.  A descriptor converting from ENC to ALPHA_ENC is installed
 * as env->to_alpha_conv while the file is processed; the previous
 * descriptor is saved first and put back at the end.
 *
 * Each input line is trimmed with string_trim().  The key runs up to the
 * first tab or comma; what follows, after skipping white space, is the
 * data parsed with atoi(), or TRIE_DATA_ERROR when nothing follows.
 *
 * NOTE(review): several conditions and all return statements are not
 * visible in this excerpt, including whether the descriptor swap happens
 * only when an encoding was given.
 */
349 command_add_list (int argc, char *argv[], ProgEnv *env)
351 const char *enc_name, *input_name;
/* remember the current descriptor so it can be restored on the way out */
359 saved_conv = env->to_alpha_conv;
/* NOTE(review): argv[0] is read with no visible check that argc > 0 */
360 if (strcmp (argv[0], "-e") == 0 ||
361 strcmp (argv[0], "--encoding") == 0)
363 if (++opt_idx >= argc) {
/* NOTE(review): unlike the other messages, this format string has no trailing newline */
364 fprintf (stderr, "add-list option \"%s\" requires encoding name",
368 enc_name = argv[opt_idx++];
370 if (opt_idx >= argc) {
371 fprintf (stderr, "add-list requires input word list file name\n");
374 input_name = argv[opt_idx++];
377 iconv_t conv = iconv_open (ALPHA_ENC, enc_name);
/* (iconv_t) -1 is how iconv_open() reports an unsupported conversion */
378 if ((iconv_t) -1 == conv) {
380 "Conversion from \"%s\" to \"%s\" is not supported.\n",
381 enc_name, ALPHA_ENC);
385 env->to_alpha_conv = conv;
388 input = fopen (input_name, "r");
390 fprintf (stderr, "add-list: Cannot open input file \"%s\"\n",
/* clean-up label defined later in this function (spelling as in the code) */
392 goto exit_iconv_openned;
395 while (fgets (line, sizeof line, input)) {
397 AlphaChar key_alpha[256];
400 key = string_trim (line);
402 /* find key boundary */
403 for (data = key; *data && !strchr ("\t,", *data); ++data)
405 /* mark key ending and find data begin */
/* NOTE(review): *data is a plain char handed to isspace() */
408 while (isspace (*data))
412 data_val = ('\0' != *data) ? atoi (data) : TRIE_DATA_ERROR;
/* NOTE(review): the conversion result is not checked before key_alpha is used */
415 conv_to_alpha (env, key, key_alpha, N_ELEMENTS (key_alpha));
416 if (!trie_store (env->trie, key_alpha, data_val))
417 fprintf (stderr, "Failed to add key '%s' with data %d.\n",
/* drop the temporary descriptor and put the saved one back */
426 iconv_close (env->to_alpha_conv);
427 env->to_alpha_conv = saved_conv;
/*
 * command_delete - "delete" command: remove the given keys from the trie.
 *
 * Every argument is treated as a key: it is converted with
 * conv_to_alpha() and passed to trie_delete().  A key that could not be
 * deleted is reported on stderr.
 *
 * The result is added to the caller's argument index; the return statement
 * is not visible in this excerpt.
 */
434 command_delete (int argc, char *argv[], ProgEnv *env)
438 for (opt_idx = 0; opt_idx < argc; opt_idx++) {
439 AlphaChar key_alpha[256];
/* NOTE(review): the conversion result is not checked before key_alpha is used */
441 conv_to_alpha (env, argv[opt_idx], key_alpha, N_ELEMENTS (key_alpha));
442 if (!trie_delete (env->trie, key_alpha)) {
443 fprintf (stderr, "No entry '%s'. Not deleted.\n", argv[opt_idx]);
/*
 * command_delete_list - "delete-list" command: delete the keys listed in
 * a file.
 *
 * Arguments: an optional "-e ENC" / "--encoding ENC" pair, then the list
 * file name.  A descriptor converting from ENC to ALPHA_ENC is installed
 * as env->to_alpha_conv while the file is processed; the previous
 * descriptor is saved first and put back at the end.
 *
 * Each input line is trimmed with string_trim(), converted and passed to
 * trie_delete(); keys that could not be deleted are reported on stderr.
 *
 * NOTE(review): several conditions and all return statements are not
 * visible in this excerpt, including whether the descriptor swap happens
 * only when an encoding was given.
 */
451 command_delete_list (int argc, char *argv[], ProgEnv *env)
453 const char *enc_name, *input_name;
/* remember the current descriptor so it can be restored on the way out */
461 saved_conv = env->to_alpha_conv;
/* NOTE(review): argv[0] is read with no visible check that argc > 0 */
462 if (strcmp (argv[0], "-e") == 0 ||
463 strcmp (argv[0], "--encoding") == 0)
465 if (++opt_idx >= argc) {
/* NOTE(review): unlike the other messages, this format string has no trailing newline */
466 fprintf (stderr, "delete-list option \"%s\" requires encoding name",
470 enc_name = argv[opt_idx++];
472 if (opt_idx >= argc) {
473 fprintf (stderr, "delete-list requires input word list file name\n");
476 input_name = argv[opt_idx++];
479 iconv_t conv = iconv_open (ALPHA_ENC, enc_name);
/* (iconv_t) -1 is how iconv_open() reports an unsupported conversion */
480 if ((iconv_t) -1 == conv) {
482 "Conversion from \"%s\" to \"%s\" is not supported.\n",
483 enc_name, ALPHA_ENC);
487 env->to_alpha_conv = conv;
490 input = fopen (input_name, "r");
492 fprintf (stderr, "delete-list: Cannot open input file \"%s\"\n",
/* clean-up label defined later in this function (spelling as in the code) */
494 goto exit_iconv_openned;
497 while (fgets (line, sizeof line, input)) {
500 p = string_trim (line);
502 AlphaChar key_alpha[256];
/* NOTE(review): the conversion result is not checked before key_alpha is used */
504 conv_to_alpha (env, p, key_alpha, N_ELEMENTS (key_alpha));
505 if (!trie_delete (env->trie, key_alpha)) {
506 fprintf (stderr, "No entry '%s'. Not deleted.\n", p);
/* drop the temporary descriptor and put the saved one back */
515 iconv_close (env->to_alpha_conv);
516 env->to_alpha_conv = saved_conv;
/*
 * command_query - "query" command: print the data stored for one key.
 *
 * argv[0] is the key.  It is converted with conv_to_alpha() and looked up
 * with trie_retrieve(); the data is printed on stdout when found,
 * otherwise a "not found" message goes to stderr.  A missing key argument
 * is reported as well; the condition for that is not visible in this
 * excerpt, and neither is the return value.
 */
523 command_query (int argc, char *argv[], ProgEnv *env)
525 AlphaChar key_alpha[256];
529 fprintf (stderr, "query: No key specified.\n");
/* NOTE(review): the conversion result is not checked before key_alpha is used */
533 conv_to_alpha (env, argv[0], key_alpha, N_ELEMENTS (key_alpha));
534 if (trie_retrieve (env->trie, key_alpha, &data)) {
535 printf ("%d\n", data);
537 fprintf (stderr, "query: Key '%s' not found.\n", argv[0]);
/*
 * list_enum_func - callback handed to trie_enumerate() by command_list().
 *
 * Converts key to the locale codeset and prints "key<TAB>data" on stdout.
 * user_data is the ProgEnv pointer supplied by command_list().
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
544 list_enum_func (const AlphaChar *key, TrieData key_data, void *user_data)
546 ProgEnv *env = (ProgEnv *) user_data;
/* byte buffer for the converted key */
547 char key_locale[1024];
/* NOTE(review): the conversion result is not checked before key_locale is printed */
549 conv_from_alpha (env, key, key_locale, N_ELEMENTS (key_locale));
550 printf ("%s\t%d\n", key_locale, key_data);
/*
 * command_list - "list" command: print every entry of the trie.
 *
 * Passes list_enum_func() to trie_enumerate() with env as the user data;
 * argc and argv are not used in the visible lines.  The return statement
 * is not visible in this excerpt.
 */
555 command_list (int argc, char *argv[], ProgEnv *env)
557 trie_enumerate (env->trie, list_enum_func, (void *) env);
/*
 * usage - print the help text on stdout.
 *
 * prog_name is inserted into the title and the usage line.  The text
 * lists the options handled by decode_switch() and the commands handled
 * by decode_command().
 *
 * NOTE(review): how exit_status is used is not visible in this excerpt;
 * presumably the function ends the program with it -- confirm.
 */
563 usage (const char *prog_name, int exit_status)
565 printf ("%s - double-array trie manipulator\n", prog_name);
566 printf ("Usage: %s [OPTION]... TRIE CMD ARG ...\n", prog_name);
567 printf ("Options:\n");
569 " -p, --path DIR set trie directory to DIR [default=.]\n"
572 " -h, --help display this help and exit\n"
575 " -V, --version output version information and exit\n"
578 printf ("Commands:\n");
580 " add WORD DATA ...\n"
581 " Add WORD with DATA to trie\n"
584 " add-list [OPTION] LISTFILE\n"
585 " Add words and data listed in LISTFILE to trie\n"
587 " -e, --encoding ENC specify character encoding of LISTFILE\n"
591 " Delete WORD from trie\n"
594 " delete-list [OPTION] LISTFILE\n"
595 " Delete words listed in LISTFILE from trie\n"
597 " -e, --encoding ENC specify character encoding of LISTFILE\n"
601 " Query WORD data from trie\n"
605 " List all words in trie\n"
612 string_trim (char *s)
616 /* skip leading white spaces */
617 while (*s && isspace (*s))
620 /* trim trailing white spaces */
621 p = s + strlen (s) - 1;