1 /** @file scim_generic_table.cpp
2 * Implementation of class GenericKeyIndexLib and GenericTablePhraseLib.
6 * Smart Common Input Method
8 * Copyright (c) 2002-2005 James Su <suzhe@tsinghua.org.cn>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 * $Id: scim_generic_table.cpp,v 1.10 2006/01/12 08:43:29 suzhe Exp $
28 #define Uses_STL_FUNCTIONAL
29 #define Uses_STL_VECTOR
30 #define Uses_STL_IOSTREAM
31 #define Uses_STL_FSTREAM
32 #define Uses_STL_ALGORITHM
33 #define Uses_STL_UTILITY
34 #define Uses_STL_IOMANIP
38 #define Uses_SCIM_UTILITY
39 #define Uses_SCIM_LOOKUP_TABLE
44 #include "scim_generic_table.h"
45 #include "scim_table_private.h"
51 _trim_blank (const String &str)
53 String::size_type begin, len;
55 begin = str.find_first_not_of (" \t\n\v");
57 if (begin == String::npos)
60 len = str.find_last_not_of (" \t\n\v");
62 if (len != String::npos)
63 len = len - begin + 1;
65 return str.substr (begin, len);
69 _get_param_portion (const String &str, const String &delim = "=")
72 String::size_type pos = ret.find_first_of (delim);
74 if (pos != String::npos)
75 ret.erase (pos, String::npos);
77 return _trim_blank (ret);
// Return the part of str that follows the first character found in delim,
// with surrounding blanks trimmed by _trim_blank ().
// NOTE(review): the branch taken when no delimiter character is present is
// not visible here -- confirm what is returned in that case.
81 _get_value_portion (const String &str, const String &delim = "=")
84 String::size_type pos;
86 pos = ret.find_first_of (delim);
// Drop everything up to and including the delimiter character.
88 if (pos != String::npos)
89 ret.erase (0, pos + 1);
93 return _trim_blank (ret);
// Line reader used by the table loaders: reads the stream with fgets (),
// at most 4096 bytes per call, and trims each line with _trim_blank ().
// Lines starting with "###" are treated as comments by the condition below.
// NOTE(review): the first half of that condition and the return statements
// are not visible here -- presumably empty lines are skipped as well and an
// empty string is returned at end of file; confirm.
102 while (fp && !feof (fp)) {
103 if (!fgets (temp, 4096, fp)) break;
105 res = _trim_blank (String (temp));
108 !(res.length () >= 3 && res.substr (0, 3) == String ("###")))
115 static inline WideString
116 _hex_to_wide_string (const String &str)
121 while (i <= str.length () - 6 && str [i] == '0' && tolower (str [i+1]) == 'x') {
122 ucs4_t wc = (ucs4_t) strtol (str.substr (i+2, 4).c_str (), NULL, 16);
132 // Implementations of GenericTableHeader members.
// Default constructor: unknown keyboard layout, zero key length, and the
// default option flags listed in the initializer list below.
133 GenericTableHeader::GenericTableHeader ()
134 : m_keyboard_layout (SCIM_KEYBOARD_Unknown),
135 m_max_key_length (0),
136 m_show_key_prompt (false),
137 m_auto_select (false),
138 m_auto_wildcard (false),
139 m_auto_commit (false),
142 m_discard_invalid_key (false),
143 m_dynamic_adjust (false),
// The lookup table and the full width punctuation/letter features are
// enabled by default; only full width letters start switched off.
144 m_always_show_lookup (true),
145 m_use_full_width_punct (true),
146 m_def_full_width_punct (true),
147 m_use_full_width_letter (true),
148 m_def_full_width_letter (false),
// Destructor. NOTE(review): body not visible here -- presumably empty.
153 GenericTableHeader::~GenericTableHeader ()
158 GenericTableHeader::clear ()
161 m_icon_file = String ();
162 m_serial_number = String ();
163 m_author = String ();
164 m_languages = String ();
165 m_status_prompt = String ();
166 m_valid_input_chars = String ();
167 m_key_end_chars = String ();
168 m_single_wildcard_chars = String ();
169 m_multi_wildcard_chars = String ();
170 m_default_name = String ();
172 m_local_names.clear ();
173 m_char_prompts.clear ();
175 m_split_keys.clear ();
176 m_commit_keys.clear ();
177 m_forward_keys.clear ();
178 m_page_up_keys.clear ();
179 m_page_down_keys.clear ();
180 m_select_keys.clear ();
181 m_keyboard_layout = SCIM_KEYBOARD_Unknown;
182 m_max_key_length = 0;
183 m_auto_select = false;
184 m_auto_wildcard = false;
185 m_auto_commit = false;
188 m_dynamic_adjust = false;
189 m_always_show_lookup = true;
190 m_use_full_width_punct = true;
191 m_def_full_width_punct = true;
192 m_use_full_width_letter = true;
193 m_def_full_width_letter = false;
198 GenericTableHeader::is_valid_input_char (char input) const
200 return std::binary_search (m_valid_input_chars.begin (), m_valid_input_chars.end (), input);
204 GenericTableHeader::is_key_end_char (char input) const
206 return std::binary_search (m_key_end_chars.begin (), m_key_end_chars.end (), input);
210 GenericTableHeader::is_single_wildcard_char (char single) const
212 return std::binary_search (m_single_wildcard_chars.begin (), m_single_wildcard_chars.end (), single);
216 GenericTableHeader::is_multi_wildcard_char (char multi) const
218 return std::binary_search (m_multi_wildcard_chars.begin (), m_multi_wildcard_chars.end (), multi);
// Tell whether split is the ASCII code of one of the split keys.
// Linear scan over m_split_keys, comparing get_ascii_code () of each key.
// NOTE(review): the return statements (and any guard before the loop) are
// not visible here -- presumably true on the first match, false otherwise.
222 GenericTableHeader::is_split_char (char split) const
225 for (size_t i = 0; i < m_split_keys.size (); ++ i)
226 if (m_split_keys [i].get_ascii_code () == split)
// Parse the table header from fp.
//
// The header is expected between a "BEGIN_DEFINITION" line and an
// "END_DEFINITION" line; lines are fetched with _get_line (). Every line is
// split at '=' into a parameter name and a value. Boolean options are set
// only by the exact spellings "TRUE", "true" and "True"; every other value
// means false.
//
// Wildcard chars, key end chars and split keys are first collected in local
// variables and filtered after the whole header has been read, because the
// filtering depends on the complete set of valid input chars.
//
// Returns false when fp is null or at end of file, and on the failures
// checked below. NOTE(review): several "return false" / "return true" lines
// are not visible here; the exact failure behaviour should be confirmed.
234 GenericTableHeader::load (FILE *fp)
236 if (!fp || feof (fp)) return false;
243 String single_wildcard;
244 String multi_wildcard;
245 String key_end_chars;
247 std::vector <KeyEvent> split_keys;
250 if (_get_line (fp) != String ("BEGIN_DEFINITION"))
254 temp = _get_line (fp);
// An empty result from _get_line () before END_DEFINITION is an error.
256 if (temp.length () == 0) return false;
257 if (temp == String ("END_DEFINITION")) break;
259 paramstr = _get_param_portion (temp);
260 valuestr = _get_value_portion (temp);
262 if (paramstr.length () == 0 && valuestr.length () == 0) {
263 std::cerr << "Invalid line in header: " << temp << "\n";
267 if (paramstr == "NAME") { // Get table default name.
268 m_default_name = valuestr;
269 } else if (paramstr.substr (0,5) == "NAME.") { //Get table name for each locales.
// Stored as "<locale> = <name>", so that the entry can later be split
// again with _get_param_portion () / _get_value_portion ().
270 m_local_names.push_back (
271 paramstr.substr (5,paramstr.length () - 5) + " = " + valuestr);
272 } else if (paramstr == "UUID") {
274 } else if (paramstr == "ICON") {
275 m_icon_file = valuestr;
276 } else if (paramstr == "LANGUAGES") { //Get supported languages.
277 m_languages = valuestr;
278 } else if (paramstr == "AUTHOR") { //Get the table author.
280 } else if (paramstr == "SERIAL_NUMBER") {
281 m_serial_number = valuestr;
282 } else if (paramstr == "KEYBOARD_LAYOUT") {
283 m_keyboard_layout = scim_string_to_keyboard_layout (valuestr);
284 } else if (paramstr == "STATUS_PROMPT") {
285 m_status_prompt = valuestr;
286 } else if (paramstr == "SHOW_KEY_PROMPT") { //Get show_key_prompt value.
287 if (valuestr == "TRUE" || valuestr == "true" || valuestr == "True")
288 m_show_key_prompt = true;
290 m_show_key_prompt = false;
291 } else if (paramstr == "AUTO_SELECT") { //Get auto_select value.
292 if (valuestr == "TRUE" || valuestr == "true" || valuestr == "True")
293 m_auto_select = true;
295 m_auto_select = false;
296 } else if (paramstr == "AUTO_WILDCARD") { //Get auto wildcard value.
297 if (valuestr == "TRUE" || valuestr == "true" || valuestr == "True")
298 m_auto_wildcard = true;
300 m_auto_wildcard = false;
301 } else if (paramstr == "AUTO_COMMIT") { //Get auto commit value.
302 if (valuestr == "TRUE" || valuestr == "true" || valuestr == "True")
303 m_auto_commit = true;
305 m_auto_commit = false;
306 } else if (paramstr == "AUTO_SPLIT") { //Get auto split value.
307 if (valuestr == "TRUE" || valuestr == "true" || valuestr == "True")
310 m_auto_split = false;
311 } else if (paramstr == "AUTO_FILL") { //Get auto fill value.
312 if (valuestr == "TRUE" || valuestr == "true" || valuestr == "True")
316 } else if (paramstr == "DISCARD_INVALID_KEY") { //Get discard_invalid_key value.
317 if (valuestr == "TRUE" || valuestr == "true" || valuestr == "True")
318 m_discard_invalid_key = true;
320 m_discard_invalid_key = false;
321 } else if (paramstr == "DYNAMIC_ADJUST") { //Get dynamic_adjust value.
322 if (valuestr == "TRUE" || valuestr == "true" || valuestr == "True")
323 m_dynamic_adjust = true;
325 m_dynamic_adjust = false;
326 } else if (paramstr == "ALWAYS_SHOW_LOOKUP") { //Get always_show_lookup value.
327 if (valuestr == "TRUE" || valuestr == "true" || valuestr == "True")
328 m_always_show_lookup = true;
330 m_always_show_lookup = false;
331 } else if (paramstr == "USE_FULL_WIDTH_PUNCT") { //Get use_full_width_punct value.
332 if (valuestr == "TRUE" || valuestr == "true" || valuestr == "True")
333 m_use_full_width_punct = true;
335 m_use_full_width_punct = false;
336 } else if (paramstr == "DEF_FULL_WIDTH_PUNCT") { //Get def_full_width_punct value.
337 if (valuestr == "TRUE" || valuestr == "true" || valuestr == "True")
338 m_def_full_width_punct = true;
340 m_def_full_width_punct = false;
341 } else if (paramstr == "USE_FULL_WIDTH_LETTER") { //Get use_full_width_letter value.
342 if (valuestr == "TRUE" || valuestr == "true" || valuestr == "True")
343 m_use_full_width_letter = true;
345 m_use_full_width_letter = false;
346 } else if (paramstr == "DEF_FULL_WIDTH_LETTER") { //Get def_full_width_letter value.
347 if (valuestr == "TRUE" || valuestr == "true" || valuestr == "True")
348 m_def_full_width_letter = true;
350 m_def_full_width_letter = false;
351 } else if (paramstr == "VALID_INPUT_CHARS") { //Get valid input chars.
352 m_valid_input_chars = valuestr;
353 } else if (paramstr == "KEY_END_CHARS") { //Get key end chars.
354 key_end_chars = valuestr;
355 } else if (paramstr == "SINGLE_WILDCARD_CHAR") { //Get single wildcard char.
356 single_wildcard = valuestr;
357 } else if (paramstr == "MULTI_WILDCARD_CHAR") { //Get multi wildcard char.
358 multi_wildcard = valuestr;
359 } else if (paramstr == "SPLIT_KEYS") { //Get split keys.
360 scim_string_to_key_list (split_keys, valuestr);
361 } else if (paramstr == "COMMIT_KEYS") { //Get commit keys.
362 scim_string_to_key_list (m_commit_keys, valuestr);
363 } else if (paramstr == "FORWARD_KEYS") { //Get forward keys.
364 scim_string_to_key_list (m_forward_keys, valuestr);
365 } else if (paramstr == "SELECT_KEYS") { //Get select keys.
366 scim_string_to_key_list (m_select_keys, valuestr);
367 } else if (paramstr == "PAGE_UP_KEYS") {
368 scim_string_to_key_list (m_page_up_keys, valuestr);
369 } else if (paramstr == "PAGE_DOWN_KEYS") {
370 scim_string_to_key_list (m_page_down_keys, valuestr);
371 } else if (paramstr == "MODE_SWITCH_KEYS") {
372 scim_string_to_key_list (m_mode_switch_keys, valuestr);
373 } else if (paramstr == "FULL_WIDTH_PUNCT_KEYS") {
374 scim_string_to_key_list (m_full_width_punct_keys, valuestr);
375 } else if (paramstr == "FULL_WIDTH_LETTER_KEYS") {
376 scim_string_to_key_list (m_full_width_letter_keys, valuestr);
377 } else if (paramstr == "MAX_KEY_LENGTH") {
378 m_max_key_length = atoi (valuestr.c_str ());
379 } else if (paramstr == "BEGIN_CHAR_PROMPTS_DEFINITION") { //Read char names.
// Nested section, read until END_CHAR_PROMPTS_DEFINITION. A prompt line
// needs at least three characters with a space in second position, i.e.
// "<char> <prompt>".
381 temp = _get_line (fp);
383 if (temp == String ("END_CHAR_PROMPTS_DEFINITION"))
386 if (temp.length () < 3 || temp [1] != ' ')
389 m_char_prompts.push_back (String (temp));
392 std::cerr << "Invalid line in header: " << temp << "\n";
396 //Post process the information that was read.
// UUID, serial number, a key length within 1..SCIM_GT_MAX_KEY_LENGTH and a
// non-empty set of valid input chars are checked here.
398 if (!m_uuid.length () || !m_serial_number.length ())
401 if (m_max_key_length <= 0 || m_max_key_length > SCIM_GT_MAX_KEY_LENGTH)
404 if (!m_valid_input_chars.length ())
// Without a default name, fall back to the first localized name.
407 if (m_default_name.length () == 0) {
408 if (m_local_names.size ())
409 m_default_name = _get_value_portion (m_local_names [0]);
// The is_*_char () predicates use binary search, hence every char list is
// sorted before it is queried.
416 std::sort (m_valid_input_chars.begin (), m_valid_input_chars.end ());
// A single wildcard char must not be a valid input char.
418 for (i=0; i<single_wildcard.length (); ++i)
419 if (!is_valid_input_char (single_wildcard [i]))
420 m_single_wildcard_chars.push_back (single_wildcard [i]);
422 std::sort (m_single_wildcard_chars.begin (), m_single_wildcard_chars.end ());
// A multi wildcard char must be neither a valid input char nor a single
// wildcard char.
424 for (i=0; i<multi_wildcard.length (); ++i)
425 if (!is_valid_input_char (multi_wildcard [i]) && !is_single_wildcard_char (multi_wildcard [i]))
426 m_multi_wildcard_chars.push_back (multi_wildcard [i]);
// Key end chars, in contrast, have to be valid input chars.
428 for (i=0; i<key_end_chars.length (); ++i)
429 if (is_valid_input_char (key_end_chars [i]))
430 m_key_end_chars.push_back (key_end_chars [i]);
// Split keys whose ASCII code is a valid input char are dropped.
432 for (i=0; i<split_keys.size (); ++i)
433 if (!is_valid_input_char (split_keys [i].get_ascii_code ()))
434 m_split_keys.push_back (split_keys [i]);
436 std::sort (m_key_end_chars.begin (), m_key_end_chars.end ());
437 std::sort (m_multi_wildcard_chars.begin (), m_multi_wildcard_chars.end ());
438 std::sort (m_char_prompts.begin (), m_char_prompts.end ());
439 std::sort (m_local_names.begin (), m_local_names.end ());
// Keep at most SCIM_LOOKUP_TABLE_MAX_PAGESIZE select keys.
441 if (m_select_keys.size () > SCIM_LOOKUP_TABLE_MAX_PAGESIZE)
442 m_select_keys.erase (m_select_keys.begin () + SCIM_LOOKUP_TABLE_MAX_PAGESIZE,
443 m_select_keys.end ());
449 GenericTableHeader::save (FILE *fp)
454 if (!fp) return false;
456 fprintf (fp, "### Begin Table definition.\n");
457 fprintf (fp, "BEGIN_DEFINITION\n");
459 fprintf (fp, "UUID = %s\n", m_uuid.c_str ());
461 fprintf (fp, "SERIAL_NUMBER = %s\n", m_serial_number.c_str ());
463 if (m_icon_file.length ())
464 fprintf (fp, "ICON = %s\n", m_icon_file.c_str ());
466 fprintf (fp, "### ICON =\n");
468 if (m_default_name.length ())
469 fprintf (fp, "NAME = %s\n", m_default_name.c_str ());
471 fprintf (fp, "### NAME =\n");
473 for (i = 0; i < m_local_names.size (); ++i)
474 fprintf (fp, "NAME.%s\n", m_local_names [i].c_str ());
476 if (m_languages.length ())
477 fprintf (fp, "LANGUAGES = %s\n", m_languages.c_str ());
479 fprintf (fp, "### LOCALES =\n");
481 if (m_author.length ())
482 fprintf (fp, "AUTHOR = %s\n", m_author.c_str ());
484 fprintf (fp, "### AUTHOR =\n");
486 if (m_status_prompt.length ())
487 fprintf (fp, "STATUS_PROMPT = %s\n", m_status_prompt.c_str ());
489 fprintf (fp, "### STATUS_PROMPT =\n");
491 fprintf (fp, "KEYBOARD_LAYOUT = %s\n", scim_keyboard_layout_to_string (m_keyboard_layout).c_str ());
492 fprintf (fp, "VALID_INPUT_CHARS = %s\n", m_valid_input_chars.c_str ());
494 if (m_key_end_chars.length ())
495 fprintf (fp, "KEY_END_CHARS = %s\n", m_key_end_chars.c_str ());
497 fprintf (fp, "### KEY_END_CHARS =\n");
499 if (m_single_wildcard_chars.length ())
500 fprintf (fp, "SINGLE_WILDCARD_CHAR = %s\n", m_single_wildcard_chars.c_str ());
502 fprintf (fp, "### SINGLE_WILDCARD_CHAR =\n");
504 if (m_multi_wildcard_chars.length ())
505 fprintf (fp, "MULTI_WILDCARD_CHAR = %s\n", m_multi_wildcard_chars.c_str ());
507 fprintf (fp, "### MULTI_WILDCARD_CHAR =\n");
509 scim_key_list_to_string (temp, m_split_keys);
511 fprintf (fp, "SPLIT_KEYS = %s\n", temp.c_str ());
513 fprintf (fp, "### SPLIT_KEYS =\n");
515 scim_key_list_to_string (temp, m_commit_keys);
517 fprintf (fp, "COMMIT_KEYS = %s\n", temp.c_str ());
519 fprintf (fp, "### COMMIT_KEYS =\n");
521 scim_key_list_to_string (temp, m_forward_keys);
523 fprintf (fp, "FORWARD_KEYS = %s\n", temp.c_str ());
525 fprintf (fp, "### FORWARD_KEYS =\n");
527 scim_key_list_to_string (temp, m_select_keys);
529 fprintf (fp, "SELECT_KEYS = %s\n", temp.c_str ());
531 fprintf (fp, "### SELECT_KEYS =\n");
533 scim_key_list_to_string (temp, m_page_up_keys);
535 fprintf (fp, "PAGE_UP_KEYS = %s\n", temp.c_str ());
537 fprintf (fp, "### PAGE_UP_KEYS =\n");
539 scim_key_list_to_string (temp, m_page_down_keys);
541 fprintf (fp, "PAGE_DOWN_KEYS = %s\n", temp.c_str ());
543 fprintf (fp, "### PAGE_DOWN_KEYS =\n");
545 scim_key_list_to_string (temp, m_mode_switch_keys);
547 fprintf (fp, "MODE_SWITCH_KEYS = %s\n", temp.c_str ());
549 fprintf (fp, "### MODE_SWITCH_KEYS =\n");
551 scim_key_list_to_string (temp, m_full_width_punct_keys);
553 fprintf (fp, "FULL_WIDTH_PUNCT_KEYS = %s\n", temp.c_str ());
555 fprintf (fp, "### FULL_WIDTH_PUNCT_KEYS =\n");
557 scim_key_list_to_string (temp, m_full_width_letter_keys);
559 fprintf (fp, "FULL_WIDTH_LETTER_KEYS = %s\n", temp.c_str ());
561 fprintf (fp, "### FULL_WIDTH_LETTER_KEYS =\n");
563 fprintf (fp, "MAX_KEY_LENGTH = %u\n", m_max_key_length);
565 fprintf (fp, "SHOW_KEY_PROMPT = %s\n", (m_show_key_prompt?"TRUE":"FALSE"));
566 fprintf (fp, "AUTO_SELECT = %s\n", (m_auto_select?"TRUE":"FALSE"));
567 fprintf (fp, "AUTO_WILDCARD = %s\n", (m_auto_wildcard?"TRUE":"FALSE"));
568 fprintf (fp, "AUTO_COMMIT = %s\n", (m_auto_commit?"TRUE":"FALSE"));
569 fprintf (fp, "AUTO_SPLIT = %s\n", (m_auto_split?"TRUE":"FALSE"));
570 fprintf (fp, "AUTO_FILL = %s\n", (m_auto_fill?"TRUE":"FALSE"));
571 fprintf (fp, "DISCARD_INVALID_KEY = %s\n", (m_discard_invalid_key?"TRUE":"FALSE"));
572 fprintf (fp, "DYNAMIC_ADJUST = %s\n", (m_dynamic_adjust?"TRUE":"FALSE"));
573 fprintf (fp, "ALWAYS_SHOW_LOOKUP = %s\n", (m_always_show_lookup?"TRUE":"FALSE"));
574 fprintf (fp, "USE_FULL_WIDTH_PUNCT = %s\n", (m_use_full_width_punct?"TRUE":"FALSE"));
575 fprintf (fp, "DEF_FULL_WIDTH_PUNCT = %s\n", (m_def_full_width_punct?"TRUE":"FALSE"));
576 fprintf (fp, "USE_FULL_WIDTH_LETTER = %s\n", (m_use_full_width_letter?"TRUE":"FALSE"));
577 fprintf (fp, "DEF_FULL_WIDTH_LETTER = %s\n", (m_def_full_width_letter?"TRUE":"FALSE"));
579 if (m_char_prompts.size ()) {
580 fprintf (fp, "BEGIN_CHAR_PROMPTS_DEFINITION\n");
581 for (i = 0; i < m_char_prompts.size (); ++ i)
582 fprintf (fp, "%s\n", m_char_prompts [i].c_str ());
583 fprintf (fp, "END_CHAR_PROMPTS_DEFINITION\n");
586 fprintf (fp, "END_DEFINITION\n\n");
594 GenericTableHeader::get_name (const String & locale) const
596 if (locale.length () == 0)
597 return utf8_mbstowcs (m_default_name);
599 String lang, param, value;
600 String::size_type dot;
602 dot = locale.find_first_of ('.');
603 if (dot != String::npos)
604 lang = locale.substr (0, dot);
608 for (size_t i=0; i<m_local_names.size (); ++i) {
609 param = _get_param_portion (m_local_names [i]);
610 value = _get_value_portion (m_local_names [i]);
611 if ((param.length () > lang.length () && param.substr (0, lang.length ()) == lang) ||
612 (param.length () < lang.length () && lang.substr (0, param.length ()) == param) ||
614 return utf8_mbstowcs (value);
616 return utf8_mbstowcs (m_default_name);
619 class __StringLessThanByFirstChar
622 bool operator () (const String & lhs, char rhs) const {
623 return lhs [0] < rhs;
625 bool operator () (char lhs, const String & rhs) const {
626 return lhs < rhs [0];
628 bool operator () (const String & lhs, const String & rhs) const {
629 return lhs [0] < rhs [0];
634 GenericTableHeader::get_char_prompt (char input) const
636 std::vector <String>::const_iterator it =
637 std::lower_bound (m_char_prompts.begin (),
638 m_char_prompts.end (),
640 __StringLessThanByFirstChar ());
642 if (it != m_char_prompts.end () && (*it) [0] == input)
643 return utf8_mbstowcs (it->substr (2, it->length () - 2));
645 return utf8_mbstowcs (&input, 1);
649 GenericTableHeader::get_key_prompt (const String &key) const
653 for (uint32 i=0; i<key.length (); ++i)
654 prompt += get_char_prompt (key [i]);
660 // Implementations of GenericTableContent members.
// Comparator ordering content records by their phrase bytes.
//
// A record is addressed by its offset from m_ptr. As read here, byte 0
// holds the key length in its low six bits, byte 1 the phrase length, and
// the phrase starts after the four header bytes and the key.
// Overloads exist for record/record, record/string and string/record.
// NOTE(review): the result returned when one phrase is a prefix of the other
// is not visible here -- presumably the shorter phrase sorts first.
662 class OffsetLessByPhrase
664 const unsigned char *m_ptr;
666 OffsetLessByPhrase (const unsigned char *p) : m_ptr (p) {}
668 bool operator () (uint32 lhs, uint32 rhs) const {
669 const unsigned char *l = m_ptr + lhs;
670 const unsigned char *r = m_ptr + rhs;
// Key length from byte 0; afterwards l and r point at byte 1.
672 size_t lklen = (size_t) (*(l++) & 0x3F);
673 size_t rklen = (size_t) (*(r++) & 0x3F);
675 size_t llen = (size_t) (*l);
676 size_t rlen = (size_t) (*r);
// From byte 1, skip the three remaining header bytes and the key, then
// compare the phrases byte by byte.
678 for (l += (3+lklen), r += (3+rklen); llen && rlen; --llen, --rlen, ++l, ++r)
679 if (*l != *r) return *l < *r;
684 bool operator () (uint32 lhs, const String &rhs) const {
685 const unsigned char *l = m_ptr + lhs;
686 const unsigned char *r = (const unsigned char *) rhs.c_str ();
688 size_t lklen = (size_t) (*(l++) & 0x3F);
689 size_t llen = (size_t) (*l);
690 size_t rlen = rhs.length ();
692 for (l += (3+lklen); llen && rlen; --llen, --rlen, ++l, ++r)
693 if (*l != *r) return *l < *r;
698 bool operator () (const String &lhs, uint32 rhs) const {
699 const unsigned char *l = (const unsigned char *) lhs.c_str ();
700 const unsigned char *r = m_ptr + rhs;
702 size_t llen = lhs.length ();
703 size_t rklen = (size_t) (*(r++) & 0x3F);
704 size_t rlen = (size_t) (*r);
706 for (r += (3+rklen); llen && rlen; --llen, --rlen, ++l, ++r)
707 if (*l != *r) return *l < *r;
// Comparator ordering content records by the first m_len bytes of their key.
// The key is read at offset + 4, i.e. right after the four header bytes.
// Overloads exist for record/record, record/string and string/record.
// NOTE(review): the string overloads read m_len bytes from the string, so
// they assume it is at least that long -- confirm at the call sites. The
// value returned for equal keys is not visible here (presumably false).
713 class OffsetLessByKeyFixedLen
715 const unsigned char *m_ptr;
718 OffsetLessByKeyFixedLen (const unsigned char *p, size_t len) : m_ptr (p), m_len (len) {}
720 bool operator () (uint32 lhs, uint32 rhs) const {
721 const unsigned char *l = m_ptr + lhs + 4;
722 const unsigned char *r = m_ptr + rhs + 4;
// The first differing byte decides.
724 for (size_t i = 0; i < m_len; ++i, ++l, ++r)
725 if (*l != *r) return *l < *r;
730 bool operator () (uint32 lhs, const String &rhs) const {
731 const unsigned char *l = m_ptr + lhs + 4;
732 const unsigned char *r = (const unsigned char *) rhs.c_str ();
734 for (size_t i = 0; i < m_len; ++i, ++l, ++r)
735 if (*l != *r) return *l < *r;
740 bool operator () (const String &lhs, uint32 rhs) const {
741 const unsigned char *l = (const unsigned char *) lhs.c_str ();
742 const unsigned char *r = m_ptr + rhs + 4;
744 for (size_t i = 0; i < m_len; ++i, ++l, ++r)
745 if (*l != *r) return *l < *r;
// Like OffsetLessByKeyFixedLen, but key positions can be excluded from the
// comparison: the constructor sets m_mask [i] to 0 wherever the pattern key
// holds the character wc, and positions with a zero mask are skipped.
// NOTE(review): the branch that sets the mask for the other positions is not
// visible here -- presumably a non-zero value. The constructor writes len
// entries into an array of SCIM_GT_MAX_KEY_LENGTH elements, so len must not
// exceed that constant; confirm at the call sites.
751 class OffsetLessByKeyFixedLenMask
753 const unsigned char *m_ptr;
755 int m_mask [SCIM_GT_MAX_KEY_LENGTH];
757 OffsetLessByKeyFixedLenMask (const unsigned char *p,
761 : m_ptr (p), m_len (len) {
762 for (size_t i = 0; i < len; ++i) {
763 if (key [i] == wc) m_mask [i] = 0;
768 bool operator () (uint32 lhs, uint32 rhs) const {
769 const unsigned char *l = m_ptr + lhs + 4;
770 const unsigned char *r = m_ptr + rhs + 4;
// Only unmasked positions take part in the comparison.
772 for (size_t i = 0; i < m_len; ++i, ++l, ++r)
773 if (m_mask [i] && *l != *r) return *l < *r;
778 bool operator () (uint32 lhs, const String &rhs) const {
779 const unsigned char *l = m_ptr + lhs + 4;
780 const unsigned char *r = (const unsigned char *) rhs.c_str ();
782 for (size_t i = 0; i < m_len; ++i, ++l, ++r)
783 if (m_mask [i] && *l != *r) return *l < *r;
788 bool operator () (const String &lhs, uint32 rhs) const {
789 const unsigned char *l = (const unsigned char *) lhs.c_str ();
790 const unsigned char *r = m_ptr + rhs + 4;
792 for (size_t i = 0; i < m_len; ++i, ++l, ++r)
793 if (m_mask [i] && *l != *r) return *l < *r;
// Comparator on content records using key length and frequency.
// The key length is taken from the low six bits of byte 0. For records with
// equal key length, the one with the higher 16 bit frequency (read at
// offset + 2) comes first.
// NOTE(review): the branches for differing key lengths are not visible
// here -- presumably shorter keys come first; confirm.
799 class OffsetCompareByKeyLenAndFreq
801 const unsigned char *m_ptr;
803 OffsetCompareByKeyLenAndFreq (const unsigned char *p) : m_ptr (p) {}
805 bool operator () (uint32 lhs, uint32 rhs) const {
806 const unsigned char *l = m_ptr + lhs;
807 const unsigned char *r = m_ptr + rhs;
809 size_t llen = (*l & 0x3F);
810 size_t rlen = (*r & 0x3F);
814 else if (llen == rlen)
815 return scim_bytestouint16 (l + 2) > scim_bytestouint16 (r + 2);
// Comparator on content records using the phrase length byte (offset + 1)
// and the 16 bit frequency stored right after it.
// NOTE(review): the comparison of the length bytes is not visible here;
// going by the class name longer phrases presumably come first, with the
// higher frequency deciding among equal lengths -- confirm.
821 class OffsetGreaterByPhraseLength
823 const unsigned char *m_ptr;
825 OffsetGreaterByPhraseLength (const unsigned char *p) : m_ptr (p) {}
827 bool operator () (uint32 lhs, uint32 rhs) const {
// l and r point at the phrase length byte of each record.
828 const unsigned char *l = m_ptr + lhs + 1;
829 const unsigned char *r = m_ptr + rhs + 1;
834 return scim_bytestouint16 (l + 1) > scim_bytestouint16 (r + 1);
// Default constructor: no wildcard chars, key length zero, no content
// allocated, phrase index not built.
// NOTE(review): several initializers are not visible here.
840 GenericTableContent::GenericTableContent ()
841 : m_single_wildcard_char (0),
842 m_multi_wildcard_char (0),
843 m_max_key_length (0),
849 m_content_allocated_size (0),
853 m_offsets_by_phrases_inited (false)
// Prepare the content object for the table described by header.
//
// Resets the character attribute table and the wildcard chars, takes the
// maximum key length from the header (capped at SCIM_GT_MAX_KEY_LENGTH) and
// allocates one offset vector and one attribute vector per key length.
// Afterwards the valid input chars, key end chars and wildcard chars of the
// header are entered into the attribute table.
// Returns false if an allocation fails.
// NOTE(review): the clean-up in the second failure branch and the final
// return statements are not visible here.
858 GenericTableContent::init (const GenericTableHeader &header)
864 for (i = 0; i < 256; ++ i)
865 m_char_attrs [i] = 0;
867 m_single_wildcard_char = 0;
868 m_multi_wildcard_char = 0;
870 m_max_key_length = std::min (header.get_max_key_length (), (size_t) SCIM_GT_MAX_KEY_LENGTH);
872 if (m_max_key_length) {
// Release the index arrays of a previous initialization.
873 if (m_offsets) delete [] m_offsets;
874 if (m_offsets_attrs) delete [] m_offsets_attrs;
876 m_offsets = new(std::nothrow) std::vector <uint32> [m_max_key_length];
877 if (!m_offsets) return false;
879 m_offsets_attrs = new(std::nothrow) std::vector <OffsetGroupAttr> [m_max_key_length];
880 if (!m_offsets_attrs) {
885 String chars = header.get_valid_input_chars ();
// The chars are converted to unsigned char before they are used as index.
887 for (i = 0; i < chars.length (); ++ i)
888 m_char_attrs [(size_t) ((unsigned char) chars [i])] = GT_CHAR_ATTR_VALID_CHAR;
// The key end attribute is OR-ed onto whatever attribute is already set.
890 chars = header.get_key_end_chars ();
891 for (i = 0; i < chars.length (); ++ i)
892 m_char_attrs [(size_t) ((unsigned char) chars [i])] |= GT_CHAR_ATTR_KEY_END_CHAR;
894 set_single_wildcard_chars (header.get_single_wildcard_chars ());
895 set_multi_wildcard_chars (header.get_multi_wildcard_chars ());
// Destructor: unmaps the content if it was memory mapped, otherwise handles
// the m_content buffer, and deletes the m_offsets_attrs array.
// NOTE(review): the condition selecting the munmap branch and the remaining
// delete statements are not visible here.
902 GenericTableContent::~GenericTableContent ()
905 munmap (m_mmapped_ptr, m_mmapped_size);
906 } else if (m_content) {
911 delete [] m_offsets_attrs;
915 GenericTableContent::set_single_wildcard_chars (const String &single)
917 if (m_max_key_length) {
919 for (i = 0; i < 256; ++ i) {
920 if (is_single_wildcard_char (m_char_attrs [i]))
921 m_char_attrs [i] = 0;
924 m_single_wildcard_char = 0;
926 for (i = 0; i < single.length (); ++ i)
927 if (!m_char_attrs [(size_t) ((unsigned char) single[i])])
928 m_char_attrs [(size_t) ((unsigned char) single[i])] = GT_CHAR_ATTR_SINGLE_WILDCARD;
930 for (i = 0; i < 256; ++ i) {
931 if (m_char_attrs [i] == GT_CHAR_ATTR_SINGLE_WILDCARD) {
932 m_single_wildcard_char = (char) i;
937 //No defined single wildcard char, choose one
938 if (!m_single_wildcard_char) {
939 for (i = 1; i < 256; ++ i) {
940 if (!m_char_attrs [i]) {
941 m_single_wildcard_char = (char) i;
942 m_char_attrs [i] = GT_CHAR_ATTR_SINGLE_WILDCARD;
951 GenericTableContent::set_multi_wildcard_chars (const String &multi)
953 if (m_max_key_length) {
955 for (i = 0; i < 256; ++ i) {
956 if (is_multi_wildcard_char (m_char_attrs [i]))
957 m_char_attrs [i] = 0;
960 m_multi_wildcard_char = 0;
962 for (i = 0; i < multi.length (); ++ i)
963 if (!m_char_attrs [(uint32) multi[i]])
964 m_char_attrs [(uint32) multi[i]] = GT_CHAR_ATTR_MULTI_WILDCARD;
966 for (i = 0; i < 256; ++ i) {
967 if (m_char_attrs [i] == GT_CHAR_ATTR_MULTI_WILDCARD) {
968 m_multi_wildcard_char = (char) i;
973 //No defined multi wildcard char, choose one
974 if (!m_multi_wildcard_char) {
975 for (i = 1; i < 256; ++ i) {
976 if (!m_char_attrs [i]) {
977 m_multi_wildcard_char = (char) i;
978 m_char_attrs [i] = GT_CHAR_ATTR_MULTI_WILDCARD;
987 GenericTableContent::set_max_key_length (size_t max_key_length)
989 if (m_max_key_length && m_offsets && m_offsets_attrs && max_key_length > m_max_key_length) {
990 std::vector<uint32> *offsets;
991 std::vector<OffsetGroupAttr> *offsets_attrs;
993 offsets = new(std::nothrow) std::vector <uint32> [max_key_length];
994 if (!offsets) return;
996 offsets_attrs = new(std::nothrow) std::vector <OffsetGroupAttr> [max_key_length];
997 if (!offsets_attrs) {
1002 for (size_t i = 0; i < m_max_key_length; ++i) {
1003 offsets [i] = m_offsets [i];
1004 offsets_attrs [i] = m_offsets_attrs [i];
1007 delete [] m_offsets;
1008 delete [] m_offsets_attrs;
1010 m_offsets = offsets;
1011 m_offsets_attrs = offsets_attrs;
1012 m_max_key_length = max_key_length;
// Load the table content from its text form.
//
// The data is expected between a "BEGIN_TABLE" and an "END_TABLE" line.
// Each line holds a key, a phrase and optionally a frequency, separated by
// blanks or tabs. The phrase may be given in UTF-8 or as a sequence of
// "0xHHHH" groups. All entries are collected first; then one content buffer
// is allocated and the records are written into it, each starting with a
// flag/key length byte, the phrase length byte and the 16 bit frequency,
// followed by the key and the phrase. Finally the offsets are sorted.
//
// Requires init () to have been called (m_max_key_length and m_offsets set).
// NOTE(review): several lines of this function are not visible here (the
// fallback value of freq, the handling of invalid lines, the per-record
// size overhead and the pointer advances while writing the records).
1017 GenericTableContent::load_text (FILE *fp)
1019 if (!fp || feof (fp) || !m_max_key_length || !m_offsets)
1028 WideString wide_phrase;
1030 std::vector <String> keys;
1031 std::vector <String> phrases;
1032 std::vector <uint32> frequencies;
// Number of entries seen for each key length.
// NOTE(review): this array has SCIM_GT_MAX_KEY_LENGTH elements but is
// indexed with the key length itself below, while m_offsets is indexed with
// key length - 1. A key of exactly SCIM_GT_MAX_KEY_LENGTH characters would
// access one element past the end, unless is_valid_no_wildcard_key ()
// rejects such keys -- confirm.
1035 uint32 lengths_count [SCIM_GT_MAX_KEY_LENGTH];
1041 for (i = 0; i < SCIM_GT_MAX_KEY_LENGTH; ++i)
1042 lengths_count [i] = 0;
1044 if (_get_line (fp) != String ("BEGIN_TABLE"))
1047 while (!feof (fp)) {
1048 temp = _get_line (fp);
1050 if (temp.length () == 0) return false;
1051 if (temp == String ("END_TABLE")) break;
// First field is the key, the remainder is "phrase [frequency]".
1053 paramstr = _get_param_portion (temp, " \t");
1054 valuestr = _get_value_portion (temp, " \t");
1056 if (paramstr.length () == 0 || valuestr.length () == 0) {
1057 std::cerr << "Invalid line in content: " << temp << "\n";
1061 phrasestr = _get_param_portion (valuestr, " \t");
1062 freqstr = _get_value_portion (valuestr, " \t");
1064 if (phrasestr.length () >= 6 && phrasestr [0] == '0' && tolower (phrasestr[1]) == 'x')
1065 wide_phrase = _hex_to_wide_string (phrasestr);
1067 wide_phrase = utf8_mbstowcs (phrasestr);
1069 if (!wide_phrase.length ()) continue;
1071 if (freqstr.length ())
1072 freq = atoi (freqstr.c_str ());
// The phrase length is stored in one byte: cut whole characters off the
// end until the UTF-8 form fits into 255 bytes.
1076 phrasestr = utf8_wcstombs (wide_phrase);
1077 while (phrasestr.length () > 255) {
1078 wide_phrase.erase (wide_phrase.length () - 1);
1079 phrasestr = utf8_wcstombs (wide_phrase);
// Keys longer than the maximum key length are truncated.
1082 if (paramstr.length () > m_max_key_length)
1083 paramstr.erase (m_max_key_length);
1085 if (is_valid_no_wildcard_key (paramstr)) {
1086 keys.push_back (paramstr);
1087 phrases.push_back (phrasestr);
1088 frequencies.push_back (freq);
1089 ++lengths_count [paramstr.length ()];
1093 // Use phrase sequence as frequency to retain the correct order, if the frequency is missing.
// A frequency of (uint32) ~0 marks a missing value; such entries receive
// descending numbers per key length. All frequencies are limited to 65535.
1094 for (i = 0; i < frequencies.size (); ++i) {
1095 if (frequencies [i] == (uint32) ~0)
1096 frequencies [i] = lengths_count [keys [i].length ()] --;
1098 if (frequencies [i] > 65535)
1099 frequencies [i] = 65535;
1102 // Calculate the content size.
1103 uint32 content_size = 0;
1104 for (i = 0; i < keys.size (); ++i) {
1105 content_size += keys [i].length ();
1106 content_size += phrases [i].length ();
1110 // The content can not be larger than 2GB
1111 if (content_size >= 0x7FFFFFFF)
1114 m_content = new(std::nothrow) unsigned char [content_size];
1116 if (!m_content) return false;
1118 m_content_allocated_size = content_size;
1119 m_content_size = content_size;
1121 // Store the phrases and build index
1122 unsigned char *p = m_content;
1123 uint32 key_length, phrase_length;
1124 for (i = 0; i < keys.size (); ++i) {
1125 key_length = keys [i].length ();
1126 phrase_length = phrases [i].length ();
// The offset of the record goes into the index of its key length.
1128 m_offsets [key_length - 1].push_back ((uint32) (p - m_content));
// Byte 0: flag 0x80 plus the key length in the low six bits.
1130 *(p++) = static_cast <unsigned char> (0x80 | (key_length & 0x3F));
1131 *(p++) = static_cast <unsigned char> (phrase_length);
1133 scim_uint16tobytes (p, frequencies [i]);
1136 memcpy (p, keys [i].c_str (), key_length);
1139 memcpy (p, phrases [i].c_str (), phrase_length);
1143 sort_all_offsets ();
// Load the table content from its binary form.
//
// After the "BEGIN_TABLE" line a four byte content size follows, then the
// records themselves. The content is either accessed through a private,
// writable memory mapping of the whole file, or read into a newly allocated
// buffer. Afterwards the records are walked once to build the offset index,
// and the offsets are sorted.
//
// Requires init () to have been called (m_max_key_length and m_offsets set).
// NOTE(review): the condition under which mmap is attempted, the error
// clean-up and the return statements are not visible here.
1149 GenericTableContent::load_binary (FILE *fp, bool mmapped)
1151 if (!fp || feof (fp) || !m_max_key_length || !m_offsets)
1156 if (_get_line (fp) != String ("BEGIN_TABLE"))
1159 unsigned char buff [4];
1160 long cur_pos, end_pos;
1162 if (fread (buff, 4, 1, fp) != 1)
1165 uint32 content_size = scim_bytestouint32 (buff);
1167 if (!content_size || content_size >= 0x7FFFFFFF)
// Determine the file size, then go back to the start of the content.
1170 cur_pos = ftell (fp);
1171 fseek (fp, 0, SEEK_END);
1172 end_pos = ftell (fp);
1173 fseek (fp, cur_pos, SEEK_SET);
// NOTE(review): compares a long with a uint32, and checks the size against
// the file length rather than against the bytes remaining after cur_pos.
1175 if (end_pos < content_size)
// The whole file is mapped from offset 0; the content starts at cur_pos
// inside the mapping.
1179 m_mmapped_ptr = mmap (0, (size_t) end_pos, PROT_READ | PROT_WRITE, MAP_PRIVATE, fileno (fp), 0);
1180 if (m_mmapped_ptr != MAP_FAILED) {
1182 m_mmapped_size = end_pos;
1183 m_content_size = content_size;
1184 m_content = static_cast<unsigned char*>(m_mmapped_ptr) + cur_pos;
1192 // if not mapped, or map failed, then load the content directly.
1194 m_content = new(std::nothrow) unsigned char [content_size];
1196 if (!m_content) return false;
1198 m_content_allocated_size = content_size;
1199 m_content_size = content_size;
1201 if (fread (m_content, m_content_size, 1, fp) != 1) {
1207 // Now create the index
1208 unsigned char *p = m_content;
1210 uint32 phrase_length;
1212 while (p - m_content < m_content_size) {
1213 key_length = static_cast<uint32> ((*p) & 0x3F);
1214 phrase_length = static_cast<uint32> (*(p+1));
// A record with an empty key or an empty phrase is treated as an error.
1216 if (!key_length || !phrase_length) {
// NOTE(review): key_length comes from the file and may be up to 63; no check
// against m_max_key_length is visible before m_offsets is indexed.
1222 m_offsets [key_length - 1].push_back (static_cast<uint32>(p - m_content));
// Four header bytes, then the key and the phrase.
1224 p += (4 + key_length + phrase_length);
1227 sort_all_offsets ();
// Load phrase frequencies from their text form.
//
// The data is expected between "BEGIN_FREQUENCY_TABLE" and
// "END_FREQUENCY_TABLE". Each line holds a content offset and a frequency
// separated by blanks or tabs; every pair is passed on to
// set_phrase_frequency ().
// Returns false if the content is not valid, if fp is null or at end of
// file, or if an empty line result is met before the end marker.
// NOTE(review): the reaction to malformed lines and to a failing
// set_phrase_frequency () is not visible here.
1233 GenericTableContent::load_freq_text (FILE *fp)
1235 if (!valid () || !fp || feof (fp)) return false;
1244 if (_get_line (fp) != String ("BEGIN_FREQUENCY_TABLE"))
1247 while (!feof (fp)) {
1248 temp = _get_line (fp);
1250 if (temp.length () == 0) return false;
1251 if (temp == String ("END_FREQUENCY_TABLE")) break;
1253 paramstr = _get_param_portion (temp, " \t");
1254 valuestr = _get_value_portion (temp, " \t");
1256 if (paramstr.length () == 0 || valuestr.length () == 0)
// Both fields are decimal numbers.
1259 offset = atoi (paramstr.c_str ());
1260 freq = atoi (valuestr.c_str ());
1262 if (!set_phrase_frequency (offset, freq))
// Load phrase frequencies from their binary form.
//
// After the "BEGIN_FREQUENCY_TABLE" line, records of eight bytes follow:
// a 32 bit content offset and a 32 bit frequency. The pair
// (0xFFFF, 0xFFFF) is tested for separately -- presumably it is the end
// marker. Every other pair is passed on to set_phrase_frequency ().
// NOTE(review): the statements executed on a read error, on the marker and
// on a failing set_phrase_frequency () are not visible here.
1272 GenericTableContent::load_freq_binary (FILE *fp)
1274 if (!valid () || !fp || feof (fp)) return false;
1281 unsigned char buf [8];
1283 if (_get_line (fp) != String ("BEGIN_FREQUENCY_TABLE"))
1286 while (!feof (fp)) {
1287 if (fread (buf, 8, 1, fp) != 1)
1290 offset = scim_bytestouint32 (buf);
1291 freq = scim_bytestouint32 (buf+4);
1293 if (offset == 0xFFFF && freq == 0xFFFF)
1296 if (!set_phrase_frequency (offset, freq))
// Write the table content in text form.
//
// Output starts with a comment line and "BEGIN_TABLE" and ends with
// "END_TABLE". The records are visited per key length through the offset
// index; for a record the key, a tab, the phrase, a tab and the frequency
// as decimal number are written. Every write is checked.
// NOTE(review): the pointer advances between the writes, a possible filter
// on the records, and the return statements are not visible here.
1306 GenericTableContent::save_text (FILE *fp) const
1308 if (!fp || !valid ())
1312 uint32 phrase_length;
1314 const unsigned char *p;
1316 if (fprintf (fp, "### Begin Table data.\n") < 0 || fprintf (fp, "BEGIN_TABLE\n") < 0)
1319 for (size_t i = 0; i < m_max_key_length; ++i) {
1320 for (std::vector <uint32>::const_iterator it = m_offsets [i].begin (); it != m_offsets [i].end (); ++ it) {
1321 p = m_content + *it;
// Decode the record header: key length (low six bits), phrase length,
// 16 bit frequency.
1323 key_length = static_cast<uint32> ((*(p++) & 0x3F));
1324 phrase_length = static_cast<uint32> (*(p++));
1325 freq = scim_bytestouint16 (p);
1328 if (fwrite (p, key_length, 1, fp) != 1)
1331 if (fputc ('\t', fp) == EOF)
1336 if (fwrite (p, phrase_length, 1, fp) != 1)
1341 if (fputc ('\t', fp) == EOF)
1344 if (fprintf (fp, "%u\n", freq) < 0)
1350 if (fprintf (fp, "END_TABLE\n") < 0)
// Write the phrase table to fp in binary form.
// Output is the "### Begin Table data." and BEGIN_TABLE lines, a 4-byte
// content size, the raw records (4 header bytes + key + phrase each) and an
// END_TABLE line.
// @param fp  stream to write to; rejected when null or when the content is
//            not valid ().
1359 GenericTableContent::save_binary (FILE *fp) const
1361 if (!fp || !valid ())
1365 uint32 phrase_length;
1366 const unsigned char *p;
// Holds the encoded content size.
1369 unsigned char buf [4];
1370 uint32 content_size = 0;
1372 // Calculate the content size
1373 for (i = 0; i < m_max_key_length; ++i) {
1374 for (std::vector <uint32>::const_iterator it = m_offsets [i].begin (); it != m_offsets [i].end (); ++ it) {
1375 p = m_content + *it;
// Byte 0: key length in the low 6 bits; byte 1: phrase length.
1377 key_length = static_cast<uint32> ((*(p++) & 0x3F));
1378 phrase_length = static_cast<uint32> (*p);
// Each record occupies a 4-byte header plus its key and phrase bytes.
1379 content_size += (key_length + phrase_length + 4);
// Section header.
1384 if (fprintf (fp, "### Begin Table data.\n") < 0 || fprintf (fp, "BEGIN_TABLE\n") < 0)
1387 scim_uint32tobytes (buf, content_size);
1389 // Write the content size first.
1390 if (fwrite (buf, 4, 1, fp) != 1)
// Second pass: copy the records themselves, grouped by key length.
1394 for (i = 0; i < m_max_key_length; ++i) {
1395 for (std::vector <uint32>::const_iterator it = m_offsets [i].begin (); it != m_offsets [i].end (); ++ it) {
1396 p = m_content + *it;
1398 key_length = static_cast<uint32> ((*p) & 0x3F);
1399 phrase_length = static_cast<uint32> (*(p + 1));
// The whole record (header, key and phrase) is written in one call.
1401 if (fwrite (p, 4 + key_length + phrase_length, 1, fp) != 1)
// Section trailer.
1407 if (fprintf (fp, "END_TABLE\n") < 0)
// Write the frequencies of modified phrases to fp in text form.
// Output is a "### Begin Frequency data." line, a BEGIN_FREQUENCY_TABLE
// line, one "<offset> TAB <frequency>" line per modified phrase and an
// END_FREQUENCY_TABLE line.
// @param fp  stream to write to; returns false when it is null or the
//            content is not valid ().
1416 GenericTableContent::save_freq_text (FILE *fp) const
1418 if (!fp || !valid ()) return false;
// Section header.
1420 if (fprintf (fp, "### Begin Frequency data.\n") < 0 || fprintf (fp, "BEGIN_FREQUENCY_TABLE\n") < 0)
1423 const unsigned char *p;
// Walk the offset list of each key length.
1426 for (size_t i = 0; i < m_max_key_length; ++i) {
1427 for (std::vector <uint32>::const_iterator it = m_offsets [i].begin (); it != m_offsets [i].end (); ++ it) {
1428 p = m_content + *it;
1430 // Test if the phrase is valid and modified (0x80+0x40).
1431 if (((*p) & 0xC0) == 0xC0) {
// The 16-bit frequency is stored in bytes 2 and 3 of the record.
1432 freq = scim_bytestouint16 (p+2);
// The record offset identifies the phrase in the frequency table.
1434 if (fprintf (fp, "%u\t%u\n", *it, freq) < 0)
// Section trailer.
1440 if (fprintf (fp, "END_FREQUENCY_TABLE\n") < 0)
// Write the frequencies of modified phrases to fp in binary form.
// Output is a "### Begin Frequency Table data." line, a
// BEGIN_FREQUENCY_TABLE line, one 8-byte record (32-bit offset, 32-bit
// frequency) per modified phrase, a terminator record and an
// END_FREQUENCY_TABLE line.
// @param fp  stream to write to; returns false when it is null or the
//            content is not valid ().
1449 GenericTableContent::save_freq_binary (FILE *fp) const
1451 if (!fp || !valid ()) return false;
// Section header.
1453 if (fprintf (fp, "### Begin Frequency Table data.\n") < 0 || fprintf (fp, "BEGIN_FREQUENCY_TABLE\n") < 0)
1456 const unsigned char *p;
// Scratch buffer holding one offset/frequency record.
1457 unsigned char buf [8];
// Walk the offset list of each key length.
1459 for (size_t i = 0; i < m_max_key_length; ++i) {
1460 for (std::vector <uint32>::const_iterator it = m_offsets [i].begin (); it != m_offsets [i].end (); ++ it) {
1461 p = m_content + *it;
1463 // Test if the phrase is valid and modified (0x80+0x40).
1464 if (((*p) & 0xC0) == 0xC0) {
1465 scim_uint32tobytes (buf, *it); // Offset
1466 scim_uint32tobytes (buf+4, scim_bytestouint16 (p+2)); // Frequency
1468 if (fwrite (buf, 8, 1, fp) != 1)
// Terminator record: offset and frequency both set to 0xFFFF.
// NOTE(review): a real entry at offset 0xFFFF with frequency 0xFFFF would be
// indistinguishable from this terminator when the table is read back.
1474 scim_uint32tobytes (buf, 0xFFFF);
1475 scim_uint32tobytes (buf+4, 0xFFFF);
1477 if (fwrite (buf, 8, 1, fp) != 1)
// Section trailer.
1480 if (fprintf (fp, "END_FREQUENCY_TABLE\n") < 0)
1489 GenericTableContent::valid () const
1491 return m_content && m_content_size && m_offsets && m_offsets_attrs && m_max_key_length;
1495 GenericTableContent::is_valid_key (const String & key) const
1497 int multi_wildcard_count = 0;
1499 if (key.length () > m_max_key_length)
1502 for (String::const_iterator i = key.begin (); i != key.end (); ++ i) {
1503 if (!is_defined_char (*i))
1505 else if (is_multi_wildcard_char (*i))
1506 multi_wildcard_count ++;
1509 return multi_wildcard_count < 2;
1513 GenericTableContent::is_wildcard_key (const String & key) const
1515 for (String::const_iterator i = key.begin (); i != key.end (); ++ i) {
1516 if (is_wildcard_char (*i))
1524 GenericTableContent::is_pure_wildcard_key (const String & key) const
1526 for (String::const_iterator i = key.begin (); i != key.end (); ++ i) {
1527 if (!is_wildcard_char (*i))
1535 GenericTableContent::is_valid_no_wildcard_key (const String & key) const
1537 if (key.length () > m_max_key_length)
1540 for (String::const_iterator i = key.begin (); i != key.end (); ++ i)
1541 if (is_wildcard_char (*i) || !is_valid_char (*i))
// Release the table content and empty the per-key-length index vectors.
1548 GenericTableContent::clear ()
// A memory mapped table is unmapped ...
1551 munmap (m_mmapped_ptr, m_mmapped_size);
// ... otherwise the heap buffer holding the content is freed.
1552 } else if (m_content) {
1553 delete [] m_content;
1558 m_content_allocated_size = 0;
// Empty the offset vector of every key length.
1565 for (size_t i = 0; i < m_max_key_length; ++ i)
1566 m_offsets [i].clear ();
// Empty the group attributes of every key length, if they exist.
1569 if (m_offsets_attrs) {
1570 for (size_t i = 0; i < m_max_key_length; ++ i)
1571 m_offsets_attrs [i].clear ();
// Make sure the content buffer has room for `add' more bytes after the
// current content, growing the buffer when necessary.
// @param add  number of additional bytes needed.
1576 GenericTableContent::expand_content_space (uint32 add)
1578 bool result = false;
// Grow only when the free space at the end of the buffer is too small.
1580 if (m_content_allocated_size - m_content_size < add) {
// Start from twice the current size (plus one, so an empty buffer can grow)
// and keep doubling until the extra room is large enough.
// NOTE(review): new_size is a uint32; the doubling presumably assumes the
// total stays well below 4 GiB — confirm.
1581 uint32 new_size = m_content_size * 2 + 1;
1583 while (new_size - m_content_size < add)
1584 new_size = new_size * 2;
// nothrow allocation: a failure yields a null pointer, not an exception.
1586 unsigned char *new_space = new (std::nothrow) unsigned char [new_size];
1589 m_content_allocated_size = new_size;
// Move the existing content over and drop the old buffer.
1591 memcpy (new_space, m_content, m_content_size);
1592 delete [] m_content;
1594 m_content = new_space;
// Sort the offset vector of every key length by key, then rebuild the
// group attributes of all key lengths.
1606 GenericTableContent::sort_all_offsets ()
// Vector i holds the records whose key is i + 1 chars long, so the
// comparator is told to compare i + 1 key chars.
1609 for (size_t i = 0; i < m_max_key_length; ++i)
1610 std::stable_sort (m_offsets [i].begin (),
1611 m_offsets [i].end (),
1612 OffsetLessByKeyFixedLen (m_content, i + 1));
// The group attributes depend on the order of the offsets.
1613 init_all_offsets_attrs ();
1618 GenericTableContent::init_all_offsets_attrs ()
1620 for (size_t i = 1; i <= m_max_key_length; ++i)
1621 init_offsets_attrs (i);
// Rebuild the group attributes (m_offsets_attrs) of one key length.
// The offsets of that length are split into consecutive groups of
// OFFSET_GROUP_SIZE entries; every group records the range of offsets it
// covers and a mask accumulated from the keys inside it.
// @param len  key length; ignored unless it is between 1 and
//             m_max_key_length and the content is valid ().
// NOTE(review): the body uses len as a zero-based index (attr is built with
// len+1) — confirm len is decremented right after the guard.
1625 GenericTableContent::init_offsets_attrs (size_t len)
1627 if (valid () && len && len <= m_max_key_length) {
// Start from scratch.
1631 m_offsets_attrs [len].clear ();
1633 std::vector <uint32>::const_iterator i;
1636 OffsetGroupAttr attr (len+1);
// A key made of single wildcard chars only, as long as the keys of this group.
1640 wildcard.insert (wildcard.begin (), len+1, m_single_wildcard_char);
1642 // Set the wildcard chars
1643 attr.mask.set (wildcard);
1645 for (i = m_offsets [len].begin (); i != m_offsets [len].end (); ++ i) {
// Add the key of this record to the mask of the current group.
1646 attr.mask.set (get_key (*i));
// The group is full: close it just after the current entry and store it.
1647 if (++count == OFFSET_GROUP_SIZE) {
1648 attr.end = i - m_offsets [len].begin () + 1;
1649 m_offsets_attrs [len].push_back (attr);
// The next group starts where this one ended.
1651 attr.begin = attr.end;
1653 attr.mask.set (wildcard);
// Store the trailing group; here i is the end iterator, so no + 1.
1658 attr.end = i - m_offsets [len].begin ();
1659 m_offsets_attrs [len].push_back (attr);
1662 // fprintf (stderr, "%d Groups for len %d\n", m_offsets_attrs [len].size (), len + 1);
// Expand a key holding a multi wildcard char into keys that hold single
// wildcard chars only.
// @param keys  receives the expanded keys.
// @param key   the key to expand.
// A key without a multi wildcard char is stored unchanged. Otherwise the
// multi wildcard is replaced by one, two, ... single wildcard chars, one
// new key per replacement, with `remain' (the room left up to
// m_max_key_length) bounding how many chars are added.
1667 GenericTableContent::expand_multi_wildcard_key (std::vector <String> &keys, const String &key) const
// [begin1, end1) will be the part before the multi wildcard char and
// [begin2, end2) the part after it.
1671 String::const_iterator begin1 = key.begin ();
1672 String::const_iterator begin2 = key.begin ();
1673 String::const_iterator end1 = key.end ();
1674 String::const_iterator end2 = key.end ();
// Look for the multi wildcard char.
1676 for (; begin2 != end2; ++ begin2)
1677 if (is_multi_wildcard_char (*begin2))
1680 // No multi wildcard char
1681 if (begin2 == end2) {
1682 keys.push_back (key);
// Replacement text, starting with one single wildcard char.
1686 String wildcard (&m_single_wildcard_char, 1);
// Number of chars the key may still grow by.
1687 uint32 remain = m_max_key_length - key.length ();
// Shortest expansion: exactly one single wildcard char.
1692 keys.push_back (String (begin1, end1) + wildcard + String (begin2, end2));
// Longer expansions: one more single wildcard char per iteration.
1694 for (; remain; -- remain) {
1695 wildcard.push_back (m_single_wildcard_char);
1696 keys.push_back (String (begin1, end1) + wildcard + String (begin2, end2));
1701 GenericTableContent::transform_single_wildcard (String &key) const
1703 bool result = false;
1704 for (String::iterator i = key.begin (); i != key.end (); ++i) {
1705 if (is_single_wildcard_char (*i)) {
1706 *i = m_single_wildcard_char;
// Append to `offsets' the records matching a key without wildcard chars.
// @param offsets  receives the matching record offsets.
// @param key      the key to look up.
// @param len      key length of the records to search; 0 means the
//                 length of `key' itself.
// Returns true if at least one offset was appended.
// NOTE(review): key.length () - 1 wraps around for an empty key, which would
// make the index below invalid — callers must not pass an empty key.
1714 GenericTableContent::find_no_wildcard_key (std::vector <uint32> &offsets, const String &key, size_t len) const
1716 size_t key_len = key.length () - 1;
// Remembered to tell whether anything was added.
1718 size_t old_size = offsets.size ();
1720 if (!len) len = key_len;
// find_op compares as many chars as `key' has, sort_op as many as the
// records being searched have.
1724 OffsetLessByKeyFixedLen find_op (m_content, key_len + 1);
1725 OffsetLessByKeyFixedLen sort_op (m_content, len + 1);
// Visit every offset group of this key length.
1727 std::vector <OffsetGroupAttr>::iterator i = m_offsets_attrs [len].begin ();
1728 for (; i != m_offsets_attrs [len].end (); ++ i) {
// The mask quickly rules out groups that cannot contain the key.
1729 if (i->mask.check (key)) {
// Bring the group back into key order (presumably with sort_op).
1731 std::stable_sort (m_offsets [len].begin () + i->begin,
1732 m_offsets [len].begin () + i->end,
// Binary search for the run of matching records inside the group.
1737 std::vector<uint32>::const_iterator lb, ub;
1738 lb = std::lower_bound (m_offsets [len].begin () + i->begin,
1739 m_offsets [len].begin () + i->end,
1742 ub = std::upper_bound (m_offsets [len].begin () + i->begin,
1743 m_offsets [len].begin () + i->end,
1746 offsets.insert (offsets.end (), lb, ub);
1751 return offsets.size () > old_size;
// Append to `offsets' the records matching a key that contains single
// wildcard chars; only records whose key is as long as `key' are searched.
// @param offsets  receives the matching record offsets.
// @param key      the key to look up.
// Returns true if at least one offset was appended.
// NOTE(review): key.length () - 1 wraps around for an empty key — callers
// must not pass one.
1755 GenericTableContent::find_wildcard_key (std::vector <uint32> &offsets, const String &key) const
// Index of the offset vector holding keys of this length.
1757 size_t idx = key.length () - 1;
// Remembered to tell whether anything was added.
1759 size_t old_size = offsets.size ();
1762 std::vector <OffsetGroupAttr>::iterator i = m_offsets_attrs [idx].begin ();
// Comparator built from the key and the single wildcard char.
1763 OffsetLessByKeyFixedLenMask less_op (m_content, idx+1, key, m_single_wildcard_char);
1765 for (; i != m_offsets_attrs [idx].end (); ++ i) {
// The mask quickly rules out groups that cannot contain the key.
1766 if (i->mask.check (key)) {
// Reorder the group before it is binary searched (presumably with less_op).
1768 std::stable_sort (m_offsets [idx].begin () + i->begin,
1769 m_offsets [idx].begin () + i->end,
1772 std::vector<uint32>::const_iterator lb, ub;
1773 lb = std::lower_bound (m_offsets [idx].begin () + i->begin,
1774 m_offsets [idx].begin () + i->end,
1777 ub = std::upper_bound (m_offsets [idx].begin () + i->begin,
1778 m_offsets [idx].begin () + i->end,
1781 offsets.insert (offsets.end (), lb, ub);
1786 return offsets.size () > old_size;
1790 GenericTableContent::find (std::vector <uint32> &offsets,
1794 bool sort_by_length) const
1796 if (!valid () || key.length () > m_max_key_length) return false;
1800 transform_single_wildcard (nkey);
1802 size_t old_size = offsets.size ();
1804 std::vector <uint32>::size_type begin = offsets.size ();
1806 if (!is_wildcard_key (nkey)) {
1807 find_no_wildcard_key (offsets, nkey);
1809 if (auto_wildcard) {
1810 for (size_t len = nkey.length () + 1; len <= m_max_key_length; ++ len)
1811 find_no_wildcard_key (offsets, nkey, len);
1815 std::vector <String> nkeys;
1817 expand_multi_wildcard_key (nkeys, nkey);
1819 for (std::vector <String>::iterator i = nkeys.begin (); i != nkeys.end (); ++i) {
1820 // A pure wildcard key, copy all offsets with equal length directly.
1821 if (is_pure_wildcard_key (*i)) {
1822 offsets.insert (offsets.end (),
1823 m_offsets [i->length () - 1].begin (),
1824 m_offsets [i->length () - 1].end ());
1826 find_wildcard_key (offsets, *i);
1831 // Sort all matched offsets by phrase length.
1834 std::stable_sort (offsets.begin () + begin,
1836 OffsetGreaterByPhraseLength (m_content));
1838 std::stable_sort (offsets.begin () + begin,
1840 OffsetCompareByKeyLenAndFreq (m_content));
1843 return offsets.size () > old_size;
1847 GenericTableContent::search (const String &key, int search_type) const
1849 if (!valid () || key.length () > m_max_key_length ||
1850 (key.length () == m_max_key_length && search_type == GT_SEARCH_ONLY_LONGER))
1855 transform_single_wildcard (nkey);
1857 if (!is_wildcard_key (nkey)) {
1858 if (search_type != GT_SEARCH_ONLY_LONGER && search_no_wildcard_key (nkey))
1861 if (search_type != GT_SEARCH_NO_LONGER) {
1862 for (size_t len = nkey.length () + 1; len <= m_max_key_length; ++ len)
1863 if (search_no_wildcard_key (nkey, len))
1867 std::vector <String> nkeys;
1869 expand_multi_wildcard_key (nkeys, nkey);
1871 // Single wildcard key and need search longer
1872 if (search_type != GT_SEARCH_NO_LONGER &&
1873 nkey.length () < m_max_key_length &&
1874 nkeys.size () == 1) {
1875 nkey.push_back (m_multi_wildcard_char);
1877 expand_multi_wildcard_key (nkeys, nkey);
1879 if (search_type == GT_SEARCH_INCLUDE_LONGER)
1880 nkeys.push_back (nkey);
1883 // It's multi wildcard key and only search longer
1884 else if (nkeys.size () > 1 && GT_SEARCH_ONLY_LONGER) {
1885 for (size_t i = 0; i < nkeys.size (); ++i)
1886 if (nkeys [i].length () < m_max_key_length)
1887 nkeys [i].push_back (m_single_wildcard_char);
1890 for (std::vector <String>::iterator i = nkeys.begin (); i != nkeys.end (); ++i) {
1891 // A pure wildcard key, copy all offsets with equal length directly.
1892 if (is_pure_wildcard_key (*i) && m_offsets [i->length () - 1].size ())
1894 else if (search_wildcard_key (*i))
// Test whether a record matches a key without wildcard chars.
// @param key  the key to look up.
// @param len  key length of the records to search; 0 means the length of
//             `key' itself.
// NOTE(review): key.length () - 1 wraps around for an empty key, which would
// make the index below invalid — callers must not pass an empty key.
1903 GenericTableContent::search_no_wildcard_key (const String &key, size_t len) const
1905 size_t key_len = key.length () - 1;
1907 if (!len) len = key_len;
// find_op compares as many chars as `key' has, sort_op as many as the
// records being searched have.
1911 OffsetLessByKeyFixedLen find_op (m_content, key_len + 1);
1912 OffsetLessByKeyFixedLen sort_op (m_content, len + 1);
// Visit every offset group of this key length.
1914 std::vector <OffsetGroupAttr>::iterator i = m_offsets_attrs [len].begin ();
1915 for (; i != m_offsets_attrs [len].end (); ++ i) {
// The mask quickly rules out groups that cannot contain the key.
1916 if (i->mask.check (key)) {
// Bring the group back into key order (presumably with sort_op).
1918 std::stable_sort (m_offsets [len].begin () + i->begin,
1919 m_offsets [len].begin () + i->end,
// One hit inside the group is enough.
1924 if (std::binary_search (m_offsets [len].begin () + i->begin,
1925 m_offsets [len].begin () + i->end,
// Test whether a record matches a key that contains single wildcard chars;
// only records whose key is as long as `key' are searched.
// @param key  the key to look up.
// NOTE(review): key.length () - 1 wraps around for an empty key — callers
// must not pass one.
1937 GenericTableContent::search_wildcard_key (const String &key) const
// Index of the offset vector holding keys of this length.
1939 size_t idx = key.length () - 1;
1942 std::vector <OffsetGroupAttr>::iterator i = m_offsets_attrs [idx].begin ();
// Comparator built from the key and the single wildcard char.
1943 OffsetLessByKeyFixedLenMask less_op (m_content, idx+1, key, m_single_wildcard_char);
1945 for (; i != m_offsets_attrs [idx].end (); ++ i) {
// The mask quickly rules out groups that cannot contain the key.
1946 if (i->mask.check (key)) {
// Reorder the group before it is binary searched (presumably with less_op).
1948 std::stable_sort (m_offsets [idx].begin () + i->begin,
1949 m_offsets [idx].begin () + i->end,
// One hit inside the group is enough.
1952 if (std::binary_search (m_offsets [idx].begin () + i->begin,
1953 m_offsets [idx].begin () + i->end,
1965 GenericTableContent::search_phrase (const String &key, const WideString &phrase) const
1967 if (!valid () || key.length () > m_max_key_length) return false;
1969 if (!is_wildcard_key (key) && phrase.length ()) {
1970 std::vector <uint32> offsets;
1971 if (find_no_wildcard_key (offsets, key)) {
1972 String utf8_phrase = utf8_wcstombs (phrase);
1973 OffsetLessByPhrase op (m_content);
1975 std::sort (offsets.begin (), offsets.end (), op);
1976 return std::binary_search (offsets.begin (), offsets.end (), utf8_phrase, op);
// Delete the phrase stored at the given offset.
// The record is flagged as deleted in the content buffer and its offset is
// removed from the offset vector of its key length; the content bytes stay
// in place.
// @param offset  offset of the phrase record inside the content.
// Nothing is done for a memory mapped table or when the key length read
// from the record is 0 or larger than m_max_key_length.
1983 GenericTableContent::delete_phrase (uint32 offset)
1985 size_t len = get_key_length (offset);
1986 if (!m_mmapped && len && len <= m_max_key_length) {
// Clear the top bit (0x80) of the header byte.
1987 *(m_content + offset) &= 0x7F;
// Sort the vector by offset value so that the offset can be binary searched.
1988 std::stable_sort (m_offsets [len - 1].begin (),
1989 m_offsets [len - 1].end ());
1991 std::vector <uint32>::iterator lb, ub;
1993 lb = std::lower_bound (m_offsets [len - 1].begin (),
1994 m_offsets [len - 1].end (),
1996 ub = std::upper_bound (m_offsets [len - 1].begin (),
1997 m_offsets [len - 1].end (),
// Drop the offset from the index.
2000 m_offsets [len - 1].erase (lb);
// Restore the key order and rebuild the group attributes of this length.
2002 std::stable_sort (m_offsets [len - 1].begin (),
2003 m_offsets [len - 1].end (),
2004 OffsetLessByKeyFixedLen (m_content, len));
2006 init_offsets_attrs (len);
// The vector was reordered above, so the key order is restored here as well.
2013 std::stable_sort (m_offsets [len - 1].begin (),
2014 m_offsets [len - 1].end (),
2015 OffsetLessByKeyFixedLen (m_content, len));
// Add a new phrase to the table.
// @param key     key of the phrase; must pass is_valid_no_wildcard_key ().
// @param phrase  the phrase; must not be empty and its UTF-8 form must not
//                exceed 255 bytes.
// @param freq    initial frequency; values above 0xFFFF are clamped.
// The phrase is not added to a memory mapped table, nor when the same
// key/phrase pair already exists.
// NOTE(review): an empty key passes is_valid_no_wildcard_key () and would
// make key_len - 1 wrap around below — confirm callers never pass one.
// NOTE(review): a negative freq is not clamped before the (uint16) cast.
2021 GenericTableContent::add_phrase (const String &key, const WideString &phrase, int freq)
2023 if (!m_mmapped && m_offsets &&
2024 is_valid_no_wildcard_key (key) && phrase.length () &&
2025 !search_phrase (key, phrase)) {
// Phrases are stored UTF-8 encoded.
2027 String utf8_phrase = utf8_wcstombs (phrase);
2028 size_t key_len = key.length ();
2029 size_t phrase_len = utf8_phrase.length ();
// A record is a 4-byte header followed by the key and the phrase bytes.
2030 size_t add_size = key_len + phrase_len + 4;
// The phrase length has to fit into the single length byte of the header.
2032 if (phrase_len <= 255 && expand_content_space (add_size)) {
// The new record is appended at the end of the content.
2033 unsigned char *ptr = m_content + m_content_size;
// The frequency is stored in 16 bits.
2035 if (freq > 0xFFFF) freq = 0xFFFF;
2037 // Add the phrase content
// Header byte 0: 0x80 flag plus the key length in the low 6 bits.
2038 *(ptr++) = (unsigned char) (0x80 | (key_len & 0x3F));
// Header byte 1: phrase length in bytes.
2039 *(ptr++) = (unsigned char) phrase_len;
// Header bytes 2 and 3: frequency.
2040 scim_uint16tobytes (ptr, (uint16) freq);
2042 memcpy (ptr, key.c_str (), key_len);
2044 memcpy (ptr, utf8_phrase.c_str (), phrase_len);
2046 // Add the offset.
2047 m_offsets [key_len - 1].push_back (m_content_size);
// Keep the offsets of this key length ordered by key.
2049 std::stable_sort (m_offsets [key_len - 1].begin (),
2050 m_offsets [key_len - 1].end (),
2051 OffsetLessByKeyFixedLen (m_content, key_len));
2053 m_content_size += add_size;
// Rebuild the group attributes of this key length.
2055 init_offsets_attrs (key_len);
// Refresh the by-phrase index if it has been built already.
2057 if (m_offsets_by_phrases_inited)
2058 init_offsets_by_phrases ();
2069 GenericTableContent::init_offsets_by_phrases () const
2071 if (!valid ()) return;
2073 m_offsets_by_phrases.clear ();
2075 for (int i = 0; i < m_max_key_length; ++i) {
2076 m_offsets_by_phrases.insert (m_offsets_by_phrases.end (),
2077 m_offsets [i].begin (),
2078 m_offsets [i].end ());
2081 std::stable_sort (m_offsets_by_phrases.begin (), m_offsets_by_phrases.end (), OffsetLessByPhrase (m_content));
2083 m_offsets_by_phrases_inited = true;
// Append to `offsets' the records that store the given phrase.
// @param offsets  receives the matching record offsets.
// @param phrase   the phrase to look for.
// Returns false right away when the content is not valid () or the UTF-8
// form of the phrase is empty.
2087 GenericTableContent::find_phrase (std::vector <uint32> &offsets, const WideString &phrase) const
2089 if (!valid ()) return false;
// The by-phrase index is built on first use.
2091 if (!m_offsets_by_phrases_inited)
2092 init_offsets_by_phrases ();
// Phrases are stored UTF-8 encoded.
2094 String utf8 = utf8_wcstombs (phrase);
2096 if (!utf8.length ()) return false;
// Binary search for the run of records carrying this phrase.
2098 std::vector <uint32>::const_iterator lb, ub;
2100 lb = std::lower_bound (m_offsets_by_phrases.begin (),
2101 m_offsets_by_phrases.end (),
2103 OffsetLessByPhrase (m_content));
2104 ub = std::upper_bound (m_offsets_by_phrases.begin (),
2105 m_offsets_by_phrases.end (),
2107 OffsetLessByPhrase (m_content));
2109 offsets.insert (offsets.end (), lb, ub);
// Find the largest phrase length, as reported by get_phrase_length (), among
// the records listed in the offset vectors. Returns 0 when the content is
// not valid ().
2115 GenericTableContent::get_max_phrase_length () const
2117 if (!valid ()) return 0;
// NOTE(review): i is a signed int here while the other loops over
// m_max_key_length use size_t.
2121 for (int i = 0; i < m_max_key_length; ++i) {
2122 for (std::vector <uint32>::const_iterator j = m_offsets [i].begin (); j != m_offsets [i].end (); ++j)
// Keep the largest length seen so far.
2123 if (get_phrase_length (*j) > max_len)
2124 max_len = get_phrase_length (*j);
2130 // Implementation of GenericTableLibrary
2131 GenericTableLibrary::GenericTableLibrary ()
2132 : m_header_loaded (false),
2133 m_content_loaded (false)
// Magic and version lines found at the top of phrase table files; the magic
// line tells whether the file is in text or binary form.
2137 static const char scim_generic_table_phrase_lib_text_header [] = "SCIM_Generic_Table_Phrase_Library_TEXT";
2138 static const char scim_generic_table_phrase_lib_binary_header [] = "SCIM_Generic_Table_Phrase_Library_BINARY";
2139 static const char scim_generic_table_phrase_lib_version [] = "VERSION_1_0";
// Magic and version lines found at the top of frequency table files.
2141 static const char scim_generic_table_freq_lib_text_header [] = "SCIM_Generic_Table_Frequency_Library_TEXT";
2142 static const char scim_generic_table_freq_lib_binary_header [] = "SCIM_Generic_Table_Frequency_Library_BINARY";
2143 static const char scim_generic_table_freq_lib_version [] = "VERSION_1_0";
// Initialise the library from its table files.
// @param sys   system table file; may be empty if usr is given.
// @param usr   user table file; may be empty if sys is given.
// @param freq  frequency table file.
// @param all   presumably selects loading the content in addition to the
//              header — confirm against the header file.
2146 GenericTableLibrary::init (const String &sys, const String &usr, const String &freq, bool all)
2148 // Can only be initialized one time.
2149 if (m_header_loaded || m_content_loaded) return false;
// At least one table file is required.
2152 if (!sys.length () && !usr.length ()) return false;
2158 bool ok = load_header ();
2161 ok = load_content ();
// Load the table header and prepare both content objects with it.
// Returns true at once if the header has already been loaded.
2167 GenericTableLibrary::load_header ()
2169 if (m_header_loaded) return true;
// The header is read from the system table, or from the user table when no
// system table file is configured.
2173 if (m_sys_file.length ()) fp = fopen (m_sys_file.c_str (), "rb");
2174 else if (m_usr_file.length ()) fp = fopen (m_usr_file.c_str (), "rb");
2176 if (!fp) return false;
2181 GenericTableHeader header;
2184 bool binary = false;
// The first two lines of a table file are the magic and the version.
2186 magic = _get_line (fp);
2187 version = _get_line (fp);
// Both the text and the binary flavour of the phrase table are accepted.
2189 if (version == String (scim_generic_table_phrase_lib_version) &&
2190 (magic == String (scim_generic_table_phrase_lib_text_header) ||
2191 magic == String (scim_generic_table_phrase_lib_binary_header)))
2195 ok = header.load (fp);
// The system and the user content are both set up from the same header.
2198 ok = m_sys_content.init (header);
2201 ok = m_usr_content.init (header);
2205 m_header_loaded = true;
// Load the phrase content of the system and user tables, then the
// frequency table for the system content.
// Returns true when the system or the user content has been loaded;
// requires the header to be loaded first.
2213 GenericTableLibrary::load_content () const
2215 if (m_content_loaded) return true;
2217 if (!m_header_loaded) return false;
2219 FILE *sys_fp = 0, *usr_fp = 0, *freq_fp = 0;
// Open whichever files are configured.
2221 if (m_sys_file.length ()) sys_fp = fopen (m_sys_file.c_str (), "rb");
2222 if (m_usr_file.length ()) usr_fp = fopen (m_usr_file.c_str (), "rb");
2223 if (m_freq_file.length ()) freq_fp = fopen (m_freq_file.c_str (), "rb");
2231 bool sys_loaded = false;
2232 bool usr_loaded = false;
2234 GenericTableHeader header;
2237 // Load system table.
2238 magic = _get_line (sys_fp);
2239 version = _get_line (sys_fp);
// The magic line tells whether the file is in text or binary form.
2241 if (version == String (scim_generic_table_phrase_lib_version)) {
2242 if (magic == String (scim_generic_table_phrase_lib_text_header)) {
2245 } else if (magic == String (scim_generic_table_phrase_lib_binary_header)) {
2252 ok = header.load (sys_fp);
// The file must carry the same uuid and serial number as the loaded header.
2255 ok = (header.get_uuid () == m_header.get_uuid () &&
2256 header.get_serial_number () == m_header.get_serial_number ());
2260 ok = m_sys_content.load_binary (sys_fp, true);
2262 ok = m_sys_content.load_text (sys_fp);
// The user table goes through the same steps.
2274 magic = _get_line (usr_fp);
2275 version = _get_line (usr_fp);
2277 if (version == String (scim_generic_table_phrase_lib_version)) {
2278 if (magic == String (scim_generic_table_phrase_lib_text_header)) {
2281 } else if (magic == String (scim_generic_table_phrase_lib_binary_header)) {
2288 ok = header.load (usr_fp);
2291 ok = (header.get_uuid () == m_header.get_uuid () &&
2292 header.get_serial_number () == m_header.get_serial_number ());
// Unlike the system table, the user table passes false as second argument.
2296 ok = m_usr_content.load_binary (usr_fp, false);
2298 ok = m_usr_content.load_text (usr_fp);
// Frequencies are applied to the system content only, once it is loaded.
2306 if (sys_loaded && freq_fp) {
2310 magic = _get_line (freq_fp);
2311 version = _get_line (freq_fp);
2313 if (version == String (scim_generic_table_freq_lib_version)) {
2314 if (magic == String (scim_generic_table_freq_lib_text_header)) {
2317 } else if (magic == String (scim_generic_table_freq_lib_binary_header)) {
2324 ok = header.load (freq_fp);
// The frequency file must belong to the same table.
2327 ok = (header.get_uuid () == m_header.get_uuid () &&
2328 header.get_serial_number () == m_header.get_serial_number ());
2332 ok = m_sys_content.load_freq_binary (freq_fp);
2334 ok = m_sys_content.load_freq_text (freq_fp);
// One loaded table is enough for the library to be usable.
2340 m_content_loaded = (sys_loaded || usr_loaded);
2342 return m_content_loaded;
// Save the library to files.
// @param sys     file for the system table; skipped when empty.
// @param usr     file for the user table; skipped when empty.
// @param freq    file for the frequency table; skipped when empty.
// @param binary  selects the binary file format, otherwise text is written.
// Returns true if at least one of the three files was saved.
2346 GenericTableLibrary::save (const String &sys, const String &usr, const String &freq, bool binary)
// The content has to be in memory before it can be written out.
2348 if (!load_content ()) return false;
2350 FILE *sys_fp = 0, *usr_fp = 0, *freq_fp = 0;
2351 bool sys_saved = false;
2352 bool usr_saved = false;
2353 bool freq_saved = false;
2356 // First unlink the table files to avoid crashing the existing scim processes
2357 // which are mapping the same files.
2358 if (sys.length ()) unlink (sys.c_str ());
2359 if (usr.length ()) unlink (usr.c_str ());
2360 if (freq.length ()) unlink (freq.c_str ());
// A table file is only opened when there is valid content to put into it.
2362 if (sys.length () && m_sys_content.valid ())
2363 sys_fp = fopen (sys.c_str (), "wb");
2365 if (usr.length () && m_usr_content.valid ())
2366 usr_fp = fopen (usr.c_str (), "wb");
// The frequency file is only opened when the system content was updated.
2368 if (freq.length () && m_sys_content.updated ())
2369 freq_fp = fopen (freq.c_str (), "wb");
// System table: magic line and version line, header, then the content.
2372 ok = (fprintf (sys_fp, "%s\n%s\n",
2374 scim_generic_table_phrase_lib_binary_header :
2375 scim_generic_table_phrase_lib_text_header),
2376 scim_generic_table_phrase_lib_version) > 0);
2379 ok = m_header.save (sys_fp);
2383 ok = m_sys_content.save_binary (sys_fp);
2385 ok = m_sys_content.save_text (sys_fp);
// User table: same layout, same header.
2394 ok = (fprintf (usr_fp, "%s\n%s\n",
2396 scim_generic_table_phrase_lib_binary_header :
2397 scim_generic_table_phrase_lib_text_header),
2398 scim_generic_table_phrase_lib_version) > 0);
2401 ok = m_header.save (usr_fp);
2405 ok = m_usr_content.save_binary (usr_fp);
2407 ok = m_usr_content.save_text (usr_fp);
// Frequency table: its own magic and version, the header, then the
// frequencies of the system content.
2416 ok = (fprintf (freq_fp, "%s\n%s\n",
2418 scim_generic_table_freq_lib_binary_header :
2419 scim_generic_table_freq_lib_text_header),
2420 scim_generic_table_freq_lib_version) > 0);
2423 ok = m_header.save (freq_fp);
2427 ok = m_sys_content.save_freq_binary (freq_fp);
2429 ok = m_sys_content.save_freq_text (freq_fp);
2437 return sys_saved || usr_saved || freq_saved;
// Ordering functor for library phrase indexes: it compares the key lengths
// of two phrases and, when they are equal, puts the phrase with the higher
// frequency first.
2440 class IndexCompareByKeyLenAndFreqInLibrary
// Library used to look up key lengths and frequencies; not owned.
2442 const GenericTableLibrary *m_lib;
2444 IndexCompareByKeyLenAndFreqInLibrary (const GenericTableLibrary *p) : m_lib (p) {}
// Returns true when lhs has to be ordered before rhs.
2446 bool operator () (uint32 lhs, uint32 rhs) const {
2447 size_t llen = m_lib->get_key_length (lhs);
2448 size_t rlen = m_lib->get_key_length (rhs);
// Equal key lengths: the higher frequency wins.
2452 else if (llen == rlen)
2453 return m_lib->get_phrase_frequency (lhs) > m_lib->get_phrase_frequency (rhs);
// Ordering functor for library phrase indexes: it compares the phrase
// lengths of two phrases and, when they are equal, puts the phrase with the
// higher frequency first.
2459 class IndexGreaterByPhraseLengthInLibrary
// Library used to look up phrase lengths and frequencies; not owned.
2461 const GenericTableLibrary *m_lib;
2463 IndexGreaterByPhraseLengthInLibrary (const GenericTableLibrary *p) : m_lib (p) {}
// Returns true when lhs has to be ordered before rhs.
2465 bool operator () (uint32 lhs, uint32 rhs) const {
2466 size_t llen = m_lib->get_phrase_length (lhs);
2467 size_t rlen = m_lib->get_phrase_length (rhs);
// Equal phrase lengths: the higher frequency wins.
2471 else if (llen == rlen)
2472 return m_lib->get_phrase_frequency (lhs) > m_lib->get_phrase_frequency (rhs);
// Collect the indexes of all phrases matching a key, from the user table
// first and then from the system table.
// Returns true if `indexes' is not empty afterwards.
2479 GenericTableLibrary::find (std::vector <uint32> &indexes,
2482 bool sort_by_length) const
// The content is loaded on demand.
2486 if (!load_content ()) return false;
// User table matches; user_first is handed over as the sort flag of the
// content, so each table is sorted on its own in that mode.
2488 if (m_usr_content.valid ()) {
2489 m_usr_content.find (indexes, key, is_auto_wildcard (), user_first, sort_by_length);
// Post-process the user table results before system results are added
// (presumably to tell both kinds of index apart — confirm).
2491 for (std::vector <uint32>::iterator i = indexes.begin (); i != indexes.end (); ++i)
// System table matches are appended after the user ones.
2495 if (m_sys_content.valid ()) {
2496 m_sys_content.find (indexes, key, is_auto_wildcard (), user_first, sort_by_length);
// Sort the merged list, either by phrase length or by key length and
// frequency.
2501 std::stable_sort (indexes.begin (),
2503 IndexGreaterByPhraseLengthInLibrary (this));
2505 std::stable_sort (indexes.begin (),
2507 IndexCompareByKeyLenAndFreqInLibrary (this));
2510 return indexes.size () > 0;
// Collect the indexes of all records storing the given phrase, from the
// user table first and then from the system table.
// Returns true if `indexes' is not empty afterwards.
2514 GenericTableLibrary::find_phrase (std::vector <uint32> &indexes, const WideString &phrase) const
// The content is loaded on demand.
2518 if (!load_content ()) return false;
2520 if (m_usr_content.valid ()) {
2521 m_usr_content.find_phrase (indexes, phrase);
// Post-process the user table results before system results are added
// (presumably to tell both kinds of index apart — confirm).
2523 for (std::vector <uint32>::iterator i = indexes.begin (); i != indexes.end (); ++i)
2527 if (m_sys_content.valid ()) {
2528 m_sys_content.find_phrase (indexes, phrase);
2531 return indexes.size () > 0;
2535 vi:ts=4:nowrap:ai:expandtab