1 Index: source/data/brkitr/word.txt
2 ===================================================================
3 --- source/data/brkitr/word.txt (revision 264859)
4 +++ source/data/brkitr/word.txt (working copy)
6 # 5.0 or later as the definition of Complex_Context was corrected to include all
7 # characters requiring dictionary break.
9 -$Control = [\p{Grapheme_Cluster_Break = Control}];
10 +$Control = [\p{Grapheme_Cluster_Break = Control}];
11 $HangulSyllable = [\uac00-\ud7a3];
12 $ComplexContext = [:LineBreak = Complex_Context:];
13 $KanaKanji = [$Han $Hiragana $Katakana];
14 -$dictionaryCJK = [$KanaKanji $HangulSyllable];
15 -$dictionary = [$ComplexContext $dictionaryCJK];
16 +$dictionary = [$ComplexContext];
18 -# leave CJK scripts out of ALetterPlus
19 -$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]];
20 +$ALetterPlus = [$ALetter [$ComplexContext-$Extend-$Control]];
26 $Regional_IndicatorEx $Regional_IndicatorEx;
28 -# special handling for CJK characters: chain for later dictionary segmentation
29 -$HangulSyllable $HangulSyllable {200};
30 -$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found
33 ## -------------------------------------------------
38 $BackRegional_IndicatorEx $BackRegional_IndicatorEx;
40 -# special handling for CJK characters: chain for later dictionary segmentation
41 -$HangulSyllable $HangulSyllable;
42 -$KanaKanji $KanaKanji; #different rule status if both kanji and kana found
44 ## -------------------------------------------------
47 Index: source/data/brkitr/brklocal.mk
48 ===================================================================
49 --- source/data/brkitr/brklocal.mk (revision 264859)
50 +++ source/data/brkitr/brklocal.mk (working copy)
54 # List of dictionary files (dict).
55 -BRK_DICT_SOURCE = cjdict.txt khmerdict.txt laodict.txt thaidict.txt
56 +BRK_DICT_SOURCE = khmerdict.txt laodict.txt thaidict.txt
59 # List of break iterator files (brk).
60 -BRK_SOURCE = char.txt line.txt line_fi.txt sent.txt sent_el.txt title.txt word.txt
61 +BRK_SOURCE = char.txt line.txt line_fi.txt sent.txt sent_el.txt title.txt word.txt word_ja.txt
65 -BRK_RES_SOURCE = el.txt en.txt en_US.txt fi.txt
66 +BRK_RES_SOURCE = el.txt en.txt en_US.txt fi.txt ja.txt
68 Index: source/data/brkitr/root.txt
69 ===================================================================
70 --- source/data/brkitr/root.txt (revision 264859)
71 +++ source/data/brkitr/root.txt (working copy)
73 word:process(dependency){"word.brk"}
76 - Hani:process(dependency){"cjdict.dict"}
77 - Hira:process(dependency){"cjdict.dict"}
78 - Kata:process(dependency){"cjdict.dict"}
79 Khmr:process(dependency){"khmerdict.dict"}
80 Laoo:process(dependency){"laodict.dict"}
81 Thai:process(dependency){"thaidict.dict"}
82 Index: source/data/brkitr/ja.txt
83 ===================================================================
84 --- source/data/brkitr/ja.txt (revision 264859)
85 +++ source/data/brkitr/ja.txt (working copy)
90 - line:process(dependency){"line_ja.brk"}
91 + word:process(dependency){"word_ja.brk"}