3 # make-uniuni -- script for creating the file uniuni.cpp
5 # Copyright (C) 2005-2018 Free Software Foundation, Inc.
6 # Written by Werner Lemberg <wl@gnu.org>
8 # This file is part of groff.
10 # groff is free software; you can redistribute it and/or modify it under
11 # the terms of the GNU General Public License as published by the Free
12 # Software Foundation, either version 3 of the License, or
13 # (at your option) any later version.
15 # groff is distributed in the hope that it will be useful, but WITHOUT ANY
16 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
20 # You should have received a copy of the GNU General Public License
21 # along with this program. If not, see <http://www.gnu.org/licenses/>.
26 # make-uniuni <version-string> < UnicodeData.txt > uniuni.cpp
28 # 'UnicodeData.txt' is the central database file from the Unicode standard.
29 # Unfortunately, it doesn't contain a version number, which must
30 # therefore be provided manually as a parameter to the filter.
32 # This program needs a C preprocessor.
39 if test $# -ne 1; then
40 echo "usage: $0 <version-string> < UnicodeData.txt > uniuni.cpp"
46 # Stage 1: remove ranges and control characters (second field starts with '<'),
47 # stage 2: reduce each line to 'first-field;decomposition' (fields 1 and 6),
48 # stage 3: remove lines whose decomposition field is empty,
49 # stage 4: remove all compatibility decompositions (decomposition starts with '<').
# The result goes to a scratch file named '<shell PID>1' ('$$' expands to the PID).
50 sed -e '/^[^;]*;</d' \
51 | sed -e 's/;[^;]*;[^;]*;[^;]*;[^;]*;\([^;]*\);.*$/;\1/' \
52 | sed -e '/^[^;]*;$/d' \
53 | sed -e '/^[^;]*;</d' > $$1
55 # Prepare input for running cpp.
57 | sed -e 's/^\([^;]*\);/#define \1 /' \
60 | sed -e 's/^\([^;]*\);.*$/\1 u\1/' >> $$2
62 # Run C preprocessor to recursively decompose.
65 # Convert it back to original format.
72 -e 's/^\([^ ]*\) /\1;/' > $$4
77 /* Copyright (C) 2002-2014 Free Software Foundation, Inc.
78 Written by Werner Lemberg <wl@gnu.org>
80 This file is part of groff.
82 groff is free software; you can redistribute it and/or modify it under
83 the terms of the GNU General Public License as published by the Free
84 Software Foundation, either version 3 of the License, or
85 (at your option) any later version.
87 groff is distributed in the hope that it will be useful, but WITHOUT ANY
88 WARRANTY; without even the implied warranty of MERCHANTABILITY or
89 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
92 You should have received a copy of the GNU General Public License
93 along with this program. If not, see <http://www.gnu.org/licenses/>. */
95 // This code has been algorithmically derived from the file
96 // UnicodeData.txt, version $version_string, available from unicode.org,
97 // on `date '+%Y-%m-%d'`.
100 #include "stringclass.h"
105 struct unicode_decompose {
109 declare_ptable(unicode_decompose)
110 implement_ptable(unicode_decompose)
112 PTABLE(unicode_decompose) unicode_decompose_table;
114 // the first digit in the composite string gives the number of composites
119 } unicode_decompose_list[] = {
125 -e 's/\(.*\);\(.*_.*_.*_.*\)$/ { "\1", "4\2" },/' \
126 -e 's/\(.*\);\(.*_.*_.*\)$/ { "\1", "3\2" },/' \
127 -e 's/\(.*\);\(.*_.*\)$/ { "\1", "2\2" },/' \
128 -e 's/\(.*\);\(.*\)$/ { "\1", "1\2" },/'
134 // global constructor
136 static struct unicode_decompose_init {
137 unicode_decompose_init();
138 } _unicode_decompose_init;
140 unicode_decompose_init::unicode_decompose_init()
142 for (unsigned int i = 0;
143 i < sizeof(unicode_decompose_list)/sizeof(unicode_decompose_list[0]);
145 unicode_decompose *dec = new unicode_decompose[1];
146 dec->value = (char *)unicode_decompose_list[i].value;
147 unicode_decompose_table.define(unicode_decompose_list[i].key, dec);
151 const char *decompose_unicode(const char *s)
153 unicode_decompose *result = unicode_decompose_table.lookup(s);
154 return result ? result->value : 0;
159 # Remove temporary files.