2ceedebb60afc4d8f1cef3c94b802caba81526c4
[profile/ivi/navit.git] / navit / navit / linguistics.c
1 #include <string.h>
2 #include <stdio.h>
3 #include <glib.h>
4 #include "debug.h"
5 #include "linguistics.h"
6
/*
 * Replacement table used by linguistics_expand_special().
 *
 * Each row is { special character, mode 1 replacement, mode 2 replacement },
 * all encoded as UTF-8.  A row with only two initializers leaves the third
 * column as a null pointer, meaning "no replacement in mode 2".
 *
 * Constraints relied upon by linguistics_expand_special():
 *  - column 0 must be a multi-byte UTF-8 character (single-byte characters
 *    are never looked up in this table);
 *  - no replacement may be longer, in bytes, than the character it replaces,
 *    because the result is written into a buffer the size of the input.
 */
static const char *special[][3]={
/* Capital Diacritics */
/* ¨ Diaeresis */
{"Ä","A","AE"},
{"Ë","E"},
{"Ï","I"},
{"Ö","O","OE"},
{"Ü","U","UE"},
{"Ÿ","Y"},
/* ˝ Double Acute Accent */
{"Ő","O","Ö"},
{"Ű","U","Ü"},
/* ´ Acute Accent */
{"Á","A"},
{"Ć","C"},
{"É","E"},
{"Í","I"},
{"Ĺ","L"},
{"Ń","N"},
{"Ó","O"},
{"Ŕ","R"},
{"Ś","S"},
{"Ú","U"},
{"Ý","Y"},
{"Ź","Z"},
/* ˛ Ogonek (nosinė) */
{"Ą","A"},
{"Ę","E"},
{"Į","I"},
{"Ų","U"},
/* ˙ Dot */
{"Ċ","C"},
{"Ė","E"},
{"Ġ","G"},
{"İ","I"},
{"Ŀ","L"},
{"Ż","Z"},
/* – Stroke */
{"Đ","D","DJ"}, /* Croatian Dj, not to be confused with the similar-looking Icelandic Eth */
{"Ħ","H"},
{"Ł","L"},
{"Ŧ","T"},
/* ˚ Ring */
{"Å","A","AA"},
{"Ů","U"},
/* ˇ Caron (haček, paukščiukas) */
{"Č","C"},
{"Ď","D"},
{"Ě","E"},
{"Ľ","L"},
{"Ň","N"},
{"Ř","R"},
{"Š","S"},
{"Ť","T"},
{"Ž","Z"},
/* / Slash */
{"Ø","O","OE"},
/* ¯ Macron */
{"Ā","A","AA"},
{"Ē","E","EE"},
{"Ī","I","II"},
{"Ō","O","OO"},
{"Ū","U","UU"},
/* ˘ Breve */
{"Ă","A"},
{"Ĕ","E"},
{"Ğ","G"},
{"Ĭ","I"},
{"Ŏ","O"},
{"Ŭ","U"},
/* ^ Circumflex */
{"Â","A"},
{"Ĉ","C"},
{"Ê","E"},
{"Ĝ","G"},
{"Ĥ","H"},
{"Î","I"},
{"Ĵ","J"},
{"Ô","O"},
{"Ŝ","S"},
{"Û","U"},
{"Ŵ","W"},
{"Ŷ","Y"},
/* ¸ Cedilla */
{"Ç","C"},
{"Ģ","G","GJ"},
{"Ķ","K","KJ"},
{"Ļ","L","LJ"},
{"Ņ","N","NJ"},
{"Ŗ","R"},
{"Ş","S"},
{"Ţ","T"},
/* ~ Tilde */
{"Ã","A"},
{"Ĩ","I"},
{"Ñ","N"},
{"Õ","O"},
{"Ũ","U"},
/* ` Grave */
{"À","A"},
{"È","E"},
{"Ì","I"},
{"Ò","O"},
{"Ù","U"},
/* ligatures */
{"Æ","A","AE"},
{"Ĳ","IJ"}, /* U+0132, the single-character IJ ligature (not the two letters I and J) */
{"Œ","O","OE"},
/* special letters */
{"Ð","D","DH"}, /* Icelandic Eth, not to be confused with the similar-looking Croatian Dj */
{"Ŋ","N","NG"},
{"Þ","T","TH"},
/* Small Diacritics */
/* ¨ Diaeresis */
{"ä","a","ae"},
{"ë","e"},
{"ï","i"},
{"ö","o","oe"},
{"ü","u","ue"},
{"ÿ","y"},
/* ˝ Double Acute Accent */
{"ő","o","ö"},
{"ű","u","ü"},
/* ´ Acute Accent */
{"á","a"},
{"ć","c"},
{"é","e"},
{"í","i"},
{"ĺ","l"},
{"ń","n"},
{"ó","o"},
{"ŕ","r"},
{"ś","s"},
{"ú","u"},
{"ý","y"},
{"ź","z"},
/* ˛ Ogonek (nosinė) */
{"ą","a"},
{"ę","e"},
{"į","i"},
{"ų","u"},
/* ˙ Dot (and dotless i) */
{"ċ","c"},
{"ė","e"},
{"ġ","g"},
{"ı","i"},
{"ŀ","l"},
{"ż","z"},
/* – Stroke */
{"đ","d","dj"},
{"ħ","h"},
{"ł","l"},
{"ŧ","t"},
/* ˚ Ring */
{"å","a","aa"},
{"ů","u"},
/* ˇ Caron (haček, paukščiukas) */
{"č","c"},
{"ď","d"},
{"ě","e"},
{"ľ","l"},
{"ň","n"},
{"ř","r"},
{"š","s"},
{"ť","t"},
{"ž","z"},
/* / Slash */
{"ø","o","oe"},
/* ¯ Macron */
{"ā","a","aa"},
{"ē","e","ee"},
{"ī","i","ii"},
{"ō","o","oo"},
{"ū","u","uu"},
/* ˘ Breve */
{"ă","a"},
{"ĕ","e"},
{"ğ","g"},
{"ĭ","i"},
{"ŏ","o"},
{"ŭ","u"},
/* ^ Circumflex */
{"â","a"},
{"ĉ","c"},
{"ê","e"},
{"ĝ","g"},
{"ĥ","h"},
{"î","i"},
{"ĵ","j"},
{"ô","o"},
{"ŝ","s"},
{"û","u"},
{"ŵ","w"},
{"ŷ","y"},
/* ¸ Cedilla */
{"ç","c"},
{"ģ","g","gj"},
{"ķ","k","kj"},
{"ļ","l","lj"},
{"ņ","n","nj"},
{"ŗ","r"},
{"ş","s"},
{"ţ","t"},
/* ~ Tilde */
{"ã","a"},
{"ĩ","i"},
{"õ","o"},
{"ñ","n"},
{"ũ","u"},
/* ` Grave */
{"à","a"},
{"è","e"},
{"ì","i"},
{"ò","o"},
{"ù","u"},
/* ligatures */
{"æ","a","ae"},
{"ĳ","ij"}, /* U+0133, the single-character ij ligature (not the two letters i and j) */
{"œ","o","oe"},
{"ß","s","ss"},
/* special letters */
{"ð","d","dh"},
{"ŋ","n","ng"},
{"þ","t","th"},
};
232
233 char *
234 linguistics_expand_special(char *str, int mode)
235 {
236         char *in=str;
237         char *out,*ret;
238         int found=0;
239         out=ret=g_strdup(str);
240         if (!mode) 
241                 return ret;
242         while (*in) {
243                 char *next=g_utf8_find_next_char(in, NULL);
244                 int i,len=next-in;
245                 int match=0;
246                 if (len > 1) {
247                         for (i = 0 ; i < sizeof(special)/sizeof(special[0]); i++) {
248                                 const char *search=special[i][0];
249                                 if (!strncmp(in,search,len)) {
250                                         const char *replace=special[i][mode];
251                                         if (replace) {
252                                                 int replace_len=strlen(replace);
253                                                 dbg_assert(replace_len <= len);
254                                                 dbg(1,"found %s %s %d %s %d\n",in,search,len,replace,replace_len);
255                                                 strcpy(out, replace);
256                                                 out+=replace_len;
257                                                 match=1;
258                                                 break;
259                                         }
260                                 }
261                         }
262                 }
263                 if (match) {
264                         found=1;
265                         in=next;
266                 } else {
267                         while (len-- > 0) 
268                                 *out++=*in++;
269                 }
270         }
271         *out++='\0';
272         if (!found) {
273                 g_free(ret);
274                 ret=NULL;
275         }
276         return ret;
277 }
278
/**
 * Find the start of the text following the first separator in a string.
 *
 * Separators are space, '-', '/', '(' and ')'.
 *
 * @param str NUL-terminated string to scan.
 * @return Pointer into str just behind the first separator, or NULL if str
 *         contains no separator or the first separator is its last character.
 */
char *
linguistics_next_word(char *str)
{
        char *separator=str+strcspn(str, " -/()");

        if (*separator == '\0')
                return NULL;
        if (separator[1] == '\0')
                return NULL;
        return separator+1;
}
287
/**
 * Tell whether a word is one of the words this function filters out.
 *
 * The comparison ignores ASCII case and is independent of the current locale.
 *
 * @param str NUL-terminated word to check.
 * @return 0 if str is "str", "str.", "strasse" or "weg" (in any case),
 *         1 otherwise.
 */
int
linguistics_search(char *str)
{
        /* NOTE(review): presumably generic street-type words that are not
         * useful as search terms - confirm against the callers. */
        static const char *const ignored[]={"str","str.","strasse","weg"};
        size_t i;

        for (i = 0 ; i < sizeof(ignored)/sizeof(ignored[0]) ; i++) {
                if (!g_ascii_strcasecmp(str,ignored[i]))
                        return 0;
        }
        return 1;
}
301
/**
 * Initialization hook of the linguistics module.
 *
 * Currently there is nothing to set up, so this is a no-op.
 */
void
linguistics_init(void)
{
        /* intentionally empty */
}