Fix:map_csv:Disable default notification of each deleted item.
[profile/ivi/navit.git] / navit / navit / linguistics.c
1 #include <string.h>
2 #include <stdio.h>
3 #include <glib.h>
4 #include "debug.h"
5 #include "linguistics.h"
6
/*
 * Replacement table used by linguistics_expand_special().
 *
 * Each row is { special character, mode-1 replacement, mode-2 replacement }:
 *   column 0: the special character as a UTF-8 string (always multi-byte,
 *             because the lookup is only done for multi-byte characters);
 *   column 1: replacement used for mode 1 (a plain base letter);
 *   column 2: replacement used for mode 2 (multi-letter transliteration).
 *             Rows that omit it leave the pointer NULL, meaning the
 *             character is left untouched in mode 2.
 *
 * A replacement must never be longer (in bytes) than the character it
 * replaces, since the result is written into a buffer of the input's size.
 */
static const char *special[][3]={
/* Capital Diacritics */
/* ¨ Diaeresis */
{"Ä","A","AE"},
{"Ë","E"},
{"Ï","I"},
{"Ö","O","OE"},
{"Ü","U","UE"},
{"Ÿ","Y"},
/* ˝ Double Acute Accent */
{"Ő","O","Ö"},
{"Ű","U","Ü"},
/* ´ Acute Accent */
{"Á","A"},
{"Ć","C"},
{"É","E"},
{"Í","I"},
{"Ĺ","L"},
{"Ń","N"},
{"Ó","O"},
{"Ŕ","R"},
{"Ś","S"},
{"Ú","U"},
{"Ý","Y"},
{"Ź","Z"},
/* ˛ Ogonek (nosinė) */
{"Ą","A"},
{"Ę","E"},
{"Į","I"},
{"Ų","U"},
/* ˙ Dot */
{"Ċ","C"},
{"Ė","E"},
{"Ġ","G"},
{"İ","I"},
{"Ŀ","L"},
{"Ż","Z"},
/* – Stroke */
{"Đ","D","DJ"}, /* Croatian Dj, not to be confused with the similar-looking Icelandic Eth */
{"Ħ","H"},
{"Ł","L"},
{"Ŧ","T"},
/* ˚ Ring */
{"Å","A","AA"},
{"Ů","U"},
/* ˇ Caron (haček, paukščiukas) */
{"Č","C"},
{"Ď","D"},
{"Ě","E"},
{"Ľ","L"},
{"Ň","N"},
{"Ř","R"},
{"Š","S"},
{"Ť","T"},
{"Ž","Z"},
/* / Slash */
{"Ø","O","OE"},
/* ¯ Macron */
{"Ā","A","AA"},
{"Ē","E","EE"},
{"Ī","I","II"},
{"Ō","O","OO"},
{"Ū","U","UU"},
/* ˘ Breve */
{"Ă","A"},
{"Ĕ","E"},
{"Ğ","G"},
{"Ĭ","I"},
{"Ŏ","O"},
{"Ŭ","U"},
/* ^ Circumflex */
{"Â","A"},
{"Ĉ","C"},
{"Ê","E"},
{"Ĝ","G"},
{"Ĥ","H"},
{"Î","I"},
{"Ĵ","J"},
{"Ô","O"},
{"Ŝ","S"},
{"Û","U"},
{"Ŵ","W"},
{"Ŷ","Y"},
/* ¸ Cedilla */
{"Ç","C"},
{"Ģ","G","GJ"},
{"Ķ","K","KJ"},
{"Ļ","L","LJ"},
{"Ņ","N","NJ"},
{"Ŗ","R"},
{"Ş","S"},
{"Ţ","T"},
/* ~ Tilde */
{"Ã","A"},
{"Ĩ","I"},
{"Ñ","N"},
{"Õ","O"},
{"Ũ","U"},
/* ` Grave */
{"À","A"},
{"È","E"},
{"Ì","I"},
{"Ò","O"},
{"Ù","U"},
/* ligatures */
{"Æ","A","AE"},
{"Ĳ","IJ"}, /* U+0132, a single code point; a plain ASCII "IJ" key could never match */
{"Œ","O","OE"},
/* special letters */
{"Ð","D","DH"}, /* Icelandic Eth, not to be confused with the similar-looking Croatian Dj */
{"Ŋ","N","NG"},
{"Þ","T","TH"},
/* Small Diacritics */
/* ¨ Diaeresis */
{"ä","a","ae"},
{"ë","e"},
{"ï","i"},
{"ö","o","oe"},
{"ü","u","ue"},
{"ÿ","y"},
/* ˝ Double Acute Accent */
{"ő","o","ö"},
{"ű","u","ü"},
/* ´ Acute Accent */
{"á","a"},
{"ć","c"},
{"é","e"},
{"í","i"},
{"ĺ","l"},
{"ń","n"},
{"ó","o"},
{"ŕ","r"},
{"ś","s"},
{"ú","u"},
{"ý","y"},
{"ź","z"},
/* ˛ Ogonek (nosinė) */
{"ą","a"},
{"ę","e"},
{"į","i"},
{"ų","u"},
/* ˙ Dot (and dotless i) */
{"ċ","c"},
{"ė","e"},
{"ġ","g"},
{"ı","i"},
{"ŀ","l"},
{"ż","z"},
/* – Stroke */
{"đ","d","dj"},
{"ħ","h"},
{"ł","l"},
{"ŧ","t"},
/* ˚ Ring */
{"å","a", "aa"},
{"ů","u"},
/* ˇ Caron (haček, paukščiukas) */
{"č","c"},
{"ď","d"},
{"ě","e"},
{"ľ","l"},
{"ň","n"},
{"ř","r"},
{"š","s"},
{"ť","t"},
{"ž","z"},
/* / Slash */
{"ø","o", "oe"},
/* ¯ Macron */
{"ā","a","aa"},
{"ē","e","ee"},
{"ī","i","ii"},
{"ō","o","oo"},
{"ū","u","uu"},
/* ˘ Breve */
{"ă","a"},
{"ĕ","e"},
{"ğ","g"},
{"ĭ","i"},
{"ŏ","o"},
{"ŭ","u"},
/* ^ Circumflex */
{"â","a"},
{"ĉ","c"},
{"ê","e"},
{"ĝ","g"},
{"ĥ","h"},
{"î","i"},
{"ĵ","j"},
{"ô","o"},
{"ŝ","s"},
{"û","u"},
{"ŵ","w"},
{"ŷ","y"},
/* ¸ Cedilla */
{"ç","c"},
{"ģ","g","gj"},
{"ķ","k","kj"},
{"ļ","l","lj"},
{"ņ","n","nj"},
{"ŗ","r"},
{"ş","s"},
{"ţ","t"},
/* ~ Tilde */
{"ã","a"},
{"ĩ","i"},
{"õ","o"},
{"ñ","n"},
{"ũ","u"},
/* ` Grave */
{"à","a"},
{"è","e"},
{"ì","i"},
{"ò","o"},
{"ù","u"},
/* ligatures */
{"æ","a","ae"},
{"ĳ","ij"}, /* U+0133, a single code point; a plain ASCII "ij" key could never match */
{"œ","o","oe"},
{"ß","s","ss"},
/* special letters */
{"ð","d","dh"},
{"ŋ","n","ng"},
{"þ","t","th"},
};
232
233 /**
234  * @brief Replace special characters in string (e.g. umlauts) with plain letters.
235  * This is useful e.g. to canonicalize a string for comparison.
236  *
237  * @param str string to process
238  * @param mode Replacement mode. 0=do nothing, 1=replace with single
239  * ASCII letter, 2=replace with multiple letters if the commonly used
240  * ASCII replacement has multitple letter (e.g. a-umlaut -> ae)
241  * @returns copy of string, with characters replaced
242  */
243 char *
244 linguistics_expand_special(char *str, int mode)
245 {
246         char *in=str;
247         char *out,*ret;
248         int found=0;
249         out=ret=g_strdup(str);
250         if (!mode) 
251                 return ret;
252         while (*in) {
253                 char *next=g_utf8_find_next_char(in, NULL);
254                 int i,len=next-in;
255                 int match=0;
256                 if (len > 1) {
257                         for (i = 0 ; i < sizeof(special)/sizeof(special[0]); i++) {
258                                 const char *search=special[i][0];
259                                 if (!strncmp(in,search,len)) {
260                                         const char *replace=special[i][mode];
261                                         if (replace) {
262                                                 int replace_len=strlen(replace);
263                                                 dbg_assert(replace_len <= len);
264                                                 dbg(1,"found %s %s %d %s %d\n",in,search,len,replace,replace_len);
265                                                 strcpy(out, replace);
266                                                 out+=replace_len;
267                                                 match=1;
268                                                 break;
269                                         }
270                                 }
271                         }
272                 }
273                 if (match) {
274                         found=1;
275                         in=next;
276                 } else {
277                         while (len-- > 0) 
278                                 *out++=*in++;
279                 }
280         }
281         *out++='\0';
282         if (!found) {
283                 g_free(ret);
284                 ret=NULL;
285         }
286         return ret;
287 }
288
/**
 * @brief Find the start of the text following the first word separator.
 *
 * A separator is any one of the characters space, '-', '/', '(' or ')'.
 *
 * @param str string to scan; must not be NULL
 * @returns pointer into str just past the first separator, or NULL if str
 * contains no separator or the first separator is its last character
 */
char *
linguistics_next_word(char *str)
{
        char *separator=strpbrk(str, " -/()");

        if (separator == NULL)
                return NULL;
        if (separator[1] == '\0')
                return NULL;
        return separator+1;
}
297
/**
 * @brief Check a word against a fixed list of excluded words.
 *
 * The comparison is case-insensitive. The excluded words look like generic
 * German street-name parts; presumably callers skip them when searching
 * (TODO confirm against callers).
 *
 * @param str word to check
 * @returns 0 if str equals "str", "str.", "strasse" or "weg", 1 otherwise
 */
int
linguistics_search(char *str)
{
        static const char *const excluded[]={"str","str.","strasse","weg"};
        size_t idx;

        for (idx = 0; idx < sizeof(excluded)/sizeof(excluded[0]); idx++) {
                if (g_strcasecmp(str,excluded[idx]) == 0)
                        return 0;
        }
        return 1;
}
311
/**
 * @brief Initialization hook for the linguistics module.
 *
 * Currently does nothing.
 */
void
linguistics_init(void)
{
        /* Intentionally empty. */
}