1 /* Transliteration using the locale's data.
2 Copyright (C) 2000, 2009 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 #include <gnu/option-groups.h>
29 #include <bits/libc-lock.h>
30 #include "gconv_int.h"
31 #include "../locale/localeinfo.h"
/* Try to transliterate the input at *INBUFP using the transliteration
   data of the current LC_CTYPE locale.

   The bytes between *INBUFP and INBUFEND are read as an array of
   uint32_t values.  Three strategies are tried in order:
     1. binary search of the locale's FROM table; for a match, each
        replacement listed in the TO table is offered to the conversion
        function of STEP until one is not rejected as illegal input;
     2. the locale's TRANSLIT_IGNORE ranges, stored as triples
        (first, last, step);
     3. the locale's default "missing" replacement string.

   TRANS_DATA is unused.  Return values visible in this function:
   __GCONV_EMPTY_INPUT when no input is left, __GCONV_INCOMPLETE_INPUT
   when less than one full value is available or the input only matches
   a prefix of a table entry, and __GCONV_ILLEGAL_INPUT when nothing
   matched.
   NOTE(review): the numbering gaps show that some statements of this
   function (braces, a few declarations, some returns) are not part of
   this excerpt; how IRREVERSIBLE is updated cannot be confirmed here.  */
35 __gconv_transliterate (struct __gconv_step *step,
36 struct __gconv_step_data *step_data,
37 void *trans_data __attribute__ ((unused)),
38 const unsigned char *inbufstart,
39 const unsigned char **inbufp,
40 const unsigned char *inbufend,
41 unsigned char **outbufstart, size_t *irreversible)
43 /* Find out about the locale's transliteration. */
45 const uint32_t *from_idx;
46 const uint32_t *from_tbl;
47 const uint32_t *to_idx;
48 const uint32_t *to_tbl;
49 const uint32_t *winbuf;
50 const uint32_t *winbufend;
54 /* The input buffer. These are actually 4-byte values. */
55 winbuf = (const uint32_t *) *inbufp;
56 winbufend = (const uint32_t *) inbufend;
/* Conversion function of this step.  It is called below on candidate
   replacement strings to find out whether the target charset can
   represent them.  */
58 __gconv_fct fct = step->__fct;
60 if (step->__shlib_handle != NULL)
64 #if __OPTION_EGLIBC_LOCALE_CODE
65 /* If there is no transliteration information in the locale don't do
66 anything and return the error. */
67 size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_TAB_SIZE);
71 /* Get the rest of the values. */
73 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX);
75 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL);
77 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX);
79 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_TBL);
81 /* Test whether there is enough input. */
82 if (winbuf + 1 > winbufend)
83 return (winbuf == winbufend
84 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
86 /* The array starting at FROM_IDX contains indices to the string table
87 in FROM_TBL. The indices are sorted with respect to the strings. I.e., we
88 are doing binary search. */
93 uint_fast32_t med = (low + high) / 2;
97 /* Compare the string at this index with the string at the current
98 position in the input buffer. */
103 if (from_tbl[idx + cnt] != winbuf[cnt])
104 /* Does not match. */
108 while (from_tbl[idx + cnt] != L'\0' && winbuf + cnt < winbufend);
/* A full match requires at least one compared element and that the
   table entry was consumed up to its terminating zero.  */
110 if (cnt > 0 && from_tbl[idx + cnt] == L'\0')
112 /* Found a matching input sequence. Now try to convert the
113 possible replacements. */
114 uint32_t idx2 = to_idx[med];
118 /* Determine length of replacement. */
119 uint_fast32_t len = 0;
121 const unsigned char *toinptr;
122 unsigned char *outptr;
124 while (to_tbl[idx2 + len] != L'\0')
127 /* Try this input text. */
128 toinptr = (const unsigned char *) &to_tbl[idx2];
129 outptr = *outbufstart;
130 res = DL_CALL_FCT (fct,
131 (step, step_data, &toinptr,
132 (const unsigned char *) &to_tbl[idx2 + len],
133 &outptr, NULL, 0, 0));
134 if (res != __GCONV_ILLEGAL_INPUT)
136 /* If the conversion succeeds we have to increment the
138 if (res == __GCONV_EMPTY_INPUT)
140 *inbufp += cnt * sizeof (uint32_t);
144 /* Do not increment the output pointer if we could not
145 store the entire output. */
146 if (res != __GCONV_FULL_OUTPUT)
147 *outbufstart = outptr;
152 /* Next replacement. */
/* The list of replacements for one entry ends at an empty string, i.e.
   when the element at IDX2 is zero.  */
155 while (to_tbl[idx2] != L'\0');
157 /* Nothing found, continue searching. */
160 /* This means that the input buffer contents matches a prefix of
161 an entry. Since we cannot match it unless we get more input,
162 we will tell the caller about it. */
163 return __GCONV_INCOMPLETE_INPUT;
165 if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt])
172 /* Maybe the character is supposed to be ignored. */
173 if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN) != 0)
175 int n = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN);
176 const uint32_t *ranges =
177 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE);
178 const uint32_t wc = *(const uint32_t *) (*inbufp);
/* NOTE(review): WC has already been read from *INBUFP on the line above,
   i.e. before the length check that follows.  The read is only safe if
   the identical check earlier in this function has been executed on
   this path - confirm against the full source.  */
181 /* Test whether there is enough input. */
182 if (winbuf + 1 > winbufend)
183 return (winbuf == winbufend
184 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
/* Each ignore range occupies three elements: first character, last
   character and step width.  WC matches when it lies inside the range
   and is a whole number of steps away from the first character.
   NOTE(review): assumes the step (ranges[2]) in the locale data is never
   zero, otherwise the modulo below divides by zero - TODO confirm.  */
186 for (i = 0; i < n; ranges += 3, ++i)
187 if (ranges[0] <= wc && wc <= ranges[1]
188 && (wc - ranges[0]) % ranges[2] == 0)
190 /* Matches the range. Ignore it. */
195 else if (wc < ranges[0])
196 /* There cannot be any other matching range since they are
202 /* One last chance: use the default replacement. */
203 if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN) != 0)
205 const uint32_t *default_missing = (const uint32_t *)
206 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING);
207 const unsigned char *toinptr = (const unsigned char *) default_missing;
208 uint32_t len = _NL_CURRENT_WORD (LC_CTYPE,
209 _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN);
210 unsigned char *outptr;
213 /* Test whether there is enough input. */
214 if (winbuf + 1 > winbufend)
215 return (winbuf == winbufend
216 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
/* Offer the LEN elements of the default replacement to the conversion
   function, writing to a scratch copy of the output pointer.  */
218 outptr = *outbufstart;
219 res = DL_CALL_FCT (fct,
220 (step, step_data, &toinptr,
221 (const unsigned char *) (default_missing + len),
222 &outptr, NULL, 0, 0));
224 if (res != __GCONV_ILLEGAL_INPUT)
226 /* If the conversion succeeds we have to increment the
228 if (res == __GCONV_EMPTY_INPUT)
230 /* This worked but is not reversible. */
235 *outbufstart = outptr;
241 /* Haven't found a match. */
242 return __GCONV_ILLEGAL_INPUT;
246 /* Structure to represent results of found (or not) transliteration
250 /* This structure must remain the first member. */
251 struct trans_struct info;
259 /* Tree with results of previous calls to __gconv_translit_find.
   It is searched and extended with trans_compare as the ordering
   function, i.e. entries are keyed by module name.  */
260 static void *search_tree;
262 /* We modify global data.  __gconv_translit_find takes this lock before
   it consults search_tree and releases it at its end.  */
263 __libc_lock_define_initialized (static, lock);
266 /* Compare two transliteration entries.
   P1 and P2 are interpreted as pointers to struct known_trans and are
   ordered by strcmp on their info.name strings.  This is the comparison
   function handed to __tfind and __tsearch for search_tree.
   __gconv_translit_find passes a bare struct trans_struct as the search
   key, so nothing but info.name may be read here; this relies on info
   being the first member of struct known_trans.  */
268 trans_compare (const void *p1, const void *p2)
270 const struct known_trans *s1 = (const struct known_trans *) p1;
271 const struct known_trans *s2 = (const struct known_trans *) p2;
273 return strcmp (s1->info.name, s2->info.name);
277 /* Open (maybe reopen) the module named in the struct. Get the function
278 and data structure pointers we need.
   The shared object TRANS->fname is loaded with __libc_dlopen and the
   handle stored in TRANS->handle.  The symbols "gconv_trans_context"
   (called once here to fill in TRANS->info.csnames and
   TRANS->info.ncsnames) and "gconv_trans" are looked up with a NULL
   check; "gconv_trans_init", "gconv_trans_context" and "gconv_trans_end"
   are stored without a check.  When everything is in place the open
   count is set to 1.
   The callers in this file treat a return value of 0 as success.
   NOTE(review): the return statements and the bodies of the failure
   branches are not part of this excerpt.  */
280 open_translit (struct known_trans *trans)
282 __gconv_trans_query_fct queryfct;
284 trans->handle = __libc_dlopen (trans->fname);
285 if (trans->handle == NULL)
289 /* Find the required symbol. */
290 queryfct = __libc_dlsym (trans->handle, "gconv_trans_context");
291 if (queryfct == NULL)
293 /* We cannot live with that. */
295 __libc_dlclose (trans->handle);
296 trans->handle = NULL;
300 /* Get the context. */
301 if (queryfct (trans->info.name, &trans->info.csnames, &trans->info.ncsnames)
305 /* Of course we also have to have the actual function. */
306 trans->info.trans_fct = __libc_dlsym (trans->handle, "gconv_trans");
307 if (trans->info.trans_fct == NULL)
310 /* Now the optional functions. */
311 trans->info.trans_init_fct =
312 __libc_dlsym (trans->handle, "gconv_trans_init");
/* NOTE(review): "gconv_trans_context" is the same symbol that was looked
   up above as the query function; here it is stored a second time as
   the context function - confirm that this is intended.  */
313 trans->info.trans_context_fct =
314 __libc_dlsym (trans->handle, "gconv_trans_context");
315 trans->info.trans_end_fct =
316 __libc_dlsym (trans->handle, "gconv_trans_end");
318 trans->open_count = 1;
/* Find the transliteration module named TRANS->name, which must not be
   NULL.

   The work is done with the file-local lock held.  First the tree of
   already known modules is searched by name.  For a known entry the
   handle value encodes its state: NULL is tested as "not available" and
   (void *) -1 is the value for which the module is opened again; for a
   usable entry *TRANS is overwritten with the cached information and the
   entry's open count is incremented.

   For an unknown name a new struct known_trans is allocated, a copy of
   the name is stored directly behind the struct, and every directory in
   __gconv_path_elem is tried with open_translit.  The new entry is
   inserted into the tree whether or not a module was found.

   NOTE(review): the return statements, the full allocation size and
   several branch bodies are not part of this excerpt.  */
326 __gconv_translit_find (struct trans_struct *trans)
328 struct known_trans **found;
329 const struct path_elem *runp;
332 /* We have to have a name. */
333 assert (trans->name != NULL);
335 /* Acquire the lock. */
336 __libc_lock_lock (lock);
338 /* See whether we know this module already. */
339 found = __tfind (trans, &search_tree, trans_compare);
342 /* Is this module available? */
343 if ((*found)->handle != NULL)
345 /* Maybe we have to reopen the file. */
346 if ((*found)->handle != (void *) -1)
347 /* The object is not unloaded. */
349 else if (open_translit (*found) == 0)
/* Hand the cached function pointers and charset names to the caller and
   account for the additional user of the module.  */
352 *trans = (*found)->info;
353 (*found)->open_count++;
/* NAME_LEN includes the terminating NUL byte.  */
360 size_t name_len = strlen (trans->name) + 1;
362 struct known_trans *newp;
364 /* We have to continue looking for the module. */
365 if (__gconv_path_elem == NULL)
368 /* See whether we have to append .so. */
/* Because NAME_LEN counts the NUL, name[name_len - 4] is the first of
   the last three characters of the name.  */
369 if (name_len <= 4 || memcmp (&trans->name[name_len - 4], ".so", 3) != 0)
372 /* Create a new entry. */
373 newp = (struct known_trans *) malloc (sizeof (struct known_trans)
374 + (__gconv_max_path_elem_len
381 /* Clear the struct. */
382 memset (newp, '\0', sizeof (struct known_trans));
384 /* Store a copy of the module name. */
385 newp->info.name = cp = (char *) (newp + 1);
386 cp = __mempcpy (cp, trans->name, name_len);
390 /* Search in all the directories. */
391 for (runp = __gconv_path_elem; runp->name != NULL; ++runp)
/* Build "<directory><name>" in newp->fname.  NAME_LEN bytes of the name
   are copied, so the terminating NUL is copied as well and CP ends up
   pointing one byte past that NUL.  */
393 cp = __mempcpy (__stpcpy ((char *) newp->fname, runp->name),
394 trans->name, name_len);
/* NOTE(review): CP points behind the NUL that was just copied, so this
   writes the four bytes of ".so" after the end of the string: the
   suffix does not become part of the file name, and the write extends
   four bytes beyond directory + NAME_LEN.  Whether the buffer is large
   enough depends on the allocation size, which is not fully visible
   here - verify (compare the published off-by-one report for this
   function).  */
396 memcpy (cp, ".so", sizeof (".so"));
398 if (open_translit (newp) == 0)
400 /* We found a module. */
409 /* In any case we'll add the entry to our search tree. */
410 if (__tsearch (newp, &search_tree, trans_compare) == NULL)
412 /* Yikes, this should not happen. Unload the object. */
414 /* XXX unload here. */
419 __libc_lock_unlock (lock);