1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997, 1998 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
24 #include <bits/libc-lock.h>
26 #include <gconv_int.h>
29 /* Simple data structure for alias mapping. We have two names, `from' and `to'. */
/* Root of the alias search tree. __gconv_find_transform searches it with
   __tfind and __gconv_alias_compare; free_mem releases it with __tdestroy. */
31 void *__gconv_alias_db;
33 /* Array with available modules. __gconv_nmodules is the number of
   entries; find_derivation and free_mem use it as their loop bound. */
34 size_t __gconv_nmodules;
35 struct gconv_module **__gconv_modules_db;
37 /* We modify global data. This lock is taken by __gconv_find_transform
   and __gconv_close_transform around their work. */
38 __libc_lock_define_initialized (static, lock)
41 /* Function for searching alias. It is the comparison callback handed to
   __tfind for the alias tree in __gconv_find_transform. */
43 __gconv_alias_compare (const void *p1, const void *p2)
/* P1 and P2 are treated as pointers to struct gconv_alias records. */
45 struct gconv_alias *s1 = (struct gconv_alias *) p1;
46 struct gconv_alias *s2 = (struct gconv_alias *) p2;
/* Only the `fromname' members take part in the ordering, and case is
   ignored. */
47 return __strcasecmp (s1->fromname, s2->fromname);
51 /* To search for a derivation we create a list of intermediate steps.
52 Each element contains a pointer to the element which precedes it
53 in the derivation order. */
54 struct derivation_step
/* Charset name produced by this step. */
56 const char *result_set;
/* Module that performs this step; NULL for the starting entries that
   find_derivation builds from the source charset name(s). */
57 struct gconv_module *code;
/* Step that precedes this one in the derivation (NULL for a starting
   entry). gen_steps follows these links backwards. */
58 struct derivation_step *last;
/* Next entry of the work list that find_derivation traverses. */
59 struct derivation_step *next;
/* Create a struct derivation_step with alloca and store RESULT, MODULE and
   LAST_MOD in its result_set, code and last members.
   NOTE(review): the remaining lines of the macro are not visible here. */
62 #define NEW_STEP(result, module, last_mod) \
63 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
64 newp->result_set = result; \
65 newp->code = module; \
66 newp->last = last_mod; \
71 /* If a specific transformation is used more than once we should not need
72 to start looking for it again. Instead cache each successful result.
   Note that find_derivation calls add_derivation "in any case", so failed
   searches end up in the cache as well.
   NOTE(review): other code in this file also uses members named `from',
   `to' and `nsteps'; their declarations are not visible here. */
73 struct known_derivation
/* Step array of the cached derivation. derivation_lookup hands it back to
   the caller and free_derivation frees it. */
77 struct gconv_step *steps;
81 /* Compare function for database of found derivations. It is passed to
   __tfind and __tsearch for the known_derivations tree. Both names are
   compared with strcmp, i.e. case-sensitively. */
83 derivation_compare (const void *p1, const void *p2)
/* P1 and P2 are treated as pointers to struct known_derivation records. */
85 struct known_derivation *s1 = (struct known_derivation *) p1;
86 struct known_derivation *s2 = (struct known_derivation *) p2;
/* NOTE(review): the statement between the two comparisons is not visible
   here; presumably the `to' names are only compared when the `from' names
   are equal -- confirm. */
89 result = strcmp (s1->from, s2->from);
91 result = strcmp (s1->to, s2->to);
95 /* The search tree for known derivations. Entries are added by
   add_derivation, looked up by derivation_lookup and destroyed in free_mem
   with free_derivation as the per-node destructor. */
96 static void *known_derivations;
98 /* Look up whether given transformation was already requested before.
   FROMSET and TOSET form the search key. When an entry is found its step
   array and step count are stored in *HANDLE and *NSTEPS.
   NOTE(review): the test of the __tfind result and the return statements
   are not visible here. */
101 derivation_lookup (const char *fromset, const char *toset,
102 struct gconv_step **handle, size_t *nsteps)
/* Only the two names matter for the comparison function; the remaining
   members of the key are filled with NULL and 0. */
104 struct known_derivation key = { fromset, toset, NULL, 0 };
105 struct known_derivation **result;
107 result = __tfind (&key, &known_derivations, derivation_compare);
112 *handle = (*result)->steps;
113 *nsteps = (*result)->nsteps;
115 /* Please note that we return GCONV_OK even if the last search for
116 this transformation was unsuccessful. */
120 /* Add new derivation to list of known ones. FROMSET and TOSET are
   copied; HANDLE and NSTEPS are stored as given. */
123 add_derivation (const char *fromset, const char *toset,
124 struct gconv_step *handle, size_t nsteps)
126 struct known_derivation *new_deriv;
/* Both lengths include the terminating NUL byte. */
127 size_t fromset_len = strlen (fromset) + 1;
128 size_t toset_len = strlen (toset) + 1;
/* One allocation holds the record followed by the two name strings. */
130 new_deriv = (struct known_derivation *)
131 malloc (sizeof (struct known_derivation) + fromset_len + toset_len);
132 if (new_deriv != NULL)
/* The copy of FROMSET is placed directly behind the record, the copy of
   TOSET directly behind that. */
134 new_deriv->from = memcpy (new_deriv + 1, fromset, fromset_len);
135 new_deriv->to = memcpy ((char *) new_deriv->from + fromset_len,
138 new_deriv->steps = handle;
139 new_deriv->nsteps = nsteps;
/* NOTE(review): the value returned by __tsearch is not examined in the
   visible code. */
141 __tsearch (new_deriv, &known_derivations, derivation_compare);
143 /* Please note that we don't complain if the allocation failed. This
144 is not tragic but in case we use the memory debugging facilities
145 not all memory will be freed. */
/* Destructor for one node of the known_derivations tree; free_mem passes
   it to __tdestroy. P is treated as a pointer to a struct
   known_derivation. */
150 free_derivation (void *p)
152 struct known_derivation *deriv = (struct known_derivation *) p;
/* Give every step that has an end function the chance to clean up. */
155 for (cnt = 0; cnt < deriv->nsteps; ++cnt)
156 if (deriv->steps[cnt].end_fct)
157 (*deriv->steps[cnt].end_fct) (&deriv->steps[cnt]);
/* Release the step array itself. */
159 free ((struct gconv_step *) deriv->steps);
/* Build an array of struct gconv_step from the chain of derivation steps
   that ends in BEST and is linked through the `last' pointers. TOSET and
   FROMSET are the charset names of the request; HANDLE and NSTEPS are
   output parameters.
   NOTE(review): several lines of this function are not visible here,
   among them the stores through HANDLE and NSTEPS and the return
   statement. */
166 gen_steps (struct derivation_step *best, const char *toset,
167 const char *fromset, struct gconv_step **handle, size_t *nsteps)
170 struct gconv_step *result;
171 struct derivation_step *current;
/* Pessimistic default. */
172 int status = GCONV_NOMEM;
174 /* First determine number of steps. */
175 for (current = best; current->last != NULL; current = current->last)
178 result = (struct gconv_step *) malloc (sizeof (struct gconv_step)
/* The array is filled from its last element down to element 0 while
   CURRENT moves backwards along the `last' links. */
186 while (step_cnt-- > 0)
188 result[step_cnt].from_name = (step_cnt == 0
190 : current->last->result_set);
/* Only the final step gets its own copy of the target name; every other
   step shares the from_name pointer of the step that follows it.
   NOTE(review): no check of the __strdup result is visible here. */
191 result[step_cnt].to_name = (step_cnt + 1 == *nsteps
192 ? __strdup (current->result_set)
193 : result[step_cnt + 1].from_name);
/* A module name starting with a slash is handled as a shared object;
   anything else is a builtin transformation. */
196 if (current->code->module_name[0] == '/')
198 /* Load the module, return handle for it. */
199 struct gconv_loaded_object *shlib_handle =
200 __gconv_find_shlib (current->code->module_name);
202 if (shlib_handle == NULL)
/* Take the name and the three entry points from the loaded object. */
208 result[step_cnt].shlib_handle = shlib_handle;
209 result[step_cnt].modname = shlib_handle->name;
210 result[step_cnt].counter = 0;
211 result[step_cnt].fct = shlib_handle->fct;
212 result[step_cnt].init_fct = shlib_handle->init_fct;
213 result[step_cnt].end_fct = shlib_handle->end_fct;
217 /* It's a builtin transformation. */
218 __gconv_get_builtin_trans (current->code->module_name,
221 /* Call the init function. */
222 if (result[step_cnt].init_fct != NULL)
223 (*result[step_cnt].init_fct) (&result[step_cnt]);
225 current = current->last;
230 /* Something went wrong while initializing the modules. */
/* Undo the steps above STEP_CNT: run their end functions and release
   their shared objects. */
231 while (++step_cnt < *nsteps)
233 if (result[step_cnt].end_fct != NULL)
234 (*result[step_cnt].end_fct) (&result[step_cnt]);
236 __gconv_release_shlib (result[step_cnt].shlib_handle);
241 status = GCONV_NOCONV;
254 /* The main function: find a possible derivation from the `fromset' (either
255 the given name or the alias) to the `toset' (again with alias).
   TOSET_EXPAND and FROMSET_EXPAND are the alias expansions of TOSET and
   FROMSET, or NULL when the caller found none. HANDLE and NSTEPS are
   output parameters.

   Outline, as far as the visible lines show:
   - consult the cache with derivation_lookup, once before and once after
     taking the lock;
   - start a work list with NEW_STEP entries for the source name(s);
   - for each list entry test every module in __gconv_modules_db, either
     by comparing the constant prefix or, when the module has a
     from_pattern, by matching a regular expression that is compiled on
     first use;
   - a result name equal to TOSET or TOSET_EXPAND (ignoring case) is a
     candidate solution and the cheapest one is kept in BEST; any other
     result name is appended to the work list unless already present;
   - call gen_steps for the solution, record the outcome with
     add_derivation and release the lock.

   NOTE(review): the regex_t obtained with malloc is handed to __regcomp
   without a NULL check in the visible lines.
   NOTE(review): the text `¤t->result_set' further down looks like a
   mis-encoded `&current->result_set' -- confirm against the upstream
   source. */
258 find_derivation (const char *toset, const char *toset_expand,
259 const char *fromset, const char *fromset_expand,
260 struct gconv_step **handle, size_t *nsteps)
/* NOTE(review): this appears to declare a second lock named `lock' inside
   the function; if so, the lock and unlock calls below act on it and not
   on the file-scope lock the caller holds -- confirm. */
262 __libc_lock_define_initialized (static, lock)
263 struct derivation_step *first, *current, **lastp, *best = NULL;
/* Costs of the solution in BEST; they are only compared once BEST is set. */
264 int best_cost_hi = 0;
265 int best_cost_lo = 0;
/* First cache lookup, made before the lock below is taken. */
268 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
270 if (result == GCONV_OK)
273 __libc_lock_lock (lock);
275 /* There is a small chance that this derivation is meanwhile found. This
276 can happen if in `find_derivation' we look for this derivation, didn't
277 find it but at the same time another thread looked for this derivation. */
278 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
280 if (result == GCONV_OK)
/* NOTE(review): the next three lines read like the tail of a comment
   whose opening line is not visible here. */
284 For now we use a simple algorithm with quadratic runtime behaviour.
285 The task is to match the `toset' with any of the available rules,
286 starting from FROMSET. */
287 if (fromset_expand != NULL)
289 first = NEW_STEP (fromset_expand, NULL, NULL);
290 first->next = NEW_STEP (fromset, NULL, NULL);
291 lastp = &first->next->next;
295 first = NEW_STEP (fromset, NULL, NULL);
296 lastp = &first->next;
300 while (current != NULL)
302 /* Now match all the available module specifications against the
303 current charset name. If any of them matches check whether
304 we already have a derivation for this charset. If yes, use the
305 one with the lower costs. Otherwise add the new charset at the
309 for (cnt = 0; cnt < __gconv_nmodules; ++cnt)
311 const char *result_set = NULL;
313 if (__gconv_modules_db[cnt]->from_pattern == NULL)
315 if (__strcasecmp (current->result_set,
316 __gconv_modules_db[cnt]->from_constpfx) == 0)
318 if (strcmp (__gconv_modules_db[cnt]->to_string, "-") == 0)
319 result_set = toset_expand ?: toset;
321 result_set = __gconv_modules_db[cnt]->to_string;
325 /* We have a regular expression. First see if the prefix
327 if (__strncasecmp (current->result_set,
328 __gconv_modules_db[cnt]->from_constpfx,
329 __gconv_modules_db[cnt]->from_constpfx_len)
332 /* First compile the regex if not already done. */
333 if (__gconv_modules_db[cnt]->from_regex == NULL)
335 regex_t *newp = (regex_t *) malloc (sizeof (regex_t));
337 if (__regcomp (newp, __gconv_modules_db[cnt]->from_pattern,
338 REG_EXTENDED | REG_ICASE) != 0)
340 /* Something is wrong. Remember this. */
342 __gconv_modules_db[cnt]->from_regex = (regex_t *) -1L;
345 __gconv_modules_db[cnt]->from_regex = newp;
348 if (__gconv_modules_db[cnt]->from_regex != (regex_t *) -1L)
350 /* Try to match the from name. */
353 if (__regexec (__gconv_modules_db[cnt]->from_regex,
354 current->result_set, 4, match, 0) == 0
355 && match[0].rm_so == 0
356 && current->result_set[match[0].rm_eo] == '\0')
358 /* At least the whole <from> string is matched.
359 We must now match sed-like possible
360 subexpressions from the match to the
362 #define ENSURE_LEN(LEN) \
363 if (wp + (LEN) >= constr + len - 1) \
365 char *newp = alloca (len += 128); \
366 memcpy (newp, constr, wp - constr); \
367 wp = newp + (wp - constr); \
371 char *constr = alloca (len);
373 const char *cp = __gconv_modules_db[cnt]->to_string;
382 else if (cp[1] == '\0')
383 /* Backslash at end of string. */
393 else if (*cp < '1' || *cp > '3')
398 if (match[idx].rm_so == -1)
402 ENSURE_LEN (match[idx].rm_eo
405 ¤t->result_set[match[idx].rm_so],
412 if (*cp == '\0' && wp != constr)
414 /* Terminate the constructed string. */
422 if (result_set != NULL)
424 /* We managed to find a derivation. First see whether
425 this is what we are looking for. */
426 if (__strcasecmp (result_set, toset) == 0
427 || (toset_expand != NULL
428 && __strcasecmp (result_set, toset_expand) == 0))
430 /* Determine the costs. If they are lower than the
431 previous solution (or this is the first solution)
432 remember this solution. */
/* Start with the cost of the module just matched and add the costs of
   the modules of the steps reached from CURRENT; the walk stops at a
   starting entry, whose `code' is NULL. */
433 int cost_hi = __gconv_modules_db[cnt]->cost_hi;
434 int cost_lo = __gconv_modules_db[cnt]->cost_lo;
435 struct derivation_step *runp = current;
436 while (runp->code != NULL)
438 cost_hi += runp->code->cost_hi;
439 cost_lo += runp->code->cost_lo;
/* cost_hi decides first; cost_lo only breaks ties. */
442 if (best == NULL || cost_hi < best_cost_hi
443 || (cost_hi == best_cost_hi && cost_lo < best_cost_lo))
445 best = NEW_STEP (result_set, __gconv_modules_db[cnt],
447 best_cost_hi = cost_hi;
448 best_cost_lo = cost_lo;
453 /* Append at the end if there is no entry with this name. */
454 struct derivation_step *runp = first;
458 if (__strcasecmp (result_set, runp->result_set) == 0)
/* LASTP addresses the `next' member at the end of the list; after the
   append it is moved on to the new entry's `next' member. */
465 *lastp = NEW_STEP (result_set, __gconv_modules_db[cnt],
467 lastp = &(*lastp)->next;
473 /* Go on with the next entry. */
474 current = current->next;
478 /* We really found a way to do the transformation. Now build a data
479 structure describing the transformation steps.*/
480 result = gen_steps (best, toset_expand ?: toset, fromset_expand ?: fromset,
484 /* We haven't found a transformation. Clear the result values. */
489 /* Add result in any case to list of known derivations. */
490 add_derivation (fromset_expand ?: fromset, toset_expand ?: toset,
493 __libc_lock_unlock (lock);
/* Find the conversion steps from FROMSET to TOSET. The configuration is
   read on first use, both names are looked up in the alias database, and
   find_derivation does the search. HANDLE and NSTEPS are output
   parameters. The work between the lock and unlock calls runs with the
   file-scope lock held. */
501 __gconv_find_transform (const char *toset, const char *fromset,
502 struct gconv_step **handle, size_t *nsteps)
504 __libc_once_define (static, once);
/* Stay NULL when no alias is known for the corresponding name. */
505 const char *fromset_expand = NULL;
506 const char *toset_expand = NULL;
509 /* Ensure that the configuration data is read. */
510 __libc_once (once, __gconv_read_conf);
512 /* Acquire the lock. */
513 __libc_lock_lock (lock);
515 /* If we don't have a module database return with an error. */
516 if (__gconv_modules_db == NULL)
519 /* See whether the names are aliases. */
520 if (__gconv_alias_db != NULL)
522 struct gconv_alias key;
523 struct gconv_alias **found;
/* The same key object is used for both lookups; only `fromname' is
   set because the comparison function reads nothing else. */
525 key.fromname = fromset;
526 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
527 fromset_expand = found != NULL ? (*found)->toname : NULL;
529 key.fromname = toset;
530 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
531 toset_expand = found != NULL ? (*found)->toname : NULL;
534 result = find_derivation (toset, toset_expand, fromset, fromset_expand,
538 /* Increment the user counter. */
539 if (result == GCONV_OK)
541 size_t cnt = *nsteps;
542 struct gconv_step *steps = *handle;
/* Steps are visited from the last one downwards. A counter that was zero
   before the increment causes the module to be looked up again by name. */
545 if (steps[--cnt].counter++ == 0)
547 steps[cnt].shlib_handle =
548 __gconv_find_shlib (steps[cnt].modname);
549 if (steps[cnt].shlib_handle == NULL)
551 /* Oops, this is the second time we use this module (after
552 unloading) and this time loading failed!? */
/* Release the handles of the steps above CNT again. */
553 while (++cnt < *nsteps)
554 __gconv_release_shlib (steps[cnt].shlib_handle);
555 result = GCONV_NOCONV;
563 /* Release the lock. */
564 __libc_lock_unlock (lock);
566 /* The following code is necessary since `find_derivation' will return
567 GCONV_OK even when no derivation was found but the same request
568 was processed before. I.e., negative results will also be cached. */
/* A cached negative result shows up as GCONV_OK with *HANDLE == NULL
   and is reported as GCONV_NOCONV. */
569 return (result == GCONV_OK
570 ? (*handle == NULL ? GCONV_NOCONV : GCONV_OK)
575 /* Release the entries of the modules list. STEPS is an array of NSTEPS
   steps. The work is done with the file-scope lock held.
   NOTE(review): the loop header that steps NSTEPS through the array and
   the return statement are not visible here. */
578 __gconv_close_transform (struct gconv_step *steps, size_t nsteps)
580 int result = GCONV_OK;
583 /* Acquire the lock. */
584 __libc_lock_lock (lock);
/* Only steps that currently hold a shared object handle are considered;
   the object is released when the counter drops to zero. */
587 if (steps[nsteps].shlib_handle != NULL
588 && --steps[nsteps].counter == 0)
590 result = __gconv_release_shlib (steps[nsteps].shlib_handle);
591 if (result != GCONV_OK)
/* Forget the handle. (The statement controlled by the test above is not
   visible here.) */
593 steps[nsteps].shlib_handle = NULL;
596 /* Release the lock. */
597 __libc_lock_unlock (lock);
604 /* Free all resources if necessary. This covers the alias tree, the
   per-module data and the tree of known derivations. */
605 static void __attribute__ ((unused))
610 if (__gconv_alias_db != NULL)
611 __tdestroy (__gconv_alias_db, free);
613 for (cnt = 0; cnt < __gconv_nmodules; ++cnt)
/* NOTE(review): find_derivation stores (regex_t *) -1L in from_regex when
   compiling the pattern fails; that value also passes this test and
   would reach __regfree -- confirm.
   NOTE(review): __regfree is called on the compiled pattern, but no free
   of the regex_t object that find_derivation obtained with malloc is
   visible here. */
615 if (__gconv_modules_db[cnt]->from_regex != NULL)
616 __regfree ((regex_t *) __gconv_modules_db[cnt]->from_regex);
618 /* Modules which names do not start with a slash are builtin
619 transformations and the memory is not allocated dynamically. */
620 if (__gconv_modules_db[cnt]->module_name[0] == '/')
621 free (__gconv_modules_db[cnt]);
/* Every cached derivation is torn down by free_derivation. */
624 if (known_derivations != NULL)
625 __tdestroy (known_derivations, free_derivation);
/* Add free_mem to the __libc_subfreeres set. */
628 text_set_element (__libc_subfreeres, free_mem);