1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997-2003, 2004, 2005, 2006, 2007
3 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library; if not, see
19 <http://www.gnu.org/licenses/>. */
26 #include <sys/param.h>
27 #include <bits/libc-lock.h>
28 #include <locale/localeinfo.h>
31 #include <gconv_int.h>
35 /* Simple data structure for alias mapping. We have two names, `from'
37 void *__gconv_alias_db;
39 /* Array with available modules. */
40 struct gconv_module *__gconv_modules_db;
42 /* We modify global data. */
43 __libc_lock_define_initialized (, __gconv_lock)
46 /* Provide access to module database. */
48 __gconv_get_modules_db (void)
50 return __gconv_modules_db;
54 __gconv_get_alias_db (void)
56 return __gconv_alias_db;
60 /* Function for searching alias. */
62 __gconv_alias_compare (const void *p1, const void *p2)
64 const struct gconv_alias *s1 = (const struct gconv_alias *) p1;
65 const struct gconv_alias *s2 = (const struct gconv_alias *) p2;
66 return strcmp (s1->fromname, s2->fromname);
70 /* To search for a derivation we create a list of intermediate steps.
71 Each element contains a pointer to the element which precedes it
72 in the derivation order. */
73 struct derivation_step
75 const char *result_set;
76 size_t result_set_len;
79 struct gconv_module *code;
80 struct derivation_step *last;
81 struct derivation_step *next;
84 #define NEW_STEP(result, hi, lo, module, last_mod) \
85 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
86 newp->result_set = result; \
87 newp->result_set_len = strlen (result); \
90 newp->code = module; \
91 newp->last = last_mod; \
96 /* If a specific transformation is used more than once we should not need
97 to start looking for it again. Instead cache each successful result. */
98 struct known_derivation
102 struct __gconv_step *steps;
106 /* Compare function for database of found derivations. */
108 derivation_compare (const void *p1, const void *p2)
110 const struct known_derivation *s1 = (const struct known_derivation *) p1;
111 const struct known_derivation *s2 = (const struct known_derivation *) p2;
114 result = strcmp (s1->from, s2->from);
116 result = strcmp (s1->to, s2->to);
120 /* The search tree for known derivations. */
121 static void *known_derivations;
123 /* Look up whether given transformation was already requested before. */
126 derivation_lookup (const char *fromset, const char *toset,
127 struct __gconv_step **handle, size_t *nsteps)
129 struct known_derivation key = { fromset, toset, NULL, 0 };
130 struct known_derivation **result;
132 result = __tfind (&key, &known_derivations, derivation_compare);
135 return __GCONV_NOCONV;
137 *handle = (*result)->steps;
138 *nsteps = (*result)->nsteps;
140 /* Please note that we return GCONV_OK even if the last search for
141 this transformation was unsuccessful. */
145 /* Add new derivation to list of known ones. */
148 add_derivation (const char *fromset, const char *toset,
149 struct __gconv_step *handle, size_t nsteps)
151 struct known_derivation *new_deriv;
152 size_t fromset_len = strlen (fromset) + 1;
153 size_t toset_len = strlen (toset) + 1;
155 new_deriv = (struct known_derivation *)
156 malloc (sizeof (struct known_derivation) + fromset_len + toset_len);
157 if (new_deriv != NULL)
159 new_deriv->from = (char *) (new_deriv + 1);
160 new_deriv->to = memcpy (__mempcpy (new_deriv + 1, fromset, fromset_len),
163 new_deriv->steps = handle;
164 new_deriv->nsteps = nsteps;
166 if (__tsearch (new_deriv, &known_derivations, derivation_compare)
168 /* There is some kind of memory allocation problem. */
171 /* Please note that we don't complain if the allocation failed. This
172 is not tragically but in case we use the memory debugging facilities
173 not all memory will be freed. */
176 static void __libc_freeres_fn_section
177 free_derivation (void *p)
179 struct known_derivation *deriv = (struct known_derivation *) p;
182 for (cnt = 0; cnt < deriv->nsteps; ++cnt)
183 if (deriv->steps[cnt].__counter > 0
184 && deriv->steps[cnt].__end_fct != NULL)
186 assert (deriv->steps[cnt].__shlib_handle != NULL);
188 __gconv_end_fct end_fct = deriv->steps[cnt].__end_fct;
190 PTR_DEMANGLE (end_fct);
192 DL_CALL_FCT (end_fct, (&deriv->steps[cnt]));
195 /* Free the name strings. */
196 if (deriv->steps != NULL)
198 free ((char *) deriv->steps[0].__from_name);
199 free ((char *) deriv->steps[deriv->nsteps - 1].__to_name);
200 free ((struct __gconv_step *) deriv->steps);
207 /* Decrement the reference count for a single step in a steps array. */
210 __gconv_release_step (struct __gconv_step *step)
212 /* Skip builtin modules; they are not reference counted. */
213 if (step->__shlib_handle != NULL && --step->__counter == 0)
215 /* Call the destructor. */
216 if (step->__end_fct != NULL)
218 assert (step->__shlib_handle != NULL);
220 __gconv_end_fct end_fct = step->__end_fct;
222 PTR_DEMANGLE (end_fct);
224 DL_CALL_FCT (end_fct, (step));
228 /* Release the loaded module. */
229 __gconv_release_shlib (step->__shlib_handle);
230 step->__shlib_handle = NULL;
233 else if (step->__shlib_handle == NULL)
234 /* Builtin modules should not have end functions. */
235 assert (step->__end_fct == NULL);
240 gen_steps (struct derivation_step *best, const char *toset,
241 const char *fromset, struct __gconv_step **handle, size_t *nsteps)
244 struct __gconv_step *result;
245 struct derivation_step *current;
246 int status = __GCONV_NOMEM;
248 /* First determine number of steps. */
249 for (current = best; current->last != NULL; current = current->last)
252 result = (struct __gconv_step *) malloc (sizeof (struct __gconv_step)
261 while (step_cnt-- > 0)
263 result[step_cnt].__from_name = (step_cnt == 0
265 : (char *)current->last->result_set);
266 result[step_cnt].__to_name = (step_cnt + 1 == *nsteps
267 ? __strdup (current->result_set)
268 : result[step_cnt + 1].__from_name);
270 result[step_cnt].__counter = 1;
271 result[step_cnt].__data = NULL;
274 if (current->code->module_name[0] == '/')
276 /* Load the module, return handle for it. */
277 struct __gconv_loaded_object *shlib_handle =
278 __gconv_find_shlib (current->code->module_name);
280 if (shlib_handle == NULL)
286 result[step_cnt].__shlib_handle = shlib_handle;
287 result[step_cnt].__modname = shlib_handle->name;
288 result[step_cnt].__fct = shlib_handle->fct;
289 result[step_cnt].__init_fct = shlib_handle->init_fct;
290 result[step_cnt].__end_fct = shlib_handle->end_fct;
292 /* These settings can be overridden by the init function. */
293 result[step_cnt].__btowc_fct = NULL;
295 /* Call the init function. */
296 __gconv_init_fct init_fct = result[step_cnt].__init_fct;
297 if (init_fct != NULL)
299 assert (result[step_cnt].__shlib_handle != NULL);
302 PTR_DEMANGLE (init_fct);
304 status = DL_CALL_FCT (init_fct, (&result[step_cnt]));
306 if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK)
309 /* Make sure we unload this modules. */
311 result[step_cnt].__end_fct = NULL;
316 if (result[step_cnt].__btowc_fct != NULL)
317 PTR_MANGLE (result[step_cnt].__btowc_fct);
323 /* It's a builtin transformation. */
324 __gconv_get_builtin_trans (current->code->module_name,
327 current = current->last;
330 if (__builtin_expect (failed, 0) != 0)
332 /* Something went wrong while initializing the modules. */
333 while (++step_cnt < *nsteps)
334 __gconv_release_step (&result[step_cnt]);
338 if (status == __GCONV_OK)
339 status = __GCONV_NOCONV;
357 increment_counter (struct __gconv_step *steps, size_t nsteps)
359 /* Increment the user counter. */
361 int result = __GCONV_OK;
365 struct __gconv_step *step = &steps[cnt];
367 if (step->__counter++ == 0)
369 /* Skip builtin modules. */
370 if (step->__modname != NULL)
372 /* Reopen a previously used module. */
373 step->__shlib_handle = __gconv_find_shlib (step->__modname);
374 if (step->__shlib_handle == NULL)
376 /* Oops, this is the second time we use this module
377 (after unloading) and this time loading failed!? */
379 while (++cnt < nsteps)
380 __gconv_release_step (&steps[cnt]);
381 result = __GCONV_NOCONV;
385 /* The function addresses defined by the module may
387 step->__fct = step->__shlib_handle->fct;
388 step->__init_fct = step->__shlib_handle->init_fct;
389 step->__end_fct = step->__shlib_handle->end_fct;
391 /* These settings can be overridden by the init function. */
392 step->__btowc_fct = NULL;
395 /* Call the init function. */
396 __gconv_init_fct init_fct = step->__init_fct;
397 if (init_fct != NULL)
400 PTR_DEMANGLE (init_fct);
402 DL_CALL_FCT (init_fct, (step));
405 if (step->__btowc_fct != NULL)
406 PTR_MANGLE (step->__btowc_fct);
416 /* The main function: find a possible derivation from the `fromset' (either
417 the given name or the alias) to the `toset' (again with alias). */
420 find_derivation (const char *toset, const char *toset_expand,
421 const char *fromset, const char *fromset_expand,
422 struct __gconv_step **handle, size_t *nsteps)
424 struct derivation_step *first, *current, **lastp, *solution = NULL;
425 int best_cost_hi = INT_MAX;
426 int best_cost_lo = INT_MAX;
429 /* Look whether an earlier call to `find_derivation' has already
430 computed a possible derivation. If so, return it immediately. */
431 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
433 if (result == __GCONV_OK)
436 result = increment_counter (*handle, *nsteps);
441 /* The task is to find a sequence of transformations, backed by the
442 existing modules - whether builtin or dynamically loadable -,
443 starting at `fromset' (or `fromset_expand') and ending at `toset'
444 (or `toset_expand'), and with minimal cost.
446 For computer scientists, this is a shortest path search in the
447 graph where the nodes are all possible charsets and the edges are
448 the transformations listed in __gconv_modules_db.
450 For now we use a simple algorithm with quadratic runtime behaviour.
451 A breadth-first search, starting at `fromset' and `fromset_expand'.
452 The list starting at `first' contains all nodes that have been
453 visited up to now, in the order in which they have been visited --
454 excluding the goal nodes `toset' and `toset_expand' which get
455 managed in the list starting at `solution'.
456 `current' walks through the list starting at `first' and looks
457 which nodes are reachable from the current node, adding them to
458 the end of the list [`first' or `solution' respectively] (if
459 they are visited the first time) or updating them in place (if
460 they have have already been visited).
461 In each node of either list, cost_lo and cost_hi contain the
462 minimum cost over any paths found up to now, starting at `fromset'
463 or `fromset_expand', ending at that node. best_cost_lo and
464 best_cost_hi represent the minimum over the elements of the
467 if (fromset_expand != NULL)
469 first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL);
470 first->next = NEW_STEP (fromset, 0, 0, NULL, NULL);
471 lastp = &first->next->next;
475 first = NEW_STEP (fromset, 0, 0, NULL, NULL);
476 lastp = &first->next;
479 for (current = first; current != NULL; current = current->next)
481 /* Now match all the available module specifications against the
482 current charset name. If any of them matches check whether
483 we already have a derivation for this charset. If yes, use the
484 one with the lower costs. Otherwise add the new charset at the
487 The module database is organized in a tree form which allows
488 searching for prefixes. So we search for the first entry with a
489 matching prefix and any other matching entry can be found from
491 struct gconv_module *node;
493 /* Maybe it is not necessary anymore to look for a solution for
494 this entry since the cost is already as high (or higher) as
495 the cost for the best solution so far. */
496 if (current->cost_hi > best_cost_hi
497 || (current->cost_hi == best_cost_hi
498 && current->cost_lo >= best_cost_lo))
501 node = __gconv_modules_db;
504 int cmpres = strcmp (current->result_set, node->from_string);
507 /* Walk through the list of modules with this prefix and
508 try to match the name. */
509 struct gconv_module *runp;
511 /* Check all the modules with this prefix. */
515 const char *result_set = (strcmp (runp->to_string, "-") == 0
516 ? (toset_expand ?: toset)
518 int cost_hi = runp->cost_hi + current->cost_hi;
519 int cost_lo = runp->cost_lo + current->cost_lo;
520 struct derivation_step *step;
522 /* We managed to find a derivation. First see whether
523 we have reached one of the goal nodes. */
524 if (strcmp (result_set, toset) == 0
525 || (toset_expand != NULL
526 && strcmp (result_set, toset_expand) == 0))
528 /* Append to the `solution' list if there
529 is no entry with this name. */
530 for (step = solution; step != NULL; step = step->next)
531 if (strcmp (result_set, step->result_set) == 0)
536 step = NEW_STEP (result_set,
539 step->next = solution;
542 else if (step->cost_hi > cost_hi
543 || (step->cost_hi == cost_hi
544 && step->cost_lo > cost_lo))
546 /* A better path was found for the node,
547 on the `solution' list. */
549 step->last = current;
550 step->cost_hi = cost_hi;
551 step->cost_lo = cost_lo;
554 /* Update best_cost accordingly. */
555 if (cost_hi < best_cost_hi
556 || (cost_hi == best_cost_hi
557 && cost_lo < best_cost_lo))
559 best_cost_hi = cost_hi;
560 best_cost_lo = cost_lo;
563 else if (cost_hi < best_cost_hi
564 || (cost_hi == best_cost_hi
565 && cost_lo < best_cost_lo))
567 /* Append at the end of the `first' list if there
568 is no entry with this name. */
569 for (step = first; step != NULL; step = step->next)
570 if (strcmp (result_set, step->result_set) == 0)
575 *lastp = NEW_STEP (result_set,
578 lastp = &(*lastp)->next;
580 else if (step->cost_hi > cost_hi
581 || (step->cost_hi == cost_hi
582 && step->cost_lo > cost_lo))
584 /* A better path was found for the node,
585 on the `first' list. */
587 step->last = current;
589 /* Update the cost for all steps. */
590 for (step = first; step != NULL;
592 /* But don't update the start nodes. */
593 if (step->code != NULL)
595 struct derivation_step *back;
598 hi = step->code->cost_hi;
599 lo = step->code->cost_lo;
601 for (back = step->last; back->code != NULL;
604 hi += back->code->cost_hi;
605 lo += back->code->cost_lo;
612 /* Likewise for the nodes on the solution list.
613 Also update best_cost accordingly. */
614 for (step = solution; step != NULL;
617 step->cost_hi = (step->code->cost_hi
618 + step->last->cost_hi);
619 step->cost_lo = (step->code->cost_lo
620 + step->last->cost_lo);
622 if (step->cost_hi < best_cost_hi
623 || (step->cost_hi == best_cost_hi
624 && step->cost_lo < best_cost_lo))
626 best_cost_hi = step->cost_hi;
627 best_cost_lo = step->cost_lo;
635 while (runp != NULL);
646 if (solution != NULL)
648 /* We really found a way to do the transformation. */
650 /* Choose the best solution. This is easy because we know that
651 the solution list has at most length 2 (one for every possible
653 if (solution->next != NULL)
655 struct derivation_step *solution2 = solution->next;
657 if (solution2->cost_hi < solution->cost_hi
658 || (solution2->cost_hi == solution->cost_hi
659 && solution2->cost_lo < solution->cost_lo))
660 solution = solution2;
663 /* Now build a data structure describing the transformation steps. */
664 result = gen_steps (solution, toset_expand ?: toset,
665 fromset_expand ?: fromset, handle, nsteps);
669 /* We haven't found a transformation. Clear the result values. */
674 /* Add result in any case to list of known derivations. */
675 add_derivation (fromset_expand ?: fromset, toset_expand ?: toset,
682 /* Control of initialization. */
683 __libc_once_define (static, once);
687 do_lookup_alias (const char *name)
689 struct gconv_alias key;
690 struct gconv_alias **found;
692 key.fromname = (char *) name;
693 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
694 return found != NULL ? (*found)->toname : NULL;
700 __gconv_compare_alias (const char *name1, const char *name2)
704 /* Ensure that the configuration data is read. */
705 __libc_once (once, __gconv_read_conf);
707 if (__gconv_compare_alias_cache (name1, name2, &result) != 0)
708 result = strcmp (do_lookup_alias (name1) ?: name1,
709 do_lookup_alias (name2) ?: name2);
717 __gconv_find_transform (const char *toset, const char *fromset,
718 struct __gconv_step **handle, size_t *nsteps,
721 const char *fromset_expand;
722 const char *toset_expand;
725 /* Ensure that the configuration data is read. */
726 __libc_once (once, __gconv_read_conf);
728 /* Acquire the lock. */
729 __libc_lock_lock (__gconv_lock);
731 result = __gconv_lookup_cache (toset, fromset, handle, nsteps, flags);
732 if (result != __GCONV_NODB)
734 /* We have a cache and could resolve the request, successful or not. */
735 __libc_lock_unlock (__gconv_lock);
739 /* If we don't have a module database return with an error. */
740 if (__gconv_modules_db == NULL)
742 __libc_lock_unlock (__gconv_lock);
743 return __GCONV_NOCONV;
746 /* See whether the names are aliases. */
747 fromset_expand = do_lookup_alias (fromset);
748 toset_expand = do_lookup_alias (toset);
750 if (__builtin_expect (flags & GCONV_AVOID_NOCONV, 0)
751 /* We are not supposed to create a pseudo transformation (means
752 copying) when the input and output character set are the same. */
753 && (strcmp (toset, fromset) == 0
754 || (toset_expand != NULL && strcmp (toset_expand, fromset) == 0)
755 || (fromset_expand != NULL
756 && (strcmp (toset, fromset_expand) == 0
757 || (toset_expand != NULL
758 && strcmp (toset_expand, fromset_expand) == 0)))))
760 /* Both character sets are the same. */
761 __libc_lock_unlock (__gconv_lock);
762 return __GCONV_NULCONV;
765 result = find_derivation (toset, toset_expand, fromset, fromset_expand,
768 /* Release the lock. */
769 __libc_lock_unlock (__gconv_lock);
771 /* The following code is necessary since `find_derivation' will return
772 GCONV_OK even when no derivation was found but the same request
773 was processed before. I.e., negative results will also be cached. */
774 return (result == __GCONV_OK
775 ? (*handle == NULL ? __GCONV_NOCONV : __GCONV_OK)
780 /* Release the entries of the modules list. */
783 __gconv_close_transform (struct __gconv_step *steps, size_t nsteps)
785 int result = __GCONV_OK;
788 /* Acquire the lock. */
789 __libc_lock_lock (__gconv_lock);
794 __gconv_release_step (&steps[cnt]);
797 /* If we use the cache we free a bit more since we don't keep any
798 transformation records around, they are cheap enough to
800 __gconv_release_cache (steps, nsteps);
802 /* Release the lock. */
803 __libc_lock_unlock (__gconv_lock);
809 /* Free the modules mentioned. */
811 internal_function __libc_freeres_fn_section
812 free_modules_db (struct gconv_module *node)
814 if (node->left != NULL)
815 free_modules_db (node->left);
816 if (node->right != NULL)
817 free_modules_db (node->right);
820 struct gconv_module *act = node;
822 if (act->module_name[0] == '/')
825 while (node != NULL);
829 /* Free all resources if necessary. */
830 libc_freeres_fn (free_mem)
832 /* First free locale memory. This needs to be done before freeing derivations,
833 as ctype cleanup functions dereference steps arrays which we free below. */
834 _nl_locale_subfreeres ();
836 /* finddomain.c has similar problem. */
837 extern void _nl_finddomain_subfreeres (void) attribute_hidden;
838 _nl_finddomain_subfreeres ();
840 if (__gconv_alias_db != NULL)
841 __tdestroy (__gconv_alias_db, free);
843 if (__gconv_modules_db != NULL)
844 free_modules_db (__gconv_modules_db);
846 if (known_derivations != NULL)
847 __tdestroy (known_derivations, free_derivation);