1 /* Provide access to the collection of available transformation modules.
2 Copyright (C) 1997-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
24 #include <sys/param.h>
25 #include <libc-lock.h>
26 #include <locale/localeinfo.h>
29 #include <gconv_int.h>
30 #include <pointer_guard.h>
33 /* Simple data structure for alias mapping. We have two names, `from'
35 void *__gconv_alias_db;
37 /* Array with available modules. */
38 struct gconv_module *__gconv_modules_db;
40 /* We modify global data. */
41 __libc_lock_define_initialized (, __gconv_lock)
44 /* Provide access to module database. */
46 __gconv_get_modules_db (void)
48 return __gconv_modules_db;
52 __gconv_get_alias_db (void)
54 return __gconv_alias_db;
58 /* Function for searching alias. */
60 __gconv_alias_compare (const void *p1, const void *p2)
62 const struct gconv_alias *s1 = (const struct gconv_alias *) p1;
63 const struct gconv_alias *s2 = (const struct gconv_alias *) p2;
64 return strcmp (s1->fromname, s2->fromname);
68 /* To search for a derivation we create a list of intermediate steps.
69 Each element contains a pointer to the element which precedes it
70 in the derivation order. */
71 struct derivation_step
73 const char *result_set;
74 size_t result_set_len;
77 struct gconv_module *code;
78 struct derivation_step *last;
79 struct derivation_step *next;
82 #define NEW_STEP(result, hi, lo, module, last_mod) \
83 ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
84 newp->result_set = result; \
85 newp->result_set_len = strlen (result); \
88 newp->code = module; \
89 newp->last = last_mod; \
94 /* If a specific transformation is used more than once we should not need
95 to start looking for it again. Instead cache each successful result. */
96 struct known_derivation
100 struct __gconv_step *steps;
104 /* Compare function for database of found derivations. */
106 derivation_compare (const void *p1, const void *p2)
108 const struct known_derivation *s1 = (const struct known_derivation *) p1;
109 const struct known_derivation *s2 = (const struct known_derivation *) p2;
112 result = strcmp (s1->from, s2->from);
114 result = strcmp (s1->to, s2->to);
118 /* The search tree for known derivations. */
119 static void *known_derivations;
121 /* Look up whether given transformation was already requested before. */
123 derivation_lookup (const char *fromset, const char *toset,
124 struct __gconv_step **handle, size_t *nsteps)
126 struct known_derivation key = { fromset, toset, NULL, 0 };
127 struct known_derivation **result;
129 result = __tfind (&key, &known_derivations, derivation_compare);
132 return __GCONV_NOCONV;
134 *handle = (*result)->steps;
135 *nsteps = (*result)->nsteps;
137 /* Please note that we return GCONV_OK even if the last search for
138 this transformation was unsuccessful. */
142 /* Add new derivation to list of known ones. */
144 add_derivation (const char *fromset, const char *toset,
145 struct __gconv_step *handle, size_t nsteps)
147 struct known_derivation *new_deriv;
148 size_t fromset_len = strlen (fromset) + 1;
149 size_t toset_len = strlen (toset) + 1;
151 new_deriv = (struct known_derivation *)
152 malloc (sizeof (struct known_derivation) + fromset_len + toset_len);
153 if (new_deriv != NULL)
155 new_deriv->from = (char *) (new_deriv + 1);
156 new_deriv->to = memcpy (__mempcpy (new_deriv + 1, fromset, fromset_len),
159 new_deriv->steps = handle;
160 new_deriv->nsteps = nsteps;
162 if (__tsearch (new_deriv, &known_derivations, derivation_compare)
164 /* There is some kind of memory allocation problem. */
167 /* Please note that we don't complain if the allocation failed. This
168 is not tragically but in case we use the memory debugging facilities
169 not all memory will be freed. */
173 free_derivation (void *p)
175 struct known_derivation *deriv = (struct known_derivation *) p;
178 for (cnt = 0; cnt < deriv->nsteps; ++cnt)
179 if (deriv->steps[cnt].__counter > 0
180 && deriv->steps[cnt].__shlib_handle != NULL)
182 __gconv_end_fct end_fct = deriv->steps[cnt].__end_fct;
183 PTR_DEMANGLE (end_fct);
185 DL_CALL_FCT (end_fct, (&deriv->steps[cnt]));
188 /* Free the name strings. */
189 if (deriv->steps != NULL)
191 free ((char *) deriv->steps[0].__from_name);
192 free ((char *) deriv->steps[deriv->nsteps - 1].__to_name);
193 free ((struct __gconv_step *) deriv->steps);
200 /* Decrement the reference count for a single step in a steps array. */
202 __gconv_release_step (struct __gconv_step *step)
204 /* Skip builtin modules; they are not reference counted. */
205 if (step->__shlib_handle != NULL && --step->__counter == 0)
207 /* Call the destructor. */
208 __gconv_end_fct end_fct = step->__end_fct;
209 PTR_DEMANGLE (end_fct);
211 DL_CALL_FCT (end_fct, (step));
214 /* Release the loaded module. */
215 __gconv_release_shlib (step->__shlib_handle);
216 step->__shlib_handle = NULL;
219 else if (step->__shlib_handle == NULL)
220 /* Builtin modules should not have end functions. */
221 assert (step->__end_fct == NULL);
225 gen_steps (struct derivation_step *best, const char *toset,
226 const char *fromset, struct __gconv_step **handle, size_t *nsteps)
229 struct __gconv_step *result;
230 struct derivation_step *current;
231 int status = __GCONV_NOMEM;
232 char *from_name = NULL;
233 char *to_name = NULL;
235 /* First determine number of steps. */
236 for (current = best; current->last != NULL; current = current->last)
239 result = (struct __gconv_step *) malloc (sizeof (struct __gconv_step)
248 while (step_cnt-- > 0)
252 result[step_cnt].__from_name = from_name = __strdup (fromset);
253 if (from_name == NULL)
260 result[step_cnt].__from_name = (char *)current->last->result_set;
262 if (step_cnt + 1 == *nsteps)
264 result[step_cnt].__to_name = to_name
265 = __strdup (current->result_set);
273 result[step_cnt].__to_name = result[step_cnt + 1].__from_name;
275 result[step_cnt].__counter = 1;
276 result[step_cnt].__data = NULL;
279 if (current->code->module_name[0] == '/')
281 /* Load the module, return handle for it. */
282 struct __gconv_loaded_object *shlib_handle =
283 __gconv_find_shlib (current->code->module_name);
285 if (shlib_handle == NULL)
291 result[step_cnt].__shlib_handle = shlib_handle;
292 result[step_cnt].__modname = shlib_handle->name;
293 result[step_cnt].__fct = shlib_handle->fct;
294 result[step_cnt].__init_fct = shlib_handle->init_fct;
295 result[step_cnt].__end_fct = shlib_handle->end_fct;
297 /* These settings can be overridden by the init function. */
298 result[step_cnt].__btowc_fct = NULL;
300 /* Call the init function. */
301 __gconv_init_fct init_fct = result[step_cnt].__init_fct;
302 PTR_DEMANGLE (init_fct);
303 if (init_fct != NULL)
305 status = DL_CALL_FCT (init_fct, (&result[step_cnt]));
307 if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK)
310 /* Do not call the end function because the init
311 function has failed. */
312 result[step_cnt].__end_fct = NULL;
313 PTR_MANGLE (result[step_cnt].__end_fct);
314 /* Make sure we unload this module. */
319 PTR_MANGLE (result[step_cnt].__btowc_fct);
323 /* It's a builtin transformation. */
324 __gconv_get_builtin_trans (current->code->module_name,
327 current = current->last;
330 if (__builtin_expect (failed, 0) != 0)
332 /* Something went wrong while initializing the modules. */
333 while (++step_cnt < *nsteps)
334 __gconv_release_step (&result[step_cnt]);
340 if (status == __GCONV_OK)
341 status = __GCONV_NOCONV;
358 increment_counter (struct __gconv_step *steps, size_t nsteps)
360 /* Increment the user counter. */
362 int result = __GCONV_OK;
366 struct __gconv_step *step = &steps[cnt];
368 if (step->__counter++ == 0)
370 /* Skip builtin modules. */
371 if (step->__modname != NULL)
373 /* Reopen a previously used module. */
374 step->__shlib_handle = __gconv_find_shlib (step->__modname);
375 if (step->__shlib_handle == NULL)
377 /* Oops, this is the second time we use this module
378 (after unloading) and this time loading failed!? */
380 while (++cnt < nsteps)
381 __gconv_release_step (&steps[cnt]);
382 result = __GCONV_NOCONV;
386 /* The function addresses defined by the module may
388 step->__fct = step->__shlib_handle->fct;
389 step->__init_fct = step->__shlib_handle->init_fct;
390 step->__end_fct = step->__shlib_handle->end_fct;
392 /* These settings can be overridden by the init function. */
393 step->__btowc_fct = NULL;
395 /* Call the init function. */
396 __gconv_init_fct init_fct = step->__init_fct;
397 PTR_DEMANGLE (init_fct);
398 if (init_fct != NULL)
399 DL_CALL_FCT (init_fct, (step));
400 PTR_MANGLE (step->__btowc_fct);
409 /* The main function: find a possible derivation from the `fromset' (either
410 the given name or the alias) to the `toset' (again with alias). */
412 find_derivation (const char *toset, const char *toset_expand,
413 const char *fromset, const char *fromset_expand,
414 struct __gconv_step **handle, size_t *nsteps)
416 struct derivation_step *first, *current, **lastp, *solution = NULL;
417 int best_cost_hi = INT_MAX;
418 int best_cost_lo = INT_MAX;
421 /* Look whether an earlier call to `find_derivation' has already
422 computed a possible derivation. If so, return it immediately. */
423 result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
425 if (result == __GCONV_OK)
428 result = increment_counter (*handle, *nsteps);
433 /* The task is to find a sequence of transformations, backed by the
434 existing modules - whether builtin or dynamically loadable -,
435 starting at `fromset' (or `fromset_expand') and ending at `toset'
436 (or `toset_expand'), and with minimal cost.
438 For computer scientists, this is a shortest path search in the
439 graph where the nodes are all possible charsets and the edges are
440 the transformations listed in __gconv_modules_db.
442 For now we use a simple algorithm with quadratic runtime behaviour.
443 A breadth-first search, starting at `fromset' and `fromset_expand'.
444 The list starting at `first' contains all nodes that have been
445 visited up to now, in the order in which they have been visited --
446 excluding the goal nodes `toset' and `toset_expand' which get
447 managed in the list starting at `solution'.
448 `current' walks through the list starting at `first' and looks
449 which nodes are reachable from the current node, adding them to
450 the end of the list [`first' or `solution' respectively] (if
451 they are visited the first time) or updating them in place (if
452 they have have already been visited).
453 In each node of either list, cost_lo and cost_hi contain the
454 minimum cost over any paths found up to now, starting at `fromset'
455 or `fromset_expand', ending at that node. best_cost_lo and
456 best_cost_hi represent the minimum over the elements of the
459 if (fromset_expand != NULL)
461 first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL);
462 first->next = NEW_STEP (fromset, 0, 0, NULL, NULL);
463 lastp = &first->next->next;
467 first = NEW_STEP (fromset, 0, 0, NULL, NULL);
468 lastp = &first->next;
471 for (current = first; current != NULL; current = current->next)
473 /* Now match all the available module specifications against the
474 current charset name. If any of them matches check whether
475 we already have a derivation for this charset. If yes, use the
476 one with the lower costs. Otherwise add the new charset at the
479 The module database is organized in a tree form which allows
480 searching for prefixes. So we search for the first entry with a
481 matching prefix and any other matching entry can be found from
483 struct gconv_module *node;
485 /* Maybe it is not necessary anymore to look for a solution for
486 this entry since the cost is already as high (or higher) as
487 the cost for the best solution so far. */
488 if (current->cost_hi > best_cost_hi
489 || (current->cost_hi == best_cost_hi
490 && current->cost_lo >= best_cost_lo))
493 node = __gconv_modules_db;
496 int cmpres = strcmp (current->result_set, node->from_string);
499 /* Walk through the list of modules with this prefix and
500 try to match the name. */
501 struct gconv_module *runp;
503 /* Check all the modules with this prefix. */
507 const char *result_set = (strcmp (runp->to_string, "-") == 0
508 ? (toset_expand ?: toset)
510 int cost_hi = runp->cost_hi + current->cost_hi;
511 int cost_lo = runp->cost_lo + current->cost_lo;
512 struct derivation_step *step;
514 /* We managed to find a derivation. First see whether
515 we have reached one of the goal nodes. */
516 if (strcmp (result_set, toset) == 0
517 || (toset_expand != NULL
518 && strcmp (result_set, toset_expand) == 0))
520 /* Append to the `solution' list if there
521 is no entry with this name. */
522 for (step = solution; step != NULL; step = step->next)
523 if (strcmp (result_set, step->result_set) == 0)
528 step = NEW_STEP (result_set,
531 step->next = solution;
534 else if (step->cost_hi > cost_hi
535 || (step->cost_hi == cost_hi
536 && step->cost_lo > cost_lo))
538 /* A better path was found for the node,
539 on the `solution' list. */
541 step->last = current;
542 step->cost_hi = cost_hi;
543 step->cost_lo = cost_lo;
546 /* Update best_cost accordingly. */
547 if (cost_hi < best_cost_hi
548 || (cost_hi == best_cost_hi
549 && cost_lo < best_cost_lo))
551 best_cost_hi = cost_hi;
552 best_cost_lo = cost_lo;
555 else if (cost_hi < best_cost_hi
556 || (cost_hi == best_cost_hi
557 && cost_lo < best_cost_lo))
559 /* Append at the end of the `first' list if there
560 is no entry with this name. */
561 for (step = first; step != NULL; step = step->next)
562 if (strcmp (result_set, step->result_set) == 0)
567 *lastp = NEW_STEP (result_set,
570 lastp = &(*lastp)->next;
572 else if (step->cost_hi > cost_hi
573 || (step->cost_hi == cost_hi
574 && step->cost_lo > cost_lo))
576 /* A better path was found for the node,
577 on the `first' list. */
579 step->last = current;
581 /* Update the cost for all steps. */
582 for (step = first; step != NULL;
584 /* But don't update the start nodes. */
585 if (step->code != NULL)
587 struct derivation_step *back;
590 hi = step->code->cost_hi;
591 lo = step->code->cost_lo;
593 for (back = step->last; back->code != NULL;
596 hi += back->code->cost_hi;
597 lo += back->code->cost_lo;
604 /* Likewise for the nodes on the solution list.
605 Also update best_cost accordingly. */
606 for (step = solution; step != NULL;
609 step->cost_hi = (step->code->cost_hi
610 + step->last->cost_hi);
611 step->cost_lo = (step->code->cost_lo
612 + step->last->cost_lo);
614 if (step->cost_hi < best_cost_hi
615 || (step->cost_hi == best_cost_hi
616 && step->cost_lo < best_cost_lo))
618 best_cost_hi = step->cost_hi;
619 best_cost_lo = step->cost_lo;
627 while (runp != NULL);
638 if (solution != NULL)
640 /* We really found a way to do the transformation. */
642 /* Choose the best solution. This is easy because we know that
643 the solution list has at most length 2 (one for every possible
645 if (solution->next != NULL)
647 struct derivation_step *solution2 = solution->next;
649 if (solution2->cost_hi < solution->cost_hi
650 || (solution2->cost_hi == solution->cost_hi
651 && solution2->cost_lo < solution->cost_lo))
652 solution = solution2;
655 /* Now build a data structure describing the transformation steps. */
656 result = gen_steps (solution, toset_expand ?: toset,
657 fromset_expand ?: fromset, handle, nsteps);
661 /* We haven't found a transformation. Clear the result values. */
666 /* Add result in any case to list of known derivations. */
667 add_derivation (fromset_expand ?: fromset, toset_expand ?: toset,
675 do_lookup_alias (const char *name)
677 struct gconv_alias key;
678 struct gconv_alias **found;
680 key.fromname = (char *) name;
681 found = __tfind (&key, &__gconv_alias_db, __gconv_alias_compare);
682 return found != NULL ? (*found)->toname : NULL;
687 __gconv_compare_alias (const char *name1, const char *name2)
691 /* Ensure that the configuration data is read. */
692 __gconv_load_conf ();
694 if (__gconv_compare_alias_cache (name1, name2, &result) != 0)
695 result = strcmp (do_lookup_alias (name1) ?: name1,
696 do_lookup_alias (name2) ?: name2);
703 __gconv_find_transform (const char *toset, const char *fromset,
704 struct __gconv_step **handle, size_t *nsteps,
707 const char *fromset_expand;
708 const char *toset_expand;
711 /* Ensure that the configuration data is read. */
712 __gconv_load_conf ();
714 /* Acquire the lock. */
715 __libc_lock_lock (__gconv_lock);
717 result = __gconv_lookup_cache (toset, fromset, handle, nsteps, flags);
718 if (result != __GCONV_NODB)
720 /* We have a cache and could resolve the request, successful or not. */
721 __libc_lock_unlock (__gconv_lock);
725 /* If we don't have a module database return with an error. */
726 if (__gconv_modules_db == NULL)
728 __libc_lock_unlock (__gconv_lock);
729 return __GCONV_NOCONV;
732 /* See whether the names are aliases. */
733 fromset_expand = do_lookup_alias (fromset);
734 toset_expand = do_lookup_alias (toset);
736 if (__builtin_expect (flags & GCONV_AVOID_NOCONV, 0)
737 /* We are not supposed to create a pseudo transformation (means
738 copying) when the input and output character set are the same. */
739 && (strcmp (toset, fromset) == 0
740 || (toset_expand != NULL && strcmp (toset_expand, fromset) == 0)
741 || (fromset_expand != NULL
742 && (strcmp (toset, fromset_expand) == 0
743 || (toset_expand != NULL
744 && strcmp (toset_expand, fromset_expand) == 0)))))
746 /* Both character sets are the same. */
747 __libc_lock_unlock (__gconv_lock);
748 return __GCONV_NULCONV;
751 result = find_derivation (toset, toset_expand, fromset, fromset_expand,
754 /* Release the lock. */
755 __libc_lock_unlock (__gconv_lock);
757 /* The following code is necessary since `find_derivation' will return
758 GCONV_OK even when no derivation was found but the same request
759 was processed before. I.e., negative results will also be cached. */
760 return (result == __GCONV_OK
761 ? (*handle == NULL ? __GCONV_NOCONV : __GCONV_OK)
766 /* Release the entries of the modules list. */
768 __gconv_close_transform (struct __gconv_step *steps, size_t nsteps)
770 int result = __GCONV_OK;
773 /* Acquire the lock. */
774 __libc_lock_lock (__gconv_lock);
779 __gconv_release_step (&steps[cnt]);
782 /* If we use the cache we free a bit more since we don't keep any
783 transformation records around, they are cheap enough to
785 __gconv_release_cache (steps, nsteps);
787 /* Release the lock. */
788 __libc_lock_unlock (__gconv_lock);
794 /* Free the modules mentioned. */
796 free_modules_db (struct gconv_module *node)
798 if (node->left != NULL)
799 free_modules_db (node->left);
800 if (node->right != NULL)
801 free_modules_db (node->right);
804 struct gconv_module *act = node;
806 if (act->module_name[0] == '/')
809 while (node != NULL);
813 /* Free all resources if necessary. */
815 __gconv_db_freemem (void)
817 /* First free locale memory. This needs to be done before freeing
818 derivations, as ctype cleanup functions dereference steps arrays which we
820 _nl_locale_subfreeres ();
822 /* finddomain.c has similar problem. */
823 extern void _nl_finddomain_subfreeres (void) attribute_hidden;
824 _nl_finddomain_subfreeres ();
826 if (__gconv_alias_db != NULL)
827 __tdestroy (__gconv_alias_db, free);
829 if (__gconv_modules_db != NULL)
830 free_modules_db (__gconv_modules_db);
832 if (known_derivations != NULL)
833 __tdestroy (known_derivations, free_derivation);