2 /* Copyright (C) 2002-2020 Free Software Foundation, Inc.
3 Written by Werner Lemberg <wl@gnu.org>
5 This file is part of groff.
7 groff is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 groff is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include "stringclass.h"
// Record type stored in glyph_to_unicode_table.  Code later in this file
// reads and writes its `value` member, which holds the Unicode mapping
// string for one glyph name.
26 struct glyph_to_unicode {
// NOTE(review): declare_ptable, implement_ptable and PTABLE are macros
// defined outside this view; presumably they declare and implement a
// name-keyed table of glyph_to_unicode pointers -- confirm in their header.
30 declare_ptable(glyph_to_unicode)
31 implement_ptable(glyph_to_unicode)
// File-level table from glyph name to glyph_to_unicode record.  It is
// filled through define() by glyph_to_unicode_init's constructor and
// queried through lookup() by glyph_name_to_unicode().
33 PTABLE(glyph_to_unicode) glyph_to_unicode_table;
35 // The entries commented out in the table below can't be used in glyph
41 } glyph_to_unicode_list[] = {
125 { "ff", "0066_0066" },
126 { "Fi", "0066_0066_0069" },
127 { "Fl", "0066_0066_006C" },
128 { "fi", "0066_0069" },
129 { "fl", "0066_006C" },
173 // The soft hyphen U+00AD is meaningful only in the input file,
174 // not in the output.
327 // the curly phi variant
333 // the stroked phi variant
337 // '-' and 'hy' denote a HYPHEN, usually a glyph with a smaller width than
338 // the MINUS sign. Users who are viewing broken man pages that assume
339 // that '-' denotes a U+002D character can either fix the broken man pages
340 // or apply the workaround described in the PROBLEMS file.
397 { "product", "220F" },
398 { "coproduct", "2210" },
400 // 'mi' and '\-' represent a MINUS sign. But '\-' is used in many man pages
401 // to denote the U+002D character that introduces a command-line option.
402 // For devices that support copy&paste, such as devhtml and devutf8, the
403 // user can apply the workaround described in the PROBLEMS file.
418 { "integral", "222B" },
447 { "parenlefttp", "239B" },
448 { "parenleftex", "239C" },
449 { "parenleftbt", "239D" },
450 { "parenrighttp", "239E" },
451 { "parenrightex", "239F" },
452 { "parenrightbt", "23A0" },
453 { "bracketlefttp", "23A1" },
454 { "bracketleftex", "23A2" },
455 { "bracketleftbt", "23A3" },
456 { "bracketrighttp", "23A4" },
457 { "bracketrightex", "23A5" },
458 { "bracketrightbt", "23A6" },
460 { "bracelefttp", "23A7" },
462 { "braceleftmid", "23A8" },
464 { "braceleftbt", "23A9" },
466 { "braceex", "23AA" },
467 { "braceleftex", "23AA" },
468 { "bracerightex", "23AA" },
470 { "bracerighttp", "23AB" },
472 { "bracerightmid", "23AC" },
474 { "bracerightbt", "23AD" },
487 // The 'left angle bracket' and 'right angle bracket' could be mapped to
488 // either U+2329,U+232A or U+3008,U+3009 or U+27E8,U+27E9. But the first
489 // and second possibility are double-width characters (see Unicode's
490 // 'DerivedEastAsianWidth.txt' file) and are therefore not suitable for
491 // general use, whereas the third possibility is single-width.
493 // The devhtml device overrides this mapping, because
495 // http://www.w3.org/TR/html401/sgml/entities.html
497 // says that in HTML, '&lang;' and '&rang;' are U+2329,U+232A,
503 // global constructor
// This type exists only so that its constructor runs when the static
// instance below is initialized; that constructor fills
// glyph_to_unicode_table.  The instance is defined after the table in this
// file, so the table is constructed first (objects within one translation
// unit are initialized in definition order).
504 static struct glyph_to_unicode_init {
505 glyph_to_unicode_init();
506 } _glyph_to_unicode_init;
// Populate glyph_to_unicode_table: for every { key, value } entry of
// glyph_to_unicode_list, allocate a glyph_to_unicode record that points at
// the entry's value string and register it under the entry's key.
508 glyph_to_unicode_init::glyph_to_unicode_init()
// The loop bound is the element count of the static array
// (total size divided by the size of one element).
510 for (unsigned int i = 0;
511 i < sizeof(glyph_to_unicode_list)/sizeof(glyph_to_unicode_list[0]);
// NOTE(review): array-form new for a single record -- presumably chosen to
// match how the table releases its values (delete[]); confirm against the
// table implementation before changing it to plain new.
513 glyph_to_unicode *gtu = new glyph_to_unicode[1];
// Only the pointer is stored; the string itself is not copied.
// NOTE(review): the cast presumably drops const from the list's value
// pointer -- confirm against the member types, which are not visible here.
514 gtu->value = (char *)glyph_to_unicode_list[i].value;
// No matching delete appears in this function; presumably define() hands
// ownership of gtu to the table -- TODO confirm.
515 glyph_to_unicode_table.define(glyph_to_unicode_list[i].key, gtu);
// Return the Unicode mapping registered for glyph name `s`, or 0 (a null
// pointer) when glyph_to_unicode_table has no record for that name.
//
// The returned pointer is the `value` string of the table record.  In the
// visible table entries it is a four-digit hexadecimal code point, or
// several of them joined with '_' when a glyph maps to a sequence
// (e.g. "0066_0066_0069").
519 const char *glyph_name_to_unicode(const char *s)
521 glyph_to_unicode *result = glyph_to_unicode_table.lookup(s);
// result is tested before it is dereferenced; presumably lookup() returns
// a null pointer for names that were never defined.
522 return result ? result->value : 0;