2 /* Copyright (C) 2002, 2003, 2004, 2006, 2009, 2012
3 Free Software Foundation, Inc.
4 Written by Werner Lemberg <wl@gnu.org>
6 This file is part of groff.
8 groff is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 groff is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "stringclass.h"
27 struct glyph_to_unicode {
// Instantiate the pointer-table machinery for glyph_to_unicode.
// NOTE(review): declare_ptable/implement_ptable are presumably the macros
// that declare and define the PTABLE(glyph_to_unicode) class; their
// definitions are not visible in this file -- confirm.
31 declare_ptable(glyph_to_unicode)
32 implement_ptable(glyph_to_unicode)
// The glyph-name lookup table.  It is filled by the
// glyph_to_unicode_init constructor through define() and read by
// glyph_name_to_unicode() through lookup().
34 PTABLE(glyph_to_unicode) glyph_to_unicode_table;
36 // The entries commented out in the table below can't be used in glyph
42 } glyph_to_unicode_list[] = {
126 { "ff", "0066_0066" },
127 { "Fi", "0066_0066_0069" },
128 { "Fl", "0066_0066_006C" },
129 { "fi", "0066_0069" },
130 { "fl", "0066_006C" },
174 // The soft hyphen U+00AD is meaningful only in the input file,
175 // not in the output.
328 // the curly phi variant
334 // the stroked phi variant
338 // `-' and `hy' denote a HYPHEN, usually a glyph with a smaller width than
339 // the MINUS sign. Users who are viewing broken man pages that assume
340 // that `-' denotes a U+002D character can either fix the broken man pages
341 // or apply the workaround described in the PROBLEMS file.
398 { "product", "220F" },
399 { "coproduct", "2210" },
401 // `mi' and `\-' represent a MINUS sign. But it is used in many man pages
402 // to denote the U+002D character that introduces a command-line option.
403 // For devices that support copy&paste, such as devhtml and devutf8, the
404 // user can apply the workaround described in the PROBLEMS file.
419 { "integral", "222B" },
448 { "parenlefttp", "239B" },
449 { "parenleftex", "239C" },
450 { "parenleftbt", "239D" },
451 { "parenrighttp", "239E" },
452 { "parenrightex", "239F" },
453 { "parenrightbt", "23A0" },
454 { "bracketlefttp", "23A1" },
455 { "bracketleftex", "23A2" },
456 { "bracketleftbt", "23A3" },
457 { "bracketrighttp", "23A4" },
458 { "bracketrightex", "23A5" },
459 { "bracketrightbt", "23A6" },
461 { "bracelefttp", "23A7" },
463 { "braceleftmid", "23A8" },
465 { "braceleftbt", "23A9" },
467 { "braceex", "23AA" },
468 { "braceleftex", "23AA" },
469 { "bracerightex", "23AA" },
471 { "bracerighttp", "23AB" },
473 { "bracerightmid", "23AC" },
475 { "bracerightbt", "23AD" },
494 // The `left angle bracket' and `right angle bracket' could be mapped to
495 // either U+2329,U+232A or U+3008,U+3009 or U+27E8,U+27E9. But the first
496 // and second possibility are double-width characters (see Unicode's
497 // `DerivedEastAsianWidth.txt' file) and are therefore not suitable for
498 // general use, whereas the third possibility is single-width.
500 // The devhtml device overrides this mapping, because
502 // http://www.w3.org/TR/html401/sgml/entities.html
504 // says that in HTML, `&lang;' and `&rang;' are U+2329,U+232A,
// Helper type that exists only for its constructor, which performs the
// one-time setup of glyph_to_unicode_table.
struct glyph_to_unicode_init {
  glyph_to_unicode_init();
};

// The single file-local instance; its construction, as part of this
// file's static initialization, runs the setup code.
static glyph_to_unicode_init _glyph_to_unicode_init;
// Constructor of the file-scope initializer object: walks every entry of
// glyph_to_unicode_list and registers it in glyph_to_unicode_table under
// the entry's `key' string.
515 glyph_to_unicode_init::glyph_to_unicode_init()
// The element count is computed from the array itself with sizeof, so
// the loop bound follows the size of the list.
517 for (unsigned int i = 0;
518 i < sizeof(glyph_to_unicode_list)/sizeof(glyph_to_unicode_list[0]);
// One heap-allocated glyph_to_unicode per list entry; the pointer is
// handed to the table by define() below.
// NOTE(review): the array form new[1] is presumably required because the
// table releases its values with delete[] -- confirm against the ptable
// implementation before changing it.
520 glyph_to_unicode *gtu = new glyph_to_unicode[1];
// Only the pointer is stored, not a copy of the string, so `value' points
// into glyph_to_unicode_list.  The cast presumably drops the constness of
// the list's string member (its declaration is not visible here).
521 gtu->value = (char *)glyph_to_unicode_list[i].value;
522 glyph_to_unicode_table.define(glyph_to_unicode_list[i].key, gtu);
// Look up the groff glyph name `s' in glyph_to_unicode_table.
//
// Returns the `value' string stored for that name, or 0 when lookup()
// yields a null pointer (presumably: the table has no entry for `s').
// In the visible list entries the value is written as uppercase
// hexadecimal code points, joined with `_' when one glyph maps to several
// code points (e.g. "0066_0069" for `fi').  The returned pointer is the
// one held by the table entry, not a copy.
526 const char *glyph_name_to_unicode(const char *s)
528 glyph_to_unicode *result = glyph_to_unicode_table.lookup(s);
529 return result ? result->value : 0;