2 * Copyright (c) 2003 The NetBSD Foundation, Inc.
5 * This code is derived from software contributed to The NetBSD Foundation
6 * by Ben Collver <collver1@attbi.com>.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 * This utility allows you to generate from an ISO10646-1 encoded
31 * BDF font other BDF fonts in any possible encoding. This way, you can
32 * derive from a single ISO10646-1 master font a whole set of 8-bit
33 * fonts in all ISO 8859 and various other encodings. (Hopefully
34 * a future XFree86 release will have a similar facility built into
35 * the server, which can reencode ISO10646-1 on the fly, because
36 * storing the same fonts in many different encodings is clearly
37 * a waste of storage capacity).
43 #if !defined(NEED_BASENAME) && !defined(Lynx)
53 /* global variable for argv[0] */
54 static const char *my_name = NULL;
58 basename(char *pathname)
62 ptr = strrchr(pathname, '/');
63 return ((ptr == NULL) ? pathname : &ptr[1]);
67 /* "CLASS" "z" string and memory manipulation */
83 zrealloc(void *ptr, size_t size)
86 temp = realloc(ptr, size);
95 zstrdup(const char *str)
100 fprintf(stderr, "%s: zstrdup(NULL)\n", my_name);
103 retval = strdup(str);
104 if (retval == NULL) {
112 zstrcpy(char **dest, const char *source)
116 *dest = zstrdup(source);
120 zquotedcpy(char **dest, const char *source)
122 const char *start, *end;
130 end = strrchr(start, '"');
132 *dest = zmalloc(end-start+1);
133 strncpy(*dest, start, end-start);
134 (*dest)[end-start] = '\0';
136 *dest = zstrdup(source);
141 zstrcat(char **dest, const char *source)
147 dest_size = strlen(*dest) + 1;
148 source_size = strlen(source);
149 *dest = zrealloc(*dest, dest_size + source_size);
150 strcpy(*dest + dest_size - 1, source);
158 for (t = s; *t != '\000'; t++)
/*
 * String truth test: non-zero when x is a non-NULL string that is not
 * exactly "0".  The argument is parenthesized so that expressions such as
 * a conditional can be passed safely.  x is evaluated twice, so it must
 * not have side effects.
 */
#define zs_true(x) ((x) != NULL && strcmp((x), "0") != 0)
/*
 * Integer truth test: non-zero only when x equals exactly 1.
 * The argument is parenthesized so that an expression containing
 * operators of lower precedence than == is compared as a whole.
 */
#define zi_true(x) ((x) == 1)
165 /* "CLASS" "dynamic array" */
176 da_new(const char *name)
180 da = zmalloc(sizeof(da_t));
186 zstrcpy(&(da->name), name);
191 da_fetch(da_t *da, int key)
195 if (key >= 0 && key < da->size && da->values[key] != NULL)
198 if (key == -1 && da->nv != NULL)
205 da_fetch_int(da_t *da, int key)
209 t = da_fetch(da, key);
/*
 * Convenience wrapper: call da_fetch(a, k) and cast its result to char *.
 * The whole expansion and both arguments are parenthesized so the macro
 * behaves like a single primary expression wherever it is used.
 */
#define da_fetch_str(a,k) \
	((char *)da_fetch((a), (k)))
219 da_add(da_t *da, int key, void *value)
223 if (key >= da->size) {
225 da->values = zrealloc(da->values,
226 da->size * sizeof(void *));
227 for (; i < da->size; i++)
228 da->values[i] = NULL;
230 if (da->values[key] != NULL) {
231 free(da->values[key]);
240 da->values[key] = value;
241 } else if (key == -1) {
249 da_add_str(da_t *da, int key, const char *value)
251 da_add(da, key, value?zstrdup(value):NULL);
255 da_add_int(da_t *da, int key, int value)
259 v = zmalloc(sizeof(int));
/*
 * Read the `count` member of the dynamic array pointed to by da.
 * The argument is parenthesized so any pointer expression (for example
 * an address-of expression) can be passed.
 */
#define da_count(da) ((da)->count)
/*
 * Read the `size` member of the dynamic array pointed to by da.
 * The argument is parenthesized so any pointer expression (for example
 * an address-of expression) can be passed.
 */
#define da_size(da) ((da)->size)
272 for (i = da->size; i; i--)
274 if (da->values != NULL)
281 /* "CLASS" file input */
283 #define TYPICAL_LINE_SIZE (80)
285 /* read a line and strip trailing whitespace */
287 read_line(FILE *fp, char **buffer)
289 int buffer_size = TYPICAL_LINE_SIZE;
294 *buffer = zmalloc(TYPICAL_LINE_SIZE);
297 if ((c = getc(fp)) == EOF)
300 while (c != '\n' && !eof) {
301 if (position + 1 >= buffer_size) {
302 buffer_size = buffer_size * 2 + 1;
303 *buffer = zrealloc(*buffer, buffer_size);
305 (*buffer)[position++] = c;
306 (*buffer)[position] = '\0';
318 while (position > 1) {
320 if (!isspace((*buffer)[position]))
322 (*buffer)[position] = '\0';
331 DEC VT100 graphics characters in the range 1-31 (as expected by
332 some old xterm versions and a few other applications)
334 #define decmap_size 31
335 static int decmap[decmap_size] = {
336 0x25C6, /* BLACK DIAMOND */
337 0x2592, /* MEDIUM SHADE */
338 0x2409, /* SYMBOL FOR HORIZONTAL TABULATION */
339 0x240C, /* SYMBOL FOR FORM FEED */
340 0x240D, /* SYMBOL FOR CARRIAGE RETURN */
341 0x240A, /* SYMBOL FOR LINE FEED */
342 0x00B0, /* DEGREE SIGN */
343 0x00B1, /* PLUS-MINUS SIGN */
344 0x2424, /* SYMBOL FOR NEWLINE */
345 0x240B, /* SYMBOL FOR VERTICAL TABULATION */
346 0x2518, /* BOX DRAWINGS LIGHT UP AND LEFT */
347 0x2510, /* BOX DRAWINGS LIGHT DOWN AND LEFT */
348 0x250C, /* BOX DRAWINGS LIGHT DOWN AND RIGHT */
349 0x2514, /* BOX DRAWINGS LIGHT UP AND RIGHT */
350 0x253C, /* BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL */
351 0x23BA, /* HORIZONTAL SCAN LINE-1 (Unicode 3.2 draft) */
352 0x23BB, /* HORIZONTAL SCAN LINE-3 (Unicode 3.2 draft) */
353 0x2500, /* BOX DRAWINGS LIGHT HORIZONTAL */
354 0x23BC, /* HORIZONTAL SCAN LINE-7 (Unicode 3.2 draft) */
355 0x23BD, /* HORIZONTAL SCAN LINE-9 (Unicode 3.2 draft) */
356 0x251C, /* BOX DRAWINGS LIGHT VERTICAL AND RIGHT */
357 0x2524, /* BOX DRAWINGS LIGHT VERTICAL AND LEFT */
358 0x2534, /* BOX DRAWINGS LIGHT UP AND HORIZONTAL */
359 0x252C, /* BOX DRAWINGS LIGHT DOWN AND HORIZONTAL */
360 0x2502, /* BOX DRAWINGS LIGHT VERTICAL */
361 0x2264, /* LESS-THAN OR EQUAL TO */
362 0x2265, /* GREATER-THAN OR EQUAL TO */
363 0x03C0, /* GREEK SMALL LETTER PI */
364 0x2260, /* NOT EQUAL TO */
365 0x00A3, /* POUND SIGN */
366 0x00B7 /* MIDDLE DOT */
372 return ((ucs >= 0x00 && ucs <= 0x1f) ||
373 (ucs >= 0x7f && ucs <= 0x9f));
377 is_blockgraphics(int ucs)
379 return ucs >= 0x2500 && ucs <= 0x25FF;
382 /* calculate the bounding box that covers both provided bounding boxes */
391 combine_bbx(int awidth, int aheight, int axoff, int ayoff,
392 int cwidth, int cheight, int cxoff, int cyoff, bbx_t *r)
395 r->cheight = cheight;
399 if (axoff < r->cxoff) {
400 r->cwidth += r->cxoff - axoff;
403 if (ayoff < r->cyoff) {
404 r->cheight += r->cyoff - ayoff;
407 if (awidth + axoff > r->cwidth + r->cxoff) {
408 r->cwidth = awidth + axoff - r->cxoff;
410 if (aheight + ayoff > r->cheight + r->cyoff) {
411 r->cheight = aheight + ayoff - r->cyoff;
420 "Usage: ucs2any [+d|-d] <source-name> { <mapping-file> <registry-encoding> }\n"
424 " +d put DEC VT100 graphics characters in the C0 range\n"
425 " (default for upright charcell fonts)\n"
427 " -d do not put DEC VT100 graphics characters in the\n"
428 " C0 range (default for all other font types)\n"
430 " <source-name> is the name of an ISO10646-1 encoded BDF file\n"
432 " <mapping-file> is the name of a character set table like those on\n"
433 " <ftp://ftp.unicode.org/Public/MAPPINGS/>\n"
435 " <registry-encoding> are the CHARSET_REGISTRY and CHARSET_ENCODING\n"
436 " field values for the font name (XLFD) of the\n"
437 " target font, separated by a hyphen\n"
441 " ucs2any 6x13.bdf 8859-1.TXT iso8859-1 8859-2.TXT iso8859-2\n"
443 "will generate the files 6x13-iso8859-1.bdf and 6x13-iso8859-2.bdf\n"
448 chars_compare(const void *aa, const void *bb)
450 int a = *(const int *)aa;
451 int b = *(const int *)bb;
457 * Return non-NULL if "string" starts with "pattern" followed by whitespace.
458 * If it does, return a pointer to the first non-space char; otherwise NULL.
461 startswith(const char *string, const char *pattern)
463 int l = strlen(pattern);
465 if (strlen(string) <= l) return NULL;
466 if (strncmp(string, pattern, l) != 0) return NULL;
468 if (!isspace(*string)) return NULL;
469 while (isspace(*string))
475 main(int argc, char *argv[])
479 char *fsource = NULL;
485 const char *nextc = NULL;
486 char *startfont = NULL;
488 char *spacing = NULL;
494 char *registry = NULL;
495 char *encoding = NULL;
496 char *fontname = NULL;
501 int default_char_index = -1;
502 int startproperties_index = -1;
503 int fontname_index = -1;
504 int charset_registry_index = -1;
505 int slant_index = -1;
506 int spacing_index = -1;
507 int charset_encoding_index = -1;
508 int fontboundingbox_index = -1;
518 char *registry_encoding = NULL;
521 bbx.cheight = bbx.cxoff = bbx.cyoff = -1;
523 startchar = da_new("startchar");
524 my_char = da_new("my_char");
526 headers = da_new("headers");
534 if (strcmp(argv[ai], "+d") == 0) {
537 } else if (strcmp(argv[ai], "-d") == 0) {
546 /* open and read source file */
548 fsource_fp = fopen(fsource, "r");
549 if (fsource_fp == NULL) {
550 fprintf(stderr, "%s: Can't read file '%s': %s!\n", my_name,
551 fsource, strerror(errno));
558 while (read_line(fsource_fp, &l)) {
559 if (startswith(l, "CHARS"))
561 if (startswith(l, "STARTFONT")) {
562 zstrcpy(&startfont, l);
563 } else if (startswith(l, "_XMBDFED_INFO") ||
564 startswith(l, "XFREE86_GLYPH_RANGES"))
567 } else if ((nextc = startswith(l, "DEFAULT_CHAR")) != NULL)
569 default_char = atoi(nextc);
570 default_char_index = ++nextheader;
571 da_add_str(headers, default_char_index, NULL);
573 if ((nextc = startswith(l, "STARTPROPERTIES")) != NULL)
575 properties = atoi(nextc);
576 startproperties_index = ++nextheader;
577 da_add_str(headers, startproperties_index, NULL);
578 } else if ((nextc = startswith(l, "FONT")) != NULL)
581 /* slightly simplistic check ... */
582 zquotedcpy(&fontname, nextc);
583 if ((term = strstr(fontname, "-ISO10646-1")) == NULL) {
585 "%s: FONT name in '%s' is '%s' and not '*-ISO10646-1'!\n",
586 my_name, fsource, fontname);
590 fontname_index = ++nextheader;
591 da_add_str(headers, fontname_index, NULL);
592 } else if ((nextc = startswith(l, "CHARSET_REGISTRY")) != NULL)
594 if (strcmp(nextc, "\"ISO10646\"") != 0) {
596 "%s: CHARSET_REGISTRY in '%s' is '%s' and not 'ISO10646'!\n",
597 my_name, fsource, nextc);
600 charset_registry_index = ++nextheader;
601 da_add_str(headers, charset_registry_index, NULL);
602 } else if ((nextc = startswith(l, "CHARSET_ENCODING")) != NULL)
604 if (strcmp(nextc, "\"1\"") != 0) {
606 "%s: CHARSET_ENCODING in '%s' is '%s' and not '1'!\n",
607 my_name, fsource, nextc);
610 charset_encoding_index = ++nextheader;
611 da_add_str(headers, charset_encoding_index, NULL);
612 } else if (startswith(l, "FONTBOUNDINGBOX")) {
613 fontboundingbox_index = ++nextheader;
614 da_add_str(headers, fontboundingbox_index, NULL);
615 } else if ((nextc = startswith(l, "SLANT")) != NULL)
617 zquotedcpy(&slant, nextc);
618 slant_index = ++nextheader;
619 da_add_str(headers, slant_index, NULL);
620 } else if ((nextc = startswith(l, "SPACING")) != NULL)
622 zquotedcpy(&spacing, nextc);
623 zstrtoupper(spacing);
624 spacing_index = ++nextheader;
625 da_add_str(headers, spacing_index, NULL);
626 } else if ((nextc = startswith(l, "COMMENT")) != NULL) {
627 if (strncmp(nextc, "$Id: ", 5)==0) {
629 char *id = NULL, *end = NULL;
630 id = zstrdup(nextc + 5);
631 end = strrchr(id, '$');
632 if (end) *end = '\0';
633 zstrcpy(&header, "COMMENT Derived from ");
634 zstrcat(&header, id);
635 zstrcat(&header, "\n");
637 da_add_str(headers, ++nextheader, header);
640 da_add_str(headers, ++nextheader, l);
643 da_add_str(headers, ++nextheader, l);
649 if (startfont == NULL) {
650 fprintf(stderr, "%s: No STARTFONT line found in '%s'!\n",
655 /* read characters */
656 while (read_line(fsource_fp, &l)) {
657 if (startswith(l, "STARTCHAR")) {
661 } else if ((nextc = startswith(l, "ENCODING")) != NULL) {
663 da_add_str(startchar, code, sc);
664 da_add_str(my_char, code, "");
665 } else if (strcmp(l, "ENDFONT")==0) {
667 zstrcpy(&sc, "STARTCHAR ???\n");
669 zstrcpy(&t, da_fetch_str(my_char, code));
672 da_add_str(my_char, code, t);
673 if (strcmp(l, "ENDCHAR")==0) {
675 zstrcpy(&sc, "STARTCHAR ???\n");
685 zstrcpy(&fmap, argv[ai]);
689 char * hyphen = strchr(argv[i], '-');
690 if (!hyphen || strchr(hyphen+1, '-') != NULL) {
692 "%s: Argument registry-encoding '%s' not in expected format!\n",
693 my_name, i < argc ? fmap : "");
696 temp = zstrdup(argv[i]);
697 hyphen = strchr(temp, '-');
698 if (hyphen) *hyphen = 0;
699 zstrcpy(&registry, temp);
700 zstrcpy(&encoding, hyphen+1);
703 fprintf(stderr, "map file argument \"%s\" needs a "
704 "coresponding registry-encoding argument\n", fmap);
711 /* open and read mapping file */
712 fmap_fp = fopen(fmap, "r");
713 if (fmap_fp == NULL) {
715 "%s: Can't read mapping file '%s': %s!\n",
716 my_name, fmap, strerror(errno));
722 for (;read_line(fmap_fp, &l); free(l)) {
725 for (p = l; isspace(p[0]); p++)
727 if (p[0] == '\0' || p[0] == '#')
729 if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
730 target = strtol(p+2, &endp, 16);
731 if (*endp == '\0') goto bad;
735 for (; isspace(p[0]); p++)
737 if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
738 ucs = strtol(p+2, &endp, 16);
739 if (*endp == '\0') goto bad;
743 if (!is_control(ucs)) {
744 if (zs_true(da_fetch_str(startchar, ucs)))
746 da_add_int(map, target, ucs);
748 if (!((is_blockgraphics(ucs) &&
749 strcmp(slant, "R") != 0) ||
753 "No glyph for character U+%04X (0x%02x) available.\n",
760 fprintf(stderr, "Unrecognized line in '%s':\n%s\n", fmap, l);
764 /* add default character */
765 if (!zi_true(da_fetch_int(map, 0))) {
766 if (zs_true(da_fetch_str(startchar, default_char))) {
767 da_add_int(map, 0, default_char);
768 da_add_str(startchar, default_char,
769 "STARTCHAR defaultchar\n");
771 fprintf(stderr, "%s",
772 "No default character defined.\n");
776 if (dec_chars == 1 ||
777 (dec_chars == -1 && strcmp(slant, "R") == 0 &&
778 strcmp(spacing, "C") == 0))
780 /* add DEC VT100 graphics characters in the range 1-31
781 (as expected by some old xterm versions) */
782 for (i = 0; i < decmap_size; i++) {
783 if (zs_true(da_fetch_str(startchar, decmap[i])))
785 da_add_int(map, i + 1, decmap[i]);
790 /* list of characters that will be written out */
794 "No characters found for %s-%s.\n",
800 chars = zmalloc(j * sizeof(int));
801 memset(chars, 0, j * sizeof(int));
802 for (k = 0, i = 0; k < da_count(map) && i < da_size(map); i++) {
803 if (da_fetch(map, i) != NULL)
806 qsort(chars, j, sizeof(int), chars_compare);
808 /* find overall font bounding box */
810 for (i = 0; i < j; i++) {
811 ucs = da_fetch_int(map, chars[i]);
812 zstrcpy(&t, da_fetch_str(my_char, ucs));
813 if ((nextc = startswith(t, "BBX")) != NULL
814 || (nextc = strstr(t, "\nBBX")) != NULL)
819 if (*nextc == '\n') {
821 while (isspace(*nextc))
824 for (;isspace(*nextc);)
826 w = strtol(nextc, &endp, 10);
828 if (*nextc == '\0') goto bbxbad;
829 for (;isspace(*nextc);)
831 h = strtol(nextc, &endp, 10);
833 if (*nextc == '\0') goto bbxbad;
834 for (;isspace(*nextc);)
836 x = strtol(nextc, &endp, 10);
838 if (*nextc == '\0') goto bbxbad;
839 for (;isspace(*nextc);)
841 y = strtol(nextc, &endp, 10);
842 if (bbx.cwidth == -1) {
848 combine_bbx(bbx.cwidth, bbx.cheight,
849 bbx.cxoff, bbx.cyoff,
854 fprintf(stderr, "Unparsable BBX found for U+%04x!\n", ucs);
857 "Warning: No BBX found for U+%04X!\n",
862 if (!registry) registry = zstrdup("");
863 if (!encoding) encoding = zstrdup("");
865 /* generate output file name */
866 zstrcpy(&registry_encoding, "-");
867 zstrcat(&registry_encoding, registry);
868 zstrcat(&registry_encoding, "-");
869 zstrcat(&registry_encoding, encoding);
872 char * p = strstr(fsource, ".bdf");
874 zstrcpy(&fout, fsource);
875 p = strstr(fout, ".bdf");
877 zstrcat(&fout, registry_encoding);
878 zstrcat(&fout, ".bdf");
880 zstrcpy(&fout, fsource);
881 zstrcat(&fout, registry_encoding);
885 /* remove path prefix */
886 zstrcpy(&t, basename(fout));
889 /* write new BDF file */
890 fprintf(stderr, "Writing %d characters into file '%s'.\n",
892 fout_fp = fopen(fout, "w");
893 if (fout_fp == NULL) {
894 fprintf(stderr, "%s: Can't write file '%s': %s!\n",
895 my_name, fout, strerror(errno));
899 fprintf(fout_fp, "%s\n", startfont);
900 fprintf(fout_fp, "%s",
901 "COMMENT AUTOMATICALLY GENERATED FILE. DO NOT EDIT!\n");
903 "COMMENT Generated with 'ucs2any %s %s %s-%s'\n",
904 fsource, fmap, registry, encoding);
905 fprintf(fout_fp, "%s",
906 "COMMENT from an ISO10646-1 encoded source BDF font.\n");
907 fprintf(fout_fp, "%s",
908 "COMMENT ucs2any by Ben Collver <collver1@attbi.com>, 2003, based on\n");
909 fprintf(fout_fp, "%s",
910 "COMMENT ucs2any.pl by Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/>, 2000.\n");
912 for (i = 0; i <= nextheader; i++) {
913 if (i == default_char_index)
914 fprintf(fout_fp, "DEFAULT_CHAR %d\n", default_char);
915 else if (i == startproperties_index)
916 fprintf(fout_fp, "STARTPROPERTIES %d\n", properties);
917 else if (i == fontname_index) {
918 fprintf(fout_fp, "FONT %s%s\n", fontname, registry_encoding);
920 else if (i == charset_registry_index)
921 fprintf(fout_fp, "CHARSET_REGISTRY \"%s\"\n", registry);
922 else if (i == slant_index)
923 fprintf(fout_fp, "SLANT \"%s\"\n", slant);
924 else if (i == charset_encoding_index)
925 fprintf(fout_fp, "CHARSET_ENCODING \"%s\"\n", encoding);
926 else if (i == fontboundingbox_index)
927 fprintf(fout_fp, "FONTBOUNDINGBOX %d %d %d %d\n", bbx.cwidth, bbx.cheight, bbx.cxoff, bbx.cyoff);
928 else if (i == spacing_index)
929 fprintf(fout_fp, "SPACING \"%s\"\n", spacing);
931 fprintf(fout_fp, "%s\n", da_fetch_str(headers, i));
934 fprintf(fout_fp, "CHARS %d\n", j);
936 /* Write characters */
937 for (i = 0; i < j; i++) {
938 ucs = da_fetch_int(map, chars[i]);
939 fprintf(fout_fp, "%s", da_fetch_str(startchar,
941 fprintf(fout_fp, "ENCODING %d\n", chars[i]);
942 fprintf(fout_fp, "%s", da_fetch_str(my_char,
945 fprintf(fout_fp, "%s", "ENDFONT\n");