1 /* liblouis Braille Translation and Back-Translation
4 Based on the Linux screenreader BRLTTY, copyright (C) 1999-2006 by
7 Copyright (C) 2004, 2005, 2006
8 ViewPlus Technologies, Inc. www.viewplus.com
10 JJB Software, Inc. www.jjb-software.com
13 This file is free software; you can redistribute it and/or modify it
14 under the terms of the Lesser or Library GNU General Public License
16 Free Software Foundation; either version 3, or (at your option) any
19 This file is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 Library GNU General Public License for more details.
24 You should have received a copy of the Library GNU General Public
25 License along with this program; see the file COPYING. If not, write to
26 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
27 Boston, MA 02110-1301, USA.
29 Maintained by John J. Boyer john.boyer@jjb-software.com
43 #define QUOTESUB 28 /*Stand-in for double quotes in strings */
46 /* Contributed by Michel Such <michel.such@free.fr> */
49 /* Adapted from BRLTTY code (see sys_progs_windows.h) */
56 printf ("Insufficient memory: %s", strerror (errno), "\n");
61 reallocWrapper (void *address, size_t size)
63 if (!(address = realloc (address, size)) && size)
69 strdupWrapper (const char *string)
71 char *address = strdup (string);
78 lou_getProgramPath (void)
83 if ((handle = GetModuleHandle (NULL)))
90 buffer = reallocWrapper (buffer, size <<= 1);
93 DWORD length = GetModuleFileName (handle, buffer, size);
97 printf ("GetModuleFileName\n");
105 path = strdupWrapper (buffer);
108 if (path[--length] == '\\')
111 strncpy (path, path, length + 1);
112 path[length + 1] = '\0';
122 printf ("GetModuleHandle\n");
135 /* End of MS contribution */
138 #include "android/log.h"
141 /* The following variables and functions make it possible to specify the
142 * path on which all tables for liblouis and all files for liblouisutdml,
143 * in their proper directories, will be found.
146 static char dataPath[MAXSTRING];
147 static char *dataPathPtr;
150 lou_setDataPath (char *path)
155 strcpy (dataPath, path);
156 dataPathPtr = dataPath;
166 /* End of dataPath code.*/
168 static char tablePath[MAXSTRING];
169 static FILE *logFile = NULL;
170 static char initialLogFileName[256];
173 lou_logFile (const char *fileName)
175 if (fileName == NULL || fileName[0] == 0)
177 if (initialLogFileName[0] == 0)
178 strcpy (initialLogFileName, fileName);
179 logFile = fopen (fileName, "wb");
180 if (logFile == NULL && initialLogFileName[0] != 0)
181 logFile = fopen (initialLogFileName, "wb");
184 fprintf (stderr, "Cannot open log file %s\n", fileName);
190 lou_logPrint (char *format, ...)
192 #ifndef __SYMBIAN32__
196 if (logFile == NULL && initialLogFileName[0] != 0)
197 logFile = fopen (initialLogFileName, "wb");
200 va_start (argp, format);
202 vfprintf (logFile, format, argp);
203 fprintf (logFile, "\n");
205 __android_log_vprint(ANDROID_LOG_DEBUG, "liblouis", format, argp);
220 eqasc2uni (const unsigned char *a, const widechar * b, const int len)
223 for (k = 0; k < len; k++)
224 if ((widechar) a[k] != b[k])
232 widechar chars[MAXSTRING];
236 static int errorCount;
237 static int warningCount;
238 static TranslationTableHeader *table;
239 static TranslationTableOffset tableSize;
240 static TranslationTableOffset tableUsed;
242 static const char *characterClassNames[] = {
255 struct CharacterClass
257 struct CharacterClass *next;
258 TranslationTableCharacterAttributes attribute;
262 static struct CharacterClass *characterClasses;
263 static TranslationTableCharacterAttributes characterClassAttribute;
265 static const char *opcodeNames[CTO_None] = {
274 "lastwordbeforecaps",
284 "lastworditalbefore",
295 "lastwordboldbefore",
306 "lastwordunderbefore",
307 "lastwordunderafter",
388 static short opcodeLengths[CTO_None] = { 0 };
391 { noEncoding, bigEndian, littleEndian, ascii8 } EncodingType;
396 const char *fileName;
399 EncodingType encoding;
403 int checkencoding[2];
404 widechar line[MAXSTRING];
408 static char scratchBuf[MAXSTRING];
411 showString (widechar const *chars, int length)
413 /*Translate a string of characters to the encoding used in character
417 scratchBuf[bufPos++] = '\'';
418 for (charPos = 0; charPos < length; charPos++)
420 if (chars[charPos] >= 32 && chars[charPos] < 127)
421 scratchBuf[bufPos++] = (char) chars[charPos];
430 hexLength = sprintf (hexbuf, "%x", chars[charPos]);
438 leadingZeros = 4 - hexLength;
448 leadingZeros = 8 - hexLength;
455 if ((bufPos + leadingZeros + hexLength + 4) >= sizeof (scratchBuf))
457 scratchBuf[bufPos++] = '\\';
458 scratchBuf[bufPos++] = escapeLetter;
459 for (hexPos = 0; hexPos < leadingZeros; hexPos++)
460 scratchBuf[bufPos++] = '0';
461 for (hexPos = 0; hexPos < hexLength; hexPos++)
462 scratchBuf[bufPos++] = hexbuf[hexPos];
465 scratchBuf[bufPos++] = '\'';
466 scratchBuf[bufPos] = 0;
471 showDots (widechar const *dots, int length)
473 /* Translate a sequence of dots to the encoding used in dots operands.
477 for (dotsPos = 0; bufPos < sizeof (scratchBuf) && dotsPos < length;
480 if ((dots[dotsPos] & B1))
481 scratchBuf[bufPos++] = '1';
482 if ((dots[dotsPos] & B2))
483 scratchBuf[bufPos++] = '2';
484 if ((dots[dotsPos] & B3))
485 scratchBuf[bufPos++] = '3';
486 if ((dots[dotsPos] & B4))
487 scratchBuf[bufPos++] = '4';
488 if ((dots[dotsPos] & B5))
489 scratchBuf[bufPos++] = '5';
490 if ((dots[dotsPos] & B6))
491 scratchBuf[bufPos++] = '6';
492 if ((dots[dotsPos] & B7))
493 scratchBuf[bufPos++] = '7';
494 if ((dots[dotsPos] & B8))
495 scratchBuf[bufPos++] = '8';
496 if ((dots[dotsPos] & B9))
497 scratchBuf[bufPos++] = '9';
498 if ((dots[dotsPos] & B10))
499 scratchBuf[bufPos++] = 'A';
500 if ((dots[dotsPos] & B11))
501 scratchBuf[bufPos++] = 'B';
502 if ((dots[dotsPos] & B12))
503 scratchBuf[bufPos++] = 'C';
504 if ((dots[dotsPos] & B13))
505 scratchBuf[bufPos++] = 'D';
506 if ((dots[dotsPos] & B14))
507 scratchBuf[bufPos++] = 'E';
508 if ((dots[dotsPos] & B15))
509 scratchBuf[bufPos++] = 'F';
510 if ((dots[dotsPos] == B16))
511 scratchBuf[bufPos++] = '0';
512 if (dotsPos != length - 1)
513 scratchBuf[bufPos++] = '-';
515 scratchBuf[bufPos] = 0;
516 return &scratchBuf[0];
520 showAttributes (TranslationTableCharacterAttributes a)
522 /* Show attributes using the letters used after the $ in multipass
526 scratchBuf[bufPos++] = 's';
527 if ((a & CTC_Letter))
528 scratchBuf[bufPos++] = 'l';
530 scratchBuf[bufPos++] = 'd';
531 if ((a & CTC_Punctuation))
532 scratchBuf[bufPos++] = 'p';
533 if ((a & CTC_UpperCase))
534 scratchBuf[bufPos++] = 'U';
535 if ((a & CTC_LowerCase))
536 scratchBuf[bufPos++] = 'u';
538 scratchBuf[bufPos++] = 'm';
540 scratchBuf[bufPos++] = 'S';
541 if ((a & CTC_LitDigit))
542 scratchBuf[bufPos++] = 'D';
543 if ((a & CTC_Class1))
544 scratchBuf[bufPos++] = 'w';
545 if ((a & CTC_Class2))
546 scratchBuf[bufPos++] = 'x';
547 if ((a & CTC_Class3))
548 scratchBuf[bufPos++] = 'y';
549 if ((a & CTC_Class4))
550 scratchBuf[bufPos++] = 'z';
551 scratchBuf[bufPos] = 0;
555 static void compileError (FileInfo * nested, char *format, ...);
558 getAChar (FileInfo * nested)
560 /*Read a big endian, little endian or ASCII 8 file and convert it to
561 * 16- or 32-bit unsigned integers */
562 int ch1 = 0, ch2 = 0;
564 if (nested->encoding == ascii8)
565 if (nested->status == 2)
568 return nested->checkencoding[1];
570 while ((ch1 = fgetc (nested->in)) != EOF)
572 if (nested->status < 2)
573 nested->checkencoding[nested->status] = ch1;
575 if (nested->status == 2)
577 if (nested->checkencoding[0] == 0xfe
578 && nested->checkencoding[1] == 0xff)
579 nested->encoding = bigEndian;
580 else if (nested->checkencoding[0] == 0xff
581 && nested->checkencoding[1] == 0xfe)
582 nested->encoding = littleEndian;
583 else if (nested->checkencoding[0] < 128
584 && nested->checkencoding[1] < 128)
586 nested->encoding = ascii8;
587 return nested->checkencoding[0];
591 compileError (nested,
592 "encoding is neither big-endian, little-endian nor ASCII 8.");
598 switch (nested->encoding)
606 ch2 = fgetc (nested->in);
609 character = (ch1 << 8) | ch2;
610 return (int) character;
613 ch2 = fgetc (nested->in);
616 character = (ch2 << 8) | ch1;
617 return (int) character;
620 if (ch1 == EOF || ch2 == EOF)
627 getALine (FileInfo * nested)
629 /*Read a line of widechar's from an input file */
633 while ((ch = getAChar (nested)) != EOF)
637 if (pch == '\\' && ch == 10)
642 if (ch == 10 || nested->linelen >= MAXSTRING)
644 nested->line[nested->linelen++] = (widechar) ch;
647 nested->line[nested->linelen] = 0;
651 nested->lineNumber++;
655 static int lastToken;
657 getToken (FileInfo * nested, CharsString * result, const char *description)
659 /*Find the next string of contiguous non-whitespace characters. If this
660 * is the last token on the line, return 2 instead of 1. */
661 while (nested->line[nested->linepos] && nested->line[nested->linepos] <= 32)
664 while (nested->line[nested->linepos] && nested->line[nested->linepos] > 32)
665 result->chars[result->length++] = nested->line[nested->linepos++];
668 /* Not enough tokens */
670 compileError (nested, "%s not specified.", description);
673 result->chars[result->length] = 0;
674 while (nested->line[nested->linepos] && nested->line[nested->linepos] <= 32)
676 if (nested->line[nested->linepos] == 0)
689 compileError (FileInfo * nested, char *format, ...)
691 #ifndef __SYMBIAN32__
692 char buffer[MAXSTRING];
694 va_start (arguments, format);
696 (void) _vsnprintf (buffer, sizeof (buffer), format, arguments);
698 (void) vsnprintf (buffer, sizeof (buffer), format, arguments);
702 lou_logPrint ("%s:%d: error: %s", nested->fileName,
703 nested->lineNumber, buffer);
705 lou_logPrint ("error: %s", buffer);
711 compileWarning (FileInfo * nested, char *format, ...)
713 #ifndef __SYMBIAN32__
714 char buffer[MAXSTRING];
716 va_start (arguments, format);
718 (void) _vsnprintf (buffer, sizeof (buffer), format, arguments);
720 (void) vsnprintf (buffer, sizeof (buffer), format, arguments);
724 lou_logPrint ("%s:%d: warning: %s", nested->fileName,
725 nested->lineNumber, buffer);
727 lou_logPrint ("warning: %s", buffer);
733 allocateSpaceInTable (FileInfo * nested, TranslationTableOffset * offset,
736 /* allocate memory for translation table and expand previously allocated
737 * memory if necessary */
738 int spaceNeeded = ((count + OFFSETSIZE - 1) / OFFSETSIZE) * OFFSETSIZE;
739 TranslationTableOffset size = tableUsed + spaceNeeded;
740 if (size > tableSize)
743 size += (size / OFFSETSIZE);
744 newTable = realloc (table, size);
747 compileError (nested, "Not enough memory for translation table.");
750 memset (((unsigned char *) newTable) + tableSize, 0, size - tableSize);
751 table = (TranslationTableHeader *) newTable;
756 *offset = (tableUsed - sizeof (*table)) / OFFSETSIZE;
757 tableUsed += spaceNeeded;
763 reserveSpaceInTable (FileInfo * nested, int count)
765 return (allocateSpaceInTable (nested, NULL, count));
769 allocateHeader (FileInfo * nested)
771 /*Allocate memory for the table header and a guess on the number of
773 const TranslationTableOffset startSize = 2 * sizeof (*table);
776 tableUsed = sizeof (*table) + OFFSETSIZE; /*So no offset is ever zero */
777 if (!(table = malloc (startSize)))
779 compileError (nested, "Not enough memory");
785 memset (table, 0, startSize);
786 tableSize = startSize;
791 stringHash (const widechar * c)
793 /*hash function for strings */
794 unsigned long int makeHash = (((unsigned long int) c[0] << 8) +
795 (unsigned long int) c[1]) % HASHNUM;
796 return (int) makeHash;
800 charHash (widechar c)
802 unsigned long int makeHash = (unsigned long int) c % HASHNUM;
803 return (int) makeHash;
806 static TranslationTableCharacter *
807 compile_findCharOrDots (widechar c, int m)
809 /*Look up a character or dot pattern. If m is 0 look up a character,
810 * otherwise look up a dot pattern. Although the algorithms are almost
811 * identical, different tables are needed for characters and dots because
812 * of the possibility of conflicts.*/
813 TranslationTableCharacter *character;
814 TranslationTableOffset bucket;
815 unsigned long int makeHash = (unsigned long int) c % HASHNUM;
817 bucket = table->characters[makeHash];
819 bucket = table->dots[makeHash];
822 character = (TranslationTableCharacter *) & table->ruleArea[bucket];
823 if (character->realchar == c)
825 bucket = character->next;
830 static TranslationTableCharacter noChar = { 0, 0, 0, CTC_Space, 32, 32, 32 };
831 static TranslationTableCharacter noDots =
832 { 0, 0, 0, CTC_Space, B16, B16, B16 };
833 static char *unknownDots (widechar dots);
835 static TranslationTableCharacter *
836 definedCharOrDots (FileInfo * nested, widechar c, int m)
838 TranslationTableCharacter *notFound;
839 TranslationTableCharacter *charOrDots = compile_findCharOrDots (c, m);
845 compileError (nested,
846 "character %s should be defined at this point but is not",
852 compileError (nested,
853 "cell %s should be defined at this point but is not",
859 static TranslationTableCharacter *
860 addCharOrDots (FileInfo * nested, widechar c, int m)
862 /*See if a character or dot pattern is in the appropriate table. If not,
863 * insert it. In either
864 * case, return a pointer to it. */
865 TranslationTableOffset bucket;
866 TranslationTableCharacter *character;
867 TranslationTableCharacter *oldchar;
868 TranslationTableOffset offset;
869 unsigned long int makeHash;
870 if ((character = compile_findCharOrDots (c, m)))
872 if (!allocateSpaceInTable (nested, &offset, sizeof (*character)))
874 character = (TranslationTableCharacter *) & table->ruleArea[offset];
875 memset (character, 0, sizeof (*character));
876 character->realchar = c;
877 makeHash = (unsigned long int) c % HASHNUM;
879 bucket = table->characters[makeHash];
881 bucket = table->dots[makeHash];
885 table->characters[makeHash] = offset;
887 table->dots[makeHash] = offset;
891 oldchar = (TranslationTableCharacter *) & table->ruleArea[bucket];
892 while (oldchar->next)
894 (TranslationTableCharacter *) & table->ruleArea[oldchar->next];
895 oldchar->next = offset;
901 getCharOrDots (widechar c, int m)
904 TranslationTableOffset bucket;
905 unsigned long int makeHash = (unsigned long int) c % HASHNUM;
907 bucket = table->charToDots[makeHash];
909 bucket = table->dotsToChar[makeHash];
912 cdPtr = (CharOrDots *) & table->ruleArea[bucket];
913 if (cdPtr->lookFor == c)
915 bucket = cdPtr->next;
921 getDotsForChar (widechar c)
923 CharOrDots *cdPtr = getCharOrDots (c, 0);
930 getCharFromDots (widechar d)
932 CharOrDots *cdPtr = getCharOrDots (d, 1);
939 putCharAndDots (FileInfo * nested, widechar c, widechar d)
941 TranslationTableOffset bucket;
943 CharOrDots *oldcdPtr = NULL;
944 TranslationTableOffset offset;
945 unsigned long int makeHash;
946 if (!(cdPtr = getCharOrDots (c, 0)))
948 if (!allocateSpaceInTable (nested, &offset, sizeof (*cdPtr)))
950 cdPtr = (CharOrDots *) & table->ruleArea[offset];
954 makeHash = (unsigned long int) c % HASHNUM;
955 bucket = table->charToDots[makeHash];
957 table->charToDots[makeHash] = offset;
960 oldcdPtr = (CharOrDots *) & table->ruleArea[bucket];
961 while (oldcdPtr->next)
962 oldcdPtr = (CharOrDots *) & table->ruleArea[oldcdPtr->next];
963 oldcdPtr->next = offset;
966 if (!(cdPtr = getCharOrDots (d, 1)))
968 if (!allocateSpaceInTable (nested, &offset, sizeof (*cdPtr)))
970 cdPtr = (CharOrDots *) & table->ruleArea[offset];
974 makeHash = (unsigned long int) d % HASHNUM;
975 bucket = table->dotsToChar[makeHash];
977 table->dotsToChar[makeHash] = offset;
980 oldcdPtr = (CharOrDots *) & table->ruleArea[bucket];
981 while (oldcdPtr->next)
982 oldcdPtr = (CharOrDots *) & table->ruleArea[oldcdPtr->next];
983 oldcdPtr->next = offset;
990 unknownDots (widechar dots)
992 /*Print out dot numbers */
993 static char buffer[20];
1031 static TranslationTableOffset newRuleOffset = 0;
1032 static TranslationTableRule *newRule = NULL;
1035 charactersDefined (FileInfo * nested)
1037 /*Check that all characters are defined by character-definition
1041 if ((newRule->opcode >= CTO_Space && newRule->opcode <= CTO_LitDigit)
1042 || newRule->opcode == CTO_SwapDd
1044 newRule->opcode == CTO_Replace || newRule->opcode == CTO_MultInd
1045 || newRule->opcode == CTO_Repeated ||
1046 ((newRule->opcode >= CTO_Context && newRule->opcode <=
1047 CTO_Pass4) && newRule->opcode != CTO_Correct))
1049 for (k = 0; k < newRule->charslen; k++)
1050 if (!compile_findCharOrDots (newRule->charsdots[k], 0))
1052 compileError (nested, "Character %s is not defined", showString
1053 (&newRule->charsdots[k], 1));
1056 if (!(newRule->opcode == CTO_Correct || newRule->opcode ==
1057 CTO_NoBreak || newRule->opcode == CTO_SwapCc || newRule->opcode ==
1060 for (k = newRule->charslen; k < newRule->charslen + newRule->dotslen;
1062 if (!compile_findCharOrDots (newRule->charsdots[k], 1))
1064 compileError (nested, "Dot pattern %s is not defined.",
1065 unknownDots (newRule->charsdots[k]));
1072 static int noback = 0;
1073 static int nofor = 0;
1075 /*The following functions are
1076 called by addRule to handle various
1080 add_0_single (FileInfo * nested)
1082 /*direction = 0, newRule->charslen = 1*/
1083 TranslationTableRule *currentRule;
1084 TranslationTableOffset *currentOffsetPtr;
1085 TranslationTableCharacter *character;
1087 if (newRule->opcode == CTO_CompDots || newRule->opcode == CTO_Comp6)
1089 if (newRule->opcode >= CTO_Pass2 && newRule->opcode <= CTO_Pass4)
1091 character = definedCharOrDots (nested, newRule->charsdots[0], m);
1092 if (m != 1 && character->attributes & CTC_Letter && (newRule->opcode
1095 || newRule->opcode ==
1098 if (table->noLetsignCount < LETSIGNSIZE)
1099 table->noLetsign[table->noLetsignCount++] = newRule->charsdots[0];
1101 if (newRule->opcode >= CTO_Space && newRule->opcode < CTO_UpLow)
1102 character->definitionRule = newRuleOffset;
1103 currentOffsetPtr = &character->otherRules;
1104 while (*currentOffsetPtr)
1106 currentRule = (TranslationTableRule *)
1107 & table->ruleArea[*currentOffsetPtr];
1108 if (currentRule->charslen == 0)
1110 if (currentRule->opcode >= CTO_Space && currentRule->opcode < CTO_UpLow)
1111 if (!(newRule->opcode >= CTO_Space && newRule->opcode < CTO_UpLow))
1113 currentOffsetPtr = ¤tRule->charsnext;
1115 newRule->charsnext = *currentOffsetPtr;
1116 *currentOffsetPtr = newRuleOffset;
1120 add_0_multiple (void)
1122 /*direction = 0 newRule->charslen > 1*/
1123 TranslationTableRule *currentRule = NULL;
1124 TranslationTableOffset *currentOffsetPtr =
1125 &table->forRules[stringHash (&newRule->charsdots[0])];
1126 while (*currentOffsetPtr)
1128 currentRule = (TranslationTableRule *)
1129 & table->ruleArea[*currentOffsetPtr];
1130 if (newRule->charslen > currentRule->charslen)
1132 if (newRule->charslen == currentRule->charslen)
1133 if ((currentRule->opcode == CTO_Always)
1134 && (newRule->opcode != CTO_Always))
1136 currentOffsetPtr = ¤tRule->charsnext;
1138 newRule->charsnext = *currentOffsetPtr;
1139 *currentOffsetPtr = newRuleOffset;
1143 add_1_single (FileInfo * nested)
1145 /*direction = 1, newRule->dotslen = 1*/
1146 TranslationTableRule *currentRule;
1147 TranslationTableOffset *currentOffsetPtr;
1148 TranslationTableCharacter *dots;
1149 if (newRule->opcode == CTO_NoBreak || newRule->opcode == CTO_SwapCc ||
1150 (newRule->opcode >= CTO_Context
1152 newRule->opcode <= CTO_Pass4)
1153 || newRule->opcode == CTO_Repeated || (newRule->opcode == CTO_Always
1154 && newRule->charslen == 1))
1155 return; /*too ambiguous */
1156 dots = definedCharOrDots (nested, newRule->charsdots[newRule->charslen], 1);
1157 if (newRule->opcode >= CTO_Space && newRule->opcode < CTO_UpLow)
1158 dots->definitionRule = newRuleOffset;
1159 currentOffsetPtr = &dots->otherRules;
1160 while (*currentOffsetPtr)
1162 currentRule = (TranslationTableRule *)
1163 & table->ruleArea[*currentOffsetPtr];
1164 if (newRule->charslen > currentRule->charslen ||
1165 currentRule->dotslen == 0)
1167 if (currentRule->opcode >= CTO_Space && currentRule->opcode < CTO_UpLow)
1168 if (!(newRule->opcode >= CTO_Space && newRule->opcode < CTO_UpLow))
1170 currentOffsetPtr = ¤tRule->dotsnext;
1172 newRule->dotsnext = *currentOffsetPtr;
1173 *currentOffsetPtr = newRuleOffset;
1177 add_1_multiple (void)
1179 /*direction = 1, newRule->dotslen > 1*/
1180 TranslationTableRule *currentRule = NULL;
1181 TranslationTableOffset *currentOffsetPtr = &table->backRules[stringHash
1186 if (newRule->opcode == CTO_NoBreak || newRule->opcode == CTO_SwapCc ||
1187 (newRule->opcode >= CTO_Context && newRule->opcode <= CTO_Pass4))
1189 while (*currentOffsetPtr)
1193 currentRule = (TranslationTableRule *)
1194 & table->ruleArea[*currentOffsetPtr];
1195 currentLength = currentRule->dotslen + currentRule->charslen;
1196 newLength = newRule->dotslen + newRule->charslen;
1197 if (newLength > currentLength)
1199 if (currentLength == newLength)
1200 if ((currentRule->opcode == CTO_Always)
1201 && (newRule->opcode != CTO_Always))
1203 currentOffsetPtr = ¤tRule->dotsnext;
1205 newRule->dotsnext = *currentOffsetPtr;
1206 *currentOffsetPtr = newRuleOffset;
1210 makeRuleChain (TranslationTableOffset * offsetPtr)
1212 TranslationTableRule *currentRule;
1215 currentRule = (TranslationTableRule *) & table->ruleArea[*offsetPtr];
1216 offsetPtr = ¤tRule->charsnext;
1218 newRule->charsnext = *offsetPtr;
1219 *offsetPtr = newRuleOffset;
1223 addPassRule (FileInfo * nested)
1225 TranslationTableOffset *offsetPtr;
1226 switch (newRule->opcode)
1229 offsetPtr = &table->attribOrSwapRules[0];
1232 offsetPtr = &table->attribOrSwapRules[1];
1235 offsetPtr = &table->attribOrSwapRules[2];
1238 offsetPtr = &table->attribOrSwapRules[3];
1241 offsetPtr = &table->attribOrSwapRules[4];
1246 makeRuleChain (offsetPtr);
1253 TranslationTableOpcode opcode,
1254 CharsString * ruleChars,
1255 CharsString * ruleDots,
1256 TranslationTableCharacterAttributes after,
1257 TranslationTableCharacterAttributes before)
1259 /*Add a rule to the table, using the hash function to find the start of
1260 * chains and chaining both the chars and dots strings */
1261 int ruleSize = sizeof (TranslationTableRule) - (DEFAULTRULESIZE * CHARSIZE);
1262 int direction = 0; /*0 = forward translation; 1 = bacward */
1264 ruleSize += CHARSIZE * ruleChars->length;
1266 ruleSize += CHARSIZE * ruleDots->length;
1267 if (!allocateSpaceInTable (nested, &newRuleOffset, ruleSize))
1269 newRule = (TranslationTableRule *) & table->ruleArea[newRuleOffset];
1270 newRule->opcode = opcode;
1271 newRule->after = after;
1272 newRule->before = before;
1274 memcpy (&newRule->charsdots[0], &ruleChars->chars[0],
1275 CHARSIZE * (newRule->charslen = ruleChars->length));
1277 newRule->charslen = 0;
1279 memcpy (&newRule->charsdots[newRule->charslen],
1280 &ruleDots->chars[0], CHARSIZE * (newRule->dotslen =
1283 newRule->dotslen = 0;
1284 if (!charactersDefined (nested))
1287 /*link new rule into table. */
1288 if (opcode == CTO_SwapCc || opcode == CTO_SwapCd || opcode == CTO_SwapDd)
1290 if (opcode >= CTO_Context && opcode <= CTO_Pass4 && newRule->charslen == 0)
1291 return addPassRule (nested);
1292 if (newRule->charslen == 0 || nofor)
1294 while (direction < 2)
1296 if (direction == 0 && newRule->charslen == 1)
1297 add_0_single (nested);
1298 else if (direction == 0 && newRule->charslen > 1)
1300 else if (direction == 1 && newRule->dotslen == 1 && !noback)
1301 add_1_single (nested);
1302 else if (direction == 1 && newRule->dotslen > 1 && !noback)
1308 if (newRule->dotslen == 0)
1314 static const struct CharacterClass *
1315 findCharacterClass (const CharsString * name)
1317 /*Find a character class, whether predefined or user-defined */
1318 const struct CharacterClass *class = characterClasses;
1321 if ((name->length == class->length) &&
1322 (memcmp (&name->chars[0], class->name, CHARSIZE *
1323 name->length) == 0))
1325 class = class->next;
1330 static struct CharacterClass *
1331 addCharacterClass (FileInfo * nested, const widechar * name, int length)
1333 /*Define a character class, whether predefined or user-defined */
1334 struct CharacterClass *class;
1335 if (characterClassAttribute)
1337 if ((class = malloc (sizeof (*class) + CHARSIZE * (length - 1))))
1339 memset (class, 0, sizeof (*class));
1340 memcpy (class->name, name, CHARSIZE * (class->length = length));
1341 class->attribute = characterClassAttribute;
1342 characterClassAttribute <<= 1;
1343 class->next = characterClasses;
1344 characterClasses = class;
1348 compileError (nested, "character class table overflow.");
1353 deallocateCharacterClasses (void)
1355 while (characterClasses)
1357 struct CharacterClass *class = characterClasses;
1358 characterClasses = characterClasses->next;
1365 allocateCharacterClasses (void)
1367 /*Allocate memory for predefined character classes */
1369 characterClasses = NULL;
1370 characterClassAttribute = 1;
1371 while (characterClassNames[k])
1373 widechar wname[MAXSTRING];
1374 int length = strlen (characterClassNames[k]);
1376 for (kk = 0; kk < length; kk++)
1377 wname[kk] = (widechar) characterClassNames[k][kk];
1378 if (!addCharacterClass (NULL, wname, length))
1380 deallocateCharacterClasses ();
1388 static TranslationTableOpcode
1389 getOpcode (FileInfo * nested, const CharsString * token)
1391 static TranslationTableOpcode lastOpcode = 0;
1392 TranslationTableOpcode opcode = lastOpcode;
1396 if (token->length == opcodeLengths[opcode])
1397 if (eqasc2uni ((unsigned char *) opcodeNames[opcode],
1398 &token->chars[0], token->length))
1400 lastOpcode = opcode;
1404 if (opcode >= CTO_None)
1407 while (opcode != lastOpcode);
1408 compileError (nested, "opcode %s not defined.", showString
1409 (&token->chars[0], token->length));
1413 TranslationTableOpcode
1414 findOpcodeNumber (const char *toFind)
1416 /* Used by tools such as lou_debug */
1417 static TranslationTableOpcode lastOpcode = 0;
1418 TranslationTableOpcode opcode = lastOpcode;
1419 int length = strlen (toFind);
1422 if (length == opcodeLengths[opcode] && strcasecmp (toFind,
1423 opcodeNames[opcode])
1426 lastOpcode = opcode;
1430 if (opcode >= CTO_None)
1433 while (opcode != lastOpcode);
1438 findOpcodeName (TranslationTableOpcode opcode)
1440 /* Used by tools such as lou_debug */
1441 if (opcode < 0 || opcode >= CTO_None)
1443 sprintf (scratchBuf, "%d", opcode);
1446 return opcodeNames[opcode];
1450 hexValue (FileInfo * nested, const widechar * digits, int length)
1453 unsigned int binaryValue = 0;
1454 for (k = 0; k < length; k++)
1456 unsigned int hexDigit = 0;
1457 if (digits[k] >= '0' && digits[k] <= '9')
1458 hexDigit = digits[k] - '0';
1459 else if (digits[k] >= 'a' && digits[k] <= 'f')
1460 hexDigit = digits[k] - 'a' + 10;
1461 else if (digits[k] >= 'A' && digits[k] <= 'F')
1462 hexDigit = digits[k] - 'A' + 10;
1465 compileError (nested, "invalid %d-digit hexadecimal number",
1467 return (widechar) 0xffffffff;
1469 binaryValue |= hexDigit << (4 * (length - 1 - k));
1471 return (widechar) binaryValue;
1475 static int first0Bit[MAXBYTES] = { 0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0XFE };
1478 parseChars (FileInfo * nested, CharsString * result, CharsString * token)
1482 int lastOutSize = 0;
1484 unsigned int ch = 0;
1486 unsigned int utf32 = 0;
1488 while (in < token->length)
1490 ch = token->chars[in++] & 0xff;
1494 { /* escape sequence */
1495 switch (ch = token->chars[in])
1528 if (token->length - in > 4)
1530 ch = hexValue (nested, &token->chars[in + 1], 4);
1539 compileError (nested,
1540 "liblouis has not been compiled for 32-bit Unicode");
1543 if (token->length - in > 5)
1545 ch = hexValue (nested, &token->chars[in + 1], 5);
1553 if (token->length - in > 8)
1555 ch = hexValue (nested, &token->chars[in + 1], 8);
1560 compileError (nested, "invalid escape sequence '\\%c'", ch);
1565 result->chars[out++] = (widechar) ch;
1566 if (out >= MAXSTRING)
1568 result->length = out;
1575 for (numBytes = MAXBYTES - 1; numBytes >= 0; numBytes--)
1576 if (ch >= first0Bit[numBytes])
1580 utf32 = ch & (0XFF - first0Bit[numBytes]);
1581 for (k = 0; k < numBytes; k++)
1583 if (in >= MAXSTRING)
1585 if (token->chars[in] < 128 || (token->chars[in] & 0x0040))
1587 compileWarning (nested, "invalid UTF-8. Assuming Latin-1.");
1588 result->chars[out++] = token->chars[lastIn];
1592 utf32 = (utf32 << 6) + (token->chars[in++] & 0x3f);
1594 if (CHARSIZE == 2 && utf32 > 0xffff)
1596 result->chars[out++] = (widechar) utf32;
1597 if (out >= MAXSTRING)
1599 result->length = lastOutSize;
1603 result->length = out;
1608 extParseChars (const char *inString, widechar * outString)
1610 /* Parse external character strings */
1614 for (k = 0; inString[k] && k < MAXSTRING; k++)
1615 wideIn.chars[k] = inString[k];
1616 wideIn.chars[k] = 0;
1618 parseChars (NULL, &result, &wideIn);
1624 for (k = 0; k < result.length; k++)
1625 outString[k] = result.chars[k];
1627 return result.length;
1631 parseDots (FileInfo * nested, CharsString * cells, const CharsString * token)
1633 /*get dot patterns */
1634 widechar cell = 0; /*assembly place for dots */
1639 for (index = 0; index < token->length; index++)
1641 int started = index != start;
1642 widechar character = token->chars[index];
1644 { /*or dots to make up Braille cell */
1698 if (started && !cell)
1702 compileError (nested, "dot specified more than once.");
1708 case '0': /*blank */
1712 case '-': /*got all dots for this cell */
1715 compileError (nested, "missing cell specification.");
1718 cells->chars[cellCount++] = cell | B16;
1724 compileError (nested, "invalid dot number %s.", showString
1731 compileError (nested, "missing cell specification.");
1734 cells->chars[cellCount++] = cell | B16; /*last cell */
1735 cells->length = cellCount;
1740 extParseDots (const char *inString, widechar * outString)
1742 /* Parse external dot patterns */
1746 for (k = 0; inString[k] && k < MAXSTRING; k++)
1747 wideIn.chars[k] = inString[k];
1748 wideIn.chars[k] = 0;
1750 parseDots (NULL, &result, &wideIn);
1756 for (k = 0; k < result.length; k++)
1757 outString[k] = result.chars[k];
1759 return result.length;
1763 getCharacters (FileInfo * nested, CharsString * characters)
1765 /*Get ruleChars string */
1767 if (getToken (nested, &token, "characters"))
1768 if (parseChars (nested, characters, &token))
1774 getRuleCharsText (FileInfo * nested, CharsString * ruleChars)
1777 if (getToken (nested, &token, "Characters operand"))
1778 if (parseChars (nested, ruleChars, &token))
1784 getRuleDotsText (FileInfo * nested, CharsString * ruleDots)
1787 if (getToken (nested, &token, "characters"))
1788 if (parseChars (nested, ruleDots, &token))
1794 getRuleDotsPattern (FileInfo * nested, CharsString * ruleDots)
1796 /*Interpret the dots operand */
1798 if (getToken (nested, &token, "Dots operand"))
1800 if (token.length == 1 && token.chars[0] == '=')
1802 ruleDots->length = 0;
1805 if (parseDots (nested, ruleDots, &token))
1812 getCharacterClass (FileInfo * nested, const struct CharacterClass **class)
1815 if (getToken (nested, &token, "character class name"))
1817 if ((*class = findCharacterClass (&token)))
1819 compileError (nested, "character class not defined.");
1824 static int compileFile (const char *fileName);
1827 includeFile (FileInfo * nested, CharsString * includedFile)
1829 /*Implement include opcode*/
1831 char includeThis[MAXSTRING];
1832 for (k = 0; k < includedFile->length; k++)
1833 includeThis[k] = (char) includedFile->chars[k];
1835 return compileFile (includeThis);
1840 struct RuleName *next;
1841 TranslationTableOffset ruleOffset;
1845 static struct RuleName *ruleNames = NULL;
1846 static TranslationTableOffset
1847 findRuleName (const CharsString * name)
1849 const struct RuleName *nameRule = ruleNames;
1852 if ((name->length == nameRule->length) &&
1853 (memcmp (&name->chars[0], nameRule->name, CHARSIZE *
1854 name->length) == 0))
1855 return nameRule->ruleOffset;
1856 nameRule = nameRule->next;
1862 addRuleName (FileInfo * nested, CharsString * name)
1865 struct RuleName *nameRule;
1866 if (!(nameRule = malloc (sizeof (*nameRule) + CHARSIZE *
1867 (name->length - 1))))
1869 compileError (nested, "not enough memory");
1872 memset (nameRule, 0, sizeof (*nameRule));
1873 for (k = 0; k < name->length; k++)
1875 TranslationTableCharacter *ch = definedCharOrDots
1876 (nested, name->chars[k],
1878 if (!(ch->attributes & CTC_Letter))
1880 compileError (nested, "a name may contain only letters");
1883 nameRule->name[k] = name->chars[k];
1885 nameRule->length = name->length;
1886 nameRule->ruleOffset = newRuleOffset;
1887 nameRule->next = ruleNames;
1888 ruleNames = nameRule;
1893 deallocateRuleNames (void)
1897 struct RuleName *nameRule = ruleNames;
1898 ruleNames = ruleNames->next;
1905 compileSwapDots (FileInfo * nested, CharsString * source, CharsString * dest)
1909 CharsString dotsSource;
1910 CharsString dotsDest;
1912 dotsSource.length = 0;
1913 while (k <= source->length)
1915 if (source->chars[k] != ',' && k != source->length)
1916 dotsSource.chars[dotsSource.length++] = source->chars[k];
1919 if (!parseDots (nested, &dotsDest, &dotsSource))
1921 dest->chars[dest->length++] = dotsDest.length + 1;
1922 for (kk = 0; kk < dotsDest.length; kk++)
1923 dest->chars[dest->length++] = dotsDest.chars[kk];
1924 dotsSource.length = 0;
1932 compileSwap (FileInfo * nested, TranslationTableOpcode opcode)
1934 CharsString ruleChars;
1935 CharsString ruleDots;
1937 CharsString matches;
1938 CharsString replacements;
1939 if (!getToken (nested, &name, "name operand"))
1941 if (!getToken (nested, &matches, "matches operand"))
1943 if (!getToken (nested, &replacements, "replacements operand"))
1945 if (opcode == CTO_SwapCc || opcode == CTO_SwapCd)
1947 if (!parseChars (nested, &ruleChars, &matches))
1952 if (!compileSwapDots (nested, &matches, &ruleChars))
1955 if (opcode == CTO_SwapCc)
1957 if (!parseChars (nested, &ruleDots, &replacements))
1962 if (!compileSwapDots (nested, &replacements, &ruleDots))
1965 if (!addRule (nested, opcode, &ruleChars, &ruleDots, 0, 0))
1967 if (!addRuleName (nested, &name))
1974 getNumber (widechar * source, widechar * dest)
1976 /*Convert a string of wide character digits to an integer*/
1979 while (source[k] >= '0' && source[k] <= '9')
1980 *dest = 10 * *dest + (source[k++] - '0');
1984 /* Start of multipass compiler*/
/* File-scope state shared by the pass* helpers and compilePassOpcode. */
/* Characters part of the rule being built; passed to addRule. */
1985 static CharsString passRuleChars;
/* Dots part of the rule; its chars array doubles as the instruction
 * buffer (compilePassOpcode points passInstructions at it). */
1986 static CharsString passRuleDots;
/* Most recent string, dot pattern or name produced by the token helpers. */
1987 static CharsString passHoldString;
/* The operand text currently being compiled. */
1988 static CharsString passLine;
/* Current scan position in passLine. */
1989 static int passLinepos;
/* Position at which the most recent script token started; used to
 * push a token back. */
1990 static int passPrevLinepos;
/* Most recent number parsed, or the variable number found by name. */
1991 static widechar passHoldNumber;
/* Emphasis flags collected by the emphasis parser. */
1992 static widechar passEmphasis;
/* Character attribute flags collected by passGetAttributes or taken
 * from a character class. */
1993 static TranslationTableCharacterAttributes passAttributes;
/* File context used for error reporting by the pass* helpers. */
1994 static FileInfo *passNested;
/* Opcode of the rule being compiled; also appended to variable names
 * by passFindName/passAddName. */
1995 static TranslationTableOpcode passOpcode;
/* Output buffer for compiled instructions. */
1996 static widechar *passInstructions;
/* Translate attribute letters found at passLinepos into CTC_* flags
 * accumulated in passAttributes.  Reports "Missing attribute" when no
 * flag ends up set. */
2000 passGetAttributes ()
/* Dispatch on the current character; each recognised letter adds one
 * attribute flag. */
2006 switch (passLine.chars[passLinepos])
/* This branch selects every attribute bit at once. */
2009 passAttributes = 0xffffffff;
2012 passAttributes |= CTC_Digit;
2015 passAttributes |= CTC_LitDigit;
2018 passAttributes |= CTC_Letter;
2021 passAttributes |= CTC_Math;
2023 case pass_punctuation:
2024 passAttributes |= CTC_Punctuation;
2027 passAttributes |= CTC_Sign;
2030 passAttributes |= CTC_Space;
2032 case pass_uppercase:
2033 passAttributes |= CTC_UpperCase;
2035 case pass_lowercase:
2036 passAttributes |= CTC_LowerCase;
/* User-definable classes 1 to 4. */
2039 passAttributes |= CTC_Class1;
2042 passAttributes |= CTC_Class2;
2045 passAttributes |= CTC_Class3;
2048 passAttributes |= CTC_Class4;
/* Nothing recognised: an attribute list may not be empty. */
2057 if (!passAttributes)
2059 compileError (passNested, "Missing attribute");
/* Emphasis parser (presumably passGetEmphasis, which compilePassOpcode
 * calls after an opening parenthesis): each recognised indicator
 * character ORs one emphasis flag into passEmphasis. */
2074 switch (passLine.chars[passLinepos])
2077 passEmphasis |= italic;
2080 passEmphasis |= bold;
2083 passEmphasis |= underline;
2086 passEmphasis |= computer_braille;
/* Error path for input that contains no usable indicator.
 * NOTE(review): the exact condition is on a line not shown - confirm. */
2097 compileError (passNested, "emphasis indicators expected");
/* Dot-pattern collector (presumably passGetDots): copies the run of
 * characters that can belong to a dot pattern - digits and letters are
 * visibly accepted; the first alternative of the condition is on a line
 * not shown - from passLine into collectDots, then lets parseDots store
 * the decoded cells in passHoldString. */
2107 CharsString collectDots;
2108 collectDots.length = 0;
2109 while (passLinepos < passLine.length && (passLine.chars[passLinepos]
2111 || (passLine.chars[passLinepos] >=
2114 chars[passLinepos] <= '9')
2117 chars[passLinepos] | 32) >= 'a'
2119 chars[passLinepos] | 32) <=
2121 collectDots.chars[collectDots.length++] = passLine.chars[passLinepos++];
2122 if (!parseDots (passNested, &passHoldString, &collectDots))
/* Quoted-string reader (called as passGetString by the tokeniser):
 * copies characters from passLine into passHoldString until the closing
 * double quote and NUL-terminates the result. */
2130 passHoldString.length = 0;
/* A zero element before the closing quote means the string never ended. */
2133 if (!passLine.chars[passLinepos])
2135 compileError (passNested, "unterminated string");
/* 34 is the ASCII double quote, i.e. the end of the string. */
2138 if (passLine.chars[passLinepos] == 34)
/* QUOTESUB stands in for a double quote inside the string; turn it back
 * into a real one. */
2140 if (passLine.chars[passLinepos] == QUOTESUB)
2141 passHoldString.chars[passHoldString.length++] = 34;
2143 passHoldString.chars[passHoldString.length++] =
2144 passLine.chars[passLinepos];
2147 passHoldString.chars[passHoldString.length] = 0;
2155 /*Convert a string of wide character digits to an integer */
/* Consumes decimal digits at passLinepos (advancing it) and accumulates
 * their value in passHoldNumber. */
2157 while (passLine.chars[passLinepos] >= '0'
2158 && passLine.chars[passLinepos] <= '9')
2160 10 * passHoldNumber + (passLine.chars[passLinepos++] - '0');
/* Name reader (called as passGetName): collects characters carrying the
 * CTC_Letter attribute from passLine into passHoldString.  The first
 * character and the following ones are tested separately;
 * NOTE(review): what each failed test does is on lines not shown -
 * presumably an error for the first character and end-of-name for the
 * rest. */
2167 TranslationTableCharacterAttributes attr;
2168 passHoldString.length = 0;
2171 attr = definedCharOrDots (passNested, passLine.chars[passLinepos],
2173 if (passHoldString.length == 0)
2175 if (!(attr & CTC_Letter))
2181 if (!(attr & CTC_Letter))
2183 passHoldString.chars[passHoldString.length++] =
2184 passLine.chars[passLinepos];
2187 while (passLinepos < passLine.length);
2192 passIsKeyword (const char *token)
2195 int length = strlen (token);
2196 int ch = passLine.chars[passLinepos + length + 1];
2197 if (((ch | 32) >= 'a' && (ch | 32) <= 'z') || (ch >= '0' && ch <= '9'))
2199 for (k = 0; k < length && passLine.chars[passLinepos + k + 1]
2200 == (widechar) token[k]; k++);
2203 passLinepos += length + 1;
/* Link to the next variable-name entry; the list ends with NULL. */
2211 struct PassName *next;
/* Head of the list of named multipass variables.  passAddName pushes
 * entries at the front; passFindName searches it. */
2216 static struct PassName *passNames = NULL;
2219 passFindName (const CharsString * name)
2221 const struct PassName *curname = passNames;
2222 CharsString augmentedName;
2223 for (augmentedName.length = 0; augmentedName.length < name->length;
2224 augmentedName.length++)
2225 augmentedName.chars[augmentedName.length] =
2226 name->chars[augmentedName.length];
2227 augmentedName.chars[augmentedName.length++] = passOpcode;
2230 if ((augmentedName.length == curname->length) &&
2232 (&augmentedName.chars[0], curname->name,
2233 CHARSIZE * name->length) == 0))
2234 return curname->varnum;
2235 curname = curname->next;
2237 compileError (passNested, "name not found");
2242 passAddName (CharsString * name, int var)
2245 struct PassName *curname;
2246 CharsString augmentedName;
2247 for (augmentedName.length = 0;
2248 augmentedName.length < name->length; augmentedName.length++)
2250 chars[augmentedName.length] = name->chars[augmentedName.length];
2251 augmentedName.chars[augmentedName.length++] = passOpcode;
2254 malloc (sizeof (*curname) + CHARSIZE * (augmentedName.length - 1))))
2256 compileError (passNested, "not enough memory");
2259 memset (curname, 0, sizeof (*curname));
2260 for (k = 0; k < augmentedName.length; k++)
2262 curname->name[k] = augmentedName.chars[k];
2264 curname->length = augmentedName.length;
2265 curname->varnum = var;
2266 curname->next = passNames;
2267 passNames = curname;
2272 passGetScriptToken ()
2274 while (passLinepos < passLine.length)
2276 passPrevLinepos = passLinepos;
2277 switch (passLine.chars[passLinepos])
2281 if (passGetString ())
2283 return pass_invalidToken;
2288 return pass_invalidToken;
2289 case '#': /*comment */
2290 passLinepos = passLine.length + 1;
2291 return pass_noMoreTokens;
2293 if (passLine.chars[passLinepos + 1] == '=')
2308 if (passLine.chars[passLinepos] == '=')
2316 if (passLine.chars[passLinepos] == '=')
2327 return pass_leftParen;
2330 return pass_rightParen;
2335 if (passLine.chars[passLinepos = 1] == '&')
2340 return pass_invalidToken;
2342 if (passLine.chars[passLinepos + 1] == '|')
2347 return pass_invalidToken;
2349 if (passIsKeyword ("ttr"))
2350 return pass_attributes;
2352 return pass_nameFound;
2354 if (passIsKeyword ("ack"))
2355 return pass_lookback;
2356 if (passIsKeyword ("ool"))
2357 return pass_boolean;
2359 return pass_nameFound;
2361 if (passIsKeyword ("lass"))
2364 return pass_nameFound;
2366 if (passIsKeyword ("ef"))
2369 return pass_nameFound;
2371 if (passIsKeyword ("mph"))
2372 return pass_emphasis;
2374 return pass_nameFound;
2376 if (passIsKeyword ("ind"))
2378 if (passIsKeyword ("irst"))
2381 return pass_nameFound;
2383 if (passIsKeyword ("roup"))
2386 return pass_nameFound;
2388 if (passIsKeyword ("f"))
2391 return pass_nameFound;
2393 if (passIsKeyword ("ast"))
2396 return pass_nameFound;
2398 if (passIsKeyword ("ark"))
2401 return pass_nameFound;
2403 if (passIsKeyword ("epgroup"))
2404 return pass_repGroup;
2405 if (passIsKeyword ("epcopy"))
2407 if (passIsKeyword ("epomit"))
2409 if (passIsKeyword ("ep"))
2410 return pass_replace;
2412 return pass_nameFound;
2414 if (passIsKeyword ("cript"))
2416 if (passIsKeyword ("wap"))
2419 return pass_nameFound;
2421 if (passIsKeyword ("hen"))
2424 return pass_nameFound;
2426 if (passLine.chars[passLinepos] <= 32)
2431 if (passLine.chars[passLinepos] >= '0'
2432 && passLine.chars[passLinepos] <= '9')
2435 return pass_numberFound;
2439 if (!passGetName ())
2440 return pass_invalidToken;
2442 return pass_nameFound;
2446 return pass_noMoreTokens;
/* Expect-'(' helper (called as passIsLeftParen): fetches the next
 * script token and reports an error unless it is a left parenthesis. */
2452 pass_Codes passCode = passGetScriptToken ();
2453 if (passCode != pass_leftParen)
2455 compileError (passNested, "'(' expected");
/* Expect-name helper (presumably passIsName - the signature line is not
 * shown): fetches the next script token and reports an error unless it
 * is a name. */
2464 pass_Codes passCode = passGetScriptToken ();
2465 if (passCode != pass_nameFound)
2467 compileError (passNested, "a name expected");
/* Expect-',' helper (called as passIsComma): fetches the next script
 * token and reports an error unless it is a comma. */
2476 pass_Codes passCode = passGetScriptToken ();
2477 if (passCode != pass_comma)
2479 compileError (passNested, "',' expected");
/* Expect-number helper (called as passIsNumber): fetches the next
 * script token and reports an error unless it is a number.  Callers
 * read the value from passHoldNumber afterwards. */
2488 pass_Codes passCode = passGetScriptToken ();
2489 if (passCode != pass_numberFound)
2491 compileError (passNested, "a number expected");
/* Expect-')' helper (called as passIsRightParen): fetches the next
 * script token and reports an error unless it is a right parenthesis. */
2500 pass_Codes passCode = passGetScriptToken ();
2501 if (passCode != pass_rightParen)
2503 compileError (passNested, "')' expected");
/* Range parser (called as passGetRange): reads what follows an
 * attribute or swap name - either ')' or ", min" or ", min, max" then
 * ')' - and appends two range values to the instruction buffer. */
2512 pass_Codes passCode = passGetScriptToken ();
2513 if (!(passCode == pass_comma || passCode == pass_rightParen))
2515 compileError (passNested, "invalid range");
/* No range given: both values default to 1. */
2518 if (passCode == pass_rightParen)
2520 passInstructions[passIC++] = 1;
2521 passInstructions[passIC++] = 1;
/* First number: the lower bound. */
2524 if (!passIsNumber ())
2526 passInstructions[passIC++] = passHoldNumber;
2527 passCode = passGetScriptToken ();
2528 if (!(passCode == pass_comma || passCode == pass_rightParen))
2530 compileError (passNested, "invalid range");
/* Only one number given: it is used for the upper bound as well. */
2533 if (passCode == pass_rightParen)
2535 passInstructions[passIC++] = passHoldNumber;
/* Second number: the upper bound, followed by the closing ')'. */
2538 if (!passIsNumber ())
2540 passInstructions[passIC++] = passHoldNumber;
2541 if (!passIsRightParen ())
2547 passInsertAttributes ()
2549 passInstructions[passIC++] = pass_attributes;
2550 passInstructions[passIC++] = passAttributes >> 16;
2551 passInstructions[passIC++] = passAttributes & 0xffff;
2552 if (!passGetRange ())
/* Compile the operands of a multipass opcode into an instruction
 * sequence and add the resulting rule.
 * Instructions are written through passInstructions, which points at
 * passRuleDots.chars; passIC is the write index.  Two operand syntaxes
 * appear below: the "script" syntax, tokenised by passGetScriptToken,
 * and the older one-character-per-operator syntax.  After compilation
 * the instruction list is scanned once more to derive the rule's
 * characters and the `after` value passed to addRule.
 * NOTE(review): most case labels are not visible in this listing, so
 * branch descriptions below are inferred from what each branch emits. */
2558 compilePassOpcode (FileInfo * nested, TranslationTableOpcode opcode)
2560 /*Compile the operands of a pass opcode */
2561 TranslationTableCharacterAttributes after = 0;
2562 TranslationTableCharacterAttributes before = 0;
2564 const struct CharacterClass *class;
2565 TranslationTableOffset ruleOffset = 0;
2566 TranslationTableRule *rule = NULL;
2569 pass_Codes passCode;
/* The instruction buffer is the dots part of the rule itself. */
2572 passInstructions = passRuleDots.chars;
2573 passIC = 0; /*Instruction counter */
2574 passRuleChars.length = 0;
2575 passNested = nested;
2576 passOpcode = opcode;
2577 /* passHoldString and passLine are static variables declared
/* Copy the unread remainder of the table line into passHoldString. */
2580 passHoldString.length = 0;
2581 for (k = nested->linepos; k < nested->linelen; k++)
2582 passHoldString.chars[passHoldString.length++] = nested->line[k];
/* Operands not starting with "script" use the older syntax: the first
 * blank after the test part is replaced by SEPCHAR so that the test and
 * action parts can be told apart after parseChars. */
2583 if (!eqasc2uni ((unsigned char *) "script", passHoldString.chars, 6))
2586 #define SEPCHAR 0x0001
2587 for (k = 0; k < passHoldString.length && passHoldString.chars[k] > 32;
2589 if (k < passHoldString.length)
2590 passHoldString.chars[k] = SEPCHAR;
2593 compileError (passNested, "Invalid multipass operands");
/* Resolve escapes; the result in passLine is what gets compiled. */
2597 parseChars (passNested, &passLine, &passHoldString);
/* ---- Script syntax ---- */
2601 passCode = passGetScriptToken ();
2602 if (passCode != pass_script)
2604 compileError (passNested, "Invalid multipass statement");
/* Declarative part: name/number definitions are registered with
 * passAddName. */
2610 passCode = passGetScriptToken ();
2614 if (!passIsLeftParen ())
2618 if (!passIsComma ())
2620 if (!passIsNumber ())
2622 if (!passIsRightParen ())
2624 passAddName (&passHoldString, passHoldNumber);
2630 compileError (passNested,
2631 "invalid definition in declarative part");
/* "if" part: one instruction (or group) is emitted per token. */
2639 passCode = passGetScriptToken ();
2640 passSubOp = passCode;
2644 passInstructions[passIC++] = pass_not;
2647 passInstructions[passIC++] = pass_first;
2650 passInstructions[passIC++] = pass_last;
2653 passInstructions[passIC++] = pass_search;
/* Character strings are limited to the context and correct opcodes. */
2656 if (opcode != CTO_Context && opcode != CTO_Correct)
2658 compileError (passNested,
2659 "Character strings can only be used with the context and correct opcodes.");
2662 passInstructions[passIC++] = pass_string;
2665 if (passOpcode == CTO_Correct || passOpcode == CTO_Context)
2667 compileError (passNested,
2668 "dot patterns cannot be specified in the if part\
2669 of the correct or context opcodes");
2672 passInstructions[passIC++] = pass_dots;
/* Shared tail for strings and dots: length, then the elements. */
2674 passInstructions[passIC++] = passHoldString.length;
2675 for (kk = 0; kk < passHoldString.length; kk++)
2676 passInstructions[passIC++] = passHoldString.chars[kk];
2678 case pass_attributes:
2679 if (!passIsLeftParen ())
2681 if (!passGetAttributes ())
2683 if (!passInsertAttributes ())
2687 if (!passIsLeftParen ())
2689 if (!passGetEmphasis ())
2691 /*Right parenthesis handled by subfunction */
/* Lookback: an optional "(n)" follows; without it the count is 1 and
 * the token just read is pushed back. */
2694 passInstructions[passIC++] = pass_lookback;
2695 passCode = passGetScriptToken ();
2696 if (passCode != pass_leftParen)
2698 passInstructions[passIC++] = 1;
2699 passLinepos = passPrevLinepos;
2702 if (!passIsNumber ())
2704 if (!passIsRightParen ())
/* NOTE(review): unlike every other store in this function, passIC is
 * not incremented here, so the next instruction would overwrite the
 * lookback count - confirm whether a following line advances passIC. */
2706 passInstructions[passIC] = passHoldNumber;
2709 if (!passIsLeftParen ())
2713 passInstructions[passIC++] = pass_startReplace;
2714 passInstructions[passIC++] = pass_endReplace;
2717 passInstructions[passIC++] = pass_startReplace;
2718 if (!passIsLeftParen ())
2721 case pass_rightParen:
2722 passInstructions[passIC++] = pass_endReplace;
/* Grouping reference: the named rule must be a CTO_Grouping rule; its
 * offset is emitted as two 16-bit halves. */
2724 case pass_groupstart:
2726 if (!passIsLeftParen ())
2728 if (!passGetName ())
2730 if (!passIsRightParen ())
2732 ruleOffset = findRuleName (&passHoldString);
2734 rule = (TranslationTableRule *) & table->ruleArea[ruleOffset];
2735 if (rule && rule->opcode == CTO_Grouping)
2737 passInstructions[passIC++] = passSubOp;
2738 passInstructions[passIC++] = ruleOffset >> 16;
2739 passInstructions[passIC++] = ruleOffset & 0xffff;
2744 compileError (passNested, "%s is not a grouping name",
2745 showString (&passHoldString.chars[0],
2746 passHoldString.length));
/* Character class reference: emitted as an attributes instruction. */
2751 if (!passIsLeftParen ())
2753 if (!passGetName ())
2755 if (!passIsRightParen ())
2757 if (!(class = findCharacterClass (&passHoldString)))
2759 passAttributes = class->attribute;
2760 passInsertAttributes ();
/* Swap reference.
 * NOTE(review): this first findRuleName call runs before the name has
 * been read; its result is overwritten a few lines below. */
2763 ruleOffset = findRuleName (&passHoldString);
2764 if (!passIsLeftParen ())
2766 if (!passGetName ())
2768 if (!passIsRightParen ())
2770 ruleOffset = findRuleName (&passHoldString);
2772 rule = (TranslationTableRule *) & table->ruleArea[ruleOffset];
2774 && (rule->opcode == CTO_SwapCc || rule->opcode == CTO_SwapCd
2775 || rule->opcode == CTO_SwapDd))
2777 passInstructions[passIC++] = pass_swap;
2778 passInstructions[passIC++] = ruleOffset >> 16;
2779 passInstructions[passIC++] = ruleOffset & 0xffff;
2780 if (!passGetRange ())
2784 compileError (passNested,
2785 "%s is not a swap name.",
2786 showString (&passHoldString.chars[0],
2787 passHoldString.length));
/* Variable comparison: operator, variable number, then the constant. */
2789 case pass_nameFound:
2790 passHoldNumber = passFindName (&passHoldString);
2791 passCode = passGetScriptToken ();
2792 if (!(passCode == pass_eq || passCode == pass_lt || passCode
2793 == pass_gt || passCode == pass_noteq || passCode ==
2794 pass_lteq || passCode == pass_gteq))
2796 compileError (nested,
2797 "invalid comparison operator in if part");
2800 passInstructions[passIC++] = passCode;
2801 passInstructions[passIC++] = passHoldNumber;
2802 if (!passIsNumber ())
2804 passInstructions[passIC++] = passHoldNumber;
/* End of the "if" part. */
2807 passInstructions[passIC++] = pass_endTest;
2811 compileError (passNested, "invalid choice in if part");
/* "then" part. */
2820 passCode = passGetScriptToken ();
2821 passSubOp = passCode;
2825 if (opcode != CTO_Correct)
2827 compileError (passNested,
2828 "Character strings can only be used in the then part with the correct opcode.");
2831 passInstructions[passIC++] = pass_string;
2832 goto thenDoCharsDots;
2834 if (opcode == CTO_Correct)
2836 compileError (passNested,
2837 "Dot patterns cannot be used with the correct opcode.");
2840 passInstructions[passIC++] = pass_dots;
2842 passInstructions[passIC++] = passHoldString.length;
2843 for (kk = 0; kk < passHoldString.length; kk++)
2844 passInstructions[passIC++] = passHoldString.chars[kk];
/* Variable update: '+', '-' or '=' with a constant. */
2846 case pass_nameFound:
2847 passHoldNumber = passFindName (&passHoldString);
2848 passCode = passGetScriptToken ();
2849 if (!(passCode == pass_plus || passCode == pass_hyphen
2850 || passCode == pass_eq))
2852 compileError (nested,
2853 "Invalid variable operator in then part");
2856 passInstructions[passIC++] = passCode;
2857 passInstructions[passIC++] = passHoldNumber;
2858 if (!passIsNumber ())
2860 passInstructions[passIC++] = passHoldNumber;
2863 passInstructions[passIC++] = pass_copy;
2866 passInstructions[passIC++] = pass_omit;
/* NOTE(review): same early findRuleName call as in the "if" part. */
2869 ruleOffset = findRuleName (&passHoldString);
2870 if (!passIsLeftParen ())
2872 if (!passGetName ())
2874 if (!passIsRightParen ())
2876 ruleOffset = findRuleName (&passHoldString);
2878 rule = (TranslationTableRule *) & table->ruleArea[ruleOffset];
2880 && (rule->opcode == CTO_SwapCc || rule->opcode == CTO_SwapCd
2881 || rule->opcode == CTO_SwapDd))
2883 passInstructions[passIC++] = pass_swap;
2884 passInstructions[passIC++] = ruleOffset >> 16;
2885 passInstructions[passIC++] = ruleOffset & 0xffff;
2886 if (!passGetRange ())
2890 compileError (passNested,
2891 "%s is not a swap name.",
2892 showString (&passHoldString.chars[0],
2893 passHoldString.length));
2895 case pass_noMoreTokens:
2899 compileError (passNested, "invalid action in then part");
/* ---- Older syntax ---- */
2906 /* Older machine-language-like "assembler". */
2908 /*Compile test part */
/* The test part ends at SEPCHAR, which is overwritten by pass_endTest
 * so that the loop below terminates on it. */
2909 for (k = 0; k < passLine.length && passLine.chars[k] != SEPCHAR; k++);
2911 passLine.chars[endTest] = pass_endTest;
2913 while (passLinepos <= endTest)
/* Here each operator is a single character of passLine. */
2915 switch ((passSubOp = passLine.chars[passLinepos]))
2918 passInstructions[passIC++] = pass_lookback;
2921 if (passHoldNumber == 0)
2923 passInstructions[passIC++] = passHoldNumber;
2926 passInstructions[passIC++] = pass_not;
2930 passInstructions[passIC++] = pass_first;
2934 passInstructions[passIC++] = pass_last;
2938 passInstructions[passIC++] = pass_search;
2942 if (opcode != CTO_Context && opcode != CTO_Correct)
2944 compileError (passNested,
2945 "Character strings can only be used with the context and correct opcodes.");
2949 passInstructions[passIC++] = pass_string;
2951 goto testDoCharsDots;
2954 passInstructions[passIC++] = pass_dots;
2957 if (passHoldString.length == 0)
2959 passInstructions[passIC++] = passHoldString.length;
2960 for (kk = 0; kk < passHoldString.length; kk++)
2961 passInstructions[passIC++] = passHoldString.chars[kk];
2963 case pass_startReplace:
2964 passInstructions[passIC++] = pass_startReplace;
2967 case pass_endReplace:
2968 passInstructions[passIC++] = pass_endReplace;
/* Variable test: a comparison operator follows the variable number. */
2974 switch (passLine.chars[passLinepos])
2977 passInstructions[passIC++] = pass_eq;
2980 if (passLine.chars[passLinepos + 1] == pass_eq)
2983 passInstructions[passIC++] = pass_lteq;
2986 passInstructions[passIC++] = pass_lt;
2989 if (passLine.chars[passLinepos + 1] == pass_eq)
2992 passInstructions[passIC++] = pass_gteq;
2995 passInstructions[passIC++] = pass_gt;
2997 passInstructions[passIC++] = passHoldNumber;
3000 passInstructions[passIC++] = passHoldNumber;
3003 compileError (passNested, "incorrect comparison operator");
/* Attributes: flags as two 16-bit halves, then a repeat range. */
3007 case pass_attributes:
3009 passGetAttributes ();
3011 passInstructions[passIC++] = pass_attributes;
3012 passInstructions[passIC++] = passAttributes >> 16;
3013 passInstructions[passIC++] = passAttributes & 0xffff;
/* pass_until stands for the widest range, 1 to 0xffff. */
3015 if (passLine.chars[passLinepos] == pass_until)
3018 passInstructions[passIC++] = 1;
3019 passInstructions[passIC++] = 0xffff;
/* No count given: exactly one occurrence. */
3023 if (passHoldNumber == 0)
3025 passHoldNumber = passInstructions[passIC++] = 1;
3026 passInstructions[passIC++] = 1; /*This is not an error */
3029 passInstructions[passIC++] = passHoldNumber;
/* Without a hyphen the single count serves as both bounds. */
3030 if (passLine.chars[passLinepos] != pass_hyphen)
3032 passInstructions[passIC++] = passHoldNumber;
3037 if (passHoldNumber == 0)
3039 compileError (passNested, "invalid range");
3042 passInstructions[passIC++] = passHoldNumber;
3044 case pass_groupstart:
3048 ruleOffset = findRuleName (&passHoldString);
3050 rule = (TranslationTableRule *) & table->ruleArea[ruleOffset];
3051 if (rule && rule->opcode == CTO_Grouping)
3053 passInstructions[passIC++] = passSubOp;
3054 passInstructions[passIC++] = ruleOffset >> 16;
3055 passInstructions[passIC++] = ruleOffset & 0xffff;
3060 compileError (passNested, "%s is not a grouping name",
3061 showString (&passHoldString.chars[0],
3062 passHoldString.length));
/* A name here is tried first as a character class, then as a swap. */
3068 if ((class = findCharacterClass (&passHoldString)))
3070 passAttributes = class->attribute;
3071 goto insertAttributes;
3073 ruleOffset = findRuleName (&passHoldString);
3075 rule = (TranslationTableRule *) & table->ruleArea[ruleOffset];
3077 && (rule->opcode == CTO_SwapCc || rule->opcode == CTO_SwapCd
3078 || rule->opcode == CTO_SwapDd))
3080 passInstructions[passIC++] = pass_swap;
3081 passInstructions[passIC++] = ruleOffset >> 16;
3082 passInstructions[passIC++] = ruleOffset & 0xffff;
3085 compileError (passNested,
3086 "%s is neither a class name nor a swap name.",
3087 showString (&passHoldString.chars[0],
3088 passHoldString.length));
3091 passInstructions[passIC++] = pass_endTest;
3095 compileError (passNested,
3096 "incorrect operator '%c ' in test part",
3097 passLine.chars[passLinepos]);
3101 } /*Compile action part */
3103 /* Compile action part */
/* Skip the blanks between the test part and the action part. */
3104 while (passLinepos < passLine.length &&
3105 passLine.chars[passLinepos] <= 32)
/* The action part runs until the next blank or the end of the line. */
3107 while (passLinepos < passLine.length &&
3108 passLine.chars[passLinepos] > 32)
3110 switch ((passSubOp = passLine.chars[passLinepos]))
3113 if (opcode != CTO_Correct)
3115 compileError (passNested,
3116 "Character strings can only be used with the ccorrect opcode.");
3120 passInstructions[passIC++] = pass_string;
3122 goto actionDoCharsDots;
3124 if (opcode == CTO_Correct)
3126 compileError (passNested,
3127 "Dot patterns cannot be used with the correct opcode.");
3132 passInstructions[passIC++] = pass_dots;
3134 if (passHoldString.length == 0)
3136 passInstructions[passIC++] = passHoldString.length;
3137 for (kk = 0; kk < passHoldString.length; kk++)
3138 passInstructions[passIC++] = passHoldString.chars[kk];
/* Variable action: assignment or increment/decrement. */
3143 switch (passLine.chars[passLinepos])
3146 passInstructions[passIC++] = pass_eq;
3147 passInstructions[passIC++] = passHoldNumber;
3150 passInstructions[passIC++] = passHoldNumber;
3154 passInstructions[passIC++] = passLine.chars[passLinepos];
3155 passInstructions[passIC++] = passHoldNumber;
3158 compileError (passNested,
3159 "incorrect variable operator in action part");
3164 passInstructions[passIC++] = pass_copy;
3168 passInstructions[passIC++] = pass_omit;
3171 case pass_groupreplace:
3172 case pass_groupstart:
3176 ruleOffset = findRuleName (&passHoldString);
3178 rule = (TranslationTableRule *) & table->ruleArea[ruleOffset];
3179 if (rule && rule->opcode == CTO_Grouping)
3181 passInstructions[passIC++] = passSubOp;
3182 passInstructions[passIC++] = ruleOffset >> 16;
3183 passInstructions[passIC++] = ruleOffset & 0xffff;
3186 compileError (passNested, "%s is not a grouping name",
3187 showString (&passHoldString.chars[0],
3188 passHoldString.length));
3193 ruleOffset = findRuleName (&passHoldString);
3195 rule = (TranslationTableRule *) & table->ruleArea[ruleOffset];
3197 && (rule->opcode == CTO_SwapCc || rule->opcode == CTO_SwapCd
3198 || rule->opcode == CTO_SwapDd))
3200 passInstructions[passIC++] = pass_swap;
3201 passInstructions[passIC++] = ruleOffset >> 16;
3202 passInstructions[passIC++] = ruleOffset & 0xffff;
3205 compileError (passNested, "%s is not a swap name.",
3206 showString (&passHoldString.chars[0],
3207 passHoldString.length));
3211 compileError (passNested, "incorrect operator in action part");
/* ---- Common tail for both syntaxes ---- */
3217 /*Analyze and add rule */
/* The number of instructions emitted becomes the length of the dots. */
3218 passRuleDots.length = passIC;
/* Walk the compiled instructions to find the first one that can serve
 * as the rule's key. */
3220 while (passIC < passRuleDots.length)
3223 switch (passInstructions[passIC])
3227 case pass_attributes:
3231 case pass_groupstart:
3246 case pass_startReplace:
3247 case pass_endReplace:
3252 compileError (passNested,
3253 "Test/if part must contain characters, dots, attributes or class \
3261 switch (passInstructions[passIC])
/* String or dots instruction: its payload becomes the rule's characters. */
3265 for (k = 0; k < passInstructions[passIC + 1]; k++)
3266 passRuleChars.chars[k] = passInstructions[passIC + 2 + k];
3267 passRuleChars.length = k;
3270 case pass_attributes:
3271 case pass_groupstart:
/* No literal key is available, so `after` carries the instruction count. */
3274 after = passRuleDots.length;
3280 if (!addRule (passNested, opcode, &passRuleChars, &passRuleDots,
3286 /* End of multipass compiler */
3289 compileBrailleIndicator (FileInfo * nested, char *ermsg,
3290 TranslationTableOpcode opcode,
3291 TranslationTableOffset * rule)
3295 if (getToken (nested, &token, ermsg))
3296 if (parseDots (nested, &cells, &token))
3297 if (!addRule (nested, opcode, NULL, &cells, 0, 0))
3299 *rule = newRuleOffset;
3304 compileNumber (FileInfo * nested)
3308 if (!getToken (nested, &token, "number"))
3310 getNumber (&token.chars[0], &dest);
3313 compileError (nested, "a nonzero positive number is required");
3320 compileGrouping (FileInfo * nested)
3324 CharsString groupChars;
3325 CharsString groupDots;
3326 CharsString dotsParsed;
3327 TranslationTableCharacter *charsDotsPtr;
3330 if (!getToken (nested, &name, "name operand"))
3332 if (!getRuleCharsText (nested, &groupChars))
3334 if (!getToken (nested, &groupDots, "dots operand"))
3336 for (k = 0; k < groupDots.length && groupDots.chars[k] != ','; k++);
3337 if (k == groupDots.length)
3339 compileError (nested,
3340 "Dots operand must consist of two cells separated by a comma");
3343 groupDots.chars[k] = '-';
3344 if (!parseDots (nested, &dotsParsed, &groupDots))
3346 if (groupChars.length != 2 || dotsParsed.length != 2)
3348 compileError (nested,
3349 "two Unicode characters and two cells separated by a comma are needed.");
3352 charsDotsPtr = addCharOrDots (nested, groupChars.chars[0], 0);
3353 charsDotsPtr->attributes |= CTC_Math;
3354 charsDotsPtr->uppercase = charsDotsPtr->realchar;
3355 charsDotsPtr->lowercase = charsDotsPtr->realchar;
3356 charsDotsPtr = addCharOrDots (nested, groupChars.chars[1], 0);
3357 charsDotsPtr->attributes |= CTC_Math;
3358 charsDotsPtr->uppercase = charsDotsPtr->realchar;
3359 charsDotsPtr->lowercase = charsDotsPtr->realchar;
3360 charsDotsPtr = addCharOrDots (nested, dotsParsed.chars[0], 1);
3361 charsDotsPtr->attributes |= CTC_Math;
3362 charsDotsPtr->uppercase = charsDotsPtr->realchar;
3363 charsDotsPtr->lowercase = charsDotsPtr->realchar;
3364 charsDotsPtr = addCharOrDots (nested, dotsParsed.chars[1], 1);
3365 charsDotsPtr->attributes |= CTC_Math;
3366 charsDotsPtr->uppercase = charsDotsPtr->realchar;
3367 charsDotsPtr->lowercase = charsDotsPtr->realchar;
3368 if (!addRule (nested, CTO_Grouping, &groupChars, &dotsParsed, 0, 0))
3370 if (!addRuleName (nested, &name))
3372 putCharAndDots (nested, groupChars.chars[0], dotsParsed.chars[0]);
3373 putCharAndDots (nested, groupChars.chars[1], dotsParsed.chars[1]);
3374 endChar = groupChars.chars[1];
3375 endDots = dotsParsed.chars[1];
3376 groupChars.length = dotsParsed.length = 1;
3377 if (!addRule (nested, CTO_Math, &groupChars, &dotsParsed, 0, 0))
3379 groupChars.chars[0] = endChar;
3380 dotsParsed.chars[0] = endDots;
3381 if (!addRule (nested, CTO_Math, &groupChars, &dotsParsed, 0, 0))
/* Compile an uplow rule: exactly two characters (the uppercase letter
 * followed by its lowercase form) and a dots operand holding either one
 * pattern used for both letters, or two patterns separated by a comma
 * (uppercase first).  Both letters are entered in the character table,
 * cells not yet known are entered in the dots table, and one
 * CTO_LowerCase and one CTO_UpperCase rule are added. */
3387 compileUplow (FileInfo * nested)
3390 TranslationTableCharacter *upperChar;
3391 TranslationTableCharacter *lowerChar;
3392 TranslationTableCharacter *upperCell = NULL;
3393 TranslationTableCharacter *lowerCell = NULL;
3394 CharsString ruleChars;
3395 CharsString ruleDots;
3396 CharsString upperDots;
3397 CharsString lowerDots;
/* Zero when there is no comma; otherwise it temporarily holds the full
 * length of the dots operand. */
3398 int haveLowerDots = 0;
3399 TranslationTableCharacterAttributes attr;
3400 if (!getRuleCharsText (nested, &ruleChars))
3402 if (!getToken (nested, &ruleDots, "dots operand"))
/* Look for the comma separating the upper and lower patterns. */
3404 for (k = 0; k < ruleDots.length && ruleDots.chars[k] != ','; k++);
/* No comma: the single pattern is copied to serve the lowercase too. */
3405 if (k == ruleDots.length)
3407 if (!parseDots (nested, &upperDots, &ruleDots))
3409 lowerDots.length = upperDots.length;
3410 for (k = 0; k < upperDots.length; k++)
3411 lowerDots.chars[k] = upperDots.chars[k];
3412 lowerDots.chars[k] = 0;
/* Comma found: parse the part before it as the uppercase pattern, then
 * shift the rest to the front of ruleDots and parse it as the lowercase
 * pattern.
 * NOTE(review): the step that skips the comma itself is presumably on a
 * line not shown - confirm. */
3416 haveLowerDots = ruleDots.length;
3417 ruleDots.length = k;
3418 if (!parseDots (nested, &upperDots, &ruleDots))
3420 ruleDots.length = 0;
3422 for (; k < haveLowerDots; k++)
3423 ruleDots.chars[ruleDots.length++] = ruleDots.chars[k];
3424 if (!parseDots (nested, &lowerDots, &ruleDots))
3427 if (ruleChars.length != 2 || upperDots.length < 1)
3429 compileError (nested,
3430 "Exactly two Unicode characters and at least one cell are required.");
3433 if (haveLowerDots && lowerDots.length < 1)
3435 compileError (nested, "at least one cell is required after the comma.");
/* Register both letters and cross-link their case forms. */
3438 upperChar = addCharOrDots (nested, ruleChars.chars[0], 0);
3439 upperChar->attributes |= CTC_Letter | CTC_UpperCase;
3440 upperChar->uppercase = ruleChars.chars[0];
3441 upperChar->lowercase = ruleChars.chars[1];
3442 lowerChar = addCharOrDots (nested, ruleChars.chars[1], 0);
3443 lowerChar->attributes |= CTC_Letter | CTC_LowerCase;
3444 lowerChar->uppercase = ruleChars.chars[0];
3445 lowerChar->lowercase = ruleChars.chars[1];
/* Register every uppercase cell that is not in the dots table yet. */
3446 for (k = 0; k < upperDots.length; k++)
3447 if (!compile_findCharOrDots (upperDots.chars[k], 1))
3449 attr = CTC_Letter | CTC_UpperCase;
3450 upperCell = addCharOrDots (nested, upperDots.chars[k], 1);
3451 if (upperDots.length != 1)
3453 upperCell->attributes |= attr;
3454 upperCell->uppercase = upperCell->realchar;
/* Same for the lowercase cells. */
3458 for (k = 0; k < lowerDots.length; k++)
3459 if (!compile_findCharOrDots (lowerDots.chars[k], 1))
3461 attr = CTC_Letter | CTC_LowerCase;
3462 lowerCell = addCharOrDots (nested, lowerDots.chars[k], 1);
3463 if (lowerDots.length != 1)
3465 lowerCell->attributes |= attr;
3466 lowerCell->lowercase = lowerCell->realchar;
/* When one single-cell pattern serves both letters, that cell is also
 * marked lowercase. */
3469 else if (upperCell != NULL && upperDots.length == 1)
3470 upperCell->attributes |= CTC_LowerCase;
/* Single-cell patterns get a direct character-to-cell pairing. */
3471 if (lowerDots.length == 1)
3472 putCharAndDots (nested, ruleChars.chars[1], lowerDots.chars[0]);
3473 if (upperCell != NULL)
3474 upperCell->lowercase = lowerDots.chars[0];
3475 if (lowerCell != NULL)
3476 lowerCell->uppercase = upperDots.chars[0];
3477 if (upperDots.length == 1)
3478 putCharAndDots (nested, ruleChars.chars[0], upperDots.chars[0]);
/* Add one rule per letter.  chars[2] is used as scratch space to keep
 * the uppercase letter while chars[0] temporarily holds the lowercase
 * one. */
3479 ruleChars.length = 1;
3480 ruleChars.chars[2] = ruleChars.chars[0];
3481 ruleChars.chars[0] = ruleChars.chars[1];
3482 if (!addRule (nested, CTO_LowerCase, &ruleChars, &lowerDots, 0, 0))
3484 ruleChars.chars[0] = ruleChars.chars[2];
3485 if (!addRule (nested, CTO_UpperCase, &ruleChars, &upperDots, 0, 0))
3490 /*Functions for compiling hyphenation tables*/
3492 typedef struct /*hyphenation dictionary: finite state machine */
/* Array of states; hyphenGetNewState grows it by doubling. */
3495 HyphenationState *states;
/* Value of a state's fallbackState when it has none; also returned by
 * hyphenHashLookup when a key is absent. */
3498 #define DEFAULTSTATE 0xffff
/* Number of buckets in the hash table that maps strings to states. */
3499 #define HYPHENHASHSIZE 8191
/* Bucket heads; each bucket is a chain linked through `next`. */
3510 HyphenHashEntry *entries[HYPHENHASHSIZE];
3513 /* a hash function from ASU - adapted from Gtk+ */
3515 hyphenStringHash (const CharsString * s)
3518 unsigned int h = 0, g;
3519 for (k = 0; k < s->length; k++)
3521 h = (h << 4) + s->chars[k];
3522 if ((g = h & 0xf0000000))
3531 static HyphenHashTab *
3532 hyphenHashNew (void)
3534 HyphenHashTab *hashTab;
3535 hashTab = malloc (sizeof (HyphenHashTab));
3536 memset (hashTab, 0, sizeof (HyphenHashTab));
3541 hyphenHashFree (HyphenHashTab * hashTab)
3544 HyphenHashEntry *e, *next;
3545 for (i = 0; i < HYPHENHASHSIZE; i++)
3546 for (e = hashTab->entries[i]; e; e = next)
3555 /* assumes that key is not already present! */
3557 hyphenHashInsert (HyphenHashTab * hashTab, const CharsString * key, int val)
3561 i = hyphenStringHash (key) % HYPHENHASHSIZE;
3562 e = malloc (sizeof (HyphenHashEntry));
3563 e->next = hashTab->entries[i];
3564 e->key = malloc ((key->length + 1) * CHARSIZE);
3565 e->key->length = key->length;
3566 for (j = 0; j < key->length; j++)
3567 e->key->chars[j] = key->chars[j];
3569 hashTab->entries[i] = e;
3572 /* return val if found, otherwise DEFAULTSTATE */
3574 hyphenHashLookup (HyphenHashTab * hashTab, const CharsString * key)
3578 if (key->length == 0)
3580 i = hyphenStringHash (key) % HYPHENHASHSIZE;
3581 for (e = hashTab->entries[i]; e; e = e->next)
3583 if (key->length != e->key->length)
3585 for (j = 0; j < key->length; j++)
3586 if (key->chars[j] != e->key->chars[j])
3588 if (j == key->length)
3591 return DEFAULTSTATE;
3595 hyphenGetNewState (HyphenDict * dict, HyphenHashTab * hashTab, const
3596 CharsString * string)
3598 hyphenHashInsert (hashTab, string, dict->numStates);
3599 /* predicate is true if dict->numStates is a power of two */
3600 if (!(dict->numStates & (dict->numStates - 1)))
3601 dict->states = realloc (dict->states,
3602 (dict->numStates << 1) *
3603 sizeof (HyphenationState));
3604 dict->states[dict->numStates].hyphenPattern = 0;
3605 dict->states[dict->numStates].fallbackState = DEFAULTSTATE;
3606 dict->states[dict->numStates].numTrans = 0;
3607 dict->states[dict->numStates].trans.pointer = NULL;
3608 return dict->numStates++;
3611 /* add a transition from state1 to state2 through ch - assumes that the
3612 transition does not already exist */
3614 hyphenAddTrans (HyphenDict * dict, int state1, int state2, widechar ch)
3617 numTrans = dict->states[state1].numTrans;
3619 dict->states[state1].trans.pointer = malloc (sizeof (HyphenationTrans));
3620 else if (!(numTrans & (numTrans - 1)))
3621 dict->states[state1].trans.pointer = realloc
3622 (dict->states[state1].trans.pointer,
3623 (numTrans << 1) * sizeof (HyphenationTrans));
3624 dict->states[state1].trans.pointer[numTrans].ch = ch;
3625 dict->states[state1].trans.pointer[numTrans].newState = state2;
3626 dict->states[state1].numTrans++;
3630 compileHyphenation (FileInfo * nested, CharsString * encoding)
3633 HyphenationTrans *holdPointer;
3634 HyphenHashTab *hashTab;
3636 char pattern[MAXSTRING];
3637 unsigned int stateNum = 0, lastState = 0;
3638 int i, j, k = encoding->length;
3643 TranslationTableOffset holdOffset;
3644 /*Set aside enough space for hyphenation states and transitions in
3645 * translation table. Must be done before anything else*/
3646 reserveSpaceInTable (nested, 250000);
3647 hashTab = hyphenHashNew ();
3649 dict.states = malloc (sizeof (HyphenationState));
3650 dict.states[0].hyphenPattern = 0;
3651 dict.states[0].fallbackState = DEFAULTSTATE;
3652 dict.states[0].numTrans = 0;
3653 dict.states[0].trans.pointer = NULL;
3656 if (encoding->chars[0] == 'I')
3658 if (!getToken (nested, &hyph, NULL))
3664 if (!getToken (nested, &word, NULL))
3666 parseChars (nested, &hyph, &word);
3668 if (hyph.length == 0 || hyph.chars[0] == '#' || hyph.chars[0] ==
3669 '%' || hyph.chars[0] == '<')
3670 continue; /*comment */
3671 for (i = 0; i < hyph.length; i++)
3672 definedCharOrDots (nested, hyph.chars[i], 0);
3675 for (i = 0; i < hyph.length; i++)
3677 if (hyph.chars[i] >= '0' && hyph.chars[i] <= '9')
3678 pattern[j] = (char) hyph.chars[i];
3681 word.chars[j] = hyph.chars[i];
3688 for (i = 0; pattern[i] == '0'; i++);
3689 found = hyphenHashLookup (hashTab, &word);
3690 if (found != DEFAULTSTATE)
3693 stateNum = hyphenGetNewState (&dict, hashTab, &word);
3697 allocateSpaceInTable (nested,
3698 &dict.states[stateNum].hyphenPattern, k);
3699 memcpy (&table->ruleArea[dict.states[stateNum].hyphenPattern],
3702 /* now, put in the prefix transitions */
3703 while (found == DEFAULTSTATE)
3705 lastState = stateNum;
3706 ch = word.chars[word.length-- - 1];
3707 found = hyphenHashLookup (hashTab, &word);
3708 if (found != DEFAULTSTATE)
3711 stateNum = hyphenGetNewState (&dict, hashTab, &word);
3712 hyphenAddTrans (&dict, stateNum, lastState, ch);
3715 while (getALine (nested));
3716 /* put in the fallback states */
3717 for (i = 0; i < HYPHENHASHSIZE; i++)
3719 for (e = hashTab->entries[i]; e; e = e->next)
3721 for (j = 1; j <= e->key->length; j++)
3724 for (k = j; k < e->key->length; k++)
3725 word.chars[word.length++] = e->key->chars[k];
3726 stateNum = hyphenHashLookup (hashTab, &word);
3727 if (stateNum != DEFAULTSTATE)
3731 dict.states[e->val].fallbackState = stateNum;
3734 hyphenHashFree (hashTab);
3735 /*Transfer hyphenation information to table*/
3736 for (i = 0; i < dict.numStates; i++)
3738 if (dict.states[i].numTrans == 0)
3739 dict.states[i].trans.offset = 0;
3742 holdPointer = dict.states[i].trans.pointer;
3743 allocateSpaceInTable (nested,
3744 &dict.states[i].trans.offset,
3745 dict.states[i].numTrans *
3746 sizeof (HyphenationTrans));
3747 memcpy (&table->ruleArea[dict.states[i].trans.offset],
3749 dict.states[i].numTrans * sizeof (HyphenationTrans));
3753 allocateSpaceInTable (nested,
3754 &holdOffset, dict.numStates *
3755 sizeof (HyphenationState));
3756 table->hyphenStatesArray = holdOffset;
3757 /* Prevents segmentation fault if table is reallocated */
3758 memcpy (&table->ruleArea[table->hyphenStatesArray], &dict.states[0],
3759 dict.numStates * sizeof (HyphenationState));
3765 compileNoBreak (FileInfo * nested)
3768 CharsString ruleDots;
3769 CharsString otherDots;
3770 CharsString dotsBefore;
3771 CharsString dotsAfter;
3772 int haveDotsAfter = 0;
3773 if (!getToken (nested, &ruleDots, "dots operand"))
3775 for (k = 0; k < ruleDots.length && ruleDots.chars[k] != ','; k++);
3776 if (k == ruleDots.length)
3778 if (!parseDots (nested, &dotsBefore, &ruleDots))
3780 dotsAfter.length = dotsBefore.length;
3781 for (k = 0; k < dotsBefore.length; k++)
3782 dotsAfter.chars[k] = dotsBefore.chars[k];
3783 dotsAfter.chars[k] = 0;
3787 haveDotsAfter = ruleDots.length;
3788 ruleDots.length = k;
3789 if (!parseDots (nested, &dotsBefore, &ruleDots))
3791 otherDots.length = 0;
3793 for (; k < haveDotsAfter; k++)
3794 otherDots.chars[otherDots.length++] = ruleDots.chars[k];
3795 if (!parseDots (nested, &dotsAfter, &otherDots))
3798 for (k = 0; k < dotsBefore.length; k++)
3799 dotsBefore.chars[k] = getCharFromDots (dotsBefore.chars[k]);
3800 for (k = 0; k < dotsAfter.length; k++)
3801 dotsAfter.chars[k] = getCharFromDots (dotsAfter.chars[k]);
3802 if (!addRule (nested, CTO_NoBreak, &dotsBefore, &dotsAfter, 0, 0))
3804 table->noBreak = newRuleOffset;
3809 compileCharDef (FileInfo * nested,
3810 TranslationTableOpcode opcode,
3811 TranslationTableCharacterAttributes attributes)
3813 CharsString ruleChars;
3814 CharsString ruleDots;
3815 TranslationTableCharacter *character;
3816 TranslationTableCharacter *cell;
3817 TranslationTableCharacter *otherCell;
3818 TranslationTableCharacterAttributes attr;
3820 if (!getRuleCharsText (nested, &ruleChars))
3822 if (attributes & (CTC_UpperCase | CTC_LowerCase))
3823 attributes |= CTC_Letter;
3824 if (!getRuleDotsPattern (nested, &ruleDots))
3826 if (ruleChars.length != 1 || ruleDots.length < 1)
3828 compileError (nested,
3829 "Exactly one Unicode character and at least one cell are required.");
3832 character = addCharOrDots (nested, ruleChars.chars[0], 0);
3833 character->attributes |= attributes;
3834 character->uppercase = character->lowercase = character->realchar;
3835 cell = compile_findCharOrDots (ruleDots.chars[0], 1);
3836 if (ruleDots.length == 1 && cell)
3837 cell->attributes |= attributes;
3840 for (k = 0; k < ruleDots.length; k++)
3842 if (!compile_findCharOrDots (ruleDots.chars[k], 1))
3845 otherCell = addCharOrDots (nested, ruleDots.chars[k], 1);
3846 if (ruleDots.length != 1)
3848 otherCell->attributes |= attr;
3849 otherCell->uppercase = otherCell->lowercase =
3850 otherCell->realchar;
3854 if (!addRule (nested, opcode, &ruleChars, &ruleDots, 0, 0))
3856 if (ruleDots.length == 1)
3857 putCharAndDots (nested, ruleChars.chars[0], ruleDots.chars[0]);
3862 compileRule (FileInfo * nested)
3866 TranslationTableOpcode opcode;
3867 CharsString ruleChars;
3868 CharsString ruleDots;
3870 CharsString scratchPad;
3871 TranslationTableCharacterAttributes after = 0;
3872 TranslationTableCharacterAttributes before = 0;
3877 if (!getToken (nested, &token, NULL))
3878 return 1; /*blank line */
3879 if (token.chars[0] == '#' || token.chars[0] == '<')
3880 return 1; /*comment */
3881 if (nested->lineNumber == 1 && (eqasc2uni ((unsigned char *) "ISO",
3883 eqasc2uni ((unsigned char *) "UTF-8",
3886 compileHyphenation (nested, &token);
3889 opcode = getOpcode (nested, &token);
3891 { /*Carry out operations */
3894 case CTO_IncludeFile:
3896 CharsString includedFile;
3897 if (getToken (nested, &token, "include file name"))
3898 if (parseChars (nested, &includedFile, &token))
3899 if (!includeFile (nested, &includedFile))
3907 compileBrailleIndicator (nested, "undefined character opcode",
3908 CTO_Undefined, &table->undefined);
3910 case CTO_CapitalSign:
3912 compileBrailleIndicator (nested, "capital sign", CTO_CapitalRule,
3913 &table->capitalSign);
3915 case CTO_BeginCapitalSign:
3917 compileBrailleIndicator (nested, "begin capital sign",
3918 CTO_BeginCapitalRule,
3919 &table->beginCapitalSign);
3921 case CTO_LenBegcaps:
3922 ok = table->lenBeginCaps = compileNumber (nested);
3924 case CTO_EndCapitalSign:
3926 compileBrailleIndicator (nested, "end capitals sign",
3927 CTO_EndCapitalRule, &table->endCapitalSign);
3929 case CTO_FirstWordCaps:
3931 compileBrailleIndicator (nested, "first word capital sign",
3932 CTO_FirstWordCapsRule,
3933 &table->firstWordCaps);
3935 case CTO_LastWordCapsBefore:
3937 compileBrailleIndicator (nested, "capital sign before last word",
3938 CTO_LastWordCapsBeforeRule,
3939 &table->lastWordCapsBefore);
3941 case CTO_LastWordCapsAfter:
3943 compileBrailleIndicator (nested, "capital sign after last word",
3944 CTO_LastWordCapsAfterRule,
3945 &table->lastWordCapsAfter);
3947 case CTO_LenCapsPhrase:
3948 ok = table->lenCapsPhrase = compileNumber (nested);
3950 case CTO_LetterSign:
3952 compileBrailleIndicator (nested, "letter sign", CTO_LetterRule,
3953 &table->letterSign);
3955 case CTO_NoLetsignBefore:
3956 if (getRuleCharsText (nested, &ruleChars))
3958 if ((table->noLetsignBeforeCount + ruleChars.length) > LETSIGNSIZE)
3960 compileError (nested, "More than %d characters", LETSIGNSIZE);
3964 for (k = 0; k < ruleChars.length; k++)
3965 table->noLetsignBefore[table->noLetsignBeforeCount++] =
3970 if (getRuleCharsText (nested, &ruleChars))
3972 if ((table->noLetsignCount + ruleChars.length) > LETSIGNSIZE)
3974 compileError (nested, "More than %d characters", LETSIGNSIZE);
3978 for (k = 0; k < ruleChars.length; k++)
3979 table->noLetsign[table->noLetsignCount++] = ruleChars.chars[k];
3982 case CTO_NoLetsignAfter:
3983 if (getRuleCharsText (nested, &ruleChars))
3985 if ((table->noLetsignAfterCount + ruleChars.length) > LETSIGNSIZE)
3987 compileError (nested, "More than %d characters", LETSIGNSIZE);
3991 for (k = 0; k < ruleChars.length; k++)
3992 table->noLetsignAfter[table->noLetsignAfterCount++] =
3996 case CTO_NumberSign:
3998 compileBrailleIndicator (nested, "number sign", CTO_NumberRule,
3999 &table->numberSign);
4001 case CTO_FirstWordItal:
4003 compileBrailleIndicator (nested, "first word italic",
4004 CTO_FirstWordItalRule,
4005 &table->firstWordItal);
4008 case CTO_LastWordItalBefore:
4010 compileBrailleIndicator (nested, "first word italic before",
4011 CTO_LastWordItalBeforeRule,
4012 &table->lastWordItalBefore);
4014 case CTO_LastWordItalAfter:
4016 compileBrailleIndicator (nested, "last word italic after",
4017 CTO_LastWordItalAfterRule,
4018 &table->lastWordItalAfter);
4021 case CTO_FirstLetterItal:
4023 compileBrailleIndicator (nested, "first letter italic",
4024 CTO_FirstLetterItalRule,
4025 &table->firstLetterItal);
4028 case CTO_LastLetterItal:
4030 compileBrailleIndicator (nested, "last letter italic",
4031 CTO_LastLetterItalRule,
4032 &table->lastLetterItal);
4034 case CTO_SingleLetterItal:
4036 compileBrailleIndicator (nested, "single letter italic",
4037 CTO_SingleLetterItalRule,
4038 &table->singleLetterItal);
4042 compileBrailleIndicator (nested, "italic word", CTO_ItalWordRule,
4045 case CTO_LenItalPhrase:
4046 ok = table->lenItalPhrase = compileNumber (nested);
4048 case CTO_FirstWordBold:
4050 compileBrailleIndicator (nested, "first word bold",
4051 CTO_FirstWordBoldRule,
4052 &table->firstWordBold);
4055 case CTO_LastWordBoldBefore:
4057 compileBrailleIndicator (nested, "last word bold before",
4058 CTO_LastWordBoldBeforeRule,
4059 &table->lastWordBoldBefore);
4061 case CTO_LastWordBoldAfter:
4063 compileBrailleIndicator (nested, "last word bold after",
4064 CTO_LastWordBoldAfterRule,
4065 &table->lastWordBoldAfter);
4068 case CTO_FirstLetterBold:
4070 compileBrailleIndicator (nested, "first letter bold",
4071 CTO_FirstLetterBoldRule,
4072 &table->firstLetterBold);
4075 case CTO_LastLetterBold:
4077 compileBrailleIndicator (nested, "last letter bold",
4078 CTO_LastLetterBoldRule,
4079 &table->lastLetterBold);
4081 case CTO_SingleLetterBold:
4083 compileBrailleIndicator (nested, "single letter bold",
4084 CTO_SingleLetterBoldRule,
4085 &table->singleLetterBold);
4089 compileBrailleIndicator (nested, "bold word", CTO_BoldWordRule,
4092 case CTO_LenBoldPhrase:
4093 ok = table->lenBoldPhrase = compileNumber (nested);
4095 case CTO_FirstWordUnder:
4097 compileBrailleIndicator (nested, "first word underline",
4098 CTO_FirstWordUnderRule,
4099 &table->firstWordUnder);
4102 case CTO_LastWordUnderBefore:
4104 compileBrailleIndicator (nested, "last word underline before",
4105 CTO_LastWordUnderBeforeRule,
4106 &table->lastWordUnderBefore);
4108 case CTO_LastWordUnderAfter:
4110 compileBrailleIndicator (nested, "last word underline after",
4111 CTO_LastWordUnderAfterRule,
4112 &table->lastWordUnderAfter);
4115 case CTO_FirstLetterUnder:
4117 compileBrailleIndicator (nested, "first letter underline",
4118 CTO_FirstLetterUnderRule,
4119 &table->firstLetterUnder);
4122 case CTO_LastLetterUnder:
4124 compileBrailleIndicator (nested, "last letter underline",
4125 CTO_LastLetterUnderRule,
4126 &table->lastLetterUnder);
4128 case CTO_SingleLetterUnder:
4130 compileBrailleIndicator (nested, "single letter underline",
4131 CTO_SingleLetterUnderRule,
4132 &table->singleLetterUnder);
4136 compileBrailleIndicator (nested, "underlined word", CTO_UnderWordRule,
4139 case CTO_LenUnderPhrase:
4140 ok = table->lenUnderPhrase = compileNumber (nested);
4144 compileBrailleIndicator (nested, "begin computer braille",
4145 CTO_BegCompRule, &table->begComp);
4149 compileBrailleIndicator (nested, "end computer braslle",
4150 CTO_EndCompRule, &table->endComp);
4153 table->syllables = 1;
4160 case CTO_JoinableWord:
4162 case CTO_SuffixableWord:
4163 case CTO_PrefixableWord:
4165 case CTO_BegMidWord:
4167 case CTO_MidEndWord:
4176 if (getRuleCharsText (nested, &ruleChars))
4177 if (getRuleDotsPattern (nested, &ruleDots))
4178 if (!addRule (nested, opcode, &ruleChars, &ruleDots, after, before))
4183 if (!getRuleCharsText (nested, &ruleChars))
4185 if (ruleChars.length != 1 || ruleChars.chars[0] > 255)
4187 compileError (nested,
4188 "first operand must be 1 character and < 256");
4191 if (!getRuleDotsPattern (nested, &ruleDots))
4193 if (!addRule (nested, opcode, &ruleChars, &ruleDots, after, before))
4195 table->compdotsPattern[ruleChars.chars[0]] = newRuleOffset;
4198 if (!getRuleCharsText (nested, &ruleChars))
4200 if (ruleChars.chars[0] != '@')
4202 compileError (nested, "The operand must begin with an at sign (@)");
4205 for (k = 1; k < ruleChars.length; k++)
4206 scratchPad.chars[k - 1] = ruleChars.chars[k];
4207 scratchPad.length = ruleChars.length - 1;
4208 if (!parseDots (nested, &ruleDots, &scratchPad))
4210 if (!addRule (nested, opcode, &ruleChars, &ruleDots, before, after))
4213 case CTO_CapsNoCont:
4214 ruleChars.length = 1;
4215 ruleChars.chars[0] = 'a';
4217 (nested, CTO_CapsNoContRule, &ruleChars, NULL, after, before))
4219 table->capsNoCont = newRuleOffset;
4222 if (getRuleCharsText (nested, &ruleChars))
4225 ruleDots.length = ruleDots.chars[0] = 0;
4228 getRuleDotsText (nested, &ruleDots);
4229 if (ruleDots.chars[0] == '#')
4230 ruleDots.length = ruleDots.chars[0] = 0;
4231 else if (ruleDots.chars[0] == '\\' && ruleDots.chars[1] == '#')
4232 memcpy (&ruleDots.chars[0], &ruleDots.chars[1],
4233 ruleDots.length-- * CHARSIZE);
4236 for (k = 0; k < ruleChars.length; k++)
4237 addCharOrDots (nested, ruleChars.chars[k], 0);
4238 for (k = 0; k < ruleDots.length; k++)
4239 addCharOrDots (nested, ruleDots.chars[k], 0);
4240 if (!addRule (nested, opcode, &ruleChars, &ruleDots, after, before))
4244 if (table->numPasses < 2)
4245 table->numPasses = 2;
4248 if (table->numPasses < 3)
4249 table->numPasses = 3;
4252 if (table->numPasses < 4)
4253 table->numPasses = 4;
4256 if (!compilePassOpcode (nested, opcode))
4260 if (!compilePassOpcode (nested, opcode))
4262 table->corrections = 1;
4264 case CTO_Contraction:
4268 if (getRuleCharsText (nested, &ruleChars))
4269 if (!addRule (nested, opcode, &ruleChars, NULL, after, before))
4275 ruleChars.length = 0;
4276 if (getToken (nested, &token, "multiple braille indicators") &&
4277 parseDots (nested, &cells, &token))
4279 while ((lastToken = getToken (nested, &token, "multind opcodes")))
4281 opcode = getOpcode (nested, &token);
4282 if (opcode >= CTO_CapitalSign && opcode < CTO_MultInd)
4283 ruleChars.chars[ruleChars.length++] = (widechar) opcode;
4286 compileError (nested, "Not a braille indicator opcode.");
4295 if (!addRule (nested, CTO_MultInd, &ruleChars, &cells, after, before))
4302 CharsString characters;
4303 const struct CharacterClass *class;
4304 if (!characterClasses)
4306 if (!allocateCharacterClasses ())
4309 if (getToken (nested, &token, "character class name"))
4311 if ((class = findCharacterClass (&token)))
4313 compileError (nested, "character class already defined.");
4317 addCharacterClass (nested, &token.chars[0], token.length)))
4319 if (getCharacters (nested, &characters))
4322 for (index = 0; index < characters.length; ++index)
4324 TranslationTableRule *defRule;
4325 TranslationTableCharacter *character =
4327 (nested, characters.chars[index], 0);
4328 character->attributes |= class->attribute;
4329 defRule = (TranslationTableRule *)
4330 & table->ruleArea[character->definitionRule];
4331 if (defRule->dotslen == 1)
4333 character = definedCharOrDots
4335 defRule->charsdots[defRule->charslen], 1);
4336 character->attributes |= class->attribute;
4346 TranslationTableCharacterAttributes *attributes;
4347 const struct CharacterClass *class;
4349 attributes = &after;
4352 attributes = &before;
4355 if (!characterClasses)
4357 if (!allocateCharacterClasses ())
4360 if (getCharacterClass (nested, &class))
4362 *attributes |= class->attribute;
4376 if (!compileSwap (nested, opcode))
4381 if (getRuleCharsText (nested, &ruleChars))
4382 if (getRuleDotsPattern (nested, &ruleDots))
4384 if (ruleChars.length != 1 || ruleDots.length < 1)
4386 compileError (nested,
4387 "One Unicode character and at least one cell are required.");
4391 (nested, opcode, &ruleChars, &ruleDots, after, before))
4396 compileCharDef (nested, opcode, CTC_Space);
4399 compileCharDef (nested, opcode, CTC_Digit);
4402 compileCharDef (nested, opcode, CTC_LitDigit);
4404 case CTO_Punctuation:
4405 compileCharDef (nested, opcode, CTC_Punctuation);
4408 compileCharDef (nested, opcode, CTC_Math);
4411 compileCharDef (nested, opcode, CTC_Sign);
4414 compileCharDef (nested, opcode, CTC_Letter);
4417 compileCharDef (nested, opcode, CTC_UpperCase);
4420 compileCharDef (nested, opcode, CTC_LowerCase);
4423 ok = compileNoBreak (nested);
4426 ok = compileGrouping (nested);
4429 ok = compileUplow (nested);
4432 if (getRuleCharsText (nested, &ruleChars))
4433 if (getRuleDotsPattern (nested, &ruleDots))
4435 if (ruleChars.length != 1 || ruleDots.length != 1)
4437 compileError (nested,
4438 "Exactly one character and one cell are required.");
4441 putCharAndDots (nested, ruleChars.chars[0], ruleDots.chars[0]);
4445 compileError (nested, "unimplemented opcode.");
4452 lou_readCharFromFile (const char *fileName, int *mode)
4454 /*Read a character from a file, whether big-endian, little-endian or
4457 static FileInfo nested;
4458 if (fileName == NULL)
4463 nested.fileName = fileName;
4464 nested.encoding = noEncoding;
4466 nested.lineNumber = 0;
4467 if (!(nested.in = fopen (nested.fileName, "r")))
4469 lou_logPrint ("Cannot open file '%s'", nested.fileName);
4474 if (nested.in == NULL)
4479 ch = getAChar (&nested);
4489 static int fileCount = 0;
4491 findTable (const char *tableName)
4493 /* Search paths for tables */
4497 char trialPath[MAXSTRING];
4498 if (tableName == NULL || tableName[0] == 0)
4500 strcpy (trialPath, tablePath);
4501 strcat (trialPath, tableName);
4502 if ((tableFile = fopen (trialPath, "rb")))
4504 pathEnd[0] = DIR_SEP;
4506 /* See if table is on environment path LOUIS_TABLEPATH */
4507 pathList = getenv ("LOUIS_TABLEPATH");
4513 int currentListPos = 0;
4514 listLength = strlen (pathList);
4515 for (k = 0; k < listLength; k++)
4516 if (pathList[k] == ',')
4518 if (k == listLength || k == 0)
4519 { /* Only one file */
4520 strcpy (trialPath, pathList);
4521 strcat (trialPath, pathEnd);
4522 strcat (trialPath, tableName);
4523 if ((tableFile = fopen (trialPath, "rb")))
4527 { /* Compile a list of files */
4528 strncpy (trialPath, pathList, k);
4530 strcat (trialPath, pathEnd);
4531 strcat (trialPath, tableName);
4532 currentListPos = k + 1;
4533 if ((tableFile = fopen (trialPath, "rb")))
4535 while (currentListPos < listLength)
4537 for (k = currentListPos; k < listLength; k++)
4538 if (pathList[k] == ',')
4541 &pathList[currentListPos], k - currentListPos);
4542 trialPath[k - currentListPos] = 0;
4543 strcat (trialPath, pathEnd);
4544 strcat (trialPath, tableName);
4545 if ((tableFile = fopen (trialPath, "rb")))
4546 currentListPos = k + 1;
4554 /* See if table in current directory or on a path in
4556 if ((tableFile = fopen (tableName, "rb")))
4558 /* See if table on dataPath. */
4559 pathList = lou_getDataPath ();
4562 strcpy (trialPath, pathList);
4563 strcat (trialPath, pathEnd);
4565 strcat (trialPath, "liblouis\\tables\\");
4567 strcat (trialPath, "liblouis/tables/");
4569 strcat (trialPath, tableName);
4570 if ((tableFile = fopen (trialPath, "rb")))
4573 /* See if table on installed or program path. */
4575 strcpy (trialPath, lou_getProgramPath ());
4576 strcat (trialPath, "\\share\\liblouss\\tables\\");
4578 strcpy (trialPath, TABLESDIR);
4579 strcat (trialPath, pathEnd);
4581 strcat (trialPath, tableName);
4582 if ((tableFile = fopen (trialPath, "rb")))
4588 compileFile (const char *fileName)
4590 /*Compile a table file */
4593 nested.fileName = fileName;
4594 nested.encoding = noEncoding;
4596 nested.lineNumber = 0;
4597 if ((nested.in = findTable (fileName)))
4599 while (getALine (&nested))
4600 compileRule (&nested);
4606 lou_logPrint ("Cannot open table '%s'", nested.fileName);
4614 compileString (const char *inString)
4616 /* This function can be used to make changes to tables on the fly. */
4619 if (inString == NULL)
4621 nested.fileName = inString;
4622 nested.encoding = noEncoding;
4623 nested.lineNumber = 1;
4626 for (k = 0; inString[k]; k++)
4627 nested.line[k] = inString[k];
4629 return compileRule (&nested);
4633 makeDoubleRule (TranslationTableOpcode opcode, TranslationTableOffset
4634 * singleRule, TranslationTableOffset * doubleRule)
4637 TranslationTableRule *rule;
4638 if (!*singleRule || *doubleRule)
4640 rule = (TranslationTableRule *) & table->ruleArea[*singleRule];
4641 memcpy (dots.chars, &rule->charsdots[0], rule->dotslen * CHARSIZE);
4642 memcpy (&dots.chars[rule->dotslen], &rule->charsdots[0],
4643 rule->dotslen * CHARSIZE);
4644 dots.length = 2 * rule->dotslen;
4645 if (!addRule (NULL, opcode, NULL, &dots, 0, 0))
4647 *doubleRule = newRuleOffset;
4654 if (!table->lenBeginCaps)
4655 table->lenBeginCaps = 2;
4656 makeDoubleRule (CTO_FirstWordItal, &table->lastWordItalBefore,
4657 &table->firstWordItal);
4658 if (!table->lenItalPhrase)
4659 table->lenItalPhrase = 4;
4660 makeDoubleRule (CTO_FirstWordBold, &table->lastWordBoldBefore,
4661 &table->firstWordBold);
4662 if (!table->lenBoldPhrase)
4663 table->lenBoldPhrase = 4;
4664 makeDoubleRule (CTO_FirstWordUnder, &table->lastWordUnderBefore,
4665 &table->firstWordUnder);
4666 if (!table->lenUnderPhrase)
4667 table->lenUnderPhrase = 4;
4668 if (table->numPasses == 0)
4669 table->numPasses = 1;
4674 doLang2table (const char *tableList)
4676 static char newList[MAXSTRING];
4678 char buffer[MAXSTRING];
4682 if (tableList == NULL || *tableList == 0)
4684 strcpy (newList, tableList);
4685 for (k = strlen (newList) - 1; k >= 0 && newList[k] != '='; k--);
4691 strcpy (buffer, newList);
4692 langCode = &newList[k + 1];
4693 langCodeLen = strlen (langCode);
4694 strcat (buffer, "lang2table");
4695 l2t = fopen (buffer, "r");
4698 while ((fgets (buffer, sizeof (buffer) - 2, l2t)))
4703 for (k = 0; buffer[k] < 32; k++);
4704 if (buffer[k] == '#' || buffer[k] < 32)
4706 codeInFile = &buffer[k];
4708 while (buffer[k] > 32)
4710 codeInFileLen = k - codeInFileLen;
4711 codeInFile[codeInFileLen] = 0;
4713 (codeInFileLen == langCodeLen
4714 && strcasecmp (langCode, codeInFile) == 0))
4716 while (buffer[k] < 32)
4718 tableInFile = &buffer[k];
4719 while (buffer[k] > 32)
4722 strcat (newList, tableInFile);
4733 compileTranslationTable (const char *tl)
4735 /*compile source tables into a table in memory */
4736 const char *tableList;
4738 char mainTable[MAXSTRING];
4739 char subTable[MAXSTRING];
4741 int currentListPos = 0;
4746 characterClasses = NULL;
4748 tableList = doLang2table (tl);
4749 if (tableList == NULL)
4751 if (!opcodeLengths[0])
4753 TranslationTableOpcode opcode;
4754 for (opcode = 0; opcode < CTO_None; opcode++)
4755 opcodeLengths[opcode] = strlen (opcodeNames[opcode]);
4757 allocateHeader (NULL);
4758 /*Compile things that are necessary for the proper operation of
4759 liblouis or liblouisxml or liblouisutdml */
4760 compileString ("space \\s 0");
4761 compileString ("noback sign \\x0000 0");
4762 compileString ("space \\x00a0 a unbreakable space");
4763 compileString ("space \\x001b 1b escape");
4764 compileString ("space \\xffff 123456789abcdef ENDSEGMENT");
4765 listLength = strlen (tableList);
4766 for (k = currentListPos; k < listLength; k++)
4767 if (tableList[k] == ',')
4769 if (k == listLength)
4770 { /* Only one file */
4771 strcpy (tablePath, tableList);
4772 for (k = strlen (tablePath); k >= 0; k--)
4773 if (tablePath[k] == '\\' || tablePath[k] == '/')
4775 strcpy (mainTable, &tablePath[k + 1]);
4777 if (!compileFile (mainTable))
4781 { /* Compile a list of files */
4782 currentListPos = k + 1;
4783 strncpy (tablePath, tableList, k);
4785 for (k = strlen (tablePath); k >= 0; k--)
4786 if (tablePath[k] == '\\' || tablePath[k] == '/')
4788 strcpy (mainTable, &tablePath[k + 1]);
4790 if (!compileFile (mainTable))
4792 while (currentListPos < listLength)
4794 for (k = currentListPos; k < listLength; k++)
4795 if (tableList[k] == ',')
4797 strncpy (subTable, &tableList[currentListPos], k - currentListPos);
4798 subTable[k - currentListPos] = 0;
4799 if (!compileFile (subTable))
4801 currentListPos = k + 1;
4804 /*Clean up after compiling files*/
4806 if (characterClasses)
4807 deallocateCharacterClasses ();
4809 deallocateRuleNames ();
4811 lou_logPrint ("%d warnings issued", warningCount);
4815 table->tableSize = tableSize;
4816 table->bytesUsed = tableUsed;
4820 if (!(errorCount == 1 && fileCount == 1))
4821 lou_logPrint ("%d errors found.", errorCount);
4826 return (void *) table;
4833 int tableListLength;
4836 static ChainEntry *tableChain = NULL;
4837 static ChainEntry *lastTrans = NULL;
4839 getTable (const char *tableList)
4841 /*Keep track of which tables have already been compiled */
4843 ChainEntry *currentEntry = NULL;
4844 ChainEntry *lastEntry = NULL;
4846 if (tableList == NULL || *tableList == 0)
4848 errorCount = fileCount = 0;
4849 tableListLen = strlen (tableList);
4850 /*See if this is the last table used. */
4851 if (lastTrans != NULL)
4852 if (tableListLen == lastTrans->tableListLength && (memcmp
4857 tableListLen)) == 0)
4858 return (table = lastTrans->table);
4859 /*See if Table has already been compiled*/
4860 currentEntry = tableChain;
4861 while (currentEntry != NULL)
4863 if (tableListLen == currentEntry->tableListLength && (memcmp
4871 lastTrans = currentEntry;
4872 return (table = currentEntry->table);
4874 lastEntry = currentEntry;
4875 currentEntry = currentEntry->next;
4877 if ((newTable = compileTranslationTable (tableList)))
4879 /*Add a new entry to the table chain. */
4880 int entrySize = sizeof (ChainEntry) + tableListLen;
4881 ChainEntry *newEntry = malloc (entrySize);
4882 if (tableChain == NULL)
4883 tableChain = newEntry;
4885 lastEntry->next = newEntry;
4886 newEntry->next = NULL;
4887 newEntry->table = newTable;
4888 newEntry->tableListLength = tableListLen;
4889 memcpy (&newEntry->tableList[0], tableList, tableListLen);
4890 lastTrans = newEntry;
4891 return newEntry->table;
4899 if (lastTrans == NULL)
4901 strncpy (scratchBuf, lastTrans->tableList, lastTrans->tableListLength);
4902 scratchBuf[lastTrans->tableListLength] = 0;
4907 lou_getTable (const char *tableList)
4909 /* Search paths for tables and keep track of compiled tables. */
4913 char trialPath[MAXSTRING];
4914 if (tableList == NULL || tableList[0] == 0)
4916 errorCount = fileCount = 0;
4917 pathEnd[0] = DIR_SEP;
4919 /* See if table is on environment path LOUIS_TABLEPATH */
4920 pathList = getenv ("LOUIS_TABLEPATH");
4926 int currentListPos = 0;
4927 listLength = strlen (pathList);
4928 for (k = 0; k < listLength; k++)
4929 if (pathList[k] == ',')
4931 if (k == listLength || k == 0)
4932 { /* Only one file */
4933 strcpy (trialPath, pathList);
4934 strcat (trialPath, pathEnd);
4935 strcat (trialPath, tableList);
4936 table = getTable (trialPath);
4941 { /* Compile a list of files */
4942 strncpy (trialPath, pathList, k);
4944 strcat (trialPath, pathEnd);
4945 strcat (trialPath, tableList);
4946 currentListPos = k + 1;
4947 table = getTable (trialPath);
4950 while (currentListPos < listLength)
4952 for (k = currentListPos; k < listLength; k++)
4953 if (pathList[k] == ',')
4956 &pathList[currentListPos], k - currentListPos);
4957 trialPath[k - currentListPos] = 0;
4958 strcat (trialPath, pathEnd);
4959 strcat (trialPath, tableList);
4960 table = getTable (trialPath);
4961 currentListPos = k + 1;
4970 /* See if table in current directory or on a path in
4972 if (errorCount > 0 && (!(errorCount == 1 && fileCount == 1)))
4974 table = getTable (tableList);
4978 /* See if table on dataPath. */
4979 if (errorCount > 0 && (!(errorCount == 1 && fileCount == 1)))
4981 pathList = lou_getDataPath ();
4984 strcpy (trialPath, pathList);
4985 strcat (trialPath, pathEnd);
4987 strcat (trialPath, "liblouis\\tables\\");
4989 strcat (trialPath, "liblouis/tables/");
4991 strcat (trialPath, tableList);
4992 table = getTable (trialPath);
4997 /* See if table on installed or program path. */
4998 if (errorCount > 0 && (!(errorCount == 1 && fileCount == 1)))
5001 strcpy (trialPath, lou_getProgramPath ());
5002 strcat (trialPath, "\\share\\liblouss\\tables\\");
5004 strcpy (trialPath, TABLESDIR);
5005 strcat (trialPath, pathEnd);
5007 strcat (trialPath, tableList);
5008 table = getTable (trialPath);
5011 lou_logPrint ("%s could not be found", tableList);
5015 static unsigned char *destSpacing = NULL;
5016 static int sizeDestSpacing = 0;
5017 static unsigned short *typebuf = NULL;
5018 static int sizeTypebuf = 0;
5019 static widechar *passbuf1 = NULL;
5020 static int sizePassbuf1 = 0;
5021 static widechar *passbuf2 = NULL;
5022 static int sizePassbuf2 = 0;
5023 static int *srcMapping = NULL;
5024 static int *prevSrcMapping = NULL;
5025 static int sizeSrcMapping = 0;
5026 static int sizePrevSrcMapping = 0;
/* liblouis_allocMem: (re)allocate one of the file-scope scratch buffers
 * when the request is larger than the size currently recorded for it.
 *
 * buffer  - selects the buffer.  Case labels visible here:
 *           alloc_destSpacing, alloc_passbuf1, alloc_passbuf2,
 *           alloc_srcMapping, alloc_prevSrcMapping.  The first branch
 *           handles typebuf; its case label is not visible here.
 * srcmax  - compared against destmax in the two mapping cases.
 * destmax - element count used to size typebuf, destSpacing, passbuf1
 *           and passbuf2.
 *
 * The last case returns prevSrcMapping.  The return statements of the
 * other cases are not visible here - presumably each returns its own
 * buffer; TODO confirm against the full source.
 *
 * NOTE(review): none of the visible lines checks the result of malloc,
 * and the size variable is updated regardless.  Growing is done with a
 * fresh malloc (see the prevSrcMapping case, which frees the old block
 * first), so previous buffer contents are not carried over. */
5028 liblouis_allocMem (AllocBuf buffer, int srcmax, int destmax)
5037 if (destmax > sizeTypebuf)
5039 if (typebuf != NULL)
/* Every allocation below asks for 4 elements more than requested. */
5041 typebuf = malloc ((destmax + 4) * sizeof (unsigned short));
5042 sizeTypebuf = destmax;
5045 case alloc_destSpacing:
5046 if (destmax > sizeDestSpacing)
5048 if (destSpacing != NULL)
/* destSpacing elements are single bytes, hence no sizeof factor. */
5050 destSpacing = malloc (destmax + 4);
5051 sizeDestSpacing = destmax;
5054 case alloc_passbuf1:
5055 if (destmax > sizePassbuf1)
5057 if (passbuf1 != NULL)
5059 passbuf1 = malloc ((destmax + 4) * CHARSIZE);
5060 sizePassbuf1 = destmax;
5063 case alloc_passbuf2:
5064 if (destmax > sizePassbuf2)
5066 if (passbuf2 != NULL)
5068 passbuf2 = malloc ((destmax + 4) * CHARSIZE);
5069 sizePassbuf2 = destmax;
5072 case alloc_srcMapping:
/* The mapping buffers are sized from mapSize, chosen by comparing srcmax
 * with destmax.  The assignments are not visible here - presumably
 * mapSize is the larger of the two; TODO confirm. */
5075 if (srcmax >= destmax)
5079 if (mapSize > sizeSrcMapping)
5081 if (srcMapping != NULL)
5083 srcMapping = malloc ((mapSize + 4) * sizeof (int));
5084 sizeSrcMapping = mapSize;
5088 case alloc_prevSrcMapping:
5091 if (srcmax >= destmax)
5095 if (mapSize > sizePrevSrcMapping)
5097 if (prevSrcMapping != NULL)
5098 free (prevSrcMapping);
5099 prevSrcMapping = malloc ((mapSize + 4) * sizeof (int));
5100 sizePrevSrcMapping = mapSize;
5103 return prevSrcMapping;
/* Cleanup body: walks tableChain freeing every entry, then deals with
 * the file-scope scratch buffers.  The enclosing function's header line
 * is not visible here - presumably this is the library's public free
 * routine; TODO confirm. */
5112 ChainEntry *currentEntry;
5113 ChainEntry *previousEntry;
5114 if (logFile != NULL)
5116 if (tableChain != NULL)
5118 currentEntry = tableChain;
5119 while (currentEntry)
/* Free the entry's table, then step to ->next before freeing the entry
 * itself so the link is never read from freed memory. */
5121 free (currentEntry->table);
5122 previousEntry = currentEntry;
5123 currentEntry = currentEntry->next;
5124 free (previousEntry);
/* Each scratch buffer is tested for NULL.  The free/reset statements are
 * fully visible only for prevSrcMapping; the other buffers presumably
 * follow the same free, NULL, size = 0 pattern - TODO confirm. */
5129 if (typebuf != NULL)
5133 if (destSpacing != NULL)
5136 sizeDestSpacing = 0;
5137 if (passbuf1 != NULL)
5141 if (passbuf2 != NULL)
5145 if (srcMapping != NULL)
5149 if (prevSrcMapping != NULL)
5150 free (prevSrcMapping);
5151 prevSrcMapping = NULL;
5152 sizePrevSrcMapping = 0;
/* NOTE(review): presumably marks the opcode-length table as needing to be
 * rebuilt; confirm against the code that fills opcodeLengths. */
5153 opcodeLengths[0] = 0;
/* Version string, initialised from PACKAGE_VERSION (presumably the
 * version macro generated by the build configuration - confirm). */
5159 static char *version = PACKAGE_VERSION;
/* lou_compileString: look up tableList with lou_getTable, then pass
 * inString to compileString and return its result.
 *
 * tableList - passed unchanged to lou_getTable.
 * inString  - passed unchanged to compileString.
 *
 * When lou_getTable yields a null/zero result the failure branch is
 * taken; its statement is not visible here - presumably an early return
 * of a failure value, TODO confirm. */
5170 lou_compileString (const char *tableList, const char *inString)
5172 if (!lou_getTable (tableList))
5174 return compileString (inString);
5178 * This procedure provides a target for calls that serve as breakpoints
/* lou_getTablePaths: assemble a comma-separated string in the static
 * buffer `paths` from, in order: tablePath, the LOUIS_TABLEPATH
 * environment variable, the current working directory (getcwd into
 * scratchBuf) and lou_getDataPath ().  Because `paths` is static, its
 * contents persist between calls and are rewritten by the next call.
 *
 * The guards around the strcat pairs, the conditional-compilation lines
 * and the return statement are not visible here.
 *
 * NOTE(review): every append is an unbounded strcat into a buffer of
 * MAXSTRING bytes; nothing visible checks the combined length. */
5183 lou_getTablePaths ()
5185 static char paths[MAXSTRING];
5187 strcpy (paths, tablePath);
5188 strcat (paths, ",");
5189 pathList = getenv ("LOUIS_TABLEPATH");
5192 strcat (paths, pathList);
5193 strcat (paths, ",");
5195 pathList = getcwd (scratchBuf, MAXSTRING);
5198 strcat (paths, pathList);
5199 strcat (paths, ",");
5201 pathList = lou_getDataPath ();
5204 strcat (paths, pathList);
5205 strcat (paths, ",");
/* NOTE(review): the two strcpy calls below replace everything accumulated
 * in `paths` so far rather than appending to it.  If the intent is to add
 * the program/install directory to the list, strcat is needed - confirm.
 * The directory name is also spelled "liblouss", which looks like a typo
 * for "liblouis" - confirm before changing, since it is a runtime path. */
5208 strcpy (paths, lou_getProgramPath ());
5209 strcat (paths, "\\share\\liblouss\\tables\\");
5211 strcpy (paths, TABLESDIR);
/* Body of the debug hook routine (its header line is not visible here):
 * prints the fixed text "debug hook" followed by a newline on stdout. */
5219 char *hook = "debug hook";
5220 printf ("%s\n", hook);