1 /* This file contains code common to the translator and back-translator.
2 * It is included immediately after the header files. */
4 /* liblouis Braille Translation and Back-Translation
7 Based on the Linux screenreader BRLTTY, copyright (C) 1999-2006 by
10 Copyright (C) 2004, 2005, 2006
11 ViewPlus Technologies, Inc. www.viewplus.com
13 JJB Software, Inc. www.jjb-software.com
16 This file is free software; you can redistribute it and/or modify it
17 under the terms of the Lesser or Library GNU General Public License
19 Free Software Foundation; either version 3, or (at your option) any
22 This file is distributed in the hope that it will be useful, but
23 WITHOUT ANY WARRANTY; without even the implied warranty of
24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 Library GNU General Public License for more details.
27 You should have received a copy of the Library GNU General Public
28 License along with this program; see the file COPYING. If not, write to
29 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
30 Boston, MA 02110-1301, USA.
32 Maintained by John J. Boyer john.boyer@abilitiessoft.com
35 /* Additional bits in typebuf (its elements are unsigned short, see the typebuf declaration below). */
36 #define capsemph 0x8000 /* bit 15 */
37 #define EMPHASIS 0x000f /* low four bits */
38 #define STARTWORD 0x4000 /* bit 14 */
39 #define FIRSTWORD 0x2000 /* bit 13 */
40 #define SYLLABLEMARKS 0x00c0 /* bits 6 and 7 */
41 #define INTERNALMARKS 0xff00 /* entire upper byte; overlaps capsemph, STARTWORD and FIRSTWORD */
43 static const TranslationTableHeader *table; /* table consulted for characters, dots, rule chains and ruleArea */
44 static int src, srcmax; /* read index into currentInput and its upper limit */
45 static int dest, destmax; /* write index into currentOutput and its upper limit */
47 static int currentPass = 1; /* indexes table->attribOrSwapRules; compared against 2, 3 and 4 during pass rule selection */
48 static const widechar *currentInput; /* input of the pass being processed */
49 static widechar *passbuf1 = NULL; /* NOTE(review): presumably scratch buffers for intermediate passes - confirm at the allocation site */
50 static widechar *passbuf2 = NULL;
51 static widechar *currentOutput; /* output of the pass being processed */
52 static int *prevSrcMapping = NULL; /* indexed by input position; its values are copied into srcMapping as output is produced */
53 static int *srcMapping = NULL; /* indexed by output position */
54 static unsigned short *typebuf = NULL;
55 static unsigned char *srcSpacing = NULL;
56 static unsigned char *destSpacing = NULL;
57 static int haveEmphasis = 0;
58 static TranslationTableOpcode transOpcode; /* opcode of the rule under consideration (copied from transRule->opcode) */
59 static TranslationTableOpcode prevTransOpcode;
60 static const TranslationTableRule *transRule; /* rule under consideration */
61 static int transCharslen; /* copied from transRule->charslen */
62 static int checkAttr (const widechar c,
63 const TranslationTableCharacterAttributes a, int nm);
64 static int putCharacter (widechar c);
65 static int makeCorrections ();
66 static int passDoTest ();
67 static int passDoAction ();
68 static int passVariables[NUMVAR]; /* tested and modified by pass instructions; cleared with memset elsewhere in this file */
69 static int passCharDots; /* used as 2 * passCharDots (+ 1) to pick a start/end pair out of a grouping rule's charsdots */
71 static widechar const *passInstructions; /* set to &transRule->charsdots[transCharslen] when a rule's test part is evaluated */
72 static int passIC; /* Instruction counter: index into passInstructions */
73 static int startMatch;
75 static int startReplace; /* reset to -1 (together with endReplace) before a rule's test part is evaluated */
76 static int endReplace;
78 static int srcIncremented;
79 static int *outputPositions;
80 static int *inputPositions;
81 static int cursorPosition;
82 static int cursorStatus;
83 static const TranslationTableRule **appliedRules; /* optional log of applied rules; only written when non-NULL */
84 static int maxAppliedRules; /* appliedRules is appended to only while appliedRulesCount < maxAppliedRules */
85 static int appliedRulesCount;
/* findCharOrDots (c, m): returns a pointer to the table entry for c.
 * Visible in this excerpt: the hash is c % HASHNUM; the chain head is read
 * from table->characters or table->dots (the test that chooses between them
 * is not visible - presumably it depends on m; confirm); the chain is walked
 * through the next field while comparing realchar with c. When nothing
 * matches, notFound (presumably pointing at one of the static placeholders
 * noChar / noDots, both of which carry CTC_Space) gets its realchar,
 * uppercase and lowercase fields set to c. Because the placeholders are
 * static, such a result is only valid until the next unsuccessful lookup. */
87 static TranslationTableCharacter *
88 findCharOrDots (widechar c, int m)
90 /*Look up character or dot pattern in the appropriate
92 static TranslationTableCharacter noChar =
93 { 0, 0, 0, CTC_Space, 32, 32, 32 };
94 static TranslationTableCharacter noDots =
95 { 0, 0, 0, CTC_Space, B16, B16, B16 };
96 TranslationTableCharacter *notFound;
97 TranslationTableCharacter *character;
98 TranslationTableOffset bucket;
99 unsigned long int makeHash = (unsigned long int) c % HASHNUM;
102 bucket = table->characters[makeHash];
107 bucket = table->dots[makeHash];
112 character = (TranslationTableCharacter *) & table->ruleArea[bucket];
113 if (character->realchar == c)
115 bucket = character->next;
117 notFound->realchar = notFound->uppercase = notFound->lowercase = c;
/* checkAttr: reports whether the table attributes of c intersect the mask a.
 * prevc / preva are static, so they persist between calls; the condition
 * that decides when the lookup below is repeated is not visible in this
 * excerpt (presumably a one-entry cache keyed on prevc - confirm). */
122 checkAttr (const widechar c, const TranslationTableCharacterAttributes
125 static widechar prevc = 0;
126 static TranslationTableCharacterAttributes preva = 0;
129 preva = (findCharOrDots (c, m))->attributes; /* attributes of c's table entry */
132 return ((preva & a) ? 1 : 0); /* 1 when preva shares at least one bit with a, else 0 */
/* findAttribOrSwapRules: walks the rule chain that starts at
 * table->attribOrSwapRules[currentPass] and is linked through charsnext.
 * The per-rule test and the return statements are not visible in this
 * excerpt; callers use the result as a boolean. */
136 findAttribOrSwapRules ()
138 int save_transCharslen = transCharslen; /* transCharslen, transRule and transOpcode are saved here and restored below */
139 const TranslationTableRule *save_transRule = transRule;
140 TranslationTableOpcode save_transOpcode = transOpcode;
141 TranslationTableOffset ruleOffset;
142 ruleOffset = table->attribOrSwapRules[currentPass]; /* head of the chain for the current pass */
146 transRule = (TranslationTableRule *) & table->ruleArea[ruleOffset]; /* ruleOffset is an index into table->ruleArea */
147 transOpcode = transRule->opcode;
150 ruleOffset = transRule->charsnext; /* advance to the next rule in the chain */
152 transCharslen = save_transCharslen; /* restore the saved globals */
153 transRule = save_transRule;
154 transOpcode = save_transOpcode;
/* compareChars: compares two widechar sequences element by element using the
 * lowercase field of each element's table entry rather than the raw values.
 * count and m are used below but declared on lines not visible here
 * (presumably the remaining parameters); the return statements are not
 * visible either. */
159 compareChars (const widechar * address1, const widechar * address2, int
165 for (k = 0; k < count; k++)
166 if ((findCharOrDots (address1[k], m))->lowercase !=
167 (findCharOrDots (address2[k], m))->lowercase) /* mismatch between the lowercase forms at index k */
/* Fragment of the correction pass. The function header is not visible in
 * this excerpt (presumably makeCorrections, which is prototyped earlier in
 * the file - confirm). Comments describe only the visible statements. */
175 if (!table->corrections) /* tests whether table->corrections is zero */
180 memset (passVariables, 0, sizeof(int) * NUMVAR); /* clear all pass variables */
183 int length = srcmax - src; /* number of input characters left */
184 const TranslationTableCharacter *character = findCharOrDots
185 (currentInput[src], 0); /* table entry of the current input character, looked up with m = 0 */
186 const TranslationTableCharacter *character2;
188 if (!findAttribOrSwapRules ())
191 TranslationTableOffset ruleOffset = 0;
192 unsigned long int makeHash = 0;
198 makeHash = (unsigned long int) character->lowercase << 8; /* hash is built from the lowercase forms of the current and the next character */
199 character2 = findCharOrDots (currentInput[src + 1], 0);
200 makeHash += (unsigned long int) character2->lowercase;
202 ruleOffset = table->forRules[makeHash]; /* chain head taken from the forRules hash table */
208 ruleOffset = character->otherRules; /* chain head attached to the single character */
210 case 2: /*No rule found */
211 transOpcode = CTO_Always;
218 (TranslationTableRule *) & table->ruleArea[ruleOffset];
219 transOpcode = transRule->opcode;
220 transCharslen = transRule->charslen;
221 if (tryThis == 1 || (transCharslen <= length && /* the rule's characters must fit in the remaining input unless tryThis == 1 */
222 compareChars (&transRule->
227 if (srcIncremented && transOpcode == CTO_Correct &&
234 ruleOffset = transRule->charsnext; /* next candidate in the chain */
245 srcMapping[dest] = prevSrcMapping[src]; /* copy one character through unchanged, carrying its source mapping along */
246 currentOutput[dest++] = currentInput[src++];
249 if (appliedRules != NULL && appliedRulesCount < maxAppliedRules) /* log the rule only if logging is enabled and there is room */
250 appliedRules[appliedRulesCount++] = transRule;
251 if (!passDoAction ())
253 if (endReplace == src)
262 { // We have to transform typebuf accordingly
264 unsigned short *typebuf_temp; /* NOTE(review): the matching free () is not visible in this excerpt - confirm it exists */
265 if ((typebuf_temp = malloc (dest * sizeof (unsigned short))) == NULL)
267 for (pos = 0; pos < dest; pos++)
268 typebuf_temp[pos] = typebuf[srcMapping[pos]]; /* entry for output position pos comes from the position it maps back to */
269 memcpy (typebuf, typebuf_temp, dest * sizeof (unsigned short)); /* write the remapped entries back over typebuf */
/* Fragment (function header not visible; presumably matchCurrentInput, which
 * the pass test calls - confirm). The operand count is stored at
 * passInstructions[passIC + 1] and the operands follow from passIC + 2. */
283 for (k = passIC + 2; k < passIC + 2 + passInstructions[passIC + 1]; k++)
284 if (currentInput[kk] == ENDSEGMENT || passInstructions[k] != /* reaching ENDSEGMENT in the input is tested together with an operand mismatch */
/* swapTest: checks input characters starting at *callSrc against the
 * characters listed in a swap rule. The rule's offset into table->ruleArea
 * is assembled from two instruction words (swapIC + 1 is the high half,
 * swapIC + 2 the low half); swapIC + 3 and swapIC + 4 hold two counts that
 * bound the first and the second loop below. What is returned and whether
 * *callSrc is updated is not visible in this excerpt. */
291 swapTest (int swapIC, int *callSrc)
295 int curSrc = *callSrc; /* local cursor, starts at the caller's position */
296 TranslationTableOffset swapRuleOffset;
297 TranslationTableRule *swapRule;
299 (passInstructions[swapIC + 1] << 16) | passInstructions[swapIC + 2];
300 swapRule = (TranslationTableRule *) & table->ruleArea[swapRuleOffset];
301 for (curLen = 0; curLen < passInstructions[swapIC + 3]; curLen++) /* first count: iterations of this loop */
303 if (swapRule->opcode == CTO_SwapDd)
305 for (curTest = 1; curTest < swapRule->charslen; curTest += 2) /* CTO_SwapDd: only odd indices of charsdots are compared */
307 if (currentInput[curSrc] == swapRule->charsdots[curTest])
313 for (curTest = 0; curTest < swapRule->charslen; curTest++) /* other opcodes: every entry is compared */
315 if (currentInput[curSrc] == swapRule->charsdots[curTest])
319 if (curTest >= swapRule->charslen) /* loop ran to the end: current character is not in the list */
323 if (passInstructions[swapIC + 3] == passInstructions[swapIC + 4]) /* both counts equal */
328 while (curLen < passInstructions[swapIC + 4]) /* second count: upper bound for further characters */
330 if (swapRule->opcode == CTO_SwapDd)
332 for (curTest = 1; curTest < swapRule->charslen; curTest += 2)
334 if (currentInput[curSrc] == swapRule->charsdots[curTest])
340 for (curTest = 0; curTest < swapRule->charslen; curTest++)
342 if (currentInput[curSrc] == swapRule->charsdots[curTest])
346 if (curTest >= swapRule->charslen)
/* swapReplace: for every input position in [start, end) that holds one of
 * the swap rule's characters, writes the replacement with the same ordinal
 * position to currentOutput and records the source mapping. The replacement
 * list starts right after the rule's characters
 * (&charsdots[charslen]). For CTO_SwapCc a single cell is written; for the
 * other opcodes the replacements are length-prefixed items. Return
 * statements are on lines not visible in this excerpt.
 *
 * Changes in this revision:
 *  - the single-cell branch bounded its write to currentOutput by srcmax,
 *    the input limit; it now uses destmax like every other output check;
 *  - the memcpy destination had been corrupted ("&curren" decoded as an
 *    HTML entity) and is restored to &currentOutput[dest]. */
359 swapReplace (int start, int end)
361 TranslationTableOffset swapRuleOffset;
362 TranslationTableRule *swapRule;
363 widechar *replacements;
369 (passInstructions[passIC + 1] << 16) | passInstructions[passIC + 2];
370 swapRule = (TranslationTableRule *) & table->ruleArea[swapRuleOffset];
371 replacements = &swapRule->charsdots[swapRule->charslen]; /* replacements follow the match characters */
372 for (curSrc = start; curSrc < end; curSrc++)
374 for (curTest = 0; curTest < swapRule->charslen; curTest++) /* find the ordinal of the input character */
375 if (currentInput[curSrc] == swapRule->charsdots[curTest])
377 if (curTest == swapRule->charslen) /* character is not part of the swap set */
380 for (curRep = 0; curRep < curTest; curRep++) /* step curPos to the curTest-th replacement */
381 if (swapRule->opcode == CTO_SwapCc)
384 curPos += replacements[curPos]; /* skip one length-prefixed item */
385 if (swapRule->opcode == CTO_SwapCc)
387 if ((dest + 1) > destmax) /* one cell must still fit in the OUTPUT buffer */
389 srcMapping[dest] = prevSrcMapping[curSrc];
390 currentOutput[dest++] = replacements[curPos];
395 if ((dest + replacements[curPos] - 1) >= destmax)
397 for (k = dest + replacements[curPos] - 1; k >= dest; --k)
398 srcMapping[k] = prevSrcMapping[curSrc]; /* every produced cell maps back to the same source position */
399 memcpy (&currentOutput[dest], &replacements[curPos + 1],
400 (replacements[curPos]) * CHARSIZE);
401 dest += replacements[curPos] - 1; /* the stored length counts the length cell itself */
/* groupingRule / groupingOp: the grouping rule and the grouping instruction
 * most recently recorded by the pass test (groupingOp is assigned from
 * passInstructions[passIC] there). */
407 static TranslationTableRule *groupingRule;
408 static widechar groupingOp;
/* Fragment (function header not visible; presumably replaceGrouping, which
 * the pass action calls for pass_groupreplace - confirm). It substitutes the
 * start/end characters of a grouping pair with those of another rule. */
413 widechar startCharDots = groupingRule->charsdots[2 * passCharDots]; /* start symbol of the recorded grouping rule */
414 widechar endCharDots = groupingRule->charsdots[2 * passCharDots + 1]; /* matching end symbol */
415 widechar *curin = (widechar *) currentInput; /* const is cast away: the input buffer is modified in place below */
418 TranslationTableOffset replaceOffset = passInstructions[passIC + 1] << /* NOTE(review): the low half is masked with 0xff here, while every other offset in this file is built as (hi << 16) | lo without a mask - confirm this is intended */
419 16 | (passInstructions[passIC + 2] & 0xff);
420 TranslationTableRule *replaceRule = (TranslationTableRule *) &
421 table->ruleArea[replaceOffset];
422 widechar replaceStart = replaceRule->charsdots[2 * passCharDots];
423 widechar replaceEnd = replaceRule->charsdots[2 * passCharDots + 1];
424 if (groupingOp == pass_groupstart)
426 curin[startReplace] = replaceStart; /* overwrite the start symbol in the input */
427 for (curPos = startReplace + 1; curPos < srcmax; curPos++) /* scan forward through the input */
429 if (currentInput[curPos] == startCharDots)
431 if (currentInput[curPos] == endCharDots)
436 if (curPos == srcmax) /* scan reached the end of the input */
438 curin[curPos] = replaceEnd;
442 if (transOpcode == CTO_Context) /* context rules use the pair at charsdots[2] / charsdots[3] */
444 startCharDots = groupingRule->charsdots[2];
445 endCharDots = groupingRule->charsdots[3];
446 replaceStart = replaceRule->charsdots[2];
447 replaceEnd = replaceRule->charsdots[3];
449 currentOutput[dest] = replaceEnd;
450 for (curPos = dest - 1; curPos >= 0; curPos--) /* scan backward through the output produced so far */
452 if (currentOutput[curPos] == endCharDots)
454 if (currentOutput[curPos] == startCharDots)
461 currentOutput[curPos] = replaceStart;
/* Fragment (function header not visible in this excerpt). It locates a
 * grouping symbol and closes the gap by shifting the following elements
 * down by one: in the input buffer when groupingOp is pass_groupstart, and
 * (in the other visible loops) in the output buffer. */
470 widechar startCharDots = groupingRule->charsdots[2 * passCharDots];
471 widechar endCharDots = groupingRule->charsdots[2 * passCharDots + 1];
472 widechar *curin = (widechar *) currentInput; /* const is cast away: the input buffer is modified in place below */
475 if (groupingOp == pass_groupstart)
477 for (curPos = startReplace + 1; curPos < srcmax; curPos++) /* scan forward through the input */
479 if (currentInput[curPos] == startCharDots)
481 if (currentInput[curPos] == endCharDots)
486 if (curPos == srcmax) /* scan reached the end of the input */
489 for (; curPos < srcmax; curPos++)
490 curin[curPos - 1] = curin[curPos]; /* shift the input down by one element */
495 for (curPos = dest - 1; curPos >= 0; curPos--) /* scan backward through the output produced so far */
497 if (currentOutput[curPos] == endCharDots)
499 if (currentOutput[curPos] == startCharDots)
507 for (; curPos < dest; curPos++)
508 currentOutput[curPos - 1] = currentOutput[curPos]; /* shift the output down by one element */
515 static int searchSrc; /* input cursor used by the search below */
/* Fragment (function header not visible; presumably doPassSearch, which the
 * pass test calls - confirm). It re-evaluates the instructions that follow
 * the current one (searchIC starts at passIC + 1) while stepper is below
 * srcmax. Instruction layout as used here: word 0 is the opcode and the
 * following words are operands. */
523 TranslationTableOffset ruleOffset;
524 TranslationTableRule *rule;
525 TranslationTableCharacterAttributes attributes;
526 int stepper = passSrc; /* candidate start position, begins at the test's current position */
527 while (stepper < srcmax)
529 searchIC = passIC + 1; /* restart at the instruction after the current one */
531 while (searchIC < transRule->dotslen) /* dotslen bounds the instruction stream */
534 if (searchSrc > srcmax)
536 switch (passInstructions[searchIC])
539 searchSrc -= passInstructions[searchIC + 1]; /* move the cursor back by the operand */
551 for (k = searchIC + 2;
552 k < searchIC + 2 + passInstructions[searchIC + 1]; k++) /* operand count at searchIC + 1, operands from searchIC + 2 */
553 if (currentInput[kk] == ENDSEGMENT || passInstructions[k] !=
559 searchSrc += passInstructions[searchIC + 1]; /* skip the matched characters */
560 searchIC += passInstructions[searchIC + 1] + 2; /* skip opcode, count and operands */
562 case pass_startReplace:
565 case pass_endReplace:
568 case pass_attributes:
570 (passInstructions[searchIC + 1] << 16) | /* 32-bit value assembled from two instruction words */
571 passInstructions[searchIC + 2];
572 for (k = 0; k < passInstructions[searchIC + 3]; k++) /* searchIC + 3: lower count */
574 if (currentInput[searchSrc] == ENDSEGMENT)
578 (((findCharOrDots (currentInput[searchSrc++],
580 attributes & attributes)) ? 1 : 0);
586 for (k = passInstructions[searchIC + 3]; k < /* continues from the lower count up to the count at searchIC + 4 */
587 passInstructions[searchIC + 4]; k++)
589 if (currentInput[searchSrc] == ENDSEGMENT)
595 (findCharOrDots (currentInput[searchSrc],
597 attributes & attributes))
604 case pass_groupstart:
606 ruleOffset = (passInstructions[searchIC + 1] << 16) |
607 passInstructions[searchIC + 2];
608 rule = (TranslationTableRule *) & table->ruleArea[ruleOffset];
609 if (passInstructions[searchIC] == pass_groupstart)
611 (currentInput[searchSrc] == rule->charsdots[2 *
616 (currentInput[searchSrc] == rule->charsdots[2 *
619 if (groupingRule != NULL && groupingOp == pass_groupstart
620 && rule == groupingRule) /* same grouping rule that the test recorded */
622 if (currentInput[searchSrc] == rule->charsdots[2 *
625 else if (currentInput[searchSrc] ==
626 rule->charsdots[2 * passCharDots + 1])
633 itsTrue = swapTest (searchIC, &searchSrc);
637 if (passVariables[passInstructions[searchIC + 1]] != /* this and the four tests below compare a pass variable (index at searchIC + 1) with the operand at searchIC + 2; the statements they guard are not visible here */
638 passInstructions[searchIC + 2])
643 if (passVariables[passInstructions[searchIC + 1]] >=
644 passInstructions[searchIC + 2])
649 if (passVariables[passInstructions[searchIC + 1]] <=
650 passInstructions[searchIC + 2])
655 if (passVariables[passInstructions[searchIC + 1]] >
656 passInstructions[searchIC + 2])
661 if (passVariables[passInstructions[searchIC + 1]] <
662 passInstructions[searchIC + 2])
669 if ((groupingRule && level == 1) || !groupingRule)
672 searchIC = transRule->dotslen; /* setting searchIC to dotslen ends the inner while loop */
677 if ((!not && !itsTrue) || (not && itsTrue)) /* true exactly when itsTrue and not are both zero or both non-zero */
/* Fragment of the pass test (function header not visible; presumably
 * passDoTest, which is prototyped earlier in the file - confirm). It
 * interprets the test part of transRule's instruction stream against
 * currentInput, recording startMatch / endMatch and, when the rule marks
 * them, startReplace / endReplace. */
691 TranslationTableOffset ruleOffset = 0;
692 TranslationTableRule *rule = NULL;
693 TranslationTableCharacterAttributes attributes = 0;
696 passInstructions = &transRule->charsdots[transCharslen]; /* instructions are stored right after the rule's characters */
698 startMatch = endMatch = passSrc;
699 startReplace = endReplace = -1; /* -1 means the rule has not marked a replace range (tested further down) */
700 if (transOpcode == CTO_Context || transOpcode == CTO_Correct)
704 while (passIC < transRule->dotslen) /* dotslen bounds the instruction stream */
707 if (passSrc > srcmax)
709 switch (passInstructions[passIC])
717 if (passSrc != srcmax)
722 passSrc -= passInstructions[passIC + 1]; /* move the cursor back by the operand */
733 itsTrue = matchCurrentInput ();
734 passSrc += passInstructions[passIC + 1]; /* skip the compared characters */
735 passIC += passInstructions[passIC + 1] + 2; /* skip opcode, count and operands */
737 case pass_startReplace:
738 startReplace = passSrc; /* replace range starts at the current cursor */
741 case pass_endReplace:
742 endReplace = passSrc; /* replace range ends at the current cursor */
745 case pass_attributes:
747 (passInstructions[passIC + 1] << 16) | passInstructions[passIC +
749 for (k = 0; k < passInstructions[passIC + 3]; k++) /* passIC + 3: lower count */
751 if (currentInput[passSrc] == ENDSEGMENT)
755 (((findCharOrDots (currentInput[passSrc++],
757 attributes & attributes)) ? 1 : 0);
763 for (k = passInstructions[passIC + 3]; k < /* continues from the lower count up to the count at passIC + 4 */
764 passInstructions[passIC + 4]; k++)
766 if (currentInput[passSrc] == ENDSEGMENT)
773 (findCharOrDots (currentInput[passSrc],
775 attributes & attributes))
782 case pass_groupstart:
784 ruleOffset = (passInstructions[passIC + 1] << 16) |
785 passInstructions[passIC + 2];
786 rule = (TranslationTableRule *) & table->ruleArea[ruleOffset];
787 if (passIC == 0 || (passIC > 0 && passInstructions[passIC - 1] ==
791 groupingOp = passInstructions[passIC]; /* remember which grouping instruction was seen */
793 if (passInstructions[passIC] == pass_groupstart)
794 itsTrue = (currentInput[passSrc] == rule->charsdots[2 *
798 itsTrue = (currentInput[passSrc] == rule->charsdots[2 *
805 itsTrue = swapTest (passIC, &passSrc);
809 if (passVariables[passInstructions[passIC + 1]] != /* this and the four tests below compare a pass variable (index at passIC + 1) with the operand at passIC + 2; the statements they guard are not visible here */
810 passInstructions[passIC + 2])
815 if (passVariables[passInstructions[passIC + 1]] >=
816 passInstructions[passIC + 2])
821 if (passVariables[passInstructions[passIC + 1]] <=
822 passInstructions[passIC + 2])
827 if (passVariables[passInstructions[passIC + 1]] >
828 passInstructions[passIC + 2])
833 if (passVariables[passInstructions[passIC + 1]] <
834 passInstructions[passIC + 2])
839 itsTrue = doPassSearch ();
840 if ((!not && !itsTrue) || (not && itsTrue))
847 if (startReplace == -1) /* rule marked no replace range: fall back to the whole match */
849 startReplace = startMatch;
850 endReplace = endMatch;
857 if ((!not && !itsTrue) || (not && itsTrue)) /* true exactly when itsTrue and not are both zero or both non-zero */
/* Fragment of the pass action (function header not visible; presumably
 * passDoAction, which is prototyped earlier in the file - confirm). It first
 * copies the matched-but-not-replaced prefix [startMatch, startReplace) to
 * the output, then interprets the action part of the instruction stream.
 *
 * Change in this revision: three memcpy operands had been corrupted
 * ("&curren" decoded as an HTML entity, leaving a stray currency sign);
 * they are restored to &currentOutput[...] / &currentInput[...]. */
868 TranslationTableOffset ruleOffset = 0;
869 TranslationTableRule *rule = NULL;
870 if ((dest + startReplace - startMatch) > destmax) /* the prefix must fit in the output */
872 if (transOpcode != CTO_Context)
873 memmove (&srcMapping[dest], &prevSrcMapping[startMatch],
874 (startReplace - startMatch) * sizeof (int));
875 for (k = startMatch; k < startReplace; k++)
876 if (transOpcode == CTO_Context)
878 if (!putCharacter (currentInput[k]))
882 currentOutput[dest++] = currentInput[k];
883 while (passIC < transRule->dotslen) /* dotslen bounds the instruction stream */
884 switch (passInstructions[passIC])
888 if ((dest + passInstructions[passIC + 1]) > destmax) /* count at passIC + 1, literal cells from passIC + 2 */
890 for (k = 0; k < passInstructions[passIC + 1]; ++k)
891 srcMapping[dest + k] = prevSrcMapping[startReplace];
892 memcpy (&currentOutput[dest], &passInstructions[passIC + 2],
893 passInstructions[passIC + 1] * CHARSIZE);
894 dest += passInstructions[passIC + 1];
895 passIC += passInstructions[passIC + 1] + 2;
898 passVariables[passInstructions[passIC + 1]] = /* assign the operand to a pass variable */
899 passInstructions[passIC + 2];
903 passVariables[passInstructions[passIC + 1]]--;
904 if (passVariables[passInstructions[passIC + 1]] < 0) /* decrement is clamped at zero */
905 passVariables[passInstructions[passIC + 1]] = 0;
909 passVariables[passInstructions[passIC + 1]]++;
912 case pass_groupstart:
913 ruleOffset = (passInstructions[passIC + 1] << 16) |
914 passInstructions[passIC + 2];
915 rule = (TranslationTableRule *) & table->ruleArea[ruleOffset];
916 srcMapping[dest] = prevSrcMapping[startMatch]; /* NOTE(review): no dest/destmax check is visible before this write - confirm one exists on a line not shown */
917 currentOutput[dest++] = rule->charsdots[2 * passCharDots];
921 ruleOffset = (passInstructions[passIC + 1] << 16) |
922 passInstructions[passIC + 2];
923 rule = (TranslationTableRule *) & table->ruleArea[ruleOffset];
924 srcMapping[dest] = prevSrcMapping[startMatch];
925 currentOutput[dest++] = rule->charsdots[2 * passCharDots + 1];
929 if (!swapReplace (startReplace, endReplace))
933 case pass_groupreplace:
934 if (!groupingRule || !replaceGrouping ()) /* requires a grouping rule recorded by the test */
944 dest -= startReplace - startMatch;
945 k = endReplace - startReplace; /* length of the replace range */
946 if ((dest + k) > destmax)
948 memmove (&srcMapping[dest], &prevSrcMapping[startReplace],
950 memcpy (&currentOutput[dest], &currentInput[startReplace],
954 endReplace = passSrc;
/* Fragment (function header not visible; presumably checkDots, which the
 * pass rule selection calls - confirm). Compares the rule's first
 * transCharslen cells with consecutive input cells. */
967 for (k = 0; k < transCharslen; k++)
968 if (transRule->charsdots[k] != currentInput[kk++]) /* exact comparison; kk is initialised on a line not visible here */
/* Fragment of the rule selection used by the later passes (function header
 * not visible in this excerpt). Candidates are tried in up to three steps
 * (tryThis = 0, 1, 2); step 2 falls back to CTO_Always. Lookups use
 * findCharOrDots with m = 1. */
976 int length = srcmax - src; /* number of input cells left */
977 const TranslationTableCharacter *dots;
978 const TranslationTableCharacter *dots2;
980 TranslationTableOffset ruleOffset = 0;
981 unsigned long int makeHash = 0;
982 if (findAttribOrSwapRules ())
984 dots = findCharOrDots (currentInput[src], 1); /* table entry of the current input cell */
985 for (tryThis = 0; tryThis < 3; tryThis++)
992 /*Hash function optimized for forward translation */
993 makeHash = (unsigned long int) dots->lowercase << 8;
994 dots2 = findCharOrDots (currentInput[src + 1], 1);
995 makeHash += (unsigned long int) dots2->lowercase;
997 ruleOffset = table->forRules[makeHash]; /* chain head taken from the forRules hash table */
1003 ruleOffset = dots->otherRules; /* chain head attached to the single cell */
1005 case 2: /*No rule found */
1006 transOpcode = CTO_Always;
1012 transRule = (TranslationTableRule *) & table->ruleArea[ruleOffset];
1013 transOpcode = transRule->opcode;
1014 transCharslen = transRule->charslen;
1015 if (tryThis == 1 || ((transCharslen <= length) && checkDots ())) /* the rule's cells must fit in the remaining input unless tryThis == 1 */
1016 switch (transOpcode)
1017 { /*check validity of this Translation */
1019 if (currentPass != 2 || !srcIncremented) /* the three tests below pair a pass number (2, 3, 4) with srcIncremented; their case labels are not visible here */
1025 if (currentPass != 3 || !srcIncremented)
1031 if (currentPass != 4 || !srcIncremented)
1039 ruleOffset = transRule->charsnext; /* next candidate in the chain */
1048 prevTransOpcode = CTO_None;
1051 memset (passVariables, 0, sizeof(int) * NUMVAR);
1052 while (src < srcmax)
1053 { /*the main multipass translation loop */
1056 switch (transOpcode)
1062 if (appliedRules != NULL && appliedRulesCount < maxAppliedRules)
1063 appliedRules[appliedRulesCount++] = transRule;
1064 if (!passDoAction ())
1066 if (endReplace == src)
1071 if ((dest + 1) > destmax)
1073 srcMapping[dest] = prevSrcMapping[src];
1074 currentOutput[dest++] = currentInput[src++];
1080 srcMapping[dest] = prevSrcMapping[src];
1081 failure:if (src < srcmax)
1083 while (checkAttr (currentInput[src], CTC_Space, 1))
1084 if (++src == srcmax)