more files!
authorraster <raster@7cbeb6ba-43b4-40fd-8cce-4c39aea84d33>
Sat, 11 Apr 2009 08:11:47 +0000 (08:11 +0000)
committerraster <raster@7cbeb6ba-43b4-40fd-8cce-4c39aea84d33>
Sat, 11 Apr 2009 08:11:47 +0000 (08:11 +0000)
git-svn-id: svn+ssh://svn.enlightenment.org/var/svn/e/trunk/evas@39974 7cbeb6ba-43b4-40fd-8cce-4c39aea84d33

src/lib/engines/common/evas_intl/evas_intl_arabic.c [new file with mode: 0644]
src/lib/engines/common/evas_intl/evas_intl_arabic.h [new file with mode: 0644]

diff --git a/src/lib/engines/common/evas_intl/evas_intl_arabic.c b/src/lib/engines/common/evas_intl/evas_intl_arabic.c
new file mode 100644 (file)
index 0000000..3de2a13
--- /dev/null
@@ -0,0 +1,314 @@
+/* Authors:
+ *     Tom Hacohen (tom@stsob.com)
+ */
+
+#include "../evas_intl_utils.h"
+
+#ifdef ARABIC_SUPPORT
+/* arabic contextualizing */
+
+/* arabic input forms */
+#define ARABIC_ALEPH_MADDA     0x0622
+#define ARABIC_ALEPH           0x0627
+#define ARABIC_BET             0x0628
+#define ARABIC_TA_MARBUTA      0x0629
+#define ARABIC_TAW             0x062A
+#define ARABIC_TA              0x062B
+#define ARABIC_GIMEL           0x062C
+#define ARABIC_HETH            0x062D
+#define ARABIC_HA              0x062E
+#define        ARABIC_DALET            0x062F
+#define ARABIC_DAL             0x0630
+#define ARABIC_RESH            0x0631
+#define ARABIC_ZAYIN           0x0632
+#define ARABIC_SHIN            0x0633
+#define ARABIC_SH              0x0634
+#define ARABIC_TSADE           0x0635
+#define ARABIC_DAD             0x0636
+#define ARABIC_TETH            0x0637
+#define ARABIC_ZA              0x0638
+#define ARABIC_AYIN            0x0639
+#define ARABIC_GHAIN           0x063A
+#define ARABIC_PE              0x0641
+#define ARABIC_QOPH            0x0642
+#define ARABIC_KAPH            0x0643
+#define ARABIC_LAMED           0x0644
+#define ARABIC_MEM             0x0645
+#define ARABIC_NUN             0x0646
+#define ARABIC_HE              0x0647
+#define ARABIC_WAW             0x0648
+#define ARABIC_ALEPH_MAQSURA   0x0649
+#define ARABIC_YODH            0x064A
+/* arabic contextual forms */
+#define ARABIC_ISOLATED_ALEPH_MADDA    0xFE81
+#define ARABIC_ISOLATED_ALEPH          0xFE8D
+#define ARABIC_ISOLATED_TA_MARBUTA     0xFE93
+#define ARABIC_ISOLATED_BET            0xFE8F
+#define ARABIC_ISOLATED_TAW            0xFE95
+#define ARABIC_ISOLATED_TA             0xFE99
+#define ARABIC_ISOLATED_GIMEL          0xFE9D
+#define ARABIC_ISOLATED_HETH           0xFEA1
+#define ARABIC_ISOLATED_HA             0xFEA5
+#define        ARABIC_ISOLATED_DALET           0xFEA9
+#define ARABIC_ISOLATED_DAL            0xFEAB
+#define ARABIC_ISOLATED_RESH           0xFEAD
+#define ARABIC_ISOLATED_ZAYIN          0xFEAF
+#define ARABIC_ISOLATED_SHIN           0xFEB1
+#define ARABIC_ISOLATED_SH             0xFEB5
+#define ARABIC_ISOLATED_TSADE          0xFEB9
+#define ARABIC_ISOLATED_DAD            0xFEBD
+#define ARABIC_ISOLATED_TETH           0xFEC1
+#define ARABIC_ISOLATED_ZA             0xFEC5
+#define ARABIC_ISOLATED_AYIN           0xFEC9
+#define ARABIC_ISOLATED_GHAIN          0xFECD
+#define ARABIC_ISOLATED_PE             0xFED1
+#define ARABIC_ISOLATED_QOPH           0xFED5
+#define ARABIC_ISOLATED_KAPH           0xFED9
+#define ARABIC_ISOLATED_LAMED          0xFEDD
+#define ARABIC_ISOLATED_MEM            0xFEE1
+#define ARABIC_ISOLATED_NUN            0xFEE5
+#define ARABIC_ISOLATED_HE             0xFEE9
+#define ARABIC_ISOLATED_WAW            0xFEED
+#define ARABIC_ISOLATED_ALEPH_MAQSURA  0xFEEF
+#define ARABIC_ISOLATED_YODH           0xFEF1
+
+#define ARABIC_IS_SPECIAL_LETTER(c)    ((c) == ARABIC_ISOLATED_ALEPH ||  \
+                                         (c) == ARABIC_ISOLATED_DALET || \
+                                         (c) == ARABIC_ISOLATED_DAL ||   \
+                                         (c) == ARABIC_ISOLATED_RESH ||  \
+                                         (c) == ARABIC_ISOLATED_ZAYIN || \
+                                         (c) == ARABIC_ISOLATED_WAW  || \
+                                         (c) == ARABIC_ISOLATED_TA_MARBUTA)
+/* from the first to last (including all forms, and special cases
+ * like aleph maqsura in some forms*/
+#define ARABIC_IS_CONTEXT(c)   (((c) >= ARABIC_ISOLATED_ALEPH && (c) <= ARABIC_ISOLATED_YODH + 3) || \
+                                ((c) >= ARABIC_ISOLATED_ALEPH_MADDA && (c) <= ARABIC_ISOLATED_ALEPH_MADDA + 3) || \
+                                (c) == 0xFBE8 || \
+                                (c) == 0xFBE9) 
+#define ARABIC_IS_LETTER(c)    ARABIC_IS_CONTEXT(c)
+/* used for arabic context logic */
+/* each value is the offset from the regular char in unicode */
+enum _ArabicContext {
+               ARABIC_CONTEXT_ISOLATED = 0,
+               ARABIC_CONTEXT_FINAL = 1,
+               ARABIC_CONTEXT_INITIAL = 2,
+               ARABIC_CONTEXT_MEDIAL = 3
+};
+typedef enum _ArabicContext ArabicContext;
+
+static FriBidiChar
+_evas_intl_arabic_isolated_to_context(FriBidiChar chr, ArabicContext context);
+
+static int
+_evas_intl_arabic_text_to_isolated(FriBidiChar *text);
+
+static FriBidiChar
+_evas_intl_arabic_general_to_isolated(FriBidiChar chr);
+
+/* FIXME: there are issues with text that's already in context
+ * vowels support is needed (skip them when analysing context)*/
+int
+evas_intl_arabic_to_context(FriBidiChar *text)
+{
+       int i;
+       int len;
+       int start_of_context = 1; /* assume the first is special/non arabic */
+       int last_is_first = 0;
+       int last_letter = 0;
+       
+       /* check for empty string */
+       if (!*text)
+               return;
+
+       len = _evas_intl_arabic_text_to_isolated(text);
+       /*FIXME: make it skip vowels */
+       for (i = 0 ; i < len ; i++)  {
+
+               if (! ARABIC_IS_LETTER(text[i])) {
+                       /* mark so it won't be touched,
+                        * though start formating */
+                       if (last_letter && !start_of_context) {
+                               ArabicContext tmp = (last_is_first) ?
+                                       ARABIC_CONTEXT_ISOLATED
+                                       :
+                                       ARABIC_CONTEXT_FINAL;
+                               text[i-1] = _evas_intl_arabic_isolated_to_context(
+                                       last_letter,
+                                       tmp);
+                               
+                       }
+                       last_is_first = 0;
+                       start_of_context = 1;
+                       last_letter = 0;
+                       continue;
+               }
+               /* adjust the last letter */
+               last_letter = text[i];
+               if (ARABIC_IS_SPECIAL_LETTER(text[i])) {
+                       if (!start_of_context) 
+                               text[i] = _evas_intl_arabic_isolated_to_context(text[i], ARABIC_CONTEXT_FINAL);
+                       /* else: leave isolated */
+                               
+                       start_of_context = 1;
+                       last_is_first = 0;
+                       continue;
+               }
+
+               if (start_of_context) {
+                       text[i] = _evas_intl_arabic_isolated_to_context(text[i], ARABIC_CONTEXT_INITIAL);
+                       last_is_first = 1;
+               }
+               else {
+                       text[i] = _evas_intl_arabic_isolated_to_context(text[i], ARABIC_CONTEXT_MEDIAL);
+                       last_is_first = 0;
+               }
+               /* spceial chars don't get here. */
+               start_of_context = 0;
+               
+       }
+       /* if it's arabic and not isolated, the last is always final */
+       i--;
+       if (last_letter && !start_of_context) {
+                               ArabicContext tmp = (last_is_first) ? ARABIC_CONTEXT_ISOLATED : ARABIC_CONTEXT_FINAL;
+                               /* because it's medial atm, and should be isolated */
+                               text[i] = _evas_intl_arabic_isolated_to_context(
+                                       last_letter,
+                                       tmp);
+                       }
+
+       return len;
+}
+
+/* I wish I could think about a simpler way to do it.
+ * Just match every word with it's isolated form */
+static FriBidiChar
+_evas_intl_arabic_general_to_isolated(FriBidiChar chr)
+{
+       switch (chr) {
+               case ARABIC_ALEPH_MADDA:
+                       return ARABIC_ISOLATED_ALEPH_MADDA;
+
+               case ARABIC_ALEPH:
+                       return ARABIC_ISOLATED_ALEPH;
+
+               case ARABIC_TA_MARBUTA:
+                       return ARABIC_ISOLATED_TA_MARBUTA;
+               case ARABIC_BET:
+                       return ARABIC_ISOLATED_BET;
+               
+               case ARABIC_TAW:
+                       return ARABIC_ISOLATED_TAW;
+
+               case ARABIC_TA:
+                       return ARABIC_ISOLATED_TA;
+               
+               case ARABIC_GIMEL:
+                       return ARABIC_ISOLATED_GIMEL;
+               
+               case ARABIC_HETH:
+                       return ARABIC_ISOLATED_HETH;
+               
+               case ARABIC_HA:
+                       return ARABIC_ISOLATED_HA;
+               
+               case ARABIC_DALET:
+                       return ARABIC_ISOLATED_DALET;
+               
+               case ARABIC_DAL:
+                       return ARABIC_ISOLATED_DAL;
+               
+               case ARABIC_RESH:
+                       return ARABIC_ISOLATED_RESH;
+               
+               case ARABIC_ZAYIN:
+                       return ARABIC_ISOLATED_ZAYIN;
+               
+               case ARABIC_SHIN:
+                       return ARABIC_ISOLATED_SHIN;
+               
+               case ARABIC_SH:
+                       return ARABIC_ISOLATED_SH;
+               
+               case ARABIC_TSADE:
+                       return ARABIC_ISOLATED_TSADE;
+               
+               case ARABIC_DAD:
+                       return ARABIC_ISOLATED_DAD;
+               
+               case ARABIC_TETH:
+                       return ARABIC_ISOLATED_TETH;
+               
+               case ARABIC_ZA:
+                       return ARABIC_ISOLATED_ZA;
+               
+               case ARABIC_AYIN:
+                       return ARABIC_ISOLATED_AYIN;
+               
+               case ARABIC_GHAIN:
+                       return ARABIC_ISOLATED_GHAIN;
+               
+               case ARABIC_PE:
+                       return ARABIC_ISOLATED_PE;
+               
+               case ARABIC_QOPH:
+                       return ARABIC_ISOLATED_QOPH;
+               
+               case ARABIC_KAPH:
+                       return ARABIC_ISOLATED_KAPH;
+               
+               case ARABIC_LAMED:
+                       return ARABIC_ISOLATED_LAMED;
+
+               case ARABIC_MEM:
+                       return ARABIC_ISOLATED_MEM;
+               
+               case ARABIC_NUN:
+                       return ARABIC_ISOLATED_NUN;
+               
+               case ARABIC_HE:
+                       return ARABIC_ISOLATED_HE;
+               
+               case ARABIC_WAW:
+                       return ARABIC_ISOLATED_WAW;
+
+               case ARABIC_ALEPH_MAQSURA:
+                       return ARABIC_ISOLATED_ALEPH_MAQSURA;
+               
+               case ARABIC_YODH:
+                       return ARABIC_ISOLATED_YODH;
+               default:
+                       return chr;
+       }
+}
+
+static FriBidiChar
+_evas_intl_arabic_isolated_to_context(FriBidiChar chr, ArabicContext context)
+{
+       if (ARABIC_IS_SPECIAL_LETTER(chr)) {
+               if (context == ARABIC_CONTEXT_INITIAL)
+                       return chr;
+               else
+                       return chr + ARABIC_CONTEXT_FINAL;
+       }
+       /* HACK AROUND ALIF MAQSURA */
+       else if (chr == ARABIC_ISOLATED_ALEPH_MAQSURA && context > 1) {
+               chr = 0xFBE8; /* the initial form */
+               context -= 2;
+       }
+       return chr + context;
+}
+
+static int
+_evas_intl_arabic_text_to_isolated(FriBidiChar *text)
+{
+       int i=0;
+       while (*text) {
+               /* if it's not arabic/it's already in context
+                * it's just returned the same */
+               *text = _evas_intl_arabic_general_to_isolated(*text);
+               text++;
+               i++;
+       }
+       return i;
+}
+#endif
\ No newline at end of file
diff --git a/src/lib/engines/common/evas_intl/evas_intl_arabic.h b/src/lib/engines/common/evas_intl/evas_intl_arabic.h
new file mode 100644 (file)
index 0000000..182f4d7
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _EVAS_INTL_ARABIC
+#define _EVAS_INTL_ARABIC
+
+int
+evas_intl_arabic_to_context(FriBidiChar *text);
+
+#endif