New libunibreak. 07/37707/2
author Victor Cebollada <v.cebollada@samsung.com>
Thu, 2 Apr 2015 16:25:32 +0000 (17:25 +0100)
committer Paul Wisbey <p.wisbey@samsung.com>
Wed, 8 Apr 2015 09:54:50 +0000 (02:54 -0700)
Change-Id: I7850ca7c30d24338fa8f692c67cae19e94ac143e
Signed-off-by: Victor Cebollada <v.cebollada@samsung.com>
text/dali/internal/libunibreak/linebreak.c
text/dali/internal/libunibreak/linebreak.h
text/dali/internal/libunibreak/linebreakdata.c
text/dali/internal/libunibreak/linebreakdef.c
text/dali/internal/libunibreak/linebreakdef.h
text/dali/internal/libunibreak/wordbreak.c
text/dali/internal/libunibreak/wordbreakdata.c

index 4e13247..81c7aa6 100644 (file)
@@ -1,10 +1,11 @@
-/* vim: set tabstop=4 shiftwidth=4: */
+/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
 
 /*
  * Line breaking in a Unicode sequence.  Designed to be used in a
  * generic text renderer.
  *
- * Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
+ * Copyright (C) 2008-2013 Wu Yongwei <wuyongwei at gmail dot com>
+ * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the author be held liable for any damages
  *    distribution.
  *
  * The main reference is Unicode Standard Annex 14 (UAX #14):
- *             <URL:http://www.unicode.org/reports/tr14/>
+ *      <URL:http://www.unicode.org/reports/tr14/>
  *
  * When this library was designed, this annex was at Revision 19, for
  * Unicode 5.0.0:
- *             <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
+ *      <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
  *
- * This library has been updated according to Revision 24, for
- * Unicode 5.2.0:
- *             <URL:http://www.unicode.org/reports/tr14/tr14-24.html>
+ * This library has been updated according to Revision 30, for
+ * Unicode 6.2.0:
+ *      <URL:http://www.unicode.org/reports/tr14/tr14-30.html>
  *
  * The Unicode Terms of Use are available at
- *             <URL:http://www.unicode.org/copyright.html>
+ *      <URL:http://www.unicode.org/copyright.html>
  */
 
 /**
- * @file       linebreak.c
+ * @file    linebreak.c
  *
  * Implementation of the line breaking algorithm as described in Unicode
  * Standard Annex 14.
  *
- * @version    2.0, 2010/01/03
- * @author     Wu Yongwei
+ * @version 2.5, 2013/11/14
+ * @author  Wu Yongwei
+ * @author  Petr Filipsky
  */
 
 #include <assert.h>
 #include "linebreakdef.h"
 
 /**
+ * Special value used internally to indicate an undefined break result.
+ */
+#define LINEBREAK_UNDEFINED -1
+
+/**
  * Size of the second-level index to the line breaking properties.
  */
 #define LINEBREAK_INDEX_SIZE 40
@@ -70,153 +77,192 @@ const int linebreak_version = LINEBREAK_VERSION;
  */
 enum BreakAction
 {
-       DIR_BRK,                /**< Direct break opportunity */
-       IND_BRK,                /**< Indirect break opportunity */
-       CMI_BRK,                /**< Indirect break opportunity for combining marks */
-       CMP_BRK,                /**< Prohibited break for combining marks */
-       PRH_BRK                 /**< Prohibited break */
+    DIR_BRK,        /**< Direct break opportunity */
+    IND_BRK,        /**< Indirect break opportunity */
+    CMI_BRK,        /**< Indirect break opportunity for combining marks */
+    CMP_BRK,        /**< Prohibited break for combining marks */
+    PRH_BRK         /**< Prohibited break */
 };
 
 /**
  * Break action pair table.  This is a direct mapping of Table 2 of
- * Unicode Standard Annex 14, Revision 24.
+ * Unicode Standard Annex 14, Revision 30.
  */
-static enum BreakAction baTable[LBP_JT][LBP_JT] = {
-       {       /* OP */
-               PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, CMP_BRK,
-               PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK },
-       {       /* CL */
-               DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
-               DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
-       {       /* CP */
-               DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK,
-               DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
-       {       /* QU */
-               PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
-               IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
-       {       /* GL */
-               IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
-               IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
-       {       /* NS */
-               DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
-               DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
-       {       /* EX */
-               DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
-               DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
-       {       /* SY */
-               DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
-               DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
-       {       /* IS */
-               DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
-               DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
-       {       /* PR */
-               IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
-               DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
-       {       /* PO */
-               IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
-               DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
-       {       /* NU */
-               IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK,
-               IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
-       {       /* AL */
-               IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
-               IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
-       {       /* ID */
-               DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
-               IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
-       {       /* IN */
-               DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
-               IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
-       {       /* HY */
-               DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
-               DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
-       {       /* BA */
-               DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
-               DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
-       {       /* BB */
-               IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
-               IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
-       {       /* B2 */
-               DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
-               DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
-       {       /* ZW */
-               DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
-               DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
-               DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK, DIR_BRK,
-               DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
-       {       /* CM */
-               IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
-               IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
-       {       /* WJ */
-               IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
-               IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
-       {       /* H2 */
-               DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
-               IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK },
-       {       /* H3 */
-               DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
-               IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK },
-       {       /* JL */
-               DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
-               IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK },
-       {       /* JV */
-               DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
-               IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK },
-       {       /* JT */
-               DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
-               PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
-               IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
-               PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK }
+static enum BreakAction baTable[LBP_RI][LBP_RI] = {
+    {   /* OP */
+        PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
+        CMP_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
+        PRH_BRK },
+    {   /* CL */
+        DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK },
+    {   /* CP */
+        DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+        DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK },
+    {   /* QU */
+        PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+        IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+        IND_BRK },
+    {   /* GL */
+        IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+        IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+        IND_BRK },
+    {   /* NS */
+        DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK },
+    {   /* EX */
+        DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK },
+    {   /* SY */
+        DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK },
+    {   /* IS */
+        DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
+        DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK },
+    {   /* PR */
+        IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
+        IND_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+        DIR_BRK },
+    {   /* PO */
+        IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
+        DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK },
+    {   /* NU */
+        IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+        DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK },
+    {   /* AL */
+        IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
+        DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK },
+    {   /* HL */
+        IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
+        DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK },
+    {   /* ID */
+        DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK },
+    {   /* IN */
+        DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK },
+    {   /* HY */
+        DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK },
+    {   /* BA */
+        DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK },
+    {   /* BB */
+        IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+        IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+        IND_BRK },
+    {   /* B2 */
+        DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK },
+    {   /* ZW */
+        DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK },
+    {   /* CM */
+        IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
+        DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK },
+    {   /* WJ */
+        IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+        IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+        IND_BRK },
+    {   /* H2 */
+        DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK,
+        DIR_BRK },
+    {   /* H3 */
+        DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK,
+        DIR_BRK },
+    {   /* JL */
+        DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK,
+        DIR_BRK },
+    {   /* JV */
+        DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK,
+        DIR_BRK },
+    {   /* JT */
+        DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK,
+        DIR_BRK },
+    {   /* RI */
+        DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+        PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+        CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+        IND_BRK },
 };
 
 /**
@@ -224,8 +270,8 @@ static enum BreakAction baTable[LBP_JT][LBP_JT] = {
  */
 struct LineBreakPropertiesIndex
 {
-       utf32_t end;                                    /**< End coding point */
-       struct LineBreakProperties *lbp;/**< Pointer to line breaking properties */
+    utf32_t end;                    /**< End code point */
+    struct LineBreakProperties *lbp;/**< Pointer to line breaking properties */
 };
 
 /**
@@ -233,7 +279,7 @@ struct LineBreakPropertiesIndex
  */
 static struct LineBreakPropertiesIndex lb_prop_index[LINEBREAK_INDEX_SIZE] =
 {
-       { 0xFFFFFFFF, lb_prop_default }
+    { 0xFFFFFFFF, lb_prop_default }
 };
 
 /**
@@ -244,84 +290,84 @@ static struct LineBreakPropertiesIndex lb_prop_index[LINEBREAK_INDEX_SIZE] =
  */
 void init_linebreak(void)
 {
-       size_t i;
-       size_t iPropDefault;
-       size_t len;
-       size_t step;
-
-       len = 0;
-       while (lb_prop_default[len].prop != LBP_Undefined)
-               ++len;
-       step = len / LINEBREAK_INDEX_SIZE;
-       iPropDefault = 0;
-       for (i = 0; i < LINEBREAK_INDEX_SIZE; ++i)
-       {
-               lb_prop_index[i].lbp = lb_prop_default + iPropDefault;
-               iPropDefault += step;
-               lb_prop_index[i].end = lb_prop_default[iPropDefault].start - 1;
-       }
-       lb_prop_index[--i].end = 0xFFFFFFFF;
+    size_t i;
+    size_t iPropDefault;
+    size_t len;
+    size_t step;
+
+    len = 0;
+    while (lb_prop_default[len].prop != LBP_Undefined)
+        ++len;
+    step = len / LINEBREAK_INDEX_SIZE;
+    iPropDefault = 0;
+    for (i = 0; i < LINEBREAK_INDEX_SIZE; ++i)
+    {
+        lb_prop_index[i].lbp = lb_prop_default + iPropDefault;
+        iPropDefault += step;
+        lb_prop_index[i].end = lb_prop_default[iPropDefault].start - 1;
+    }
+    lb_prop_index[--i].end = 0xFFFFFFFF;
 }
 
 /**
  * Gets the language-specific line breaking properties.
  *
- * @param lang language of the text
- * @return             pointer to the language-specific line breaking
- *                             properties array if found; \c NULL otherwise
+ * @param lang  language of the text
+ * @return      pointer to the language-specific line breaking
+ *              properties array if found; \c NULL otherwise
  */
 static struct LineBreakProperties *get_lb_prop_lang(const char *lang)
 {
-       struct LineBreakPropertiesLang *lbplIter;
-       if (lang != NULL)
-       {
-               for (lbplIter = lb_prop_lang_map; lbplIter->lang != NULL; ++lbplIter)
-               {
-                       if (strncmp(lang, lbplIter->lang, lbplIter->namelen) == 0)
-                       {
-                               return lbplIter->lbp;
-                       }
-               }
-       }
-       return NULL;
+    struct LineBreakPropertiesLang *lbplIter;
+    if (lang != NULL)
+    {
+        for (lbplIter = lb_prop_lang_map; lbplIter->lang != NULL; ++lbplIter)
+        {
+            if (strncmp(lang, lbplIter->lang, lbplIter->namelen) == 0)
+            {
+                return lbplIter->lbp;
+            }
+        }
+    }
+    return NULL;
 }
 
 /**
  * Gets the line breaking class of a character from a line breaking
  * properties array.
  *
- * @param ch   character to check
- * @param lbp  pointer to the line breaking properties array
- * @return             the line breaking class if found; \c LBP_XX otherwise
+ * @param ch   character to check
+ * @param lbp  pointer to the line breaking properties array
+ * @return     the line breaking class if found; \c LBP_XX otherwise
  */
 static enum LineBreakClass get_char_lb_class(
-               utf32_t ch,
-               struct LineBreakProperties *lbp)
+        utf32_t ch,
+        struct LineBreakProperties *lbp)
 {
-       while (lbp->prop != LBP_Undefined && ch >= lbp->start)
-       {
-               if (ch <= lbp->end)
-                       return lbp->prop;
-               ++lbp;
-       }
-       return LBP_XX;
+    while (lbp->prop != LBP_Undefined && ch >= lbp->start)
+    {
+        if (ch <= lbp->end)
+            return lbp->prop;
+        ++lbp;
+    }
+    return LBP_XX;
 }
 
 /**
  * Gets the line breaking class of a character from the default line
  * breaking properties array.
  *
- * @param ch   character to check
- * @return             the line breaking class if found; \c LBP_XX otherwise
+ * @param ch  character to check
+ * @return    the line breaking class if found; \c LBP_XX otherwise
  */
 static enum LineBreakClass get_char_lb_class_default(
-               utf32_t ch)
+        utf32_t ch)
 {
-       size_t i = 0;
-       while (ch > lb_prop_index[i].end)
-               ++i;
-       assert(i < LINEBREAK_INDEX_SIZE);
-       return get_char_lb_class(ch, lb_prop_index[i].lbp);
+    size_t i = 0;
+    while (ch > lb_prop_index[i].end)
+        ++i;
+    assert(i < LINEBREAK_INDEX_SIZE);
+    return get_char_lb_class(ch, lb_prop_index[i].lbp);
 }
 
 /**
@@ -330,30 +376,30 @@ static enum LineBreakClass get_char_lb_class_default(
  * and then the default data if there is no language-specific property
  * available for the character.
  *
- * @param ch           character to check
- * @param lbpLang      pointer to the language-specific line breaking
- *                                     properties array
- * @return                     the line breaking class if found; \c LBP_XX
- *                                     otherwise
+ * @param ch       character to check
+ * @param lbpLang  pointer to the language-specific line breaking
+ *                 properties array
+ * @return         the line breaking class if found; \c LBP_XX
+ *                 otherwise
  */
 static enum LineBreakClass get_char_lb_class_lang(
-               utf32_t ch,
-               struct LineBreakProperties *lbpLang)
+        utf32_t ch,
+        struct LineBreakProperties *lbpLang)
 {
-       enum LineBreakClass lbcResult;
-
-       /* Find the language-specific line breaking class for a character */
-       if (lbpLang)
-       {
-               lbcResult = get_char_lb_class(ch, lbpLang);
-               if (lbcResult != LBP_XX)
-                       return lbcResult;
-       }
-
-       /* Find the generic language-specific line breaking class, if no
-        * language context is provided, or language-specific data are not
-        * available for the specific character in the specified language */
-       return get_char_lb_class_default(ch);
+    enum LineBreakClass lbcResult;
+
+    /* Find the language-specific line breaking class for a character */
+    if (lbpLang)
+    {
+        lbcResult = get_char_lb_class(ch, lbpLang);
+        if (lbcResult != LBP_XX)
+            return lbcResult;
+    }
+
+    /* Find the generic (default) line breaking class, if no
+     * language context is provided, or language-specific data are not
+     * available for the specific character in the specified language */
+    return get_char_lb_class_default(ch);
 }
 
 /**
@@ -361,32 +407,214 @@ static enum LineBreakClass get_char_lb_class_lang(
  * characters.  They are treated in a simplistic way in this
  * implementation.
  *
- * @param lbc  line breaking class to resolve
- * @param lang language of the text
- * @return             the resolved line breaking class
+ * @param lbc   line breaking class to resolve
+ * @param lang  language of the text
+ * @return      the resolved line breaking class
  */
 static enum LineBreakClass resolve_lb_class(
-               enum LineBreakClass lbc,
-               const char *lang)
+        enum LineBreakClass lbc,
+        const char *lang)
+{
+    switch (lbc)
+    {
+    case LBP_AI:
+        if (lang != NULL &&
+                (strncmp(lang, "zh", 2) == 0 || /* Chinese */
+                 strncmp(lang, "ja", 2) == 0 || /* Japanese */
+                 strncmp(lang, "ko", 2) == 0))  /* Korean */
+        {
+            return LBP_ID;
+        }
+        else
+        {
+            return LBP_AL;
+        }
+    case LBP_CJ:
+        /* Simplified for `normal' line breaking.  See
+         * <url:http://www.unicode.org/reports/tr14/tr14-30.html#CJ>
+         * for details. */
+        return LBP_ID;
+    case LBP_SA:
+    case LBP_SG:
+    case LBP_XX:
+        return LBP_AL;
+    default:
+        return lbc;
+    }
+}
+
+/**
+ * Applies special treatment to the first character in a line.
+ *
+ * @param[in,out] lbpCtx  pointer to the line breaking context
+ * @pre                   \a lbpCtx->lbcCur has a valid line break class
+ * @post                  \a lbpCtx->lbcCur has the updated line break class
+ */
+static void treat_first_char(
+        struct LineBreakContext* lbpCtx)
+{
+    switch (lbpCtx->lbcCur)
+    {
+    case LBP_LF:
+    case LBP_NL:
+        lbpCtx->lbcCur = LBP_BK;        /* Rule LB5 */
+        break;
+    case LBP_CB:
+        lbpCtx->lbcCur = LBP_BA;        /* Rule LB20 */
+        break;
+    case LBP_SP:
+        lbpCtx->lbcCur = LBP_WJ;        /* Leading space treated as WJ */
+        break;
+    default:
+        break;
+    }
+}
+
+/**
+ * Tries to determine the line break opportunity using simple rules.
+ *
+ * @param[in,out] lbpCtx  pointer to the line breaking context
+ * @pre                   \a lbpCtx->lbcCur has the current line break
+ *                        class; and \a lbpCtx->lbcNew has the line
+ *                        break class for the next character
+ * @post                  \a lbpCtx->lbcCur has the updated line break
+ *                        class
+ * @return                break result, one of #LINEBREAK_MUSTBREAK,
+ *                        #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK
+ *                        if identified; or #LINEBREAK_UNDEFINED if
+ *                        table lookup is needed
+ */
+static int get_lb_result_simple(
+        struct LineBreakContext* lbpCtx)
+{
+    if (lbpCtx->lbcCur == LBP_BK
+        || (lbpCtx->lbcCur == LBP_CR && lbpCtx->lbcNew != LBP_LF))
+    {
+        return LINEBREAK_MUSTBREAK;     /* Rules LB4 and LB5 */
+    }
+
+    switch (lbpCtx->lbcNew)
+    {
+    case LBP_SP:
+        return LINEBREAK_NOBREAK;       /* Rule LB7; no change to lbcCur */
+    case LBP_BK:
+    case LBP_LF:
+    case LBP_NL:
+        lbpCtx->lbcCur = LBP_BK;        /* Mandatory break after */
+        return LINEBREAK_NOBREAK;       /* Rule LB6 */
+    case LBP_CR:
+        lbpCtx->lbcCur = LBP_CR;
+        return LINEBREAK_NOBREAK;       /* Rule LB6 */
+    case LBP_CB:
+        lbpCtx->lbcCur = LBP_BA;
+        return LINEBREAK_ALLOWBREAK;    /* Rule LB20 */
+    default:
+        return LINEBREAK_UNDEFINED;     /* Table lookup is needed */
+    }
+}
+
+/**
+ * Determines the line break opportunity by table lookup.
+ *
+ * @param[in,out] lbpCtx  pointer to the line breaking context
+ * @pre                   \a lbpCtx->lbcCur has the current line break
+ *                        class; \a lbpCtx->lbcLast has the line break
+ *                        class for the last character; and \a
+ *                        lbpCtx->lbcNew has the line break class for
+ *                        the next character
+ * @post                  \a lbpCtx->lbcCur has the updated line break
+ *                        class
+ * @return                break result, one of #LINEBREAK_MUSTBREAK,
+ *                        #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK
+ */
+static int get_lb_result_lookup(
+        struct LineBreakContext* lbpCtx)
+{
+    /* TODO: Rule LB21a, as introduced by Revision 28 of UAX#14, is not
+     * yet implemented below. */
+    int brk = LINEBREAK_UNDEFINED;
+    assert(lbpCtx->lbcCur <= LBP_RI);
+    assert(lbpCtx->lbcNew <= LBP_RI);
+    switch (baTable[lbpCtx->lbcCur - 1][lbpCtx->lbcNew - 1])
+    {
+    case DIR_BRK:
+        brk = LINEBREAK_ALLOWBREAK;
+        break;
+    case CMI_BRK:
+    case IND_BRK:
+        brk = (lbpCtx->lbcLast == LBP_SP)
+            ? LINEBREAK_ALLOWBREAK
+            : LINEBREAK_NOBREAK;
+        break;
+    case CMP_BRK:
+        brk = LINEBREAK_NOBREAK;
+        if (lbpCtx->lbcLast != LBP_SP)
+            return brk;                 /* Do not update lbcCur */
+        break;
+    case PRH_BRK:
+        brk = LINEBREAK_NOBREAK;
+        break;
+    }
+    lbpCtx->lbcCur = lbpCtx->lbcNew;
+    return brk;
+}
+
+/**
+ * Initializes line breaking context for a given language.
+ *
+ * @param[in,out] lbpCtx  pointer to the line breaking context
+ * @param[in]     ch      the first character to process
+ * @param[in]     lang    language of the input
+ * @post                  the line breaking context is initialized
+ */
+void lb_init_break_context(
+        struct LineBreakContext* lbpCtx,
+        utf32_t ch,
+        const char* lang)
+{
+    lbpCtx->lang = lang;
+    lbpCtx->lbpLang = get_lb_prop_lang(lang);
+    lbpCtx->lbcLast = LBP_Undefined;
+    lbpCtx->lbcNew = LBP_Undefined;
+    lbpCtx->lbcCur = resolve_lb_class(
+                        get_char_lb_class_lang(ch, lbpCtx->lbpLang),
+                        lbpCtx->lang);
+    treat_first_char(lbpCtx);
+}
+
+/**
+ * Updates the LineBreakContext for the next code point and returns
+ * the detected break.
+ *
+ * @param[in,out] lbpCtx  pointer to the line breaking context
+ * @param[in]     ch      Unicode code point
+ * @return                break result, one of #LINEBREAK_MUSTBREAK,
+ *                        #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK
+ * @post                  the line breaking context is updated
+ */
+int lb_process_next_char(
+        struct LineBreakContext* lbpCtx,
+        utf32_t ch )
 {
-       switch (lbc)
-       {
-       case LBP_AI:
-               if (lang != NULL &&
-                               (strncmp(lang, "zh", 2) == 0 || /* Chinese */
-                                strncmp(lang, "ja", 2) == 0 || /* Japanese */
-                                strncmp(lang, "ko", 2) == 0))  /* Korean */
-               {
-                       return LBP_ID;
-               }
-               /* Fall through */
-       case LBP_SA:
-       case LBP_SG:
-       case LBP_XX:
-               return LBP_AL;
-       default:
-               return lbc;
-       }
+    int brk;
+
+    lbpCtx->lbcLast = lbpCtx->lbcNew;
+    lbpCtx->lbcNew = get_char_lb_class_lang(ch, lbpCtx->lbpLang);
+    brk = get_lb_result_simple(lbpCtx);
+    switch (brk)
+    {
+    case LINEBREAK_MUSTBREAK:
+        lbpCtx->lbcCur = resolve_lb_class(lbpCtx->lbcNew, lbpCtx->lang);
+        treat_first_char(lbpCtx);
+        break;
+    case LINEBREAK_UNDEFINED:
+        lbpCtx->lbcNew = resolve_lb_class(lbpCtx->lbcNew, lbpCtx->lang);
+        brk = get_lb_result_lookup(lbpCtx);
+        break;
+    default:
+        break;
+    }
+    return brk;
 }
 
 /**
@@ -394,59 +622,59 @@ static enum LineBreakClass resolve_lb_class(
  * be advanced to the next complete character, unless the end of string
  * is reached in the middle of a UTF-8 sequence.
  *
- * @param[in]     s            input UTF-8 string
- * @param[in]     len  length of the string in bytes
- * @param[in,out] ip   pointer to the index
- * @return                             the Unicode character beginning at the index; or
- *                                             #EOS if end of input is encountered
+ * @param[in]     s    input UTF-8 string
+ * @param[in]     len  length of the string in bytes
+ * @param[in,out] ip   pointer to the index
+ * @return             the Unicode character beginning at the index; or
+ *                     #EOS if end of input is encountered
  */
 utf32_t lb_get_next_char_utf8(
-               const utf8_t *s,
-               size_t len,
-               size_t *ip)
+        const utf8_t *s,
+        size_t len,
+        size_t *ip)
 {
-       utf8_t ch;
-       utf32_t res;
-
-       assert(*ip <= len);
-       if (*ip == len)
-               return EOS;
-       ch = s[*ip];
-
-       if (ch < 0xC2 || ch > 0xF4)
-       {       /* One-byte sequence, tail (should not occur), or invalid */
-               *ip += 1;
-               return ch;
-       }
-       else if (ch < 0xE0)
-       {       /* Two-byte sequence */
-               if (*ip + 2 > len)
-                       return EOS;
-               res = ((ch & 0x1F) << 6) + (s[*ip + 1] & 0x3F);
-               *ip += 2;
-               return res;
-       }
-       else if (ch < 0xF0)
-       {       /* Three-byte sequence */
-               if (*ip + 3 > len)
-                       return EOS;
-               res = ((ch & 0x0F) << 12) +
-                         ((s[*ip + 1] & 0x3F) << 6) +
-                         ((s[*ip + 2] & 0x3F));
-               *ip += 3;
-               return res;
-       }
-       else
-       {       /* Four-byte sequence */
-               if (*ip + 4 > len)
-                       return EOS;
-               res = ((ch & 0x07) << 18) +
-                         ((s[*ip + 1] & 0x3F) << 12) +
-                         ((s[*ip + 2] & 0x3F) << 6) +
-                         ((s[*ip + 3] & 0x3F));
-               *ip += 4;
-               return res;
-       }
+    utf8_t ch;
+    utf32_t res;
+
+    assert(*ip <= len);
+    if (*ip == len)
+        return EOS;
+    ch = s[*ip];
+
+    if (ch < 0xC2 || ch > 0xF4)
+    {   /* One-byte sequence, tail (should not occur), or invalid */
+        *ip += 1;
+        return ch;
+    }
+    else if (ch < 0xE0)
+    {   /* Two-byte sequence */
+        if (*ip + 2 > len)
+            return EOS;
+        res = ((ch & 0x1F) << 6) + (s[*ip + 1] & 0x3F);
+        *ip += 2;
+        return res;
+    }
+    else if (ch < 0xF0)
+    {   /* Three-byte sequence */
+        if (*ip + 3 > len)
+            return EOS;
+        res = ((ch & 0x0F) << 12) +
+              ((s[*ip + 1] & 0x3F) << 6) +
+              ((s[*ip + 2] & 0x3F));
+        *ip += 3;
+        return res;
+    }
+    else
+    {   /* Four-byte sequence */
+        if (*ip + 4 > len)
+            return EOS;
+        res = ((ch & 0x07) << 18) +
+              ((s[*ip + 1] & 0x3F) << 12) +
+              ((s[*ip + 2] & 0x3F) << 6) +
+              ((s[*ip + 3] & 0x3F));
+        *ip += 4;
+        return res;
+    }
 }
 
 /**
@@ -454,371 +682,174 @@ utf32_t lb_get_next_char_utf8(
  * be advanced to the next complete character, unless the end of string
  * is reached in the middle of a UTF-16 surrogate pair.
  *
- * @param[in]     s            input UTF-16 string
- * @param[in]     len  length of the string in words
- * @param[in,out] ip   pointer to the index
- * @return                             the Unicode character beginning at the index; or
- *                                             #EOS if end of input is encountered
+ * @param[in]     s    input UTF-16 string
+ * @param[in]     len  length of the string in words
+ * @param[in,out] ip   pointer to the index
+ * @return             the Unicode character beginning at the index; or
+ *                     #EOS if end of input is encountered
  */
 utf32_t lb_get_next_char_utf16(
-               const utf16_t *s,
-               size_t len,
-               size_t *ip)
+        const utf16_t *s,
+        size_t len,
+        size_t *ip)
 {
-       utf16_t ch;
-
-       assert(*ip <= len);
-       if (*ip == len)
-               return EOS;
-       ch = s[(*ip)++];
-
-       if (ch < 0xD800 || ch > 0xDBFF)
-       {       /* If the character is not a high surrogate */
-               return ch;
-       }
-       if (*ip == len)
-       {       /* If the input ends here (an error) */
-               --(*ip);
-               return EOS;
-       }
-       if (s[*ip] < 0xDC00 || s[*ip] > 0xDFFF)
-       {       /* If the next character is not the low surrogate (an error) */
-               return ch;
-       }
-       /* Return the constructed character and advance the index again */
-       return (((utf32_t)ch & 0x3FF) << 10) + (s[(*ip)++] & 0x3FF) + 0x10000;
+    utf16_t ch;
+
+    assert(*ip <= len);
+    if (*ip == len)
+        return EOS;
+    ch = s[(*ip)++];
+
+    if (ch < 0xD800 || ch > 0xDBFF)
+    {   /* If the character is not a high surrogate */
+        return ch;
+    }
+    if (*ip == len)
+    {   /* If the input ends here (an error) */
+        --(*ip);
+        return EOS;
+    }
+    if (s[*ip] < 0xDC00 || s[*ip] > 0xDFFF)
+    {   /* If the next character is not the low surrogate (an error) */
+        return ch;
+    }
+    /* Return the constructed character and advance the index again */
+    return (((utf32_t)ch & 0x3FF) << 10) + (s[(*ip)++] & 0x3FF) + 0x10000;
 }
 
 /**
  * Gets the next Unicode character in a UTF-32 sequence.  The index will
  * be advanced to the next character.
  *
- * @param[in]     s            input UTF-32 string
- * @param[in]     len  length of the string in dwords
- * @param[in,out] ip   pointer to the index
- * @return                             the Unicode character beginning at the index; or
- *                                             #EOS if end of input is encountered
+ * @param[in]     s    input UTF-32 string
+ * @param[in]     len  length of the string in dwords
+ * @param[in,out] ip   pointer to the index
+ * @return             the Unicode character beginning at the index; or
+ *                     #EOS if end of input is encountered
  */
 utf32_t lb_get_next_char_utf32(
-               const utf32_t *s,
-               size_t len,
-               size_t *ip)
+        const utf32_t *s,
+        size_t len,
+        size_t *ip)
 {
-       assert(*ip <= len);
-       if (*ip == len)
-               return EOS;
-       return s[(*ip)++];
+    assert(*ip <= len);
+    if (*ip == len)
+        return EOS;
+    return s[(*ip)++];
 }
 
 /**
  * Sets the line breaking information for a generic input string.
  *
- * @param[in]  s                       input string
- * @param[in]  len                     length of the input
- * @param[in]  lang                    language of the input
- * @param[out] brks                    pointer to the output breaking data,
- *                                                     containing #LINEBREAK_MUSTBREAK,
- *                                                     #LINEBREAK_ALLOWBREAK, #LINEBREAK_NOBREAK,
- *                                                     or #LINEBREAK_INSIDEACHAR
- * @param[in] get_next_char    function to get the next UTF-32 character
+ * @param[in]  s             input string
+ * @param[in]  len           length of the input
+ * @param[in]  lang          language of the input
+ * @param[out] brks          pointer to the output breaking data,
+ *                           containing #LINEBREAK_MUSTBREAK,
+ *                           #LINEBREAK_ALLOWBREAK, #LINEBREAK_NOBREAK,
+ *                           or #LINEBREAK_INSIDEACHAR
+ * @param[in] get_next_char  function to get the next UTF-32 character
  */
 void set_linebreaks(
-               const void *s,
-               size_t len,
-               const char *lang,
-               char *brks,
-               get_next_char_t get_next_char)
+        const void *s,
+        size_t len,
+        const char *lang,
+        char *brks,
+        get_next_char_t get_next_char)
 {
-       utf32_t ch;
-       enum LineBreakClass lbcCur;
-       enum LineBreakClass lbcNew;
-       enum LineBreakClass lbcLast;
-       struct LineBreakProperties *lbpLang;
-       size_t posCur = 0;
-       size_t posLast = 0;
-       // TIZEN ONLY : (2013.08.19) for special processing at Zero-width space character
-       int zw_flag = 0;
-       //
-
-       --posLast;      /* To be ++'d later */
-       ch = get_next_char(s, len, &posCur);
-       if (ch == EOS)
-               return;
-       lbpLang = get_lb_prop_lang(lang);
-       lbcCur = resolve_lb_class(get_char_lb_class_lang(ch, lbpLang), lang);
-       lbcNew = LBP_Undefined;
-
-nextline:
-
-       /* Special treatment for the first character */
-       switch (lbcCur)
-       {
-       case LBP_LF:
-       case LBP_NL:
-               lbcCur = LBP_BK;
-               break;
-       case LBP_CB:
-               lbcCur = LBP_BA;
-               break;
-       case LBP_SP:
-               lbcCur = LBP_WJ;
-               break;
-       default:
-               break;
-       }
-
-       /* Process a line till an explicit break or end of string */
-       for (;;)
-       {
-               for (++posLast; posLast < posCur - 1; ++posLast)
-               {
-                       brks[posLast] = LINEBREAK_INSIDEACHAR;
-               }
-               assert(posLast == posCur - 1);
-               lbcLast = lbcNew;
-               ch = get_next_char(s, len, &posCur);
-               if (ch == EOS)
-                       break;
-               lbcNew = get_char_lb_class_lang(ch, lbpLang);
-               if (lbcCur == LBP_BK || (lbcCur == LBP_CR && lbcNew != LBP_LF))
-               {
-                       brks[posLast] = LINEBREAK_MUSTBREAK;
-                       lbcCur = resolve_lb_class(lbcNew, lang);
-                       goto nextline;
-               }
-
-               // TIZEN ONLY : (2013.08.19) for special processing at Zero-width space character
-               /*
-               switch (lbcNew)
-               {
-               case LBP_SP:
-                       brks[posLast] = LINEBREAK_NOBREAK;
-                       continue;
-               case LBP_BK:
-               case LBP_LF:
-               case LBP_NL:
-                       brks[posLast] = LINEBREAK_NOBREAK;
-                       lbcCur = LBP_BK;
-                       continue;
-               case LBP_CR:
-                       brks[posLast] = LINEBREAK_NOBREAK;
-                       lbcCur = LBP_CR;
-                       continue;
-               case LBP_CB:
-                       brks[posLast] = LINEBREAK_ALLOWBREAK;
-                       lbcCur = LBP_BA;
-                       continue;
-               default:
-                       break;
-               }
-
-               lbcNew = resolve_lb_class(lbcNew, lang);
-
-               assert(lbcCur <= LBP_JT);
-               assert(lbcNew <= LBP_JT);
-               switch (baTable[lbcCur - 1][lbcNew - 1])
-               {
-               case DIR_BRK:
-                       brks[posLast] = LINEBREAK_ALLOWBREAK;
-                       break;
-               case CMI_BRK:
-               case IND_BRK:
-                       if (lbcLast == LBP_SP)
-                       {
-                               brks[posLast] = LINEBREAK_ALLOWBREAK;
-                       }
-                       else
-                       {
-                               brks[posLast] = LINEBREAK_NOBREAK;
-                       }
-                       break;
-               case CMP_BRK:
-                       brks[posLast] = LINEBREAK_NOBREAK;
-                       if (lbcLast != LBP_SP)
-                               continue;
-                       break;
-               case PRH_BRK:
-                       brks[posLast] = LINEBREAK_NOBREAK;
-                       break;
-               }
-
-               lbcCur = lbcNew;
-               */
-
-               // TIZEN ONLY - START
-               if (lbcCur == LBP_ZW && !zw_flag)
-               {
-                       zw_flag = 1;
-                       posLast = -1;
-                       posCur = 0;
-                       ch = get_next_char(s, len, &posCur);
-                       lbcCur = resolve_lb_class(get_char_lb_class_lang(ch, lbpLang), lang);
-                       lbcNew = LBP_Undefined;
-                       goto nextline;
-               }
-               else if (zw_flag)
-               {
-                       if (lbcCur == LBP_ZW)
-                               brks[posLast] = LINEBREAK_ALLOWBREAK;
-                       else
-                               brks[posLast] = LINEBREAK_NOBREAK;
-                       lbcCur = lbcNew;
-               }
-               else
-               {
-                       // TIZEN ONLY(20131106): For Hangul word wrap
-                       switch (lbcCur)
-                       {
-                               case LBP_H2:                    /**< Hangul LV */
-                               case LBP_H3:                    /**< Hangul LVT */
-                               case LBP_JL:                    /**< Hangul L Jamo */
-                               case LBP_JV:                    /**< Hangul V Jamo */
-                               case LBP_JT:                    /**< Hangul T Jamo */
-                                       lbcCur = LBP_AL;
-                                       break;
-                               default:
-                                       break;
-                       }
-
-                       switch (lbcNew)
-                       {
-                               case LBP_H2:                    /**< Hangul LV */
-                               case LBP_H3:                    /**< Hangul LVT */
-                               case LBP_JL:                    /**< Hangul L Jamo */
-                               case LBP_JV:                    /**< Hangul V Jamo */
-                               case LBP_JT:                    /**< Hangul T Jamo */
-                                       lbcNew = LBP_AL;
-                                       break;
-                               default:
-                                       break;
-                       }
-                       //
-
-                       switch (lbcNew)
-                       {
-                               case LBP_SP:
-                                       brks[posLast] = LINEBREAK_NOBREAK;
-                                       continue;
-                               case LBP_BK:
-                               case LBP_LF:
-                               case LBP_NL:
-                                       brks[posLast] = LINEBREAK_NOBREAK;
-                                       lbcCur = LBP_BK;
-                                       continue;
-                               case LBP_CR:
-                                       brks[posLast] = LINEBREAK_NOBREAK;
-                                       lbcCur = LBP_CR;
-                                       continue;
-                               case LBP_CB:
-                                       brks[posLast] = LINEBREAK_ALLOWBREAK;
-                                       lbcCur = LBP_BA;
-                                       continue;
-                               default:
-                                       break;
-                       }
-
-                       lbcNew = resolve_lb_class(lbcNew, lang);
-
-                       assert(lbcCur <= LBP_JT);
-                       assert(lbcNew <= LBP_JT);
-                       switch (baTable[lbcCur - 1][lbcNew - 1])
-                       {
-                               case DIR_BRK:
-                                       brks[posLast] = LINEBREAK_ALLOWBREAK;
-                                       break;
-                               case CMI_BRK:
-                               case IND_BRK:
-                                       if (lbcLast == LBP_SP)
-                                       {
-                                               brks[posLast] = LINEBREAK_ALLOWBREAK;
-                                       }
-                                       else
-                                       {
-                                               brks[posLast] = LINEBREAK_NOBREAK;
-                                       }
-                                       break;
-                               case CMP_BRK:
-                                       brks[posLast] = LINEBREAK_NOBREAK;
-                                       if (lbcLast != LBP_SP)
-                                               continue;
-                                       break;
-                               case PRH_BRK:
-                                       brks[posLast] = LINEBREAK_NOBREAK;
-                                       break;
-                       }
-                       lbcCur = lbcNew;
-               }
-               // TIZEN ONLY - END
-       }
-
-       assert(posLast == posCur - 1 && posCur <= len);
-       /* Break after the last character */
-       brks[posLast] = LINEBREAK_MUSTBREAK;
-       /* When the input contains incomplete sequences */
-       while (posCur < len)
-       {
-               brks[posCur++] = LINEBREAK_INSIDEACHAR;
-       }
+    utf32_t ch;
+    struct LineBreakContext lbCtx;
+    size_t posCur = 0;
+    size_t posLast = 0;
+
+    --posLast;  /* To be ++'d later */
+    ch = get_next_char(s, len, &posCur);
+    if (ch == EOS)
+        return;
+    lb_init_break_context(&lbCtx, ch, lang);
+
+    /* Process characters one by one until the end of the string */
+    for (;;)
+    {
+        for (++posLast; posLast < posCur - 1; ++posLast)
+        {
+            brks[posLast] = LINEBREAK_INSIDEACHAR;
+        }
+        assert(posLast == posCur - 1);
+        ch = get_next_char(s, len, &posCur);
+        if (ch == EOS)
+            break;
+        brks[posLast] = lb_process_next_char(&lbCtx, ch);
+    }
+
+    assert(posLast == posCur - 1 && posCur <= len);
+    /* Break after the last character */
+    brks[posLast] = LINEBREAK_MUSTBREAK;
+    /* When the input contains incomplete sequences */
+    while (posCur < len)
+    {
+        brks[posCur++] = LINEBREAK_INSIDEACHAR;
+    }
 }
 
 /**
  * Sets the line breaking information for a UTF-8 input string.
  *
- * @param[in]  s       input UTF-8 string
- * @param[in]  len     length of the input
- * @param[in]  lang    language of the input
- * @param[out] brks    pointer to the output breaking data, containing
- *                                     #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
- *                                     #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
+ * @param[in]  s     input UTF-8 string
+ * @param[in]  len   length of the input
+ * @param[in]  lang  language of the input
+ * @param[out] brks  pointer to the output breaking data, containing
+ *                   #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
+ *                   #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
  */
 void set_linebreaks_utf8(
-               const utf8_t *s,
-               size_t len,
-               const char *lang,
-               char *brks)
+        const utf8_t *s,
+        size_t len,
+        const char *lang,
+        char *brks)
 {
-       set_linebreaks(s, len, lang, brks,
-                                  (get_next_char_t)lb_get_next_char_utf8);
+    set_linebreaks(s, len, lang, brks,
+                   (get_next_char_t)lb_get_next_char_utf8);
 }
 
 /**
  * Sets the line breaking information for a UTF-16 input string.
  *
- * @param[in]  s       input UTF-16 string
- * @param[in]  len     length of the input
- * @param[in]  lang    language of the input
- * @param[out] brks    pointer to the output breaking data, containing
- *                                     #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
- *                                     #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
+ * @param[in]  s     input UTF-16 string
+ * @param[in]  len   length of the input
+ * @param[in]  lang  language of the input
+ * @param[out] brks  pointer to the output breaking data, containing
+ *                   #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
+ *                   #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
  */
 void set_linebreaks_utf16(
-               const utf16_t *s,
-               size_t len,
-               const char *lang,
-               char *brks)
+        const utf16_t *s,
+        size_t len,
+        const char *lang,
+        char *brks)
 {
-       set_linebreaks(s, len, lang, brks,
-                                  (get_next_char_t)lb_get_next_char_utf16);
+    set_linebreaks(s, len, lang, brks,
+                   (get_next_char_t)lb_get_next_char_utf16);
 }
 
 /**
  * Sets the line breaking information for a UTF-32 input string.
  *
- * @param[in]  s       input UTF-32 string
- * @param[in]  len     length of the input
- * @param[in]  lang    language of the input
- * @param[out] brks    pointer to the output breaking data, containing
- *                                     #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
- *                                     #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
+ * @param[in]  s     input UTF-32 string
+ * @param[in]  len   length of the input
+ * @param[in]  lang  language of the input
+ * @param[out] brks  pointer to the output breaking data, containing
+ *                   #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
+ *                   #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
  */
 void set_linebreaks_utf32(
-               const utf32_t *s,
-               size_t len,
-               const char *lang,
-               char *brks)
+        const utf32_t *s,
+        size_t len,
+        const char *lang,
+        char *brks)
 {
-       set_linebreaks(s, len, lang, brks,
-                                  (get_next_char_t)lb_get_next_char_utf32);
+    set_linebreaks(s, len, lang, brks,
+                   (get_next_char_t)lb_get_next_char_utf32);
 }
 
 /**
@@ -828,21 +859,21 @@ void set_linebreaks_utf32(
  * complicated cases involving combining marks, spaces, etc. cannot be
  * correctly processed.
  *
- * @param char1 the first Unicode character
- * @param char2 the second Unicode character
- * @param lang  language of the input
- * @return      one of #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
- *                             #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
+ * @param char1  the first Unicode character
+ * @param char2  the second Unicode character
+ * @param lang   language of the input
+ * @return       one of #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
+ *               #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
  */
 int is_line_breakable(
-               utf32_t char1,
-               utf32_t char2,
-               const char* lang)
+        utf32_t char1,
+        utf32_t char2,
+        const char* lang)
 {
-       utf32_t s[2];
-       char brks[2];
-       s[0] = char1;
-       s[1] = char2;
-       set_linebreaks_utf32(s, 2, lang, brks);
-       return brks[0];
+    utf32_t s[2];
+    char brks[2];
+    s[0] = char1;
+    s[1] = char2;
+    set_linebreaks_utf32(s, 2, lang, brks);
+    return brks[0];
 }
index abc1ae9..94fbca0 100644 (file)
@@ -1,10 +1,10 @@
-/* vim: set tabstop=4 shiftwidth=4: */
+/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
 
 /*
  * Line breaking in a Unicode sequence.  Designed to be used in a
  * generic text renderer.
  *
- * Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
+ * Copyright (C) 2008-2012 Wu Yongwei <wuyongwei at gmail dot com>
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the author be held liable for any damages
  *    distribution.
  *
  * The main reference is Unicode Standard Annex 14 (UAX #14):
- *             <URL:http://www.unicode.org/reports/tr14/>
+ *      <URL:http://www.unicode.org/reports/tr14/>
  *
  * When this library was designed, this annex was at Revision 19, for
  * Unicode 5.0.0:
- *             <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
+ *      <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
  *
- * This library has been updated according to Revision 24, for
- * Unicode 5.2.0:
- *             <URL:http://www.unicode.org/reports/tr14/tr14-24.html>
+ * This library has been updated according to Revision 30, for
+ * Unicode 6.2.0:
+ *      <URL:http://www.unicode.org/reports/tr14/tr14-30.html>
  *
  * The Unicode Terms of Use are available at
- *             <URL:http://www.unicode.org/copyright.html>
+ *      <URL:http://www.unicode.org/copyright.html>
  */
 
 /**
- * @file       linebreak.h
+ * @file    linebreak.h
  *
  * Header file for the line breaking algorithm.
  *
- * @version    2.0, 2010/01/03
- * @author     Wu Yongwei
+ * @version 2.2, 2012/10/06
+ * @author  Wu Yongwei
  */
 
 #ifndef LINEBREAK_H
 extern "C" {
 #endif
 
-#define LINEBREAK_VERSION      0x0200  /**< Version of the library linebreak */
+#define LINEBREAK_VERSION   0x0202  /**< Version of the library linebreak */
 extern const int linebreak_version;
 
 #ifndef LINEBREAK_UTF_TYPES_DEFINED
 #define LINEBREAK_UTF_TYPES_DEFINED
-typedef unsigned char  utf8_t;         /**< Type for UTF-8 data points */
-typedef unsigned short utf16_t;        /**< Type for UTF-16 data points */
-typedef unsigned int   utf32_t;        /**< Type for UTF-32 data points */
+typedef unsigned char   utf8_t;     /**< Type for UTF-8 data points */
+typedef unsigned short  utf16_t;    /**< Type for UTF-16 data points */
+typedef unsigned int    utf32_t;    /**< Type for UTF-32 data points */
 #endif
 
-#define LINEBREAK_MUSTBREAK            0       /**< Break is mandatory */
-#define LINEBREAK_ALLOWBREAK   1       /**< Break is allowed */
-#define LINEBREAK_NOBREAK              2       /**< No break is possible */
-#define LINEBREAK_INSIDEACHAR  3       /**< A UTF-8/16 sequence is unfinished */
+#define LINEBREAK_MUSTBREAK     0   /**< Break is mandatory */
+#define LINEBREAK_ALLOWBREAK    1   /**< Break is allowed */
+#define LINEBREAK_NOBREAK       2   /**< No break is possible */
+#define LINEBREAK_INSIDEACHAR   3   /**< A UTF-8/16 sequence is unfinished */
 
 void init_linebreak(void);
 void set_linebreaks_utf8(
-               const utf8_t *s, size_t len, const char* lang, char *brks);
+        const utf8_t *s, size_t len, const char* lang, char *brks);
 void set_linebreaks_utf16(
-               const utf16_t *s, size_t len, const char* lang, char *brks);
+        const utf16_t *s, size_t len, const char* lang, char *brks);
 void set_linebreaks_utf32(
-               const utf32_t *s, size_t len, const char* lang, char *brks);
+        const utf32_t *s, size_t len, const char* lang, char *brks);
 int is_line_breakable(utf32_t char1, utf32_t char2, const char* lang);
 
 #ifdef __cplusplus
index 0021479..1038a14 100644 (file)
@@ -1,6 +1,6 @@
 /* The content of this file is generated from:
-# LineBreak-6.0.0.txt
-# Date: 2010-08-18, 17:25:00 PDT [KW]
+# LineBreak-7.0.0.txt
+# Date: 2014-02-28, 23:15:00 GMT [KW, LI]
 */
 
 #include "linebreak.h"
@@ -93,11 +93,13 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x0363, 0x036F, LBP_CM },
        { 0x0370, 0x037D, LBP_AL },
        { 0x037E, 0x037E, LBP_IS },
-       { 0x0384, 0x0482, LBP_AL },
+       { 0x037F, 0x0482, LBP_AL },
        { 0x0483, 0x0489, LBP_CM },
        { 0x048A, 0x0587, LBP_AL },
        { 0x0589, 0x0589, LBP_IS },
        { 0x058A, 0x058A, LBP_BA },
+       { 0x058D, 0x058E, LBP_AL },
+       { 0x058F, 0x058F, LBP_PR },
        { 0x0591, 0x05BD, LBP_CM },
        { 0x05BE, 0x05BE, LBP_BA },
        { 0x05BF, 0x05BF, LBP_CM },
@@ -107,12 +109,15 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x05C4, 0x05C5, LBP_CM },
        { 0x05C6, 0x05C6, LBP_EX },
        { 0x05C7, 0x05C7, LBP_CM },
-       { 0x05D0, 0x0608, LBP_AL },
+       { 0x05D0, 0x05F2, LBP_HL },
+       { 0x05F3, 0x0608, LBP_AL },
        { 0x0609, 0x060B, LBP_PO },
        { 0x060C, 0x060D, LBP_IS },
        { 0x060E, 0x060F, LBP_AL },
        { 0x0610, 0x061A, LBP_CM },
-       { 0x061B, 0x061F, LBP_EX },
+       { 0x061B, 0x061B, LBP_EX },
+       { 0x061C, 0x061C, LBP_CM },
+       { 0x061E, 0x061F, LBP_EX },
        { 0x0620, 0x064A, LBP_AL },
        { 0x064B, 0x065F, LBP_CM },
        { 0x0660, 0x0669, LBP_NU },
@@ -155,8 +160,8 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x0829, 0x082D, LBP_CM },
        { 0x0830, 0x0858, LBP_AL },
        { 0x0859, 0x085B, LBP_CM },
-       { 0x085E, 0x085E, LBP_AL },
-       { 0x0900, 0x0903, LBP_CM },
+       { 0x085E, 0x08B2, LBP_AL },
+       { 0x08E4, 0x0903, LBP_CM },
        { 0x0904, 0x0939, LBP_AL },
        { 0x093A, 0x093C, LBP_CM },
        { 0x093D, 0x093D, LBP_AL },
@@ -167,7 +172,7 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x0962, 0x0963, LBP_CM },
        { 0x0964, 0x0965, LBP_BA },
        { 0x0966, 0x096F, LBP_NU },
-       { 0x0970, 0x097F, LBP_AL },
+       { 0x0970, 0x0980, LBP_AL },
        { 0x0981, 0x0983, LBP_CM },
        { 0x0985, 0x09B9, LBP_AL },
        { 0x09BC, 0x09BC, LBP_CM },
@@ -199,6 +204,7 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x0AD0, 0x0AE1, LBP_AL },
        { 0x0AE2, 0x0AE3, LBP_CM },
        { 0x0AE6, 0x0AEF, LBP_NU },
+       { 0x0AF0, 0x0AF0, LBP_AL },
        { 0x0AF1, 0x0AF1, LBP_PR },
        { 0x0B01, 0x0B03, LBP_CM },
        { 0x0B05, 0x0B39, LBP_AL },
@@ -218,14 +224,14 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x0BF0, 0x0BF8, LBP_AL },
        { 0x0BF9, 0x0BF9, LBP_PR },
        { 0x0BFA, 0x0BFA, LBP_AL },
-       { 0x0C01, 0x0C03, LBP_CM },
+       { 0x0C00, 0x0C03, LBP_CM },
        { 0x0C05, 0x0C3D, LBP_AL },
        { 0x0C3E, 0x0C56, LBP_CM },
        { 0x0C58, 0x0C61, LBP_AL },
        { 0x0C62, 0x0C63, LBP_CM },
        { 0x0C66, 0x0C6F, LBP_NU },
        { 0x0C78, 0x0C7F, LBP_AL },
-       { 0x0C82, 0x0C83, LBP_CM },
+       { 0x0C81, 0x0C83, LBP_CM },
        { 0x0C85, 0x0CB9, LBP_AL },
        { 0x0CBC, 0x0CBC, LBP_CM },
        { 0x0CBD, 0x0CBD, LBP_AL },
@@ -234,7 +240,7 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x0CE2, 0x0CE3, LBP_CM },
        { 0x0CE6, 0x0CEF, LBP_NU },
        { 0x0CF1, 0x0CF2, LBP_AL },
-       { 0x0D02, 0x0D03, LBP_CM },
+       { 0x0D01, 0x0D03, LBP_CM },
        { 0x0D05, 0x0D3D, LBP_AL },
        { 0x0D3E, 0x0D4D, LBP_CM },
        { 0x0D4E, 0x0D4E, LBP_AL },
@@ -247,7 +253,9 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x0D7A, 0x0D7F, LBP_AL },
        { 0x0D82, 0x0D83, LBP_CM },
        { 0x0D85, 0x0DC6, LBP_AL },
-       { 0x0DCA, 0x0DF3, LBP_CM },
+       { 0x0DCA, 0x0DDF, LBP_CM },
+       { 0x0DE6, 0x0DEF, LBP_NU },
+       { 0x0DF2, 0x0DF3, LBP_CM },
        { 0x0DF4, 0x0DF4, LBP_AL },
        { 0x0E01, 0x0E3A, LBP_SA },
        { 0x0E3F, 0x0E3F, LBP_PR },
@@ -257,7 +265,7 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x0E5A, 0x0E5B, LBP_BA },
        { 0x0E81, 0x0ECD, LBP_SA },
        { 0x0ED0, 0x0ED9, LBP_NU },
-       { 0x0EDC, 0x0EDD, LBP_SA },
+       { 0x0EDC, 0x0EDF, LBP_SA },
        { 0x0F00, 0x0F00, LBP_AL },
        { 0x0F01, 0x0F04, LBP_BB },
        { 0x0F05, 0x0F05, LBP_AL },
@@ -310,7 +318,7 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x1050, 0x108F, LBP_SA },
        { 0x1090, 0x1099, LBP_NU },
        { 0x109A, 0x109F, LBP_SA },
-       { 0x10A0, 0x10FC, LBP_AL },
+       { 0x10A0, 0x10FF, LBP_AL },
        { 0x1100, 0x115F, LBP_JL },
        { 0x1160, 0x11A7, LBP_JV },
        { 0x11A8, 0x11FF, LBP_JT },
@@ -358,7 +366,7 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x1810, 0x1819, LBP_NU },
        { 0x1820, 0x18A8, LBP_AL },
        { 0x18A9, 0x18A9, LBP_CM },
-       { 0x18AA, 0x191C, LBP_AL },
+       { 0x18AA, 0x191E, LBP_AL },
        { 0x1920, 0x193B, LBP_CM },
        { 0x1940, 0x1940, LBP_AL },
        { 0x1944, 0x1945, LBP_EX },
@@ -373,7 +381,7 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x1A7F, 0x1A7F, LBP_CM },
        { 0x1A80, 0x1A99, LBP_NU },
        { 0x1AA0, 0x1AAD, LBP_SA },
-       { 0x1B00, 0x1B04, LBP_CM },
+       { 0x1AB0, 0x1B04, LBP_CM },
        { 0x1B05, 0x1B33, LBP_AL },
        { 0x1B34, 0x1B44, LBP_CM },
        { 0x1B45, 0x1B4B, LBP_AL },
@@ -386,10 +394,10 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x1B74, 0x1B7C, LBP_AL },
        { 0x1B80, 0x1B82, LBP_CM },
        { 0x1B83, 0x1BA0, LBP_AL },
-       { 0x1BA1, 0x1BAA, LBP_CM },
+       { 0x1BA1, 0x1BAD, LBP_CM },
        { 0x1BAE, 0x1BAF, LBP_AL },
        { 0x1BB0, 0x1BB9, LBP_NU },
-       { 0x1BC0, 0x1BE5, LBP_AL },
+       { 0x1BBA, 0x1BE5, LBP_AL },
        { 0x1BE6, 0x1BF3, LBP_CM },
        { 0x1BFC, 0x1C23, LBP_AL },
        { 0x1C24, 0x1C37, LBP_CM },
@@ -399,13 +407,16 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x1C50, 0x1C59, LBP_NU },
        { 0x1C5A, 0x1C7D, LBP_AL },
        { 0x1C7E, 0x1C7F, LBP_BA },
+       { 0x1CC0, 0x1CC7, LBP_AL },
        { 0x1CD0, 0x1CD2, LBP_CM },
        { 0x1CD3, 0x1CD3, LBP_AL },
        { 0x1CD4, 0x1CE8, LBP_CM },
        { 0x1CE9, 0x1CEC, LBP_AL },
        { 0x1CED, 0x1CED, LBP_CM },
        { 0x1CEE, 0x1CF1, LBP_AL },
-       { 0x1CF2, 0x1CF2, LBP_CM },
+       { 0x1CF2, 0x1CF4, LBP_CM },
+       { 0x1CF5, 0x1CF6, LBP_AL },
+       { 0x1CF8, 0x1CF9, LBP_CM },
        { 0x1D00, 0x1DBF, LBP_AL },
        { 0x1DC0, 0x1DFF, LBP_CM },
        { 0x1E00, 0x1FFC, LBP_AL },
@@ -452,7 +463,7 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x205D, 0x205F, LBP_BA },
        { 0x2060, 0x2060, LBP_WJ },
        { 0x2061, 0x2064, LBP_AL },
-       { 0x206A, 0x206F, LBP_CM },
+       { 0x2066, 0x206F, LBP_CM },
        { 0x2070, 0x2071, LBP_AL },
        { 0x2074, 0x2074, LBP_AI },
        { 0x2075, 0x207C, LBP_AL },
@@ -469,7 +480,9 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x20A7, 0x20A7, LBP_PO },
        { 0x20A8, 0x20B5, LBP_PR },
        { 0x20B6, 0x20B6, LBP_PO },
-       { 0x20B7, 0x20B9, LBP_PR },
+       { 0x20B7, 0x20BA, LBP_PR },
+       { 0x20BB, 0x20BB, LBP_PO },
+       { 0x20BC, 0x20CF, LBP_PR },
        { 0x20D0, 0x20F0, LBP_CM },
        { 0x2100, 0x2102, LBP_AL },
        { 0x2103, 0x2103, LBP_PO },
@@ -558,12 +571,21 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x22A5, 0x22A5, LBP_AI },
        { 0x22A6, 0x22BE, LBP_AL },
        { 0x22BF, 0x22BF, LBP_AI },
-       { 0x22C0, 0x2311, LBP_AL },
+       { 0x22C0, 0x2307, LBP_AL },
+       { 0x2308, 0x2308, LBP_OP },
+       { 0x2309, 0x2309, LBP_CL },
+       { 0x230A, 0x230A, LBP_OP },
+       { 0x230B, 0x230B, LBP_CL },
+       { 0x230C, 0x2311, LBP_AL },
        { 0x2312, 0x2312, LBP_AI },
-       { 0x2313, 0x2328, LBP_AL },
+       { 0x2313, 0x2319, LBP_AL },
+       { 0x231A, 0x231B, LBP_ID },
+       { 0x231C, 0x2328, LBP_AL },
        { 0x2329, 0x2329, LBP_OP },
        { 0x232A, 0x232A, LBP_CL },
-       { 0x232B, 0x244A, LBP_AL },
+       { 0x232B, 0x23EF, LBP_AL },
+       { 0x23F0, 0x23F3, LBP_ID },
+       { 0x23F4, 0x244A, LBP_AL },
        { 0x2460, 0x24FE, LBP_AI },
        { 0x24FF, 0x24FF, LBP_AL },
        { 0x2500, 0x254B, LBP_AI },
@@ -595,19 +617,23 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x25E2, 0x25E5, LBP_AI },
        { 0x25E6, 0x25EE, LBP_AL },
        { 0x25EF, 0x25EF, LBP_AI },
-       { 0x25F0, 0x2604, LBP_AL },
+       { 0x25F0, 0x25FF, LBP_AL },
+       { 0x2600, 0x2603, LBP_ID },
+       { 0x2604, 0x2604, LBP_AL },
        { 0x2605, 0x2606, LBP_AI },
        { 0x2607, 0x2608, LBP_AL },
        { 0x2609, 0x2609, LBP_AI },
        { 0x260A, 0x260D, LBP_AL },
        { 0x260E, 0x260F, LBP_AI },
        { 0x2610, 0x2613, LBP_AL },
-       { 0x2614, 0x2617, LBP_AI },
-       { 0x2618, 0x261B, LBP_AL },
-       { 0x261C, 0x261C, LBP_AI },
-       { 0x261D, 0x261D, LBP_AL },
-       { 0x261E, 0x261E, LBP_AI },
-       { 0x261F, 0x263F, LBP_AL },
+       { 0x2614, 0x2615, LBP_ID },
+       { 0x2616, 0x2617, LBP_AI },
+       { 0x2618, 0x2618, LBP_ID },
+       { 0x2619, 0x2619, LBP_AL },
+       { 0x261A, 0x261F, LBP_ID },
+       { 0x2620, 0x2638, LBP_AL },
+       { 0x2639, 0x263B, LBP_ID },
+       { 0x263C, 0x263F, LBP_AL },
        { 0x2640, 0x2640, LBP_AI },
        { 0x2641, 0x2641, LBP_AL },
        { 0x2642, 0x2642, LBP_AI },
@@ -616,28 +642,49 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x2662, 0x2662, LBP_AL },
        { 0x2663, 0x2665, LBP_AI },
        { 0x2666, 0x2666, LBP_AL },
-       { 0x2667, 0x266A, LBP_AI },
+       { 0x2667, 0x2667, LBP_AI },
+       { 0x2668, 0x2668, LBP_ID },
+       { 0x2669, 0x266A, LBP_AI },
        { 0x266B, 0x266B, LBP_AL },
        { 0x266C, 0x266D, LBP_AI },
        { 0x266E, 0x266E, LBP_AL },
        { 0x266F, 0x266F, LBP_AI },
-       { 0x2670, 0x269D, LBP_AL },
+       { 0x2670, 0x267E, LBP_AL },
+       { 0x267F, 0x267F, LBP_ID },
+       { 0x2680, 0x269D, LBP_AL },
        { 0x269E, 0x269F, LBP_AI },
-       { 0x26A0, 0x26BD, LBP_AL },
-       { 0x26BE, 0x26BF, LBP_AI },
-       { 0x26C0, 0x26C3, LBP_AL },
-       { 0x26C4, 0x26CD, LBP_AI },
+       { 0x26A0, 0x26BC, LBP_AL },
+       { 0x26BD, 0x26C8, LBP_ID },
+       { 0x26C9, 0x26CC, LBP_AI },
+       { 0x26CD, 0x26CD, LBP_ID },
        { 0x26CE, 0x26CE, LBP_AL },
-       { 0x26CF, 0x26E1, LBP_AI },
+       { 0x26CF, 0x26D1, LBP_ID },
+       { 0x26D2, 0x26D2, LBP_AI },
+       { 0x26D3, 0x26D4, LBP_ID },
+       { 0x26D5, 0x26D7, LBP_AI },
+       { 0x26D8, 0x26D9, LBP_ID },
+       { 0x26DA, 0x26DB, LBP_AI },
+       { 0x26DC, 0x26DC, LBP_ID },
+       { 0x26DD, 0x26DE, LBP_AI },
+       { 0x26DF, 0x26E1, LBP_ID },
        { 0x26E2, 0x26E2, LBP_AL },
        { 0x26E3, 0x26E3, LBP_AI },
        { 0x26E4, 0x26E7, LBP_AL },
-       { 0x26E8, 0x26FF, LBP_AI },
-       { 0x2701, 0x2756, LBP_AL },
+       { 0x26E8, 0x26E9, LBP_AI },
+       { 0x26EA, 0x26EA, LBP_ID },
+       { 0x26EB, 0x26F0, LBP_AI },
+       { 0x26F1, 0x26F5, LBP_ID },
+       { 0x26F6, 0x26F6, LBP_AI },
+       { 0x26F7, 0x26FA, LBP_ID },
+       { 0x26FB, 0x26FC, LBP_AI },
+       { 0x26FD, 0x2704, LBP_ID },
+       { 0x2705, 0x2707, LBP_AL },
+       { 0x2708, 0x270D, LBP_ID },
+       { 0x270E, 0x2756, LBP_AL },
        { 0x2757, 0x2757, LBP_AI },
        { 0x2758, 0x275A, LBP_AL },
-       { 0x275B, 0x275E, LBP_QU },
-       { 0x275F, 0x2761, LBP_AL },
+       { 0x275B, 0x2760, LBP_QU },
+       { 0x2761, 0x2761, LBP_AL },
        { 0x2762, 0x2763, LBP_EX },
        { 0x2764, 0x2767, LBP_AL },
        { 0x2768, 0x2768, LBP_OP },
@@ -702,8 +749,9 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x29FD, 0x29FD, LBP_CL },
        { 0x29FE, 0x2B54, LBP_AL },
        { 0x2B55, 0x2B59, LBP_AI },
-       { 0x2C00, 0x2CEE, LBP_AL },
+       { 0x2B5A, 0x2CEE, LBP_AL },
        { 0x2CEF, 0x2CF1, LBP_CM },
+       { 0x2CF2, 0x2CF3, LBP_AL },
        { 0x2CF9, 0x2CF9, LBP_EX },
        { 0x2CFA, 0x2CFC, LBP_BA },
        { 0x2CFD, 0x2CFD, LBP_AL },
@@ -736,7 +784,16 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x2E2E, 0x2E2E, LBP_EX },
        { 0x2E2F, 0x2E2F, LBP_AL },
        { 0x2E30, 0x2E31, LBP_BA },
-       { 0x2E80, 0x3000, LBP_ID },
+       { 0x2E32, 0x2E32, LBP_AL },
+       { 0x2E33, 0x2E34, LBP_BA },
+       { 0x2E35, 0x2E39, LBP_AL },
+       { 0x2E3A, 0x2E3B, LBP_B2 },
+       { 0x2E3C, 0x2E3E, LBP_BA },
+       { 0x2E3F, 0x2E3F, LBP_AL },
+       { 0x2E40, 0x2E41, LBP_BA },
+       { 0x2E42, 0x2E42, LBP_OP },
+       { 0x2E80, 0x2FFB, LBP_ID },
+       { 0x3000, 0x3000, LBP_BA },
        { 0x3001, 0x3002, LBP_CL },
        { 0x3003, 0x3004, LBP_ID },
        { 0x3005, 0x3005, LBP_NS },
@@ -765,58 +822,63 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x301E, 0x301F, LBP_CL },
        { 0x3020, 0x3029, LBP_ID },
        { 0x302A, 0x302F, LBP_CM },
-       { 0x3030, 0x303A, LBP_ID },
+       { 0x3030, 0x3034, LBP_ID },
+       { 0x3035, 0x3035, LBP_CM },
+       { 0x3036, 0x303A, LBP_ID },
        { 0x303B, 0x303C, LBP_NS },
        { 0x303D, 0x303F, LBP_ID },
-       { 0x3041, 0x3041, LBP_NS },
+       { 0x3041, 0x3041, LBP_CJ },
        { 0x3042, 0x3042, LBP_ID },
-       { 0x3043, 0x3043, LBP_NS },
+       { 0x3043, 0x3043, LBP_CJ },
        { 0x3044, 0x3044, LBP_ID },
-       { 0x3045, 0x3045, LBP_NS },
+       { 0x3045, 0x3045, LBP_CJ },
        { 0x3046, 0x3046, LBP_ID },
-       { 0x3047, 0x3047, LBP_NS },
+       { 0x3047, 0x3047, LBP_CJ },
        { 0x3048, 0x3048, LBP_ID },
-       { 0x3049, 0x3049, LBP_NS },
+       { 0x3049, 0x3049, LBP_CJ },
        { 0x304A, 0x3062, LBP_ID },
-       { 0x3063, 0x3063, LBP_NS },
+       { 0x3063, 0x3063, LBP_CJ },
        { 0x3064, 0x3082, LBP_ID },
-       { 0x3083, 0x3083, LBP_NS },
+       { 0x3083, 0x3083, LBP_CJ },
        { 0x3084, 0x3084, LBP_ID },
-       { 0x3085, 0x3085, LBP_NS },
+       { 0x3085, 0x3085, LBP_CJ },
        { 0x3086, 0x3086, LBP_ID },
-       { 0x3087, 0x3087, LBP_NS },
+       { 0x3087, 0x3087, LBP_CJ },
        { 0x3088, 0x308D, LBP_ID },
-       { 0x308E, 0x308E, LBP_NS },
+       { 0x308E, 0x308E, LBP_CJ },
        { 0x308F, 0x3094, LBP_ID },
-       { 0x3095, 0x3096, LBP_NS },
+       { 0x3095, 0x3096, LBP_CJ },
        { 0x3099, 0x309A, LBP_CM },
        { 0x309B, 0x309E, LBP_NS },
        { 0x309F, 0x309F, LBP_ID },
-       { 0x30A0, 0x30A1, LBP_NS },
+       { 0x30A0, 0x30A0, LBP_NS },
+       { 0x30A1, 0x30A1, LBP_CJ },
        { 0x30A2, 0x30A2, LBP_ID },
-       { 0x30A3, 0x30A3, LBP_NS },
+       { 0x30A3, 0x30A3, LBP_CJ },
        { 0x30A4, 0x30A4, LBP_ID },
-       { 0x30A5, 0x30A5, LBP_NS },
+       { 0x30A5, 0x30A5, LBP_CJ },
        { 0x30A6, 0x30A6, LBP_ID },
-       { 0x30A7, 0x30A7, LBP_NS },
+       { 0x30A7, 0x30A7, LBP_CJ },
        { 0x30A8, 0x30A8, LBP_ID },
-       { 0x30A9, 0x30A9, LBP_NS },
+       { 0x30A9, 0x30A9, LBP_CJ },
        { 0x30AA, 0x30C2, LBP_ID },
-       { 0x30C3, 0x30C3, LBP_NS },
+       { 0x30C3, 0x30C3, LBP_CJ },
        { 0x30C4, 0x30E2, LBP_ID },
-       { 0x30E3, 0x30E3, LBP_NS },
+       { 0x30E3, 0x30E3, LBP_CJ },
        { 0x30E4, 0x30E4, LBP_ID },
-       { 0x30E5, 0x30E5, LBP_NS },
+       { 0x30E5, 0x30E5, LBP_CJ },
        { 0x30E6, 0x30E6, LBP_ID },
-       { 0x30E7, 0x30E7, LBP_NS },
+       { 0x30E7, 0x30E7, LBP_CJ },
        { 0x30E8, 0x30ED, LBP_ID },
-       { 0x30EE, 0x30EE, LBP_NS },
+       { 0x30EE, 0x30EE, LBP_CJ },
        { 0x30EF, 0x30F4, LBP_ID },
-       { 0x30F5, 0x30F6, LBP_NS },
+       { 0x30F5, 0x30F6, LBP_CJ },
        { 0x30F7, 0x30FA, LBP_ID },
-       { 0x30FB, 0x30FE, LBP_NS },
+       { 0x30FB, 0x30FB, LBP_NS },
+       { 0x30FC, 0x30FC, LBP_CJ },
+       { 0x30FD, 0x30FE, LBP_NS },
        { 0x30FF, 0x31E3, LBP_ID },
-       { 0x31F0, 0x31FF, LBP_NS },
+       { 0x31F0, 0x31FF, LBP_CJ },
        { 0x3200, 0x3247, LBP_ID },
        { 0x3248, 0x324F, LBP_AI },
        { 0x3250, 0x4DBF, LBP_ID },
@@ -835,8 +897,10 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0xA62A, 0xA66E, LBP_AL },
        { 0xA66F, 0xA672, LBP_CM },
        { 0xA673, 0xA673, LBP_AL },
-       { 0xA67C, 0xA67D, LBP_CM },
-       { 0xA67E, 0xA6EF, LBP_AL },
+       { 0xA674, 0xA67D, LBP_CM },
+       { 0xA67E, 0xA69D, LBP_AL },
+       { 0xA69F, 0xA69F, LBP_CM },
+       { 0xA6A0, 0xA6EF, LBP_AL },
        { 0xA6F0, 0xA6F1, LBP_CM },
        { 0xA6F2, 0xA6F2, LBP_AL },
        { 0xA6F3, 0xA6F7, LBP_BA },
@@ -875,7 +939,11 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0xA9C7, 0xA9C9, LBP_BA },
        { 0xA9CA, 0xA9CF, LBP_AL },
        { 0xA9D0, 0xA9D9, LBP_NU },
-       { 0xA9DE, 0xAA28, LBP_AL },
+       { 0xA9DE, 0xA9DF, LBP_AL },
+       { 0xA9E0, 0xA9EF, LBP_SA },
+       { 0xA9F0, 0xA9F9, LBP_NU },
+       { 0xA9FA, 0xA9FE, LBP_SA },
+       { 0xAA00, 0xAA28, LBP_AL },
        { 0xAA29, 0xAA36, LBP_CM },
        { 0xAA40, 0xAA42, LBP_AL },
        { 0xAA43, 0xAA43, LBP_CM },
@@ -885,6 +953,11 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0xAA5C, 0xAA5C, LBP_AL },
        { 0xAA5D, 0xAA5F, LBP_BA },
        { 0xAA60, 0xAADF, LBP_SA },
+       { 0xAAE0, 0xAAEA, LBP_AL },
+       { 0xAAEB, 0xAAEF, LBP_CM },
+       { 0xAAF0, 0xAAF1, LBP_BA },
+       { 0xAAF2, 0xAAF4, LBP_AL },
+       { 0xAAF5, 0xAAF6, LBP_CM },
        { 0xAB01, 0xABE2, LBP_AL },
        { 0xABE3, 0xABEA, LBP_CM },
        { 0xABEB, 0xABEB, LBP_BA },
@@ -1693,11 +1766,15 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0xD800, 0xDFFF, LBP_SG },
        { 0xE000, 0xF8FF, LBP_XX },
        { 0xF900, 0xFAFF, LBP_ID },
-       { 0xFB00, 0xFB1D, LBP_AL },
+       { 0xFB00, 0xFB17, LBP_AL },
+       { 0xFB1D, 0xFB1D, LBP_HL },
        { 0xFB1E, 0xFB1E, LBP_CM },
-       { 0xFB1F, 0xFD3D, LBP_AL },
-       { 0xFD3E, 0xFD3E, LBP_OP },
-       { 0xFD3F, 0xFD3F, LBP_CL },
+       { 0xFB1F, 0xFB28, LBP_HL },
+       { 0xFB29, 0xFB29, LBP_AL },
+       { 0xFB2A, 0xFB4F, LBP_HL },
+       { 0xFB50, 0xFD3D, LBP_AL },
+       { 0xFD3E, 0xFD3E, LBP_CL },
+       { 0xFD3F, 0xFD3F, LBP_OP },
        { 0xFD50, 0xFDFB, LBP_AL },
        { 0xFDFC, 0xFDFC, LBP_PO },
        { 0xFDFD, 0xFDFD, LBP_AL },
@@ -1709,7 +1786,7 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0xFE17, 0xFE17, LBP_OP },
        { 0xFE18, 0xFE18, LBP_CL },
        { 0xFE19, 0xFE19, LBP_IN },
-       { 0xFE20, 0xFE26, LBP_CM },
+       { 0xFE20, 0xFE2D, LBP_CM },
        { 0xFE30, 0xFE34, LBP_ID },
        { 0xFE35, 0xFE35, LBP_OP },
        { 0xFE36, 0xFE36, LBP_CL },
@@ -1779,7 +1856,7 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0xFF63, 0xFF64, LBP_CL },
        { 0xFF65, 0xFF65, LBP_NS },
        { 0xFF66, 0xFF66, LBP_AL },
-       { 0xFF67, 0xFF70, LBP_NS },
+       { 0xFF67, 0xFF70, LBP_CJ },
        { 0xFF71, 0xFF9D, LBP_AL },
        { 0xFF9E, 0xFF9F, LBP_NS },
        { 0xFFA0, 0xFFDC, LBP_AL },
@@ -1795,13 +1872,17 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x10100, 0x10102, LBP_BA },
        { 0x10107, 0x101FC, LBP_AL },
        { 0x101FD, 0x101FD, LBP_CM },
-       { 0x10280, 0x1039D, LBP_AL },
+       { 0x10280, 0x102D0, LBP_AL },
+       { 0x102E0, 0x102E0, LBP_CM },
+       { 0x102E1, 0x10375, LBP_AL },
+       { 0x10376, 0x1037A, LBP_CM },
+       { 0x10380, 0x1039D, LBP_AL },
        { 0x1039F, 0x1039F, LBP_BA },
        { 0x103A0, 0x103CF, LBP_AL },
        { 0x103D0, 0x103D0, LBP_BA },
        { 0x103D1, 0x1049D, LBP_AL },
        { 0x104A0, 0x104A9, LBP_NU },
-       { 0x10800, 0x10855, LBP_AL },
+       { 0x10500, 0x10855, LBP_AL },
        { 0x10857, 0x10857, LBP_BA },
        { 0x10858, 0x1091B, LBP_AL },
        { 0x1091F, 0x1091F, LBP_BA },
@@ -1811,7 +1892,12 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x10A38, 0x10A3F, LBP_CM },
        { 0x10A40, 0x10A47, LBP_AL },
        { 0x10A50, 0x10A57, LBP_BA },
-       { 0x10A58, 0x10B35, LBP_AL },
+       { 0x10A58, 0x10AE4, LBP_AL },
+       { 0x10AE5, 0x10AE6, LBP_CM },
+       { 0x10AEB, 0x10AEF, LBP_AL },
+       { 0x10AF0, 0x10AF5, LBP_BA },
+       { 0x10AF6, 0x10AF6, LBP_IN },
+       { 0x10B00, 0x10B35, LBP_AL },
        { 0x10B39, 0x10B3F, LBP_BA },
        { 0x10B40, 0x10E7E, LBP_AL },
        { 0x11000, 0x11002, LBP_CM },
@@ -1820,13 +1906,70 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x11047, 0x11048, LBP_BA },
        { 0x11049, 0x11065, LBP_AL },
        { 0x11066, 0x1106F, LBP_NU },
-       { 0x11080, 0x11082, LBP_CM },
+       { 0x1107F, 0x11082, LBP_CM },
        { 0x11083, 0x110AF, LBP_AL },
        { 0x110B0, 0x110BA, LBP_CM },
        { 0x110BB, 0x110BD, LBP_AL },
        { 0x110BE, 0x110C1, LBP_BA },
-       { 0x12000, 0x12462, LBP_AL },
-       { 0x12470, 0x12473, LBP_BA },
+       { 0x110D0, 0x110E8, LBP_AL },
+       { 0x110F0, 0x110F9, LBP_NU },
+       { 0x11100, 0x11102, LBP_CM },
+       { 0x11103, 0x11126, LBP_AL },
+       { 0x11127, 0x11134, LBP_CM },
+       { 0x11136, 0x1113F, LBP_NU },
+       { 0x11140, 0x11143, LBP_BA },
+       { 0x11150, 0x11172, LBP_AL },
+       { 0x11173, 0x11173, LBP_CM },
+       { 0x11174, 0x11174, LBP_AL },
+       { 0x11175, 0x11175, LBP_BB },
+       { 0x11176, 0x11176, LBP_AL },
+       { 0x11180, 0x11182, LBP_CM },
+       { 0x11183, 0x111B2, LBP_AL },
+       { 0x111B3, 0x111C0, LBP_CM },
+       { 0x111C1, 0x111C4, LBP_AL },
+       { 0x111C5, 0x111C6, LBP_BA },
+       { 0x111C7, 0x111C7, LBP_AL },
+       { 0x111C8, 0x111C8, LBP_BA },
+       { 0x111CD, 0x111CD, LBP_AL },
+       { 0x111D0, 0x111D9, LBP_NU },
+       { 0x111DA, 0x1122B, LBP_AL },
+       { 0x1122C, 0x11237, LBP_CM },
+       { 0x11238, 0x11239, LBP_BA },
+       { 0x1123A, 0x1123A, LBP_AL },
+       { 0x1123B, 0x1123C, LBP_BA },
+       { 0x1123D, 0x112DE, LBP_AL },
+       { 0x112DF, 0x112EA, LBP_CM },
+       { 0x112F0, 0x112F9, LBP_NU },
+       { 0x11301, 0x11303, LBP_CM },
+       { 0x11305, 0x11339, LBP_AL },
+       { 0x1133C, 0x1133C, LBP_CM },
+       { 0x1133D, 0x1133D, LBP_AL },
+       { 0x1133E, 0x11357, LBP_CM },
+       { 0x1135D, 0x11361, LBP_AL },
+       { 0x11362, 0x11374, LBP_CM },
+       { 0x11480, 0x114AF, LBP_AL },
+       { 0x114B0, 0x114C3, LBP_CM },
+       { 0x114C4, 0x114C7, LBP_AL },
+       { 0x114D0, 0x114D9, LBP_NU },
+       { 0x11580, 0x115AE, LBP_AL },
+       { 0x115AF, 0x115C0, LBP_CM },
+       { 0x115C1, 0x115C1, LBP_BB },
+       { 0x115C2, 0x115C3, LBP_BA },
+       { 0x115C4, 0x115C5, LBP_EX },
+       { 0x115C6, 0x115C8, LBP_AL },
+       { 0x115C9, 0x115C9, LBP_BA },
+       { 0x11600, 0x1162F, LBP_AL },
+       { 0x11630, 0x11640, LBP_CM },
+       { 0x11641, 0x11642, LBP_BA },
+       { 0x11643, 0x11644, LBP_AL },
+       { 0x11650, 0x11659, LBP_NU },
+       { 0x11680, 0x116AA, LBP_AL },
+       { 0x116AB, 0x116B7, LBP_CM },
+       { 0x116C0, 0x116C9, LBP_NU },
+       { 0x118A0, 0x118DF, LBP_AL },
+       { 0x118E0, 0x118E9, LBP_NU },
+       { 0x118EA, 0x1246E, LBP_AL },
+       { 0x12470, 0x12474, LBP_BA },
        { 0x13000, 0x13257, LBP_AL },
        { 0x13258, 0x1325A, LBP_OP },
        { 0x1325B, 0x1325D, LBP_CL },
@@ -1840,8 +1983,27 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x1328A, 0x13378, LBP_AL },
        { 0x13379, 0x13379, LBP_OP },
        { 0x1337A, 0x1337B, LBP_CL },
-       { 0x1337C, 0x16A38, LBP_AL },
+       { 0x1337C, 0x16A5E, LBP_AL },
+       { 0x16A60, 0x16A69, LBP_NU },
+       { 0x16A6E, 0x16A6F, LBP_BA },
+       { 0x16AD0, 0x16AED, LBP_AL },
+       { 0x16AF0, 0x16AF4, LBP_CM },
+       { 0x16AF5, 0x16AF5, LBP_BA },
+       { 0x16B00, 0x16B2F, LBP_AL },
+       { 0x16B30, 0x16B36, LBP_CM },
+       { 0x16B37, 0x16B39, LBP_BA },
+       { 0x16B3A, 0x16B43, LBP_AL },
+       { 0x16B44, 0x16B44, LBP_BA },
+       { 0x16B45, 0x16B45, LBP_AL },
+       { 0x16B50, 0x16B59, LBP_NU },
+       { 0x16B5B, 0x16F50, LBP_AL },
+       { 0x16F51, 0x16F92, LBP_CM },
+       { 0x16F93, 0x16F9F, LBP_AL },
        { 0x1B000, 0x1B001, LBP_ID },
+       { 0x1BC00, 0x1BC9C, LBP_AL },
+       { 0x1BC9D, 0x1BC9E, LBP_CM },
+       { 0x1BC9F, 0x1BC9F, LBP_BA },
+       { 0x1BCA0, 0x1BCA3, LBP_CM },
        { 0x1D000, 0x1D164, LBP_AL },
        { 0x1D165, 0x1D169, LBP_CM },
        { 0x1D16A, 0x1D16C, LBP_AL },
@@ -1854,13 +2016,49 @@ struct LineBreakProperties lb_prop_default[] = {
        { 0x1D242, 0x1D244, LBP_CM },
        { 0x1D245, 0x1D7CB, LBP_AL },
        { 0x1D7CE, 0x1D7FF, LBP_NU },
-       { 0x1F000, 0x1F0DF, LBP_AL },
+       { 0x1E800, 0x1E8CF, LBP_AL },
+       { 0x1E8D0, 0x1E8D6, LBP_CM },
+       { 0x1EE00, 0x1EEF1, LBP_AL },
+       { 0x1F000, 0x1F0F5, LBP_ID },
        { 0x1F100, 0x1F12D, LBP_AI },
        { 0x1F12E, 0x1F12E, LBP_AL },
-       { 0x1F130, 0x1F19A, LBP_AI },
-       { 0x1F1E6, 0x1F1FF, LBP_AL },
-       { 0x1F200, 0x1F251, LBP_ID },
-       { 0x1F300, 0x1F773, LBP_AL },
+       { 0x1F130, 0x1F169, LBP_AI },
+       { 0x1F16A, 0x1F16B, LBP_AL },
+       { 0x1F170, 0x1F19A, LBP_AI },
+       { 0x1F1E6, 0x1F1FF, LBP_RI },
+       { 0x1F200, 0x1F39B, LBP_ID },
+       { 0x1F39C, 0x1F39D, LBP_AL },
+       { 0x1F39E, 0x1F3B4, LBP_ID },
+       { 0x1F3B5, 0x1F3B6, LBP_AL },
+       { 0x1F3B7, 0x1F3BB, LBP_ID },
+       { 0x1F3BC, 0x1F3BC, LBP_AL },
+       { 0x1F3BD, 0x1F49F, LBP_ID },
+       { 0x1F4A0, 0x1F4A0, LBP_AL },
+       { 0x1F4A1, 0x1F4A1, LBP_ID },
+       { 0x1F4A2, 0x1F4A2, LBP_AL },
+       { 0x1F4A3, 0x1F4A3, LBP_ID },
+       { 0x1F4A4, 0x1F4A4, LBP_AL },
+       { 0x1F4A5, 0x1F4AE, LBP_ID },
+       { 0x1F4AF, 0x1F4AF, LBP_AL },
+       { 0x1F4B0, 0x1F4B0, LBP_ID },
+       { 0x1F4B1, 0x1F4B2, LBP_AL },
+       { 0x1F4B3, 0x1F4FE, LBP_ID },
+       { 0x1F500, 0x1F506, LBP_AL },
+       { 0x1F507, 0x1F516, LBP_ID },
+       { 0x1F517, 0x1F524, LBP_AL },
+       { 0x1F525, 0x1F531, LBP_ID },
+       { 0x1F532, 0x1F549, LBP_AL },
+       { 0x1F54A, 0x1F5D3, LBP_ID },
+       { 0x1F5D4, 0x1F5DB, LBP_AL },
+       { 0x1F5DC, 0x1F5F3, LBP_ID },
+       { 0x1F5F4, 0x1F5F9, LBP_AL },
+       { 0x1F5FA, 0x1F64F, LBP_ID },
+       { 0x1F650, 0x1F675, LBP_AL },
+       { 0x1F676, 0x1F678, LBP_QU },
+       { 0x1F679, 0x1F67B, LBP_NS },
+       { 0x1F67C, 0x1F67F, LBP_AL },
+       { 0x1F680, 0x1F6F3, LBP_ID },
+       { 0x1F700, 0x1F8AD, LBP_AL },
        { 0x20000, 0x3FFFD, LBP_ID },
        { 0xE0001, 0xE01EF, LBP_CM },
        { 0xF0000, 0x10FFFD, LBP_XX },
index 9ddb4d9..3455afd 100644 (file)
@@ -1,10 +1,10 @@
-/* vim: set tabstop=4 shiftwidth=4: */
+/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
 
 /*
  * Line breaking in a Unicode sequence.  Designed to be used in a
  * generic text renderer.
  *
- * Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
+ * Copyright (C) 2008-2012 Wu Yongwei <wuyongwei at gmail dot com>
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the author be held liable for any damages
  *    distribution.
  *
  * The main reference is Unicode Standard Annex 14 (UAX #14):
- *             <URL:http://www.unicode.org/reports/tr14/>
+ *      <URL:http://www.unicode.org/reports/tr14/>
  *
  * When this library was designed, this annex was at Revision 19, for
  * Unicode 5.0.0:
- *             <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
+ *      <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
  *
- * This library has been updated according to Revision 24, for
- * Unicode 5.2.0:
- *             <URL:http://www.unicode.org/reports/tr14/tr14-24.html>
+ * This library has been updated according to Revision 30, for
+ * Unicode 6.2.0:
+ *      <URL:http://www.unicode.org/reports/tr14/tr14-30.html>
  *
  * The Unicode Terms of Use are available at
- *             <URL:http://www.unicode.org/copyright.html>
+ *      <URL:http://www.unicode.org/copyright.html>
  */
 
 /**
- * @file       linebreakdef.c
+ * @file    linebreakdef.c
  *
  * Definition of language-specific data.
  *
- * @version    2.0, 2010/01/03
- * @author     Wu Yongwei
+ * @version 2.2, 2012/10/06
+ * @author  Wu Yongwei
  */
 
 #include "linebreak.h"
  * English-specific data over the default Unicode rules.
  */
 static struct LineBreakProperties lb_prop_English[] = {
-       { 0x2018, 0x2018, LBP_OP },     /* Left single quotation mark: opening */
-       { 0x201C, 0x201C, LBP_OP },     /* Left double quotation mark: opening */
-       { 0x201D, 0x201D, LBP_CL },     /* Right double quotation mark: closing */
-       { 0, 0, LBP_Undefined }
+    { 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */
+    { 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */
+    { 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */
+    { 0, 0, LBP_Undefined }
 };
 
 /**
  * German-specific data over the default Unicode rules.
  */
 static struct LineBreakProperties lb_prop_German[] = {
-       { 0x00AB, 0x00AB, LBP_CL },     /* Left double angle quotation mark: closing */
-       { 0x00BB, 0x00BB, LBP_OP },     /* Right double angle quotation mark: opening */
-       { 0x2018, 0x2018, LBP_CL },     /* Left single quotation mark: closing */
-       { 0x201C, 0x201C, LBP_CL },     /* Left double quotation mark: closing */
-       { 0x2039, 0x2039, LBP_CL },     /* Left single angle quotation mark: closing */
-       { 0x203A, 0x203A, LBP_OP },     /* Right single angle quotation mark: opening */
-       { 0, 0, LBP_Undefined }
+    { 0x00AB, 0x00AB, LBP_CL }, /* Left double angle quotation mark: closing */
+    { 0x00BB, 0x00BB, LBP_OP }, /* Right double angle quotation mark: opening */
+    { 0x2018, 0x2018, LBP_CL }, /* Left single quotation mark: closing */
+    { 0x201C, 0x201C, LBP_CL }, /* Left double quotation mark: closing */
+    { 0x2039, 0x2039, LBP_CL }, /* Left single angle quotation mark: closing */
+    { 0x203A, 0x203A, LBP_OP }, /* Right single angle quotation mark: opening */
+    { 0, 0, LBP_Undefined }
 };
 
 /**
  * Spanish-specific data over the default Unicode rules.
  */
 static struct LineBreakProperties lb_prop_Spanish[] = {
-       { 0x00AB, 0x00AB, LBP_OP },     /* Left double angle quotation mark: opening */
-       { 0x00BB, 0x00BB, LBP_CL },     /* Right double angle quotation mark: closing */
-       { 0x2018, 0x2018, LBP_OP },     /* Left single quotation mark: opening */
-       { 0x201C, 0x201C, LBP_OP },     /* Left double quotation mark: opening */
-       { 0x201D, 0x201D, LBP_CL },     /* Right double quotation mark: closing */
-       { 0x2039, 0x2039, LBP_OP },     /* Left single angle quotation mark: opening */
-       { 0x203A, 0x203A, LBP_CL },     /* Right single angle quotation mark: closing */
-       { 0, 0, LBP_Undefined }
+    { 0x00AB, 0x00AB, LBP_OP }, /* Left double angle quotation mark: opening */
+    { 0x00BB, 0x00BB, LBP_CL }, /* Right double angle quotation mark: closing */
+    { 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */
+    { 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */
+    { 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */
+    { 0x2039, 0x2039, LBP_OP }, /* Left single angle quotation mark: opening */
+    { 0x203A, 0x203A, LBP_CL }, /* Right single angle quotation mark: closing */
+    { 0, 0, LBP_Undefined }
 };
 
 /**
  * French-specific data over the default Unicode rules.
  */
 static struct LineBreakProperties lb_prop_French[] = {
-       { 0x00AB, 0x00AB, LBP_OP },     /* Left double angle quotation mark: opening */
-       { 0x00BB, 0x00BB, LBP_CL },     /* Right double angle quotation mark: closing */
-       { 0x2018, 0x2018, LBP_OP },     /* Left single quotation mark: opening */
-       { 0x201C, 0x201C, LBP_OP },     /* Left double quotation mark: opening */
-       { 0x201D, 0x201D, LBP_CL },     /* Right double quotation mark: closing */
-       { 0x2039, 0x2039, LBP_OP },     /* Left single angle quotation mark: opening */
-       { 0x203A, 0x203A, LBP_CL },     /* Right single angle quotation mark: closing */
-       { 0, 0, LBP_Undefined }
+    { 0x00AB, 0x00AB, LBP_OP }, /* Left double angle quotation mark: opening */
+    { 0x00BB, 0x00BB, LBP_CL }, /* Right double angle quotation mark: closing */
+    { 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */
+    { 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */
+    { 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */
+    { 0x2039, 0x2039, LBP_OP }, /* Left single angle quotation mark: opening */
+    { 0x203A, 0x203A, LBP_CL }, /* Right single angle quotation mark: closing */
+    { 0, 0, LBP_Undefined }
 };
 
 /**
  * Russian-specific data over the default Unicode rules.
  */
 static struct LineBreakProperties lb_prop_Russian[] = {
-       { 0x00AB, 0x00AB, LBP_OP },     /* Left double angle quotation mark: opening */
-       { 0x00BB, 0x00BB, LBP_CL },     /* Right double angle quotation mark: closing */
-       { 0x201C, 0x201C, LBP_CL },     /* Left double quotation mark: closing */
-       { 0, 0, LBP_Undefined }
+    { 0x00AB, 0x00AB, LBP_OP }, /* Left double angle quotation mark: opening */
+    { 0x00BB, 0x00BB, LBP_CL }, /* Right double angle quotation mark: closing */
+    { 0x201C, 0x201C, LBP_CL }, /* Left double quotation mark: closing */
+    { 0, 0, LBP_Undefined }
 };
 
 /**
  * Chinese-specific data over the default Unicode rules.
  */
 static struct LineBreakProperties lb_prop_Chinese[] = {
-       { 0x2018, 0x2018, LBP_OP },     /* Left single quotation mark: opening */
-       { 0x2019, 0x2019, LBP_CL },     /* Right single quotation mark: closing */
-       { 0x201C, 0x201C, LBP_OP },     /* Left double quotation mark: opening */
-       { 0x201D, 0x201D, LBP_CL },     /* Right double quotation mark: closing */
-       { 0, 0, LBP_Undefined }
+    { 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */
+    { 0x2019, 0x2019, LBP_CL }, /* Right single quotation mark: closing */
+    { 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */
+    { 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */
+    { 0, 0, LBP_Undefined }
 };
 
 /**
@@ -129,11 +129,11 @@ static struct LineBreakProperties lb_prop_Chinese[] = {
  * you may want to redefine \e lb_prop_lang_map in your C source file.
  */
 struct LineBreakPropertiesLang lb_prop_lang_map[] = {
-       { "en", 2, lb_prop_English },
-       { "de", 2, lb_prop_German },
-       { "es", 2, lb_prop_Spanish },
-       { "fr", 2, lb_prop_French },
-       { "ru", 2, lb_prop_Russian },
-       { "zh", 2, lb_prop_Chinese },
-       { NULL, 0, NULL }
+    { "en", 2, lb_prop_English },
+    { "de", 2, lb_prop_German },
+    { "es", 2, lb_prop_Spanish },
+    { "fr", 2, lb_prop_French },
+    { "ru", 2, lb_prop_Russian },
+    { "zh", 2, lb_prop_Chinese },
+    { NULL, 0, NULL }
 };
index bc4eee2..d557aba 100644 (file)
@@ -1,10 +1,11 @@
-/* vim: set tabstop=4 shiftwidth=4: */
+/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
 
 /*
  * Line breaking in a Unicode sequence.  Designed to be used in a
  * generic text renderer.
  *
- * Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
+ * Copyright (C) 2008-2013 Wu Yongwei <wuyongwei at gmail dot com>
+ * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the author be held liable for any damages
  *    distribution.
  *
  * The main reference is Unicode Standard Annex 14 (UAX #14):
- *             <URL:http://www.unicode.org/reports/tr14/>
+ *      <URL:http://www.unicode.org/reports/tr14/>
  *
  * When this library was designed, this annex was at Revision 19, for
  * Unicode 5.0.0:
- *             <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
+ *      <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
  *
- * This library has been updated according to Revision 24, for
- * Unicode 5.2.0:
- *             <URL:http://www.unicode.org/reports/tr14/tr14-24.html>
+ * This library has been updated according to Revision 30, for
+ * Unicode 6.2.0:
+ *      <URL:http://www.unicode.org/reports/tr14/tr14-30.html>
  *
  * The Unicode Terms of Use are available at
- *             <URL:http://www.unicode.org/copyright.html>
+ *      <URL:http://www.unicode.org/copyright.html>
  */
 
 /**
- * @file       linebreakdef.h
+ * @file    linebreakdef.h
  *
  * Definitions of internal data structures, declarations of global
  * variables, and function prototypes for the line breaking algorithm.
  *
- * @version    2.0, 2010/01/03
- * @author     Wu Yongwei
+ * @version 2.4, 2013/11/10
+ * @author  Wu Yongwei
+ * @author  Petr Filipsky
  */
 
 /**
  * Constant value to mark the end of string.  It is not a valid Unicode
  * character.
  */
-#define EOS 0xFFFF
+#define EOS 0xFFFFFFFF
 
 /**
  * Line break classes.  This is a direct mapping of Table 1 of Unicode
- * Standard Annex 14, Revision 19.
+ * Standard Annex 14, Revision 26.
  */
 enum LineBreakClass
 {
-       /* This is used to signal an error condition. */
-       LBP_Undefined,  /**< Undefined */
+    /* This is used to signal an error condition. */
+    LBP_Undefined,  /**< Undefined */
 
-       /* The following break classes are treated in the pair table. */
-       LBP_OP,                 /**< Opening punctuation */
-       LBP_CL,                 /**< Closing punctuation */
-       LBP_CP,                 /**< Closing parenthesis */
-       LBP_QU,                 /**< Ambiguous quotation */
-       LBP_GL,                 /**< Glue */
-       LBP_NS,                 /**< Non-starters */
-       LBP_EX,                 /**< Exclamation/Interrogation */
-       LBP_SY,                 /**< Symbols allowing break after */
-       LBP_IS,                 /**< Infix separator */
-       LBP_PR,                 /**< Prefix */
-       LBP_PO,                 /**< Postfix */
-       LBP_NU,                 /**< Numeric */
-       LBP_AL,                 /**< Alphabetic */
-       LBP_ID,                 /**< Ideographic */
-       LBP_IN,                 /**< Inseparable characters */
-       LBP_HY,                 /**< Hyphen */
-       LBP_BA,                 /**< Break after */
-       LBP_BB,                 /**< Break before */
-       LBP_B2,                 /**< Break on either side (but not pair) */
-       LBP_ZW,                 /**< Zero-width space */
-       LBP_CM,                 /**< Combining marks */
-       LBP_WJ,                 /**< Word joiner */
-       LBP_H2,                 /**< Hangul LV */
-       LBP_H3,                 /**< Hangul LVT */
-       LBP_JL,                 /**< Hangul L Jamo */
-       LBP_JV,                 /**< Hangul V Jamo */
-       LBP_JT,                 /**< Hangul T Jamo */
+    /* The following break classes are treated in the pair table. */
+    LBP_OP,         /**< Opening punctuation */
+    LBP_CL,         /**< Closing punctuation */
+    LBP_CP,         /**< Closing parenthesis */
+    LBP_QU,         /**< Ambiguous quotation */
+    LBP_GL,         /**< Glue */
+    LBP_NS,         /**< Non-starters */
+    LBP_EX,         /**< Exclamation/Interrogation */
+    LBP_SY,         /**< Symbols allowing break after */
+    LBP_IS,         /**< Infix separator */
+    LBP_PR,         /**< Prefix */
+    LBP_PO,         /**< Postfix */
+    LBP_NU,         /**< Numeric */
+    LBP_AL,         /**< Alphabetic */
+    LBP_HL,         /**< Hebrew letter */
+    LBP_ID,         /**< Ideographic */
+    LBP_IN,         /**< Inseparable characters */
+    LBP_HY,         /**< Hyphen */
+    LBP_BA,         /**< Break after */
+    LBP_BB,         /**< Break before */
+    LBP_B2,         /**< Break on either side (but not pair) */
+    LBP_ZW,         /**< Zero-width space */
+    LBP_CM,         /**< Combining marks */
+    LBP_WJ,         /**< Word joiner */
+    LBP_H2,         /**< Hangul LV */
+    LBP_H3,         /**< Hangul LVT */
+    LBP_JL,         /**< Hangul L Jamo */
+    LBP_JV,         /**< Hangul V Jamo */
+    LBP_JT,         /**< Hangul T Jamo */
+    LBP_RI,         /**< Regional indicator */
 
-       /* The following break classes are not treated in the pair table */
-       LBP_AI,                 /**< Ambiguous (alphabetic or ideograph) */
-       LBP_BK,                 /**< Break (mandatory) */
-       LBP_CB,                 /**< Contingent break */
-       LBP_CR,                 /**< Carriage return */
-       LBP_LF,                 /**< Line feed */
-       LBP_NL,                 /**< Next line */
-       LBP_SA,                 /**< South-East Asian */
-       LBP_SG,                 /**< Surrogates */
-       LBP_SP,                 /**< Space */
-       LBP_XX                  /**< Unknown */
+    /* The following break classes are not treated in the pair table */
+    LBP_AI,         /**< Ambiguous (alphabetic or ideograph) */
+    LBP_BK,         /**< Break (mandatory) */
+    LBP_CB,         /**< Contingent break */
+    LBP_CJ,         /**< Conditional Japanese starter */
+    LBP_CR,         /**< Carriage return */
+    LBP_LF,         /**< Line feed */
+    LBP_NL,         /**< Next line */
+    LBP_SA,         /**< South-East Asian */
+    LBP_SG,         /**< Surrogates */
+    LBP_SP,         /**< Space */
+    LBP_XX          /**< Unknown */
 };
 
 /**
@@ -111,9 +116,9 @@ enum LineBreakClass
  */
 struct LineBreakProperties
 {
-       utf32_t start;                          /**< Starting coding point */
-       utf32_t end;                            /**< End coding point */
-       enum LineBreakClass prop;       /**< The line breaking property */
+    utf32_t start;              /**< Starting coding point */
+    utf32_t end;                /**< End coding point */
+    enum LineBreakClass prop;   /**< The line breaking property */
 };
 
 /**
@@ -122,9 +127,22 @@ struct LineBreakProperties
  */
 struct LineBreakPropertiesLang
 {
-       const char *lang;                                       /**< Language name */
-       size_t namelen;                                         /**< Length of name to match */
-       struct LineBreakProperties *lbp;        /**< Pointer to associated data */
+    const char *lang;                   /**< Language name */
+    size_t namelen;                     /**< Length of name to match */
+    struct LineBreakProperties *lbp;    /**< Pointer to associated data */
+};
+
+/**
+ * Context representing internal state of the line breaking algorithm.
+ * This is useful to callers if incremental analysis is wanted.
+ */
+struct LineBreakContext
+{
+    const char *lang;               /**< Language name */
+    struct LineBreakProperties *lbpLang;/**< Pointer to LineBreakProperties */
+    enum LineBreakClass lbcCur;     /**< Breaking class of current codepoint */
+    enum LineBreakClass lbcNew;     /**< Breaking class of next codepoint */
+    enum LineBreakClass lbcLast;    /**< Breaking class of last codepoint */
 };
 
 /**
@@ -141,9 +159,16 @@ extern struct LineBreakPropertiesLang lb_prop_lang_map[];
 utf32_t lb_get_next_char_utf8(const utf8_t *s, size_t len, size_t *ip);
 utf32_t lb_get_next_char_utf16(const utf16_t *s, size_t len, size_t *ip);
 utf32_t lb_get_next_char_utf32(const utf32_t *s, size_t len, size_t *ip);
+void lb_init_break_context(
+        struct LineBreakContext* lbpCtx,
+        utf32_t ch,
+        const char* lang);
+int lb_process_next_char(
+        struct LineBreakContext* lbpCtx,
+        utf32_t ch);
 void set_linebreaks(
-               const void *s,
-               size_t len,
-               const char *lang,
-               char *brks,
-               get_next_char_t get_next_char);
+        const void *s,
+        size_t len,
+        const char *lang,
+        char *brks,
+        get_next_char_t get_next_char);
index 4e53429..e67a1f8 100644 (file)
@@ -257,7 +257,6 @@ static void set_wordbreaks(
             break;
 
         case WBP_ALetter:
-        case WBP_Hebrew:
             if ((wbcSeqStart == WBP_ALetter) || /* WB5,6,7 */
                     (wbcLast == WBP_Numeric) || /* WB10 */
                     (wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */
index d0d0585..fe5afe3 100644 (file)
@@ -1,6 +1,6 @@
 /* The content of this file is generated from:
-# WordBreakProperty-7.0.0.txt
-# Date: 2014-02-19, 15:51:39 GMT [MD]
+# WordBreakProperty-6.2.0.txt
+# Date: 2012-08-13, 19:12:09 GMT [MD]
 */
 
 #include "linebreak.h"
@@ -10,8 +10,7 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x000A, 0x000A, WBP_LF},
        {0x000B, 0x000C, WBP_Newline},
        {0x000D, 0x000D, WBP_CR},
-       {0x0022, 0x0022, WBP_Double},
-       {0x0027, 0x0027, WBP_Single},
+       {0x0027, 0x0027, WBP_MidNumLet},
        {0x002C, 0x002C, WBP_MidNum},
        {0x002E, 0x002E, WBP_MidNumLet},
        {0x0030, 0x0039, WBP_Numeric},
@@ -37,7 +36,6 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x0295, 0x02AF, WBP_ALetter},
        {0x02B0, 0x02C1, WBP_ALetter},
        {0x02C6, 0x02D1, WBP_ALetter},
-       {0x02D7, 0x02D7, WBP_MidLetter},
        {0x02E0, 0x02E4, WBP_ALetter},
        {0x02EC, 0x02EC, WBP_ALetter},
        {0x02EE, 0x02EE, WBP_ALetter},
@@ -48,7 +46,6 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x037A, 0x037A, WBP_ALetter},
        {0x037B, 0x037D, WBP_ALetter},
        {0x037E, 0x037E, WBP_MidNum},
-       {0x037F, 0x037F, WBP_ALetter},
        {0x0386, 0x0386, WBP_ALetter},
        {0x0387, 0x0387, WBP_MidLetter},
        {0x0388, 0x038A, WBP_ALetter},
@@ -58,7 +55,7 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x03F7, 0x0481, WBP_ALetter},
        {0x0483, 0x0487, WBP_Extend},
        {0x0488, 0x0489, WBP_Extend},
-       {0x048A, 0x052F, WBP_ALetter},
+       {0x048A, 0x0527, WBP_ALetter},
        {0x0531, 0x0556, WBP_ALetter},
        {0x0559, 0x0559, WBP_ALetter},
        {0x0561, 0x0587, WBP_ALetter},
@@ -68,14 +65,13 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x05C1, 0x05C2, WBP_Extend},
        {0x05C4, 0x05C5, WBP_Extend},
        {0x05C7, 0x05C7, WBP_Extend},
-       {0x05D0, 0x05EA, WBP_Hebrew},
-       {0x05F0, 0x05F2, WBP_Hebrew},
+       {0x05D0, 0x05EA, WBP_ALetter},
+       {0x05F0, 0x05F2, WBP_ALetter},
        {0x05F3, 0x05F3, WBP_ALetter},
        {0x05F4, 0x05F4, WBP_MidLetter},
-       {0x0600, 0x0605, WBP_Format},
+       {0x0600, 0x0604, WBP_Format},
        {0x060C, 0x060D, WBP_MidNum},
        {0x0610, 0x061A, WBP_Extend},
-       {0x061C, 0x061C, WBP_Format},
        {0x0620, 0x063F, WBP_ALetter},
        {0x0640, 0x0640, WBP_ALetter},
        {0x0641, 0x064A, WBP_ALetter},
@@ -121,8 +117,10 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x0829, 0x082D, WBP_Extend},
        {0x0840, 0x0858, WBP_ALetter},
        {0x0859, 0x085B, WBP_Extend},
-       {0x08A0, 0x08B2, WBP_ALetter},
-       {0x08E4, 0x0902, WBP_Extend},
+       {0x08A0, 0x08A0, WBP_ALetter},
+       {0x08A2, 0x08AC, WBP_ALetter},
+       {0x08E4, 0x08FE, WBP_Extend},
+       {0x0900, 0x0902, WBP_Extend},
        {0x0903, 0x0903, WBP_Extend},
        {0x0904, 0x0939, WBP_ALetter},
        {0x093A, 0x093A, WBP_Extend},
@@ -140,7 +138,8 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x0962, 0x0963, WBP_Extend},
        {0x0966, 0x096F, WBP_Numeric},
        {0x0971, 0x0971, WBP_ALetter},
-       {0x0972, 0x0980, WBP_ALetter},
+       {0x0972, 0x0977, WBP_ALetter},
+       {0x0979, 0x097F, WBP_ALetter},
        {0x0981, 0x0981, WBP_Extend},
        {0x0982, 0x0983, WBP_Extend},
        {0x0985, 0x098C, WBP_ALetter},
@@ -248,12 +247,12 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x0BD0, 0x0BD0, WBP_ALetter},
        {0x0BD7, 0x0BD7, WBP_Extend},
        {0x0BE6, 0x0BEF, WBP_Numeric},
-       {0x0C00, 0x0C00, WBP_Extend},
        {0x0C01, 0x0C03, WBP_Extend},
        {0x0C05, 0x0C0C, WBP_ALetter},
        {0x0C0E, 0x0C10, WBP_ALetter},
        {0x0C12, 0x0C28, WBP_ALetter},
-       {0x0C2A, 0x0C39, WBP_ALetter},
+       {0x0C2A, 0x0C33, WBP_ALetter},
+       {0x0C35, 0x0C39, WBP_ALetter},
        {0x0C3D, 0x0C3D, WBP_ALetter},
        {0x0C3E, 0x0C40, WBP_Extend},
        {0x0C41, 0x0C44, WBP_Extend},
@@ -264,7 +263,6 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x0C60, 0x0C61, WBP_ALetter},
        {0x0C62, 0x0C63, WBP_Extend},
        {0x0C66, 0x0C6F, WBP_Numeric},
-       {0x0C81, 0x0C81, WBP_Extend},
        {0x0C82, 0x0C83, WBP_Extend},
        {0x0C85, 0x0C8C, WBP_ALetter},
        {0x0C8E, 0x0C90, WBP_ALetter},
@@ -286,7 +284,6 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x0CE2, 0x0CE3, WBP_Extend},
        {0x0CE6, 0x0CEF, WBP_Numeric},
        {0x0CF1, 0x0CF2, WBP_ALetter},
-       {0x0D01, 0x0D01, WBP_Extend},
        {0x0D02, 0x0D03, WBP_Extend},
        {0x0D05, 0x0D0C, WBP_ALetter},
        {0x0D0E, 0x0D10, WBP_ALetter},
@@ -314,7 +311,6 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x0DD2, 0x0DD4, WBP_Extend},
        {0x0DD6, 0x0DD6, WBP_Extend},
        {0x0DD8, 0x0DDF, WBP_Extend},
-       {0x0DE6, 0x0DEF, WBP_Numeric},
        {0x0DF2, 0x0DF3, WBP_Extend},
        {0x0E31, 0x0E31, WBP_Extend},
        {0x0E34, 0x0E3A, WBP_Extend},
@@ -395,7 +391,6 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x1681, 0x169A, WBP_ALetter},
        {0x16A0, 0x16EA, WBP_ALetter},
        {0x16EE, 0x16F0, WBP_ALetter},
-       {0x16F1, 0x16F8, WBP_ALetter},
        {0x1700, 0x170C, WBP_ALetter},
        {0x170E, 0x1711, WBP_ALetter},
        {0x1712, 0x1714, WBP_Extend},
@@ -416,7 +411,6 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x17DD, 0x17DD, WBP_Extend},
        {0x17E0, 0x17E9, WBP_Numeric},
        {0x180B, 0x180D, WBP_Extend},
-       {0x180E, 0x180E, WBP_Format},
        {0x1810, 0x1819, WBP_Numeric},
        {0x1820, 0x1842, WBP_ALetter},
        {0x1843, 0x1843, WBP_ALetter},
@@ -425,7 +419,7 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x18A9, 0x18A9, WBP_Extend},
        {0x18AA, 0x18AA, WBP_ALetter},
        {0x18B0, 0x18F5, WBP_ALetter},
-       {0x1900, 0x191E, WBP_ALetter},
+       {0x1900, 0x191C, WBP_ALetter},
        {0x1920, 0x1922, WBP_Extend},
        {0x1923, 0x1926, WBP_Extend},
        {0x1927, 0x1928, WBP_Extend},
@@ -440,8 +434,7 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x19D0, 0x19D9, WBP_Numeric},
        {0x1A00, 0x1A16, WBP_ALetter},
        {0x1A17, 0x1A18, WBP_Extend},
-       {0x1A19, 0x1A1A, WBP_Extend},
-       {0x1A1B, 0x1A1B, WBP_Extend},
+       {0x1A19, 0x1A1B, WBP_Extend},
        {0x1A55, 0x1A55, WBP_Extend},
        {0x1A56, 0x1A56, WBP_Extend},
        {0x1A57, 0x1A57, WBP_Extend},
@@ -456,8 +449,6 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x1A7F, 0x1A7F, WBP_Extend},
        {0x1A80, 0x1A89, WBP_Numeric},
        {0x1A90, 0x1A99, WBP_Numeric},
-       {0x1AB0, 0x1ABD, WBP_Extend},
-       {0x1ABE, 0x1ABE, WBP_Extend},
        {0x1B00, 0x1B03, WBP_Extend},
        {0x1B04, 0x1B04, WBP_Extend},
        {0x1B05, 0x1B33, WBP_ALetter},
@@ -480,7 +471,8 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x1BA6, 0x1BA7, WBP_Extend},
        {0x1BA8, 0x1BA9, WBP_Extend},
        {0x1BAA, 0x1BAA, WBP_Extend},
-       {0x1BAB, 0x1BAD, WBP_Extend},
+       {0x1BAB, 0x1BAB, WBP_Extend},
+       {0x1BAC, 0x1BAD, WBP_Extend},
        {0x1BAE, 0x1BAF, WBP_ALetter},
        {0x1BB0, 0x1BB9, WBP_Numeric},
        {0x1BBA, 0x1BE5, WBP_ALetter},
@@ -512,14 +504,13 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x1CF2, 0x1CF3, WBP_Extend},
        {0x1CF4, 0x1CF4, WBP_Extend},
        {0x1CF5, 0x1CF6, WBP_ALetter},
-       {0x1CF8, 0x1CF9, WBP_Extend},
        {0x1D00, 0x1D2B, WBP_ALetter},
        {0x1D2C, 0x1D6A, WBP_ALetter},
        {0x1D6B, 0x1D77, WBP_ALetter},
        {0x1D78, 0x1D78, WBP_ALetter},
        {0x1D79, 0x1D9A, WBP_ALetter},
        {0x1D9B, 0x1DBF, WBP_ALetter},
-       {0x1DC0, 0x1DF5, WBP_Extend},
+       {0x1DC0, 0x1DE6, WBP_Extend},
        {0x1DFC, 0x1DFF, WBP_Extend},
        {0x1E00, 0x1F15, WBP_ALetter},
        {0x1F18, 0x1F1D, WBP_ALetter},
@@ -553,7 +544,7 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x2044, 0x2044, WBP_MidNum},
        {0x2054, 0x2054, WBP_ExtendNumLet},
        {0x2060, 0x2064, WBP_Format},
-       {0x2066, 0x206F, WBP_Format},
+       {0x206A, 0x206F, WBP_Format},
        {0x2071, 0x2071, WBP_ALetter},
        {0x207F, 0x207F, WBP_ALetter},
        {0x2090, 0x209C, WBP_ALetter},
@@ -640,8 +631,7 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0xA670, 0xA672, WBP_Extend},
        {0xA674, 0xA67D, WBP_Extend},
        {0xA67F, 0xA67F, WBP_ALetter},
-       {0xA680, 0xA69B, WBP_ALetter},
-       {0xA69C, 0xA69D, WBP_ALetter},
+       {0xA680, 0xA697, WBP_ALetter},
        {0xA69F, 0xA69F, WBP_Extend},
        {0xA6A0, 0xA6E5, WBP_ALetter},
        {0xA6E6, 0xA6EF, WBP_ALetter},
@@ -652,9 +642,8 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0xA771, 0xA787, WBP_ALetter},
        {0xA788, 0xA788, WBP_ALetter},
        {0xA78B, 0xA78E, WBP_ALetter},
-       {0xA790, 0xA7AD, WBP_ALetter},
-       {0xA7B0, 0xA7B1, WBP_ALetter},
-       {0xA7F7, 0xA7F7, WBP_ALetter},
+       {0xA790, 0xA793, WBP_ALetter},
+       {0xA7A0, 0xA7AA, WBP_ALetter},
        {0xA7F8, 0xA7F9, WBP_ALetter},
        {0xA7FA, 0xA7FA, WBP_ALetter},
        {0xA7FB, 0xA801, WBP_ALetter},
@@ -694,8 +683,6 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0xA9BD, 0xA9C0, WBP_Extend},
        {0xA9CF, 0xA9CF, WBP_ALetter},
        {0xA9D0, 0xA9D9, WBP_Numeric},
-       {0xA9E5, 0xA9E5, WBP_Extend},
-       {0xA9F0, 0xA9F9, WBP_Numeric},
        {0xAA00, 0xAA28, WBP_ALetter},
        {0xAA29, 0xAA2E, WBP_Extend},
        {0xAA2F, 0xAA30, WBP_Extend},
@@ -709,8 +696,6 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0xAA4D, 0xAA4D, WBP_Extend},
        {0xAA50, 0xAA59, WBP_Numeric},
        {0xAA7B, 0xAA7B, WBP_Extend},
-       {0xAA7C, 0xAA7C, WBP_Extend},
-       {0xAA7D, 0xAA7D, WBP_Extend},
        {0xAAB0, 0xAAB0, WBP_Extend},
        {0xAAB2, 0xAAB4, WBP_Extend},
        {0xAAB7, 0xAAB8, WBP_Extend},
@@ -729,9 +714,6 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0xAB11, 0xAB16, WBP_ALetter},
        {0xAB20, 0xAB26, WBP_ALetter},
        {0xAB28, 0xAB2E, WBP_ALetter},
-       {0xAB30, 0xAB5A, WBP_ALetter},
-       {0xAB5C, 0xAB5F, WBP_ALetter},
-       {0xAB64, 0xAB65, WBP_ALetter},
        {0xABC0, 0xABE2, WBP_ALetter},
        {0xABE3, 0xABE4, WBP_Extend},
        {0xABE5, 0xABE5, WBP_Extend},
@@ -746,16 +728,15 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0xD7CB, 0xD7FB, WBP_ALetter},
        {0xFB00, 0xFB06, WBP_ALetter},
        {0xFB13, 0xFB17, WBP_ALetter},
-       {0xFB1D, 0xFB1D, WBP_Hebrew},
+       {0xFB1D, 0xFB1D, WBP_ALetter},
        {0xFB1E, 0xFB1E, WBP_Extend},
-       {0xFB1F, 0xFB28, WBP_Hebrew},
-       {0xFB2A, 0xFB36, WBP_Hebrew},
-       {0xFB38, 0xFB3C, WBP_Hebrew},
-       {0xFB3E, 0xFB3E, WBP_Hebrew},
-       {0xFB40, 0xFB41, WBP_Hebrew},
-       {0xFB43, 0xFB44, WBP_Hebrew},
-       {0xFB46, 0xFB4F, WBP_Hebrew},
-       {0xFB50, 0xFBB1, WBP_ALetter},
+       {0xFB1F, 0xFB28, WBP_ALetter},
+       {0xFB2A, 0xFB36, WBP_ALetter},
+       {0xFB38, 0xFB3C, WBP_ALetter},
+       {0xFB3E, 0xFB3E, WBP_ALetter},
+       {0xFB40, 0xFB41, WBP_ALetter},
+       {0xFB43, 0xFB44, WBP_ALetter},
+       {0xFB46, 0xFBB1, WBP_ALetter},
        {0xFBD3, 0xFD3D, WBP_ALetter},
        {0xFD50, 0xFD8F, WBP_ALetter},
        {0xFD92, 0xFDC7, WBP_ALetter},
@@ -764,7 +745,7 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0xFE10, 0xFE10, WBP_MidNum},
        {0xFE13, 0xFE13, WBP_MidLetter},
        {0xFE14, 0xFE14, WBP_MidNum},
-       {0xFE20, 0xFE2D, WBP_Extend},
+       {0xFE20, 0xFE26, WBP_Extend},
        {0xFE33, 0xFE34, WBP_ExtendNumLet},
        {0xFE4D, 0xFE4F, WBP_ExtendNumLet},
        {0xFE50, 0xFE50, WBP_MidNum},
@@ -803,14 +784,11 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x101FD, 0x101FD, WBP_Extend},
        {0x10280, 0x1029C, WBP_ALetter},
        {0x102A0, 0x102D0, WBP_ALetter},
-       {0x102E0, 0x102E0, WBP_Extend},
-       {0x10300, 0x1031F, WBP_ALetter},
+       {0x10300, 0x1031E, WBP_ALetter},
        {0x10330, 0x10340, WBP_ALetter},
        {0x10341, 0x10341, WBP_ALetter},
        {0x10342, 0x10349, WBP_ALetter},
        {0x1034A, 0x1034A, WBP_ALetter},
-       {0x10350, 0x10375, WBP_ALetter},
-       {0x10376, 0x1037A, WBP_Extend},
        {0x10380, 0x1039D, WBP_ALetter},
        {0x103A0, 0x103C3, WBP_ALetter},
        {0x103C8, 0x103CF, WBP_ALetter},
@@ -818,19 +796,12 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x10400, 0x1044F, WBP_ALetter},
        {0x10450, 0x1049D, WBP_ALetter},
        {0x104A0, 0x104A9, WBP_Numeric},
-       {0x10500, 0x10527, WBP_ALetter},
-       {0x10530, 0x10563, WBP_ALetter},
-       {0x10600, 0x10736, WBP_ALetter},
-       {0x10740, 0x10755, WBP_ALetter},
-       {0x10760, 0x10767, WBP_ALetter},
        {0x10800, 0x10805, WBP_ALetter},
        {0x10808, 0x10808, WBP_ALetter},
        {0x1080A, 0x10835, WBP_ALetter},
        {0x10837, 0x10838, WBP_ALetter},
        {0x1083C, 0x1083C, WBP_ALetter},
        {0x1083F, 0x10855, WBP_ALetter},
-       {0x10860, 0x10876, WBP_ALetter},
-       {0x10880, 0x1089E, WBP_ALetter},
        {0x10900, 0x10915, WBP_ALetter},
        {0x10920, 0x10939, WBP_ALetter},
        {0x10980, 0x109B7, WBP_ALetter},
@@ -845,14 +816,9 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x10A38, 0x10A3A, WBP_Extend},
        {0x10A3F, 0x10A3F, WBP_Extend},
        {0x10A60, 0x10A7C, WBP_ALetter},
-       {0x10A80, 0x10A9C, WBP_ALetter},
-       {0x10AC0, 0x10AC7, WBP_ALetter},
-       {0x10AC9, 0x10AE4, WBP_ALetter},
-       {0x10AE5, 0x10AE6, WBP_Extend},
        {0x10B00, 0x10B35, WBP_ALetter},
        {0x10B40, 0x10B55, WBP_ALetter},
        {0x10B60, 0x10B72, WBP_ALetter},
-       {0x10B80, 0x10B91, WBP_ALetter},
        {0x10C00, 0x10C48, WBP_ALetter},
        {0x11000, 0x11000, WBP_Extend},
        {0x11001, 0x11001, WBP_Extend},
@@ -860,7 +826,7 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x11003, 0x11037, WBP_ALetter},
        {0x11038, 0x11046, WBP_Extend},
        {0x11066, 0x1106F, WBP_Numeric},
-       {0x1107F, 0x11081, WBP_Extend},
+       {0x11080, 0x11081, WBP_Extend},
        {0x11082, 0x11082, WBP_Extend},
        {0x11083, 0x110AF, WBP_ALetter},
        {0x110B0, 0x110B2, WBP_Extend},
@@ -876,9 +842,6 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x1112C, 0x1112C, WBP_Extend},
        {0x1112D, 0x11134, WBP_Extend},
        {0x11136, 0x1113F, WBP_Numeric},
-       {0x11150, 0x11172, WBP_ALetter},
-       {0x11173, 0x11173, WBP_Extend},
-       {0x11176, 0x11176, WBP_ALetter},
        {0x11180, 0x11181, WBP_Extend},
        {0x11182, 0x11182, WBP_Extend},
        {0x11183, 0x111B2, WBP_ALetter},
@@ -887,68 +850,6 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x111BF, 0x111C0, WBP_Extend},
        {0x111C1, 0x111C4, WBP_ALetter},
        {0x111D0, 0x111D9, WBP_Numeric},
-       {0x111DA, 0x111DA, WBP_ALetter},
-       {0x11200, 0x11211, WBP_ALetter},
-       {0x11213, 0x1122B, WBP_ALetter},
-       {0x1122C, 0x1122E, WBP_Extend},
-       {0x1122F, 0x11231, WBP_Extend},
-       {0x11232, 0x11233, WBP_Extend},
-       {0x11234, 0x11234, WBP_Extend},
-       {0x11235, 0x11235, WBP_Extend},
-       {0x11236, 0x11237, WBP_Extend},
-       {0x112B0, 0x112DE, WBP_ALetter},
-       {0x112DF, 0x112DF, WBP_Extend},
-       {0x112E0, 0x112E2, WBP_Extend},
-       {0x112E3, 0x112EA, WBP_Extend},
-       {0x112F0, 0x112F9, WBP_Numeric},
-       {0x11301, 0x11301, WBP_Extend},
-       {0x11302, 0x11303, WBP_Extend},
-       {0x11305, 0x1130C, WBP_ALetter},
-       {0x1130F, 0x11310, WBP_ALetter},
-       {0x11313, 0x11328, WBP_ALetter},
-       {0x1132A, 0x11330, WBP_ALetter},
-       {0x11332, 0x11333, WBP_ALetter},
-       {0x11335, 0x11339, WBP_ALetter},
-       {0x1133C, 0x1133C, WBP_Extend},
-       {0x1133D, 0x1133D, WBP_ALetter},
-       {0x1133E, 0x1133F, WBP_Extend},
-       {0x11340, 0x11340, WBP_Extend},
-       {0x11341, 0x11344, WBP_Extend},
-       {0x11347, 0x11348, WBP_Extend},
-       {0x1134B, 0x1134D, WBP_Extend},
-       {0x11357, 0x11357, WBP_Extend},
-       {0x1135D, 0x11361, WBP_ALetter},
-       {0x11362, 0x11363, WBP_Extend},
-       {0x11366, 0x1136C, WBP_Extend},
-       {0x11370, 0x11374, WBP_Extend},
-       {0x11480, 0x114AF, WBP_ALetter},
-       {0x114B0, 0x114B2, WBP_Extend},
-       {0x114B3, 0x114B8, WBP_Extend},
-       {0x114B9, 0x114B9, WBP_Extend},
-       {0x114BA, 0x114BA, WBP_Extend},
-       {0x114BB, 0x114BE, WBP_Extend},
-       {0x114BF, 0x114C0, WBP_Extend},
-       {0x114C1, 0x114C1, WBP_Extend},
-       {0x114C2, 0x114C3, WBP_Extend},
-       {0x114C4, 0x114C5, WBP_ALetter},
-       {0x114C7, 0x114C7, WBP_ALetter},
-       {0x114D0, 0x114D9, WBP_Numeric},
-       {0x11580, 0x115AE, WBP_ALetter},
-       {0x115AF, 0x115B1, WBP_Extend},
-       {0x115B2, 0x115B5, WBP_Extend},
-       {0x115B8, 0x115BB, WBP_Extend},
-       {0x115BC, 0x115BD, WBP_Extend},
-       {0x115BE, 0x115BE, WBP_Extend},
-       {0x115BF, 0x115C0, WBP_Extend},
-       {0x11600, 0x1162F, WBP_ALetter},
-       {0x11630, 0x11632, WBP_Extend},
-       {0x11633, 0x1163A, WBP_Extend},
-       {0x1163B, 0x1163C, WBP_Extend},
-       {0x1163D, 0x1163D, WBP_Extend},
-       {0x1163E, 0x1163E, WBP_Extend},
-       {0x1163F, 0x11640, WBP_Extend},
-       {0x11644, 0x11644, WBP_ALetter},
-       {0x11650, 0x11659, WBP_Numeric},
        {0x11680, 0x116AA, WBP_ALetter},
        {0x116AB, 0x116AB, WBP_Extend},
        {0x116AC, 0x116AC, WBP_Extend},
@@ -958,36 +859,16 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x116B6, 0x116B6, WBP_Extend},
        {0x116B7, 0x116B7, WBP_Extend},
        {0x116C0, 0x116C9, WBP_Numeric},
-       {0x118A0, 0x118DF, WBP_ALetter},
-       {0x118E0, 0x118E9, WBP_Numeric},
-       {0x118FF, 0x118FF, WBP_ALetter},
-       {0x11AC0, 0x11AF8, WBP_ALetter},
-       {0x12000, 0x12398, WBP_ALetter},
-       {0x12400, 0x1246E, WBP_ALetter},
+       {0x12000, 0x1236E, WBP_ALetter},
+       {0x12400, 0x12462, WBP_ALetter},
        {0x13000, 0x1342E, WBP_ALetter},
        {0x16800, 0x16A38, WBP_ALetter},
-       {0x16A40, 0x16A5E, WBP_ALetter},
-       {0x16A60, 0x16A69, WBP_Numeric},
-       {0x16AD0, 0x16AED, WBP_ALetter},
-       {0x16AF0, 0x16AF4, WBP_Extend},
-       {0x16B00, 0x16B2F, WBP_ALetter},
-       {0x16B30, 0x16B36, WBP_Extend},
-       {0x16B40, 0x16B43, WBP_ALetter},
-       {0x16B50, 0x16B59, WBP_Numeric},
-       {0x16B63, 0x16B77, WBP_ALetter},
-       {0x16B7D, 0x16B8F, WBP_ALetter},
        {0x16F00, 0x16F44, WBP_ALetter},
        {0x16F50, 0x16F50, WBP_ALetter},
        {0x16F51, 0x16F7E, WBP_Extend},
        {0x16F8F, 0x16F92, WBP_Extend},
        {0x16F93, 0x16F9F, WBP_ALetter},
        {0x1B000, 0x1B000, WBP_Katakana},
-       {0x1BC00, 0x1BC6A, WBP_ALetter},
-       {0x1BC70, 0x1BC7C, WBP_ALetter},
-       {0x1BC80, 0x1BC88, WBP_ALetter},
-       {0x1BC90, 0x1BC99, WBP_ALetter},
-       {0x1BC9D, 0x1BC9E, WBP_Extend},
-       {0x1BCA0, 0x1BCA3, WBP_Format},
        {0x1D165, 0x1D166, WBP_Extend},
        {0x1D167, 0x1D169, WBP_Extend},
        {0x1D16D, 0x1D172, WBP_Extend},
@@ -1027,8 +908,6 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x1D7AA, 0x1D7C2, WBP_ALetter},
        {0x1D7C4, 0x1D7CB, WBP_ALetter},
        {0x1D7CE, 0x1D7FF, WBP_Numeric},
-       {0x1E800, 0x1E8C4, WBP_ALetter},
-       {0x1E8D0, 0x1E8D6, WBP_Extend},
        {0x1EE00, 0x1EE03, WBP_ALetter},
        {0x1EE05, 0x1EE1F, WBP_ALetter},
        {0x1EE21, 0x1EE22, WBP_ALetter},
@@ -1062,9 +941,6 @@ static struct WordBreakProperties wb_prop_default[] = {
        {0x1EEA1, 0x1EEA3, WBP_ALetter},
        {0x1EEA5, 0x1EEA9, WBP_ALetter},
        {0x1EEAB, 0x1EEBB, WBP_ALetter},
-       {0x1F130, 0x1F149, WBP_ALetter},
-       {0x1F150, 0x1F169, WBP_ALetter},
-       {0x1F170, 0x1F189, WBP_ALetter},
        {0x1F1E6, 0x1F1FF, WBP_Regional},
        {0xE0001, 0xE0001, WBP_Format},
        {0xE0020, 0xE007F, WBP_Format},