-/* vim: set tabstop=4 shiftwidth=4: */
+/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
/*
* Line breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
- * Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
+ * Copyright (C) 2008-2013 Wu Yongwei <wuyongwei at gmail dot com>
+ * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
* distribution.
*
* The main reference is Unicode Standard Annex 14 (UAX #14):
- * <URL:http://www.unicode.org/reports/tr14/>
+ * <URL:http://www.unicode.org/reports/tr14/>
*
* When this library was designed, this annex was at Revision 19, for
* Unicode 5.0.0:
- * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
+ * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
*
- * This library has been updated according to Revision 24, for
- * Unicode 5.2.0:
- * <URL:http://www.unicode.org/reports/tr14/tr14-24.html>
+ * This library has been updated according to Revision 30, for
+ * Unicode 6.2.0:
+ * <URL:http://www.unicode.org/reports/tr14/tr14-30.html>
*
* The Unicode Terms of Use are available at
- * <URL:http://www.unicode.org/copyright.html>
+ * <URL:http://www.unicode.org/copyright.html>
*/
/**
- * @file linebreak.c
+ * @file linebreak.c
*
* Implementation of the line breaking algorithm as described in Unicode
* Standard Annex 14.
*
- * @version 2.0, 2010/01/03
- * @author Wu Yongwei
+ * @version 2.5, 2013/11/14
+ * @author Wu Yongwei
+ * @author Petr Filipsky
*/
#include <assert.h>
#include "linebreakdef.h"
/**
+ * Special value used internally to indicate an undefined break result,
+ * i.e. that the simple rules were inconclusive and a table lookup is
+ * needed.
+ */
+#define LINEBREAK_UNDEFINED -1
+
+/**
* Size of the second-level index to the line breaking properties.
*/
#define LINEBREAK_INDEX_SIZE 40
*/
enum BreakAction
{
- DIR_BRK, /**< Direct break opportunity */
- IND_BRK, /**< Indirect break opportunity */
- CMI_BRK, /**< Indirect break opportunity for combining marks */
- CMP_BRK, /**< Prohibited break for combining marks */
- PRH_BRK /**< Prohibited break */
+ DIR_BRK, /**< Direct break opportunity: a break is allowed */
+ IND_BRK, /**< Indirect break opportunity: allowed only after a space */
+ CMI_BRK, /**< Indirect break opportunity for combining marks */
+ CMP_BRK, /**< Prohibited break for combining marks */
+ PRH_BRK /**< Prohibited break: not allowed, even after a space */
};
/**
* Break action pair table. This is a direct mapping of Table 2 of
- * Unicode Standard Annex 14, Revision 24.
+ * Unicode Standard Annex 14, Revision 30.
*/
-static enum BreakAction baTable[LBP_JT][LBP_JT] = {
- { /* OP */
- PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, CMP_BRK,
- PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK },
- { /* CL */
- DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
- DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
- { /* CP */
- DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK,
- DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
- { /* QU */
- PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
- IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
- { /* GL */
- IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
- IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
- { /* NS */
- DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
- DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
- { /* EX */
- DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
- DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
- { /* SY */
- DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
- DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
- { /* IS */
- DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
- DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
- { /* PR */
- IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
- DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
- { /* PO */
- IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
- DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
- { /* NU */
- IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK,
- IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
- { /* AL */
- IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
- IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
- { /* ID */
- DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
- IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
- { /* IN */
- DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
- IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
- { /* HY */
- DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
- DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
- { /* BA */
- DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
- DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
- { /* BB */
- IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
- IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
- { /* B2 */
- DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
- DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
- { /* ZW */
- DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
- DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
- DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK, DIR_BRK,
- DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
- { /* CM */
- IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
- IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
- { /* WJ */
- IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
- IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
- { /* H2 */
- DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
- IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK },
- { /* H3 */
- DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
- IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK },
- { /* JL */
- DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
- IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK },
- { /* JV */
- DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
- IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK },
- { /* JT */
- DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
- PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
- IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
- PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK }
+static enum BreakAction baTable[LBP_RI][LBP_RI] = { /* Looked up as baTable[lbcCur - 1][lbcNew - 1] */
+ { /* OP */
+ PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
+ CMP_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
+ PRH_BRK },
+ { /* CL */
+ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK },
+ { /* CP */
+ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+ DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK },
+ { /* QU */
+ PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+ IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+ IND_BRK },
+ { /* GL */
+ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+ IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+ IND_BRK },
+ { /* NS */
+ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK },
+ { /* EX */
+ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK },
+ { /* SY */
+ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK },
+ { /* IS */
+ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
+ DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK },
+ { /* PR */
+ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
+ IND_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+ DIR_BRK },
+ { /* PO */
+ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
+ DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK },
+ { /* NU */
+ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+ DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK },
+ { /* AL */
+ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
+ DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK },
+ { /* HL */
+ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
+ DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK },
+ { /* ID */
+ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK },
+ { /* IN */
+ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK },
+ { /* HY */
+ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK },
+ { /* BA */
+ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK },
+ { /* BB */
+ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+ IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+ IND_BRK },
+ { /* B2 */
+ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK },
+ { /* ZW */
+ DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK },
+ { /* CM */
+ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
+ DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK },
+ { /* WJ */
+ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+ IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
+ IND_BRK },
+ { /* H2 */
+ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK,
+ DIR_BRK },
+ { /* H3 */
+ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK,
+ DIR_BRK },
+ { /* JL */
+ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK,
+ DIR_BRK },
+ { /* JV */
+ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK,
+ DIR_BRK },
+ { /* JT */
+ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK,
+ DIR_BRK },
+ { /* RI */
+ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
+ PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
+ CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
+ IND_BRK },
};
/**
*/
struct LineBreakPropertiesIndex
{
- utf32_t end; /**< End coding point */
- struct LineBreakProperties *lbp;/**< Pointer to line breaking properties */
+ utf32_t end; /**< Last code point covered by this index entry */
+ struct LineBreakProperties *lbp;/**< First line breaking properties entry to search for this range */
};
/**
*/
static struct LineBreakPropertiesIndex lb_prop_index[LINEBREAK_INDEX_SIZE] =
{
- { 0xFFFFFFFF, lb_prop_default }
+ { 0xFFFFFFFF, lb_prop_default } /* Single catch-all slot until init_linebreak() fills in the index */
};
/**
*/
void init_linebreak(void)
{
- size_t i;
- size_t iPropDefault;
- size_t len;
- size_t step;
-
- len = 0;
- while (lb_prop_default[len].prop != LBP_Undefined)
- ++len;
- step = len / LINEBREAK_INDEX_SIZE;
- iPropDefault = 0;
- for (i = 0; i < LINEBREAK_INDEX_SIZE; ++i)
- {
- lb_prop_index[i].lbp = lb_prop_default + iPropDefault;
- iPropDefault += step;
- lb_prop_index[i].end = lb_prop_default[iPropDefault].start - 1;
- }
- lb_prop_index[--i].end = 0xFFFFFFFF;
+ size_t i;
+ size_t iPropDefault;
+ size_t len;
+ size_t step; /* Number of lb_prop_default entries per index slot */
+
+ len = 0; /* Count the entries before the LBP_Undefined terminator */
+ while (lb_prop_default[len].prop != LBP_Undefined)
+ ++len;
+ step = len / LINEBREAK_INDEX_SIZE;
+ iPropDefault = 0;
+ for (i = 0; i < LINEBREAK_INDEX_SIZE; ++i)
+ {
+ lb_prop_index[i].lbp = lb_prop_default + iPropDefault;
+ iPropDefault += step;
+ lb_prop_index[i].end = lb_prop_default[iPropDefault].start - 1; /* One before the first code point of the next slot */
+ }
+ lb_prop_index[--i].end = 0xFFFFFFFF; /* The last slot is open-ended */
}
/**
* Gets the language-specific line breaking properties.
*
- * @param lang language of the text
- * @return pointer to the language-specific line breaking
- * properties array if found; \c NULL otherwise
+ * @param lang language of the text
+ * @return pointer to the language-specific line breaking
+ * properties array if found; \c NULL otherwise
*/
static struct LineBreakProperties *get_lb_prop_lang(const char *lang)
{
- struct LineBreakPropertiesLang *lbplIter;
- if (lang != NULL)
- {
- for (lbplIter = lb_prop_lang_map; lbplIter->lang != NULL; ++lbplIter)
- {
- if (strncmp(lang, lbplIter->lang, lbplIter->namelen) == 0)
- {
- return lbplIter->lbp;
- }
- }
- }
- return NULL;
+ struct LineBreakPropertiesLang *lbplIter;
+ if (lang != NULL)
+ {
+ for (lbplIter = lb_prop_lang_map; lbplIter->lang != NULL; ++lbplIter) /* The map ends at the entry whose lang is NULL */
+ {
+ if (strncmp(lang, lbplIter->lang, lbplIter->namelen) == 0) /* Only the first namelen characters are compared */
+ {
+ return lbplIter->lbp;
+ }
+ }
+ }
+ return NULL; /* No language given, or no map entry matched */
}
/**
* Gets the line breaking class of a character from a line breaking
* properties array.
*
- * @param ch character to check
- * @param lbp pointer to the line breaking properties array
- * @return the line breaking class if found; \c LBP_XX otherwise
+ * @param ch character to check
+ * @param lbp pointer to the line breaking properties array
+ * @return the line breaking class if found; \c LBP_XX otherwise
*/
static enum LineBreakClass get_char_lb_class(
- utf32_t ch,
- struct LineBreakProperties *lbp)
+ utf32_t ch,
+ struct LineBreakProperties *lbp)
{
- while (lbp->prop != LBP_Undefined && ch >= lbp->start)
- {
- if (ch <= lbp->end)
- return lbp->prop;
- ++lbp;
- }
- return LBP_XX;
+ while (lbp->prop != LBP_Undefined && ch >= lbp->start) /* Stop at the terminator, or once an entry starts beyond ch */
+ {
+ if (ch <= lbp->end) /* ch lies within [start, end] of this entry */
+ return lbp->prop;
+ ++lbp;
+ }
+ return LBP_XX; /* No entry covers ch */
}
/**
* Gets the line breaking class of a character from the default line
* breaking properties array.
*
- * @param ch character to check
- * @return the line breaking class if found; \c LBP_XX otherwise
+ * @param ch character to check
+ * @return the line breaking class if found; \c LBP_XX otherwise
*/
static enum LineBreakClass get_char_lb_class_default(
- utf32_t ch)
+ utf32_t ch)
{
- size_t i = 0;
- while (ch > lb_prop_index[i].end)
- ++i;
- assert(i < LINEBREAK_INDEX_SIZE);
- return get_char_lb_class(ch, lb_prop_index[i].lbp);
+ size_t i = 0;
+ while (ch > lb_prop_index[i].end) /* Find the first index slot whose range reaches ch */
+ ++i;
+ assert(i < LINEBREAK_INDEX_SIZE);
+ return get_char_lb_class(ch, lb_prop_index[i].lbp); /* Scan the properties starting at that slot */
}
/**
* and then the default data if there is no language-specific property
* available for the character.
*
- * @param ch character to check
- * @param lbpLang pointer to the language-specific line breaking
- * properties array
- * @return the line breaking class if found; \c LBP_XX
- * otherwise
+ * @param ch character to check
+ * @param lbpLang pointer to the language-specific line breaking
+ * properties array
+ * @return the line breaking class if found; \c LBP_XX
+ * otherwise
*/
static enum LineBreakClass get_char_lb_class_lang(
- utf32_t ch,
- struct LineBreakProperties *lbpLang)
+ utf32_t ch,
+ struct LineBreakProperties *lbpLang)
{
- enum LineBreakClass lbcResult;
-
- /* Find the language-specific line breaking class for a character */
- if (lbpLang)
- {
- lbcResult = get_char_lb_class(ch, lbpLang);
- if (lbcResult != LBP_XX)
- return lbcResult;
- }
-
- /* Find the generic language-specific line breaking class, if no
- * language context is provided, or language-specific data are not
- * available for the specific character in the specified language */
- return get_char_lb_class_default(ch);
+ enum LineBreakClass lbcResult;
+
+ /* Find the language-specific line breaking class for a character */
+ if (lbpLang)
+ {
+ lbcResult = get_char_lb_class(ch, lbpLang);
+ if (lbcResult != LBP_XX)
+ return lbcResult;
+ }
+
+ /* Fall back to the default (language-independent) line breaking
+ * class if no language-specific data are provided, or if they do
+ * not cover this character */
+ return get_char_lb_class_default(ch);
}
/**
* characters. They are treated in a simplistic way in this
* implementation.
*
- * @param lbc line breaking class to resolve
- * @param lang language of the text
- * @return the resolved line breaking class
+ * @param lbc line breaking class to resolve
+ * @param lang language of the text
+ * @return the resolved line breaking class
*/
static enum LineBreakClass resolve_lb_class(
- enum LineBreakClass lbc,
- const char *lang)
+ enum LineBreakClass lbc,
+ const char *lang)
+{
+ switch (lbc)
+ {
+ case LBP_AI: /* ID for Chinese, Japanese and Korean text; AL otherwise */
+ if (lang != NULL &&
+ (strncmp(lang, "zh", 2) == 0 || /* Chinese */
+ strncmp(lang, "ja", 2) == 0 || /* Japanese */
+ strncmp(lang, "ko", 2) == 0)) /* Korean */
+ {
+ return LBP_ID;
+ }
+ else
+ {
+ return LBP_AL;
+ }
+ case LBP_CJ:
+ /* Simplified for `normal' line breaking. See
+ * <url:http://www.unicode.org/reports/tr14/tr14-30.html#CJ>
+ * for details. */
+ return LBP_ID;
+ case LBP_SA:
+ case LBP_SG:
+ case LBP_XX:
+ return LBP_AL; /* All three are simply treated as AL */
+ default:
+ return lbc; /* Every other class is kept as is */
+ }
+}
+
+/**
+ * Applies the special treatment needed for the first character of a
+ * line: LF and NL become BK, CB becomes BA, and SP becomes WJ. Any
+ * other class is left unchanged.
+ *
+ * @param[in,out] lbpCtx pointer to the line breaking context
+ * @pre \a lbpCtx->lbcCur has a valid line break class
+ * @post \a lbpCtx->lbcCur has the updated line break class
+ */
+static void treat_first_char(
+ struct LineBreakContext* lbpCtx)
+{
+ switch (lbpCtx->lbcCur)
+ {
+ case LBP_LF:
+ case LBP_NL:
+ lbpCtx->lbcCur = LBP_BK; /* Rule LB5 */
+ break;
+ case LBP_CB:
+ lbpCtx->lbcCur = LBP_BA; /* Rule LB20 */
+ break;
+ case LBP_SP:
+ lbpCtx->lbcCur = LBP_WJ; /* Leading space treated as WJ */
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * Tries to tell the line break opportunity by simple rules, i.e.
+ * without consulting the break action pair table.
+ *
+ * @param[in,out] lbpCtx pointer to the line breaking context
+ * @pre \a lbpCtx->lbcCur has the current line break
+ * class; and \a lbpCtx->lbcNew has the line
+ * break class for the next character
+ * @post \a lbpCtx->lbcCur has the updated line break
+ * class
+ * @return break result, one of #LINEBREAK_MUSTBREAK,
+ * #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK
+ * if identified; or #LINEBREAK_UNDEFINED if
+ * table lookup is needed
+ */
+static int get_lb_result_simple(
+ struct LineBreakContext* lbpCtx)
+{
+ if (lbpCtx->lbcCur == LBP_BK
+ || (lbpCtx->lbcCur == LBP_CR && lbpCtx->lbcNew != LBP_LF))
+ {
+ return LINEBREAK_MUSTBREAK; /* Rules LB4 and LB5 */
+ }
+
+ switch (lbpCtx->lbcNew)
+ {
+ case LBP_SP:
+ return LINEBREAK_NOBREAK; /* Rule LB7; no change to lbcCur */
+ case LBP_BK:
+ case LBP_LF:
+ case LBP_NL:
+ lbpCtx->lbcCur = LBP_BK; /* Mandatory break after */
+ return LINEBREAK_NOBREAK; /* Rule LB6 */
+ case LBP_CR:
+ lbpCtx->lbcCur = LBP_CR; /* Break is decided once the next class is known */
+ return LINEBREAK_NOBREAK; /* Rule LB6 */
+ case LBP_CB:
+ lbpCtx->lbcCur = LBP_BA; /* CB is carried forward as BA */
+ return LINEBREAK_ALLOWBREAK; /* Rule LB20 */
+ default:
+ return LINEBREAK_UNDEFINED; /* Table lookup is needed */
+ }
+}
+
+/**
+ * Tells the line break opportunity by table lookup.
+ *
+ * @param[in,out] lbpCtx pointer to the line breaking context
+ * @pre \a lbpCtx->lbcCur has the current line break
+ * class; \a lbpCtx->lbcLast has the line break
+ * class for the last character; and \a
+ * lbpCtx->lbcNew has the line break class for
+ * the next character
+ * @post \a lbpCtx->lbcCur has the updated line break
+ * class
+ * @return break result, one of #LINEBREAK_MUSTBREAK,
+ * #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK
+ */
+static int get_lb_result_lookup(
+ struct LineBreakContext* lbpCtx)
+{
+ /* TODO: Rule LB21a, as introduced by Revision 28 of UAX#14, is not
+ * yet implemented below. */
+ int brk = LINEBREAK_UNDEFINED;
+ assert(lbpCtx->lbcCur <= LBP_RI); /* Only the upper bound of the table index is checked */
+ assert(lbpCtx->lbcNew <= LBP_RI);
+ switch (baTable[lbpCtx->lbcCur - 1][lbpCtx->lbcNew - 1])
+ {
+ case DIR_BRK:
+ brk = LINEBREAK_ALLOWBREAK;
+ break;
+ case CMI_BRK:
+ case IND_BRK: /* Break only if a space came just before the new character */
+ brk = (lbpCtx->lbcLast == LBP_SP)
+ ? LINEBREAK_ALLOWBREAK
+ : LINEBREAK_NOBREAK;
+ break;
+ case CMP_BRK:
+ brk = LINEBREAK_NOBREAK;
+ if (lbpCtx->lbcLast != LBP_SP)
+ return brk; /* Do not update lbcCur */
+ break;
+ case PRH_BRK:
+ brk = LINEBREAK_NOBREAK;
+ break;
+ }
+ lbpCtx->lbcCur = lbpCtx->lbcNew; /* The new class becomes the current one for the next pair */
+ return brk;
+}
+
+/**
+ * Initializes line breaking context for a given language.
+ *
+ * @param[in,out] lbpCtx pointer to the line breaking context
+ * @param[in] ch the first character to process
+ * @param[in] lang language of the input
+ * @post the line breaking context is initialized
+ */
+void lb_init_break_context(
+ struct LineBreakContext* lbpCtx,
+ utf32_t ch,
+ const char* lang)
+{
+ lbpCtx->lang = lang; /* The pointer is stored, not copied */
+ lbpCtx->lbpLang = get_lb_prop_lang(lang);
+ lbpCtx->lbcLast = LBP_Undefined; /* No previous character yet */
+ lbpCtx->lbcNew = LBP_Undefined; /* No next character yet */
+ lbpCtx->lbcCur = resolve_lb_class(
+ get_char_lb_class_lang(ch, lbpCtx->lbpLang),
+ lbpCtx->lang);
+ treat_first_char(lbpCtx); /* ch starts the first line */
+}
+
+/**
+ * Updates LineBreakingContext for the next code point and returns
+ * the detected break.
+ *
+ * @param[in,out] lbpCtx pointer to the line breaking context
+ * @param[in] ch Unicode code point
+ * @return break result, one of #LINEBREAK_MUSTBREAK,
+ * #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK
+ * @post the line breaking context is updated
+ */
+int lb_process_next_char(
+ struct LineBreakContext* lbpCtx,
+ utf32_t ch )
{
- switch (lbc)
- {
- case LBP_AI:
- if (lang != NULL &&
- (strncmp(lang, "zh", 2) == 0 || /* Chinese */
- strncmp(lang, "ja", 2) == 0 || /* Japanese */
- strncmp(lang, "ko", 2) == 0)) /* Korean */
- {
- return LBP_ID;
- }
- /* Fall through */
- case LBP_SA:
- case LBP_SG:
- case LBP_XX:
- return LBP_AL;
- default:
- return lbc;
- }
+ int brk;
+
+ lbpCtx->lbcLast = lbpCtx->lbcNew; /* Remember the class seen for the previous character */
+ lbpCtx->lbcNew = get_char_lb_class_lang(ch, lbpCtx->lbpLang);
+ brk = get_lb_result_simple(lbpCtx);
+ switch (brk)
+ {
+ case LINEBREAK_MUSTBREAK: /* ch starts a new line */
+ lbpCtx->lbcCur = resolve_lb_class(lbpCtx->lbcNew, lbpCtx->lang);
+ treat_first_char(lbpCtx);
+ break;
+ case LINEBREAK_UNDEFINED: /* Simple rules were inconclusive: resolve the class and look up the pair table */
+ lbpCtx->lbcNew = resolve_lb_class(lbpCtx->lbcNew, lbpCtx->lang);
+ brk = get_lb_result_lookup(lbpCtx);
+ break;
+ default:
+ break;
+ }
+ return brk;
}
/**
* be advanced to the next complete character, unless the end of string
* is reached in the middle of a UTF-8 sequence.
*
- * @param[in] s input UTF-8 string
- * @param[in] len length of the string in bytes
- * @param[in,out] ip pointer to the index
- * @return the Unicode character beginning at the index; or
- * #EOS if end of input is encountered
+ * @param[in] s input UTF-8 string
+ * @param[in] len length of the string in bytes
+ * @param[in,out] ip pointer to the index
+ * @return the Unicode character beginning at the index; or
+ * #EOS if end of input is encountered
*/
utf32_t lb_get_next_char_utf8(
- const utf8_t *s,
- size_t len,
- size_t *ip)
+ const utf8_t *s,
+ size_t len,
+ size_t *ip)
{
- utf8_t ch;
- utf32_t res;
-
- assert(*ip <= len);
- if (*ip == len)
- return EOS;
- ch = s[*ip];
-
- if (ch < 0xC2 || ch > 0xF4)
- { /* One-byte sequence, tail (should not occur), or invalid */
- *ip += 1;
- return ch;
- }
- else if (ch < 0xE0)
- { /* Two-byte sequence */
- if (*ip + 2 > len)
- return EOS;
- res = ((ch & 0x1F) << 6) + (s[*ip + 1] & 0x3F);
- *ip += 2;
- return res;
- }
- else if (ch < 0xF0)
- { /* Three-byte sequence */
- if (*ip + 3 > len)
- return EOS;
- res = ((ch & 0x0F) << 12) +
- ((s[*ip + 1] & 0x3F) << 6) +
- ((s[*ip + 2] & 0x3F));
- *ip += 3;
- return res;
- }
- else
- { /* Four-byte sequence */
- if (*ip + 4 > len)
- return EOS;
- res = ((ch & 0x07) << 18) +
- ((s[*ip + 1] & 0x3F) << 12) +
- ((s[*ip + 2] & 0x3F) << 6) +
- ((s[*ip + 3] & 0x3F));
- *ip += 4;
- return res;
- }
+ utf8_t ch;
+ utf32_t res;
+
+ assert(*ip <= len);
+ if (*ip == len) /* Nothing left to read */
+ return EOS;
+ ch = s[*ip];
+
+ if (ch < 0xC2 || ch > 0xF4)
+ { /* One-byte sequence, tail (should not occur), or invalid */
+ *ip += 1;
+ return ch; /* The byte value itself is returned */
+ }
+ else if (ch < 0xE0)
+ { /* Two-byte sequence */
+ if (*ip + 2 > len) /* Sequence is cut short; the index is not advanced */
+ return EOS;
+ res = ((ch & 0x1F) << 6) + (s[*ip + 1] & 0x3F); /* The tail byte is masked, not validated */
+ *ip += 2;
+ return res;
+ }
+ else if (ch < 0xF0)
+ { /* Three-byte sequence */
+ if (*ip + 3 > len) /* Sequence is cut short; the index is not advanced */
+ return EOS;
+ res = ((ch & 0x0F) << 12) +
+ ((s[*ip + 1] & 0x3F) << 6) +
+ ((s[*ip + 2] & 0x3F));
+ *ip += 3;
+ return res;
+ }
+ else
+ { /* Four-byte sequence */
+ if (*ip + 4 > len) /* Sequence is cut short; the index is not advanced */
+ return EOS;
+ res = ((ch & 0x07) << 18) +
+ ((s[*ip + 1] & 0x3F) << 12) +
+ ((s[*ip + 2] & 0x3F) << 6) +
+ ((s[*ip + 3] & 0x3F));
+ *ip += 4;
+ return res;
+ }
}
/**
* be advanced to the next complete character, unless the end of string
* is reached in the middle of a UTF-16 surrogate pair.
*
- * @param[in] s input UTF-16 string
- * @param[in] len length of the string in words
- * @param[in,out] ip pointer to the index
- * @return the Unicode character beginning at the index; or
- * #EOS if end of input is encountered
+ * @param[in] s input UTF-16 string
+ * @param[in] len length of the string in words
+ * @param[in,out] ip pointer to the index
+ * @return the Unicode character beginning at the index; or
+ * #EOS if end of input is encountered
*/
utf32_t lb_get_next_char_utf16(
- const utf16_t *s,
- size_t len,
- size_t *ip)
+ const utf16_t *s,
+ size_t len,
+ size_t *ip)
{
- utf16_t ch;
-
- assert(*ip <= len);
- if (*ip == len)
- return EOS;
- ch = s[(*ip)++];
-
- if (ch < 0xD800 || ch > 0xDBFF)
- { /* If the character is not a high surrogate */
- return ch;
- }
- if (*ip == len)
- { /* If the input ends here (an error) */
- --(*ip);
- return EOS;
- }
- if (s[*ip] < 0xDC00 || s[*ip] > 0xDFFF)
- { /* If the next character is not the low surrogate (an error) */
- return ch;
- }
- /* Return the constructed character and advance the index again */
- return (((utf32_t)ch & 0x3FF) << 10) + (s[(*ip)++] & 0x3FF) + 0x10000;
+ utf16_t ch;
+
+ assert(*ip <= len);
+ if (*ip == len) /* Nothing left to read */
+ return EOS;
+ ch = s[(*ip)++]; /* The index now points past the first unit */
+
+ if (ch < 0xD800 || ch > 0xDBFF)
+ { /* If the character is not a high surrogate */
+ return ch;
+ }
+ if (*ip == len)
+ { /* If the input ends here (an error) */
+ --(*ip); /* Put the index back on the lone high surrogate */
+ return EOS;
+ }
+ if (s[*ip] < 0xDC00 || s[*ip] > 0xDFFF)
+ { /* If the next character is not the low surrogate (an error) */
+ return ch; /* The unpaired high surrogate is returned as is */
+ }
+ /* Return the constructed character and advance the index again */
+ return (((utf32_t)ch & 0x3FF) << 10) + (s[(*ip)++] & 0x3FF) + 0x10000;
}
/**
* Gets the next Unicode character in a UTF-32 sequence. The index will
* be advanced to the next character.
*
- * @param[in] s input UTF-32 string
- * @param[in] len length of the string in dwords
- * @param[in,out] ip pointer to the index
- * @return the Unicode character beginning at the index; or
- * #EOS if end of input is encountered
+ * @param[in] s input UTF-32 string
+ * @param[in] len length of the string in dwords
+ * @param[in,out] ip pointer to the index
+ * @return the Unicode character beginning at the index; or
+ * #EOS if end of input is encountered
*/
utf32_t lb_get_next_char_utf32(
- const utf32_t *s,
- size_t len,
- size_t *ip)
+ const utf32_t *s,
+ size_t len,
+ size_t *ip)
{
- assert(*ip <= len);
- if (*ip == len)
- return EOS;
- return s[(*ip)++];
+ assert(*ip <= len);
+ if (*ip == len) /* Nothing left to read */
+ return EOS;
+ return s[(*ip)++]; /* Each element is already a full code point */
}
/**
* Sets the line breaking information for a generic input string.
*
- * @param[in] s input string
- * @param[in] len length of the input
- * @param[in] lang language of the input
- * @param[out] brks pointer to the output breaking data,
- * containing #LINEBREAK_MUSTBREAK,
- * #LINEBREAK_ALLOWBREAK, #LINEBREAK_NOBREAK,
- * or #LINEBREAK_INSIDEACHAR
- * @param[in] get_next_char function to get the next UTF-32 character
+ * @param[in] s input string
+ * @param[in] len length of the input
+ * @param[in] lang language of the input
+ * @param[out] brks pointer to the output breaking data,
+ * containing #LINEBREAK_MUSTBREAK,
+ * #LINEBREAK_ALLOWBREAK, #LINEBREAK_NOBREAK,
+ * or #LINEBREAK_INSIDEACHAR
+ * @param[in] get_next_char function to get the next UTF-32 character
*/
void set_linebreaks(
- const void *s,
- size_t len,
- const char *lang,
- char *brks,
- get_next_char_t get_next_char)
+ const void *s,
+ size_t len,
+ const char *lang,
+ char *brks,
+ get_next_char_t get_next_char)
{
- utf32_t ch;
- enum LineBreakClass lbcCur;
- enum LineBreakClass lbcNew;
- enum LineBreakClass lbcLast;
- struct LineBreakProperties *lbpLang;
- size_t posCur = 0;
- size_t posLast = 0;
- // TIZEN ONLY : (2013.08.19) for special processing at Zero-width space character
- int zw_flag = 0;
- //
-
- --posLast; /* To be ++'d later */
- ch = get_next_char(s, len, &posCur);
- if (ch == EOS)
- return;
- lbpLang = get_lb_prop_lang(lang);
- lbcCur = resolve_lb_class(get_char_lb_class_lang(ch, lbpLang), lang);
- lbcNew = LBP_Undefined;
-
-nextline:
-
- /* Special treatment for the first character */
- switch (lbcCur)
- {
- case LBP_LF:
- case LBP_NL:
- lbcCur = LBP_BK;
- break;
- case LBP_CB:
- lbcCur = LBP_BA;
- break;
- case LBP_SP:
- lbcCur = LBP_WJ;
- break;
- default:
- break;
- }
-
- /* Process a line till an explicit break or end of string */
- for (;;)
- {
- for (++posLast; posLast < posCur - 1; ++posLast)
- {
- brks[posLast] = LINEBREAK_INSIDEACHAR;
- }
- assert(posLast == posCur - 1);
- lbcLast = lbcNew;
- ch = get_next_char(s, len, &posCur);
- if (ch == EOS)
- break;
- lbcNew = get_char_lb_class_lang(ch, lbpLang);
- if (lbcCur == LBP_BK || (lbcCur == LBP_CR && lbcNew != LBP_LF))
- {
- brks[posLast] = LINEBREAK_MUSTBREAK;
- lbcCur = resolve_lb_class(lbcNew, lang);
- goto nextline;
- }
-
- // TIZEN ONLY : (2013.08.19) for special processing at Zero-width space character
- /*
- switch (lbcNew)
- {
- case LBP_SP:
- brks[posLast] = LINEBREAK_NOBREAK;
- continue;
- case LBP_BK:
- case LBP_LF:
- case LBP_NL:
- brks[posLast] = LINEBREAK_NOBREAK;
- lbcCur = LBP_BK;
- continue;
- case LBP_CR:
- brks[posLast] = LINEBREAK_NOBREAK;
- lbcCur = LBP_CR;
- continue;
- case LBP_CB:
- brks[posLast] = LINEBREAK_ALLOWBREAK;
- lbcCur = LBP_BA;
- continue;
- default:
- break;
- }
-
- lbcNew = resolve_lb_class(lbcNew, lang);
-
- assert(lbcCur <= LBP_JT);
- assert(lbcNew <= LBP_JT);
- switch (baTable[lbcCur - 1][lbcNew - 1])
- {
- case DIR_BRK:
- brks[posLast] = LINEBREAK_ALLOWBREAK;
- break;
- case CMI_BRK:
- case IND_BRK:
- if (lbcLast == LBP_SP)
- {
- brks[posLast] = LINEBREAK_ALLOWBREAK;
- }
- else
- {
- brks[posLast] = LINEBREAK_NOBREAK;
- }
- break;
- case CMP_BRK:
- brks[posLast] = LINEBREAK_NOBREAK;
- if (lbcLast != LBP_SP)
- continue;
- break;
- case PRH_BRK:
- brks[posLast] = LINEBREAK_NOBREAK;
- break;
- }
-
- lbcCur = lbcNew;
- */
-
- // TIZEN ONLY - START
- if (lbcCur == LBP_ZW && !zw_flag)
- {
- zw_flag = 1;
- posLast = -1;
- posCur = 0;
- ch = get_next_char(s, len, &posCur);
- lbcCur = resolve_lb_class(get_char_lb_class_lang(ch, lbpLang), lang);
- lbcNew = LBP_Undefined;
- goto nextline;
- }
- else if (zw_flag)
- {
- if (lbcCur == LBP_ZW)
- brks[posLast] = LINEBREAK_ALLOWBREAK;
- else
- brks[posLast] = LINEBREAK_NOBREAK;
- lbcCur = lbcNew;
- }
- else
- {
- // TIZEN ONLY(20131106): For Hangul word wrap
- switch (lbcCur)
- {
- case LBP_H2: /**< Hangul LV */
- case LBP_H3: /**< Hangul LVT */
- case LBP_JL: /**< Hangul L Jamo */
- case LBP_JV: /**< Hangul V Jamo */
- case LBP_JT: /**< Hangul T Jamo */
- lbcCur = LBP_AL;
- break;
- default:
- break;
- }
-
- switch (lbcNew)
- {
- case LBP_H2: /**< Hangul LV */
- case LBP_H3: /**< Hangul LVT */
- case LBP_JL: /**< Hangul L Jamo */
- case LBP_JV: /**< Hangul V Jamo */
- case LBP_JT: /**< Hangul T Jamo */
- lbcNew = LBP_AL;
- break;
- default:
- break;
- }
- //
-
- switch (lbcNew)
- {
- case LBP_SP:
- brks[posLast] = LINEBREAK_NOBREAK;
- continue;
- case LBP_BK:
- case LBP_LF:
- case LBP_NL:
- brks[posLast] = LINEBREAK_NOBREAK;
- lbcCur = LBP_BK;
- continue;
- case LBP_CR:
- brks[posLast] = LINEBREAK_NOBREAK;
- lbcCur = LBP_CR;
- continue;
- case LBP_CB:
- brks[posLast] = LINEBREAK_ALLOWBREAK;
- lbcCur = LBP_BA;
- continue;
- default:
- break;
- }
-
- lbcNew = resolve_lb_class(lbcNew, lang);
-
- assert(lbcCur <= LBP_JT);
- assert(lbcNew <= LBP_JT);
- switch (baTable[lbcCur - 1][lbcNew - 1])
- {
- case DIR_BRK:
- brks[posLast] = LINEBREAK_ALLOWBREAK;
- break;
- case CMI_BRK:
- case IND_BRK:
- if (lbcLast == LBP_SP)
- {
- brks[posLast] = LINEBREAK_ALLOWBREAK;
- }
- else
- {
- brks[posLast] = LINEBREAK_NOBREAK;
- }
- break;
- case CMP_BRK:
- brks[posLast] = LINEBREAK_NOBREAK;
- if (lbcLast != LBP_SP)
- continue;
- break;
- case PRH_BRK:
- brks[posLast] = LINEBREAK_NOBREAK;
- break;
- }
- lbcCur = lbcNew;
- }
- // TIZEN ONLY - END
- }
-
- assert(posLast == posCur - 1 && posCur <= len);
- /* Break after the last character */
- brks[posLast] = LINEBREAK_MUSTBREAK;
- /* When the input contains incomplete sequences */
- while (posCur < len)
- {
- brks[posCur++] = LINEBREAK_INSIDEACHAR;
- }
+ utf32_t ch;
+ struct LineBreakContext lbCtx;
+ size_t posCur = 0;
+ size_t posLast = 0;
+
+ --posLast; /* To be ++'d later */
+ ch = get_next_char(s, len, &posCur);
+ if (ch == EOS)
+ return;
+ lb_init_break_context(&lbCtx, ch, lang);
+
+ /* Process the input character by character until the end of string */
+ for (;;)
+ {
+ for (++posLast; posLast < posCur - 1; ++posLast)
+ {
+ brks[posLast] = LINEBREAK_INSIDEACHAR;
+ }
+ assert(posLast == posCur - 1);
+ ch = get_next_char(s, len, &posCur);
+ if (ch == EOS)
+ break;
+ brks[posLast] = lb_process_next_char(&lbCtx, ch);
+ }
+
+ assert(posLast == posCur - 1 && posCur <= len);
+ /* Break after the last character */
+ brks[posLast] = LINEBREAK_MUSTBREAK;
+ /* When the input contains incomplete sequences */
+ while (posCur < len)
+ {
+ brks[posCur++] = LINEBREAK_INSIDEACHAR;
+ }
}
/**
* Sets the line breaking information for a UTF-8 input string.
*
- * @param[in] s input UTF-8 string
- * @param[in] len length of the input
- * @param[in] lang language of the input
- * @param[out] brks pointer to the output breaking data, containing
- * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
- * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
+ * @param[in] s input UTF-8 string
+ * @param[in] len length of the input
+ * @param[in] lang language of the input
+ * @param[out] brks pointer to the output breaking data, containing
+ * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
+ * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
*/
void set_linebreaks_utf8(
- const utf8_t *s,
- size_t len,
- const char *lang,
- char *brks)
+ const utf8_t *s,
+ size_t len,
+ const char *lang,
+ char *brks)
{
- set_linebreaks(s, len, lang, brks,
- (get_next_char_t)lb_get_next_char_utf8);
+ set_linebreaks(s, len, lang, brks,
+ (get_next_char_t)lb_get_next_char_utf8);
}
/**
* Sets the line breaking information for a UTF-16 input string.
*
- * @param[in] s input UTF-16 string
- * @param[in] len length of the input
- * @param[in] lang language of the input
- * @param[out] brks pointer to the output breaking data, containing
- * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
- * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
+ * @param[in] s input UTF-16 string
+ * @param[in] len length of the input
+ * @param[in] lang language of the input
+ * @param[out] brks pointer to the output breaking data, containing
+ * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
+ * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
*/
void set_linebreaks_utf16(
- const utf16_t *s,
- size_t len,
- const char *lang,
- char *brks)
+ const utf16_t *s,
+ size_t len,
+ const char *lang,
+ char *brks)
{
- set_linebreaks(s, len, lang, brks,
- (get_next_char_t)lb_get_next_char_utf16);
+ set_linebreaks(s, len, lang, brks,
+ (get_next_char_t)lb_get_next_char_utf16);
}
/**
* Sets the line breaking information for a UTF-32 input string.
*
- * @param[in] s input UTF-32 string
- * @param[in] len length of the input
- * @param[in] lang language of the input
- * @param[out] brks pointer to the output breaking data, containing
- * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
- * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
+ * @param[in] s input UTF-32 string
+ * @param[in] len length of the input
+ * @param[in] lang language of the input
+ * @param[out] brks pointer to the output breaking data, containing
+ * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
+ * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
*/
void set_linebreaks_utf32(
- const utf32_t *s,
- size_t len,
- const char *lang,
- char *brks)
+ const utf32_t *s,
+ size_t len,
+ const char *lang,
+ char *brks)
{
- set_linebreaks(s, len, lang, brks,
- (get_next_char_t)lb_get_next_char_utf32);
+ set_linebreaks(s, len, lang, brks,
+ (get_next_char_t)lb_get_next_char_utf32);
}
/**
* complicated cases involving combining marks, spaces, etc. cannot be
* correctly processed.
*
- * @param char1 the first Unicode character
- * @param char2 the second Unicode character
- * @param lang language of the input
- * @return one of #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
- * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
+ * @param char1 the first Unicode character
+ * @param char2 the second Unicode character
+ * @param lang language of the input
+ * @return one of #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
+ * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
*/
int is_line_breakable(
- utf32_t char1,
- utf32_t char2,
- const char* lang)
+ utf32_t char1,
+ utf32_t char2,
+ const char* lang)
{
- utf32_t s[2];
- char brks[2];
- s[0] = char1;
- s[1] = char2;
- set_linebreaks_utf32(s, 2, lang, brks);
- return brks[0];
+ utf32_t s[2];
+ char brks[2];
+ s[0] = char1;
+ s[1] = char2;
+ set_linebreaks_utf32(s, 2, lang, brks);
+ return brks[0];
}
-/* vim: set tabstop=4 shiftwidth=4: */
+/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
/*
* Line breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
- * Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
+ * Copyright (C) 2008-2012 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
* distribution.
*
* The main reference is Unicode Standard Annex 14 (UAX #14):
- * <URL:http://www.unicode.org/reports/tr14/>
+ * <URL:http://www.unicode.org/reports/tr14/>
*
* When this library was designed, this annex was at Revision 19, for
* Unicode 5.0.0:
- * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
+ * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
*
- * This library has been updated according to Revision 24, for
- * Unicode 5.2.0:
- * <URL:http://www.unicode.org/reports/tr14/tr14-24.html>
+ * This library has been updated according to Revision 30, for
+ * Unicode 6.2.0:
+ * <URL:http://www.unicode.org/reports/tr14/tr14-30.html>
*
* The Unicode Terms of Use are available at
- * <URL:http://www.unicode.org/copyright.html>
+ * <URL:http://www.unicode.org/copyright.html>
*/
/**
- * @file linebreak.h
+ * @file linebreak.h
*
* Header file for the line breaking algorithm.
*
- * @version 2.0, 2010/01/03
- * @author Wu Yongwei
+ * @version 2.2, 2012/10/06
+ * @author Wu Yongwei
*/
#ifndef LINEBREAK_H
extern "C" {
#endif
-#define LINEBREAK_VERSION 0x0200 /**< Version of the library linebreak */
+#define LINEBREAK_VERSION 0x0202 /**< Version of the library linebreak */
extern const int linebreak_version;
#ifndef LINEBREAK_UTF_TYPES_DEFINED
#define LINEBREAK_UTF_TYPES_DEFINED
-typedef unsigned char utf8_t; /**< Type for UTF-8 data points */
-typedef unsigned short utf16_t; /**< Type for UTF-16 data points */
-typedef unsigned int utf32_t; /**< Type for UTF-32 data points */
+typedef unsigned char utf8_t; /**< Type for UTF-8 data points */
+typedef unsigned short utf16_t; /**< Type for UTF-16 data points */
+typedef unsigned int utf32_t; /**< Type for UTF-32 data points */
#endif
-#define LINEBREAK_MUSTBREAK 0 /**< Break is mandatory */
-#define LINEBREAK_ALLOWBREAK 1 /**< Break is allowed */
-#define LINEBREAK_NOBREAK 2 /**< No break is possible */
-#define LINEBREAK_INSIDEACHAR 3 /**< A UTF-8/16 sequence is unfinished */
+#define LINEBREAK_MUSTBREAK 0 /**< Break is mandatory */
+#define LINEBREAK_ALLOWBREAK 1 /**< Break is allowed */
+#define LINEBREAK_NOBREAK 2 /**< No break is possible */
+#define LINEBREAK_INSIDEACHAR 3 /**< A UTF-8/16 sequence is unfinished */
void init_linebreak(void);
void set_linebreaks_utf8(
- const utf8_t *s, size_t len, const char* lang, char *brks);
+ const utf8_t *s, size_t len, const char* lang, char *brks);
void set_linebreaks_utf16(
- const utf16_t *s, size_t len, const char* lang, char *brks);
+ const utf16_t *s, size_t len, const char* lang, char *brks);
void set_linebreaks_utf32(
- const utf32_t *s, size_t len, const char* lang, char *brks);
+ const utf32_t *s, size_t len, const char* lang, char *brks);
int is_line_breakable(utf32_t char1, utf32_t char2, const char* lang);
#ifdef __cplusplus
/* The content of this file is generated from:
-# LineBreak-6.0.0.txt
-# Date: 2010-08-18, 17:25:00 PDT [KW]
+# LineBreak-7.0.0.txt
+# Date: 2014-02-28, 23:15:00 GMT [KW, LI]
*/
#include "linebreak.h"
{ 0x0363, 0x036F, LBP_CM },
{ 0x0370, 0x037D, LBP_AL },
{ 0x037E, 0x037E, LBP_IS },
- { 0x0384, 0x0482, LBP_AL },
+ { 0x037F, 0x0482, LBP_AL },
{ 0x0483, 0x0489, LBP_CM },
{ 0x048A, 0x0587, LBP_AL },
{ 0x0589, 0x0589, LBP_IS },
{ 0x058A, 0x058A, LBP_BA },
+ { 0x058D, 0x058E, LBP_AL },
+ { 0x058F, 0x058F, LBP_PR },
{ 0x0591, 0x05BD, LBP_CM },
{ 0x05BE, 0x05BE, LBP_BA },
{ 0x05BF, 0x05BF, LBP_CM },
{ 0x05C4, 0x05C5, LBP_CM },
{ 0x05C6, 0x05C6, LBP_EX },
{ 0x05C7, 0x05C7, LBP_CM },
- { 0x05D0, 0x0608, LBP_AL },
+ { 0x05D0, 0x05F2, LBP_HL },
+ { 0x05F3, 0x0608, LBP_AL },
{ 0x0609, 0x060B, LBP_PO },
{ 0x060C, 0x060D, LBP_IS },
{ 0x060E, 0x060F, LBP_AL },
{ 0x0610, 0x061A, LBP_CM },
- { 0x061B, 0x061F, LBP_EX },
+ { 0x061B, 0x061B, LBP_EX },
+ { 0x061C, 0x061C, LBP_CM },
+ { 0x061E, 0x061F, LBP_EX },
{ 0x0620, 0x064A, LBP_AL },
{ 0x064B, 0x065F, LBP_CM },
{ 0x0660, 0x0669, LBP_NU },
{ 0x0829, 0x082D, LBP_CM },
{ 0x0830, 0x0858, LBP_AL },
{ 0x0859, 0x085B, LBP_CM },
- { 0x085E, 0x085E, LBP_AL },
- { 0x0900, 0x0903, LBP_CM },
+ { 0x085E, 0x08B2, LBP_AL },
+ { 0x08E4, 0x0903, LBP_CM },
{ 0x0904, 0x0939, LBP_AL },
{ 0x093A, 0x093C, LBP_CM },
{ 0x093D, 0x093D, LBP_AL },
{ 0x0962, 0x0963, LBP_CM },
{ 0x0964, 0x0965, LBP_BA },
{ 0x0966, 0x096F, LBP_NU },
- { 0x0970, 0x097F, LBP_AL },
+ { 0x0970, 0x0980, LBP_AL },
{ 0x0981, 0x0983, LBP_CM },
{ 0x0985, 0x09B9, LBP_AL },
{ 0x09BC, 0x09BC, LBP_CM },
{ 0x0AD0, 0x0AE1, LBP_AL },
{ 0x0AE2, 0x0AE3, LBP_CM },
{ 0x0AE6, 0x0AEF, LBP_NU },
+ { 0x0AF0, 0x0AF0, LBP_AL },
{ 0x0AF1, 0x0AF1, LBP_PR },
{ 0x0B01, 0x0B03, LBP_CM },
{ 0x0B05, 0x0B39, LBP_AL },
{ 0x0BF0, 0x0BF8, LBP_AL },
{ 0x0BF9, 0x0BF9, LBP_PR },
{ 0x0BFA, 0x0BFA, LBP_AL },
- { 0x0C01, 0x0C03, LBP_CM },
+ { 0x0C00, 0x0C03, LBP_CM },
{ 0x0C05, 0x0C3D, LBP_AL },
{ 0x0C3E, 0x0C56, LBP_CM },
{ 0x0C58, 0x0C61, LBP_AL },
{ 0x0C62, 0x0C63, LBP_CM },
{ 0x0C66, 0x0C6F, LBP_NU },
{ 0x0C78, 0x0C7F, LBP_AL },
- { 0x0C82, 0x0C83, LBP_CM },
+ { 0x0C81, 0x0C83, LBP_CM },
{ 0x0C85, 0x0CB9, LBP_AL },
{ 0x0CBC, 0x0CBC, LBP_CM },
{ 0x0CBD, 0x0CBD, LBP_AL },
{ 0x0CE2, 0x0CE3, LBP_CM },
{ 0x0CE6, 0x0CEF, LBP_NU },
{ 0x0CF1, 0x0CF2, LBP_AL },
- { 0x0D02, 0x0D03, LBP_CM },
+ { 0x0D01, 0x0D03, LBP_CM },
{ 0x0D05, 0x0D3D, LBP_AL },
{ 0x0D3E, 0x0D4D, LBP_CM },
{ 0x0D4E, 0x0D4E, LBP_AL },
{ 0x0D7A, 0x0D7F, LBP_AL },
{ 0x0D82, 0x0D83, LBP_CM },
{ 0x0D85, 0x0DC6, LBP_AL },
- { 0x0DCA, 0x0DF3, LBP_CM },
+ { 0x0DCA, 0x0DDF, LBP_CM },
+ { 0x0DE6, 0x0DEF, LBP_NU },
+ { 0x0DF2, 0x0DF3, LBP_CM },
{ 0x0DF4, 0x0DF4, LBP_AL },
{ 0x0E01, 0x0E3A, LBP_SA },
{ 0x0E3F, 0x0E3F, LBP_PR },
{ 0x0E5A, 0x0E5B, LBP_BA },
{ 0x0E81, 0x0ECD, LBP_SA },
{ 0x0ED0, 0x0ED9, LBP_NU },
- { 0x0EDC, 0x0EDD, LBP_SA },
+ { 0x0EDC, 0x0EDF, LBP_SA },
{ 0x0F00, 0x0F00, LBP_AL },
{ 0x0F01, 0x0F04, LBP_BB },
{ 0x0F05, 0x0F05, LBP_AL },
{ 0x1050, 0x108F, LBP_SA },
{ 0x1090, 0x1099, LBP_NU },
{ 0x109A, 0x109F, LBP_SA },
- { 0x10A0, 0x10FC, LBP_AL },
+ { 0x10A0, 0x10FF, LBP_AL },
{ 0x1100, 0x115F, LBP_JL },
{ 0x1160, 0x11A7, LBP_JV },
{ 0x11A8, 0x11FF, LBP_JT },
{ 0x1810, 0x1819, LBP_NU },
{ 0x1820, 0x18A8, LBP_AL },
{ 0x18A9, 0x18A9, LBP_CM },
- { 0x18AA, 0x191C, LBP_AL },
+ { 0x18AA, 0x191E, LBP_AL },
{ 0x1920, 0x193B, LBP_CM },
{ 0x1940, 0x1940, LBP_AL },
{ 0x1944, 0x1945, LBP_EX },
{ 0x1A7F, 0x1A7F, LBP_CM },
{ 0x1A80, 0x1A99, LBP_NU },
{ 0x1AA0, 0x1AAD, LBP_SA },
- { 0x1B00, 0x1B04, LBP_CM },
+ { 0x1AB0, 0x1B04, LBP_CM },
{ 0x1B05, 0x1B33, LBP_AL },
{ 0x1B34, 0x1B44, LBP_CM },
{ 0x1B45, 0x1B4B, LBP_AL },
{ 0x1B74, 0x1B7C, LBP_AL },
{ 0x1B80, 0x1B82, LBP_CM },
{ 0x1B83, 0x1BA0, LBP_AL },
- { 0x1BA1, 0x1BAA, LBP_CM },
+ { 0x1BA1, 0x1BAD, LBP_CM },
{ 0x1BAE, 0x1BAF, LBP_AL },
{ 0x1BB0, 0x1BB9, LBP_NU },
- { 0x1BC0, 0x1BE5, LBP_AL },
+ { 0x1BBA, 0x1BE5, LBP_AL },
{ 0x1BE6, 0x1BF3, LBP_CM },
{ 0x1BFC, 0x1C23, LBP_AL },
{ 0x1C24, 0x1C37, LBP_CM },
{ 0x1C50, 0x1C59, LBP_NU },
{ 0x1C5A, 0x1C7D, LBP_AL },
{ 0x1C7E, 0x1C7F, LBP_BA },
+ { 0x1CC0, 0x1CC7, LBP_AL },
{ 0x1CD0, 0x1CD2, LBP_CM },
{ 0x1CD3, 0x1CD3, LBP_AL },
{ 0x1CD4, 0x1CE8, LBP_CM },
{ 0x1CE9, 0x1CEC, LBP_AL },
{ 0x1CED, 0x1CED, LBP_CM },
{ 0x1CEE, 0x1CF1, LBP_AL },
- { 0x1CF2, 0x1CF2, LBP_CM },
+ { 0x1CF2, 0x1CF4, LBP_CM },
+ { 0x1CF5, 0x1CF6, LBP_AL },
+ { 0x1CF8, 0x1CF9, LBP_CM },
{ 0x1D00, 0x1DBF, LBP_AL },
{ 0x1DC0, 0x1DFF, LBP_CM },
{ 0x1E00, 0x1FFC, LBP_AL },
{ 0x205D, 0x205F, LBP_BA },
{ 0x2060, 0x2060, LBP_WJ },
{ 0x2061, 0x2064, LBP_AL },
- { 0x206A, 0x206F, LBP_CM },
+ { 0x2066, 0x206F, LBP_CM },
{ 0x2070, 0x2071, LBP_AL },
{ 0x2074, 0x2074, LBP_AI },
{ 0x2075, 0x207C, LBP_AL },
{ 0x20A7, 0x20A7, LBP_PO },
{ 0x20A8, 0x20B5, LBP_PR },
{ 0x20B6, 0x20B6, LBP_PO },
- { 0x20B7, 0x20B9, LBP_PR },
+ { 0x20B7, 0x20BA, LBP_PR },
+ { 0x20BB, 0x20BB, LBP_PO },
+ { 0x20BC, 0x20CF, LBP_PR },
{ 0x20D0, 0x20F0, LBP_CM },
{ 0x2100, 0x2102, LBP_AL },
{ 0x2103, 0x2103, LBP_PO },
{ 0x22A5, 0x22A5, LBP_AI },
{ 0x22A6, 0x22BE, LBP_AL },
{ 0x22BF, 0x22BF, LBP_AI },
- { 0x22C0, 0x2311, LBP_AL },
+ { 0x22C0, 0x2307, LBP_AL },
+ { 0x2308, 0x2308, LBP_OP },
+ { 0x2309, 0x2309, LBP_CL },
+ { 0x230A, 0x230A, LBP_OP },
+ { 0x230B, 0x230B, LBP_CL },
+ { 0x230C, 0x2311, LBP_AL },
{ 0x2312, 0x2312, LBP_AI },
- { 0x2313, 0x2328, LBP_AL },
+ { 0x2313, 0x2319, LBP_AL },
+ { 0x231A, 0x231B, LBP_ID },
+ { 0x231C, 0x2328, LBP_AL },
{ 0x2329, 0x2329, LBP_OP },
{ 0x232A, 0x232A, LBP_CL },
- { 0x232B, 0x244A, LBP_AL },
+ { 0x232B, 0x23EF, LBP_AL },
+ { 0x23F0, 0x23F3, LBP_ID },
+ { 0x23F4, 0x244A, LBP_AL },
{ 0x2460, 0x24FE, LBP_AI },
{ 0x24FF, 0x24FF, LBP_AL },
{ 0x2500, 0x254B, LBP_AI },
{ 0x25E2, 0x25E5, LBP_AI },
{ 0x25E6, 0x25EE, LBP_AL },
{ 0x25EF, 0x25EF, LBP_AI },
- { 0x25F0, 0x2604, LBP_AL },
+ { 0x25F0, 0x25FF, LBP_AL },
+ { 0x2600, 0x2603, LBP_ID },
+ { 0x2604, 0x2604, LBP_AL },
{ 0x2605, 0x2606, LBP_AI },
{ 0x2607, 0x2608, LBP_AL },
{ 0x2609, 0x2609, LBP_AI },
{ 0x260A, 0x260D, LBP_AL },
{ 0x260E, 0x260F, LBP_AI },
{ 0x2610, 0x2613, LBP_AL },
- { 0x2614, 0x2617, LBP_AI },
- { 0x2618, 0x261B, LBP_AL },
- { 0x261C, 0x261C, LBP_AI },
- { 0x261D, 0x261D, LBP_AL },
- { 0x261E, 0x261E, LBP_AI },
- { 0x261F, 0x263F, LBP_AL },
+ { 0x2614, 0x2615, LBP_ID },
+ { 0x2616, 0x2617, LBP_AI },
+ { 0x2618, 0x2618, LBP_ID },
+ { 0x2619, 0x2619, LBP_AL },
+ { 0x261A, 0x261F, LBP_ID },
+ { 0x2620, 0x2638, LBP_AL },
+ { 0x2639, 0x263B, LBP_ID },
+ { 0x263C, 0x263F, LBP_AL },
{ 0x2640, 0x2640, LBP_AI },
{ 0x2641, 0x2641, LBP_AL },
{ 0x2642, 0x2642, LBP_AI },
{ 0x2662, 0x2662, LBP_AL },
{ 0x2663, 0x2665, LBP_AI },
{ 0x2666, 0x2666, LBP_AL },
- { 0x2667, 0x266A, LBP_AI },
+ { 0x2667, 0x2667, LBP_AI },
+ { 0x2668, 0x2668, LBP_ID },
+ { 0x2669, 0x266A, LBP_AI },
{ 0x266B, 0x266B, LBP_AL },
{ 0x266C, 0x266D, LBP_AI },
{ 0x266E, 0x266E, LBP_AL },
{ 0x266F, 0x266F, LBP_AI },
- { 0x2670, 0x269D, LBP_AL },
+ { 0x2670, 0x267E, LBP_AL },
+ { 0x267F, 0x267F, LBP_ID },
+ { 0x2680, 0x269D, LBP_AL },
{ 0x269E, 0x269F, LBP_AI },
- { 0x26A0, 0x26BD, LBP_AL },
- { 0x26BE, 0x26BF, LBP_AI },
- { 0x26C0, 0x26C3, LBP_AL },
- { 0x26C4, 0x26CD, LBP_AI },
+ { 0x26A0, 0x26BC, LBP_AL },
+ { 0x26BD, 0x26C8, LBP_ID },
+ { 0x26C9, 0x26CC, LBP_AI },
+ { 0x26CD, 0x26CD, LBP_ID },
{ 0x26CE, 0x26CE, LBP_AL },
- { 0x26CF, 0x26E1, LBP_AI },
+ { 0x26CF, 0x26D1, LBP_ID },
+ { 0x26D2, 0x26D2, LBP_AI },
+ { 0x26D3, 0x26D4, LBP_ID },
+ { 0x26D5, 0x26D7, LBP_AI },
+ { 0x26D8, 0x26D9, LBP_ID },
+ { 0x26DA, 0x26DB, LBP_AI },
+ { 0x26DC, 0x26DC, LBP_ID },
+ { 0x26DD, 0x26DE, LBP_AI },
+ { 0x26DF, 0x26E1, LBP_ID },
{ 0x26E2, 0x26E2, LBP_AL },
{ 0x26E3, 0x26E3, LBP_AI },
{ 0x26E4, 0x26E7, LBP_AL },
- { 0x26E8, 0x26FF, LBP_AI },
- { 0x2701, 0x2756, LBP_AL },
+ { 0x26E8, 0x26E9, LBP_AI },
+ { 0x26EA, 0x26EA, LBP_ID },
+ { 0x26EB, 0x26F0, LBP_AI },
+ { 0x26F1, 0x26F5, LBP_ID },
+ { 0x26F6, 0x26F6, LBP_AI },
+ { 0x26F7, 0x26FA, LBP_ID },
+ { 0x26FB, 0x26FC, LBP_AI },
+ { 0x26FD, 0x2704, LBP_ID },
+ { 0x2705, 0x2707, LBP_AL },
+ { 0x2708, 0x270D, LBP_ID },
+ { 0x270E, 0x2756, LBP_AL },
{ 0x2757, 0x2757, LBP_AI },
{ 0x2758, 0x275A, LBP_AL },
- { 0x275B, 0x275E, LBP_QU },
- { 0x275F, 0x2761, LBP_AL },
+ { 0x275B, 0x2760, LBP_QU },
+ { 0x2761, 0x2761, LBP_AL },
{ 0x2762, 0x2763, LBP_EX },
{ 0x2764, 0x2767, LBP_AL },
{ 0x2768, 0x2768, LBP_OP },
{ 0x29FD, 0x29FD, LBP_CL },
{ 0x29FE, 0x2B54, LBP_AL },
{ 0x2B55, 0x2B59, LBP_AI },
- { 0x2C00, 0x2CEE, LBP_AL },
+ { 0x2B5A, 0x2CEE, LBP_AL },
{ 0x2CEF, 0x2CF1, LBP_CM },
+ { 0x2CF2, 0x2CF3, LBP_AL },
{ 0x2CF9, 0x2CF9, LBP_EX },
{ 0x2CFA, 0x2CFC, LBP_BA },
{ 0x2CFD, 0x2CFD, LBP_AL },
{ 0x2E2E, 0x2E2E, LBP_EX },
{ 0x2E2F, 0x2E2F, LBP_AL },
{ 0x2E30, 0x2E31, LBP_BA },
- { 0x2E80, 0x3000, LBP_ID },
+ { 0x2E32, 0x2E32, LBP_AL },
+ { 0x2E33, 0x2E34, LBP_BA },
+ { 0x2E35, 0x2E39, LBP_AL },
+ { 0x2E3A, 0x2E3B, LBP_B2 },
+ { 0x2E3C, 0x2E3E, LBP_BA },
+ { 0x2E3F, 0x2E3F, LBP_AL },
+ { 0x2E40, 0x2E41, LBP_BA },
+ { 0x2E42, 0x2E42, LBP_OP },
+ { 0x2E80, 0x2FFB, LBP_ID },
+ { 0x3000, 0x3000, LBP_BA },
{ 0x3001, 0x3002, LBP_CL },
{ 0x3003, 0x3004, LBP_ID },
{ 0x3005, 0x3005, LBP_NS },
{ 0x301E, 0x301F, LBP_CL },
{ 0x3020, 0x3029, LBP_ID },
{ 0x302A, 0x302F, LBP_CM },
- { 0x3030, 0x303A, LBP_ID },
+ { 0x3030, 0x3034, LBP_ID },
+ { 0x3035, 0x3035, LBP_CM },
+ { 0x3036, 0x303A, LBP_ID },
{ 0x303B, 0x303C, LBP_NS },
{ 0x303D, 0x303F, LBP_ID },
- { 0x3041, 0x3041, LBP_NS },
+ { 0x3041, 0x3041, LBP_CJ },
{ 0x3042, 0x3042, LBP_ID },
- { 0x3043, 0x3043, LBP_NS },
+ { 0x3043, 0x3043, LBP_CJ },
{ 0x3044, 0x3044, LBP_ID },
- { 0x3045, 0x3045, LBP_NS },
+ { 0x3045, 0x3045, LBP_CJ },
{ 0x3046, 0x3046, LBP_ID },
- { 0x3047, 0x3047, LBP_NS },
+ { 0x3047, 0x3047, LBP_CJ },
{ 0x3048, 0x3048, LBP_ID },
- { 0x3049, 0x3049, LBP_NS },
+ { 0x3049, 0x3049, LBP_CJ },
{ 0x304A, 0x3062, LBP_ID },
- { 0x3063, 0x3063, LBP_NS },
+ { 0x3063, 0x3063, LBP_CJ },
{ 0x3064, 0x3082, LBP_ID },
- { 0x3083, 0x3083, LBP_NS },
+ { 0x3083, 0x3083, LBP_CJ },
{ 0x3084, 0x3084, LBP_ID },
- { 0x3085, 0x3085, LBP_NS },
+ { 0x3085, 0x3085, LBP_CJ },
{ 0x3086, 0x3086, LBP_ID },
- { 0x3087, 0x3087, LBP_NS },
+ { 0x3087, 0x3087, LBP_CJ },
{ 0x3088, 0x308D, LBP_ID },
- { 0x308E, 0x308E, LBP_NS },
+ { 0x308E, 0x308E, LBP_CJ },
{ 0x308F, 0x3094, LBP_ID },
- { 0x3095, 0x3096, LBP_NS },
+ { 0x3095, 0x3096, LBP_CJ },
{ 0x3099, 0x309A, LBP_CM },
{ 0x309B, 0x309E, LBP_NS },
{ 0x309F, 0x309F, LBP_ID },
- { 0x30A0, 0x30A1, LBP_NS },
+ { 0x30A0, 0x30A0, LBP_NS },
+ { 0x30A1, 0x30A1, LBP_CJ },
{ 0x30A2, 0x30A2, LBP_ID },
- { 0x30A3, 0x30A3, LBP_NS },
+ { 0x30A3, 0x30A3, LBP_CJ },
{ 0x30A4, 0x30A4, LBP_ID },
- { 0x30A5, 0x30A5, LBP_NS },
+ { 0x30A5, 0x30A5, LBP_CJ },
{ 0x30A6, 0x30A6, LBP_ID },
- { 0x30A7, 0x30A7, LBP_NS },
+ { 0x30A7, 0x30A7, LBP_CJ },
{ 0x30A8, 0x30A8, LBP_ID },
- { 0x30A9, 0x30A9, LBP_NS },
+ { 0x30A9, 0x30A9, LBP_CJ },
{ 0x30AA, 0x30C2, LBP_ID },
- { 0x30C3, 0x30C3, LBP_NS },
+ { 0x30C3, 0x30C3, LBP_CJ },
{ 0x30C4, 0x30E2, LBP_ID },
- { 0x30E3, 0x30E3, LBP_NS },
+ { 0x30E3, 0x30E3, LBP_CJ },
{ 0x30E4, 0x30E4, LBP_ID },
- { 0x30E5, 0x30E5, LBP_NS },
+ { 0x30E5, 0x30E5, LBP_CJ },
{ 0x30E6, 0x30E6, LBP_ID },
- { 0x30E7, 0x30E7, LBP_NS },
+ { 0x30E7, 0x30E7, LBP_CJ },
{ 0x30E8, 0x30ED, LBP_ID },
- { 0x30EE, 0x30EE, LBP_NS },
+ { 0x30EE, 0x30EE, LBP_CJ },
{ 0x30EF, 0x30F4, LBP_ID },
- { 0x30F5, 0x30F6, LBP_NS },
+ { 0x30F5, 0x30F6, LBP_CJ },
{ 0x30F7, 0x30FA, LBP_ID },
- { 0x30FB, 0x30FE, LBP_NS },
+ { 0x30FB, 0x30FB, LBP_NS },
+ { 0x30FC, 0x30FC, LBP_CJ },
+ { 0x30FD, 0x30FE, LBP_NS },
{ 0x30FF, 0x31E3, LBP_ID },
- { 0x31F0, 0x31FF, LBP_NS },
+ { 0x31F0, 0x31FF, LBP_CJ },
{ 0x3200, 0x3247, LBP_ID },
{ 0x3248, 0x324F, LBP_AI },
{ 0x3250, 0x4DBF, LBP_ID },
{ 0xA62A, 0xA66E, LBP_AL },
{ 0xA66F, 0xA672, LBP_CM },
{ 0xA673, 0xA673, LBP_AL },
- { 0xA67C, 0xA67D, LBP_CM },
- { 0xA67E, 0xA6EF, LBP_AL },
+ { 0xA674, 0xA67D, LBP_CM },
+ { 0xA67E, 0xA69D, LBP_AL },
+ { 0xA69F, 0xA69F, LBP_CM },
+ { 0xA6A0, 0xA6EF, LBP_AL },
{ 0xA6F0, 0xA6F1, LBP_CM },
{ 0xA6F2, 0xA6F2, LBP_AL },
{ 0xA6F3, 0xA6F7, LBP_BA },
{ 0xA9C7, 0xA9C9, LBP_BA },
{ 0xA9CA, 0xA9CF, LBP_AL },
{ 0xA9D0, 0xA9D9, LBP_NU },
- { 0xA9DE, 0xAA28, LBP_AL },
+ { 0xA9DE, 0xA9DF, LBP_AL },
+ { 0xA9E0, 0xA9EF, LBP_SA },
+ { 0xA9F0, 0xA9F9, LBP_NU },
+ { 0xA9FA, 0xA9FE, LBP_SA },
+ { 0xAA00, 0xAA28, LBP_AL },
{ 0xAA29, 0xAA36, LBP_CM },
{ 0xAA40, 0xAA42, LBP_AL },
{ 0xAA43, 0xAA43, LBP_CM },
{ 0xAA5C, 0xAA5C, LBP_AL },
{ 0xAA5D, 0xAA5F, LBP_BA },
{ 0xAA60, 0xAADF, LBP_SA },
+ { 0xAAE0, 0xAAEA, LBP_AL },
+ { 0xAAEB, 0xAAEF, LBP_CM },
+ { 0xAAF0, 0xAAF1, LBP_BA },
+ { 0xAAF2, 0xAAF4, LBP_AL },
+ { 0xAAF5, 0xAAF6, LBP_CM },
{ 0xAB01, 0xABE2, LBP_AL },
{ 0xABE3, 0xABEA, LBP_CM },
{ 0xABEB, 0xABEB, LBP_BA },
{ 0xD800, 0xDFFF, LBP_SG },
{ 0xE000, 0xF8FF, LBP_XX },
{ 0xF900, 0xFAFF, LBP_ID },
- { 0xFB00, 0xFB1D, LBP_AL },
+ { 0xFB00, 0xFB17, LBP_AL },
+ { 0xFB1D, 0xFB1D, LBP_HL },
{ 0xFB1E, 0xFB1E, LBP_CM },
- { 0xFB1F, 0xFD3D, LBP_AL },
- { 0xFD3E, 0xFD3E, LBP_OP },
- { 0xFD3F, 0xFD3F, LBP_CL },
+ { 0xFB1F, 0xFB28, LBP_HL },
+ { 0xFB29, 0xFB29, LBP_AL },
+ { 0xFB2A, 0xFB4F, LBP_HL },
+ { 0xFB50, 0xFD3D, LBP_AL },
+ { 0xFD3E, 0xFD3E, LBP_CL },
+ { 0xFD3F, 0xFD3F, LBP_OP },
{ 0xFD50, 0xFDFB, LBP_AL },
{ 0xFDFC, 0xFDFC, LBP_PO },
{ 0xFDFD, 0xFDFD, LBP_AL },
{ 0xFE17, 0xFE17, LBP_OP },
{ 0xFE18, 0xFE18, LBP_CL },
{ 0xFE19, 0xFE19, LBP_IN },
- { 0xFE20, 0xFE26, LBP_CM },
+ { 0xFE20, 0xFE2D, LBP_CM },
{ 0xFE30, 0xFE34, LBP_ID },
{ 0xFE35, 0xFE35, LBP_OP },
{ 0xFE36, 0xFE36, LBP_CL },
{ 0xFF63, 0xFF64, LBP_CL },
{ 0xFF65, 0xFF65, LBP_NS },
{ 0xFF66, 0xFF66, LBP_AL },
- { 0xFF67, 0xFF70, LBP_NS },
+ { 0xFF67, 0xFF70, LBP_CJ },
{ 0xFF71, 0xFF9D, LBP_AL },
{ 0xFF9E, 0xFF9F, LBP_NS },
{ 0xFFA0, 0xFFDC, LBP_AL },
{ 0x10100, 0x10102, LBP_BA },
{ 0x10107, 0x101FC, LBP_AL },
{ 0x101FD, 0x101FD, LBP_CM },
- { 0x10280, 0x1039D, LBP_AL },
+ { 0x10280, 0x102D0, LBP_AL },
+ { 0x102E0, 0x102E0, LBP_CM },
+ { 0x102E1, 0x10375, LBP_AL },
+ { 0x10376, 0x1037A, LBP_CM },
+ { 0x10380, 0x1039D, LBP_AL },
{ 0x1039F, 0x1039F, LBP_BA },
{ 0x103A0, 0x103CF, LBP_AL },
{ 0x103D0, 0x103D0, LBP_BA },
{ 0x103D1, 0x1049D, LBP_AL },
{ 0x104A0, 0x104A9, LBP_NU },
- { 0x10800, 0x10855, LBP_AL },
+ { 0x10500, 0x10855, LBP_AL },
{ 0x10857, 0x10857, LBP_BA },
{ 0x10858, 0x1091B, LBP_AL },
{ 0x1091F, 0x1091F, LBP_BA },
{ 0x10A38, 0x10A3F, LBP_CM },
{ 0x10A40, 0x10A47, LBP_AL },
{ 0x10A50, 0x10A57, LBP_BA },
- { 0x10A58, 0x10B35, LBP_AL },
+ { 0x10A58, 0x10AE4, LBP_AL },
+ { 0x10AE5, 0x10AE6, LBP_CM },
+ { 0x10AEB, 0x10AEF, LBP_AL },
+ { 0x10AF0, 0x10AF5, LBP_BA },
+ { 0x10AF6, 0x10AF6, LBP_IN },
+ { 0x10B00, 0x10B35, LBP_AL },
{ 0x10B39, 0x10B3F, LBP_BA },
{ 0x10B40, 0x10E7E, LBP_AL },
{ 0x11000, 0x11002, LBP_CM },
{ 0x11047, 0x11048, LBP_BA },
{ 0x11049, 0x11065, LBP_AL },
{ 0x11066, 0x1106F, LBP_NU },
- { 0x11080, 0x11082, LBP_CM },
+ { 0x1107F, 0x11082, LBP_CM },
{ 0x11083, 0x110AF, LBP_AL },
{ 0x110B0, 0x110BA, LBP_CM },
{ 0x110BB, 0x110BD, LBP_AL },
{ 0x110BE, 0x110C1, LBP_BA },
- { 0x12000, 0x12462, LBP_AL },
- { 0x12470, 0x12473, LBP_BA },
+ { 0x110D0, 0x110E8, LBP_AL },
+ { 0x110F0, 0x110F9, LBP_NU },
+ { 0x11100, 0x11102, LBP_CM },
+ { 0x11103, 0x11126, LBP_AL },
+ { 0x11127, 0x11134, LBP_CM },
+ { 0x11136, 0x1113F, LBP_NU },
+ { 0x11140, 0x11143, LBP_BA },
+ { 0x11150, 0x11172, LBP_AL },
+ { 0x11173, 0x11173, LBP_CM },
+ { 0x11174, 0x11174, LBP_AL },
+ { 0x11175, 0x11175, LBP_BB },
+ { 0x11176, 0x11176, LBP_AL },
+ { 0x11180, 0x11182, LBP_CM },
+ { 0x11183, 0x111B2, LBP_AL },
+ { 0x111B3, 0x111C0, LBP_CM },
+ { 0x111C1, 0x111C4, LBP_AL },
+ { 0x111C5, 0x111C6, LBP_BA },
+ { 0x111C7, 0x111C7, LBP_AL },
+ { 0x111C8, 0x111C8, LBP_BA },
+ { 0x111CD, 0x111CD, LBP_AL },
+ { 0x111D0, 0x111D9, LBP_NU },
+ { 0x111DA, 0x1122B, LBP_AL },
+ { 0x1122C, 0x11237, LBP_CM },
+ { 0x11238, 0x11239, LBP_BA },
+ { 0x1123A, 0x1123A, LBP_AL },
+ { 0x1123B, 0x1123C, LBP_BA },
+ { 0x1123D, 0x112DE, LBP_AL },
+ { 0x112DF, 0x112EA, LBP_CM },
+ { 0x112F0, 0x112F9, LBP_NU },
+ { 0x11301, 0x11303, LBP_CM },
+ { 0x11305, 0x11339, LBP_AL },
+ { 0x1133C, 0x1133C, LBP_CM },
+ { 0x1133D, 0x1133D, LBP_AL },
+ { 0x1133E, 0x11357, LBP_CM },
+ { 0x1135D, 0x11361, LBP_AL },
+ { 0x11362, 0x11374, LBP_CM },
+ { 0x11480, 0x114AF, LBP_AL },
+ { 0x114B0, 0x114C3, LBP_CM },
+ { 0x114C4, 0x114C7, LBP_AL },
+ { 0x114D0, 0x114D9, LBP_NU },
+ { 0x11580, 0x115AE, LBP_AL },
+ { 0x115AF, 0x115C0, LBP_CM },
+ { 0x115C1, 0x115C1, LBP_BB },
+ { 0x115C2, 0x115C3, LBP_BA },
+ { 0x115C4, 0x115C5, LBP_EX },
+ { 0x115C6, 0x115C8, LBP_AL },
+ { 0x115C9, 0x115C9, LBP_BA },
+ { 0x11600, 0x1162F, LBP_AL },
+ { 0x11630, 0x11640, LBP_CM },
+ { 0x11641, 0x11642, LBP_BA },
+ { 0x11643, 0x11644, LBP_AL },
+ { 0x11650, 0x11659, LBP_NU },
+ { 0x11680, 0x116AA, LBP_AL },
+ { 0x116AB, 0x116B7, LBP_CM },
+ { 0x116C0, 0x116C9, LBP_NU },
+ { 0x118A0, 0x118DF, LBP_AL },
+ { 0x118E0, 0x118E9, LBP_NU },
+ { 0x118EA, 0x1246E, LBP_AL },
+ { 0x12470, 0x12474, LBP_BA },
{ 0x13000, 0x13257, LBP_AL },
{ 0x13258, 0x1325A, LBP_OP },
{ 0x1325B, 0x1325D, LBP_CL },
{ 0x1328A, 0x13378, LBP_AL },
{ 0x13379, 0x13379, LBP_OP },
{ 0x1337A, 0x1337B, LBP_CL },
- { 0x1337C, 0x16A38, LBP_AL },
+ { 0x1337C, 0x16A5E, LBP_AL },
+ { 0x16A60, 0x16A69, LBP_NU },
+ { 0x16A6E, 0x16A6F, LBP_BA },
+ { 0x16AD0, 0x16AED, LBP_AL },
+ { 0x16AF0, 0x16AF4, LBP_CM },
+ { 0x16AF5, 0x16AF5, LBP_BA },
+ { 0x16B00, 0x16B2F, LBP_AL },
+ { 0x16B30, 0x16B36, LBP_CM },
+ { 0x16B37, 0x16B39, LBP_BA },
+ { 0x16B3A, 0x16B43, LBP_AL },
+ { 0x16B44, 0x16B44, LBP_BA },
+ { 0x16B45, 0x16B45, LBP_AL },
+ { 0x16B50, 0x16B59, LBP_NU },
+ { 0x16B5B, 0x16F50, LBP_AL },
+ { 0x16F51, 0x16F92, LBP_CM },
+ { 0x16F93, 0x16F9F, LBP_AL },
{ 0x1B000, 0x1B001, LBP_ID },
+ { 0x1BC00, 0x1BC9C, LBP_AL },
+ { 0x1BC9D, 0x1BC9E, LBP_CM },
+ { 0x1BC9F, 0x1BC9F, LBP_BA },
+ { 0x1BCA0, 0x1BCA3, LBP_CM },
{ 0x1D000, 0x1D164, LBP_AL },
{ 0x1D165, 0x1D169, LBP_CM },
{ 0x1D16A, 0x1D16C, LBP_AL },
{ 0x1D242, 0x1D244, LBP_CM },
{ 0x1D245, 0x1D7CB, LBP_AL },
{ 0x1D7CE, 0x1D7FF, LBP_NU },
- { 0x1F000, 0x1F0DF, LBP_AL },
+ { 0x1E800, 0x1E8CF, LBP_AL },
+ { 0x1E8D0, 0x1E8D6, LBP_CM },
+ { 0x1EE00, 0x1EEF1, LBP_AL },
+ { 0x1F000, 0x1F0F5, LBP_ID },
{ 0x1F100, 0x1F12D, LBP_AI },
{ 0x1F12E, 0x1F12E, LBP_AL },
- { 0x1F130, 0x1F19A, LBP_AI },
- { 0x1F1E6, 0x1F1FF, LBP_AL },
- { 0x1F200, 0x1F251, LBP_ID },
- { 0x1F300, 0x1F773, LBP_AL },
+ { 0x1F130, 0x1F169, LBP_AI },
+ { 0x1F16A, 0x1F16B, LBP_AL },
+ { 0x1F170, 0x1F19A, LBP_AI },
+ { 0x1F1E6, 0x1F1FF, LBP_RI },
+ { 0x1F200, 0x1F39B, LBP_ID },
+ { 0x1F39C, 0x1F39D, LBP_AL },
+ { 0x1F39E, 0x1F3B4, LBP_ID },
+ { 0x1F3B5, 0x1F3B6, LBP_AL },
+ { 0x1F3B7, 0x1F3BB, LBP_ID },
+ { 0x1F3BC, 0x1F3BC, LBP_AL },
+ { 0x1F3BD, 0x1F49F, LBP_ID },
+ { 0x1F4A0, 0x1F4A0, LBP_AL },
+ { 0x1F4A1, 0x1F4A1, LBP_ID },
+ { 0x1F4A2, 0x1F4A2, LBP_AL },
+ { 0x1F4A3, 0x1F4A3, LBP_ID },
+ { 0x1F4A4, 0x1F4A4, LBP_AL },
+ { 0x1F4A5, 0x1F4AE, LBP_ID },
+ { 0x1F4AF, 0x1F4AF, LBP_AL },
+ { 0x1F4B0, 0x1F4B0, LBP_ID },
+ { 0x1F4B1, 0x1F4B2, LBP_AL },
+ { 0x1F4B3, 0x1F4FE, LBP_ID },
+ { 0x1F500, 0x1F506, LBP_AL },
+ { 0x1F507, 0x1F516, LBP_ID },
+ { 0x1F517, 0x1F524, LBP_AL },
+ { 0x1F525, 0x1F531, LBP_ID },
+ { 0x1F532, 0x1F549, LBP_AL },
+ { 0x1F54A, 0x1F5D3, LBP_ID },
+ { 0x1F5D4, 0x1F5DB, LBP_AL },
+ { 0x1F5DC, 0x1F5F3, LBP_ID },
+ { 0x1F5F4, 0x1F5F9, LBP_AL },
+ { 0x1F5FA, 0x1F64F, LBP_ID },
+ { 0x1F650, 0x1F675, LBP_AL },
+ { 0x1F676, 0x1F678, LBP_QU },
+ { 0x1F679, 0x1F67B, LBP_NS },
+ { 0x1F67C, 0x1F67F, LBP_AL },
+ { 0x1F680, 0x1F6F3, LBP_ID },
+ { 0x1F700, 0x1F8AD, LBP_AL },
{ 0x20000, 0x3FFFD, LBP_ID },
{ 0xE0001, 0xE01EF, LBP_CM },
{ 0xF0000, 0x10FFFD, LBP_XX },
-/* vim: set tabstop=4 shiftwidth=4: */
+/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
/*
* Line breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
- * Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
+ * Copyright (C) 2008-2012 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
* distribution.
*
* The main reference is Unicode Standard Annex 14 (UAX #14):
- * <URL:http://www.unicode.org/reports/tr14/>
+ * <URL:http://www.unicode.org/reports/tr14/>
*
* When this library was designed, this annex was at Revision 19, for
* Unicode 5.0.0:
- * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
+ * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
*
- * This library has been updated according to Revision 24, for
- * Unicode 5.2.0:
- * <URL:http://www.unicode.org/reports/tr14/tr14-24.html>
+ * This library has been updated according to Revision 30, for
+ * Unicode 6.2.0:
+ * <URL:http://www.unicode.org/reports/tr14/tr14-30.html>
*
* The Unicode Terms of Use are available at
- * <URL:http://www.unicode.org/copyright.html>
+ * <URL:http://www.unicode.org/copyright.html>
*/
/**
- * @file linebreakdef.c
+ * @file linebreakdef.c
*
* Definition of language-specific data.
*
- * @version 2.0, 2010/01/03
- * @author Wu Yongwei
+ * @version 2.2, 2012/10/06
+ * @author Wu Yongwei
*/
#include "linebreak.h"
* English-specific data over the default Unicode rules.
*/
static struct LineBreakProperties lb_prop_English[] = {
- { 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */
- { 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */
- { 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */
- { 0, 0, LBP_Undefined }
+ { 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */
+ { 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */
+ { 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */
+ { 0, 0, LBP_Undefined }
};
/**
* German-specific data over the default Unicode rules.
*/
static struct LineBreakProperties lb_prop_German[] = {
- { 0x00AB, 0x00AB, LBP_CL }, /* Left double angle quotation mark: closing */
- { 0x00BB, 0x00BB, LBP_OP }, /* Right double angle quotation mark: opening */
- { 0x2018, 0x2018, LBP_CL }, /* Left single quotation mark: closing */
- { 0x201C, 0x201C, LBP_CL }, /* Left double quotation mark: closing */
- { 0x2039, 0x2039, LBP_CL }, /* Left single angle quotation mark: closing */
- { 0x203A, 0x203A, LBP_OP }, /* Right single angle quotation mark: opening */
- { 0, 0, LBP_Undefined }
+ { 0x00AB, 0x00AB, LBP_CL }, /* Left double angle quotation mark: closing */
+ { 0x00BB, 0x00BB, LBP_OP }, /* Right double angle quotation mark: opening */
+ { 0x2018, 0x2018, LBP_CL }, /* Left single quotation mark: closing */
+ { 0x201C, 0x201C, LBP_CL }, /* Left double quotation mark: closing */
+ { 0x2039, 0x2039, LBP_CL }, /* Left single angle quotation mark: closing */
+ { 0x203A, 0x203A, LBP_OP }, /* Right single angle quotation mark: opening */
+ { 0, 0, LBP_Undefined }
};
/**
* Spanish-specific data over the default Unicode rules.
*/
static struct LineBreakProperties lb_prop_Spanish[] = {
- { 0x00AB, 0x00AB, LBP_OP }, /* Left double angle quotation mark: opening */
- { 0x00BB, 0x00BB, LBP_CL }, /* Right double angle quotation mark: closing */
- { 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */
- { 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */
- { 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */
- { 0x2039, 0x2039, LBP_OP }, /* Left single angle quotation mark: opening */
- { 0x203A, 0x203A, LBP_CL }, /* Right single angle quotation mark: closing */
- { 0, 0, LBP_Undefined }
+ { 0x00AB, 0x00AB, LBP_OP }, /* Left double angle quotation mark: opening */
+ { 0x00BB, 0x00BB, LBP_CL }, /* Right double angle quotation mark: closing */
+ { 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */
+ { 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */
+ { 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */
+ { 0x2039, 0x2039, LBP_OP }, /* Left single angle quotation mark: opening */
+ { 0x203A, 0x203A, LBP_CL }, /* Right single angle quotation mark: closing */
+ { 0, 0, LBP_Undefined }
};
/**
* French-specific data over the default Unicode rules.
*/
static struct LineBreakProperties lb_prop_French[] = {
- { 0x00AB, 0x00AB, LBP_OP }, /* Left double angle quotation mark: opening */
- { 0x00BB, 0x00BB, LBP_CL }, /* Right double angle quotation mark: closing */
- { 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */
- { 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */
- { 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */
- { 0x2039, 0x2039, LBP_OP }, /* Left single angle quotation mark: opening */
- { 0x203A, 0x203A, LBP_CL }, /* Right single angle quotation mark: closing */
- { 0, 0, LBP_Undefined }
+ { 0x00AB, 0x00AB, LBP_OP }, /* Left double angle quotation mark: opening */
+ { 0x00BB, 0x00BB, LBP_CL }, /* Right double angle quotation mark: closing */
+ { 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */
+ { 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */
+ { 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */
+ { 0x2039, 0x2039, LBP_OP }, /* Left single angle quotation mark: opening */
+ { 0x203A, 0x203A, LBP_CL }, /* Right single angle quotation mark: closing */
+ { 0, 0, LBP_Undefined }
};
/**
* Russian-specific data over the default Unicode rules.
*/
static struct LineBreakProperties lb_prop_Russian[] = {
- { 0x00AB, 0x00AB, LBP_OP }, /* Left double angle quotation mark: opening */
- { 0x00BB, 0x00BB, LBP_CL }, /* Right double angle quotation mark: closing */
- { 0x201C, 0x201C, LBP_CL }, /* Left double quotation mark: closing */
- { 0, 0, LBP_Undefined }
+ { 0x00AB, 0x00AB, LBP_OP }, /* Left double angle quotation mark: opening */
+ { 0x00BB, 0x00BB, LBP_CL }, /* Right double angle quotation mark: closing */
+ { 0x201C, 0x201C, LBP_CL }, /* Left double quotation mark: closing */
+ { 0, 0, LBP_Undefined }
};
/**
* Chinese-specific data over the default Unicode rules.
*/
static struct LineBreakProperties lb_prop_Chinese[] = {
- { 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */
- { 0x2019, 0x2019, LBP_CL }, /* Right single quotation mark: closing */
- { 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */
- { 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */
- { 0, 0, LBP_Undefined }
+ { 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */
+ { 0x2019, 0x2019, LBP_CL }, /* Right single quotation mark: closing */
+ { 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */
+ { 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */
+ { 0, 0, LBP_Undefined }
};
/**
* you may want to redefine \e lb_prop_lang_map in your C source file.
*/
struct LineBreakPropertiesLang lb_prop_lang_map[] = {
- { "en", 2, lb_prop_English },
- { "de", 2, lb_prop_German },
- { "es", 2, lb_prop_Spanish },
- { "fr", 2, lb_prop_French },
- { "ru", 2, lb_prop_Russian },
- { "zh", 2, lb_prop_Chinese },
- { NULL, 0, NULL }
+ { "en", 2, lb_prop_English },
+ { "de", 2, lb_prop_German },
+ { "es", 2, lb_prop_Spanish },
+ { "fr", 2, lb_prop_French },
+ { "ru", 2, lb_prop_Russian },
+ { "zh", 2, lb_prop_Chinese },
+ { NULL, 0, NULL }
};
-/* vim: set tabstop=4 shiftwidth=4: */
+/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
/*
* Line breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
- * Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
+ * Copyright (C) 2008-2013 Wu Yongwei <wuyongwei at gmail dot com>
+ * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
* distribution.
*
* The main reference is Unicode Standard Annex 14 (UAX #14):
- * <URL:http://www.unicode.org/reports/tr14/>
+ * <URL:http://www.unicode.org/reports/tr14/>
*
* When this library was designed, this annex was at Revision 19, for
* Unicode 5.0.0:
- * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
+ * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
*
- * This library has been updated according to Revision 24, for
- * Unicode 5.2.0:
- * <URL:http://www.unicode.org/reports/tr14/tr14-24.html>
+ * This library has been updated according to Revision 30, for
+ * Unicode 6.2.0:
+ * <URL:http://www.unicode.org/reports/tr14/tr14-30.html>
*
* The Unicode Terms of Use are available at
- * <URL:http://www.unicode.org/copyright.html>
+ * <URL:http://www.unicode.org/copyright.html>
*/
/**
- * @file linebreakdef.h
+ * @file linebreakdef.h
*
* Definitions of internal data structures, declarations of global
* variables, and function prototypes for the line breaking algorithm.
*
- * @version 2.0, 2010/01/03
- * @author Wu Yongwei
+ * @version 2.4, 2013/11/10
+ * @author Wu Yongwei
+ * @author Petr Filipsky
*/
/**
* Constant value to mark the end of string. It is not a valid Unicode
* character.
*/
-#define EOS 0xFFFF
+#define EOS 0xFFFFFFFF
/**
* Line break classes. This is a direct mapping of Table 1 of Unicode
- * Standard Annex 14, Revision 19.
+ * Standard Annex 14, Revision 26.
*/
enum LineBreakClass
{
- /* This is used to signal an error condition. */
- LBP_Undefined, /**< Undefined */
+ /* This is used to signal an error condition. */
+ LBP_Undefined, /**< Undefined */
- /* The following break classes are treated in the pair table. */
- LBP_OP, /**< Opening punctuation */
- LBP_CL, /**< Closing punctuation */
- LBP_CP, /**< Closing parenthesis */
- LBP_QU, /**< Ambiguous quotation */
- LBP_GL, /**< Glue */
- LBP_NS, /**< Non-starters */
- LBP_EX, /**< Exclamation/Interrogation */
- LBP_SY, /**< Symbols allowing break after */
- LBP_IS, /**< Infix separator */
- LBP_PR, /**< Prefix */
- LBP_PO, /**< Postfix */
- LBP_NU, /**< Numeric */
- LBP_AL, /**< Alphabetic */
- LBP_ID, /**< Ideographic */
- LBP_IN, /**< Inseparable characters */
- LBP_HY, /**< Hyphen */
- LBP_BA, /**< Break after */
- LBP_BB, /**< Break before */
- LBP_B2, /**< Break on either side (but not pair) */
- LBP_ZW, /**< Zero-width space */
- LBP_CM, /**< Combining marks */
- LBP_WJ, /**< Word joiner */
- LBP_H2, /**< Hangul LV */
- LBP_H3, /**< Hangul LVT */
- LBP_JL, /**< Hangul L Jamo */
- LBP_JV, /**< Hangul V Jamo */
- LBP_JT, /**< Hangul T Jamo */
+ /* The following break classes are treated in the pair table. */
+ LBP_OP, /**< Opening punctuation */
+ LBP_CL, /**< Closing punctuation */
+ LBP_CP, /**< Closing parenthesis */
+ LBP_QU, /**< Ambiguous quotation */
+ LBP_GL, /**< Glue */
+ LBP_NS, /**< Non-starters */
+ LBP_EX, /**< Exclamation/Interrogation */
+ LBP_SY, /**< Symbols allowing break after */
+ LBP_IS, /**< Infix separator */
+ LBP_PR, /**< Prefix */
+ LBP_PO, /**< Postfix */
+ LBP_NU, /**< Numeric */
+ LBP_AL, /**< Alphabetic */
+ LBP_HL, /**< Hebrew letter */
+ LBP_ID, /**< Ideographic */
+ LBP_IN, /**< Inseparable characters */
+ LBP_HY, /**< Hyphen */
+ LBP_BA, /**< Break after */
+ LBP_BB, /**< Break before */
+ LBP_B2, /**< Break on either side (but not pair) */
+ LBP_ZW, /**< Zero-width space */
+ LBP_CM, /**< Combining marks */
+ LBP_WJ, /**< Word joiner */
+ LBP_H2, /**< Hangul LV */
+ LBP_H3, /**< Hangul LVT */
+ LBP_JL, /**< Hangul L Jamo */
+ LBP_JV, /**< Hangul V Jamo */
+ LBP_JT, /**< Hangul T Jamo */
+ LBP_RI, /**< Regional indicator */
- /* The following break classes are not treated in the pair table */
- LBP_AI, /**< Ambiguous (alphabetic or ideograph) */
- LBP_BK, /**< Break (mandatory) */
- LBP_CB, /**< Contingent break */
- LBP_CR, /**< Carriage return */
- LBP_LF, /**< Line feed */
- LBP_NL, /**< Next line */
- LBP_SA, /**< South-East Asian */
- LBP_SG, /**< Surrogates */
- LBP_SP, /**< Space */
- LBP_XX /**< Unknown */
+ /* The following break classes are not treated in the pair table */
+ LBP_AI, /**< Ambiguous (alphabetic or ideograph) */
+ LBP_BK, /**< Break (mandatory) */
+ LBP_CB, /**< Contingent break */
+ LBP_CJ, /**< Conditional Japanese starter */
+ LBP_CR, /**< Carriage return */
+ LBP_LF, /**< Line feed */
+ LBP_NL, /**< Next line */
+ LBP_SA, /**< South-East Asian */
+ LBP_SG, /**< Surrogates */
+ LBP_SP, /**< Space */
+ LBP_XX /**< Unknown */
};
/**
*/
struct LineBreakProperties
{
- utf32_t start; /**< Starting coding point */
- utf32_t end; /**< End coding point */
- enum LineBreakClass prop; /**< The line breaking property */
+ utf32_t start; /**< Starting coding point */
+ utf32_t end; /**< End coding point */
+ enum LineBreakClass prop; /**< The line breaking property */
};
/**
*/
struct LineBreakPropertiesLang
{
- const char *lang; /**< Language name */
- size_t namelen; /**< Length of name to match */
- struct LineBreakProperties *lbp; /**< Pointer to associated data */
+ const char *lang; /**< Language name */
+ size_t namelen; /**< Length of name to match */
+ struct LineBreakProperties *lbp; /**< Pointer to associated data */
+};
+
+/**
+ * Context representing internal state of the line breaking algorithm.
+ * This is useful to callers if incremental analysis is wanted.
+ */
+struct LineBreakContext
+{
+ const char *lang; /**< Language name */
+ struct LineBreakProperties *lbpLang;/**< Pointer to LineBreakProperties */
+ enum LineBreakClass lbcCur; /**< Breaking class of current codepoint */
+ enum LineBreakClass lbcNew; /**< Breaking class of next codepoint */
+ enum LineBreakClass lbcLast; /**< Breaking class of last codepoint */
};
/**
utf32_t lb_get_next_char_utf8(const utf8_t *s, size_t len, size_t *ip);
utf32_t lb_get_next_char_utf16(const utf16_t *s, size_t len, size_t *ip);
utf32_t lb_get_next_char_utf32(const utf32_t *s, size_t len, size_t *ip);
+void lb_init_break_context(
+ struct LineBreakContext* lbpCtx,
+ utf32_t ch,
+ const char* lang);
+int lb_process_next_char(
+ struct LineBreakContext* lbpCtx,
+ utf32_t ch);
void set_linebreaks(
- const void *s,
- size_t len,
- const char *lang,
- char *brks,
- get_next_char_t get_next_char);
+ const void *s,
+ size_t len,
+ const char *lang,
+ char *brks,
+ get_next_char_t get_next_char);
break;
case WBP_ALetter:
- case WBP_Hebrew:
if ((wbcSeqStart == WBP_ALetter) || /* WB5,6,7 */
(wbcLast == WBP_Numeric) || /* WB10 */
(wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */
/* The content of this file is generated from:
-# WordBreakProperty-7.0.0.txt
-# Date: 2014-02-19, 15:51:39 GMT [MD]
+# WordBreakProperty-6.2.0.txt
+# Date: 2012-08-13, 19:12:09 GMT [MD]
*/
#include "linebreak.h"
{0x000A, 0x000A, WBP_LF},
{0x000B, 0x000C, WBP_Newline},
{0x000D, 0x000D, WBP_CR},
- {0x0022, 0x0022, WBP_Double},
- {0x0027, 0x0027, WBP_Single},
+ {0x0027, 0x0027, WBP_MidNumLet},
{0x002C, 0x002C, WBP_MidNum},
{0x002E, 0x002E, WBP_MidNumLet},
{0x0030, 0x0039, WBP_Numeric},
{0x0295, 0x02AF, WBP_ALetter},
{0x02B0, 0x02C1, WBP_ALetter},
{0x02C6, 0x02D1, WBP_ALetter},
- {0x02D7, 0x02D7, WBP_MidLetter},
{0x02E0, 0x02E4, WBP_ALetter},
{0x02EC, 0x02EC, WBP_ALetter},
{0x02EE, 0x02EE, WBP_ALetter},
{0x037A, 0x037A, WBP_ALetter},
{0x037B, 0x037D, WBP_ALetter},
{0x037E, 0x037E, WBP_MidNum},
- {0x037F, 0x037F, WBP_ALetter},
{0x0386, 0x0386, WBP_ALetter},
{0x0387, 0x0387, WBP_MidLetter},
{0x0388, 0x038A, WBP_ALetter},
{0x03F7, 0x0481, WBP_ALetter},
{0x0483, 0x0487, WBP_Extend},
{0x0488, 0x0489, WBP_Extend},
- {0x048A, 0x052F, WBP_ALetter},
+ {0x048A, 0x0527, WBP_ALetter},
{0x0531, 0x0556, WBP_ALetter},
{0x0559, 0x0559, WBP_ALetter},
{0x0561, 0x0587, WBP_ALetter},
{0x05C1, 0x05C2, WBP_Extend},
{0x05C4, 0x05C5, WBP_Extend},
{0x05C7, 0x05C7, WBP_Extend},
- {0x05D0, 0x05EA, WBP_Hebrew},
- {0x05F0, 0x05F2, WBP_Hebrew},
+ {0x05D0, 0x05EA, WBP_ALetter},
+ {0x05F0, 0x05F2, WBP_ALetter},
{0x05F3, 0x05F3, WBP_ALetter},
{0x05F4, 0x05F4, WBP_MidLetter},
- {0x0600, 0x0605, WBP_Format},
+ {0x0600, 0x0604, WBP_Format},
{0x060C, 0x060D, WBP_MidNum},
{0x0610, 0x061A, WBP_Extend},
- {0x061C, 0x061C, WBP_Format},
{0x0620, 0x063F, WBP_ALetter},
{0x0640, 0x0640, WBP_ALetter},
{0x0641, 0x064A, WBP_ALetter},
{0x0829, 0x082D, WBP_Extend},
{0x0840, 0x0858, WBP_ALetter},
{0x0859, 0x085B, WBP_Extend},
- {0x08A0, 0x08B2, WBP_ALetter},
- {0x08E4, 0x0902, WBP_Extend},
+ {0x08A0, 0x08A0, WBP_ALetter},
+ {0x08A2, 0x08AC, WBP_ALetter},
+ {0x08E4, 0x08FE, WBP_Extend},
+ {0x0900, 0x0902, WBP_Extend},
{0x0903, 0x0903, WBP_Extend},
{0x0904, 0x0939, WBP_ALetter},
{0x093A, 0x093A, WBP_Extend},
{0x0962, 0x0963, WBP_Extend},
{0x0966, 0x096F, WBP_Numeric},
{0x0971, 0x0971, WBP_ALetter},
- {0x0972, 0x0980, WBP_ALetter},
+ {0x0972, 0x0977, WBP_ALetter},
+ {0x0979, 0x097F, WBP_ALetter},
{0x0981, 0x0981, WBP_Extend},
{0x0982, 0x0983, WBP_Extend},
{0x0985, 0x098C, WBP_ALetter},
{0x0BD0, 0x0BD0, WBP_ALetter},
{0x0BD7, 0x0BD7, WBP_Extend},
{0x0BE6, 0x0BEF, WBP_Numeric},
- {0x0C00, 0x0C00, WBP_Extend},
{0x0C01, 0x0C03, WBP_Extend},
{0x0C05, 0x0C0C, WBP_ALetter},
{0x0C0E, 0x0C10, WBP_ALetter},
{0x0C12, 0x0C28, WBP_ALetter},
- {0x0C2A, 0x0C39, WBP_ALetter},
+ {0x0C2A, 0x0C33, WBP_ALetter},
+ {0x0C35, 0x0C39, WBP_ALetter},
{0x0C3D, 0x0C3D, WBP_ALetter},
{0x0C3E, 0x0C40, WBP_Extend},
{0x0C41, 0x0C44, WBP_Extend},
{0x0C60, 0x0C61, WBP_ALetter},
{0x0C62, 0x0C63, WBP_Extend},
{0x0C66, 0x0C6F, WBP_Numeric},
- {0x0C81, 0x0C81, WBP_Extend},
{0x0C82, 0x0C83, WBP_Extend},
{0x0C85, 0x0C8C, WBP_ALetter},
{0x0C8E, 0x0C90, WBP_ALetter},
{0x0CE2, 0x0CE3, WBP_Extend},
{0x0CE6, 0x0CEF, WBP_Numeric},
{0x0CF1, 0x0CF2, WBP_ALetter},
- {0x0D01, 0x0D01, WBP_Extend},
{0x0D02, 0x0D03, WBP_Extend},
{0x0D05, 0x0D0C, WBP_ALetter},
{0x0D0E, 0x0D10, WBP_ALetter},
{0x0DD2, 0x0DD4, WBP_Extend},
{0x0DD6, 0x0DD6, WBP_Extend},
{0x0DD8, 0x0DDF, WBP_Extend},
- {0x0DE6, 0x0DEF, WBP_Numeric},
{0x0DF2, 0x0DF3, WBP_Extend},
{0x0E31, 0x0E31, WBP_Extend},
{0x0E34, 0x0E3A, WBP_Extend},
{0x1681, 0x169A, WBP_ALetter},
{0x16A0, 0x16EA, WBP_ALetter},
{0x16EE, 0x16F0, WBP_ALetter},
- {0x16F1, 0x16F8, WBP_ALetter},
{0x1700, 0x170C, WBP_ALetter},
{0x170E, 0x1711, WBP_ALetter},
{0x1712, 0x1714, WBP_Extend},
{0x17DD, 0x17DD, WBP_Extend},
{0x17E0, 0x17E9, WBP_Numeric},
{0x180B, 0x180D, WBP_Extend},
- {0x180E, 0x180E, WBP_Format},
{0x1810, 0x1819, WBP_Numeric},
{0x1820, 0x1842, WBP_ALetter},
{0x1843, 0x1843, WBP_ALetter},
{0x18A9, 0x18A9, WBP_Extend},
{0x18AA, 0x18AA, WBP_ALetter},
{0x18B0, 0x18F5, WBP_ALetter},
- {0x1900, 0x191E, WBP_ALetter},
+ {0x1900, 0x191C, WBP_ALetter},
{0x1920, 0x1922, WBP_Extend},
{0x1923, 0x1926, WBP_Extend},
{0x1927, 0x1928, WBP_Extend},
{0x19D0, 0x19D9, WBP_Numeric},
{0x1A00, 0x1A16, WBP_ALetter},
{0x1A17, 0x1A18, WBP_Extend},
- {0x1A19, 0x1A1A, WBP_Extend},
- {0x1A1B, 0x1A1B, WBP_Extend},
+ {0x1A19, 0x1A1B, WBP_Extend},
{0x1A55, 0x1A55, WBP_Extend},
{0x1A56, 0x1A56, WBP_Extend},
{0x1A57, 0x1A57, WBP_Extend},
{0x1A7F, 0x1A7F, WBP_Extend},
{0x1A80, 0x1A89, WBP_Numeric},
{0x1A90, 0x1A99, WBP_Numeric},
- {0x1AB0, 0x1ABD, WBP_Extend},
- {0x1ABE, 0x1ABE, WBP_Extend},
{0x1B00, 0x1B03, WBP_Extend},
{0x1B04, 0x1B04, WBP_Extend},
{0x1B05, 0x1B33, WBP_ALetter},
{0x1BA6, 0x1BA7, WBP_Extend},
{0x1BA8, 0x1BA9, WBP_Extend},
{0x1BAA, 0x1BAA, WBP_Extend},
- {0x1BAB, 0x1BAD, WBP_Extend},
+ {0x1BAB, 0x1BAB, WBP_Extend},
+ {0x1BAC, 0x1BAD, WBP_Extend},
{0x1BAE, 0x1BAF, WBP_ALetter},
{0x1BB0, 0x1BB9, WBP_Numeric},
{0x1BBA, 0x1BE5, WBP_ALetter},
{0x1CF2, 0x1CF3, WBP_Extend},
{0x1CF4, 0x1CF4, WBP_Extend},
{0x1CF5, 0x1CF6, WBP_ALetter},
- {0x1CF8, 0x1CF9, WBP_Extend},
{0x1D00, 0x1D2B, WBP_ALetter},
{0x1D2C, 0x1D6A, WBP_ALetter},
{0x1D6B, 0x1D77, WBP_ALetter},
{0x1D78, 0x1D78, WBP_ALetter},
{0x1D79, 0x1D9A, WBP_ALetter},
{0x1D9B, 0x1DBF, WBP_ALetter},
- {0x1DC0, 0x1DF5, WBP_Extend},
+ {0x1DC0, 0x1DE6, WBP_Extend},
{0x1DFC, 0x1DFF, WBP_Extend},
{0x1E00, 0x1F15, WBP_ALetter},
{0x1F18, 0x1F1D, WBP_ALetter},
{0x2044, 0x2044, WBP_MidNum},
{0x2054, 0x2054, WBP_ExtendNumLet},
{0x2060, 0x2064, WBP_Format},
- {0x2066, 0x206F, WBP_Format},
+ {0x206A, 0x206F, WBP_Format},
{0x2071, 0x2071, WBP_ALetter},
{0x207F, 0x207F, WBP_ALetter},
{0x2090, 0x209C, WBP_ALetter},
{0xA670, 0xA672, WBP_Extend},
{0xA674, 0xA67D, WBP_Extend},
{0xA67F, 0xA67F, WBP_ALetter},
- {0xA680, 0xA69B, WBP_ALetter},
- {0xA69C, 0xA69D, WBP_ALetter},
+ {0xA680, 0xA697, WBP_ALetter},
{0xA69F, 0xA69F, WBP_Extend},
{0xA6A0, 0xA6E5, WBP_ALetter},
{0xA6E6, 0xA6EF, WBP_ALetter},
{0xA771, 0xA787, WBP_ALetter},
{0xA788, 0xA788, WBP_ALetter},
{0xA78B, 0xA78E, WBP_ALetter},
- {0xA790, 0xA7AD, WBP_ALetter},
- {0xA7B0, 0xA7B1, WBP_ALetter},
- {0xA7F7, 0xA7F7, WBP_ALetter},
+ {0xA790, 0xA793, WBP_ALetter},
+ {0xA7A0, 0xA7AA, WBP_ALetter},
{0xA7F8, 0xA7F9, WBP_ALetter},
{0xA7FA, 0xA7FA, WBP_ALetter},
{0xA7FB, 0xA801, WBP_ALetter},
{0xA9BD, 0xA9C0, WBP_Extend},
{0xA9CF, 0xA9CF, WBP_ALetter},
{0xA9D0, 0xA9D9, WBP_Numeric},
- {0xA9E5, 0xA9E5, WBP_Extend},
- {0xA9F0, 0xA9F9, WBP_Numeric},
{0xAA00, 0xAA28, WBP_ALetter},
{0xAA29, 0xAA2E, WBP_Extend},
{0xAA2F, 0xAA30, WBP_Extend},
{0xAA4D, 0xAA4D, WBP_Extend},
{0xAA50, 0xAA59, WBP_Numeric},
{0xAA7B, 0xAA7B, WBP_Extend},
- {0xAA7C, 0xAA7C, WBP_Extend},
- {0xAA7D, 0xAA7D, WBP_Extend},
{0xAAB0, 0xAAB0, WBP_Extend},
{0xAAB2, 0xAAB4, WBP_Extend},
{0xAAB7, 0xAAB8, WBP_Extend},
{0xAB11, 0xAB16, WBP_ALetter},
{0xAB20, 0xAB26, WBP_ALetter},
{0xAB28, 0xAB2E, WBP_ALetter},
- {0xAB30, 0xAB5A, WBP_ALetter},
- {0xAB5C, 0xAB5F, WBP_ALetter},
- {0xAB64, 0xAB65, WBP_ALetter},
{0xABC0, 0xABE2, WBP_ALetter},
{0xABE3, 0xABE4, WBP_Extend},
{0xABE5, 0xABE5, WBP_Extend},
{0xD7CB, 0xD7FB, WBP_ALetter},
{0xFB00, 0xFB06, WBP_ALetter},
{0xFB13, 0xFB17, WBP_ALetter},
- {0xFB1D, 0xFB1D, WBP_Hebrew},
+ {0xFB1D, 0xFB1D, WBP_ALetter},
{0xFB1E, 0xFB1E, WBP_Extend},
- {0xFB1F, 0xFB28, WBP_Hebrew},
- {0xFB2A, 0xFB36, WBP_Hebrew},
- {0xFB38, 0xFB3C, WBP_Hebrew},
- {0xFB3E, 0xFB3E, WBP_Hebrew},
- {0xFB40, 0xFB41, WBP_Hebrew},
- {0xFB43, 0xFB44, WBP_Hebrew},
- {0xFB46, 0xFB4F, WBP_Hebrew},
- {0xFB50, 0xFBB1, WBP_ALetter},
+ {0xFB1F, 0xFB28, WBP_ALetter},
+ {0xFB2A, 0xFB36, WBP_ALetter},
+ {0xFB38, 0xFB3C, WBP_ALetter},
+ {0xFB3E, 0xFB3E, WBP_ALetter},
+ {0xFB40, 0xFB41, WBP_ALetter},
+ {0xFB43, 0xFB44, WBP_ALetter},
+ {0xFB46, 0xFBB1, WBP_ALetter},
{0xFBD3, 0xFD3D, WBP_ALetter},
{0xFD50, 0xFD8F, WBP_ALetter},
{0xFD92, 0xFDC7, WBP_ALetter},
{0xFE10, 0xFE10, WBP_MidNum},
{0xFE13, 0xFE13, WBP_MidLetter},
{0xFE14, 0xFE14, WBP_MidNum},
- {0xFE20, 0xFE2D, WBP_Extend},
+ {0xFE20, 0xFE26, WBP_Extend},
{0xFE33, 0xFE34, WBP_ExtendNumLet},
{0xFE4D, 0xFE4F, WBP_ExtendNumLet},
{0xFE50, 0xFE50, WBP_MidNum},
{0x101FD, 0x101FD, WBP_Extend},
{0x10280, 0x1029C, WBP_ALetter},
{0x102A0, 0x102D0, WBP_ALetter},
- {0x102E0, 0x102E0, WBP_Extend},
- {0x10300, 0x1031F, WBP_ALetter},
+ {0x10300, 0x1031E, WBP_ALetter},
{0x10330, 0x10340, WBP_ALetter},
{0x10341, 0x10341, WBP_ALetter},
{0x10342, 0x10349, WBP_ALetter},
{0x1034A, 0x1034A, WBP_ALetter},
- {0x10350, 0x10375, WBP_ALetter},
- {0x10376, 0x1037A, WBP_Extend},
{0x10380, 0x1039D, WBP_ALetter},
{0x103A0, 0x103C3, WBP_ALetter},
{0x103C8, 0x103CF, WBP_ALetter},
{0x10400, 0x1044F, WBP_ALetter},
{0x10450, 0x1049D, WBP_ALetter},
{0x104A0, 0x104A9, WBP_Numeric},
- {0x10500, 0x10527, WBP_ALetter},
- {0x10530, 0x10563, WBP_ALetter},
- {0x10600, 0x10736, WBP_ALetter},
- {0x10740, 0x10755, WBP_ALetter},
- {0x10760, 0x10767, WBP_ALetter},
{0x10800, 0x10805, WBP_ALetter},
{0x10808, 0x10808, WBP_ALetter},
{0x1080A, 0x10835, WBP_ALetter},
{0x10837, 0x10838, WBP_ALetter},
{0x1083C, 0x1083C, WBP_ALetter},
{0x1083F, 0x10855, WBP_ALetter},
- {0x10860, 0x10876, WBP_ALetter},
- {0x10880, 0x1089E, WBP_ALetter},
{0x10900, 0x10915, WBP_ALetter},
{0x10920, 0x10939, WBP_ALetter},
{0x10980, 0x109B7, WBP_ALetter},
{0x10A38, 0x10A3A, WBP_Extend},
{0x10A3F, 0x10A3F, WBP_Extend},
{0x10A60, 0x10A7C, WBP_ALetter},
- {0x10A80, 0x10A9C, WBP_ALetter},
- {0x10AC0, 0x10AC7, WBP_ALetter},
- {0x10AC9, 0x10AE4, WBP_ALetter},
- {0x10AE5, 0x10AE6, WBP_Extend},
{0x10B00, 0x10B35, WBP_ALetter},
{0x10B40, 0x10B55, WBP_ALetter},
{0x10B60, 0x10B72, WBP_ALetter},
- {0x10B80, 0x10B91, WBP_ALetter},
{0x10C00, 0x10C48, WBP_ALetter},
{0x11000, 0x11000, WBP_Extend},
{0x11001, 0x11001, WBP_Extend},
{0x11003, 0x11037, WBP_ALetter},
{0x11038, 0x11046, WBP_Extend},
{0x11066, 0x1106F, WBP_Numeric},
- {0x1107F, 0x11081, WBP_Extend},
+ {0x11080, 0x11081, WBP_Extend},
{0x11082, 0x11082, WBP_Extend},
{0x11083, 0x110AF, WBP_ALetter},
{0x110B0, 0x110B2, WBP_Extend},
{0x1112C, 0x1112C, WBP_Extend},
{0x1112D, 0x11134, WBP_Extend},
{0x11136, 0x1113F, WBP_Numeric},
- {0x11150, 0x11172, WBP_ALetter},
- {0x11173, 0x11173, WBP_Extend},
- {0x11176, 0x11176, WBP_ALetter},
{0x11180, 0x11181, WBP_Extend},
{0x11182, 0x11182, WBP_Extend},
{0x11183, 0x111B2, WBP_ALetter},
{0x111BF, 0x111C0, WBP_Extend},
{0x111C1, 0x111C4, WBP_ALetter},
{0x111D0, 0x111D9, WBP_Numeric},
- {0x111DA, 0x111DA, WBP_ALetter},
- {0x11200, 0x11211, WBP_ALetter},
- {0x11213, 0x1122B, WBP_ALetter},
- {0x1122C, 0x1122E, WBP_Extend},
- {0x1122F, 0x11231, WBP_Extend},
- {0x11232, 0x11233, WBP_Extend},
- {0x11234, 0x11234, WBP_Extend},
- {0x11235, 0x11235, WBP_Extend},
- {0x11236, 0x11237, WBP_Extend},
- {0x112B0, 0x112DE, WBP_ALetter},
- {0x112DF, 0x112DF, WBP_Extend},
- {0x112E0, 0x112E2, WBP_Extend},
- {0x112E3, 0x112EA, WBP_Extend},
- {0x112F0, 0x112F9, WBP_Numeric},
- {0x11301, 0x11301, WBP_Extend},
- {0x11302, 0x11303, WBP_Extend},
- {0x11305, 0x1130C, WBP_ALetter},
- {0x1130F, 0x11310, WBP_ALetter},
- {0x11313, 0x11328, WBP_ALetter},
- {0x1132A, 0x11330, WBP_ALetter},
- {0x11332, 0x11333, WBP_ALetter},
- {0x11335, 0x11339, WBP_ALetter},
- {0x1133C, 0x1133C, WBP_Extend},
- {0x1133D, 0x1133D, WBP_ALetter},
- {0x1133E, 0x1133F, WBP_Extend},
- {0x11340, 0x11340, WBP_Extend},
- {0x11341, 0x11344, WBP_Extend},
- {0x11347, 0x11348, WBP_Extend},
- {0x1134B, 0x1134D, WBP_Extend},
- {0x11357, 0x11357, WBP_Extend},
- {0x1135D, 0x11361, WBP_ALetter},
- {0x11362, 0x11363, WBP_Extend},
- {0x11366, 0x1136C, WBP_Extend},
- {0x11370, 0x11374, WBP_Extend},
- {0x11480, 0x114AF, WBP_ALetter},
- {0x114B0, 0x114B2, WBP_Extend},
- {0x114B3, 0x114B8, WBP_Extend},
- {0x114B9, 0x114B9, WBP_Extend},
- {0x114BA, 0x114BA, WBP_Extend},
- {0x114BB, 0x114BE, WBP_Extend},
- {0x114BF, 0x114C0, WBP_Extend},
- {0x114C1, 0x114C1, WBP_Extend},
- {0x114C2, 0x114C3, WBP_Extend},
- {0x114C4, 0x114C5, WBP_ALetter},
- {0x114C7, 0x114C7, WBP_ALetter},
- {0x114D0, 0x114D9, WBP_Numeric},
- {0x11580, 0x115AE, WBP_ALetter},
- {0x115AF, 0x115B1, WBP_Extend},
- {0x115B2, 0x115B5, WBP_Extend},
- {0x115B8, 0x115BB, WBP_Extend},
- {0x115BC, 0x115BD, WBP_Extend},
- {0x115BE, 0x115BE, WBP_Extend},
- {0x115BF, 0x115C0, WBP_Extend},
- {0x11600, 0x1162F, WBP_ALetter},
- {0x11630, 0x11632, WBP_Extend},
- {0x11633, 0x1163A, WBP_Extend},
- {0x1163B, 0x1163C, WBP_Extend},
- {0x1163D, 0x1163D, WBP_Extend},
- {0x1163E, 0x1163E, WBP_Extend},
- {0x1163F, 0x11640, WBP_Extend},
- {0x11644, 0x11644, WBP_ALetter},
- {0x11650, 0x11659, WBP_Numeric},
{0x11680, 0x116AA, WBP_ALetter},
{0x116AB, 0x116AB, WBP_Extend},
{0x116AC, 0x116AC, WBP_Extend},
{0x116B6, 0x116B6, WBP_Extend},
{0x116B7, 0x116B7, WBP_Extend},
{0x116C0, 0x116C9, WBP_Numeric},
- {0x118A0, 0x118DF, WBP_ALetter},
- {0x118E0, 0x118E9, WBP_Numeric},
- {0x118FF, 0x118FF, WBP_ALetter},
- {0x11AC0, 0x11AF8, WBP_ALetter},
- {0x12000, 0x12398, WBP_ALetter},
- {0x12400, 0x1246E, WBP_ALetter},
+ {0x12000, 0x1236E, WBP_ALetter},
+ {0x12400, 0x12462, WBP_ALetter},
{0x13000, 0x1342E, WBP_ALetter},
{0x16800, 0x16A38, WBP_ALetter},
- {0x16A40, 0x16A5E, WBP_ALetter},
- {0x16A60, 0x16A69, WBP_Numeric},
- {0x16AD0, 0x16AED, WBP_ALetter},
- {0x16AF0, 0x16AF4, WBP_Extend},
- {0x16B00, 0x16B2F, WBP_ALetter},
- {0x16B30, 0x16B36, WBP_Extend},
- {0x16B40, 0x16B43, WBP_ALetter},
- {0x16B50, 0x16B59, WBP_Numeric},
- {0x16B63, 0x16B77, WBP_ALetter},
- {0x16B7D, 0x16B8F, WBP_ALetter},
{0x16F00, 0x16F44, WBP_ALetter},
{0x16F50, 0x16F50, WBP_ALetter},
{0x16F51, 0x16F7E, WBP_Extend},
{0x16F8F, 0x16F92, WBP_Extend},
{0x16F93, 0x16F9F, WBP_ALetter},
{0x1B000, 0x1B000, WBP_Katakana},
- {0x1BC00, 0x1BC6A, WBP_ALetter},
- {0x1BC70, 0x1BC7C, WBP_ALetter},
- {0x1BC80, 0x1BC88, WBP_ALetter},
- {0x1BC90, 0x1BC99, WBP_ALetter},
- {0x1BC9D, 0x1BC9E, WBP_Extend},
- {0x1BCA0, 0x1BCA3, WBP_Format},
{0x1D165, 0x1D166, WBP_Extend},
{0x1D167, 0x1D169, WBP_Extend},
{0x1D16D, 0x1D172, WBP_Extend},
{0x1D7AA, 0x1D7C2, WBP_ALetter},
{0x1D7C4, 0x1D7CB, WBP_ALetter},
{0x1D7CE, 0x1D7FF, WBP_Numeric},
- {0x1E800, 0x1E8C4, WBP_ALetter},
- {0x1E8D0, 0x1E8D6, WBP_Extend},
{0x1EE00, 0x1EE03, WBP_ALetter},
{0x1EE05, 0x1EE1F, WBP_ALetter},
{0x1EE21, 0x1EE22, WBP_ALetter},
{0x1EEA1, 0x1EEA3, WBP_ALetter},
{0x1EEA5, 0x1EEA9, WBP_ALetter},
{0x1EEAB, 0x1EEBB, WBP_ALetter},
- {0x1F130, 0x1F149, WBP_ALetter},
- {0x1F150, 0x1F169, WBP_ALetter},
- {0x1F170, 0x1F189, WBP_ALetter},
{0x1F1E6, 0x1F1FF, WBP_Regional},
{0xE0001, 0xE0001, WBP_Format},
{0xE0020, 0xE007F, WBP_Format},