}
/* table of html escapes (that i can find) this should be ordered with the
- * most common first as it's a linear search to match - no hash for this.
+ * entries sorted by their escape strings and values, as matching is a binary search - no hash for this.
*
- * these are stored as one large string and one additional array that
- * contains the offsets to the tokens for space efficiency.
+ * these are stored as arrays of Escape_Value structs, already sorted in the source (no runtime sort happens).
*/
+
+
+/**
+ * @internal
+ * @var escape_values_e_sorted[]
+ * This array consists of Escape_Value structures sorted by escape string.
+ * Any newly added entry must be inserted at its sorted position and mirrored in escape_values_v_sorted.
+ */
+typedef struct _Escape_Value Escape_Value; /* one escape <-> value pair, the row type of the lookup tables */
+
+struct _Escape_Value
+{
+ char *escape; /* escape string (first ESCAPE_VALUE argument); NOTE(review): points at a string literal but is not const-qualified */
+ char *value; /* bytes the escape stands for (second ESCAPE_VALUE argument) */
+ size_t escape_len; /* strlen(escape), filled in by ESCAPE_VALUE */
+ size_t value_len; /* strlen(value), filled in by ESCAPE_VALUE */
+};
+
+#define ESCAPE_VALUE(e,v) {e,v,strlen(e),strlen(v)} /* builds one Escape_Value row with both lengths precomputed. NOTE(review): strlen() in a static initializer is not a standard constant expression (sizeof(e) - 1 would be) - confirm every target compiler accepts it */
+
+static const Escape_Value escape_values_e_sorted[] = {
+ ESCAPE_VALUE("Á", "\xc3\x81"),
+ ESCAPE_VALUE("Â", "\xc3\x82"),
+ ESCAPE_VALUE("&Aelig;", "\xc3\x86"),
+ ESCAPE_VALUE("À", "\xc3\x80"),
+ ESCAPE_VALUE("Å", "\xc3\x85"),
+ ESCAPE_VALUE("Ã", "\xc3\x83"),
+ ESCAPE_VALUE("Ä", "\xc3\x84"),
+ ESCAPE_VALUE("Ç", "\xc3\x87"),
+ ESCAPE_VALUE("‡", "\xe2\x80\xa1"),
+ ESCAPE_VALUE("É", "\xc3\x89"),
+ ESCAPE_VALUE("Ê", "\xc3\x8a"),
+ ESCAPE_VALUE("È", "\xc3\x88"),
+ ESCAPE_VALUE("&Eth;", "\xc3\x90"),
+ ESCAPE_VALUE("Ë", "\xc3\x8b"),
+ ESCAPE_VALUE("Í", "\xc3\x8d"),
+ ESCAPE_VALUE("Î", "\xc3\x8e"),
+ ESCAPE_VALUE("Ì", "\xc3\x8c"),
+ ESCAPE_VALUE("Ï", "\xc3\x8f"),
+ ESCAPE_VALUE("Ñ", "\xc3\x91"),
+ ESCAPE_VALUE("Ó", "\xc3\x93"),
+ ESCAPE_VALUE("Ô", "\xc3\x94"),
+ ESCAPE_VALUE("Ò", "\xc3\x92"),
+ ESCAPE_VALUE("Ø", "\xc3\x98"),
+ ESCAPE_VALUE("Õ", "\xc3\x95"),
+ ESCAPE_VALUE("Ö", "\xc3\x96"),
+ ESCAPE_VALUE("&Thorn;", "\xc3\x9e"),
+ ESCAPE_VALUE("Ú", "\xc3\x9a"),
+ ESCAPE_VALUE("Û", "\xc3\x9b"),
+ ESCAPE_VALUE("Ù", "\xc3\x99"),
+ ESCAPE_VALUE("Ý", "\xc3\x9d"),
+ ESCAPE_VALUE("á", "\xc3\xa1"),
+ ESCAPE_VALUE("â", "\xc3\xa2"),
+ ESCAPE_VALUE("´", "\xc2\xb4"),
+ ESCAPE_VALUE("æ", "\xc3\xa6"),
+ ESCAPE_VALUE("à", "\xc3\xa0"),
+ ESCAPE_VALUE("α", "\xce\x91"),
+ ESCAPE_VALUE("∧", "\xe2\x88\xa7"),
+ ESCAPE_VALUE("å", "\xc3\xa5"),
+ ESCAPE_VALUE("ã", "\xc3\xa3"),
+ ESCAPE_VALUE("ä", "\xc3\xa4"),
+ ESCAPE_VALUE("β", "\xce\x92"),
+ ESCAPE_VALUE("¦", "\xc2\xa6"),
+ ESCAPE_VALUE("•", "\xe2\x80\xa2"),
+ ESCAPE_VALUE("ç", "\xc3\xa7"),
+ ESCAPE_VALUE("¸", "\xc2\xb8"),
+ ESCAPE_VALUE("¢", "\xc2\xa2"),
+ ESCAPE_VALUE("χ", "\xce\xa7"),
+ ESCAPE_VALUE("©", "\xc2\xa9"),
+ ESCAPE_VALUE("¤", "\xc2\xa4"),
+ ESCAPE_VALUE("†", "\xe2\x80\xa0"),
+ ESCAPE_VALUE("↓", "\xe2\x86\x93"),
+ ESCAPE_VALUE("°", "\xc2\xb0"),
+ ESCAPE_VALUE("δ", "\xce\x94"),
+ ESCAPE_VALUE("÷", "\xc3\xb7"),
+ ESCAPE_VALUE("é", "\xc3\xa9"),
+ ESCAPE_VALUE("ê", "\xc3\xaa"),
+ ESCAPE_VALUE("è", "\xc3\xa8"),
+ ESCAPE_VALUE("ε", "\xce\x95"),
+ ESCAPE_VALUE("≡", "\xe2\x89\xa1"),
+ ESCAPE_VALUE("η", "\xce\x97"),
+ ESCAPE_VALUE("ð", "\xc3\xb0"),
+ ESCAPE_VALUE("ë", "\xc3\xab"),
+ ESCAPE_VALUE("€", "\xe2\x82\xac"),
+ ESCAPE_VALUE("∃", "\xe2\x88\x83"),
+ ESCAPE_VALUE("∀", "\xe2\x88\x80"),
+ ESCAPE_VALUE("½", "\xc2\xbd"),
+ ESCAPE_VALUE("¼", "\xc2\xbc"),
+ ESCAPE_VALUE("¾", "\xc2\xbe"),
+ ESCAPE_VALUE("γ", "\xce\x93"),
+ ESCAPE_VALUE("↔", "\xe2\x86\x94"),
+ ESCAPE_VALUE("…", "\xe2\x80\xa6"),
+ ESCAPE_VALUE("í", "\xc3\xad"),
+ ESCAPE_VALUE("î", "\xc3\xae"),
+ ESCAPE_VALUE("¡", "\xc2\xa1"),
+ ESCAPE_VALUE("ì", "\xc3\xac"),
+ ESCAPE_VALUE("∫", "\xe2\x88\xab"),
+ ESCAPE_VALUE("ι", "\xce\x99"),
+ ESCAPE_VALUE("¿", "\xc2\xbf"),
+ ESCAPE_VALUE("ï", "\xc3\xaf"),
+ ESCAPE_VALUE("κ", "\xce\x9a"),
+ ESCAPE_VALUE("λ", "\xce\x9b"),
+ ESCAPE_VALUE("«", "\xc2\xab"),
+ ESCAPE_VALUE("←", "\xe2\x86\x90"),
+ ESCAPE_VALUE("←", "\xe2\x87\x90"),
+ ESCAPE_VALUE("‎", "\xe2\x80\x8e"),
+ ESCAPE_VALUE("¯", "\xc2\xaf"),
+ ESCAPE_VALUE("µ", "\xc2\xb5"),
+ ESCAPE_VALUE("·", "\xc2\xb7"),
+ ESCAPE_VALUE("μ", "\xce\x9c"),
+ ESCAPE_VALUE("∇", "\xe2\x88\x87"),
+ ESCAPE_VALUE(" ", "\xc2\xa0"),
+ ESCAPE_VALUE("≠", "\xe2\x89\xa0"),
+ ESCAPE_VALUE("¬", "\xc2\xac"),
+ ESCAPE_VALUE("ñ", "\xc3\xb1"),
+ ESCAPE_VALUE("ν", "\xce\x9d"),
+ ESCAPE_VALUE("ó", "\xc3\xb3"),
+ ESCAPE_VALUE("ô", "\xc3\xb4"),
+ ESCAPE_VALUE("ò", "\xc3\xb2"),
+ ESCAPE_VALUE("ω", "\xce\xa9"),
+ ESCAPE_VALUE("ο", "\xce\x9f"),
+ ESCAPE_VALUE("⊕", "\xe2\x8a\x95"),
+ ESCAPE_VALUE("∨", "\xe2\x88\xa8"),
+ ESCAPE_VALUE("ª", "\xc2\xaa"),
+ ESCAPE_VALUE("º", "\xc2\xba"),
+ ESCAPE_VALUE("ø", "\xc3\xb8"),
+ ESCAPE_VALUE("õ", "\xc3\xb5"),
+ ESCAPE_VALUE("ö", "\xc3\xb6"),
+ ESCAPE_VALUE("¶", "\xc2\xb6"),
+ ESCAPE_VALUE("⊥", "\xe2\x8a\xa5"),
+ ESCAPE_VALUE("φ", "\xce\xa6"),
+ ESCAPE_VALUE("π", "\xce\xa0"),
+ ESCAPE_VALUE("±", "\xc2\xb1"),
+ ESCAPE_VALUE("£", "\xc2\xa3"),
+ ESCAPE_VALUE("∏", "\xe2\x88\x8f"),
+ ESCAPE_VALUE("ψ", "\xce\xa8"),
+ ESCAPE_VALUE("»", "\xc2\xbb"),
+ ESCAPE_VALUE("→", "\xe2\x86\x92"),
+ ESCAPE_VALUE("→", "\xe2\x87\x92"),
+ ESCAPE_VALUE("®", "\xc2\xae"),
+ ESCAPE_VALUE("ρ", "\xce\xa1"),
+ ESCAPE_VALUE("‏", "\xe2\x80\x8f"),
+ ESCAPE_VALUE("§", "\xc2\xa7"),
+ ESCAPE_VALUE("­", "\xc2\xad"),
+ ESCAPE_VALUE("σ", "\xce\xa3"),
+ ESCAPE_VALUE("∑", "\xe2\x88\x91"),
+ ESCAPE_VALUE("¹", "\xc2\xb9"),
+ ESCAPE_VALUE("²", "\xc2\xb2"),
+ ESCAPE_VALUE("³", "\xc2\xb3"),
+ ESCAPE_VALUE("ß", "\xc3\x9f"),
+ ESCAPE_VALUE("τ", "\xce\xa4"),
+ ESCAPE_VALUE("θ", "\xce\x98"),
+ ESCAPE_VALUE("þ", "\xc3\xbe"),
+ ESCAPE_VALUE("×", "\xc3\x97"),
+ ESCAPE_VALUE("ú", "\xc3\xba"),
+ ESCAPE_VALUE("↑", "\xe2\x86\x91"),
+ ESCAPE_VALUE("û", "\xc3\xbb"),
+ ESCAPE_VALUE("ù", "\xc3\xb9"),
+ ESCAPE_VALUE("¨", "\xc2\xa8"),
+ ESCAPE_VALUE("υ", "\xce\xa5"),
+ ESCAPE_VALUE("ü", "\xc3\xbc"),
+ ESCAPE_VALUE("ξ", "\xce\x9e"),
+ ESCAPE_VALUE("ý", "\xc3\xbd"),
+ ESCAPE_VALUE("¥", "\xc2\xa5"),
+ ESCAPE_VALUE("ÿ", "\xc3\xbf"),
+ ESCAPE_VALUE("ζ", "\xce\x96"),
+ ESCAPE_VALUE("‍", "\xe2\x80\x8d"),
+ ESCAPE_VALUE("‌", "\xe2\x80\x8c"),
+};
+
+
/**
* @internal
- * @var escape_strings[]
- * This string consists of NULL terminated pairs of strings, the first of
- * every pair is an escape and the second is the value of the escape.
+ * @var escape_values_e_common_sorted[]
+ * Same layout and ordering rule as escape_values_e_sorted, but holding only the small subset of common escapes.
*/
-static const char escape_strings[] =
-/* most common escaped stuff */
-""\0" "\x22\0"
-"&\0" "\x26\0"
-"'\0" "\x27\0"
-"<\0" "\x3c\0"
-">\0" "\x3e\0"
-/* all the rest */
-" \0" "\xc2\xa0\0"
-"¡\0" "\xc2\xa1\0"
-"¢\0" "\xc2\xa2\0"
-"£\0" "\xc2\xa3\0"
-"¤\0" "\xc2\xa4\0"
-"¥\0" "\xc2\xa5\0"
-"¦\0" "\xc2\xa6\0"
-"§\0" "\xc2\xa7\0"
-"¨\0" "\xc2\xa8\0"
-"©\0" "\xc2\xa9\0"
-"ª\0" "\xc2\xaa\0"
-"«\0" "\xc2\xab\0"
-"¬\0" "\xc2\xac\0"
-"­\0" "\xc2\xad\0"
-"®\0" "\xc2\xae\0"
-"¯\0" "\xc2\xaf\0"
-"°\0" "\xc2\xb0\0"
-"±\0" "\xc2\xb1\0"
-"²\0" "\xc2\xb2\0"
-"³\0" "\xc2\xb3\0"
-"´\0" "\xc2\xb4\0"
-"µ\0" "\xc2\xb5\0"
-"¶\0" "\xc2\xb6\0"
-"·\0" "\xc2\xb7\0"
-"¸\0" "\xc2\xb8\0"
-"¹\0" "\xc2\xb9\0"
-"º\0" "\xc2\xba\0"
-"»\0" "\xc2\xbb\0"
-"¼\0" "\xc2\xbc\0"
-"½\0" "\xc2\xbd\0"
-"¾\0" "\xc2\xbe\0"
-"¿\0" "\xc2\xbf\0"
-"À\0" "\xc3\x80\0"
-"Á\0" "\xc3\x81\0"
-"Â\0" "\xc3\x82\0"
-"Ã\0" "\xc3\x83\0"
-"Ä\0" "\xc3\x84\0"
-"Å\0" "\xc3\x85\0"
-"&Aelig;\0" "\xc3\x86\0"
-"Ç\0" "\xc3\x87\0"
-"È\0" "\xc3\x88\0"
-"É\0" "\xc3\x89\0"
-"Ê\0" "\xc3\x8a\0"
-"Ë\0" "\xc3\x8b\0"
-"Ì\0" "\xc3\x8c\0"
-"Í\0" "\xc3\x8d\0"
-"Î\0" "\xc3\x8e\0"
-"Ï\0" "\xc3\x8f\0"
-"&Eth;\0" "\xc3\x90\0"
-"Ñ\0" "\xc3\x91\0"
-"Ò\0" "\xc3\x92\0"
-"Ó\0" "\xc3\x93\0"
-"Ô\0" "\xc3\x94\0"
-"Õ\0" "\xc3\x95\0"
-"Ö\0" "\xc3\x96\0"
-"×\0" "\xc3\x97\0"
-"Ø\0" "\xc3\x98\0"
-"Ù\0" "\xc3\x99\0"
-"Ú\0" "\xc3\x9a\0"
-"Û\0" "\xc3\x9b\0"
-"Ý\0" "\xc3\x9d\0"
-"&Thorn;\0" "\xc3\x9e\0"
-"ß\0" "\xc3\x9f\0"
-"à\0" "\xc3\xa0\0"
-"á\0" "\xc3\xa1\0"
-"â\0" "\xc3\xa2\0"
-"ã\0" "\xc3\xa3\0"
-"ä\0" "\xc3\xa4\0"
-"å\0" "\xc3\xa5\0"
-"æ\0" "\xc3\xa6\0"
-"ç\0" "\xc3\xa7\0"
-"è\0" "\xc3\xa8\0"
-"é\0" "\xc3\xa9\0"
-"ê\0" "\xc3\xaa\0"
-"ë\0" "\xc3\xab\0"
-"ì\0" "\xc3\xac\0"
-"í\0" "\xc3\xad\0"
-"î\0" "\xc3\xae\0"
-"ï\0" "\xc3\xaf\0"
-"ð\0" "\xc3\xb0\0"
-"ñ\0" "\xc3\xb1\0"
-"ò\0" "\xc3\xb2\0"
-"ó\0" "\xc3\xb3\0"
-"ô\0" "\xc3\xb4\0"
-"õ\0" "\xc3\xb5\0"
-"ö\0" "\xc3\xb6\0"
-"÷\0" "\xc3\xb7\0"
-"ø\0" "\xc3\xb8\0"
-"ù\0" "\xc3\xb9\0"
-"ú\0" "\xc3\xba\0"
-"û\0" "\xc3\xbb\0"
-"ü\0" "\xc3\xbc\0"
-"ý\0" "\xc3\xbd\0"
-"þ\0" "\xc3\xbe\0"
-"ÿ\0" "\xc3\xbf\0"
-"α\0" "\xce\x91\0"
-"β\0" "\xce\x92\0"
-"γ\0" "\xce\x93\0"
-"δ\0" "\xce\x94\0"
-"ε\0" "\xce\x95\0"
-"ζ\0" "\xce\x96\0"
-"η\0" "\xce\x97\0"
-"θ\0" "\xce\x98\0"
-"ι\0" "\xce\x99\0"
-"κ\0" "\xce\x9a\0"
-"λ\0" "\xce\x9b\0"
-"μ\0" "\xce\x9c\0"
-"ν\0" "\xce\x9d\0"
-"ξ\0" "\xce\x9e\0"
-"ο\0" "\xce\x9f\0"
-"π\0" "\xce\xa0\0"
-"ρ\0" "\xce\xa1\0"
-"σ\0" "\xce\xa3\0"
-"τ\0" "\xce\xa4\0"
-"υ\0" "\xce\xa5\0"
-"φ\0" "\xce\xa6\0"
-"χ\0" "\xce\xa7\0"
-"ψ\0" "\xce\xa8\0"
-"ω\0" "\xce\xa9\0"
-"…\0" "\xe2\x80\xa6\0"
-"€\0" "\xe2\x82\xac\0"
-"←\0" "\xe2\x86\x90\0"
-"↑\0" "\xe2\x86\x91\0"
-"→\0" "\xe2\x86\x92\0"
-"↓\0" "\xe2\x86\x93\0"
-"↔\0" "\xe2\x86\x94\0"
-"←\0" "\xe2\x87\x90\0"
-"→\0" "\xe2\x87\x92\0"
-"∀\0" "\xe2\x88\x80\0"
-"∃\0" "\xe2\x88\x83\0"
-"∇\0" "\xe2\x88\x87\0"
-"∏\0" "\xe2\x88\x8f\0"
-"∑\0" "\xe2\x88\x91\0"
-"∧\0" "\xe2\x88\xa7\0"
-"∨\0" "\xe2\x88\xa8\0"
-"∫\0" "\xe2\x88\xab\0"
-"≠\0" "\xe2\x89\xa0\0"
-"≡\0" "\xe2\x89\xa1\0"
-"⊕\0" "\xe2\x8a\x95\0"
-"⊥\0" "\xe2\x8a\xa5\0"
-"†\0" "\xe2\x80\xa0\0"
-"‡\0" "\xe2\x80\xa1\0"
-"•\0" "\xe2\x80\xa2\0"
-"‌\0" "\xe2\x80\x8c\0"
-"‍\0" "\xe2\x80\x8d\0"
-"‎\0" "\xe2\x80\x8e\0"
-"‏\0" "\xe2\x80\x8f\0"
-;
+static const Escape_Value escape_values_e_common_sorted[] = {
+ ESCAPE_VALUE("&", "\x26"),
+ ESCAPE_VALUE("'", "\x27"),
+ ESCAPE_VALUE(">", "\x3e"),
+ ESCAPE_VALUE(""", "\x22"),
+ ESCAPE_VALUE("<", "\x3c"),
+};
+
+/**
+ * @internal
+ * @var escape_values_v_sorted[]
+ * This array consists of Escape_Value structures sorted by escape value.
+ * Any newly added entry must be inserted at its sorted position and mirrored in escape_values_e_sorted.
+ */
+static const Escape_Value escape_values_v_sorted[] = {
+ ESCAPE_VALUE(" ", "\xc2\xa0"),
+ ESCAPE_VALUE("¡", "\xc2\xa1"),
+ ESCAPE_VALUE("¢", "\xc2\xa2"),
+ ESCAPE_VALUE("£", "\xc2\xa3"),
+ ESCAPE_VALUE("¤", "\xc2\xa4"),
+ ESCAPE_VALUE("¥", "\xc2\xa5"),
+ ESCAPE_VALUE("¦", "\xc2\xa6"),
+ ESCAPE_VALUE("§", "\xc2\xa7"),
+ ESCAPE_VALUE("¨", "\xc2\xa8"),
+ ESCAPE_VALUE("©", "\xc2\xa9"),
+ ESCAPE_VALUE("ª", "\xc2\xaa"),
+ ESCAPE_VALUE("«", "\xc2\xab"),
+ ESCAPE_VALUE("¬", "\xc2\xac"),
+ ESCAPE_VALUE("­", "\xc2\xad"),
+ ESCAPE_VALUE("®", "\xc2\xae"),
+ ESCAPE_VALUE("¯", "\xc2\xaf"),
+ ESCAPE_VALUE("°", "\xc2\xb0"),
+ ESCAPE_VALUE("±", "\xc2\xb1"),
+ ESCAPE_VALUE("²", "\xc2\xb2"),
+ ESCAPE_VALUE("³", "\xc2\xb3"),
+ ESCAPE_VALUE("´", "\xc2\xb4"),
+ ESCAPE_VALUE("µ", "\xc2\xb5"),
+ ESCAPE_VALUE("¶", "\xc2\xb6"),
+ ESCAPE_VALUE("·", "\xc2\xb7"),
+ ESCAPE_VALUE("¸", "\xc2\xb8"),
+ ESCAPE_VALUE("¹", "\xc2\xb9"),
+ ESCAPE_VALUE("º", "\xc2\xba"),
+ ESCAPE_VALUE("»", "\xc2\xbb"),
+ ESCAPE_VALUE("¼", "\xc2\xbc"),
+ ESCAPE_VALUE("½", "\xc2\xbd"),
+ ESCAPE_VALUE("¾", "\xc2\xbe"),
+ ESCAPE_VALUE("¿", "\xc2\xbf"),
+ ESCAPE_VALUE("À", "\xc3\x80"),
+ ESCAPE_VALUE("Á", "\xc3\x81"),
+ ESCAPE_VALUE("Â", "\xc3\x82"),
+ ESCAPE_VALUE("Ã", "\xc3\x83"),
+ ESCAPE_VALUE("Ä", "\xc3\x84"),
+ ESCAPE_VALUE("Å", "\xc3\x85"),
+ ESCAPE_VALUE("&Aelig;", "\xc3\x86"),
+ ESCAPE_VALUE("Ç", "\xc3\x87"),
+ ESCAPE_VALUE("È", "\xc3\x88"),
+ ESCAPE_VALUE("É", "\xc3\x89"),
+ ESCAPE_VALUE("Ê", "\xc3\x8a"),
+ ESCAPE_VALUE("Ë", "\xc3\x8b"),
+ ESCAPE_VALUE("Ì", "\xc3\x8c"),
+ ESCAPE_VALUE("Í", "\xc3\x8d"),
+ ESCAPE_VALUE("Î", "\xc3\x8e"),
+ ESCAPE_VALUE("Ï", "\xc3\x8f"),
+ ESCAPE_VALUE("&Eth;", "\xc3\x90"),
+ ESCAPE_VALUE("Ñ", "\xc3\x91"),
+ ESCAPE_VALUE("Ò", "\xc3\x92"),
+ ESCAPE_VALUE("Ó", "\xc3\x93"),
+ ESCAPE_VALUE("Ô", "\xc3\x94"),
+ ESCAPE_VALUE("Õ", "\xc3\x95"),
+ ESCAPE_VALUE("Ö", "\xc3\x96"),
+ ESCAPE_VALUE("×", "\xc3\x97"),
+ ESCAPE_VALUE("Ø", "\xc3\x98"),
+ ESCAPE_VALUE("Ù", "\xc3\x99"),
+ ESCAPE_VALUE("Ú", "\xc3\x9a"),
+ ESCAPE_VALUE("Û", "\xc3\x9b"),
+ ESCAPE_VALUE("Ý", "\xc3\x9d"),
+ ESCAPE_VALUE("&Thorn;", "\xc3\x9e"),
+ ESCAPE_VALUE("ß", "\xc3\x9f"),
+ ESCAPE_VALUE("à", "\xc3\xa0"),
+ ESCAPE_VALUE("á", "\xc3\xa1"),
+ ESCAPE_VALUE("â", "\xc3\xa2"),
+ ESCAPE_VALUE("ã", "\xc3\xa3"),
+ ESCAPE_VALUE("ä", "\xc3\xa4"),
+ ESCAPE_VALUE("å", "\xc3\xa5"),
+ ESCAPE_VALUE("æ", "\xc3\xa6"),
+ ESCAPE_VALUE("ç", "\xc3\xa7"),
+ ESCAPE_VALUE("è", "\xc3\xa8"),
+ ESCAPE_VALUE("é", "\xc3\xa9"),
+ ESCAPE_VALUE("ê", "\xc3\xaa"),
+ ESCAPE_VALUE("ë", "\xc3\xab"),
+ ESCAPE_VALUE("ì", "\xc3\xac"),
+ ESCAPE_VALUE("í", "\xc3\xad"),
+ ESCAPE_VALUE("î", "\xc3\xae"),
+ ESCAPE_VALUE("ï", "\xc3\xaf"),
+ ESCAPE_VALUE("ð", "\xc3\xb0"),
+ ESCAPE_VALUE("ñ", "\xc3\xb1"),
+ ESCAPE_VALUE("ò", "\xc3\xb2"),
+ ESCAPE_VALUE("ó", "\xc3\xb3"),
+ ESCAPE_VALUE("ô", "\xc3\xb4"),
+ ESCAPE_VALUE("õ", "\xc3\xb5"),
+ ESCAPE_VALUE("ö", "\xc3\xb6"),
+ ESCAPE_VALUE("÷", "\xc3\xb7"),
+ ESCAPE_VALUE("ø", "\xc3\xb8"),
+ ESCAPE_VALUE("ù", "\xc3\xb9"),
+ ESCAPE_VALUE("ú", "\xc3\xba"),
+ ESCAPE_VALUE("û", "\xc3\xbb"),
+ ESCAPE_VALUE("ü", "\xc3\xbc"),
+ ESCAPE_VALUE("ý", "\xc3\xbd"),
+ ESCAPE_VALUE("þ", "\xc3\xbe"),
+ ESCAPE_VALUE("ÿ", "\xc3\xbf"),
+ ESCAPE_VALUE("α", "\xce\x91"),
+ ESCAPE_VALUE("β", "\xce\x92"),
+ ESCAPE_VALUE("γ", "\xce\x93"),
+ ESCAPE_VALUE("δ", "\xce\x94"),
+ ESCAPE_VALUE("ε", "\xce\x95"),
+ ESCAPE_VALUE("ζ", "\xce\x96"),
+ ESCAPE_VALUE("η", "\xce\x97"),
+ ESCAPE_VALUE("θ", "\xce\x98"),
+ ESCAPE_VALUE("ι", "\xce\x99"),
+ ESCAPE_VALUE("κ", "\xce\x9a"),
+ ESCAPE_VALUE("λ", "\xce\x9b"),
+ ESCAPE_VALUE("μ", "\xce\x9c"),
+ ESCAPE_VALUE("ν", "\xce\x9d"),
+ ESCAPE_VALUE("ξ", "\xce\x9e"),
+ ESCAPE_VALUE("ο", "\xce\x9f"),
+ ESCAPE_VALUE("π", "\xce\xa0"),
+ ESCAPE_VALUE("ρ", "\xce\xa1"),
+ ESCAPE_VALUE("σ", "\xce\xa3"),
+ ESCAPE_VALUE("τ", "\xce\xa4"),
+ ESCAPE_VALUE("υ", "\xce\xa5"),
+ ESCAPE_VALUE("φ", "\xce\xa6"),
+ ESCAPE_VALUE("χ", "\xce\xa7"),
+ ESCAPE_VALUE("ψ", "\xce\xa8"),
+ ESCAPE_VALUE("ω", "\xce\xa9"),
+ ESCAPE_VALUE("‌", "\xe2\x80\x8c"),
+ ESCAPE_VALUE("‍", "\xe2\x80\x8d"),
+ ESCAPE_VALUE("‎", "\xe2\x80\x8e"),
+ ESCAPE_VALUE("‏", "\xe2\x80\x8f"),
+ ESCAPE_VALUE("†", "\xe2\x80\xa0"),
+ ESCAPE_VALUE("‡", "\xe2\x80\xa1"),
+ ESCAPE_VALUE("•", "\xe2\x80\xa2"),
+ ESCAPE_VALUE("…", "\xe2\x80\xa6"),
+ ESCAPE_VALUE("€", "\xe2\x82\xac"),
+ ESCAPE_VALUE("←", "\xe2\x86\x90"),
+ ESCAPE_VALUE("↑", "\xe2\x86\x91"),
+ ESCAPE_VALUE("→", "\xe2\x86\x92"),
+ ESCAPE_VALUE("↓", "\xe2\x86\x93"),
+ ESCAPE_VALUE("↔", "\xe2\x86\x94"),
+ ESCAPE_VALUE("←", "\xe2\x87\x90"),
+ ESCAPE_VALUE("→", "\xe2\x87\x92"),
+ ESCAPE_VALUE("∀", "\xe2\x88\x80"),
+ ESCAPE_VALUE("∃", "\xe2\x88\x83"),
+ ESCAPE_VALUE("∇", "\xe2\x88\x87"),
+ ESCAPE_VALUE("∏", "\xe2\x88\x8f"),
+ ESCAPE_VALUE("∑", "\xe2\x88\x91"),
+ ESCAPE_VALUE("∧", "\xe2\x88\xa7"),
+ ESCAPE_VALUE("∨", "\xe2\x88\xa8"),
+ ESCAPE_VALUE("∫", "\xe2\x88\xab"),
+ ESCAPE_VALUE("≠", "\xe2\x89\xa0"),
+ ESCAPE_VALUE("≡", "\xe2\x89\xa1"),
+ ESCAPE_VALUE("⊕", "\xe2\x8a\x95"),
+ ESCAPE_VALUE("⊥", "\xe2\x8a\xa5"),
+};
+
+/**
+ * @internal
+ * @var escape_values_v_common_sorted[]
+ * Same layout and ordering rule as escape_values_v_sorted, but holding only the small subset of common escapes.
+ */
+static const Escape_Value escape_values_v_common_sorted[] = {
+ ESCAPE_VALUE(""", "\x22"),
+ ESCAPE_VALUE("&", "\x26"),
+ ESCAPE_VALUE("'", "\x27"),
+ ESCAPE_VALUE("<", "\x3c"),
+ ESCAPE_VALUE(">", "\x3e"),
+};
+
+
/**
* @internal
return ((s == s_end) && reached_end);
}
+
+/**
+ * @internal
+ * Binary search for the entry whose escape equals s[0..s_len); returns its index, or -1 when there is none.
+ * @param s start of the escape text to look up
+ * @param s_len number of bytes of s that make up the escape text
+ * @param escape_values array of Escape_Value to look inside, sorted by escape
+ * @param escape_values_len number of entries in escape_values
+ */
+int _escaped_string_search(const char * s, size_t s_len, const Escape_Value escape_values[], const size_t escape_values_len)
+{
+ int l = 0;
+ int r = escape_values_len - 1; /* size_t count narrowed to int */
+ while (l <= r)
+ {
+ int m = (l + r) / 2;
+ int res = strncmp(s, escape_values[m].escape, MAX(escape_values[m].escape_len, s_len));
+ if (res == 0)
+ {
+ //res == 0 only says the compared bytes were equal; escape_len and s_len can still differ.
+ //In that case pick a direction (shorter s goes left, longer s goes right) and keep searching.
+ //NOTE(review): when escape_len > s_len, strncmp may look at bytes of s past s_len - confirm callers allow it
+ if (escape_values[m].escape_len > s_len)
+ res = -1;
+ else if (escape_values[m].escape_len < s_len)
+ res = 1;
+ else return m;
+ }
+ if (res > 0)
+ l = m + 1;
+ else
+ r = m - 1;
+ }
+ return -1;
+}
+
+/**
+ * @internal
+ * Binary search for an entry whose value is a prefix of s; returns its index, or -1 when there is none.
+ * @param s text that is expected to begin with one of the values
+ * @param escape_values array of Escape_Value to look inside, sorted by value
+ * @param escape_values_len number of entries in escape_values
+ */
+int _escaped_value_search(const char * s, const Escape_Value escape_values[], const size_t escape_values_len)
+{
+ int l = 0;
+ int r = escape_values_len - 1; /* size_t count narrowed to int */
+ while (l <= r)
+ {
+ int m = (l + r) / 2;
+ int res = strncmp(s, escape_values[m].value, escape_values[m].value_len); /* only value_len bytes are compared, so s may continue after the match */
+ if (res == 0)
+ return m;
+ if (res > 0)
+ l = m + 1;
+ else
+ r = m - 1;
+ }
+ return -1;
+}
+
+
/**
* @internal
*
* @param s the string to match
*/
+
static inline const char *
_escaped_char_match(const char *s, int *adv)
{
- const char *map_itr, *map_end, *mc, *sc;
-
- map_itr = escape_strings;
- map_end = map_itr + sizeof(escape_strings);
-
- while (map_itr < map_end)
+ static const size_t escape_common_size = sizeof(escape_values_v_common_sorted) / sizeof(Escape_Value); /* entry count of the common table */
+ int n_ret = _escaped_value_search(s, escape_values_v_common_sorted, escape_common_size); /* try the small common table first */
+ if (n_ret != -1)
{
- const char *escape;
- int match;
-
- escape = map_itr;
- _escaped_advance_after_end_of_string(&map_itr);
- if (map_itr >= map_end) break;
-
- mc = map_itr;
- sc = s;
- match = 1;
- while ((*mc) && (*sc))
- {
- if ((unsigned char)*sc < (unsigned char)*mc) return NULL;
- if (*sc != *mc)
- {
- match = 0;
- break;
- }
- mc++;
- sc++;
- }
- if (match)
+ *adv = (int) escape_values_v_common_sorted[n_ret].value_len; /* report the matched value's byte length through adv */
+ return escape_values_v_common_sorted[n_ret].escape; /* hand back the escape string paired with that value */
+ }
+ else
+ {
+ static const size_t escape_size = sizeof(escape_values_v_sorted) / sizeof(Escape_Value); /* entry count of the full table */
+ n_ret = _escaped_value_search(s, escape_values_v_sorted, escape_size); /* fall back to the full value-sorted table */
+ if (n_ret != -1)
{
- *adv = mc - map_itr;
- return escape;
+ *adv = (int)escape_values_v_sorted[n_ret].value_len; /* same contract as the common-table hit above */
+ return escape_values_v_sorted[n_ret].escape;
}
- _escaped_advance_after_end_of_string(&map_itr);
}
return NULL;
}
}
else
{
- const char *map_itr, *map_end;
-
- map_itr = escape_strings;
- map_end = map_itr + sizeof(escape_strings);
-
- while (map_itr < map_end)
+ static const size_t escape_common_size = sizeof(escape_values_e_common_sorted) / sizeof(Escape_Value);
+ int n_ret = _escaped_string_search(s, s_end-s, escape_values_e_common_sorted, escape_common_size);
+ if (n_ret != -1)
+ {
+ return escape_values_e_common_sorted[n_ret].value;
+ }
+ else
{
- if (_escaped_is_eq_and_advance(s, s_end, &map_itr, map_end))
- return map_itr;
- if (map_itr < map_end)
- _escaped_advance_after_end_of_string(&map_itr);
+ static const size_t escape_size = sizeof(escape_values_e_sorted) / sizeof(Escape_Value);
+ n_ret = _escaped_string_search(s, s_end-s, escape_values_e_sorted, escape_size);
+ if (n_ret != -1)
+ return escape_values_e_sorted[n_ret].value;
}
}