--- /dev/null
+# -*- coding: utf-8 -*-
+
+"""
+pythoncompat
+"""
+
+
+import sys
+
+# -------
+# Pythons
+# -------
+
+# Syntax sugar.
+_ver = sys.version_info
+
+#: Python 2.x?
+is_py2 = (_ver[0] == 2)
+
+#: Python 3.x?
+is_py3 = (_ver[0] == 3)
+
+#: Python 3.0.x
+is_py30 = (is_py3 and _ver[1] == 0)
+
+#: Python 3.1.x
+is_py31 = (is_py3 and _ver[1] == 1)
+
+#: Python 3.2.x
+is_py32 = (is_py3 and _ver[1] == 2)
+
+#: Python 3.3.x
+is_py33 = (is_py3 and _ver[1] == 3)
+
+#: Python 3.4.x
+is_py34 = (is_py3 and _ver[1] == 4)
+
+#: Python 2.7.x
+is_py27 = (is_py2 and _ver[1] == 7)
+
+#: Python 2.6.x
+is_py26 = (is_py2 and _ver[1] == 6)
+
+#: Python 2.5.x
+is_py25 = (is_py2 and _ver[1] == 5)
+
+#: Python 2.4.x
+is_py24 = (is_py2 and _ver[1] == 4) # I'm assuming this is not by choice.
+
+
+# ---------
+# Platforms
+# ---------
+
+
+# Syntax sugar.
+_ver = sys.version.lower()
+
+is_pypy = ('pypy' in _ver)
+is_jython = ('jython' in _ver)
+is_ironpython = ('iron' in _ver)
+
+# Assume CPython, if nothing else.
+is_cpython = not any((is_pypy, is_jython, is_ironpython))
+
+# Windows-based system.
+is_windows = 'win32' in str(sys.platform).lower()
+
+# Standard Linux 2+ system.
+is_linux = ('linux' in str(sys.platform).lower())
+is_osx = ('darwin' in str(sys.platform).lower())
+is_hpux = ('hpux' in str(sys.platform).lower()) # Complete guess.
+is_solaris = ('solar==' in str(sys.platform).lower()) # Complete guess.
+
+try:
+ import simplejson as json
+except ImportError:
+ import json
+
+# ---------
+# Specifics
+# ---------
+
+try:
+ import cchardet as chardet
+except ImportError:
+ from .packages import chardet
+
+
+if is_py2:
+ from urllib import quote, unquote, quote_plus, unquote_plus, urlencode
+ from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag
+ from urllib2 import parse_http_list
+ import cookielib
+ from Cookie import Morsel
+ from StringIO import StringIO
+ from .packages.urllib3.packages.ordered_dict import OrderedDict
+
+ builtin_str = str
+ bytes = str
+ str = unicode
+ basestring = basestring
+ numeric_types = (int, long, float)
+
+
+
+elif is_py3:
+ from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag
+ from urllib.request import parse_http_list
+ from http import cookiejar as cookielib
+ from http.cookies import Morsel
+ from io import StringIO
+<<<<<<< HEAD
+=======
+ from .packages import chardet
+>>>>>>> 8f86ff633a2646442f2d8976046b5c0025e772c1
+ from collections import OrderedDict
+
+ builtin_str = str
+ str = str
+ bytes = bytes
+ basestring = (str, bytes)
+ numeric_types = (int, float)
-######################## BEGIN LICENSE BLOCK ########################
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-__version__ = "1.1"
-
-
-def detect(aBuf):
- from . import universaldetector
- u = universaldetector.UniversalDetector()
- u.reset()
- u.feed(aBuf)
- u.close()
- return u.result
+######################## BEGIN LICENSE BLOCK ########################\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+# \r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+# \r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+__version__ = "1.0.0"\r
+\r
+def detect(aBuf):\r
+ from . import universaldetector\r
+ u = universaldetector.UniversalDetector()\r
+ u.reset()\r
+ u.feed(aBuf)\r
+ u.close()\r
+ return u.result\r
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
-#
+#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
-#
+#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
-#
+#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
-#
+#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
######################### END LICENSE BLOCK #########################
# Big5 frequency table
-# by Taiwan's Mandarin Promotion Council
+# by Taiwan's Mandarin Promotion Council
# <http://www.edu.tw:81/mandr/>
-#
+#
# 128 --> 0.42261
# 256 --> 0.57851
# 512 --> 0.74851
# 1024 --> 0.89384
# 2048 --> 0.97583
-#
+#
# Ideal Distribution Ratio = 0.74851/(1-0.74851) =2.98
# Random Distribution Ration = 512/(5401-512)=0.105
-#
+#
# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75
#Char to FreqOrder table
BIG5_TABLE_SIZE = 5376
-Big5CharToFreqOrder = ( \
+Big5CharToFreqOrder = (
1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16
3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32
1198,3972,3350,4202, 410,2215, 302, 590, 361,1964, 8, 204, 58,4510,5009,1932, # 48
13936,13937,13938,13939,13940,13941,13942,13943,13944,13945,13946,13947,13948,13949,13950,13951, #13952
13952,13953,13954,13955,13956,13957,13958,13959,13960,13961,13962,13963,13964,13965,13966,13967, #13968
13968,13969,13970,13971,13972) #13973
+
+# flake8: noqa
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Communicator client code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .mbcharsetprober import MultiByteCharSetProber
-from .codingstatemachine import CodingStateMachine
-from .chardistribution import Big5DistributionAnalysis
-from .mbcssm import Big5SMModel
-
-
-class Big5Prober(MultiByteCharSetProber):
- def __init__(self):
- MultiByteCharSetProber.__init__(self)
- self._mCodingSM = CodingStateMachine(Big5SMModel)
- self._mDistributionAnalyzer = Big5DistributionAnalysis()
- self.reset()
-
- def get_charset_name(self):
- return "Big5"
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is Mozilla Communicator client code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 1998\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from .mbcharsetprober import MultiByteCharSetProber\r
+from .codingstatemachine import CodingStateMachine\r
+from .chardistribution import Big5DistributionAnalysis\r
+from .mbcssm import Big5SMModel\r
+\r
+\r
+class Big5Prober(MultiByteCharSetProber):\r
+ def __init__(self):\r
+ MultiByteCharSetProber.__init__(self)\r
+ self._mCodingSM = CodingStateMachine(Big5SMModel)\r
+ self._mDistributionAnalyzer = Big5DistributionAnalysis()\r
+ self.reset()\r
+\r
+ def get_charset_name(self):\r
+ return "Big5"\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Communicator client code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE,
- EUCTW_TYPICAL_DISTRIBUTION_RATIO)
-from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE,
- EUCKR_TYPICAL_DISTRIBUTION_RATIO)
-from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE,
- GB2312_TYPICAL_DISTRIBUTION_RATIO)
-from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE,
- BIG5_TYPICAL_DISTRIBUTION_RATIO)
-from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE,
- JIS_TYPICAL_DISTRIBUTION_RATIO)
-from .compat import wrap_ord
-
-ENOUGH_DATA_THRESHOLD = 1024
-SURE_YES = 0.99
-SURE_NO = 0.01
-
-
-class CharDistributionAnalysis:
- def __init__(self):
- # Mapping table to get frequency order from char order (get from
- # GetOrder())
- self._mCharToFreqOrder = None
- # Size of above table
- self._mTableSize = None
- # This is a constant value which varies from language to language,
- # used in calculating confidence. See
- # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
- # for further detail.
- self._mTypicalDistributionRatio = None
- self.reset()
-
- def reset(self):
- """reset analyser, clear any state"""
- # If this flag is set to constants.True, detection is done and
- # conclusion has been made
- self._mDone = False
- # Total characters encountered
- self._mTotalChars = 0
- # The number of characters whose frequency order is less than 512
- self._mFreqChars = 0
-
- def feed(self, aStr, aCharLen):
- """feed a character with known length"""
- if aCharLen == 2:
- # we only care about 2-bytes character in our distribution analysis
- order = self.get_order(aStr)
- else:
- order = -1
- if order >= 0:
- self._mTotalChars += 1
- # order is valid
- if order < self._mTableSize:
- if 512 > self._mCharToFreqOrder[order]:
- self._mFreqChars += 1
-
- def get_confidence(self):
- """return confidence based on existing data"""
- # if we didn't receive any character in our consideration range, return
- # negative answer
- if self._mTotalChars <= 0:
- return SURE_NO
-
- if self._mTotalChars != self._mFreqChars:
- r = self._mFreqChars / ((self._mTotalChars - self._mFreqChars)
- * self._mTypicalDistributionRatio)
- if r < SURE_YES:
- return r
-
- # normalize confidence (we don't want to be 100% sure)
- return SURE_YES
-
- def got_enough_data(self):
- # It is not necessary to receive all data to draw conclusion.
- # For charset detection, certain amount of data is enough
- return self._mTotalChars > ENOUGH_DATA_THRESHOLD
-
- def get_order(self, aStr):
- # We do not handle characters based on the original encoding string,
- # but convert this encoding string to a number, here called order.
- # This allows multiple encodings of a language to share one frequency
- # table.
- return -1
-
-
-class EUCTWDistributionAnalysis(CharDistributionAnalysis):
- def __init__(self):
- CharDistributionAnalysis.__init__(self)
- self._mCharToFreqOrder = EUCTWCharToFreqOrder
- self._mTableSize = EUCTW_TABLE_SIZE
- self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
-
- def get_order(self, aStr):
- # for euc-TW encoding, we are interested
- # first byte range: 0xc4 -- 0xfe
- # second byte range: 0xa1 -- 0xfe
- # no validation needed here. State machine has done that
- if aStr[0] >= '\xC4':
- return 94 * (wrap_ord(aStr[0]) - 0xC4) + wrap_ord(aStr[1]) - 0xA1
- else:
- return -1
-
-
-class EUCKRDistributionAnalysis(CharDistributionAnalysis):
- def __init__(self):
- CharDistributionAnalysis.__init__(self)
- self._mCharToFreqOrder = EUCKRCharToFreqOrder
- self._mTableSize = EUCKR_TABLE_SIZE
- self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
-
- def get_order(self, aStr):
- # for euc-KR encoding, we are interested
- # first byte range: 0xb0 -- 0xfe
- # second byte range: 0xa1 -- 0xfe
- # no validation needed here. State machine has done that
- if aStr[0] >= '\xB0':
- return 94 * (wrap_ord(aStr[0]) - 0xB0) + wrap_ord(aStr[1]) - 0xA1
- else:
- return -1
-
-
-class GB2312DistributionAnalysis(CharDistributionAnalysis):
- def __init__(self):
- CharDistributionAnalysis.__init__(self)
- self._mCharToFreqOrder = GB2312CharToFreqOrder
- self._mTableSize = GB2312_TABLE_SIZE
- self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO
-
- def get_order(self, aStr):
- # for GB2312 encoding, we are interested
- # first byte range: 0xb0 -- 0xfe
- # second byte range: 0xa1 -- 0xfe
- # no validation needed here. State machine has done that
- if (aStr[0] >= '\xB0') and (aStr[1] >= '\xA1'):
- return 94 * (wrap_ord(aStr[0]) - 0xB0) + wrap_ord(aStr[1]) - 0xA1
- else:
- return -1
-
-
-class Big5DistributionAnalysis(CharDistributionAnalysis):
- def __init__(self):
- CharDistributionAnalysis.__init__(self)
- self._mCharToFreqOrder = Big5CharToFreqOrder
- self._mTableSize = BIG5_TABLE_SIZE
- self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO
-
- def get_order(self, aStr):
- # for big5 encoding, we are interested
- # first byte range: 0xa4 -- 0xfe
- # second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
- # no validation needed here. State machine has done that
- if aStr[0] >= '\xA4':
- if aStr[1] >= '\xA1':
- return (157 * (wrap_ord(aStr[0]) - 0xA4) + wrap_ord(aStr[1])
- - 0xA1 + 63)
- else:
- return (157 * (wrap_ord(aStr[0]) - 0xA4) + wrap_ord(aStr[1])
- - 0x40)
- else:
- return -1
-
-
-class SJISDistributionAnalysis(CharDistributionAnalysis):
- def __init__(self):
- CharDistributionAnalysis.__init__(self)
- self._mCharToFreqOrder = JISCharToFreqOrder
- self._mTableSize = JIS_TABLE_SIZE
- self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
-
- def get_order(self, aStr):
- # for sjis encoding, we are interested
- # first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
- # second byte range: 0x40 -- 0x7e, 0x81 -- oxfe
- # no validation needed here. State machine has done that
- if (aStr[0] >= '\x81') and (aStr[0] <= '\x9F'):
- order = 188 * (wrap_ord(aStr[0]) - 0x81)
- elif (aStr[0] >= '\xE0') and (aStr[0] <= '\xEF'):
- order = 188 * (wrap_ord(aStr[0]) - 0xE0 + 31)
- else:
- return -1
- order = order + wrap_ord(aStr[1]) - 0x40
- if aStr[1] > '\x7F':
- order = -1
- return order
-
-
-class EUCJPDistributionAnalysis(CharDistributionAnalysis):
- def __init__(self):
- CharDistributionAnalysis.__init__(self)
- self._mCharToFreqOrder = JISCharToFreqOrder
- self._mTableSize = JIS_TABLE_SIZE
- self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
-
- def get_order(self, aStr):
- # for euc-JP encoding, we are interested
- # first byte range: 0xa0 -- 0xfe
- # second byte range: 0xa1 -- 0xfe
- # no validation needed here. State machine has done that
- if aStr[0] >= '\xA0':
- return 94 * (wrap_ord(aStr[0]) - 0xA1) + wrap_ord(aStr[1]) - 0xa1
- else:
- return -1
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is Mozilla Communicator client code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 1998\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE,\r
+ EUCTW_TYPICAL_DISTRIBUTION_RATIO)\r
+from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE,\r
+ EUCKR_TYPICAL_DISTRIBUTION_RATIO)\r
+from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE,\r
+ GB2312_TYPICAL_DISTRIBUTION_RATIO)\r
+from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE,\r
+ BIG5_TYPICAL_DISTRIBUTION_RATIO)\r
+from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE,\r
+ JIS_TYPICAL_DISTRIBUTION_RATIO)\r
+from .compat import wrap_ord\r
+\r
+ENOUGH_DATA_THRESHOLD = 1024\r
+SURE_YES = 0.99\r
+SURE_NO = 0.01\r
+\r
+\r
+class CharDistributionAnalysis:\r
+ def __init__(self):\r
+ # Mapping table to get frequency order from char order (get from\r
+ # GetOrder())\r
+ self._mCharToFreqOrder = None\r
+ self._mTableSize = None # Size of above table\r
+ # This is a constant value which varies from language to language,\r
+ # used in calculating confidence. See\r
+ # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html\r
+ # for further detail.\r
+ self._mTypicalDistributionRatio = None\r
+ self.reset()\r
+\r
+ def reset(self):\r
+ """reset analyser, clear any state"""\r
+ # If this flag is set to True, detection is done and conclusion has\r
+ # been made\r
+ self._mDone = False\r
+ self._mTotalChars = 0 # Total characters encountered\r
+ # The number of characters whose frequency order is less than 512\r
+ self._mFreqChars = 0\r
+\r
+ def feed(self, aBuf, aCharLen):\r
+ """feed a character with known length"""\r
+ if aCharLen == 2:\r
+ # we only care about 2-bytes character in our distribution analysis\r
+ order = self.get_order(aBuf)\r
+ else:\r
+ order = -1\r
+ if order >= 0:\r
+ self._mTotalChars += 1\r
+ # order is valid\r
+ if order < self._mTableSize:\r
+ if 512 > self._mCharToFreqOrder[order]:\r
+ self._mFreqChars += 1\r
+\r
+ def get_confidence(self):\r
+ """return confidence based on existing data"""\r
+ # if we didn't receive any character in our consideration range,\r
+ # return negative answer\r
+ if self._mTotalChars <= 0:\r
+ return SURE_NO\r
+\r
+ if self._mTotalChars != self._mFreqChars:\r
+ r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars)\r
+ * self._mTypicalDistributionRatio))\r
+ if r < SURE_YES:\r
+ return r\r
+\r
+ # normalize confidence (we don't want to be 100% sure)\r
+ return SURE_YES\r
+\r
+ def got_enough_data(self):\r
+ # It is not necessary to receive all data to draw conclusion.\r
+ # For charset detection, certain amount of data is enough\r
+ return self._mTotalChars > ENOUGH_DATA_THRESHOLD\r
+\r
+ def get_order(self, aBuf):\r
+ # We do not handle characters based on the original encoding string,\r
+ # but convert this encoding string to a number, here called order.\r
+ # This allows multiple encodings of a language to share one frequency\r
+ # table.\r
+ return -1\r
+\r
+\r
+class EUCTWDistributionAnalysis(CharDistributionAnalysis):\r
+ def __init__(self):\r
+ CharDistributionAnalysis.__init__(self)\r
+ self._mCharToFreqOrder = EUCTWCharToFreqOrder\r
+ self._mTableSize = EUCTW_TABLE_SIZE\r
+ self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO\r
+\r
+ def get_order(self, aBuf):\r
+ # for euc-TW encoding, we are interested\r
+ # first byte range: 0xc4 -- 0xfe\r
+ # second byte range: 0xa1 -- 0xfe\r
+ # no validation needed here. State machine has done that\r
+ first_char = wrap_ord(aBuf[0])\r
+ if first_char >= 0xC4:\r
+ return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1\r
+ else:\r
+ return -1\r
+\r
+\r
+class EUCKRDistributionAnalysis(CharDistributionAnalysis):\r
+ def __init__(self):\r
+ CharDistributionAnalysis.__init__(self)\r
+ self._mCharToFreqOrder = EUCKRCharToFreqOrder\r
+ self._mTableSize = EUCKR_TABLE_SIZE\r
+ self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO\r
+\r
+ def get_order(self, aBuf):\r
+ # for euc-KR encoding, we are interested\r
+ # first byte range: 0xb0 -- 0xfe\r
+ # second byte range: 0xa1 -- 0xfe\r
+ # no validation needed here. State machine has done that\r
+ first_char = wrap_ord(aBuf[0])\r
+ if first_char >= 0xB0:\r
+ return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1\r
+ else:\r
+ return -1\r
+\r
+\r
+class GB2312DistributionAnalysis(CharDistributionAnalysis):\r
+ def __init__(self):\r
+ CharDistributionAnalysis.__init__(self)\r
+ self._mCharToFreqOrder = GB2312CharToFreqOrder\r
+ self._mTableSize = GB2312_TABLE_SIZE\r
+ self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO\r
+\r
+ def get_order(self, aBuf):\r
+ # for GB2312 encoding, we are interested\r
+ # first byte range: 0xb0 -- 0xfe\r
+ # second byte range: 0xa1 -- 0xfe\r
+ # no validation needed here. State machine has done that\r
+ first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])\r
+ if (first_char >= 0xB0) and (second_char >= 0xA1):\r
+ return 94 * (first_char - 0xB0) + second_char - 0xA1\r
+ else:\r
+ return -1\r
+\r
+\r
+class Big5DistributionAnalysis(CharDistributionAnalysis):\r
+ def __init__(self):\r
+ CharDistributionAnalysis.__init__(self)\r
+ self._mCharToFreqOrder = Big5CharToFreqOrder\r
+ self._mTableSize = BIG5_TABLE_SIZE\r
+ self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO\r
+\r
+ def get_order(self, aBuf):\r
+ # for big5 encoding, we are interested\r
+ # first byte range: 0xa4 -- 0xfe\r
+ # second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe\r
+ # no validation needed here. State machine has done that\r
+ first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])\r
+ if first_char >= 0xA4:\r
+ if second_char >= 0xA1:\r
+ return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63\r
+ else:\r
+ return 157 * (first_char - 0xA4) + second_char - 0x40\r
+ else:\r
+ return -1\r
+\r
+\r
+class SJISDistributionAnalysis(CharDistributionAnalysis):\r
+ def __init__(self):\r
+ CharDistributionAnalysis.__init__(self)\r
+ self._mCharToFreqOrder = JISCharToFreqOrder\r
+ self._mTableSize = JIS_TABLE_SIZE\r
+ self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO\r
+\r
+ def get_order(self, aBuf):\r
+ # for sjis encoding, we are interested\r
+ # first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe\r
+ # second byte range: 0x40 -- 0x7e, 0x81 -- oxfe\r
+ # no validation needed here. State machine has done that\r
+ first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])\r
+ if (first_char >= 0x81) and (first_char <= 0x9F):\r
+ order = 188 * (first_char - 0x81)\r
+ elif (first_char >= 0xE0) and (first_char <= 0xEF):\r
+ order = 188 * (first_char - 0xE0 + 31)\r
+ else:\r
+ return -1\r
+ order = order + second_char - 0x40\r
+ if second_char > 0x7F:\r
+ order = -1\r
+ return order\r
+\r
+\r
+class EUCJPDistributionAnalysis(CharDistributionAnalysis):\r
+ def __init__(self):\r
+ CharDistributionAnalysis.__init__(self)\r
+ self._mCharToFreqOrder = JISCharToFreqOrder\r
+ self._mTableSize = JIS_TABLE_SIZE\r
+ self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO\r
+\r
+ def get_order(self, aBuf):\r
+ # for euc-JP encoding, we are interested\r
+ # first byte range: 0xa0 -- 0xfe\r
+ # second byte range: 0xa1 -- 0xfe\r
+ # no validation needed here. State machine has done that\r
+ char = wrap_ord(aBuf[0])\r
+ if char >= 0xA0:\r
+ return 94 * (char - 0xA1) + wrap_ord(aBuf[1]) - 0xa1\r
+ else:\r
+ return -1\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Communicator client code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-import sys
-from . import constants
-from .charsetprober import CharSetProber
-
-
-class CharSetGroupProber(CharSetProber):
- def __init__(self):
- CharSetProber.__init__(self)
- self._mActiveNum = 0
- self._mProbers = []
- self._mBestGuessProber = None
-
- def reset(self):
- CharSetProber.reset(self)
- self._mActiveNum = 0
- for prober in self._mProbers:
- if prober:
- prober.reset()
- prober.active = True
- self._mActiveNum += 1
- self._mBestGuessProber = None
-
- def get_charset_name(self):
- if not self._mBestGuessProber:
- self.get_confidence()
- if not self._mBestGuessProber:
- return None
- # self._mBestGuessProber = self._mProbers[0]
- return self._mBestGuessProber.get_charset_name()
-
- def feed(self, aBuf):
- for prober in self._mProbers:
- if not prober:
- continue
- if not prober.active:
- continue
- st = prober.feed(aBuf)
- if not st:
- continue
- if st == constants.eFoundIt:
- self._mBestGuessProber = prober
- return self.get_state()
- elif st == constants.eNotMe:
- prober.active = False
- self._mActiveNum -= 1
- if self._mActiveNum <= 0:
- self._mState = constants.eNotMe
- return self.get_state()
- return self.get_state()
-
- def get_confidence(self):
- st = self.get_state()
- if st == constants.eFoundIt:
- return 0.99
- elif st == constants.eNotMe:
- return 0.01
- bestConf = 0.0
- self._mBestGuessProber = None
- for prober in self._mProbers:
- if not prober:
- continue
- if not prober.active:
- if constants._debug:
- sys.stderr.write(prober.get_charset_name() + ' not '
- 'active\n')
- continue
- cf = prober.get_confidence()
- if constants._debug:
- sys.stderr.write('%s confidence = %s\n' %
- (prober.get_charset_name(), cf))
- if bestConf < cf:
- bestConf = cf
- self._mBestGuessProber = prober
-
- if not self._mBestGuessProber:
- return 0.0
- return bestConf
-# else:
-# self._mBestGuessProber = self._mProbers[0]
-# return self._mBestGuessProber.get_confidence()
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is Mozilla Communicator client code.\r
+# \r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 1998\r
+# the Initial Developer. All Rights Reserved.\r
+# \r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+# \r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+# \r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from . import constants\r
+import sys\r
+from .charsetprober import CharSetProber\r
+\r
+\r
+class CharSetGroupProber(CharSetProber):\r
+ def __init__(self):\r
+ CharSetProber.__init__(self)\r
+ self._mActiveNum = 0\r
+ self._mProbers = []\r
+ self._mBestGuessProber = None\r
+\r
+ def reset(self):\r
+ CharSetProber.reset(self)\r
+ self._mActiveNum = 0\r
+ for prober in self._mProbers:\r
+ if prober:\r
+ prober.reset()\r
+ prober.active = True\r
+ self._mActiveNum += 1\r
+ self._mBestGuessProber = None\r
+\r
+ def get_charset_name(self):\r
+ if not self._mBestGuessProber:\r
+ self.get_confidence()\r
+ if not self._mBestGuessProber:\r
+ return None\r
+# self._mBestGuessProber = self._mProbers[0]\r
+ return self._mBestGuessProber.get_charset_name()\r
+\r
+ def feed(self, aBuf):\r
+ for prober in self._mProbers:\r
+ if not prober:\r
+ continue\r
+ if not prober.active:\r
+ continue\r
+ st = prober.feed(aBuf)\r
+ if not st:\r
+ continue\r
+ if st == constants.eFoundIt:\r
+ self._mBestGuessProber = prober\r
+ return self.get_state()\r
+ elif st == constants.eNotMe:\r
+ prober.active = False\r
+ self._mActiveNum -= 1\r
+ if self._mActiveNum <= 0:\r
+ self._mState = constants.eNotMe\r
+ return self.get_state()\r
+ return self.get_state()\r
+\r
+ def get_confidence(self):\r
+ st = self.get_state()\r
+ if st == constants.eFoundIt:\r
+ return 0.99\r
+ elif st == constants.eNotMe:\r
+ return 0.01\r
+ bestConf = 0.0\r
+ self._mBestGuessProber = None\r
+ for prober in self._mProbers:\r
+ if not prober:\r
+ continue\r
+ if not prober.active:\r
+ if constants._debug:\r
+ sys.stderr.write(prober.get_charset_name()\r
+ + ' not active\n')\r
+ continue\r
+ cf = prober.get_confidence()\r
+ if constants._debug:\r
+ sys.stderr.write('%s confidence = %s\n' %\r
+ (prober.get_charset_name(), cf))\r
+ if bestConf < cf:\r
+ bestConf = cf\r
+ self._mBestGuessProber = prober\r
+ if not self._mBestGuessProber:\r
+ return 0.0\r
+ return bestConf\r
+# else:\r
+# self._mBestGuessProber = self._mProbers[0]\r
+# return self._mBestGuessProber.get_confidence()\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .constants import eStart, eError
-from .compat import wrap_ord
-
-
-class CodingStateMachine:
- def __init__(self, sm):
- self._mModel = sm
- self._mCurrentBytePos = 0
- self._mCurrentCharLen = 0
- self.reset()
-
- def reset(self):
- self._mCurrentState = eStart
-
- def next_state(self, c):
- # for each byte we get its class
- # if it is first byte, we also get byte length
- try:
- byteCls = self._mModel['classTable'][wrap_ord(c)]
- except IndexError:
- return eError
- if self._mCurrentState == eStart:
- self._mCurrentBytePos = 0
- self._mCurrentCharLen = self._mModel['charLenTable'][byteCls]
-
- # from byte's class and stateTable, we get its next state
- next_state = (self._mCurrentState + self._mModel['classFactor'] +
- byteCls)
- self._mCurrentState = self._mModel['stateTable'][next_state]
- self._mCurrentBytePos += 1
- return self._mCurrentState
-
- def get_current_charlen(self):
- return self._mCurrentCharLen
-
- def get_coding_state_machine(self):
- return self._mModel['name']
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is mozilla.org code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 1998\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from .constants import eStart\r
+from .compat import wrap_ord\r
+\r
+\r
+class CodingStateMachine:\r
+ def __init__(self, sm):\r
+ self._mModel = sm\r
+ self._mCurrentBytePos = 0\r
+ self._mCurrentCharLen = 0\r
+ self.reset()\r
+\r
+ def reset(self):\r
+ self._mCurrentState = eStart\r
+\r
+ def next_state(self, c):\r
+ # for each byte we get its class\r
+ # if it is first byte, we also get byte length\r
+ # PY3K: aBuf is a byte stream, so c is an int, not a byte\r
+ byteCls = self._mModel['classTable'][wrap_ord(c)]\r
+ if self._mCurrentState == eStart:\r
+ self._mCurrentBytePos = 0\r
+ self._mCurrentCharLen = self._mModel['charLenTable'][byteCls]\r
+ # from byte's class and stateTable, we get its next state\r
+ curr_state = (self._mCurrentState * self._mModel['classFactor']\r
+ + byteCls)\r
+ self._mCurrentState = self._mModel['stateTable'][curr_state]\r
+ self._mCurrentBytePos += 1\r
+ return self._mCurrentState\r
+\r
+ def get_current_charlen(self):\r
+ return self._mCurrentCharLen\r
+\r
+ def get_coding_state_machine(self):\r
+ return self._mModel['name']\r
######################### END LICENSE BLOCK #########################
-from sys import version_info
-
-
def wrap_ord(a):
- if isinstance(a, str) and version_info < (3, 0):
+ if isinstance(a, str):
return ord(a)
- elif isinstance(a, int) and version_info >= (3, 0):
+ elif isinstance(a, int):
return a
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Universal charset detector code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 2001
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-# Shy Shalom - original C code
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-_debug = 0
-
-eDetecting = 0
-eFoundIt = 1
-eNotMe = 2
-
-eStart = 0
-eError = 1
-eItsMe = 2
-
-SHORTCUT_THRESHOLD = 0.95
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is Mozilla Universal charset detector code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 2001\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+# Shy Shalom - original C code\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+# \r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+# \r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+_debug = 0\r
+\r
+eDetecting = 0\r
+eFoundIt = 1\r
+eNotMe = 2\r
+\r
+eStart = 0\r
+eError = 1\r
+eItsMe = 2\r
+\r
+SHORTCUT_THRESHOLD = 0.95\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from . import constants
-from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
- ISO2022KRSMModel)
-from .charsetprober import CharSetProber
-from .codingstatemachine import CodingStateMachine
-
-
-class EscCharSetProber(CharSetProber):
- def __init__(self):
- CharSetProber.__init__(self)
- self._mCodingSM = [
- CodingStateMachine(HZSMModel),
- CodingStateMachine(ISO2022CNSMModel),
- CodingStateMachine(ISO2022JPSMModel),
- CodingStateMachine(ISO2022KRSMModel)
- ]
- self.reset()
-
- def reset(self):
- CharSetProber.reset(self)
- for codingSM in self._mCodingSM:
- if not codingSM:
- continue
- codingSM.active = True
- codingSM.reset()
- self._mActiveSM = len(self._mCodingSM)
- self._mDetectedCharset = None
-
- def get_charset_name(self):
- return self._mDetectedCharset
-
- def get_confidence(self):
- if self._mDetectedCharset:
- return 0.99
- else:
- return 0.00
-
- def feed(self, aBuf):
- for c in aBuf:
- for codingSM in self._mCodingSM:
- if not codingSM:
- continue
- if not codingSM.active:
- continue
- codingState = codingSM.next_state(c)
- if codingState == constants.eError:
- codingSM.active = False
- self._mActiveSM -= 1
- if self._mActiveSM <= 0:
- self._mState = constants.eNotMe
- return self.get_state()
- elif codingState == constants.eItsMe:
- self._mState = constants.eFoundIt
- self._mDetectedCharset = codingSM.get_coding_state_machine()
- return self.get_state()
-
- return self.get_state()
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is mozilla.org code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 1998\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from . import constants\r
+from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,\r
+ ISO2022KRSMModel)\r
+from .charsetprober import CharSetProber\r
+from .codingstatemachine import CodingStateMachine\r
+from .compat import wrap_ord\r
+\r
+\r
+class EscCharSetProber(CharSetProber):\r
+ def __init__(self):\r
+ CharSetProber.__init__(self)\r
+ self._mCodingSM = [\r
+ CodingStateMachine(HZSMModel),\r
+ CodingStateMachine(ISO2022CNSMModel),\r
+ CodingStateMachine(ISO2022JPSMModel),\r
+ CodingStateMachine(ISO2022KRSMModel)\r
+ ]\r
+ self.reset()\r
+\r
+ def reset(self):\r
+ CharSetProber.reset(self)\r
+ for codingSM in self._mCodingSM:\r
+ if not codingSM:\r
+ continue\r
+ codingSM.active = True\r
+ codingSM.reset()\r
+ self._mActiveSM = len(self._mCodingSM)\r
+ self._mDetectedCharset = None\r
+\r
+ def get_charset_name(self):\r
+ return self._mDetectedCharset\r
+\r
+ def get_confidence(self):\r
+ if self._mDetectedCharset:\r
+ return 0.99\r
+ else:\r
+ return 0.00\r
+\r
+ def feed(self, aBuf):\r
+ for c in aBuf:\r
+ # PY3K: aBuf is a byte array, so c is an int, not a byte\r
+ for codingSM in self._mCodingSM:\r
+ if not codingSM:\r
+ continue\r
+ if not codingSM.active:\r
+ continue\r
+ codingState = codingSM.next_state(wrap_ord(c))\r
+ if codingState == constants.eError:\r
+ codingSM.active = False\r
+ self._mActiveSM -= 1\r
+ if self._mActiveSM <= 0:\r
+ self._mState = constants.eNotMe\r
+ return self.get_state()\r
+ elif codingState == constants.eItsMe:\r
+ self._mState = constants.eFoundIt\r
+ self._mDetectedCharset = codingSM.get_coding_state_machine() # nopep8\r
+ return self.get_state()\r
+\r
+ return self.get_state()\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .constants import eStart, eError, eItsMe
-
-HZ_cls = (
- 1, 0, 0, 0, 0, 0, 0, 0, # 00 - 07
- 0, 0, 0, 0, 0, 0, 0, 0, # 08 - 0f
- 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17
- 0, 0, 0, 1, 0, 0, 0, 0, # 18 - 1f
- 0, 0, 0, 0, 0, 0, 0, 0, # 20 - 27
- 0, 0, 0, 0, 0, 0, 0, 0, # 28 - 2f
- 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37
- 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f
- 0, 0, 0, 0, 0, 0, 0, 0, # 40 - 47
- 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f
- 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57
- 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f
- 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67
- 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f
- 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77
- 0, 0, 0, 4, 0, 5, 2, 0, # 78 - 7f
- 1, 1, 1, 1, 1, 1, 1, 1, # 80 - 87
- 1, 1, 1, 1, 1, 1, 1, 1, # 88 - 8f
- 1, 1, 1, 1, 1, 1, 1, 1, # 90 - 97
- 1, 1, 1, 1, 1, 1, 1, 1, # 98 - 9f
- 1, 1, 1, 1, 1, 1, 1, 1, # a0 - a7
- 1, 1, 1, 1, 1, 1, 1, 1, # a8 - af
- 1, 1, 1, 1, 1, 1, 1, 1, # b0 - b7
- 1, 1, 1, 1, 1, 1, 1, 1, # b8 - bf
- 1, 1, 1, 1, 1, 1, 1, 1, # c0 - c7
- 1, 1, 1, 1, 1, 1, 1, 1, # c8 - cf
- 1, 1, 1, 1, 1, 1, 1, 1, # d0 - d7
- 1, 1, 1, 1, 1, 1, 1, 1, # d8 - df
- 1, 1, 1, 1, 1, 1, 1, 1, # e0 - e7
- 1, 1, 1, 1, 1, 1, 1, 1, # e8 - ef
- 1, 1, 1, 1, 1, 1, 1, 1, # f0 - f7
- 1, 1, 1, 1, 1, 1, 1, 1, # f8 - ff
-)
-
-HZ_st = (
- eStart, eError, 3, eStart, eStart, eStart, eError, eError, # 00-07
- eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe, # 08-0f
- eItsMe, eItsMe, eError, eError, eStart, eStart, 4, eError, # 10-17
- 5, eError, 6, eError, 5, 5, 4, eError, # 18-1f
- 4, eError, 4, 4, 4, eError, 4, eError, # 20-27
- 4, eItsMe, eStart, eStart, eStart, eStart, eStart, eStart, # 28-2f
-)
-
-HZCharLenTable = (0, 0, 0, 0, 0, 0)
-
-HZSMModel = {'classTable': HZ_cls,
- 'classFactor': 6,
- 'stateTable': HZ_st,
- 'charLenTable': HZCharLenTable,
- 'name': "HZ-GB-2312"}
-
-ISO2022CN_cls = (
- 2, 0, 0, 0, 0, 0, 0, 0, # 00 - 07
- 0, 0, 0, 0, 0, 0, 0, 0, # 08 - 0f
- 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17
- 0, 0, 0, 1, 0, 0, 0, 0, # 18 - 1f
- 0, 0, 0, 0, 0, 0, 0, 0, # 20 - 27
- 0, 3, 0, 0, 0, 0, 0, 0, # 28 - 2f
- 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37
- 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f
- 0, 0, 0, 4, 0, 0, 0, 0, # 40 - 47
- 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f
- 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57
- 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f
- 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67
- 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f
- 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77
- 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f
- 2, 2, 2, 2, 2, 2, 2, 2, # 80 - 87
- 2, 2, 2, 2, 2, 2, 2, 2, # 88 - 8f
- 2, 2, 2, 2, 2, 2, 2, 2, # 90 - 97
- 2, 2, 2, 2, 2, 2, 2, 2, # 98 - 9f
- 2, 2, 2, 2, 2, 2, 2, 2, # a0 - a7
- 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af
- 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7
- 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf
- 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7
- 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf
- 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7
- 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df
- 2, 2, 2, 2, 2, 2, 2, 2, # e0 - e7
- 2, 2, 2, 2, 2, 2, 2, 2, # e8 - ef
- 2, 2, 2, 2, 2, 2, 2, 2, # f0 - f7
- 2, 2, 2, 2, 2, 2, 2, 2, # f8 - ff
-)
-
-ISO2022CN_st = (
- eStart, 3, eError, eStart, eStart, eStart, eStart, eStart, # 00-07
- eStart, eError, eError, eError, eError, eError, eError, eError, # 08-0f
- eError, eError, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, # 10-17
- eItsMe, eItsMe, eItsMe, eError, eError, eError, 4, eError, # 18-1f
- eError, eError, eError, eItsMe, eError, eError, eError, eError, # 20-27
- 5, 6, eError, eError, eError, eError, eError, eError, # 28-2f
- eError, eError, eError, eItsMe, eError, eError, eError, eError, # 30-37
- eError, eError, eError, eError, eError, eItsMe, eError, eStart, # 38-3f
-)
-
-ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0)
-
-ISO2022CNSMModel = {'classTable': ISO2022CN_cls,
- 'classFactor': 9,
- 'stateTable': ISO2022CN_st,
- 'charLenTable': ISO2022CNCharLenTable,
- 'name': "ISO-2022-CN"}
-
-ISO2022JP_cls = (
- 2, 0, 0, 0, 0, 0, 0, 0, # 00 - 07
- 0, 0, 0, 0, 0, 0, 2, 2, # 08 - 0f
- 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17
- 0, 0, 0, 1, 0, 0, 0, 0, # 18 - 1f
- 0, 0, 0, 0, 7, 0, 0, 0, # 20 - 27
- 3, 0, 0, 0, 0, 0, 0, 0, # 28 - 2f
- 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37
- 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f
- 6, 0, 4, 0, 8, 0, 0, 0, # 40 - 47
- 0, 9, 5, 0, 0, 0, 0, 0, # 48 - 4f
- 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57
- 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f
- 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67
- 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f
- 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77
- 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f
- 2, 2, 2, 2, 2, 2, 2, 2, # 80 - 87
- 2, 2, 2, 2, 2, 2, 2, 2, # 88 - 8f
- 2, 2, 2, 2, 2, 2, 2, 2, # 90 - 97
- 2, 2, 2, 2, 2, 2, 2, 2, # 98 - 9f
- 2, 2, 2, 2, 2, 2, 2, 2, # a0 - a7
- 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af
- 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7
- 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf
- 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7
- 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf
- 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7
- 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df
- 2, 2, 2, 2, 2, 2, 2, 2, # e0 - e7
- 2, 2, 2, 2, 2, 2, 2, 2, # e8 - ef
- 2, 2, 2, 2, 2, 2, 2, 2, # f0 - f7
- 2, 2, 2, 2, 2, 2, 2, 2, # f8 - ff
-)
-
-ISO2022JP_st = (
- eStart, 3, eError, eStart, eStart, eStart, eStart, eStart, # 00-07
- eStart, eStart, eError, eError, eError, eError, eError, eError, # 08-0f
- eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe, # 10-17
- eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eError, eError, # 18-1f
- eError, 5, eError, eError, eError, 4, eError, eError, # 20-27
- eError, eError, eError, 6, eItsMe, eError, eItsMe, eError, # 28-2f
- eError, eError, eError, eError, eError, eError, eItsMe, eItsMe, # 30-37
- eError, eError, eError, eItsMe, eError, eError, eError, eError, # 38-3f
- eError, eError, eError, eError, eItsMe, eError, eStart, eStart, # 40-47
-)
-
-ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
-
-ISO2022JPSMModel = {'classTable': ISO2022JP_cls,
- 'classFactor': 10,
- 'stateTable': ISO2022JP_st,
- 'charLenTable': ISO2022JPCharLenTable,
- 'name': "ISO-2022-JP"}
-
-ISO2022KR_cls = (
- 2, 0, 0, 0, 0, 0, 0, 0, # 00 - 07
- 0, 0, 0, 0, 0, 0, 0, 0, # 08 - 0f
- 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17
- 0, 0, 0, 1, 0, 0, 0, 0, # 18 - 1f
- 0, 0, 0, 0, 3, 0, 0, 0, # 20 - 27
- 0, 4, 0, 0, 0, 0, 0, 0, # 28 - 2f
- 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37
- 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f
- 0, 0, 0, 5, 0, 0, 0, 0, # 40 - 47
- 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f
- 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57
- 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f
- 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67
- 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f
- 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77
- 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f
- 2, 2, 2, 2, 2, 2, 2, 2, # 80 - 87
- 2, 2, 2, 2, 2, 2, 2, 2, # 88 - 8f
- 2, 2, 2, 2, 2, 2, 2, 2, # 90 - 97
- 2, 2, 2, 2, 2, 2, 2, 2, # 98 - 9f
- 2, 2, 2, 2, 2, 2, 2, 2, # a0 - a7
- 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af
- 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7
- 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf
- 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7
- 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf
- 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7
- 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df
- 2, 2, 2, 2, 2, 2, 2, 2, # e0 - e7
- 2, 2, 2, 2, 2, 2, 2, 2, # e8 - ef
- 2, 2, 2, 2, 2, 2, 2, 2, # f0 - f7
- 2, 2, 2, 2, 2, 2, 2, 2, # f8 - ff
-)
-
-ISO2022KR_st = (
- eStart, 3, eError, eStart, eStart, eStart, eError, eError, # 00-07
- eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe, # 08-0f
- eItsMe, eItsMe, eError, eError, eError, 4, eError, eError, # 10-17
- eError, eError, eError, eError, 5, eError, eError, eError, # 18-1f
- eError, eError, eError, eItsMe, eStart, eStart, eStart, eStart, # 20-27
-)
-
-ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0)
-
-ISO2022KRSMModel = {'classTable': ISO2022KR_cls,
- 'classFactor': 6,
- 'stateTable': ISO2022KR_st,
- 'charLenTable': ISO2022KRCharLenTable,
- 'name': "ISO-2022-KR"}
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is mozilla.org code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 1998\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from .constants import eStart, eError, eItsMe\r
+\r
+HZ_cls = (\r
+1,0,0,0,0,0,0,0, # 00 - 07\r
+0,0,0,0,0,0,0,0, # 08 - 0f\r
+0,0,0,0,0,0,0,0, # 10 - 17\r
+0,0,0,1,0,0,0,0, # 18 - 1f\r
+0,0,0,0,0,0,0,0, # 20 - 27\r
+0,0,0,0,0,0,0,0, # 28 - 2f\r
+0,0,0,0,0,0,0,0, # 30 - 37\r
+0,0,0,0,0,0,0,0, # 38 - 3f\r
+0,0,0,0,0,0,0,0, # 40 - 47\r
+0,0,0,0,0,0,0,0, # 48 - 4f\r
+0,0,0,0,0,0,0,0, # 50 - 57\r
+0,0,0,0,0,0,0,0, # 58 - 5f\r
+0,0,0,0,0,0,0,0, # 60 - 67\r
+0,0,0,0,0,0,0,0, # 68 - 6f\r
+0,0,0,0,0,0,0,0, # 70 - 77\r
+0,0,0,4,0,5,2,0, # 78 - 7f\r
+1,1,1,1,1,1,1,1, # 80 - 87\r
+1,1,1,1,1,1,1,1, # 88 - 8f\r
+1,1,1,1,1,1,1,1, # 90 - 97\r
+1,1,1,1,1,1,1,1, # 98 - 9f\r
+1,1,1,1,1,1,1,1, # a0 - a7\r
+1,1,1,1,1,1,1,1, # a8 - af\r
+1,1,1,1,1,1,1,1, # b0 - b7\r
+1,1,1,1,1,1,1,1, # b8 - bf\r
+1,1,1,1,1,1,1,1, # c0 - c7\r
+1,1,1,1,1,1,1,1, # c8 - cf\r
+1,1,1,1,1,1,1,1, # d0 - d7\r
+1,1,1,1,1,1,1,1, # d8 - df\r
+1,1,1,1,1,1,1,1, # e0 - e7\r
+1,1,1,1,1,1,1,1, # e8 - ef\r
+1,1,1,1,1,1,1,1, # f0 - f7\r
+1,1,1,1,1,1,1,1, # f8 - ff\r
+)\r
+\r
+HZ_st = (\r
+eStart,eError, 3,eStart,eStart,eStart,eError,eError,# 00-07\r
+eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f\r
+eItsMe,eItsMe,eError,eError,eStart,eStart, 4,eError,# 10-17\r
+ 5,eError, 6,eError, 5, 5, 4,eError,# 18-1f\r
+ 4,eError, 4, 4, 4,eError, 4,eError,# 20-27\r
+ 4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f\r
+)\r
+\r
+HZCharLenTable = (0, 0, 0, 0, 0, 0)\r
+\r
+HZSMModel = {'classTable': HZ_cls,\r
+ 'classFactor': 6,\r
+ 'stateTable': HZ_st,\r
+ 'charLenTable': HZCharLenTable,\r
+ 'name': "HZ-GB-2312"}\r
+\r
+ISO2022CN_cls = (\r
+2,0,0,0,0,0,0,0, # 00 - 07\r
+0,0,0,0,0,0,0,0, # 08 - 0f\r
+0,0,0,0,0,0,0,0, # 10 - 17\r
+0,0,0,1,0,0,0,0, # 18 - 1f\r
+0,0,0,0,0,0,0,0, # 20 - 27\r
+0,3,0,0,0,0,0,0, # 28 - 2f\r
+0,0,0,0,0,0,0,0, # 30 - 37\r
+0,0,0,0,0,0,0,0, # 38 - 3f\r
+0,0,0,4,0,0,0,0, # 40 - 47\r
+0,0,0,0,0,0,0,0, # 48 - 4f\r
+0,0,0,0,0,0,0,0, # 50 - 57\r
+0,0,0,0,0,0,0,0, # 58 - 5f\r
+0,0,0,0,0,0,0,0, # 60 - 67\r
+0,0,0,0,0,0,0,0, # 68 - 6f\r
+0,0,0,0,0,0,0,0, # 70 - 77\r
+0,0,0,0,0,0,0,0, # 78 - 7f\r
+2,2,2,2,2,2,2,2, # 80 - 87\r
+2,2,2,2,2,2,2,2, # 88 - 8f\r
+2,2,2,2,2,2,2,2, # 90 - 97\r
+2,2,2,2,2,2,2,2, # 98 - 9f\r
+2,2,2,2,2,2,2,2, # a0 - a7\r
+2,2,2,2,2,2,2,2, # a8 - af\r
+2,2,2,2,2,2,2,2, # b0 - b7\r
+2,2,2,2,2,2,2,2, # b8 - bf\r
+2,2,2,2,2,2,2,2, # c0 - c7\r
+2,2,2,2,2,2,2,2, # c8 - cf\r
+2,2,2,2,2,2,2,2, # d0 - d7\r
+2,2,2,2,2,2,2,2, # d8 - df\r
+2,2,2,2,2,2,2,2, # e0 - e7\r
+2,2,2,2,2,2,2,2, # e8 - ef\r
+2,2,2,2,2,2,2,2, # f0 - f7\r
+2,2,2,2,2,2,2,2, # f8 - ff\r
+)\r
+\r
+ISO2022CN_st = (\r
+eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07\r
+eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f\r
+eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17\r
+eItsMe,eItsMe,eItsMe,eError,eError,eError, 4,eError,# 18-1f\r
+eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27\r
+ 5, 6,eError,eError,eError,eError,eError,eError,# 28-2f\r
+eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37\r
+eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f\r
+)\r
+\r
+ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0)\r
+\r
+ISO2022CNSMModel = {'classTable': ISO2022CN_cls,\r
+ 'classFactor': 9,\r
+ 'stateTable': ISO2022CN_st,\r
+ 'charLenTable': ISO2022CNCharLenTable,\r
+ 'name': "ISO-2022-CN"}\r
+\r
+ISO2022JP_cls = (\r
+2,0,0,0,0,0,0,0, # 00 - 07\r
+0,0,0,0,0,0,2,2, # 08 - 0f\r
+0,0,0,0,0,0,0,0, # 10 - 17\r
+0,0,0,1,0,0,0,0, # 18 - 1f\r
+0,0,0,0,7,0,0,0, # 20 - 27\r
+3,0,0,0,0,0,0,0, # 28 - 2f\r
+0,0,0,0,0,0,0,0, # 30 - 37\r
+0,0,0,0,0,0,0,0, # 38 - 3f\r
+6,0,4,0,8,0,0,0, # 40 - 47\r
+0,9,5,0,0,0,0,0, # 48 - 4f\r
+0,0,0,0,0,0,0,0, # 50 - 57\r
+0,0,0,0,0,0,0,0, # 58 - 5f\r
+0,0,0,0,0,0,0,0, # 60 - 67\r
+0,0,0,0,0,0,0,0, # 68 - 6f\r
+0,0,0,0,0,0,0,0, # 70 - 77\r
+0,0,0,0,0,0,0,0, # 78 - 7f\r
+2,2,2,2,2,2,2,2, # 80 - 87\r
+2,2,2,2,2,2,2,2, # 88 - 8f\r
+2,2,2,2,2,2,2,2, # 90 - 97\r
+2,2,2,2,2,2,2,2, # 98 - 9f\r
+2,2,2,2,2,2,2,2, # a0 - a7\r
+2,2,2,2,2,2,2,2, # a8 - af\r
+2,2,2,2,2,2,2,2, # b0 - b7\r
+2,2,2,2,2,2,2,2, # b8 - bf\r
+2,2,2,2,2,2,2,2, # c0 - c7\r
+2,2,2,2,2,2,2,2, # c8 - cf\r
+2,2,2,2,2,2,2,2, # d0 - d7\r
+2,2,2,2,2,2,2,2, # d8 - df\r
+2,2,2,2,2,2,2,2, # e0 - e7\r
+2,2,2,2,2,2,2,2, # e8 - ef\r
+2,2,2,2,2,2,2,2, # f0 - f7\r
+2,2,2,2,2,2,2,2, # f8 - ff\r
+)\r
+\r
+ISO2022JP_st = (\r
+eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07\r
+eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f\r
+eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17\r
+eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f\r
+eError, 5,eError,eError,eError, 4,eError,eError,# 20-27\r
+eError,eError,eError, 6,eItsMe,eError,eItsMe,eError,# 28-2f\r
+eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37\r
+eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f\r
+eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47\r
+)\r
+\r
+ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)\r
+\r
+ISO2022JPSMModel = {'classTable': ISO2022JP_cls,\r
+ 'classFactor': 10,\r
+ 'stateTable': ISO2022JP_st,\r
+ 'charLenTable': ISO2022JPCharLenTable,\r
+ 'name': "ISO-2022-JP"}\r
+\r
+ISO2022KR_cls = (\r
+2,0,0,0,0,0,0,0, # 00 - 07\r
+0,0,0,0,0,0,0,0, # 08 - 0f\r
+0,0,0,0,0,0,0,0, # 10 - 17\r
+0,0,0,1,0,0,0,0, # 18 - 1f\r
+0,0,0,0,3,0,0,0, # 20 - 27\r
+0,4,0,0,0,0,0,0, # 28 - 2f\r
+0,0,0,0,0,0,0,0, # 30 - 37\r
+0,0,0,0,0,0,0,0, # 38 - 3f\r
+0,0,0,5,0,0,0,0, # 40 - 47\r
+0,0,0,0,0,0,0,0, # 48 - 4f\r
+0,0,0,0,0,0,0,0, # 50 - 57\r
+0,0,0,0,0,0,0,0, # 58 - 5f\r
+0,0,0,0,0,0,0,0, # 60 - 67\r
+0,0,0,0,0,0,0,0, # 68 - 6f\r
+0,0,0,0,0,0,0,0, # 70 - 77\r
+0,0,0,0,0,0,0,0, # 78 - 7f\r
+2,2,2,2,2,2,2,2, # 80 - 87\r
+2,2,2,2,2,2,2,2, # 88 - 8f\r
+2,2,2,2,2,2,2,2, # 90 - 97\r
+2,2,2,2,2,2,2,2, # 98 - 9f\r
+2,2,2,2,2,2,2,2, # a0 - a7\r
+2,2,2,2,2,2,2,2, # a8 - af\r
+2,2,2,2,2,2,2,2, # b0 - b7\r
+2,2,2,2,2,2,2,2, # b8 - bf\r
+2,2,2,2,2,2,2,2, # c0 - c7\r
+2,2,2,2,2,2,2,2, # c8 - cf\r
+2,2,2,2,2,2,2,2, # d0 - d7\r
+2,2,2,2,2,2,2,2, # d8 - df\r
+2,2,2,2,2,2,2,2, # e0 - e7\r
+2,2,2,2,2,2,2,2, # e8 - ef\r
+2,2,2,2,2,2,2,2, # f0 - f7\r
+2,2,2,2,2,2,2,2, # f8 - ff\r
+)\r
+\r
+ISO2022KR_st = (\r
+eStart, 3,eError,eStart,eStart,eStart,eError,eError,# 00-07\r
+eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f\r
+eItsMe,eItsMe,eError,eError,eError, 4,eError,eError,# 10-17\r
+eError,eError,eError,eError, 5,eError,eError,eError,# 18-1f\r
+eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27\r
+)\r
+\r
+ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0)\r
+\r
+ISO2022KRSMModel = {'classTable': ISO2022KR_cls,\r
+ 'classFactor': 6,\r
+ 'stateTable': ISO2022KR_st,\r
+ 'charLenTable': ISO2022KRCharLenTable,\r
+ 'name': "ISO-2022-KR"}\r
+\r
+# flake8: noqa\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-import sys
-from . import constants
-from .mbcharsetprober import MultiByteCharSetProber
-from .codingstatemachine import CodingStateMachine
-from .chardistribution import EUCJPDistributionAnalysis
-from .jpcntx import EUCJPContextAnalysis
-from .mbcssm import EUCJPSMModel
-
-
-class EUCJPProber(MultiByteCharSetProber):
- def __init__(self):
- MultiByteCharSetProber.__init__(self)
- self._mCodingSM = CodingStateMachine(EUCJPSMModel)
- self._mDistributionAnalyzer = EUCJPDistributionAnalysis()
- self._mContextAnalyzer = EUCJPContextAnalysis()
- self.reset()
-
- def reset(self):
- MultiByteCharSetProber.reset(self)
- self._mContextAnalyzer.reset()
-
- def get_charset_name(self):
- return "EUC-JP"
-
- def feed(self, aBuf):
- aLen = len(aBuf)
- for i in range(0, aLen):
- codingState = self._mCodingSM.next_state(aBuf[i])
- if codingState == constants.eError:
- if constants._debug:
- sys.stderr.write(self.get_charset_name() +
- ' prober hit error at byte ' + str(i) +
- '\n')
- self._mState = constants.eNotMe
- break
- elif codingState == constants.eItsMe:
- self._mState = constants.eFoundIt
- break
- elif codingState == constants.eStart:
- charLen = self._mCodingSM.get_current_charlen()
- if i == 0:
- self._mLastChar[1] = aBuf[0]
- self._mContextAnalyzer.feed(self._mLastChar, charLen)
- self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
- else:
- self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen)
- self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
- charLen)
-
- self._mLastChar[0] = aBuf[aLen - 1]
-
- if self.get_state() == constants.eDetecting:
- if (self._mContextAnalyzer.got_enough_data() and
- (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
- self._mState = constants.eFoundIt
-
- return self.get_state()
-
- def get_confidence(self):
- contxtCf = self._mContextAnalyzer.get_confidence()
- distribCf = self._mDistributionAnalyzer.get_confidence()
- return max(contxtCf, distribCf)
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is mozilla.org code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 1998\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+import sys\r
+from . import constants\r
+from .mbcharsetprober import MultiByteCharSetProber\r
+from .codingstatemachine import CodingStateMachine\r
+from .chardistribution import EUCJPDistributionAnalysis\r
+from .jpcntx import EUCJPContextAnalysis\r
+from .mbcssm import EUCJPSMModel\r
+\r
+\r
+class EUCJPProber(MultiByteCharSetProber):\r
+ def __init__(self):\r
+ MultiByteCharSetProber.__init__(self)\r
+ self._mCodingSM = CodingStateMachine(EUCJPSMModel)\r
+ self._mDistributionAnalyzer = EUCJPDistributionAnalysis()\r
+ self._mContextAnalyzer = EUCJPContextAnalysis()\r
+ self.reset()\r
+\r
+ def reset(self):\r
+ MultiByteCharSetProber.reset(self)\r
+ self._mContextAnalyzer.reset()\r
+\r
+ def get_charset_name(self):\r
+ return "EUC-JP"\r
+\r
+ def feed(self, aBuf):\r
+ aLen = len(aBuf)\r
+ for i in range(0, aLen):\r
+ # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte\r
+ codingState = self._mCodingSM.next_state(aBuf[i])\r
+ if codingState == constants.eError:\r
+ if constants._debug:\r
+ sys.stderr.write(self.get_charset_name()\r
+ + ' prober hit error at byte ' + str(i)\r
+ + '\n')\r
+ self._mState = constants.eNotMe\r
+ break\r
+ elif codingState == constants.eItsMe:\r
+ self._mState = constants.eFoundIt\r
+ break\r
+ elif codingState == constants.eStart:\r
+ charLen = self._mCodingSM.get_current_charlen()\r
+ if i == 0:\r
+ self._mLastChar[1] = aBuf[0]\r
+ self._mContextAnalyzer.feed(self._mLastChar, charLen)\r
+ self._mDistributionAnalyzer.feed(self._mLastChar, charLen)\r
+ else:\r
+ self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen)\r
+ self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],\r
+ charLen)\r
+\r
+ self._mLastChar[0] = aBuf[aLen - 1]\r
+\r
+ if self.get_state() == constants.eDetecting:\r
+ if (self._mContextAnalyzer.got_enough_data() and\r
+ (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):\r
+ self._mState = constants.eFoundIt\r
+\r
+ return self.get_state()\r
+\r
+ def get_confidence(self):\r
+ contxtCf = self._mContextAnalyzer.get_confidence()\r
+ distribCf = self._mDistributionAnalyzer.get_confidence()\r
+ return max(contxtCf, distribCf)\r
8704,8705,8706,8707,8708,8709,8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,
8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,
8736,8737,8738,8739,8740,8741)
+
+# flake8: noqa
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .mbcharsetprober import MultiByteCharSetProber
-from .codingstatemachine import CodingStateMachine
-from .chardistribution import EUCKRDistributionAnalysis
-from .mbcssm import EUCKRSMModel
-
-
-class EUCKRProber(MultiByteCharSetProber):
- def __init__(self):
- MultiByteCharSetProber.__init__(self)
- self._mCodingSM = CodingStateMachine(EUCKRSMModel)
- self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
- self.reset()
-
- def get_charset_name(self):
- return "EUC-KR"
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is mozilla.org code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 1998\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from .mbcharsetprober import MultiByteCharSetProber\r
+from .codingstatemachine import CodingStateMachine\r
+from .chardistribution import EUCKRDistributionAnalysis\r
+from .mbcssm import EUCKRSMModel\r
+\r
+\r
+class EUCKRProber(MultiByteCharSetProber):\r
+ def __init__(self):\r
+ MultiByteCharSetProber.__init__(self)\r
+ self._mCodingSM = CodingStateMachine(EUCKRSMModel)\r
+ self._mDistributionAnalyzer = EUCKRDistributionAnalysis()\r
+ self.reset()\r
+\r
+ def get_charset_name(self):\r
+ return "EUC-KR"\r
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
-#
+#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
-#
+#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
######################### END LICENSE BLOCK #########################
# EUCTW frequency table
-# Converted from big5 work
-# by Taiwan's Mandarin Promotion Council
+# Converted from big5 work
+# by Taiwan's Mandarin Promotion Council
# <http:#www.edu.tw:81/mandr/>
# 128 --> 0.42261
#
# Idea Distribution Ratio = 0.74851/(1-0.74851) =2.98
# Random Distribution Ration = 512/(5401-512)=0.105
-#
+#
# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75
-# Char to FreqOrder table ,
+# Char to FreqOrder table ,
EUCTW_TABLE_SIZE = 8102
-EUCTWCharToFreqOrder = ( \
+EUCTWCharToFreqOrder = (
1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742
3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758
1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774
8694,8695,8696,8697,8698,8699,8700,8701,8702,8703,8704,8705,8706,8707,8708,8709, # 8710
8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,8720,8721,8722,8723,8724,8725, # 8726
8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,8736,8737,8738,8739,8740,8741) # 8742
+
+# flake8: noqa
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .mbcharsetprober import MultiByteCharSetProber
-from .codingstatemachine import CodingStateMachine
-from .chardistribution import EUCTWDistributionAnalysis
-from .mbcssm import EUCTWSMModel
-
-
-class EUCTWProber(MultiByteCharSetProber):
- def __init__(self):
- MultiByteCharSetProber.__init__(self)
- self._mCodingSM = CodingStateMachine(EUCTWSMModel)
- self._mDistributionAnalyzer = EUCTWDistributionAnalysis()
- self.reset()
-
- def get_charset_name(self):
- return "EUC-TW"
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is mozilla.org code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 1998\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+# \r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+# \r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from .mbcharsetprober import MultiByteCharSetProber\r
+from .codingstatemachine import CodingStateMachine\r
+from .chardistribution import EUCTWDistributionAnalysis\r
+from .mbcssm import EUCTWSMModel\r
+\r
+class EUCTWProber(MultiByteCharSetProber):\r
+ def __init__(self):\r
+ MultiByteCharSetProber.__init__(self)\r
+ self._mCodingSM = CodingStateMachine(EUCTWSMModel)\r
+ self._mDistributionAnalyzer = EUCTWDistributionAnalysis()\r
+ self.reset()\r
+\r
+ def get_charset_name(self):\r
+ return "EUC-TW"\r
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
-#
+#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
-#
+#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
#
# Ideal Distribution Ratio = 0.79135/(1-0.79135) = 3.79
# Random Distribution Ration = 512 / (3755 - 512) = 0.157
-#
+#
# Typical Distribution Ratio about 25% of Ideal one, still much higher that RDR
GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9
GB2312_TABLE_SIZE = 3760
-GB2312CharToFreqOrder = ( \
+GB2312CharToFreqOrder = (
1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205,
2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842,
2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409,
5867,5507,6273,4206,6274,4789,6098,6764,3619,3646,3833,3804,2394,3788,4936,3978,
4866,4899,6099,6100,5559,6478,6765,3599,5868,6101,5869,5870,6275,6766,4527,6767)
+# flake8: noqa
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .mbcharsetprober import MultiByteCharSetProber
-from .codingstatemachine import CodingStateMachine
-from .chardistribution import GB2312DistributionAnalysis
-from .mbcssm import GB2312SMModel
-
-
-class GB2312Prober(MultiByteCharSetProber):
- def __init__(self):
- MultiByteCharSetProber.__init__(self)
- self._mCodingSM = CodingStateMachine(GB2312SMModel)
- self._mDistributionAnalyzer = GB2312DistributionAnalysis()
- self.reset()
-
- def get_charset_name(self):
- return "GB2312"
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is mozilla.org code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 1998\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+# \r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+# \r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from .mbcharsetprober import MultiByteCharSetProber\r
+from .codingstatemachine import CodingStateMachine\r
+from .chardistribution import GB2312DistributionAnalysis\r
+from .mbcssm import GB2312SMModel\r
+\r
+class GB2312Prober(MultiByteCharSetProber):\r
+ def __init__(self):\r
+ MultiByteCharSetProber.__init__(self)\r
+ self._mCodingSM = CodingStateMachine(GB2312SMModel)\r
+ self._mDistributionAnalyzer = GB2312DistributionAnalysis()\r
+ self.reset()\r
+\r
+ def get_charset_name(self):\r
+ return "GB2312"\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Universal charset detector code.
-#
-# The Initial Developer of the Original Code is
-# Shy Shalom
-# Portions created by the Initial Developer are Copyright (C) 2005
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .charsetprober import CharSetProber
-from . import constants
-
-# This prober doesn't actually recognize a language or a charset.
-# It is a helper prober for the use of the Hebrew model probers
-
-### General ideas of the Hebrew charset recognition ###
-#
-# Four main charsets exist in Hebrew:
-# "ISO-8859-8" - Visual Hebrew
-# "windows-1255" - Logical Hebrew
-# "ISO-8859-8-I" - Logical Hebrew
-# "x-mac-hebrew" - ?? Logical Hebrew ??
-#
-# Both "ISO" charsets use a completely identical set of code points, whereas
-# "windows-1255" and "x-mac-hebrew" are two different proper supersets of
-# these code points. windows-1255 defines additional characters in the range
-# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific
-# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.
-# x-mac-hebrew defines similar additional code points but with a different
-# mapping.
-#
-# As far as an average Hebrew text with no diacritics is concerned, all four
-# charsets are identical with respect to code points. Meaning that for the
-# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters
-# (including final letters).
-#
-# The dominant difference between these charsets is their directionality.
-# "Visual" directionality means that the text is ordered as if the renderer is
-# not aware of a BIDI rendering algorithm. The renderer sees the text and
-# draws it from left to right. The text itself when ordered naturally is read
-# backwards. A buffer of Visual Hebrew generally looks like so:
-# "[last word of first line spelled backwards] [whole line ordered backwards
-# and spelled backwards] [first word of first line spelled backwards]
-# [end of line] [last word of second line] ... etc' "
-# adding punctuation marks, numbers and English text to visual text is
-# naturally also "visual" and from left to right.
-#
-# "Logical" directionality means the text is ordered "naturally" according to
-# the order it is read. It is the responsibility of the renderer to display
-# the text from right to left. A BIDI algorithm is used to place general
-# punctuation marks, numbers and English text in the text.
-#
-# Texts in x-mac-hebrew are almost impossible to find on the Internet. From
-# what little evidence I could find, it seems that its general directionality
-# is Logical.
-#
-# To sum up all of the above, the Hebrew probing mechanism knows about two
-# charsets:
-# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are
-# backwards while line order is natural. For charset recognition purposes
-# the line order is unimportant (In fact, for this implementation, even
-# word order is unimportant).
-# Logical Hebrew - "windows-1255" - normal, naturally ordered text.
-#
-# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be
-# specifically identified.
-# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew
-# that contain special punctuation marks or diacritics is displayed with
-# some unconverted characters showing as question marks. This problem might
-# be corrected using another model prober for x-mac-hebrew. Due to the fact
-# that x-mac-hebrew texts are so rare, writing another model prober isn't
-# worth the effort and performance hit.
-#
-#### The Prober ####
-#
-# The prober is divided between two SBCharSetProbers and a HebrewProber,
-# all of which are managed, created, fed data, inquired and deleted by the
-# SBCSGroupProber. The two SBCharSetProbers identify that the text is in
-# fact some kind of Hebrew, Logical or Visual. The final decision about which
-# one is it is made by the HebrewProber by combining final-letter scores
-# with the scores of the two SBCharSetProbers to produce a final answer.
-#
-# The SBCSGroupProber is responsible for stripping the original text of HTML
-# tags, English characters, numbers, low-ASCII punctuation characters, spaces
-# and new lines. It reduces any sequence of such characters to a single space.
-# The buffer fed to each prober in the SBCS group prober is pure text in
-# high-ASCII.
-# The two SBCharSetProbers (model probers) share the same language model:
-# Win1255Model.
-# The first SBCharSetProber uses the model normally as any other
-# SBCharSetProber does, to recognize windows-1255, upon which this model was
-# built. The second SBCharSetProber is told to make the pair-of-letter
-# lookup in the language model backwards. This in practice exactly simulates
-# a visual Hebrew model using the windows-1255 logical Hebrew model.
-#
-# The HebrewProber is not using any language model. All it does is look for
-# final-letter evidence suggesting the text is either logical Hebrew or visual
-# Hebrew. Disjointed from the model probers, the results of the HebrewProber
-# alone are meaningless. HebrewProber always returns 0.00 as confidence
-# since it never identifies a charset by itself. Instead, the pointer to the
-# HebrewProber is passed to the model probers as a helper "Name Prober".
-# When the Group prober receives a positive identification from any prober,
-# it asks for the name of the charset identified. If the prober queried is a
-# Hebrew model prober, the model prober forwards the call to the
-# HebrewProber to make the final decision. In the HebrewProber, the
-# decision is made according to the final-letters scores maintained and Both
-# model probers scores. The answer is returned in the form of the name of the
-# charset identified, either "windows-1255" or "ISO-8859-8".
-
-# windows-1255 / ISO-8859-8 code points of interest
-FINAL_KAF = '\xea'
-NORMAL_KAF = '\xeb'
-FINAL_MEM = '\xed'
-NORMAL_MEM = '\xee'
-FINAL_NUN = '\xef'
-NORMAL_NUN = '\xf0'
-FINAL_PE = '\xf3'
-NORMAL_PE = '\xf4'
-FINAL_TSADI = '\xf5'
-NORMAL_TSADI = '\xf6'
-
-# Minimum Visual vs Logical final letter score difference.
-# If the difference is below this, don't rely solely on the final letter score
-# distance.
-MIN_FINAL_CHAR_DISTANCE = 5
-
-# Minimum Visual vs Logical model score difference.
-# If the difference is below this, don't rely at all on the model score
-# distance.
-MIN_MODEL_DISTANCE = 0.01
-
-VISUAL_HEBREW_NAME = "ISO-8859-8"
-LOGICAL_HEBREW_NAME = "windows-1255"
-
-
-class HebrewProber(CharSetProber):
- def __init__(self):
- CharSetProber.__init__(self)
- self._mLogicalProber = None
- self._mVisualProber = None
- self.reset()
-
- def reset(self):
- self._mFinalCharLogicalScore = 0
- self._mFinalCharVisualScore = 0
- # The two last characters seen in the previous buffer,
- # mPrev and mBeforePrev are initialized to space in order to simulate a
- # word delimiter at the beginning of the data
- self._mPrev = ' '
- self._mBeforePrev = ' '
- # These probers are owned by the group prober.
-
- def set_model_probers(self, logicalProber, visualProber):
- self._mLogicalProber = logicalProber
- self._mVisualProber = visualProber
-
- def is_final(self, c):
- return c in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE, FINAL_TSADI]
-
- def is_non_final(self, c):
- # The normal Tsadi is not a good Non-Final letter due to words like
- # 'lechotet' (to chat) containing an apostrophe after the tsadi. This
- # apostrophe is converted to a space in FilterWithoutEnglishLetters
- # causing the Non-Final tsadi to appear at an end of a word even
- # though this is not the case in the original text.
- # The letters Pe and Kaf rarely display a related behavior of not being
- # a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak'
- # for example legally end with a Non-Final Pe or Kaf. However, the
- # benefit of these letters as Non-Final letters outweighs the damage
- # since these words are quite rare.
- return c in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE]
-
- def feed(self, aBuf):
- # Final letter analysis for logical-visual decision.
- # Look for evidence that the received buffer is either logical Hebrew
- # or visual Hebrew.
- # The following cases are checked:
- # 1) A word longer than 1 letter, ending with a final letter. This is
- # an indication that the text is laid out "naturally" since the final
- # letter really appears at the end. +1 for logical score.
- # 2) A word longer than 1 letter, ending with a Non-Final letter. In
- # normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi,
- # should not end with the Non-Final form of that letter. Exceptions
- # to this rule are mentioned above in isNonFinal(). This is an
- # indication that the text is laid out backwards. +1 for visual
- # score
- # 3) A word longer than 1 letter, starting with a final letter. Final
- # letters should not appear at the beginning of a word. This is an
- # indication that the text is laid out backwards. +1 for visual
- # score.
- #
- # The visual score and logical score are accumulated throughout the
- # text and are finally checked against each other in GetCharSetName().
- # No checking for final letters in the middle of words is done since
- # that case is not an indication for either Logical or Visual text.
- #
- # We automatically filter out all 7-bit characters (replace them with
- # spaces) so the word boundary detection works properly. [MAP]
-
- if self.get_state() == constants.eNotMe:
- # Both model probers say it's not them. No reason to continue.
- return constants.eNotMe
-
- aBuf = self.filter_high_bit_only(aBuf)
-
- for cur in aBuf:
- if cur == ' ':
- # We stand on a space - a word just ended
- if self._mBeforePrev != ' ':
- # next-to-last char was not a space so self._mPrev is not a
- # 1 letter word
- if self.is_final(self._mPrev):
- # case (1) [-2:not space][-1:final letter][cur:space]
- self._mFinalCharLogicalScore += 1
- elif self.is_non_final(self._mPrev):
- # case (2) [-2:not space]
- # [-1:Non-Final letter][cur:space]
- self._mFinalCharVisualScore += 1
- else:
- # Not standing on a space
- if ((self._mBeforePrev == ' ') and
- (self.is_final(self._mPrev)) and (cur != ' ')):
- # case (3) [-2:space][-1:final letter][cur:not space]
- self._mFinalCharVisualScore += 1
- self._mBeforePrev = self._mPrev
- self._mPrev = cur
-
- # Forever detecting, till the end or until both model probers return
- # eNotMe (handled above)
- return constants.eDetecting
-
- def get_charset_name(self):
- # Make the decision: is it Logical or Visual?
- # If the final letter score distance is dominant enough, rely on it.
- finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore
- if finalsub >= MIN_FINAL_CHAR_DISTANCE:
- return LOGICAL_HEBREW_NAME
- if finalsub <= -MIN_FINAL_CHAR_DISTANCE:
- return VISUAL_HEBREW_NAME
-
- # It's not dominant enough, try to rely on the model scores instead.
- modelsub = (self._mLogicalProber.get_confidence()
- - self._mVisualProber.get_confidence())
- if modelsub > MIN_MODEL_DISTANCE:
- return LOGICAL_HEBREW_NAME
- if modelsub < -MIN_MODEL_DISTANCE:
- return VISUAL_HEBREW_NAME
-
- # Still no good, back to final letter distance, maybe it'll save
- # the day.
- if finalsub < 0.0:
- return VISUAL_HEBREW_NAME
-
- # (finalsub > 0 - Logical) or (don't know what to do) default to
- # Logical.
- return LOGICAL_HEBREW_NAME
-
- def get_state(self):
- # Remain active as long as any of the model probers are active.
- if (self._mLogicalProber.get_state() == constants.eNotMe) and \
- (self._mVisualProber.get_state() == constants.eNotMe):
- return constants.eNotMe
- return constants.eDetecting
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is Mozilla Universal charset detector code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Shy Shalom\r
+# Portions created by the Initial Developer are Copyright (C) 2005\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from .charsetprober import CharSetProber\r
+from .constants import eNotMe, eDetecting\r
+from .compat import wrap_ord\r
+\r
+# This prober doesn't actually recognize a language or a charset.\r
+# It is a helper prober for the use of the Hebrew model probers\r
+\r
+### General ideas of the Hebrew charset recognition ###\r
+#\r
+# Four main charsets exist in Hebrew:\r
+# "ISO-8859-8" - Visual Hebrew\r
+# "windows-1255" - Logical Hebrew\r
+# "ISO-8859-8-I" - Logical Hebrew\r
+# "x-mac-hebrew" - ?? Logical Hebrew ??\r
+#\r
+# Both "ISO" charsets use a completely identical set of code points, whereas\r
+# "windows-1255" and "x-mac-hebrew" are two different proper supersets of\r
+# these code points. windows-1255 defines additional characters in the range\r
+# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific\r
+# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.\r
+# x-mac-hebrew defines similar additional code points but with a different\r
+# mapping.\r
+#\r
+# As far as an average Hebrew text with no diacritics is concerned, all four\r
+# charsets are identical with respect to code points. Meaning that for the\r
+# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters\r
+# (including final letters).\r
+#\r
+# The dominant difference between these charsets is their directionality.\r
+# "Visual" directionality means that the text is ordered as if the renderer is\r
+# not aware of a BIDI rendering algorithm. The renderer sees the text and\r
+# draws it from left to right. The text itself when ordered naturally is read\r
+# backwards. A buffer of Visual Hebrew generally looks like so:\r
+# "[last word of first line spelled backwards] [whole line ordered backwards\r
+# and spelled backwards] [first word of first line spelled backwards]\r
+# [end of line] [last word of second line] ... etc' "\r
+# adding punctuation marks, numbers and English text to visual text is\r
+# naturally also "visual" and from left to right.\r
+#\r
+# "Logical" directionality means the text is ordered "naturally" according to\r
+# the order it is read. It is the responsibility of the renderer to display\r
+# the text from right to left. A BIDI algorithm is used to place general\r
+# punctuation marks, numbers and English text in the text.\r
+#\r
+# Texts in x-mac-hebrew are almost impossible to find on the Internet. From\r
+# what little evidence I could find, it seems that its general directionality\r
+# is Logical.\r
+#\r
+# To sum up all of the above, the Hebrew probing mechanism knows about two\r
+# charsets:\r
+# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are\r
+# backwards while line order is natural. For charset recognition purposes\r
+# the line order is unimportant (In fact, for this implementation, even\r
+# word order is unimportant).\r
+# Logical Hebrew - "windows-1255" - normal, naturally ordered text.\r
+#\r
+# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be\r
+# specifically identified.\r
+# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew\r
+# that contain special punctuation marks or diacritics is displayed with\r
+# some unconverted characters showing as question marks. This problem might\r
+# be corrected using another model prober for x-mac-hebrew. Due to the fact\r
+# that x-mac-hebrew texts are so rare, writing another model prober isn't\r
+# worth the effort and performance hit.\r
+#\r
+#### The Prober ####\r
+#\r
+# The prober is divided between two SBCharSetProbers and a HebrewProber,\r
+# all of which are managed, created, fed data, inquired and deleted by the\r
+# SBCSGroupProber. The two SBCharSetProbers identify that the text is in\r
+# fact some kind of Hebrew, Logical or Visual. The final decision about which\r
+# one is it is made by the HebrewProber by combining final-letter scores\r
+# with the scores of the two SBCharSetProbers to produce a final answer.\r
+#\r
+# The SBCSGroupProber is responsible for stripping the original text of HTML\r
+# tags, English characters, numbers, low-ASCII punctuation characters, spaces\r
+# and new lines. It reduces any sequence of such characters to a single space.\r
+# The buffer fed to each prober in the SBCS group prober is pure text in\r
+# high-ASCII.\r
+# The two SBCharSetProbers (model probers) share the same language model:\r
+# Win1255Model.\r
+# The first SBCharSetProber uses the model normally as any other\r
+# SBCharSetProber does, to recognize windows-1255, upon which this model was\r
+# built. The second SBCharSetProber is told to make the pair-of-letter\r
+# lookup in the language model backwards. This in practice exactly simulates\r
+# a visual Hebrew model using the windows-1255 logical Hebrew model.\r
+#\r
+# The HebrewProber is not using any language model. All it does is look for\r
+# final-letter evidence suggesting the text is either logical Hebrew or visual\r
+# Hebrew. Disjointed from the model probers, the results of the HebrewProber\r
+# alone are meaningless. HebrewProber always returns 0.00 as confidence\r
+# since it never identifies a charset by itself. Instead, the pointer to the\r
+# HebrewProber is passed to the model probers as a helper "Name Prober".\r
+# When the Group prober receives a positive identification from any prober,\r
+# it asks for the name of the charset identified. If the prober queried is a\r
+# Hebrew model prober, the model prober forwards the call to the\r
+# HebrewProber to make the final decision. In the HebrewProber, the\r
+# decision is made according to the final-letters scores maintained and Both\r
+# model probers scores. The answer is returned in the form of the name of the\r
+# charset identified, either "windows-1255" or "ISO-8859-8".\r
+\r
+# windows-1255 / ISO-8859-8 code points of interest\r
+FINAL_KAF = 0xea\r
+NORMAL_KAF = 0xeb\r
+FINAL_MEM = 0xed\r
+NORMAL_MEM = 0xee\r
+FINAL_NUN = 0xef\r
+NORMAL_NUN = 0xf0\r
+FINAL_PE = 0xf3\r
+NORMAL_PE = 0xf4\r
+FINAL_TSADI = 0xf5\r
+NORMAL_TSADI = 0xf6\r
+\r
+# Minimum Visual vs Logical final letter score difference.\r
+# If the difference is below this, don't rely solely on the final letter score\r
+# distance.\r
+MIN_FINAL_CHAR_DISTANCE = 5\r
+\r
+# Minimum Visual vs Logical model score difference.\r
+# If the difference is below this, don't rely at all on the model score\r
+# distance.\r
+MIN_MODEL_DISTANCE = 0.01\r
+\r
+VISUAL_HEBREW_NAME = "ISO-8859-8"\r
+LOGICAL_HEBREW_NAME = "windows-1255"\r
+\r
+\r
+class HebrewProber(CharSetProber):\r
+ def __init__(self):\r
+ CharSetProber.__init__(self)\r
+ self._mLogicalProber = None\r
+ self._mVisualProber = None\r
+ self.reset()\r
+\r
+ def reset(self):\r
+ self._mFinalCharLogicalScore = 0\r
+ self._mFinalCharVisualScore = 0\r
+ # The two last characters seen in the previous buffer,\r
+ # mPrev and mBeforePrev are initialized to space in order to simulate\r
+ # a word delimiter at the beginning of the data\r
+ self._mPrev = ' '\r
+ self._mBeforePrev = ' '\r
+ # These probers are owned by the group prober.\r
+\r
+ def set_model_probers(self, logicalProber, visualProber):\r
+ self._mLogicalProber = logicalProber\r
+ self._mVisualProber = visualProber\r
+\r
+ def is_final(self, c):\r
+ return wrap_ord(c) in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE,\r
+ FINAL_TSADI]\r
+\r
+ def is_non_final(self, c):\r
+ # The normal Tsadi is not a good Non-Final letter due to words like\r
+ # 'lechotet' (to chat) containing an apostrophe after the tsadi. This\r
+ # apostrophe is converted to a space in FilterWithoutEnglishLetters\r
+ # causing the Non-Final tsadi to appear at an end of a word even\r
+ # though this is not the case in the original text.\r
+ # The letters Pe and Kaf rarely display a related behavior of not being\r
+ # a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak'\r
+ # for example legally end with a Non-Final Pe or Kaf. However, the\r
+ # benefit of these letters as Non-Final letters outweighs the damage\r
+ # since these words are quite rare.\r
+ return wrap_ord(c) in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE]\r
+\r
+ def feed(self, aBuf):\r
+ # Final letter analysis for logical-visual decision.\r
+ # Look for evidence that the received buffer is either logical Hebrew\r
+ # or visual Hebrew.\r
+ # The following cases are checked:\r
+ # 1) A word longer than 1 letter, ending with a final letter. This is\r
+ # an indication that the text is laid out "naturally" since the\r
+ # final letter really appears at the end. +1 for logical score.\r
+ # 2) A word longer than 1 letter, ending with a Non-Final letter. In\r
+ # normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi,\r
+ # should not end with the Non-Final form of that letter. Exceptions\r
+ # to this rule are mentioned above in isNonFinal(). This is an\r
+ # indication that the text is laid out backwards. +1 for visual\r
+ # score\r
+ # 3) A word longer than 1 letter, starting with a final letter. Final\r
+ # letters should not appear at the beginning of a word. This is an\r
+ # indication that the text is laid out backwards. +1 for visual\r
+ # score.\r
+ #\r
+ # The visual score and logical score are accumulated throughout the\r
+ # text and are finally checked against each other in GetCharSetName().\r
+ # No checking for final letters in the middle of words is done since\r
+ # that case is not an indication for either Logical or Visual text.\r
+ #\r
+ # We automatically filter out all 7-bit characters (replace them with\r
+ # spaces) so the word boundary detection works properly. [MAP]\r
+\r
+ if self.get_state() == eNotMe:\r
+ # Both model probers say it's not them. No reason to continue.\r
+ return eNotMe\r
+\r
+ aBuf = self.filter_high_bit_only(aBuf)\r
+\r
+ for cur in aBuf:\r
+ if cur == ' ':\r
+ # We stand on a space - a word just ended\r
+ if self._mBeforePrev != ' ':\r
+ # next-to-last char was not a space so self._mPrev is not a\r
+ # 1 letter word\r
+ if self.is_final(self._mPrev):\r
+ # case (1) [-2:not space][-1:final letter][cur:space]\r
+ self._mFinalCharLogicalScore += 1\r
+ elif self.is_non_final(self._mPrev):\r
+ # case (2) [-2:not space][-1:Non-Final letter][\r
+ # cur:space]\r
+ self._mFinalCharVisualScore += 1\r
+ else:\r
+ # Not standing on a space\r
+ if ((self._mBeforePrev == ' ') and\r
+ (self.is_final(self._mPrev)) and (cur != ' ')):\r
+ # case (3) [-2:space][-1:final letter][cur:not space]\r
+ self._mFinalCharVisualScore += 1\r
+ self._mBeforePrev = self._mPrev\r
+ self._mPrev = cur\r
+\r
+ # Forever detecting, till the end or until both model probers return\r
+ # eNotMe (handled above)\r
+ return eDetecting\r
+\r
+ def get_charset_name(self):\r
+ # Make the decision: is it Logical or Visual?\r
+ # If the final letter score distance is dominant enough, rely on it.\r
+ finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore\r
+ if finalsub >= MIN_FINAL_CHAR_DISTANCE:\r
+ return LOGICAL_HEBREW_NAME\r
+ if finalsub <= -MIN_FINAL_CHAR_DISTANCE:\r
+ return VISUAL_HEBREW_NAME\r
+\r
+ # It's not dominant enough, try to rely on the model scores instead.\r
+ modelsub = (self._mLogicalProber.get_confidence()\r
+ - self._mVisualProber.get_confidence())\r
+ if modelsub > MIN_MODEL_DISTANCE:\r
+ return LOGICAL_HEBREW_NAME\r
+ if modelsub < -MIN_MODEL_DISTANCE:\r
+ return VISUAL_HEBREW_NAME\r
+\r
+ # Still no good, back to final letter distance, maybe it'll save the\r
+ # day.\r
+ if finalsub < 0.0:\r
+ return VISUAL_HEBREW_NAME\r
+\r
+ # (finalsub > 0 - Logical) or (don't know what to do) default to\r
+ # Logical.\r
+ return LOGICAL_HEBREW_NAME\r
+\r
+ def get_state(self):\r
+ # Remain active as long as any of the model probers are active.\r
+ if (self._mLogicalProber.get_state() == eNotMe) and \\r
+ (self._mVisualProber.get_state() == eNotMe):\r
+ return eNotMe\r
+ return eDetecting\r
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
-#
+#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
-#
+#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# Sampling from about 20M text materials include literature and computer technology
#
# Japanese frequency table, applied to both S-JIS and EUC-JP
-# They are sorted in order.
+# They are sorted in order.
# 128 --> 0.77094
# 256 --> 0.85710
#
# Ideal Distribution Ratio = 0.92635 / (1-0.92635) = 12.58
# Random Distribution Ration = 512 / (2965+62+83+86-512) = 0.191
-#
-# Typical Distribution Ratio, 25% of IDR
+#
+# Typical Distribution Ratio, 25% of IDR
JIS_TYPICAL_DISTRIBUTION_RATIO = 3.0
-# Char to FreqOrder table ,
+# Char to FreqOrder table ,
JIS_TABLE_SIZE = 4368
-JISCharToFreqOrder = ( \
+JISCharToFreqOrder = (
40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16
3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32
1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, # 48
8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, # 8240
8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, # 8256
8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271) # 8272
+
+# flake8: noqa
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Communicator client code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .compat import wrap_ord
-
-NUM_OF_CATEGORY = 6
-DONT_KNOW = -1
-ENOUGH_REL_THRESHOLD = 100
-MAX_REL_THRESHOLD = 1000
-MINIMUM_DATA_THRESHOLD = 4
-
-# This is hiragana 2-char sequence table, the number in each cell represents
-# its frequency category
-jp2CharContext = (
- (0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 1),
-
- (2, 4, 0, 4, 0, 3, 0, 4, 0, 3, 4, 4, 4, 2, 4, 3, 3, 4, 3, 2, 3, 3, 4, 2,
- 3, 3, 3, 2, 4, 1, 4, 3, 3, 1, 5, 4, 3, 4, 3, 4, 3, 5, 3, 0, 3, 5, 4,
- 2, 0, 3, 1, 0, 3, 3, 0, 3, 3, 0, 1, 1, 0, 4, 3, 0, 3, 3, 0, 4, 0, 2,
- 0, 3, 5, 5, 5, 5, 4, 0, 4, 1, 0, 3, 4),
-
- (0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2),
-
- (0, 4, 0, 5, 0, 5, 0, 4, 0, 4, 5, 4, 4, 3, 5, 3, 5, 1, 5, 3, 4, 3, 4, 4,
- 3, 4, 3, 3, 4, 3, 5, 4, 4, 3, 5, 5, 3, 5, 5, 5, 3, 5, 5, 3, 4, 5, 5,
- 3, 1, 3, 2, 0, 3, 4, 0, 4, 2, 0, 4, 2, 1, 5, 3, 2, 3, 5, 0, 4, 0, 2,
- 0, 5, 4, 4, 5, 4, 5, 0, 4, 0, 0, 4, 4),
-
- (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
-
- (0, 3, 0, 4, 0, 3, 0, 3, 0, 4, 5, 4, 3, 3, 3, 3, 4, 3, 5, 4, 4, 3, 5, 4,
- 4, 3, 4, 3, 4, 4, 4, 4, 5, 3, 4, 4, 3, 4, 5, 5, 4, 5, 5, 1, 4, 5, 4,
- 3, 0, 3, 3, 1, 3, 3, 0, 4, 4, 0, 3, 3, 1, 5, 3, 3, 3, 5, 0, 4, 0, 3,
- 0, 4, 4, 3, 4, 3, 3, 0, 4, 1, 1, 3, 4),
-
- (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
-
- (0, 4, 0, 3, 0, 3, 0, 4, 0, 3, 4, 4, 3, 2, 2, 1, 2, 1, 3, 1, 3, 3, 3, 3,
- 3, 4, 3, 1, 3, 3, 5, 3, 3, 0, 4, 3, 0, 5, 4, 3, 3, 5, 4, 4, 3, 4, 4,
- 5, 0, 1, 2, 0, 1, 2, 0, 2, 2, 0, 1, 0, 0, 5, 2, 2, 1, 4, 0, 3, 0, 1,
- 0, 4, 4, 3, 5, 4, 3, 0, 2, 1, 0, 4, 3),
-
- (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
-
- (0, 3, 0, 5, 0, 4, 0, 2, 1, 4, 4, 2, 4, 1, 4, 2, 4, 2, 4, 3, 3, 3, 4, 3,
- 3, 3, 3, 1, 4, 2, 3, 3, 3, 1, 4, 4, 1, 1, 1, 4, 3, 3, 2, 0, 2, 4, 3,
- 2, 0, 3, 3, 0, 3, 1, 1, 0, 0, 0, 3, 3, 0, 4, 2, 2, 3, 4, 0, 4, 0, 3,
- 0, 4, 4, 5, 3, 4, 4, 0, 3, 0, 0, 1, 4),
-
- (1, 4, 0, 4, 0, 4, 0, 4, 0, 3, 5, 4, 4, 3, 4, 3, 5, 4, 3, 3, 4, 3, 5, 4,
- 4, 4, 4, 3, 4, 2, 4, 3, 3, 1, 5, 4, 3, 2, 4, 5, 4, 5, 5, 4, 4, 5,
- 4, 4, 0, 3, 2, 2, 3, 3, 0, 4, 3, 1, 3, 2, 1, 4, 3, 3, 4, 5, 0, 3,
- 0, 2, 0, 4, 5, 5, 4, 5, 4, 0, 4, 0, 0, 5, 4),
-
- (0, 5, 0, 5, 0, 4, 0, 3, 0, 4, 4, 3, 4, 3, 3, 3, 4, 0, 4, 4, 4, 3, 4, 3,
- 4, 3, 3, 1, 4, 2, 4, 3, 4, 0, 5, 4, 1, 4, 5, 4, 4, 5, 3, 2, 4, 3,
- 4, 3, 2, 4, 1, 3, 3, 3, 2, 3, 2, 0, 4, 3, 3, 4, 3, 3, 3, 4, 0, 4,
- 0, 3, 0, 4, 5, 4, 4, 4, 3, 0, 4, 1, 0, 1, 3),
-
- (0, 3, 1, 4, 0, 3, 0, 2, 0, 3, 4, 4, 3, 1, 4, 2, 3, 3, 4, 3, 4, 3, 4, 3,
- 4, 4, 3, 2, 3, 1, 5, 4, 4, 1, 4, 4, 3, 5, 4, 4, 3, 5, 5, 4, 3, 4,
- 4, 3, 1, 2, 3, 1, 2, 2, 0, 3, 2, 0, 3, 1, 0, 5, 3, 3, 3, 4, 3, 3,
- 3, 3, 4, 4, 4, 4, 5, 4, 2, 0, 3, 3, 2, 4, 3),
-
- (0, 2, 0, 3, 0, 1, 0, 1, 0, 0, 3, 2, 0, 0, 2, 0, 1, 0, 2, 1, 3, 3, 3, 1,
- 2, 3, 1, 0, 1, 0, 4, 2, 1, 1, 3, 3, 0, 4, 3, 3, 1, 4, 3, 3, 0, 3,
- 3, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 4, 1, 0, 2, 3, 2, 2,
- 2, 1, 3, 3, 3, 4, 4, 3, 2, 0, 3, 1, 0, 3, 3),
-
- (0, 4, 0, 4, 0, 3, 0, 3, 0, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 3, 4, 2, 4, 3,
- 4, 3, 3, 2, 4, 3, 4, 5, 4, 1, 4, 5, 3, 5, 4, 5, 3, 5, 4, 0, 3, 5,
- 5, 3, 1, 3, 3, 2, 2, 3, 0, 3, 4, 1, 3, 3, 2, 4, 3, 3, 3, 4, 0, 4,
- 0, 3, 0, 4, 5, 4, 4, 5, 3, 0, 4, 1, 0, 3, 4),
-
- (0, 2, 0, 3, 0, 3, 0, 0, 0, 2, 2, 2, 1, 0, 1, 0, 0, 0, 3, 0, 3, 0, 3, 0,
- 1, 3, 1, 0, 3, 1, 3, 3, 3, 1, 3, 3, 3, 0, 1, 3, 1, 3, 4, 0, 0, 3,
- 1, 1, 0, 3, 2, 0, 0, 0, 0, 1, 3, 0, 1, 0, 0, 3, 3, 2, 0, 3, 0, 0,
- 0, 0, 0, 3, 4, 3, 4, 3, 3, 0, 3, 0, 0, 2, 3),
-
- (2, 3, 0, 3, 0, 2, 0, 1, 0, 3, 3, 4, 3, 1, 3, 1, 1, 1, 3, 1, 4, 3, 4, 3,
- 3, 3, 0, 0, 3, 1, 5, 4, 3, 1, 4, 3, 2, 5, 5, 4, 4, 4, 4, 3, 3, 4,
- 4, 4, 0, 2, 1, 1, 3, 2, 0, 1, 2, 0, 0, 1, 0, 4, 1, 3, 3, 3, 0, 3,
- 0, 1, 0, 4, 4, 4, 5, 5, 3, 0, 2, 0, 0, 4, 4),
-
- (0, 2, 0, 1, 0, 3, 1, 3, 0, 2, 3, 3, 3, 0, 3, 1, 0, 0, 3, 0, 3, 2, 3, 1,
- 3, 2, 1, 1, 0, 0, 4, 2, 1, 0, 2, 3, 1, 4, 3, 2, 0, 4, 4, 3, 1, 3,
- 1, 3, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 1, 1, 1, 2, 0, 3,
- 0, 0, 0, 3, 4, 2, 4, 3, 2, 0, 1, 0, 0, 3, 3),
-
- (0, 1, 0, 4, 0, 5, 0, 4, 0, 2, 4, 4, 2, 3, 3, 2, 3, 3, 5, 3, 3, 3, 4, 3,
- 4, 2, 3, 0, 4, 3, 3, 3, 4, 1, 4, 3, 2, 1, 5, 5, 3, 4, 5, 1, 3, 5,
- 4, 2, 0, 3, 3, 0, 1, 3, 0, 4, 2, 0, 1, 3, 1, 4, 3, 3, 3, 3, 0, 3,
- 0, 1, 0, 3, 4, 4, 4, 5, 5, 0, 3, 0, 1, 4, 5),
-
- (0, 2, 0, 3, 0, 3, 0, 0, 0, 2, 3, 1, 3, 0, 4, 0, 1, 1, 3, 0, 3, 4, 3, 2,
- 3, 1, 0, 3, 3, 2, 3, 1, 3, 0, 2, 3, 0, 2, 1, 4, 1, 2, 2, 0, 0, 3,
- 3, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 2, 2, 0, 3, 2, 1, 3, 3, 0, 2,
- 0, 2, 0, 0, 3, 3, 1, 2, 4, 0, 3, 0, 2, 2, 3),
-
- (2, 4, 0, 5, 0, 4, 0, 4, 0, 2, 4, 4, 4, 3, 4, 3, 3, 3, 1, 2, 4, 3, 4, 3,
- 4, 4, 5, 0, 3, 3, 3, 3, 2, 0, 4, 3, 1, 4, 3, 4, 1, 4, 4, 3, 3, 4,
- 4, 3, 1, 2, 3, 0, 4, 2, 0, 4, 1, 0, 3, 3, 0, 4, 3, 3, 3, 4, 0, 4,
- 0, 2, 0, 3, 5, 3, 4, 5, 2, 0, 3, 0, 0, 4, 5),
-
- (0, 3, 0, 4, 0, 1, 0, 1, 0, 1, 3, 2, 2, 1, 3, 0, 3, 0, 2, 0, 2, 0, 3, 0,
- 2, 0, 0, 0, 1, 0, 1, 1, 0, 0, 3, 1, 0, 0, 0, 4, 0, 3, 1, 0, 2, 1,
- 3, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 2, 2, 3, 1, 0, 3,
- 0, 0, 0, 1, 4, 4, 4, 3, 0, 0, 4, 0, 0, 1, 4),
-
- (1, 4, 1, 5, 0, 3, 0, 3, 0, 4, 5, 4, 4, 3, 5, 3, 3, 4, 4, 3, 4, 1, 3, 3,
- 3, 3, 2, 1, 4, 1, 5, 4, 3, 1, 4, 4, 3, 5, 4, 4, 3, 5, 4, 3, 3, 4,
- 4, 4, 0, 3, 3, 1, 2, 3, 0, 3, 1, 0, 3, 3, 0, 5, 4, 4, 4, 4, 4, 4,
- 3, 3, 5, 4, 4, 3, 3, 5, 4, 0, 3, 2, 0, 4, 4),
-
- (0, 2, 0, 3, 0, 1, 0, 0, 0, 1, 3, 3, 3, 2, 4, 1, 3, 0, 3, 1, 3, 0, 2, 2,
- 1, 1, 0, 0, 2, 0, 4, 3, 1, 0, 4, 3, 0, 4, 4, 4, 1, 4, 3, 1, 1, 3,
- 3, 1, 0, 2, 0, 0, 1, 3, 0, 0, 0, 0, 2, 0, 0, 4, 3, 2, 4, 3, 5, 4,
- 3, 3, 3, 4, 3, 3, 4, 3, 3, 0, 2, 1, 0, 3, 3),
-
- (0, 2, 0, 4, 0, 3, 0, 2, 0, 2, 5, 5, 3, 4, 4, 4, 4, 1, 4, 3, 3, 0, 4, 3,
- 4, 3, 1, 3, 3, 2, 4, 3, 0, 3, 4, 3, 0, 3, 4, 4, 2, 4, 4, 0, 4, 5,
- 3, 3, 2, 2, 1, 1, 1, 2, 0, 1, 5, 0, 3, 3, 2, 4, 3, 3, 3, 4, 0, 3,
- 0, 2, 0, 4, 4, 3, 5, 5, 0, 0, 3, 0, 2, 3, 3),
-
- (0, 3, 0, 4, 0, 3, 0, 1, 0, 3, 4, 3, 3, 1, 3, 3, 3, 0, 3, 1, 3, 0, 4, 3,
- 3, 1, 1, 0, 3, 0, 3, 3, 0, 0, 4, 4, 0, 1, 5, 4, 3, 3, 5, 0, 3, 3,
- 4, 3, 0, 2, 0, 1, 1, 1, 0, 1, 3, 0, 1, 2, 1, 3, 3, 2, 3, 3, 0, 3,
- 0, 1, 0, 1, 3, 3, 4, 4, 1, 0, 1, 2, 2, 1, 3),
-
- (0, 1, 0, 4, 0, 4, 0, 3, 0, 1, 3, 3, 3, 2, 3, 1, 1, 0, 3, 0, 3, 3, 4, 3,
- 2, 4, 2, 0, 1, 0, 4, 3, 2, 0, 4, 3, 0, 5, 3, 3, 2, 4, 4, 4, 3, 3,
- 3, 4, 0, 1, 3, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 4, 2, 3, 3, 3, 0, 3,
- 0, 0, 0, 4, 4, 4, 5, 3, 2, 0, 3, 3, 0, 3, 5),
-
- (0, 2, 0, 3, 0, 0, 0, 3, 0, 1, 3, 0, 2, 0, 0, 0, 1, 0, 3, 1, 1, 3, 3, 0,
- 0, 3, 0, 0, 3, 0, 2, 3, 1, 0, 3, 1, 0, 3, 3, 2, 0, 4, 2, 2, 0, 2,
- 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 0, 1, 0, 1,
- 0, 0, 0, 1, 3, 1, 2, 0, 0, 0, 1, 0, 0, 1, 4),
-
- (0, 3, 0, 3, 0, 5, 0, 1, 0, 2, 4, 3, 1, 3, 3, 2, 1, 1, 5, 2, 1, 0, 5, 1,
- 2, 0, 0, 0, 3, 3, 2, 2, 3, 2, 4, 3, 0, 0, 3, 3, 1, 3, 3, 0, 2, 5,
- 3, 4, 0, 3, 3, 0, 1, 2, 0, 2, 2, 0, 3, 2, 0, 2, 2, 3, 3, 3, 0, 2,
- 0, 1, 0, 3, 4, 4, 2, 5, 4, 0, 3, 0, 0, 3, 5),
-
- (0, 3, 0, 3, 0, 3, 0, 1, 0, 3, 3, 3, 3, 0, 3, 0, 2, 0, 2, 1, 1, 0, 2, 0,
- 1, 0, 0, 0, 2, 1, 0, 0, 1, 0, 3, 2, 0, 0, 3, 3, 1, 2, 3, 1, 0, 3,
- 3, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 3, 1, 2, 3, 0, 3,
- 0, 1, 0, 3, 2, 1, 0, 4, 3, 0, 1, 1, 0, 3, 3),
-
- (0, 4, 0, 5, 0, 3, 0, 3, 0, 4, 5, 5, 4, 3, 5, 3, 4, 3, 5, 3, 3, 2, 5, 3,
- 4, 4, 4, 3, 4, 3, 4, 5, 5, 3, 4, 4, 3, 4, 4, 5, 4, 4, 4, 3, 4, 5,
- 5, 4, 2, 3, 4, 2, 3, 4, 0, 3, 3, 1, 4, 3, 2, 4, 3, 3, 5, 5, 0, 3,
- 0, 3, 0, 5, 5, 5, 5, 4, 4, 0, 4, 0, 1, 4, 4),
-
- (0, 4, 0, 4, 0, 3, 0, 3, 0, 3, 5, 4, 4, 2, 3, 2, 5, 1, 3, 2, 5, 1, 4, 2,
- 3, 2, 3, 3, 4, 3, 3, 3, 3, 2, 5, 4, 1, 3, 3, 5, 3, 4, 4, 0, 4, 4,
- 3, 1, 1, 3, 1, 0, 2, 3, 0, 2, 3, 0, 3, 0, 0, 4, 3, 1, 3, 4, 0, 3,
- 0, 2, 0, 4, 4, 4, 3, 4, 5, 0, 4, 0, 0, 3, 4),
-
- (0, 3, 0, 3, 0, 3, 1, 2, 0, 3, 4, 4, 3, 3, 3, 0, 2, 2, 4, 3, 3, 1, 3, 3,
- 3, 1, 1, 0, 3, 1, 4, 3, 2, 3, 4, 4, 2, 4, 4, 4, 3, 4, 4, 3, 2, 4,
- 4, 3, 1, 3, 3, 1, 3, 3, 0, 4, 1, 0, 2, 2, 1, 4, 3, 2, 3, 3, 5, 4,
- 3, 3, 5, 4, 4, 3, 3, 0, 4, 0, 3, 2, 2, 4, 4),
-
- (0, 2, 0, 1, 0, 0, 0, 0, 0, 1, 2, 1, 3, 0, 0, 0, 0, 0, 2, 0, 1, 2, 1, 0,
- 0, 1, 0, 0, 0, 0, 3, 0, 0, 1, 0, 1, 1, 3, 1, 0, 0, 0, 1, 1, 0, 1,
- 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 0, 3, 4,
- 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1),
-
- (0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 4, 1, 4, 0, 3, 0, 4, 0, 3, 0, 4, 0,
- 3, 0, 3, 0, 4, 1, 5, 1, 4, 0, 0, 3, 0, 5, 0, 5, 2, 0, 1, 0, 0, 0,
- 2, 1, 4, 0, 1, 3, 0, 0, 3, 0, 0, 3, 1, 1, 4, 1, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0),
-
- (1, 4, 0, 5, 0, 3, 0, 2, 0, 3, 5, 4, 4, 3, 4, 3, 5, 3, 4, 3, 3, 0, 4, 3,
- 3, 3, 3, 3, 3, 2, 4, 4, 3, 1, 3, 4, 4, 5, 4, 4, 3, 4, 4, 1, 3, 5,
- 4, 3, 3, 3, 1, 2, 2, 3, 3, 1, 3, 1, 3, 3, 3, 5, 3, 3, 4, 5, 0, 3,
- 0, 3, 0, 3, 4, 3, 4, 4, 3, 0, 3, 0, 2, 4, 3),
-
- (0, 1, 0, 4, 0, 0, 0, 0, 0, 1, 4, 0, 4, 1, 4, 2, 4, 0, 3, 0, 1, 0, 1, 0,
- 0, 0, 0, 0, 2, 0, 3, 1, 1, 1, 0, 3, 0, 0, 0, 1, 2, 1, 0, 0, 1, 1,
- 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 0, 3, 2, 0, 2, 2, 0, 1,
- 0, 0, 0, 2, 3, 2, 3, 3, 0, 0, 0, 0, 2, 1, 0),
-
- (0, 5, 1, 5, 0, 3, 0, 3, 0, 5, 4, 4, 5, 1, 5, 3, 3, 0, 4, 3, 4, 3, 5, 3,
- 4, 3, 3, 2, 4, 3, 4, 3, 3, 0, 3, 3, 1, 4, 4, 3, 4, 4, 4, 3, 4, 5,
- 5, 3, 2, 3, 1, 1, 3, 3, 1, 3, 1, 1, 3, 3, 2, 4, 5, 3, 3, 5, 0, 4,
- 0, 3, 0, 4, 4, 3, 5, 3, 3, 0, 3, 4, 0, 4, 3),
-
- (0, 5, 0, 5, 0, 3, 0, 2, 0, 4, 4, 3, 5, 2, 4, 3, 3, 3, 4, 4, 4, 3, 5, 3,
- 5, 3, 3, 1, 4, 0, 4, 3, 3, 0, 3, 3, 0, 4, 4, 4, 4, 5, 4, 3, 3, 5,
- 5, 3, 2, 3, 1, 2, 3, 2, 0, 1, 0, 0, 3, 2, 2, 4, 4, 3, 1, 5, 0, 4,
- 0, 3, 0, 4, 3, 1, 3, 2, 1, 0, 3, 3, 0, 3, 3),
-
- (0, 4, 0, 5, 0, 5, 0, 4, 0, 4, 5, 5, 5, 3, 4, 3, 3, 2, 5, 4, 4, 3, 5, 3,
- 5, 3, 4, 0, 4, 3, 4, 4, 3, 2, 4, 4, 3, 4, 5, 4, 4, 5, 5, 0, 3, 5,
- 5, 4, 1, 3, 3, 2, 3, 3, 1, 3, 1, 0, 4, 3, 1, 4, 4, 3, 4, 5, 0, 4,
- 0, 2, 0, 4, 3, 4, 4, 3, 3, 0, 4, 0, 0, 5, 5),
-
- (0, 4, 0, 4, 0, 5, 0, 1, 1, 3, 3, 4, 4, 3, 4, 1, 3, 0, 5, 1, 3, 0, 3, 1,
- 3, 1, 1, 0, 3, 0, 3, 3, 4, 0, 4, 3, 0, 4, 4, 4, 3, 4, 4, 0, 3, 5,
- 4, 1, 0, 3, 0, 0, 2, 3, 0, 3, 1, 0, 3, 1, 0, 3, 2, 1, 3, 5, 0, 3,
- 0, 1, 0, 3, 2, 3, 3, 4, 4, 0, 2, 2, 0, 4, 4),
-
- (2, 4, 0, 5, 0, 4, 0, 3, 0, 4, 5, 5, 4, 3, 5, 3, 5, 3, 5, 3, 5, 2, 5, 3,
- 4, 3, 3, 4, 3, 4, 5, 3, 2, 1, 5, 4, 3, 2, 3, 4, 5, 3, 4, 1, 2, 5,
- 4, 3, 0, 3, 3, 0, 3, 2, 0, 2, 3, 0, 4, 1, 0, 3, 4, 3, 3, 5, 0, 3,
- 0, 1, 0, 4, 5, 5, 5, 4, 3, 0, 4, 2, 0, 3, 5),
-
- (0, 5, 0, 4, 0, 4, 0, 2, 0, 5, 4, 3, 4, 3, 4, 3, 3, 3, 4, 3, 4, 2, 5, 3,
- 5, 3, 4, 1, 4, 3, 4, 4, 4, 0, 3, 5, 0, 4, 4, 4, 4, 5, 3, 1, 3, 4,
- 5, 3, 3, 3, 3, 3, 3, 3, 0, 2, 2, 0, 3, 3, 2, 4, 3, 3, 3, 5, 3, 4,
- 1, 3, 3, 5, 3, 2, 0, 0, 0, 0, 4, 3, 1, 3, 3),
-
- (0, 1, 0, 3, 0, 3, 0, 1, 0, 1, 3, 3, 3, 2, 3, 3, 3, 0, 3, 0, 0, 0, 3, 1,
- 3, 0, 0, 0, 2, 2, 2, 3, 0, 0, 3, 2, 0, 1, 2, 4, 1, 3, 3, 0, 0, 3,
- 3, 3, 0, 1, 0, 0, 2, 1, 0, 0, 3, 0, 3, 1, 0, 3, 0, 0, 1, 3, 0, 2,
- 0, 1, 0, 3, 3, 1, 3, 3, 0, 0, 1, 1, 0, 3, 3),
-
- (0, 2, 0, 3, 0, 2, 1, 4, 0, 2, 2, 3, 1, 1, 3, 1, 1, 0, 2, 0, 3, 1, 2, 3,
- 1, 3, 0, 0, 1, 0, 4, 3, 2, 3, 3, 3, 1, 4, 2, 3, 3, 3, 3, 1, 0, 3,
- 1, 4, 0, 1, 1, 0, 1, 2, 0, 1, 1, 0, 1, 1, 0, 3, 1, 3, 2, 2, 0, 1,
- 0, 0, 0, 2, 3, 3, 3, 1, 0, 0, 0, 0, 0, 2, 3),
-
- (0, 5, 0, 4, 0, 5, 0, 2, 0, 4, 5, 5, 3, 3, 4, 3, 3, 1, 5, 4, 4, 2, 4, 4,
- 4, 3, 4, 2, 4, 3, 5, 5, 4, 3, 3, 4, 3, 3, 5, 5, 4, 5, 5, 1, 3, 4,
- 5, 3, 1, 4, 3, 1, 3, 3, 0, 3, 3, 1, 4, 3, 1, 4, 5, 3, 3, 5, 0, 4,
- 0, 3, 0, 5, 3, 3, 1, 4, 3, 0, 4, 0, 1, 5, 3),
-
- (0, 5, 0, 5, 0, 4, 0, 2, 0, 4, 4, 3, 4, 3, 3, 3, 3, 3, 5, 4, 4, 4, 4, 4,
- 4, 5, 3, 3, 5, 2, 4, 4, 4, 3, 4, 4, 3, 3, 4, 4, 5, 5, 3, 3, 4, 3,
- 4, 3, 3, 4, 3, 3, 3, 3, 1, 2, 2, 1, 4, 3, 3, 5, 4, 4, 3, 4, 0, 4,
- 0, 3, 0, 4, 4, 4, 4, 4, 1, 0, 4, 2, 0, 2, 4),
-
- (0, 4, 0, 4, 0, 3, 0, 1, 0, 3, 5, 2, 3, 0, 3, 0, 2, 1, 4, 2, 3, 3, 4, 1,
- 4, 3, 3, 2, 4, 1, 3, 3, 3, 0, 3, 3, 0, 0, 3, 3, 3, 5, 3, 3, 3, 3,
- 3, 2, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 1, 0, 0, 3, 1, 2, 2, 3, 0, 3,
- 0, 2, 0, 4, 4, 3, 3, 4, 1, 0, 3, 0, 0, 2, 4),
-
- (0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 2, 0,
- 1, 0, 0, 0, 0, 0, 3, 1, 3, 0, 3, 2, 0, 0, 0, 1, 0, 3, 2, 0, 0, 2,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 3, 4, 0, 2, 0, 0, 0, 0, 0, 0, 2),
-
- (0, 2, 1, 3, 0, 2, 0, 2, 0, 3, 3, 3, 3, 1, 3, 1, 3, 3, 3, 3, 3, 3, 4, 2,
- 2, 1, 2, 1, 4, 0, 4, 3, 1, 3, 3, 3, 2, 4, 3, 5, 4, 3, 3, 3, 3, 3,
- 3, 3, 0, 1, 3, 0, 2, 0, 0, 1, 0, 0, 1, 0, 0, 4, 2, 0, 2, 3, 0, 3,
- 3, 0, 3, 3, 4, 2, 3, 1, 4, 0, 1, 2, 0, 2, 3),
-
- (0, 3, 0, 3, 0, 1, 0, 3, 0, 2, 3, 3, 3, 0, 3, 1, 2, 0, 3, 3, 2, 3, 3, 2,
- 3, 2, 3, 1, 3, 0, 4, 3, 2, 0, 3, 3, 1, 4, 3, 3, 2, 3, 4, 3, 1, 3,
- 3, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 4, 1, 1, 0, 3, 0, 3,
- 1, 0, 2, 3, 3, 3, 3, 3, 1, 0, 0, 2, 0, 3, 3),
-
- (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0,
- 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 3, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 3, 0, 2, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 3),
-
- (0, 2, 0, 3, 1, 3, 0, 3, 0, 2, 3, 3, 3, 1, 3, 1, 3, 1, 3, 1, 3, 3, 3, 1,
- 3, 0, 2, 3, 1, 1, 4, 3, 3, 2, 3, 3, 1, 2, 2, 4, 1, 3, 3, 0, 1, 4,
- 2, 3, 0, 1, 3, 0, 3, 0, 0, 1, 3, 0, 2, 0, 0, 3, 3, 2, 1, 3, 0, 3,
- 0, 2, 0, 3, 4, 4, 4, 3, 1, 0, 3, 0, 0, 3, 3),
-
- (0, 2, 0, 1, 0, 2, 0, 0, 0, 1, 3, 2, 2, 1, 3, 0, 1, 1, 3, 0, 3, 2, 3, 1,
- 2, 0, 2, 0, 1, 1, 3, 3, 3, 0, 3, 3, 1, 1, 2, 3, 2, 3, 3, 1, 2, 3,
- 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 2, 1, 2, 1, 3, 0, 3,
- 0, 0, 0, 3, 4, 4, 4, 3, 2, 0, 2, 0, 0, 2, 4),
-
- (0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1,
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
- 0, 0, 0, 0, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 3),
-
- (0, 3, 0, 3, 0, 2, 0, 3, 0, 3, 3, 3, 2, 3, 2, 2, 2, 0, 3, 1, 3, 3, 3, 2,
- 3, 3, 0, 0, 3, 0, 3, 2, 2, 0, 2, 3, 1, 4, 3, 4, 3, 3, 2, 3, 1, 5,
- 4, 4, 0, 3, 1, 2, 1, 3, 0, 3, 1, 1, 2, 0, 2, 3, 1, 3, 1, 3, 0, 3,
- 0, 1, 0, 3, 3, 4, 4, 2, 1, 0, 2, 1, 0, 2, 4),
-
- (0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 4, 2, 5, 1, 4, 0, 2, 0, 2, 1, 3, 1, 4, 0,
- 2, 1, 0, 0, 2, 1, 4, 1, 1, 0, 3, 3, 0, 5, 1, 3, 2, 3, 3, 1, 0, 3,
- 2, 3, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 1, 0, 3, 0, 2,
- 0, 1, 0, 3, 3, 3, 4, 3, 3, 0, 0, 0, 0, 2, 3),
-
- (0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 2, 1, 0, 0, 1, 0, 0, 0, 0, 0, 3),
-
- (0, 1, 0, 3, 0, 4, 0, 3, 0, 2, 4, 3, 1, 0, 3, 2, 2, 1, 3, 1, 2, 2, 3, 1,
- 1, 1, 2, 1, 3, 0, 1, 2, 0, 1, 3, 2, 1, 3, 0, 5, 5, 1, 0, 0, 1, 3,
- 2, 1, 0, 3, 0, 0, 1, 0, 0, 0, 0, 0, 3, 4, 0, 1, 1, 1, 3, 2, 0, 2,
- 0, 1, 0, 2, 3, 3, 1, 2, 3, 0, 1, 0, 1, 0, 4),
-
- (0, 0, 0, 1, 0, 3, 0, 3, 0, 2, 2, 1, 0, 0, 4, 0, 3, 0, 3, 1, 3, 0, 3, 0,
- 3, 0, 1, 0, 3, 0, 3, 1, 3, 0, 3, 3, 0, 0, 1, 2, 1, 1, 1, 0, 1, 2,
- 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 1, 2, 0, 0, 2,
- 0, 0, 0, 0, 2, 3, 3, 3, 3, 0, 0, 0, 0, 1, 4),
-
- (0, 0, 0, 3, 0, 3, 0, 0, 0, 0, 3, 1, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 0, 0,
- 0, 0, 0, 0, 1, 0, 3, 0, 2, 0, 2, 3, 0, 0, 2, 2, 3, 1, 2, 0, 0, 1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 3, 0, 0, 2, 0, 0, 0, 0, 2, 3),
-
- (2, 4, 0, 5, 0, 5, 0, 4, 0, 3, 4, 3, 3, 3, 4, 3, 3, 3, 4, 3, 4, 4, 5, 4,
- 5, 5, 5, 2, 3, 0, 5, 5, 4, 1, 5, 4, 3, 1, 5, 4, 3, 4, 4, 3, 3, 4,
- 3, 3, 0, 3, 2, 0, 2, 3, 0, 3, 0, 0, 3, 3, 0, 5, 3, 2, 3, 3, 0, 3,
- 0, 3, 0, 3, 4, 5, 4, 5, 3, 0, 4, 3, 0, 3, 4),
-
- (0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 3, 4, 3, 2, 3, 2, 3, 0, 4, 3, 3, 3, 3, 3,
- 3, 3, 3, 0, 3, 2, 4, 3, 3, 1, 3, 4, 3, 4, 4, 4, 3, 4, 4, 3, 2, 4,
- 4, 1, 0, 2, 0, 0, 1, 1, 0, 2, 0, 0, 3, 1, 0, 5, 3, 2, 1, 3, 0, 3,
- 0, 1, 2, 4, 3, 2, 4, 3, 3, 0, 3, 2, 0, 4, 4),
-
- (0, 3, 0, 3, 0, 1, 0, 0, 0, 1, 4, 3, 3, 2, 3, 1, 3, 1, 4, 2, 3, 2, 4, 2,
- 3, 4, 3, 0, 2, 2, 3, 3, 3, 0, 3, 3, 3, 0, 3, 4, 1, 3, 3, 0, 3, 4,
- 3, 3, 0, 1, 1, 0, 1, 0, 0, 0, 4, 0, 3, 0, 0, 3, 1, 2, 1, 3, 0, 4,
- 0, 1, 0, 4, 3, 3, 4, 3, 3, 0, 2, 0, 0, 3, 3),
-
- (0, 3, 0, 4, 0, 1, 0, 3, 0, 3, 4, 3, 3, 0, 3, 3, 3, 1, 3, 1, 3, 3, 4, 3,
- 3, 3, 0, 0, 3, 1, 5, 3, 3, 1, 3, 3, 2, 5, 4, 3, 3, 4, 5, 3, 2, 5,
- 3, 4, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 1, 1, 0, 4, 2, 2, 1, 3, 0, 3,
- 0, 2, 0, 4, 4, 3, 5, 3, 2, 0, 1, 1, 0, 3, 4),
-
- (0, 5, 0, 4, 0, 5, 0, 2, 0, 4, 4, 3, 3, 2, 3, 3, 3, 1, 4, 3, 4, 1, 5, 3,
- 4, 3, 4, 0, 4, 2, 4, 3, 4, 1, 5, 4, 0, 4, 4, 4, 4, 5, 4, 1, 3, 5,
- 4, 2, 1, 4, 1, 1, 3, 2, 0, 3, 1, 0, 3, 2, 1, 4, 3, 3, 3, 4, 0, 4,
- 0, 3, 0, 4, 4, 4, 3, 3, 3, 0, 4, 2, 0, 3, 4),
-
- (1, 4, 0, 4, 0, 3, 0, 1, 0, 3, 3, 3, 1, 1, 3, 3, 2, 2, 3, 3, 1, 0, 3, 2,
- 2, 1, 2, 0, 3, 1, 2, 1, 2, 0, 3, 2, 0, 2, 2, 3, 3, 4, 3, 0, 3, 3,
- 1, 2, 0, 1, 1, 3, 1, 2, 0, 0, 3, 0, 1, 1, 0, 3, 2, 2, 3, 3, 0, 3,
- 0, 0, 0, 2, 3, 3, 4, 3, 3, 0, 1, 0, 0, 1, 4),
-
- (0, 4, 0, 4, 0, 4, 0, 0, 0, 3, 4, 4, 3, 1, 4, 2, 3, 2, 3, 3, 3, 1, 4, 3,
- 4, 0, 3, 0, 4, 2, 3, 3, 2, 2, 5, 4, 2, 1, 3, 4, 3, 4, 3, 1, 3, 3,
- 4, 2, 0, 2, 1, 0, 3, 3, 0, 0, 2, 0, 3, 1, 0, 4, 4, 3, 4, 3, 0, 4,
- 0, 1, 0, 2, 4, 4, 4, 4, 4, 0, 3, 2, 0, 3, 3),
-
- (0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
- 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2),
-
- (0, 2, 0, 3, 0, 4, 0, 4, 0, 1, 3, 3, 3, 0, 4, 0, 2, 1, 2, 1, 1, 1, 2, 0,
- 3, 1, 1, 0, 1, 0, 3, 1, 0, 0, 3, 3, 2, 0, 1, 1, 0, 0, 0, 0, 0, 1,
- 0, 2, 0, 2, 2, 0, 3, 1, 0, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 0, 0, 0,
- 0, 1, 0, 0, 3, 3, 4, 3, 1, 0, 1, 0, 3, 0, 2),
-
- (0, 0, 0, 3, 0, 5, 0, 0, 0, 0, 1, 0, 2, 0, 3, 1, 0, 1, 3, 0, 0, 0, 2, 0,
- 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 4, 0, 0, 0, 2, 3, 0, 1, 4, 1, 0, 2,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 1, 0, 0,
- 0, 0, 0, 0, 0, 2, 0, 0, 3, 0, 0, 0, 0, 0, 3),
-
- (0, 2, 0, 5, 0, 5, 0, 1, 0, 2, 4, 3, 3, 2, 5, 1, 3, 2, 3, 3, 3, 0, 4, 1,
- 2, 0, 3, 0, 4, 0, 2, 2, 1, 1, 5, 3, 0, 0, 1, 4, 2, 3, 2, 0, 3, 3,
- 3, 2, 0, 2, 4, 1, 1, 2, 0, 1, 1, 0, 3, 1, 0, 1, 3, 1, 2, 3, 0, 2,
- 0, 0, 0, 1, 3, 5, 4, 4, 4, 0, 3, 0, 0, 1, 3),
-
- (0, 4, 0, 5, 0, 4, 0, 4, 0, 4, 5, 4, 3, 3, 4, 3, 3, 3, 4, 3, 4, 4, 5, 3,
- 4, 5, 4, 2, 4, 2, 3, 4, 3, 1, 4, 4, 1, 3, 5, 4, 4, 5, 5, 4, 4, 5,
- 5, 5, 2, 3, 3, 1, 4, 3, 1, 3, 3, 0, 3, 3, 1, 4, 3, 4, 4, 4, 0, 3,
- 0, 4, 0, 3, 3, 4, 4, 5, 0, 0, 4, 3, 0, 4, 5),
-
- (0, 4, 0, 4, 0, 3, 0, 3, 0, 3, 4, 4, 4, 3, 3, 2, 4, 3, 4, 3, 4, 3, 5, 3,
- 4, 3, 2, 1, 4, 2, 4, 4, 3, 1, 3, 4, 2, 4, 5, 5, 3, 4, 5, 4, 1, 5,
- 4, 3, 0, 3, 2, 2, 3, 2, 1, 3, 1, 0, 3, 3, 3, 5, 3, 3, 3, 5, 4, 4,
- 2, 3, 3, 4, 3, 3, 3, 2, 1, 0, 3, 2, 1, 4, 3),
-
- (0, 4, 0, 5, 0, 4, 0, 3, 0, 3, 5, 5, 3, 2, 4, 3, 4, 0, 5, 4, 4, 1, 4, 4,
- 4, 3, 3, 3, 4, 3, 5, 5, 2, 3, 3, 4, 1, 2, 5, 5, 3, 5, 5, 2, 3, 5,
- 5, 4, 0, 3, 2, 0, 3, 3, 1, 1, 5, 1, 4, 1, 0, 4, 3, 2, 3, 5, 0, 4,
- 0, 3, 0, 5, 4, 3, 4, 3, 0, 0, 4, 1, 0, 4, 4),
-
- (1, 3, 0, 4, 0, 2, 0, 2, 0, 2, 5, 5, 3, 3, 3, 3, 3, 0, 4, 2, 3, 4, 4, 4,
- 3, 4, 0, 0, 3, 4, 5, 4, 3, 3, 3, 3, 2, 5, 5, 4, 5, 5, 5, 4, 3, 5,
- 5, 5, 1, 3, 1, 0, 1, 0, 0, 3, 2, 0, 4, 2, 0, 5, 2, 3, 2, 4, 1, 3,
- 0, 3, 0, 4, 5, 4, 5, 4, 3, 0, 4, 2, 0, 5, 4),
-
- (0, 3, 0, 4, 0, 5, 0, 3, 0, 3, 4, 4, 3, 2, 3, 2, 3, 3, 3, 3, 3, 2, 4, 3,
- 3, 2, 2, 0, 3, 3, 3, 3, 3, 1, 3, 3, 3, 0, 4, 4, 3, 4, 4, 1, 1, 4,
- 4, 2, 0, 3, 1, 0, 1, 1, 0, 4, 1, 0, 2, 3, 1, 3, 3, 1, 3, 4, 0, 3,
- 0, 1, 0, 3, 1, 3, 0, 0, 1, 0, 2, 0, 0, 4, 4),
-
- (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
-
- (0, 3, 0, 3, 0, 2, 0, 3, 0, 1, 5, 4, 3, 3, 3, 1, 4, 2, 1, 2, 3, 4, 4, 2,
- 4, 4, 5, 0, 3, 1, 4, 3, 4, 0, 4, 3, 3, 3, 2, 3, 2, 5, 3, 4, 3, 2,
- 2, 3, 0, 0, 3, 0, 2, 1, 0, 1, 2, 0, 0, 0, 0, 2, 1, 1, 3, 1, 0, 2,
- 0, 4, 0, 3, 4, 4, 4, 5, 2, 0, 2, 0, 0, 1, 3),
-
- (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0,
- 0, 1, 1, 0, 0, 0, 4, 2, 1, 1, 0, 1, 0, 3, 2, 0, 0, 3, 1, 1, 1, 2,
- 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, 2,
- 0, 0, 0, 1, 4, 0, 4, 2, 1, 0, 0, 0, 0, 0, 1),
-
- (0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
- 0, 1, 0, 0, 0, 0, 3, 1, 0, 0, 0, 2, 0, 2, 1, 0, 0, 1, 2, 1, 0, 1,
- 1, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 0, 0, 0, 0,
- 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2),
-
- (0, 4, 0, 4, 0, 4, 0, 3, 0, 4, 4, 3, 4, 2, 4, 3, 2, 0, 4, 4, 4, 3, 5, 3,
- 5, 3, 3, 2, 4, 2, 4, 3, 4, 3, 1, 4, 0, 2, 3, 4, 4, 4, 3, 3, 3, 4,
- 4, 4, 3, 4, 1, 3, 4, 3, 2, 1, 2, 1, 3, 3, 3, 4, 4, 3, 3, 5, 0, 4,
- 0, 3, 0, 4, 3, 3, 3, 2, 1, 0, 3, 0, 0, 3, 3),
-
- (0, 4, 0, 3, 0, 3, 0, 3, 0, 3, 5, 5, 3, 3, 3, 3, 4, 3, 4, 3, 3, 3, 4, 4,
- 4, 3, 3, 3, 3, 4, 3, 5, 3, 3, 1, 3, 2, 4, 5, 5, 5, 5, 4, 3, 4, 5,
- 5, 3, 2, 2, 3, 3, 3, 3, 2, 3, 3, 1, 2, 3, 2, 4, 3, 3, 3, 4, 0, 4,
- 0, 2, 0, 4, 3, 2, 2, 1, 2, 0, 3, 0, 0, 4, 1),
-)
-
-
-class JapaneseContextAnalysis:
- def __init__(self):
- self.reset()
-
- def reset(self):
- # total sequence received
- self._mTotalRel = 0
- # category counters, each interger counts sequence in its category
- self._mRelSample = [0] * NUM_OF_CATEGORY
- # if last byte in current buffer is not the last byte of a character,
- # we need to know how many bytes to skip in next buffer
- self._mNeedToSkipCharNum = 0
- # The order of previous char
- self._mLastCharOrder = -1
- # If this flag is set to constants.True, detection is done and
- # conclusion has been made
- self._mDone = False
-
- def feed(self, aBuf, aLen):
- if self._mDone:
- return
-
- # The buffer we got is byte oriented, and a character may span in more
- # than one buffers. In case the last one or two byte in last buffer is
- # not complete, we record how many byte needed to complete that
- # character and skip these bytes here. We can choose to record those
- # bytes as well and analyse the character once it is complete, but
- # since a character will not make much difference, by simply skipping
- # this character will simply our logic and improve performance.
- i = self._mNeedToSkipCharNum
- while i < aLen:
- order, charLen = self.get_order(aBuf[i:i + 2])
- i += charLen
- if i > aLen:
- self._mNeedToSkipCharNum = i - aLen
- self._mLastCharOrder = -1
- else:
- if (order != -1) and (self._mLastCharOrder != -1):
- self._mTotalRel += 1
- if self._mTotalRel > MAX_REL_THRESHOLD:
- self._mDone = True
- break
- self._mRelSample[
- jp2CharContext[self._mLastCharOrder][order]
- ] += 1
- self._mLastCharOrder = order
-
- def got_enough_data(self):
- return self._mTotalRel > ENOUGH_REL_THRESHOLD
-
- def get_confidence(self):
- # This is just one way to calculate confidence. It works well for me.
- if self._mTotalRel > MINIMUM_DATA_THRESHOLD:
- return (self._mTotalRel - self._mRelSample[0]) / self._mTotalRel
- else:
- return DONT_KNOW
-
- def get_order(self, aStr):
- return -1, 1
-
-
-class SJISContextAnalysis(JapaneseContextAnalysis):
- def get_order(self, aStr):
- if not aStr:
- return -1, 1
- # find out current char's byte length
- char = wrap_ord(aStr[0])
- try:
- if (((char >= 0x81) and (char <= 0x9F)) or
- ((char >= 0xE0) and (char <= 0xFC))):
- charLen = 2
- else:
- charLen = 1
- except UnicodeDecodeError:
- return -1, 1
-
- # return its order if it is hiragana
- if len(aStr) > 1:
- char_1 = wrap_ord(aStr[1])
- if ((char == 202) and (0x9F <= char_1 <= 0xF1)):
- return char_1 - 0x9F, charLen
-
- return -1, charLen
-
-
-class EUCJPContextAnalysis(JapaneseContextAnalysis):
- def get_order(self, aStr):
- if not aStr:
- return -1, 1
- # find out current char's byte length
- char = wrap_ord(aStr[0])
- try:
- if (char == 0x8E) or (0xA1 <= char <= 0xFE):
- charLen = 2
- elif aStr[0] == 0x8F:
- charLen = 3
- else:
- charLen = 1
- except UnicodeDecodeError:
- return -1, 1
-
- # return its order if it is hiragana
- if len(aStr) > 1:
- char_1 = wrap_ord(aStr[1])
- if (char == 0xA4) and (0xA1 <= char_1 <= 0xF3):
- return char_1 - 0xA1, charLen
-
- return -1, charLen
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is Mozilla Communicator client code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 1998\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from .compat import wrap_ord\r
+\r
+NUM_OF_CATEGORY = 6\r
+DONT_KNOW = -1\r
+ENOUGH_REL_THRESHOLD = 100\r
+MAX_REL_THRESHOLD = 1000\r
+MINIMUM_DATA_THRESHOLD = 4\r
+\r
+# This is hiragana 2-char sequence table, the number in each cell represents its frequency category\r
+jp2CharContext = (\r
+(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1),\r
+(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4),\r
+(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2),\r
+(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4),\r
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),\r
+(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4),\r
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),\r
+(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3),\r
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),\r
+(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4),\r
+(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4),\r
+(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3),\r
+(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3),\r
+(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3),\r
+(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4),\r
+(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3),\r
+(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4),\r
+(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3),\r
+(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5),\r
+(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3),\r
+(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5),\r
+(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4),\r
+(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4),\r
+(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3),\r
+(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3),\r
+(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3),\r
+(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5),\r
+(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4),\r
+(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5),\r
+(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3),\r
+(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4),\r
+(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4),\r
+(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4),\r
+(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1),\r
+(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0),\r
+(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3),\r
+(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0),\r
+(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3),\r
+(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3),\r
+(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5),\r
+(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4),\r
+(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5),\r
+(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3),\r
+(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3),\r
+(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3),\r
+(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3),\r
+(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4),\r
+(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4),\r
+(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2),\r
+(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3),\r
+(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3),\r
+(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3),\r
+(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3),\r
+(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4),\r
+(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3),\r
+(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4),\r
+(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3),\r
+(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3),\r
+(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4),\r
+(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4),\r
+(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3),\r
+(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4),\r
+(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4),\r
+(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3),\r
+(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4),\r
+(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4),\r
+(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4),\r
+(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3),\r
+(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2),\r
+(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2),\r
+(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3),\r
+(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3),\r
+(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5),\r
+(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3),\r
+(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4),\r
+(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4),\r
+(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4),\r
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),\r
+(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3),\r
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1),\r
+(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2),\r
+(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3),\r
+(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),\r
+)\r
+\r
+class JapaneseContextAnalysis:\r
+ def __init__(self):\r
+ self.reset()\r
+\r
+ def reset(self):\r
+ self._mTotalRel = 0 # total sequence received\r
+ # category counters, each interger counts sequence in its category\r
+ self._mRelSample = [0] * NUM_OF_CATEGORY\r
+ # if last byte in current buffer is not the last byte of a character,\r
+ # we need to know how many bytes to skip in next buffer\r
+ self._mNeedToSkipCharNum = 0\r
+ self._mLastCharOrder = -1 # The order of previous char\r
+ # If this flag is set to True, detection is done and conclusion has\r
+ # been made\r
+ self._mDone = False\r
+\r
+ def feed(self, aBuf, aLen):\r
+ if self._mDone:\r
+ return\r
+\r
+ # The buffer we got is byte oriented, and a character may span in more than one\r
+ # buffers. In case the last one or two byte in last buffer is not\r
+ # complete, we record how many byte needed to complete that character\r
+ # and skip these bytes here. We can choose to record those bytes as\r
+ # well and analyse the character once it is complete, but since a\r
+ # character will not make much difference, by simply skipping\r
+ # this character will simply our logic and improve performance.\r
+ i = self._mNeedToSkipCharNum\r
+ while i < aLen:\r
+ order, charLen = self.get_order(aBuf[i:i + 2])\r
+ i += charLen\r
+ if i > aLen:\r
+ self._mNeedToSkipCharNum = i - aLen\r
+ self._mLastCharOrder = -1\r
+ else:\r
+ if (order != -1) and (self._mLastCharOrder != -1):\r
+ self._mTotalRel += 1\r
+ if self._mTotalRel > MAX_REL_THRESHOLD:\r
+ self._mDone = True\r
+ break\r
+ self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1\r
+ self._mLastCharOrder = order\r
+\r
+ def got_enough_data(self):\r
+ return self._mTotalRel > ENOUGH_REL_THRESHOLD\r
+\r
+ def get_confidence(self):\r
+ # This is just one way to calculate confidence. It works well for me.\r
+ if self._mTotalRel > MINIMUM_DATA_THRESHOLD:\r
+ return (self._mTotalRel - self._mRelSample[0]) / self._mTotalRel\r
+ else:\r
+ return DONT_KNOW\r
+\r
+ def get_order(self, aBuf):\r
+ return -1, 1\r
+\r
+class SJISContextAnalysis(JapaneseContextAnalysis):\r
+ def get_order(self, aBuf):\r
+ if not aBuf:\r
+ return -1, 1\r
+ # find out current char's byte length\r
+ first_char = wrap_ord(aBuf[0])\r
+ if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)):\r
+ charLen = 2\r
+ else:\r
+ charLen = 1\r
+\r
+ # return its order if it is hiragana\r
+ if len(aBuf) > 1:\r
+ second_char = wrap_ord(aBuf[1])\r
+ if (first_char == 202) and (0x9F <= second_char <= 0xF1):\r
+ return second_char - 0x9F, charLen\r
+\r
+ return -1, charLen\r
+\r
+class EUCJPContextAnalysis(JapaneseContextAnalysis):\r
+ def get_order(self, aBuf):\r
+ if not aBuf:\r
+ return -1, 1\r
+ # find out current char's byte length\r
+ first_char = wrap_ord(aBuf[0])\r
+ if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE):\r
+ charLen = 2\r
+ elif first_char == 0x8F:\r
+ charLen = 3\r
+ else:\r
+ charLen = 1\r
+\r
+ # return its order if it is hiragana\r
+ if len(aBuf) > 1:\r
+ second_char = wrap_ord(aBuf[1])\r
+ if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3):\r
+ return second_char - 0xA1, charLen\r
+\r
+ return -1, charLen\r
+\r
+# flake8: noqa\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Communicator client code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-# 255: Control characters that usually does not exist in any text
-# 254: Carriage/Return
-# 253: symbol (punctuation) that does not belong to word
-# 252: 0 - 9
-
-# Character Mapping Table:
-# this table is modified base on win1251BulgarianCharToOrderMap, so
-# only number <64 is sure valid
-
-Latin5_BulgarianCharToOrderMap = (
-255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 254, 255, 255, 254, 255, 255, # 00
-255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, # 10
-253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, # 20
-252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 253, 253, 253, 253, 253, 253, # 30
-253, 77, 90, 99, 100, 72, 109, 107, 101, 79, 185, 81, 102, 76, 94, 82, # 40
-110, 186, 108, 91, 74, 119, 84, 96, 111, 187, 115, 253, 253, 253, 253, 253, # 50
-253, 65, 69, 70, 66, 63, 68, 112, 103, 92, 194, 104, 95, 86, 87, 71, # 60
-116, 195, 85, 93, 97, 113, 196, 197, 198, 199, 200, 253, 253, 253, 253, 253, # 70
-194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, # 80
-210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, # 90
- 81, 226, 227, 228, 229, 230, 105, 231, 232, 233, 234, 235, 236, 45, 237, 238, # a0
- 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # b0
- 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61, 239, 67, 240, 60, 56, # c0
- 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # d0
- 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52, 241, 42, 16, # e0
- 62, 242, 243, 244, 58, 245, 98, 246, 247, 248, 249, 250, 251, 91, 252, 253, # f0
-)
-
-win1251BulgarianCharToOrderMap = (
-255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 254, 255, 255, 254, 255, 255, # 00
-255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, # 10
-253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, # 20
-252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 253, 253, 253, 253, 253, 253, # 30
-253, 77, 90, 99, 100, 72, 109, 107, 101, 79, 185, 81, 102, 76, 94, 82, # 40
-110, 186, 108, 91, 74, 119, 84, 96, 111, 187, 115, 253, 253, 253, 253, 253, # 50
-253, 65, 69, 70, 66, 63, 68, 112, 103, 92, 194, 104, 95, 86, 87, 71, # 60
-116, 195, 85, 93, 97, 113, 196, 197, 198, 199, 200, 253, 253, 253, 253, 253, # 70
-206, 207, 208, 209, 210, 211, 212, 213, 120, 214, 215, 216, 217, 218, 219, 220, # 80
-221, 78, 64, 83, 121, 98, 117, 105, 222, 223, 224, 225, 226, 227, 228, 229, # 90
- 88, 230, 231, 232, 233, 122, 89, 106, 234, 235, 236, 237, 238, 45, 239, 240, # a0
- 73, 80, 118, 114, 241, 242, 243, 244, 245, 62, 58, 246, 247, 248, 249, 250, # b0
- 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # c0
- 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61, 251, 67, 252, 60, 56, # d0
- 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # e0
- 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52, 253, 42, 16, # f0
-)
-
-# Model Table:
-# total sequences: 100%
-# first 512 sequences: 96.9392%
-# first 1024 sequences:3.0618%
-# rest sequences: 0.2992%
-# negative sequences: 0.0020%
-BulgarianLangModel = (
- 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3,
- 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 0, 3, 3, 3, 2, 2, 3, 2, 2, 1, 2, 2, 3, 1, 3, 3, 2, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 0, 1, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 2, 3, 3,
- 3, 3, 3, 3, 3, 3, 0, 3, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0,
- 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 2, 2, 2, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 3, 2, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3,
- 2, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
- 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 2, 3, 2, 2, 1, 3, 3, 3, 3, 2, 2, 2, 1, 1, 2, 0, 1, 0, 1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 3, 3, 3, 3, 3, 3, 2, 3, 2, 2, 3, 3, 1, 1, 2,
- 3, 3, 2, 3, 3, 3, 3, 2, 1, 2, 0, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
- 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 3,
- 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 2, 3, 2, 3, 3, 3, 3, 3, 2, 3, 3, 1, 3, 0,
- 3, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 3, 2, 3,
- 3, 3, 1, 3, 3, 2, 3, 2, 2, 2, 0, 0, 2, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 2, 2, 3, 3, 3, 1, 2, 2, 3, 2, 1,
- 1, 2, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 1,
- 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3,
- 1, 2, 3, 2, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 1, 2, 0, 2, 1, 2, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 1, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 2, 3, 2,
- 2, 2, 3, 1, 2, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 1, 1, 1, 2, 2, 1, 3, 1, 3, 2, 2, 3, 0, 0, 1, 0, 1, 0, 1, 0, 0,
- 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 1, 3, 3, 3, 3, 3, 2, 2, 3, 2, 2, 3, 1, 2, 1, 1, 1, 2, 3,
- 1, 3, 1, 2, 2, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 3, 3, 3,
- 3, 1, 3, 2, 2, 3, 3, 1, 2, 3, 1, 1, 3, 3, 3, 3, 1, 2, 2, 1, 1, 1, 0, 2, 0,
- 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 1, 2, 2, 3, 3, 3, 2, 2, 1, 1, 2, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3,
- 0, 1, 2, 1, 3, 3, 2, 3, 3, 3, 3, 3, 2, 3, 2, 1, 0, 3, 1, 2, 1, 2, 1, 2, 3,
- 2, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 0, 0, 3, 1, 3, 3, 2, 3, 3, 2, 2, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 2, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 2, 1, 1, 2, 1, 3, 3, 0, 3, 1, 1, 1,
- 1, 3, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 3, 2, 2, 2, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 1, 1, 3, 1, 3, 3, 2, 3, 2, 2, 2, 3, 0, 2, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 2, 3, 3, 2, 2, 3, 2, 1, 1, 1, 1, 1, 3, 1, 3,
- 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 2,
- 3, 2, 0, 3, 2, 0, 3, 0, 2, 0, 0, 2, 1, 3, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 1, 3, 3, 3, 3, 2, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2,
- 2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 3, 3,
- 3, 2, 1, 3, 1, 1, 2, 1, 3, 2, 1, 1, 0, 1, 2, 3, 2, 1, 1, 1, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 3, 3, 3, 2, 2, 1, 0, 1, 0, 0, 1, 0,
- 0, 0, 2, 1, 0, 3, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
- 3, 3, 3, 2, 3, 2, 3, 3, 1, 3, 2, 1, 1, 1, 2, 1, 1, 2, 1, 3, 0, 1, 0, 0, 0,
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 1, 2, 2, 3, 3, 2, 3, 2, 2,
- 2, 3, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 0, 1, 1, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 3, 3, 3, 3, 2, 1, 3, 1, 0, 2, 2, 1, 3, 2, 1, 0, 0, 2, 0, 2, 0, 1,
- 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 3, 3, 3, 3, 3, 1, 2,
- 0, 2, 3, 1, 2, 3, 2, 0, 1, 3, 1, 2, 1, 1, 1, 0, 0, 1, 0, 0, 2, 2, 2, 3, 2,
- 2, 2, 2, 1, 2, 1, 1, 2, 2, 1, 1, 2, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0,
- 0, 0, 1, 1, 0, 1, 3, 3, 3, 3, 3, 2, 1, 2, 2, 1, 2, 0, 2, 0, 1, 0, 1, 2, 1,
- 2, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 3, 2, 3, 3,
- 1, 1, 3, 1, 0, 3, 2, 1, 0, 0, 0, 1, 2, 0, 2, 0, 1, 0, 0, 0, 1, 0, 1, 2, 1,
- 2, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 2, 1, 1, 1,
- 0, 0, 0, 0, 0, 1, 1, 0, 0, 3, 1, 0, 1, 0, 2, 3, 2, 2, 2, 3, 2, 2, 2, 2, 2,
- 1, 0, 2, 1, 2, 1, 1, 1, 0, 1, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 1, 2, 1,
- 1, 0, 1, 2, 1, 2, 2, 2, 1, 1, 1, 0, 1, 1, 1, 1, 2, 0, 1, 0, 0, 0, 0, 2, 3,
- 2, 3, 3, 0, 0, 2, 1, 0, 2, 1, 0, 0, 0, 0, 2, 3, 0, 2, 0, 0, 0, 0, 0, 1, 0,
- 0, 2, 0, 1, 2, 2, 1, 2, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 0, 1, 2, 2, 1, 1, 1,
- 1, 1, 0, 1, 1, 1, 0, 0, 1, 2, 0, 0, 3, 3, 2, 2, 3, 0, 2, 3, 1, 1, 2, 0, 0,
- 0, 1, 0, 0, 2, 0, 2, 0, 0, 0, 1, 0, 1, 0, 1, 2, 0, 2, 2, 1, 1, 1, 1, 2, 1,
- 0, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0,
- 0, 2, 3, 2, 3, 3, 0, 0, 3, 0, 1, 1, 0, 1, 0, 0, 0, 2, 2, 1, 2, 0, 0, 0, 0,
- 0, 0, 0, 0, 2, 0, 1, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 1, 0, 2, 0, 1, 0, 1,
- 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 3, 3, 3, 3, 2, 2, 2, 2, 2, 0,
- 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 0, 2, 0, 1, 0, 1, 0, 0, 2, 0, 1, 2, 1, 1, 1,
- 1, 1, 1, 1, 2, 2, 1, 1, 0, 2, 0, 1, 0, 2, 0, 0, 1, 1, 1, 0, 0, 2, 0, 0, 0,
- 1, 1, 0, 0, 2, 3, 3, 3, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 1,
- 0, 0, 0, 0, 0, 0, 1, 2, 0, 1, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 2, 2, 1, 2, 0,
- 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 2, 3, 3, 3, 3, 0, 2,
- 2, 0, 2, 1, 0, 0, 0, 1, 1, 1, 2, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 2, 0, 2, 2,
- 1, 1, 1, 2, 1, 2, 1, 1, 2, 2, 2, 1, 2, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 2, 1,
- 0, 0, 0, 1, 1, 0, 0, 2, 3, 3, 3, 3, 0, 2, 1, 0, 0, 2, 0, 0, 0, 0, 0, 1, 2,
- 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 2, 2,
- 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 3, 3, 2, 2,
- 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 1,
- 0, 2, 2, 1, 1, 1, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 1, 0, 1, 1, 1, 1, 1, 0, 1,
- 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 3, 1, 0, 1, 0, 2, 2, 2, 2, 3, 2, 1, 1, 1, 2,
- 3, 0, 0, 1, 0, 2, 1, 1, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 2, 1, 2, 2,
- 1, 1, 0, 1, 2, 1, 2, 2, 1, 1, 1, 0, 0, 1, 1, 1, 2, 1, 0, 1, 0, 0, 0, 0, 2,
- 1, 0, 1, 0, 3, 1, 2, 2, 2, 2, 1, 2, 2, 1, 1, 1, 0, 2, 1, 2, 2, 1, 1, 2, 1,
- 1, 0, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 0, 1, 1, 0, 2, 1, 1, 1, 1,
- 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 2, 1, 1, 1, 1, 2, 2, 2, 2, 1, 2, 2,
- 2, 1, 2, 2, 1, 1, 2, 1, 2, 3, 2, 2, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 2, 2, 2, 3, 2, 0, 1, 2, 0, 1, 2, 1, 1, 0, 1, 0, 1, 2, 1, 2, 0, 0, 0,
- 1, 1, 0, 0, 0, 1, 0, 0, 2, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 2, 0, 1, 1,
- 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 1, 2, 2, 2,
- 2, 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2,
- 2, 2, 2, 1, 1, 2, 1, 2, 1, 1, 1, 0, 2, 1, 2, 1, 1, 1, 0, 2, 1, 1, 1, 1, 0,
- 1, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0,
- 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 3, 2, 0,
- 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
- 2, 1, 1, 1, 1, 1, 1, 0, 0, 2, 2, 2, 2, 2, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0,
- 1, 0, 0, 0, 1, 1, 0, 1, 2, 3, 1, 2, 1, 0, 1, 1, 0, 2, 2, 2, 0, 0, 1, 0, 0,
- 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1,
- 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 2, 2, 2,
- 2, 2, 0, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
- 2, 0, 2, 2, 1, 1, 1, 1, 1, 0, 0, 1, 2, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 0, 0, 2, 0, 1, 1, 0, 0, 0,
- 1, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 2, 2, 3, 2, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 1, 0,
- 0, 0, 0, 0, 0, 0, 2, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0,
- 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 1,
- 1, 2, 1, 1, 1, 0, 1, 1, 1, 1, 2, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 2, 1,
- 1, 1, 1, 1, 1, 0, 0, 1, 2, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0,
- 0, 0, 0, 1, 0, 0, 1, 3, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 1, 0, 0, 1,
- 0, 2, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0,
- 2, 0, 1, 0, 0, 1, 1, 2, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 0, 1, 1, 0, 2, 1, 0, 1, 1, 1, 0, 0, 1, 0,
- 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0,
- 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2,
- 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1,
- 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1, 0, 0,
- 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2,
- 1, 2, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 1, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0,
- 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 2, 0, 0, 2, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0,
- 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 0, 0, 0, 0, 0, 0,
- 2, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
- 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
-)
-
-Latin5BulgarianModel = {
- 'charToOrderMap': Latin5_BulgarianCharToOrderMap,
- 'precedenceMatrix': BulgarianLangModel,
- 'mTypicalPositiveRatio': 0.969392,
- 'keepEnglishLetter': False,
- 'charsetName': "ISO-8859-5"
-}
-
-Win1251BulgarianModel = {
- 'charToOrderMap': win1251BulgarianCharToOrderMap,
- 'precedenceMatrix': BulgarianLangModel,
- 'mTypicalPositiveRatio': 0.969392,
- 'keepEnglishLetter': False,
- 'charsetName': "windows-1251"
-}
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is Mozilla Communicator client code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 1998\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+# 255: Control characters that usually does not exist in any text\r
+# 254: Carriage/Return\r
+# 253: symbol (punctuation) that does not belong to word\r
+# 252: 0 - 9\r
+\r
+# Character Mapping Table:\r
+# this table is modified base on win1251BulgarianCharToOrderMap, so\r
+# only number <64 is sure valid\r
+\r
+Latin5_BulgarianCharToOrderMap = (\r
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
+253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40\r
+110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50\r
+253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60\r
+116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70\r
+194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, # 80\r
+210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, # 90\r
+ 81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, # a0\r
+ 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # b0\r
+ 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, # c0\r
+ 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # d0\r
+ 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, # e0\r
+ 62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, # f0\r
+)\r
+\r
+win1251BulgarianCharToOrderMap = (\r
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
+253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40\r
+110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50\r
+253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60\r
+116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70\r
+206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, # 80\r
+221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229, # 90\r
+ 88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, # a0\r
+ 73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, # b0\r
+ 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # c0\r
+ 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, # d0\r
+ 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # e0\r
+ 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, # f0\r
+)\r
+\r
+# Model Table:\r
+# total sequences: 100%\r
+# first 512 sequences: 96.9392%\r
+# first 1024 sequences:3.0618%\r
+# rest sequences: 0.2992%\r
+# negative sequences: 0.0020%\r
+BulgarianLangModel = (\r
+0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,\r
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,\r
+3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1,\r
+0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0,\r
+0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
+3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,\r
+0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,\r
+0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0,\r
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,\r
+3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0,\r
+0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,\r
+3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0,\r
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
+3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0,\r
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,\r
+3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0,\r
+1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
+3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0,\r
+0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,\r
+3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0,\r
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
+3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0,\r
+0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
+3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0,\r
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
+3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1,\r
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0,\r
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
+3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0,\r
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
+3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0,\r
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0,\r
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
+3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
+3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
+3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0,\r
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
+3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3,\r
+2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1,\r
+3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,\r
+3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2,\r
+1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0,\r
+3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1,\r
+1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0,\r
+2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2,\r
+2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0,\r
+3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2,\r
+1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,\r
+2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2,\r
+2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,\r
+3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2,\r
+1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0,\r
+2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2,\r
+2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0,\r
+2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2,\r
+1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0,\r
+2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2,\r
+1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,\r
+3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2,\r
+1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0,\r
+3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1,\r
+1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0,\r
+2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1,\r
+1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0,\r
+2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2,\r
+1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,\r
+2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1,\r
+1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0,\r
+3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,\r
+1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2,\r
+1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,\r
+2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2,\r
+1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,\r
+2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2,\r
+1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,\r
+1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1,\r
+0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
+1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2,\r
+1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,\r
+2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1,\r
+1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,\r
+1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1,\r
+0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,\r
+1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1,\r
+0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,\r
+2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,\r
+0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,\r
+2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,\r
+1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,\r
+0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,\r
+0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
+1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1,\r
+1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,\r
+1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,\r
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
+)\r
+\r
+Latin5BulgarianModel = {\r
+ 'charToOrderMap': Latin5_BulgarianCharToOrderMap,\r
+ 'precedenceMatrix': BulgarianLangModel,\r
+ 'mTypicalPositiveRatio': 0.969392,\r
+ 'keepEnglishLetter': False,\r
+ 'charsetName': "ISO-8859-5"\r
+}\r
+\r
+Win1251BulgarianModel = {\r
+ 'charToOrderMap': win1251BulgarianCharToOrderMap,\r
+ 'precedenceMatrix': BulgarianLangModel,\r
+ 'mTypicalPositiveRatio': 0.969392,\r
+ 'keepEnglishLetter': False,\r
+ 'charsetName': "windows-1251"\r
+}\r
+\r
+\r
+# flake8: noqa\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Communicator client code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-# KOI8-R language model
-# Character Mapping Table:
-KOI8R_CharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
-191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80
-207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90
-223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0
-238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0
- 27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0
- 15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0
- 59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0
- 35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0
-)
-
-win1251_CharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
-191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
-207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
-223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
-239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253,
- 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
- 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
- 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
- 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
-)
-
-latin5_CharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
-191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
-207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
-223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
- 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
- 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
- 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
- 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
-239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
-)
-
-macCyrillic_CharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
- 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
- 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
-191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
-207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
-223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
-239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16,
- 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
- 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,
-)
-
-IBM855_CharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
-191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205,
-206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70,
- 3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219,
-220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229,
-230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243,
- 8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248,
- 43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249,
-250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,
-)
-
-IBM866_CharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
- 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
- 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
- 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
-191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
-207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
-223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
- 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
-239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
-)
-
-# Model Table:
-# total sequences: 100%
-# first 512 sequences: 97.6601%
-# first 1024 sequences: 2.3389%
-# rest sequences: 0.1237%
-# negative sequences: 0.0009%
-RussianLangModel = (
-0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2,
-3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
-0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
-0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0,
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0,
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0,
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
-3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0,
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0,
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0,
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0,
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
-2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0,
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0,
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0,
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
-3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0,
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1,
-1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
-2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1,
-1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0,
-2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1,
-1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,
-3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1,
-1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,
-2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2,
-1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1,
-1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1,
-1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
-2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1,
-1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,
-3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2,
-1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,
-2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1,
-1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,
-2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0,
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1,
-1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,
-1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1,
-1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0,
-3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1,
-2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,
-3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,
-1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,
-1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1,
-0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
-2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1,
-1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,
-1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,
-0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
-1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1,
-1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
-2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2,
-2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,
-1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0,
-1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,
-2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
-0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,
-1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,
-0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
-2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1,
-1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,
-1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
-0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
-0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,
-0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1,
-0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
-1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,
-0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
-1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,
-0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
-1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1,
-0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,
-2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
-1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
-1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
-1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,
-0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
-)
-
-Koi8rModel = {
- 'charToOrderMap': KOI8R_CharToOrderMap,
- 'precedenceMatrix': RussianLangModel,
- 'mTypicalPositiveRatio': 0.976601,
- 'keepEnglishLetter': False,
- 'charsetName': "KOI8-R"
-}
-
-Win1251CyrillicModel = {
- 'charToOrderMap': win1251_CharToOrderMap,
- 'precedenceMatrix': RussianLangModel,
- 'mTypicalPositiveRatio': 0.976601,
- 'keepEnglishLetter': False,
- 'charsetName': "windows-1251"
-}
-
-Latin5CyrillicModel = {
- 'charToOrderMap': latin5_CharToOrderMap,
- 'precedenceMatrix': RussianLangModel,
- 'mTypicalPositiveRatio': 0.976601,
- 'keepEnglishLetter': False,
- 'charsetName': "ISO-8859-5"
-}
-
-MacCyrillicModel = {
- 'charToOrderMap': macCyrillic_CharToOrderMap,
- 'precedenceMatrix': RussianLangModel,
- 'mTypicalPositiveRatio': 0.976601,
- 'keepEnglishLetter': False,
- 'charsetName': "MacCyrillic"
-};
-
-Ibm866Model = {
- 'charToOrderMap': IBM866_CharToOrderMap,
- 'precedenceMatrix': RussianLangModel,
- 'mTypicalPositiveRatio': 0.976601,
- 'keepEnglishLetter': False,
- 'charsetName': "IBM866"
-}
-
-Ibm855Model = {
- 'charToOrderMap': IBM855_CharToOrderMap,
- 'precedenceMatrix': RussianLangModel,
- 'mTypicalPositiveRatio': 0.976601,
- 'keepEnglishLetter': False,
- 'charsetName': "IBM855"
-}
-
-# flake8: noqa
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is Mozilla Communicator client code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 1998\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from . import constants\r
+\r
+# KOI8-R language model\r
+# Character Mapping Table:\r
+KOI8R_CharToOrderMap = (\r
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
+253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40\r
+155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50\r
+253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60\r
+ 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70\r
+191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80\r
+207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90\r
+223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0\r
+238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0\r
+ 27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0\r
+ 15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0\r
+ 59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0\r
+ 35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0\r
+)\r
+\r
+win1251_CharToOrderMap = (\r
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
+253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40\r
+155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50\r
+253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60\r
+ 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70\r
+191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,\r
+207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,\r
+223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,\r
+239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253,\r
+ 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,\r
+ 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,\r
+ 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,\r
+ 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,\r
+)\r
+\r
+latin5_CharToOrderMap = (\r
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
+253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40\r
+155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50\r
+253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60\r
+ 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70\r
+191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,\r
+207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,\r
+223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,\r
+ 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,\r
+ 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,\r
+ 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,\r
+ 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,\r
+239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,\r
+)\r
+\r
+macCyrillic_CharToOrderMap = (\r
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
+253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40\r
+155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50\r
+253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60\r
+ 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70\r
+ 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,\r
+ 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,\r
+191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,\r
+207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,\r
+223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,\r
+239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16,\r
+ 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,\r
+ 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,\r
+)\r
+\r
+IBM855_CharToOrderMap = (\r
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
+253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40\r
+155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50\r
+253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60\r
+ 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70\r
+191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205,\r
+206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70,\r
+ 3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219,\r
+220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229,\r
+230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243,\r
+ 8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248,\r
+ 43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249,\r
+250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,\r
+)\r
+\r
+IBM866_CharToOrderMap = (\r
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
+253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40\r
+155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50\r
+253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60\r
+ 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70\r
+ 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,\r
+ 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,\r
+ 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,\r
+191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,\r
+207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,\r
+223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,\r
+ 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,\r
+239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,\r
+)\r
+\r
+# Model Table:\r
+# total sequences: 100%\r
+# first 512 sequences: 97.6601%\r
+# first 1024 sequences: 2.3389%\r
+# rest sequences: 0.1237%\r
+# negative sequences: 0.0009%\r
+RussianLangModel = (\r
+0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3,\r
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2,\r
+3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0,\r
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0,\r
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,\r
+0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,\r
+0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0,\r
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0,\r
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0,\r
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0,\r
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0,\r
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0,\r
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0,\r
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0,\r
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0,\r
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
+3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0,\r
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0,\r
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0,\r
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0,\r
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0,\r
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0,\r
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
+2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0,\r
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0,\r
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0,\r
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0,\r
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0,\r
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,\r
+3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0,\r
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1,\r
+1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,\r
+2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1,\r
+1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0,\r
+2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1,\r
+1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,\r
+3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1,\r
+1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,\r
+2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2,\r
+1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1,\r
+1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1,\r
+1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,\r
+2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1,\r
+1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,\r
+3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2,\r
+1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,\r
+2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1,\r
+1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,\r
+2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0,\r
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1,\r
+1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,\r
+1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1,\r
+1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0,\r
+3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1,\r
+2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,\r
+3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,\r
+1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,\r
+1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1,\r
+0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,\r
+2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1,\r
+1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,\r
+1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,\r
+0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
+1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1,\r
+1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,\r
+2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2,\r
+2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,\r
+1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0,\r
+1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,\r
+2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
+0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,\r
+1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,\r
+0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,\r
+2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1,\r
+1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,\r
+1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,\r
+0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,\r
+0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1,\r
+0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,\r
+1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,\r
+0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
+1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,\r
+0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,\r
+1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1,\r
+0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
+2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
+1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,\r
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
+1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,\r
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
+1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,\r
+0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,\r
+)\r
+\r
+Koi8rModel = {\r
+ 'charToOrderMap': KOI8R_CharToOrderMap,\r
+ 'precedenceMatrix': RussianLangModel,\r
+ 'mTypicalPositiveRatio': 0.976601,\r
+ 'keepEnglishLetter': False,\r
+ 'charsetName': "KOI8-R"\r
+}\r
+\r
+Win1251CyrillicModel = {\r
+ 'charToOrderMap': win1251_CharToOrderMap,\r
+ 'precedenceMatrix': RussianLangModel,\r
+ 'mTypicalPositiveRatio': 0.976601,\r
+ 'keepEnglishLetter': False,\r
+ 'charsetName': "windows-1251"\r
+}\r
+\r
+Latin5CyrillicModel = {\r
+ 'charToOrderMap': latin5_CharToOrderMap,\r
+ 'precedenceMatrix': RussianLangModel,\r
+ 'mTypicalPositiveRatio': 0.976601,\r
+ 'keepEnglishLetter': False,\r
+ 'charsetName': "ISO-8859-5"\r
+}\r
+\r
+MacCyrillicModel = {\r
+ 'charToOrderMap': macCyrillic_CharToOrderMap,\r
+ 'precedenceMatrix': RussianLangModel,\r
+ 'mTypicalPositiveRatio': 0.976601,\r
+ 'keepEnglishLetter': False,\r
+ 'charsetName': "MacCyrillic"\r
+};\r
+\r
+Ibm866Model = {\r
+ 'charToOrderMap': IBM866_CharToOrderMap,\r
+ 'precedenceMatrix': RussianLangModel,\r
+ 'mTypicalPositiveRatio': 0.976601,\r
+ 'keepEnglishLetter': False,\r
+ 'charsetName': "IBM866"\r
+}\r
+\r
+Ibm855Model = {\r
+ 'charToOrderMap': IBM855_CharToOrderMap,\r
+ 'precedenceMatrix': RussianLangModel,\r
+ 'mTypicalPositiveRatio': 0.976601,\r
+ 'keepEnglishLetter': False,\r
+ 'charsetName': "IBM855"\r
+}\r
+\r
+# flake8: noqa\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Communicator client code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-# 255: Control characters that usually does not exist in any text
-# 254: Carriage/Return
-# 253: symbol (punctuation) that does not belong to word
-# 252: 0 - 9
-
-# Character Mapping Table:
-Latin7_CharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40
- 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50
-253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60
- 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90
-253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0
-253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, # b0
-110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0
- 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0
-124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0
- 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
-)
-
-win1253_CharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40
- 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50
-253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60
- 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90
-253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0
-253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, # b0
-110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0
- 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0
-124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0
- 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
-)
-
-# Model Table:
-# total sequences: 100%
-# first 512 sequences: 98.2851%
-# first 1024 sequences:1.7001%
-# rest sequences: 0.0359%
-# negative sequences: 0.0148%
-GreekLangModel = (
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0,
-3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
-0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0,
-2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0,
-0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0,
-2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
-0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0,
-2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0,
-0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0,
-2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0,
-0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0,
-3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0,
-3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0,
-2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0,
-2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0,
-0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0,
-0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0,
-0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2,
-0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,
-0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2,
-0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0,
-0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2,
-0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2,
-0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,
-0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2,
-0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0,
-0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0,
-0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
-0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0,
-0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
-0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
-0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,
-0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2,
-0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,
-0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2,
-0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0,
-0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2,
-0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
-0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2,
-0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,
-0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1,
-0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
-0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2,
-0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
-0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2,
-0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2,
-0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
-0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,
-0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,
-0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
-0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0,
-0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0,
-0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-)
-
-Latin7GreekModel = {
- 'charToOrderMap': Latin7_CharToOrderMap,
- 'precedenceMatrix': GreekLangModel,
- 'mTypicalPositiveRatio': 0.982851,
- 'keepEnglishLetter': False,
- 'charsetName': "ISO-8859-7"
-}
-
-Win1253GreekModel = {
- 'charToOrderMap': win1253_CharToOrderMap,
- 'precedenceMatrix': GreekLangModel,
- 'mTypicalPositiveRatio': 0.982851,
- 'keepEnglishLetter': False,
- 'charsetName': "windows-1253"
-}
-
-# flake8: noqa
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is Mozilla Communicator client code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 1998\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from . import constants\r
+\r
+# 255: Control characters that usually does not exist in any text\r
+# 254: Carriage/Return\r
+# 253: symbol (punctuation) that does not belong to word\r
+# 252: 0 - 9\r
+\r
+# Character Mapping Table:\r
+Latin7_CharToOrderMap = (\r
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
+253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40\r
+ 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50\r
+253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60\r
+ 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70\r
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80\r
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90\r
+253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0\r
+253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, # b0\r
+110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0\r
+ 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0\r
+124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0\r
+ 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0\r
+)\r
+\r
+win1253_CharToOrderMap = (\r
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
+253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40\r
+ 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50\r
+253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60\r
+ 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70\r
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80\r
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90\r
+253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0\r
+253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, # b0\r
+110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0\r
+ 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0\r
+124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0\r
+ 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0\r
+)\r
+\r
+# Model Table:\r
+# total sequences: 100%\r
+# first 512 sequences: 98.2851%\r
+# first 1024 sequences:1.7001%\r
+# rest sequences: 0.0359%\r
+# negative sequences: 0.0148%\r
+GreekLangModel = (\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0,\r
+3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0,\r
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,\r
+0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0,\r
+2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0,\r
+0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0,\r
+2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
+0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0,\r
+2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0,\r
+0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0,\r
+2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0,\r
+0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0,\r
+3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0,\r
+3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0,\r
+2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0,\r
+2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0,\r
+0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0,\r
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0,\r
+0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0,\r
+0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2,\r
+0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,\r
+0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2,\r
+0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0,\r
+0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2,\r
+0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2,\r
+0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,\r
+0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2,\r
+0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0,\r
+0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
+0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0,\r
+0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,\r
+0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
+0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,\r
+0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2,\r
+0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,\r
+0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2,\r
+0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0,\r
+0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2,\r
+0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,\r
+0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2,\r
+0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,\r
+0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1,\r
+0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,\r
+0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2,\r
+0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,\r
+0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2,\r
+0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2,\r
+0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,\r
+0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,\r
+0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,\r
+0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
+0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0,\r
+0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0,\r
+0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+)\r
+\r
+Latin7GreekModel = {\r
+ 'charToOrderMap': Latin7_CharToOrderMap,\r
+ 'precedenceMatrix': GreekLangModel,\r
+ 'mTypicalPositiveRatio': 0.982851,\r
+ 'keepEnglishLetter': False,\r
+ 'charsetName': "ISO-8859-7"\r
+}\r
+\r
+Win1253GreekModel = {\r
+ 'charToOrderMap': win1253_CharToOrderMap,\r
+ 'precedenceMatrix': GreekLangModel,\r
+ 'mTypicalPositiveRatio': 0.982851,\r
+ 'keepEnglishLetter': False,\r
+ 'charsetName': "windows-1253"\r
+}\r
+\r
+# flake8: noqa\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Universal charset detector code.
-#
-# The Initial Developer of the Original Code is
-# Simon Montagu
-# Portions created by the Initial Developer are Copyright (C) 2005
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-# Shy Shalom - original C code
-# Shoshannah Forbes - original C code (?)
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-# 255: Control characters that usually does not exist in any text
-# 254: Carriage/Return
-# 253: symbol (punctuation) that does not belong to word
-# 252: 0 - 9
-
-# Windows-1255 language model
-# Character Mapping Table:
-win1255_CharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, # 40
- 78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, # 50
-253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, # 60
- 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, # 70
-124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214,
-215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221,
- 34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227,
-106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234,
- 30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237,
-238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250,
- 9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23,
- 12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253,
-)
-
-# Model Table:
-# total sequences: 100%
-# first 512 sequences: 98.4004%
-# first 1024 sequences: 1.5981%
-# rest sequences: 0.087%
-# negative sequences: 0.0015%
-HebrewLangModel = (
-0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,
-3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,
-1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,
-1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3,
-1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2,
-1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2,
-1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2,
-0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,
-0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2,
-1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,
-3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2,
-0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1,
-0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
-3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,
-0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,
-0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2,
-0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,
-3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2,
-0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2,
-0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2,
-0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2,
-0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1,
-0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2,
-0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,
-3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2,
-0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2,
-0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2,
-0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
-3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,
-1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2,
-0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
-3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0,
-0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,
-0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
-3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3,
-0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
-2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0,
-0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0,
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
-0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0,
-0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
-0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,
-2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0,
-0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1,
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1,
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,
-0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
-3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0,
-0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1,
-1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1,
-0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,
-2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1,
-1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1,
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1,
-2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1,
-1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1,
-2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
-0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,
-1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1,
-0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,
-)
-
-Win1255HebrewModel = {
- 'charToOrderMap': win1255_CharToOrderMap,
- 'precedenceMatrix': HebrewLangModel,
- 'mTypicalPositiveRatio': 0.984004,
- 'keepEnglishLetter': False,
- 'charsetName': "windows-1255"
-}
-
-# flake8: noqa
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is Mozilla Universal charset detector code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Simon Montagu\r
+# Portions created by the Initial Developer are Copyright (C) 2005\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+# Shy Shalom - original C code\r
+# Shoshannah Forbes - original C code (?)\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from . import constants\r
+\r
+# 255: Control characters that usually does not exist in any text\r
+# 254: Carriage/Return\r
+# 253: symbol (punctuation) that does not belong to word\r
+# 252: 0 - 9\r
+\r
+# Windows-1255 language model\r
+# Character Mapping Table:\r
+win1255_CharToOrderMap = (\r
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
+253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, # 40\r
+ 78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, # 50\r
+253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, # 60\r
+ 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, # 70\r
+124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214,\r
+215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221,\r
+ 34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227,\r
+106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234,\r
+ 30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237,\r
+238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250,\r
+ 9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23,\r
+ 12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253,\r
+)\r
+\r
+# Model Table:\r
+# total sequences: 100%\r
+# first 512 sequences: 98.4004%\r
+# first 1024 sequences: 1.5981%\r
+# rest sequences: 0.087%\r
+# negative sequences: 0.0015%\r
+HebrewLangModel = (\r
+0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,\r
+3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,\r
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,\r
+1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,\r
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,\r
+1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3,\r
+1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,\r
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2,\r
+1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,\r
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2,\r
+1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,\r
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2,\r
+0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,\r
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,\r
+0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,\r
+3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2,\r
+1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,\r
+3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2,\r
+0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0,\r
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1,\r
+0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
+3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,\r
+0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,\r
+0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
+3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2,\r
+0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,\r
+3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2,\r
+0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,\r
+3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2,\r
+0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2,\r
+0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
+3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2,\r
+0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,\r
+3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1,\r
+0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,\r
+3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2,\r
+0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,\r
+3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2,\r
+0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2,\r
+0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,\r
+3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2,\r
+0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
+3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,\r
+1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2,\r
+0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,\r
+3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0,\r
+0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
+3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3,\r
+0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
+2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0,\r
+0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0,\r
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,\r
+0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0,\r
+0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,\r
+0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,\r
+2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0,\r
+0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1,\r
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1,\r
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,\r
+0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
+3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0,\r
+0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1,\r
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,\r
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1,\r
+1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1,\r
+0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,\r
+2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1,\r
+1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1,\r
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1,\r
+2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,\r
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1,\r
+1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1,\r
+2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1,\r
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,\r
+0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,\r
+1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1,\r
+0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,\r
+)\r
+\r
+Win1255HebrewModel = {\r
+ 'charToOrderMap': win1255_CharToOrderMap,\r
+ 'precedenceMatrix': HebrewLangModel,\r
+ 'mTypicalPositiveRatio': 0.984004,\r
+ 'keepEnglishLetter': False,\r
+ 'charsetName': "windows-1255"\r
+}\r
+\r
+# flake8: noqa\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Communicator client code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-# 255: Control characters that usually does not exist in any text
-# 254: Carriage/Return
-# 253: symbol (punctuation) that does not belong to word
-# 252: 0 - 9
-
-# Character Mapping Table:
-Latin2_HungarianCharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
- 46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
-253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,
- 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
-159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,
-175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,
-191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205,
- 79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
-221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231,
-232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241,
- 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85,
-245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,
-)
-
-win1250HungarianCharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
- 46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
-253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,
- 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
-161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,
-177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190,
-191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205,
- 81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
-221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231,
-232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241,
- 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87,
-245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253,
-)
-
-# Model Table:
-# total sequences: 100%
-# first 512 sequences: 94.7368%
-# first 1024 sequences:5.2623%
-# rest sequences: 0.8894%
-# negative sequences: 0.0009%
-HungarianLangModel = (
-0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2,
-3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
-3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3,
-0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
-3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
-3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2,
-0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,
-3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
-3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
-3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2,
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
-3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
-2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1,
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
-3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0,
-1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0,
-1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0,
-1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1,
-3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1,
-2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1,
-2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1,
-2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1,
-2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0,
-2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
-3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1,
-2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1,
-2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1,
-2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,
-1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1,
-1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1,
-3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0,
-1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1,
-1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1,
-2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,
-2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0,
-2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1,
-3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1,
-2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1,
-1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,
-1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
-2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1,
-2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,
-1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,
-1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1,
-2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0,
-1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0,
-1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0,
-2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1,
-2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1,
-2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
-1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1,
-1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,
-1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,
-0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
-2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1,
-2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1,
-1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1,
-2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,
-1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,
-1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,
-2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0,
-2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1,
-2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0,
-1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
-2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0,
-0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
-1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,
-0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
-1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
-0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
-0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
-2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
-0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
-)
-
-Latin2HungarianModel = {
- 'charToOrderMap': Latin2_HungarianCharToOrderMap,
- 'precedenceMatrix': HungarianLangModel,
- 'mTypicalPositiveRatio': 0.947368,
- 'keepEnglishLetter': True,
- 'charsetName': "ISO-8859-2"
-}
-
-Win1250HungarianModel = {
- 'charToOrderMap': win1250HungarianCharToOrderMap,
- 'precedenceMatrix': HungarianLangModel,
- 'mTypicalPositiveRatio': 0.947368,
- 'keepEnglishLetter': True,
- 'charsetName': "windows-1250"
-}
-
-# flake8: noqa
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is Mozilla Communicator client code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 1998\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from . import constants\r
+\r
+# 255: Control characters that usually does not exist in any text\r
+# 254: Carriage/Return\r
+# 253: symbol (punctuation) that does not belong to word\r
+# 252: 0 - 9\r
+\r
+# Character Mapping Table:\r
+Latin2_HungarianCharToOrderMap = (\r
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
+253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,\r
+ 46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,\r
+253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,\r
+ 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,\r
+159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,\r
+175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,\r
+191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205,\r
+ 79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,\r
+221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231,\r
+232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241,\r
+ 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85,\r
+245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,\r
+)\r
+\r
+win1250HungarianCharToOrderMap = (\r
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
+253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,\r
+ 46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,\r
+253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,\r
+ 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,\r
+161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,\r
+177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190,\r
+191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205,\r
+ 81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,\r
+221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231,\r
+232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241,\r
+ 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87,\r
+245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253,\r
+)\r
+\r
+# Model Table:\r
+# total sequences: 100%\r
+# first 512 sequences: 94.7368%\r
+# first 1024 sequences:5.2623%\r
+# rest sequences: 0.8894%\r
+# negative sequences: 0.0009%\r
+HungarianLangModel = (\r
+0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,\r
+3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2,\r
+3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,\r
+3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
+3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3,\r
+0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2,\r
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,\r
+3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3,\r
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2,\r
+0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1,\r
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
+3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
+3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,\r
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3,\r
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,\r
+3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2,\r
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2,\r
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
+3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2,\r
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,\r
+3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2,\r
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2,\r
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
+3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2,\r
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
+3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1,\r
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
+3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2,\r
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
+3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
+3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3,\r
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
+3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1,\r
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
+1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,\r
+2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1,\r
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
+3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0,\r
+1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0,\r
+1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0,\r
+1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1,\r
+3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1,\r
+2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1,\r
+2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1,\r
+2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1,\r
+2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0,\r
+2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,\r
+3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1,\r
+2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1,\r
+2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1,\r
+2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,\r
+1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1,\r
+1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1,\r
+3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0,\r
+1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1,\r
+1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1,\r
+2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,\r
+2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0,\r
+2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1,\r
+3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1,\r
+2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1,\r
+1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,\r
+1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0,\r
+2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1,\r
+2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,\r
+1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,\r
+1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1,\r
+2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0,\r
+1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0,\r
+1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0,\r
+2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1,\r
+2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1,\r
+2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,\r
+1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1,\r
+1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,\r
+1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,\r
+0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0,\r
+2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1,\r
+2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1,\r
+1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1,\r
+2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,\r
+1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,\r
+1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,\r
+2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0,\r
+2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1,\r
+2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0,\r
+1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,\r
+2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0,\r
+0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,\r
+1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,\r
+0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,\r
+1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
+0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,\r
+0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,\r
+2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,\r
+)\r
+\r
+Latin2HungarianModel = {\r
+ 'charToOrderMap': Latin2_HungarianCharToOrderMap,\r
+ 'precedenceMatrix': HungarianLangModel,\r
+ 'mTypicalPositiveRatio': 0.947368,\r
+ 'keepEnglishLetter': True,\r
+ 'charsetName': "ISO-8859-2"\r
+}\r
+\r
+Win1250HungarianModel = {\r
+ 'charToOrderMap': win1250HungarianCharToOrderMap,\r
+ 'precedenceMatrix': HungarianLangModel,\r
+ 'mTypicalPositiveRatio': 0.947368,\r
+ 'keepEnglishLetter': True,\r
+ 'charsetName': "windows-1250"\r
+}\r
+\r
+# flake8: noqa\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Communicator client code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-# 255: Control characters that usually does not exist in any text
-# 254: Carriage/Return
-# 253: symbol (punctuation) that does not belong to word
-# 252: 0 - 9
-
-# The following result for thai was collected from a limited sample (1M).
-
-# Character Mapping Table:
-TIS620CharToOrderMap = (
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
-253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, # 40
-188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, # 50
-253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, # 60
- 96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, # 70
-209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222,
-223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235,
-236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57,
- 49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54,
- 45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63,
- 22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244,
- 11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247,
- 68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253,
-)
-
-# Model Table:
-# total sequences: 100%
-# first 512 sequences: 92.6386%
-# first 1024 sequences:7.3177%
-# rest sequences: 1.0230%
-# negative sequences: 0.0436%
-ThaiLangModel = (
-0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3,
-0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2,
-3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3,
-0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,
-3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2,
-3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1,
-3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2,
-3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1,
-3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1,
-3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,
-3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1,
-2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1,
-3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1,
-0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,
-3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1,
-0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,
-3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2,
-1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0,
-3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3,
-3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0,
-1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2,
-0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
-2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3,
-0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0,
-3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1,
-2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,
-3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2,
-0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2,
-3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
-3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0,
-2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
-3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1,
-2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1,
-3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,
-3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0,
-3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1,
-3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
-3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1,
-3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1,
-1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2,
-0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
-3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3,
-0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,
-3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0,
-3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1,
-1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0,
-3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1,
-3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
-0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2,
-0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
-0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0,
-0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0,
-1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1,
-1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,
-3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1,
-0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
-0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0,
-0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
-3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0,
-3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0,
-0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1,
-0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0,
-0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1,
-0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,
-0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0,
-0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1,
-0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,
-3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0,
-0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0,
-0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
-3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1,
-2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,
-0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0,
-3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0,
-0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
-2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0,
-1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3,
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0,
-1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
-1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,
-2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,
-1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-)
-
-TIS620ThaiModel = {
- 'charToOrderMap': TIS620CharToOrderMap,
- 'precedenceMatrix': ThaiLangModel,
- 'mTypicalPositiveRatio': 0.926386,
- 'keepEnglishLetter': False,
- 'charsetName': "TIS-620"
-}
-
-# flake8: noqa
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is Mozilla Communicator client code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 1998\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+# 255: Control characters that usually does not exist in any text\r
+# 254: Carriage/Return\r
+# 253: symbol (punctuation) that does not belong to word\r
+# 252: 0 - 9\r
+\r
+# The following result for thai was collected from a limited sample (1M).\r
+\r
+# Character Mapping Table:\r
+TIS620CharToOrderMap = (\r
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
+253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, # 40\r
+188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, # 50\r
+253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, # 60\r
+ 96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, # 70\r
+209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222,\r
+223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235,\r
+236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57,\r
+ 49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54,\r
+ 45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63,\r
+ 22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244,\r
+ 11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247,\r
+ 68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253,\r
+)\r
+\r
+# Model Table:\r
+# total sequences: 100%\r
+# first 512 sequences: 92.6386%\r
+# first 1024 sequences:7.3177%\r
+# rest sequences: 1.0230%\r
+# negative sequences: 0.0436%\r
+ThaiLangModel = (\r
+0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3,\r
+0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2,\r
+3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3,\r
+0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,\r
+3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2,\r
+3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1,\r
+3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2,\r
+3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1,\r
+3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1,\r
+3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,\r
+3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1,\r
+2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1,\r
+3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1,\r
+0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,\r
+3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1,\r
+0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,\r
+3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2,\r
+1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0,\r
+3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3,\r
+3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0,\r
+1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2,\r
+0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,\r
+2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3,\r
+0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0,\r
+3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1,\r
+2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,\r
+3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2,\r
+0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2,\r
+3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,\r
+3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0,\r
+2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,\r
+3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1,\r
+2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1,\r
+3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,\r
+3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0,\r
+3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1,\r
+3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
+3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1,\r
+3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1,\r
+1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2,\r
+0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
+3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3,\r
+0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,\r
+3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0,\r
+3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1,\r
+1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0,\r
+3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1,\r
+3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2,\r
+0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
+0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0,\r
+0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0,\r
+1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1,\r
+1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,\r
+3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1,\r
+0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
+0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0,\r
+0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,\r
+3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0,\r
+3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0,\r
+0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1,\r
+0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0,\r
+0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1,\r
+0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,\r
+0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0,\r
+0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1,\r
+0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,\r
+3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0,\r
+0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0,\r
+0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,\r
+3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1,\r
+2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,\r
+0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0,\r
+3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0,\r
+0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,\r
+2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0,\r
+1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3,\r
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0,\r
+1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
+1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,\r
+2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0,\r
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,\r
+1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
+)\r
+\r
+TIS620ThaiModel = {\r
+ 'charToOrderMap': TIS620CharToOrderMap,\r
+ 'precedenceMatrix': ThaiLangModel,\r
+ 'mTypicalPositiveRatio': 0.926386,\r
+ 'keepEnglishLetter': False,\r
+ 'charsetName': "TIS-620"\r
+}\r
+\r
+# flake8: noqa\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Universal charset detector code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 2001
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-# Shy Shalom - original C code
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .charsetprober import CharSetProber
-from .constants import eError, eNotMe
-from .compat import wrap_ord
-
-FREQ_CAT_NUM = 4
-
-UDF = 0 # undefined
-OTH = 1 # other
-ASC = 2 # ascii capital letter
-ASS = 3 # ascii small letter
-ACV = 4 # accent capital vowel
-ACO = 5 # accent capital other
-ASV = 6 # accent small vowel
-ASO = 7 # accent small other
-CLASS_NUM = 8 # total classes
-
-Latin1_CharToClass = (
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F
- OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47
- ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F
- ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57
- ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F
- OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67
- ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F
- ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77
- ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F
- OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87
- OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F
- UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97
- OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF
- ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7
- ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF
- ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7
- ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF
- ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7
- ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF
- ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7
- ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF
-)
-
-# 0 : illegal
-# 1 : very unlikely
-# 2 : normal
-# 3 : very likely
-Latin1ClassModel = (
- # UDF OTH ASC ASS ACV ACO ASV ASO
- 0, 0, 0, 0, 0, 0, 0, 0, # UDF
- 0, 3, 3, 3, 3, 3, 3, 3, # OTH
- 0, 3, 3, 3, 3, 3, 3, 3, # ASC
- 0, 3, 3, 3, 1, 1, 3, 3, # ASS
- 0, 3, 3, 3, 1, 2, 1, 2, # ACV
- 0, 3, 3, 3, 3, 3, 3, 3, # ACO
- 0, 3, 1, 3, 1, 1, 1, 3, # ASV
- 0, 3, 1, 3, 1, 1, 3, 3, # ASO
-)
-
-
-class Latin1Prober(CharSetProber):
- def __init__(self):
- CharSetProber.__init__(self)
- self.reset()
-
- def reset(self):
- self._mLastCharClass = OTH
- self._mFreqCounter = [0] * FREQ_CAT_NUM
- CharSetProber.reset(self)
-
- def get_charset_name(self):
- return "windows-1252"
-
- def feed(self, aBuf):
- aBuf = self.filter_with_english_letters(aBuf)
- for c in aBuf:
- try:
- charClass = Latin1_CharToClass[wrap_ord(c)]
- except IndexError:
- return eError
- freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM)
- + charClass]
- if freq == 0:
- self._mState = eNotMe
- break
- self._mFreqCounter[freq] += 1
- self._mLastCharClass = charClass
-
- return self.get_state()
-
- def get_confidence(self):
- if self.get_state() == eNotMe:
- return 0.01
-
- total = sum(self._mFreqCounter)
- if total < 0.01:
- confidence = 0.0
- else:
- confidence = ((self._mFreqCounter[3] / total)
- - (self._mFreqCounter[1] * 20.0 / total))
- if confidence < 0.0:
- confidence = 0.0
- # lower the confidence of latin1 so that other more accurate detector
- # can take priority.
- confidence = confidence * 0.5
- return confidence
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is Mozilla Universal charset detector code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 2001\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+# Shy Shalom - original C code\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from .charsetprober import CharSetProber\r
+from .constants import eNotMe\r
+from .compat import wrap_ord\r
+\r
+FREQ_CAT_NUM = 4\r
+\r
+UDF = 0 # undefined\r
+OTH = 1 # other\r
+ASC = 2 # ascii capital letter\r
+ASS = 3 # ascii small letter\r
+ACV = 4 # accent capital vowel\r
+ACO = 5 # accent capital other\r
+ASV = 6 # accent small vowel\r
+ASO = 7 # accent small other\r
+CLASS_NUM = 8 # total classes\r
+\r
+Latin1_CharToClass = (\r
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07\r
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F\r
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17\r
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F\r
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27\r
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F\r
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37\r
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F\r
+ OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47\r
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F\r
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57\r
+ ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F\r
+ OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67\r
+ ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F\r
+ ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77\r
+ ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F\r
+ OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87\r
+ OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F\r
+ UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97\r
+ OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F\r
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7\r
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF\r
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7\r
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF\r
+ ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7\r
+ ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF\r
+ ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7\r
+ ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF\r
+ ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7\r
+ ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF\r
+ ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7\r
+ ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF\r
+)\r
+\r
+# 0 : illegal\r
+# 1 : very unlikely\r
+# 2 : normal\r
+# 3 : very likely\r
+Latin1ClassModel = (\r
+ # UDF OTH ASC ASS ACV ACO ASV ASO\r
+ 0, 0, 0, 0, 0, 0, 0, 0, # UDF\r
+ 0, 3, 3, 3, 3, 3, 3, 3, # OTH\r
+ 0, 3, 3, 3, 3, 3, 3, 3, # ASC\r
+ 0, 3, 3, 3, 1, 1, 3, 3, # ASS\r
+ 0, 3, 3, 3, 1, 2, 1, 2, # ACV\r
+ 0, 3, 3, 3, 3, 3, 3, 3, # ACO\r
+ 0, 3, 1, 3, 1, 1, 1, 3, # ASV\r
+ 0, 3, 1, 3, 1, 1, 3, 3, # ASO\r
+)\r
+\r
+\r
+class Latin1Prober(CharSetProber):\r
+ def __init__(self):\r
+ CharSetProber.__init__(self)\r
+ self.reset()\r
+\r
+ def reset(self):\r
+ self._mLastCharClass = OTH\r
+ self._mFreqCounter = [0] * FREQ_CAT_NUM\r
+ CharSetProber.reset(self)\r
+\r
+ def get_charset_name(self):\r
+ return "windows-1252"\r
+\r
+ def feed(self, aBuf):\r
+ aBuf = self.filter_with_english_letters(aBuf)\r
+ for c in aBuf:\r
+ charClass = Latin1_CharToClass[wrap_ord(c)]\r
+ freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM)\r
+ + charClass]\r
+ if freq == 0:\r
+ self._mState = eNotMe\r
+ break\r
+ self._mFreqCounter[freq] += 1\r
+ self._mLastCharClass = charClass\r
+\r
+ return self.get_state()\r
+\r
+ def get_confidence(self):\r
+ if self.get_state() == eNotMe:\r
+ return 0.01\r
+\r
+ total = sum(self._mFreqCounter)\r
+ if total < 0.01:\r
+ confidence = 0.0\r
+ else:\r
+ confidence = ((self._mFreqCounter[3] / total)\r
+ - (self._mFreqCounter[1] * 20.0 / total))\r
+ if confidence < 0.0:\r
+ confidence = 0.0\r
+ # lower the confidence of latin1 so that other more accurate\r
+ # detector can take priority.\r
+ confidence = confidence * 0.5\r
+ return confidence\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Universal charset detector code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 2001
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-# Shy Shalom - original C code
-# Proofpoint, Inc.
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-import sys
-from . import constants
-from .charsetprober import CharSetProber
-
-
-class MultiByteCharSetProber(CharSetProber):
- def __init__(self):
- CharSetProber.__init__(self)
- self._mDistributionAnalyzer = None
- self._mCodingSM = None
- self._mLastChar = ['\x00', '\x00']
-
- def reset(self):
- CharSetProber.reset(self)
- if self._mCodingSM:
- self._mCodingSM.reset()
- if self._mDistributionAnalyzer:
- self._mDistributionAnalyzer.reset()
- self._mLastChar = ['\x00', '\x00']
-
- def get_charset_name(self):
- pass
-
- def feed(self, aBuf):
- aLen = len(aBuf)
- for i in range(0, aLen):
- codingState = self._mCodingSM.next_state(aBuf[i])
- if codingState == constants.eError:
- if constants._debug:
- sys.stderr.write(self.get_charset_name() +
- ' prober hit error at byte ' + str(i) +
- '\n')
- self._mState = constants.eNotMe
- break
- elif codingState == constants.eItsMe:
- self._mState = constants.eFoundIt
- break
- elif codingState == constants.eStart:
- charLen = self._mCodingSM.get_current_charlen()
- if i == 0:
- self._mLastChar[1] = aBuf[0]
- self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
- else:
- self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
- charLen)
-
- self._mLastChar[0] = aBuf[aLen - 1]
-
- if self.get_state() == constants.eDetecting:
- if (self._mDistributionAnalyzer.got_enough_data() and
- (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
- self._mState = constants.eFoundIt
-
- return self.get_state()
-
- def get_confidence(self):
- return self._mDistributionAnalyzer.get_confidence()
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is Mozilla Universal charset detector code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 2001\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+# Shy Shalom - original C code\r
+# Proofpoint, Inc.\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+import sys\r
+from . import constants\r
+from .charsetprober import CharSetProber\r
+\r
+\r
+class MultiByteCharSetProber(CharSetProber):\r
+ def __init__(self):\r
+ CharSetProber.__init__(self)\r
+ self._mDistributionAnalyzer = None\r
+ self._mCodingSM = None\r
+ self._mLastChar = [0, 0]\r
+\r
+ def reset(self):\r
+ CharSetProber.reset(self)\r
+ if self._mCodingSM:\r
+ self._mCodingSM.reset()\r
+ if self._mDistributionAnalyzer:\r
+ self._mDistributionAnalyzer.reset()\r
+ self._mLastChar = [0, 0]\r
+\r
+ def get_charset_name(self):\r
+ pass\r
+\r
+ def feed(self, aBuf):\r
+ aLen = len(aBuf)\r
+ for i in range(0, aLen):\r
+ codingState = self._mCodingSM.next_state(aBuf[i])\r
+ if codingState == constants.eError:\r
+ if constants._debug:\r
+ sys.stderr.write(self.get_charset_name()\r
+ + ' prober hit error at byte ' + str(i)\r
+ + '\n')\r
+ self._mState = constants.eNotMe\r
+ break\r
+ elif codingState == constants.eItsMe:\r
+ self._mState = constants.eFoundIt\r
+ break\r
+ elif codingState == constants.eStart:\r
+ charLen = self._mCodingSM.get_current_charlen()\r
+ if i == 0:\r
+ self._mLastChar[1] = aBuf[0]\r
+ self._mDistributionAnalyzer.feed(self._mLastChar, charLen)\r
+ else:\r
+ self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],\r
+ charLen)\r
+\r
+ self._mLastChar[0] = aBuf[aLen - 1]\r
+\r
+ if self.get_state() == constants.eDetecting:\r
+ if (self._mDistributionAnalyzer.got_enough_data() and\r
+ (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):\r
+ self._mState = constants.eFoundIt\r
+\r
+ return self.get_state()\r
+\r
+ def get_confidence(self):\r
+ return self._mDistributionAnalyzer.get_confidence()\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Universal charset detector code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 2001
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-# Shy Shalom - original C code
-# Proofpoint, Inc.
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .charsetgroupprober import CharSetGroupProber
-from .utf8prober import UTF8Prober
-from .sjisprober import SJISProber
-from .eucjpprober import EUCJPProber
-from .gb2312prober import GB2312Prober
-from .euckrprober import EUCKRProber
-from .big5prober import Big5Prober
-from .euctwprober import EUCTWProber
-
-
-class MBCSGroupProber(CharSetGroupProber):
- def __init__(self):
- CharSetGroupProber.__init__(self)
- self._mProbers = [
- UTF8Prober(),
- SJISProber(),
- EUCJPProber(),
- GB2312Prober(),
- EUCKRProber(),
- Big5Prober(),
- EUCTWProber()
- ]
- self.reset()
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is Mozilla Universal charset detector code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 2001\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+# Shy Shalom - original C code\r
+# Proofpoint, Inc.\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from .charsetgroupprober import CharSetGroupProber\r
+from .utf8prober import UTF8Prober\r
+from .sjisprober import SJISProber\r
+from .eucjpprober import EUCJPProber\r
+from .gb2312prober import GB2312Prober\r
+from .euckrprober import EUCKRProber\r
+from .big5prober import Big5Prober\r
+from .euctwprober import EUCTWProber\r
+\r
+\r
+class MBCSGroupProber(CharSetGroupProber):\r
+ def __init__(self):\r
+ CharSetGroupProber.__init__(self)\r
+ self._mProbers = [\r
+ UTF8Prober(),\r
+ SJISProber(),\r
+ EUCJPProber(),\r
+ GB2312Prober(),\r
+ EUCKRProber(),\r
+ Big5Prober(),\r
+ EUCTWProber()\r
+ ]\r
+ self.reset()\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .constants import eStart, eError, eItsMe
-
-# BIG5
-
-BIG5_cls = (
- 1, 1, 1, 1, 1, 1, 1, 1, # 00 - 07 #allow 0x00 as legal value
- 1, 1, 1, 1, 1, 1, 0, 0, # 08 - 0f
- 1, 1, 1, 1, 1, 1, 1, 1, # 10 - 17
- 1, 1, 1, 0, 1, 1, 1, 1, # 18 - 1f
- 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 27
- 1, 1, 1, 1, 1, 1, 1, 1, # 28 - 2f
- 1, 1, 1, 1, 1, 1, 1, 1, # 30 - 37
- 1, 1, 1, 1, 1, 1, 1, 1, # 38 - 3f
- 2, 2, 2, 2, 2, 2, 2, 2, # 40 - 47
- 2, 2, 2, 2, 2, 2, 2, 2, # 48 - 4f
- 2, 2, 2, 2, 2, 2, 2, 2, # 50 - 57
- 2, 2, 2, 2, 2, 2, 2, 2, # 58 - 5f
- 2, 2, 2, 2, 2, 2, 2, 2, # 60 - 67
- 2, 2, 2, 2, 2, 2, 2, 2, # 68 - 6f
- 2, 2, 2, 2, 2, 2, 2, 2, # 70 - 77
- 2, 2, 2, 2, 2, 2, 2, 1, # 78 - 7f
- 4, 4, 4, 4, 4, 4, 4, 4, # 80 - 87
- 4, 4, 4, 4, 4, 4, 4, 4, # 88 - 8f
- 4, 4, 4, 4, 4, 4, 4, 4, # 90 - 97
- 4, 4, 4, 4, 4, 4, 4, 4, # 98 - 9f
- 4, 3, 3, 3, 3, 3, 3, 3, # a0 - a7
- 3, 3, 3, 3, 3, 3, 3, 3, # a8 - af
- 3, 3, 3, 3, 3, 3, 3, 3, # b0 - b7
- 3, 3, 3, 3, 3, 3, 3, 3, # b8 - bf
- 3, 3, 3, 3, 3, 3, 3, 3, # c0 - c7
- 3, 3, 3, 3, 3, 3, 3, 3, # c8 - cf
- 3, 3, 3, 3, 3, 3, 3, 3, # d0 - d7
- 3, 3, 3, 3, 3, 3, 3, 3, # d8 - df
- 3, 3, 3, 3, 3, 3, 3, 3, # e0 - e7
- 3, 3, 3, 3, 3, 3, 3, 3, # e8 - ef
- 3, 3, 3, 3, 3, 3, 3, 3, # f0 - f7
- 3, 3, 3, 3, 3, 3, 3, 0) # f8 - ff
-
-BIG5_st = (
- eError, eStart, eStart, 3, eError, eError, eError, eError, # 00-07
- eError, eError, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eError, # 08-0f
- eError, eStart, eStart, eStart, eStart, eStart, eStart, eStart, # 10-17
-)
-
-Big5CharLenTable = (0, 1, 1, 2, 0)
-
-Big5SMModel = {'classTable': BIG5_cls,
- 'classFactor': 5,
- 'stateTable': BIG5_st,
- 'charLenTable': Big5CharLenTable,
- 'name': 'Big5'}
-
-# EUC-JP
-
-EUCJP_cls = (
- 4, 4, 4, 4, 4, 4, 4, 4, # 00 - 07
- 4, 4, 4, 4, 4, 4, 5, 5, # 08 - 0f
- 4, 4, 4, 4, 4, 4, 4, 4, # 10 - 17
- 4, 4, 4, 5, 4, 4, 4, 4, # 18 - 1f
- 4, 4, 4, 4, 4, 4, 4, 4, # 20 - 27
- 4, 4, 4, 4, 4, 4, 4, 4, # 28 - 2f
- 4, 4, 4, 4, 4, 4, 4, 4, # 30 - 37
- 4, 4, 4, 4, 4, 4, 4, 4, # 38 - 3f
- 4, 4, 4, 4, 4, 4, 4, 4, # 40 - 47
- 4, 4, 4, 4, 4, 4, 4, 4, # 48 - 4f
- 4, 4, 4, 4, 4, 4, 4, 4, # 50 - 57
- 4, 4, 4, 4, 4, 4, 4, 4, # 58 - 5f
- 4, 4, 4, 4, 4, 4, 4, 4, # 60 - 67
- 4, 4, 4, 4, 4, 4, 4, 4, # 68 - 6f
- 4, 4, 4, 4, 4, 4, 4, 4, # 70 - 77
- 4, 4, 4, 4, 4, 4, 4, 4, # 78 - 7f
- 5, 5, 5, 5, 5, 5, 5, 5, # 80 - 87
- 5, 5, 5, 5, 5, 5, 1, 3, # 88 - 8f
- 5, 5, 5, 5, 5, 5, 5, 5, # 90 - 97
- 5, 5, 5, 5, 5, 5, 5, 5, # 98 - 9f
- 5, 2, 2, 2, 2, 2, 2, 2, # a0 - a7
- 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af
- 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7
- 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf
- 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7
- 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf
- 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7
- 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df
- 0, 0, 0, 0, 0, 0, 0, 0, # e0 - e7
- 0, 0, 0, 0, 0, 0, 0, 0, # e8 - ef
- 0, 0, 0, 0, 0, 0, 0, 0, # f0 - f7
- 0, 0, 0, 0, 0, 0, 0, 5) # f8 - ff
-
-EUCJP_st = (
- 3, 4, 3, 5, eStart, eError, eError, eError, # 00-07
- eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe, # 08-0f
- eItsMe, eItsMe, eStart, eError, eStart, eError, eError, eError, # 10-17
- eError, eError, eStart, eError, eError, eError, 3, eError, # 18-1f
- 3, eError, eError, eError, eStart, eStart, eStart, eStart, # 20-27
-)
-
-EUCJPCharLenTable = (2, 2, 2, 3, 1, 0)
-
-EUCJPSMModel = {'classTable': EUCJP_cls,
- 'classFactor': 6,
- 'stateTable': EUCJP_st,
- 'charLenTable': EUCJPCharLenTable,
- 'name': 'EUC-JP'}
-
-# EUC-KR
-
-EUCKR_cls = (
- 1, 1, 1, 1, 1, 1, 1, 1, # 00 - 07
- 1, 1, 1, 1, 1, 1, 0, 0, # 08 - 0f
- 1, 1, 1, 1, 1, 1, 1, 1, # 10 - 17
- 1, 1, 1, 0, 1, 1, 1, 1, # 18 - 1f
- 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 27
- 1, 1, 1, 1, 1, 1, 1, 1, # 28 - 2f
- 1, 1, 1, 1, 1, 1, 1, 1, # 30 - 37
- 1, 1, 1, 1, 1, 1, 1, 1, # 38 - 3f
- 1, 1, 1, 1, 1, 1, 1, 1, # 40 - 47
- 1, 1, 1, 1, 1, 1, 1, 1, # 48 - 4f
- 1, 1, 1, 1, 1, 1, 1, 1, # 50 - 57
- 1, 1, 1, 1, 1, 1, 1, 1, # 58 - 5f
- 1, 1, 1, 1, 1, 1, 1, 1, # 60 - 67
- 1, 1, 1, 1, 1, 1, 1, 1, # 68 - 6f
- 1, 1, 1, 1, 1, 1, 1, 1, # 70 - 77
- 1, 1, 1, 1, 1, 1, 1, 1, # 78 - 7f
- 0, 0, 0, 0, 0, 0, 0, 0, # 80 - 87
- 0, 0, 0, 0, 0, 0, 0, 0, # 88 - 8f
- 0, 0, 0, 0, 0, 0, 0, 0, # 90 - 97
- 0, 0, 0, 0, 0, 0, 0, 0, # 98 - 9f
- 0, 2, 2, 2, 2, 2, 2, 2, # a0 - a7
- 2, 2, 2, 2, 2, 3, 3, 3, # a8 - af
- 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7
- 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf
- 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7
- 2, 3, 2, 2, 2, 2, 2, 2, # c8 - cf
- 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7
- 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df
- 2, 2, 2, 2, 2, 2, 2, 2, # e0 - e7
- 2, 2, 2, 2, 2, 2, 2, 2, # e8 - ef
- 2, 2, 2, 2, 2, 2, 2, 2, # f0 - f7
- 2, 2, 2, 2, 2, 2, 2, 0, # f8 - ff
-)
-
-EUCKR_st = (
- eError, eStart, 3, eError, eError, eError, eError, eError, # 00-07
- eItsMe, eItsMe, eItsMe, eItsMe, eError, eError, eStart, eStart, # 08-0f
-)
-
-EUCKRCharLenTable = (0, 1, 2, 0)
-
-EUCKRSMModel = {'classTable': EUCKR_cls,
- 'classFactor': 4,
- 'stateTable': EUCKR_st,
- 'charLenTable': EUCKRCharLenTable,
- 'name': 'EUC-KR'}
-
-# EUC-TW
-
-EUCTW_cls = (
- 2, 2, 2, 2, 2, 2, 2, 2, # 00 - 07
- 2, 2, 2, 2, 2, 2, 0, 0, # 08 - 0f
- 2, 2, 2, 2, 2, 2, 2, 2, # 10 - 17
- 2, 2, 2, 0, 2, 2, 2, 2, # 18 - 1f
- 2, 2, 2, 2, 2, 2, 2, 2, # 20 - 27
- 2, 2, 2, 2, 2, 2, 2, 2, # 28 - 2f
- 2, 2, 2, 2, 2, 2, 2, 2, # 30 - 37
- 2, 2, 2, 2, 2, 2, 2, 2, # 38 - 3f
- 2, 2, 2, 2, 2, 2, 2, 2, # 40 - 47
- 2, 2, 2, 2, 2, 2, 2, 2, # 48 - 4f
- 2, 2, 2, 2, 2, 2, 2, 2, # 50 - 57
- 2, 2, 2, 2, 2, 2, 2, 2, # 58 - 5f
- 2, 2, 2, 2, 2, 2, 2, 2, # 60 - 67
- 2, 2, 2, 2, 2, 2, 2, 2, # 68 - 6f
- 2, 2, 2, 2, 2, 2, 2, 2, # 70 - 77
- 2, 2, 2, 2, 2, 2, 2, 2, # 78 - 7f
- 0, 0, 0, 0, 0, 0, 0, 0, # 80 - 87
- 0, 0, 0, 0, 0, 0, 6, 0, # 88 - 8f
- 0, 0, 0, 0, 0, 0, 0, 0, # 90 - 97
- 0, 0, 0, 0, 0, 0, 0, 0, # 98 - 9f
- 0, 3, 4, 4, 4, 4, 4, 4, # a0 - a7
- 5, 5, 1, 1, 1, 1, 1, 1, # a8 - af
- 1, 1, 1, 1, 1, 1, 1, 1, # b0 - b7
- 1, 1, 1, 1, 1, 1, 1, 1, # b8 - bf
- 1, 1, 3, 1, 3, 3, 3, 3, # c0 - c7
- 3, 3, 3, 3, 3, 3, 3, 3, # c8 - cf
- 3, 3, 3, 3, 3, 3, 3, 3, # d0 - d7
- 3, 3, 3, 3, 3, 3, 3, 3, # d8 - df
- 3, 3, 3, 3, 3, 3, 3, 3, # e0 - e7
- 3, 3, 3, 3, 3, 3, 3, 3, # e8 - ef
- 3, 3, 3, 3, 3, 3, 3, 3, # f0 - f7
- 3, 3, 3, 3, 3, 3, 3, 0, # f8 - ff
-)
-
-EUCTW_st = (
- eError, eError, eStart, 3, 3, 3, 4, eError, # 00-07
- eError, eError, eError, eError, eError, eError, eItsMe, eItsMe, # 08-0f
- eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eError, eStart, eError, # 10-17
- eStart, eStart, eStart, eError, eError, eError, eError, eError, # 18-1f
- 5, eError, eError, eError, eStart, eError, eStart, eStart, # 20-27
- eStart, eError, eStart, eStart, eStart, eStart, eStart, eStart, # 28-2f
-)
-
-EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3)
-
-EUCTWSMModel = {'classTable': EUCTW_cls,
- 'classFactor': 7,
- 'stateTable': EUCTW_st,
- 'charLenTable': EUCTWCharLenTable,
- 'name': 'x-euc-tw'}
-
-# GB2312
-
-GB2312_cls = (
- 1, 1, 1, 1, 1, 1, 1, 1, # 00 - 07
- 1, 1, 1, 1, 1, 1, 0, 0, # 08 - 0f
- 1, 1, 1, 1, 1, 1, 1, 1, # 10 - 17
- 1, 1, 1, 0, 1, 1, 1, 1, # 18 - 1f
- 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 27
- 1, 1, 1, 1, 1, 1, 1, 1, # 28 - 2f
- 3, 3, 3, 3, 3, 3, 3, 3, # 30 - 37
- 3, 3, 1, 1, 1, 1, 1, 1, # 38 - 3f
- 2, 2, 2, 2, 2, 2, 2, 2, # 40 - 47
- 2, 2, 2, 2, 2, 2, 2, 2, # 48 - 4f
- 2, 2, 2, 2, 2, 2, 2, 2, # 50 - 57
- 2, 2, 2, 2, 2, 2, 2, 2, # 58 - 5f
- 2, 2, 2, 2, 2, 2, 2, 2, # 60 - 67
- 2, 2, 2, 2, 2, 2, 2, 2, # 68 - 6f
- 2, 2, 2, 2, 2, 2, 2, 2, # 70 - 77
- 2, 2, 2, 2, 2, 2, 2, 4, # 78 - 7f
- 5, 6, 6, 6, 6, 6, 6, 6, # 80 - 87
- 6, 6, 6, 6, 6, 6, 6, 6, # 88 - 8f
- 6, 6, 6, 6, 6, 6, 6, 6, # 90 - 97
- 6, 6, 6, 6, 6, 6, 6, 6, # 98 - 9f
- 6, 6, 6, 6, 6, 6, 6, 6, # a0 - a7
- 6, 6, 6, 6, 6, 6, 6, 6, # a8 - af
- 6, 6, 6, 6, 6, 6, 6, 6, # b0 - b7
- 6, 6, 6, 6, 6, 6, 6, 6, # b8 - bf
- 6, 6, 6, 6, 6, 6, 6, 6, # c0 - c7
- 6, 6, 6, 6, 6, 6, 6, 6, # c8 - cf
- 6, 6, 6, 6, 6, 6, 6, 6, # d0 - d7
- 6, 6, 6, 6, 6, 6, 6, 6, # d8 - df
- 6, 6, 6, 6, 6, 6, 6, 6, # e0 - e7
- 6, 6, 6, 6, 6, 6, 6, 6, # e8 - ef
- 6, 6, 6, 6, 6, 6, 6, 6, # f0 - f7
- 6, 6, 6, 6, 6, 6, 6, 0, # f8 - ff
-)
-
-GB2312_st = (
- eError, eStart, eStart, eStart, eStart, eStart, 3, eError, # 00-07
- eError, eError, eError, eError, eError, eError, eItsMe, eItsMe, # 08-0f
- eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eError, eError, eStart, # 10-17
- 4, eError, eStart, eStart, eError, eError, eError, eError, # 18-1f
- eError, eError, 5, eError, eError, eError, eItsMe, eError, # 20-27
- eError, eError, eStart, eStart, eStart, eStart, eStart, eStart, # 28-2f
-)
-
-# To be accurate, the length of class 6 can be either 2 or 4.
-# But it is not necessary to discriminate between the two since
-# it is used for frequency analysis only, and we are validing
-# each code range there as well. So it is safe to set it to be
-# 2 here.
-GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2)
-
-GB2312SMModel = {'classTable': GB2312_cls,
- 'classFactor': 7,
- 'stateTable': GB2312_st,
- 'charLenTable': GB2312CharLenTable,
- 'name': 'GB2312'}
-
-# Shift_JIS
-
-SJIS_cls = (
- 1, 1, 1, 1, 1, 1, 1, 1, # 00 - 07
- 1, 1, 1, 1, 1, 1, 0, 0, # 08 - 0f
- 1, 1, 1, 1, 1, 1, 1, 1, # 10 - 17
- 1, 1, 1, 0, 1, 1, 1, 1, # 18 - 1f
- 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 27
- 1, 1, 1, 1, 1, 1, 1, 1, # 28 - 2f
- 1, 1, 1, 1, 1, 1, 1, 1, # 30 - 37
- 1, 1, 1, 1, 1, 1, 1, 1, # 38 - 3f
- 2, 2, 2, 2, 2, 2, 2, 2, # 40 - 47
- 2, 2, 2, 2, 2, 2, 2, 2, # 48 - 4f
- 2, 2, 2, 2, 2, 2, 2, 2, # 50 - 57
- 2, 2, 2, 2, 2, 2, 2, 2, # 58 - 5f
- 2, 2, 2, 2, 2, 2, 2, 2, # 60 - 67
- 2, 2, 2, 2, 2, 2, 2, 2, # 68 - 6f
- 2, 2, 2, 2, 2, 2, 2, 2, # 70 - 77
- 2, 2, 2, 2, 2, 2, 2, 1, # 78 - 7f
- 3, 3, 3, 3, 3, 3, 3, 3, # 80 - 87
- 3, 3, 3, 3, 3, 3, 3, 3, # 88 - 8f
- 3, 3, 3, 3, 3, 3, 3, 3, # 90 - 97
- 3, 3, 3, 3, 3, 3, 3, 3, # 98 - 9f
- #0xa0 is illegal in sjis encoding, but some pages does
- #contain such byte. We need to be more error forgiven.
- 2, 2, 2, 2, 2, 2, 2, 2, # a0 - a7
- 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af
- 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7
- 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf
- 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7
- 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf
- 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7
- 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df
- 3, 3, 3, 3, 3, 3, 3, 3, # e0 - e7
- 3, 3, 3, 3, 3, 4, 4, 4, # e8 - ef
- 4, 4, 4, 4, 4, 4, 4, 4, # f0 - f7
- 4, 4, 4, 4, 4, 0, 0, 0) # f8 - ff
-
-SJIS_st = (
- eError, eStart, eStart, 3, eError, eError, eError, eError, # 00-07
- eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe, # 08-0f
- eItsMe, eItsMe, eError, eError, eStart, eStart, eStart, eStart, # 10-17
-)
-
-SJISCharLenTable = (0, 1, 1, 2, 0, 0)
-
-SJISSMModel = {'classTable': SJIS_cls,
- 'classFactor': 6,
- 'stateTable': SJIS_st,
- 'charLenTable': SJISCharLenTable,
- 'name': 'Shift_JIS'}
-
-# UCS2-BE
-
-UCS2BE_cls = (
- 0, 0, 0, 0, 0, 0, 0, 0, # 00 - 07
- 0, 0, 1, 0, 0, 2, 0, 0, # 08 - 0f
- 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17
- 0, 0, 0, 3, 0, 0, 0, 0, # 18 - 1f
- 0, 0, 0, 0, 0, 0, 0, 0, # 20 - 27
- 0, 3, 3, 3, 3, 3, 0, 0, # 28 - 2f
- 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37
- 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f
- 0, 0, 0, 0, 0, 0, 0, 0, # 40 - 47
- 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f
- 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57
- 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f
- 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67
- 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f
- 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77
- 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f
- 0, 0, 0, 0, 0, 0, 0, 0, # 80 - 87
- 0, 0, 0, 0, 0, 0, 0, 0, # 88 - 8f
- 0, 0, 0, 0, 0, 0, 0, 0, # 90 - 97
- 0, 0, 0, 0, 0, 0, 0, 0, # 98 - 9f
- 0, 0, 0, 0, 0, 0, 0, 0, # a0 - a7
- 0, 0, 0, 0, 0, 0, 0, 0, # a8 - af
- 0, 0, 0, 0, 0, 0, 0, 0, # b0 - b7
- 0, 0, 0, 0, 0, 0, 0, 0, # b8 - bf
- 0, 0, 0, 0, 0, 0, 0, 0, # c0 - c7
- 0, 0, 0, 0, 0, 0, 0, 0, # c8 - cf
- 0, 0, 0, 0, 0, 0, 0, 0, # d0 - d7
- 0, 0, 0, 0, 0, 0, 0, 0, # d8 - df
- 0, 0, 0, 0, 0, 0, 0, 0, # e0 - e7
- 0, 0, 0, 0, 0, 0, 0, 0, # e8 - ef
- 0, 0, 0, 0, 0, 0, 0, 0, # f0 - f7
- 0, 0, 0, 0, 0, 0, 4, 5, # f8 - ff
-)
-
-UCS2BE_st = (
- 5, 7, 7, eError, 4, 3, eError, eError, # 00-07
- eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe, # 08-0f
- eItsMe, eItsMe, 6, 6, 6, 6, eError, eError, # 10-17
- 6, 6, 6, 6, 6, eItsMe, 6, 6, # 18-1f
- 6, 6, 6, 6, 5, 7, 7, eError, # 20-27
- 5, 8, 6, 6, eError, 6, 6, 6, # 28-2f
- 6, 6, 6, 6, eError, eError, eStart, eStart, # 30-37
-)
-
-UCS2BECharLenTable = (2, 2, 2, 0, 2, 2)
-
-UCS2BESMModel = {'classTable': UCS2BE_cls,
- 'classFactor': 6,
- 'stateTable': UCS2BE_st,
- 'charLenTable': UCS2BECharLenTable,
- 'name': 'UTF-16BE'}
-
-# UCS2-LE
-
-UCS2LE_cls = (
- 0, 0, 0, 0, 0, 0, 0, 0, # 00 - 07
- 0, 0, 1, 0, 0, 2, 0, 0, # 08 - 0f
- 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17
- 0, 0, 0, 3, 0, 0, 0, 0, # 18 - 1f
- 0, 0, 0, 0, 0, 0, 0, 0, # 20 - 27
- 0, 3, 3, 3, 3, 3, 0, 0, # 28 - 2f
- 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37
- 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f
- 0, 0, 0, 0, 0, 0, 0, 0, # 40 - 47
- 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f
- 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57
- 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f
- 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67
- 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f
- 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77
- 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f
- 0, 0, 0, 0, 0, 0, 0, 0, # 80 - 87
- 0, 0, 0, 0, 0, 0, 0, 0, # 88 - 8f
- 0, 0, 0, 0, 0, 0, 0, 0, # 90 - 97
- 0, 0, 0, 0, 0, 0, 0, 0, # 98 - 9f
- 0, 0, 0, 0, 0, 0, 0, 0, # a0 - a7
- 0, 0, 0, 0, 0, 0, 0, 0, # a8 - af
- 0, 0, 0, 0, 0, 0, 0, 0, # b0 - b7
- 0, 0, 0, 0, 0, 0, 0, 0, # b8 - bf
- 0, 0, 0, 0, 0, 0, 0, 0, # c0 - c7
- 0, 0, 0, 0, 0, 0, 0, 0, # c8 - cf
- 0, 0, 0, 0, 0, 0, 0, 0, # d0 - d7
- 0, 0, 0, 0, 0, 0, 0, 0, # d8 - df
- 0, 0, 0, 0, 0, 0, 0, 0, # e0 - e7
- 0, 0, 0, 0, 0, 0, 0, 0, # e8 - ef
- 0, 0, 0, 0, 0, 0, 0, 0, # f0 - f7
- 0, 0, 0, 0, 0, 0, 4, 5, # f8 - ff
-)
-
-UCS2LE_st = (
- 6, 6, 7, 6, 4, 3, eError, eError, # 00-07
- eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe, # 08-0f
- eItsMe, eItsMe, 5, 5, 5, eError, eItsMe, eError, # 10-17
- 5, 5, 5, eError, 5, eError, 6, 6, # 18-1f
- 7, 6, 8, 8, 5, 5, 5, eError, # 20-27
- 5, 5, 5, eError, eError, eError, 5, 5, # 28-2f
- 5, 5, 5, eError, 5, eError, eStart, eStart, # 30-37
-)
-
-UCS2LECharLenTable = (2, 2, 2, 2, 2, 2)
-
-UCS2LESMModel = {'classTable': UCS2LE_cls,
- 'classFactor': 6,
- 'stateTable': UCS2LE_st,
- 'charLenTable': UCS2LECharLenTable,
- 'name': 'UTF-16LE'}
-
-# UTF-8
-
-UTF8_cls = (
- 1, 1, 1, 1, 1, 1, 1, 1, # 00 - 07 #allow 0x00 as a legal value
- 1, 1, 1, 1, 1, 1, 0, 0, # 08 - 0f
- 1, 1, 1, 1, 1, 1, 1, 1, # 10 - 17
- 1, 1, 1, 0, 1, 1, 1, 1, # 18 - 1f
- 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 27
- 1, 1, 1, 1, 1, 1, 1, 1, # 28 - 2f
- 1, 1, 1, 1, 1, 1, 1, 1, # 30 - 37
- 1, 1, 1, 1, 1, 1, 1, 1, # 38 - 3f
- 1, 1, 1, 1, 1, 1, 1, 1, # 40 - 47
- 1, 1, 1, 1, 1, 1, 1, 1, # 48 - 4f
- 1, 1, 1, 1, 1, 1, 1, 1, # 50 - 57
- 1, 1, 1, 1, 1, 1, 1, 1, # 58 - 5f
- 1, 1, 1, 1, 1, 1, 1, 1, # 60 - 67
- 1, 1, 1, 1, 1, 1, 1, 1, # 68 - 6f
- 1, 1, 1, 1, 1, 1, 1, 1, # 70 - 77
- 1, 1, 1, 1, 1, 1, 1, 1, # 78 - 7f
- 2, 2, 2, 2, 3, 3, 3, 3, # 80 - 87
- 4, 4, 4, 4, 4, 4, 4, 4, # 88 - 8f
- 4, 4, 4, 4, 4, 4, 4, 4, # 90 - 97
- 4, 4, 4, 4, 4, 4, 4, 4, # 98 - 9f
- 5, 5, 5, 5, 5, 5, 5, 5, # a0 - a7
- 5, 5, 5, 5, 5, 5, 5, 5, # a8 - af
- 5, 5, 5, 5, 5, 5, 5, 5, # b0 - b7
- 5, 5, 5, 5, 5, 5, 5, 5, # b8 - bf
- 0, 0, 6, 6, 6, 6, 6, 6, # c0 - c7
- 6, 6, 6, 6, 6, 6, 6, 6, # c8 - cf
- 6, 6, 6, 6, 6, 6, 6, 6, # d0 - d7
- 6, 6, 6, 6, 6, 6, 6, 6, # d8 - df
- 7, 8, 8, 8, 8, 8, 8, 8, # e0 - e7
- 8, 8, 8, 8, 8, 9, 8, 8, # e8 - ef
- 10, 11, 11, 11, 11, 11, 11, 11, # f0 - f7
- 12, 13, 13, 13, 14, 15, 0, 0, # f8 - ff
-)
-
-UTF8_st = (
- eError, eStart, eError, eError, eError, eError, 12, 10, # 00-07
- 9, 11, 8, 7, 6, 5, 4, 3, # 08-0f
- eError, eError, eError, eError, eError, eError, eError, eError, # 10-17
- eError, eError, eError, eError, eError, eError, eError, eError, # 18-1f
- eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, # 20-27
- eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, # 28-2f
- eError, eError, 5, 5, 5, 5, eError, eError, # 30-37
- eError, eError, eError, eError, eError, eError, eError, eError, # 38-3f
- eError, eError, eError, 5, 5, 5, eError, eError, # 40-47
- eError, eError, eError, eError, eError, eError, eError, eError, # 48-4f
- eError, eError, 7, 7, 7, 7, eError, eError, # 50-57
- eError, eError, eError, eError, eError, eError, eError, eError, # 58-5f
- eError, eError, eError, eError, 7, 7, eError, eError, # 60-67
- eError, eError, eError, eError, eError, eError, eError, eError, # 68-6f
- eError, eError, 9, 9, 9, 9, eError, eError, # 70-77
- eError, eError, eError, eError, eError, eError, eError, eError, # 78-7f
- eError, eError, eError, eError, eError, 9, eError, eError, # 80-87
- eError, eError, eError, eError, eError, eError, eError, eError, # 88-8f
- eError, eError, 12, 12, 12, 12, eError, eError, # 90-97
- eError, eError, eError, eError, eError, eError, eError, eError, # 98-9f
- eError, eError, eError, eError, eError, 12, eError, eError, # a0-a7
- eError, eError, eError, eError, eError, eError, eError, eError, # a8-af
- eError, eError, 12, 12, 12, eError, eError, eError, # b0-b7
- eError, eError, eError, eError, eError, eError, eError, eError, # b8-bf
- eError, eError, eStart, eStart, eStart, eStart, eError, eError, # c0-c7
- eError, eError, eError, eError, eError, eError, eError, eError, # c8-cf
-)
-
-UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
-
-UTF8SMModel = {'classTable': UTF8_cls,
- 'classFactor': 16,
- 'stateTable': UTF8_st,
- 'charLenTable': UTF8CharLenTable,
- 'name': 'UTF-8'}
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is mozilla.org code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 1998\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from .constants import eStart, eError, eItsMe\r
+\r
+# BIG5\r
+\r
+BIG5_cls = (\r
+ 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value\r
+ 1,1,1,1,1,1,0,0, # 08 - 0f\r
+ 1,1,1,1,1,1,1,1, # 10 - 17\r
+ 1,1,1,0,1,1,1,1, # 18 - 1f\r
+ 1,1,1,1,1,1,1,1, # 20 - 27\r
+ 1,1,1,1,1,1,1,1, # 28 - 2f\r
+ 1,1,1,1,1,1,1,1, # 30 - 37\r
+ 1,1,1,1,1,1,1,1, # 38 - 3f\r
+ 2,2,2,2,2,2,2,2, # 40 - 47\r
+ 2,2,2,2,2,2,2,2, # 48 - 4f\r
+ 2,2,2,2,2,2,2,2, # 50 - 57\r
+ 2,2,2,2,2,2,2,2, # 58 - 5f\r
+ 2,2,2,2,2,2,2,2, # 60 - 67\r
+ 2,2,2,2,2,2,2,2, # 68 - 6f\r
+ 2,2,2,2,2,2,2,2, # 70 - 77\r
+ 2,2,2,2,2,2,2,1, # 78 - 7f\r
+ 4,4,4,4,4,4,4,4, # 80 - 87\r
+ 4,4,4,4,4,4,4,4, # 88 - 8f\r
+ 4,4,4,4,4,4,4,4, # 90 - 97\r
+ 4,4,4,4,4,4,4,4, # 98 - 9f\r
+ 4,3,3,3,3,3,3,3, # a0 - a7\r
+ 3,3,3,3,3,3,3,3, # a8 - af\r
+ 3,3,3,3,3,3,3,3, # b0 - b7\r
+ 3,3,3,3,3,3,3,3, # b8 - bf\r
+ 3,3,3,3,3,3,3,3, # c0 - c7\r
+ 3,3,3,3,3,3,3,3, # c8 - cf\r
+ 3,3,3,3,3,3,3,3, # d0 - d7\r
+ 3,3,3,3,3,3,3,3, # d8 - df\r
+ 3,3,3,3,3,3,3,3, # e0 - e7\r
+ 3,3,3,3,3,3,3,3, # e8 - ef\r
+ 3,3,3,3,3,3,3,3, # f0 - f7\r
+ 3,3,3,3,3,3,3,0 # f8 - ff\r
+)\r
+\r
+BIG5_st = (\r
+ eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07\r
+ eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f\r
+ eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart#10-17\r
+)\r
+\r
+Big5CharLenTable = (0, 1, 1, 2, 0)\r
+\r
+Big5SMModel = {'classTable': BIG5_cls,\r
+ 'classFactor': 5,\r
+ 'stateTable': BIG5_st,\r
+ 'charLenTable': Big5CharLenTable,\r
+ 'name': 'Big5'}\r
+\r
+# EUC-JP\r
+\r
+EUCJP_cls = (\r
+ 4,4,4,4,4,4,4,4, # 00 - 07\r
+ 4,4,4,4,4,4,5,5, # 08 - 0f\r
+ 4,4,4,4,4,4,4,4, # 10 - 17\r
+ 4,4,4,5,4,4,4,4, # 18 - 1f\r
+ 4,4,4,4,4,4,4,4, # 20 - 27\r
+ 4,4,4,4,4,4,4,4, # 28 - 2f\r
+ 4,4,4,4,4,4,4,4, # 30 - 37\r
+ 4,4,4,4,4,4,4,4, # 38 - 3f\r
+ 4,4,4,4,4,4,4,4, # 40 - 47\r
+ 4,4,4,4,4,4,4,4, # 48 - 4f\r
+ 4,4,4,4,4,4,4,4, # 50 - 57\r
+ 4,4,4,4,4,4,4,4, # 58 - 5f\r
+ 4,4,4,4,4,4,4,4, # 60 - 67\r
+ 4,4,4,4,4,4,4,4, # 68 - 6f\r
+ 4,4,4,4,4,4,4,4, # 70 - 77\r
+ 4,4,4,4,4,4,4,4, # 78 - 7f\r
+ 5,5,5,5,5,5,5,5, # 80 - 87\r
+ 5,5,5,5,5,5,1,3, # 88 - 8f\r
+ 5,5,5,5,5,5,5,5, # 90 - 97\r
+ 5,5,5,5,5,5,5,5, # 98 - 9f\r
+ 5,2,2,2,2,2,2,2, # a0 - a7\r
+ 2,2,2,2,2,2,2,2, # a8 - af\r
+ 2,2,2,2,2,2,2,2, # b0 - b7\r
+ 2,2,2,2,2,2,2,2, # b8 - bf\r
+ 2,2,2,2,2,2,2,2, # c0 - c7\r
+ 2,2,2,2,2,2,2,2, # c8 - cf\r
+ 2,2,2,2,2,2,2,2, # d0 - d7\r
+ 2,2,2,2,2,2,2,2, # d8 - df\r
+ 0,0,0,0,0,0,0,0, # e0 - e7\r
+ 0,0,0,0,0,0,0,0, # e8 - ef\r
+ 0,0,0,0,0,0,0,0, # f0 - f7\r
+ 0,0,0,0,0,0,0,5 # f8 - ff\r
+)\r
+\r
+EUCJP_st = (\r
+ 3, 4, 3, 5,eStart,eError,eError,eError,#00-07\r
+ eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f\r
+ eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17\r
+ eError,eError,eStart,eError,eError,eError, 3,eError,#18-1f\r
+ 3,eError,eError,eError,eStart,eStart,eStart,eStart#20-27\r
+)\r
+\r
+EUCJPCharLenTable = (2, 2, 2, 3, 1, 0)\r
+\r
+EUCJPSMModel = {'classTable': EUCJP_cls,\r
+ 'classFactor': 6,\r
+ 'stateTable': EUCJP_st,\r
+ 'charLenTable': EUCJPCharLenTable,\r
+ 'name': 'EUC-JP'}\r
+\r
+# EUC-KR\r
+\r
+EUCKR_cls = (\r
+ 1,1,1,1,1,1,1,1, # 00 - 07\r
+ 1,1,1,1,1,1,0,0, # 08 - 0f\r
+ 1,1,1,1,1,1,1,1, # 10 - 17\r
+ 1,1,1,0,1,1,1,1, # 18 - 1f\r
+ 1,1,1,1,1,1,1,1, # 20 - 27\r
+ 1,1,1,1,1,1,1,1, # 28 - 2f\r
+ 1,1,1,1,1,1,1,1, # 30 - 37\r
+ 1,1,1,1,1,1,1,1, # 38 - 3f\r
+ 1,1,1,1,1,1,1,1, # 40 - 47\r
+ 1,1,1,1,1,1,1,1, # 48 - 4f\r
+ 1,1,1,1,1,1,1,1, # 50 - 57\r
+ 1,1,1,1,1,1,1,1, # 58 - 5f\r
+ 1,1,1,1,1,1,1,1, # 60 - 67\r
+ 1,1,1,1,1,1,1,1, # 68 - 6f\r
+ 1,1,1,1,1,1,1,1, # 70 - 77\r
+ 1,1,1,1,1,1,1,1, # 78 - 7f\r
+ 0,0,0,0,0,0,0,0, # 80 - 87\r
+ 0,0,0,0,0,0,0,0, # 88 - 8f\r
+ 0,0,0,0,0,0,0,0, # 90 - 97\r
+ 0,0,0,0,0,0,0,0, # 98 - 9f\r
+ 0,2,2,2,2,2,2,2, # a0 - a7\r
+ 2,2,2,2,2,3,3,3, # a8 - af\r
+ 2,2,2,2,2,2,2,2, # b0 - b7\r
+ 2,2,2,2,2,2,2,2, # b8 - bf\r
+ 2,2,2,2,2,2,2,2, # c0 - c7\r
+ 2,3,2,2,2,2,2,2, # c8 - cf\r
+ 2,2,2,2,2,2,2,2, # d0 - d7\r
+ 2,2,2,2,2,2,2,2, # d8 - df\r
+ 2,2,2,2,2,2,2,2, # e0 - e7\r
+ 2,2,2,2,2,2,2,2, # e8 - ef\r
+ 2,2,2,2,2,2,2,2, # f0 - f7\r
+ 2,2,2,2,2,2,2,0 # f8 - ff\r
+)\r
+\r
+EUCKR_st = (\r
+ eError,eStart, 3,eError,eError,eError,eError,eError,#00-07\r
+ eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart #08-0f\r
+)\r
+\r
+EUCKRCharLenTable = (0, 1, 2, 0)\r
+\r
+EUCKRSMModel = {'classTable': EUCKR_cls,\r
+ 'classFactor': 4,\r
+ 'stateTable': EUCKR_st,\r
+ 'charLenTable': EUCKRCharLenTable,\r
+ 'name': 'EUC-KR'}\r
+\r
+# EUC-TW\r
+\r
+EUCTW_cls = (\r
+ 2,2,2,2,2,2,2,2, # 00 - 07\r
+ 2,2,2,2,2,2,0,0, # 08 - 0f\r
+ 2,2,2,2,2,2,2,2, # 10 - 17\r
+ 2,2,2,0,2,2,2,2, # 18 - 1f\r
+ 2,2,2,2,2,2,2,2, # 20 - 27\r
+ 2,2,2,2,2,2,2,2, # 28 - 2f\r
+ 2,2,2,2,2,2,2,2, # 30 - 37\r
+ 2,2,2,2,2,2,2,2, # 38 - 3f\r
+ 2,2,2,2,2,2,2,2, # 40 - 47\r
+ 2,2,2,2,2,2,2,2, # 48 - 4f\r
+ 2,2,2,2,2,2,2,2, # 50 - 57\r
+ 2,2,2,2,2,2,2,2, # 58 - 5f\r
+ 2,2,2,2,2,2,2,2, # 60 - 67\r
+ 2,2,2,2,2,2,2,2, # 68 - 6f\r
+ 2,2,2,2,2,2,2,2, # 70 - 77\r
+ 2,2,2,2,2,2,2,2, # 78 - 7f\r
+ 0,0,0,0,0,0,0,0, # 80 - 87\r
+ 0,0,0,0,0,0,6,0, # 88 - 8f\r
+ 0,0,0,0,0,0,0,0, # 90 - 97\r
+ 0,0,0,0,0,0,0,0, # 98 - 9f\r
+ 0,3,4,4,4,4,4,4, # a0 - a7\r
+ 5,5,1,1,1,1,1,1, # a8 - af\r
+ 1,1,1,1,1,1,1,1, # b0 - b7\r
+ 1,1,1,1,1,1,1,1, # b8 - bf\r
+ 1,1,3,1,3,3,3,3, # c0 - c7\r
+ 3,3,3,3,3,3,3,3, # c8 - cf\r
+ 3,3,3,3,3,3,3,3, # d0 - d7\r
+ 3,3,3,3,3,3,3,3, # d8 - df\r
+ 3,3,3,3,3,3,3,3, # e0 - e7\r
+ 3,3,3,3,3,3,3,3, # e8 - ef\r
+ 3,3,3,3,3,3,3,3, # f0 - f7\r
+ 3,3,3,3,3,3,3,0 # f8 - ff\r
+)\r
+\r
+EUCTW_st = (\r
+ eError,eError,eStart, 3, 3, 3, 4,eError,#00-07\r
+ eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f\r
+ eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17\r
+ eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f\r
+ 5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27\r
+ eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f\r
+)\r
+\r
+EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3)\r
+\r
+EUCTWSMModel = {'classTable': EUCTW_cls,\r
+ 'classFactor': 7,\r
+ 'stateTable': EUCTW_st,\r
+ 'charLenTable': EUCTWCharLenTable,\r
+ 'name': 'x-euc-tw'}\r
+\r
+# GB2312\r
+\r
+GB2312_cls = (\r
+ 1,1,1,1,1,1,1,1, # 00 - 07\r
+ 1,1,1,1,1,1,0,0, # 08 - 0f\r
+ 1,1,1,1,1,1,1,1, # 10 - 17\r
+ 1,1,1,0,1,1,1,1, # 18 - 1f\r
+ 1,1,1,1,1,1,1,1, # 20 - 27\r
+ 1,1,1,1,1,1,1,1, # 28 - 2f\r
+ 3,3,3,3,3,3,3,3, # 30 - 37\r
+ 3,3,1,1,1,1,1,1, # 38 - 3f\r
+ 2,2,2,2,2,2,2,2, # 40 - 47\r
+ 2,2,2,2,2,2,2,2, # 48 - 4f\r
+ 2,2,2,2,2,2,2,2, # 50 - 57\r
+ 2,2,2,2,2,2,2,2, # 58 - 5f\r
+ 2,2,2,2,2,2,2,2, # 60 - 67\r
+ 2,2,2,2,2,2,2,2, # 68 - 6f\r
+ 2,2,2,2,2,2,2,2, # 70 - 77\r
+ 2,2,2,2,2,2,2,4, # 78 - 7f\r
+ 5,6,6,6,6,6,6,6, # 80 - 87\r
+ 6,6,6,6,6,6,6,6, # 88 - 8f\r
+ 6,6,6,6,6,6,6,6, # 90 - 97\r
+ 6,6,6,6,6,6,6,6, # 98 - 9f\r
+ 6,6,6,6,6,6,6,6, # a0 - a7\r
+ 6,6,6,6,6,6,6,6, # a8 - af\r
+ 6,6,6,6,6,6,6,6, # b0 - b7\r
+ 6,6,6,6,6,6,6,6, # b8 - bf\r
+ 6,6,6,6,6,6,6,6, # c0 - c7\r
+ 6,6,6,6,6,6,6,6, # c8 - cf\r
+ 6,6,6,6,6,6,6,6, # d0 - d7\r
+ 6,6,6,6,6,6,6,6, # d8 - df\r
+ 6,6,6,6,6,6,6,6, # e0 - e7\r
+ 6,6,6,6,6,6,6,6, # e8 - ef\r
+ 6,6,6,6,6,6,6,6, # f0 - f7\r
+ 6,6,6,6,6,6,6,0 # f8 - ff\r
+)\r
+\r
+GB2312_st = (\r
+ eError,eStart,eStart,eStart,eStart,eStart, 3,eError,#00-07\r
+ eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f\r
+ eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17\r
+ 4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f\r
+ eError,eError, 5,eError,eError,eError,eItsMe,eError,#20-27\r
+ eError,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f\r
+)\r
+\r
+# To be accurate, the length of class 6 can be either 2 or 4.\r
+# But it is not necessary to discriminate between the two since\r
+# it is used for frequency analysis only, and we are validing\r
+# each code range there as well. So it is safe to set it to be\r
+# 2 here.\r
+GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2)\r
+\r
+GB2312SMModel = {'classTable': GB2312_cls,\r
+ 'classFactor': 7,\r
+ 'stateTable': GB2312_st,\r
+ 'charLenTable': GB2312CharLenTable,\r
+ 'name': 'GB2312'}\r
+\r
+# Shift_JIS\r
+\r
+SJIS_cls = (\r
+ 1,1,1,1,1,1,1,1, # 00 - 07\r
+ 1,1,1,1,1,1,0,0, # 08 - 0f\r
+ 1,1,1,1,1,1,1,1, # 10 - 17\r
+ 1,1,1,0,1,1,1,1, # 18 - 1f\r
+ 1,1,1,1,1,1,1,1, # 20 - 27\r
+ 1,1,1,1,1,1,1,1, # 28 - 2f\r
+ 1,1,1,1,1,1,1,1, # 30 - 37\r
+ 1,1,1,1,1,1,1,1, # 38 - 3f\r
+ 2,2,2,2,2,2,2,2, # 40 - 47\r
+ 2,2,2,2,2,2,2,2, # 48 - 4f\r
+ 2,2,2,2,2,2,2,2, # 50 - 57\r
+ 2,2,2,2,2,2,2,2, # 58 - 5f\r
+ 2,2,2,2,2,2,2,2, # 60 - 67\r
+ 2,2,2,2,2,2,2,2, # 68 - 6f\r
+ 2,2,2,2,2,2,2,2, # 70 - 77\r
+ 2,2,2,2,2,2,2,1, # 78 - 7f\r
+ 3,3,3,3,3,3,3,3, # 80 - 87\r
+ 3,3,3,3,3,3,3,3, # 88 - 8f\r
+ 3,3,3,3,3,3,3,3, # 90 - 97\r
+ 3,3,3,3,3,3,3,3, # 98 - 9f\r
+ #0xa0 is illegal in sjis encoding, but some pages does\r
+ #contain such byte. We need to be more error forgiven.\r
+ 2,2,2,2,2,2,2,2, # a0 - a7\r
+ 2,2,2,2,2,2,2,2, # a8 - af\r
+ 2,2,2,2,2,2,2,2, # b0 - b7\r
+ 2,2,2,2,2,2,2,2, # b8 - bf\r
+ 2,2,2,2,2,2,2,2, # c0 - c7\r
+ 2,2,2,2,2,2,2,2, # c8 - cf\r
+ 2,2,2,2,2,2,2,2, # d0 - d7\r
+ 2,2,2,2,2,2,2,2, # d8 - df\r
+ 3,3,3,3,3,3,3,3, # e0 - e7\r
+ 3,3,3,3,3,4,4,4, # e8 - ef\r
+ 4,4,4,4,4,4,4,4, # f0 - f7\r
+ 4,4,4,4,4,0,0,0 # f8 - ff\r
+)\r
+\r
+\r
+SJIS_st = (\r
+ eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07\r
+ eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f\r
+ eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart #10-17\r
+)\r
+\r
+SJISCharLenTable = (0, 1, 1, 2, 0, 0)\r
+\r
+SJISSMModel = {'classTable': SJIS_cls,\r
+ 'classFactor': 6,\r
+ 'stateTable': SJIS_st,\r
+ 'charLenTable': SJISCharLenTable,\r
+ 'name': 'Shift_JIS'}\r
+\r
+# UCS2-BE\r
+\r
+UCS2BE_cls = (\r
+ 0,0,0,0,0,0,0,0, # 00 - 07\r
+ 0,0,1,0,0,2,0,0, # 08 - 0f\r
+ 0,0,0,0,0,0,0,0, # 10 - 17\r
+ 0,0,0,3,0,0,0,0, # 18 - 1f\r
+ 0,0,0,0,0,0,0,0, # 20 - 27\r
+ 0,3,3,3,3,3,0,0, # 28 - 2f\r
+ 0,0,0,0,0,0,0,0, # 30 - 37\r
+ 0,0,0,0,0,0,0,0, # 38 - 3f\r
+ 0,0,0,0,0,0,0,0, # 40 - 47\r
+ 0,0,0,0,0,0,0,0, # 48 - 4f\r
+ 0,0,0,0,0,0,0,0, # 50 - 57\r
+ 0,0,0,0,0,0,0,0, # 58 - 5f\r
+ 0,0,0,0,0,0,0,0, # 60 - 67\r
+ 0,0,0,0,0,0,0,0, # 68 - 6f\r
+ 0,0,0,0,0,0,0,0, # 70 - 77\r
+ 0,0,0,0,0,0,0,0, # 78 - 7f\r
+ 0,0,0,0,0,0,0,0, # 80 - 87\r
+ 0,0,0,0,0,0,0,0, # 88 - 8f\r
+ 0,0,0,0,0,0,0,0, # 90 - 97\r
+ 0,0,0,0,0,0,0,0, # 98 - 9f\r
+ 0,0,0,0,0,0,0,0, # a0 - a7\r
+ 0,0,0,0,0,0,0,0, # a8 - af\r
+ 0,0,0,0,0,0,0,0, # b0 - b7\r
+ 0,0,0,0,0,0,0,0, # b8 - bf\r
+ 0,0,0,0,0,0,0,0, # c0 - c7\r
+ 0,0,0,0,0,0,0,0, # c8 - cf\r
+ 0,0,0,0,0,0,0,0, # d0 - d7\r
+ 0,0,0,0,0,0,0,0, # d8 - df\r
+ 0,0,0,0,0,0,0,0, # e0 - e7\r
+ 0,0,0,0,0,0,0,0, # e8 - ef\r
+ 0,0,0,0,0,0,0,0, # f0 - f7\r
+ 0,0,0,0,0,0,4,5 # f8 - ff\r
+)\r
+\r
+UCS2BE_st = (\r
+ 5, 7, 7,eError, 4, 3,eError,eError,#00-07\r
+ eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f\r
+ eItsMe,eItsMe, 6, 6, 6, 6,eError,eError,#10-17\r
+ 6, 6, 6, 6, 6,eItsMe, 6, 6,#18-1f\r
+ 6, 6, 6, 6, 5, 7, 7,eError,#20-27\r
+ 5, 8, 6, 6,eError, 6, 6, 6,#28-2f\r
+ 6, 6, 6, 6,eError,eError,eStart,eStart #30-37\r
+)\r
+\r
+UCS2BECharLenTable = (2, 2, 2, 0, 2, 2)\r
+\r
+UCS2BESMModel = {'classTable': UCS2BE_cls,\r
+ 'classFactor': 6,\r
+ 'stateTable': UCS2BE_st,\r
+ 'charLenTable': UCS2BECharLenTable,\r
+ 'name': 'UTF-16BE'}\r
+\r
+# UCS2-LE\r
+\r
+UCS2LE_cls = (\r
+ 0,0,0,0,0,0,0,0, # 00 - 07\r
+ 0,0,1,0,0,2,0,0, # 08 - 0f\r
+ 0,0,0,0,0,0,0,0, # 10 - 17\r
+ 0,0,0,3,0,0,0,0, # 18 - 1f\r
+ 0,0,0,0,0,0,0,0, # 20 - 27\r
+ 0,3,3,3,3,3,0,0, # 28 - 2f\r
+ 0,0,0,0,0,0,0,0, # 30 - 37\r
+ 0,0,0,0,0,0,0,0, # 38 - 3f\r
+ 0,0,0,0,0,0,0,0, # 40 - 47\r
+ 0,0,0,0,0,0,0,0, # 48 - 4f\r
+ 0,0,0,0,0,0,0,0, # 50 - 57\r
+ 0,0,0,0,0,0,0,0, # 58 - 5f\r
+ 0,0,0,0,0,0,0,0, # 60 - 67\r
+ 0,0,0,0,0,0,0,0, # 68 - 6f\r
+ 0,0,0,0,0,0,0,0, # 70 - 77\r
+ 0,0,0,0,0,0,0,0, # 78 - 7f\r
+ 0,0,0,0,0,0,0,0, # 80 - 87\r
+ 0,0,0,0,0,0,0,0, # 88 - 8f\r
+ 0,0,0,0,0,0,0,0, # 90 - 97\r
+ 0,0,0,0,0,0,0,0, # 98 - 9f\r
+ 0,0,0,0,0,0,0,0, # a0 - a7\r
+ 0,0,0,0,0,0,0,0, # a8 - af\r
+ 0,0,0,0,0,0,0,0, # b0 - b7\r
+ 0,0,0,0,0,0,0,0, # b8 - bf\r
+ 0,0,0,0,0,0,0,0, # c0 - c7\r
+ 0,0,0,0,0,0,0,0, # c8 - cf\r
+ 0,0,0,0,0,0,0,0, # d0 - d7\r
+ 0,0,0,0,0,0,0,0, # d8 - df\r
+ 0,0,0,0,0,0,0,0, # e0 - e7\r
+ 0,0,0,0,0,0,0,0, # e8 - ef\r
+ 0,0,0,0,0,0,0,0, # f0 - f7\r
+ 0,0,0,0,0,0,4,5 # f8 - ff\r
+)\r
+\r
+UCS2LE_st = (\r
+ 6, 6, 7, 6, 4, 3,eError,eError,#00-07\r
+ eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f\r
+ eItsMe,eItsMe, 5, 5, 5,eError,eItsMe,eError,#10-17\r
+ 5, 5, 5,eError, 5,eError, 6, 6,#18-1f\r
+ 7, 6, 8, 8, 5, 5, 5,eError,#20-27\r
+ 5, 5, 5,eError,eError,eError, 5, 5,#28-2f\r
+ 5, 5, 5,eError, 5,eError,eStart,eStart #30-37\r
+)\r
+\r
+UCS2LECharLenTable = (2, 2, 2, 2, 2, 2)\r
+\r
+UCS2LESMModel = {'classTable': UCS2LE_cls,\r
+ 'classFactor': 6,\r
+ 'stateTable': UCS2LE_st,\r
+ 'charLenTable': UCS2LECharLenTable,\r
+ 'name': 'UTF-16LE'}\r
+\r
+# UTF-8\r
+\r
+UTF8_cls = (\r
+ 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value\r
+ 1,1,1,1,1,1,0,0, # 08 - 0f\r
+ 1,1,1,1,1,1,1,1, # 10 - 17\r
+ 1,1,1,0,1,1,1,1, # 18 - 1f\r
+ 1,1,1,1,1,1,1,1, # 20 - 27\r
+ 1,1,1,1,1,1,1,1, # 28 - 2f\r
+ 1,1,1,1,1,1,1,1, # 30 - 37\r
+ 1,1,1,1,1,1,1,1, # 38 - 3f\r
+ 1,1,1,1,1,1,1,1, # 40 - 47\r
+ 1,1,1,1,1,1,1,1, # 48 - 4f\r
+ 1,1,1,1,1,1,1,1, # 50 - 57\r
+ 1,1,1,1,1,1,1,1, # 58 - 5f\r
+ 1,1,1,1,1,1,1,1, # 60 - 67\r
+ 1,1,1,1,1,1,1,1, # 68 - 6f\r
+ 1,1,1,1,1,1,1,1, # 70 - 77\r
+ 1,1,1,1,1,1,1,1, # 78 - 7f\r
+ 2,2,2,2,3,3,3,3, # 80 - 87\r
+ 4,4,4,4,4,4,4,4, # 88 - 8f\r
+ 4,4,4,4,4,4,4,4, # 90 - 97\r
+ 4,4,4,4,4,4,4,4, # 98 - 9f\r
+ 5,5,5,5,5,5,5,5, # a0 - a7\r
+ 5,5,5,5,5,5,5,5, # a8 - af\r
+ 5,5,5,5,5,5,5,5, # b0 - b7\r
+ 5,5,5,5,5,5,5,5, # b8 - bf\r
+ 0,0,6,6,6,6,6,6, # c0 - c7\r
+ 6,6,6,6,6,6,6,6, # c8 - cf\r
+ 6,6,6,6,6,6,6,6, # d0 - d7\r
+ 6,6,6,6,6,6,6,6, # d8 - df\r
+ 7,8,8,8,8,8,8,8, # e0 - e7\r
+ 8,8,8,8,8,9,8,8, # e8 - ef\r
+ 10,11,11,11,11,11,11,11, # f0 - f7\r
+ 12,13,13,13,14,15,0,0 # f8 - ff\r
+)\r
+\r
+UTF8_st = (\r
+ eError,eStart,eError,eError,eError,eError, 12, 10,#00-07\r
+ 9, 11, 8, 7, 6, 5, 4, 3,#08-0f\r
+ eError,eError,eError,eError,eError,eError,eError,eError,#10-17\r
+ eError,eError,eError,eError,eError,eError,eError,eError,#18-1f\r
+ eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27\r
+ eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f\r
+ eError,eError, 5, 5, 5, 5,eError,eError,#30-37\r
+ eError,eError,eError,eError,eError,eError,eError,eError,#38-3f\r
+ eError,eError,eError, 5, 5, 5,eError,eError,#40-47\r
+ eError,eError,eError,eError,eError,eError,eError,eError,#48-4f\r
+ eError,eError, 7, 7, 7, 7,eError,eError,#50-57\r
+ eError,eError,eError,eError,eError,eError,eError,eError,#58-5f\r
+ eError,eError,eError,eError, 7, 7,eError,eError,#60-67\r
+ eError,eError,eError,eError,eError,eError,eError,eError,#68-6f\r
+ eError,eError, 9, 9, 9, 9,eError,eError,#70-77\r
+ eError,eError,eError,eError,eError,eError,eError,eError,#78-7f\r
+ eError,eError,eError,eError,eError, 9,eError,eError,#80-87\r
+ eError,eError,eError,eError,eError,eError,eError,eError,#88-8f\r
+ eError,eError, 12, 12, 12, 12,eError,eError,#90-97\r
+ eError,eError,eError,eError,eError,eError,eError,eError,#98-9f\r
+ eError,eError,eError,eError,eError, 12,eError,eError,#a0-a7\r
+ eError,eError,eError,eError,eError,eError,eError,eError,#a8-af\r
+ eError,eError, 12, 12, 12,eError,eError,eError,#b0-b7\r
+ eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf\r
+ eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7\r
+ eError,eError,eError,eError,eError,eError,eError,eError #c8-cf\r
+)\r
+\r
+UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)\r
+\r
+UTF8SMModel = {'classTable': UTF8_cls,\r
+ 'classFactor': 16,\r
+ 'stateTable': UTF8_st,\r
+ 'charLenTable': UTF8CharLenTable,\r
+ 'name': 'UTF-8'}\r
+\r
+# flake8: noqa\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Universal charset detector code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 2001
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-# Shy Shalom - original C code
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-import sys
-from . import constants
-from .charsetprober import CharSetProber
-from .compat import wrap_ord
-
-SAMPLE_SIZE = 64
-SB_ENOUGH_REL_THRESHOLD = 1024
-POSITIVE_SHORTCUT_THRESHOLD = 0.95
-NEGATIVE_SHORTCUT_THRESHOLD = 0.05
-SYMBOL_CAT_ORDER = 250
-NUMBER_OF_SEQ_CAT = 4
-POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
-#NEGATIVE_CAT = 0
-
-
-class SingleByteCharSetProber(CharSetProber):
- def __init__(self, model, reversed=False, nameProber=None):
- CharSetProber.__init__(self)
- self._mModel = model
- # TRUE if we need to reverse every pair in the model lookup
- self._mReversed = reversed
- # Optional auxiliary prober for name decision
- self._mNameProber = nameProber
- self.reset()
-
- def reset(self):
- CharSetProber.reset(self)
- self._mLastOrder = 255 # char order of last character
- self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
- self._mTotalSeqs = 0
- self._mTotalChar = 0
- self._mFreqChar = 0 # characters that fall in our sampling range
-
- def get_charset_name(self):
- if self._mNameProber:
- return self._mNameProber.get_charset_name()
- else:
- return self._mModel['charsetName']
-
- def feed(self, aBuf):
- if not self._mModel['keepEnglishLetter']:
- aBuf = self.filter_without_english_letters(aBuf)
- aLen = len(aBuf)
- if not aLen:
- return self.get_state()
- for c in aBuf:
- try:
- order = self._mModel['charToOrderMap'][wrap_ord(c)]
- except IndexError:
- return constants.eError
- if order < SYMBOL_CAT_ORDER:
- self._mTotalChar += 1
- if order < SAMPLE_SIZE:
- self._mFreqChar += 1
- if self._mLastOrder < SAMPLE_SIZE:
- self._mTotalSeqs += 1
- if not self._mReversed:
- i = (self._mLastOrder * SAMPLE_SIZE) + order
- else: # reverse the order of the letters in the lookup
- i = (order * SAMPLE_SIZE) + self._mLastOrder
- model = self._mModel['precedenceMatrix'][i]
- self._mSeqCounters[model] += 1
- self._mLastOrder = order
-
- if self.get_state() == constants.eDetecting:
- if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD:
- cf = self.get_confidence()
- if cf > POSITIVE_SHORTCUT_THRESHOLD:
- if constants._debug:
- sys.stderr.write('%s confidence = %s, we have a'
- ' winner\n' %
- (self._mModel['charsetName'],
- cf))
- self._mState = constants.eFoundIt
- elif cf < NEGATIVE_SHORTCUT_THRESHOLD:
- if constants._debug:
- sys.stderr.write('%s confidence = %s, below negative'
- ' shortcut threshhold %s\n' %
- (self._mModel['charsetName'], cf,
- NEGATIVE_SHORTCUT_THRESHOLD))
- self._mState = constants.eNotMe
-
- return self.get_state()
-
- def get_confidence(self):
- r = 0.01
- if self._mTotalSeqs > 0:
- #print(self._mSeqCounters[POSITIVE_CAT], self._mTotalSeqs,
- # self._mModel['mTypicalPositiveRatio'])
- r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs
- / self._mModel['mTypicalPositiveRatio'])
- #print(r, self._mFreqChar, self._mTotalChar)
- r = r * self._mFreqChar / self._mTotalChar
- if r >= 1.0:
- r = 0.99
- return r
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is Mozilla Universal charset detector code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 2001\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+# Shy Shalom - original C code\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+import sys\r
+from . import constants\r
+from .charsetprober import CharSetProber\r
+from .compat import wrap_ord\r
+\r
+SAMPLE_SIZE = 64\r
+SB_ENOUGH_REL_THRESHOLD = 1024\r
+POSITIVE_SHORTCUT_THRESHOLD = 0.95\r
+NEGATIVE_SHORTCUT_THRESHOLD = 0.05\r
+SYMBOL_CAT_ORDER = 250\r
+NUMBER_OF_SEQ_CAT = 4\r
+POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1\r
+#NEGATIVE_CAT = 0\r
+\r
+\r
+class SingleByteCharSetProber(CharSetProber):\r
+ def __init__(self, model, reversed=False, nameProber=None):\r
+ CharSetProber.__init__(self)\r
+ self._mModel = model\r
+ # TRUE if we need to reverse every pair in the model lookup\r
+ self._mReversed = reversed\r
+ # Optional auxiliary prober for name decision\r
+ self._mNameProber = nameProber\r
+ self.reset()\r
+\r
+ def reset(self):\r
+ CharSetProber.reset(self)\r
+ # char order of last character\r
+ self._mLastOrder = 255\r
+ self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT\r
+ self._mTotalSeqs = 0\r
+ self._mTotalChar = 0\r
+ # characters that fall in our sampling range\r
+ self._mFreqChar = 0\r
+\r
+ def get_charset_name(self):\r
+ if self._mNameProber:\r
+ return self._mNameProber.get_charset_name()\r
+ else:\r
+ return self._mModel['charsetName']\r
+\r
+ def feed(self, aBuf):\r
+ if not self._mModel['keepEnglishLetter']:\r
+ aBuf = self.filter_without_english_letters(aBuf)\r
+ aLen = len(aBuf)\r
+ if not aLen:\r
+ return self.get_state()\r
+ for c in aBuf:\r
+ order = self._mModel['charToOrderMap'][wrap_ord(c)]\r
+ if order < SYMBOL_CAT_ORDER:\r
+ self._mTotalChar += 1\r
+ if order < SAMPLE_SIZE:\r
+ self._mFreqChar += 1\r
+ if self._mLastOrder < SAMPLE_SIZE:\r
+ self._mTotalSeqs += 1\r
+ if not self._mReversed:\r
+ i = (self._mLastOrder * SAMPLE_SIZE) + order\r
+ model = self._mModel['precedenceMatrix'][i]\r
+ else: # reverse the order of the letters in the lookup\r
+ i = (order * SAMPLE_SIZE) + self._mLastOrder\r
+ model = self._mModel['precedenceMatrix'][i]\r
+ self._mSeqCounters[model] += 1\r
+ self._mLastOrder = order\r
+\r
+ if self.get_state() == constants.eDetecting:\r
+ if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD:\r
+ cf = self.get_confidence()\r
+ if cf > POSITIVE_SHORTCUT_THRESHOLD:\r
+ if constants._debug:\r
+ sys.stderr.write('%s confidence = %s, we have a'\r
+ 'winner\n' %\r
+ (self._mModel['charsetName'], cf))\r
+ self._mState = constants.eFoundIt\r
+ elif cf < NEGATIVE_SHORTCUT_THRESHOLD:\r
+ if constants._debug:\r
+ sys.stderr.write('%s confidence = %s, below negative'\r
+ 'shortcut threshhold %s\n' %\r
+ (self._mModel['charsetName'], cf,\r
+ NEGATIVE_SHORTCUT_THRESHOLD))\r
+ self._mState = constants.eNotMe\r
+\r
+ return self.get_state()\r
+\r
+ def get_confidence(self):\r
+ r = 0.01\r
+ if self._mTotalSeqs > 0:\r
+ r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs\r
+ / self._mModel['mTypicalPositiveRatio'])\r
+ r = r * self._mFreqChar / self._mTotalChar\r
+ if r >= 1.0:\r
+ r = 0.99\r
+ return r\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Universal charset detector code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 2001
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-# Shy Shalom - original C code
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from .charsetgroupprober import CharSetGroupProber
-from .sbcharsetprober import SingleByteCharSetProber
-from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
- Latin5CyrillicModel, MacCyrillicModel,
- Ibm866Model, Ibm855Model)
-from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
-from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
-from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
-from .langthaimodel import TIS620ThaiModel
-from .langhebrewmodel import Win1255HebrewModel
-from .hebrewprober import HebrewProber
-
-
-class SBCSGroupProber(CharSetGroupProber):
- def __init__(self):
- CharSetGroupProber.__init__(self)
- self._mProbers = [
- SingleByteCharSetProber(Win1251CyrillicModel),
- SingleByteCharSetProber(Koi8rModel),
- SingleByteCharSetProber(Latin5CyrillicModel),
- SingleByteCharSetProber(MacCyrillicModel),
- SingleByteCharSetProber(Ibm866Model),
- SingleByteCharSetProber(Ibm855Model),
- SingleByteCharSetProber(Latin7GreekModel),
- SingleByteCharSetProber(Win1253GreekModel),
- SingleByteCharSetProber(Latin5BulgarianModel),
- SingleByteCharSetProber(Win1251BulgarianModel),
- SingleByteCharSetProber(Latin2HungarianModel),
- SingleByteCharSetProber(Win1250HungarianModel),
- SingleByteCharSetProber(TIS620ThaiModel),
- ]
- hebrewProber = HebrewProber()
- logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel,
- False, hebrewProber)
- visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True,
- hebrewProber)
- hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber)
- self._mProbers.extend([hebrewProber, logicalHebrewProber,
- visualHebrewProber])
-
- self.reset()
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is Mozilla Universal charset detector code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 2001\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+# Shy Shalom - original C code\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from .charsetgroupprober import CharSetGroupProber\r
+from .sbcharsetprober import SingleByteCharSetProber\r
+from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,\r
+ Latin5CyrillicModel, MacCyrillicModel,\r
+ Ibm866Model, Ibm855Model)\r
+from .langgreekmodel import Latin7GreekModel, Win1253GreekModel\r
+from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel\r
+from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel\r
+from .langthaimodel import TIS620ThaiModel\r
+from .langhebrewmodel import Win1255HebrewModel\r
+from .hebrewprober import HebrewProber\r
+\r
+\r
+class SBCSGroupProber(CharSetGroupProber):\r
+ def __init__(self):\r
+ CharSetGroupProber.__init__(self)\r
+ self._mProbers = [\r
+ SingleByteCharSetProber(Win1251CyrillicModel),\r
+ SingleByteCharSetProber(Koi8rModel),\r
+ SingleByteCharSetProber(Latin5CyrillicModel),\r
+ SingleByteCharSetProber(MacCyrillicModel),\r
+ SingleByteCharSetProber(Ibm866Model),\r
+ SingleByteCharSetProber(Ibm855Model),\r
+ SingleByteCharSetProber(Latin7GreekModel),\r
+ SingleByteCharSetProber(Win1253GreekModel),\r
+ SingleByteCharSetProber(Latin5BulgarianModel),\r
+ SingleByteCharSetProber(Win1251BulgarianModel),\r
+ SingleByteCharSetProber(Latin2HungarianModel),\r
+ SingleByteCharSetProber(Win1250HungarianModel),\r
+ SingleByteCharSetProber(TIS620ThaiModel),\r
+ ]\r
+ hebrewProber = HebrewProber()\r
+ logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel,\r
+ False, hebrewProber)\r
+ visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True,\r
+ hebrewProber)\r
+ hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber)\r
+ self._mProbers.extend([hebrewProber, logicalHebrewProber,\r
+ visualHebrewProber])\r
+\r
+ self.reset()\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-import sys
-from .mbcharsetprober import MultiByteCharSetProber
-from .codingstatemachine import CodingStateMachine
-from .chardistribution import SJISDistributionAnalysis
-from .jpcntx import SJISContextAnalysis
-from .mbcssm import SJISSMModel
-from . import constants
-
-
-class SJISProber(MultiByteCharSetProber):
- def __init__(self):
- MultiByteCharSetProber.__init__(self)
- self._mCodingSM = CodingStateMachine(SJISSMModel)
- self._mDistributionAnalyzer = SJISDistributionAnalysis()
- self._mContextAnalyzer = SJISContextAnalysis()
- self.reset()
-
- def reset(self):
- MultiByteCharSetProber.reset(self)
- self._mContextAnalyzer.reset()
-
- def get_charset_name(self):
- return "SHIFT_JIS"
-
- def feed(self, aBuf):
- aLen = len(aBuf)
- for i in range(0, aLen):
- codingState = self._mCodingSM.next_state(aBuf[i])
- if codingState == constants.eError:
- if constants._debug:
- sys.stderr.write(self.get_charset_name() + ' prober hit'
- 'error at byte ' + str(i) + '\n')
- self._mState = constants.eNotMe
- break
- elif codingState == constants.eItsMe:
- self._mState = constants.eFoundIt
- break
- elif codingState == constants.eStart:
- charLen = self._mCodingSM.get_current_charlen()
- if i == 0:
- self._mLastChar[1] = aBuf[0]
- self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],
- charLen)
- self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
- else:
- self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3 -
- charLen], charLen)
- self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
- charLen)
-
- self._mLastChar[0] = aBuf[aLen - 1]
-
- if self.get_state() == constants.eDetecting:
- if (self._mContextAnalyzer.got_enough_data() and
- (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
- self._mState = constants.eFoundIt
-
- return self.get_state()
-
- def get_confidence(self):
- contxtCf = self._mContextAnalyzer.get_confidence()
- distribCf = self._mDistributionAnalyzer.get_confidence()
- return max(contxtCf, distribCf)
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is mozilla.org code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 1998\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+import sys\r
+from .mbcharsetprober import MultiByteCharSetProber\r
+from .codingstatemachine import CodingStateMachine\r
+from .chardistribution import SJISDistributionAnalysis\r
+from .jpcntx import SJISContextAnalysis\r
+from .mbcssm import SJISSMModel\r
+from . import constants\r
+\r
+\r
+class SJISProber(MultiByteCharSetProber):\r
+ def __init__(self):\r
+ MultiByteCharSetProber.__init__(self)\r
+ self._mCodingSM = CodingStateMachine(SJISSMModel)\r
+ self._mDistributionAnalyzer = SJISDistributionAnalysis()\r
+ self._mContextAnalyzer = SJISContextAnalysis()\r
+ self.reset()\r
+\r
+ def reset(self):\r
+ MultiByteCharSetProber.reset(self)\r
+ self._mContextAnalyzer.reset()\r
+\r
+ def get_charset_name(self):\r
+ return "SHIFT_JIS"\r
+\r
+ def feed(self, aBuf):\r
+ aLen = len(aBuf)\r
+ for i in range(0, aLen):\r
+ codingState = self._mCodingSM.next_state(aBuf[i])\r
+ if codingState == constants.eError:\r
+ if constants._debug:\r
+ sys.stderr.write(self.get_charset_name()\r
+ + ' prober hit error at byte ' + str(i)\r
+ + '\n')\r
+ self._mState = constants.eNotMe\r
+ break\r
+ elif codingState == constants.eItsMe:\r
+ self._mState = constants.eFoundIt\r
+ break\r
+ elif codingState == constants.eStart:\r
+ charLen = self._mCodingSM.get_current_charlen()\r
+ if i == 0:\r
+ self._mLastChar[1] = aBuf[0]\r
+ self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],\r
+ charLen)\r
+ self._mDistributionAnalyzer.feed(self._mLastChar, charLen)\r
+ else:\r
+ self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3\r
+ - charLen], charLen)\r
+ self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],\r
+ charLen)\r
+\r
+ self._mLastChar[0] = aBuf[aLen - 1]\r
+\r
+ if self.get_state() == constants.eDetecting:\r
+ if (self._mContextAnalyzer.got_enough_data() and\r
+ (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):\r
+ self._mState = constants.eFoundIt\r
+\r
+ return self.get_state()\r
+\r
+ def get_confidence(self):\r
+ contxtCf = self._mContextAnalyzer.get_confidence()\r
+ distribCf = self._mDistributionAnalyzer.get_confidence()\r
+ return max(contxtCf, distribCf)\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is Mozilla Universal charset detector code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 2001
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-# Shy Shalom - original C code
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-import sys
-from . import constants
-from .latin1prober import Latin1Prober # windows-1252
-from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets
-from .sbcsgroupprober import SBCSGroupProber # single-byte character sets
-from .escprober import EscCharSetProber # ISO-2122, etc.
-import re
-import logging
-
-logger = logging.getLogger(__name__)
-
-MINIMUM_THRESHOLD = 0.20
-ePureAscii = 0
-eEscAscii = 1
-eHighbyte = 2
-
-
-class UniversalDetector:
- def __init__(self):
- self._highBitDetector = re.compile(b'[\x80-\xFF]')
- self._escDetector = re.compile(b'(\033|~{)')
- self._mEscCharSetProber = None
- self._mCharSetProbers = []
- self.reset()
-
- def reset(self):
- self.result = {'encoding': None, 'confidence': 0.0}
- self.done = False
- self._mStart = True
- self._mGotData = False
- self._mInputState = ePureAscii
- self._mLastChar = b''
- if self._mEscCharSetProber:
- self._mEscCharSetProber.reset()
- for prober in self._mCharSetProbers:
- prober.reset()
-
- def feed(self, aBuf):
- if self.done:
- return
-
- charmap = (
- # EF BB BF UTF-8 with BOM
- ('\xEF\xBB\xBF', {'encoding': "UTF-8", 'confidence': 1.0}),
- # FF FE 00 00 UTF-32, little-endian BOM
- ('\xFF\xFE\x00\x00', {'encoding': "UTF-32LE", 'confidence': 1.0}),
- # 00 00 FE FF UTF-32, big-endian BOM
- ('\x00\x00\xFE\xFF', {'encoding': "UTF-32BE", 'confidence': 1.0}),
- # FE FF 00 00 UCS-4, unusual octet order BOM (3412)
- ('\xFE\xFF\x00\x00', {'encoding': "X-ISO-10646-UCS-4-3412",
- 'confidence': 1.0}),
- # 00 00 FF FE UCS-4, unusual octet order BOM (2143)
- ('\x00\x00\xFF\xFE', {'encoding': "X-ISO-10646-UCS-4-2143",
- 'confidence': 1.0}),
- # FF FE UTF-16, little endian BOM
- ('\xFF\xFE', {'encoding': "UTF-16LE", 'confidence': 1.0}),
- # FE FF UTF-16, big endian BOM
- ('\xFE\xFF', {'encoding': "UTF-16BE", 'confidence': 1.0}),
- )
-
- aLen = len(aBuf)
- if not aLen:
- return
-
- if not self._mGotData:
- # If the data starts with BOM, we know it is UTF
- for chunk, result in charmap:
- if aBuf[:len(chunk)] == chunk:
- self.result = result
- break
-
- self._mGotData = True
- if self.result['encoding'] and (self.result['confidence'] > 0.0):
- self.done = True
- return
-
- if self._mInputState == ePureAscii:
- if self._highBitDetector.search(aBuf):
- self._mInputState = eHighbyte
- elif ((self._mInputState == ePureAscii) and
- self._escDetector.search(self._mLastChar + aBuf)):
- self._mInputState = eEscAscii
-
- self._mLastChar = aBuf[-1:]
-
- if self._mInputState == eEscAscii:
- if not self._mEscCharSetProber:
- self._mEscCharSetProber = EscCharSetProber()
- if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt:
- self.result = {
- 'encoding': self._mEscCharSetProber.get_charset_name(),
- 'confidence': self._mEscCharSetProber.get_confidence(),
- }
- self.done = True
- elif self._mInputState == eHighbyte:
- if not self._mCharSetProbers:
- self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(),
- Latin1Prober()]
- for prober in self._mCharSetProbers:
- try:
- if prober.feed(aBuf) == constants.eFoundIt:
- self.result = {'encoding': prober.get_charset_name(),
- 'confidence': prober.get_confidence()}
- self.done = True
- break
- except (UnicodeDecodeError, UnicodeEncodeError) as e:
- logger.exception(e)
-
- def close(self):
- if self.done:
- return
- if not self._mGotData:
- if constants._debug:
- sys.stderr.write('no data received!\n')
- return
- self.done = True
-
- if self._mInputState == ePureAscii:
- self.result = {'encoding': 'ascii', 'confidence': 1.0}
- return self.result
-
- if self._mInputState == eHighbyte:
- proberConfidence = None
- maxProberConfidence = 0.0
- maxProber = None
- for prober in self._mCharSetProbers:
- if not prober:
- continue
- proberConfidence = prober.get_confidence()
- if proberConfidence > maxProberConfidence:
- maxProberConfidence = proberConfidence
- maxProber = prober
- if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD):
- self.result = {'encoding': maxProber.get_charset_name(),
- 'confidence': maxProber.get_confidence()}
- return self.result
-
- if constants._debug:
- sys.stderr.write('no probers hit minimum threshhold\n')
- for prober in self._mCharSetProbers[0].mProbers:
- if not prober:
- continue
- sys.stderr.write('%s confidence = %s\n' %
- (prober.get_charset_name(),
- prober.get_confidence()))
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is Mozilla Universal charset detector code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 2001\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+# Shy Shalom - original C code\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from . import constants\r
+import sys\r
+from .latin1prober import Latin1Prober # windows-1252\r
+from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets\r
+from .sbcsgroupprober import SBCSGroupProber # single-byte character sets\r
+from .escprober import EscCharSetProber # ISO-2122, etc.\r
+import re\r
+\r
+MINIMUM_THRESHOLD = 0.20\r
+ePureAscii = 0\r
+eEscAscii = 1\r
+eHighbyte = 2\r
+\r
+\r
+class UniversalDetector:\r
+ def __init__(self):\r
+ self._highBitDetector = re.compile(b'[\x80-\xFF]')\r
+ self._escDetector = re.compile(b'(\033|~{)')\r
+ self._mEscCharSetProber = None\r
+ self._mCharSetProbers = []\r
+ self.reset()\r
+\r
+ def reset(self):\r
+ self.result = {'encoding': None, 'confidence': 0.0}\r
+ self.done = False\r
+ self._mStart = True\r
+ self._mGotData = False\r
+ self._mInputState = ePureAscii\r
+ self._mLastChar = b''\r
+ if self._mEscCharSetProber:\r
+ self._mEscCharSetProber.reset()\r
+ for prober in self._mCharSetProbers:\r
+ prober.reset()\r
+\r
+ def feed(self, aBuf):\r
+ if self.done:\r
+ return\r
+\r
+ aLen = len(aBuf)\r
+ if not aLen:\r
+ return\r
+\r
+ if not self._mGotData:\r
+ # If the data starts with BOM, we know it is UTF\r
+ if aBuf[:3] == '\xEF\xBB\xBF':\r
+ # EF BB BF UTF-8 with BOM\r
+ self.result = {'encoding': "UTF-8", 'confidence': 1.0}\r
+ elif aBuf[:4] == '\xFF\xFE\x00\x00':\r
+ # FF FE 00 00 UTF-32, little-endian BOM\r
+ self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}\r
+ elif aBuf[:4] == '\x00\x00\xFE\xFF':\r
+ # 00 00 FE FF UTF-32, big-endian BOM\r
+ self.result = {'encoding': "UTF-32BE", 'confidence': 1.0}\r
+ elif aBuf[:4] == '\xFE\xFF\x00\x00':\r
+ # FE FF 00 00 UCS-4, unusual octet order BOM (3412)\r
+ self.result = {\r
+ 'encoding': "X-ISO-10646-UCS-4-3412",\r
+ 'confidence': 1.0\r
+ }\r
+ elif aBuf[:4] == '\x00\x00\xFF\xFE':\r
+ # 00 00 FF FE UCS-4, unusual octet order BOM (2143)\r
+ self.result = {\r
+ 'encoding': "X-ISO-10646-UCS-4-2143",\r
+ 'confidence': 1.0\r
+ }\r
+ elif aBuf[:2] == '\xFF\xFE':\r
+ # FF FE UTF-16, little endian BOM\r
+ self.result = {'encoding': "UTF-16LE", 'confidence': 1.0}\r
+ elif aBuf[:2] == '\xFE\xFF':\r
+ # FE FF UTF-16, big endian BOM\r
+ self.result = {'encoding': "UTF-16BE", 'confidence': 1.0}\r
+\r
+ self._mGotData = True\r
+ if self.result['encoding'] and (self.result['confidence'] > 0.0):\r
+ self.done = True\r
+ return\r
+\r
+ if self._mInputState == ePureAscii:\r
+ if self._highBitDetector.search(aBuf):\r
+ self._mInputState = eHighbyte\r
+ elif ((self._mInputState == ePureAscii) and\r
+ self._escDetector.search(self._mLastChar + aBuf)):\r
+ self._mInputState = eEscAscii\r
+\r
+ self._mLastChar = aBuf[-1:]\r
+\r
+ if self._mInputState == eEscAscii:\r
+ if not self._mEscCharSetProber:\r
+ self._mEscCharSetProber = EscCharSetProber()\r
+ if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt:\r
+ self.result = {\r
+ 'encoding': self._mEscCharSetProber.get_charset_name(),\r
+ 'confidence': self._mEscCharSetProber.get_confidence()\r
+ }\r
+ self.done = True\r
+ elif self._mInputState == eHighbyte:\r
+ if not self._mCharSetProbers:\r
+ self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(),\r
+ Latin1Prober()]\r
+ for prober in self._mCharSetProbers:\r
+ if prober.feed(aBuf) == constants.eFoundIt:\r
+ self.result = {'encoding': prober.get_charset_name(),\r
+ 'confidence': prober.get_confidence()}\r
+ self.done = True\r
+ break\r
+\r
+ def close(self):\r
+ if self.done:\r
+ return\r
+ if not self._mGotData:\r
+ if constants._debug:\r
+ sys.stderr.write('no data received!\n')\r
+ return\r
+ self.done = True\r
+\r
+ if self._mInputState == ePureAscii:\r
+ self.result = {'encoding': 'ascii', 'confidence': 1.0}\r
+ return self.result\r
+\r
+ if self._mInputState == eHighbyte:\r
+ proberConfidence = None\r
+ maxProberConfidence = 0.0\r
+ maxProber = None\r
+ for prober in self._mCharSetProbers:\r
+ if not prober:\r
+ continue\r
+ proberConfidence = prober.get_confidence()\r
+ if proberConfidence > maxProberConfidence:\r
+ maxProberConfidence = proberConfidence\r
+ maxProber = prober\r
+ if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD):\r
+ self.result = {'encoding': maxProber.get_charset_name(),\r
+ 'confidence': maxProber.get_confidence()}\r
+ return self.result\r
+\r
+ if constants._debug:\r
+ sys.stderr.write('no probers hit minimum threshhold\n')\r
+ for prober in self._mCharSetProbers[0].mProbers:\r
+ if not prober:\r
+ continue\r
+ sys.stderr.write('%s confidence = %s\n' %\r
+ (prober.get_charset_name(),\r
+ prober.get_confidence()))\r
-######################## BEGIN LICENSE BLOCK ########################
-# The Original Code is mozilla.org code.
-#
-# The Initial Developer of the Original Code is
-# Netscape Communications Corporation.
-# Portions created by the Initial Developer are Copyright (C) 1998
-# the Initial Developer. All Rights Reserved.
-#
-# Contributor(s):
-# Mark Pilgrim - port to Python
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
-# 02110-1301 USA
-######################### END LICENSE BLOCK #########################
-
-from . import constants
-from .charsetprober import CharSetProber
-from .codingstatemachine import CodingStateMachine
-from .mbcssm import UTF8SMModel
-
-ONE_CHAR_PROB = 0.5
-
-
-class UTF8Prober(CharSetProber):
- def __init__(self):
- CharSetProber.__init__(self)
- self._mCodingSM = CodingStateMachine(UTF8SMModel)
- self.reset()
-
- def reset(self):
- CharSetProber.reset(self)
- self._mCodingSM.reset()
- self._mNumOfMBChar = 0
-
- def get_charset_name(self):
- return "utf-8"
-
- def feed(self, aBuf):
- for c in aBuf:
- codingState = self._mCodingSM.next_state(c)
- if codingState == constants.eError:
- self._mState = constants.eNotMe
- break
- elif codingState == constants.eItsMe:
- self._mState = constants.eFoundIt
- break
- elif codingState == constants.eStart:
- if self._mCodingSM.get_current_charlen() >= 2:
- self._mNumOfMBChar += 1
-
- if self.get_state() == constants.eDetecting:
- if self.get_confidence() > constants.SHORTCUT_THRESHOLD:
- self._mState = constants.eFoundIt
-
- return self.get_state()
-
- def get_confidence(self):
- unlike = 0.99
- if self._mNumOfMBChar < 6:
- for i in range(0, self._mNumOfMBChar):
- unlike = unlike * ONE_CHAR_PROB
- return 1.0 - unlike
- else:
- return unlike
+######################## BEGIN LICENSE BLOCK ########################\r
+# The Original Code is mozilla.org code.\r
+#\r
+# The Initial Developer of the Original Code is\r
+# Netscape Communications Corporation.\r
+# Portions created by the Initial Developer are Copyright (C) 1998\r
+# the Initial Developer. All Rights Reserved.\r
+#\r
+# Contributor(s):\r
+# Mark Pilgrim - port to Python\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or (at your option) any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
+# 02110-1301 USA\r
+######################### END LICENSE BLOCK #########################\r
+\r
+from . import constants\r
+from .charsetprober import CharSetProber\r
+from .codingstatemachine import CodingStateMachine\r
+from .mbcssm import UTF8SMModel\r
+\r
+ONE_CHAR_PROB = 0.5\r
+\r
+\r
+class UTF8Prober(CharSetProber):\r
+ def __init__(self):\r
+ CharSetProber.__init__(self)\r
+ self._mCodingSM = CodingStateMachine(UTF8SMModel)\r
+ self.reset()\r
+\r
+ def reset(self):\r
+ CharSetProber.reset(self)\r
+ self._mCodingSM.reset()\r
+ self._mNumOfMBChar = 0\r
+\r
+ def get_charset_name(self):\r
+ return "utf-8"\r
+\r
+ def feed(self, aBuf):\r
+ for c in aBuf:\r
+ codingState = self._mCodingSM.next_state(c)\r
+ if codingState == constants.eError:\r
+ self._mState = constants.eNotMe\r
+ break\r
+ elif codingState == constants.eItsMe:\r
+ self._mState = constants.eFoundIt\r
+ break\r
+ elif codingState == constants.eStart:\r
+ if self._mCodingSM.get_current_charlen() >= 2:\r
+ self._mNumOfMBChar += 1\r
+\r
+ if self.get_state() == constants.eDetecting:\r
+ if self.get_confidence() > constants.SHORTCUT_THRESHOLD:\r
+ self._mState = constants.eFoundIt\r
+\r
+ return self.get_state()\r
+\r
+ def get_confidence(self):\r
+ unlike = 0.99\r
+ if self._mNumOfMBChar < 6:\r
+ for i in range(0, self._mNumOfMBChar):\r
+ unlike = unlike * ONE_CHAR_PROB\r
+ return 1.0 - unlike\r
+ else:\r
+ return unlike\r
--- /dev/null
+from requests.packages import chardet
+
+with open('test', 'rb') as f:
+ print(chardet.detect(f.read()))
\ No newline at end of file
--- /dev/null
+import requests
+
+r = requests.get('http://readability.com')
+r.encoding = None
+
+print(r.text)
\ No newline at end of file
--- /dev/null
+{
+ "url": "http://httpbin.org/get",
+ "headers": {
+ "Content-Length": "",
+ "Connection": "keep-alive",
+ "Accept": "*/*",
+ "User-Agent": "curl/7.24.0 (x86_64-apple-darwin12.0) libcurl/7.24.0 OpenSSL/0.9.8r zlib/1.2.5",
+ "Host": "httpbin.org",
+ "Content-Type": ""
+ },
+ "args": {},
+ "origin": "184.72.18.219"
+}
\ No newline at end of file