Initial work towards replacing charade
authorIan Cordasco <graffatcolmingov@gmail.com>
Wed, 18 Dec 2013 14:42:12 +0000 (08:42 -0600)
committerIan Cordasco <graffatcolmingov@gmail.com>
Wed, 18 Dec 2013 14:42:12 +0000 (08:42 -0600)
40 files changed:
Makefile
requests/packages/charade/__main__.py [deleted file]
requests/packages/chardet/__init__.py [moved from requests/packages/charade/__init__.py with 55% similarity]
requests/packages/chardet/big5freq.py [moved from requests/packages/charade/big5freq.py with 100% similarity]
requests/packages/chardet/big5prober.py [moved from requests/packages/charade/big5prober.py with 97% similarity]
requests/packages/chardet/chardetect.py [new file with mode: 0755]
requests/packages/chardet/chardistribution.py [moved from requests/packages/charade/chardistribution.py with 97% similarity]
requests/packages/chardet/charsetgroupprober.py [moved from requests/packages/charade/charsetgroupprober.py with 97% similarity]
requests/packages/chardet/charsetprober.py [moved from requests/packages/charade/charsetprober.py with 100% similarity]
requests/packages/chardet/codingstatemachine.py [moved from requests/packages/charade/codingstatemachine.py with 97% similarity]
requests/packages/chardet/compat.py [moved from requests/packages/charade/compat.py with 100% similarity]
requests/packages/chardet/constants.py [moved from requests/packages/charade/constants.py with 97% similarity]
requests/packages/chardet/cp949prober.py [moved from requests/packages/charade/cp949prober.py with 97% similarity]
requests/packages/chardet/escprober.py [moved from requests/packages/charade/escprober.py with 97% similarity]
requests/packages/chardet/escsm.py [moved from requests/packages/charade/escsm.py with 97% similarity]
requests/packages/chardet/eucjpprober.py [moved from requests/packages/charade/eucjpprober.py with 97% similarity]
requests/packages/chardet/euckrfreq.py [moved from requests/packages/charade/euckrfreq.py with 100% similarity]
requests/packages/chardet/euckrprober.py [moved from requests/packages/charade/euckrprober.py with 97% similarity]
requests/packages/chardet/euctwfreq.py [moved from requests/packages/charade/euctwfreq.py with 100% similarity]
requests/packages/chardet/euctwprober.py [moved from requests/packages/charade/euctwprober.py with 97% similarity]
requests/packages/chardet/gb2312freq.py [moved from requests/packages/charade/gb2312freq.py with 100% similarity]
requests/packages/chardet/gb2312prober.py [moved from requests/packages/charade/gb2312prober.py with 97% similarity]
requests/packages/chardet/hebrewprober.py [moved from requests/packages/charade/hebrewprober.py with 97% similarity]
requests/packages/chardet/jisfreq.py [moved from requests/packages/charade/jisfreq.py with 100% similarity]
requests/packages/chardet/jpcntx.py [moved from requests/packages/charade/jpcntx.py with 98% similarity]
requests/packages/chardet/langbulgarianmodel.py [moved from requests/packages/charade/langbulgarianmodel.py with 98% similarity]
requests/packages/chardet/langcyrillicmodel.py [moved from requests/packages/charade/langcyrillicmodel.py with 98% similarity]
requests/packages/chardet/langgreekmodel.py [moved from requests/packages/charade/langgreekmodel.py with 98% similarity]
requests/packages/chardet/langhebrewmodel.py [moved from requests/packages/charade/langhebrewmodel.py with 98% similarity]
requests/packages/chardet/langhungarianmodel.py [moved from requests/packages/charade/langhungarianmodel.py with 98% similarity]
requests/packages/chardet/langthaimodel.py [moved from requests/packages/charade/langthaimodel.py with 98% similarity]
requests/packages/chardet/latin1prober.py [moved from requests/packages/charade/latin1prober.py with 96% similarity]
requests/packages/chardet/mbcharsetprober.py [moved from requests/packages/charade/mbcharsetprober.py with 97% similarity]
requests/packages/chardet/mbcsgroupprober.py [moved from requests/packages/charade/mbcsgroupprober.py with 97% similarity]
requests/packages/chardet/mbcssm.py [moved from requests/packages/charade/mbcssm.py with 97% similarity]
requests/packages/chardet/sbcharsetprober.py [moved from requests/packages/charade/sbcharsetprober.py with 97% similarity]
requests/packages/chardet/sbcsgroupprober.py [moved from requests/packages/charade/sbcsgroupprober.py with 97% similarity]
requests/packages/chardet/sjisprober.py [moved from requests/packages/charade/sjisprober.py with 97% similarity]
requests/packages/chardet/universaldetector.py [moved from requests/packages/charade/universaldetector.py with 90% similarity]
requests/packages/chardet/utf8prober.py [moved from requests/packages/charade/utf8prober.py with 97% similarity]

index 7204163..6396bb7 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -15,7 +15,7 @@ ci: init
 certs:
        curl http://ci.kennethreitz.org/job/ca-bundle/lastSuccessfulBuild/artifact/cacerts.pem -o requests/cacert.pem
 
-deps: urllib3 charade
+deps: urllib3 chardet
 
 urllib3:
        rm -fr requests/packages/urllib3
@@ -23,11 +23,11 @@ urllib3:
        mv urllib3/urllib3 requests/packages/
        rm -fr urllib3
 
-charade:
-       rm -fr requests/packages/charade
-       git clone https://github.com/sigmavirus24/charade.git
-       mv charade/charade requests/packages/
-       rm -fr charade
+chardet:
+       rm -fr requests/packages/chardet
+       git clone https://github.com/chardet/chardet.git
+       mv chardet/chardet requests/packages/
+       rm -fr chardet
 
 publish:
        python setup.py sdist upload
diff --git a/requests/packages/charade/__main__.py b/requests/packages/charade/__main__.py
deleted file mode 100644 (file)
index c0d587f..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-'''\r
-support ';python -m charade <file1> [file2] ...' package execution syntax (2.7+)\r
-'''\r
-\r
-from charade import charade_cli\r
-\r
-charade_cli()\r
similarity index 55%
rename from requests/packages/charade/__init__.py
rename to requests/packages/chardet/__init__.py
index 26362e9..e4f0799 100644 (file)
@@ -1,66 +1,32 @@
-######################## BEGIN LICENSE BLOCK ########################\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-__version__ = "1.0.3"\r
-from sys import version_info\r
-\r
-\r
-def detect(aBuf):\r
-    if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or\r
-            (version_info >= (3, 0) and not isinstance(aBuf, bytes))):\r
-        raise ValueError('Expected a bytes object, not a unicode object')\r
-\r
-    from . import universaldetector\r
-    u = universaldetector.UniversalDetector()\r
-    u.reset()\r
-    u.feed(aBuf)\r
-    u.close()\r
-    return u.result\r
-\r
-def _description_of(path):\r
-    """Return a string describing the probable encoding of a file."""\r
-    from charade.universaldetector import UniversalDetector\r
-\r
-    u = UniversalDetector()\r
-    for line in open(path, 'rb'):\r
-        u.feed(line)\r
-    u.close()\r
-    result = u.result\r
-    if result['encoding']:\r
-        return '%s: %s with confidence %s' % (path,\r
-                                              result['encoding'],\r
-                                              result['confidence'])\r
-    else:\r
-        return '%s: no result' % path\r
-\r
-\r
-def charade_cli():\r
-    """\r
-    Script which takes one or more file paths and reports on their detected\r
-    encodings\r
-\r
-    Example::\r
-\r
-        % chardetect.py somefile someotherfile\r
-        somefile: windows-1252 with confidence 0.5\r
-        someotherfile: ascii with confidence 1.0\r
-\r
-    """\r
-    from sys import argv\r
-    for path in argv[1:]:\r
-        print(_description_of(path))\r
-        
\ No newline at end of file
+######################## BEGIN LICENSE BLOCK ########################
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+__version__ = "2.2.1"
+from sys import version_info
+
+
+def detect(aBuf):
+    if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or
+            (version_info >= (3, 0) and not isinstance(aBuf, bytes))):
+        raise ValueError('Expected a bytes object, not a unicode object')
+
+    from . import universaldetector
+    u = universaldetector.UniversalDetector()
+    u.reset()
+    u.feed(aBuf)
+    u.close()
+    return u.result
similarity index 97%
rename from requests/packages/charade/big5prober.py
rename to requests/packages/chardet/big5prober.py
index 7382f7c..becce81 100644 (file)
@@ -1,42 +1,42 @@
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Communicator client code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .mbcharsetprober import MultiByteCharSetProber\r
-from .codingstatemachine import CodingStateMachine\r
-from .chardistribution import Big5DistributionAnalysis\r
-from .mbcssm import Big5SMModel\r
-\r
-\r
-class Big5Prober(MultiByteCharSetProber):\r
-    def __init__(self):\r
-        MultiByteCharSetProber.__init__(self)\r
-        self._mCodingSM = CodingStateMachine(Big5SMModel)\r
-        self._mDistributionAnalyzer = Big5DistributionAnalysis()\r
-        self.reset()\r
-\r
-    def get_charset_name(self):\r
-        return "Big5"\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import Big5DistributionAnalysis
+from .mbcssm import Big5SMModel
+
+
+class Big5Prober(MultiByteCharSetProber):
+    def __init__(self):
+        MultiByteCharSetProber.__init__(self)
+        self._mCodingSM = CodingStateMachine(Big5SMModel)
+        self._mDistributionAnalyzer = Big5DistributionAnalysis()
+        self.reset()
+
+    def get_charset_name(self):
+        return "Big5"
diff --git a/requests/packages/chardet/chardetect.py b/requests/packages/chardet/chardetect.py
new file mode 100755 (executable)
index 0000000..ecd0163
--- /dev/null
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+"""
+Script which takes one or more file paths and reports on their detected
+encodings
+
+Example::
+
+    % chardetect somefile someotherfile
+    somefile: windows-1252 with confidence 0.5
+    someotherfile: ascii with confidence 1.0
+
+If no paths are provided, it takes its input from stdin.
+
+"""
+from io import open
+from sys import argv, stdin
+
+from chardet.universaldetector import UniversalDetector
+
+
+def description_of(file, name='stdin'):
+    """Return a string describing the probable encoding of a file."""
+    u = UniversalDetector()
+    for line in file:
+        u.feed(line)
+    u.close()
+    result = u.result
+    if result['encoding']:
+        return '%s: %s with confidence %s' % (name,
+                                              result['encoding'],
+                                              result['confidence'])
+    else:
+        return '%s: no result' % name
+
+
+def main():
+    if len(argv) <= 1:
+        print(description_of(stdin))
+    else:
+        for path in argv[1:]:
+            with open(path, 'rb') as f:
+                print(description_of(f, path))
+
+
+if __name__ == '__main__':
+    main()
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Communicator client code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE,\r
-                        EUCTW_TYPICAL_DISTRIBUTION_RATIO)\r
-from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE,\r
-                        EUCKR_TYPICAL_DISTRIBUTION_RATIO)\r
-from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE,\r
-                         GB2312_TYPICAL_DISTRIBUTION_RATIO)\r
-from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE,\r
-                       BIG5_TYPICAL_DISTRIBUTION_RATIO)\r
-from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE,\r
-                      JIS_TYPICAL_DISTRIBUTION_RATIO)\r
-from .compat import wrap_ord\r
-\r
-ENOUGH_DATA_THRESHOLD = 1024\r
-SURE_YES = 0.99\r
-SURE_NO = 0.01\r
-MINIMUM_DATA_THRESHOLD = 3\r
-\r
-\r
-class CharDistributionAnalysis:\r
-    def __init__(self):\r
-        # Mapping table to get frequency order from char order (get from\r
-        # GetOrder())\r
-        self._mCharToFreqOrder = None\r
-        self._mTableSize = None  # Size of above table\r
-        # This is a constant value which varies from language to language,\r
-        # used in calculating confidence.  See\r
-        # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html\r
-        # for further detail.\r
-        self._mTypicalDistributionRatio = None\r
-        self.reset()\r
-\r
-    def reset(self):\r
-        """reset analyser, clear any state"""\r
-        # If this flag is set to True, detection is done and conclusion has\r
-        # been made\r
-        self._mDone = False\r
-        self._mTotalChars = 0  # Total characters encountered\r
-        # The number of characters whose frequency order is less than 512\r
-        self._mFreqChars = 0\r
-\r
-    def feed(self, aBuf, aCharLen):\r
-        """feed a character with known length"""\r
-        if aCharLen == 2:\r
-            # we only care about 2-bytes character in our distribution analysis\r
-            order = self.get_order(aBuf)\r
-        else:\r
-            order = -1\r
-        if order >= 0:\r
-            self._mTotalChars += 1\r
-            # order is valid\r
-            if order < self._mTableSize:\r
-                if 512 > self._mCharToFreqOrder[order]:\r
-                    self._mFreqChars += 1\r
-\r
-    def get_confidence(self):\r
-        """return confidence based on existing data"""\r
-        # if we didn't receive any character in our consideration range,\r
-        # return negative answer\r
-        if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD:\r
-            return SURE_NO\r
-\r
-        if self._mTotalChars != self._mFreqChars:\r
-            r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars)\r
-                 * self._mTypicalDistributionRatio))\r
-            if r < SURE_YES:\r
-                return r\r
-\r
-        # normalize confidence (we don't want to be 100% sure)\r
-        return SURE_YES\r
-\r
-    def got_enough_data(self):\r
-        # It is not necessary to receive all data to draw conclusion.\r
-        # For charset detection, certain amount of data is enough\r
-        return self._mTotalChars > ENOUGH_DATA_THRESHOLD\r
-\r
-    def get_order(self, aBuf):\r
-        # We do not handle characters based on the original encoding string,\r
-        # but convert this encoding string to a number, here called order.\r
-        # This allows multiple encodings of a language to share one frequency\r
-        # table.\r
-        return -1\r
-\r
-\r
-class EUCTWDistributionAnalysis(CharDistributionAnalysis):\r
-    def __init__(self):\r
-        CharDistributionAnalysis.__init__(self)\r
-        self._mCharToFreqOrder = EUCTWCharToFreqOrder\r
-        self._mTableSize = EUCTW_TABLE_SIZE\r
-        self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO\r
-\r
-    def get_order(self, aBuf):\r
-        # for euc-TW encoding, we are interested\r
-        #   first  byte range: 0xc4 -- 0xfe\r
-        #   second byte range: 0xa1 -- 0xfe\r
-        # no validation needed here. State machine has done that\r
-        first_char = wrap_ord(aBuf[0])\r
-        if first_char >= 0xC4:\r
-            return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1\r
-        else:\r
-            return -1\r
-\r
-\r
-class EUCKRDistributionAnalysis(CharDistributionAnalysis):\r
-    def __init__(self):\r
-        CharDistributionAnalysis.__init__(self)\r
-        self._mCharToFreqOrder = EUCKRCharToFreqOrder\r
-        self._mTableSize = EUCKR_TABLE_SIZE\r
-        self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO\r
-\r
-    def get_order(self, aBuf):\r
-        # for euc-KR encoding, we are interested\r
-        #   first  byte range: 0xb0 -- 0xfe\r
-        #   second byte range: 0xa1 -- 0xfe\r
-        # no validation needed here. State machine has done that\r
-        first_char = wrap_ord(aBuf[0])\r
-        if first_char >= 0xB0:\r
-            return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1\r
-        else:\r
-            return -1\r
-\r
-\r
-class GB2312DistributionAnalysis(CharDistributionAnalysis):\r
-    def __init__(self):\r
-        CharDistributionAnalysis.__init__(self)\r
-        self._mCharToFreqOrder = GB2312CharToFreqOrder\r
-        self._mTableSize = GB2312_TABLE_SIZE\r
-        self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO\r
-\r
-    def get_order(self, aBuf):\r
-        # for GB2312 encoding, we are interested\r
-        #  first  byte range: 0xb0 -- 0xfe\r
-        #  second byte range: 0xa1 -- 0xfe\r
-        # no validation needed here. State machine has done that\r
-        first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])\r
-        if (first_char >= 0xB0) and (second_char >= 0xA1):\r
-            return 94 * (first_char - 0xB0) + second_char - 0xA1\r
-        else:\r
-            return -1\r
-\r
-\r
-class Big5DistributionAnalysis(CharDistributionAnalysis):\r
-    def __init__(self):\r
-        CharDistributionAnalysis.__init__(self)\r
-        self._mCharToFreqOrder = Big5CharToFreqOrder\r
-        self._mTableSize = BIG5_TABLE_SIZE\r
-        self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO\r
-\r
-    def get_order(self, aBuf):\r
-        # for big5 encoding, we are interested\r
-        #   first  byte range: 0xa4 -- 0xfe\r
-        #   second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe\r
-        # no validation needed here. State machine has done that\r
-        first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])\r
-        if first_char >= 0xA4:\r
-            if second_char >= 0xA1:\r
-                return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63\r
-            else:\r
-                return 157 * (first_char - 0xA4) + second_char - 0x40\r
-        else:\r
-            return -1\r
-\r
-\r
-class SJISDistributionAnalysis(CharDistributionAnalysis):\r
-    def __init__(self):\r
-        CharDistributionAnalysis.__init__(self)\r
-        self._mCharToFreqOrder = JISCharToFreqOrder\r
-        self._mTableSize = JIS_TABLE_SIZE\r
-        self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO\r
-\r
-    def get_order(self, aBuf):\r
-        # for sjis encoding, we are interested\r
-        #   first  byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe\r
-        #   second byte range: 0x40 -- 0x7e,  0x81 -- oxfe\r
-        # no validation needed here. State machine has done that\r
-        first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])\r
-        if (first_char >= 0x81) and (first_char <= 0x9F):\r
-            order = 188 * (first_char - 0x81)\r
-        elif (first_char >= 0xE0) and (first_char <= 0xEF):\r
-            order = 188 * (first_char - 0xE0 + 31)\r
-        else:\r
-            return -1\r
-        order = order + second_char - 0x40\r
-        if second_char > 0x7F:\r
-            order = -1\r
-        return order\r
-\r
-\r
-class EUCJPDistributionAnalysis(CharDistributionAnalysis):\r
-    def __init__(self):\r
-        CharDistributionAnalysis.__init__(self)\r
-        self._mCharToFreqOrder = JISCharToFreqOrder\r
-        self._mTableSize = JIS_TABLE_SIZE\r
-        self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO\r
-\r
-    def get_order(self, aBuf):\r
-        # for euc-JP encoding, we are interested\r
-        #   first  byte range: 0xa0 -- 0xfe\r
-        #   second byte range: 0xa1 -- 0xfe\r
-        # no validation needed here. State machine has done that\r
-        char = wrap_ord(aBuf[0])\r
-        if char >= 0xA0:\r
-            return 94 * (char - 0xA1) + wrap_ord(aBuf[1]) - 0xa1\r
-        else:\r
-            return -1\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE,
+                        EUCTW_TYPICAL_DISTRIBUTION_RATIO)
+from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE,
+                        EUCKR_TYPICAL_DISTRIBUTION_RATIO)
+from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE,
+                         GB2312_TYPICAL_DISTRIBUTION_RATIO)
+from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE,
+                       BIG5_TYPICAL_DISTRIBUTION_RATIO)
+from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE,
+                      JIS_TYPICAL_DISTRIBUTION_RATIO)
+from .compat import wrap_ord
+
+ENOUGH_DATA_THRESHOLD = 1024
+SURE_YES = 0.99
+SURE_NO = 0.01
+MINIMUM_DATA_THRESHOLD = 3
+
+
+class CharDistributionAnalysis:
+    def __init__(self):
+        # Mapping table to get frequency order from char order (get from
+        # GetOrder())
+        self._mCharToFreqOrder = None
+        self._mTableSize = None  # Size of above table
+        # This is a constant value which varies from language to language,
+        # used in calculating confidence.  See
+        # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
+        # for further detail.
+        self._mTypicalDistributionRatio = None
+        self.reset()
+
+    def reset(self):
+        """reset analyser, clear any state"""
+        # If this flag is set to True, detection is done and conclusion has
+        # been made
+        self._mDone = False
+        self._mTotalChars = 0  # Total characters encountered
+        # The number of characters whose frequency order is less than 512
+        self._mFreqChars = 0
+
+    def feed(self, aBuf, aCharLen):
+        """feed a character with known length"""
+        if aCharLen == 2:
+            # we only care about 2-bytes character in our distribution analysis
+            order = self.get_order(aBuf)
+        else:
+            order = -1
+        if order >= 0:
+            self._mTotalChars += 1
+            # order is valid
+            if order < self._mTableSize:
+                if 512 > self._mCharToFreqOrder[order]:
+                    self._mFreqChars += 1
+
+    def get_confidence(self):
+        """return confidence based on existing data"""
+        # if we didn't receive any character in our consideration range,
+        # return negative answer
+        if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD:
+            return SURE_NO
+
+        if self._mTotalChars != self._mFreqChars:
+            r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars)
+                 * self._mTypicalDistributionRatio))
+            if r < SURE_YES:
+                return r
+
+        # normalize confidence (we don't want to be 100% sure)
+        return SURE_YES
+
+    def got_enough_data(self):
+        # It is not necessary to receive all data to draw conclusion.
+        # For charset detection, certain amount of data is enough
+        return self._mTotalChars > ENOUGH_DATA_THRESHOLD
+
+    def get_order(self, aBuf):
+        # We do not handle characters based on the original encoding string,
+        # but convert this encoding string to a number, here called order.
+        # This allows multiple encodings of a language to share one frequency
+        # table.
+        return -1
+
+
+class EUCTWDistributionAnalysis(CharDistributionAnalysis):
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        self._mCharToFreqOrder = EUCTWCharToFreqOrder
+        self._mTableSize = EUCTW_TABLE_SIZE
+        self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aBuf):
+        # for euc-TW encoding, we are interested
+        #   first  byte range: 0xc4 -- 0xfe
+        #   second byte range: 0xa1 -- 0xfe
+        # no validation needed here. State machine has done that
+        first_char = wrap_ord(aBuf[0])
+        if first_char >= 0xC4:
+            return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1
+        else:
+            return -1
+
+
+class EUCKRDistributionAnalysis(CharDistributionAnalysis):
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        self._mCharToFreqOrder = EUCKRCharToFreqOrder
+        self._mTableSize = EUCKR_TABLE_SIZE
+        self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aBuf):
+        # for euc-KR encoding, we are interested
+        #   first  byte range: 0xb0 -- 0xfe
+        #   second byte range: 0xa1 -- 0xfe
+        # no validation needed here. State machine has done that
+        first_char = wrap_ord(aBuf[0])
+        if first_char >= 0xB0:
+            return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1
+        else:
+            return -1
+
+
+class GB2312DistributionAnalysis(CharDistributionAnalysis):
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        self._mCharToFreqOrder = GB2312CharToFreqOrder
+        self._mTableSize = GB2312_TABLE_SIZE
+        self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aBuf):
+        # for GB2312 encoding, we are interested
+        #  first  byte range: 0xb0 -- 0xfe
+        #  second byte range: 0xa1 -- 0xfe
+        # no validation needed here. State machine has done that
+        first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
+        if (first_char >= 0xB0) and (second_char >= 0xA1):
+            return 94 * (first_char - 0xB0) + second_char - 0xA1
+        else:
+            return -1
+
+
+class Big5DistributionAnalysis(CharDistributionAnalysis):
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        self._mCharToFreqOrder = Big5CharToFreqOrder
+        self._mTableSize = BIG5_TABLE_SIZE
+        self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aBuf):
+        # for big5 encoding, we are interested
+        #   first  byte range: 0xa4 -- 0xfe
+        #   second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
+        # no validation needed here. State machine has done that
+        first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
+        if first_char >= 0xA4:
+            if second_char >= 0xA1:
+                return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63
+            else:
+                return 157 * (first_char - 0xA4) + second_char - 0x40
+        else:
+            return -1
+
+
+class SJISDistributionAnalysis(CharDistributionAnalysis):
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        self._mCharToFreqOrder = JISCharToFreqOrder
+        self._mTableSize = JIS_TABLE_SIZE
+        self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aBuf):
+        # for sjis encoding, we are interested
+        #   first  byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
+        #   second byte range: 0x40 -- 0x7e,  0x81 -- oxfe
+        # no validation needed here. State machine has done that
+        first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
+        if (first_char >= 0x81) and (first_char <= 0x9F):
+            order = 188 * (first_char - 0x81)
+        elif (first_char >= 0xE0) and (first_char <= 0xEF):
+            order = 188 * (first_char - 0xE0 + 31)
+        else:
+            return -1
+        order = order + second_char - 0x40
+        if second_char > 0x7F:
+            order = -1
+        return order
+
+
+class EUCJPDistributionAnalysis(CharDistributionAnalysis):
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        self._mCharToFreqOrder = JISCharToFreqOrder
+        self._mTableSize = JIS_TABLE_SIZE
+        self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aBuf):
+        # for euc-JP encoding, we are interested
+        #   first  byte range: 0xa0 -- 0xfe
+        #   second byte range: 0xa1 -- 0xfe
+        # no validation needed here. State machine has done that
+        char = wrap_ord(aBuf[0])
+        if char >= 0xA0:
+            return 94 * (char - 0xA1) + wrap_ord(aBuf[1]) - 0xa1
+        else:
+            return -1
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Communicator client code.\r
-# \r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-# \r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-# \r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-# \r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from . import constants\r
-import sys\r
-from .charsetprober import CharSetProber\r
-\r
-\r
-class CharSetGroupProber(CharSetProber):\r
-    def __init__(self):\r
-        CharSetProber.__init__(self)\r
-        self._mActiveNum = 0\r
-        self._mProbers = []\r
-        self._mBestGuessProber = None\r
-\r
-    def reset(self):\r
-        CharSetProber.reset(self)\r
-        self._mActiveNum = 0\r
-        for prober in self._mProbers:\r
-            if prober:\r
-                prober.reset()\r
-                prober.active = True\r
-                self._mActiveNum += 1\r
-        self._mBestGuessProber = None\r
-\r
-    def get_charset_name(self):\r
-        if not self._mBestGuessProber:\r
-            self.get_confidence()\r
-            if not self._mBestGuessProber:\r
-                return None\r
-#                self._mBestGuessProber = self._mProbers[0]\r
-        return self._mBestGuessProber.get_charset_name()\r
-\r
-    def feed(self, aBuf):\r
-        for prober in self._mProbers:\r
-            if not prober:\r
-                continue\r
-            if not prober.active:\r
-                continue\r
-            st = prober.feed(aBuf)\r
-            if not st:\r
-                continue\r
-            if st == constants.eFoundIt:\r
-                self._mBestGuessProber = prober\r
-                return self.get_state()\r
-            elif st == constants.eNotMe:\r
-                prober.active = False\r
-                self._mActiveNum -= 1\r
-                if self._mActiveNum <= 0:\r
-                    self._mState = constants.eNotMe\r
-                    return self.get_state()\r
-        return self.get_state()\r
-\r
-    def get_confidence(self):\r
-        st = self.get_state()\r
-        if st == constants.eFoundIt:\r
-            return 0.99\r
-        elif st == constants.eNotMe:\r
-            return 0.01\r
-        bestConf = 0.0\r
-        self._mBestGuessProber = None\r
-        for prober in self._mProbers:\r
-            if not prober:\r
-                continue\r
-            if not prober.active:\r
-                if constants._debug:\r
-                    sys.stderr.write(prober.get_charset_name()\r
-                                     + ' not active\n')\r
-                continue\r
-            cf = prober.get_confidence()\r
-            if constants._debug:\r
-                sys.stderr.write('%s confidence = %s\n' %\r
-                                 (prober.get_charset_name(), cf))\r
-            if bestConf < cf:\r
-                bestConf = cf\r
-                self._mBestGuessProber = prober\r
-        if not self._mBestGuessProber:\r
-            return 0.0\r
-        return bestConf\r
-#        else:\r
-#            self._mBestGuessProber = self._mProbers[0]\r
-#            return self._mBestGuessProber.get_confidence()\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+# 
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+# 
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+# 
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+# 
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+from . import constants
+import sys
+from .charsetprober import CharSetProber
+
+
+class CharSetGroupProber(CharSetProber):
+    def __init__(self):
+        CharSetProber.__init__(self)
+        self._mActiveNum = 0
+        self._mProbers = []
+        self._mBestGuessProber = None
+
+    def reset(self):
+        CharSetProber.reset(self)
+        self._mActiveNum = 0
+        for prober in self._mProbers:
+            if prober:
+                prober.reset()
+                prober.active = True
+                self._mActiveNum += 1
+        self._mBestGuessProber = None
+
+    def get_charset_name(self):
+        if not self._mBestGuessProber:
+            self.get_confidence()
+            if not self._mBestGuessProber:
+                return None
+#                self._mBestGuessProber = self._mProbers[0]
+        return self._mBestGuessProber.get_charset_name()
+
+    def feed(self, aBuf):
+        for prober in self._mProbers:
+            if not prober:
+                continue
+            if not prober.active:
+                continue
+            st = prober.feed(aBuf)
+            if not st:
+                continue
+            if st == constants.eFoundIt:
+                self._mBestGuessProber = prober
+                return self.get_state()
+            elif st == constants.eNotMe:
+                prober.active = False
+                self._mActiveNum -= 1
+                if self._mActiveNum <= 0:
+                    self._mState = constants.eNotMe
+                    return self.get_state()
+        return self.get_state()
+
+    def get_confidence(self):
+        st = self.get_state()
+        if st == constants.eFoundIt:
+            return 0.99
+        elif st == constants.eNotMe:
+            return 0.01
+        bestConf = 0.0
+        self._mBestGuessProber = None
+        for prober in self._mProbers:
+            if not prober:
+                continue
+            if not prober.active:
+                if constants._debug:
+                    sys.stderr.write(prober.get_charset_name()
+                                     + ' not active\n')
+                continue
+            cf = prober.get_confidence()
+            if constants._debug:
+                sys.stderr.write('%s confidence = %s\n' %
+                                 (prober.get_charset_name(), cf))
+            if bestConf < cf:
+                bestConf = cf
+                self._mBestGuessProber = prober
+        if not self._mBestGuessProber:
+            return 0.0
+        return bestConf
+#        else:
+#            self._mBestGuessProber = self._mProbers[0]
+#            return self._mBestGuessProber.get_confidence()
@@ -1,61 +1,61 @@
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .constants import eStart\r
-from .compat import wrap_ord\r
-\r
-\r
-class CodingStateMachine:\r
-    def __init__(self, sm):\r
-        self._mModel = sm\r
-        self._mCurrentBytePos = 0\r
-        self._mCurrentCharLen = 0\r
-        self.reset()\r
-\r
-    def reset(self):\r
-        self._mCurrentState = eStart\r
-\r
-    def next_state(self, c):\r
-        # for each byte we get its class\r
-        # if it is first byte, we also get byte length\r
-        # PY3K: aBuf is a byte stream, so c is an int, not a byte\r
-        byteCls = self._mModel['classTable'][wrap_ord(c)]\r
-        if self._mCurrentState == eStart:\r
-            self._mCurrentBytePos = 0\r
-            self._mCurrentCharLen = self._mModel['charLenTable'][byteCls]\r
-        # from byte's class and stateTable, we get its next state\r
-        curr_state = (self._mCurrentState * self._mModel['classFactor']\r
-                      + byteCls)\r
-        self._mCurrentState = self._mModel['stateTable'][curr_state]\r
-        self._mCurrentBytePos += 1\r
-        return self._mCurrentState\r
-\r
-    def get_current_charlen(self):\r
-        return self._mCurrentCharLen\r
-\r
-    def get_coding_state_machine(self):\r
-        return self._mModel['name']\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+from .constants import eStart
+from .compat import wrap_ord
+
+
+class CodingStateMachine:
+    def __init__(self, sm):
+        self._mModel = sm
+        self._mCurrentBytePos = 0
+        self._mCurrentCharLen = 0
+        self.reset()
+
+    def reset(self):
+        self._mCurrentState = eStart
+
+    def next_state(self, c):
+        # for each byte we get its class
+        # if it is first byte, we also get byte length
+        # PY3K: aBuf is a byte stream, so c is an int, not a byte
+        byteCls = self._mModel['classTable'][wrap_ord(c)]
+        if self._mCurrentState == eStart:
+            self._mCurrentBytePos = 0
+            self._mCurrentCharLen = self._mModel['charLenTable'][byteCls]
+        # from byte's class and stateTable, we get its next state
+        curr_state = (self._mCurrentState * self._mModel['classFactor']
+                      + byteCls)
+        self._mCurrentState = self._mModel['stateTable'][curr_state]
+        self._mCurrentBytePos += 1
+        return self._mCurrentState
+
+    def get_current_charlen(self):
+        return self._mCurrentCharLen
+
+    def get_coding_state_machine(self):
+        return self._mModel['name']
similarity index 97%
rename from requests/packages/charade/constants.py
rename to requests/packages/chardet/constants.py
index a3d27de..e4d148b 100644 (file)
@@ -1,39 +1,39 @@
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Universal charset detector code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 2001\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#   Shy Shalom - original C code\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-# \r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-# \r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-_debug = 0\r
-\r
-eDetecting = 0\r
-eFoundIt = 1\r
-eNotMe = 2\r
-\r
-eStart = 0\r
-eError = 1\r
-eItsMe = 2\r
-\r
-SHORTCUT_THRESHOLD = 0.95\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#   Shy Shalom - original C code
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+# 
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+# 
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+_debug = 0
+
+eDetecting = 0
+eFoundIt = 1
+eNotMe = 2
+
+eStart = 0
+eError = 1
+eItsMe = 2
+
+SHORTCUT_THRESHOLD = 0.95
similarity index 97%
rename from requests/packages/charade/cp949prober.py
rename to requests/packages/chardet/cp949prober.py
index 543501f..ff4272f 100644 (file)
@@ -1,44 +1,44 @@
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .mbcharsetprober import MultiByteCharSetProber\r
-from .codingstatemachine import CodingStateMachine\r
-from .chardistribution import EUCKRDistributionAnalysis\r
-from .mbcssm import CP949SMModel\r
-\r
-\r
-class CP949Prober(MultiByteCharSetProber):\r
-    def __init__(self):\r
-        MultiByteCharSetProber.__init__(self)\r
-        self._mCodingSM = CodingStateMachine(CP949SMModel)\r
-        # NOTE: CP949 is a superset of EUC-KR, so the distribution should be\r
-        #       not different.\r
-        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()\r
-        self.reset()\r
-\r
-    def get_charset_name(self):\r
-        return "CP949"\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import EUCKRDistributionAnalysis
+from .mbcssm import CP949SMModel
+
+
+class CP949Prober(MultiByteCharSetProber):
+    def __init__(self):
+        MultiByteCharSetProber.__init__(self)
+        self._mCodingSM = CodingStateMachine(CP949SMModel)
+        # NOTE: CP949 is a superset of EUC-KR, so the distribution should be
+        #       not different.
+        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
+        self.reset()
+
+    def get_charset_name(self):
+        return "CP949"
similarity index 97%
rename from requests/packages/charade/escprober.py
rename to requests/packages/chardet/escprober.py
index 0063935..80a844f 100644 (file)
@@ -1,86 +1,86 @@
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from . import constants\r
-from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,\r
-                    ISO2022KRSMModel)\r
-from .charsetprober import CharSetProber\r
-from .codingstatemachine import CodingStateMachine\r
-from .compat import wrap_ord\r
-\r
-\r
-class EscCharSetProber(CharSetProber):\r
-    def __init__(self):\r
-        CharSetProber.__init__(self)\r
-        self._mCodingSM = [\r
-            CodingStateMachine(HZSMModel),\r
-            CodingStateMachine(ISO2022CNSMModel),\r
-            CodingStateMachine(ISO2022JPSMModel),\r
-            CodingStateMachine(ISO2022KRSMModel)\r
-        ]\r
-        self.reset()\r
-\r
-    def reset(self):\r
-        CharSetProber.reset(self)\r
-        for codingSM in self._mCodingSM:\r
-            if not codingSM:\r
-                continue\r
-            codingSM.active = True\r
-            codingSM.reset()\r
-        self._mActiveSM = len(self._mCodingSM)\r
-        self._mDetectedCharset = None\r
-\r
-    def get_charset_name(self):\r
-        return self._mDetectedCharset\r
-\r
-    def get_confidence(self):\r
-        if self._mDetectedCharset:\r
-            return 0.99\r
-        else:\r
-            return 0.00\r
-\r
-    def feed(self, aBuf):\r
-        for c in aBuf:\r
-            # PY3K: aBuf is a byte array, so c is an int, not a byte\r
-            for codingSM in self._mCodingSM:\r
-                if not codingSM:\r
-                    continue\r
-                if not codingSM.active:\r
-                    continue\r
-                codingState = codingSM.next_state(wrap_ord(c))\r
-                if codingState == constants.eError:\r
-                    codingSM.active = False\r
-                    self._mActiveSM -= 1\r
-                    if self._mActiveSM <= 0:\r
-                        self._mState = constants.eNotMe\r
-                        return self.get_state()\r
-                elif codingState == constants.eItsMe:\r
-                    self._mState = constants.eFoundIt\r
-                    self._mDetectedCharset = codingSM.get_coding_state_machine()  # nopep8\r
-                    return self.get_state()\r
-\r
-        return self.get_state()\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+from . import constants
+from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
+                    ISO2022KRSMModel)
+from .charsetprober import CharSetProber
+from .codingstatemachine import CodingStateMachine
+from .compat import wrap_ord
+
+
+class EscCharSetProber(CharSetProber):
+    def __init__(self):
+        CharSetProber.__init__(self)
+        self._mCodingSM = [
+            CodingStateMachine(HZSMModel),
+            CodingStateMachine(ISO2022CNSMModel),
+            CodingStateMachine(ISO2022JPSMModel),
+            CodingStateMachine(ISO2022KRSMModel)
+        ]
+        self.reset()
+
+    def reset(self):
+        CharSetProber.reset(self)
+        for codingSM in self._mCodingSM:
+            if not codingSM:
+                continue
+            codingSM.active = True
+            codingSM.reset()
+        self._mActiveSM = len(self._mCodingSM)
+        self._mDetectedCharset = None
+
+    def get_charset_name(self):
+        return self._mDetectedCharset
+
+    def get_confidence(self):
+        if self._mDetectedCharset:
+            return 0.99
+        else:
+            return 0.00
+
+    def feed(self, aBuf):
+        for c in aBuf:
+            # PY3K: aBuf is a byte array, so c is an int, not a byte
+            for codingSM in self._mCodingSM:
+                if not codingSM:
+                    continue
+                if not codingSM.active:
+                    continue
+                codingState = codingSM.next_state(wrap_ord(c))
+                if codingState == constants.eError:
+                    codingSM.active = False
+                    self._mActiveSM -= 1
+                    if self._mActiveSM <= 0:
+                        self._mState = constants.eNotMe
+                        return self.get_state()
+                elif codingState == constants.eItsMe:
+                    self._mState = constants.eFoundIt
+                    self._mDetectedCharset = codingSM.get_coding_state_machine()  # nopep8
+                    return self.get_state()
+
+        return self.get_state()
similarity index 97%
rename from requests/packages/charade/escsm.py
rename to requests/packages/chardet/escsm.py
index 1cf3aa6..bd302b4 100644 (file)
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .constants import eStart, eError, eItsMe\r
-\r
-HZ_cls = (\r
-1,0,0,0,0,0,0,0,  # 00 - 07\r
-0,0,0,0,0,0,0,0,  # 08 - 0f\r
-0,0,0,0,0,0,0,0,  # 10 - 17\r
-0,0,0,1,0,0,0,0,  # 18 - 1f\r
-0,0,0,0,0,0,0,0,  # 20 - 27\r
-0,0,0,0,0,0,0,0,  # 28 - 2f\r
-0,0,0,0,0,0,0,0,  # 30 - 37\r
-0,0,0,0,0,0,0,0,  # 38 - 3f\r
-0,0,0,0,0,0,0,0,  # 40 - 47\r
-0,0,0,0,0,0,0,0,  # 48 - 4f\r
-0,0,0,0,0,0,0,0,  # 50 - 57\r
-0,0,0,0,0,0,0,0,  # 58 - 5f\r
-0,0,0,0,0,0,0,0,  # 60 - 67\r
-0,0,0,0,0,0,0,0,  # 68 - 6f\r
-0,0,0,0,0,0,0,0,  # 70 - 77\r
-0,0,0,4,0,5,2,0,  # 78 - 7f\r
-1,1,1,1,1,1,1,1,  # 80 - 87\r
-1,1,1,1,1,1,1,1,  # 88 - 8f\r
-1,1,1,1,1,1,1,1,  # 90 - 97\r
-1,1,1,1,1,1,1,1,  # 98 - 9f\r
-1,1,1,1,1,1,1,1,  # a0 - a7\r
-1,1,1,1,1,1,1,1,  # a8 - af\r
-1,1,1,1,1,1,1,1,  # b0 - b7\r
-1,1,1,1,1,1,1,1,  # b8 - bf\r
-1,1,1,1,1,1,1,1,  # c0 - c7\r
-1,1,1,1,1,1,1,1,  # c8 - cf\r
-1,1,1,1,1,1,1,1,  # d0 - d7\r
-1,1,1,1,1,1,1,1,  # d8 - df\r
-1,1,1,1,1,1,1,1,  # e0 - e7\r
-1,1,1,1,1,1,1,1,  # e8 - ef\r
-1,1,1,1,1,1,1,1,  # f0 - f7\r
-1,1,1,1,1,1,1,1,  # f8 - ff\r
-)\r
-\r
-HZ_st = (\r
-eStart,eError,     3,eStart,eStart,eStart,eError,eError,# 00-07\r
-eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f\r
-eItsMe,eItsMe,eError,eError,eStart,eStart,     4,eError,# 10-17\r
-     5,eError,     6,eError,     5,     5,     4,eError,# 18-1f\r
-     4,eError,     4,     4,     4,eError,     4,eError,# 20-27\r
-     4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f\r
-)\r
-\r
-HZCharLenTable = (0, 0, 0, 0, 0, 0)\r
-\r
-HZSMModel = {'classTable': HZ_cls,\r
-             'classFactor': 6,\r
-             'stateTable': HZ_st,\r
-             'charLenTable': HZCharLenTable,\r
-             'name': "HZ-GB-2312"}\r
-\r
-ISO2022CN_cls = (\r
-2,0,0,0,0,0,0,0,  # 00 - 07\r
-0,0,0,0,0,0,0,0,  # 08 - 0f\r
-0,0,0,0,0,0,0,0,  # 10 - 17\r
-0,0,0,1,0,0,0,0,  # 18 - 1f\r
-0,0,0,0,0,0,0,0,  # 20 - 27\r
-0,3,0,0,0,0,0,0,  # 28 - 2f\r
-0,0,0,0,0,0,0,0,  # 30 - 37\r
-0,0,0,0,0,0,0,0,  # 38 - 3f\r
-0,0,0,4,0,0,0,0,  # 40 - 47\r
-0,0,0,0,0,0,0,0,  # 48 - 4f\r
-0,0,0,0,0,0,0,0,  # 50 - 57\r
-0,0,0,0,0,0,0,0,  # 58 - 5f\r
-0,0,0,0,0,0,0,0,  # 60 - 67\r
-0,0,0,0,0,0,0,0,  # 68 - 6f\r
-0,0,0,0,0,0,0,0,  # 70 - 77\r
-0,0,0,0,0,0,0,0,  # 78 - 7f\r
-2,2,2,2,2,2,2,2,  # 80 - 87\r
-2,2,2,2,2,2,2,2,  # 88 - 8f\r
-2,2,2,2,2,2,2,2,  # 90 - 97\r
-2,2,2,2,2,2,2,2,  # 98 - 9f\r
-2,2,2,2,2,2,2,2,  # a0 - a7\r
-2,2,2,2,2,2,2,2,  # a8 - af\r
-2,2,2,2,2,2,2,2,  # b0 - b7\r
-2,2,2,2,2,2,2,2,  # b8 - bf\r
-2,2,2,2,2,2,2,2,  # c0 - c7\r
-2,2,2,2,2,2,2,2,  # c8 - cf\r
-2,2,2,2,2,2,2,2,  # d0 - d7\r
-2,2,2,2,2,2,2,2,  # d8 - df\r
-2,2,2,2,2,2,2,2,  # e0 - e7\r
-2,2,2,2,2,2,2,2,  # e8 - ef\r
-2,2,2,2,2,2,2,2,  # f0 - f7\r
-2,2,2,2,2,2,2,2,  # f8 - ff\r
-)\r
-\r
-ISO2022CN_st = (\r
-eStart,     3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07\r
-eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f\r
-eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17\r
-eItsMe,eItsMe,eItsMe,eError,eError,eError,     4,eError,# 18-1f\r
-eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27\r
-     5,     6,eError,eError,eError,eError,eError,eError,# 28-2f\r
-eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37\r
-eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f\r
-)\r
-\r
-ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0)\r
-\r
-ISO2022CNSMModel = {'classTable': ISO2022CN_cls,\r
-                    'classFactor': 9,\r
-                    'stateTable': ISO2022CN_st,\r
-                    'charLenTable': ISO2022CNCharLenTable,\r
-                    'name': "ISO-2022-CN"}\r
-\r
-ISO2022JP_cls = (\r
-2,0,0,0,0,0,0,0,  # 00 - 07\r
-0,0,0,0,0,0,2,2,  # 08 - 0f\r
-0,0,0,0,0,0,0,0,  # 10 - 17\r
-0,0,0,1,0,0,0,0,  # 18 - 1f\r
-0,0,0,0,7,0,0,0,  # 20 - 27\r
-3,0,0,0,0,0,0,0,  # 28 - 2f\r
-0,0,0,0,0,0,0,0,  # 30 - 37\r
-0,0,0,0,0,0,0,0,  # 38 - 3f\r
-6,0,4,0,8,0,0,0,  # 40 - 47\r
-0,9,5,0,0,0,0,0,  # 48 - 4f\r
-0,0,0,0,0,0,0,0,  # 50 - 57\r
-0,0,0,0,0,0,0,0,  # 58 - 5f\r
-0,0,0,0,0,0,0,0,  # 60 - 67\r
-0,0,0,0,0,0,0,0,  # 68 - 6f\r
-0,0,0,0,0,0,0,0,  # 70 - 77\r
-0,0,0,0,0,0,0,0,  # 78 - 7f\r
-2,2,2,2,2,2,2,2,  # 80 - 87\r
-2,2,2,2,2,2,2,2,  # 88 - 8f\r
-2,2,2,2,2,2,2,2,  # 90 - 97\r
-2,2,2,2,2,2,2,2,  # 98 - 9f\r
-2,2,2,2,2,2,2,2,  # a0 - a7\r
-2,2,2,2,2,2,2,2,  # a8 - af\r
-2,2,2,2,2,2,2,2,  # b0 - b7\r
-2,2,2,2,2,2,2,2,  # b8 - bf\r
-2,2,2,2,2,2,2,2,  # c0 - c7\r
-2,2,2,2,2,2,2,2,  # c8 - cf\r
-2,2,2,2,2,2,2,2,  # d0 - d7\r
-2,2,2,2,2,2,2,2,  # d8 - df\r
-2,2,2,2,2,2,2,2,  # e0 - e7\r
-2,2,2,2,2,2,2,2,  # e8 - ef\r
-2,2,2,2,2,2,2,2,  # f0 - f7\r
-2,2,2,2,2,2,2,2,  # f8 - ff\r
-)\r
-\r
-ISO2022JP_st = (\r
-eStart,     3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07\r
-eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f\r
-eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17\r
-eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f\r
-eError,     5,eError,eError,eError,     4,eError,eError,# 20-27\r
-eError,eError,eError,     6,eItsMe,eError,eItsMe,eError,# 28-2f\r
-eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37\r
-eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f\r
-eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47\r
-)\r
-\r
-ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)\r
-\r
-ISO2022JPSMModel = {'classTable': ISO2022JP_cls,\r
-                    'classFactor': 10,\r
-                    'stateTable': ISO2022JP_st,\r
-                    'charLenTable': ISO2022JPCharLenTable,\r
-                    'name': "ISO-2022-JP"}\r
-\r
-ISO2022KR_cls = (\r
-2,0,0,0,0,0,0,0,  # 00 - 07\r
-0,0,0,0,0,0,0,0,  # 08 - 0f\r
-0,0,0,0,0,0,0,0,  # 10 - 17\r
-0,0,0,1,0,0,0,0,  # 18 - 1f\r
-0,0,0,0,3,0,0,0,  # 20 - 27\r
-0,4,0,0,0,0,0,0,  # 28 - 2f\r
-0,0,0,0,0,0,0,0,  # 30 - 37\r
-0,0,0,0,0,0,0,0,  # 38 - 3f\r
-0,0,0,5,0,0,0,0,  # 40 - 47\r
-0,0,0,0,0,0,0,0,  # 48 - 4f\r
-0,0,0,0,0,0,0,0,  # 50 - 57\r
-0,0,0,0,0,0,0,0,  # 58 - 5f\r
-0,0,0,0,0,0,0,0,  # 60 - 67\r
-0,0,0,0,0,0,0,0,  # 68 - 6f\r
-0,0,0,0,0,0,0,0,  # 70 - 77\r
-0,0,0,0,0,0,0,0,  # 78 - 7f\r
-2,2,2,2,2,2,2,2,  # 80 - 87\r
-2,2,2,2,2,2,2,2,  # 88 - 8f\r
-2,2,2,2,2,2,2,2,  # 90 - 97\r
-2,2,2,2,2,2,2,2,  # 98 - 9f\r
-2,2,2,2,2,2,2,2,  # a0 - a7\r
-2,2,2,2,2,2,2,2,  # a8 - af\r
-2,2,2,2,2,2,2,2,  # b0 - b7\r
-2,2,2,2,2,2,2,2,  # b8 - bf\r
-2,2,2,2,2,2,2,2,  # c0 - c7\r
-2,2,2,2,2,2,2,2,  # c8 - cf\r
-2,2,2,2,2,2,2,2,  # d0 - d7\r
-2,2,2,2,2,2,2,2,  # d8 - df\r
-2,2,2,2,2,2,2,2,  # e0 - e7\r
-2,2,2,2,2,2,2,2,  # e8 - ef\r
-2,2,2,2,2,2,2,2,  # f0 - f7\r
-2,2,2,2,2,2,2,2,  # f8 - ff\r
-)\r
-\r
-ISO2022KR_st = (\r
-eStart,     3,eError,eStart,eStart,eStart,eError,eError,# 00-07\r
-eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f\r
-eItsMe,eItsMe,eError,eError,eError,     4,eError,eError,# 10-17\r
-eError,eError,eError,eError,     5,eError,eError,eError,# 18-1f\r
-eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27\r
-)\r
-\r
-ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0)\r
-\r
-ISO2022KRSMModel = {'classTable': ISO2022KR_cls,\r
-                    'classFactor': 6,\r
-                    'stateTable': ISO2022KR_st,\r
-                    'charLenTable': ISO2022KRCharLenTable,\r
-                    'name': "ISO-2022-KR"}\r
-\r
-# flake8: noqa\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+from .constants import eStart, eError, eItsMe
+
+HZ_cls = (
+1,0,0,0,0,0,0,0,  # 00 - 07
+0,0,0,0,0,0,0,0,  # 08 - 0f
+0,0,0,0,0,0,0,0,  # 10 - 17
+0,0,0,1,0,0,0,0,  # 18 - 1f
+0,0,0,0,0,0,0,0,  # 20 - 27
+0,0,0,0,0,0,0,0,  # 28 - 2f
+0,0,0,0,0,0,0,0,  # 30 - 37
+0,0,0,0,0,0,0,0,  # 38 - 3f
+0,0,0,0,0,0,0,0,  # 40 - 47
+0,0,0,0,0,0,0,0,  # 48 - 4f
+0,0,0,0,0,0,0,0,  # 50 - 57
+0,0,0,0,0,0,0,0,  # 58 - 5f
+0,0,0,0,0,0,0,0,  # 60 - 67
+0,0,0,0,0,0,0,0,  # 68 - 6f
+0,0,0,0,0,0,0,0,  # 70 - 77
+0,0,0,4,0,5,2,0,  # 78 - 7f
+1,1,1,1,1,1,1,1,  # 80 - 87
+1,1,1,1,1,1,1,1,  # 88 - 8f
+1,1,1,1,1,1,1,1,  # 90 - 97
+1,1,1,1,1,1,1,1,  # 98 - 9f
+1,1,1,1,1,1,1,1,  # a0 - a7
+1,1,1,1,1,1,1,1,  # a8 - af
+1,1,1,1,1,1,1,1,  # b0 - b7
+1,1,1,1,1,1,1,1,  # b8 - bf
+1,1,1,1,1,1,1,1,  # c0 - c7
+1,1,1,1,1,1,1,1,  # c8 - cf
+1,1,1,1,1,1,1,1,  # d0 - d7
+1,1,1,1,1,1,1,1,  # d8 - df
+1,1,1,1,1,1,1,1,  # e0 - e7
+1,1,1,1,1,1,1,1,  # e8 - ef
+1,1,1,1,1,1,1,1,  # f0 - f7
+1,1,1,1,1,1,1,1,  # f8 - ff
+)
+
+HZ_st = (
+eStart,eError,     3,eStart,eStart,eStart,eError,eError,# 00-07
+eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
+eItsMe,eItsMe,eError,eError,eStart,eStart,     4,eError,# 10-17
+     5,eError,     6,eError,     5,     5,     4,eError,# 18-1f
+     4,eError,     4,     4,     4,eError,     4,eError,# 20-27
+     4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f
+)
+
+HZCharLenTable = (0, 0, 0, 0, 0, 0)
+
+HZSMModel = {'classTable': HZ_cls,
+             'classFactor': 6,
+             'stateTable': HZ_st,
+             'charLenTable': HZCharLenTable,
+             'name': "HZ-GB-2312"}
+
+ISO2022CN_cls = (
+2,0,0,0,0,0,0,0,  # 00 - 07
+0,0,0,0,0,0,0,0,  # 08 - 0f
+0,0,0,0,0,0,0,0,  # 10 - 17
+0,0,0,1,0,0,0,0,  # 18 - 1f
+0,0,0,0,0,0,0,0,  # 20 - 27
+0,3,0,0,0,0,0,0,  # 28 - 2f
+0,0,0,0,0,0,0,0,  # 30 - 37
+0,0,0,0,0,0,0,0,  # 38 - 3f
+0,0,0,4,0,0,0,0,  # 40 - 47
+0,0,0,0,0,0,0,0,  # 48 - 4f
+0,0,0,0,0,0,0,0,  # 50 - 57
+0,0,0,0,0,0,0,0,  # 58 - 5f
+0,0,0,0,0,0,0,0,  # 60 - 67
+0,0,0,0,0,0,0,0,  # 68 - 6f
+0,0,0,0,0,0,0,0,  # 70 - 77
+0,0,0,0,0,0,0,0,  # 78 - 7f
+2,2,2,2,2,2,2,2,  # 80 - 87
+2,2,2,2,2,2,2,2,  # 88 - 8f
+2,2,2,2,2,2,2,2,  # 90 - 97
+2,2,2,2,2,2,2,2,  # 98 - 9f
+2,2,2,2,2,2,2,2,  # a0 - a7
+2,2,2,2,2,2,2,2,  # a8 - af
+2,2,2,2,2,2,2,2,  # b0 - b7
+2,2,2,2,2,2,2,2,  # b8 - bf
+2,2,2,2,2,2,2,2,  # c0 - c7
+2,2,2,2,2,2,2,2,  # c8 - cf
+2,2,2,2,2,2,2,2,  # d0 - d7
+2,2,2,2,2,2,2,2,  # d8 - df
+2,2,2,2,2,2,2,2,  # e0 - e7
+2,2,2,2,2,2,2,2,  # e8 - ef
+2,2,2,2,2,2,2,2,  # f0 - f7
+2,2,2,2,2,2,2,2,  # f8 - ff
+)
+
+ISO2022CN_st = (
+eStart,     3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
+eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f
+eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
+eItsMe,eItsMe,eItsMe,eError,eError,eError,     4,eError,# 18-1f
+eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27
+     5,     6,eError,eError,eError,eError,eError,eError,# 28-2f
+eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37
+eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f
+)
+
+ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0)
+
+ISO2022CNSMModel = {'classTable': ISO2022CN_cls,
+                    'classFactor': 9,
+                    'stateTable': ISO2022CN_st,
+                    'charLenTable': ISO2022CNCharLenTable,
+                    'name': "ISO-2022-CN"}
+
+ISO2022JP_cls = (
+2,0,0,0,0,0,0,0,  # 00 - 07
+0,0,0,0,0,0,2,2,  # 08 - 0f
+0,0,0,0,0,0,0,0,  # 10 - 17
+0,0,0,1,0,0,0,0,  # 18 - 1f
+0,0,0,0,7,0,0,0,  # 20 - 27
+3,0,0,0,0,0,0,0,  # 28 - 2f
+0,0,0,0,0,0,0,0,  # 30 - 37
+0,0,0,0,0,0,0,0,  # 38 - 3f
+6,0,4,0,8,0,0,0,  # 40 - 47
+0,9,5,0,0,0,0,0,  # 48 - 4f
+0,0,0,0,0,0,0,0,  # 50 - 57
+0,0,0,0,0,0,0,0,  # 58 - 5f
+0,0,0,0,0,0,0,0,  # 60 - 67
+0,0,0,0,0,0,0,0,  # 68 - 6f
+0,0,0,0,0,0,0,0,  # 70 - 77
+0,0,0,0,0,0,0,0,  # 78 - 7f
+2,2,2,2,2,2,2,2,  # 80 - 87
+2,2,2,2,2,2,2,2,  # 88 - 8f
+2,2,2,2,2,2,2,2,  # 90 - 97
+2,2,2,2,2,2,2,2,  # 98 - 9f
+2,2,2,2,2,2,2,2,  # a0 - a7
+2,2,2,2,2,2,2,2,  # a8 - af
+2,2,2,2,2,2,2,2,  # b0 - b7
+2,2,2,2,2,2,2,2,  # b8 - bf
+2,2,2,2,2,2,2,2,  # c0 - c7
+2,2,2,2,2,2,2,2,  # c8 - cf
+2,2,2,2,2,2,2,2,  # d0 - d7
+2,2,2,2,2,2,2,2,  # d8 - df
+2,2,2,2,2,2,2,2,  # e0 - e7
+2,2,2,2,2,2,2,2,  # e8 - ef
+2,2,2,2,2,2,2,2,  # f0 - f7
+2,2,2,2,2,2,2,2,  # f8 - ff
+)
+
+ISO2022JP_st = (
+eStart,     3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
+eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f
+eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
+eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f
+eError,     5,eError,eError,eError,     4,eError,eError,# 20-27
+eError,eError,eError,     6,eItsMe,eError,eItsMe,eError,# 28-2f
+eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37
+eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f
+eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47
+)
+
+ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+
+ISO2022JPSMModel = {'classTable': ISO2022JP_cls,
+                    'classFactor': 10,
+                    'stateTable': ISO2022JP_st,
+                    'charLenTable': ISO2022JPCharLenTable,
+                    'name': "ISO-2022-JP"}
+
+ISO2022KR_cls = (
+2,0,0,0,0,0,0,0,  # 00 - 07
+0,0,0,0,0,0,0,0,  # 08 - 0f
+0,0,0,0,0,0,0,0,  # 10 - 17
+0,0,0,1,0,0,0,0,  # 18 - 1f
+0,0,0,0,3,0,0,0,  # 20 - 27
+0,4,0,0,0,0,0,0,  # 28 - 2f
+0,0,0,0,0,0,0,0,  # 30 - 37
+0,0,0,0,0,0,0,0,  # 38 - 3f
+0,0,0,5,0,0,0,0,  # 40 - 47
+0,0,0,0,0,0,0,0,  # 48 - 4f
+0,0,0,0,0,0,0,0,  # 50 - 57
+0,0,0,0,0,0,0,0,  # 58 - 5f
+0,0,0,0,0,0,0,0,  # 60 - 67
+0,0,0,0,0,0,0,0,  # 68 - 6f
+0,0,0,0,0,0,0,0,  # 70 - 77
+0,0,0,0,0,0,0,0,  # 78 - 7f
+2,2,2,2,2,2,2,2,  # 80 - 87
+2,2,2,2,2,2,2,2,  # 88 - 8f
+2,2,2,2,2,2,2,2,  # 90 - 97
+2,2,2,2,2,2,2,2,  # 98 - 9f
+2,2,2,2,2,2,2,2,  # a0 - a7
+2,2,2,2,2,2,2,2,  # a8 - af
+2,2,2,2,2,2,2,2,  # b0 - b7
+2,2,2,2,2,2,2,2,  # b8 - bf
+2,2,2,2,2,2,2,2,  # c0 - c7
+2,2,2,2,2,2,2,2,  # c8 - cf
+2,2,2,2,2,2,2,2,  # d0 - d7
+2,2,2,2,2,2,2,2,  # d8 - df
+2,2,2,2,2,2,2,2,  # e0 - e7
+2,2,2,2,2,2,2,2,  # e8 - ef
+2,2,2,2,2,2,2,2,  # f0 - f7
+2,2,2,2,2,2,2,2,  # f8 - ff
+)
+
+ISO2022KR_st = (
+eStart,     3,eError,eStart,eStart,eStart,eError,eError,# 00-07
+eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
+eItsMe,eItsMe,eError,eError,eError,     4,eError,eError,# 10-17
+eError,eError,eError,eError,     5,eError,eError,eError,# 18-1f
+eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27
+)
+
+ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0)
+
+ISO2022KRSMModel = {'classTable': ISO2022KR_cls,
+                    'classFactor': 6,
+                    'stateTable': ISO2022KR_st,
+                    'charLenTable': ISO2022KRCharLenTable,
+                    'name': "ISO-2022-KR"}
+
+# flake8: noqa
similarity index 97%
rename from requests/packages/charade/eucjpprober.py
rename to requests/packages/chardet/eucjpprober.py
index d70cfbb..8e64fdc 100644 (file)
@@ -1,90 +1,90 @@
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-import sys\r
-from . import constants\r
-from .mbcharsetprober import MultiByteCharSetProber\r
-from .codingstatemachine import CodingStateMachine\r
-from .chardistribution import EUCJPDistributionAnalysis\r
-from .jpcntx import EUCJPContextAnalysis\r
-from .mbcssm import EUCJPSMModel\r
-\r
-\r
-class EUCJPProber(MultiByteCharSetProber):\r
-    def __init__(self):\r
-        MultiByteCharSetProber.__init__(self)\r
-        self._mCodingSM = CodingStateMachine(EUCJPSMModel)\r
-        self._mDistributionAnalyzer = EUCJPDistributionAnalysis()\r
-        self._mContextAnalyzer = EUCJPContextAnalysis()\r
-        self.reset()\r
-\r
-    def reset(self):\r
-        MultiByteCharSetProber.reset(self)\r
-        self._mContextAnalyzer.reset()\r
-\r
-    def get_charset_name(self):\r
-        return "EUC-JP"\r
-\r
-    def feed(self, aBuf):\r
-        aLen = len(aBuf)\r
-        for i in range(0, aLen):\r
-            # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte\r
-            codingState = self._mCodingSM.next_state(aBuf[i])\r
-            if codingState == constants.eError:\r
-                if constants._debug:\r
-                    sys.stderr.write(self.get_charset_name()\r
-                                     + ' prober hit error at byte ' + str(i)\r
-                                     + '\n')\r
-                self._mState = constants.eNotMe\r
-                break\r
-            elif codingState == constants.eItsMe:\r
-                self._mState = constants.eFoundIt\r
-                break\r
-            elif codingState == constants.eStart:\r
-                charLen = self._mCodingSM.get_current_charlen()\r
-                if i == 0:\r
-                    self._mLastChar[1] = aBuf[0]\r
-                    self._mContextAnalyzer.feed(self._mLastChar, charLen)\r
-                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)\r
-                else:\r
-                    self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen)\r
-                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],\r
-                                                     charLen)\r
-\r
-        self._mLastChar[0] = aBuf[aLen - 1]\r
-\r
-        if self.get_state() == constants.eDetecting:\r
-            if (self._mContextAnalyzer.got_enough_data() and\r
-               (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):\r
-                self._mState = constants.eFoundIt\r
-\r
-        return self.get_state()\r
-\r
-    def get_confidence(self):\r
-        contxtCf = self._mContextAnalyzer.get_confidence()\r
-        distribCf = self._mDistributionAnalyzer.get_confidence()\r
-        return max(contxtCf, distribCf)\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+import sys
+from . import constants
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import EUCJPDistributionAnalysis
+from .jpcntx import EUCJPContextAnalysis
+from .mbcssm import EUCJPSMModel
+
+
+class EUCJPProber(MultiByteCharSetProber):
+    def __init__(self):
+        MultiByteCharSetProber.__init__(self)
+        self._mCodingSM = CodingStateMachine(EUCJPSMModel)
+        self._mDistributionAnalyzer = EUCJPDistributionAnalysis()
+        self._mContextAnalyzer = EUCJPContextAnalysis()
+        self.reset()
+
+    def reset(self):
+        MultiByteCharSetProber.reset(self)
+        self._mContextAnalyzer.reset()
+
+    def get_charset_name(self):
+        return "EUC-JP"
+
+    def feed(self, aBuf):
+        aLen = len(aBuf)
+        for i in range(0, aLen):
+            # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte
+            codingState = self._mCodingSM.next_state(aBuf[i])
+            if codingState == constants.eError:
+                if constants._debug:
+                    sys.stderr.write(self.get_charset_name()
+                                     + ' prober hit error at byte ' + str(i)
+                                     + '\n')
+                self._mState = constants.eNotMe
+                break
+            elif codingState == constants.eItsMe:
+                self._mState = constants.eFoundIt
+                break
+            elif codingState == constants.eStart:
+                charLen = self._mCodingSM.get_current_charlen()
+                if i == 0:
+                    self._mLastChar[1] = aBuf[0]
+                    self._mContextAnalyzer.feed(self._mLastChar, charLen)
+                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
+                else:
+                    self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen)
+                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
+                                                     charLen)
+
+        self._mLastChar[0] = aBuf[aLen - 1]
+
+        if self.get_state() == constants.eDetecting:
+            if (self._mContextAnalyzer.got_enough_data() and
+               (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
+                self._mState = constants.eFoundIt
+
+        return self.get_state()
+
+    def get_confidence(self):
+        contxtCf = self._mContextAnalyzer.get_confidence()
+        distribCf = self._mDistributionAnalyzer.get_confidence()
+        return max(contxtCf, distribCf)
similarity index 97%
rename from requests/packages/charade/euckrprober.py
rename to requests/packages/chardet/euckrprober.py
index def3e42..5982a46 100644 (file)
@@ -1,42 +1,42 @@
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .mbcharsetprober import MultiByteCharSetProber\r
-from .codingstatemachine import CodingStateMachine\r
-from .chardistribution import EUCKRDistributionAnalysis\r
-from .mbcssm import EUCKRSMModel\r
-\r
-\r
-class EUCKRProber(MultiByteCharSetProber):\r
-    def __init__(self):\r
-        MultiByteCharSetProber.__init__(self)\r
-        self._mCodingSM = CodingStateMachine(EUCKRSMModel)\r
-        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()\r
-        self.reset()\r
-\r
-    def get_charset_name(self):\r
-        return "EUC-KR"\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import EUCKRDistributionAnalysis
+from .mbcssm import EUCKRSMModel
+
+
+class EUCKRProber(MultiByteCharSetProber):
+    def __init__(self):
+        MultiByteCharSetProber.__init__(self)
+        self._mCodingSM = CodingStateMachine(EUCKRSMModel)
+        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
+        self.reset()
+
+    def get_charset_name(self):
+        return "EUC-KR"
similarity index 97%
rename from requests/packages/charade/euctwprober.py
rename to requests/packages/chardet/euctwprober.py
index e601adf..fe652fe 100644 (file)
@@ -1,41 +1,41 @@
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-# \r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-# \r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .mbcharsetprober import MultiByteCharSetProber\r
-from .codingstatemachine import CodingStateMachine\r
-from .chardistribution import EUCTWDistributionAnalysis\r
-from .mbcssm import EUCTWSMModel\r
-\r
-class EUCTWProber(MultiByteCharSetProber):\r
-    def __init__(self):\r
-        MultiByteCharSetProber.__init__(self)\r
-        self._mCodingSM = CodingStateMachine(EUCTWSMModel)\r
-        self._mDistributionAnalyzer = EUCTWDistributionAnalysis()\r
-        self.reset()\r
-\r
-    def get_charset_name(self):\r
-        return "EUC-TW"\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+# 
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+# 
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import EUCTWDistributionAnalysis
+from .mbcssm import EUCTWSMModel
+
+class EUCTWProber(MultiByteCharSetProber):
+    def __init__(self):
+        MultiByteCharSetProber.__init__(self)
+        self._mCodingSM = CodingStateMachine(EUCTWSMModel)
+        self._mDistributionAnalyzer = EUCTWDistributionAnalysis()
+        self.reset()
+
+    def get_charset_name(self):
+        return "EUC-TW"
similarity index 97%
rename from requests/packages/charade/gb2312prober.py
rename to requests/packages/chardet/gb2312prober.py
index 643fe25..0325a2d 100644 (file)
@@ -1,41 +1,41 @@
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-# \r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-# \r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .mbcharsetprober import MultiByteCharSetProber\r
-from .codingstatemachine import CodingStateMachine\r
-from .chardistribution import GB2312DistributionAnalysis\r
-from .mbcssm import GB2312SMModel\r
-\r
-class GB2312Prober(MultiByteCharSetProber):\r
-    def __init__(self):\r
-        MultiByteCharSetProber.__init__(self)\r
-        self._mCodingSM = CodingStateMachine(GB2312SMModel)\r
-        self._mDistributionAnalyzer = GB2312DistributionAnalysis()\r
-        self.reset()\r
-\r
-    def get_charset_name(self):\r
-        return "GB2312"\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+# 
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+# 
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import GB2312DistributionAnalysis
+from .mbcssm import GB2312SMModel
+
+class GB2312Prober(MultiByteCharSetProber):
+    def __init__(self):
+        MultiByteCharSetProber.__init__(self)
+        self._mCodingSM = CodingStateMachine(GB2312SMModel)
+        self._mDistributionAnalyzer = GB2312DistributionAnalysis()
+        self.reset()
+
+    def get_charset_name(self):
+        return "GB2312"
similarity index 97%
rename from requests/packages/charade/hebrewprober.py
rename to requests/packages/chardet/hebrewprober.py
index 90d171f..ba225c5 100644 (file)
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Universal charset detector code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-#          Shy Shalom\r
-# Portions created by the Initial Developer are Copyright (C) 2005\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .charsetprober import CharSetProber\r
-from .constants import eNotMe, eDetecting\r
-from .compat import wrap_ord\r
-\r
-# This prober doesn't actually recognize a language or a charset.\r
-# It is a helper prober for the use of the Hebrew model probers\r
-\r
-### General ideas of the Hebrew charset recognition ###\r
-#\r
-# Four main charsets exist in Hebrew:\r
-# "ISO-8859-8" - Visual Hebrew\r
-# "windows-1255" - Logical Hebrew\r
-# "ISO-8859-8-I" - Logical Hebrew\r
-# "x-mac-hebrew" - ?? Logical Hebrew ??\r
-#\r
-# Both "ISO" charsets use a completely identical set of code points, whereas\r
-# "windows-1255" and "x-mac-hebrew" are two different proper supersets of\r
-# these code points. windows-1255 defines additional characters in the range\r
-# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific\r
-# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.\r
-# x-mac-hebrew defines similar additional code points but with a different\r
-# mapping.\r
-#\r
-# As far as an average Hebrew text with no diacritics is concerned, all four\r
-# charsets are identical with respect to code points. Meaning that for the\r
-# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters\r
-# (including final letters).\r
-#\r
-# The dominant difference between these charsets is their directionality.\r
-# "Visual" directionality means that the text is ordered as if the renderer is\r
-# not aware of a BIDI rendering algorithm. The renderer sees the text and\r
-# draws it from left to right. The text itself when ordered naturally is read\r
-# backwards. A buffer of Visual Hebrew generally looks like so:\r
-# "[last word of first line spelled backwards] [whole line ordered backwards\r
-# and spelled backwards] [first word of first line spelled backwards]\r
-# [end of line] [last word of second line] ... etc' "\r
-# adding punctuation marks, numbers and English text to visual text is\r
-# naturally also "visual" and from left to right.\r
-#\r
-# "Logical" directionality means the text is ordered "naturally" according to\r
-# the order it is read. It is the responsibility of the renderer to display\r
-# the text from right to left. A BIDI algorithm is used to place general\r
-# punctuation marks, numbers and English text in the text.\r
-#\r
-# Texts in x-mac-hebrew are almost impossible to find on the Internet. From\r
-# what little evidence I could find, it seems that its general directionality\r
-# is Logical.\r
-#\r
-# To sum up all of the above, the Hebrew probing mechanism knows about two\r
-# charsets:\r
-# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are\r
-#    backwards while line order is natural. For charset recognition purposes\r
-#    the line order is unimportant (In fact, for this implementation, even\r
-#    word order is unimportant).\r
-# Logical Hebrew - "windows-1255" - normal, naturally ordered text.\r
-#\r
-# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be\r
-#    specifically identified.\r
-# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew\r
-#    that contain special punctuation marks or diacritics is displayed with\r
-#    some unconverted characters showing as question marks. This problem might\r
-#    be corrected using another model prober for x-mac-hebrew. Due to the fact\r
-#    that x-mac-hebrew texts are so rare, writing another model prober isn't\r
-#    worth the effort and performance hit.\r
-#\r
-#### The Prober ####\r
-#\r
-# The prober is divided between two SBCharSetProbers and a HebrewProber,\r
-# all of which are managed, created, fed data, inquired and deleted by the\r
-# SBCSGroupProber. The two SBCharSetProbers identify that the text is in\r
-# fact some kind of Hebrew, Logical or Visual. The final decision about which\r
-# one is it is made by the HebrewProber by combining final-letter scores\r
-# with the scores of the two SBCharSetProbers to produce a final answer.\r
-#\r
-# The SBCSGroupProber is responsible for stripping the original text of HTML\r
-# tags, English characters, numbers, low-ASCII punctuation characters, spaces\r
-# and new lines. It reduces any sequence of such characters to a single space.\r
-# The buffer fed to each prober in the SBCS group prober is pure text in\r
-# high-ASCII.\r
-# The two SBCharSetProbers (model probers) share the same language model:\r
-# Win1255Model.\r
-# The first SBCharSetProber uses the model normally as any other\r
-# SBCharSetProber does, to recognize windows-1255, upon which this model was\r
-# built. The second SBCharSetProber is told to make the pair-of-letter\r
-# lookup in the language model backwards. This in practice exactly simulates\r
-# a visual Hebrew model using the windows-1255 logical Hebrew model.\r
-#\r
-# The HebrewProber is not using any language model. All it does is look for\r
-# final-letter evidence suggesting the text is either logical Hebrew or visual\r
-# Hebrew. Disjointed from the model probers, the results of the HebrewProber\r
-# alone are meaningless. HebrewProber always returns 0.00 as confidence\r
-# since it never identifies a charset by itself. Instead, the pointer to the\r
-# HebrewProber is passed to the model probers as a helper "Name Prober".\r
-# When the Group prober receives a positive identification from any prober,\r
-# it asks for the name of the charset identified. If the prober queried is a\r
-# Hebrew model prober, the model prober forwards the call to the\r
-# HebrewProber to make the final decision. In the HebrewProber, the\r
-# decision is made according to the final-letters scores maintained and Both\r
-# model probers scores. The answer is returned in the form of the name of the\r
-# charset identified, either "windows-1255" or "ISO-8859-8".\r
-\r
-# windows-1255 / ISO-8859-8 code points of interest\r
-FINAL_KAF = 0xea\r
-NORMAL_KAF = 0xeb\r
-FINAL_MEM = 0xed\r
-NORMAL_MEM = 0xee\r
-FINAL_NUN = 0xef\r
-NORMAL_NUN = 0xf0\r
-FINAL_PE = 0xf3\r
-NORMAL_PE = 0xf4\r
-FINAL_TSADI = 0xf5\r
-NORMAL_TSADI = 0xf6\r
-\r
-# Minimum Visual vs Logical final letter score difference.\r
-# If the difference is below this, don't rely solely on the final letter score\r
-# distance.\r
-MIN_FINAL_CHAR_DISTANCE = 5\r
-\r
-# Minimum Visual vs Logical model score difference.\r
-# If the difference is below this, don't rely at all on the model score\r
-# distance.\r
-MIN_MODEL_DISTANCE = 0.01\r
-\r
-VISUAL_HEBREW_NAME = "ISO-8859-8"\r
-LOGICAL_HEBREW_NAME = "windows-1255"\r
-\r
-\r
-class HebrewProber(CharSetProber):\r
-    def __init__(self):\r
-        CharSetProber.__init__(self)\r
-        self._mLogicalProber = None\r
-        self._mVisualProber = None\r
-        self.reset()\r
-\r
-    def reset(self):\r
-        self._mFinalCharLogicalScore = 0\r
-        self._mFinalCharVisualScore = 0\r
-        # The two last characters seen in the previous buffer,\r
-        # mPrev and mBeforePrev are initialized to space in order to simulate\r
-        # a word delimiter at the beginning of the data\r
-        self._mPrev = ' '\r
-        self._mBeforePrev = ' '\r
-        # These probers are owned by the group prober.\r
-\r
-    def set_model_probers(self, logicalProber, visualProber):\r
-        self._mLogicalProber = logicalProber\r
-        self._mVisualProber = visualProber\r
-\r
-    def is_final(self, c):\r
-        return wrap_ord(c) in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE,\r
-                               FINAL_TSADI]\r
-\r
-    def is_non_final(self, c):\r
-        # The normal Tsadi is not a good Non-Final letter due to words like\r
-        # 'lechotet' (to chat) containing an apostrophe after the tsadi. This\r
-        # apostrophe is converted to a space in FilterWithoutEnglishLetters\r
-        # causing the Non-Final tsadi to appear at an end of a word even\r
-        # though this is not the case in the original text.\r
-        # The letters Pe and Kaf rarely display a related behavior of not being\r
-        # a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak'\r
-        # for example legally end with a Non-Final Pe or Kaf. However, the\r
-        # benefit of these letters as Non-Final letters outweighs the damage\r
-        # since these words are quite rare.\r
-        return wrap_ord(c) in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE]\r
-\r
-    def feed(self, aBuf):\r
-        # Final letter analysis for logical-visual decision.\r
-        # Look for evidence that the received buffer is either logical Hebrew\r
-        # or visual Hebrew.\r
-        # The following cases are checked:\r
-        # 1) A word longer than 1 letter, ending with a final letter. This is\r
-        #    an indication that the text is laid out "naturally" since the\r
-        #    final letter really appears at the end. +1 for logical score.\r
-        # 2) A word longer than 1 letter, ending with a Non-Final letter. In\r
-        #    normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi,\r
-        #    should not end with the Non-Final form of that letter. Exceptions\r
-        #    to this rule are mentioned above in isNonFinal(). This is an\r
-        #    indication that the text is laid out backwards. +1 for visual\r
-        #    score\r
-        # 3) A word longer than 1 letter, starting with a final letter. Final\r
-        #    letters should not appear at the beginning of a word. This is an\r
-        #    indication that the text is laid out backwards. +1 for visual\r
-        #    score.\r
-        #\r
-        # The visual score and logical score are accumulated throughout the\r
-        # text and are finally checked against each other in GetCharSetName().\r
-        # No checking for final letters in the middle of words is done since\r
-        # that case is not an indication for either Logical or Visual text.\r
-        #\r
-        # We automatically filter out all 7-bit characters (replace them with\r
-        # spaces) so the word boundary detection works properly. [MAP]\r
-\r
-        if self.get_state() == eNotMe:\r
-            # Both model probers say it's not them. No reason to continue.\r
-            return eNotMe\r
-\r
-        aBuf = self.filter_high_bit_only(aBuf)\r
-\r
-        for cur in aBuf:\r
-            if cur == ' ':\r
-                # We stand on a space - a word just ended\r
-                if self._mBeforePrev != ' ':\r
-                    # next-to-last char was not a space so self._mPrev is not a\r
-                    # 1 letter word\r
-                    if self.is_final(self._mPrev):\r
-                        # case (1) [-2:not space][-1:final letter][cur:space]\r
-                        self._mFinalCharLogicalScore += 1\r
-                    elif self.is_non_final(self._mPrev):\r
-                        # case (2) [-2:not space][-1:Non-Final letter][\r
-                        #  cur:space]\r
-                        self._mFinalCharVisualScore += 1\r
-            else:\r
-                # Not standing on a space\r
-                if ((self._mBeforePrev == ' ') and\r
-                        (self.is_final(self._mPrev)) and (cur != ' ')):\r
-                    # case (3) [-2:space][-1:final letter][cur:not space]\r
-                    self._mFinalCharVisualScore += 1\r
-            self._mBeforePrev = self._mPrev\r
-            self._mPrev = cur\r
-\r
-        # Forever detecting, till the end or until both model probers return\r
-        # eNotMe (handled above)\r
-        return eDetecting\r
-\r
-    def get_charset_name(self):\r
-        # Make the decision: is it Logical or Visual?\r
-        # If the final letter score distance is dominant enough, rely on it.\r
-        finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore\r
-        if finalsub >= MIN_FINAL_CHAR_DISTANCE:\r
-            return LOGICAL_HEBREW_NAME\r
-        if finalsub <= -MIN_FINAL_CHAR_DISTANCE:\r
-            return VISUAL_HEBREW_NAME\r
-\r
-        # It's not dominant enough, try to rely on the model scores instead.\r
-        modelsub = (self._mLogicalProber.get_confidence()\r
-                    - self._mVisualProber.get_confidence())\r
-        if modelsub > MIN_MODEL_DISTANCE:\r
-            return LOGICAL_HEBREW_NAME\r
-        if modelsub < -MIN_MODEL_DISTANCE:\r
-            return VISUAL_HEBREW_NAME\r
-\r
-        # Still no good, back to final letter distance, maybe it'll save the\r
-        # day.\r
-        if finalsub < 0.0:\r
-            return VISUAL_HEBREW_NAME\r
-\r
-        # (finalsub > 0 - Logical) or (don't know what to do) default to\r
-        # Logical.\r
-        return LOGICAL_HEBREW_NAME\r
-\r
-    def get_state(self):\r
-        # Remain active as long as any of the model probers are active.\r
-        if (self._mLogicalProber.get_state() == eNotMe) and \\r
-           (self._mVisualProber.get_state() == eNotMe):\r
-            return eNotMe\r
-        return eDetecting\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+#          Shy Shalom
+# Portions created by the Initial Developer are Copyright (C) 2005
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+from .charsetprober import CharSetProber
+from .constants import eNotMe, eDetecting
+from .compat import wrap_ord
+
+# This prober doesn't actually recognize a language or a charset.
+# It is a helper prober for the use of the Hebrew model probers
+
+### General ideas of the Hebrew charset recognition ###
+#
+# Four main charsets exist in Hebrew:
+# "ISO-8859-8" - Visual Hebrew
+# "windows-1255" - Logical Hebrew
+# "ISO-8859-8-I" - Logical Hebrew
+# "x-mac-hebrew" - ?? Logical Hebrew ??
+#
+# Both "ISO" charsets use a completely identical set of code points, whereas
+# "windows-1255" and "x-mac-hebrew" are two different proper supersets of
+# these code points. windows-1255 defines additional characters in the range
+# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific
+# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.
+# x-mac-hebrew defines similar additional code points but with a different
+# mapping.
+#
+# As far as an average Hebrew text with no diacritics is concerned, all four
+# charsets are identical with respect to code points. Meaning that for the
+# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters
+# (including final letters).
+#
+# The dominant difference between these charsets is their directionality.
+# "Visual" directionality means that the text is ordered as if the renderer is
+# not aware of a BIDI rendering algorithm. The renderer sees the text and
+# draws it from left to right. The text itself when ordered naturally is read
+# backwards. A buffer of Visual Hebrew generally looks like so:
+# "[last word of first line spelled backwards] [whole line ordered backwards
+# and spelled backwards] [first word of first line spelled backwards]
+# [end of line] [last word of second line] ... etc' "
+# adding punctuation marks, numbers and English text to visual text is
+# naturally also "visual" and from left to right.
+#
+# "Logical" directionality means the text is ordered "naturally" according to
+# the order it is read. It is the responsibility of the renderer to display
+# the text from right to left. A BIDI algorithm is used to place general
+# punctuation marks, numbers and English text in the text.
+#
+# Texts in x-mac-hebrew are almost impossible to find on the Internet. From
+# what little evidence I could find, it seems that its general directionality
+# is Logical.
+#
+# To sum up all of the above, the Hebrew probing mechanism knows about two
+# charsets:
+# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are
+#    backwards while line order is natural. For charset recognition purposes
+#    the line order is unimportant (In fact, for this implementation, even
+#    word order is unimportant).
+# Logical Hebrew - "windows-1255" - normal, naturally ordered text.
+#
+# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be
+#    specifically identified.
+# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew
+#    that contain special punctuation marks or diacritics is displayed with
+#    some unconverted characters showing as question marks. This problem might
+#    be corrected using another model prober for x-mac-hebrew. Due to the fact
+#    that x-mac-hebrew texts are so rare, writing another model prober isn't
+#    worth the effort and performance hit.
+#
+#### The Prober ####
+#
+# The prober is divided between two SBCharSetProbers and a HebrewProber,
+# all of which are managed, created, fed data, inquired and deleted by the
+# SBCSGroupProber. The two SBCharSetProbers identify that the text is in
+# fact some kind of Hebrew, Logical or Visual. The final decision about which
+# one is it is made by the HebrewProber by combining final-letter scores
+# with the scores of the two SBCharSetProbers to produce a final answer.
+#
+# The SBCSGroupProber is responsible for stripping the original text of HTML
+# tags, English characters, numbers, low-ASCII punctuation characters, spaces
+# and new lines. It reduces any sequence of such characters to a single space.
+# The buffer fed to each prober in the SBCS group prober is pure text in
+# high-ASCII.
+# The two SBCharSetProbers (model probers) share the same language model:
+# Win1255Model.
+# The first SBCharSetProber uses the model normally as any other
+# SBCharSetProber does, to recognize windows-1255, upon which this model was
+# built. The second SBCharSetProber is told to make the pair-of-letter
+# lookup in the language model backwards. This in practice exactly simulates
+# a visual Hebrew model using the windows-1255 logical Hebrew model.
+#
+# The HebrewProber is not using any language model. All it does is look for
+# final-letter evidence suggesting the text is either logical Hebrew or visual
+# Hebrew. Disjointed from the model probers, the results of the HebrewProber
+# alone are meaningless. HebrewProber always returns 0.00 as confidence
+# since it never identifies a charset by itself. Instead, the pointer to the
+# HebrewProber is passed to the model probers as a helper "Name Prober".
+# When the Group prober receives a positive identification from any prober,
+# it asks for the name of the charset identified. If the prober queried is a
+# Hebrew model prober, the model prober forwards the call to the
+# HebrewProber to make the final decision. In the HebrewProber, the
+# decision is made according to the final-letters scores maintained and Both
+# model probers scores. The answer is returned in the form of the name of the
+# charset identified, either "windows-1255" or "ISO-8859-8".
+
+# windows-1255 / ISO-8859-8 code points of interest
+FINAL_KAF = 0xea
+NORMAL_KAF = 0xeb
+FINAL_MEM = 0xed
+NORMAL_MEM = 0xee
+FINAL_NUN = 0xef
+NORMAL_NUN = 0xf0
+FINAL_PE = 0xf3
+NORMAL_PE = 0xf4
+FINAL_TSADI = 0xf5
+NORMAL_TSADI = 0xf6
+
+# Minimum Visual vs Logical final letter score difference.
+# If the difference is below this, don't rely solely on the final letter score
+# distance.
+MIN_FINAL_CHAR_DISTANCE = 5
+
+# Minimum Visual vs Logical model score difference.
+# If the difference is below this, don't rely at all on the model score
+# distance.
+MIN_MODEL_DISTANCE = 0.01
+
+VISUAL_HEBREW_NAME = "ISO-8859-8"
+LOGICAL_HEBREW_NAME = "windows-1255"
+
+
+class HebrewProber(CharSetProber):
+    def __init__(self):
+        CharSetProber.__init__(self)
+        self._mLogicalProber = None
+        self._mVisualProber = None
+        self.reset()
+
+    def reset(self):
+        self._mFinalCharLogicalScore = 0
+        self._mFinalCharVisualScore = 0
+        # The two last characters seen in the previous buffer,
+        # mPrev and mBeforePrev are initialized to space in order to simulate
+        # a word delimiter at the beginning of the data
+        self._mPrev = ' '
+        self._mBeforePrev = ' '
+        # These probers are owned by the group prober.
+
+    def set_model_probers(self, logicalProber, visualProber):
+        self._mLogicalProber = logicalProber
+        self._mVisualProber = visualProber
+
+    def is_final(self, c):
+        return wrap_ord(c) in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE,
+                               FINAL_TSADI]
+
+    def is_non_final(self, c):
+        # The normal Tsadi is not a good Non-Final letter due to words like
+        # 'lechotet' (to chat) containing an apostrophe after the tsadi. This
+        # apostrophe is converted to a space in FilterWithoutEnglishLetters
+        # causing the Non-Final tsadi to appear at an end of a word even
+        # though this is not the case in the original text.
+        # The letters Pe and Kaf rarely display a related behavior of not being
+        # a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak'
+        # for example legally end with a Non-Final Pe or Kaf. However, the
+        # benefit of these letters as Non-Final letters outweighs the damage
+        # since these words are quite rare.
+        return wrap_ord(c) in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE]
+
+    def feed(self, aBuf):
+        # Final letter analysis for logical-visual decision.
+        # Look for evidence that the received buffer is either logical Hebrew
+        # or visual Hebrew.
+        # The following cases are checked:
+        # 1) A word longer than 1 letter, ending with a final letter. This is
+        #    an indication that the text is laid out "naturally" since the
+        #    final letter really appears at the end. +1 for logical score.
+        # 2) A word longer than 1 letter, ending with a Non-Final letter. In
+        #    normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi,
+        #    should not end with the Non-Final form of that letter. Exceptions
+        #    to this rule are mentioned above in isNonFinal(). This is an
+        #    indication that the text is laid out backwards. +1 for visual
+        #    score
+        # 3) A word longer than 1 letter, starting with a final letter. Final
+        #    letters should not appear at the beginning of a word. This is an
+        #    indication that the text is laid out backwards. +1 for visual
+        #    score.
+        #
+        # The visual score and logical score are accumulated throughout the
+        # text and are finally checked against each other in GetCharSetName().
+        # No checking for final letters in the middle of words is done since
+        # that case is not an indication for either Logical or Visual text.
+        #
+        # We automatically filter out all 7-bit characters (replace them with
+        # spaces) so the word boundary detection works properly. [MAP]
+
+        if self.get_state() == eNotMe:
+            # Both model probers say it's not them. No reason to continue.
+            return eNotMe
+
+        aBuf = self.filter_high_bit_only(aBuf)
+
+        for cur in aBuf:
+            if cur == ' ':
+                # We stand on a space - a word just ended
+                if self._mBeforePrev != ' ':
+                    # next-to-last char was not a space so self._mPrev is not a
+                    # 1 letter word
+                    if self.is_final(self._mPrev):
+                        # case (1) [-2:not space][-1:final letter][cur:space]
+                        self._mFinalCharLogicalScore += 1
+                    elif self.is_non_final(self._mPrev):
+                        # case (2) [-2:not space][-1:Non-Final letter][
+                        #  cur:space]
+                        self._mFinalCharVisualScore += 1
+            else:
+                # Not standing on a space
+                if ((self._mBeforePrev == ' ') and
+                        (self.is_final(self._mPrev)) and (cur != ' ')):
+                    # case (3) [-2:space][-1:final letter][cur:not space]
+                    self._mFinalCharVisualScore += 1
+            self._mBeforePrev = self._mPrev
+            self._mPrev = cur
+
+        # Forever detecting, till the end or until both model probers return
+        # eNotMe (handled above)
+        return eDetecting
+
+    def get_charset_name(self):
+        # Make the decision: is it Logical or Visual?
+        # If the final letter score distance is dominant enough, rely on it.
+        finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore
+        if finalsub >= MIN_FINAL_CHAR_DISTANCE:
+            return LOGICAL_HEBREW_NAME
+        if finalsub <= -MIN_FINAL_CHAR_DISTANCE:
+            return VISUAL_HEBREW_NAME
+
+        # It's not dominant enough, try to rely on the model scores instead.
+        modelsub = (self._mLogicalProber.get_confidence()
+                    - self._mVisualProber.get_confidence())
+        if modelsub > MIN_MODEL_DISTANCE:
+            return LOGICAL_HEBREW_NAME
+        if modelsub < -MIN_MODEL_DISTANCE:
+            return VISUAL_HEBREW_NAME
+
+        # Still no good, back to final letter distance, maybe it'll save the
+        # day.
+        if finalsub < 0.0:
+            return VISUAL_HEBREW_NAME
+
+        # (finalsub > 0 - Logical) or (don't know what to do) default to
+        # Logical.
+        return LOGICAL_HEBREW_NAME
+
+    def get_state(self):
+        # Remain active as long as any of the model probers are active.
+        if (self._mLogicalProber.get_state() == eNotMe) and \
+           (self._mVisualProber.get_state() == eNotMe):
+            return eNotMe
+        return eDetecting
similarity index 98%
rename from requests/packages/charade/jpcntx.py
rename to requests/packages/chardet/jpcntx.py
index e4e9e4d..f7f69ba 100644 (file)
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Communicator client code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .compat import wrap_ord\r
-\r
-NUM_OF_CATEGORY = 6\r
-DONT_KNOW = -1\r
-ENOUGH_REL_THRESHOLD = 100\r
-MAX_REL_THRESHOLD = 1000\r
-MINIMUM_DATA_THRESHOLD = 4\r
-\r
-# This is hiragana 2-char sequence table, the number in each cell represents its frequency category\r
-jp2CharContext = (\r
-(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1),\r
-(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4),\r
-(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2),\r
-(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4),\r
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),\r
-(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4),\r
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),\r
-(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3),\r
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),\r
-(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4),\r
-(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4),\r
-(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3),\r
-(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3),\r
-(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3),\r
-(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4),\r
-(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3),\r
-(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4),\r
-(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3),\r
-(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5),\r
-(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3),\r
-(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5),\r
-(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4),\r
-(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4),\r
-(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3),\r
-(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3),\r
-(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3),\r
-(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5),\r
-(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4),\r
-(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5),\r
-(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3),\r
-(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4),\r
-(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4),\r
-(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4),\r
-(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1),\r
-(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0),\r
-(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3),\r
-(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0),\r
-(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3),\r
-(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3),\r
-(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5),\r
-(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4),\r
-(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5),\r
-(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3),\r
-(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3),\r
-(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3),\r
-(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3),\r
-(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4),\r
-(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4),\r
-(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2),\r
-(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3),\r
-(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3),\r
-(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3),\r
-(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3),\r
-(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4),\r
-(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3),\r
-(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4),\r
-(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3),\r
-(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3),\r
-(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4),\r
-(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4),\r
-(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3),\r
-(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4),\r
-(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4),\r
-(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3),\r
-(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4),\r
-(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4),\r
-(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4),\r
-(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3),\r
-(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2),\r
-(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2),\r
-(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3),\r
-(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3),\r
-(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5),\r
-(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3),\r
-(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4),\r
-(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4),\r
-(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4),\r
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),\r
-(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3),\r
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1),\r
-(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2),\r
-(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3),\r
-(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),\r
-)\r
-\r
-class JapaneseContextAnalysis:\r
-    def __init__(self):\r
-        self.reset()\r
-\r
-    def reset(self):\r
-        self._mTotalRel = 0  # total sequence received\r
-        # category counters, each interger counts sequence in its category\r
-        self._mRelSample = [0] * NUM_OF_CATEGORY\r
-        # if last byte in current buffer is not the last byte of a character,\r
-        # we need to know how many bytes to skip in next buffer\r
-        self._mNeedToSkipCharNum = 0\r
-        self._mLastCharOrder = -1  # The order of previous char\r
-        # If this flag is set to True, detection is done and conclusion has\r
-        # been made\r
-        self._mDone = False\r
-\r
-    def feed(self, aBuf, aLen):\r
-        if self._mDone:\r
-            return\r
-\r
-        # The buffer we got is byte oriented, and a character may span in more than one\r
-        # buffers. In case the last one or two byte in last buffer is not\r
-        # complete, we record how many byte needed to complete that character\r
-        # and skip these bytes here.  We can choose to record those bytes as\r
-        # well and analyse the character once it is complete, but since a\r
-        # character will not make much difference, by simply skipping\r
-        # this character will simply our logic and improve performance.\r
-        i = self._mNeedToSkipCharNum\r
-        while i < aLen:\r
-            order, charLen = self.get_order(aBuf[i:i + 2])\r
-            i += charLen\r
-            if i > aLen:\r
-                self._mNeedToSkipCharNum = i - aLen\r
-                self._mLastCharOrder = -1\r
-            else:\r
-                if (order != -1) and (self._mLastCharOrder != -1):\r
-                    self._mTotalRel += 1\r
-                    if self._mTotalRel > MAX_REL_THRESHOLD:\r
-                        self._mDone = True\r
-                        break\r
-                    self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1\r
-                self._mLastCharOrder = order\r
-\r
-    def got_enough_data(self):\r
-        return self._mTotalRel > ENOUGH_REL_THRESHOLD\r
-\r
-    def get_confidence(self):\r
-        # This is just one way to calculate confidence. It works well for me.\r
-        if self._mTotalRel > MINIMUM_DATA_THRESHOLD:\r
-            return float(self._mTotalRel - self._mRelSample[0]) / self._mTotalRel\r
-        else:\r
-            return DONT_KNOW\r
-\r
-    def get_order(self, aBuf):\r
-        return -1, 1\r
-\r
-class SJISContextAnalysis(JapaneseContextAnalysis):\r
-    def get_order(self, aBuf):\r
-        if not aBuf:\r
-            return -1, 1\r
-        # find out current char's byte length\r
-        first_char = wrap_ord(aBuf[0])\r
-        if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)):\r
-            charLen = 2\r
-        else:\r
-            charLen = 1\r
-\r
-        # return its order if it is hiragana\r
-        if len(aBuf) > 1:\r
-            second_char = wrap_ord(aBuf[1])\r
-            if (first_char == 202) and (0x9F <= second_char <= 0xF1):\r
-                return second_char - 0x9F, charLen\r
-\r
-        return -1, charLen\r
-\r
-class EUCJPContextAnalysis(JapaneseContextAnalysis):\r
-    def get_order(self, aBuf):\r
-        if not aBuf:\r
-            return -1, 1\r
-        # find out current char's byte length\r
-        first_char = wrap_ord(aBuf[0])\r
-        if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE):\r
-            charLen = 2\r
-        elif first_char == 0x8F:\r
-            charLen = 3\r
-        else:\r
-            charLen = 1\r
-\r
-        # return its order if it is hiragana\r
-        if len(aBuf) > 1:\r
-            second_char = wrap_ord(aBuf[1])\r
-            if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3):\r
-                return second_char - 0xA1, charLen\r
-\r
-        return -1, charLen\r
-\r
-# flake8: noqa\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+from .compat import wrap_ord
+
+NUM_OF_CATEGORY = 6
+DONT_KNOW = -1
+ENOUGH_REL_THRESHOLD = 100
+MAX_REL_THRESHOLD = 1000
+MINIMUM_DATA_THRESHOLD = 4
+
+# This is hiragana 2-char sequence table, the number in each cell represents its frequency category
+jp2CharContext = (
+(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1),
+(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4),
+(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2),
+(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4),
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
+(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4),
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
+(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3),
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
+(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4),
+(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4),
+(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3),
+(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3),
+(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3),
+(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4),
+(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3),
+(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4),
+(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3),
+(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5),
+(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3),
+(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5),
+(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4),
+(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4),
+(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3),
+(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3),
+(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3),
+(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5),
+(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4),
+(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5),
+(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3),
+(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4),
+(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4),
+(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4),
+(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1),
+(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0),
+(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3),
+(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0),
+(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3),
+(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3),
+(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5),
+(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4),
+(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5),
+(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3),
+(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3),
+(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3),
+(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3),
+(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4),
+(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4),
+(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2),
+(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3),
+(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3),
+(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3),
+(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3),
+(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4),
+(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3),
+(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4),
+(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3),
+(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3),
+(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4),
+(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4),
+(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3),
+(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4),
+(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4),
+(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3),
+(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4),
+(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4),
+(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4),
+(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3),
+(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2),
+(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2),
+(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3),
+(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3),
+(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5),
+(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3),
+(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4),
+(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4),
+(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4),
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
+(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3),
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1),
+(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2),
+(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3),
+(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),
+)
+
+class JapaneseContextAnalysis:
+    def __init__(self):
+        self.reset()
+
+    def reset(self):
+        self._mTotalRel = 0  # total sequence received
+        # category counters, each interger counts sequence in its category
+        self._mRelSample = [0] * NUM_OF_CATEGORY
+        # if last byte in current buffer is not the last byte of a character,
+        # we need to know how many bytes to skip in next buffer
+        self._mNeedToSkipCharNum = 0
+        self._mLastCharOrder = -1  # The order of previous char
+        # If this flag is set to True, detection is done and conclusion has
+        # been made
+        self._mDone = False
+
+    def feed(self, aBuf, aLen):
+        if self._mDone:
+            return
+
+        # The buffer we got is byte oriented, and a character may span in more than one
+        # buffers. In case the last one or two byte in last buffer is not
+        # complete, we record how many byte needed to complete that character
+        # and skip these bytes here.  We can choose to record those bytes as
+        # well and analyse the character once it is complete, but since a
+        # character will not make much difference, by simply skipping
+        # this character will simply our logic and improve performance.
+        i = self._mNeedToSkipCharNum
+        while i < aLen:
+            order, charLen = self.get_order(aBuf[i:i + 2])
+            i += charLen
+            if i > aLen:
+                self._mNeedToSkipCharNum = i - aLen
+                self._mLastCharOrder = -1
+            else:
+                if (order != -1) and (self._mLastCharOrder != -1):
+                    self._mTotalRel += 1
+                    if self._mTotalRel > MAX_REL_THRESHOLD:
+                        self._mDone = True
+                        break
+                    self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1
+                self._mLastCharOrder = order
+
+    def got_enough_data(self):
+        return self._mTotalRel > ENOUGH_REL_THRESHOLD
+
+    def get_confidence(self):
+        # This is just one way to calculate confidence. It works well for me.
+        if self._mTotalRel > MINIMUM_DATA_THRESHOLD:
+            return (self._mTotalRel - self._mRelSample[0]) / self._mTotalRel
+        else:
+            return DONT_KNOW
+
+    def get_order(self, aBuf):
+        return -1, 1
+
+class SJISContextAnalysis(JapaneseContextAnalysis):
+    def get_order(self, aBuf):
+        if not aBuf:
+            return -1, 1
+        # find out current char's byte length
+        first_char = wrap_ord(aBuf[0])
+        if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)):
+            charLen = 2
+        else:
+            charLen = 1
+
+        # return its order if it is hiragana
+        if len(aBuf) > 1:
+            second_char = wrap_ord(aBuf[1])
+            if (first_char == 202) and (0x9F <= second_char <= 0xF1):
+                return second_char - 0x9F, charLen
+
+        return -1, charLen
+
+class EUCJPContextAnalysis(JapaneseContextAnalysis):
+    def get_order(self, aBuf):
+        if not aBuf:
+            return -1, 1
+        # find out current char's byte length
+        first_char = wrap_ord(aBuf[0])
+        if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE):
+            charLen = 2
+        elif first_char == 0x8F:
+            charLen = 3
+        else:
+            charLen = 1
+
+        # return its order if it is hiragana
+        if len(aBuf) > 1:
+            second_char = wrap_ord(aBuf[1])
+            if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3):
+                return second_char - 0xA1, charLen
+
+        return -1, charLen
+
+# flake8: noqa
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Communicator client code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-# 255: Control characters that usually does not exist in any text\r
-# 254: Carriage/Return\r
-# 253: symbol (punctuation) that does not belong to word\r
-# 252: 0 - 9\r
-\r
-# Character Mapping Table:\r
-# this table is modified base on win1251BulgarianCharToOrderMap, so\r
-# only number <64 is sure valid\r
-\r
-Latin5_BulgarianCharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30\r
-253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82,  # 40\r
-110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253,  # 50\r
-253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71,  # 60\r
-116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253,  # 70\r
-194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,  # 80\r
-210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,  # 90\r
- 81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238,  # a0\r
- 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30,  # b0\r
- 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56,  # c0\r
-  1, 18,  9, 20, 11,  3, 23, 15,  2, 26, 12, 10, 14,  6,  4, 13,  # d0\r
-  7,  8,  5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16,  # e0\r
- 62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253,  # f0\r
-)\r
-\r
-win1251BulgarianCharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30\r
-253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82,  # 40\r
-110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253,  # 50\r
-253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71,  # 60\r
-116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253,  # 70\r
-206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220,  # 80\r
-221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229,  # 90\r
- 88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240,  # a0\r
- 73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250,  # b0\r
- 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30,  # c0\r
- 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56,  # d0\r
-  1, 18,  9, 20, 11,  3, 23, 15,  2, 26, 12, 10, 14,  6,  4, 13,  # e0\r
-  7,  8,  5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16,  # f0\r
-)\r
-\r
-# Model Table:\r
-# total sequences: 100%\r
-# first 512 sequences: 96.9392%\r
-# first 1024 sequences:3.0618%\r
-# rest  sequences:     0.2992%\r
-# negative sequences:  0.0020%\r
-BulgarianLangModel = (\r
-0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,\r
-3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1,\r
-0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0,\r
-0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0,\r
-0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0,\r
-1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0,\r
-0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0,\r
-0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
-3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3,\r
-2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1,\r
-3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2,\r
-1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0,\r
-3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1,\r
-1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0,\r
-2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2,\r
-2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0,\r
-3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2,\r
-1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,\r
-2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2,\r
-2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,\r
-3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2,\r
-1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0,\r
-2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2,\r
-2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0,\r
-2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2,\r
-1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0,\r
-2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2,\r
-1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,\r
-3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2,\r
-1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0,\r
-3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1,\r
-1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0,\r
-2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1,\r
-1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0,\r
-2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2,\r
-1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,\r
-2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1,\r
-1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0,\r
-3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,\r
-1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2,\r
-1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,\r
-2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2,\r
-1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,\r
-2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2,\r
-1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,\r
-1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1,\r
-0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
-1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2,\r
-1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,\r
-2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1,\r
-1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,\r
-1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1,\r
-0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1,\r
-0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,\r
-2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,\r
-0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,\r
-2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,\r
-1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,\r
-0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,\r
-0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1,\r
-1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,\r
-1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-)\r
-\r
-Latin5BulgarianModel = {\r
-  'charToOrderMap': Latin5_BulgarianCharToOrderMap,\r
-  'precedenceMatrix': BulgarianLangModel,\r
-  'mTypicalPositiveRatio': 0.969392,\r
-  'keepEnglishLetter': False,\r
-  'charsetName': "ISO-8859-5"\r
-}\r
-\r
-Win1251BulgarianModel = {\r
-  'charToOrderMap': win1251BulgarianCharToOrderMap,\r
-  'precedenceMatrix': BulgarianLangModel,\r
-  'mTypicalPositiveRatio': 0.969392,\r
-  'keepEnglishLetter': False,\r
-  'charsetName': "windows-1251"\r
-}\r
-\r
-\r
-# flake8: noqa\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+# 255: Control characters that usually does not exist in any text
+# 254: Carriage/Return
+# 253: symbol (punctuation) that does not belong to word
+# 252: 0 - 9
+
+# Character Mapping Table:
+# this table is modified base on win1251BulgarianCharToOrderMap, so
+# only number <64 is sure valid
+
+Latin5_BulgarianCharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
+253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82,  # 40
+110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253,  # 50
+253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71,  # 60
+116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253,  # 70
+194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,  # 80
+210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,  # 90
+ 81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238,  # a0
+ 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30,  # b0
+ 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56,  # c0
+  1, 18,  9, 20, 11,  3, 23, 15,  2, 26, 12, 10, 14,  6,  4, 13,  # d0
+  7,  8,  5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16,  # e0
+ 62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253,  # f0
+)
+
+win1251BulgarianCharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
+253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82,  # 40
+110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253,  # 50
+253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71,  # 60
+116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253,  # 70
+206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220,  # 80
+221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229,  # 90
+ 88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240,  # a0
+ 73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250,  # b0
+ 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30,  # c0
+ 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56,  # d0
+  1, 18,  9, 20, 11,  3, 23, 15,  2, 26, 12, 10, 14,  6,  4, 13,  # e0
+  7,  8,  5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16,  # f0
+)
+
+# Model Table:
+# total sequences: 100%
+# first 512 sequences: 96.9392%
+# first 1024 sequences:3.0618%
+# rest  sequences:     0.2992%
+# negative sequences:  0.0020%
+BulgarianLangModel = (
+0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,
+3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1,
+0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0,
+0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0,
+0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0,
+1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0,
+0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0,
+0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
+3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
+3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3,
+2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1,
+3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
+3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2,
+1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0,
+3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1,
+1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0,
+2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2,
+2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0,
+3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2,
+1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,
+2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2,
+2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,
+3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2,
+1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0,
+2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2,
+2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0,
+2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2,
+1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0,
+2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2,
+1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,
+3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2,
+1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0,
+3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1,
+1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0,
+2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1,
+1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0,
+2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2,
+1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,
+2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1,
+1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0,
+3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,
+1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2,
+1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,
+2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2,
+1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,
+2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2,
+1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
+1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1,
+0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
+1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2,
+1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
+2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1,
+1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,
+1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1,
+0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
+1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1,
+0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
+2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,
+0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
+2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,
+1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,
+0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
+0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
+1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1,
+1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,
+1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+)
+
+Latin5BulgarianModel = {
+  'charToOrderMap': Latin5_BulgarianCharToOrderMap,
+  'precedenceMatrix': BulgarianLangModel,
+  'mTypicalPositiveRatio': 0.969392,
+  'keepEnglishLetter': False,
+  'charsetName': "ISO-8859-5"
+}
+
+Win1251BulgarianModel = {
+  'charToOrderMap': win1251BulgarianCharToOrderMap,
+  'precedenceMatrix': BulgarianLangModel,
+  'mTypicalPositiveRatio': 0.969392,
+  'keepEnglishLetter': False,
+  'charsetName': "windows-1251"
+}
+
+
+# flake8: noqa
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Communicator client code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-# KOI8-R language model\r
-# Character Mapping Table:\r
-KOI8R_CharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30\r
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40\r
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50\r
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60\r
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70\r
-191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,  # 80\r
-207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,  # 90\r
-223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237,  # a0\r
-238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,  # b0\r
- 27,  3, 21, 28, 13,  2, 39, 19, 26,  4, 23, 11,  8, 12,  5,  1,  # c0\r
- 15, 16,  9,  7,  6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54,  # d0\r
- 59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34,  # e0\r
- 35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70,  # f0\r
-)\r
-\r
-win1251_CharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30\r
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40\r
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50\r
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60\r
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70\r
-191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,\r
-207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,\r
-223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,\r
-239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253,\r
- 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,\r
- 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,\r
-  3, 21, 10, 19, 13,  2, 24, 20,  4, 23, 11,  8, 12,  5,  1, 15,\r
-  9,  7,  6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,\r
-)\r
-\r
-latin5_CharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30\r
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40\r
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50\r
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60\r
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70\r
-191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,\r
-207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,\r
-223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,\r
- 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,\r
- 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,\r
-  3, 21, 10, 19, 13,  2, 24, 20,  4, 23, 11,  8, 12,  5,  1, 15,\r
-  9,  7,  6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,\r
-239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,\r
-)\r
-\r
-macCyrillic_CharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30\r
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40\r
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50\r
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60\r
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70\r
- 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,\r
- 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,\r
-191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,\r
-207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,\r
-223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,\r
-239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16,\r
-  3, 21, 10, 19, 13,  2, 24, 20,  4, 23, 11,  8, 12,  5,  1, 15,\r
-  9,  7,  6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,\r
-)\r
-\r
-IBM855_CharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30\r
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40\r
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50\r
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60\r
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70\r
-191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205,\r
-206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70,\r
-  3, 37, 21, 44, 28, 58, 13, 41,  2, 48, 39, 53, 19, 46,218,219,\r
-220,221,222,223,224, 26, 55,  4, 42,225,226,227,228, 23, 60,229,\r
-230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243,\r
-  8, 49, 12, 38,  5, 31,  1, 34, 15,244,245,246,247, 35, 16,248,\r
- 43,  9, 45,  7, 32,  6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249,\r
-250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,\r
-)\r
-\r
-IBM866_CharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30\r
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40\r
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50\r
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60\r
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70\r
- 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,\r
- 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,\r
-  3, 21, 10, 19, 13,  2, 24, 20,  4, 23, 11,  8, 12,  5,  1, 15,\r
-191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,\r
-207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,\r
-223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,\r
-  9,  7,  6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,\r
-239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,\r
-)\r
-\r
-# Model Table:\r
-# total sequences: 100%\r
-# first 512 sequences: 97.6601%\r
-# first 1024 sequences: 2.3389%\r
-# rest  sequences:      0.1237%\r
-# negative sequences:   0.0009%\r
-RussianLangModel = (\r
-0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2,\r
-3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,\r
-0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,\r
-0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0,\r
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0,\r
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0,\r
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
-3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0,\r
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0,\r
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0,\r
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0,\r
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
-2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0,\r
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0,\r
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0,\r
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,\r
-3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0,\r
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1,\r
-1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,\r
-2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1,\r
-1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0,\r
-2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1,\r
-1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,\r
-3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1,\r
-1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,\r
-2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2,\r
-1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1,\r
-1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1,\r
-1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,\r
-2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1,\r
-1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,\r
-3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2,\r
-1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,\r
-2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1,\r
-1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,\r
-2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1,\r
-1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,\r
-1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1,\r
-1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0,\r
-3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1,\r
-2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,\r
-3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,\r
-1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,\r
-1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1,\r
-0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,\r
-2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1,\r
-1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,\r
-1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,\r
-0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1,\r
-1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,\r
-2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2,\r
-2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,\r
-1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0,\r
-1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,\r
-2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,\r
-1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,\r
-0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,\r
-2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1,\r
-1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,\r
-1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,\r
-0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,\r
-0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1,\r
-0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,\r
-1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,\r
-0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,\r
-0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,\r
-1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1,\r
-0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,\r
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,\r
-0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,\r
-)\r
-\r
-Koi8rModel = {\r
-  'charToOrderMap': KOI8R_CharToOrderMap,\r
-  'precedenceMatrix': RussianLangModel,\r
-  'mTypicalPositiveRatio': 0.976601,\r
-  'keepEnglishLetter': False,\r
-  'charsetName': "KOI8-R"\r
-}\r
-\r
-Win1251CyrillicModel = {\r
-  'charToOrderMap': win1251_CharToOrderMap,\r
-  'precedenceMatrix': RussianLangModel,\r
-  'mTypicalPositiveRatio': 0.976601,\r
-  'keepEnglishLetter': False,\r
-  'charsetName': "windows-1251"\r
-}\r
-\r
-Latin5CyrillicModel = {\r
-  'charToOrderMap': latin5_CharToOrderMap,\r
-  'precedenceMatrix': RussianLangModel,\r
-  'mTypicalPositiveRatio': 0.976601,\r
-  'keepEnglishLetter': False,\r
-  'charsetName': "ISO-8859-5"\r
-}\r
-\r
-MacCyrillicModel = {\r
-  'charToOrderMap': macCyrillic_CharToOrderMap,\r
-  'precedenceMatrix': RussianLangModel,\r
-  'mTypicalPositiveRatio': 0.976601,\r
-  'keepEnglishLetter': False,\r
-  'charsetName': "MacCyrillic"\r
-};\r
-\r
-Ibm866Model = {\r
-  'charToOrderMap': IBM866_CharToOrderMap,\r
-  'precedenceMatrix': RussianLangModel,\r
-  'mTypicalPositiveRatio': 0.976601,\r
-  'keepEnglishLetter': False,\r
-  'charsetName': "IBM866"\r
-}\r
-\r
-Ibm855Model = {\r
-  'charToOrderMap': IBM855_CharToOrderMap,\r
-  'precedenceMatrix': RussianLangModel,\r
-  'mTypicalPositiveRatio': 0.976601,\r
-  'keepEnglishLetter': False,\r
-  'charsetName': "IBM855"\r
-}\r
-\r
-# flake8: noqa\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+# KOI8-R language model
+# Character Mapping Table:
+KOI8R_CharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
+253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40
+155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50
+253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60
+ 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70
+191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,  # 80
+207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,  # 90
+223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237,  # a0
+238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,  # b0
+ 27,  3, 21, 28, 13,  2, 39, 19, 26,  4, 23, 11,  8, 12,  5,  1,  # c0
+ 15, 16,  9,  7,  6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54,  # d0
+ 59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34,  # e0
+ 35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70,  # f0
+)
+
+win1251_CharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
+253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40
+155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50
+253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60
+ 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70
+191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
+207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
+223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
+239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253,
+ 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
+ 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
+  3, 21, 10, 19, 13,  2, 24, 20,  4, 23, 11,  8, 12,  5,  1, 15,
+  9,  7,  6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
+)
+
+latin5_CharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
+253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40
+155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50
+253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60
+ 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70
+191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
+207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
+223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
+ 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
+ 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
+  3, 21, 10, 19, 13,  2, 24, 20,  4, 23, 11,  8, 12,  5,  1, 15,
+  9,  7,  6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
+239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
+)
+
+macCyrillic_CharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
+253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40
+155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50
+253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60
+ 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70
+ 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
+ 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
+191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
+207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
+223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
+239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16,
+  3, 21, 10, 19, 13,  2, 24, 20,  4, 23, 11,  8, 12,  5,  1, 15,
+  9,  7,  6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,
+)
+
+IBM855_CharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
+253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40
+155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50
+253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60
+ 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70
+191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205,
+206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70,
+  3, 37, 21, 44, 28, 58, 13, 41,  2, 48, 39, 53, 19, 46,218,219,
+220,221,222,223,224, 26, 55,  4, 42,225,226,227,228, 23, 60,229,
+230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243,
+  8, 49, 12, 38,  5, 31,  1, 34, 15,244,245,246,247, 35, 16,248,
+ 43,  9, 45,  7, 32,  6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249,
+250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,
+)
+
+IBM866_CharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
+253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154,  # 40
+155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253,  # 50
+253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69,  # 60
+ 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253,  # 70
+ 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
+ 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
+  3, 21, 10, 19, 13,  2, 24, 20,  4, 23, 11,  8, 12,  5,  1, 15,
+191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
+207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
+223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
+  9,  7,  6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
+239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
+)
+
+# Model Table:
+# total sequences: 100%
+# first 512 sequences: 97.6601%
+# first 1024 sequences: 2.3389%
+# rest  sequences:      0.1237%
+# negative sequences:   0.0009%
+RussianLangModel = (
+0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2,
+3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
+0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
+0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0,
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0,
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0,
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
+3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0,
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0,
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0,
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0,
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
+2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0,
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0,
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0,
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
+3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0,
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1,
+1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
+2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1,
+1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0,
+2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1,
+1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,
+3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1,
+1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,
+2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2,
+1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1,
+1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1,
+1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
+2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1,
+1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,
+3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2,
+1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,
+2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1,
+1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,
+2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1,
+1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,
+1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1,
+1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0,
+3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1,
+2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,
+3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,
+1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,
+1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1,
+0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
+2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1,
+1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,
+1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,
+0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
+1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1,
+1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
+2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2,
+2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,
+1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0,
+1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,
+2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,
+1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,
+0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
+2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1,
+1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,
+1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
+0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
+0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,
+0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1,
+0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
+1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,
+0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
+1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,
+0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
+1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1,
+0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,
+2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,
+0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
+)
+
+Koi8rModel = {
+  'charToOrderMap': KOI8R_CharToOrderMap,
+  'precedenceMatrix': RussianLangModel,
+  'mTypicalPositiveRatio': 0.976601,
+  'keepEnglishLetter': False,
+  'charsetName': "KOI8-R"
+}
+
+Win1251CyrillicModel = {
+  'charToOrderMap': win1251_CharToOrderMap,
+  'precedenceMatrix': RussianLangModel,
+  'mTypicalPositiveRatio': 0.976601,
+  'keepEnglishLetter': False,
+  'charsetName': "windows-1251"
+}
+
+Latin5CyrillicModel = {
+  'charToOrderMap': latin5_CharToOrderMap,
+  'precedenceMatrix': RussianLangModel,
+  'mTypicalPositiveRatio': 0.976601,
+  'keepEnglishLetter': False,
+  'charsetName': "ISO-8859-5"
+}
+
+MacCyrillicModel = {
+  'charToOrderMap': macCyrillic_CharToOrderMap,
+  'precedenceMatrix': RussianLangModel,
+  'mTypicalPositiveRatio': 0.976601,
+  'keepEnglishLetter': False,
+  'charsetName': "MacCyrillic"
+};
+
+Ibm866Model = {
+  'charToOrderMap': IBM866_CharToOrderMap,
+  'precedenceMatrix': RussianLangModel,
+  'mTypicalPositiveRatio': 0.976601,
+  'keepEnglishLetter': False,
+  'charsetName': "IBM866"
+}
+
+Ibm855Model = {
+  'charToOrderMap': IBM855_CharToOrderMap,
+  'precedenceMatrix': RussianLangModel,
+  'mTypicalPositiveRatio': 0.976601,
+  'keepEnglishLetter': False,
+  'charsetName': "IBM855"
+}
+
+# flake8: noqa
similarity index 98%
rename from requests/packages/charade/langgreekmodel.py
rename to requests/packages/chardet/langgreekmodel.py
index 93241ce..ddb5837 100644 (file)
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Communicator client code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-# 255: Control characters that usually does not exist in any text\r
-# 254: Carriage/Return\r
-# 253: symbol (punctuation) that does not belong to word\r
-# 252: 0 - 9\r
-\r
-# Character Mapping Table:\r
-Latin7_CharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30\r
-253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85,  # 40\r
- 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253,  # 50\r
-253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55,  # 60\r
- 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253,  # 70\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 80\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 90\r
-253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253,  # a0\r
-253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123,  # b0\r
-110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39,  # c0\r
- 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15,  # d0\r
-124,  1, 29, 20, 21,  3, 32, 13, 25,  5, 11, 16, 10,  6, 30,  4,  # e0\r
-  9,  8, 14,  7,  2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253,  # f0\r
-)\r
-\r
-win1253_CharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30\r
-253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85,  # 40\r
- 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253,  # 50\r
-253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55,  # 60\r
- 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253,  # 70\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 80\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 90\r
-253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253,  # a0\r
-253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123,  # b0\r
-110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39,  # c0\r
- 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15,  # d0\r
-124,  1, 29, 20, 21,  3, 32, 13, 25,  5, 11, 16, 10,  6, 30,  4,  # e0\r
-  9,  8, 14,  7,  2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253,  # f0\r
-)\r
-\r
-# Model Table:\r
-# total sequences: 100%\r
-# first 512 sequences: 98.2851%\r
-# first 1024 sequences:1.7001%\r
-# rest  sequences:     0.0359%\r
-# negative sequences:  0.0148%\r
-GreekLangModel = (\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0,\r
-3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,\r
-0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0,\r
-2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0,\r
-0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0,\r
-2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0,\r
-2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0,\r
-0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0,\r
-2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0,\r
-0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0,\r
-3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0,\r
-3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0,\r
-2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0,\r
-2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0,\r
-0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0,\r
-0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0,\r
-0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2,\r
-0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,\r
-0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2,\r
-0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0,\r
-0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2,\r
-0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2,\r
-0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,\r
-0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2,\r
-0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0,\r
-0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
-0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0,\r
-0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,\r
-0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,\r
-0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2,\r
-0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,\r
-0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2,\r
-0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0,\r
-0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2,\r
-0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,\r
-0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2,\r
-0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,\r
-0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1,\r
-0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,\r
-0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2,\r
-0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,\r
-0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2,\r
-0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2,\r
-0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,\r
-0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,\r
-0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,\r
-0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
-0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0,\r
-0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0,\r
-0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-)\r
-\r
-Latin7GreekModel = {\r
-  'charToOrderMap': Latin7_CharToOrderMap,\r
-  'precedenceMatrix': GreekLangModel,\r
-  'mTypicalPositiveRatio': 0.982851,\r
-  'keepEnglishLetter': False,\r
-  'charsetName': "ISO-8859-7"\r
-}\r
-\r
-Win1253GreekModel = {\r
-  'charToOrderMap': win1253_CharToOrderMap,\r
-  'precedenceMatrix': GreekLangModel,\r
-  'mTypicalPositiveRatio': 0.982851,\r
-  'keepEnglishLetter': False,\r
-  'charsetName': "windows-1253"\r
-}\r
-\r
-# flake8: noqa\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+# 255: Control characters that usually does not exist in any text
+# 254: Carriage/Return
+# 253: symbol (punctuation) that does not belong to word
+# 252: 0 - 9
+
+# Character Mapping Table:
+Latin7_CharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
+253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85,  # 40
+ 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253,  # 50
+253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55,  # 60
+ 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253,  # 70
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 80
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 90
+253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253,  # a0
+253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123,  # b0
+110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39,  # c0
+ 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15,  # d0
+124,  1, 29, 20, 21,  3, 32, 13, 25,  5, 11, 16, 10,  6, 30,  4,  # e0
+  9,  8, 14,  7,  2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253,  # f0
+)
+
+win1253_CharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
+253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85,  # 40
+ 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253,  # 50
+253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55,  # 60
+ 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253,  # 70
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 80
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 90
+253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253,  # a0
+253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123,  # b0
+110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39,  # c0
+ 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15,  # d0
+124,  1, 29, 20, 21,  3, 32, 13, 25,  5, 11, 16, 10,  6, 30,  4,  # e0
+  9,  8, 14,  7,  2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253,  # f0
+)
+
+# Model Table:
+# total sequences: 100%
+# first 512 sequences: 98.2851%
+# first 1024 sequences:1.7001%
+# rest  sequences:     0.0359%
+# negative sequences:  0.0148%
+GreekLangModel = (
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0,
+3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
+0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0,
+2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0,
+0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0,
+2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
+0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0,
+2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0,
+0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0,
+2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0,
+0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0,
+3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0,
+3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0,
+2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0,
+2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0,
+0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0,
+0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0,
+0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2,
+0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,
+0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2,
+0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0,
+0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2,
+0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2,
+0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,
+0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2,
+0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0,
+0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0,
+0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
+0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0,
+0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
+0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,
+0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2,
+0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,
+0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2,
+0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0,
+0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2,
+0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
+0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2,
+0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,
+0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1,
+0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
+0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2,
+0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
+0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2,
+0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2,
+0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
+0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,
+0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,
+0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
+0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0,
+0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0,
+0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+)
+
+Latin7GreekModel = {
+  'charToOrderMap': Latin7_CharToOrderMap,
+  'precedenceMatrix': GreekLangModel,
+  'mTypicalPositiveRatio': 0.982851,
+  'keepEnglishLetter': False,
+  'charsetName': "ISO-8859-7"
+}
+
+Win1253GreekModel = {
+  'charToOrderMap': win1253_CharToOrderMap,
+  'precedenceMatrix': GreekLangModel,
+  'mTypicalPositiveRatio': 0.982851,
+  'keepEnglishLetter': False,
+  'charsetName': "windows-1253"
+}
+
+# flake8: noqa
similarity index 98%
rename from requests/packages/charade/langhebrewmodel.py
rename to requests/packages/chardet/langhebrewmodel.py
index d871324..75f2bc7 100644 (file)
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Universal charset detector code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-#          Simon Montagu\r
-# Portions created by the Initial Developer are Copyright (C) 2005\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#   Shy Shalom - original C code\r
-#   Shoshannah Forbes - original C code (?)\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-# 255: Control characters that usually does not exist in any text\r
-# 254: Carriage/Return\r
-# 253: symbol (punctuation) that does not belong to word\r
-# 252: 0 - 9\r
-\r
-# Windows-1255 language model\r
-# Character Mapping Table:\r
-win1255_CharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30\r
-253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85,  # 40\r
- 78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253,  # 50\r
-253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49,  # 60\r
- 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253,  # 70\r
-124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214,\r
-215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221,\r
- 34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227,\r
-106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234,\r
- 30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237,\r
-238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250,\r
-  9,  8, 20, 16,  3,  2, 24, 14, 22,  1, 25, 15,  4, 11,  6, 23,\r
- 12, 19, 13, 26, 18, 27, 21, 17,  7, 10,  5,251,252,128, 96,253,\r
-)\r
-\r
-# Model Table:\r
-# total sequences: 100%\r
-# first 512 sequences: 98.4004%\r
-# first 1024 sequences: 1.5981%\r
-# rest  sequences:      0.087%\r
-# negative sequences:   0.0015%\r
-HebrewLangModel = (\r
-0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,\r
-3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,\r
-1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,\r
-1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3,\r
-1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2,\r
-1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2,\r
-1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2,\r
-0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,\r
-0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2,\r
-1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,\r
-3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2,\r
-0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1,\r
-0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
-3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,\r
-0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,\r
-0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2,\r
-0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2,\r
-0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2,\r
-0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2,\r
-0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2,\r
-0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1,\r
-0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2,\r
-0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,\r
-3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2,\r
-0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2,\r
-0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2,\r
-0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
-3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,\r
-1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2,\r
-0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,\r
-3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0,\r
-0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
-3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3,\r
-0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
-2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0,\r
-0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0,\r
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,\r
-0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0,\r
-0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,\r
-0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,\r
-2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0,\r
-0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1,\r
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1,\r
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,\r
-0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
-3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0,\r
-0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1,\r
-1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1,\r
-0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,\r
-2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1,\r
-1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1,\r
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1,\r
-2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,\r
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1,\r
-1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1,\r
-2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,\r
-0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,\r
-1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1,\r
-0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,\r
-)\r
-\r
-Win1255HebrewModel = {\r
-  'charToOrderMap': win1255_CharToOrderMap,\r
-  'precedenceMatrix': HebrewLangModel,\r
-  'mTypicalPositiveRatio': 0.984004,\r
-  'keepEnglishLetter': False,\r
-  'charsetName': "windows-1255"\r
-}\r
-\r
-# flake8: noqa\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+#          Simon Montagu
+# Portions created by the Initial Developer are Copyright (C) 2005
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#   Shy Shalom - original C code
+#   Shoshannah Forbes - original C code (?)
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+# 255: Control characters that usually does not exist in any text
+# 254: Carriage/Return
+# 253: symbol (punctuation) that does not belong to word
+# 252: 0 - 9
+
+# Windows-1255 language model
+# Character Mapping Table:
+win1255_CharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
+253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85,  # 40
+ 78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253,  # 50
+253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49,  # 60
+ 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253,  # 70
+124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214,
+215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221,
+ 34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227,
+106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234,
+ 30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237,
+238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250,
+  9,  8, 20, 16,  3,  2, 24, 14, 22,  1, 25, 15,  4, 11,  6, 23,
+ 12, 19, 13, 26, 18, 27, 21, 17,  7, 10,  5,251,252,128, 96,253,
+)
+
+# Model Table:
+# total sequences: 100%
+# first 512 sequences: 98.4004%
+# first 1024 sequences: 1.5981%
+# rest  sequences:      0.087%
+# negative sequences:   0.0015%
+HebrewLangModel = (
+0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,
+3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,
+1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,
+1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3,
+1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2,
+1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2,
+1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2,
+0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,
+0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2,
+1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,
+3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2,
+0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1,
+0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,
+0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,
+0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2,
+0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,
+3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2,
+0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2,
+0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2,
+0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2,
+0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1,
+0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2,
+0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,
+3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2,
+0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2,
+0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2,
+0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,
+1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2,
+0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
+3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0,
+0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
+3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3,
+0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0,
+0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0,
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
+0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0,
+0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
+0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,
+2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0,
+0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1,
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1,
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,
+0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0,
+0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1,
+1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1,
+0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,
+2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1,
+1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1,
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1,
+2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1,
+1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1,
+2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
+0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,
+1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1,
+0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,
+)
+
+Win1255HebrewModel = {
+  'charToOrderMap': win1255_CharToOrderMap,
+  'precedenceMatrix': HebrewLangModel,
+  'mTypicalPositiveRatio': 0.984004,
+  'keepEnglishLetter': False,
+  'charsetName': "windows-1255"
+}
+
+# flake8: noqa
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Communicator client code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-# 255: Control characters that usually does not exist in any text\r
-# 254: Carriage/Return\r
-# 253: symbol (punctuation) that does not belong to word\r
-# 252: 0 - 9\r
-\r
-# Character Mapping Table:\r
-Latin2_HungarianCharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30\r
-253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,\r
- 46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,\r
-253,  2, 18, 26, 17,  1, 27, 12, 20,  9, 22,  7,  6, 13,  4,  8,\r
- 23, 67, 10,  5,  3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,\r
-159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,\r
-175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,\r
-191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205,\r
- 79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,\r
-221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231,\r
-232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241,\r
- 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85,\r
-245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,\r
-)\r
-\r
-win1250HungarianCharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30\r
-253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,\r
- 46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,\r
-253,  2, 18, 26, 17,  1, 27, 12, 20,  9, 22,  7,  6, 13,  4,  8,\r
- 23, 67, 10,  5,  3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,\r
-161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,\r
-177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190,\r
-191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205,\r
- 81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,\r
-221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231,\r
-232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241,\r
- 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87,\r
-245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253,\r
-)\r
-\r
-# Model Table:\r
-# total sequences: 100%\r
-# first 512 sequences: 94.7368%\r
-# first 1024 sequences:5.2623%\r
-# rest  sequences:     0.8894%\r
-# negative sequences:  0.0009%\r
-HungarianLangModel = (\r
-0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,\r
-3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2,\r
-3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,\r
-3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3,\r
-0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2,\r
-0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,\r
-3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
-3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,\r
-2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0,\r
-1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0,\r
-1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0,\r
-1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1,\r
-3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1,\r
-2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1,\r
-2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1,\r
-2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1,\r
-2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0,\r
-2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,\r
-3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1,\r
-2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1,\r
-2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1,\r
-2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,\r
-1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1,\r
-1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1,\r
-3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0,\r
-1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1,\r
-1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1,\r
-2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,\r
-2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0,\r
-2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1,\r
-3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1,\r
-2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1,\r
-1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,\r
-1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0,\r
-2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1,\r
-2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,\r
-1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,\r
-1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1,\r
-2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0,\r
-1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0,\r
-1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0,\r
-2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1,\r
-2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1,\r
-2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,\r
-1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1,\r
-1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,\r
-1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,\r
-0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0,\r
-2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1,\r
-2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1,\r
-1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1,\r
-2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,\r
-1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,\r
-1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,\r
-2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0,\r
-2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1,\r
-2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0,\r
-1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,\r
-2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0,\r
-0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,\r
-1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,\r
-0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,\r
-1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
-0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,\r
-0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,\r
-2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,\r
-)\r
-\r
-Latin2HungarianModel = {\r
-  'charToOrderMap': Latin2_HungarianCharToOrderMap,\r
-  'precedenceMatrix': HungarianLangModel,\r
-  'mTypicalPositiveRatio': 0.947368,\r
-  'keepEnglishLetter': True,\r
-  'charsetName': "ISO-8859-2"\r
-}\r
-\r
-Win1250HungarianModel = {\r
-  'charToOrderMap': win1250HungarianCharToOrderMap,\r
-  'precedenceMatrix': HungarianLangModel,\r
-  'mTypicalPositiveRatio': 0.947368,\r
-  'keepEnglishLetter': True,\r
-  'charsetName': "windows-1250"\r
-}\r
-\r
-# flake8: noqa\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+# 255: Control characters that usually does not exist in any text
+# 254: Carriage/Return
+# 253: symbol (punctuation) that does not belong to word
+# 252: 0 - 9
+
+# Character Mapping Table:
+Latin2_HungarianCharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
+253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
+ 46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
+253,  2, 18, 26, 17,  1, 27, 12, 20,  9, 22,  7,  6, 13,  4,  8,
+ 23, 67, 10,  5,  3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
+159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,
+175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,
+191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205,
+ 79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
+221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231,
+232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241,
+ 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85,
+245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,
+)
+
+win1250HungarianCharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
+253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
+ 46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
+253,  2, 18, 26, 17,  1, 27, 12, 20,  9, 22,  7,  6, 13,  4,  8,
+ 23, 67, 10,  5,  3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
+161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,
+177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190,
+191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205,
+ 81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
+221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231,
+232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241,
+ 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87,
+245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253,
+)
+
+# Model Table:
+# total sequences: 100%
+# first 512 sequences: 94.7368%
+# first 1024 sequences:5.2623%
+# rest  sequences:     0.8894%
+# negative sequences:  0.0009%
+HungarianLangModel = (
+0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2,
+3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
+3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3,
+0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
+3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
+3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2,
+0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,
+3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
+3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
+3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
+2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0,
+1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0,
+1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0,
+1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1,
+3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1,
+2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1,
+2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1,
+2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1,
+2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0,
+2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
+3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1,
+2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1,
+2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1,
+2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,
+1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1,
+1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1,
+3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0,
+1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1,
+1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1,
+2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,
+2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0,
+2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1,
+3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1,
+2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1,
+1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,
+1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
+2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1,
+2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,
+1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,
+1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1,
+2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0,
+1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0,
+1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0,
+2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1,
+2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1,
+2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
+1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1,
+1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,
+1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,
+0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
+2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1,
+2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1,
+1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1,
+2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,
+1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,
+1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,
+2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0,
+2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1,
+2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0,
+1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
+2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0,
+0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
+1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,
+0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
+1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
+0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
+0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
+2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
+0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
+)
+
+Latin2HungarianModel = {
+  'charToOrderMap': Latin2_HungarianCharToOrderMap,
+  'precedenceMatrix': HungarianLangModel,
+  'mTypicalPositiveRatio': 0.947368,
+  'keepEnglishLetter': True,
+  'charsetName': "ISO-8859-2"
+}
+
+Win1250HungarianModel = {
+  'charToOrderMap': win1250HungarianCharToOrderMap,
+  'precedenceMatrix': HungarianLangModel,
+  'mTypicalPositiveRatio': 0.947368,
+  'keepEnglishLetter': True,
+  'charsetName': "windows-1250"
+}
+
+# flake8: noqa
similarity index 98%
rename from requests/packages/charade/langthaimodel.py
rename to requests/packages/chardet/langthaimodel.py
index df343a7..0508b1b 100644 (file)
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Communicator client code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-# 255: Control characters that usually does not exist in any text\r
-# 254: Carriage/Return\r
-# 253: symbol (punctuation) that does not belong to word\r
-# 252: 0 - 9\r
-\r
-# The following result for thai was collected from a limited sample (1M).\r
-\r
-# Character Mapping Table:\r
-TIS620CharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30\r
-253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111,  # 40\r
-188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253,  # 50\r
-253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82,  # 60\r
- 96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253,  # 70\r
-209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222,\r
-223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235,\r
-236,  5, 30,237, 24,238, 75,  8, 26, 52, 34, 51,119, 47, 58, 57,\r
- 49, 53, 55, 43, 20, 19, 44, 14, 48,  3, 17, 25, 39, 62, 31, 54,\r
- 45,  9, 16,  2, 61, 15,239, 12, 42, 46, 18, 21, 76,  4, 66, 63,\r
- 22, 10,  1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244,\r
- 11, 28, 41, 29, 33,245, 50, 37,  6,  7, 67, 77, 38, 93,246,247,\r
- 68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253,\r
-)\r
-\r
-# Model Table:\r
-# total sequences: 100%\r
-# first 512 sequences: 92.6386%\r
-# first 1024 sequences:7.3177%\r
-# rest  sequences:     1.0230%\r
-# negative sequences:  0.0436%\r
-ThaiLangModel = (\r
-0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3,\r
-0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2,\r
-3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3,\r
-0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,\r
-3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2,\r
-3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1,\r
-3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2,\r
-3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1,\r
-3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1,\r
-3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,\r
-3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1,\r
-2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1,\r
-3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1,\r
-0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,\r
-3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1,\r
-0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,\r
-3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2,\r
-1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0,\r
-3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3,\r
-3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0,\r
-1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2,\r
-0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,\r
-2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3,\r
-0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0,\r
-3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1,\r
-2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,\r
-3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2,\r
-0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2,\r
-3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,\r
-3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0,\r
-2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,\r
-3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1,\r
-2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1,\r
-3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0,\r
-3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1,\r
-3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1,\r
-3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1,\r
-1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2,\r
-0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3,\r
-0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,\r
-3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0,\r
-3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1,\r
-1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0,\r
-3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1,\r
-3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2,\r
-0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
-0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0,\r
-0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0,\r
-1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1,\r
-1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,\r
-3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1,\r
-0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
-0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0,\r
-0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,\r
-3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0,\r
-3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0,\r
-0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1,\r
-0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0,\r
-0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1,\r
-0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,\r
-0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0,\r
-0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1,\r
-0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,\r
-3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0,\r
-0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0,\r
-0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,\r
-3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1,\r
-2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,\r
-0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0,\r
-3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0,\r
-0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,\r
-2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0,\r
-1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3,\r
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0,\r
-1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
-1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,\r
-1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-)\r
-\r
-TIS620ThaiModel = {\r
-  'charToOrderMap': TIS620CharToOrderMap,\r
-  'precedenceMatrix': ThaiLangModel,\r
-  'mTypicalPositiveRatio': 0.926386,\r
-  'keepEnglishLetter': False,\r
-  'charsetName': "TIS-620"\r
-}\r
-\r
-# flake8: noqa\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+# 255: Control characters that usually does not exist in any text
+# 254: Carriage/Return
+# 253: symbol (punctuation) that does not belong to word
+# 252: 0 - 9
+
+# The following result for thai was collected from a limited sample (1M).
+
+# Character Mapping Table:
+TIS620CharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
+253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111,  # 40
+188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253,  # 50
+253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82,  # 60
+ 96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253,  # 70
+209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222,
+223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235,
+236,  5, 30,237, 24,238, 75,  8, 26, 52, 34, 51,119, 47, 58, 57,
+ 49, 53, 55, 43, 20, 19, 44, 14, 48,  3, 17, 25, 39, 62, 31, 54,
+ 45,  9, 16,  2, 61, 15,239, 12, 42, 46, 18, 21, 76,  4, 66, 63,
+ 22, 10,  1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244,
+ 11, 28, 41, 29, 33,245, 50, 37,  6,  7, 67, 77, 38, 93,246,247,
+ 68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253,
+)
+
+# Model Table:
+# total sequences: 100%
+# first 512 sequences: 92.6386%
+# first 1024 sequences:7.3177%
+# rest  sequences:     1.0230%
+# negative sequences:  0.0436%
+ThaiLangModel = (
+0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3,
+0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2,
+3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3,
+0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,
+3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2,
+3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1,
+3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2,
+3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1,
+3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1,
+3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,
+3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1,
+2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1,
+3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1,
+0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,
+3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1,
+0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,
+3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2,
+1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0,
+3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3,
+3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0,
+1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2,
+0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
+2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3,
+0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0,
+3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1,
+2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,
+3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2,
+0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2,
+3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
+3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0,
+2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
+3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1,
+2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1,
+3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0,
+3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1,
+3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1,
+3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1,
+1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2,
+0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3,
+0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,
+3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0,
+3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1,
+1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0,
+3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1,
+3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
+0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2,
+0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
+0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0,
+0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0,
+1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1,
+1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,
+3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1,
+0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
+0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0,
+0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
+3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0,
+3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0,
+0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1,
+0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0,
+0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1,
+0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,
+0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0,
+0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1,
+0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,
+3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0,
+0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0,
+0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
+3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1,
+2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,
+0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0,
+3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0,
+0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
+2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0,
+1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3,
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0,
+1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
+1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,
+2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,
+1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+)
+
+TIS620ThaiModel = {
+  'charToOrderMap': TIS620CharToOrderMap,
+  'precedenceMatrix': ThaiLangModel,
+  'mTypicalPositiveRatio': 0.926386,
+  'keepEnglishLetter': False,
+  'charsetName': "TIS-620"
+}
+
+# flake8: noqa
similarity index 96%
rename from requests/packages/charade/latin1prober.py
rename to requests/packages/chardet/latin1prober.py
index 18eefd4..ad695f5 100644 (file)
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Universal charset detector code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 2001\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#   Shy Shalom - original C code\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .charsetprober import CharSetProber\r
-from .constants import eNotMe\r
-from .compat import wrap_ord\r
-\r
-FREQ_CAT_NUM = 4\r
-\r
-UDF = 0  # undefined\r
-OTH = 1  # other\r
-ASC = 2  # ascii capital letter\r
-ASS = 3  # ascii small letter\r
-ACV = 4  # accent capital vowel\r
-ACO = 5  # accent capital other\r
-ASV = 6  # accent small vowel\r
-ASO = 7  # accent small other\r
-CLASS_NUM = 8  # total classes\r
-\r
-Latin1_CharToClass = (\r
-    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 00 - 07\r
-    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 08 - 0F\r
-    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 10 - 17\r
-    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 18 - 1F\r
-    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 20 - 27\r
-    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 28 - 2F\r
-    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 30 - 37\r
-    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 38 - 3F\r
-    OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 40 - 47\r
-    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 48 - 4F\r
-    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 50 - 57\r
-    ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH,   # 58 - 5F\r
-    OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 60 - 67\r
-    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 68 - 6F\r
-    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 70 - 77\r
-    ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH,   # 78 - 7F\r
-    OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH,   # 80 - 87\r
-    OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF,   # 88 - 8F\r
-    UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 90 - 97\r
-    OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO,   # 98 - 9F\r
-    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A0 - A7\r
-    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A8 - AF\r
-    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B0 - B7\r
-    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B8 - BF\r
-    ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO,   # C0 - C7\r
-    ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV,   # C8 - CF\r
-    ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH,   # D0 - D7\r
-    ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO,   # D8 - DF\r
-    ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO,   # E0 - E7\r
-    ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV,   # E8 - EF\r
-    ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH,   # F0 - F7\r
-    ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO,   # F8 - FF\r
-)\r
-\r
-# 0 : illegal\r
-# 1 : very unlikely\r
-# 2 : normal\r
-# 3 : very likely\r
-Latin1ClassModel = (\r
-    # UDF OTH ASC ASS ACV ACO ASV ASO\r
-    0,  0,  0,  0,  0,  0,  0,  0,  # UDF\r
-    0,  3,  3,  3,  3,  3,  3,  3,  # OTH\r
-    0,  3,  3,  3,  3,  3,  3,  3,  # ASC\r
-    0,  3,  3,  3,  1,  1,  3,  3,  # ASS\r
-    0,  3,  3,  3,  1,  2,  1,  2,  # ACV\r
-    0,  3,  3,  3,  3,  3,  3,  3,  # ACO\r
-    0,  3,  1,  3,  1,  1,  1,  3,  # ASV\r
-    0,  3,  1,  3,  1,  1,  3,  3,  # ASO\r
-)\r
-\r
-\r
-class Latin1Prober(CharSetProber):\r
-    def __init__(self):\r
-        CharSetProber.__init__(self)\r
-        self.reset()\r
-\r
-    def reset(self):\r
-        self._mLastCharClass = OTH\r
-        self._mFreqCounter = [0] * FREQ_CAT_NUM\r
-        CharSetProber.reset(self)\r
-\r
-    def get_charset_name(self):\r
-        return "windows-1252"\r
-\r
-    def feed(self, aBuf):\r
-        aBuf = self.filter_with_english_letters(aBuf)\r
-        for c in aBuf:\r
-            charClass = Latin1_CharToClass[wrap_ord(c)]\r
-            freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM)\r
-                                    + charClass]\r
-            if freq == 0:\r
-                self._mState = eNotMe\r
-                break\r
-            self._mFreqCounter[freq] += 1\r
-            self._mLastCharClass = charClass\r
-\r
-        return self.get_state()\r
-\r
-    def get_confidence(self):\r
-        if self.get_state() == eNotMe:\r
-            return 0.01\r
-\r
-        total = sum(self._mFreqCounter)\r
-        if total < 0.01:\r
-            confidence = 0.0\r
-        else:\r
-            confidence = ((float(self._mFreqCounter[3]) / total)\r
-                          - (self._mFreqCounter[1] * 20.0 / total))\r
-        if confidence < 0.0:\r
-            confidence = 0.0\r
-        # lower the confidence of latin1 so that other more accurate\r
-        # detector can take priority.\r
-        confidence = confidence * 0.5\r
-        return confidence\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#   Shy Shalom - original C code
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+from .charsetprober import CharSetProber
+from .constants import eNotMe
+from .compat import wrap_ord
+
+FREQ_CAT_NUM = 4
+
+UDF = 0  # undefined
+OTH = 1  # other
+ASC = 2  # ascii capital letter
+ASS = 3  # ascii small letter
+ACV = 4  # accent capital vowel
+ACO = 5  # accent capital other
+ASV = 6  # accent small vowel
+ASO = 7  # accent small other
+CLASS_NUM = 8  # total classes
+
+Latin1_CharToClass = (
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 00 - 07
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 08 - 0F
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 10 - 17
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 18 - 1F
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 20 - 27
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 28 - 2F
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 30 - 37
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 38 - 3F
+    OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 40 - 47
+    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 48 - 4F
+    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 50 - 57
+    ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH,   # 58 - 5F
+    OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 60 - 67
+    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 68 - 6F
+    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 70 - 77
+    ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH,   # 78 - 7F
+    OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH,   # 80 - 87
+    OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF,   # 88 - 8F
+    UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 90 - 97
+    OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO,   # 98 - 9F
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A0 - A7
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A8 - AF
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B0 - B7
+    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B8 - BF
+    ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO,   # C0 - C7
+    ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV,   # C8 - CF
+    ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH,   # D0 - D7
+    ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO,   # D8 - DF
+    ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO,   # E0 - E7
+    ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV,   # E8 - EF
+    ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH,   # F0 - F7
+    ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO,   # F8 - FF
+)
+
+# 0 : illegal
+# 1 : very unlikely
+# 2 : normal
+# 3 : very likely
+Latin1ClassModel = (
+    # UDF OTH ASC ASS ACV ACO ASV ASO
+    0,  0,  0,  0,  0,  0,  0,  0,  # UDF
+    0,  3,  3,  3,  3,  3,  3,  3,  # OTH
+    0,  3,  3,  3,  3,  3,  3,  3,  # ASC
+    0,  3,  3,  3,  1,  1,  3,  3,  # ASS
+    0,  3,  3,  3,  1,  2,  1,  2,  # ACV
+    0,  3,  3,  3,  3,  3,  3,  3,  # ACO
+    0,  3,  1,  3,  1,  1,  1,  3,  # ASV
+    0,  3,  1,  3,  1,  1,  3,  3,  # ASO
+)
+
+
+class Latin1Prober(CharSetProber):
+    def __init__(self):
+        CharSetProber.__init__(self)
+        self.reset()
+
+    def reset(self):
+        self._mLastCharClass = OTH
+        self._mFreqCounter = [0] * FREQ_CAT_NUM
+        CharSetProber.reset(self)
+
+    def get_charset_name(self):
+        return "windows-1252"
+
+    def feed(self, aBuf):
+        aBuf = self.filter_with_english_letters(aBuf)
+        for c in aBuf:
+            charClass = Latin1_CharToClass[wrap_ord(c)]
+            freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM)
+                                    + charClass]
+            if freq == 0:
+                self._mState = eNotMe
+                break
+            self._mFreqCounter[freq] += 1
+            self._mLastCharClass = charClass
+
+        return self.get_state()
+
+    def get_confidence(self):
+        if self.get_state() == eNotMe:
+            return 0.01
+
+        total = sum(self._mFreqCounter)
+        if total < 0.01:
+            confidence = 0.0
+        else:
+            confidence = ((self._mFreqCounter[3] / total)
+                          - (self._mFreqCounter[1] * 20.0 / total))
+        if confidence < 0.0:
+            confidence = 0.0
+        # lower the confidence of latin1 so that other more accurate
+        # detector can take priority.
+        confidence = confidence * 0.5
+        return confidence
similarity index 97%
rename from requests/packages/charade/mbcharsetprober.py
rename to requests/packages/chardet/mbcharsetprober.py
index 1eee253..bb42f2f 100644 (file)
@@ -1,86 +1,86 @@
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Universal charset detector code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 2001\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#   Shy Shalom - original C code\r
-#   Proofpoint, Inc.\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-import sys\r
-from . import constants\r
-from .charsetprober import CharSetProber\r
-\r
-\r
-class MultiByteCharSetProber(CharSetProber):\r
-    def __init__(self):\r
-        CharSetProber.__init__(self)\r
-        self._mDistributionAnalyzer = None\r
-        self._mCodingSM = None\r
-        self._mLastChar = [0, 0]\r
-\r
-    def reset(self):\r
-        CharSetProber.reset(self)\r
-        if self._mCodingSM:\r
-            self._mCodingSM.reset()\r
-        if self._mDistributionAnalyzer:\r
-            self._mDistributionAnalyzer.reset()\r
-        self._mLastChar = [0, 0]\r
-\r
-    def get_charset_name(self):\r
-        pass\r
-\r
-    def feed(self, aBuf):\r
-        aLen = len(aBuf)\r
-        for i in range(0, aLen):\r
-            codingState = self._mCodingSM.next_state(aBuf[i])\r
-            if codingState == constants.eError:\r
-                if constants._debug:\r
-                    sys.stderr.write(self.get_charset_name()\r
-                                     + ' prober hit error at byte ' + str(i)\r
-                                     + '\n')\r
-                self._mState = constants.eNotMe\r
-                break\r
-            elif codingState == constants.eItsMe:\r
-                self._mState = constants.eFoundIt\r
-                break\r
-            elif codingState == constants.eStart:\r
-                charLen = self._mCodingSM.get_current_charlen()\r
-                if i == 0:\r
-                    self._mLastChar[1] = aBuf[0]\r
-                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)\r
-                else:\r
-                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],\r
-                                                     charLen)\r
-\r
-        self._mLastChar[0] = aBuf[aLen - 1]\r
-\r
-        if self.get_state() == constants.eDetecting:\r
-            if (self._mDistributionAnalyzer.got_enough_data() and\r
-                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):\r
-                self._mState = constants.eFoundIt\r
-\r
-        return self.get_state()\r
-\r
-    def get_confidence(self):\r
-        return self._mDistributionAnalyzer.get_confidence()\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#   Shy Shalom - original C code
+#   Proofpoint, Inc.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+import sys
+from . import constants
+from .charsetprober import CharSetProber
+
+
+class MultiByteCharSetProber(CharSetProber):
+    def __init__(self):
+        CharSetProber.__init__(self)
+        self._mDistributionAnalyzer = None
+        self._mCodingSM = None
+        self._mLastChar = [0, 0]
+
+    def reset(self):
+        CharSetProber.reset(self)
+        if self._mCodingSM:
+            self._mCodingSM.reset()
+        if self._mDistributionAnalyzer:
+            self._mDistributionAnalyzer.reset()
+        self._mLastChar = [0, 0]
+
+    def get_charset_name(self):
+        pass
+
+    def feed(self, aBuf):
+        aLen = len(aBuf)
+        for i in range(0, aLen):
+            codingState = self._mCodingSM.next_state(aBuf[i])
+            if codingState == constants.eError:
+                if constants._debug:
+                    sys.stderr.write(self.get_charset_name()
+                                     + ' prober hit error at byte ' + str(i)
+                                     + '\n')
+                self._mState = constants.eNotMe
+                break
+            elif codingState == constants.eItsMe:
+                self._mState = constants.eFoundIt
+                break
+            elif codingState == constants.eStart:
+                charLen = self._mCodingSM.get_current_charlen()
+                if i == 0:
+                    self._mLastChar[1] = aBuf[0]
+                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
+                else:
+                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
+                                                     charLen)
+
+        self._mLastChar[0] = aBuf[aLen - 1]
+
+        if self.get_state() == constants.eDetecting:
+            if (self._mDistributionAnalyzer.got_enough_data() and
+                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
+                self._mState = constants.eFoundIt
+
+        return self.get_state()
+
+    def get_confidence(self):
+        return self._mDistributionAnalyzer.get_confidence()
similarity index 97%
rename from requests/packages/charade/mbcsgroupprober.py
rename to requests/packages/chardet/mbcsgroupprober.py
index 2f6f5e8..03c9dcf 100644 (file)
@@ -1,54 +1,54 @@
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Universal charset detector code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 2001\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#   Shy Shalom - original C code\r
-#   Proofpoint, Inc.\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .charsetgroupprober import CharSetGroupProber\r
-from .utf8prober import UTF8Prober\r
-from .sjisprober import SJISProber\r
-from .eucjpprober import EUCJPProber\r
-from .gb2312prober import GB2312Prober\r
-from .euckrprober import EUCKRProber\r
-from .cp949prober import CP949Prober\r
-from .big5prober import Big5Prober\r
-from .euctwprober import EUCTWProber\r
-\r
-\r
-class MBCSGroupProber(CharSetGroupProber):\r
-    def __init__(self):\r
-        CharSetGroupProber.__init__(self)\r
-        self._mProbers = [\r
-            UTF8Prober(),\r
-            SJISProber(),\r
-            EUCJPProber(),\r
-            GB2312Prober(),\r
-            EUCKRProber(),\r
-            CP949Prober(),\r
-            Big5Prober(),\r
-            EUCTWProber()\r
-        ]\r
-        self.reset()\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#   Shy Shalom - original C code
+#   Proofpoint, Inc.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+from .charsetgroupprober import CharSetGroupProber
+from .utf8prober import UTF8Prober
+from .sjisprober import SJISProber
+from .eucjpprober import EUCJPProber
+from .gb2312prober import GB2312Prober
+from .euckrprober import EUCKRProber
+from .cp949prober import CP949Prober
+from .big5prober import Big5Prober
+from .euctwprober import EUCTWProber
+
+
+class MBCSGroupProber(CharSetGroupProber):
+    def __init__(self):
+        CharSetGroupProber.__init__(self)
+        self._mProbers = [
+            UTF8Prober(),
+            SJISProber(),
+            EUCJPProber(),
+            GB2312Prober(),
+            EUCKRProber(),
+            CP949Prober(),
+            Big5Prober(),
+            EUCTWProber()
+        ]
+        self.reset()
similarity index 97%
rename from requests/packages/charade/mbcssm.py
rename to requests/packages/chardet/mbcssm.py
index 55c02f0..3f93cfb 100644 (file)
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .constants import eStart, eError, eItsMe\r
-\r
-# BIG5\r
-\r
-BIG5_cls = (\r
-    1,1,1,1,1,1,1,1,  # 00 - 07    #allow 0x00 as legal value\r
-    1,1,1,1,1,1,0,0,  # 08 - 0f\r
-    1,1,1,1,1,1,1,1,  # 10 - 17\r
-    1,1,1,0,1,1,1,1,  # 18 - 1f\r
-    1,1,1,1,1,1,1,1,  # 20 - 27\r
-    1,1,1,1,1,1,1,1,  # 28 - 2f\r
-    1,1,1,1,1,1,1,1,  # 30 - 37\r
-    1,1,1,1,1,1,1,1,  # 38 - 3f\r
-    2,2,2,2,2,2,2,2,  # 40 - 47\r
-    2,2,2,2,2,2,2,2,  # 48 - 4f\r
-    2,2,2,2,2,2,2,2,  # 50 - 57\r
-    2,2,2,2,2,2,2,2,  # 58 - 5f\r
-    2,2,2,2,2,2,2,2,  # 60 - 67\r
-    2,2,2,2,2,2,2,2,  # 68 - 6f\r
-    2,2,2,2,2,2,2,2,  # 70 - 77\r
-    2,2,2,2,2,2,2,1,  # 78 - 7f\r
-    4,4,4,4,4,4,4,4,  # 80 - 87\r
-    4,4,4,4,4,4,4,4,  # 88 - 8f\r
-    4,4,4,4,4,4,4,4,  # 90 - 97\r
-    4,4,4,4,4,4,4,4,  # 98 - 9f\r
-    4,3,3,3,3,3,3,3,  # a0 - a7\r
-    3,3,3,3,3,3,3,3,  # a8 - af\r
-    3,3,3,3,3,3,3,3,  # b0 - b7\r
-    3,3,3,3,3,3,3,3,  # b8 - bf\r
-    3,3,3,3,3,3,3,3,  # c0 - c7\r
-    3,3,3,3,3,3,3,3,  # c8 - cf\r
-    3,3,3,3,3,3,3,3,  # d0 - d7\r
-    3,3,3,3,3,3,3,3,  # d8 - df\r
-    3,3,3,3,3,3,3,3,  # e0 - e7\r
-    3,3,3,3,3,3,3,3,  # e8 - ef\r
-    3,3,3,3,3,3,3,3,  # f0 - f7\r
-    3,3,3,3,3,3,3,0  # f8 - ff\r
-)\r
-\r
-BIG5_st = (\r
-    eError,eStart,eStart,     3,eError,eError,eError,eError,#00-07\r
-    eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f\r
-    eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart#10-17\r
-)\r
-\r
-Big5CharLenTable = (0, 1, 1, 2, 0)\r
-\r
-Big5SMModel = {'classTable': BIG5_cls,\r
-               'classFactor': 5,\r
-               'stateTable': BIG5_st,\r
-               'charLenTable': Big5CharLenTable,\r
-               'name': 'Big5'}\r
-\r
-# CP949\r
-\r
-CP949_cls  = (\r
-    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0,  # 00 - 0f\r
-    1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1,  # 10 - 1f\r
-    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,  # 20 - 2f\r
-    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,  # 30 - 3f\r
-    1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4,  # 40 - 4f\r
-    4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1,  # 50 - 5f\r
-    1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,  # 60 - 6f\r
-    5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1,  # 70 - 7f\r
-    0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,  # 80 - 8f\r
-    6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,  # 90 - 9f\r
-    6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8,  # a0 - af\r
-    7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,  # b0 - bf\r
-    7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2,  # c0 - cf\r
-    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,  # d0 - df\r
-    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,  # e0 - ef\r
-    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0,  # f0 - ff\r
-)\r
-\r
-CP949_st = (\r
-#cls=    0      1      2      3      4      5      6      7      8      9  # previous state =\r
-    eError,eStart,     3,eError,eStart,eStart,     4,     5,eError,     6, # eStart\r
-    eError,eError,eError,eError,eError,eError,eError,eError,eError,eError, # eError\r
-    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe, # eItsMe\r
-    eError,eError,eStart,eStart,eError,eError,eError,eStart,eStart,eStart, # 3\r
-    eError,eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 4\r
-    eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 5\r
-    eError,eStart,eStart,eStart,eStart,eError,eError,eStart,eStart,eStart, # 6\r
-)\r
-\r
-CP949CharLenTable = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)\r
-\r
-CP949SMModel = {'classTable': CP949_cls,\r
-                'classFactor': 10,\r
-                'stateTable': CP949_st,\r
-                'charLenTable': CP949CharLenTable,\r
-                'name': 'CP949'}\r
-\r
-# EUC-JP\r
-\r
-EUCJP_cls = (\r
-    4,4,4,4,4,4,4,4,  # 00 - 07\r
-    4,4,4,4,4,4,5,5,  # 08 - 0f\r
-    4,4,4,4,4,4,4,4,  # 10 - 17\r
-    4,4,4,5,4,4,4,4,  # 18 - 1f\r
-    4,4,4,4,4,4,4,4,  # 20 - 27\r
-    4,4,4,4,4,4,4,4,  # 28 - 2f\r
-    4,4,4,4,4,4,4,4,  # 30 - 37\r
-    4,4,4,4,4,4,4,4,  # 38 - 3f\r
-    4,4,4,4,4,4,4,4,  # 40 - 47\r
-    4,4,4,4,4,4,4,4,  # 48 - 4f\r
-    4,4,4,4,4,4,4,4,  # 50 - 57\r
-    4,4,4,4,4,4,4,4,  # 58 - 5f\r
-    4,4,4,4,4,4,4,4,  # 60 - 67\r
-    4,4,4,4,4,4,4,4,  # 68 - 6f\r
-    4,4,4,4,4,4,4,4,  # 70 - 77\r
-    4,4,4,4,4,4,4,4,  # 78 - 7f\r
-    5,5,5,5,5,5,5,5,  # 80 - 87\r
-    5,5,5,5,5,5,1,3,  # 88 - 8f\r
-    5,5,5,5,5,5,5,5,  # 90 - 97\r
-    5,5,5,5,5,5,5,5,  # 98 - 9f\r
-    5,2,2,2,2,2,2,2,  # a0 - a7\r
-    2,2,2,2,2,2,2,2,  # a8 - af\r
-    2,2,2,2,2,2,2,2,  # b0 - b7\r
-    2,2,2,2,2,2,2,2,  # b8 - bf\r
-    2,2,2,2,2,2,2,2,  # c0 - c7\r
-    2,2,2,2,2,2,2,2,  # c8 - cf\r
-    2,2,2,2,2,2,2,2,  # d0 - d7\r
-    2,2,2,2,2,2,2,2,  # d8 - df\r
-    0,0,0,0,0,0,0,0,  # e0 - e7\r
-    0,0,0,0,0,0,0,0,  # e8 - ef\r
-    0,0,0,0,0,0,0,0,  # f0 - f7\r
-    0,0,0,0,0,0,0,5  # f8 - ff\r
-)\r
-\r
-EUCJP_st = (\r
-          3,     4,     3,     5,eStart,eError,eError,eError,#00-07\r
-     eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f\r
-     eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17\r
-     eError,eError,eStart,eError,eError,eError,     3,eError,#18-1f\r
-          3,eError,eError,eError,eStart,eStart,eStart,eStart#20-27\r
-)\r
-\r
-EUCJPCharLenTable = (2, 2, 2, 3, 1, 0)\r
-\r
-EUCJPSMModel = {'classTable': EUCJP_cls,\r
-                'classFactor': 6,\r
-                'stateTable': EUCJP_st,\r
-                'charLenTable': EUCJPCharLenTable,\r
-                'name': 'EUC-JP'}\r
-\r
-# EUC-KR\r
-\r
-EUCKR_cls  = (\r
-    1,1,1,1,1,1,1,1,  # 00 - 07\r
-    1,1,1,1,1,1,0,0,  # 08 - 0f\r
-    1,1,1,1,1,1,1,1,  # 10 - 17\r
-    1,1,1,0,1,1,1,1,  # 18 - 1f\r
-    1,1,1,1,1,1,1,1,  # 20 - 27\r
-    1,1,1,1,1,1,1,1,  # 28 - 2f\r
-    1,1,1,1,1,1,1,1,  # 30 - 37\r
-    1,1,1,1,1,1,1,1,  # 38 - 3f\r
-    1,1,1,1,1,1,1,1,  # 40 - 47\r
-    1,1,1,1,1,1,1,1,  # 48 - 4f\r
-    1,1,1,1,1,1,1,1,  # 50 - 57\r
-    1,1,1,1,1,1,1,1,  # 58 - 5f\r
-    1,1,1,1,1,1,1,1,  # 60 - 67\r
-    1,1,1,1,1,1,1,1,  # 68 - 6f\r
-    1,1,1,1,1,1,1,1,  # 70 - 77\r
-    1,1,1,1,1,1,1,1,  # 78 - 7f\r
-    0,0,0,0,0,0,0,0,  # 80 - 87\r
-    0,0,0,0,0,0,0,0,  # 88 - 8f\r
-    0,0,0,0,0,0,0,0,  # 90 - 97\r
-    0,0,0,0,0,0,0,0,  # 98 - 9f\r
-    0,2,2,2,2,2,2,2,  # a0 - a7\r
-    2,2,2,2,2,3,3,3,  # a8 - af\r
-    2,2,2,2,2,2,2,2,  # b0 - b7\r
-    2,2,2,2,2,2,2,2,  # b8 - bf\r
-    2,2,2,2,2,2,2,2,  # c0 - c7\r
-    2,3,2,2,2,2,2,2,  # c8 - cf\r
-    2,2,2,2,2,2,2,2,  # d0 - d7\r
-    2,2,2,2,2,2,2,2,  # d8 - df\r
-    2,2,2,2,2,2,2,2,  # e0 - e7\r
-    2,2,2,2,2,2,2,2,  # e8 - ef\r
-    2,2,2,2,2,2,2,2,  # f0 - f7\r
-    2,2,2,2,2,2,2,0   # f8 - ff\r
-)\r
-\r
-EUCKR_st = (\r
-    eError,eStart,     3,eError,eError,eError,eError,eError,#00-07\r
-    eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart #08-0f\r
-)\r
-\r
-EUCKRCharLenTable = (0, 1, 2, 0)\r
-\r
-EUCKRSMModel = {'classTable': EUCKR_cls,\r
-                'classFactor': 4,\r
-                'stateTable': EUCKR_st,\r
-                'charLenTable': EUCKRCharLenTable,\r
-                'name': 'EUC-KR'}\r
-\r
-# EUC-TW\r
-\r
-EUCTW_cls = (\r
-    2,2,2,2,2,2,2,2,  # 00 - 07\r
-    2,2,2,2,2,2,0,0,  # 08 - 0f\r
-    2,2,2,2,2,2,2,2,  # 10 - 17\r
-    2,2,2,0,2,2,2,2,  # 18 - 1f\r
-    2,2,2,2,2,2,2,2,  # 20 - 27\r
-    2,2,2,2,2,2,2,2,  # 28 - 2f\r
-    2,2,2,2,2,2,2,2,  # 30 - 37\r
-    2,2,2,2,2,2,2,2,  # 38 - 3f\r
-    2,2,2,2,2,2,2,2,  # 40 - 47\r
-    2,2,2,2,2,2,2,2,  # 48 - 4f\r
-    2,2,2,2,2,2,2,2,  # 50 - 57\r
-    2,2,2,2,2,2,2,2,  # 58 - 5f\r
-    2,2,2,2,2,2,2,2,  # 60 - 67\r
-    2,2,2,2,2,2,2,2,  # 68 - 6f\r
-    2,2,2,2,2,2,2,2,  # 70 - 77\r
-    2,2,2,2,2,2,2,2,  # 78 - 7f\r
-    0,0,0,0,0,0,0,0,  # 80 - 87\r
-    0,0,0,0,0,0,6,0,  # 88 - 8f\r
-    0,0,0,0,0,0,0,0,  # 90 - 97\r
-    0,0,0,0,0,0,0,0,  # 98 - 9f\r
-    0,3,4,4,4,4,4,4,  # a0 - a7\r
-    5,5,1,1,1,1,1,1,  # a8 - af\r
-    1,1,1,1,1,1,1,1,  # b0 - b7\r
-    1,1,1,1,1,1,1,1,  # b8 - bf\r
-    1,1,3,1,3,3,3,3,  # c0 - c7\r
-    3,3,3,3,3,3,3,3,  # c8 - cf\r
-    3,3,3,3,3,3,3,3,  # d0 - d7\r
-    3,3,3,3,3,3,3,3,  # d8 - df\r
-    3,3,3,3,3,3,3,3,  # e0 - e7\r
-    3,3,3,3,3,3,3,3,  # e8 - ef\r
-    3,3,3,3,3,3,3,3,  # f0 - f7\r
-    3,3,3,3,3,3,3,0   # f8 - ff\r
-)\r
-\r
-EUCTW_st = (\r
-    eError,eError,eStart,     3,     3,     3,     4,eError,#00-07\r
-    eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f\r
-    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17\r
-    eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f\r
-         5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27\r
-    eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f\r
-)\r
-\r
-EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3)\r
-\r
-EUCTWSMModel = {'classTable': EUCTW_cls,\r
-                'classFactor': 7,\r
-                'stateTable': EUCTW_st,\r
-                'charLenTable': EUCTWCharLenTable,\r
-                'name': 'x-euc-tw'}\r
-\r
-# GB2312\r
-\r
-GB2312_cls = (\r
-    1,1,1,1,1,1,1,1,  # 00 - 07\r
-    1,1,1,1,1,1,0,0,  # 08 - 0f\r
-    1,1,1,1,1,1,1,1,  # 10 - 17\r
-    1,1,1,0,1,1,1,1,  # 18 - 1f\r
-    1,1,1,1,1,1,1,1,  # 20 - 27\r
-    1,1,1,1,1,1,1,1,  # 28 - 2f\r
-    3,3,3,3,3,3,3,3,  # 30 - 37\r
-    3,3,1,1,1,1,1,1,  # 38 - 3f\r
-    2,2,2,2,2,2,2,2,  # 40 - 47\r
-    2,2,2,2,2,2,2,2,  # 48 - 4f\r
-    2,2,2,2,2,2,2,2,  # 50 - 57\r
-    2,2,2,2,2,2,2,2,  # 58 - 5f\r
-    2,2,2,2,2,2,2,2,  # 60 - 67\r
-    2,2,2,2,2,2,2,2,  # 68 - 6f\r
-    2,2,2,2,2,2,2,2,  # 70 - 77\r
-    2,2,2,2,2,2,2,4,  # 78 - 7f\r
-    5,6,6,6,6,6,6,6,  # 80 - 87\r
-    6,6,6,6,6,6,6,6,  # 88 - 8f\r
-    6,6,6,6,6,6,6,6,  # 90 - 97\r
-    6,6,6,6,6,6,6,6,  # 98 - 9f\r
-    6,6,6,6,6,6,6,6,  # a0 - a7\r
-    6,6,6,6,6,6,6,6,  # a8 - af\r
-    6,6,6,6,6,6,6,6,  # b0 - b7\r
-    6,6,6,6,6,6,6,6,  # b8 - bf\r
-    6,6,6,6,6,6,6,6,  # c0 - c7\r
-    6,6,6,6,6,6,6,6,  # c8 - cf\r
-    6,6,6,6,6,6,6,6,  # d0 - d7\r
-    6,6,6,6,6,6,6,6,  # d8 - df\r
-    6,6,6,6,6,6,6,6,  # e0 - e7\r
-    6,6,6,6,6,6,6,6,  # e8 - ef\r
-    6,6,6,6,6,6,6,6,  # f0 - f7\r
-    6,6,6,6,6,6,6,0   # f8 - ff\r
-)\r
-\r
-GB2312_st = (\r
-    eError,eStart,eStart,eStart,eStart,eStart,     3,eError,#00-07\r
-    eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f\r
-    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17\r
-         4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f\r
-    eError,eError,     5,eError,eError,eError,eItsMe,eError,#20-27\r
-    eError,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f\r
-)\r
-\r
-# To be accurate, the length of class 6 can be either 2 or 4.\r
-# But it is not necessary to discriminate between the two since\r
-# it is used for frequency analysis only, and we are validing\r
-# each code range there as well. So it is safe to set it to be\r
-# 2 here.\r
-GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2)\r
-\r
-GB2312SMModel = {'classTable': GB2312_cls,\r
-                  'classFactor': 7,\r
-                  'stateTable': GB2312_st,\r
-                  'charLenTable': GB2312CharLenTable,\r
-                  'name': 'GB2312'}\r
-\r
-# Shift_JIS\r
-\r
-SJIS_cls = (\r
-    1,1,1,1,1,1,1,1,  # 00 - 07\r
-    1,1,1,1,1,1,0,0,  # 08 - 0f\r
-    1,1,1,1,1,1,1,1,  # 10 - 17\r
-    1,1,1,0,1,1,1,1,  # 18 - 1f\r
-    1,1,1,1,1,1,1,1,  # 20 - 27\r
-    1,1,1,1,1,1,1,1,  # 28 - 2f\r
-    1,1,1,1,1,1,1,1,  # 30 - 37\r
-    1,1,1,1,1,1,1,1,  # 38 - 3f\r
-    2,2,2,2,2,2,2,2,  # 40 - 47\r
-    2,2,2,2,2,2,2,2,  # 48 - 4f\r
-    2,2,2,2,2,2,2,2,  # 50 - 57\r
-    2,2,2,2,2,2,2,2,  # 58 - 5f\r
-    2,2,2,2,2,2,2,2,  # 60 - 67\r
-    2,2,2,2,2,2,2,2,  # 68 - 6f\r
-    2,2,2,2,2,2,2,2,  # 70 - 77\r
-    2,2,2,2,2,2,2,1,  # 78 - 7f\r
-    3,3,3,3,3,3,3,3,  # 80 - 87\r
-    3,3,3,3,3,3,3,3,  # 88 - 8f\r
-    3,3,3,3,3,3,3,3,  # 90 - 97\r
-    3,3,3,3,3,3,3,3,  # 98 - 9f\r
-    #0xa0 is illegal in sjis encoding, but some pages does\r
-    #contain such byte. We need to be more error forgiven.\r
-    2,2,2,2,2,2,2,2,  # a0 - a7\r
-    2,2,2,2,2,2,2,2,  # a8 - af\r
-    2,2,2,2,2,2,2,2,  # b0 - b7\r
-    2,2,2,2,2,2,2,2,  # b8 - bf\r
-    2,2,2,2,2,2,2,2,  # c0 - c7\r
-    2,2,2,2,2,2,2,2,  # c8 - cf\r
-    2,2,2,2,2,2,2,2,  # d0 - d7\r
-    2,2,2,2,2,2,2,2,  # d8 - df\r
-    3,3,3,3,3,3,3,3,  # e0 - e7\r
-    3,3,3,3,3,4,4,4,  # e8 - ef\r
-    4,4,4,4,4,4,4,4,  # f0 - f7\r
-    4,4,4,4,4,0,0,0   # f8 - ff\r
-)\r
-\r
-\r
-SJIS_st = (\r
-    eError,eStart,eStart,     3,eError,eError,eError,eError,#00-07\r
-    eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f\r
-    eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart #10-17\r
-)\r
-\r
-SJISCharLenTable = (0, 1, 1, 2, 0, 0)\r
-\r
-SJISSMModel = {'classTable': SJIS_cls,\r
-               'classFactor': 6,\r
-               'stateTable': SJIS_st,\r
-               'charLenTable': SJISCharLenTable,\r
-               'name': 'Shift_JIS'}\r
-\r
-# UCS2-BE\r
-\r
-UCS2BE_cls = (\r
-    0,0,0,0,0,0,0,0,  # 00 - 07\r
-    0,0,1,0,0,2,0,0,  # 08 - 0f\r
-    0,0,0,0,0,0,0,0,  # 10 - 17\r
-    0,0,0,3,0,0,0,0,  # 18 - 1f\r
-    0,0,0,0,0,0,0,0,  # 20 - 27\r
-    0,3,3,3,3,3,0,0,  # 28 - 2f\r
-    0,0,0,0,0,0,0,0,  # 30 - 37\r
-    0,0,0,0,0,0,0,0,  # 38 - 3f\r
-    0,0,0,0,0,0,0,0,  # 40 - 47\r
-    0,0,0,0,0,0,0,0,  # 48 - 4f\r
-    0,0,0,0,0,0,0,0,  # 50 - 57\r
-    0,0,0,0,0,0,0,0,  # 58 - 5f\r
-    0,0,0,0,0,0,0,0,  # 60 - 67\r
-    0,0,0,0,0,0,0,0,  # 68 - 6f\r
-    0,0,0,0,0,0,0,0,  # 70 - 77\r
-    0,0,0,0,0,0,0,0,  # 78 - 7f\r
-    0,0,0,0,0,0,0,0,  # 80 - 87\r
-    0,0,0,0,0,0,0,0,  # 88 - 8f\r
-    0,0,0,0,0,0,0,0,  # 90 - 97\r
-    0,0,0,0,0,0,0,0,  # 98 - 9f\r
-    0,0,0,0,0,0,0,0,  # a0 - a7\r
-    0,0,0,0,0,0,0,0,  # a8 - af\r
-    0,0,0,0,0,0,0,0,  # b0 - b7\r
-    0,0,0,0,0,0,0,0,  # b8 - bf\r
-    0,0,0,0,0,0,0,0,  # c0 - c7\r
-    0,0,0,0,0,0,0,0,  # c8 - cf\r
-    0,0,0,0,0,0,0,0,  # d0 - d7\r
-    0,0,0,0,0,0,0,0,  # d8 - df\r
-    0,0,0,0,0,0,0,0,  # e0 - e7\r
-    0,0,0,0,0,0,0,0,  # e8 - ef\r
-    0,0,0,0,0,0,0,0,  # f0 - f7\r
-    0,0,0,0,0,0,4,5   # f8 - ff\r
-)\r
-\r
-UCS2BE_st  = (\r
-          5,     7,     7,eError,     4,     3,eError,eError,#00-07\r
-     eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f\r
-     eItsMe,eItsMe,     6,     6,     6,     6,eError,eError,#10-17\r
-          6,     6,     6,     6,     6,eItsMe,     6,     6,#18-1f\r
-          6,     6,     6,     6,     5,     7,     7,eError,#20-27\r
-          5,     8,     6,     6,eError,     6,     6,     6,#28-2f\r
-          6,     6,     6,     6,eError,eError,eStart,eStart #30-37\r
-)\r
-\r
-UCS2BECharLenTable = (2, 2, 2, 0, 2, 2)\r
-\r
-UCS2BESMModel = {'classTable': UCS2BE_cls,\r
-                 'classFactor': 6,\r
-                 'stateTable': UCS2BE_st,\r
-                 'charLenTable': UCS2BECharLenTable,\r
-                 'name': 'UTF-16BE'}\r
-\r
-# UCS2-LE\r
-\r
-UCS2LE_cls = (\r
-    0,0,0,0,0,0,0,0,  # 00 - 07\r
-    0,0,1,0,0,2,0,0,  # 08 - 0f\r
-    0,0,0,0,0,0,0,0,  # 10 - 17\r
-    0,0,0,3,0,0,0,0,  # 18 - 1f\r
-    0,0,0,0,0,0,0,0,  # 20 - 27\r
-    0,3,3,3,3,3,0,0,  # 28 - 2f\r
-    0,0,0,0,0,0,0,0,  # 30 - 37\r
-    0,0,0,0,0,0,0,0,  # 38 - 3f\r
-    0,0,0,0,0,0,0,0,  # 40 - 47\r
-    0,0,0,0,0,0,0,0,  # 48 - 4f\r
-    0,0,0,0,0,0,0,0,  # 50 - 57\r
-    0,0,0,0,0,0,0,0,  # 58 - 5f\r
-    0,0,0,0,0,0,0,0,  # 60 - 67\r
-    0,0,0,0,0,0,0,0,  # 68 - 6f\r
-    0,0,0,0,0,0,0,0,  # 70 - 77\r
-    0,0,0,0,0,0,0,0,  # 78 - 7f\r
-    0,0,0,0,0,0,0,0,  # 80 - 87\r
-    0,0,0,0,0,0,0,0,  # 88 - 8f\r
-    0,0,0,0,0,0,0,0,  # 90 - 97\r
-    0,0,0,0,0,0,0,0,  # 98 - 9f\r
-    0,0,0,0,0,0,0,0,  # a0 - a7\r
-    0,0,0,0,0,0,0,0,  # a8 - af\r
-    0,0,0,0,0,0,0,0,  # b0 - b7\r
-    0,0,0,0,0,0,0,0,  # b8 - bf\r
-    0,0,0,0,0,0,0,0,  # c0 - c7\r
-    0,0,0,0,0,0,0,0,  # c8 - cf\r
-    0,0,0,0,0,0,0,0,  # d0 - d7\r
-    0,0,0,0,0,0,0,0,  # d8 - df\r
-    0,0,0,0,0,0,0,0,  # e0 - e7\r
-    0,0,0,0,0,0,0,0,  # e8 - ef\r
-    0,0,0,0,0,0,0,0,  # f0 - f7\r
-    0,0,0,0,0,0,4,5   # f8 - ff\r
-)\r
-\r
-UCS2LE_st = (\r
-          6,     6,     7,     6,     4,     3,eError,eError,#00-07\r
-     eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f\r
-     eItsMe,eItsMe,     5,     5,     5,eError,eItsMe,eError,#10-17\r
-          5,     5,     5,eError,     5,eError,     6,     6,#18-1f\r
-          7,     6,     8,     8,     5,     5,     5,eError,#20-27\r
-          5,     5,     5,eError,eError,eError,     5,     5,#28-2f\r
-          5,     5,     5,eError,     5,eError,eStart,eStart #30-37\r
-)\r
-\r
-UCS2LECharLenTable = (2, 2, 2, 2, 2, 2)\r
-\r
-UCS2LESMModel = {'classTable': UCS2LE_cls,\r
-                 'classFactor': 6,\r
-                 'stateTable': UCS2LE_st,\r
-                 'charLenTable': UCS2LECharLenTable,\r
-                 'name': 'UTF-16LE'}\r
-\r
-# UTF-8\r
-\r
-UTF8_cls = (\r
-    1,1,1,1,1,1,1,1,  # 00 - 07  #allow 0x00 as a legal value\r
-    1,1,1,1,1,1,0,0,  # 08 - 0f\r
-    1,1,1,1,1,1,1,1,  # 10 - 17\r
-    1,1,1,0,1,1,1,1,  # 18 - 1f\r
-    1,1,1,1,1,1,1,1,  # 20 - 27\r
-    1,1,1,1,1,1,1,1,  # 28 - 2f\r
-    1,1,1,1,1,1,1,1,  # 30 - 37\r
-    1,1,1,1,1,1,1,1,  # 38 - 3f\r
-    1,1,1,1,1,1,1,1,  # 40 - 47\r
-    1,1,1,1,1,1,1,1,  # 48 - 4f\r
-    1,1,1,1,1,1,1,1,  # 50 - 57\r
-    1,1,1,1,1,1,1,1,  # 58 - 5f\r
-    1,1,1,1,1,1,1,1,  # 60 - 67\r
-    1,1,1,1,1,1,1,1,  # 68 - 6f\r
-    1,1,1,1,1,1,1,1,  # 70 - 77\r
-    1,1,1,1,1,1,1,1,  # 78 - 7f\r
-    2,2,2,2,3,3,3,3,  # 80 - 87\r
-    4,4,4,4,4,4,4,4,  # 88 - 8f\r
-    4,4,4,4,4,4,4,4,  # 90 - 97\r
-    4,4,4,4,4,4,4,4,  # 98 - 9f\r
-    5,5,5,5,5,5,5,5,  # a0 - a7\r
-    5,5,5,5,5,5,5,5,  # a8 - af\r
-    5,5,5,5,5,5,5,5,  # b0 - b7\r
-    5,5,5,5,5,5,5,5,  # b8 - bf\r
-    0,0,6,6,6,6,6,6,  # c0 - c7\r
-    6,6,6,6,6,6,6,6,  # c8 - cf\r
-    6,6,6,6,6,6,6,6,  # d0 - d7\r
-    6,6,6,6,6,6,6,6,  # d8 - df\r
-    7,8,8,8,8,8,8,8,  # e0 - e7\r
-    8,8,8,8,8,9,8,8,  # e8 - ef\r
-    10,11,11,11,11,11,11,11,  # f0 - f7\r
-    12,13,13,13,14,15,0,0    # f8 - ff\r
-)\r
-\r
-UTF8_st = (\r
-    eError,eStart,eError,eError,eError,eError,     12,   10,#00-07\r
-         9,     11,     8,     7,     6,     5,     4,    3,#08-0f\r
-    eError,eError,eError,eError,eError,eError,eError,eError,#10-17\r
-    eError,eError,eError,eError,eError,eError,eError,eError,#18-1f\r
-    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27\r
-    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f\r
-    eError,eError,     5,     5,     5,     5,eError,eError,#30-37\r
-    eError,eError,eError,eError,eError,eError,eError,eError,#38-3f\r
-    eError,eError,eError,     5,     5,     5,eError,eError,#40-47\r
-    eError,eError,eError,eError,eError,eError,eError,eError,#48-4f\r
-    eError,eError,     7,     7,     7,     7,eError,eError,#50-57\r
-    eError,eError,eError,eError,eError,eError,eError,eError,#58-5f\r
-    eError,eError,eError,eError,     7,     7,eError,eError,#60-67\r
-    eError,eError,eError,eError,eError,eError,eError,eError,#68-6f\r
-    eError,eError,     9,     9,     9,     9,eError,eError,#70-77\r
-    eError,eError,eError,eError,eError,eError,eError,eError,#78-7f\r
-    eError,eError,eError,eError,eError,     9,eError,eError,#80-87\r
-    eError,eError,eError,eError,eError,eError,eError,eError,#88-8f\r
-    eError,eError,    12,    12,    12,    12,eError,eError,#90-97\r
-    eError,eError,eError,eError,eError,eError,eError,eError,#98-9f\r
-    eError,eError,eError,eError,eError,    12,eError,eError,#a0-a7\r
-    eError,eError,eError,eError,eError,eError,eError,eError,#a8-af\r
-    eError,eError,    12,    12,    12,eError,eError,eError,#b0-b7\r
-    eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf\r
-    eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7\r
-    eError,eError,eError,eError,eError,eError,eError,eError #c8-cf\r
-)\r
-\r
-UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)\r
-\r
-UTF8SMModel = {'classTable': UTF8_cls,\r
-               'classFactor': 16,\r
-               'stateTable': UTF8_st,\r
-               'charLenTable': UTF8CharLenTable,\r
-               'name': 'UTF-8'}\r
-\r
-# flake8: noqa\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+from .constants import eStart, eError, eItsMe
+
+# BIG5
+
+BIG5_cls = (
+    1,1,1,1,1,1,1,1,  # 00 - 07    #allow 0x00 as legal value
+    1,1,1,1,1,1,0,0,  # 08 - 0f
+    1,1,1,1,1,1,1,1,  # 10 - 17
+    1,1,1,0,1,1,1,1,  # 18 - 1f
+    1,1,1,1,1,1,1,1,  # 20 - 27
+    1,1,1,1,1,1,1,1,  # 28 - 2f
+    1,1,1,1,1,1,1,1,  # 30 - 37
+    1,1,1,1,1,1,1,1,  # 38 - 3f
+    2,2,2,2,2,2,2,2,  # 40 - 47
+    2,2,2,2,2,2,2,2,  # 48 - 4f
+    2,2,2,2,2,2,2,2,  # 50 - 57
+    2,2,2,2,2,2,2,2,  # 58 - 5f
+    2,2,2,2,2,2,2,2,  # 60 - 67
+    2,2,2,2,2,2,2,2,  # 68 - 6f
+    2,2,2,2,2,2,2,2,  # 70 - 77
+    2,2,2,2,2,2,2,1,  # 78 - 7f
+    4,4,4,4,4,4,4,4,  # 80 - 87
+    4,4,4,4,4,4,4,4,  # 88 - 8f
+    4,4,4,4,4,4,4,4,  # 90 - 97
+    4,4,4,4,4,4,4,4,  # 98 - 9f
+    4,3,3,3,3,3,3,3,  # a0 - a7
+    3,3,3,3,3,3,3,3,  # a8 - af
+    3,3,3,3,3,3,3,3,  # b0 - b7
+    3,3,3,3,3,3,3,3,  # b8 - bf
+    3,3,3,3,3,3,3,3,  # c0 - c7
+    3,3,3,3,3,3,3,3,  # c8 - cf
+    3,3,3,3,3,3,3,3,  # d0 - d7
+    3,3,3,3,3,3,3,3,  # d8 - df
+    3,3,3,3,3,3,3,3,  # e0 - e7
+    3,3,3,3,3,3,3,3,  # e8 - ef
+    3,3,3,3,3,3,3,3,  # f0 - f7
+    3,3,3,3,3,3,3,0  # f8 - ff
+)
+
+BIG5_st = (
+    eError,eStart,eStart,     3,eError,eError,eError,eError,#00-07
+    eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f
+    eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart#10-17
+)
+
+Big5CharLenTable = (0, 1, 1, 2, 0)
+
+Big5SMModel = {'classTable': BIG5_cls,
+               'classFactor': 5,
+               'stateTable': BIG5_st,
+               'charLenTable': Big5CharLenTable,
+               'name': 'Big5'}
+
+# CP949
+
+CP949_cls  = (
+    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0,  # 00 - 0f
+    1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1,  # 10 - 1f
+    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,  # 20 - 2f
+    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,  # 30 - 3f
+    1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4,  # 40 - 4f
+    4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1,  # 50 - 5f
+    1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,  # 60 - 6f
+    5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1,  # 70 - 7f
+    0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,  # 80 - 8f
+    6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,  # 90 - 9f
+    6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8,  # a0 - af
+    7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,  # b0 - bf
+    7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2,  # c0 - cf
+    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,  # d0 - df
+    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,  # e0 - ef
+    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0,  # f0 - ff
+)
+
+CP949_st = (
+#cls=    0      1      2      3      4      5      6      7      8      9  # previous state =
+    eError,eStart,     3,eError,eStart,eStart,     4,     5,eError,     6, # eStart
+    eError,eError,eError,eError,eError,eError,eError,eError,eError,eError, # eError
+    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe, # eItsMe
+    eError,eError,eStart,eStart,eError,eError,eError,eStart,eStart,eStart, # 3
+    eError,eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 4
+    eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 5
+    eError,eStart,eStart,eStart,eStart,eError,eError,eStart,eStart,eStart, # 6
+)
+
+CP949CharLenTable = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
+
+CP949SMModel = {'classTable': CP949_cls,
+                'classFactor': 10,
+                'stateTable': CP949_st,
+                'charLenTable': CP949CharLenTable,
+                'name': 'CP949'}
+
+# EUC-JP
+
+EUCJP_cls = (
+    4,4,4,4,4,4,4,4,  # 00 - 07
+    4,4,4,4,4,4,5,5,  # 08 - 0f
+    4,4,4,4,4,4,4,4,  # 10 - 17
+    4,4,4,5,4,4,4,4,  # 18 - 1f
+    4,4,4,4,4,4,4,4,  # 20 - 27
+    4,4,4,4,4,4,4,4,  # 28 - 2f
+    4,4,4,4,4,4,4,4,  # 30 - 37
+    4,4,4,4,4,4,4,4,  # 38 - 3f
+    4,4,4,4,4,4,4,4,  # 40 - 47
+    4,4,4,4,4,4,4,4,  # 48 - 4f
+    4,4,4,4,4,4,4,4,  # 50 - 57
+    4,4,4,4,4,4,4,4,  # 58 - 5f
+    4,4,4,4,4,4,4,4,  # 60 - 67
+    4,4,4,4,4,4,4,4,  # 68 - 6f
+    4,4,4,4,4,4,4,4,  # 70 - 77
+    4,4,4,4,4,4,4,4,  # 78 - 7f
+    5,5,5,5,5,5,5,5,  # 80 - 87
+    5,5,5,5,5,5,1,3,  # 88 - 8f
+    5,5,5,5,5,5,5,5,  # 90 - 97
+    5,5,5,5,5,5,5,5,  # 98 - 9f
+    5,2,2,2,2,2,2,2,  # a0 - a7
+    2,2,2,2,2,2,2,2,  # a8 - af
+    2,2,2,2,2,2,2,2,  # b0 - b7
+    2,2,2,2,2,2,2,2,  # b8 - bf
+    2,2,2,2,2,2,2,2,  # c0 - c7
+    2,2,2,2,2,2,2,2,  # c8 - cf
+    2,2,2,2,2,2,2,2,  # d0 - d7
+    2,2,2,2,2,2,2,2,  # d8 - df
+    0,0,0,0,0,0,0,0,  # e0 - e7
+    0,0,0,0,0,0,0,0,  # e8 - ef
+    0,0,0,0,0,0,0,0,  # f0 - f7
+    0,0,0,0,0,0,0,5  # f8 - ff
+)
+
+EUCJP_st = (
+          3,     4,     3,     5,eStart,eError,eError,eError,#00-07
+     eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
+     eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17
+     eError,eError,eStart,eError,eError,eError,     3,eError,#18-1f
+          3,eError,eError,eError,eStart,eStart,eStart,eStart#20-27
+)
+
+EUCJPCharLenTable = (2, 2, 2, 3, 1, 0)
+
+EUCJPSMModel = {'classTable': EUCJP_cls,
+                'classFactor': 6,
+                'stateTable': EUCJP_st,
+                'charLenTable': EUCJPCharLenTable,
+                'name': 'EUC-JP'}
+
+# EUC-KR
+
+EUCKR_cls  = (
+    1,1,1,1,1,1,1,1,  # 00 - 07
+    1,1,1,1,1,1,0,0,  # 08 - 0f
+    1,1,1,1,1,1,1,1,  # 10 - 17
+    1,1,1,0,1,1,1,1,  # 18 - 1f
+    1,1,1,1,1,1,1,1,  # 20 - 27
+    1,1,1,1,1,1,1,1,  # 28 - 2f
+    1,1,1,1,1,1,1,1,  # 30 - 37
+    1,1,1,1,1,1,1,1,  # 38 - 3f
+    1,1,1,1,1,1,1,1,  # 40 - 47
+    1,1,1,1,1,1,1,1,  # 48 - 4f
+    1,1,1,1,1,1,1,1,  # 50 - 57
+    1,1,1,1,1,1,1,1,  # 58 - 5f
+    1,1,1,1,1,1,1,1,  # 60 - 67
+    1,1,1,1,1,1,1,1,  # 68 - 6f
+    1,1,1,1,1,1,1,1,  # 70 - 77
+    1,1,1,1,1,1,1,1,  # 78 - 7f
+    0,0,0,0,0,0,0,0,  # 80 - 87
+    0,0,0,0,0,0,0,0,  # 88 - 8f
+    0,0,0,0,0,0,0,0,  # 90 - 97
+    0,0,0,0,0,0,0,0,  # 98 - 9f
+    0,2,2,2,2,2,2,2,  # a0 - a7
+    2,2,2,2,2,3,3,3,  # a8 - af
+    2,2,2,2,2,2,2,2,  # b0 - b7
+    2,2,2,2,2,2,2,2,  # b8 - bf
+    2,2,2,2,2,2,2,2,  # c0 - c7
+    2,3,2,2,2,2,2,2,  # c8 - cf
+    2,2,2,2,2,2,2,2,  # d0 - d7
+    2,2,2,2,2,2,2,2,  # d8 - df
+    2,2,2,2,2,2,2,2,  # e0 - e7
+    2,2,2,2,2,2,2,2,  # e8 - ef
+    2,2,2,2,2,2,2,2,  # f0 - f7
+    2,2,2,2,2,2,2,0   # f8 - ff
+)
+
+EUCKR_st = (
+    eError,eStart,     3,eError,eError,eError,eError,eError,#00-07
+    eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart #08-0f
+)
+
+EUCKRCharLenTable = (0, 1, 2, 0)
+
+EUCKRSMModel = {'classTable': EUCKR_cls,
+                'classFactor': 4,
+                'stateTable': EUCKR_st,
+                'charLenTable': EUCKRCharLenTable,
+                'name': 'EUC-KR'}
+
+# EUC-TW
+
+EUCTW_cls = (
+    2,2,2,2,2,2,2,2,  # 00 - 07
+    2,2,2,2,2,2,0,0,  # 08 - 0f
+    2,2,2,2,2,2,2,2,  # 10 - 17
+    2,2,2,0,2,2,2,2,  # 18 - 1f
+    2,2,2,2,2,2,2,2,  # 20 - 27
+    2,2,2,2,2,2,2,2,  # 28 - 2f
+    2,2,2,2,2,2,2,2,  # 30 - 37
+    2,2,2,2,2,2,2,2,  # 38 - 3f
+    2,2,2,2,2,2,2,2,  # 40 - 47
+    2,2,2,2,2,2,2,2,  # 48 - 4f
+    2,2,2,2,2,2,2,2,  # 50 - 57
+    2,2,2,2,2,2,2,2,  # 58 - 5f
+    2,2,2,2,2,2,2,2,  # 60 - 67
+    2,2,2,2,2,2,2,2,  # 68 - 6f
+    2,2,2,2,2,2,2,2,  # 70 - 77
+    2,2,2,2,2,2,2,2,  # 78 - 7f
+    0,0,0,0,0,0,0,0,  # 80 - 87
+    0,0,0,0,0,0,6,0,  # 88 - 8f
+    0,0,0,0,0,0,0,0,  # 90 - 97
+    0,0,0,0,0,0,0,0,  # 98 - 9f
+    0,3,4,4,4,4,4,4,  # a0 - a7
+    5,5,1,1,1,1,1,1,  # a8 - af
+    1,1,1,1,1,1,1,1,  # b0 - b7
+    1,1,1,1,1,1,1,1,  # b8 - bf
+    1,1,3,1,3,3,3,3,  # c0 - c7
+    3,3,3,3,3,3,3,3,  # c8 - cf
+    3,3,3,3,3,3,3,3,  # d0 - d7
+    3,3,3,3,3,3,3,3,  # d8 - df
+    3,3,3,3,3,3,3,3,  # e0 - e7
+    3,3,3,3,3,3,3,3,  # e8 - ef
+    3,3,3,3,3,3,3,3,  # f0 - f7
+    3,3,3,3,3,3,3,0   # f8 - ff
+)
+
+EUCTW_st = (
+    eError,eError,eStart,     3,     3,     3,     4,eError,#00-07
+    eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f
+    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17
+    eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f
+         5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27
+    eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f
+)
+
+EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3)
+
+EUCTWSMModel = {'classTable': EUCTW_cls,
+                'classFactor': 7,
+                'stateTable': EUCTW_st,
+                'charLenTable': EUCTWCharLenTable,
+                'name': 'x-euc-tw'}
+
+# GB2312
+
+GB2312_cls = (
+    1,1,1,1,1,1,1,1,  # 00 - 07
+    1,1,1,1,1,1,0,0,  # 08 - 0f
+    1,1,1,1,1,1,1,1,  # 10 - 17
+    1,1,1,0,1,1,1,1,  # 18 - 1f
+    1,1,1,1,1,1,1,1,  # 20 - 27
+    1,1,1,1,1,1,1,1,  # 28 - 2f
+    3,3,3,3,3,3,3,3,  # 30 - 37
+    3,3,1,1,1,1,1,1,  # 38 - 3f
+    2,2,2,2,2,2,2,2,  # 40 - 47
+    2,2,2,2,2,2,2,2,  # 48 - 4f
+    2,2,2,2,2,2,2,2,  # 50 - 57
+    2,2,2,2,2,2,2,2,  # 58 - 5f
+    2,2,2,2,2,2,2,2,  # 60 - 67
+    2,2,2,2,2,2,2,2,  # 68 - 6f
+    2,2,2,2,2,2,2,2,  # 70 - 77
+    2,2,2,2,2,2,2,4,  # 78 - 7f
+    5,6,6,6,6,6,6,6,  # 80 - 87
+    6,6,6,6,6,6,6,6,  # 88 - 8f
+    6,6,6,6,6,6,6,6,  # 90 - 97
+    6,6,6,6,6,6,6,6,  # 98 - 9f
+    6,6,6,6,6,6,6,6,  # a0 - a7
+    6,6,6,6,6,6,6,6,  # a8 - af
+    6,6,6,6,6,6,6,6,  # b0 - b7
+    6,6,6,6,6,6,6,6,  # b8 - bf
+    6,6,6,6,6,6,6,6,  # c0 - c7
+    6,6,6,6,6,6,6,6,  # c8 - cf
+    6,6,6,6,6,6,6,6,  # d0 - d7
+    6,6,6,6,6,6,6,6,  # d8 - df
+    6,6,6,6,6,6,6,6,  # e0 - e7
+    6,6,6,6,6,6,6,6,  # e8 - ef
+    6,6,6,6,6,6,6,6,  # f0 - f7
+    6,6,6,6,6,6,6,0   # f8 - ff
+)
+
+GB2312_st = (
+    eError,eStart,eStart,eStart,eStart,eStart,     3,eError,#00-07
+    eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f
+    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17
+         4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f
+    eError,eError,     5,eError,eError,eError,eItsMe,eError,#20-27
+    eError,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f
+)
+
+# To be accurate, the length of class 6 can be either 2 or 4.
+# But it is not necessary to discriminate between the two since
+# it is used for frequency analysis only, and we are validing
+# each code range there as well. So it is safe to set it to be
+# 2 here.
+GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2)
+
+GB2312SMModel = {'classTable': GB2312_cls,
+                  'classFactor': 7,
+                  'stateTable': GB2312_st,
+                  'charLenTable': GB2312CharLenTable,
+                  'name': 'GB2312'}
+
+# Shift_JIS
+
+SJIS_cls = (
+    1,1,1,1,1,1,1,1,  # 00 - 07
+    1,1,1,1,1,1,0,0,  # 08 - 0f
+    1,1,1,1,1,1,1,1,  # 10 - 17
+    1,1,1,0,1,1,1,1,  # 18 - 1f
+    1,1,1,1,1,1,1,1,  # 20 - 27
+    1,1,1,1,1,1,1,1,  # 28 - 2f
+    1,1,1,1,1,1,1,1,  # 30 - 37
+    1,1,1,1,1,1,1,1,  # 38 - 3f
+    2,2,2,2,2,2,2,2,  # 40 - 47
+    2,2,2,2,2,2,2,2,  # 48 - 4f
+    2,2,2,2,2,2,2,2,  # 50 - 57
+    2,2,2,2,2,2,2,2,  # 58 - 5f
+    2,2,2,2,2,2,2,2,  # 60 - 67
+    2,2,2,2,2,2,2,2,  # 68 - 6f
+    2,2,2,2,2,2,2,2,  # 70 - 77
+    2,2,2,2,2,2,2,1,  # 78 - 7f
+    3,3,3,3,3,3,3,3,  # 80 - 87
+    3,3,3,3,3,3,3,3,  # 88 - 8f
+    3,3,3,3,3,3,3,3,  # 90 - 97
+    3,3,3,3,3,3,3,3,  # 98 - 9f
+    #0xa0 is illegal in sjis encoding, but some pages does
+    #contain such byte. We need to be more error forgiven.
+    2,2,2,2,2,2,2,2,  # a0 - a7
+    2,2,2,2,2,2,2,2,  # a8 - af
+    2,2,2,2,2,2,2,2,  # b0 - b7
+    2,2,2,2,2,2,2,2,  # b8 - bf
+    2,2,2,2,2,2,2,2,  # c0 - c7
+    2,2,2,2,2,2,2,2,  # c8 - cf
+    2,2,2,2,2,2,2,2,  # d0 - d7
+    2,2,2,2,2,2,2,2,  # d8 - df
+    3,3,3,3,3,3,3,3,  # e0 - e7
+    3,3,3,3,3,4,4,4,  # e8 - ef
+    4,4,4,4,4,4,4,4,  # f0 - f7
+    4,4,4,4,4,0,0,0   # f8 - ff
+)
+
+
+SJIS_st = (
+    eError,eStart,eStart,     3,eError,eError,eError,eError,#00-07
+    eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
+    eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart #10-17
+)
+
+SJISCharLenTable = (0, 1, 1, 2, 0, 0)
+
+SJISSMModel = {'classTable': SJIS_cls,
+               'classFactor': 6,
+               'stateTable': SJIS_st,
+               'charLenTable': SJISCharLenTable,
+               'name': 'Shift_JIS'}
+
+# UCS2-BE
+
+UCS2BE_cls = (
+    0,0,0,0,0,0,0,0,  # 00 - 07
+    0,0,1,0,0,2,0,0,  # 08 - 0f
+    0,0,0,0,0,0,0,0,  # 10 - 17
+    0,0,0,3,0,0,0,0,  # 18 - 1f
+    0,0,0,0,0,0,0,0,  # 20 - 27
+    0,3,3,3,3,3,0,0,  # 28 - 2f
+    0,0,0,0,0,0,0,0,  # 30 - 37
+    0,0,0,0,0,0,0,0,  # 38 - 3f
+    0,0,0,0,0,0,0,0,  # 40 - 47
+    0,0,0,0,0,0,0,0,  # 48 - 4f
+    0,0,0,0,0,0,0,0,  # 50 - 57
+    0,0,0,0,0,0,0,0,  # 58 - 5f
+    0,0,0,0,0,0,0,0,  # 60 - 67
+    0,0,0,0,0,0,0,0,  # 68 - 6f
+    0,0,0,0,0,0,0,0,  # 70 - 77
+    0,0,0,0,0,0,0,0,  # 78 - 7f
+    0,0,0,0,0,0,0,0,  # 80 - 87
+    0,0,0,0,0,0,0,0,  # 88 - 8f
+    0,0,0,0,0,0,0,0,  # 90 - 97
+    0,0,0,0,0,0,0,0,  # 98 - 9f
+    0,0,0,0,0,0,0,0,  # a0 - a7
+    0,0,0,0,0,0,0,0,  # a8 - af
+    0,0,0,0,0,0,0,0,  # b0 - b7
+    0,0,0,0,0,0,0,0,  # b8 - bf
+    0,0,0,0,0,0,0,0,  # c0 - c7
+    0,0,0,0,0,0,0,0,  # c8 - cf
+    0,0,0,0,0,0,0,0,  # d0 - d7
+    0,0,0,0,0,0,0,0,  # d8 - df
+    0,0,0,0,0,0,0,0,  # e0 - e7
+    0,0,0,0,0,0,0,0,  # e8 - ef
+    0,0,0,0,0,0,0,0,  # f0 - f7
+    0,0,0,0,0,0,4,5   # f8 - ff
+)
+
+UCS2BE_st  = (
+          5,     7,     7,eError,     4,     3,eError,eError,#00-07
+     eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
+     eItsMe,eItsMe,     6,     6,     6,     6,eError,eError,#10-17
+          6,     6,     6,     6,     6,eItsMe,     6,     6,#18-1f
+          6,     6,     6,     6,     5,     7,     7,eError,#20-27
+          5,     8,     6,     6,eError,     6,     6,     6,#28-2f
+          6,     6,     6,     6,eError,eError,eStart,eStart #30-37
+)
+
+UCS2BECharLenTable = (2, 2, 2, 0, 2, 2)
+
+UCS2BESMModel = {'classTable': UCS2BE_cls,
+                 'classFactor': 6,
+                 'stateTable': UCS2BE_st,
+                 'charLenTable': UCS2BECharLenTable,
+                 'name': 'UTF-16BE'}
+
+# UCS2-LE
+
+UCS2LE_cls = (
+    0,0,0,0,0,0,0,0,  # 00 - 07
+    0,0,1,0,0,2,0,0,  # 08 - 0f
+    0,0,0,0,0,0,0,0,  # 10 - 17
+    0,0,0,3,0,0,0,0,  # 18 - 1f
+    0,0,0,0,0,0,0,0,  # 20 - 27
+    0,3,3,3,3,3,0,0,  # 28 - 2f
+    0,0,0,0,0,0,0,0,  # 30 - 37
+    0,0,0,0,0,0,0,0,  # 38 - 3f
+    0,0,0,0,0,0,0,0,  # 40 - 47
+    0,0,0,0,0,0,0,0,  # 48 - 4f
+    0,0,0,0,0,0,0,0,  # 50 - 57
+    0,0,0,0,0,0,0,0,  # 58 - 5f
+    0,0,0,0,0,0,0,0,  # 60 - 67
+    0,0,0,0,0,0,0,0,  # 68 - 6f
+    0,0,0,0,0,0,0,0,  # 70 - 77
+    0,0,0,0,0,0,0,0,  # 78 - 7f
+    0,0,0,0,0,0,0,0,  # 80 - 87
+    0,0,0,0,0,0,0,0,  # 88 - 8f
+    0,0,0,0,0,0,0,0,  # 90 - 97
+    0,0,0,0,0,0,0,0,  # 98 - 9f
+    0,0,0,0,0,0,0,0,  # a0 - a7
+    0,0,0,0,0,0,0,0,  # a8 - af
+    0,0,0,0,0,0,0,0,  # b0 - b7
+    0,0,0,0,0,0,0,0,  # b8 - bf
+    0,0,0,0,0,0,0,0,  # c0 - c7
+    0,0,0,0,0,0,0,0,  # c8 - cf
+    0,0,0,0,0,0,0,0,  # d0 - d7
+    0,0,0,0,0,0,0,0,  # d8 - df
+    0,0,0,0,0,0,0,0,  # e0 - e7
+    0,0,0,0,0,0,0,0,  # e8 - ef
+    0,0,0,0,0,0,0,0,  # f0 - f7
+    0,0,0,0,0,0,4,5   # f8 - ff
+)
+
+UCS2LE_st = (
+          6,     6,     7,     6,     4,     3,eError,eError,#00-07
+     eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
+     eItsMe,eItsMe,     5,     5,     5,eError,eItsMe,eError,#10-17
+          5,     5,     5,eError,     5,eError,     6,     6,#18-1f
+          7,     6,     8,     8,     5,     5,     5,eError,#20-27
+          5,     5,     5,eError,eError,eError,     5,     5,#28-2f
+          5,     5,     5,eError,     5,eError,eStart,eStart #30-37
+)
+
+UCS2LECharLenTable = (2, 2, 2, 2, 2, 2)
+
+UCS2LESMModel = {'classTable': UCS2LE_cls,
+                 'classFactor': 6,
+                 'stateTable': UCS2LE_st,
+                 'charLenTable': UCS2LECharLenTable,
+                 'name': 'UTF-16LE'}
+
+# UTF-8
+
+UTF8_cls = (
+    1,1,1,1,1,1,1,1,  # 00 - 07  #allow 0x00 as a legal value
+    1,1,1,1,1,1,0,0,  # 08 - 0f
+    1,1,1,1,1,1,1,1,  # 10 - 17
+    1,1,1,0,1,1,1,1,  # 18 - 1f
+    1,1,1,1,1,1,1,1,  # 20 - 27
+    1,1,1,1,1,1,1,1,  # 28 - 2f
+    1,1,1,1,1,1,1,1,  # 30 - 37
+    1,1,1,1,1,1,1,1,  # 38 - 3f
+    1,1,1,1,1,1,1,1,  # 40 - 47
+    1,1,1,1,1,1,1,1,  # 48 - 4f
+    1,1,1,1,1,1,1,1,  # 50 - 57
+    1,1,1,1,1,1,1,1,  # 58 - 5f
+    1,1,1,1,1,1,1,1,  # 60 - 67
+    1,1,1,1,1,1,1,1,  # 68 - 6f
+    1,1,1,1,1,1,1,1,  # 70 - 77
+    1,1,1,1,1,1,1,1,  # 78 - 7f
+    2,2,2,2,3,3,3,3,  # 80 - 87
+    4,4,4,4,4,4,4,4,  # 88 - 8f
+    4,4,4,4,4,4,4,4,  # 90 - 97
+    4,4,4,4,4,4,4,4,  # 98 - 9f
+    5,5,5,5,5,5,5,5,  # a0 - a7
+    5,5,5,5,5,5,5,5,  # a8 - af
+    5,5,5,5,5,5,5,5,  # b0 - b7
+    5,5,5,5,5,5,5,5,  # b8 - bf
+    0,0,6,6,6,6,6,6,  # c0 - c7
+    6,6,6,6,6,6,6,6,  # c8 - cf
+    6,6,6,6,6,6,6,6,  # d0 - d7
+    6,6,6,6,6,6,6,6,  # d8 - df
+    7,8,8,8,8,8,8,8,  # e0 - e7
+    8,8,8,8,8,9,8,8,  # e8 - ef
+    10,11,11,11,11,11,11,11,  # f0 - f7
+    12,13,13,13,14,15,0,0    # f8 - ff
+)
+
+UTF8_st = (
+    eError,eStart,eError,eError,eError,eError,     12,   10,#00-07
+         9,     11,     8,     7,     6,     5,     4,    3,#08-0f
+    eError,eError,eError,eError,eError,eError,eError,eError,#10-17
+    eError,eError,eError,eError,eError,eError,eError,eError,#18-1f
+    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27
+    eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f
+    eError,eError,     5,     5,     5,     5,eError,eError,#30-37
+    eError,eError,eError,eError,eError,eError,eError,eError,#38-3f
+    eError,eError,eError,     5,     5,     5,eError,eError,#40-47
+    eError,eError,eError,eError,eError,eError,eError,eError,#48-4f
+    eError,eError,     7,     7,     7,     7,eError,eError,#50-57
+    eError,eError,eError,eError,eError,eError,eError,eError,#58-5f
+    eError,eError,eError,eError,     7,     7,eError,eError,#60-67
+    eError,eError,eError,eError,eError,eError,eError,eError,#68-6f
+    eError,eError,     9,     9,     9,     9,eError,eError,#70-77
+    eError,eError,eError,eError,eError,eError,eError,eError,#78-7f
+    eError,eError,eError,eError,eError,     9,eError,eError,#80-87
+    eError,eError,eError,eError,eError,eError,eError,eError,#88-8f
+    eError,eError,    12,    12,    12,    12,eError,eError,#90-97
+    eError,eError,eError,eError,eError,eError,eError,eError,#98-9f
+    eError,eError,eError,eError,eError,    12,eError,eError,#a0-a7
+    eError,eError,eError,eError,eError,eError,eError,eError,#a8-af
+    eError,eError,    12,    12,    12,eError,eError,eError,#b0-b7
+    eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf
+    eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7
+    eError,eError,eError,eError,eError,eError,eError,eError #c8-cf
+)
+
+UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
+
+UTF8SMModel = {'classTable': UTF8_cls,
+               'classFactor': 16,
+               'stateTable': UTF8_st,
+               'charLenTable': UTF8CharLenTable,
+               'name': 'UTF-8'}
+
+# flake8: noqa
similarity index 97%
rename from requests/packages/charade/sbcharsetprober.py
rename to requests/packages/chardet/sbcharsetprober.py
index da26715..37291bd 100644 (file)
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Universal charset detector code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 2001\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#   Shy Shalom - original C code\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-import sys\r
-from . import constants\r
-from .charsetprober import CharSetProber\r
-from .compat import wrap_ord\r
-\r
-SAMPLE_SIZE = 64\r
-SB_ENOUGH_REL_THRESHOLD = 1024\r
-POSITIVE_SHORTCUT_THRESHOLD = 0.95\r
-NEGATIVE_SHORTCUT_THRESHOLD = 0.05\r
-SYMBOL_CAT_ORDER = 250\r
-NUMBER_OF_SEQ_CAT = 4\r
-POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1\r
-#NEGATIVE_CAT = 0\r
-\r
-\r
-class SingleByteCharSetProber(CharSetProber):\r
-    def __init__(self, model, reversed=False, nameProber=None):\r
-        CharSetProber.__init__(self)\r
-        self._mModel = model\r
-        # TRUE if we need to reverse every pair in the model lookup\r
-        self._mReversed = reversed\r
-        # Optional auxiliary prober for name decision\r
-        self._mNameProber = nameProber\r
-        self.reset()\r
-\r
-    def reset(self):\r
-        CharSetProber.reset(self)\r
-        # char order of last character\r
-        self._mLastOrder = 255\r
-        self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT\r
-        self._mTotalSeqs = 0\r
-        self._mTotalChar = 0\r
-        # characters that fall in our sampling range\r
-        self._mFreqChar = 0\r
-\r
-    def get_charset_name(self):\r
-        if self._mNameProber:\r
-            return self._mNameProber.get_charset_name()\r
-        else:\r
-            return self._mModel['charsetName']\r
-\r
-    def feed(self, aBuf):\r
-        if not self._mModel['keepEnglishLetter']:\r
-            aBuf = self.filter_without_english_letters(aBuf)\r
-        aLen = len(aBuf)\r
-        if not aLen:\r
-            return self.get_state()\r
-        for c in aBuf:\r
-            order = self._mModel['charToOrderMap'][wrap_ord(c)]\r
-            if order < SYMBOL_CAT_ORDER:\r
-                self._mTotalChar += 1\r
-            if order < SAMPLE_SIZE:\r
-                self._mFreqChar += 1\r
-                if self._mLastOrder < SAMPLE_SIZE:\r
-                    self._mTotalSeqs += 1\r
-                    if not self._mReversed:\r
-                        i = (self._mLastOrder * SAMPLE_SIZE) + order\r
-                        model = self._mModel['precedenceMatrix'][i]\r
-                    else:  # reverse the order of the letters in the lookup\r
-                        i = (order * SAMPLE_SIZE) + self._mLastOrder\r
-                        model = self._mModel['precedenceMatrix'][i]\r
-                    self._mSeqCounters[model] += 1\r
-            self._mLastOrder = order\r
-\r
-        if self.get_state() == constants.eDetecting:\r
-            if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD:\r
-                cf = self.get_confidence()\r
-                if cf > POSITIVE_SHORTCUT_THRESHOLD:\r
-                    if constants._debug:\r
-                        sys.stderr.write('%s confidence = %s, we have a'\r
-                                         'winner\n' %\r
-                                         (self._mModel['charsetName'], cf))\r
-                    self._mState = constants.eFoundIt\r
-                elif cf < NEGATIVE_SHORTCUT_THRESHOLD:\r
-                    if constants._debug:\r
-                        sys.stderr.write('%s confidence = %s, below negative'\r
-                                         'shortcut threshhold %s\n' %\r
-                                         (self._mModel['charsetName'], cf,\r
-                                          NEGATIVE_SHORTCUT_THRESHOLD))\r
-                    self._mState = constants.eNotMe\r
-\r
-        return self.get_state()\r
-\r
-    def get_confidence(self):\r
-        r = 0.01\r
-        if self._mTotalSeqs > 0:\r
-            r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs\r
-                 / self._mModel['mTypicalPositiveRatio'])\r
-            r = r * self._mFreqChar / self._mTotalChar\r
-            if r >= 1.0:\r
-                r = 0.99\r
-        return r\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#   Shy Shalom - original C code
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+import sys
+from . import constants
+from .charsetprober import CharSetProber
+from .compat import wrap_ord
+
+SAMPLE_SIZE = 64
+SB_ENOUGH_REL_THRESHOLD = 1024
+POSITIVE_SHORTCUT_THRESHOLD = 0.95
+NEGATIVE_SHORTCUT_THRESHOLD = 0.05
+SYMBOL_CAT_ORDER = 250
+NUMBER_OF_SEQ_CAT = 4
+POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
+#NEGATIVE_CAT = 0
+
+
+class SingleByteCharSetProber(CharSetProber):
+    def __init__(self, model, reversed=False, nameProber=None):
+        CharSetProber.__init__(self)
+        self._mModel = model
+        # TRUE if we need to reverse every pair in the model lookup
+        self._mReversed = reversed
+        # Optional auxiliary prober for name decision
+        self._mNameProber = nameProber
+        self.reset()
+
+    def reset(self):
+        CharSetProber.reset(self)
+        # char order of last character
+        self._mLastOrder = 255
+        self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
+        self._mTotalSeqs = 0
+        self._mTotalChar = 0
+        # characters that fall in our sampling range
+        self._mFreqChar = 0
+
+    def get_charset_name(self):
+        if self._mNameProber:
+            return self._mNameProber.get_charset_name()
+        else:
+            return self._mModel['charsetName']
+
+    def feed(self, aBuf):
+        if not self._mModel['keepEnglishLetter']:
+            aBuf = self.filter_without_english_letters(aBuf)
+        aLen = len(aBuf)
+        if not aLen:
+            return self.get_state()
+        for c in aBuf:
+            order = self._mModel['charToOrderMap'][wrap_ord(c)]
+            if order < SYMBOL_CAT_ORDER:
+                self._mTotalChar += 1
+            if order < SAMPLE_SIZE:
+                self._mFreqChar += 1
+                if self._mLastOrder < SAMPLE_SIZE:
+                    self._mTotalSeqs += 1
+                    if not self._mReversed:
+                        i = (self._mLastOrder * SAMPLE_SIZE) + order
+                        model = self._mModel['precedenceMatrix'][i]
+                    else:  # reverse the order of the letters in the lookup
+                        i = (order * SAMPLE_SIZE) + self._mLastOrder
+                        model = self._mModel['precedenceMatrix'][i]
+                    self._mSeqCounters[model] += 1
+            self._mLastOrder = order
+
+        if self.get_state() == constants.eDetecting:
+            if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD:
+                cf = self.get_confidence()
+                if cf > POSITIVE_SHORTCUT_THRESHOLD:
+                    if constants._debug:
+                        sys.stderr.write('%s confidence = %s, we have a'
+                                         'winner\n' %
+                                         (self._mModel['charsetName'], cf))
+                    self._mState = constants.eFoundIt
+                elif cf < NEGATIVE_SHORTCUT_THRESHOLD:
+                    if constants._debug:
+                        sys.stderr.write('%s confidence = %s, below negative'
+                                         'shortcut threshhold %s\n' %
+                                         (self._mModel['charsetName'], cf,
+                                          NEGATIVE_SHORTCUT_THRESHOLD))
+                    self._mState = constants.eNotMe
+
+        return self.get_state()
+
+    def get_confidence(self):
+        r = 0.01
+        if self._mTotalSeqs > 0:
+            r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs
+                 / self._mModel['mTypicalPositiveRatio'])
+            r = r * self._mFreqChar / self._mTotalChar
+            if r >= 1.0:
+                r = 0.99
+        return r
similarity index 97%
rename from requests/packages/charade/sbcsgroupprober.py
rename to requests/packages/chardet/sbcsgroupprober.py
index b224814..1b6196c 100644 (file)
@@ -1,69 +1,69 @@
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Universal charset detector code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 2001\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#   Shy Shalom - original C code\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .charsetgroupprober import CharSetGroupProber\r
-from .sbcharsetprober import SingleByteCharSetProber\r
-from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,\r
-                                Latin5CyrillicModel, MacCyrillicModel,\r
-                                Ibm866Model, Ibm855Model)\r
-from .langgreekmodel import Latin7GreekModel, Win1253GreekModel\r
-from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel\r
-from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel\r
-from .langthaimodel import TIS620ThaiModel\r
-from .langhebrewmodel import Win1255HebrewModel\r
-from .hebrewprober import HebrewProber\r
-\r
-\r
-class SBCSGroupProber(CharSetGroupProber):\r
-    def __init__(self):\r
-        CharSetGroupProber.__init__(self)\r
-        self._mProbers = [\r
-            SingleByteCharSetProber(Win1251CyrillicModel),\r
-            SingleByteCharSetProber(Koi8rModel),\r
-            SingleByteCharSetProber(Latin5CyrillicModel),\r
-            SingleByteCharSetProber(MacCyrillicModel),\r
-            SingleByteCharSetProber(Ibm866Model),\r
-            SingleByteCharSetProber(Ibm855Model),\r
-            SingleByteCharSetProber(Latin7GreekModel),\r
-            SingleByteCharSetProber(Win1253GreekModel),\r
-            SingleByteCharSetProber(Latin5BulgarianModel),\r
-            SingleByteCharSetProber(Win1251BulgarianModel),\r
-            SingleByteCharSetProber(Latin2HungarianModel),\r
-            SingleByteCharSetProber(Win1250HungarianModel),\r
-            SingleByteCharSetProber(TIS620ThaiModel),\r
-        ]\r
-        hebrewProber = HebrewProber()\r
-        logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel,\r
-                                                      False, hebrewProber)\r
-        visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True,\r
-                                                     hebrewProber)\r
-        hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber)\r
-        self._mProbers.extend([hebrewProber, logicalHebrewProber,\r
-                               visualHebrewProber])\r
-\r
-        self.reset()\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#   Shy Shalom - original C code
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+from .charsetgroupprober import CharSetGroupProber
+from .sbcharsetprober import SingleByteCharSetProber
+from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
+                                Latin5CyrillicModel, MacCyrillicModel,
+                                Ibm866Model, Ibm855Model)
+from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
+from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
+from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
+from .langthaimodel import TIS620ThaiModel
+from .langhebrewmodel import Win1255HebrewModel
+from .hebrewprober import HebrewProber
+
+
+class SBCSGroupProber(CharSetGroupProber):
+    def __init__(self):
+        CharSetGroupProber.__init__(self)
+        self._mProbers = [
+            SingleByteCharSetProber(Win1251CyrillicModel),
+            SingleByteCharSetProber(Koi8rModel),
+            SingleByteCharSetProber(Latin5CyrillicModel),
+            SingleByteCharSetProber(MacCyrillicModel),
+            SingleByteCharSetProber(Ibm866Model),
+            SingleByteCharSetProber(Ibm855Model),
+            SingleByteCharSetProber(Latin7GreekModel),
+            SingleByteCharSetProber(Win1253GreekModel),
+            SingleByteCharSetProber(Latin5BulgarianModel),
+            SingleByteCharSetProber(Win1251BulgarianModel),
+            SingleByteCharSetProber(Latin2HungarianModel),
+            SingleByteCharSetProber(Win1250HungarianModel),
+            SingleByteCharSetProber(TIS620ThaiModel),
+        ]
+        hebrewProber = HebrewProber()
+        logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel,
+                                                      False, hebrewProber)
+        visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True,
+                                                     hebrewProber)
+        hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber)
+        self._mProbers.extend([hebrewProber, logicalHebrewProber,
+                               visualHebrewProber])
+
+        self.reset()
similarity index 97%
rename from requests/packages/charade/sjisprober.py
rename to requests/packages/chardet/sjisprober.py
index 9bb0cdc..b173614 100644 (file)
@@ -1,91 +1,91 @@
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-import sys\r
-from .mbcharsetprober import MultiByteCharSetProber\r
-from .codingstatemachine import CodingStateMachine\r
-from .chardistribution import SJISDistributionAnalysis\r
-from .jpcntx import SJISContextAnalysis\r
-from .mbcssm import SJISSMModel\r
-from . import constants\r
-\r
-\r
-class SJISProber(MultiByteCharSetProber):\r
-    def __init__(self):\r
-        MultiByteCharSetProber.__init__(self)\r
-        self._mCodingSM = CodingStateMachine(SJISSMModel)\r
-        self._mDistributionAnalyzer = SJISDistributionAnalysis()\r
-        self._mContextAnalyzer = SJISContextAnalysis()\r
-        self.reset()\r
-\r
-    def reset(self):\r
-        MultiByteCharSetProber.reset(self)\r
-        self._mContextAnalyzer.reset()\r
-\r
-    def get_charset_name(self):\r
-        return "SHIFT_JIS"\r
-\r
-    def feed(self, aBuf):\r
-        aLen = len(aBuf)\r
-        for i in range(0, aLen):\r
-            codingState = self._mCodingSM.next_state(aBuf[i])\r
-            if codingState == constants.eError:\r
-                if constants._debug:\r
-                    sys.stderr.write(self.get_charset_name()\r
-                                     + ' prober hit error at byte ' + str(i)\r
-                                     + '\n')\r
-                self._mState = constants.eNotMe\r
-                break\r
-            elif codingState == constants.eItsMe:\r
-                self._mState = constants.eFoundIt\r
-                break\r
-            elif codingState == constants.eStart:\r
-                charLen = self._mCodingSM.get_current_charlen()\r
-                if i == 0:\r
-                    self._mLastChar[1] = aBuf[0]\r
-                    self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],\r
-                                                charLen)\r
-                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)\r
-                else:\r
-                    self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3\r
-                                                     - charLen], charLen)\r
-                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],\r
-                                                     charLen)\r
-\r
-        self._mLastChar[0] = aBuf[aLen - 1]\r
-\r
-        if self.get_state() == constants.eDetecting:\r
-            if (self._mContextAnalyzer.got_enough_data() and\r
-               (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):\r
-                self._mState = constants.eFoundIt\r
-\r
-        return self.get_state()\r
-\r
-    def get_confidence(self):\r
-        contxtCf = self._mContextAnalyzer.get_confidence()\r
-        distribCf = self._mDistributionAnalyzer.get_confidence()\r
-        return max(contxtCf, distribCf)\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+import sys
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import SJISDistributionAnalysis
+from .jpcntx import SJISContextAnalysis
+from .mbcssm import SJISSMModel
+from . import constants
+
+
+class SJISProber(MultiByteCharSetProber):
+    def __init__(self):
+        MultiByteCharSetProber.__init__(self)
+        self._mCodingSM = CodingStateMachine(SJISSMModel)
+        self._mDistributionAnalyzer = SJISDistributionAnalysis()
+        self._mContextAnalyzer = SJISContextAnalysis()
+        self.reset()
+
+    def reset(self):
+        MultiByteCharSetProber.reset(self)
+        self._mContextAnalyzer.reset()
+
+    def get_charset_name(self):
+        return "SHIFT_JIS"
+
+    def feed(self, aBuf):
+        aLen = len(aBuf)
+        for i in range(0, aLen):
+            codingState = self._mCodingSM.next_state(aBuf[i])
+            if codingState == constants.eError:
+                if constants._debug:
+                    sys.stderr.write(self.get_charset_name()
+                                     + ' prober hit error at byte ' + str(i)
+                                     + '\n')
+                self._mState = constants.eNotMe
+                break
+            elif codingState == constants.eItsMe:
+                self._mState = constants.eFoundIt
+                break
+            elif codingState == constants.eStart:
+                charLen = self._mCodingSM.get_current_charlen()
+                if i == 0:
+                    self._mLastChar[1] = aBuf[0]
+                    self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],
+                                                charLen)
+                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
+                else:
+                    self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3
+                                                     - charLen], charLen)
+                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
+                                                     charLen)
+
+        self._mLastChar[0] = aBuf[aLen - 1]
+
+        if self.get_state() == constants.eDetecting:
+            if (self._mContextAnalyzer.got_enough_data() and
+               (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
+                self._mState = constants.eFoundIt
+
+        return self.get_state()
+
+    def get_confidence(self):
+        contxtCf = self._mContextAnalyzer.get_confidence()
+        distribCf = self._mDistributionAnalyzer.get_confidence()
+        return max(contxtCf, distribCf)
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Universal charset detector code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 2001\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#   Shy Shalom - original C code\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from . import constants\r
-import sys\r
-import codecs\r
-from .latin1prober import Latin1Prober  # windows-1252\r
-from .mbcsgroupprober import MBCSGroupProber  # multi-byte character sets\r
-from .sbcsgroupprober import SBCSGroupProber  # single-byte character sets\r
-from .escprober import EscCharSetProber  # ISO-2122, etc.\r
-import re\r
-\r
-MINIMUM_THRESHOLD = 0.20\r
-ePureAscii = 0\r
-eEscAscii = 1\r
-eHighbyte = 2\r
-\r
-\r
-class UniversalDetector:\r
-    def __init__(self):\r
-        self._highBitDetector = re.compile(b'[\x80-\xFF]')\r
-        self._escDetector = re.compile(b'(\033|~{)')\r
-        self._mEscCharSetProber = None\r
-        self._mCharSetProbers = []\r
-        self.reset()\r
-\r
-    def reset(self):\r
-        self.result = {'encoding': None, 'confidence': 0.0}\r
-        self.done = False\r
-        self._mStart = True\r
-        self._mGotData = False\r
-        self._mInputState = ePureAscii\r
-        self._mLastChar = b''\r
-        if self._mEscCharSetProber:\r
-            self._mEscCharSetProber.reset()\r
-        for prober in self._mCharSetProbers:\r
-            prober.reset()\r
-\r
-    def feed(self, aBuf):\r
-        if self.done:\r
-            return\r
-\r
-        aLen = len(aBuf)\r
-        if not aLen:\r
-            return\r
-\r
-        if not self._mGotData:\r
-            # If the data starts with BOM, we know it is UTF\r
-            if aBuf[:3] == codecs.BOM:\r
-                # EF BB BF  UTF-8 with BOM\r
-                self.result = {'encoding': "UTF-8", 'confidence': 1.0}\r
-            elif aBuf[:4] in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):\r
-                # FF FE 00 00  UTF-32, little-endian BOM\r
-                # 00 00 FE FF  UTF-32, big-endian BOM\r
-                self.result = {'encoding': "UTF-32", 'confidence': 1.0}\r
-            elif aBuf[:4] == b'\xFE\xFF\x00\x00':\r
-                # FE FF 00 00  UCS-4, unusual octet order BOM (3412)\r
-                self.result = {\r
-                    'encoding': "X-ISO-10646-UCS-4-3412",\r
-                    'confidence': 1.0\r
-                }\r
-            elif aBuf[:4] == b'\x00\x00\xFF\xFE':\r
-                # 00 00 FF FE  UCS-4, unusual octet order BOM (2143)\r
-                self.result = {\r
-                    'encoding': "X-ISO-10646-UCS-4-2143",\r
-                    'confidence': 1.0\r
-                }\r
-            elif aBuf[:2] == codecs.BOM_LE or aBuf[:2] == codecs.BOM_BE:\r
-                # FF FE  UTF-16, little endian BOM\r
-                # FE FF  UTF-16, big endian BOM\r
-                self.result = {'encoding': "UTF-16", 'confidence': 1.0}\r
-\r
-        self._mGotData = True\r
-        if self.result['encoding'] and (self.result['confidence'] > 0.0):\r
-            self.done = True\r
-            return\r
-\r
-        if self._mInputState == ePureAscii:\r
-            if self._highBitDetector.search(aBuf):\r
-                self._mInputState = eHighbyte\r
-            elif ((self._mInputState == ePureAscii) and\r
-                    self._escDetector.search(self._mLastChar + aBuf)):\r
-                self._mInputState = eEscAscii\r
-\r
-        self._mLastChar = aBuf[-1:]\r
-\r
-        if self._mInputState == eEscAscii:\r
-            if not self._mEscCharSetProber:\r
-                self._mEscCharSetProber = EscCharSetProber()\r
-            if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt:\r
-                self.result = {\r
-                    'encoding': self._mEscCharSetProber.get_charset_name(),\r
-                    'confidence': self._mEscCharSetProber.get_confidence()\r
-                }\r
-                self.done = True\r
-        elif self._mInputState == eHighbyte:\r
-            if not self._mCharSetProbers:\r
-                self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(),\r
-                                         Latin1Prober()]\r
-            for prober in self._mCharSetProbers:\r
-                if prober.feed(aBuf) == constants.eFoundIt:\r
-                    self.result = {'encoding': prober.get_charset_name(),\r
-                                   'confidence': prober.get_confidence()}\r
-                    self.done = True\r
-                    break\r
-\r
-    def close(self):\r
-        if self.done:\r
-            return\r
-        if not self._mGotData:\r
-            if constants._debug:\r
-                sys.stderr.write('no data received!\n')\r
-            return\r
-        self.done = True\r
-\r
-        if self._mInputState == ePureAscii:\r
-            self.result = {'encoding': 'ascii', 'confidence': 1.0}\r
-            return self.result\r
-\r
-        if self._mInputState == eHighbyte:\r
-            proberConfidence = None\r
-            maxProberConfidence = 0.0\r
-            maxProber = None\r
-            for prober in self._mCharSetProbers:\r
-                if not prober:\r
-                    continue\r
-                proberConfidence = prober.get_confidence()\r
-                if proberConfidence > maxProberConfidence:\r
-                    maxProberConfidence = proberConfidence\r
-                    maxProber = prober\r
-            if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD):\r
-                self.result = {'encoding': maxProber.get_charset_name(),\r
-                               'confidence': maxProber.get_confidence()}\r
-                return self.result\r
-\r
-        if constants._debug:\r
-            sys.stderr.write('no probers hit minimum threshhold\n')\r
-            for prober in self._mCharSetProbers[0].mProbers:\r
-                if not prober:\r
-                    continue\r
-                sys.stderr.write('%s confidence = %s\n' %\r
-                                 (prober.get_charset_name(),\r
-                                  prober.get_confidence()))\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#   Shy Shalom - original C code
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+from . import constants
+import sys
+import codecs
+from .latin1prober import Latin1Prober  # windows-1252
+from .mbcsgroupprober import MBCSGroupProber  # multi-byte character sets
+from .sbcsgroupprober import SBCSGroupProber  # single-byte character sets
+from .escprober import EscCharSetProber  # ISO-2122, etc.
+import re
+
+MINIMUM_THRESHOLD = 0.20
+ePureAscii = 0
+eEscAscii = 1
+eHighbyte = 2
+
+
+class UniversalDetector:
+    def __init__(self):
+        self._highBitDetector = re.compile(b'[\x80-\xFF]')
+        self._escDetector = re.compile(b'(\033|~{)')
+        self._mEscCharSetProber = None
+        self._mCharSetProbers = []
+        self.reset()
+
+    def reset(self):
+        self.result = {'encoding': None, 'confidence': 0.0}
+        self.done = False
+        self._mStart = True
+        self._mGotData = False
+        self._mInputState = ePureAscii
+        self._mLastChar = b''
+        if self._mEscCharSetProber:
+            self._mEscCharSetProber.reset()
+        for prober in self._mCharSetProbers:
+            prober.reset()
+
+    def feed(self, aBuf):
+        if self.done:
+            return
+
+        aLen = len(aBuf)
+        if not aLen:
+            return
+
+        if not self._mGotData:
+            # If the data starts with BOM, we know it is UTF
+            if aBuf[:3] == codecs.BOM:
+                # EF BB BF  UTF-8 with BOM
+                self.result = {'encoding': "UTF-8", 'confidence': 1.0}
+            elif aBuf[:4] == codecs.BOM_UTF32_LE:
+                # FF FE 00 00  UTF-32, little-endian BOM
+                self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}
+            elif aBuf[:4] == codecs.BOM_UTF32_BE:
+                # 00 00 FE FF  UTF-32, big-endian BOM
+                self.result = {'encoding': "UTF-32BE", 'confidence': 1.0}
+            elif aBuf[:4] == b'\xFE\xFF\x00\x00':
+                # FE FF 00 00  UCS-4, unusual octet order BOM (3412)
+                self.result = {
+                    'encoding': "X-ISO-10646-UCS-4-3412",
+                    'confidence': 1.0
+                }
+            elif aBuf[:4] == b'\x00\x00\xFF\xFE':
+                # 00 00 FF FE  UCS-4, unusual octet order BOM (2143)
+                self.result = {
+                    'encoding': "X-ISO-10646-UCS-4-2143",
+                    'confidence': 1.0
+                }
+            elif aBuf[:2] == codecs.BOM_LE:
+                # FF FE  UTF-16, little endian BOM
+                self.result = {'encoding': "UTF-16LE", 'confidence': 1.0}
+            elif aBuf[:2] == codecs.BOM_BE:
+                # FE FF  UTF-16, big endian BOM
+                self.result = {'encoding': "UTF-16BE", 'confidence': 1.0}
+
+        self._mGotData = True
+        if self.result['encoding'] and (self.result['confidence'] > 0.0):
+            self.done = True
+            return
+
+        if self._mInputState == ePureAscii:
+            if self._highBitDetector.search(aBuf):
+                self._mInputState = eHighbyte
+            elif ((self._mInputState == ePureAscii) and
+                    self._escDetector.search(self._mLastChar + aBuf)):
+                self._mInputState = eEscAscii
+
+        self._mLastChar = aBuf[-1:]
+
+        if self._mInputState == eEscAscii:
+            if not self._mEscCharSetProber:
+                self._mEscCharSetProber = EscCharSetProber()
+            if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt:
+                self.result = {'encoding': self._mEscCharSetProber.get_charset_name(),
+                               'confidence': self._mEscCharSetProber.get_confidence()}
+                self.done = True
+        elif self._mInputState == eHighbyte:
+            if not self._mCharSetProbers:
+                self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(),
+                                         Latin1Prober()]
+            for prober in self._mCharSetProbers:
+                if prober.feed(aBuf) == constants.eFoundIt:
+                    self.result = {'encoding': prober.get_charset_name(),
+                                   'confidence': prober.get_confidence()}
+                    self.done = True
+                    break
+
+    def close(self):
+        if self.done:
+            return
+        if not self._mGotData:
+            if constants._debug:
+                sys.stderr.write('no data received!\n')
+            return
+        self.done = True
+
+        if self._mInputState == ePureAscii:
+            self.result = {'encoding': 'ascii', 'confidence': 1.0}
+            return self.result
+
+        if self._mInputState == eHighbyte:
+            proberConfidence = None
+            maxProberConfidence = 0.0
+            maxProber = None
+            for prober in self._mCharSetProbers:
+                if not prober:
+                    continue
+                proberConfidence = prober.get_confidence()
+                if proberConfidence > maxProberConfidence:
+                    maxProberConfidence = proberConfidence
+                    maxProber = prober
+            if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD):
+                self.result = {'encoding': maxProber.get_charset_name(),
+                               'confidence': maxProber.get_confidence()}
+                return self.result
+
+        if constants._debug:
+            sys.stderr.write('no probers hit minimum threshhold\n')
+            for prober in self._mCharSetProbers[0].mProbers:
+                if not prober:
+                    continue
+                sys.stderr.write('%s confidence = %s\n' %
+                                 (prober.get_charset_name(),
+                                  prober.get_confidence()))
similarity index 97%
rename from requests/packages/charade/utf8prober.py
rename to requests/packages/chardet/utf8prober.py
index 72c8d3d..1c0bb5d 100644 (file)
@@ -1,76 +1,76 @@
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-#   Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301  USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from . import constants\r
-from .charsetprober import CharSetProber\r
-from .codingstatemachine import CodingStateMachine\r
-from .mbcssm import UTF8SMModel\r
-\r
-ONE_CHAR_PROB = 0.5\r
-\r
-\r
-class UTF8Prober(CharSetProber):\r
-    def __init__(self):\r
-        CharSetProber.__init__(self)\r
-        self._mCodingSM = CodingStateMachine(UTF8SMModel)\r
-        self.reset()\r
-\r
-    def reset(self):\r
-        CharSetProber.reset(self)\r
-        self._mCodingSM.reset()\r
-        self._mNumOfMBChar = 0\r
-\r
-    def get_charset_name(self):\r
-        return "utf-8"\r
-\r
-    def feed(self, aBuf):\r
-        for c in aBuf:\r
-            codingState = self._mCodingSM.next_state(c)\r
-            if codingState == constants.eError:\r
-                self._mState = constants.eNotMe\r
-                break\r
-            elif codingState == constants.eItsMe:\r
-                self._mState = constants.eFoundIt\r
-                break\r
-            elif codingState == constants.eStart:\r
-                if self._mCodingSM.get_current_charlen() >= 2:\r
-                    self._mNumOfMBChar += 1\r
-\r
-        if self.get_state() == constants.eDetecting:\r
-            if self.get_confidence() > constants.SHORTCUT_THRESHOLD:\r
-                self._mState = constants.eFoundIt\r
-\r
-        return self.get_state()\r
-\r
-    def get_confidence(self):\r
-        unlike = 0.99\r
-        if self._mNumOfMBChar < 6:\r
-            for i in range(0, self._mNumOfMBChar):\r
-                unlike = unlike * ONE_CHAR_PROB\r
-            return 1.0 - unlike\r
-        else:\r
-            return unlike\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#   Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301  USA
+######################### END LICENSE BLOCK #########################
+
+from . import constants
+from .charsetprober import CharSetProber
+from .codingstatemachine import CodingStateMachine
+from .mbcssm import UTF8SMModel
+
+ONE_CHAR_PROB = 0.5
+
+
+class UTF8Prober(CharSetProber):
+    def __init__(self):
+        CharSetProber.__init__(self)
+        self._mCodingSM = CodingStateMachine(UTF8SMModel)
+        self.reset()
+
+    def reset(self):
+        CharSetProber.reset(self)
+        self._mCodingSM.reset()
+        self._mNumOfMBChar = 0
+
+    def get_charset_name(self):
+        return "utf-8"
+
+    def feed(self, aBuf):
+        for c in aBuf:
+            codingState = self._mCodingSM.next_state(c)
+            if codingState == constants.eError:
+                self._mState = constants.eNotMe
+                break
+            elif codingState == constants.eItsMe:
+                self._mState = constants.eFoundIt
+                break
+            elif codingState == constants.eStart:
+                if self._mCodingSM.get_current_charlen() >= 2:
+                    self._mNumOfMBChar += 1
+
+        if self.get_state() == constants.eDetecting:
+            if self.get_confidence() > constants.SHORTCUT_THRESHOLD:
+                self._mState = constants.eFoundIt
+
+        return self.get_state()
+
+    def get_confidence(self):
+        unlike = 0.99
+        if self._mNumOfMBChar < 6:
+            for i in range(0, self._mNumOfMBChar):
+                unlike = unlike * ONE_CHAR_PROB
+            return 1.0 - unlike
+        else:
+            return unlike