The other HTTP methods are supported - see `requests.api`. Full documentation
is at <http://python-requests.org>.
-:copyright: (c) 2013 by Kenneth Reitz.
+:copyright: (c) 2014 by Kenneth Reitz.
:license: Apache 2.0, see LICENSE for more details.
"""
__title__ = 'requests'
-__version__ = '2.0.3'
-__build__ = 0x020003
+__version__ = '2.4.1'
+__build__ = 0x020401
__author__ = 'Kenneth Reitz'
__license__ = 'Apache 2.0'
-__copyright__ = 'Copyright 2013 Kenneth Reitz'
+__copyright__ = 'Copyright 2014 Kenneth Reitz'
# Attempt to enable urllib3's SNI support, if possible
try:
import socket
from .models import Response
+from .packages.urllib3 import Retry
from .packages.urllib3.poolmanager import PoolManager, proxy_from_url
from .packages.urllib3.response import HTTPResponse
from .packages.urllib3.util import Timeout as TimeoutSauce
-from .compat import urlparse, basestring, urldefrag, unquote
+from .compat import urlparse, basestring, urldefrag
from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers,
- except_on_missing_scheme, get_auth_from_url)
+ prepend_scheme_if_needed, get_auth_from_url)
from .structures import CaseInsensitiveDict
-from .packages.urllib3.exceptions import MaxRetryError
-from .packages.urllib3.exceptions import TimeoutError
-from .packages.urllib3.exceptions import SSLError as _SSLError
+from .packages.urllib3.exceptions import ConnectTimeoutError
from .packages.urllib3.exceptions import HTTPError as _HTTPError
+from .packages.urllib3.exceptions import MaxRetryError
from .packages.urllib3.exceptions import ProxyError as _ProxyError
+from .packages.urllib3.exceptions import ProtocolError
+from .packages.urllib3.exceptions import ReadTimeoutError
+from .packages.urllib3.exceptions import SSLError as _SSLError
from .cookies import extract_cookies_to_jar
-from .exceptions import ConnectionError, Timeout, SSLError, ProxyError
+from .exceptions import (ConnectionError, ConnectTimeout, ReadTimeout, SSLError,
+ ProxyError)
from .auth import _basic_auth_str
DEFAULT_POOLBLOCK = False
:param pool_connections: The number of urllib3 connection pools to cache.
:param pool_maxsize: The maximum number of connections to save in the pool.
- :param max_retries: The maximum number of retries each connection should attempt.
+ :param int max_retries: The maximum number of retries each connection
+ should attempt. Note, this applies only to failed connections and
+ timeouts, never to requests where the server returns a response.
:param pool_block: Whether the connection pool should block for connections.
Usage::
>>> import requests
>>> s = requests.Session()
- >>> a = requests.adapters.HTTPAdapter()
+ >>> a = requests.adapters.HTTPAdapter(max_retries=3)
>>> s.mount('http://', a)
"""
__attrs__ = ['max_retries', 'config', '_pool_connections', '_pool_maxsize',
self.__attrs__)
def __setstate__(self, state):
+ # Can't handle by adding 'proxy_manager' to self.__attrs__ because
+ # self.poolmanager uses a lambda function, which isn't pickleable.
+ self.proxy_manager = {}
+ self.config = {}
+
for attr, value in state.items():
setattr(self, attr, value)
self.init_poolmanager(self._pool_connections, self._pool_maxsize,
block=self._pool_block)
- def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK):
- """Initializes a urllib3 PoolManager. This method should not be called
- from user code, and is only exposed for use when subclassing the
+ def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK, **pool_kwargs):
+ """Initializes a urllib3 PoolManager.
+
+ This method should not be called from user code, and is only
+ exposed for use when subclassing the
:class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.
:param connections: The number of urllib3 connection pools to cache.
:param maxsize: The maximum number of connections to save in the pool.
:param block: Block when no free connections are available.
+ :param pool_kwargs: Extra keyword arguments used to initialize the Pool Manager.
"""
# save these values for pickling
self._pool_connections = connections
self._pool_block = block
self.poolmanager = PoolManager(num_pools=connections, maxsize=maxsize,
- block=block)
+ block=block, **pool_kwargs)
+
+ def proxy_manager_for(self, proxy, **proxy_kwargs):
+ """Return urllib3 ProxyManager for the given proxy.
+
+ This method should not be called from user code, and is only
+ exposed for use when subclassing the
+ :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.
+
+ :param proxy: The proxy to return a urllib3 ProxyManager for.
+ :param proxy_kwargs: Extra keyword arguments used to configure the Proxy Manager.
+ :returns: ProxyManager
+ """
+ if not proxy in self.proxy_manager:
+ proxy_headers = self.proxy_headers(proxy)
+ self.proxy_manager[proxy] = proxy_from_url(
+ proxy,
+ proxy_headers=proxy_headers,
+ num_pools=self._pool_connections,
+ maxsize=self._pool_maxsize,
+ block=self._pool_block,
+ **proxy_kwargs)
+
+ return self.proxy_manager[proxy]
def cert_verify(self, conn, url, verify, cert):
"""Verify a SSL certificate. This method should not be called from user
if not cert_loc:
cert_loc = DEFAULT_CA_BUNDLE_PATH
+ if not cert_loc:
+ raise Exception("Could not find a suitable SSL CA certificate bundle.")
+
conn.cert_reqs = 'CERT_REQUIRED'
conn.ca_certs = cert_loc
else:
proxy = proxies.get(urlparse(url.lower()).scheme)
if proxy:
- except_on_missing_scheme(proxy)
- proxy_headers = self.proxy_headers(proxy)
-
- if not proxy in self.proxy_manager:
- self.proxy_manager[proxy] = proxy_from_url(
- proxy,
- proxy_headers=proxy_headers)
-
- conn = self.proxy_manager[proxy].connection_from_url(url)
+ proxy = prepend_scheme_if_needed(proxy, 'http')
+ proxy_manager = self.proxy_manager_for(proxy)
+ conn = proxy_manager.connection_from_url(url)
else:
- conn = self.poolmanager.connection_from_url(url.lower())
+ # Only scheme should be lower case
+ parsed = urlparse(url)
+ url = parsed.geturl()
+ conn = self.poolmanager.connection_from_url(url)
return conn
:param proxies: A dictionary of schemes to proxy URLs.
"""
proxies = proxies or {}
- scheme = urlparse(request.url).scheme.lower()
+ scheme = urlparse(request.url).scheme
proxy = proxies.get(scheme)
if proxy and scheme != 'https':
username, password = get_auth_from_url(proxy)
if username and password:
- # Proxy auth usernames and passwords will be urlencoded, we need
- # to decode them.
- username = unquote(username)
- password = unquote(password)
headers['Proxy-Authorization'] = _basic_auth_str(username,
password)
:param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
:param stream: (optional) Whether to stream the request content.
- :param timeout: (optional) The timeout on the request.
+ :param timeout: (optional) How long to wait for the server to send
+ data before giving up, as a float, or a (`connect timeout, read
+ timeout <user/advanced.html#timeouts>`_) tuple.
+ :type timeout: float or tuple
:param verify: (optional) Whether to verify SSL certificates.
- :param vert: (optional) Any user-provided SSL certificate to be trusted.
+ :param cert: (optional) Any user-provided SSL certificate to be trusted.
:param proxies: (optional) The proxies dictionary to apply to the request.
"""
chunked = not (request.body is None or 'Content-Length' in request.headers)
- if stream:
- timeout = TimeoutSauce(connect=timeout)
+ if isinstance(timeout, tuple):
+ try:
+ connect, read = timeout
+ timeout = TimeoutSauce(connect=connect, read=read)
+ except ValueError as e:
+ # this may raise a string formatting error.
+ err = ("Invalid timeout {0}. Pass a (connect, read) "
+ "timeout tuple, or a single float to set "
+ "both timeouts to the same value".format(timeout))
+ raise ValueError(err)
else:
timeout = TimeoutSauce(connect=timeout, read=timeout)
assert_same_host=False,
preload_content=False,
decode_content=False,
- retries=self.max_retries,
+ retries=Retry(self.max_retries, read=False),
timeout=timeout
)
conn = conn.proxy_pool
low_conn = conn._get_conn(timeout=timeout)
- low_conn.putrequest(request.method, url, skip_accept_encoding=True)
-
- for header, value in request.headers.items():
- low_conn.putheader(header, value)
-
- low_conn.endheaders()
- for i in request.body:
- low_conn.send(hex(len(i))[2:].encode('utf-8'))
- low_conn.send(b'\r\n')
- low_conn.send(i)
- low_conn.send(b'\r\n')
- low_conn.send(b'0\r\n\r\n')
-
- r = low_conn.getresponse()
- resp = HTTPResponse.from_httplib(r,
- pool=conn,
- connection=low_conn,
- preload_content=False,
- decode_content=False
- )
-
- except socket.error as sockerr:
- raise ConnectionError(sockerr)
+ try:
+ low_conn.putrequest(request.method,
+ url,
+ skip_accept_encoding=True)
+
+ for header, value in request.headers.items():
+ low_conn.putheader(header, value)
+
+ low_conn.endheaders()
+
+ for i in request.body:
+ low_conn.send(hex(len(i))[2:].encode('utf-8'))
+ low_conn.send(b'\r\n')
+ low_conn.send(i)
+ low_conn.send(b'\r\n')
+ low_conn.send(b'0\r\n\r\n')
+
+ r = low_conn.getresponse()
+ resp = HTTPResponse.from_httplib(
+ r,
+ pool=conn,
+ connection=low_conn,
+ preload_content=False,
+ decode_content=False
+ )
+ except:
+ # If we hit any problems here, clean up the connection.
+ # Then, reraise so that we can handle the actual exception.
+ low_conn.close()
+ raise
+ else:
+ # All is well, return the connection to the pool.
+ conn._put_conn(low_conn)
+
+ except (ProtocolError, socket.error) as err:
+ raise ConnectionError(err, request=request)
except MaxRetryError as e:
- raise ConnectionError(e)
+ if isinstance(e.reason, ConnectTimeoutError):
+ raise ConnectTimeout(e, request=request)
+
+ raise ConnectionError(e, request=request)
except _ProxyError as e:
raise ProxyError(e)
except (_SSLError, _HTTPError) as e:
if isinstance(e, _SSLError):
- raise SSLError(e)
- elif isinstance(e, TimeoutError):
- raise Timeout(e)
+ raise SSLError(e, request=request)
+ elif isinstance(e, ReadTimeoutError):
+ raise ReadTimeout(e, request=request)
else:
raise
- r = self.build_response(request, resp)
-
- if not stream:
- r.content
-
- return r
+ return self.build_response(request, resp)
:param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
:param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
:param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`.
- :param files: (optional) Dictionary of 'name': file-like-objects (or {'name': ('filename', fileobj)}) for multipart encoding upload.
+ :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': ('filename', fileobj)}``) for multipart encoding upload.
:param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth.
- :param timeout: (optional) Float describing the timeout of the request.
+ :param timeout: (optional) How long to wait for the server to send data
+ before giving up, as a float, or a (`connect timeout, read timeout
+ <user/advanced.html#timeouts>`_) tuple.
+ :type timeout: float or tuple
:param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed.
+ :type allow_redirects: bool
:param proxies: (optional) Dictionary mapping protocol to the URL of the proxy.
:param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided.
:param stream: (optional) if ``False``, the response content will be immediately downloaded.
import re
import time
import hashlib
-import logging
from base64 import b64encode
from .compat import urlparse, str
-from .utils import parse_dict_header
-
-log = logging.getLogger(__name__)
+from .cookies import extract_cookies_to_jar
+from .utils import parse_dict_header, to_native_string
CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded'
CONTENT_TYPE_MULTI_PART = 'multipart/form-data'
def _basic_auth_str(username, password):
"""Returns a Basic Auth string."""
- return 'Basic ' + b64encode(('%s:%s' % (username, password)).encode('latin1')).strip().decode('latin1')
+ authstr = 'Basic ' + to_native_string(
+ b64encode(('%s:%s' % (username, password)).encode('latin1')).strip()
+ )
+
+ return authstr
class AuthBase(object):
self.last_nonce = ''
self.nonce_count = 0
self.chal = {}
+ self.pos = None
def build_digest_header(self, method, url):
else:
_algorithm = algorithm.upper()
# lambdas assume digest modules are imported at the top level
- if _algorithm == 'MD5':
+ if _algorithm == 'MD5' or _algorithm == 'MD5-SESS':
def md5_utf8(x):
if isinstance(x, str):
x = x.encode('utf-8')
x = x.encode('utf-8')
return hashlib.sha1(x).hexdigest()
hash_utf8 = sha_utf8
- # XXX MD5-sess
+
KD = lambda s, d: hash_utf8("%s:%s" % (s, d))
if hash_utf8 is None:
A1 = '%s:%s:%s' % (self.username, realm, self.password)
A2 = '%s:%s' % (method, path)
+ HA1 = hash_utf8(A1)
+ HA2 = hash_utf8(A2)
+
+ if nonce == self.last_nonce:
+ self.nonce_count += 1
+ else:
+ self.nonce_count = 1
+ ncvalue = '%08x' % self.nonce_count
+ s = str(self.nonce_count).encode('utf-8')
+ s += nonce.encode('utf-8')
+ s += time.ctime().encode('utf-8')
+ s += os.urandom(8)
+
+ cnonce = (hashlib.sha1(s).hexdigest()[:16])
+ noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, HA2)
+ if _algorithm == 'MD5-SESS':
+ HA1 = hash_utf8('%s:%s:%s' % (HA1, nonce, cnonce))
+
if qop is None:
- respdig = KD(hash_utf8(A1), "%s:%s" % (nonce, hash_utf8(A2)))
+ respdig = KD(HA1, "%s:%s" % (nonce, HA2))
elif qop == 'auth' or 'auth' in qop.split(','):
- if nonce == self.last_nonce:
- self.nonce_count += 1
- else:
- self.nonce_count = 1
-
- ncvalue = '%08x' % self.nonce_count
- s = str(self.nonce_count).encode('utf-8')
- s += nonce.encode('utf-8')
- s += time.ctime().encode('utf-8')
- s += os.urandom(8)
-
- cnonce = (hashlib.sha1(s).hexdigest()[:16])
- noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, hash_utf8(A2))
- respdig = KD(hash_utf8(A1), noncebit)
+ respdig = KD(HA1, noncebit)
else:
# XXX handle auth-int.
return None
if entdig:
base += ', digest="%s"' % entdig
if qop:
- base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
+ base += ', qop="auth", nc=%s, cnonce="%s"' % (ncvalue, cnonce)
return 'Digest %s' % (base)
def handle_401(self, r, **kwargs):
"""Takes the given response and tries digest-auth, if needed."""
+ if self.pos is not None:
+ # Rewind the file position indicator of the body to where
+ # it was to resend the request.
+ r.request.body.seek(self.pos)
num_401_calls = getattr(self, 'num_401_calls', 1)
s_auth = r.headers.get('www-authenticate', '')
r.content
r.raw.release_conn()
prep = r.request.copy()
- prep.prepare_cookies(r.cookies)
+ extract_cookies_to_jar(prep._cookies, r.request, r.raw)
+ prep.prepare_cookies(prep._cookies)
prep.headers['Authorization'] = self.build_digest_header(
prep.method, prep.url)
# If we have a saved nonce, skip the 401
if self.last_nonce:
r.headers['Authorization'] = self.build_digest_header(r.method, r.url)
+ try:
+ self.pos = r.body.tell()
+ except AttributeError:
+ pass
r.register_hook('response', self.handle_401)
return r
quGnM/b9Sh/22WA=
-----END CERTIFICATE-----
-# Issuer: CN=Wells Fargo Root Certificate Authority O=Wells Fargo OU=Wells Fargo Certification Authority
-# Subject: CN=Wells Fargo Root Certificate Authority O=Wells Fargo OU=Wells Fargo Certification Authority
-# Label: "Wells Fargo Root CA"
-# Serial: 971282334
-# MD5 Fingerprint: 20:0b:4a:7a:88:a7:a9:42:86:8a:5f:74:56:7b:88:05
-# SHA1 Fingerprint: 93:e6:ab:22:03:03:b5:23:28:dc:da:56:9e:ba:e4:d1:d1:cc:fb:65
-# SHA256 Fingerprint: 03:45:8b:6a:be:ec:c2:14:95:3d:97:14:9a:f4:53:91:69:1d:e9:f9:cd:cc:26:47:86:3a:3d:67:c9:5c:24:3b
------BEGIN CERTIFICATE-----
-MIID5TCCAs2gAwIBAgIEOeSXnjANBgkqhkiG9w0BAQUFADCBgjELMAkGA1UEBhMC
-VVMxFDASBgNVBAoTC1dlbGxzIEZhcmdvMSwwKgYDVQQLEyNXZWxscyBGYXJnbyBD
-ZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTEvMC0GA1UEAxMmV2VsbHMgRmFyZ28gUm9v
-dCBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkwHhcNMDAxMDExMTY0MTI4WhcNMjEwMTE0
-MTY0MTI4WjCBgjELMAkGA1UEBhMCVVMxFDASBgNVBAoTC1dlbGxzIEZhcmdvMSww
-KgYDVQQLEyNXZWxscyBGYXJnbyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTEvMC0G
-A1UEAxMmV2VsbHMgRmFyZ28gUm9vdCBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkwggEi
-MA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDVqDM7Jvk0/82bfuUER84A4n13
-5zHCLielTWi5MbqNQ1mXx3Oqfz1cQJ4F5aHiidlMuD+b+Qy0yGIZLEWukR5zcUHE
-SxP9cMIlrCL1dQu3U+SlK93OvRw6esP3E48mVJwWa2uv+9iWsWCaSOAlIiR5NM4O
-JgALTqv9i86C1y8IcGjBqAr5dE8Hq6T54oN+J3N0Prj5OEL8pahbSCOz6+MlsoCu
-ltQKnMJ4msZoGK43YjdeUXWoWGPAUe5AeH6orxqg4bB4nVCMe+ez/I4jsNtlAHCE
-AQgAFG5Uhpq6zPk3EPbg3oQtnaSFN9OH4xXQwReQfhkhahKpdv0SAulPIV4XAgMB
-AAGjYTBfMA8GA1UdEwEB/wQFMAMBAf8wTAYDVR0gBEUwQzBBBgtghkgBhvt7hwcB
-CzAyMDAGCCsGAQUFBwIBFiRodHRwOi8vd3d3LndlbGxzZmFyZ28uY29tL2NlcnRw
-b2xpY3kwDQYJKoZIhvcNAQEFBQADggEBANIn3ZwKdyu7IvICtUpKkfnRLb7kuxpo
-7w6kAOnu5+/u9vnldKTC2FJYxHT7zmu1Oyl5GFrvm+0fazbuSCUlFLZWohDo7qd/
-0D+j0MNdJu4HzMPBJCGHHt8qElNvQRbn7a6U+oxy+hNH8Dx+rn0ROhPs7fpvcmR7
-nX1/Jv16+yWt6j4pf0zjAFcysLPp7VMX2YuyFA4w6OXVE8Zkr8QA1dhYJPz1j+zx
-x32l2w8n0cbyQIjmH/ZhqPRCyLk306m+LFZ4wnKbWV01QIroTmMatukgalHizqSQ
-33ZwmVxwQ023tqcZZE6St8WRPH9IFmV7Fv3L/PvZ1dZPIWU7Sn9Ho/s=
------END CERTIFICATE-----
-
# Issuer: CN=Swisscom Root CA 1 O=Swisscom OU=Digital Certificate Services
# Subject: CN=Swisscom Root CA 1 O=Swisscom OU=Digital Certificate Services
# Label: "Swisscom Root CA 1"
zMOl6W8KjptlwlCFtaOgUxLMVYdh84GuEEZhvUQhuMI9dM9+JDX6HAcOmz0iyu8x
L4ysEr3vQCj8KWefshNPZiTEUxnpHikV7+ZtsH8tZ/3zbBt1RqPlShfppNcL
-----END CERTIFICATE-----
+
+# Issuer: CN=ACCVRAIZ1 O=ACCV OU=PKIACCV
+# Subject: CN=ACCVRAIZ1 O=ACCV OU=PKIACCV
+# Label: "ACCVRAIZ1"
+# Serial: 6828503384748696800
+# MD5 Fingerprint: d0:a0:5a:ee:05:b6:09:94:21:a1:7d:f1:b2:29:82:02
+# SHA1 Fingerprint: 93:05:7a:88:15:c6:4f:ce:88:2f:fa:91:16:52:28:78:bc:53:64:17
+# SHA256 Fingerprint: 9a:6e:c0:12:e1:a7:da:9d:be:34:19:4d:47:8a:d7:c0:db:18:22:fb:07:1d:f1:29:81:49:6e:d1:04:38:41:13
+-----BEGIN CERTIFICATE-----
+MIIH0zCCBbugAwIBAgIIXsO3pkN/pOAwDQYJKoZIhvcNAQEFBQAwQjESMBAGA1UE
+AwwJQUNDVlJBSVoxMRAwDgYDVQQLDAdQS0lBQ0NWMQ0wCwYDVQQKDARBQ0NWMQsw
+CQYDVQQGEwJFUzAeFw0xMTA1MDUwOTM3MzdaFw0zMDEyMzEwOTM3MzdaMEIxEjAQ
+BgNVBAMMCUFDQ1ZSQUlaMTEQMA4GA1UECwwHUEtJQUNDVjENMAsGA1UECgwEQUND
+VjELMAkGA1UEBhMCRVMwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCb
+qau/YUqXry+XZpp0X9DZlv3P4uRm7x8fRzPCRKPfmt4ftVTdFXxpNRFvu8gMjmoY
+HtiP2Ra8EEg2XPBjs5BaXCQ316PWywlxufEBcoSwfdtNgM3802/J+Nq2DoLSRYWo
+G2ioPej0RGy9ocLLA76MPhMAhN9KSMDjIgro6TenGEyxCQ0jVn8ETdkXhBilyNpA
+lHPrzg5XPAOBOp0KoVdDaaxXbXmQeOW1tDvYvEyNKKGno6e6Ak4l0Squ7a4DIrhr
+IA8wKFSVf+DuzgpmndFALW4ir50awQUZ0m/A8p/4e7MCQvtQqR0tkw8jq8bBD5L/
+0KIV9VMJcRz/RROE5iZe+OCIHAr8Fraocwa48GOEAqDGWuzndN9wrqODJerWx5eH
+k6fGioozl2A3ED6XPm4pFdahD9GILBKfb6qkxkLrQaLjlUPTAYVtjrs78yM2x/47
+4KElB0iryYl0/wiPgL/AlmXz7uxLaL2diMMxs0Dx6M/2OLuc5NF/1OVYm3z61PMO
+m3WR5LpSLhl+0fXNWhn8ugb2+1KoS5kE3fj5tItQo05iifCHJPqDQsGH+tUtKSpa
+cXpkatcnYGMN285J9Y0fkIkyF/hzQ7jSWpOGYdbhdQrqeWZ2iE9x6wQl1gpaepPl
+uUsXQA+xtrn13k/c4LOsOxFwYIRKQ26ZIMApcQrAZQIDAQABo4ICyzCCAscwfQYI
+KwYBBQUHAQEEcTBvMEwGCCsGAQUFBzAChkBodHRwOi8vd3d3LmFjY3YuZXMvZmls
+ZWFkbWluL0FyY2hpdm9zL2NlcnRpZmljYWRvcy9yYWl6YWNjdjEuY3J0MB8GCCsG
+AQUFBzABhhNodHRwOi8vb2NzcC5hY2N2LmVzMB0GA1UdDgQWBBTSh7Tj3zcnk1X2
+VuqB5TbMjB4/vTAPBgNVHRMBAf8EBTADAQH/MB8GA1UdIwQYMBaAFNKHtOPfNyeT
+VfZW6oHlNsyMHj+9MIIBcwYDVR0gBIIBajCCAWYwggFiBgRVHSAAMIIBWDCCASIG
+CCsGAQUFBwICMIIBFB6CARAAQQB1AHQAbwByAGkAZABhAGQAIABkAGUAIABDAGUA
+cgB0AGkAZgBpAGMAYQBjAGkA8wBuACAAUgBhAO0AegAgAGQAZQAgAGwAYQAgAEEA
+QwBDAFYAIAAoAEEAZwBlAG4AYwBpAGEAIABkAGUAIABUAGUAYwBuAG8AbABvAGcA
+7QBhACAAeQAgAEMAZQByAHQAaQBmAGkAYwBhAGMAaQDzAG4AIABFAGwAZQBjAHQA
+cgDzAG4AaQBjAGEALAAgAEMASQBGACAAUQA0ADYAMAAxADEANQA2AEUAKQAuACAA
+QwBQAFMAIABlAG4AIABoAHQAdABwADoALwAvAHcAdwB3AC4AYQBjAGMAdgAuAGUA
+czAwBggrBgEFBQcCARYkaHR0cDovL3d3dy5hY2N2LmVzL2xlZ2lzbGFjaW9uX2Mu
+aHRtMFUGA1UdHwROMEwwSqBIoEaGRGh0dHA6Ly93d3cuYWNjdi5lcy9maWxlYWRt
+aW4vQXJjaGl2b3MvY2VydGlmaWNhZG9zL3JhaXphY2N2MV9kZXIuY3JsMA4GA1Ud
+DwEB/wQEAwIBBjAXBgNVHREEEDAOgQxhY2N2QGFjY3YuZXMwDQYJKoZIhvcNAQEF
+BQADggIBAJcxAp/n/UNnSEQU5CmH7UwoZtCPNdpNYbdKl02125DgBS4OxnnQ8pdp
+D70ER9m+27Up2pvZrqmZ1dM8MJP1jaGo/AaNRPTKFpV8M9xii6g3+CfYCS0b78gU
+JyCpZET/LtZ1qmxNYEAZSUNUY9rizLpm5U9EelvZaoErQNV/+QEnWCzI7UiRfD+m
+AM/EKXMRNt6GGT6d7hmKG9Ww7Y49nCrADdg9ZuM8Db3VlFzi4qc1GwQA9j9ajepD
+vV+JHanBsMyZ4k0ACtrJJ1vnE5Bc5PUzolVt3OAJTS+xJlsndQAJxGJ3KQhfnlms
+tn6tn1QwIgPBHnFk/vk4CpYY3QIUrCPLBhwepH2NDd4nQeit2hW3sCPdK6jT2iWH
+7ehVRE2I9DZ+hJp4rPcOVkkO1jMl1oRQQmwgEh0q1b688nCBpHBgvgW1m54ERL5h
+I6zppSSMEYCUWqKiuUnSwdzRp+0xESyeGabu4VXhwOrPDYTkF7eifKXeVSUG7szA
+h1xA2syVP1XgNce4hL60Xc16gwFy7ofmXx2utYXGJt/mwZrpHgJHnyqobalbz+xF
+d3+YJ5oyXSrjhO7FmGYvliAd3djDJ9ew+f7Zfc3Qn48LFFhRny+Lwzgt3uiP1o2H
+pPVWQxaZLPSkVrQ0uGE3ycJYgBugl6H8WY3pEfbRD0tVNEYqi4Y7
+-----END CERTIFICATE-----
+
+# Issuer: CN=TWCA Global Root CA O=TAIWAN-CA OU=Root CA
+# Subject: CN=TWCA Global Root CA O=TAIWAN-CA OU=Root CA
+# Label: "TWCA Global Root CA"
+# Serial: 3262
+# MD5 Fingerprint: f9:03:7e:cf:e6:9e:3c:73:7a:2a:90:07:69:ff:2b:96
+# SHA1 Fingerprint: 9c:bb:48:53:f6:a4:f6:d3:52:a4:e8:32:52:55:60:13:f5:ad:af:65
+# SHA256 Fingerprint: 59:76:90:07:f7:68:5d:0f:cd:50:87:2f:9f:95:d5:75:5a:5b:2b:45:7d:81:f3:69:2b:61:0a:98:67:2f:0e:1b
+-----BEGIN CERTIFICATE-----
+MIIFQTCCAymgAwIBAgICDL4wDQYJKoZIhvcNAQELBQAwUTELMAkGA1UEBhMCVFcx
+EjAQBgNVBAoTCVRBSVdBTi1DQTEQMA4GA1UECxMHUm9vdCBDQTEcMBoGA1UEAxMT
+VFdDQSBHbG9iYWwgUm9vdCBDQTAeFw0xMjA2MjcwNjI4MzNaFw0zMDEyMzExNTU5
+NTlaMFExCzAJBgNVBAYTAlRXMRIwEAYDVQQKEwlUQUlXQU4tQ0ExEDAOBgNVBAsT
+B1Jvb3QgQ0ExHDAaBgNVBAMTE1RXQ0EgR2xvYmFsIFJvb3QgQ0EwggIiMA0GCSqG
+SIb3DQEBAQUAA4ICDwAwggIKAoICAQCwBdvI64zEbooh745NnHEKH1Jw7W2CnJfF
+10xORUnLQEK1EjRsGcJ0pDFfhQKX7EMzClPSnIyOt7h52yvVavKOZsTuKwEHktSz
+0ALfUPZVr2YOy+BHYC8rMjk1Ujoog/h7FsYYuGLWRyWRzvAZEk2tY/XTP3VfKfCh
+MBwqoJimFb3u/Rk28OKRQ4/6ytYQJ0lM793B8YVwm8rqqFpD/G2Gb3PpN0Wp8DbH
+zIh1HrtsBv+baz4X7GGqcXzGHaL3SekVtTzWoWH1EfcFbx39Eb7QMAfCKbAJTibc
+46KokWofwpFFiFzlmLhxpRUZyXx1EcxwdE8tmx2RRP1WKKD+u4ZqyPpcC1jcxkt2
+yKsi2XMPpfRaAok/T54igu6idFMqPVMnaR1sjjIsZAAmY2E2TqNGtz99sy2sbZCi
+laLOz9qC5wc0GZbpuCGqKX6mOL6OKUohZnkfs8O1CWfe1tQHRvMq2uYiN2DLgbYP
+oA/pyJV/v1WRBXrPPRXAb94JlAGD1zQbzECl8LibZ9WYkTunhHiVJqRaCPgrdLQA
+BDzfuBSO6N+pjWxnkjMdwLfS7JLIvgm/LCkFbwJrnu+8vyq8W8BQj0FwcYeyTbcE
+qYSjMq+u7msXi7Kx/mzhkIyIqJdIzshNy/MGz19qCkKxHh53L46g5pIOBvwFItIm
+4TFRfTLcDwIDAQABoyMwITAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB
+/zANBgkqhkiG9w0BAQsFAAOCAgEAXzSBdu+WHdXltdkCY4QWwa6gcFGn90xHNcgL
+1yg9iXHZqjNB6hQbbCEAwGxCGX6faVsgQt+i0trEfJdLjbDorMjupWkEmQqSpqsn
+LhpNgb+E1HAerUf+/UqdM+DyucRFCCEK2mlpc3INvjT+lIutwx4116KD7+U4x6WF
+H6vPNOw/KP4M8VeGTslV9xzU2KV9Bnpv1d8Q34FOIWWxtuEXeZVFBs5fzNxGiWNo
+RI2T9GRwoD2dKAXDOXC4Ynsg/eTb6QihuJ49CcdP+yz4k3ZB3lLg4VfSnQO8d57+
+nile98FRYB/e2guyLXW3Q0iT5/Z5xoRdgFlglPx4mI88k1HtQJAH32RjJMtOcQWh
+15QaiDLxInQirqWm2BJpTGCjAu4r7NRjkgtevi92a6O2JryPA9gK8kxkRr05YuWW
+6zRjESjMlfGt7+/cgFhI6Uu46mWs6fyAtbXIRfmswZ/ZuepiiI7E8UuDEq3mi4TW
+nsLrgxifarsbJGAzcMzs9zLzXNl5fe+epP7JI8Mk7hWSsT2RTyaGvWZzJBPqpK5j
+wa19hAM8EHiGG3njxPPyBJUgriOCxLM6AGK/5jYk4Ve6xx6QddVfP5VhK8E7zeWz
+aGHQRiapIVJpLesux+t3zqY6tQMzT3bR51xUAV3LePTJDL/PEo4XLSNolOer/qmy
+KwbQBM0=
+-----END CERTIFICATE-----
environment, you can change the definition of where() to return a separately
packaged CA bundle.
"""
-
import os.path
-
-def where():
- """Return the preferred certificate bundle."""
- # vendored bundle inside Requests
- return '/etc/ssl/certs/'
+try:
+ from certifi import where
+except ImportError:
+ def where():
+ """Return the preferred certificate bundle."""
+ # vendored bundle inside Requests
+ return os.path.join(os.path.dirname(__file__), 'cacert.pem')
if __name__ == '__main__':
print(where())
pythoncompat
"""
-from .packages import charade as chardet
+from .packages import chardet
import sys
try:
import simplejson as json
-except ImportError:
+except (ImportError, SyntaxError):
+ # simplejson does not support Python 3.2; it raises a SyntaxError
+ # because of u'...' Unicode literals.
import json
# ---------
from Cookie import Morsel
from StringIO import StringIO
from .packages.urllib3.packages.ordered_dict import OrderedDict
- from httplib import IncompleteRead
builtin_str = str
bytes = str
elif is_py3:
- from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag # pylint: disable=E0401,E0611
- from urllib.request import parse_http_list, getproxies, proxy_bypass # pylint: disable=E0401,E0611
- from http import cookiejar as cookielib # pylint: disable=E0401
- from http.cookies import Morsel # pylint: disable=E0401
+ from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag
+ from urllib.request import parse_http_list, getproxies, proxy_bypass
+ from http import cookiejar as cookielib
+ from http.cookies import Morsel
from io import StringIO
from collections import OrderedDict
- from http.client import IncompleteRead # pylint: disable=E0401
builtin_str = str
str = str
import time
import collections
-from .compat import cookielib, urlparse, Morsel
+from .compat import cookielib, urlparse, urlunparse, Morsel
try:
import threading
return self.get_host()
def get_full_url(self):
- return self._r.url
+ # Only return the response's URL if the user hadn't set the Host
+ # header
+ if not self._r.headers.get('Host'):
+ return self._r.url
+ # If they did set it, retrieve it and reconstruct the expected domain
+ host = self._r.headers['Host']
+ parsed = urlparse(self._r.url)
+ # Reconstruct the URL as we expect it
+ return urlunparse([
+ parsed.scheme, host, parsed.path, parsed.params, parsed.query,
+ parsed.fragment
+ ])
def is_unverifiable(self):
return True
self.set_cookie(c)
return c
+ def iterkeys(self):
+ """Dict-like iterkeys() that returns an iterator of names of cookies from the jar.
+ See itervalues() and iteritems()."""
+ for cookie in iter(self):
+ yield cookie.name
+
def keys(self):
"""Dict-like keys() that returns a list of names of cookies from the jar.
See values() and items()."""
- keys = []
+ return list(self.iterkeys())
+
+ def itervalues(self):
+ """Dict-like itervalues() that returns an iterator of values of cookies from the jar.
+ See iterkeys() and iteritems()."""
for cookie in iter(self):
- keys.append(cookie.name)
- return keys
+ yield cookie.value
def values(self):
"""Dict-like values() that returns a list of values of cookies from the jar.
See keys() and items()."""
- values = []
+ return list(self.itervalues())
+
+ def iteritems(self):
+ """Dict-like iteritems() that returns an iterator of name-value tuples from the jar.
+ See iterkeys() and itervalues()."""
for cookie in iter(self):
- values.append(cookie.value)
- return values
+ yield cookie.name, cookie.value
def items(self):
"""Dict-like items() that returns a list of name-value tuples from the jar.
See keys() and values(). Allows client-code to call "dict(RequestsCookieJar)
and get a vanilla python dict of key value pairs."""
- items = []
- for cookie in iter(self):
- items.append((cookie.name, cookie.value))
- return items
+ return list(self.iteritems())
def list_domains(self):
"""Utility method to list all the domains in the jar."""
remove_cookie_by_name(self, name)
def set_cookie(self, cookie, *args, **kwargs):
- if cookie.value.startswith('"') and cookie.value.endswith('"'):
+ if hasattr(cookie.value, 'startswith') and cookie.value.startswith('"') and cookie.value.endswith('"'):
cookie.value = cookie.value.replace('\\"', '')
return super(RequestsCookieJar, self).set_cookie(cookie, *args, **kwargs)
def morsel_to_cookie(morsel):
"""Convert a Morsel object into a Cookie containing the one k/v pair."""
+
expires = None
- if morsel["max-age"]:
- expires = time.time() + morsel["max-age"]
+ if morsel['max-age']:
+ expires = time.time() + morsel['max-age']
elif morsel['expires']:
- expires = morsel['expires']
- if type(expires) == type(""):
- time_template = "%a, %d-%b-%Y %H:%M:%S GMT"
- expires = time.mktime(time.strptime(expires, time_template))
- c = create_cookie(
- name=morsel.key,
- value=morsel.value,
- version=morsel['version'] or 0,
- port=None,
- domain=morsel['domain'],
- path=morsel['path'],
- secure=bool(morsel['secure']),
- expires=expires,
- discard=False,
+ time_template = '%a, %d-%b-%Y %H:%M:%S GMT'
+ expires = time.mktime(
+ time.strptime(morsel['expires'], time_template)) - time.timezone
+ return create_cookie(
comment=morsel['comment'],
comment_url=bool(morsel['comment']),
+ discard=False,
+ domain=morsel['domain'],
+ expires=expires,
+ name=morsel.key,
+ path=morsel['path'],
+ port=None,
rest={'HttpOnly': morsel['httponly']},
- rfc2109=False,)
- return c
+ rfc2109=False,
+ secure=bool(morsel['secure']),
+ value=morsel.value,
+ version=morsel['version'] or 0,
+ )
def cookiejar_from_dict(cookie_dict, cookiejar=None, overwrite=True):
cookiejar.set_cookie(create_cookie(name, cookie_dict[name]))
return cookiejar
+
+
+def merge_cookies(cookiejar, cookies):
+ """Add cookies to cookiejar and return a merged CookieJar.
+
+ :param cookiejar: CookieJar object to add the cookies to.
+ :param cookies: Dictionary or CookieJar object to be added.
+ """
+ if not isinstance(cookiejar, cookielib.CookieJar):
+ raise ValueError('You can only merge into CookieJar')
+
+ if isinstance(cookies, dict):
+ cookiejar = cookiejar_from_dict(
+ cookies, cookiejar=cookiejar, overwrite=False)
+ elif isinstance(cookies, cookielib.CookieJar):
+ try:
+ cookiejar.update(cookies)
+ except AttributeError:
+ for cookie_in_jar in cookies:
+ cookiejar.set_cookie(cookie_in_jar)
+
+ return cookiejar
This module contains the set of Requests' exceptions.
"""
+from .packages.urllib3.exceptions import HTTPError as BaseHTTPError
class RequestException(IOError):
"""There was an ambiguous exception that occurred while handling your
request."""
+ def __init__(self, *args, **kwargs):
+ """
+ Initialize RequestException with `request` and `response` objects.
+ """
+ response = kwargs.pop('response', None)
+ self.response = response
+ self.request = kwargs.pop('request', None)
+ if (response is not None and not self.request and
+ hasattr(response, 'request')):
+ self.request = self.response.request
+ super(RequestException, self).__init__(*args, **kwargs)
+
class HTTPError(RequestException):
"""An HTTP error occurred."""
- def __init__(self, *args, **kwargs):
- """ Initializes HTTPError with optional `response` object. """
- self.response = kwargs.pop('response', None)
- super(HTTPError, self).__init__(*args, **kwargs)
-
class ConnectionError(RequestException):
"""A Connection error occurred."""
class Timeout(RequestException):
- """The request timed out."""
+ """The request timed out.
+
+ Catching this error will catch both
+ :exc:`~requests.exceptions.ConnectTimeout` and
+ :exc:`~requests.exceptions.ReadTimeout` errors.
+ """
+
+
+class ConnectTimeout(ConnectionError, Timeout):
+ """The request timed out while trying to connect to the remote server.
+
+ Requests that produced this error are safe to retry.
+ """
+
+
+class ReadTimeout(Timeout):
+ """The server did not send any data in the allotted amount of time."""
class URLRequired(RequestException):
class ChunkedEncodingError(RequestException):
"""The server declared chunked encoding but sent an invalid chunk."""
+
+
+class ContentDecodingError(RequestException, BaseHTTPError):
+ """Failed to decode response content"""
"""
import collections
-import logging
import datetime
from io import BytesIO, UnsupportedOperation
from .packages.urllib3.fields import RequestField
from .packages.urllib3.filepost import encode_multipart_formdata
from .packages.urllib3.util import parse_url
+from .packages.urllib3.exceptions import (
+ DecodeError, ReadTimeoutError, ProtocolError)
from .exceptions import (
HTTPError, RequestException, MissingSchema, InvalidURL,
- ChunkedEncodingError)
+ ChunkedEncodingError, ContentDecodingError, ConnectionError)
from .utils import (
guess_filename, get_auth_from_url, requote_uri,
stream_decode_response_unicode, to_key_val_list, parse_header_links,
iter_slices, guess_json_utf, super_len, to_native_string)
from .compat import (
cookielib, urlunparse, urlsplit, urlencode, str, bytes, StringIO,
- is_py2, chardet, json, builtin_str, basestring, IncompleteRead)
-
+ is_py2, chardet, json, builtin_str, basestring)
+from .status_codes import codes
+
+#: The set of HTTP status codes that indicate an automatically
+#: processable redirect.
+REDIRECT_STATI = (
+ codes.moved, # 301
+ codes.found, # 302
+ codes.other, # 303
+ codes.temporary_redirect, # 307
+ codes.permanent_redirect, # 308
+)
+DEFAULT_REDIRECT_LIMIT = 30
CONTENT_CHUNK_SIZE = 10 * 1024
ITER_CHUNK_SIZE = 512
-log = logging.getLogger(__name__)
-
class RequestEncodingMixin(object):
@property
self.url = None
#: dictionary of HTTP headers.
self.headers = None
+ # The `CookieJar` used to create the Cookie header will be stored here
+ # after prepare_cookies is called
+ self._cookies = None
#: request body to send to the server.
self.body = None
#: dictionary of callback hooks, for internal usage.
p = PreparedRequest()
p.method = self.method
p.url = self.url
- p.headers = self.headers.copy()
+ p.headers = self.headers.copy() if self.headers is not None else None
+ p._cookies = self._cookies.copy() if self._cookies is not None else None
p.body = self.body
p.hooks = self.hooks
return p
except UnicodeDecodeError:
pass
+ # Don't do any URL preparation for oddball schemes
+ if ':' in url and not url.lower().startswith('http'):
+ self.url = url
+ return
+
# Support for unicode domain names and paths.
scheme, auth, host, port, path, query, fragment = parse_url(url)
if not scheme:
- raise MissingSchema("Invalid URL %r: No schema supplied" % url)
+ raise MissingSchema("Invalid URL {0!r}: No schema supplied. "
+ "Perhaps you meant http://{0}?".format(url))
if not host:
raise InvalidURL("Invalid URL %r: No host supplied" % url)
is_stream = all([
hasattr(data, '__iter__'),
- not isinstance(data, basestring),
- not isinstance(data, list),
- not isinstance(data, dict)
+ not isinstance(data, (basestring, list, tuple, dict))
])
try:
raise NotImplementedError('Streamed bodies and files are mutually exclusive.')
if length is not None:
- self.headers['Content-Length'] = str(length)
+ self.headers['Content-Length'] = builtin_str(length)
else:
self.headers['Transfer-Encoding'] = 'chunked'
else:
else:
if data:
body = self._encode_params(data)
- if isinstance(data, str) or isinstance(data, builtin_str) or hasattr(data, 'read'):
+ if isinstance(data, basestring) or hasattr(data, 'read'):
content_type = None
else:
content_type = 'application/x-www-form-urlencoded'
def prepare_content_length(self, body):
if hasattr(body, 'seek') and hasattr(body, 'tell'):
body.seek(0, 2)
- self.headers['Content-Length'] = str(body.tell())
+ self.headers['Content-Length'] = builtin_str(body.tell())
body.seek(0, 0)
elif body is not None:
l = super_len(body)
if l:
- self.headers['Content-Length'] = str(l)
+ self.headers['Content-Length'] = builtin_str(l)
elif self.method not in ('GET', 'HEAD'):
self.headers['Content-Length'] = '0'
"""Prepares the given HTTP cookie data."""
if isinstance(cookies, cookielib.CookieJar):
- cookies = cookies
+ self._cookies = cookies
else:
- cookies = cookiejar_from_dict(cookies)
+ self._cookies = cookiejar_from_dict(cookies)
- if 'cookie' not in self.headers:
- cookie_header = get_cookie_header(cookies, self)
- if cookie_header is not None:
- self.headers['Cookie'] = cookie_header
+ cookie_header = get_cookie_header(self._cookies, self)
+ if cookie_header is not None:
+ self.headers['Cookie'] = cookie_header
def prepare_hooks(self, hooks):
"""Prepares the given hooks."""
server's response to an HTTP request.
"""
+ __attrs__ = [
+ '_content',
+ 'status_code',
+ 'headers',
+ 'url',
+ 'history',
+ 'encoding',
+ 'reason',
+ 'cookies',
+ 'elapsed',
+ 'request',
+ ]
+
def __init__(self):
super(Response, self).__init__()
self._content = False
self._content_consumed = False
- #: Integer Code of responded HTTP Status.
+ #: Integer Code of responded HTTP Status, e.g. 404 or 200.
self.status_code = None
#: Case-insensitive Dictionary of Response Headers.
self.headers = CaseInsensitiveDict()
#: File-like object representation of response (for advanced usage).
- #: Requires that ``stream=True` on the request.
+ #: Use of ``raw`` requires that ``stream=True`` be set on the request.
# This requirement does not apply for use internally to Requests.
self.raw = None
#: up here. The list is sorted from the oldest to the most recent request.
self.history = []
+ #: Textual reason of responded HTTP Status, e.g. "Not Found" or "OK".
self.reason = None
#: A CookieJar of Cookies the server sent back.
#: and the arrival of the response (as a timedelta)
self.elapsed = datetime.timedelta(0)
+ #: The :class:`PreparedRequest <PreparedRequest>` object to which this
+ #: is a response.
+ self.request = None
+
+ def __getstate__(self):
+ # Consume everything; accessing the content attribute makes
+ # sure the content has been fully read.
+ if not self._content_consumed:
+ self.content
+
+ return dict(
+ (attr, getattr(self, attr, None))
+ for attr in self.__attrs__
+ )
+
+ def __setstate__(self, state):
+ for name, value in state.items():
+ setattr(self, name, value)
+
+ # pickled objects do not have .raw
+ setattr(self, '_content_consumed', True)
+ setattr(self, 'raw', None)
+
def __repr__(self):
return '<Response [%s]>' % (self.status_code)
return True
@property
+ def is_redirect(self):
+ """True if this Response is a well-formed HTTP redirect that could have
+ been processed automatically (by :meth:`Session.resolve_redirects`).
+ """
+ return ('location' in self.headers and self.status_code in REDIRECT_STATI)
+
+ @property
+ def is_permanent_redirect(self):
+ """True if this Response is one of the permanent versions of redirect."""
+ return ('location' in self.headers and self.status_code in (codes.moved_permanently, codes.permanent_redirect))
+
+ @property
def apparent_encoding(self):
- """The apparent encoding, provided by the lovely Charade library
- (Thanks, Ian!)."""
+ """The apparent encoding, provided by the chardet library"""
return chardet.detect(self.content)['encoding']
def iter_content(self, chunk_size=1, decode_unicode=False):
large responses. The chunk size is the number of bytes it should
read into memory. This is not necessarily the length of each item
returned as decoding can take place.
- """
- if self._content_consumed:
- # simulate reading small chunks of the content
- return iter_slices(self._content, chunk_size)
+ If decode_unicode is True, content will be decoded using the best
+ available encoding based on the response.
+ """
def generate():
try:
# Special case for urllib3.
try:
- for chunk in self.raw.stream(chunk_size,
- decode_content=True):
+ for chunk in self.raw.stream(chunk_size, decode_content=True):
yield chunk
- except IncompleteRead as e:
+ except ProtocolError as e:
raise ChunkedEncodingError(e)
+ except DecodeError as e:
+ raise ContentDecodingError(e)
+ except ReadTimeoutError as e:
+ raise ConnectionError(e)
except AttributeError:
# Standard file-like object.
while True:
self._content_consumed = True
- gen = generate()
+ # simulate reading small chunks of the content
+ reused_chunks = iter_slices(self._content, chunk_size)
+
+ stream_chunks = generate()
+
+ chunks = reused_chunks if self._content_consumed else stream_chunks
if decode_unicode:
- gen = stream_decode_response_unicode(gen, self)
+ chunks = stream_decode_response_unicode(chunks, self)
- return gen
+ return chunks
def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=None):
"""Iterates over the response data, one line at a time. When
pending = None
- for chunk in self.iter_content(chunk_size=chunk_size,
- decode_unicode=decode_unicode):
+ for chunk in self.iter_content(chunk_size=chunk_size, decode_unicode=decode_unicode):
if pending is not None:
chunk = pending + chunk
def text(self):
"""Content of the response, in unicode.
- if Response.encoding is None and chardet module is available, encoding
- will be guessed.
+ If Response.encoding is None, encoding will be guessed using
+ ``chardet``.
+
+ The encoding of the response content is determined based solely on HTTP
+ headers, following RFC 2616 to the letter. If you can take advantage of
+ non-HTTP knowledge to make a better guess at the encoding, you should
+ set ``r.encoding`` appropriately before accessing this property.
"""
# Try charset from content-type
# a best guess).
encoding = guess_json_utf(self.content)
if encoding is not None:
- return json.loads(self.content.decode(encoding), **kwargs)
+ try:
+ return json.loads(self.content.decode(encoding), **kwargs)
+ except UnicodeDecodeError:
+ # Wrong UTF codec detected; usually because it's not UTF-8
+ # but some other 8-bit codec. This is an RFC violation,
+ # and the server didn't bother to tell us what codec *was*
+ # used.
+ pass
return json.loads(self.text, **kwargs)
@property
raise HTTPError(http_error_msg, response=self)
def close(self):
- """Closes the underlying file descriptor and releases the connection
- back to the pool.
+ """Releases the connection back to the pool. Once this method has been
+ called the underlying ``raw`` object must not be accessed again.
*Note: Should not normally need to be called explicitly.*
"""
+++ /dev/null
-'''\r
-support ';python -m charade <file1> [file2] ...' package execution syntax (2.7+)\r
-'''\r
-\r
-from requests.packages.charade import charade_cli\r
-\r
-charade_cli()\r
-######################## BEGIN LICENSE BLOCK ########################\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-__version__ = "1.0.3"\r
-from sys import version_info\r
-\r
-\r
-def detect(aBuf):\r
- if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or\r
- (version_info >= (3, 0) and not isinstance(aBuf, bytes))):\r
- raise ValueError('Expected a bytes object, not a unicode object')\r
-\r
- from . import universaldetector\r
- u = universaldetector.UniversalDetector()\r
- u.reset()\r
- u.feed(aBuf)\r
- u.close()\r
- return u.result\r
-\r
-def _description_of(path):\r
- """Return a string describing the probable encoding of a file."""\r
- from requests.packages.charade.universaldetector import UniversalDetector\r
- u = UniversalDetector()\r
- for line in open(path, 'rb'):\r
- u.feed(line)\r
- u.close()\r
- result = u.result\r
- if result['encoding']:\r
- return '%s: %s with confidence %s' % (path,\r
- result['encoding'],\r
- result['confidence'])\r
- else:\r
- return '%s: no result' % path\r
-\r
-\r
-def charade_cli():\r
- """\r
- Script which takes one or more file paths and reports on their detected\r
- encodings\r
-\r
- Example::\r
-\r
- % chardetect.py somefile someotherfile\r
- somefile: windows-1252 with confidence 0.5\r
- someotherfile: ascii with confidence 1.0\r
-\r
- """\r
- from sys import argv\r
- for path in argv[1:]:\r
- print(_description_of(path))\r
-\r
+######################## BEGIN LICENSE BLOCK ########################
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+__version__ = "2.2.1"
+from sys import version_info
+
+
+def detect(aBuf):
+ if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or
+ (version_info >= (3, 0) and not isinstance(aBuf, bytes))):
+ raise ValueError('Expected a bytes object, not a unicode object')
+
+ from . import universaldetector
+ u = universaldetector.UniversalDetector()
+ u.reset()
+ u.feed(aBuf)
+ u.close()
+ return u.result
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Communicator client code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .mbcharsetprober import MultiByteCharSetProber\r
-from .codingstatemachine import CodingStateMachine\r
-from .chardistribution import Big5DistributionAnalysis\r
-from .mbcssm import Big5SMModel\r
-\r
-\r
-class Big5Prober(MultiByteCharSetProber):\r
- def __init__(self):\r
- MultiByteCharSetProber.__init__(self)\r
- self._mCodingSM = CodingStateMachine(Big5SMModel)\r
- self._mDistributionAnalyzer = Big5DistributionAnalysis()\r
- self.reset()\r
-\r
- def get_charset_name(self):\r
- return "Big5"\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import Big5DistributionAnalysis
+from .mbcssm import Big5SMModel
+
+
+class Big5Prober(MultiByteCharSetProber):
+ def __init__(self):
+ MultiByteCharSetProber.__init__(self)
+ self._mCodingSM = CodingStateMachine(Big5SMModel)
+ self._mDistributionAnalyzer = Big5DistributionAnalysis()
+ self.reset()
+
+ def get_charset_name(self):
+ return "Big5"
--- /dev/null
+#!/usr/bin/env python
+"""
+Script which takes one or more file paths and reports on their detected
+encodings
+
+Example::
+
+ % chardetect somefile someotherfile
+ somefile: windows-1252 with confidence 0.5
+ someotherfile: ascii with confidence 1.0
+
+If no paths are provided, it takes its input from stdin.
+
+"""
+from io import open
+from sys import argv, stdin
+
+from chardet.universaldetector import UniversalDetector
+
+
+def description_of(file, name='stdin'):
+ """Return a string describing the probable encoding of a file."""
+ u = UniversalDetector()
+ for line in file:
+ u.feed(line)
+ u.close()
+ result = u.result
+ if result['encoding']:
+ return '%s: %s with confidence %s' % (name,
+ result['encoding'],
+ result['confidence'])
+ else:
+ return '%s: no result' % name
+
+
+def main():
+ if len(argv) <= 1:
+ print(description_of(stdin))
+ else:
+ for path in argv[1:]:
+ with open(path, 'rb') as f:
+ print(description_of(f, path))
+
+
+if __name__ == '__main__':
+ main()
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Communicator client code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE,\r
- EUCTW_TYPICAL_DISTRIBUTION_RATIO)\r
-from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE,\r
- EUCKR_TYPICAL_DISTRIBUTION_RATIO)\r
-from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE,\r
- GB2312_TYPICAL_DISTRIBUTION_RATIO)\r
-from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE,\r
- BIG5_TYPICAL_DISTRIBUTION_RATIO)\r
-from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE,\r
- JIS_TYPICAL_DISTRIBUTION_RATIO)\r
-from .compat import wrap_ord\r
-\r
-ENOUGH_DATA_THRESHOLD = 1024\r
-SURE_YES = 0.99\r
-SURE_NO = 0.01\r
-MINIMUM_DATA_THRESHOLD = 3\r
-\r
-\r
-class CharDistributionAnalysis:\r
- def __init__(self):\r
- # Mapping table to get frequency order from char order (get from\r
- # GetOrder())\r
- self._mCharToFreqOrder = None\r
- self._mTableSize = None # Size of above table\r
- # This is a constant value which varies from language to language,\r
- # used in calculating confidence. See\r
- # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html\r
- # for further detail.\r
- self._mTypicalDistributionRatio = None\r
- self.reset()\r
-\r
- def reset(self):\r
- """reset analyser, clear any state"""\r
- # If this flag is set to True, detection is done and conclusion has\r
- # been made\r
- self._mDone = False\r
- self._mTotalChars = 0 # Total characters encountered\r
- # The number of characters whose frequency order is less than 512\r
- self._mFreqChars = 0\r
-\r
- def feed(self, aBuf, aCharLen):\r
- """feed a character with known length"""\r
- if aCharLen == 2:\r
- # we only care about 2-bytes character in our distribution analysis\r
- order = self.get_order(aBuf)\r
- else:\r
- order = -1\r
- if order >= 0:\r
- self._mTotalChars += 1\r
- # order is valid\r
- if order < self._mTableSize:\r
- if 512 > self._mCharToFreqOrder[order]: #pylint: disable=unsubscriptable-object\r
- self._mFreqChars += 1\r
-\r
- def get_confidence(self):\r
- """return confidence based on existing data"""\r
- # if we didn't receive any character in our consideration range,\r
- # return negative answer\r
- if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD:\r
- return SURE_NO\r
-\r
- if self._mTotalChars != self._mFreqChars:\r
- r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars)\r
- * self._mTypicalDistributionRatio))\r
- if r < SURE_YES:\r
- return r\r
-\r
- # normalize confidence (we don't want to be 100% sure)\r
- return SURE_YES\r
-\r
- def got_enough_data(self):\r
- # It is not necessary to receive all data to draw conclusion.\r
- # For charset detection, certain amount of data is enough\r
- return self._mTotalChars > ENOUGH_DATA_THRESHOLD\r
-\r
- def get_order(self, aBuf):\r
- # We do not handle characters based on the original encoding string,\r
- # but convert this encoding string to a number, here called order.\r
- # This allows multiple encodings of a language to share one frequency\r
- # table.\r
- return -1\r
-\r
-\r
-class EUCTWDistributionAnalysis(CharDistributionAnalysis):\r
- def __init__(self):\r
- CharDistributionAnalysis.__init__(self)\r
- self._mCharToFreqOrder = EUCTWCharToFreqOrder\r
- self._mTableSize = EUCTW_TABLE_SIZE\r
- self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO\r
-\r
- def get_order(self, aBuf):\r
- # for euc-TW encoding, we are interested\r
- # first byte range: 0xc4 -- 0xfe\r
- # second byte range: 0xa1 -- 0xfe\r
- # no validation needed here. State machine has done that\r
- first_char = wrap_ord(aBuf[0])\r
- if first_char >= 0xC4:\r
- return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1\r
- else:\r
- return -1\r
-\r
-\r
-class EUCKRDistributionAnalysis(CharDistributionAnalysis):\r
- def __init__(self):\r
- CharDistributionAnalysis.__init__(self)\r
- self._mCharToFreqOrder = EUCKRCharToFreqOrder\r
- self._mTableSize = EUCKR_TABLE_SIZE\r
- self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO\r
-\r
- def get_order(self, aBuf):\r
- # for euc-KR encoding, we are interested\r
- # first byte range: 0xb0 -- 0xfe\r
- # second byte range: 0xa1 -- 0xfe\r
- # no validation needed here. State machine has done that\r
- first_char = wrap_ord(aBuf[0])\r
- if first_char >= 0xB0:\r
- return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1\r
- else:\r
- return -1\r
-\r
-\r
-class GB2312DistributionAnalysis(CharDistributionAnalysis):\r
- def __init__(self):\r
- CharDistributionAnalysis.__init__(self)\r
- self._mCharToFreqOrder = GB2312CharToFreqOrder\r
- self._mTableSize = GB2312_TABLE_SIZE\r
- self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO\r
-\r
- def get_order(self, aBuf):\r
- # for GB2312 encoding, we are interested\r
- # first byte range: 0xb0 -- 0xfe\r
- # second byte range: 0xa1 -- 0xfe\r
- # no validation needed here. State machine has done that\r
- first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])\r
- if (first_char >= 0xB0) and (second_char >= 0xA1):\r
- return 94 * (first_char - 0xB0) + second_char - 0xA1\r
- else:\r
- return -1\r
-\r
-\r
-class Big5DistributionAnalysis(CharDistributionAnalysis):\r
- def __init__(self):\r
- CharDistributionAnalysis.__init__(self)\r
- self._mCharToFreqOrder = Big5CharToFreqOrder\r
- self._mTableSize = BIG5_TABLE_SIZE\r
- self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO\r
-\r
- def get_order(self, aBuf):\r
- # for big5 encoding, we are interested\r
- # first byte range: 0xa4 -- 0xfe\r
- # second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe\r
- # no validation needed here. State machine has done that\r
- first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])\r
- if first_char >= 0xA4:\r
- if second_char >= 0xA1:\r
- return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63\r
- else:\r
- return 157 * (first_char - 0xA4) + second_char - 0x40\r
- else:\r
- return -1\r
-\r
-\r
-class SJISDistributionAnalysis(CharDistributionAnalysis):\r
- def __init__(self):\r
- CharDistributionAnalysis.__init__(self)\r
- self._mCharToFreqOrder = JISCharToFreqOrder\r
- self._mTableSize = JIS_TABLE_SIZE\r
- self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO\r
-\r
- def get_order(self, aBuf):\r
- # for sjis encoding, we are interested\r
- # first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe\r
- # second byte range: 0x40 -- 0x7e, 0x81 -- oxfe\r
- # no validation needed here. State machine has done that\r
- first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])\r
- if (first_char >= 0x81) and (first_char <= 0x9F):\r
- order = 188 * (first_char - 0x81)\r
- elif (first_char >= 0xE0) and (first_char <= 0xEF):\r
- order = 188 * (first_char - 0xE0 + 31)\r
- else:\r
- return -1\r
- order = order + second_char - 0x40\r
- if second_char > 0x7F:\r
- order = -1\r
- return order\r
-\r
-\r
-class EUCJPDistributionAnalysis(CharDistributionAnalysis):\r
- def __init__(self):\r
- CharDistributionAnalysis.__init__(self)\r
- self._mCharToFreqOrder = JISCharToFreqOrder\r
- self._mTableSize = JIS_TABLE_SIZE\r
- self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO\r
-\r
- def get_order(self, aBuf):\r
- # for euc-JP encoding, we are interested\r
- # first byte range: 0xa0 -- 0xfe\r
- # second byte range: 0xa1 -- 0xfe\r
- # no validation needed here. State machine has done that\r
- char = wrap_ord(aBuf[0])\r
- if char >= 0xA0:\r
- return 94 * (char - 0xA1) + wrap_ord(aBuf[1]) - 0xa1\r
- else:\r
- return -1\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE,
+ EUCTW_TYPICAL_DISTRIBUTION_RATIO)
+from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE,
+ EUCKR_TYPICAL_DISTRIBUTION_RATIO)
+from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE,
+ GB2312_TYPICAL_DISTRIBUTION_RATIO)
+from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE,
+ BIG5_TYPICAL_DISTRIBUTION_RATIO)
+from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE,
+ JIS_TYPICAL_DISTRIBUTION_RATIO)
+from .compat import wrap_ord
+
+ENOUGH_DATA_THRESHOLD = 1024
+SURE_YES = 0.99
+SURE_NO = 0.01
+MINIMUM_DATA_THRESHOLD = 3
+
+
+class CharDistributionAnalysis:
+ def __init__(self):
+ # Mapping table from character order (as returned by get_order())
+ # to frequency order
+ self._mCharToFreqOrder = None
+ self._mTableSize = None # Size of above table
+ # This is a constant value which varies from language to language,
+ # used in calculating confidence. See
+ # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
+ # for further detail.
+ self._mTypicalDistributionRatio = None
+ self.reset()
+
+ def reset(self):
+ """reset analyser, clear any state"""
+ # If this flag is set to True, detection is done and a conclusion has
+ # been made
+ self._mDone = False
+ self._mTotalChars = 0 # Total characters encountered
+ # The number of characters whose frequency order is less than 512
+ self._mFreqChars = 0
+
+ def feed(self, aBuf, aCharLen):
+ """feed a character with known length"""
+ if aCharLen == 2:
+ # we only care about 2-byte characters in our distribution analysis
+ order = self.get_order(aBuf)
+ else:
+ order = -1
+ if order >= 0:
+ self._mTotalChars += 1
+ # order is valid
+ if order < self._mTableSize:
+ if 512 > self._mCharToFreqOrder[order]:
+ self._mFreqChars += 1
+
+ def get_confidence(self):
+ """return confidence based on existing data"""
+ # if we didn't receive any character in our consideration range,
+ # return negative answer
+ if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD:
+ return SURE_NO
+
+ if self._mTotalChars != self._mFreqChars:
+ r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars)
+ * self._mTypicalDistributionRatio))
+ if r < SURE_YES:
+ return r
+
+ # normalize confidence (we don't want to be 100% sure)
+ return SURE_YES
+
+ def got_enough_data(self):
+ # It is not necessary to receive all the data to draw a conclusion.
+ # For charset detection, a certain amount of data is enough.
+ return self._mTotalChars > ENOUGH_DATA_THRESHOLD
+
+ def get_order(self, aBuf):
+ # We do not handle characters based on the original encoding string,
+ # but convert this encoding string to a number, here called order.
+ # This allows multiple encodings of a language to share one frequency
+ # table.
+ return -1
+
+
+class EUCTWDistributionAnalysis(CharDistributionAnalysis):
+ def __init__(self):
+ CharDistributionAnalysis.__init__(self)
+ self._mCharToFreqOrder = EUCTWCharToFreqOrder
+ self._mTableSize = EUCTW_TABLE_SIZE
+ self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
+
+ def get_order(self, aBuf):
+ # for euc-TW encoding, we are interested
+ # first byte range: 0xc4 -- 0xfe
+ # second byte range: 0xa1 -- 0xfe
+ # no validation needed here. State machine has done that
+ first_char = wrap_ord(aBuf[0])
+ if first_char >= 0xC4:
+ return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1
+ else:
+ return -1
+
+
+class EUCKRDistributionAnalysis(CharDistributionAnalysis):
+ def __init__(self):
+ CharDistributionAnalysis.__init__(self)
+ self._mCharToFreqOrder = EUCKRCharToFreqOrder
+ self._mTableSize = EUCKR_TABLE_SIZE
+ self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
+
+ def get_order(self, aBuf):
+ # for euc-KR encoding, we are interested
+ # first byte range: 0xb0 -- 0xfe
+ # second byte range: 0xa1 -- 0xfe
+ # no validation needed here. State machine has done that
+ first_char = wrap_ord(aBuf[0])
+ if first_char >= 0xB0:
+ return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1
+ else:
+ return -1
+
+
+class GB2312DistributionAnalysis(CharDistributionAnalysis):
+ def __init__(self):
+ CharDistributionAnalysis.__init__(self)
+ self._mCharToFreqOrder = GB2312CharToFreqOrder
+ self._mTableSize = GB2312_TABLE_SIZE
+ self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO
+
+ def get_order(self, aBuf):
+ # for GB2312 encoding, we are interested
+ # first byte range: 0xb0 -- 0xfe
+ # second byte range: 0xa1 -- 0xfe
+ # no validation needed here. State machine has done that
+ first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
+ if (first_char >= 0xB0) and (second_char >= 0xA1):
+ return 94 * (first_char - 0xB0) + second_char - 0xA1
+ else:
+ return -1
+
+
+class Big5DistributionAnalysis(CharDistributionAnalysis):
+    """Distribution analysis configured with the Big5 frequency data."""
+
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        # Plug in the Big5 table, its size and its typical ratio.
+        self._mCharToFreqOrder = Big5CharToFreqOrder
+        self._mTableSize = BIG5_TABLE_SIZE
+        self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aBuf):
+        """Return the order of the two-byte character at the start of aBuf.
+
+        Lead bytes from 0xA4 upwards are mapped with 157 slots per lead
+        byte.  Trail bytes below 0xA1 are counted from 0x40; trail bytes
+        from 0xA1 upwards are counted from 0xA1 and placed after the
+        first 63 slots.  A lead byte below 0xA4 yields -1.  No range
+        validation is done here (the original note says the state
+        machine has already taken care of it).
+        """
+        lead, trail = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
+        if lead < 0xA4:
+            return -1
+        row_base = 157 * (lead - 0xA4)
+        if trail >= 0xA1:
+            return row_base + trail - 0xA1 + 63
+        return row_base + trail - 0x40
+
+
+class SJISDistributionAnalysis(CharDistributionAnalysis):
+    """Distribution analysis for Shift_JIS using the JIS frequency data."""
+
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        # Plug in the JIS table, its size and its typical ratio.
+        self._mCharToFreqOrder = JISCharToFreqOrder
+        self._mTableSize = JIS_TABLE_SIZE
+        self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aBuf):
+        """Return the order of the two-byte character at the start of aBuf.
+
+        Lead bytes 0x81-0x9F and 0xE0-0xEF are mapped with 188 slots per
+        lead byte (the second range follows the 31 rows of the first);
+        any other lead byte yields -1.  Trail bytes are counted from
+        0x40.  No range validation is done here (the original note says
+        the state machine has already taken care of it).
+        """
+        # for sjis encoding, we are interested
+        #   first  byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
+        #   second byte range: 0x40 -- 0x7e,  0x81 -- 0xfe
+        lead, trail = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
+        if 0x81 <= lead <= 0x9F:
+            order = 188 * (lead - 0x81)
+        elif 0xE0 <= lead <= 0xEF:
+            order = 188 * (lead - 0xE0 + 31)
+        else:
+            return -1
+        order = order + trail - 0x40
+        if trail > 0x7F:
+            # 0x7F is never a trail byte, so the upper trail range sits
+            # one slot lower.  This used to assign -1, which threw away
+            # every character whose trail byte was above 0x7F.
+            order -= 1
+        return order
+
+
+class EUCJPDistributionAnalysis(CharDistributionAnalysis):
+    """Distribution analysis for EUC-JP using the JIS frequency data."""
+
+    def __init__(self):
+        CharDistributionAnalysis.__init__(self)
+        # Plug in the JIS table, its size and its typical ratio.
+        self._mCharToFreqOrder = JISCharToFreqOrder
+        self._mTableSize = JIS_TABLE_SIZE
+        self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
+
+    def get_order(self, aBuf):
+        """Return the order of the two-byte character at the start of aBuf.
+
+        Lead bytes from 0xA0 upwards are mapped, with 94 slots per lead
+        byte; both the lead and the trail byte are counted from 0xA1.
+        Any smaller lead byte yields -1.  No range validation is done
+        here (the original note says the state machine has already
+        taken care of it).
+        """
+        lead = wrap_ord(aBuf[0])
+        if lead < 0xA0:
+            return -1
+        return 94 * (lead - 0xA1) + wrap_ord(aBuf[1]) - 0xa1
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Communicator client code.\r
-# \r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-# \r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-# \r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-# \r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from . import constants\r
-import sys\r
-from .charsetprober import CharSetProber\r
-\r
-\r
-class CharSetGroupProber(CharSetProber):\r
- def __init__(self):\r
- CharSetProber.__init__(self)\r
- self._mActiveNum = 0\r
- self._mProbers = []\r
- self._mBestGuessProber = None\r
-\r
- def reset(self):\r
- CharSetProber.reset(self)\r
- self._mActiveNum = 0\r
- for prober in self._mProbers:\r
- if prober:\r
- prober.reset()\r
- prober.active = True\r
- self._mActiveNum += 1\r
- self._mBestGuessProber = None\r
-\r
- def get_charset_name(self):\r
- if not self._mBestGuessProber:\r
- self.get_confidence()\r
- if not self._mBestGuessProber:\r
- return None\r
-# self._mBestGuessProber = self._mProbers[0]\r
- return self._mBestGuessProber.get_charset_name()\r
-\r
- def feed(self, aBuf):\r
- for prober in self._mProbers:\r
- if not prober:\r
- continue\r
- if not prober.active:\r
- continue\r
- st = prober.feed(aBuf)\r
- if not st:\r
- continue\r
- if st == constants.eFoundIt:\r
- self._mBestGuessProber = prober\r
- return self.get_state()\r
- elif st == constants.eNotMe:\r
- prober.active = False\r
- self._mActiveNum -= 1\r
- if self._mActiveNum <= 0:\r
- self._mState = constants.eNotMe\r
- return self.get_state()\r
- return self.get_state()\r
-\r
- def get_confidence(self):\r
- st = self.get_state()\r
- if st == constants.eFoundIt:\r
- return 0.99\r
- elif st == constants.eNotMe:\r
- return 0.01\r
- bestConf = 0.0\r
- self._mBestGuessProber = None\r
- for prober in self._mProbers:\r
- if not prober:\r
- continue\r
- if not prober.active:\r
- if constants._debug:\r
- sys.stderr.write(prober.get_charset_name()\r
- + ' not active\n')\r
- continue\r
- cf = prober.get_confidence()\r
- if constants._debug:\r
- sys.stderr.write('%s confidence = %s\n' %\r
- (prober.get_charset_name(), cf))\r
- if bestConf < cf:\r
- bestConf = cf\r
- self._mBestGuessProber = prober\r
- if not self._mBestGuessProber:\r
- return 0.0\r
- return bestConf\r
-# else:\r
-# self._mBestGuessProber = self._mProbers[0]\r
-# return self._mBestGuessProber.get_confidence()\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from . import constants
+import sys
+from .charsetprober import CharSetProber
+
+
+class CharSetGroupProber(CharSetProber):
+    """Prober that delegates to a list of child probers.
+
+    Subclasses are expected to fill ``self._mProbers``.  Input is fed to
+    every child that is still active, and the child with the highest
+    confidence is remembered as the best guess.
+    """
+
+    def __init__(self):
+        CharSetProber.__init__(self)
+        self._mActiveNum = 0  # number of child probers still active
+        self._mProbers = []  # child probers; falsy entries are skipped
+        self._mBestGuessProber = None  # child currently considered best
+
+    def reset(self):
+        """Reset this prober and re-activate every child prober."""
+        CharSetProber.reset(self)
+        self._mActiveNum = 0
+        for prober in self._mProbers:
+            if prober:
+                prober.reset()
+                prober.active = True
+                self._mActiveNum += 1
+        self._mBestGuessProber = None
+
+    def get_charset_name(self):
+        """Return the best-guess child's charset name, or None if none."""
+        if not self._mBestGuessProber:
+            # get_confidence() selects the best guess as a side effect.
+            self.get_confidence()
+            if not self._mBestGuessProber:
+                return None
+        return self._mBestGuessProber.get_charset_name()
+
+    def feed(self, aBuf):
+        """Feed aBuf to every active child and return this prober's state.
+
+        A child reporting eNotMe is deactivated; once no child is left
+        the group itself switches to eNotMe.  A child reporting eFoundIt
+        becomes the best guess and the group switches to eFoundIt.
+        """
+        for prober in self._mProbers:
+            if not prober:
+                continue
+            if not prober.active:
+                continue
+            st = prober.feed(aBuf)
+            if not st:
+                # Falsy state (eDetecting is 0): this child is undecided.
+                continue
+            if st == constants.eFoundIt:
+                self._mBestGuessProber = prober
+                # Record the positive result on the group as well;
+                # otherwise get_state() keeps reporting the old state
+                # and get_confidence() never takes its eFoundIt path.
+                self._mState = constants.eFoundIt
+                return self.get_state()
+            elif st == constants.eNotMe:
+                prober.active = False
+                self._mActiveNum -= 1
+                if self._mActiveNum <= 0:
+                    self._mState = constants.eNotMe
+                    return self.get_state()
+        return self.get_state()
+
+    def get_confidence(self):
+        """Return the confidence of the group.
+
+        Returns 0.99 in state eFoundIt and 0.01 in state eNotMe.
+        Otherwise the active children are polled, the most confident one
+        is stored as the best guess and its confidence is returned; 0.0
+        is returned when no child has a confidence above zero.
+        """
+        st = self.get_state()
+        if st == constants.eFoundIt:
+            return 0.99
+        elif st == constants.eNotMe:
+            return 0.01
+        bestConf = 0.0
+        self._mBestGuessProber = None
+        for prober in self._mProbers:
+            if not prober:
+                continue
+            if not prober.active:
+                if constants._debug:
+                    sys.stderr.write(prober.get_charset_name()
+                                     + ' not active\n')
+                continue
+            cf = prober.get_confidence()
+            if constants._debug:
+                sys.stderr.write('%s confidence = %s\n' %
+                                 (prober.get_charset_name(), cf))
+            if bestConf < cf:
+                bestConf = cf
+                self._mBestGuessProber = prober
+        if not self._mBestGuessProber:
+            return 0.0
+        return bestConf
+# else:
+# self._mBestGuessProber = self._mProbers[0]
+# return self._mBestGuessProber.get_confidence()
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .constants import eStart\r
-from .compat import wrap_ord\r
-\r
-\r
-class CodingStateMachine:\r
- def __init__(self, sm):\r
- self._mModel = sm\r
- self._mCurrentBytePos = 0\r
- self._mCurrentCharLen = 0\r
- self.reset()\r
-\r
- def reset(self):\r
- self._mCurrentState = eStart\r
-\r
- def next_state(self, c):\r
- # for each byte we get its class\r
- # if it is first byte, we also get byte length\r
- # PY3K: aBuf is a byte stream, so c is an int, not a byte\r
- byteCls = self._mModel['classTable'][wrap_ord(c)]\r
- if self._mCurrentState == eStart:\r
- self._mCurrentBytePos = 0\r
- self._mCurrentCharLen = self._mModel['charLenTable'][byteCls]\r
- # from byte's class and stateTable, we get its next state\r
- curr_state = (self._mCurrentState * self._mModel['classFactor']\r
- + byteCls)\r
- self._mCurrentState = self._mModel['stateTable'][curr_state]\r
- self._mCurrentBytePos += 1\r
- return self._mCurrentState\r
-\r
- def get_current_charlen(self):\r
- return self._mCurrentCharLen\r
-\r
- def get_coding_state_machine(self):\r
- return self._mModel['name']\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from .constants import eStart
+from .compat import wrap_ord
+
+
+class CodingStateMachine:
+    """Table-driven state machine that consumes one byte at a time.
+
+    The model passed to the constructor is a mapping providing the
+    entries 'classTable', 'charLenTable', 'classFactor', 'stateTable'
+    and 'name'.
+    """
+
+    def __init__(self, sm):
+        self._mModel = sm
+        self._mCurrentBytePos = 0
+        self._mCurrentCharLen = 0
+        self.reset()
+
+    def reset(self):
+        """Put the machine back into the start state."""
+        self._mCurrentState = eStart
+
+    def next_state(self, c):
+        """Consume the byte c and return the state the machine moves to."""
+        model = self._mModel
+        # Every byte is first mapped to its class.
+        # PY3K: aBuf is a byte stream, so c is an int, not a byte
+        byte_class = model['classTable'][wrap_ord(c)]
+        if self._mCurrentState == eStart:
+            # First byte of a character: its class also gives the length.
+            self._mCurrentBytePos = 0
+            self._mCurrentCharLen = model['charLenTable'][byte_class]
+        # The state table is indexed by (state * classFactor + class).
+        row_offset = self._mCurrentState * model['classFactor']
+        self._mCurrentState = model['stateTable'][row_offset + byte_class]
+        self._mCurrentBytePos += 1
+        return self._mCurrentState
+
+    def get_current_charlen(self):
+        """Return the length recorded for the current character."""
+        return self._mCurrentCharLen
+
+    def get_coding_state_machine(self):
+        """Return the 'name' entry of the model driving this machine."""
+        return self._mModel['name']
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Universal charset detector code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 2001\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-# Shy Shalom - original C code\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-# \r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-# \r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-_debug = 0\r
-\r
-eDetecting = 0\r
-eFoundIt = 1\r
-eNotMe = 2\r
-\r
-eStart = 0\r
-eError = 1\r
-eItsMe = 2\r
-\r
-SHORTCUT_THRESHOLD = 0.95\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+# Shy Shalom - original C code
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+# Truthy value makes code that checks it (e.g. the group prober) write
+# diagnostic output to stderr.
+_debug = 0
+
+# States reported by charset probers.
+eDetecting = 0
+eFoundIt = 1
+eNotMe = 2
+
+# States of a coding state machine.
+eStart = 0
+eError = 1
+eItsMe = 2
+
+# NOTE(review): presumably the confidence above which detection may stop
+# early -- confirm against the code that reads this constant.
+SHORTCUT_THRESHOLD = 0.95
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .mbcharsetprober import MultiByteCharSetProber\r
-from .codingstatemachine import CodingStateMachine\r
-from .chardistribution import EUCKRDistributionAnalysis\r
-from .mbcssm import CP949SMModel\r
-\r
-\r
-class CP949Prober(MultiByteCharSetProber):\r
- def __init__(self):\r
- MultiByteCharSetProber.__init__(self)\r
- self._mCodingSM = CodingStateMachine(CP949SMModel)\r
- # NOTE: CP949 is a superset of EUC-KR, so the distribution should be\r
- # not different.\r
- self._mDistributionAnalyzer = EUCKRDistributionAnalysis()\r
- self.reset()\r
-\r
- def get_charset_name(self):\r
- return "CP949"\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import EUCKRDistributionAnalysis
+from .mbcssm import CP949SMModel
+
+
+class CP949Prober(MultiByteCharSetProber):
+    """Multi-byte prober for the CP949 charset."""
+
+    def __init__(self):
+        MultiByteCharSetProber.__init__(self)
+        # NOTE: CP949 is a superset of EUC-KR, so the EUC-KR character
+        #       distribution is reused for it unchanged.
+        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
+        self._mCodingSM = CodingStateMachine(CP949SMModel)
+        self.reset()
+
+    def get_charset_name(self):
+        """Return the name of the charset this prober reports."""
+        return "CP949"
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from . import constants\r
-from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,\r
- ISO2022KRSMModel)\r
-from .charsetprober import CharSetProber\r
-from .codingstatemachine import CodingStateMachine\r
-from .compat import wrap_ord\r
-\r
-\r
-class EscCharSetProber(CharSetProber):\r
- def __init__(self):\r
- CharSetProber.__init__(self)\r
- self._mCodingSM = [\r
- CodingStateMachine(HZSMModel),\r
- CodingStateMachine(ISO2022CNSMModel),\r
- CodingStateMachine(ISO2022JPSMModel),\r
- CodingStateMachine(ISO2022KRSMModel)\r
- ]\r
- self.reset()\r
-\r
- def reset(self):\r
- CharSetProber.reset(self)\r
- for codingSM in self._mCodingSM:\r
- if not codingSM:\r
- continue\r
- codingSM.active = True\r
- codingSM.reset()\r
- self._mActiveSM = len(self._mCodingSM)\r
- self._mDetectedCharset = None\r
-\r
- def get_charset_name(self):\r
- return self._mDetectedCharset\r
-\r
- def get_confidence(self):\r
- if self._mDetectedCharset:\r
- return 0.99\r
- else:\r
- return 0.00\r
-\r
- def feed(self, aBuf):\r
- for c in aBuf:\r
- # PY3K: aBuf is a byte array, so c is an int, not a byte\r
- for codingSM in self._mCodingSM:\r
- if not codingSM:\r
- continue\r
- if not codingSM.active:\r
- continue\r
- codingState = codingSM.next_state(wrap_ord(c))\r
- if codingState == constants.eError:\r
- codingSM.active = False\r
- self._mActiveSM -= 1\r
- if self._mActiveSM <= 0:\r
- self._mState = constants.eNotMe\r
- return self.get_state()\r
- elif codingState == constants.eItsMe:\r
- self._mState = constants.eFoundIt\r
- self._mDetectedCharset = codingSM.get_coding_state_machine() # nopep8\r
- return self.get_state()\r
-\r
- return self.get_state()\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from . import constants
+from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
+ ISO2022KRSMModel)
+from .charsetprober import CharSetProber
+from .codingstatemachine import CodingStateMachine
+from .compat import wrap_ord
+
+
+class EscCharSetProber(CharSetProber):
+    """Prober driven by the HZ and ISO-2022-CN/JP/KR state machines.
+
+    Every input byte is pushed through each machine that is still
+    active; the first machine to report eItsMe decides the charset.
+    """
+
+    def __init__(self):
+        CharSetProber.__init__(self)
+        models = (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
+                  ISO2022KRSMModel)
+        self._mCodingSM = [CodingStateMachine(model) for model in models]
+        self.reset()
+
+    def reset(self):
+        """Reset the prober and re-activate all state machines."""
+        CharSetProber.reset(self)
+        for sm in self._mCodingSM:
+            if sm:
+                sm.active = True
+                sm.reset()
+        self._mActiveSM = len(self._mCodingSM)
+        self._mDetectedCharset = None
+
+    def get_charset_name(self):
+        """Return the detected charset name, or None if none was found."""
+        return self._mDetectedCharset
+
+    def get_confidence(self):
+        """Return 0.99 once a charset has been detected, otherwise 0.00."""
+        return 0.99 if self._mDetectedCharset else 0.00
+
+    def feed(self, aBuf):
+        """Run aBuf through the active machines and return the state.
+
+        A machine reporting eError is deactivated; when none is left the
+        prober switches to eNotMe.  A machine reporting eItsMe switches
+        the prober to eFoundIt and supplies the detected charset name.
+        """
+        for byte in aBuf:
+            # PY3K: aBuf is a byte array, so byte is an int, not a byte
+            for sm in self._mCodingSM:
+                if not (sm and sm.active):
+                    continue
+                outcome = sm.next_state(wrap_ord(byte))
+                if outcome == constants.eItsMe:
+                    self._mState = constants.eFoundIt
+                    self._mDetectedCharset = sm.get_coding_state_machine()
+                    return self.get_state()
+                if outcome == constants.eError:
+                    sm.active = False
+                    self._mActiveSM -= 1
+                    if self._mActiveSM <= 0:
+                        self._mState = constants.eNotMe
+                        return self.get_state()
+
+        return self.get_state()
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .constants import eStart, eError, eItsMe\r
-\r
-HZ_cls = (\r
-1,0,0,0,0,0,0,0, # 00 - 07\r
-0,0,0,0,0,0,0,0, # 08 - 0f\r
-0,0,0,0,0,0,0,0, # 10 - 17\r
-0,0,0,1,0,0,0,0, # 18 - 1f\r
-0,0,0,0,0,0,0,0, # 20 - 27\r
-0,0,0,0,0,0,0,0, # 28 - 2f\r
-0,0,0,0,0,0,0,0, # 30 - 37\r
-0,0,0,0,0,0,0,0, # 38 - 3f\r
-0,0,0,0,0,0,0,0, # 40 - 47\r
-0,0,0,0,0,0,0,0, # 48 - 4f\r
-0,0,0,0,0,0,0,0, # 50 - 57\r
-0,0,0,0,0,0,0,0, # 58 - 5f\r
-0,0,0,0,0,0,0,0, # 60 - 67\r
-0,0,0,0,0,0,0,0, # 68 - 6f\r
-0,0,0,0,0,0,0,0, # 70 - 77\r
-0,0,0,4,0,5,2,0, # 78 - 7f\r
-1,1,1,1,1,1,1,1, # 80 - 87\r
-1,1,1,1,1,1,1,1, # 88 - 8f\r
-1,1,1,1,1,1,1,1, # 90 - 97\r
-1,1,1,1,1,1,1,1, # 98 - 9f\r
-1,1,1,1,1,1,1,1, # a0 - a7\r
-1,1,1,1,1,1,1,1, # a8 - af\r
-1,1,1,1,1,1,1,1, # b0 - b7\r
-1,1,1,1,1,1,1,1, # b8 - bf\r
-1,1,1,1,1,1,1,1, # c0 - c7\r
-1,1,1,1,1,1,1,1, # c8 - cf\r
-1,1,1,1,1,1,1,1, # d0 - d7\r
-1,1,1,1,1,1,1,1, # d8 - df\r
-1,1,1,1,1,1,1,1, # e0 - e7\r
-1,1,1,1,1,1,1,1, # e8 - ef\r
-1,1,1,1,1,1,1,1, # f0 - f7\r
-1,1,1,1,1,1,1,1, # f8 - ff\r
-)\r
-\r
-HZ_st = (\r
-eStart,eError, 3,eStart,eStart,eStart,eError,eError,# 00-07\r
-eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f\r
-eItsMe,eItsMe,eError,eError,eStart,eStart, 4,eError,# 10-17\r
- 5,eError, 6,eError, 5, 5, 4,eError,# 18-1f\r
- 4,eError, 4, 4, 4,eError, 4,eError,# 20-27\r
- 4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f\r
-)\r
-\r
-HZCharLenTable = (0, 0, 0, 0, 0, 0)\r
-\r
-HZSMModel = {'classTable': HZ_cls,\r
- 'classFactor': 6,\r
- 'stateTable': HZ_st,\r
- 'charLenTable': HZCharLenTable,\r
- 'name': "HZ-GB-2312"}\r
-\r
-ISO2022CN_cls = (\r
-2,0,0,0,0,0,0,0, # 00 - 07\r
-0,0,0,0,0,0,0,0, # 08 - 0f\r
-0,0,0,0,0,0,0,0, # 10 - 17\r
-0,0,0,1,0,0,0,0, # 18 - 1f\r
-0,0,0,0,0,0,0,0, # 20 - 27\r
-0,3,0,0,0,0,0,0, # 28 - 2f\r
-0,0,0,0,0,0,0,0, # 30 - 37\r
-0,0,0,0,0,0,0,0, # 38 - 3f\r
-0,0,0,4,0,0,0,0, # 40 - 47\r
-0,0,0,0,0,0,0,0, # 48 - 4f\r
-0,0,0,0,0,0,0,0, # 50 - 57\r
-0,0,0,0,0,0,0,0, # 58 - 5f\r
-0,0,0,0,0,0,0,0, # 60 - 67\r
-0,0,0,0,0,0,0,0, # 68 - 6f\r
-0,0,0,0,0,0,0,0, # 70 - 77\r
-0,0,0,0,0,0,0,0, # 78 - 7f\r
-2,2,2,2,2,2,2,2, # 80 - 87\r
-2,2,2,2,2,2,2,2, # 88 - 8f\r
-2,2,2,2,2,2,2,2, # 90 - 97\r
-2,2,2,2,2,2,2,2, # 98 - 9f\r
-2,2,2,2,2,2,2,2, # a0 - a7\r
-2,2,2,2,2,2,2,2, # a8 - af\r
-2,2,2,2,2,2,2,2, # b0 - b7\r
-2,2,2,2,2,2,2,2, # b8 - bf\r
-2,2,2,2,2,2,2,2, # c0 - c7\r
-2,2,2,2,2,2,2,2, # c8 - cf\r
-2,2,2,2,2,2,2,2, # d0 - d7\r
-2,2,2,2,2,2,2,2, # d8 - df\r
-2,2,2,2,2,2,2,2, # e0 - e7\r
-2,2,2,2,2,2,2,2, # e8 - ef\r
-2,2,2,2,2,2,2,2, # f0 - f7\r
-2,2,2,2,2,2,2,2, # f8 - ff\r
-)\r
-\r
-ISO2022CN_st = (\r
-eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07\r
-eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f\r
-eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17\r
-eItsMe,eItsMe,eItsMe,eError,eError,eError, 4,eError,# 18-1f\r
-eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27\r
- 5, 6,eError,eError,eError,eError,eError,eError,# 28-2f\r
-eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37\r
-eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f\r
-)\r
-\r
-ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0)\r
-\r
-ISO2022CNSMModel = {'classTable': ISO2022CN_cls,\r
- 'classFactor': 9,\r
- 'stateTable': ISO2022CN_st,\r
- 'charLenTable': ISO2022CNCharLenTable,\r
- 'name': "ISO-2022-CN"}\r
-\r
-ISO2022JP_cls = (\r
-2,0,0,0,0,0,0,0, # 00 - 07\r
-0,0,0,0,0,0,2,2, # 08 - 0f\r
-0,0,0,0,0,0,0,0, # 10 - 17\r
-0,0,0,1,0,0,0,0, # 18 - 1f\r
-0,0,0,0,7,0,0,0, # 20 - 27\r
-3,0,0,0,0,0,0,0, # 28 - 2f\r
-0,0,0,0,0,0,0,0, # 30 - 37\r
-0,0,0,0,0,0,0,0, # 38 - 3f\r
-6,0,4,0,8,0,0,0, # 40 - 47\r
-0,9,5,0,0,0,0,0, # 48 - 4f\r
-0,0,0,0,0,0,0,0, # 50 - 57\r
-0,0,0,0,0,0,0,0, # 58 - 5f\r
-0,0,0,0,0,0,0,0, # 60 - 67\r
-0,0,0,0,0,0,0,0, # 68 - 6f\r
-0,0,0,0,0,0,0,0, # 70 - 77\r
-0,0,0,0,0,0,0,0, # 78 - 7f\r
-2,2,2,2,2,2,2,2, # 80 - 87\r
-2,2,2,2,2,2,2,2, # 88 - 8f\r
-2,2,2,2,2,2,2,2, # 90 - 97\r
-2,2,2,2,2,2,2,2, # 98 - 9f\r
-2,2,2,2,2,2,2,2, # a0 - a7\r
-2,2,2,2,2,2,2,2, # a8 - af\r
-2,2,2,2,2,2,2,2, # b0 - b7\r
-2,2,2,2,2,2,2,2, # b8 - bf\r
-2,2,2,2,2,2,2,2, # c0 - c7\r
-2,2,2,2,2,2,2,2, # c8 - cf\r
-2,2,2,2,2,2,2,2, # d0 - d7\r
-2,2,2,2,2,2,2,2, # d8 - df\r
-2,2,2,2,2,2,2,2, # e0 - e7\r
-2,2,2,2,2,2,2,2, # e8 - ef\r
-2,2,2,2,2,2,2,2, # f0 - f7\r
-2,2,2,2,2,2,2,2, # f8 - ff\r
-)\r
-\r
-ISO2022JP_st = (\r
-eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07\r
-eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f\r
-eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17\r
-eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f\r
-eError, 5,eError,eError,eError, 4,eError,eError,# 20-27\r
-eError,eError,eError, 6,eItsMe,eError,eItsMe,eError,# 28-2f\r
-eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37\r
-eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f\r
-eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47\r
-)\r
-\r
-ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)\r
-\r
-ISO2022JPSMModel = {'classTable': ISO2022JP_cls,\r
- 'classFactor': 10,\r
- 'stateTable': ISO2022JP_st,\r
- 'charLenTable': ISO2022JPCharLenTable,\r
- 'name': "ISO-2022-JP"}\r
-\r
-ISO2022KR_cls = (\r
-2,0,0,0,0,0,0,0, # 00 - 07\r
-0,0,0,0,0,0,0,0, # 08 - 0f\r
-0,0,0,0,0,0,0,0, # 10 - 17\r
-0,0,0,1,0,0,0,0, # 18 - 1f\r
-0,0,0,0,3,0,0,0, # 20 - 27\r
-0,4,0,0,0,0,0,0, # 28 - 2f\r
-0,0,0,0,0,0,0,0, # 30 - 37\r
-0,0,0,0,0,0,0,0, # 38 - 3f\r
-0,0,0,5,0,0,0,0, # 40 - 47\r
-0,0,0,0,0,0,0,0, # 48 - 4f\r
-0,0,0,0,0,0,0,0, # 50 - 57\r
-0,0,0,0,0,0,0,0, # 58 - 5f\r
-0,0,0,0,0,0,0,0, # 60 - 67\r
-0,0,0,0,0,0,0,0, # 68 - 6f\r
-0,0,0,0,0,0,0,0, # 70 - 77\r
-0,0,0,0,0,0,0,0, # 78 - 7f\r
-2,2,2,2,2,2,2,2, # 80 - 87\r
-2,2,2,2,2,2,2,2, # 88 - 8f\r
-2,2,2,2,2,2,2,2, # 90 - 97\r
-2,2,2,2,2,2,2,2, # 98 - 9f\r
-2,2,2,2,2,2,2,2, # a0 - a7\r
-2,2,2,2,2,2,2,2, # a8 - af\r
-2,2,2,2,2,2,2,2, # b0 - b7\r
-2,2,2,2,2,2,2,2, # b8 - bf\r
-2,2,2,2,2,2,2,2, # c0 - c7\r
-2,2,2,2,2,2,2,2, # c8 - cf\r
-2,2,2,2,2,2,2,2, # d0 - d7\r
-2,2,2,2,2,2,2,2, # d8 - df\r
-2,2,2,2,2,2,2,2, # e0 - e7\r
-2,2,2,2,2,2,2,2, # e8 - ef\r
-2,2,2,2,2,2,2,2, # f0 - f7\r
-2,2,2,2,2,2,2,2, # f8 - ff\r
-)\r
-\r
-ISO2022KR_st = (\r
-eStart, 3,eError,eStart,eStart,eStart,eError,eError,# 00-07\r
-eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f\r
-eItsMe,eItsMe,eError,eError,eError, 4,eError,eError,# 10-17\r
-eError,eError,eError,eError, 5,eError,eError,eError,# 18-1f\r
-eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27\r
-)\r
-\r
-ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0)\r
-\r
-ISO2022KRSMModel = {'classTable': ISO2022KR_cls,\r
- 'classFactor': 6,\r
- 'stateTable': ISO2022KR_st,\r
- 'charLenTable': ISO2022KRCharLenTable,\r
- 'name': "ISO-2022-KR"}\r
-\r
-# flake8: noqa\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from .constants import eStart, eError, eItsMe
+
+HZ_cls = (  # 256 entries, 8 per row; each row comment gives the hex index range it covers. Referenced as HZSMModel['classTable'].
+1,0,0,0,0,0,0,0,  # 00 - 07
+0,0,0,0,0,0,0,0,  # 08 - 0f
+0,0,0,0,0,0,0,0,  # 10 - 17
+0,0,0,1,0,0,0,0,  # 18 - 1f
+0,0,0,0,0,0,0,0,  # 20 - 27
+0,0,0,0,0,0,0,0,  # 28 - 2f
+0,0,0,0,0,0,0,0,  # 30 - 37
+0,0,0,0,0,0,0,0,  # 38 - 3f
+0,0,0,0,0,0,0,0,  # 40 - 47
+0,0,0,0,0,0,0,0,  # 48 - 4f
+0,0,0,0,0,0,0,0,  # 50 - 57
+0,0,0,0,0,0,0,0,  # 58 - 5f
+0,0,0,0,0,0,0,0,  # 60 - 67
+0,0,0,0,0,0,0,0,  # 68 - 6f
+0,0,0,0,0,0,0,0,  # 70 - 77
+0,0,0,4,0,5,2,0,  # 78 - 7f
+1,1,1,1,1,1,1,1,  # 80 - 87
+1,1,1,1,1,1,1,1,  # 88 - 8f
+1,1,1,1,1,1,1,1,  # 90 - 97
+1,1,1,1,1,1,1,1,  # 98 - 9f
+1,1,1,1,1,1,1,1,  # a0 - a7
+1,1,1,1,1,1,1,1,  # a8 - af
+1,1,1,1,1,1,1,1,  # b0 - b7
+1,1,1,1,1,1,1,1,  # b8 - bf
+1,1,1,1,1,1,1,1,  # c0 - c7
+1,1,1,1,1,1,1,1,  # c8 - cf
+1,1,1,1,1,1,1,1,  # d0 - d7
+1,1,1,1,1,1,1,1,  # d8 - df
+1,1,1,1,1,1,1,1,  # e0 - e7
+1,1,1,1,1,1,1,1,  # e8 - ef
+1,1,1,1,1,1,1,1,  # f0 - f7
+1,1,1,1,1,1,1,1,  # f8 - ff
+)
+
+HZ_st = (  # Flat tuple, 8 entries per row (row comments give the hex index range); mixes eStart/eError/eItsMe from .constants with plain ints. Referenced as HZSMModel['stateTable']; the lookup scheme lives in the consumer, not shown here.
+eStart,eError,     3,eStart,eStart,eStart,eError,eError,# 00-07
+eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
+eItsMe,eItsMe,eError,eError,eStart,eStart,     4,eError,# 10-17
+     5,eError,     6,eError,     5,     5,     4,eError,# 18-1f
+     4,eError,     4,     4,     4,eError,     4,eError,# 20-27
+     4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f
+)
+
+HZCharLenTable = (0, 0, 0, 0, 0, 0)  # six zeros; its length equals the 'classFactor' value (6) in HZSMModel
+
+HZSMModel = {'classTable': HZ_cls,  # bundles the HZ tables and the charset name under string keys; the code that reads these keys is outside this module
+ 'classFactor': 6,
+ 'stateTable': HZ_st,
+ 'charLenTable': HZCharLenTable,
+ 'name': "HZ-GB-2312"}
+
+ISO2022CN_cls = (  # 256 entries, 8 per row; each row comment gives the hex index range it covers. Referenced as ISO2022CNSMModel['classTable'].
+2,0,0,0,0,0,0,0,  # 00 - 07
+0,0,0,0,0,0,0,0,  # 08 - 0f
+0,0,0,0,0,0,0,0,  # 10 - 17
+0,0,0,1,0,0,0,0,  # 18 - 1f
+0,0,0,0,0,0,0,0,  # 20 - 27
+0,3,0,0,0,0,0,0,  # 28 - 2f
+0,0,0,0,0,0,0,0,  # 30 - 37
+0,0,0,0,0,0,0,0,  # 38 - 3f
+0,0,0,4,0,0,0,0,  # 40 - 47
+0,0,0,0,0,0,0,0,  # 48 - 4f
+0,0,0,0,0,0,0,0,  # 50 - 57
+0,0,0,0,0,0,0,0,  # 58 - 5f
+0,0,0,0,0,0,0,0,  # 60 - 67
+0,0,0,0,0,0,0,0,  # 68 - 6f
+0,0,0,0,0,0,0,0,  # 70 - 77
+0,0,0,0,0,0,0,0,  # 78 - 7f
+2,2,2,2,2,2,2,2,  # 80 - 87
+2,2,2,2,2,2,2,2,  # 88 - 8f
+2,2,2,2,2,2,2,2,  # 90 - 97
+2,2,2,2,2,2,2,2,  # 98 - 9f
+2,2,2,2,2,2,2,2,  # a0 - a7
+2,2,2,2,2,2,2,2,  # a8 - af
+2,2,2,2,2,2,2,2,  # b0 - b7
+2,2,2,2,2,2,2,2,  # b8 - bf
+2,2,2,2,2,2,2,2,  # c0 - c7
+2,2,2,2,2,2,2,2,  # c8 - cf
+2,2,2,2,2,2,2,2,  # d0 - d7
+2,2,2,2,2,2,2,2,  # d8 - df
+2,2,2,2,2,2,2,2,  # e0 - e7
+2,2,2,2,2,2,2,2,  # e8 - ef
+2,2,2,2,2,2,2,2,  # f0 - f7
+2,2,2,2,2,2,2,2,  # f8 - ff
+)
+
+ISO2022CN_st = (  # Flat tuple, 8 entries per row (row comments give the hex index range); mixes eStart/eError/eItsMe from .constants with plain ints. Referenced as ISO2022CNSMModel['stateTable']; the lookup scheme lives in the consumer, not shown here.
+eStart,     3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
+eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f
+eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
+eItsMe,eItsMe,eItsMe,eError,eError,eError,     4,eError,# 18-1f
+eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27
+     5,     6,eError,eError,eError,eError,eError,eError,# 28-2f
+eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37
+eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f
+)
+
+ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0)  # nine zeros; its length equals the 'classFactor' value (9) in ISO2022CNSMModel
+
+ISO2022CNSMModel = {'classTable': ISO2022CN_cls,  # bundles the ISO-2022-CN tables and the charset name under string keys; the code that reads these keys is outside this module
+ 'classFactor': 9,
+ 'stateTable': ISO2022CN_st,
+ 'charLenTable': ISO2022CNCharLenTable,
+ 'name': "ISO-2022-CN"}
+
+ISO2022JP_cls = (  # 256 entries, 8 per row; each row comment gives the hex index range it covers. Referenced as ISO2022JPSMModel['classTable'].
+2,0,0,0,0,0,0,0,  # 00 - 07
+0,0,0,0,0,0,2,2,  # 08 - 0f
+0,0,0,0,0,0,0,0,  # 10 - 17
+0,0,0,1,0,0,0,0,  # 18 - 1f
+0,0,0,0,7,0,0,0,  # 20 - 27
+3,0,0,0,0,0,0,0,  # 28 - 2f
+0,0,0,0,0,0,0,0,  # 30 - 37
+0,0,0,0,0,0,0,0,  # 38 - 3f
+6,0,4,0,8,0,0,0,  # 40 - 47
+0,9,5,0,0,0,0,0,  # 48 - 4f
+0,0,0,0,0,0,0,0,  # 50 - 57
+0,0,0,0,0,0,0,0,  # 58 - 5f
+0,0,0,0,0,0,0,0,  # 60 - 67
+0,0,0,0,0,0,0,0,  # 68 - 6f
+0,0,0,0,0,0,0,0,  # 70 - 77
+0,0,0,0,0,0,0,0,  # 78 - 7f
+2,2,2,2,2,2,2,2,  # 80 - 87
+2,2,2,2,2,2,2,2,  # 88 - 8f
+2,2,2,2,2,2,2,2,  # 90 - 97
+2,2,2,2,2,2,2,2,  # 98 - 9f
+2,2,2,2,2,2,2,2,  # a0 - a7
+2,2,2,2,2,2,2,2,  # a8 - af
+2,2,2,2,2,2,2,2,  # b0 - b7
+2,2,2,2,2,2,2,2,  # b8 - bf
+2,2,2,2,2,2,2,2,  # c0 - c7
+2,2,2,2,2,2,2,2,  # c8 - cf
+2,2,2,2,2,2,2,2,  # d0 - d7
+2,2,2,2,2,2,2,2,  # d8 - df
+2,2,2,2,2,2,2,2,  # e0 - e7
+2,2,2,2,2,2,2,2,  # e8 - ef
+2,2,2,2,2,2,2,2,  # f0 - f7
+2,2,2,2,2,2,2,2,  # f8 - ff
+)
+
+ISO2022JP_st = (  # Flat tuple, 8 entries per row (row comments give the hex index range); mixes eStart/eError/eItsMe from .constants with plain ints. Referenced as ISO2022JPSMModel['stateTable']; the lookup scheme lives in the consumer, not shown here.
+eStart,     3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
+eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f
+eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
+eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f
+eError,     5,eError,eError,eError,     4,eError,eError,# 20-27
+eError,eError,eError,     6,eItsMe,eError,eItsMe,eError,# 28-2f
+eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37
+eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f
+eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47
+)
+
+ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)  # ten zeros; its length equals the 'classFactor' value (10) in ISO2022JPSMModel
+
+ISO2022JPSMModel = {'classTable': ISO2022JP_cls,  # bundles the ISO-2022-JP tables and the charset name under string keys; the code that reads these keys is outside this module
+ 'classFactor': 10,
+ 'stateTable': ISO2022JP_st,
+ 'charLenTable': ISO2022JPCharLenTable,
+ 'name': "ISO-2022-JP"}
+
+ISO2022KR_cls = (  # 256 entries, 8 per row; each row comment gives the hex index range it covers. Referenced as ISO2022KRSMModel['classTable'].
+2,0,0,0,0,0,0,0,  # 00 - 07
+0,0,0,0,0,0,0,0,  # 08 - 0f
+0,0,0,0,0,0,0,0,  # 10 - 17
+0,0,0,1,0,0,0,0,  # 18 - 1f
+0,0,0,0,3,0,0,0,  # 20 - 27
+0,4,0,0,0,0,0,0,  # 28 - 2f
+0,0,0,0,0,0,0,0,  # 30 - 37
+0,0,0,0,0,0,0,0,  # 38 - 3f
+0,0,0,5,0,0,0,0,  # 40 - 47
+0,0,0,0,0,0,0,0,  # 48 - 4f
+0,0,0,0,0,0,0,0,  # 50 - 57
+0,0,0,0,0,0,0,0,  # 58 - 5f
+0,0,0,0,0,0,0,0,  # 60 - 67
+0,0,0,0,0,0,0,0,  # 68 - 6f
+0,0,0,0,0,0,0,0,  # 70 - 77
+0,0,0,0,0,0,0,0,  # 78 - 7f
+2,2,2,2,2,2,2,2,  # 80 - 87
+2,2,2,2,2,2,2,2,  # 88 - 8f
+2,2,2,2,2,2,2,2,  # 90 - 97
+2,2,2,2,2,2,2,2,  # 98 - 9f
+2,2,2,2,2,2,2,2,  # a0 - a7
+2,2,2,2,2,2,2,2,  # a8 - af
+2,2,2,2,2,2,2,2,  # b0 - b7
+2,2,2,2,2,2,2,2,  # b8 - bf
+2,2,2,2,2,2,2,2,  # c0 - c7
+2,2,2,2,2,2,2,2,  # c8 - cf
+2,2,2,2,2,2,2,2,  # d0 - d7
+2,2,2,2,2,2,2,2,  # d8 - df
+2,2,2,2,2,2,2,2,  # e0 - e7
+2,2,2,2,2,2,2,2,  # e8 - ef
+2,2,2,2,2,2,2,2,  # f0 - f7
+2,2,2,2,2,2,2,2,  # f8 - ff
+)
+
+ISO2022KR_st = (  # Flat tuple, 8 entries per row (row comments give the hex index range); mixes eStart/eError/eItsMe from .constants with plain ints. Referenced as ISO2022KRSMModel['stateTable']; the lookup scheme lives in the consumer, not shown here.
+eStart,     3,eError,eStart,eStart,eStart,eError,eError,# 00-07
+eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
+eItsMe,eItsMe,eError,eError,eError,     4,eError,eError,# 10-17
+eError,eError,eError,eError,     5,eError,eError,eError,# 18-1f
+eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27
+)
+
+ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0)  # six zeros; its length equals the 'classFactor' value (6) in ISO2022KRSMModel
+
+ISO2022KRSMModel = {'classTable': ISO2022KR_cls,  # bundles the ISO-2022-KR tables and the charset name under string keys; the code that reads these keys is outside this module
+ 'classFactor': 6,
+ 'stateTable': ISO2022KR_st,
+ 'charLenTable': ISO2022KRCharLenTable,
+ 'name': "ISO-2022-KR"}
+
+# flake8: noqa
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-import sys\r
-from . import constants\r
-from .mbcharsetprober import MultiByteCharSetProber\r
-from .codingstatemachine import CodingStateMachine\r
-from .chardistribution import EUCJPDistributionAnalysis\r
-from .jpcntx import EUCJPContextAnalysis\r
-from .mbcssm import EUCJPSMModel\r
-\r
-\r
-class EUCJPProber(MultiByteCharSetProber):\r
- def __init__(self):\r
- MultiByteCharSetProber.__init__(self)\r
- self._mCodingSM = CodingStateMachine(EUCJPSMModel)\r
- self._mDistributionAnalyzer = EUCJPDistributionAnalysis()\r
- self._mContextAnalyzer = EUCJPContextAnalysis()\r
- self.reset()\r
-\r
- def reset(self):\r
- MultiByteCharSetProber.reset(self)\r
- self._mContextAnalyzer.reset()\r
-\r
- def get_charset_name(self):\r
- return "EUC-JP"\r
-\r
- def feed(self, aBuf):\r
- aLen = len(aBuf)\r
- for i in range(0, aLen):\r
- # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte\r
- codingState = self._mCodingSM.next_state(aBuf[i])\r
- if codingState == constants.eError:\r
- if constants._debug:\r
- sys.stderr.write(self.get_charset_name()\r
- + ' prober hit error at byte ' + str(i)\r
- + '\n')\r
- self._mState = constants.eNotMe\r
- break\r
- elif codingState == constants.eItsMe:\r
- self._mState = constants.eFoundIt\r
- break\r
- elif codingState == constants.eStart:\r
- charLen = self._mCodingSM.get_current_charlen()\r
- if i == 0:\r
- self._mLastChar[1] = aBuf[0]\r
- self._mContextAnalyzer.feed(self._mLastChar, charLen)\r
- self._mDistributionAnalyzer.feed(self._mLastChar, charLen)\r
- else:\r
- self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen)\r
- self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],\r
- charLen)\r
-\r
- self._mLastChar[0] = aBuf[aLen - 1]\r
-\r
- if self.get_state() == constants.eDetecting:\r
- if (self._mContextAnalyzer.got_enough_data() and\r
- (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):\r
- self._mState = constants.eFoundIt\r
-\r
- return self.get_state()\r
-\r
- def get_confidence(self):\r
- contxtCf = self._mContextAnalyzer.get_confidence()\r
- distribCf = self._mDistributionAnalyzer.get_confidence()\r
- return max(contxtCf, distribCf)\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+import sys
+from . import constants
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import EUCJPDistributionAnalysis
+from .jpcntx import EUCJPContextAnalysis
+from .mbcssm import EUCJPSMModel
+
+
+class EUCJPProber(MultiByteCharSetProber):  # Prober that reports "EUC-JP"; combines a coding state machine, a distribution analyzer and a context analyzer.
+    def __init__(self):  # Build the EUC-JP state machine and both analyzers, then reset.
+        MultiByteCharSetProber.__init__(self)
+        self._mCodingSM = CodingStateMachine(EUCJPSMModel)
+        self._mDistributionAnalyzer = EUCJPDistributionAnalysis()
+        self._mContextAnalyzer = EUCJPContextAnalysis()
+        self.reset()
+
+    def reset(self):  # Base-class reset, plus a reset of the context analyzer created in __init__.
+        MultiByteCharSetProber.reset(self)
+        self._mContextAnalyzer.reset()
+
+    def get_charset_name(self):  # Always returns the literal "EUC-JP".
+        return "EUC-JP"
+
+    def feed(self, aBuf):  # Push aBuf through the state machine one element at a time; returns self.get_state().
+        aLen = len(aBuf)
+        for i in range(0, aLen):
+            # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte
+            codingState = self._mCodingSM.next_state(aBuf[i])
+            if codingState == constants.eError:  # state machine rejected the input: mark this prober eNotMe and stop
+                if constants._debug:
+                    sys.stderr.write(self.get_charset_name()
+                                     + ' prober hit error at byte ' + str(i)
+                                     + '\n')
+                self._mState = constants.eNotMe
+                break
+            elif codingState == constants.eItsMe:  # state machine reported eItsMe: mark eFoundIt and stop
+                self._mState = constants.eFoundIt
+                break
+            elif codingState == constants.eStart:  # machine is back at eStart (presumably a full character was consumed - confirm in CodingStateMachine): feed both analyzers
+                charLen = self._mCodingSM.get_current_charlen()
+                if i == 0:  # no aBuf[i - 1] yet, so pair aBuf[0] with _mLastChar[0], which is set to the buffer's last element at the end of each feed() call
+                    self._mLastChar[1] = aBuf[0]
+                    self._mContextAnalyzer.feed(self._mLastChar, charLen)
+                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
+                else:  # otherwise hand over the two-element slice ending at the current index
+                    self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen)
+                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
+                                                     charLen)
+
+        self._mLastChar[0] = aBuf[aLen - 1]  # NOTE(review): for an empty aBuf this is aBuf[-1] and would raise IndexError - confirm callers never feed empty data
+
+        if self.get_state() == constants.eDetecting:  # still undecided: shortcut to eFoundIt once the context analyzer has enough data and confidence exceeds SHORTCUT_THRESHOLD
+            if (self._mContextAnalyzer.got_enough_data() and
+               (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
+                self._mState = constants.eFoundIt
+
+        return self.get_state()
+
+    def get_confidence(self):  # Returns the larger of the context-analyzer and distribution-analyzer confidences.
+        contxtCf = self._mContextAnalyzer.get_confidence()
+        distribCf = self._mDistributionAnalyzer.get_confidence()
+        return max(contxtCf, distribCf)
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .mbcharsetprober import MultiByteCharSetProber\r
-from .codingstatemachine import CodingStateMachine\r
-from .chardistribution import EUCKRDistributionAnalysis\r
-from .mbcssm import EUCKRSMModel\r
-\r
-\r
-class EUCKRProber(MultiByteCharSetProber):\r
- def __init__(self):\r
- MultiByteCharSetProber.__init__(self)\r
- self._mCodingSM = CodingStateMachine(EUCKRSMModel)\r
- self._mDistributionAnalyzer = EUCKRDistributionAnalysis()\r
- self.reset()\r
-\r
- def get_charset_name(self):\r
- return "EUC-KR"\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import EUCKRDistributionAnalysis
+from .mbcssm import EUCKRSMModel
+
+
+class EUCKRProber(MultiByteCharSetProber):  # Prober that reports "EUC-KR"; defines only __init__ and get_charset_name, everything else is inherited from MultiByteCharSetProber.
+    def __init__(self):  # Install the EUC-KR state machine and distribution analyzer, then reset.
+        MultiByteCharSetProber.__init__(self)
+        self._mCodingSM = CodingStateMachine(EUCKRSMModel)
+        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
+        self.reset()
+
+    def get_charset_name(self):  # Always returns the literal "EUC-KR".
+        return "EUC-KR"
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-# \r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-# \r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .mbcharsetprober import MultiByteCharSetProber\r
-from .codingstatemachine import CodingStateMachine\r
-from .chardistribution import EUCTWDistributionAnalysis\r
-from .mbcssm import EUCTWSMModel\r
-\r
-class EUCTWProber(MultiByteCharSetProber):\r
- def __init__(self):\r
- MultiByteCharSetProber.__init__(self)\r
- self._mCodingSM = CodingStateMachine(EUCTWSMModel)\r
- self._mDistributionAnalyzer = EUCTWDistributionAnalysis()\r
- self.reset()\r
-\r
- def get_charset_name(self):\r
- return "EUC-TW"\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import EUCTWDistributionAnalysis
+from .mbcssm import EUCTWSMModel
+
+class EUCTWProber(MultiByteCharSetProber):  # Prober that reports "EUC-TW"; defines only __init__ and get_charset_name, everything else is inherited from MultiByteCharSetProber.
+    def __init__(self):  # Install the EUC-TW state machine and distribution analyzer, then reset.
+        MultiByteCharSetProber.__init__(self)
+        self._mCodingSM = CodingStateMachine(EUCTWSMModel)
+        self._mDistributionAnalyzer = EUCTWDistributionAnalysis()
+        self.reset()
+
+    def get_charset_name(self):  # Always returns the literal "EUC-TW".
+        return "EUC-TW"
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-# \r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-# \r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .mbcharsetprober import MultiByteCharSetProber\r
-from .codingstatemachine import CodingStateMachine\r
-from .chardistribution import GB2312DistributionAnalysis\r
-from .mbcssm import GB2312SMModel\r
-\r
-class GB2312Prober(MultiByteCharSetProber):\r
- def __init__(self):\r
- MultiByteCharSetProber.__init__(self)\r
- self._mCodingSM = CodingStateMachine(GB2312SMModel)\r
- self._mDistributionAnalyzer = GB2312DistributionAnalysis()\r
- self.reset()\r
-\r
- def get_charset_name(self):\r
- return "GB2312"\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import GB2312DistributionAnalysis
+from .mbcssm import GB2312SMModel
+
+class GB2312Prober(MultiByteCharSetProber):  # Prober that reports "GB2312"; defines only __init__ and get_charset_name, everything else is inherited from MultiByteCharSetProber.
+    def __init__(self):  # Install the GB2312 state machine and distribution analyzer, then reset.
+        MultiByteCharSetProber.__init__(self)
+        self._mCodingSM = CodingStateMachine(GB2312SMModel)
+        self._mDistributionAnalyzer = GB2312DistributionAnalysis()
+        self.reset()
+
+    def get_charset_name(self):  # Always returns the literal "GB2312".
+        return "GB2312"
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Universal charset detector code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Shy Shalom\r
-# Portions created by the Initial Developer are Copyright (C) 2005\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .charsetprober import CharSetProber\r
-from .constants import eNotMe, eDetecting\r
-from .compat import wrap_ord\r
-\r
-# This prober doesn't actually recognize a language or a charset.\r
-# It is a helper prober for the use of the Hebrew model probers\r
-\r
-### General ideas of the Hebrew charset recognition ###\r
-#\r
-# Four main charsets exist in Hebrew:\r
-# "ISO-8859-8" - Visual Hebrew\r
-# "windows-1255" - Logical Hebrew\r
-# "ISO-8859-8-I" - Logical Hebrew\r
-# "x-mac-hebrew" - ?? Logical Hebrew ??\r
-#\r
-# Both "ISO" charsets use a completely identical set of code points, whereas\r
-# "windows-1255" and "x-mac-hebrew" are two different proper supersets of\r
-# these code points. windows-1255 defines additional characters in the range\r
-# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific\r
-# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.\r
-# x-mac-hebrew defines similar additional code points but with a different\r
-# mapping.\r
-#\r
-# As far as an average Hebrew text with no diacritics is concerned, all four\r
-# charsets are identical with respect to code points. Meaning that for the\r
-# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters\r
-# (including final letters).\r
-#\r
-# The dominant difference between these charsets is their directionality.\r
-# "Visual" directionality means that the text is ordered as if the renderer is\r
-# not aware of a BIDI rendering algorithm. The renderer sees the text and\r
-# draws it from left to right. The text itself when ordered naturally is read\r
-# backwards. A buffer of Visual Hebrew generally looks like so:\r
-# "[last word of first line spelled backwards] [whole line ordered backwards\r
-# and spelled backwards] [first word of first line spelled backwards]\r
-# [end of line] [last word of second line] ... etc' "\r
-# adding punctuation marks, numbers and English text to visual text is\r
-# naturally also "visual" and from left to right.\r
-#\r
-# "Logical" directionality means the text is ordered "naturally" according to\r
-# the order it is read. It is the responsibility of the renderer to display\r
-# the text from right to left. A BIDI algorithm is used to place general\r
-# punctuation marks, numbers and English text in the text.\r
-#\r
-# Texts in x-mac-hebrew are almost impossible to find on the Internet. From\r
-# what little evidence I could find, it seems that its general directionality\r
-# is Logical.\r
-#\r
-# To sum up all of the above, the Hebrew probing mechanism knows about two\r
-# charsets:\r
-# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are\r
-# backwards while line order is natural. For charset recognition purposes\r
-# the line order is unimportant (In fact, for this implementation, even\r
-# word order is unimportant).\r
-# Logical Hebrew - "windows-1255" - normal, naturally ordered text.\r
-#\r
-# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be\r
-# specifically identified.\r
-# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew\r
-# that contain special punctuation marks or diacritics is displayed with\r
-# some unconverted characters showing as question marks. This problem might\r
-# be corrected using another model prober for x-mac-hebrew. Due to the fact\r
-# that x-mac-hebrew texts are so rare, writing another model prober isn't\r
-# worth the effort and performance hit.\r
-#\r
-#### The Prober ####\r
-#\r
-# The prober is divided between two SBCharSetProbers and a HebrewProber,\r
-# all of which are managed, created, fed data, inquired and deleted by the\r
-# SBCSGroupProber. The two SBCharSetProbers identify that the text is in\r
-# fact some kind of Hebrew, Logical or Visual. The final decision about which\r
-# one is it is made by the HebrewProber by combining final-letter scores\r
-# with the scores of the two SBCharSetProbers to produce a final answer.\r
-#\r
-# The SBCSGroupProber is responsible for stripping the original text of HTML\r
-# tags, English characters, numbers, low-ASCII punctuation characters, spaces\r
-# and new lines. It reduces any sequence of such characters to a single space.\r
-# The buffer fed to each prober in the SBCS group prober is pure text in\r
-# high-ASCII.\r
-# The two SBCharSetProbers (model probers) share the same language model:\r
-# Win1255Model.\r
-# The first SBCharSetProber uses the model normally as any other\r
-# SBCharSetProber does, to recognize windows-1255, upon which this model was\r
-# built. The second SBCharSetProber is told to make the pair-of-letter\r
-# lookup in the language model backwards. This in practice exactly simulates\r
-# a visual Hebrew model using the windows-1255 logical Hebrew model.\r
-#\r
-# The HebrewProber is not using any language model. All it does is look for\r
-# final-letter evidence suggesting the text is either logical Hebrew or visual\r
-# Hebrew. Disjointed from the model probers, the results of the HebrewProber\r
-# alone are meaningless. HebrewProber always returns 0.00 as confidence\r
-# since it never identifies a charset by itself. Instead, the pointer to the\r
-# HebrewProber is passed to the model probers as a helper "Name Prober".\r
-# When the Group prober receives a positive identification from any prober,\r
-# it asks for the name of the charset identified. If the prober queried is a\r
-# Hebrew model prober, the model prober forwards the call to the\r
-# HebrewProber to make the final decision. In the HebrewProber, the\r
-# decision is made according to the final-letters scores maintained and Both\r
-# model probers scores. The answer is returned in the form of the name of the\r
-# charset identified, either "windows-1255" or "ISO-8859-8".\r
-\r
-# windows-1255 / ISO-8859-8 code points of interest\r
-FINAL_KAF = 0xea\r
-NORMAL_KAF = 0xeb\r
-FINAL_MEM = 0xed\r
-NORMAL_MEM = 0xee\r
-FINAL_NUN = 0xef\r
-NORMAL_NUN = 0xf0\r
-FINAL_PE = 0xf3\r
-NORMAL_PE = 0xf4\r
-FINAL_TSADI = 0xf5\r
-NORMAL_TSADI = 0xf6\r
-\r
-# Minimum Visual vs Logical final letter score difference.\r
-# If the difference is below this, don't rely solely on the final letter score\r
-# distance.\r
-MIN_FINAL_CHAR_DISTANCE = 5\r
-\r
-# Minimum Visual vs Logical model score difference.\r
-# If the difference is below this, don't rely at all on the model score\r
-# distance.\r
-MIN_MODEL_DISTANCE = 0.01\r
-\r
-VISUAL_HEBREW_NAME = "ISO-8859-8"\r
-LOGICAL_HEBREW_NAME = "windows-1255"\r
-\r
-\r
-class HebrewProber(CharSetProber):\r
- def __init__(self):\r
- CharSetProber.__init__(self)\r
- self._mLogicalProber = None\r
- self._mVisualProber = None\r
- self.reset()\r
-\r
- def reset(self):\r
- self._mFinalCharLogicalScore = 0\r
- self._mFinalCharVisualScore = 0\r
- # The two last characters seen in the previous buffer,\r
- # mPrev and mBeforePrev are initialized to space in order to simulate\r
- # a word delimiter at the beginning of the data\r
- self._mPrev = ' '\r
- self._mBeforePrev = ' '\r
- # These probers are owned by the group prober.\r
-\r
- def set_model_probers(self, logicalProber, visualProber):\r
- self._mLogicalProber = logicalProber\r
- self._mVisualProber = visualProber\r
-\r
- def is_final(self, c):\r
- return wrap_ord(c) in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE,\r
- FINAL_TSADI]\r
-\r
- def is_non_final(self, c):\r
- # The normal Tsadi is not a good Non-Final letter due to words like\r
- # 'lechotet' (to chat) containing an apostrophe after the tsadi. This\r
- # apostrophe is converted to a space in FilterWithoutEnglishLetters\r
- # causing the Non-Final tsadi to appear at an end of a word even\r
- # though this is not the case in the original text.\r
- # The letters Pe and Kaf rarely display a related behavior of not being\r
- # a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak'\r
- # for example legally end with a Non-Final Pe or Kaf. However, the\r
- # benefit of these letters as Non-Final letters outweighs the damage\r
- # since these words are quite rare.\r
- return wrap_ord(c) in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE]\r
-\r
- def feed(self, aBuf):\r
- # Final letter analysis for logical-visual decision.\r
- # Look for evidence that the received buffer is either logical Hebrew\r
- # or visual Hebrew.\r
- # The following cases are checked:\r
- # 1) A word longer than 1 letter, ending with a final letter. This is\r
- # an indication that the text is laid out "naturally" since the\r
- # final letter really appears at the end. +1 for logical score.\r
- # 2) A word longer than 1 letter, ending with a Non-Final letter. In\r
- # normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi,\r
- # should not end with the Non-Final form of that letter. Exceptions\r
- # to this rule are mentioned above in isNonFinal(). This is an\r
- # indication that the text is laid out backwards. +1 for visual\r
- # score\r
- # 3) A word longer than 1 letter, starting with a final letter. Final\r
- # letters should not appear at the beginning of a word. This is an\r
- # indication that the text is laid out backwards. +1 for visual\r
- # score.\r
- #\r
- # The visual score and logical score are accumulated throughout the\r
- # text and are finally checked against each other in GetCharSetName().\r
- # No checking for final letters in the middle of words is done since\r
- # that case is not an indication for either Logical or Visual text.\r
- #\r
- # We automatically filter out all 7-bit characters (replace them with\r
- # spaces) so the word boundary detection works properly. [MAP]\r
-\r
- if self.get_state() == eNotMe:\r
- # Both model probers say it's not them. No reason to continue.\r
- return eNotMe\r
-\r
- aBuf = self.filter_high_bit_only(aBuf)\r
-\r
- for cur in aBuf:\r
- if cur == ' ':\r
- # We stand on a space - a word just ended\r
- if self._mBeforePrev != ' ':\r
- # next-to-last char was not a space so self._mPrev is not a\r
- # 1 letter word\r
- if self.is_final(self._mPrev):\r
- # case (1) [-2:not space][-1:final letter][cur:space]\r
- self._mFinalCharLogicalScore += 1\r
- elif self.is_non_final(self._mPrev):\r
- # case (2) [-2:not space][-1:Non-Final letter][\r
- # cur:space]\r
- self._mFinalCharVisualScore += 1\r
- else:\r
- # Not standing on a space\r
- if ((self._mBeforePrev == ' ') and\r
- (self.is_final(self._mPrev)) and (cur != ' ')):\r
- # case (3) [-2:space][-1:final letter][cur:not space]\r
- self._mFinalCharVisualScore += 1\r
- self._mBeforePrev = self._mPrev\r
- self._mPrev = cur\r
-\r
- # Forever detecting, till the end or until both model probers return\r
- # eNotMe (handled above)\r
- return eDetecting\r
-\r
- def get_charset_name(self):\r
- # Make the decision: is it Logical or Visual?\r
- # If the final letter score distance is dominant enough, rely on it.\r
- finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore\r
- if finalsub >= MIN_FINAL_CHAR_DISTANCE:\r
- return LOGICAL_HEBREW_NAME\r
- if finalsub <= -MIN_FINAL_CHAR_DISTANCE:\r
- return VISUAL_HEBREW_NAME\r
-\r
- # It's not dominant enough, try to rely on the model scores instead.\r
- modelsub = (self._mLogicalProber.get_confidence()\r
- - self._mVisualProber.get_confidence())\r
- if modelsub > MIN_MODEL_DISTANCE:\r
- return LOGICAL_HEBREW_NAME\r
- if modelsub < -MIN_MODEL_DISTANCE:\r
- return VISUAL_HEBREW_NAME\r
-\r
- # Still no good, back to final letter distance, maybe it'll save the\r
- # day.\r
- if finalsub < 0.0:\r
- return VISUAL_HEBREW_NAME\r
-\r
- # (finalsub > 0 - Logical) or (don't know what to do) default to\r
- # Logical.\r
- return LOGICAL_HEBREW_NAME\r
-\r
- def get_state(self):\r
- # Remain active as long as any of the model probers are active.\r
- if (self._mLogicalProber.get_state() == eNotMe) and \\r
- (self._mVisualProber.get_state() == eNotMe):\r
- return eNotMe\r
- return eDetecting\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Shy Shalom
+# Portions created by the Initial Developer are Copyright (C) 2005
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from .charsetprober import CharSetProber
+from .constants import eNotMe, eDetecting
+from .compat import wrap_ord
+
+# This prober doesn't actually recognize a language or a charset.
+# It is a helper prober for the use of the Hebrew model probers
+
+### General ideas of the Hebrew charset recognition ###
+#
+# Four main charsets exist in Hebrew:
+# "ISO-8859-8" - Visual Hebrew
+# "windows-1255" - Logical Hebrew
+# "ISO-8859-8-I" - Logical Hebrew
+# "x-mac-hebrew" - ?? Logical Hebrew ??
+#
+# Both "ISO" charsets use a completely identical set of code points, whereas
+# "windows-1255" and "x-mac-hebrew" are two different proper supersets of
+# these code points. windows-1255 defines additional characters in the range
+# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific
+# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.
+# x-mac-hebrew defines similar additional code points but with a different
+# mapping.
+#
+# As far as an average Hebrew text with no diacritics is concerned, all four
+# charsets are identical with respect to code points. Meaning that for the
+# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters
+# (including final letters).
+#
+# The dominant difference between these charsets is their directionality.
+# "Visual" directionality means that the text is ordered as if the renderer is
+# not aware of a BIDI rendering algorithm. The renderer sees the text and
+# draws it from left to right. The text itself when ordered naturally is read
+# backwards. A buffer of Visual Hebrew generally looks like so:
+# "[last word of first line spelled backwards] [whole line ordered backwards
+# and spelled backwards] [first word of first line spelled backwards]
+# [end of line] [last word of second line] ... etc' "
+# adding punctuation marks, numbers and English text to visual text is
+# naturally also "visual" and from left to right.
+#
+# "Logical" directionality means the text is ordered "naturally" according to
+# the order it is read. It is the responsibility of the renderer to display
+# the text from right to left. A BIDI algorithm is used to place general
+# punctuation marks, numbers and English text in the text.
+#
+# Texts in x-mac-hebrew are almost impossible to find on the Internet. From
+# what little evidence I could find, it seems that its general directionality
+# is Logical.
+#
+# To sum up all of the above, the Hebrew probing mechanism knows about two
+# charsets:
+# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are
+# backwards while line order is natural. For charset recognition purposes
+# the line order is unimportant (In fact, for this implementation, even
+# word order is unimportant).
+# Logical Hebrew - "windows-1255" - normal, naturally ordered text.
+#
+# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be
+# specifically identified.
+# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew
+# that contains special punctuation marks or diacritics is displayed with
+# some unconverted characters showing as question marks. This problem might
+# be corrected using another model prober for x-mac-hebrew. Due to the fact
+# that x-mac-hebrew texts are so rare, writing another model prober isn't
+# worth the effort and performance hit.
+#
+#### The Prober ####
+#
+# The prober is divided between two SBCharSetProbers and a HebrewProber,
+# all of which are managed, created, fed data, inquired and deleted by the
+# SBCSGroupProber. The two SBCharSetProbers identify that the text is in
+# fact some kind of Hebrew, Logical or Visual. The final decision between the
+# two is made by the HebrewProber by combining final-letter scores
+# with the scores of the two SBCharSetProbers to produce a final answer.
+#
+# The SBCSGroupProber is responsible for stripping the original text of HTML
+# tags, English characters, numbers, low-ASCII punctuation characters, spaces
+# and new lines. It reduces any sequence of such characters to a single space.
+# The buffer fed to each prober in the SBCS group prober is pure text in
+# high-ASCII.
+# The two SBCharSetProbers (model probers) share the same language model:
+# Win1255Model.
+# The first SBCharSetProber uses the model normally as any other
+# SBCharSetProber does, to recognize windows-1255, upon which this model was
+# built. The second SBCharSetProber is told to make the pair-of-letter
+# lookup in the language model backwards. This in practice exactly simulates
+# a visual Hebrew model using the windows-1255 logical Hebrew model.
+#
+# The HebrewProber is not using any language model. All it does is look for
+# final-letter evidence suggesting the text is either logical Hebrew or visual
+# Hebrew. Disjointed from the model probers, the results of the HebrewProber
+# alone are meaningless. HebrewProber always returns 0.00 as confidence
+# since it never identifies a charset by itself. Instead, the pointer to the
+# HebrewProber is passed to the model probers as a helper "Name Prober".
+# When the Group prober receives a positive identification from any prober,
+# it asks for the name of the charset identified. If the prober queried is a
+# Hebrew model prober, the model prober forwards the call to the
+# HebrewProber to make the final decision. In the HebrewProber, the
+# decision is made according to the final-letter scores it maintains and both
+# model probers' scores. The answer is returned in the form of the name of the
+# charset identified, either "windows-1255" or "ISO-8859-8".
+
+# windows-1255 / ISO-8859-8 code points of interest
+FINAL_KAF = 0xea
+NORMAL_KAF = 0xeb
+FINAL_MEM = 0xed
+NORMAL_MEM = 0xee
+FINAL_NUN = 0xef
+NORMAL_NUN = 0xf0
+FINAL_PE = 0xf3
+NORMAL_PE = 0xf4
+FINAL_TSADI = 0xf5
+NORMAL_TSADI = 0xf6
+
+# Minimum Visual vs Logical final letter score difference.
+# If the difference is below this, don't rely solely on the final letter score
+# distance.
+MIN_FINAL_CHAR_DISTANCE = 5
+
+# Minimum Visual vs Logical model score difference.
+# If the difference is below this, don't rely at all on the model score
+# distance.
+MIN_MODEL_DISTANCE = 0.01
+
+VISUAL_HEBREW_NAME = "ISO-8859-8"
+LOGICAL_HEBREW_NAME = "windows-1255"
+
+
+class HebrewProber(CharSetProber):
+ def __init__(self):
+ CharSetProber.__init__(self)
+ self._mLogicalProber = None
+ self._mVisualProber = None
+ self.reset()
+
+ def reset(self):
+ self._mFinalCharLogicalScore = 0
+ self._mFinalCharVisualScore = 0
+ # The two last characters seen in the previous buffer,
+ # mPrev and mBeforePrev are initialized to space in order to simulate
+ # a word delimiter at the beginning of the data
+ self._mPrev = ' '
+ self._mBeforePrev = ' '
+ # The model probers belong to the group prober; reset() leaves them alone.
+
+ def set_model_probers(self, logicalProber, visualProber):
+ self._mLogicalProber = logicalProber
+ self._mVisualProber = visualProber
+
+ def is_final(self, c):
+ return wrap_ord(c) in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE,
+ FINAL_TSADI]
+
+ def is_non_final(self, c):
+ # The normal Tsadi is not a good Non-Final letter due to words like
+ # 'lechotet' (to chat) containing an apostrophe after the tsadi. This
+ # apostrophe is converted to a space in FilterWithoutEnglishLetters
+ # causing the Non-Final tsadi to appear at an end of a word even
+ # though this is not the case in the original text.
+ # The letters Pe and Kaf rarely display a related behavior of not being
+ # a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak'
+ # for example legally end with a Non-Final Pe or Kaf. However, the
+ # benefit of these letters as Non-Final letters outweighs the damage
+ # since these words are quite rare.
+ return wrap_ord(c) in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE]
+
+ def feed(self, aBuf):
+ # Final letter analysis for logical-visual decision.
+ # Look for evidence that the received buffer is either logical Hebrew
+ # or visual Hebrew.
+ # The following cases are checked:
+ # 1) A word longer than 1 letter, ending with a final letter. This is
+ # an indication that the text is laid out "naturally" since the
+ # final letter really appears at the end. +1 for logical score.
+ # 2) A word longer than 1 letter, ending with a Non-Final letter. In
+ # normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi,
+ # should not end with the Non-Final form of that letter. Exceptions
+ # to this rule are mentioned above in is_non_final(). This is an
+ # indication that the text is laid out backwards. +1 for visual
+ # score
+ # 3) A word longer than 1 letter, starting with a final letter. Final
+ # letters should not appear at the beginning of a word. This is an
+ # indication that the text is laid out backwards. +1 for visual
+ # score.
+ #
+ # The visual score and logical score are accumulated throughout the
+ # text and are finally checked against each other in get_charset_name().
+ # No checking for final letters in the middle of words is done since
+ # that case is not an indication for either Logical or Visual text.
+ #
+ # We automatically filter out all 7-bit characters (replace them with
+ # spaces) so the word boundary detection works properly. [MAP]
+
+ if self.get_state() == eNotMe:
+ # Both model probers say it's not them. No reason to continue.
+ return eNotMe
+
+ aBuf = self.filter_high_bit_only(aBuf)
+
+ for cur in aBuf:
+ if cur == ' ':
+ # We stand on a space - a word just ended
+ if self._mBeforePrev != ' ':
+ # next-to-last char was not a space so self._mPrev is not a
+ # 1 letter word
+ if self.is_final(self._mPrev):
+ # case (1) [-2:not space][-1:final letter][cur:space]
+ self._mFinalCharLogicalScore += 1
+ elif self.is_non_final(self._mPrev):
+ # case (2) [-2:not space][-1:Non-Final letter][
+ # cur:space]
+ self._mFinalCharVisualScore += 1
+ else:
+ # Not standing on a space
+ if ((self._mBeforePrev == ' ') and
+ (self.is_final(self._mPrev)) and (cur != ' ')):
+ # case (3) [-2:space][-1:final letter][cur:not space]
+ self._mFinalCharVisualScore += 1
+ self._mBeforePrev = self._mPrev
+ self._mPrev = cur
+
+ # Forever detecting, till the end or until both model probers return
+ # eNotMe (handled above)
+ return eDetecting
+
+ def get_charset_name(self):
+ # Make the decision: is it Logical or Visual?
+ # If the final letter score distance is dominant enough, rely on it.
+ finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore
+ if finalsub >= MIN_FINAL_CHAR_DISTANCE:
+ return LOGICAL_HEBREW_NAME
+ if finalsub <= -MIN_FINAL_CHAR_DISTANCE:
+ return VISUAL_HEBREW_NAME
+
+ # It's not dominant enough, try to rely on the model scores instead.
+ modelsub = (self._mLogicalProber.get_confidence()
+ - self._mVisualProber.get_confidence())
+ if modelsub > MIN_MODEL_DISTANCE:
+ return LOGICAL_HEBREW_NAME
+ if modelsub < -MIN_MODEL_DISTANCE:
+ return VISUAL_HEBREW_NAME
+
+ # Still no good, back to final letter distance, maybe it'll save the
+ # day.
+ if finalsub < 0.0:
+ return VISUAL_HEBREW_NAME
+
+ # (finalsub > 0 - Logical) or (don't know what to do) default to
+ # Logical.
+ return LOGICAL_HEBREW_NAME
+
+ def get_state(self):
+ # Remain active as long as any of the model probers are active.
+ if (self._mLogicalProber.get_state() == eNotMe) and \
+ (self._mVisualProber.get_state() == eNotMe):
+ return eNotMe
+ return eDetecting
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Communicator client code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .compat import wrap_ord\r
-\r
-NUM_OF_CATEGORY = 6\r
-DONT_KNOW = -1\r
-ENOUGH_REL_THRESHOLD = 100\r
-MAX_REL_THRESHOLD = 1000\r
-MINIMUM_DATA_THRESHOLD = 4\r
-\r
-# This is hiragana 2-char sequence table, the number in each cell represents its frequency category\r
-jp2CharContext = (\r
-(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1),\r
-(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4),\r
-(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2),\r
-(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4),\r
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),\r
-(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4),\r
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),\r
-(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3),\r
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),\r
-(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4),\r
-(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4),\r
-(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3),\r
-(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3),\r
-(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3),\r
-(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4),\r
-(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3),\r
-(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4),\r
-(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3),\r
-(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5),\r
-(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3),\r
-(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5),\r
-(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4),\r
-(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4),\r
-(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3),\r
-(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3),\r
-(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3),\r
-(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5),\r
-(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4),\r
-(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5),\r
-(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3),\r
-(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4),\r
-(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4),\r
-(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4),\r
-(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1),\r
-(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0),\r
-(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3),\r
-(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0),\r
-(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3),\r
-(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3),\r
-(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5),\r
-(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4),\r
-(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5),\r
-(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3),\r
-(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3),\r
-(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3),\r
-(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3),\r
-(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4),\r
-(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4),\r
-(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2),\r
-(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3),\r
-(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3),\r
-(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3),\r
-(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3),\r
-(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4),\r
-(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3),\r
-(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4),\r
-(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3),\r
-(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3),\r
-(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4),\r
-(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4),\r
-(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3),\r
-(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4),\r
-(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4),\r
-(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3),\r
-(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4),\r
-(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4),\r
-(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4),\r
-(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3),\r
-(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2),\r
-(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2),\r
-(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3),\r
-(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3),\r
-(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5),\r
-(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3),\r
-(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4),\r
-(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4),\r
-(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4),\r
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),\r
-(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3),\r
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1),\r
-(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2),\r
-(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3),\r
-(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),\r
-)\r
-\r
-class JapaneseContextAnalysis:\r
- def __init__(self):\r
- self.reset()\r
-\r
- def reset(self):\r
- self._mTotalRel = 0 # total sequence received\r
- # category counters, each interger counts sequence in its category\r
- self._mRelSample = [0] * NUM_OF_CATEGORY\r
- # if last byte in current buffer is not the last byte of a character,\r
- # we need to know how many bytes to skip in next buffer\r
- self._mNeedToSkipCharNum = 0\r
- self._mLastCharOrder = -1 # The order of previous char\r
- # If this flag is set to True, detection is done and conclusion has\r
- # been made\r
- self._mDone = False\r
-\r
- def feed(self, aBuf, aLen):\r
- if self._mDone:\r
- return\r
-\r
- # The buffer we got is byte oriented, and a character may span in more than one\r
- # buffers. In case the last one or two byte in last buffer is not\r
- # complete, we record how many byte needed to complete that character\r
- # and skip these bytes here. We can choose to record those bytes as\r
- # well and analyse the character once it is complete, but since a\r
- # character will not make much difference, by simply skipping\r
- # this character will simply our logic and improve performance.\r
- i = self._mNeedToSkipCharNum\r
- while i < aLen:\r
- order, charLen = self.get_order(aBuf[i:i + 2])\r
- i += charLen\r
- if i > aLen:\r
- self._mNeedToSkipCharNum = i - aLen\r
- self._mLastCharOrder = -1\r
- else:\r
- if (order != -1) and (self._mLastCharOrder != -1):\r
- self._mTotalRel += 1\r
- if self._mTotalRel > MAX_REL_THRESHOLD:\r
- self._mDone = True\r
- break\r
- self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1\r
- self._mLastCharOrder = order\r
-\r
- def got_enough_data(self):\r
- return self._mTotalRel > ENOUGH_REL_THRESHOLD\r
-\r
- def get_confidence(self):\r
- # This is just one way to calculate confidence. It works well for me.\r
- if self._mTotalRel > MINIMUM_DATA_THRESHOLD:\r
- return float(self._mTotalRel - self._mRelSample[0]) / self._mTotalRel\r
- else:\r
- return DONT_KNOW\r
-\r
- def get_order(self, aBuf):\r
- return -1, 1\r
-\r
-class SJISContextAnalysis(JapaneseContextAnalysis):\r
- def get_order(self, aBuf):\r
- if not aBuf:\r
- return -1, 1\r
- # find out current char's byte length\r
- first_char = wrap_ord(aBuf[0])\r
- if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)):\r
- charLen = 2\r
- else:\r
- charLen = 1\r
-\r
- # return its order if it is hiragana\r
- if len(aBuf) > 1:\r
- second_char = wrap_ord(aBuf[1])\r
- if (first_char == 202) and (0x9F <= second_char <= 0xF1):\r
- return second_char - 0x9F, charLen\r
-\r
- return -1, charLen\r
-\r
-class EUCJPContextAnalysis(JapaneseContextAnalysis):\r
- def get_order(self, aBuf):\r
- if not aBuf:\r
- return -1, 1\r
- # find out current char's byte length\r
- first_char = wrap_ord(aBuf[0])\r
- if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE):\r
- charLen = 2\r
- elif first_char == 0x8F:\r
- charLen = 3\r
- else:\r
- charLen = 1\r
-\r
- # return its order if it is hiragana\r
- if len(aBuf) > 1:\r
- second_char = wrap_ord(aBuf[1])\r
- if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3):\r
- return second_char - 0xA1, charLen\r
-\r
- return -1, charLen\r
-\r
-# flake8: noqa\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from .compat import wrap_ord
+
+# Number of frequency categories: the cells of jp2CharContext hold 0-5 and
+# JapaneseContextAnalysis keeps one counter per category in _mRelSample.
+NUM_OF_CATEGORY = 6
+# Value get_confidence() returns while too few pairs have been counted.
+DONT_KNOW = -1
+# got_enough_data() is true once more pairs than this have been counted.
+ENOUGH_REL_THRESHOLD = 100
+# feed() marks the analysis done once more pairs than this have been counted.
+MAX_REL_THRESHOLD = 1000
+# get_confidence() needs more pairs than this before it reports a ratio.
+MINIMUM_DATA_THRESHOLD = 4
+
+# Hiragana 2-character sequence table: jp2CharContext[previous][current] is the frequency category of that pair of character orders.
+jp2CharContext = (
+(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1),
+(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4),
+(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2),
+(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4),
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
+(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4),
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
+(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3),
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
+(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4),
+(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4),
+(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3),
+(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3),
+(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3),
+(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4),
+(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3),
+(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4),
+(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3),
+(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5),
+(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3),
+(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5),
+(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4),
+(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4),
+(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3),
+(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3),
+(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3),
+(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5),
+(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4),
+(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5),
+(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3),
+(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4),
+(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4),
+(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4),
+(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1),
+(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0),
+(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3),
+(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0),
+(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3),
+(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3),
+(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5),
+(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4),
+(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5),
+(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3),
+(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3),
+(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3),
+(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3),
+(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4),
+(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4),
+(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2),
+(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3),
+(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3),
+(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3),
+(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3),
+(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4),
+(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3),
+(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4),
+(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3),
+(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3),
+(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4),
+(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4),
+(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3),
+(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4),
+(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4),
+(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3),
+(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4),
+(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4),
+(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4),
+(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3),
+(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2),
+(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2),
+(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3),
+(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3),
+(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5),
+(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3),
+(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4),
+(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4),
+(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4),
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
+(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3),
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1),
+(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2),
+(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3),
+(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),
+)
+
+class JapaneseContextAnalysis:
+    """Score a byte stream by the frequency category of its hiragana pairs.
+
+    Subclasses override ``get_order`` to decode one character of a concrete
+    encoding.  This base class looks up every pair of consecutive hiragana
+    in ``jp2CharContext`` and counts how many pairs fall in each category.
+    """
+
+    def __init__(self):
+        self.reset()
+
+    def reset(self):
+        """Clear all counters so the analyser can be fed a new stream."""
+        self._mTotalRel = 0  # total sequence received
+        # category counters, each integer counts sequence in its category
+        self._mRelSample = [0] * NUM_OF_CATEGORY
+        # if last byte in current buffer is not the last byte of a character,
+        # we need to know how many bytes to skip in next buffer
+        self._mNeedToSkipCharNum = 0
+        self._mLastCharOrder = -1  # The order of previous char
+        # If this flag is set to True, detection is done and conclusion has
+        # been made
+        self._mDone = False
+
+    def feed(self, aBuf, aLen):
+        """Count the hiragana pairs found in the first ``aLen`` bytes of ``aBuf``.
+
+        Does nothing once the analysis has been marked done.
+        """
+        if self._mDone:
+            return
+
+        # The buffer is byte oriented, and a character may span more than one
+        # buffer. If the last one or two bytes of the previous buffer did not
+        # complete a character, we recorded how many bytes were still needed
+        # and skip them here. We could keep those bytes and analyse the
+        # character once it is complete, but a single character makes little
+        # difference, so skipping it simplifies the logic and improves
+        # performance.
+        i = self._mNeedToSkipCharNum
+        while i < aLen:
+            order, charLen = self.get_order(aBuf[i:i + 2])
+            i += charLen
+            if i > aLen:
+                # Character runs past this buffer: remember the overhang and
+                # forget the previous character so no pair spans the gap.
+                self._mNeedToSkipCharNum = i - aLen
+                self._mLastCharOrder = -1
+            else:
+                if (order != -1) and (self._mLastCharOrder != -1):
+                    self._mTotalRel += 1
+                    if self._mTotalRel > MAX_REL_THRESHOLD:
+                        self._mDone = True
+                        break
+                    self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1
+                self._mLastCharOrder = order
+
+    def got_enough_data(self):
+        """Return True once more than ENOUGH_REL_THRESHOLD pairs were counted."""
+        return self._mTotalRel > ENOUGH_REL_THRESHOLD
+
+    def get_confidence(self):
+        """Return the share of counted pairs outside category 0, or DONT_KNOW."""
+        # This is just one way to calculate confidence. It works well for me.
+        if self._mTotalRel > MINIMUM_DATA_THRESHOLD:
+            # float() forces true division. Both operands are ints and this
+            # module has no ``from __future__ import division``, so under
+            # Python 2 a bare ``/`` would truncate the ratio to 0 or 1.
+            return float(self._mTotalRel - self._mRelSample[0]) / self._mTotalRel
+        else:
+            return DONT_KNOW
+
+    def get_order(self, aBuf):
+        """Return ``(order, char_length)``; the base class recognises nothing."""
+        return -1, 1
+
+class SJISContextAnalysis(JapaneseContextAnalysis):
+    """Context analysis for Shift_JIS encoded byte streams."""
+
+    def get_order(self, aBuf):
+        """Return ``(order, char_length)`` for the character at the start of ``aBuf``.
+
+        ``order`` is the hiragana's offset from the first hiragana trail byte
+        (0x9F), or -1 when the character is not hiragana or ``aBuf`` is empty.
+        ``char_length`` is the byte width implied by the lead byte.
+        """
+        if not aBuf:
+            return -1, 1
+        # find out current char's byte length
+        first_char = wrap_ord(aBuf[0])
+        if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)):
+            charLen = 2
+        else:
+            charLen = 1
+
+        # return its order if it is hiragana
+        if len(aBuf) > 1:
+            second_char = wrap_ord(aBuf[1])
+            # Shift_JIS hiragana occupy 0x829F-0x82F1, so the lead byte must
+            # be 0x82. The previous decimal 202 (0xCA) is not a two-byte lead
+            # byte at all, so hiragana were never recognised.
+            if (first_char == 0x82) and (0x9F <= second_char <= 0xF1):
+                return second_char - 0x9F, charLen
+
+        return -1, charLen
+
+class EUCJPContextAnalysis(JapaneseContextAnalysis):
+    """Context analysis for EUC-JP encoded byte streams."""
+
+    def get_order(self, aBuf):
+        """Return ``(order, char_length)`` for the character at the start of ``aBuf``.
+
+        ``order`` is the hiragana's offset from trail byte 0xA1, or -1 when
+        the character is not hiragana or ``aBuf`` is empty.
+        """
+        if not aBuf:
+            return -1, 1
+
+        # The lead byte alone decides how many bytes the character occupies.
+        lead = wrap_ord(aBuf[0])
+        if lead == 0x8F:
+            width = 3
+        elif lead == 0x8E or 0xA1 <= lead <= 0xFE:
+            width = 2
+        else:
+            width = 1
+
+        # Hiragana have lead byte 0xA4 and a trail byte in 0xA1-0xF3.
+        if len(aBuf) > 1:
+            trail = wrap_ord(aBuf[1])
+            if lead == 0xA4 and 0xA1 <= trail <= 0xF3:
+                return trail - 0xA1, width
+
+        return -1, width
+
+# flake8: noqa
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Communicator client code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-# 255: Control characters that usually does not exist in any text\r
-# 254: Carriage/Return\r
-# 253: symbol (punctuation) that does not belong to word\r
-# 252: 0 - 9\r
-\r
-# Character Mapping Table:\r
-# this table is modified base on win1251BulgarianCharToOrderMap, so\r
-# only number <64 is sure valid\r
-\r
-Latin5_BulgarianCharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
-253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40\r
-110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50\r
-253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60\r
-116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70\r
-194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, # 80\r
-210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, # 90\r
- 81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, # a0\r
- 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # b0\r
- 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, # c0\r
- 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # d0\r
- 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, # e0\r
- 62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, # f0\r
-)\r
-\r
-win1251BulgarianCharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
-253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40\r
-110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50\r
-253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60\r
-116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70\r
-206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, # 80\r
-221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229, # 90\r
- 88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, # a0\r
- 73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, # b0\r
- 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # c0\r
- 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, # d0\r
- 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # e0\r
- 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, # f0\r
-)\r
-\r
-# Model Table:\r
-# total sequences: 100%\r
-# first 512 sequences: 96.9392%\r
-# first 1024 sequences:3.0618%\r
-# rest sequences: 0.2992%\r
-# negative sequences: 0.0020%\r
-BulgarianLangModel = (\r
-0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,\r
-3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1,\r
-0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0,\r
-0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0,\r
-0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0,\r
-1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0,\r
-0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0,\r
-0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
-3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3,\r
-2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1,\r
-3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,\r
-3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2,\r
-1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0,\r
-3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1,\r
-1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0,\r
-2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2,\r
-2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0,\r
-3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2,\r
-1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,\r
-2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2,\r
-2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,\r
-3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2,\r
-1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0,\r
-2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2,\r
-2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0,\r
-2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2,\r
-1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0,\r
-2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2,\r
-1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,\r
-3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2,\r
-1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0,\r
-3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1,\r
-1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0,\r
-2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1,\r
-1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0,\r
-2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2,\r
-1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,\r
-2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1,\r
-1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0,\r
-3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,\r
-1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2,\r
-1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,\r
-2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2,\r
-1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,\r
-2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2,\r
-1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,\r
-1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1,\r
-0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
-1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2,\r
-1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,\r
-2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1,\r
-1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,\r
-1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1,\r
-0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1,\r
-0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,\r
-2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,\r
-0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,\r
-2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,\r
-1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,\r
-0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,\r
-0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1,\r
-1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,\r
-1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-)\r
-\r
-Latin5BulgarianModel = {\r
- 'charToOrderMap': Latin5_BulgarianCharToOrderMap,\r
- 'precedenceMatrix': BulgarianLangModel,\r
- 'mTypicalPositiveRatio': 0.969392,\r
- 'keepEnglishLetter': False,\r
- 'charsetName': "ISO-8859-5"\r
-}\r
-\r
-Win1251BulgarianModel = {\r
- 'charToOrderMap': win1251BulgarianCharToOrderMap,\r
- 'precedenceMatrix': BulgarianLangModel,\r
- 'mTypicalPositiveRatio': 0.969392,\r
- 'keepEnglishLetter': False,\r
- 'charsetName': "windows-1251"\r
-}\r
-\r
-\r
-# flake8: noqa\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+# 255: Control characters that usually do not exist in any text
+# 254: Carriage return / line feed
+# 253: symbol (punctuation) that does not belong to word
+# 252: 0 - 9
+
+# Character Mapping Table:
+# This table is modified based on win1251BulgarianCharToOrderMap, so
+# only numbers < 64 are sure to be valid.
+
+Latin5_BulgarianCharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
+253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40
+110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50
+253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60
+116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70
+194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, # 80
+210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, # 90
+ 81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, # a0
+ 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # b0
+ 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, # c0
+ 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # d0
+ 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, # e0
+ 62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, # f0
+)
+
+win1251BulgarianCharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
+253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40
+110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50
+253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60
+116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70
+206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, # 80
+221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229, # 90
+ 88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, # a0
+ 73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, # b0
+ 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # c0
+ 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, # d0
+ 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # e0
+ 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, # f0
+)
+
+# Model Table:
+# total sequences: 100%
+# first 512 sequences: 96.9392%
+# first 1024 sequences: 3.0618%
+# rest sequences: 0.2992%
+# negative sequences: 0.0020%
+BulgarianLangModel = (
+0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,
+3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1,
+0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0,
+0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0,
+0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0,
+1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0,
+0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0,
+0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
+3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
+3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3,
+2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1,
+3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
+3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2,
+1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0,
+3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1,
+1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0,
+2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2,
+2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0,
+3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2,
+1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,
+2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2,
+2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,
+3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2,
+1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0,
+2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2,
+2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0,
+2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2,
+1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0,
+2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2,
+1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,
+3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2,
+1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0,
+3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1,
+1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0,
+2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1,
+1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0,
+2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2,
+1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,
+2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1,
+1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0,
+3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,
+1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2,
+1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,
+2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2,
+1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,
+2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2,
+1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
+1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1,
+0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
+1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2,
+1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
+2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1,
+1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,
+1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1,
+0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
+1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1,
+0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
+2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,
+0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
+2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,
+1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,
+0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
+0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
+1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1,
+1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,
+1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+)
+
+Latin5BulgarianModel = {
+ 'charToOrderMap': Latin5_BulgarianCharToOrderMap,
+ 'precedenceMatrix': BulgarianLangModel,
+ 'mTypicalPositiveRatio': 0.969392,
+ 'keepEnglishLetter': False,
+ 'charsetName': "ISO-8859-5"
+}
+
+Win1251BulgarianModel = {
+ 'charToOrderMap': win1251BulgarianCharToOrderMap,
+ 'precedenceMatrix': BulgarianLangModel,
+ 'mTypicalPositiveRatio': 0.969392,
+ 'keepEnglishLetter': False,
+ 'charsetName': "windows-1251"
+}
+
+
+# flake8: noqa
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Communicator client code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-# KOI8-R language model\r
-# Character Mapping Table:\r
-KOI8R_CharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40\r
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50\r
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60\r
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70\r
-191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80\r
-207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90\r
-223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0\r
-238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0\r
- 27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0\r
- 15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0\r
- 59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0\r
- 35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0\r
-)\r
-\r
-win1251_CharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40\r
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50\r
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60\r
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70\r
-191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,\r
-207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,\r
-223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,\r
-239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253,\r
- 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,\r
- 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,\r
- 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,\r
- 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,\r
-)\r
-\r
-latin5_CharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40\r
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50\r
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60\r
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70\r
-191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,\r
-207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,\r
-223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,\r
- 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,\r
- 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,\r
- 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,\r
- 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,\r
-239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,\r
-)\r
-\r
-macCyrillic_CharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40\r
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50\r
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60\r
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70\r
- 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,\r
- 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,\r
-191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,\r
-207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,\r
-223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,\r
-239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16,\r
- 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,\r
- 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,\r
-)\r
-\r
-IBM855_CharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40\r
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50\r
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60\r
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70\r
-191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205,\r
-206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70,\r
- 3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219,\r
-220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229,\r
-230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243,\r
- 8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248,\r
- 43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249,\r
-250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,\r
-)\r
-\r
-IBM866_CharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
-253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40\r
-155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50\r
-253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60\r
- 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70\r
- 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,\r
- 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,\r
- 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,\r
-191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,\r
-207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,\r
-223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,\r
- 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,\r
-239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,\r
-)\r
-\r
-# Model Table:\r
-# total sequences: 100%\r
-# first 512 sequences: 97.6601%\r
-# first 1024 sequences: 2.3389%\r
-# rest sequences: 0.1237%\r
-# negative sequences: 0.0009%\r
-RussianLangModel = (\r
-0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2,\r
-3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,\r
-0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,\r
-0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0,\r
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0,\r
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0,\r
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
-3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0,\r
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0,\r
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0,\r
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0,\r
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
-2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0,\r
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0,\r
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0,\r
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,\r
-3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0,\r
-0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1,\r
-1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,\r
-2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1,\r
-1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0,\r
-2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1,\r
-1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,\r
-3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1,\r
-1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,\r
-2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2,\r
-1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1,\r
-1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1,\r
-1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,\r
-2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1,\r
-1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,\r
-3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2,\r
-1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,\r
-2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1,\r
-1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,\r
-2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0,\r
-0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1,\r
-1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,\r
-1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1,\r
-1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0,\r
-3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1,\r
-2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,\r
-3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,\r
-1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,\r
-1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1,\r
-0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,\r
-2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1,\r
-1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,\r
-1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,\r
-0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1,\r
-1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,\r
-2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2,\r
-2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,\r
-1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0,\r
-1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,\r
-2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,\r
-1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,\r
-0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,\r
-2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1,\r
-1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,\r
-1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,\r
-0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,\r
-0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1,\r
-0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,\r
-1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,\r
-0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,\r
-0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,\r
-1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1,\r
-0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,\r
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,\r
-0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,\r
-)\r
-\r
-Koi8rModel = {\r
- 'charToOrderMap': KOI8R_CharToOrderMap,\r
- 'precedenceMatrix': RussianLangModel,\r
- 'mTypicalPositiveRatio': 0.976601,\r
- 'keepEnglishLetter': False,\r
- 'charsetName': "KOI8-R"\r
-}\r
-\r
-Win1251CyrillicModel = {\r
- 'charToOrderMap': win1251_CharToOrderMap,\r
- 'precedenceMatrix': RussianLangModel,\r
- 'mTypicalPositiveRatio': 0.976601,\r
- 'keepEnglishLetter': False,\r
- 'charsetName': "windows-1251"\r
-}\r
-\r
-Latin5CyrillicModel = {\r
- 'charToOrderMap': latin5_CharToOrderMap,\r
- 'precedenceMatrix': RussianLangModel,\r
- 'mTypicalPositiveRatio': 0.976601,\r
- 'keepEnglishLetter': False,\r
- 'charsetName': "ISO-8859-5"\r
-}\r
-\r
-MacCyrillicModel = {\r
- 'charToOrderMap': macCyrillic_CharToOrderMap,\r
- 'precedenceMatrix': RussianLangModel,\r
- 'mTypicalPositiveRatio': 0.976601,\r
- 'keepEnglishLetter': False,\r
- 'charsetName': "MacCyrillic"\r
-}\r
-\r
-Ibm866Model = {\r
- 'charToOrderMap': IBM866_CharToOrderMap,\r
- 'precedenceMatrix': RussianLangModel,\r
- 'mTypicalPositiveRatio': 0.976601,\r
- 'keepEnglishLetter': False,\r
- 'charsetName': "IBM866"\r
-}\r
-\r
-Ibm855Model = {\r
- 'charToOrderMap': IBM855_CharToOrderMap,\r
- 'precedenceMatrix': RussianLangModel,\r
- 'mTypicalPositiveRatio': 0.976601,\r
- 'keepEnglishLetter': False,\r
- 'charsetName': "IBM855"\r
-}\r
-\r
-# flake8: noqa\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+# KOI8-R language model
+# Character Mapping Table:
+KOI8R_CharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
+253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
+155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
+253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
+ 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
+191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80
+207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90
+223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0
+238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0
+ 27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0
+ 15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0
+ 59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0
+ 35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0
+)
+
+win1251_CharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
+253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
+155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
+253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
+ 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
+191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
+207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
+223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
+239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253,
+ 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
+ 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
+ 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
+ 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
+)
+
+latin5_CharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
+253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
+155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
+253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
+ 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
+191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
+207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
+223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
+ 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
+ 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
+ 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
+ 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
+239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
+)
+
+macCyrillic_CharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
+253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
+155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
+253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
+ 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
+ 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
+ 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
+191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
+207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
+223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
+239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16,
+ 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
+ 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,
+)
+
+IBM855_CharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
+253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
+155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
+253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
+ 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
+191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205,
+206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70,
+ 3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219,
+220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229,
+230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243,
+ 8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248,
+ 43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249,
+250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,
+)
+
+IBM866_CharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
+253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
+155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
+253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
+ 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
+ 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
+ 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
+ 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
+191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
+207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
+223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
+ 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
+239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
+)
+
+# Model Table:
+# total sequences: 100%
+# first 512 sequences: 97.6601%
+# first 1024 sequences: 2.3389%
+# rest sequences: 0.1237%
+# negative sequences: 0.0009%
+RussianLangModel = (
+0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2,
+3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
+0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
+0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0,
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0,
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0,
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
+3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0,
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0,
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0,
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0,
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
+2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0,
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0,
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0,
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
+3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0,
+0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1,
+1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
+2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1,
+1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0,
+2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1,
+1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,
+3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1,
+1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,
+2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2,
+1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1,
+1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1,
+1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
+2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1,
+1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,
+3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2,
+1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,
+2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1,
+1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,
+2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0,
+0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1,
+1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,
+1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1,
+1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0,
+3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1,
+2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,
+3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,
+1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,
+1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1,
+0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
+2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1,
+1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,
+1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,
+0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
+1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1,
+1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
+2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2,
+2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,
+1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0,
+1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,
+2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,
+1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,
+0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
+2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1,
+1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,
+1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
+0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
+0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,
+0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1,
+0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
+1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,
+0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
+1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,
+0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
+1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1,
+0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,
+2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,
+0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
+)
+
+Koi8rModel = {
+ 'charToOrderMap': KOI8R_CharToOrderMap,
+ 'precedenceMatrix': RussianLangModel,
+ 'mTypicalPositiveRatio': 0.976601,
+ 'keepEnglishLetter': False,
+ 'charsetName': "KOI8-R"
+}
+
+Win1251CyrillicModel = {
+ 'charToOrderMap': win1251_CharToOrderMap,
+ 'precedenceMatrix': RussianLangModel,
+ 'mTypicalPositiveRatio': 0.976601,
+ 'keepEnglishLetter': False,
+ 'charsetName': "windows-1251"
+}
+
+Latin5CyrillicModel = {
+ 'charToOrderMap': latin5_CharToOrderMap,
+ 'precedenceMatrix': RussianLangModel,
+ 'mTypicalPositiveRatio': 0.976601,
+ 'keepEnglishLetter': False,
+ 'charsetName': "ISO-8859-5"
+}
+
+MacCyrillicModel = {
+ 'charToOrderMap': macCyrillic_CharToOrderMap,
+ 'precedenceMatrix': RussianLangModel,
+ 'mTypicalPositiveRatio': 0.976601,
+ 'keepEnglishLetter': False,
+ 'charsetName': "MacCyrillic"
+};
+
+Ibm866Model = {
+ 'charToOrderMap': IBM866_CharToOrderMap,
+ 'precedenceMatrix': RussianLangModel,
+ 'mTypicalPositiveRatio': 0.976601,
+ 'keepEnglishLetter': False,
+ 'charsetName': "IBM866"
+}
+
+Ibm855Model = {
+ 'charToOrderMap': IBM855_CharToOrderMap,
+ 'precedenceMatrix': RussianLangModel,
+ 'mTypicalPositiveRatio': 0.976601,
+ 'keepEnglishLetter': False,
+ 'charsetName': "IBM855"
+}
+
+# flake8: noqa
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Communicator client code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-# 255: Control characters that usually does not exist in any text\r
-# 254: Carriage/Return\r
-# 253: symbol (punctuation) that does not belong to word\r
-# 252: 0 - 9\r
-\r
-# Character Mapping Table:\r
-Latin7_CharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
-253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40\r
- 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50\r
-253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60\r
- 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90\r
-253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0\r
-253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, # b0\r
-110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0\r
- 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0\r
-124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0\r
- 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0\r
-)\r
-\r
-win1253_CharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
-253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40\r
- 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50\r
-253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60\r
- 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90\r
-253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0\r
-253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, # b0\r
-110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0\r
- 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0\r
-124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0\r
- 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0\r
-)\r
-\r
-# Model Table:\r
-# total sequences: 100%\r
-# first 512 sequences: 98.2851%\r
-# first 1024 sequences:1.7001%\r
-# rest sequences: 0.0359%\r
-# negative sequences: 0.0148%\r
-GreekLangModel = (\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0,\r
-3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,\r
-0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0,\r
-2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0,\r
-0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0,\r
-2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0,\r
-2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0,\r
-0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0,\r
-2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0,\r
-0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0,\r
-3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0,\r
-3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0,\r
-2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0,\r
-2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0,\r
-0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0,\r
-0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0,\r
-0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2,\r
-0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,\r
-0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2,\r
-0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0,\r
-0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2,\r
-0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2,\r
-0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,\r
-0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2,\r
-0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0,\r
-0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
-0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0,\r
-0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,\r
-0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,\r
-0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2,\r
-0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,\r
-0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2,\r
-0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0,\r
-0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2,\r
-0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,\r
-0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2,\r
-0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,\r
-0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1,\r
-0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,\r
-0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2,\r
-0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,\r
-0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2,\r
-0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2,\r
-0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,\r
-0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,\r
-0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,\r
-0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
-0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0,\r
-0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0,\r
-0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-)\r
-\r
-Latin7GreekModel = {\r
- 'charToOrderMap': Latin7_CharToOrderMap,\r
- 'precedenceMatrix': GreekLangModel,\r
- 'mTypicalPositiveRatio': 0.982851,\r
- 'keepEnglishLetter': False,\r
- 'charsetName': "ISO-8859-7"\r
-}\r
-\r
-Win1253GreekModel = {\r
- 'charToOrderMap': win1253_CharToOrderMap,\r
- 'precedenceMatrix': GreekLangModel,\r
- 'mTypicalPositiveRatio': 0.982851,\r
- 'keepEnglishLetter': False,\r
- 'charsetName': "windows-1253"\r
-}\r
-\r
-# flake8: noqa\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+# 255: Control characters that usually does not exist in any text
+# 254: Carriage/Return
+# 253: symbol (punctuation) that does not belong to word
+# 252: 0 - 9
+
+# Character Mapping Table:
+Latin7_CharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
+253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40
+ 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50
+253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60
+ 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90
+253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0
+253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, # b0
+110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0
+ 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0
+124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0
+ 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
+)
+
+win1253_CharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
+253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40
+ 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50
+253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60
+ 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90
+253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0
+253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, # b0
+110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0
+ 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0
+124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0
+ 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
+)
+
+# Model Table:
+# total sequences: 100%
+# first 512 sequences: 98.2851%
+# first 1024 sequences:1.7001%
+# rest sequences: 0.0359%
+# negative sequences: 0.0148%
+GreekLangModel = (
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0,
+3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
+0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0,
+2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0,
+0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0,
+2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
+0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0,
+2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0,
+0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0,
+2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0,
+0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0,
+3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0,
+3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0,
+2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0,
+2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0,
+0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0,
+0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0,
+0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2,
+0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,
+0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2,
+0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0,
+0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2,
+0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2,
+0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,
+0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2,
+0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0,
+0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0,
+0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
+0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0,
+0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
+0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,
+0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2,
+0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,
+0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2,
+0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0,
+0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2,
+0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
+0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2,
+0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,
+0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1,
+0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
+0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2,
+0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
+0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2,
+0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2,
+0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
+0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,
+0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,
+0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
+0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0,
+0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0,
+0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+)
+
+Latin7GreekModel = {
+ 'charToOrderMap': Latin7_CharToOrderMap,
+ 'precedenceMatrix': GreekLangModel,
+ 'mTypicalPositiveRatio': 0.982851,
+ 'keepEnglishLetter': False,
+ 'charsetName': "ISO-8859-7"
+}
+
+Win1253GreekModel = {
+ 'charToOrderMap': win1253_CharToOrderMap,
+ 'precedenceMatrix': GreekLangModel,
+ 'mTypicalPositiveRatio': 0.982851,
+ 'keepEnglishLetter': False,
+ 'charsetName': "windows-1253"
+}
+
+# flake8: noqa
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Universal charset detector code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Simon Montagu\r
-# Portions created by the Initial Developer are Copyright (C) 2005\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-# Shy Shalom - original C code\r
-# Shoshannah Forbes - original C code (?)\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-# 255: Control characters that usually does not exist in any text\r
-# 254: Carriage/Return\r
-# 253: symbol (punctuation) that does not belong to word\r
-# 252: 0 - 9\r
-\r
-# Windows-1255 language model\r
-# Character Mapping Table:\r
-win1255_CharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
-253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, # 40\r
- 78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, # 50\r
-253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, # 60\r
- 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, # 70\r
-124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214,\r
-215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221,\r
- 34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227,\r
-106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234,\r
- 30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237,\r
-238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250,\r
- 9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23,\r
- 12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253,\r
-)\r
-\r
-# Model Table:\r
-# total sequences: 100%\r
-# first 512 sequences: 98.4004%\r
-# first 1024 sequences: 1.5981%\r
-# rest sequences: 0.087%\r
-# negative sequences: 0.0015%\r
-HebrewLangModel = (\r
-0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,\r
-3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,\r
-1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,\r
-1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3,\r
-1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2,\r
-1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2,\r
-1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2,\r
-0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,\r
-0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2,\r
-1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,\r
-3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2,\r
-0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1,\r
-0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
-3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,\r
-0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,\r
-0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2,\r
-0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2,\r
-0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2,\r
-0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2,\r
-0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2,\r
-0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1,\r
-0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2,\r
-0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,\r
-3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2,\r
-0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2,\r
-0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2,\r
-0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
-3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,\r
-1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2,\r
-0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,\r
-3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0,\r
-0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
-3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3,\r
-0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
-2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0,\r
-0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0,\r
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,\r
-0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0,\r
-0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,\r
-0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,\r
-2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0,\r
-0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1,\r
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1,\r
-0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,\r
-0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
-3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0,\r
-0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1,\r
-1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1,\r
-0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,\r
-2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1,\r
-1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1,\r
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1,\r
-2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,\r
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1,\r
-1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1,\r
-2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,\r
-0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,\r
-1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1,\r
-0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,\r
-)\r
-\r
-Win1255HebrewModel = {\r
- 'charToOrderMap': win1255_CharToOrderMap,\r
- 'precedenceMatrix': HebrewLangModel,\r
- 'mTypicalPositiveRatio': 0.984004,\r
- 'keepEnglishLetter': False,\r
- 'charsetName': "windows-1255"\r
-}\r
-\r
-# flake8: noqa\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Simon Montagu
+# Portions created by the Initial Developer are Copyright (C) 2005
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+# Shy Shalom - original C code
+# Shoshannah Forbes - original C code (?)
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+# 255: Control characters that usually does not exist in any text
+# 254: Carriage/Return
+# 253: symbol (punctuation) that does not belong to word
+# 252: 0 - 9
+
+# Windows-1255 language model
+# Character Mapping Table:
+win1255_CharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
+253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, # 40
+ 78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, # 50
+253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, # 60
+ 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, # 70
+124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214,
+215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221,
+ 34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227,
+106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234,
+ 30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237,
+238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250,
+ 9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23,
+ 12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253,
+)
+
+# Model Table:
+# total sequences: 100%
+# first 512 sequences: 98.4004%
+# first 1024 sequences: 1.5981%
+# rest sequences: 0.087%
+# negative sequences: 0.0015%
+HebrewLangModel = (
+0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,
+3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,
+1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,
+1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3,
+1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2,
+1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2,
+1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2,
+0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,
+0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2,
+1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,
+3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2,
+0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1,
+0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,
+0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,
+0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2,
+0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,
+3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2,
+0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2,
+0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2,
+0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2,
+0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1,
+0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2,
+0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,
+3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2,
+0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2,
+0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2,
+0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,
+1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2,
+0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
+3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0,
+0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
+3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3,
+0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0,
+0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0,
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
+0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0,
+0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
+0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,
+2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0,
+0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1,
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1,
+0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,
+0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0,
+0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1,
+1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1,
+0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,
+2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1,
+1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1,
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1,
+2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1,
+1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1,
+2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
+0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,
+1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1,
+0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,
+)
+
+Win1255HebrewModel = {
+ 'charToOrderMap': win1255_CharToOrderMap,
+ 'precedenceMatrix': HebrewLangModel,
+ 'mTypicalPositiveRatio': 0.984004,
+ 'keepEnglishLetter': False,
+ 'charsetName': "windows-1255"
+}
+
+# flake8: noqa
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Communicator client code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-# 255: Control characters that usually does not exist in any text\r
-# 254: Carriage/Return\r
-# 253: symbol (punctuation) that does not belong to word\r
-# 252: 0 - 9\r
-\r
-# Character Mapping Table:\r
-Latin2_HungarianCharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
-253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,\r
- 46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,\r
-253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,\r
- 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,\r
-159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,\r
-175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,\r
-191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205,\r
- 79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,\r
-221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231,\r
-232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241,\r
- 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85,\r
-245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,\r
-)\r
-\r
-win1250HungarianCharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
-253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,\r
- 46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,\r
-253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,\r
- 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,\r
-161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,\r
-177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190,\r
-191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205,\r
- 81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,\r
-221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231,\r
-232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241,\r
- 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87,\r
-245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253,\r
-)\r
-\r
-# Model Table:\r
-# total sequences: 100%\r
-# first 512 sequences: 94.7368%\r
-# first 1024 sequences:5.2623%\r
-# rest sequences: 0.8894%\r
-# negative sequences: 0.0009%\r
-HungarianLangModel = (\r
-0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,\r
-3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2,\r
-3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,\r
-3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3,\r
-0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2,\r
-0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,\r
-3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,\r
-3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2,\r
-0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,\r
-3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,\r
-2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1,\r
-0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,\r
-3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0,\r
-1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0,\r
-1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0,\r
-1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1,\r
-3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1,\r
-2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1,\r
-2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1,\r
-2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1,\r
-2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0,\r
-2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,\r
-3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1,\r
-2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1,\r
-2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1,\r
-2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,\r
-1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1,\r
-1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1,\r
-3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0,\r
-1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1,\r
-1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1,\r
-2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,\r
-2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0,\r
-2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1,\r
-3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1,\r
-2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1,\r
-1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,\r
-1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0,\r
-2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1,\r
-2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,\r
-1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,\r
-1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1,\r
-2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0,\r
-1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0,\r
-1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0,\r
-2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1,\r
-2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1,\r
-2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,\r
-1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1,\r
-1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,\r
-1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,\r
-0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0,\r
-2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1,\r
-2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1,\r
-1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1,\r
-2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,\r
-1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,\r
-1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,\r
-2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0,\r
-2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1,\r
-2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0,\r
-1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,\r
-2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0,\r
-0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,\r
-1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,\r
-0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,\r
-1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
-0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,\r
-0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,\r
-2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,\r
-)\r
-\r
-Latin2HungarianModel = {\r
- 'charToOrderMap': Latin2_HungarianCharToOrderMap,\r
- 'precedenceMatrix': HungarianLangModel,\r
- 'mTypicalPositiveRatio': 0.947368,\r
- 'keepEnglishLetter': True,\r
- 'charsetName': "ISO-8859-2"\r
-}\r
-\r
-Win1250HungarianModel = {\r
- 'charToOrderMap': win1250HungarianCharToOrderMap,\r
- 'precedenceMatrix': HungarianLangModel,\r
- 'mTypicalPositiveRatio': 0.947368,\r
- 'keepEnglishLetter': True,\r
- 'charsetName': "windows-1250"\r
-}\r
-\r
-# flake8: noqa\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+# 255: Control characters that usually does not exist in any text
+# 254: Carriage/Return
+# 253: symbol (punctuation) that does not belong to word
+# 252: 0 - 9
+
+# Character Mapping Table:
+Latin2_HungarianCharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
+253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
+ 46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
+253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,
+ 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
+159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,
+175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,
+191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205,
+ 79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
+221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231,
+232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241,
+ 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85,
+245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,
+)
+
+win1250HungarianCharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
+253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
+ 46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
+253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,
+ 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
+161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,
+177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190,
+191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205,
+ 81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
+221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231,
+232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241,
+ 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87,
+245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253,
+)
+
+# Model Table:
+# total sequences: 100%
+# first 512 sequences: 94.7368%
+# first 1024 sequences:5.2623%
+# rest sequences: 0.8894%
+# negative sequences: 0.0009%
+HungarianLangModel = (
+0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2,
+3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
+3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3,
+0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
+3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
+3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2,
+0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,
+3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
+3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
+3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2,
+0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
+3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
+2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1,
+0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0,
+1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0,
+1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0,
+1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1,
+3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1,
+2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1,
+2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1,
+2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1,
+2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0,
+2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
+3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1,
+2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1,
+2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1,
+2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,
+1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1,
+1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1,
+3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0,
+1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1,
+1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1,
+2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,
+2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0,
+2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1,
+3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1,
+2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1,
+1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,
+1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
+2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1,
+2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,
+1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,
+1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1,
+2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0,
+1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0,
+1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0,
+2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1,
+2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1,
+2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
+1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1,
+1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,
+1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,
+0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
+2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1,
+2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1,
+1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1,
+2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,
+1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,
+1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,
+2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0,
+2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1,
+2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0,
+1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
+2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0,
+0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
+1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,
+0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
+1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
+0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
+0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
+2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
+0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
+)
+
+Latin2HungarianModel = {
+ 'charToOrderMap': Latin2_HungarianCharToOrderMap,
+ 'precedenceMatrix': HungarianLangModel,
+ 'mTypicalPositiveRatio': 0.947368,
+ 'keepEnglishLetter': True,
+ 'charsetName': "ISO-8859-2"
+}
+
+Win1250HungarianModel = {
+ 'charToOrderMap': win1250HungarianCharToOrderMap,
+ 'precedenceMatrix': HungarianLangModel,
+ 'mTypicalPositiveRatio': 0.947368,
+ 'keepEnglishLetter': True,
+ 'charsetName': "windows-1250"
+}
+
+# flake8: noqa
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Communicator client code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-# 255: Control characters that usually does not exist in any text\r
-# 254: Carriage/Return\r
-# 253: symbol (punctuation) that does not belong to word\r
-# 252: 0 - 9\r
-\r
-# The following result for thai was collected from a limited sample (1M).\r
-\r
-# Character Mapping Table:\r
-TIS620CharToOrderMap = (\r
-255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00\r
-255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10\r
-253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20\r
-252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30\r
-253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, # 40\r
-188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, # 50\r
-253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, # 60\r
- 96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, # 70\r
-209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222,\r
-223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235,\r
-236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57,\r
- 49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54,\r
- 45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63,\r
- 22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244,\r
- 11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247,\r
- 68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253,\r
-)\r
-\r
-# Model Table:\r
-# total sequences: 100%\r
-# first 512 sequences: 92.6386%\r
-# first 1024 sequences:7.3177%\r
-# rest sequences: 1.0230%\r
-# negative sequences: 0.0436%\r
-ThaiLangModel = (\r
-0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3,\r
-0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2,\r
-3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3,\r
-0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,\r
-3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2,\r
-3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1,\r
-3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2,\r
-3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1,\r
-3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1,\r
-3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,\r
-3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1,\r
-2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1,\r
-3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1,\r
-0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,\r
-3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1,\r
-0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,\r
-3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2,\r
-1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0,\r
-3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3,\r
-3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0,\r
-1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2,\r
-0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,\r
-2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3,\r
-0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0,\r
-3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1,\r
-2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,\r
-3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2,\r
-0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2,\r
-3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,\r
-3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0,\r
-2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,\r
-3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1,\r
-2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1,\r
-3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,\r
-3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0,\r
-3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1,\r
-3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1,\r
-3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1,\r
-1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2,\r
-0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,\r
-3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3,\r
-0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,\r
-3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0,\r
-3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1,\r
-1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0,\r
-3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1,\r
-3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2,\r
-0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
-0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0,\r
-0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0,\r
-1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1,\r
-1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,\r
-3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1,\r
-0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,\r
-0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0,\r
-0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,\r
-3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0,\r
-3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0,\r
-0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1,\r
-0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0,\r
-0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1,\r
-0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,\r
-0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0,\r
-0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1,\r
-0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,\r
-3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0,\r
-0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0,\r
-0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,\r
-3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1,\r
-2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,\r
-0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0,\r
-3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0,\r
-0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,\r
-2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0,\r
-1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3,\r
-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0,\r
-1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,\r
-1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,\r
-1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\r
-)\r
-\r
-TIS620ThaiModel = {\r
- 'charToOrderMap': TIS620CharToOrderMap,\r
- 'precedenceMatrix': ThaiLangModel,\r
- 'mTypicalPositiveRatio': 0.926386,\r
- 'keepEnglishLetter': False,\r
- 'charsetName': "TIS-620"\r
-}\r
-\r
-# flake8: noqa\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Communicator client code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+# 255: Control characters that usually does not exist in any text
+# 254: Carriage/Return
+# 253: symbol (punctuation) that does not belong to word
+# 252: 0 - 9
+
+# The following result for thai was collected from a limited sample (1M).
+
+# Character Mapping Table:
+TIS620CharToOrderMap = (
+255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
+255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
+252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
+253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, # 40
+188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, # 50
+253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, # 60
+ 96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, # 70
+209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222,
+223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235,
+236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57,
+ 49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54,
+ 45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63,
+ 22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244,
+ 11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247,
+ 68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253,
+)
+
+# Model Table:
+# total sequences: 100%
+# first 512 sequences: 92.6386%
+# first 1024 sequences:7.3177%
+# rest sequences: 1.0230%
+# negative sequences: 0.0436%
+ThaiLangModel = (
+0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3,
+0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2,
+3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3,
+0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,
+3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2,
+3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1,
+3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2,
+3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1,
+3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1,
+3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,
+3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1,
+2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1,
+3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1,
+0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,
+3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1,
+0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,
+3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2,
+1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0,
+3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3,
+3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0,
+1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2,
+0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
+2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3,
+0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0,
+3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1,
+2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,
+3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2,
+0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2,
+3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
+3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0,
+2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
+3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1,
+2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1,
+3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,
+3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0,
+3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1,
+3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1,
+3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1,
+1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2,
+0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
+3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3,
+0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,
+3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0,
+3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1,
+1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0,
+3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1,
+3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
+0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2,
+0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
+0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0,
+0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0,
+1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1,
+1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,
+3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1,
+0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
+0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0,
+0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
+3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0,
+3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0,
+0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1,
+0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0,
+0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1,
+0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,
+0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0,
+0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1,
+0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,
+3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0,
+0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0,
+0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
+3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1,
+2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,
+0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0,
+3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0,
+0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
+2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0,
+1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3,
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0,
+1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
+1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,
+2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,
+1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+)
+
+TIS620ThaiModel = {
+ 'charToOrderMap': TIS620CharToOrderMap,
+ 'precedenceMatrix': ThaiLangModel,
+ 'mTypicalPositiveRatio': 0.926386,
+ 'keepEnglishLetter': False,
+ 'charsetName': "TIS-620"
+}
+
+# flake8: noqa
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Universal charset detector code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 2001\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-# Shy Shalom - original C code\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .charsetprober import CharSetProber\r
-from .constants import eNotMe\r
-from .compat import wrap_ord\r
-\r
-FREQ_CAT_NUM = 4\r
-\r
-UDF = 0 # undefined\r
-OTH = 1 # other\r
-ASC = 2 # ascii capital letter\r
-ASS = 3 # ascii small letter\r
-ACV = 4 # accent capital vowel\r
-ACO = 5 # accent capital other\r
-ASV = 6 # accent small vowel\r
-ASO = 7 # accent small other\r
-CLASS_NUM = 8 # total classes\r
-\r
-Latin1_CharToClass = (\r
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07\r
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F\r
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17\r
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F\r
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27\r
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F\r
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37\r
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F\r
- OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47\r
- ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F\r
- ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57\r
- ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F\r
- OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67\r
- ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F\r
- ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77\r
- ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F\r
- OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87\r
- OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F\r
- UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97\r
- OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F\r
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7\r
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF\r
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7\r
- OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF\r
- ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7\r
- ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF\r
- ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7\r
- ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF\r
- ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7\r
- ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF\r
- ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7\r
- ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF\r
-)\r
-\r
-# 0 : illegal\r
-# 1 : very unlikely\r
-# 2 : normal\r
-# 3 : very likely\r
-Latin1ClassModel = (\r
- # UDF OTH ASC ASS ACV ACO ASV ASO\r
- 0, 0, 0, 0, 0, 0, 0, 0, # UDF\r
- 0, 3, 3, 3, 3, 3, 3, 3, # OTH\r
- 0, 3, 3, 3, 3, 3, 3, 3, # ASC\r
- 0, 3, 3, 3, 1, 1, 3, 3, # ASS\r
- 0, 3, 3, 3, 1, 2, 1, 2, # ACV\r
- 0, 3, 3, 3, 3, 3, 3, 3, # ACO\r
- 0, 3, 1, 3, 1, 1, 1, 3, # ASV\r
- 0, 3, 1, 3, 1, 1, 3, 3, # ASO\r
-)\r
-\r
-\r
-class Latin1Prober(CharSetProber):\r
- def __init__(self):\r
- CharSetProber.__init__(self)\r
- self.reset()\r
-\r
- def reset(self):\r
- self._mLastCharClass = OTH\r
- self._mFreqCounter = [0] * FREQ_CAT_NUM\r
- CharSetProber.reset(self)\r
-\r
- def get_charset_name(self):\r
- return "windows-1252"\r
-\r
- def feed(self, aBuf):\r
- aBuf = self.filter_with_english_letters(aBuf)\r
- for c in aBuf:\r
- charClass = Latin1_CharToClass[wrap_ord(c)]\r
- freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM)\r
- + charClass]\r
- if freq == 0:\r
- self._mState = eNotMe\r
- break\r
- self._mFreqCounter[freq] += 1\r
- self._mLastCharClass = charClass\r
-\r
- return self.get_state()\r
-\r
- def get_confidence(self):\r
- if self.get_state() == eNotMe:\r
- return 0.01\r
-\r
- total = sum(self._mFreqCounter)\r
- if total < 0.01:\r
- confidence = 0.0\r
- else:\r
- confidence = ((float(self._mFreqCounter[3]) / total)\r
- - (self._mFreqCounter[1] * 20.0 / total))\r
- if confidence < 0.0:\r
- confidence = 0.0\r
- # lower the confidence of latin1 so that other more accurate\r
- # detector can take priority.\r
- confidence = confidence * 0.5\r
- return confidence\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+# Shy Shalom - original C code
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from .charsetprober import CharSetProber
+from .constants import eNotMe
+from .compat import wrap_ord
+
+# Number of likelihood categories (0-3) that Latin1ClassModel can yield;
+# Latin1Prober keeps one counter per category.
+FREQ_CAT_NUM = 4
+
+# Character classes; each byte value is assigned one by Latin1_CharToClass.
+UDF = 0 # undefined
+OTH = 1 # other
+ASC = 2 # ascii capital letter
+ASS = 3 # ascii small letter
+ACV = 4 # accent capital vowel
+ACO = 5 # accent capital other
+ASV = 6 # accent small vowel
+ASO = 7 # accent small other
+CLASS_NUM = 8 # total classes
+
+# Character class for every byte value 0x00-0xFF, eight entries per row;
+# Latin1Prober.feed() indexes it with wrap_ord(c).
+Latin1_CharToClass = (
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F
+ OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57
+ ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F
+ OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67
+ ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F
+ ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77
+ ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F
+ OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87
+ OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F
+ UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97
+ OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF
+ ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7
+ ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF
+ ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7
+ ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF
+ ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7
+ ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF
+ ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7
+ ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF
+)
+
+# Likelihood of seeing one character class directly after another:
+# 0 : illegal
+# 1 : very unlikely
+# 2 : normal
+# 3 : very likely
+# The table is a flattened CLASS_NUM x CLASS_NUM matrix, looked up as
+# Latin1ClassModel[previous_class * CLASS_NUM + current_class]: each row
+# (label on the right) is the previous class, each column the current one.
+Latin1ClassModel = (
+ # UDF OTH ASC ASS ACV ACO ASV ASO
+ 0, 0, 0, 0, 0, 0, 0, 0, # UDF
+ 0, 3, 3, 3, 3, 3, 3, 3, # OTH
+ 0, 3, 3, 3, 3, 3, 3, 3, # ASC
+ 0, 3, 3, 3, 1, 1, 3, 3, # ASS
+ 0, 3, 3, 3, 1, 2, 1, 2, # ACV
+ 0, 3, 3, 3, 3, 3, 3, 3, # ACO
+ 0, 3, 1, 3, 1, 1, 1, 3, # ASV
+ 0, 3, 1, 3, 1, 1, 3, 3, # ASO
+)
+
+
+class Latin1Prober(CharSetProber):
+    """Prober that scores a byte stream as windows-1252 text.
+
+    Every byte is mapped to a character class via Latin1_CharToClass, and
+    the transition from the previous class is looked up in
+    Latin1ClassModel.  The resulting likelihood category (0 = illegal,
+    1 = very unlikely, 2 = normal, 3 = very likely) is tallied in
+    _mFreqCounter, which get_confidence() turns into a score.
+    """
+
+    def __init__(self):
+        CharSetProber.__init__(self)
+        self.reset()
+
+    def reset(self):
+        """Clear the transition tallies and reset the base prober."""
+        # The first byte fed is judged as if it followed an OTH character.
+        self._mLastCharClass = OTH
+        self._mFreqCounter = [0] * FREQ_CAT_NUM
+        CharSetProber.reset(self)
+
+    def get_charset_name(self):
+        """Return the name of the charset this prober reports."""
+        return "windows-1252"
+
+    def feed(self, aBuf):
+        """Tally class transitions for aBuf and return the prober state.
+
+        aBuf is first passed through filter_with_english_letters().  An
+        illegal transition (category 0) marks the prober as eNotMe and
+        stops processing the rest of the buffer.
+        """
+        aBuf = self.filter_with_english_letters(aBuf)
+        for c in aBuf:
+            charClass = Latin1_CharToClass[wrap_ord(c)]
+            freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM)
+                                    + charClass]
+            if freq == 0:
+                self._mState = eNotMe
+                break
+            self._mFreqCounter[freq] += 1
+            self._mLastCharClass = charClass
+
+        return self.get_state()
+
+    def get_confidence(self):
+        """Return the confidence that the data fed so far is windows-1252.
+
+        Returns 0.01 once the prober is in the eNotMe state and 0.0 when
+        nothing has been tallied.  Otherwise the score is the share of
+        "very likely" transitions minus twenty times the share of "very
+        unlikely" ones, floored at 0.0 and then halved.
+        """
+        if self.get_state() == eNotMe:
+            return 0.01
+
+        total = sum(self._mFreqCounter)
+        if total < 0.01:
+            confidence = 0.0
+        else:
+            # The counters are ints: without float() Python 2 floors the
+            # first quotient to 0 unless every transition was category 3.
+            confidence = ((float(self._mFreqCounter[3]) / total)
+                          - (self._mFreqCounter[1] * 20.0 / total))
+        if confidence < 0.0:
+            confidence = 0.0
+        # Lower the confidence of latin1 so that other, more accurate
+        # detectors can take priority.
+        confidence = confidence * 0.5
+        return confidence
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Universal charset detector code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 2001\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-# Shy Shalom - original C code\r
-# Proofpoint, Inc.\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-import sys\r
-from . import constants\r
-from .charsetprober import CharSetProber\r
-\r
-\r
-class MultiByteCharSetProber(CharSetProber):\r
- def __init__(self):\r
- CharSetProber.__init__(self)\r
- self._mDistributionAnalyzer = None\r
- self._mCodingSM = None\r
- self._mLastChar = [0, 0]\r
-\r
- def reset(self):\r
- CharSetProber.reset(self)\r
- if self._mCodingSM:\r
- self._mCodingSM.reset()\r
- if self._mDistributionAnalyzer:\r
- self._mDistributionAnalyzer.reset()\r
- self._mLastChar = [0, 0]\r
-\r
- def get_charset_name(self):\r
- pass\r
-\r
- def feed(self, aBuf):\r
- aLen = len(aBuf)\r
- for i in range(0, aLen):\r
- codingState = self._mCodingSM.next_state(aBuf[i])\r
- if codingState == constants.eError:\r
- if constants._debug:\r
- sys.stderr.write(self.get_charset_name()\r
- + ' prober hit error at byte ' + str(i)\r
- + '\n')\r
- self._mState = constants.eNotMe\r
- break\r
- elif codingState == constants.eItsMe:\r
- self._mState = constants.eFoundIt\r
- break\r
- elif codingState == constants.eStart:\r
- charLen = self._mCodingSM.get_current_charlen()\r
- if i == 0:\r
- self._mLastChar[1] = aBuf[0]\r
- self._mDistributionAnalyzer.feed(self._mLastChar, charLen)\r
- else:\r
- self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],\r
- charLen)\r
-\r
- self._mLastChar[0] = aBuf[aLen - 1]\r
-\r
- if self.get_state() == constants.eDetecting:\r
- if (self._mDistributionAnalyzer.got_enough_data() and\r
- (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):\r
- self._mState = constants.eFoundIt\r
-\r
- return self.get_state()\r
-\r
- def get_confidence(self):\r
- return self._mDistributionAnalyzer.get_confidence()\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+# Shy Shalom - original C code
+# Proofpoint, Inc.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+import sys
+from . import constants
+from .charsetprober import CharSetProber
+
+
+class MultiByteCharSetProber(CharSetProber):
+    """Shared feed/reset logic for probers of multi-byte encodings.
+
+    feed() and get_confidence() call into _mCodingSM and
+    _mDistributionAnalyzer, which start out as None here and therefore
+    have to be assigned before use (presumably by subclasses).
+    """
+
+    def __init__(self):
+        CharSetProber.__init__(self)
+        self._mCodingSM = None
+        self._mDistributionAnalyzer = None
+        # Slot 0 keeps the final byte of the previous feed() call; slot 1
+        # receives the first byte of the current one.
+        self._mLastChar = [0, 0]
+
+    def reset(self):
+        """Reset the base prober, then whichever helpers are set."""
+        CharSetProber.reset(self)
+        for helper in (self._mCodingSM, self._mDistributionAnalyzer):
+            if helper:
+                helper.reset()
+        self._mLastChar = [0, 0]
+
+    def get_charset_name(self):
+        """Placeholder that returns None in this base class."""
+        return None
+
+    def feed(self, aBuf):
+        """Run aBuf through the coding state machine; return the state."""
+        aLen = len(aBuf)
+        machine = self._mCodingSM
+        analyzer = self._mDistributionAnalyzer
+
+        for pos, byte in enumerate(aBuf):
+            codingState = machine.next_state(byte)
+            if codingState == constants.eError:
+                if constants._debug:
+                    message = (self.get_charset_name()
+                               + ' prober hit error at byte ' + str(pos)
+                               + '\n')
+                    sys.stderr.write(message)
+                self._mState = constants.eNotMe
+                break
+            if codingState == constants.eItsMe:
+                self._mState = constants.eFoundIt
+                break
+            if codingState == constants.eStart:
+                # A character just completed; pass its last two bytes on.
+                charLen = machine.get_current_charlen()
+                if pos:
+                    analyzer.feed(aBuf[pos - 1:pos + 1], charLen)
+                else:
+                    # The character began in the previous buffer.
+                    self._mLastChar[1] = byte
+                    analyzer.feed(self._mLastChar, charLen)
+
+        self._mLastChar[0] = aBuf[aLen - 1]
+
+        # Shortcut to eFoundIt once the analyzer is confident enough.
+        if (self.get_state() == constants.eDetecting
+                and analyzer.got_enough_data()
+                and self.get_confidence() > constants.SHORTCUT_THRESHOLD):
+            self._mState = constants.eFoundIt
+
+        return self.get_state()
+
+    def get_confidence(self):
+        """Return the distribution analyzer's confidence."""
+        analyzer = self._mDistributionAnalyzer
+        return analyzer.get_confidence()
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Universal charset detector code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 2001\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-# Shy Shalom - original C code\r
-# Proofpoint, Inc.\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .charsetgroupprober import CharSetGroupProber\r
-from .utf8prober import UTF8Prober\r
-from .sjisprober import SJISProber\r
-from .eucjpprober import EUCJPProber\r
-from .gb2312prober import GB2312Prober\r
-from .euckrprober import EUCKRProber\r
-from .cp949prober import CP949Prober\r
-from .big5prober import Big5Prober\r
-from .euctwprober import EUCTWProber\r
-\r
-\r
-class MBCSGroupProber(CharSetGroupProber):\r
- def __init__(self):\r
- CharSetGroupProber.__init__(self)\r
- self._mProbers = [\r
- UTF8Prober(),\r
- SJISProber(),\r
- EUCJPProber(),\r
- GB2312Prober(),\r
- EUCKRProber(),\r
- CP949Prober(),\r
- Big5Prober(),\r
- EUCTWProber()\r
- ]\r
- self.reset()\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+# Shy Shalom - original C code
+# Proofpoint, Inc.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from .charsetgroupprober import CharSetGroupProber
+from .utf8prober import UTF8Prober
+from .sjisprober import SJISProber
+from .eucjpprober import EUCJPProber
+from .gb2312prober import GB2312Prober
+from .euckrprober import EUCKRProber
+from .cp949prober import CP949Prober
+from .big5prober import Big5Prober
+from .euctwprober import EUCTWProber
+
+
+class MBCSGroupProber(CharSetGroupProber):
+    """Group prober holding one instance of each multi-byte prober."""
+
+    def __init__(self):
+        CharSetGroupProber.__init__(self)
+        # Instantiated in exactly this order.
+        prober_classes = (
+            UTF8Prober,
+            SJISProber,
+            EUCJPProber,
+            GB2312Prober,
+            EUCKRProber,
+            CP949Prober,
+            Big5Prober,
+            EUCTWProber,
+        )
+        self._mProbers = [prober_cls() for prober_cls in prober_classes]
+        self.reset()
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .constants import eStart, eError, eItsMe\r
-\r
-# BIG5\r
-\r
-BIG5_cls = (\r
- 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value\r
- 1,1,1,1,1,1,0,0, # 08 - 0f\r
- 1,1,1,1,1,1,1,1, # 10 - 17\r
- 1,1,1,0,1,1,1,1, # 18 - 1f\r
- 1,1,1,1,1,1,1,1, # 20 - 27\r
- 1,1,1,1,1,1,1,1, # 28 - 2f\r
- 1,1,1,1,1,1,1,1, # 30 - 37\r
- 1,1,1,1,1,1,1,1, # 38 - 3f\r
- 2,2,2,2,2,2,2,2, # 40 - 47\r
- 2,2,2,2,2,2,2,2, # 48 - 4f\r
- 2,2,2,2,2,2,2,2, # 50 - 57\r
- 2,2,2,2,2,2,2,2, # 58 - 5f\r
- 2,2,2,2,2,2,2,2, # 60 - 67\r
- 2,2,2,2,2,2,2,2, # 68 - 6f\r
- 2,2,2,2,2,2,2,2, # 70 - 77\r
- 2,2,2,2,2,2,2,1, # 78 - 7f\r
- 4,4,4,4,4,4,4,4, # 80 - 87\r
- 4,4,4,4,4,4,4,4, # 88 - 8f\r
- 4,4,4,4,4,4,4,4, # 90 - 97\r
- 4,4,4,4,4,4,4,4, # 98 - 9f\r
- 4,3,3,3,3,3,3,3, # a0 - a7\r
- 3,3,3,3,3,3,3,3, # a8 - af\r
- 3,3,3,3,3,3,3,3, # b0 - b7\r
- 3,3,3,3,3,3,3,3, # b8 - bf\r
- 3,3,3,3,3,3,3,3, # c0 - c7\r
- 3,3,3,3,3,3,3,3, # c8 - cf\r
- 3,3,3,3,3,3,3,3, # d0 - d7\r
- 3,3,3,3,3,3,3,3, # d8 - df\r
- 3,3,3,3,3,3,3,3, # e0 - e7\r
- 3,3,3,3,3,3,3,3, # e8 - ef\r
- 3,3,3,3,3,3,3,3, # f0 - f7\r
- 3,3,3,3,3,3,3,0 # f8 - ff\r
-)\r
-\r
-BIG5_st = (\r
- eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07\r
- eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f\r
- eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart#10-17\r
-)\r
-\r
-Big5CharLenTable = (0, 1, 1, 2, 0)\r
-\r
-Big5SMModel = {'classTable': BIG5_cls,\r
- 'classFactor': 5,\r
- 'stateTable': BIG5_st,\r
- 'charLenTable': Big5CharLenTable,\r
- 'name': 'Big5'}\r
-\r
-# CP949\r
-\r
-CP949_cls = (\r
- 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f\r
- 1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f\r
- 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f\r
- 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f\r
- 1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f\r
- 4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f\r
- 1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f\r
- 5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f\r
- 0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f\r
- 6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f\r
- 6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af\r
- 7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf\r
- 7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf\r
- 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df\r
- 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef\r
- 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff\r
-)\r
-\r
-CP949_st = (\r
-#cls= 0 1 2 3 4 5 6 7 8 9 # previous state =\r
- eError,eStart, 3,eError,eStart,eStart, 4, 5,eError, 6, # eStart\r
- eError,eError,eError,eError,eError,eError,eError,eError,eError,eError, # eError\r
- eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe, # eItsMe\r
- eError,eError,eStart,eStart,eError,eError,eError,eStart,eStart,eStart, # 3\r
- eError,eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 4\r
- eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 5\r
- eError,eStart,eStart,eStart,eStart,eError,eError,eStart,eStart,eStart, # 6\r
-)\r
-\r
-CP949CharLenTable = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)\r
-\r
-CP949SMModel = {'classTable': CP949_cls,\r
- 'classFactor': 10,\r
- 'stateTable': CP949_st,\r
- 'charLenTable': CP949CharLenTable,\r
- 'name': 'CP949'}\r
-\r
-# EUC-JP\r
-\r
-EUCJP_cls = (\r
- 4,4,4,4,4,4,4,4, # 00 - 07\r
- 4,4,4,4,4,4,5,5, # 08 - 0f\r
- 4,4,4,4,4,4,4,4, # 10 - 17\r
- 4,4,4,5,4,4,4,4, # 18 - 1f\r
- 4,4,4,4,4,4,4,4, # 20 - 27\r
- 4,4,4,4,4,4,4,4, # 28 - 2f\r
- 4,4,4,4,4,4,4,4, # 30 - 37\r
- 4,4,4,4,4,4,4,4, # 38 - 3f\r
- 4,4,4,4,4,4,4,4, # 40 - 47\r
- 4,4,4,4,4,4,4,4, # 48 - 4f\r
- 4,4,4,4,4,4,4,4, # 50 - 57\r
- 4,4,4,4,4,4,4,4, # 58 - 5f\r
- 4,4,4,4,4,4,4,4, # 60 - 67\r
- 4,4,4,4,4,4,4,4, # 68 - 6f\r
- 4,4,4,4,4,4,4,4, # 70 - 77\r
- 4,4,4,4,4,4,4,4, # 78 - 7f\r
- 5,5,5,5,5,5,5,5, # 80 - 87\r
- 5,5,5,5,5,5,1,3, # 88 - 8f\r
- 5,5,5,5,5,5,5,5, # 90 - 97\r
- 5,5,5,5,5,5,5,5, # 98 - 9f\r
- 5,2,2,2,2,2,2,2, # a0 - a7\r
- 2,2,2,2,2,2,2,2, # a8 - af\r
- 2,2,2,2,2,2,2,2, # b0 - b7\r
- 2,2,2,2,2,2,2,2, # b8 - bf\r
- 2,2,2,2,2,2,2,2, # c0 - c7\r
- 2,2,2,2,2,2,2,2, # c8 - cf\r
- 2,2,2,2,2,2,2,2, # d0 - d7\r
- 2,2,2,2,2,2,2,2, # d8 - df\r
- 0,0,0,0,0,0,0,0, # e0 - e7\r
- 0,0,0,0,0,0,0,0, # e8 - ef\r
- 0,0,0,0,0,0,0,0, # f0 - f7\r
- 0,0,0,0,0,0,0,5 # f8 - ff\r
-)\r
-\r
-EUCJP_st = (\r
- 3, 4, 3, 5,eStart,eError,eError,eError,#00-07\r
- eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f\r
- eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17\r
- eError,eError,eStart,eError,eError,eError, 3,eError,#18-1f\r
- 3,eError,eError,eError,eStart,eStart,eStart,eStart#20-27\r
-)\r
-\r
-EUCJPCharLenTable = (2, 2, 2, 3, 1, 0)\r
-\r
-EUCJPSMModel = {'classTable': EUCJP_cls,\r
- 'classFactor': 6,\r
- 'stateTable': EUCJP_st,\r
- 'charLenTable': EUCJPCharLenTable,\r
- 'name': 'EUC-JP'}\r
-\r
-# EUC-KR\r
-\r
-EUCKR_cls = (\r
- 1,1,1,1,1,1,1,1, # 00 - 07\r
- 1,1,1,1,1,1,0,0, # 08 - 0f\r
- 1,1,1,1,1,1,1,1, # 10 - 17\r
- 1,1,1,0,1,1,1,1, # 18 - 1f\r
- 1,1,1,1,1,1,1,1, # 20 - 27\r
- 1,1,1,1,1,1,1,1, # 28 - 2f\r
- 1,1,1,1,1,1,1,1, # 30 - 37\r
- 1,1,1,1,1,1,1,1, # 38 - 3f\r
- 1,1,1,1,1,1,1,1, # 40 - 47\r
- 1,1,1,1,1,1,1,1, # 48 - 4f\r
- 1,1,1,1,1,1,1,1, # 50 - 57\r
- 1,1,1,1,1,1,1,1, # 58 - 5f\r
- 1,1,1,1,1,1,1,1, # 60 - 67\r
- 1,1,1,1,1,1,1,1, # 68 - 6f\r
- 1,1,1,1,1,1,1,1, # 70 - 77\r
- 1,1,1,1,1,1,1,1, # 78 - 7f\r
- 0,0,0,0,0,0,0,0, # 80 - 87\r
- 0,0,0,0,0,0,0,0, # 88 - 8f\r
- 0,0,0,0,0,0,0,0, # 90 - 97\r
- 0,0,0,0,0,0,0,0, # 98 - 9f\r
- 0,2,2,2,2,2,2,2, # a0 - a7\r
- 2,2,2,2,2,3,3,3, # a8 - af\r
- 2,2,2,2,2,2,2,2, # b0 - b7\r
- 2,2,2,2,2,2,2,2, # b8 - bf\r
- 2,2,2,2,2,2,2,2, # c0 - c7\r
- 2,3,2,2,2,2,2,2, # c8 - cf\r
- 2,2,2,2,2,2,2,2, # d0 - d7\r
- 2,2,2,2,2,2,2,2, # d8 - df\r
- 2,2,2,2,2,2,2,2, # e0 - e7\r
- 2,2,2,2,2,2,2,2, # e8 - ef\r
- 2,2,2,2,2,2,2,2, # f0 - f7\r
- 2,2,2,2,2,2,2,0 # f8 - ff\r
-)\r
-\r
-EUCKR_st = (\r
- eError,eStart, 3,eError,eError,eError,eError,eError,#00-07\r
- eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart #08-0f\r
-)\r
-\r
-EUCKRCharLenTable = (0, 1, 2, 0)\r
-\r
-EUCKRSMModel = {'classTable': EUCKR_cls,\r
- 'classFactor': 4,\r
- 'stateTable': EUCKR_st,\r
- 'charLenTable': EUCKRCharLenTable,\r
- 'name': 'EUC-KR'}\r
-\r
-# EUC-TW\r
-\r
-EUCTW_cls = (\r
- 2,2,2,2,2,2,2,2, # 00 - 07\r
- 2,2,2,2,2,2,0,0, # 08 - 0f\r
- 2,2,2,2,2,2,2,2, # 10 - 17\r
- 2,2,2,0,2,2,2,2, # 18 - 1f\r
- 2,2,2,2,2,2,2,2, # 20 - 27\r
- 2,2,2,2,2,2,2,2, # 28 - 2f\r
- 2,2,2,2,2,2,2,2, # 30 - 37\r
- 2,2,2,2,2,2,2,2, # 38 - 3f\r
- 2,2,2,2,2,2,2,2, # 40 - 47\r
- 2,2,2,2,2,2,2,2, # 48 - 4f\r
- 2,2,2,2,2,2,2,2, # 50 - 57\r
- 2,2,2,2,2,2,2,2, # 58 - 5f\r
- 2,2,2,2,2,2,2,2, # 60 - 67\r
- 2,2,2,2,2,2,2,2, # 68 - 6f\r
- 2,2,2,2,2,2,2,2, # 70 - 77\r
- 2,2,2,2,2,2,2,2, # 78 - 7f\r
- 0,0,0,0,0,0,0,0, # 80 - 87\r
- 0,0,0,0,0,0,6,0, # 88 - 8f\r
- 0,0,0,0,0,0,0,0, # 90 - 97\r
- 0,0,0,0,0,0,0,0, # 98 - 9f\r
- 0,3,4,4,4,4,4,4, # a0 - a7\r
- 5,5,1,1,1,1,1,1, # a8 - af\r
- 1,1,1,1,1,1,1,1, # b0 - b7\r
- 1,1,1,1,1,1,1,1, # b8 - bf\r
- 1,1,3,1,3,3,3,3, # c0 - c7\r
- 3,3,3,3,3,3,3,3, # c8 - cf\r
- 3,3,3,3,3,3,3,3, # d0 - d7\r
- 3,3,3,3,3,3,3,3, # d8 - df\r
- 3,3,3,3,3,3,3,3, # e0 - e7\r
- 3,3,3,3,3,3,3,3, # e8 - ef\r
- 3,3,3,3,3,3,3,3, # f0 - f7\r
- 3,3,3,3,3,3,3,0 # f8 - ff\r
-)\r
-\r
-EUCTW_st = (\r
- eError,eError,eStart, 3, 3, 3, 4,eError,#00-07\r
- eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f\r
- eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17\r
- eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f\r
- 5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27\r
- eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f\r
-)\r
-\r
-EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3)\r
-\r
-EUCTWSMModel = {'classTable': EUCTW_cls,\r
- 'classFactor': 7,\r
- 'stateTable': EUCTW_st,\r
- 'charLenTable': EUCTWCharLenTable,\r
- 'name': 'x-euc-tw'}\r
-\r
-# GB2312\r
-\r
-GB2312_cls = (\r
- 1,1,1,1,1,1,1,1, # 00 - 07\r
- 1,1,1,1,1,1,0,0, # 08 - 0f\r
- 1,1,1,1,1,1,1,1, # 10 - 17\r
- 1,1,1,0,1,1,1,1, # 18 - 1f\r
- 1,1,1,1,1,1,1,1, # 20 - 27\r
- 1,1,1,1,1,1,1,1, # 28 - 2f\r
- 3,3,3,3,3,3,3,3, # 30 - 37\r
- 3,3,1,1,1,1,1,1, # 38 - 3f\r
- 2,2,2,2,2,2,2,2, # 40 - 47\r
- 2,2,2,2,2,2,2,2, # 48 - 4f\r
- 2,2,2,2,2,2,2,2, # 50 - 57\r
- 2,2,2,2,2,2,2,2, # 58 - 5f\r
- 2,2,2,2,2,2,2,2, # 60 - 67\r
- 2,2,2,2,2,2,2,2, # 68 - 6f\r
- 2,2,2,2,2,2,2,2, # 70 - 77\r
- 2,2,2,2,2,2,2,4, # 78 - 7f\r
- 5,6,6,6,6,6,6,6, # 80 - 87\r
- 6,6,6,6,6,6,6,6, # 88 - 8f\r
- 6,6,6,6,6,6,6,6, # 90 - 97\r
- 6,6,6,6,6,6,6,6, # 98 - 9f\r
- 6,6,6,6,6,6,6,6, # a0 - a7\r
- 6,6,6,6,6,6,6,6, # a8 - af\r
- 6,6,6,6,6,6,6,6, # b0 - b7\r
- 6,6,6,6,6,6,6,6, # b8 - bf\r
- 6,6,6,6,6,6,6,6, # c0 - c7\r
- 6,6,6,6,6,6,6,6, # c8 - cf\r
- 6,6,6,6,6,6,6,6, # d0 - d7\r
- 6,6,6,6,6,6,6,6, # d8 - df\r
- 6,6,6,6,6,6,6,6, # e0 - e7\r
- 6,6,6,6,6,6,6,6, # e8 - ef\r
- 6,6,6,6,6,6,6,6, # f0 - f7\r
- 6,6,6,6,6,6,6,0 # f8 - ff\r
-)\r
-\r
-GB2312_st = (\r
- eError,eStart,eStart,eStart,eStart,eStart, 3,eError,#00-07\r
- eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f\r
- eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17\r
- 4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f\r
- eError,eError, 5,eError,eError,eError,eItsMe,eError,#20-27\r
- eError,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f\r
-)\r
-\r
-# To be accurate, the length of class 6 can be either 2 or 4.\r
-# But it is not necessary to discriminate between the two since\r
-# it is used for frequency analysis only, and we are validing\r
-# each code range there as well. So it is safe to set it to be\r
-# 2 here.\r
-GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2)\r
-\r
-GB2312SMModel = {'classTable': GB2312_cls,\r
- 'classFactor': 7,\r
- 'stateTable': GB2312_st,\r
- 'charLenTable': GB2312CharLenTable,\r
- 'name': 'GB2312'}\r
-\r
-# Shift_JIS\r
-\r
-SJIS_cls = (\r
- 1,1,1,1,1,1,1,1, # 00 - 07\r
- 1,1,1,1,1,1,0,0, # 08 - 0f\r
- 1,1,1,1,1,1,1,1, # 10 - 17\r
- 1,1,1,0,1,1,1,1, # 18 - 1f\r
- 1,1,1,1,1,1,1,1, # 20 - 27\r
- 1,1,1,1,1,1,1,1, # 28 - 2f\r
- 1,1,1,1,1,1,1,1, # 30 - 37\r
- 1,1,1,1,1,1,1,1, # 38 - 3f\r
- 2,2,2,2,2,2,2,2, # 40 - 47\r
- 2,2,2,2,2,2,2,2, # 48 - 4f\r
- 2,2,2,2,2,2,2,2, # 50 - 57\r
- 2,2,2,2,2,2,2,2, # 58 - 5f\r
- 2,2,2,2,2,2,2,2, # 60 - 67\r
- 2,2,2,2,2,2,2,2, # 68 - 6f\r
- 2,2,2,2,2,2,2,2, # 70 - 77\r
- 2,2,2,2,2,2,2,1, # 78 - 7f\r
- 3,3,3,3,3,3,3,3, # 80 - 87\r
- 3,3,3,3,3,3,3,3, # 88 - 8f\r
- 3,3,3,3,3,3,3,3, # 90 - 97\r
- 3,3,3,3,3,3,3,3, # 98 - 9f\r
- #0xa0 is illegal in sjis encoding, but some pages does\r
- #contain such byte. We need to be more error forgiven.\r
- 2,2,2,2,2,2,2,2, # a0 - a7\r
- 2,2,2,2,2,2,2,2, # a8 - af\r
- 2,2,2,2,2,2,2,2, # b0 - b7\r
- 2,2,2,2,2,2,2,2, # b8 - bf\r
- 2,2,2,2,2,2,2,2, # c0 - c7\r
- 2,2,2,2,2,2,2,2, # c8 - cf\r
- 2,2,2,2,2,2,2,2, # d0 - d7\r
- 2,2,2,2,2,2,2,2, # d8 - df\r
- 3,3,3,3,3,3,3,3, # e0 - e7\r
- 3,3,3,3,3,4,4,4, # e8 - ef\r
- 4,4,4,4,4,4,4,4, # f0 - f7\r
- 4,4,4,4,4,0,0,0 # f8 - ff\r
-)\r
-\r
-\r
-SJIS_st = (\r
- eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07\r
- eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f\r
- eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart #10-17\r
-)\r
-\r
-SJISCharLenTable = (0, 1, 1, 2, 0, 0)\r
-\r
-SJISSMModel = {'classTable': SJIS_cls,\r
- 'classFactor': 6,\r
- 'stateTable': SJIS_st,\r
- 'charLenTable': SJISCharLenTable,\r
- 'name': 'Shift_JIS'}\r
-\r
-# UCS2-BE\r
-\r
-UCS2BE_cls = (\r
- 0,0,0,0,0,0,0,0, # 00 - 07\r
- 0,0,1,0,0,2,0,0, # 08 - 0f\r
- 0,0,0,0,0,0,0,0, # 10 - 17\r
- 0,0,0,3,0,0,0,0, # 18 - 1f\r
- 0,0,0,0,0,0,0,0, # 20 - 27\r
- 0,3,3,3,3,3,0,0, # 28 - 2f\r
- 0,0,0,0,0,0,0,0, # 30 - 37\r
- 0,0,0,0,0,0,0,0, # 38 - 3f\r
- 0,0,0,0,0,0,0,0, # 40 - 47\r
- 0,0,0,0,0,0,0,0, # 48 - 4f\r
- 0,0,0,0,0,0,0,0, # 50 - 57\r
- 0,0,0,0,0,0,0,0, # 58 - 5f\r
- 0,0,0,0,0,0,0,0, # 60 - 67\r
- 0,0,0,0,0,0,0,0, # 68 - 6f\r
- 0,0,0,0,0,0,0,0, # 70 - 77\r
- 0,0,0,0,0,0,0,0, # 78 - 7f\r
- 0,0,0,0,0,0,0,0, # 80 - 87\r
- 0,0,0,0,0,0,0,0, # 88 - 8f\r
- 0,0,0,0,0,0,0,0, # 90 - 97\r
- 0,0,0,0,0,0,0,0, # 98 - 9f\r
- 0,0,0,0,0,0,0,0, # a0 - a7\r
- 0,0,0,0,0,0,0,0, # a8 - af\r
- 0,0,0,0,0,0,0,0, # b0 - b7\r
- 0,0,0,0,0,0,0,0, # b8 - bf\r
- 0,0,0,0,0,0,0,0, # c0 - c7\r
- 0,0,0,0,0,0,0,0, # c8 - cf\r
- 0,0,0,0,0,0,0,0, # d0 - d7\r
- 0,0,0,0,0,0,0,0, # d8 - df\r
- 0,0,0,0,0,0,0,0, # e0 - e7\r
- 0,0,0,0,0,0,0,0, # e8 - ef\r
- 0,0,0,0,0,0,0,0, # f0 - f7\r
- 0,0,0,0,0,0,4,5 # f8 - ff\r
-)\r
-\r
-UCS2BE_st = (\r
- 5, 7, 7,eError, 4, 3,eError,eError,#00-07\r
- eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f\r
- eItsMe,eItsMe, 6, 6, 6, 6,eError,eError,#10-17\r
- 6, 6, 6, 6, 6,eItsMe, 6, 6,#18-1f\r
- 6, 6, 6, 6, 5, 7, 7,eError,#20-27\r
- 5, 8, 6, 6,eError, 6, 6, 6,#28-2f\r
- 6, 6, 6, 6,eError,eError,eStart,eStart #30-37\r
-)\r
-\r
-UCS2BECharLenTable = (2, 2, 2, 0, 2, 2)\r
-\r
-UCS2BESMModel = {'classTable': UCS2BE_cls,\r
- 'classFactor': 6,\r
- 'stateTable': UCS2BE_st,\r
- 'charLenTable': UCS2BECharLenTable,\r
- 'name': 'UTF-16BE'}\r
-\r
-# UCS2-LE\r
-\r
-UCS2LE_cls = (\r
- 0,0,0,0,0,0,0,0, # 00 - 07\r
- 0,0,1,0,0,2,0,0, # 08 - 0f\r
- 0,0,0,0,0,0,0,0, # 10 - 17\r
- 0,0,0,3,0,0,0,0, # 18 - 1f\r
- 0,0,0,0,0,0,0,0, # 20 - 27\r
- 0,3,3,3,3,3,0,0, # 28 - 2f\r
- 0,0,0,0,0,0,0,0, # 30 - 37\r
- 0,0,0,0,0,0,0,0, # 38 - 3f\r
- 0,0,0,0,0,0,0,0, # 40 - 47\r
- 0,0,0,0,0,0,0,0, # 48 - 4f\r
- 0,0,0,0,0,0,0,0, # 50 - 57\r
- 0,0,0,0,0,0,0,0, # 58 - 5f\r
- 0,0,0,0,0,0,0,0, # 60 - 67\r
- 0,0,0,0,0,0,0,0, # 68 - 6f\r
- 0,0,0,0,0,0,0,0, # 70 - 77\r
- 0,0,0,0,0,0,0,0, # 78 - 7f\r
- 0,0,0,0,0,0,0,0, # 80 - 87\r
- 0,0,0,0,0,0,0,0, # 88 - 8f\r
- 0,0,0,0,0,0,0,0, # 90 - 97\r
- 0,0,0,0,0,0,0,0, # 98 - 9f\r
- 0,0,0,0,0,0,0,0, # a0 - a7\r
- 0,0,0,0,0,0,0,0, # a8 - af\r
- 0,0,0,0,0,0,0,0, # b0 - b7\r
- 0,0,0,0,0,0,0,0, # b8 - bf\r
- 0,0,0,0,0,0,0,0, # c0 - c7\r
- 0,0,0,0,0,0,0,0, # c8 - cf\r
- 0,0,0,0,0,0,0,0, # d0 - d7\r
- 0,0,0,0,0,0,0,0, # d8 - df\r
- 0,0,0,0,0,0,0,0, # e0 - e7\r
- 0,0,0,0,0,0,0,0, # e8 - ef\r
- 0,0,0,0,0,0,0,0, # f0 - f7\r
- 0,0,0,0,0,0,4,5 # f8 - ff\r
-)\r
-\r
-UCS2LE_st = (\r
- 6, 6, 7, 6, 4, 3,eError,eError,#00-07\r
- eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f\r
- eItsMe,eItsMe, 5, 5, 5,eError,eItsMe,eError,#10-17\r
- 5, 5, 5,eError, 5,eError, 6, 6,#18-1f\r
- 7, 6, 8, 8, 5, 5, 5,eError,#20-27\r
- 5, 5, 5,eError,eError,eError, 5, 5,#28-2f\r
- 5, 5, 5,eError, 5,eError,eStart,eStart #30-37\r
-)\r
-\r
-UCS2LECharLenTable = (2, 2, 2, 2, 2, 2)\r
-\r
-UCS2LESMModel = {'classTable': UCS2LE_cls,\r
- 'classFactor': 6,\r
- 'stateTable': UCS2LE_st,\r
- 'charLenTable': UCS2LECharLenTable,\r
- 'name': 'UTF-16LE'}\r
-\r
-# UTF-8\r
-\r
-UTF8_cls = (\r
- 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value\r
- 1,1,1,1,1,1,0,0, # 08 - 0f\r
- 1,1,1,1,1,1,1,1, # 10 - 17\r
- 1,1,1,0,1,1,1,1, # 18 - 1f\r
- 1,1,1,1,1,1,1,1, # 20 - 27\r
- 1,1,1,1,1,1,1,1, # 28 - 2f\r
- 1,1,1,1,1,1,1,1, # 30 - 37\r
- 1,1,1,1,1,1,1,1, # 38 - 3f\r
- 1,1,1,1,1,1,1,1, # 40 - 47\r
- 1,1,1,1,1,1,1,1, # 48 - 4f\r
- 1,1,1,1,1,1,1,1, # 50 - 57\r
- 1,1,1,1,1,1,1,1, # 58 - 5f\r
- 1,1,1,1,1,1,1,1, # 60 - 67\r
- 1,1,1,1,1,1,1,1, # 68 - 6f\r
- 1,1,1,1,1,1,1,1, # 70 - 77\r
- 1,1,1,1,1,1,1,1, # 78 - 7f\r
- 2,2,2,2,3,3,3,3, # 80 - 87\r
- 4,4,4,4,4,4,4,4, # 88 - 8f\r
- 4,4,4,4,4,4,4,4, # 90 - 97\r
- 4,4,4,4,4,4,4,4, # 98 - 9f\r
- 5,5,5,5,5,5,5,5, # a0 - a7\r
- 5,5,5,5,5,5,5,5, # a8 - af\r
- 5,5,5,5,5,5,5,5, # b0 - b7\r
- 5,5,5,5,5,5,5,5, # b8 - bf\r
- 0,0,6,6,6,6,6,6, # c0 - c7\r
- 6,6,6,6,6,6,6,6, # c8 - cf\r
- 6,6,6,6,6,6,6,6, # d0 - d7\r
- 6,6,6,6,6,6,6,6, # d8 - df\r
- 7,8,8,8,8,8,8,8, # e0 - e7\r
- 8,8,8,8,8,9,8,8, # e8 - ef\r
- 10,11,11,11,11,11,11,11, # f0 - f7\r
- 12,13,13,13,14,15,0,0 # f8 - ff\r
-)\r
-\r
-UTF8_st = (\r
- eError,eStart,eError,eError,eError,eError, 12, 10,#00-07\r
- 9, 11, 8, 7, 6, 5, 4, 3,#08-0f\r
- eError,eError,eError,eError,eError,eError,eError,eError,#10-17\r
- eError,eError,eError,eError,eError,eError,eError,eError,#18-1f\r
- eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27\r
- eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f\r
- eError,eError, 5, 5, 5, 5,eError,eError,#30-37\r
- eError,eError,eError,eError,eError,eError,eError,eError,#38-3f\r
- eError,eError,eError, 5, 5, 5,eError,eError,#40-47\r
- eError,eError,eError,eError,eError,eError,eError,eError,#48-4f\r
- eError,eError, 7, 7, 7, 7,eError,eError,#50-57\r
- eError,eError,eError,eError,eError,eError,eError,eError,#58-5f\r
- eError,eError,eError,eError, 7, 7,eError,eError,#60-67\r
- eError,eError,eError,eError,eError,eError,eError,eError,#68-6f\r
- eError,eError, 9, 9, 9, 9,eError,eError,#70-77\r
- eError,eError,eError,eError,eError,eError,eError,eError,#78-7f\r
- eError,eError,eError,eError,eError, 9,eError,eError,#80-87\r
- eError,eError,eError,eError,eError,eError,eError,eError,#88-8f\r
- eError,eError, 12, 12, 12, 12,eError,eError,#90-97\r
- eError,eError,eError,eError,eError,eError,eError,eError,#98-9f\r
- eError,eError,eError,eError,eError, 12,eError,eError,#a0-a7\r
- eError,eError,eError,eError,eError,eError,eError,eError,#a8-af\r
- eError,eError, 12, 12, 12,eError,eError,eError,#b0-b7\r
- eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf\r
- eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7\r
- eError,eError,eError,eError,eError,eError,eError,eError #c8-cf\r
-)\r
-\r
-UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)\r
-\r
-UTF8SMModel = {'classTable': UTF8_cls,\r
- 'classFactor': 16,\r
- 'stateTable': UTF8_st,\r
- 'charLenTable': UTF8CharLenTable,\r
- 'name': 'UTF-8'}\r
-\r
-# flake8: noqa\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from .constants import eStart, eError, eItsMe
+
+# BIG5
+
+BIG5_cls = (
+ 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value
+ 1,1,1,1,1,1,0,0, # 08 - 0f
+ 1,1,1,1,1,1,1,1, # 10 - 17
+ 1,1,1,0,1,1,1,1, # 18 - 1f
+ 1,1,1,1,1,1,1,1, # 20 - 27
+ 1,1,1,1,1,1,1,1, # 28 - 2f
+ 1,1,1,1,1,1,1,1, # 30 - 37
+ 1,1,1,1,1,1,1,1, # 38 - 3f
+ 2,2,2,2,2,2,2,2, # 40 - 47
+ 2,2,2,2,2,2,2,2, # 48 - 4f
+ 2,2,2,2,2,2,2,2, # 50 - 57
+ 2,2,2,2,2,2,2,2, # 58 - 5f
+ 2,2,2,2,2,2,2,2, # 60 - 67
+ 2,2,2,2,2,2,2,2, # 68 - 6f
+ 2,2,2,2,2,2,2,2, # 70 - 77
+ 2,2,2,2,2,2,2,1, # 78 - 7f
+ 4,4,4,4,4,4,4,4, # 80 - 87
+ 4,4,4,4,4,4,4,4, # 88 - 8f
+ 4,4,4,4,4,4,4,4, # 90 - 97
+ 4,4,4,4,4,4,4,4, # 98 - 9f
+ 4,3,3,3,3,3,3,3, # a0 - a7
+ 3,3,3,3,3,3,3,3, # a8 - af
+ 3,3,3,3,3,3,3,3, # b0 - b7
+ 3,3,3,3,3,3,3,3, # b8 - bf
+ 3,3,3,3,3,3,3,3, # c0 - c7
+ 3,3,3,3,3,3,3,3, # c8 - cf
+ 3,3,3,3,3,3,3,3, # d0 - d7
+ 3,3,3,3,3,3,3,3, # d8 - df
+ 3,3,3,3,3,3,3,3, # e0 - e7
+ 3,3,3,3,3,3,3,3, # e8 - ef
+ 3,3,3,3,3,3,3,3, # f0 - f7
+ 3,3,3,3,3,3,3,0 # f8 - ff
+)
+
+BIG5_st = (
+ eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07
+ eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f
+ eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart#10-17
+)
+
+Big5CharLenTable = (0, 1, 1, 2, 0)
+
+Big5SMModel = {'classTable': BIG5_cls,
+ 'classFactor': 5,
+ 'stateTable': BIG5_st,
+ 'charLenTable': Big5CharLenTable,
+ 'name': 'Big5'}
+
+# CP949
+
+CP949_cls = (
+ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f
+ 1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f
+ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f
+ 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f
+ 1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f
+ 4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f
+ 1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f
+ 5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f
+ 0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f
+ 6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f
+ 6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af
+ 7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf
+ 7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf
+ 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df
+ 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef
+ 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff
+)
+
+CP949_st = (
+#cls= 0 1 2 3 4 5 6 7 8 9 # previous state =
+ eError,eStart, 3,eError,eStart,eStart, 4, 5,eError, 6, # eStart
+ eError,eError,eError,eError,eError,eError,eError,eError,eError,eError, # eError
+ eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe, # eItsMe
+ eError,eError,eStart,eStart,eError,eError,eError,eStart,eStart,eStart, # 3
+ eError,eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 4
+ eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 5
+ eError,eStart,eStart,eStart,eStart,eError,eError,eStart,eStart,eStart, # 6
+)
+
+CP949CharLenTable = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
+
+CP949SMModel = {'classTable': CP949_cls,
+ 'classFactor': 10,
+ 'stateTable': CP949_st,
+ 'charLenTable': CP949CharLenTable,
+ 'name': 'CP949'}
+
+# EUC-JP
+
+EUCJP_cls = (
+ 4,4,4,4,4,4,4,4, # 00 - 07
+ 4,4,4,4,4,4,5,5, # 08 - 0f
+ 4,4,4,4,4,4,4,4, # 10 - 17
+ 4,4,4,5,4,4,4,4, # 18 - 1f
+ 4,4,4,4,4,4,4,4, # 20 - 27
+ 4,4,4,4,4,4,4,4, # 28 - 2f
+ 4,4,4,4,4,4,4,4, # 30 - 37
+ 4,4,4,4,4,4,4,4, # 38 - 3f
+ 4,4,4,4,4,4,4,4, # 40 - 47
+ 4,4,4,4,4,4,4,4, # 48 - 4f
+ 4,4,4,4,4,4,4,4, # 50 - 57
+ 4,4,4,4,4,4,4,4, # 58 - 5f
+ 4,4,4,4,4,4,4,4, # 60 - 67
+ 4,4,4,4,4,4,4,4, # 68 - 6f
+ 4,4,4,4,4,4,4,4, # 70 - 77
+ 4,4,4,4,4,4,4,4, # 78 - 7f
+ 5,5,5,5,5,5,5,5, # 80 - 87
+ 5,5,5,5,5,5,1,3, # 88 - 8f
+ 5,5,5,5,5,5,5,5, # 90 - 97
+ 5,5,5,5,5,5,5,5, # 98 - 9f
+ 5,2,2,2,2,2,2,2, # a0 - a7
+ 2,2,2,2,2,2,2,2, # a8 - af
+ 2,2,2,2,2,2,2,2, # b0 - b7
+ 2,2,2,2,2,2,2,2, # b8 - bf
+ 2,2,2,2,2,2,2,2, # c0 - c7
+ 2,2,2,2,2,2,2,2, # c8 - cf
+ 2,2,2,2,2,2,2,2, # d0 - d7
+ 2,2,2,2,2,2,2,2, # d8 - df
+ 0,0,0,0,0,0,0,0, # e0 - e7
+ 0,0,0,0,0,0,0,0, # e8 - ef
+ 0,0,0,0,0,0,0,0, # f0 - f7
+ 0,0,0,0,0,0,0,5 # f8 - ff
+)
+
+EUCJP_st = (
+ 3, 4, 3, 5,eStart,eError,eError,eError,#00-07
+ eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
+ eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17
+ eError,eError,eStart,eError,eError,eError, 3,eError,#18-1f
+ 3,eError,eError,eError,eStart,eStart,eStart,eStart#20-27
+)
+
+EUCJPCharLenTable = (2, 2, 2, 3, 1, 0)
+
+EUCJPSMModel = {'classTable': EUCJP_cls,
+ 'classFactor': 6,
+ 'stateTable': EUCJP_st,
+ 'charLenTable': EUCJPCharLenTable,
+ 'name': 'EUC-JP'}
+
+# EUC-KR
+
+EUCKR_cls = (
+ 1,1,1,1,1,1,1,1, # 00 - 07
+ 1,1,1,1,1,1,0,0, # 08 - 0f
+ 1,1,1,1,1,1,1,1, # 10 - 17
+ 1,1,1,0,1,1,1,1, # 18 - 1f
+ 1,1,1,1,1,1,1,1, # 20 - 27
+ 1,1,1,1,1,1,1,1, # 28 - 2f
+ 1,1,1,1,1,1,1,1, # 30 - 37
+ 1,1,1,1,1,1,1,1, # 38 - 3f
+ 1,1,1,1,1,1,1,1, # 40 - 47
+ 1,1,1,1,1,1,1,1, # 48 - 4f
+ 1,1,1,1,1,1,1,1, # 50 - 57
+ 1,1,1,1,1,1,1,1, # 58 - 5f
+ 1,1,1,1,1,1,1,1, # 60 - 67
+ 1,1,1,1,1,1,1,1, # 68 - 6f
+ 1,1,1,1,1,1,1,1, # 70 - 77
+ 1,1,1,1,1,1,1,1, # 78 - 7f
+ 0,0,0,0,0,0,0,0, # 80 - 87
+ 0,0,0,0,0,0,0,0, # 88 - 8f
+ 0,0,0,0,0,0,0,0, # 90 - 97
+ 0,0,0,0,0,0,0,0, # 98 - 9f
+ 0,2,2,2,2,2,2,2, # a0 - a7
+ 2,2,2,2,2,3,3,3, # a8 - af
+ 2,2,2,2,2,2,2,2, # b0 - b7
+ 2,2,2,2,2,2,2,2, # b8 - bf
+ 2,2,2,2,2,2,2,2, # c0 - c7
+ 2,3,2,2,2,2,2,2, # c8 - cf
+ 2,2,2,2,2,2,2,2, # d0 - d7
+ 2,2,2,2,2,2,2,2, # d8 - df
+ 2,2,2,2,2,2,2,2, # e0 - e7
+ 2,2,2,2,2,2,2,2, # e8 - ef
+ 2,2,2,2,2,2,2,2, # f0 - f7
+ 2,2,2,2,2,2,2,0 # f8 - ff
+)
+
+EUCKR_st = (
+ eError,eStart, 3,eError,eError,eError,eError,eError,#00-07
+ eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart #08-0f
+)
+
+EUCKRCharLenTable = (0, 1, 2, 0)
+
+EUCKRSMModel = {'classTable': EUCKR_cls,
+ 'classFactor': 4,
+ 'stateTable': EUCKR_st,
+ 'charLenTable': EUCKRCharLenTable,
+ 'name': 'EUC-KR'}
+
+# EUC-TW
+
+EUCTW_cls = (
+ 2,2,2,2,2,2,2,2, # 00 - 07
+ 2,2,2,2,2,2,0,0, # 08 - 0f
+ 2,2,2,2,2,2,2,2, # 10 - 17
+ 2,2,2,0,2,2,2,2, # 18 - 1f
+ 2,2,2,2,2,2,2,2, # 20 - 27
+ 2,2,2,2,2,2,2,2, # 28 - 2f
+ 2,2,2,2,2,2,2,2, # 30 - 37
+ 2,2,2,2,2,2,2,2, # 38 - 3f
+ 2,2,2,2,2,2,2,2, # 40 - 47
+ 2,2,2,2,2,2,2,2, # 48 - 4f
+ 2,2,2,2,2,2,2,2, # 50 - 57
+ 2,2,2,2,2,2,2,2, # 58 - 5f
+ 2,2,2,2,2,2,2,2, # 60 - 67
+ 2,2,2,2,2,2,2,2, # 68 - 6f
+ 2,2,2,2,2,2,2,2, # 70 - 77
+ 2,2,2,2,2,2,2,2, # 78 - 7f
+ 0,0,0,0,0,0,0,0, # 80 - 87
+ 0,0,0,0,0,0,6,0, # 88 - 8f
+ 0,0,0,0,0,0,0,0, # 90 - 97
+ 0,0,0,0,0,0,0,0, # 98 - 9f
+ 0,3,4,4,4,4,4,4, # a0 - a7
+ 5,5,1,1,1,1,1,1, # a8 - af
+ 1,1,1,1,1,1,1,1, # b0 - b7
+ 1,1,1,1,1,1,1,1, # b8 - bf
+ 1,1,3,1,3,3,3,3, # c0 - c7
+ 3,3,3,3,3,3,3,3, # c8 - cf
+ 3,3,3,3,3,3,3,3, # d0 - d7
+ 3,3,3,3,3,3,3,3, # d8 - df
+ 3,3,3,3,3,3,3,3, # e0 - e7
+ 3,3,3,3,3,3,3,3, # e8 - ef
+ 3,3,3,3,3,3,3,3, # f0 - f7
+ 3,3,3,3,3,3,3,0 # f8 - ff
+)
+
+EUCTW_st = (
+ eError,eError,eStart, 3, 3, 3, 4,eError,#00-07
+ eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f
+ eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17
+ eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f
+ 5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27
+ eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f
+)
+
+EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3)
+
+EUCTWSMModel = {'classTable': EUCTW_cls,
+ 'classFactor': 7,
+ 'stateTable': EUCTW_st,
+ 'charLenTable': EUCTWCharLenTable,
+ 'name': 'x-euc-tw'}
+
+# GB2312
+
+GB2312_cls = (
+ 1,1,1,1,1,1,1,1, # 00 - 07
+ 1,1,1,1,1,1,0,0, # 08 - 0f
+ 1,1,1,1,1,1,1,1, # 10 - 17
+ 1,1,1,0,1,1,1,1, # 18 - 1f
+ 1,1,1,1,1,1,1,1, # 20 - 27
+ 1,1,1,1,1,1,1,1, # 28 - 2f
+ 3,3,3,3,3,3,3,3, # 30 - 37
+ 3,3,1,1,1,1,1,1, # 38 - 3f
+ 2,2,2,2,2,2,2,2, # 40 - 47
+ 2,2,2,2,2,2,2,2, # 48 - 4f
+ 2,2,2,2,2,2,2,2, # 50 - 57
+ 2,2,2,2,2,2,2,2, # 58 - 5f
+ 2,2,2,2,2,2,2,2, # 60 - 67
+ 2,2,2,2,2,2,2,2, # 68 - 6f
+ 2,2,2,2,2,2,2,2, # 70 - 77
+ 2,2,2,2,2,2,2,4, # 78 - 7f
+ 5,6,6,6,6,6,6,6, # 80 - 87
+ 6,6,6,6,6,6,6,6, # 88 - 8f
+ 6,6,6,6,6,6,6,6, # 90 - 97
+ 6,6,6,6,6,6,6,6, # 98 - 9f
+ 6,6,6,6,6,6,6,6, # a0 - a7
+ 6,6,6,6,6,6,6,6, # a8 - af
+ 6,6,6,6,6,6,6,6, # b0 - b7
+ 6,6,6,6,6,6,6,6, # b8 - bf
+ 6,6,6,6,6,6,6,6, # c0 - c7
+ 6,6,6,6,6,6,6,6, # c8 - cf
+ 6,6,6,6,6,6,6,6, # d0 - d7
+ 6,6,6,6,6,6,6,6, # d8 - df
+ 6,6,6,6,6,6,6,6, # e0 - e7
+ 6,6,6,6,6,6,6,6, # e8 - ef
+ 6,6,6,6,6,6,6,6, # f0 - f7
+ 6,6,6,6,6,6,6,0 # f8 - ff
+)
+
+GB2312_st = (
+ eError,eStart,eStart,eStart,eStart,eStart, 3,eError,#00-07
+ eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f
+ eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17
+ 4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f
+ eError,eError, 5,eError,eError,eError,eItsMe,eError,#20-27
+ eError,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f
+)
+
+# To be accurate, the length of class 6 can be either 2 or 4.
+# But it is not necessary to discriminate between the two since
+# it is used for frequency analysis only, and we are validating
+# each code range there as well. So it is safe to set it to be
+# 2 here.
+GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2)
+
+GB2312SMModel = {'classTable': GB2312_cls,
+ 'classFactor': 7,
+ 'stateTable': GB2312_st,
+ 'charLenTable': GB2312CharLenTable,
+ 'name': 'GB2312'}
+
+# Shift_JIS
+
+SJIS_cls = (
+ 1,1,1,1,1,1,1,1, # 00 - 07
+ 1,1,1,1,1,1,0,0, # 08 - 0f
+ 1,1,1,1,1,1,1,1, # 10 - 17
+ 1,1,1,0,1,1,1,1, # 18 - 1f
+ 1,1,1,1,1,1,1,1, # 20 - 27
+ 1,1,1,1,1,1,1,1, # 28 - 2f
+ 1,1,1,1,1,1,1,1, # 30 - 37
+ 1,1,1,1,1,1,1,1, # 38 - 3f
+ 2,2,2,2,2,2,2,2, # 40 - 47
+ 2,2,2,2,2,2,2,2, # 48 - 4f
+ 2,2,2,2,2,2,2,2, # 50 - 57
+ 2,2,2,2,2,2,2,2, # 58 - 5f
+ 2,2,2,2,2,2,2,2, # 60 - 67
+ 2,2,2,2,2,2,2,2, # 68 - 6f
+ 2,2,2,2,2,2,2,2, # 70 - 77
+ 2,2,2,2,2,2,2,1, # 78 - 7f
+ 3,3,3,3,3,3,3,3, # 80 - 87
+ 3,3,3,3,3,3,3,3, # 88 - 8f
+ 3,3,3,3,3,3,3,3, # 90 - 97
+ 3,3,3,3,3,3,3,3, # 98 - 9f
+ # 0xa0 is illegal in the Shift_JIS encoding, but some pages do
+ # contain such bytes, so we need to be more forgiving of errors.
+ 2,2,2,2,2,2,2,2, # a0 - a7
+ 2,2,2,2,2,2,2,2, # a8 - af
+ 2,2,2,2,2,2,2,2, # b0 - b7
+ 2,2,2,2,2,2,2,2, # b8 - bf
+ 2,2,2,2,2,2,2,2, # c0 - c7
+ 2,2,2,2,2,2,2,2, # c8 - cf
+ 2,2,2,2,2,2,2,2, # d0 - d7
+ 2,2,2,2,2,2,2,2, # d8 - df
+ 3,3,3,3,3,3,3,3, # e0 - e7
+ 3,3,3,3,3,4,4,4, # e8 - ef
+ 4,4,4,4,4,4,4,4, # f0 - f7
+ 4,4,4,4,4,0,0,0 # f8 - ff
+)
+
+
+SJIS_st = (
+ eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07
+ eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
+ eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart #10-17
+)
+
+SJISCharLenTable = (0, 1, 1, 2, 0, 0)
+
+SJISSMModel = {'classTable': SJIS_cls,
+ 'classFactor': 6,
+ 'stateTable': SJIS_st,
+ 'charLenTable': SJISCharLenTable,
+ 'name': 'Shift_JIS'}
+
+# UCS2-BE
+
+UCS2BE_cls = (
+ 0,0,0,0,0,0,0,0, # 00 - 07
+ 0,0,1,0,0,2,0,0, # 08 - 0f
+ 0,0,0,0,0,0,0,0, # 10 - 17
+ 0,0,0,3,0,0,0,0, # 18 - 1f
+ 0,0,0,0,0,0,0,0, # 20 - 27
+ 0,3,3,3,3,3,0,0, # 28 - 2f
+ 0,0,0,0,0,0,0,0, # 30 - 37
+ 0,0,0,0,0,0,0,0, # 38 - 3f
+ 0,0,0,0,0,0,0,0, # 40 - 47
+ 0,0,0,0,0,0,0,0, # 48 - 4f
+ 0,0,0,0,0,0,0,0, # 50 - 57
+ 0,0,0,0,0,0,0,0, # 58 - 5f
+ 0,0,0,0,0,0,0,0, # 60 - 67
+ 0,0,0,0,0,0,0,0, # 68 - 6f
+ 0,0,0,0,0,0,0,0, # 70 - 77
+ 0,0,0,0,0,0,0,0, # 78 - 7f
+ 0,0,0,0,0,0,0,0, # 80 - 87
+ 0,0,0,0,0,0,0,0, # 88 - 8f
+ 0,0,0,0,0,0,0,0, # 90 - 97
+ 0,0,0,0,0,0,0,0, # 98 - 9f
+ 0,0,0,0,0,0,0,0, # a0 - a7
+ 0,0,0,0,0,0,0,0, # a8 - af
+ 0,0,0,0,0,0,0,0, # b0 - b7
+ 0,0,0,0,0,0,0,0, # b8 - bf
+ 0,0,0,0,0,0,0,0, # c0 - c7
+ 0,0,0,0,0,0,0,0, # c8 - cf
+ 0,0,0,0,0,0,0,0, # d0 - d7
+ 0,0,0,0,0,0,0,0, # d8 - df
+ 0,0,0,0,0,0,0,0, # e0 - e7
+ 0,0,0,0,0,0,0,0, # e8 - ef
+ 0,0,0,0,0,0,0,0, # f0 - f7
+ 0,0,0,0,0,0,4,5 # f8 - ff
+)
+
+UCS2BE_st = (
+ 5, 7, 7,eError, 4, 3,eError,eError,#00-07
+ eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
+ eItsMe,eItsMe, 6, 6, 6, 6,eError,eError,#10-17
+ 6, 6, 6, 6, 6,eItsMe, 6, 6,#18-1f
+ 6, 6, 6, 6, 5, 7, 7,eError,#20-27
+ 5, 8, 6, 6,eError, 6, 6, 6,#28-2f
+ 6, 6, 6, 6,eError,eError,eStart,eStart #30-37
+)
+
+UCS2BECharLenTable = (2, 2, 2, 0, 2, 2)
+
+UCS2BESMModel = {'classTable': UCS2BE_cls,
+ 'classFactor': 6,
+ 'stateTable': UCS2BE_st,
+ 'charLenTable': UCS2BECharLenTable,
+ 'name': 'UTF-16BE'}
+
+# UCS2-LE
+
+UCS2LE_cls = (
+ 0,0,0,0,0,0,0,0, # 00 - 07
+ 0,0,1,0,0,2,0,0, # 08 - 0f
+ 0,0,0,0,0,0,0,0, # 10 - 17
+ 0,0,0,3,0,0,0,0, # 18 - 1f
+ 0,0,0,0,0,0,0,0, # 20 - 27
+ 0,3,3,3,3,3,0,0, # 28 - 2f
+ 0,0,0,0,0,0,0,0, # 30 - 37
+ 0,0,0,0,0,0,0,0, # 38 - 3f
+ 0,0,0,0,0,0,0,0, # 40 - 47
+ 0,0,0,0,0,0,0,0, # 48 - 4f
+ 0,0,0,0,0,0,0,0, # 50 - 57
+ 0,0,0,0,0,0,0,0, # 58 - 5f
+ 0,0,0,0,0,0,0,0, # 60 - 67
+ 0,0,0,0,0,0,0,0, # 68 - 6f
+ 0,0,0,0,0,0,0,0, # 70 - 77
+ 0,0,0,0,0,0,0,0, # 78 - 7f
+ 0,0,0,0,0,0,0,0, # 80 - 87
+ 0,0,0,0,0,0,0,0, # 88 - 8f
+ 0,0,0,0,0,0,0,0, # 90 - 97
+ 0,0,0,0,0,0,0,0, # 98 - 9f
+ 0,0,0,0,0,0,0,0, # a0 - a7
+ 0,0,0,0,0,0,0,0, # a8 - af
+ 0,0,0,0,0,0,0,0, # b0 - b7
+ 0,0,0,0,0,0,0,0, # b8 - bf
+ 0,0,0,0,0,0,0,0, # c0 - c7
+ 0,0,0,0,0,0,0,0, # c8 - cf
+ 0,0,0,0,0,0,0,0, # d0 - d7
+ 0,0,0,0,0,0,0,0, # d8 - df
+ 0,0,0,0,0,0,0,0, # e0 - e7
+ 0,0,0,0,0,0,0,0, # e8 - ef
+ 0,0,0,0,0,0,0,0, # f0 - f7
+ 0,0,0,0,0,0,4,5 # f8 - ff
+)
+
+UCS2LE_st = (
+ 6, 6, 7, 6, 4, 3,eError,eError,#00-07
+ eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f
+ eItsMe,eItsMe, 5, 5, 5,eError,eItsMe,eError,#10-17
+ 5, 5, 5,eError, 5,eError, 6, 6,#18-1f
+ 7, 6, 8, 8, 5, 5, 5,eError,#20-27
+ 5, 5, 5,eError,eError,eError, 5, 5,#28-2f
+ 5, 5, 5,eError, 5,eError,eStart,eStart #30-37
+)
+
+UCS2LECharLenTable = (2, 2, 2, 2, 2, 2)
+
+UCS2LESMModel = {'classTable': UCS2LE_cls,
+ 'classFactor': 6,
+ 'stateTable': UCS2LE_st,
+ 'charLenTable': UCS2LECharLenTable,
+ 'name': 'UTF-16LE'}
+
+# UTF-8
+
+UTF8_cls = (
+ 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value
+ 1,1,1,1,1,1,0,0, # 08 - 0f
+ 1,1,1,1,1,1,1,1, # 10 - 17
+ 1,1,1,0,1,1,1,1, # 18 - 1f
+ 1,1,1,1,1,1,1,1, # 20 - 27
+ 1,1,1,1,1,1,1,1, # 28 - 2f
+ 1,1,1,1,1,1,1,1, # 30 - 37
+ 1,1,1,1,1,1,1,1, # 38 - 3f
+ 1,1,1,1,1,1,1,1, # 40 - 47
+ 1,1,1,1,1,1,1,1, # 48 - 4f
+ 1,1,1,1,1,1,1,1, # 50 - 57
+ 1,1,1,1,1,1,1,1, # 58 - 5f
+ 1,1,1,1,1,1,1,1, # 60 - 67
+ 1,1,1,1,1,1,1,1, # 68 - 6f
+ 1,1,1,1,1,1,1,1, # 70 - 77
+ 1,1,1,1,1,1,1,1, # 78 - 7f
+ 2,2,2,2,3,3,3,3, # 80 - 87
+ 4,4,4,4,4,4,4,4, # 88 - 8f
+ 4,4,4,4,4,4,4,4, # 90 - 97
+ 4,4,4,4,4,4,4,4, # 98 - 9f
+ 5,5,5,5,5,5,5,5, # a0 - a7
+ 5,5,5,5,5,5,5,5, # a8 - af
+ 5,5,5,5,5,5,5,5, # b0 - b7
+ 5,5,5,5,5,5,5,5, # b8 - bf
+ 0,0,6,6,6,6,6,6, # c0 - c7
+ 6,6,6,6,6,6,6,6, # c8 - cf
+ 6,6,6,6,6,6,6,6, # d0 - d7
+ 6,6,6,6,6,6,6,6, # d8 - df
+ 7,8,8,8,8,8,8,8, # e0 - e7
+ 8,8,8,8,8,9,8,8, # e8 - ef
+ 10,11,11,11,11,11,11,11, # f0 - f7
+ 12,13,13,13,14,15,0,0 # f8 - ff
+)
+
+UTF8_st = (
+ eError,eStart,eError,eError,eError,eError, 12, 10,#00-07
+ 9, 11, 8, 7, 6, 5, 4, 3,#08-0f
+ eError,eError,eError,eError,eError,eError,eError,eError,#10-17
+ eError,eError,eError,eError,eError,eError,eError,eError,#18-1f
+ eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27
+ eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f
+ eError,eError, 5, 5, 5, 5,eError,eError,#30-37
+ eError,eError,eError,eError,eError,eError,eError,eError,#38-3f
+ eError,eError,eError, 5, 5, 5,eError,eError,#40-47
+ eError,eError,eError,eError,eError,eError,eError,eError,#48-4f
+ eError,eError, 7, 7, 7, 7,eError,eError,#50-57
+ eError,eError,eError,eError,eError,eError,eError,eError,#58-5f
+ eError,eError,eError,eError, 7, 7,eError,eError,#60-67
+ eError,eError,eError,eError,eError,eError,eError,eError,#68-6f
+ eError,eError, 9, 9, 9, 9,eError,eError,#70-77
+ eError,eError,eError,eError,eError,eError,eError,eError,#78-7f
+ eError,eError,eError,eError,eError, 9,eError,eError,#80-87
+ eError,eError,eError,eError,eError,eError,eError,eError,#88-8f
+ eError,eError, 12, 12, 12, 12,eError,eError,#90-97
+ eError,eError,eError,eError,eError,eError,eError,eError,#98-9f
+ eError,eError,eError,eError,eError, 12,eError,eError,#a0-a7
+ eError,eError,eError,eError,eError,eError,eError,eError,#a8-af
+ eError,eError, 12, 12, 12,eError,eError,eError,#b0-b7
+ eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf
+ eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7
+ eError,eError,eError,eError,eError,eError,eError,eError #c8-cf
+)
+
+UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
+
+UTF8SMModel = {'classTable': UTF8_cls,
+ 'classFactor': 16,
+ 'stateTable': UTF8_st,
+ 'charLenTable': UTF8CharLenTable,
+ 'name': 'UTF-8'}
+
+# flake8: noqa
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Universal charset detector code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 2001\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-# Shy Shalom - original C code\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-import sys\r
-from . import constants\r
-from .charsetprober import CharSetProber\r
-from .compat import wrap_ord\r
-\r
-SAMPLE_SIZE = 64\r
-SB_ENOUGH_REL_THRESHOLD = 1024\r
-POSITIVE_SHORTCUT_THRESHOLD = 0.95\r
-NEGATIVE_SHORTCUT_THRESHOLD = 0.05\r
-SYMBOL_CAT_ORDER = 250\r
-NUMBER_OF_SEQ_CAT = 4\r
-POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1\r
-#NEGATIVE_CAT = 0\r
-\r
-\r
-class SingleByteCharSetProber(CharSetProber):\r
- def __init__(self, model, reversed=False, nameProber=None):\r
- CharSetProber.__init__(self)\r
- self._mModel = model\r
- # TRUE if we need to reverse every pair in the model lookup\r
- self._mReversed = reversed\r
- # Optional auxiliary prober for name decision\r
- self._mNameProber = nameProber\r
- self.reset()\r
-\r
- def reset(self):\r
- CharSetProber.reset(self)\r
- # char order of last character\r
- self._mLastOrder = 255\r
- self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT\r
- self._mTotalSeqs = 0\r
- self._mTotalChar = 0\r
- # characters that fall in our sampling range\r
- self._mFreqChar = 0\r
-\r
- def get_charset_name(self):\r
- if self._mNameProber:\r
- return self._mNameProber.get_charset_name()\r
- else:\r
- return self._mModel['charsetName']\r
-\r
- def feed(self, aBuf):\r
- if not self._mModel['keepEnglishLetter']:\r
- aBuf = self.filter_without_english_letters(aBuf)\r
- aLen = len(aBuf)\r
- if not aLen:\r
- return self.get_state()\r
- for c in aBuf:\r
- order = self._mModel['charToOrderMap'][wrap_ord(c)]\r
- if order < SYMBOL_CAT_ORDER:\r
- self._mTotalChar += 1\r
- if order < SAMPLE_SIZE:\r
- self._mFreqChar += 1\r
- if self._mLastOrder < SAMPLE_SIZE:\r
- self._mTotalSeqs += 1\r
- if not self._mReversed:\r
- i = (self._mLastOrder * SAMPLE_SIZE) + order\r
- model = self._mModel['precedenceMatrix'][i]\r
- else: # reverse the order of the letters in the lookup\r
- i = (order * SAMPLE_SIZE) + self._mLastOrder\r
- model = self._mModel['precedenceMatrix'][i]\r
- self._mSeqCounters[model] += 1\r
- self._mLastOrder = order\r
-\r
- if self.get_state() == constants.eDetecting:\r
- if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD:\r
- cf = self.get_confidence()\r
- if cf > POSITIVE_SHORTCUT_THRESHOLD:\r
- if constants._debug:\r
- sys.stderr.write('%s confidence = %s, we have a'\r
- 'winner\n' %\r
- (self._mModel['charsetName'], cf))\r
- self._mState = constants.eFoundIt\r
- elif cf < NEGATIVE_SHORTCUT_THRESHOLD:\r
- if constants._debug:\r
- sys.stderr.write('%s confidence = %s, below negative'\r
- 'shortcut threshhold %s\n' %\r
- (self._mModel['charsetName'], cf,\r
- NEGATIVE_SHORTCUT_THRESHOLD))\r
- self._mState = constants.eNotMe\r
-\r
- return self.get_state()\r
-\r
- def get_confidence(self):\r
- r = 0.01\r
- if self._mTotalSeqs > 0:\r
- r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs\r
- / self._mModel['mTypicalPositiveRatio'])\r
- r = r * self._mFreqChar / self._mTotalChar\r
- if r >= 1.0:\r
- r = 0.99\r
- return r\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+# Shy Shalom - original C code
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+import sys
+from . import constants
+from .charsetprober import CharSetProber
+from .compat import wrap_ord
+
+SAMPLE_SIZE = 64
+SB_ENOUGH_REL_THRESHOLD = 1024
+POSITIVE_SHORTCUT_THRESHOLD = 0.95
+NEGATIVE_SHORTCUT_THRESHOLD = 0.05
+SYMBOL_CAT_ORDER = 250
+NUMBER_OF_SEQ_CAT = 4
+POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
+#NEGATIVE_CAT = 0
+
+
+class SingleByteCharSetProber(CharSetProber):
+    """Prober that scores a buffer against one single-byte language model.
+
+    Each character is mapped to an "order" through the model's
+    ``charToOrderMap``. Every pair of consecutive orders that are both
+    below ``SAMPLE_SIZE`` is classified through the model's
+    ``precedenceMatrix`` and counted per category; the confidence is
+    computed from those counters.
+    """
+
+    def __init__(self, model, reversed=False, nameProber=None):
+        """Store the model and lookup options, then reset all counters.
+
+        :param model: mapping read through the keys ``charToOrderMap``,
+            ``precedenceMatrix``, ``mTypicalPositiveRatio``,
+            ``keepEnglishLetter`` and ``charsetName``.
+        :param reversed: when true, the two orders of every pair are
+            swapped before the precedence-matrix lookup.
+        :param nameProber: optional prober whose ``get_charset_name()``
+            result is reported instead of the model's own charset name.
+        """
+        CharSetProber.__init__(self)
+        self._mModel = model
+        # TRUE if we need to reverse every pair in the model lookup
+        self._mReversed = reversed
+        # Optional auxiliary prober for name decision
+        self._mNameProber = nameProber
+        self.reset()
+
+    def reset(self):
+        """Reset the base prober and clear every sequence/char counter."""
+        CharSetProber.reset(self)
+        # Order of the last character seen. 255 is >= SAMPLE_SIZE, so the
+        # first character fed after a reset never completes a pair.
+        self._mLastOrder = 255
+        self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
+        self._mTotalSeqs = 0
+        self._mTotalChar = 0
+        # characters that fall in our sampling range
+        self._mFreqChar = 0
+
+    def get_charset_name(self):
+        """Return the name prober's charset name, else the model's."""
+        if self._mNameProber:
+            return self._mNameProber.get_charset_name()
+        return self._mModel['charsetName']
+
+    def feed(self, aBuf):
+        """Update the counters from ``aBuf`` and return the prober state.
+
+        Unless the model sets ``keepEnglishLetter``, the buffer is first
+        passed through ``filter_without_english_letters``. An empty buffer
+        leaves the counters untouched. Once more than
+        ``SB_ENOUGH_REL_THRESHOLD`` pairs have been counted while still
+        detecting, the state is switched to ``constants.eFoundIt`` or
+        ``constants.eNotMe`` when the confidence crosses the positive or
+        negative shortcut threshold.
+
+        :param aBuf: buffer whose items are converted with ``wrap_ord``.
+        :returns: the value of ``self.get_state()`` after processing.
+        """
+        if not self._mModel['keepEnglishLetter']:
+            aBuf = self.filter_without_english_letters(aBuf)
+        if not aBuf:
+            return self.get_state()
+
+        charToOrderMap = self._mModel['charToOrderMap']
+        for c in aBuf:
+            order = charToOrderMap[wrap_ord(c)]
+            if order < SYMBOL_CAT_ORDER:
+                self._mTotalChar += 1
+            if order < SAMPLE_SIZE:
+                self._mFreqChar += 1
+                if self._mLastOrder < SAMPLE_SIZE:
+                    self._mTotalSeqs += 1
+                    if not self._mReversed:
+                        i = (self._mLastOrder * SAMPLE_SIZE) + order
+                    else:  # reverse the order of the letters in the lookup
+                        i = (order * SAMPLE_SIZE) + self._mLastOrder
+                    category = self._mModel['precedenceMatrix'][i]
+                    self._mSeqCounters[category] += 1
+            self._mLastOrder = order
+
+        if (self.get_state() == constants.eDetecting
+                and self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD):
+            cf = self.get_confidence()
+            if cf > POSITIVE_SHORTCUT_THRESHOLD:
+                if constants._debug:
+                    # Adjacent literals are concatenated as-is, so the
+                    # separating space must be written explicitly.
+                    sys.stderr.write('%s confidence = %s, we have a '
+                                     'winner\n' %
+                                     (self._mModel['charsetName'], cf))
+                self._mState = constants.eFoundIt
+            elif cf < NEGATIVE_SHORTCUT_THRESHOLD:
+                if constants._debug:
+                    sys.stderr.write('%s confidence = %s, below negative '
+                                     'shortcut threshold %s\n' %
+                                     (self._mModel['charsetName'], cf,
+                                      NEGATIVE_SHORTCUT_THRESHOLD))
+                self._mState = constants.eNotMe
+
+        return self.get_state()
+
+    def get_confidence(self):
+        """Return the confidence that the fed data matches the model.
+
+        With no counted pairs the result is 0.01. Otherwise it is the
+        share of pairs in ``POSITIVE_CAT``, divided by the model's
+        ``mTypicalPositiveRatio`` and scaled by the ratio of sampled
+        characters to all counted characters; values of 1.0 or more are
+        reported as 0.99.
+        """
+        r = 0.01
+        if self._mTotalSeqs > 0:
+            # A pair is only counted for a character that was also counted
+            # in _mTotalChar, so the divisor below is non-zero here.
+            r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs
+                 / self._mModel['mTypicalPositiveRatio'])
+            r = r * self._mFreqChar / self._mTotalChar
+            if r >= 1.0:
+                r = 0.99
+        return r
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Universal charset detector code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 2001\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-# Shy Shalom - original C code\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from .charsetgroupprober import CharSetGroupProber\r
-from .sbcharsetprober import SingleByteCharSetProber\r
-from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,\r
- Latin5CyrillicModel, MacCyrillicModel,\r
- Ibm866Model, Ibm855Model)\r
-from .langgreekmodel import Latin7GreekModel, Win1253GreekModel\r
-from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel\r
-from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel\r
-from .langthaimodel import TIS620ThaiModel\r
-from .langhebrewmodel import Win1255HebrewModel\r
-from .hebrewprober import HebrewProber\r
-\r
-\r
-class SBCSGroupProber(CharSetGroupProber):\r
- def __init__(self):\r
- CharSetGroupProber.__init__(self)\r
- self._mProbers = [\r
- SingleByteCharSetProber(Win1251CyrillicModel),\r
- SingleByteCharSetProber(Koi8rModel),\r
- SingleByteCharSetProber(Latin5CyrillicModel),\r
- SingleByteCharSetProber(MacCyrillicModel),\r
- SingleByteCharSetProber(Ibm866Model),\r
- SingleByteCharSetProber(Ibm855Model),\r
- SingleByteCharSetProber(Latin7GreekModel),\r
- SingleByteCharSetProber(Win1253GreekModel),\r
- SingleByteCharSetProber(Latin5BulgarianModel),\r
- SingleByteCharSetProber(Win1251BulgarianModel),\r
- SingleByteCharSetProber(Latin2HungarianModel),\r
- SingleByteCharSetProber(Win1250HungarianModel),\r
- SingleByteCharSetProber(TIS620ThaiModel),\r
- ]\r
- hebrewProber = HebrewProber()\r
- logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel,\r
- False, hebrewProber)\r
- visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True,\r
- hebrewProber)\r
- hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber)\r
- self._mProbers.extend([hebrewProber, logicalHebrewProber,\r
- visualHebrewProber])\r
-\r
- self.reset()\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+# Shy Shalom - original C code
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from .charsetgroupprober import CharSetGroupProber
+from .sbcharsetprober import SingleByteCharSetProber
+from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
+ Latin5CyrillicModel, MacCyrillicModel,
+ Ibm866Model, Ibm855Model)
+from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
+from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
+from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
+from .langthaimodel import TIS620ThaiModel
+from .langhebrewmodel import Win1255HebrewModel
+from .hebrewprober import HebrewProber
+
+
+class SBCSGroupProber(CharSetGroupProber):
+    """Group prober holding one prober per single-byte language model."""
+
+    def __init__(self):
+        """Build the list of member probers and reset the group."""
+        CharSetGroupProber.__init__(self)
+
+        # Models probed with the default (non-reversed) pair lookup, in
+        # the order their probers are stored.
+        plainModels = (
+            Win1251CyrillicModel,
+            Koi8rModel,
+            Latin5CyrillicModel,
+            MacCyrillicModel,
+            Ibm866Model,
+            Ibm855Model,
+            Latin7GreekModel,
+            Win1253GreekModel,
+            Latin5BulgarianModel,
+            Win1251BulgarianModel,
+            Latin2HungarianModel,
+            Win1250HungarianModel,
+            TIS620ThaiModel,
+        )
+        self._mProbers = [SingleByteCharSetProber(m) for m in plainModels]
+
+        # The Hebrew model is probed twice, without and with the reversed
+        # pair lookup; both probers use one shared HebrewProber as their
+        # name prober, and that HebrewProber is handed both of them.
+        nameDecider = HebrewProber()
+        logicalProber = SingleByteCharSetProber(Win1255HebrewModel, False,
+                                                nameDecider)
+        visualProber = SingleByteCharSetProber(Win1255HebrewModel, True,
+                                               nameDecider)
+        nameDecider.set_model_probers(logicalProber, visualProber)
+        self._mProbers.extend((nameDecider, logicalProber, visualProber))
+
+        self.reset()
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-import sys\r
-from .mbcharsetprober import MultiByteCharSetProber\r
-from .codingstatemachine import CodingStateMachine\r
-from .chardistribution import SJISDistributionAnalysis\r
-from .jpcntx import SJISContextAnalysis\r
-from .mbcssm import SJISSMModel\r
-from . import constants\r
-\r
-\r
-class SJISProber(MultiByteCharSetProber):\r
- def __init__(self):\r
- MultiByteCharSetProber.__init__(self)\r
- self._mCodingSM = CodingStateMachine(SJISSMModel)\r
- self._mDistributionAnalyzer = SJISDistributionAnalysis()\r
- self._mContextAnalyzer = SJISContextAnalysis()\r
- self.reset()\r
-\r
- def reset(self):\r
- MultiByteCharSetProber.reset(self)\r
- self._mContextAnalyzer.reset()\r
-\r
- def get_charset_name(self):\r
- return "SHIFT_JIS"\r
-\r
- def feed(self, aBuf):\r
- aLen = len(aBuf)\r
- for i in range(0, aLen):\r
- codingState = self._mCodingSM.next_state(aBuf[i])\r
- if codingState == constants.eError:\r
- if constants._debug:\r
- sys.stderr.write(self.get_charset_name()\r
- + ' prober hit error at byte ' + str(i)\r
- + '\n')\r
- self._mState = constants.eNotMe\r
- break\r
- elif codingState == constants.eItsMe:\r
- self._mState = constants.eFoundIt\r
- break\r
- elif codingState == constants.eStart:\r
- charLen = self._mCodingSM.get_current_charlen()\r
- if i == 0:\r
- self._mLastChar[1] = aBuf[0]\r
- self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],\r
- charLen)\r
- self._mDistributionAnalyzer.feed(self._mLastChar, charLen)\r
- else:\r
- self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3\r
- - charLen], charLen)\r
- self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],\r
- charLen)\r
-\r
- self._mLastChar[0] = aBuf[aLen - 1]\r
-\r
- if self.get_state() == constants.eDetecting:\r
- if (self._mContextAnalyzer.got_enough_data() and\r
- (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):\r
- self._mState = constants.eFoundIt\r
-\r
- return self.get_state()\r
-\r
- def get_confidence(self):\r
- contxtCf = self._mContextAnalyzer.get_confidence()\r
- distribCf = self._mDistributionAnalyzer.get_confidence()\r
- return max(contxtCf, distribCf)\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+import sys
+from .mbcharsetprober import MultiByteCharSetProber
+from .codingstatemachine import CodingStateMachine
+from .chardistribution import SJISDistributionAnalysis
+from .jpcntx import SJISContextAnalysis
+from .mbcssm import SJISSMModel
+from . import constants
+
+
+# Prober for Shift_JIS: drives a coding state machine built from SJISSMModel
+# and scores the input with a distribution analyser and a context analyser.
+class SJISProber(MultiByteCharSetProber):
+ def __init__(self):
+ # Create the state machine and both analysers, then reset.
+ MultiByteCharSetProber.__init__(self)
+ self._mCodingSM = CodingStateMachine(SJISSMModel)
+ self._mDistributionAnalyzer = SJISDistributionAnalysis()
+ self._mContextAnalyzer = SJISContextAnalysis()
+ self.reset()
+
+ def reset(self):
+ # Reset the base prober and, in addition, the context analyser.
+ MultiByteCharSetProber.reset(self)
+ self._mContextAnalyzer.reset()
+
+ def get_charset_name(self):
+ # Name reported for this prober's encoding.
+ return "SHIFT_JIS"
+
+ def feed(self, aBuf):
+ # Push every element of aBuf through the coding state machine, hand
+ # the data to both analysers, and return the resulting prober state.
+ aLen = len(aBuf)
+ for i in range(0, aLen):
+ codingState = self._mCodingSM.next_state(aBuf[i])
+ if codingState == constants.eError:
+ # The state machine reported an error: mark this prober as
+ # eNotMe (logging the byte offset when debugging is on).
+ if constants._debug:
+ sys.stderr.write(self.get_charset_name()
+ + ' prober hit error at byte ' + str(i)
+ + '\n')
+ self._mState = constants.eNotMe
+ break
+ elif codingState == constants.eItsMe:
+ # The state machine is certain: report eFoundIt.
+ self._mState = constants.eFoundIt
+ break
+ elif codingState == constants.eStart:
+ # Back in the start state; presumably a complete character was
+ # just consumed, its length given by get_current_charlen().
+ charLen = self._mCodingSM.get_current_charlen()
+ if i == 0:
+ # First element of this buffer: pair it with _mLastChar[0],
+ # which is assigned from the buffer's last element below.
+ self._mLastChar[1] = aBuf[0]
+ self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],
+ charLen)
+ self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
+ else:
+ # Otherwise both analysers are fed slices of aBuf itself.
+ self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3
+ - charLen], charLen)
+ self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
+ charLen)
+
+ # Remember the last element of this buffer for a later feed() call.
+ self._mLastChar[0] = aBuf[aLen - 1]
+
+ # While still detecting, settle early once the context analyser has
+ # enough data and the confidence exceeds SHORTCUT_THRESHOLD.
+ if self.get_state() == constants.eDetecting:
+ if (self._mContextAnalyzer.got_enough_data() and
+ (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
+ self._mState = constants.eFoundIt
+
+ return self.get_state()
+
+ def get_confidence(self):
+ # The larger of the context and distribution confidences.
+ contxtCf = self._mContextAnalyzer.get_confidence()
+ distribCf = self._mDistributionAnalyzer.get_confidence()
+ return max(contxtCf, distribCf)
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is Mozilla Universal charset detector code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 2001\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-# Shy Shalom - original C code\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from . import constants\r
-import sys\r
-import codecs\r
-from .latin1prober import Latin1Prober # windows-1252\r
-from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets\r
-from .sbcsgroupprober import SBCSGroupProber # single-byte character sets\r
-from .escprober import EscCharSetProber # ISO-2122, etc.\r
-import re\r
-\r
-MINIMUM_THRESHOLD = 0.20\r
-ePureAscii = 0\r
-eEscAscii = 1\r
-eHighbyte = 2\r
-\r
-\r
-class UniversalDetector:\r
- def __init__(self):\r
- self._highBitDetector = re.compile(b'[\x80-\xFF]')\r
- self._escDetector = re.compile(b'(\033|~{)')\r
- self._mEscCharSetProber = None\r
- self._mCharSetProbers = []\r
- self.reset()\r
-\r
- def reset(self):\r
- self.result = {'encoding': None, 'confidence': 0.0}\r
- self.done = False\r
- self._mStart = True\r
- self._mGotData = False\r
- self._mInputState = ePureAscii\r
- self._mLastChar = b''\r
- if self._mEscCharSetProber:\r
- self._mEscCharSetProber.reset()\r
- for prober in self._mCharSetProbers:\r
- prober.reset()\r
-\r
- def feed(self, aBuf):\r
- if self.done:\r
- return\r
-\r
- aLen = len(aBuf)\r
- if not aLen:\r
- return\r
-\r
- if not self._mGotData:\r
- # If the data starts with BOM, we know it is UTF\r
- if aBuf[:3] == codecs.BOM:\r
- # EF BB BF UTF-8 with BOM\r
- self.result = {'encoding': "UTF-8", 'confidence': 1.0}\r
- elif aBuf[:4] in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):\r
- # FF FE 00 00 UTF-32, little-endian BOM\r
- # 00 00 FE FF UTF-32, big-endian BOM\r
- self.result = {'encoding': "UTF-32", 'confidence': 1.0}\r
- elif aBuf[:4] == b'\xFE\xFF\x00\x00':\r
- # FE FF 00 00 UCS-4, unusual octet order BOM (3412)\r
- self.result = {\r
- 'encoding': "X-ISO-10646-UCS-4-3412",\r
- 'confidence': 1.0\r
- }\r
- elif aBuf[:4] == b'\x00\x00\xFF\xFE':\r
- # 00 00 FF FE UCS-4, unusual octet order BOM (2143)\r
- self.result = {\r
- 'encoding': "X-ISO-10646-UCS-4-2143",\r
- 'confidence': 1.0\r
- }\r
- elif aBuf[:2] == codecs.BOM_LE or aBuf[:2] == codecs.BOM_BE:\r
- # FF FE UTF-16, little endian BOM\r
- # FE FF UTF-16, big endian BOM\r
- self.result = {'encoding': "UTF-16", 'confidence': 1.0}\r
-\r
- self._mGotData = True\r
- if self.result['encoding'] and (self.result['confidence'] > 0.0):\r
- self.done = True\r
- return\r
-\r
- if self._mInputState == ePureAscii:\r
- if self._highBitDetector.search(aBuf):\r
- self._mInputState = eHighbyte\r
- elif ((self._mInputState == ePureAscii) and\r
- self._escDetector.search(self._mLastChar + aBuf)):\r
- self._mInputState = eEscAscii\r
-\r
- self._mLastChar = aBuf[-1:]\r
-\r
- if self._mInputState == eEscAscii:\r
- if not self._mEscCharSetProber:\r
- self._mEscCharSetProber = EscCharSetProber()\r
- if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt:\r
- self.result = {\r
- 'encoding': self._mEscCharSetProber.get_charset_name(),\r
- 'confidence': self._mEscCharSetProber.get_confidence()\r
- }\r
- self.done = True\r
- elif self._mInputState == eHighbyte:\r
- if not self._mCharSetProbers:\r
- self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(),\r
- Latin1Prober()]\r
- for prober in self._mCharSetProbers:\r
- if prober.feed(aBuf) == constants.eFoundIt:\r
- self.result = {'encoding': prober.get_charset_name(),\r
- 'confidence': prober.get_confidence()}\r
- self.done = True\r
- break\r
-\r
- def close(self):\r
- if self.done:\r
- return\r
- if not self._mGotData:\r
- if constants._debug:\r
- sys.stderr.write('no data received!\n')\r
- return\r
- self.done = True\r
-\r
- if self._mInputState == ePureAscii:\r
- self.result = {'encoding': 'ascii', 'confidence': 1.0}\r
- return self.result\r
-\r
- if self._mInputState == eHighbyte:\r
- proberConfidence = None\r
- maxProberConfidence = 0.0\r
- maxProber = None\r
- for prober in self._mCharSetProbers:\r
- if not prober:\r
- continue\r
- proberConfidence = prober.get_confidence()\r
- if proberConfidence > maxProberConfidence:\r
- maxProberConfidence = proberConfidence\r
- maxProber = prober\r
- if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD):\r
- self.result = {'encoding': maxProber.get_charset_name(),\r
- 'confidence': maxProber.get_confidence()}\r
- return self.result\r
-\r
- if constants._debug:\r
- sys.stderr.write('no probers hit minimum threshhold\n')\r
- for prober in self._mCharSetProbers[0].mProbers:\r
- if not prober:\r
- continue\r
- sys.stderr.write('%s confidence = %s\n' %\r
- (prober.get_charset_name(),\r
- prober.get_confidence()))\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+# Shy Shalom - original C code
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from . import constants
+import sys
+import codecs
+from .latin1prober import Latin1Prober # windows-1252
+from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets
+from .sbcsgroupprober import SBCSGroupProber # single-byte character sets
+from .escprober import EscCharSetProber # ISO-2022, etc.
+import re
+
+MINIMUM_THRESHOLD = 0.20
+ePureAscii = 0
+eEscAscii = 1
+eHighbyte = 2
+
+
+class UniversalDetector:
+    """Incrementally guess the character encoding of a byte stream.
+
+    Call :meth:`feed` with successive chunks of bytes and then :meth:`close`.
+    The current guess is kept in ``self.result`` as a dict with the keys
+    ``'encoding'`` and ``'confidence'``; ``self.done`` becomes true once a
+    decision has been made. :meth:`reset` makes the instance reusable.
+    """
+
+    def __init__(self):
+        # Any byte with the high bit set.
+        self._highBitDetector = re.compile(b'[\x80-\xFF]')
+        # ESC, or the two-byte sequence "~{".
+        self._escDetector = re.compile(b'(\033|~{)')
+        # Probers are created lazily by feed().
+        self._mEscCharSetProber = None
+        self._mCharSetProbers = []
+        self.reset()
+
+    def reset(self):
+        """Return the detector, and any probers already created, to the
+        initial state."""
+        self.result = {'encoding': None, 'confidence': 0.0}
+        self.done = False
+        self._mStart = True
+        self._mGotData = False
+        self._mInputState = ePureAscii
+        self._mLastChar = b''
+        if self._mEscCharSetProber:
+            self._mEscCharSetProber.reset()
+        for prober in self._mCharSetProbers:
+            prober.reset()
+
+    def feed(self, aBuf):
+        """Examine the next chunk of bytes.
+
+        Does nothing when detection is already finished or ``aBuf`` is
+        empty. On the first non-empty chunk a byte-order mark, if present,
+        decides the encoding immediately. Otherwise the chunk is classified
+        (pure ASCII, escape sequences, high bytes) and passed to the
+        matching probers. Always returns ``None``; read ``self.result``.
+        """
+        if self.done:
+            return
+
+        aLen = len(aBuf)
+        if not aLen:
+            return
+
+        if not self._mGotData:
+            # If the data starts with BOM, we know it is UTF
+            # NOTE: this must be codecs.BOM_UTF8 (3 bytes). codecs.BOM is the
+            # 2-byte native-order UTF-16 mark, so a 3-byte slice never equals
+            # it, while a bare 2-byte UTF-16 BOM buffer would wrongly match.
+            if aBuf[:3] == codecs.BOM_UTF8:
+                # EF BB BF  UTF-8 with BOM
+                self.result = {'encoding': "UTF-8", 'confidence': 1.0}
+            elif aBuf[:4] == codecs.BOM_UTF32_LE:
+                # FF FE 00 00  UTF-32, little-endian BOM
+                self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}
+            elif aBuf[:4] == codecs.BOM_UTF32_BE:
+                # 00 00 FE FF  UTF-32, big-endian BOM
+                self.result = {'encoding': "UTF-32BE", 'confidence': 1.0}
+            elif aBuf[:4] == b'\xFE\xFF\x00\x00':
+                # FE FF 00 00  UCS-4, unusual octet order BOM (3412)
+                self.result = {
+                    'encoding': "X-ISO-10646-UCS-4-3412",
+                    'confidence': 1.0
+                }
+            elif aBuf[:4] == b'\x00\x00\xFF\xFE':
+                # 00 00 FF FE  UCS-4, unusual octet order BOM (2143)
+                self.result = {
+                    'encoding': "X-ISO-10646-UCS-4-2143",
+                    'confidence': 1.0
+                }
+            elif aBuf[:2] == codecs.BOM_LE:
+                # FF FE  UTF-16, little endian BOM
+                self.result = {'encoding': "UTF-16LE", 'confidence': 1.0}
+            elif aBuf[:2] == codecs.BOM_BE:
+                # FE FF  UTF-16, big endian BOM
+                self.result = {'encoding': "UTF-16BE", 'confidence': 1.0}
+
+        self._mGotData = True
+        if self.result['encoding'] and (self.result['confidence'] > 0.0):
+            self.done = True
+            return
+
+        # The input state only ever leaves ePureAscii; high bytes take
+        # precedence over escape sequences.
+        if self._mInputState == ePureAscii:
+            if self._highBitDetector.search(aBuf):
+                self._mInputState = eHighbyte
+            elif ((self._mInputState == ePureAscii) and
+                    self._escDetector.search(self._mLastChar + aBuf)):
+                self._mInputState = eEscAscii
+
+        # Keep the final byte so "~{" split across two chunks is still seen.
+        self._mLastChar = aBuf[-1:]
+
+        if self._mInputState == eEscAscii:
+            if not self._mEscCharSetProber:
+                self._mEscCharSetProber = EscCharSetProber()
+            if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt:
+                self.result = {'encoding': self._mEscCharSetProber.get_charset_name(),
+                               'confidence': self._mEscCharSetProber.get_confidence()}
+                self.done = True
+        elif self._mInputState == eHighbyte:
+            if not self._mCharSetProbers:
+                self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(),
+                                         Latin1Prober()]
+            for prober in self._mCharSetProbers:
+                if prober.feed(aBuf) == constants.eFoundIt:
+                    self.result = {'encoding': prober.get_charset_name(),
+                                   'confidence': prober.get_confidence()}
+                    self.done = True
+                    break
+
+    def close(self):
+        """Finish detection and settle ``self.result``.
+
+        Returns ``self.result`` when the input was pure ASCII or when a
+        high-byte prober exceeds ``MINIMUM_THRESHOLD``; returns ``None`` in
+        every other case (already done, no data fed, or no prober confident
+        enough), leaving ``self.result`` as it was.
+        """
+        if self.done:
+            return
+        if not self._mGotData:
+            if constants._debug:
+                sys.stderr.write('no data received!\n')
+            return
+        self.done = True
+
+        if self._mInputState == ePureAscii:
+            self.result = {'encoding': 'ascii', 'confidence': 1.0}
+            return self.result
+
+        if self._mInputState == eHighbyte:
+            proberConfidence = None
+            maxProberConfidence = 0.0
+            maxProber = None
+            # Pick the prober with the highest confidence.
+            for prober in self._mCharSetProbers:
+                if not prober:
+                    continue
+                proberConfidence = prober.get_confidence()
+                if proberConfidence > maxProberConfidence:
+                    maxProberConfidence = proberConfidence
+                    maxProber = prober
+            if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD):
+                self.result = {'encoding': maxProber.get_charset_name(),
+                               'confidence': maxProber.get_confidence()}
+                return self.result
+
+        if constants._debug:
+            sys.stderr.write('no probers hit minimum threshhold\n')
+            # Group probers keep their children in ``_mProbers`` (there is
+            # no ``mProbers`` attribute). The prober list is empty when no
+            # high-byte data was ever fed, so guard the [0] lookup.
+            if self._mCharSetProbers:
+                for prober in self._mCharSetProbers[0]._mProbers:
+                    if not prober:
+                        continue
+                    sys.stderr.write('%s confidence = %s\n' %
+                                     (prober.get_charset_name(),
+                                      prober.get_confidence()))
-######################## BEGIN LICENSE BLOCK ########################\r
-# The Original Code is mozilla.org code.\r
-#\r
-# The Initial Developer of the Original Code is\r
-# Netscape Communications Corporation.\r
-# Portions created by the Initial Developer are Copyright (C) 1998\r
-# the Initial Developer. All Rights Reserved.\r
-#\r
-# Contributor(s):\r
-# Mark Pilgrim - port to Python\r
-#\r
-# This library is free software; you can redistribute it and/or\r
-# modify it under the terms of the GNU Lesser General Public\r
-# License as published by the Free Software Foundation; either\r
-# version 2.1 of the License, or (at your option) any later version.\r
-#\r
-# This library is distributed in the hope that it will be useful,\r
-# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
-# Lesser General Public License for more details.\r
-#\r
-# You should have received a copy of the GNU Lesser General Public\r
-# License along with this library; if not, write to the Free Software\r
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
-# 02110-1301 USA\r
-######################### END LICENSE BLOCK #########################\r
-\r
-from . import constants\r
-from .charsetprober import CharSetProber\r
-from .codingstatemachine import CodingStateMachine\r
-from .mbcssm import UTF8SMModel\r
-\r
-ONE_CHAR_PROB = 0.5\r
-\r
-\r
-class UTF8Prober(CharSetProber):\r
- def __init__(self):\r
- CharSetProber.__init__(self)\r
- self._mCodingSM = CodingStateMachine(UTF8SMModel)\r
- self.reset()\r
-\r
- def reset(self):\r
- CharSetProber.reset(self)\r
- self._mCodingSM.reset()\r
- self._mNumOfMBChar = 0\r
-\r
- def get_charset_name(self):\r
- return "utf-8"\r
-\r
- def feed(self, aBuf):\r
- for c in aBuf:\r
- codingState = self._mCodingSM.next_state(c)\r
- if codingState == constants.eError:\r
- self._mState = constants.eNotMe\r
- break\r
- elif codingState == constants.eItsMe:\r
- self._mState = constants.eFoundIt\r
- break\r
- elif codingState == constants.eStart:\r
- if self._mCodingSM.get_current_charlen() >= 2:\r
- self._mNumOfMBChar += 1\r
-\r
- if self.get_state() == constants.eDetecting:\r
- if self.get_confidence() > constants.SHORTCUT_THRESHOLD:\r
- self._mState = constants.eFoundIt\r
-\r
- return self.get_state()\r
-\r
- def get_confidence(self):\r
- unlike = 0.99\r
- if self._mNumOfMBChar < 6:\r
- for i in range(0, self._mNumOfMBChar):\r
- unlike = unlike * ONE_CHAR_PROB\r
- return 1.0 - unlike\r
- else:\r
- return unlike\r
+######################## BEGIN LICENSE BLOCK ########################
+# The Original Code is mozilla.org code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 1998
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Mark Pilgrim - port to Python
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from . import constants
+from .charsetprober import CharSetProber
+from .codingstatemachine import CodingStateMachine
+from .mbcssm import UTF8SMModel
+
+ONE_CHAR_PROB = 0.5
+
+
+# Prober for UTF-8: runs the input through a coding state machine built from
+# UTF8SMModel and counts the multi-byte characters it sees.
+class UTF8Prober(CharSetProber):
+ def __init__(self):
+ # Create the state machine, then reset.
+ CharSetProber.__init__(self)
+ self._mCodingSM = CodingStateMachine(UTF8SMModel)
+ self.reset()
+
+ def reset(self):
+ # Reset the base prober and state machine; clear the multi-byte count.
+ CharSetProber.reset(self)
+ self._mCodingSM.reset()
+ self._mNumOfMBChar = 0
+
+ def get_charset_name(self):
+ # Name reported for this prober's encoding.
+ return "utf-8"
+
+ def feed(self, aBuf):
+ # Push every element of aBuf through the state machine and return the
+ # resulting prober state.
+ for c in aBuf:
+ codingState = self._mCodingSM.next_state(c)
+ if codingState == constants.eError:
+ # The state machine reported an error: mark as eNotMe.
+ self._mState = constants.eNotMe
+ break
+ elif codingState == constants.eItsMe:
+ # The state machine is certain: report eFoundIt.
+ self._mState = constants.eFoundIt
+ break
+ elif codingState == constants.eStart:
+ # Back in the start state: count the character when its length
+ # is two or more.
+ if self._mCodingSM.get_current_charlen() >= 2:
+ self._mNumOfMBChar += 1
+
+ # While still detecting, settle early once the confidence exceeds
+ # SHORTCUT_THRESHOLD.
+ if self.get_state() == constants.eDetecting:
+ if self.get_confidence() > constants.SHORTCUT_THRESHOLD:
+ self._mState = constants.eFoundIt
+
+ return self.get_state()
+
+ def get_confidence(self):
+ # With fewer than six multi-byte characters seen, start from 0.99,
+ # multiply by ONE_CHAR_PROB once per such character, and return one
+ # minus that product. With six or more, return 0.99.
+ unlike = 0.99
+ if self._mNumOfMBChar < 6:
+ for i in range(0, self._mNumOfMBChar):
+ unlike = unlike * ONE_CHAR_PROB
+ return 1.0 - unlike
+ else:
+ return unlike
-# urllib3/__init__.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
"""
urllib3 - Thread-safe connection pooling and re-using.
"""
from .filepost import encode_multipart_formdata
from .poolmanager import PoolManager, ProxyManager, proxy_from_url
from .response import HTTPResponse
-from .util import make_headers, get_host, Timeout
+from .util.request import make_headers
+from .util.url import get_host
+from .util.timeout import Timeout
+from .util.retry import Retry
# Set default logging handler to avoid "No handler found" warnings.
handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
logger.addHandler(handler)
logger.setLevel(level)
- logger.debug('Added an stderr logging handler to logger: %s' % __name__)
+ logger.debug('Added a stderr logging handler to logger: %s' % __name__)
return handler
# ... Clean up.
del NullHandler
+
+
+# Set security warning to only go off once by default.
+import warnings
+warnings.simplefilter('module', exceptions.SecurityWarning)
+
+def disable_warnings(category=exceptions.HTTPWarning):
+ """
+ Helper for quickly disabling all urllib3 warnings.
+
+ :param category:
+ Warning class to silence; defaults to ``exceptions.HTTPWarning``.
+ An ``'ignore'`` filter for it is installed with
+ ``warnings.simplefilter``.
+ """
+ warnings.simplefilter('ignore', category)
-# urllib3/_collections.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
+from collections import Mapping, MutableMapping
+try:
+ from threading import RLock
+except ImportError: # Platform-specific: No threads available
+ # Stand-in defined only when ``threading.RLock`` cannot be imported: a
+ # context manager whose enter and exit do nothing, so ``with lock:``
+ # statements still work (without providing any locking).
+ class RLock:
+ def __enter__(self):
+ pass
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ pass
-from collections import MutableMapping
-from threading import RLock
try: # Python 2.7+
from collections import OrderedDict
except ImportError:
from .packages.ordered_dict import OrderedDict
+from .packages.six import itervalues
-__all__ = ['RecentlyUsedContainer']
+__all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict']
_Null = object()
def keys(self):
with self.lock:
return self._container.keys()
+
+
+class HTTPHeaderDict(MutableMapping):
+    """
+    :param headers:
+        An iterable of field-value pairs. Must not contain multiple field names
+        when compared case-insensitively.
+
+    :param kwargs:
+        Additional field-value pairs to pass in to ``dict.update``.
+
+    A ``dict`` like container for storing HTTP Headers.
+
+    Field names are stored and compared case-insensitively in compliance with
+    RFC 7230. Iteration provides the first case-sensitive key seen for each
+    case-insensitive pair.
+
+    Using ``__setitem__`` syntax overwrites fields that compare equal
+    case-insensitively in order to maintain ``dict``'s api. For fields that
+    compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add``
+    in a loop.
+
+    If multiple fields that are equal case-insensitively are passed to the
+    constructor or ``.update``, the behavior is undefined and some will be
+    lost.
+
+    >>> headers = HTTPHeaderDict()
+    >>> headers.add('Set-Cookie', 'foo=bar')
+    >>> headers.add('set-cookie', 'baz=quxx')
+    >>> headers['content-length'] = '7'
+    >>> headers['SET-cookie']
+    'foo=bar, baz=quxx'
+    >>> headers['Content-Length']
+    '7'
+
+    If you want to access the raw headers with their original casing
+    for debugging purposes you can access the private ``._data`` attribute
+    which is a normal python ``dict`` that maps the case-insensitive key to a
+    list of tuples stored as (case-sensitive-original-name, value). Using the
+    structure from above as our example:
+
+    >>> headers._data
+    {'set-cookie': [('Set-Cookie', 'foo=bar'), ('set-cookie', 'baz=quxx')],
+    'content-length': [('content-length', '7')]}
+    """
+
+    def __init__(self, headers=None, **kwargs):
+        # Maps lower-cased field name -> list of (original name, value).
+        self._data = {}
+        if headers is None:
+            headers = {}
+        self.update(headers, **kwargs)
+
+    def add(self, key, value):
+        """Adds a (name, value) pair, doesn't overwrite the value if it already
+        exists.
+
+        >>> headers = HTTPHeaderDict(foo='bar')
+        >>> headers.add('Foo', 'baz')
+        >>> headers['foo']
+        'bar, baz'
+        """
+        self._data.setdefault(key.lower(), []).append((key, value))
+
+    def getlist(self, key):
+        """Returns a list of all the values for the named field. Returns an
+        empty list if the key doesn't exist."""
+        # Read the stored values directly rather than re-splitting the joined
+        # string on ', ': a single value may itself contain a comma followed
+        # by a space (for example the date in a Set-Cookie ``expires``
+        # attribute) and must stay in one piece.
+        return [value for _, value in self._data.get(key.lower(), [])]
+
+    def copy(self):
+        """Return a new ``HTTPHeaderDict`` holding the same (name, value)
+        pairs, original casing included."""
+        h = HTTPHeaderDict()
+        for key in self._data:
+            for rawkey, value in self._data[key]:
+                h.add(rawkey, value)
+        return h
+
+    def __eq__(self, other):
+        # Anything that is not a Mapping is unequal. Otherwise compare the
+        # joined values keyed by lower-cased field name.
+        if not isinstance(other, Mapping):
+            return False
+        other = HTTPHeaderDict(other)
+        return dict((k1, self[k1]) for k1 in self._data) == \
+            dict((k2, other[k2]) for k2 in other._data)
+
+    def __getitem__(self, key):
+        # All values stored for the field, joined with ', '. Raises KeyError
+        # for an unknown field.
+        values = self._data[key.lower()]
+        return ', '.join(value[1] for value in values)
+
+    def __setitem__(self, key, value):
+        # Replaces every value previously stored for the field.
+        self._data[key.lower()] = [(key, value)]
+
+    def __delitem__(self, key):
+        del self._data[key.lower()]
+
+    def __len__(self):
+        # Number of distinct (case-insensitive) field names.
+        return len(self._data)
+
+    def __iter__(self):
+        # Yield the original casing of the first name stored for each field.
+        for headers in itervalues(self._data):
+            yield headers[0][0]
+
+    def __repr__(self):
+        return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
-# urllib3/connection.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
+import datetime
+import sys
import socket
from socket import timeout as SocketTimeout
+import warnings
-try: # Python 3
- from http.client import HTTPConnection, HTTPException
+try: # Python 3
+ from http.client import HTTPConnection as _HTTPConnection, HTTPException
except ImportError:
- from httplib import HTTPConnection, HTTPException
+ from httplib import HTTPConnection as _HTTPConnection, HTTPException
+
class DummyConnection(object):
"Used to detect a failed ConnectionCls import."
pass
-try: # Compiled with SSL?
- ssl = None
+
+try: # Compiled with SSL?
HTTPSConnection = DummyConnection
+ import ssl
+ BaseSSLError = ssl.SSLError
+except (ImportError, AttributeError): # Platform-specific: No SSL.
+ ssl = None
class BaseSSLError(BaseException):
pass
- try: # Python 3
- from http.client import HTTPSConnection
- except ImportError:
- from httplib import HTTPSConnection
-
- import ssl
- BaseSSLError = ssl.SSLError
-
-except (ImportError, AttributeError): # Platform-specific: No SSL.
- pass
from .exceptions import (
ConnectTimeoutError,
+ SystemTimeWarning,
)
from .packages.ssl_match_hostname import match_hostname
-from .util import (
- assert_fingerprint,
+from .packages import six
+
+from .util.ssl_ import (
resolve_cert_reqs,
resolve_ssl_version,
ssl_wrap_socket,
+ assert_fingerprint,
)
+from .util import connection
+
+
+port_by_scheme = {
+ 'http': 80,
+ 'https': 443,
+}
+
+RECENT_DATE = datetime.date(2014, 1, 1)
+
+
+class HTTPConnection(_HTTPConnection, object):
+ """
+ Based on httplib.HTTPConnection but provides an extra constructor
+ backwards-compatibility layer between older and newer Pythons.
+
+ Additional keyword parameters are used to configure attributes of the connection.
+ Accepted parameters include:
+
+ - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool`
+ - ``source_address``: Set the source address for the current connection.
+
+ .. note:: This is ignored for Python 2.6. It is only applied for 2.7 and 3.x
+
+ - ``socket_options``: Set specific options on the underlying socket. If not specified, then
+ defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling
+ Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy.
+
+ For example, if you wish to enable TCP Keep Alive in addition to the defaults,
+ you might pass::
+
+ HTTPConnection.default_socket_options + [
+ (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
+ ]
+
+ Or you may want to disable the defaults by passing an empty list (e.g., ``[]``).
+ """
+
+ # Port used for the 'http' scheme (taken from port_by_scheme).
+ default_port = port_by_scheme['http']
+
+ #: Disable Nagle's algorithm by default.
+ #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]``
+ default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]
+
+ #: Whether this connection verifies the host's certificate.
+ is_verified = False
+
+ # Normalises the keyword arguments for the running Python version, records
+ # the urllib3-specific ones on the instance, and then delegates to the
+ # standard-library constructor.
+ def __init__(self, *args, **kw):
+ # On Python 3 the 'strict' keyword is dropped before delegating.
+ if six.PY3: # Python 3
+ kw.pop('strict', None)
+
+ # Pre-set source_address in case we have an older Python like 2.6.
+ self.source_address = kw.get('source_address')
+
+ if sys.version_info < (2, 7): # Python 2.6
+ # _HTTPConnection on Python 2.6 will balk at this keyword arg, but
+ # not newer versions. We can still use it when creating a
+ # connection though, so we pop it *after* we have saved it as
+ # self.source_address.
+ kw.pop('source_address', None)
+
+ #: The socket options provided by the user. If no options are
+ #: provided, we use the default options.
+ self.socket_options = kw.pop('socket_options', self.default_socket_options)
+
+ # Superclass also sets self.source_address in Python 2.7+.
+ _HTTPConnection.__init__(self, *args, **kw)
+
+ def _new_conn(self):
+ """ Establish a socket connection and set nodelay settings on it.
+
+ :return: New socket connection.
+ """
+ # Only pass the optional keyword arguments that are actually set.
+ extra_kw = {}
+ if self.source_address:
+ extra_kw['source_address'] = self.source_address
+
+ if self.socket_options:
+ extra_kw['socket_options'] = self.socket_options
+
+ try:
+ conn = connection.create_connection(
+ (self.host, self.port), self.timeout, **extra_kw)
+
+ except SocketTimeout:
+ # Translate the socket-level timeout into ConnectTimeoutError.
+ raise ConnectTimeoutError(
+ self, "Connection to %s timed out. (connect timeout=%s)" %
+ (self.host, self.timeout))
+
+ return conn
+
+ # Adopts ``conn`` as this connection's socket and, when a tunnel host is
+ # set, calls self._tunnel() and clears auto_open.
+ def _prepare_conn(self, conn):
+ self.sock = conn
+ # the _tunnel_host attribute was added in python 2.6.3 (via
+ # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do
+ # not have them.
+ if getattr(self, '_tunnel_host', None):
+ # TODO: Fix tunnel so it doesn't depend on self.sock state.
+ self._tunnel()
+ # Mark this connection as not reusable
+ self.auto_open = 0
+
+ # Opens a new socket with _new_conn() and installs it via _prepare_conn().
+ def connect(self):
+ conn = self._new_conn()
+ self._prepare_conn(conn)
+
+
+# HTTPS variant of HTTPConnection: connects the same way and then wraps the
+# socket with ssl.wrap_socket(), passing only the optional key and
+# certificate files (no certificate requirements or CA bundle are given here).
+class HTTPSConnection(HTTPConnection):
+ # Port used for the 'https' scheme (taken from port_by_scheme).
+ default_port = port_by_scheme['https']
+
+ # Forwards host, port, strict, timeout and any extra keywords to
+ # HTTPConnection.__init__, then stores key_file and cert_file.
+ def __init__(self, host, port=None, key_file=None, cert_file=None,
+ strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **kw):
+
+ HTTPConnection.__init__(self, host, port, strict=strict,
+ timeout=timeout, **kw)
+
+ self.key_file = key_file
+ self.cert_file = cert_file
+
+ # Required property for Google AppEngine 1.9.0 which otherwise causes
+ # HTTPS requests to go out as HTTP. (See Issue #356)
+ self._protocol = 'https'
+
+ # Opens and prepares the plain socket first, then replaces self.sock with
+ # the SSL-wrapped socket.
+ def connect(self):
+ conn = self._new_conn()
+ self._prepare_conn(conn)
+ self.sock = ssl.wrap_socket(conn, self.key_file, self.cert_file)
+
+
class VerifiedHTTPSConnection(HTTPSConnection):
"""
Based on httplib.HTTPSConnection but wraps the socket with
cert_reqs = None
ca_certs = None
ssl_version = None
+ assert_fingerprint = None
def set_cert(self, key_file=None, cert_file=None,
cert_reqs=None, ca_certs=None,
def connect(self):
# Add certificate verification
- try:
- sock = socket.create_connection(
- address=(self.host, self.port),
- timeout=self.timeout,
- )
- except SocketTimeout:
- raise ConnectTimeoutError(
- self, "Connection to %s timed out. (connect timeout=%s)" %
- (self.host, self.timeout))
+ conn = self._new_conn()
resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
resolved_ssl_version = resolve_ssl_version(self.ssl_version)
- if self._tunnel_host:
- self.sock = sock
+ hostname = self.host
+ if getattr(self, '_tunnel_host', None):
+ # _tunnel_host was added in Python 2.6.3
+ # (See: http://hg.python.org/cpython/rev/0f57b30a152f)
+
+ self.sock = conn
# Calls self._set_hostport(), so self.host is
# self._tunnel_host below.
self._tunnel()
+ # Mark this connection as not reusable
+ self.auto_open = 0
+
+ # Override the host with the one we're requesting data from.
+ hostname = self._tunnel_host
+
+ is_time_off = datetime.date.today() < RECENT_DATE
+ if is_time_off:
+ warnings.warn((
+ 'System time is way off (before {0}). This will probably '
+ 'lead to SSL verification errors').format(RECENT_DATE),
+ SystemTimeWarning
+ )
# Wrap socket using verification with the root certs in
# trusted_root_certs
- self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file,
+ self.sock = ssl_wrap_socket(conn, self.key_file, self.cert_file,
cert_reqs=resolved_cert_reqs,
ca_certs=self.ca_certs,
- server_hostname=self.host,
+ server_hostname=hostname,
ssl_version=resolved_ssl_version)
- if resolved_cert_reqs != ssl.CERT_NONE:
- if self.assert_fingerprint:
- assert_fingerprint(self.sock.getpeercert(binary_form=True),
- self.assert_fingerprint)
- elif self.assert_hostname is not False:
- match_hostname(self.sock.getpeercert(),
- self.assert_hostname or self.host)
+ if self.assert_fingerprint:
+ assert_fingerprint(self.sock.getpeercert(binary_form=True),
+ self.assert_fingerprint)
+ elif resolved_cert_reqs != ssl.CERT_NONE \
+ and self.assert_hostname is not False:
+ match_hostname(self.sock.getpeercert(),
+ self.assert_hostname or hostname)
+
+ self.is_verified = (resolved_cert_reqs == ssl.CERT_REQUIRED
+ or self.assert_fingerprint is not None)
if ssl:
+ # Make a copy for testing.
+ UnverifiedHTTPSConnection = HTTPSConnection
HTTPSConnection = VerifiedHTTPSConnection
-# urllib3/connectionpool.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
import errno
import logging
+import sys
+import warnings
from socket import error as SocketError, timeout as SocketTimeout
import socket
-try: # Python 3
+try: # Python 3
from queue import LifoQueue, Empty, Full
except ImportError:
from Queue import LifoQueue, Empty, Full
from .exceptions import (
ClosedPoolError,
- ConnectTimeoutError,
+ ProtocolError,
EmptyPoolError,
HostChangedError,
+ LocationValueError,
MaxRetryError,
+ ProxyError,
+ ReadTimeoutError,
SSLError,
TimeoutError,
- ReadTimeoutError,
- ProxyError,
+ InsecureRequestWarning,
)
from .packages.ssl_match_hostname import CertificateError
from .packages import six
from .connection import (
+ port_by_scheme,
DummyConnection,
HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection,
HTTPException, BaseSSLError,
)
from .request import RequestMethods
from .response import HTTPResponse
-from .util import (
- assert_fingerprint,
- get_host,
- is_connection_dropped,
- Timeout,
-)
+
+from .util.connection import is_connection_dropped
+from .util.retry import Retry
+from .util.timeout import Timeout
+from .util.url import get_host
xrange = six.moves.xrange
_Default = object()
-port_by_scheme = {
- 'http': 80,
- 'https': 443,
-}
-
## Pool objects
-
class ConnectionPool(object):
"""
Base class for all connection pools, such as
QueueCls = LifoQueue
def __init__(self, host, port=None):
- # httplib doesn't like it when we include brackets in ipv6 addresses
- host = host.strip('[]')
+ if not host:
+ raise LocationValueError("No host specified.")
- self.host = host
+ # httplib doesn't like it when we include brackets in ipv6 addresses
+ self.host = host.strip('[]')
self.port = port
def __str__(self):
# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252
_blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK])
+
class HTTPConnectionPool(ConnectionPool, RequestMethods):
"""
Thread-safe connection pool for one host.
Headers to include with all requests, unless other headers are given
explicitly.
+ :param retries:
+ Retry configuration to use by default with requests in this pool.
+
:param _proxy:
Parsed proxy URL, should not be used directly, instead, see
:class:`urllib3.connectionpool.ProxyManager`"
:param _proxy_headers:
A dictionary with proxy headers, should not be used directly,
instead, see :class:`urllib3.connectionpool.ProxyManager`"
+
+ :param \**conn_kw:
+ Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`,
+ :class:`urllib3.connection.HTTPSConnection` instances.
"""
scheme = 'http'
def __init__(self, host, port=None, strict=False,
timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False,
- headers=None, _proxy=None, _proxy_headers=None):
+ headers=None, retries=None,
+ _proxy=None, _proxy_headers=None,
+ **conn_kw):
ConnectionPool.__init__(self, host, port)
RequestMethods.__init__(self, headers)
self.strict = strict
- # This is for backwards compatibility and can be removed once a timeout
- # can only be set to a Timeout object
if not isinstance(timeout, Timeout):
timeout = Timeout.from_float(timeout)
+ if retries is None:
+ retries = Retry.DEFAULT
+
self.timeout = timeout
+ self.retries = retries
self.pool = self.QueueCls(maxsize)
self.block = block
# These are mostly for testing and debugging purposes.
self.num_connections = 0
self.num_requests = 0
+ self.conn_kw = conn_kw
+
+ if self.proxy:
+ # Enable Nagle's algorithm for proxies, to avoid packet fragmentation.
+ # We cannot know if the user has added default socket options, so we cannot replace the
+ # list.
+ self.conn_kw.setdefault('socket_options', [])
def _new_conn(self):
"""
- Return a fresh :class:`httplib.HTTPConnection`.
+ Return a fresh :class:`HTTPConnection`.
"""
self.num_connections += 1
log.info("Starting new HTTP connection (%d): %s" %
(self.num_connections, self.host))
- extra_params = {}
- if not six.PY3: # Python 2
- extra_params['strict'] = self.strict
-
- return self.ConnectionCls(host=self.host, port=self.port,
+ conn = self.ConnectionCls(host=self.host, port=self.port,
timeout=self.timeout.connect_timeout,
- **extra_params)
+ strict=self.strict, **self.conn_kw)
+ return conn
def _get_conn(self, timeout=None):
"""
try:
conn = self.pool.get(block=self.block, timeout=timeout)
- except AttributeError: # self.pool is None
+ except AttributeError: # self.pool is None
raise ClosedPoolError(self, "Pool is closed.")
except Empty:
if conn and is_connection_dropped(conn):
log.info("Resetting dropped connection: %s" % self.host)
conn.close()
+ if getattr(conn, 'auto_open', 1) == 0:
+ # This is a proxied connection that has been mutated by
+ # httplib._tunnel() and cannot be reused (since it would
+ # attempt to bypass the proxy)
+ conn = None
return conn or self._new_conn()
"""
try:
self.pool.put(conn, block=False)
- return # Everything is dandy, done.
+ return # Everything is dandy, done.
except AttributeError:
# self.pool is None.
pass
except Full:
# This should never happen if self.block == True
- log.warning("HttpConnectionPool is full, discarding connection: %s"
- % self.host)
+ log.warning(
+ "Connection pool is full, discarding connection: %s" %
+ self.host)
# Connection never got put back into the pool, close it.
if conn:
conn.close()
+ def _validate_conn(self, conn):
+ """
+ Called right before a request is made, after the socket is created.
+ """
+ pass
+
def _get_timeout(self, timeout):
""" Helper that always returns a :class:`urllib3.util.Timeout` """
if timeout is _Default:
def _make_request(self, conn, method, url, timeout=_Default,
**httplib_request_kw):
"""
- Perform a request on a given httplib connection object taken from our
+ Perform a request on a given urllib3 connection object taken from our
pool.
:param conn:
self.num_requests += 1
timeout_obj = self._get_timeout(timeout)
+ timeout_obj.start_connect()
+ conn.timeout = timeout_obj.connect_timeout
- try:
- timeout_obj.start_connect()
- conn.timeout = timeout_obj.connect_timeout
- # conn.request() calls httplib.*.request, not the method in
- # urllib3.request. It also calls makefile (recv) on the socket.
- conn.request(method, url, **httplib_request_kw)
- except SocketTimeout:
- raise ConnectTimeoutError(
- self, "Connection to %s timed out. (connect timeout=%s)" %
- (self.host, timeout_obj.connect_timeout))
+ # Trigger any extra validation we need to do.
+ self._validate_conn(conn)
+
+ # conn.request() calls httplib.*.request, not the method in
+ # urllib3.request. It also calls makefile (recv) on the socket.
+ conn.request(method, url, **httplib_request_kw)
# Reset the timeout for the recv() on the socket
read_timeout = timeout_obj.read_timeout
# App Engine doesn't have a sock attr
- if hasattr(conn, 'sock'):
+ if getattr(conn, 'sock', None):
# In Python 3 socket.py will catch EAGAIN and return None when you
# try and read into the file pointer created by http.client, which
# instead raises a BadStatusLine exception. Instead of catching
# timeouts, check for a zero timeout before making the request.
if read_timeout == 0:
raise ReadTimeoutError(
- self, url,
- "Read timed out. (read timeout=%s)" % read_timeout)
+ self, url, "Read timed out. (read timeout=%s)" % read_timeout)
if read_timeout is Timeout.DEFAULT_TIMEOUT:
conn.sock.settimeout(socket.getdefaulttimeout())
- else: # None or a value
+ else: # None or a value
conn.sock.settimeout(read_timeout)
# Receive the response from the server
try:
- try: # Python 2.7+, use buffering of HTTP responses
+ try: # Python 2.7+, use buffering of HTTP responses
httplib_response = conn.getresponse(buffering=True)
- except TypeError: # Python 2.6 and older
+ except TypeError: # Python 2.6 and older
httplib_response = conn.getresponse()
except SocketTimeout:
raise ReadTimeoutError(
# http://bugs.python.org/issue10272
if 'timed out' in str(e) or \
'did not complete (read)' in str(e): # Python 2.6
- raise ReadTimeoutError(self, url, "Read timed out.")
+ raise ReadTimeoutError(
+ self, url, "Read timed out. (read timeout=%s)" % read_timeout)
raise
- except SocketError as e: # Platform-specific: Python 2
+ except SocketError as e: # Platform-specific: Python 2
# See the above comment about EAGAIN in Python 3. In Python 2 we
# have to specifically catch it and throw the timeout error
if e.errno in _blocking_errnos:
raise ReadTimeoutError(
- self, url,
- "Read timed out. (read timeout=%s)" % read_timeout)
+ self, url, "Read timed out. (read timeout=%s)" % read_timeout)
raise
conn.close()
except Empty:
- pass # Done.
+ pass # Done.
def is_same_host(self, url):
"""
# TODO: Add optional support for socket.gethostbyname checking.
scheme, host, port = get_host(url)
+ # Use explicit default port for comparison when none is given
if self.port and not port:
- # Use explicit default port for comparison when none is given.
port = port_by_scheme.get(scheme)
+ elif not self.port and port == port_by_scheme.get(scheme):
+ port = None
return (scheme, host, port) == (self.scheme, self.host, self.port)
- def urlopen(self, method, url, body=None, headers=None, retries=3,
+ def urlopen(self, method, url, body=None, headers=None, retries=None,
redirect=True, assert_same_host=True, timeout=_Default,
pool_timeout=None, release_conn=None, **response_kw):
"""
these headers completely replace any pool-specific headers.
:param retries:
- Number of retries to allow before raising a MaxRetryError exception.
+ Configure the number of retries to allow before raising a
+ :class:`~urllib3.exceptions.MaxRetryError` exception.
+
+ Pass ``None`` to retry until you receive a response. Pass a
+ :class:`~urllib3.util.retry.Retry` object for fine-grained control
+ over different types of retries.
+ Pass an integer number to retry connection errors that many times,
+ but no other types of errors. Pass zero to never retry.
+
+ If ``False``, then retries are disabled and any exception is raised
+ immediately. Also, instead of raising a MaxRetryError on redirects,
+ the redirect response will be returned.
+
+ :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int.
:param redirect:
If True, automatically handle redirects (status codes 301, 302,
- 303, 307, 308). Each redirect counts as a retry.
+ 303, 307, 308). Each redirect counts as a retry. Disabling retries
+ will disable redirect, too.
:param assert_same_host:
If ``True``, will make sure that the host of the pool requests is
if headers is None:
headers = self.headers
- if retries < 0:
- raise MaxRetryError(self, url)
+ if not isinstance(retries, Retry):
+ retries = Retry.from_int(retries, redirect=redirect, default=self.retries)
if release_conn is None:
release_conn = response_kw.get('preload_content', True)
# Check host
if assert_same_host and not self.is_same_host(url):
- raise HostChangedError(self, url, retries - 1)
+ raise HostChangedError(self, url, retries)
conn = None
+ # Merge the proxy headers. Only do this in HTTP. We have to copy the
+ # headers dict so we can safely change it without those changes being
+ # reflected in anyone else's copy.
+ if self.scheme == 'http':
+ headers = headers.copy()
+ headers.update(self.proxy_headers)
+
+ # Must keep the exception bound to a separate variable or else Python 3
+ # complains about UnboundLocalError.
+ err = None
+
try:
- # Request a connection from the queue
+ # Request a connection from the queue.
conn = self._get_conn(timeout=pool_timeout)
- # Make the request on the httplib connection object
+ # Make the request on the httplib connection object.
httplib_response = self._make_request(conn, method, url,
timeout=timeout,
body=body, headers=headers)
# ``response.read()``)
except Empty:
- # Timed out by queue
+ # Timed out by queue.
raise EmptyPoolError(self, "No pool connections are available.")
- except BaseSSLError as e:
+ except (BaseSSLError, CertificateError) as e:
+ # Release connection unconditionally because there is no way to
+ # close it externally in case of exception.
+ release_conn = True
raise SSLError(e)
- except CertificateError as e:
- # Name mismatch
- raise SSLError(e)
-
- except TimeoutError as e:
- # Connection broken, discard.
- conn = None
- # Save the error off for retry logic.
- err = e
+ except (TimeoutError, HTTPException, SocketError) as e:
+ if conn:
+ # Discard the connection for these exceptions. It will be
+ # replaced during the next _get_conn() call.
+ conn.close()
+ conn = None
- if retries == 0:
- raise
+ stacktrace = sys.exc_info()[2]
+ if isinstance(e, SocketError) and self.proxy:
+ e = ProxyError('Cannot connect to proxy.', e)
+ elif isinstance(e, (SocketError, HTTPException)):
+ e = ProtocolError('Connection aborted.', e)
- except (HTTPException, SocketError) as e:
- if isinstance(e, SocketError) and self.proxy is not None:
- raise ProxyError('Cannot connect to proxy. '
- 'Socket error: %s.' % e)
+ retries = retries.increment(method, url, error=e,
+ _pool=self, _stacktrace=stacktrace)
+ retries.sleep()
- # Connection broken, discard. It will be replaced next _get_conn().
- conn = None
- # This is necessary so we can access e below
+ # Keep track of the error for the retry warning.
err = e
- if retries == 0:
- raise MaxRetryError(self, url, e)
-
finally:
if release_conn:
# Put the connection back to be reused. If the connection is
if not conn:
# Try again
- log.warn("Retrying (%d attempts remain) after connection "
- "broken by '%r': %s" % (retries, err, url))
- return self.urlopen(method, url, body, headers, retries - 1,
+ log.warning("Retrying (%r) after connection "
+ "broken by '%r': %s" % (retries, err, url))
+ return self.urlopen(method, url, body, headers, retries,
redirect, assert_same_host,
timeout=timeout, pool_timeout=pool_timeout,
release_conn=release_conn, **response_kw)
if redirect_location:
if response.status == 303:
method = 'GET'
+
+ try:
+ retries = retries.increment(method, url, response=response, _pool=self)
+ except MaxRetryError:
+ if retries.raise_on_redirect:
+ raise
+ return response
+
log.info("Redirecting %s -> %s" % (url, redirect_location))
return self.urlopen(method, redirect_location, body, headers,
- retries - 1, redirect, assert_same_host,
- timeout=timeout, pool_timeout=pool_timeout,
- release_conn=release_conn, **response_kw)
+ retries=retries, redirect=redirect,
+ assert_same_host=assert_same_host,
+ timeout=timeout, pool_timeout=pool_timeout,
+ release_conn=release_conn, **response_kw)
+
+ # Check if we should retry the HTTP response.
+ if retries.is_forced_retry(method, status_code=response.status):
+ retries = retries.increment(method, url, response=response, _pool=self)
+ retries.sleep()
+ log.info("Forced retry: %s" % url)
+ return self.urlopen(method, url, body, headers,
+ retries=retries, redirect=redirect,
+ assert_same_host=assert_same_host,
+ timeout=timeout, pool_timeout=pool_timeout,
+ release_conn=release_conn, **response_kw)
return response
When Python is compiled with the :mod:`ssl` module, then
:class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates,
- instead of :class:`httplib.HTTPSConnection`.
+ instead of :class:`.HTTPSConnection`.
:class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``,
``assert_hostname`` and ``host`` in this order to verify connections.
ConnectionCls = HTTPSConnection
def __init__(self, host, port=None,
- strict=False, timeout=None, maxsize=1,
- block=False, headers=None,
+ strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1,
+ block=False, headers=None, retries=None,
_proxy=None, _proxy_headers=None,
key_file=None, cert_file=None, cert_reqs=None,
ca_certs=None, ssl_version=None,
- assert_hostname=None, assert_fingerprint=None):
+ assert_hostname=None, assert_fingerprint=None,
+ **conn_kw):
HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize,
- block, headers, _proxy, _proxy_headers)
+ block, headers, retries, _proxy, _proxy_headers,
+ **conn_kw)
self.key_file = key_file
self.cert_file = cert_file
self.cert_reqs = cert_reqs
set_tunnel = conn.set_tunnel
except AttributeError: # Platform-specific: Python 2.6
set_tunnel = conn._set_tunnel
- set_tunnel(self.host, self.port, self.proxy_headers)
+
+ if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older
+ set_tunnel(self.host, self.port)
+ else:
+ set_tunnel(self.host, self.port, self.proxy_headers)
+
# Establish tunnel connection early, because otherwise httplib
# would improperly set Host: header to proxy's IP:port.
conn.connect()
actual_host = self.proxy.host
actual_port = self.proxy.port
- extra_params = {}
- if not six.PY3: # Python 2
- extra_params['strict'] = self.strict
-
conn = self.ConnectionCls(host=actual_host, port=actual_port,
timeout=self.timeout.connect_timeout,
- **extra_params)
+ strict=self.strict, **self.conn_kw)
return self._prepare_conn(conn)
+ def _validate_conn(self, conn):
+ """
+ Called right before a request is made, after the socket is created.
+ """
+ super(HTTPSConnectionPool, self)._validate_conn(conn)
+
+ # Force connect early to allow us to validate the connection.
+ if not getattr(conn, 'sock', None): # AppEngine might not have `.sock`
+ conn.connect()
+
+ if not conn.is_verified:
+ warnings.warn((
+ 'Unverified HTTPS request is being made. '
+ 'Adding certificate verification is strongly advised. See: '
+ 'https://urllib3.readthedocs.org/en/latest/security.html '
+ '(This warning will only appear once by default.)'),
+ InsecureRequestWarning)
+
def connection_from_url(url, **kw):
"""
:class:`.ConnectionPool`. Useful for specifying things like
timeout, maxsize, headers, etc.
- Example: ::
+ Example::
>>> conn = connection_from_url('http://google.com/')
>>> r = conn.request('GET', '/')
-# urllib3/contrib/ntlmpool.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
"""
NTLM authenticating pool, contributed by erikcederstran
except ImportError:
from httplib import HTTPSConnection
from logging import getLogger
+from ntlm import ntlm
-from ntlmpool import ntlm
-
-from requests.packages.urllib3 import HTTPSConnectionPool
+from urllib3 import HTTPSConnectionPool
log = getLogger(__name__)
-'''SSL with SNI-support for Python 2.
+'''SSL with SNI_-support for Python 2. Follow these instructions if you would
+like to verify SSL certificates in Python 2. Note, the default libraries do
+*not* do certificate checking; you need to do additional work to validate
+certificates yourself.
This needs the following packages installed:
* ndg-httpsclient (tested with 0.3.2)
* pyasn1 (tested with 0.1.6)
-To activate it call :func:`~urllib3.contrib.pyopenssl.inject_into_urllib3`.
-This can be done in a ``sitecustomize`` module, or at any other time before
-your application begins using ``urllib3``, like this::
+You can install them with the following command:
+
+ pip install pyopenssl ndg-httpsclient pyasn1
+
+To activate certificate checking, call
+:func:`~urllib3.contrib.pyopenssl.inject_into_urllib3` from your Python code
+before you begin making HTTP requests. This can be done in a ``sitecustomize``
+module, or at any other time before your application begins using ``urllib3``,
+like this::
try:
import urllib3.contrib.pyopenssl
Now you can use :mod:`urllib3` as you normally would, and it will support SNI
when the required modules are installed.
+
+Activating this module also has the positive side effect of disabling SSL/TLS
+compression in Python 2 (see `CRIME attack`_).
+
+If you want to configure the default list of supported cipher suites, you can
+set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable.
+
+Module Variables
+----------------
+
+:var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites.
+ Default: ``ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:
+ ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:!aNULL:!MD5:!DSS``
+
+.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication
+.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit)
+
'''
-from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT
-from ndg.httpsclient.subj_alt_name import SubjectAltName
+try:
+ from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT
+ from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName
+except SyntaxError as e:
+ raise ImportError(e)
+
import OpenSSL.SSL
from pyasn1.codec.der import decoder as der_decoder
-from socket import _fileobject
+from pyasn1.type import univ, constraint
+from socket import _fileobject, timeout
import ssl
import select
-from cStringIO import StringIO
-from .. import connectionpool
+from .. import connection
from .. import util
__all__ = ['inject_into_urllib3', 'extract_from_urllib3']
+ OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
}
+# A secure default.
+# Sources for more information on TLS ciphers:
+#
+# - https://wiki.mozilla.org/Security/Server_Side_TLS
+# - https://www.ssllabs.com/projects/best-practices/index.html
+# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/
+#
+# The general intent is:
+# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE),
+# - prefer ECDHE over DHE for better performance,
+# - prefer any AES-GCM over any AES-CBC for better performance and security,
+# - use 3DES as fallback which is secure but slow,
+# - disable NULL authentication, MD5 MACs and DSS for security reasons.
+DEFAULT_SSL_CIPHER_LIST = "ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:" + \
+ "ECDH+AES128:DH+AES:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:" + \
+ "!aNULL:!MD5:!DSS"
+
orig_util_HAS_SNI = util.HAS_SNI
-orig_connectionpool_ssl_wrap_socket = connectionpool.ssl_wrap_socket
+orig_connection_ssl_wrap_socket = connection.ssl_wrap_socket
def inject_into_urllib3():
'Monkey-patch urllib3 with PyOpenSSL-backed SSL-support.'
- connectionpool.ssl_wrap_socket = ssl_wrap_socket
+ connection.ssl_wrap_socket = ssl_wrap_socket
util.HAS_SNI = HAS_SNI
def extract_from_urllib3():
'Undo monkey-patching by :func:`inject_into_urllib3`.'
- connectionpool.ssl_wrap_socket = orig_connectionpool_ssl_wrap_socket
+ connection.ssl_wrap_socket = orig_connection_ssl_wrap_socket
util.HAS_SNI = orig_util_HAS_SNI
### Note: This is a slightly bug-fixed version of same from ndg-httpsclient.
+class SubjectAltName(BaseSubjectAltName):
+ '''ASN.1 implementation for subjectAltNames support'''
+
+ # There is no limit to how many subjectAltName entries a certificate may
+ # have; however, this needs to have some limit, so we'll set an arbitrarily
+ # high limit.
+ sizeSpec = univ.SequenceOf.sizeSpec + \
+ constraint.ValueSizeConstraint(1, 1024)
+
+
+### Note: This is a slightly bug-fixed version of same from ndg-httpsclient.
def get_subj_alt_name(peer_cert):
# Search through extensions
dns_name = []
return dns_name
-class fileobject(_fileobject):
-
- def read(self, size=-1):
- # Use max, disallow tiny reads in a loop as they are very inefficient.
- # We never leave read() with any leftover data from a new recv() call
- # in our internal buffer.
- rbufsize = max(self._rbufsize, self.default_bufsize)
- # Our use of StringIO rather than lists of string objects returned by
- # recv() minimizes memory usage and fragmentation that occurs when
- # rbufsize is large compared to the typical return value of recv().
- buf = self._rbuf
- buf.seek(0, 2) # seek end
- if size < 0:
- # Read until EOF
- self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
- while True:
- try:
- data = self._sock.recv(rbufsize)
- except OpenSSL.SSL.WantReadError:
- continue
- if not data:
- break
- buf.write(data)
- return buf.getvalue()
- else:
- # Read until size bytes or EOF seen, whichever comes first
- buf_len = buf.tell()
- if buf_len >= size:
- # Already have size bytes in our buffer? Extract and return.
- buf.seek(0)
- rv = buf.read(size)
- self._rbuf = StringIO()
- self._rbuf.write(buf.read())
- return rv
-
- self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
- while True:
- left = size - buf_len
- # recv() will malloc the amount of memory given as its
- # parameter even though it often returns much less data
- # than that. The returned data string is short lived
- # as we copy it into a StringIO and free it. This avoids
- # fragmentation issues on many platforms.
- try:
- data = self._sock.recv(left)
- except OpenSSL.SSL.WantReadError:
- continue
- if not data:
- break
- n = len(data)
- if n == size and not buf_len:
- # Shortcut. Avoid buffer data copies when:
- # - We have no data in our buffer.
- # AND
- # - Our call to recv returned exactly the
- # number of bytes we were asked to read.
- return data
- if n == left:
- buf.write(data)
- del data # explicit free
- break
- assert n <= left, "recv(%d) returned %d bytes" % (left, n)
- buf.write(data)
- buf_len += n
- del data # explicit free
- #assert buf_len == buf.tell()
- return buf.getvalue()
-
- def readline(self, size=-1):
- buf = self._rbuf
- buf.seek(0, 2) # seek end
- if buf.tell() > 0:
- # check if we already have it in our buffer
- buf.seek(0)
- bline = buf.readline(size)
- if bline.endswith('\n') or len(bline) == size:
- self._rbuf = StringIO()
- self._rbuf.write(buf.read())
- return bline
- del bline
- if size < 0:
- # Read until \n or EOF, whichever comes first
- if self._rbufsize <= 1:
- # Speed up unbuffered case
- buf.seek(0)
- buffers = [buf.read()]
- self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
- data = None
- recv = self._sock.recv
- while True:
- try:
- while data != "\n":
- data = recv(1)
- if not data:
- break
- buffers.append(data)
- except OpenSSL.SSL.WantReadError:
- continue
- break
- return "".join(buffers)
-
- buf.seek(0, 2) # seek end
- self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
- while True:
- try:
- data = self._sock.recv(self._rbufsize)
- except OpenSSL.SSL.WantReadError:
- continue
- if not data:
- break
- nl = data.find('\n')
- if nl >= 0:
- nl += 1
- buf.write(data[:nl])
- self._rbuf.write(data[nl:])
- del data
- break
- buf.write(data)
- return buf.getvalue()
- else:
- # Read until size bytes or \n or EOF seen, whichever comes first
- buf.seek(0, 2) # seek end
- buf_len = buf.tell()
- if buf_len >= size:
- buf.seek(0)
- rv = buf.read(size)
- self._rbuf = StringIO()
- self._rbuf.write(buf.read())
- return rv
- self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
- while True:
- try:
- data = self._sock.recv(self._rbufsize)
- except OpenSSL.SSL.WantReadError:
- continue
- if not data:
- break
- left = size - buf_len
- # did we just receive a newline?
- nl = data.find('\n', 0, left)
- if nl >= 0:
- nl += 1
- # save the excess data to _rbuf
- self._rbuf.write(data[nl:])
- if buf_len:
- buf.write(data[:nl])
- break
- else:
- # Shortcut. Avoid data copy through buf when returning
- # a substring of our first recv().
- return data[:nl]
- n = len(data)
- if n == size and not buf_len:
- # Shortcut. Avoid data copy through buf when
- # returning exactly all of our first recv().
- return data
- if n >= left:
- buf.write(data[:left])
- self._rbuf.write(data[left:])
- break
- buf.write(data)
- buf_len += n
- #assert buf_len == buf.tell()
- return buf.getvalue()
-
-
class WrappedSocket(object):
- '''API-compatibility wrapper for Python OpenSSL's Connection-class.'''
+ '''API-compatibility wrapper for Python OpenSSL's Connection-class.
+
+ Note: _makefile_refs, _drop() and _reuse() are needed for the garbage
+ collector of pypy.
+ '''
- def __init__(self, connection, socket):
+ def __init__(self, connection, socket, suppress_ragged_eofs=True):
self.connection = connection
self.socket = socket
+ self.suppress_ragged_eofs = suppress_ragged_eofs
+ self._makefile_refs = 0
def fileno(self):
return self.socket.fileno()
def makefile(self, mode, bufsize=-1):
- return fileobject(self.connection, mode, bufsize)
+ self._makefile_refs += 1
+ return _fileobject(self, mode, bufsize, close=True)
+
+ def recv(self, *args, **kwargs):
+ try:
+ data = self.connection.recv(*args, **kwargs)
+ except OpenSSL.SSL.SysCallError as e:
+ if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'):
+ return b''
+ else:
+ raise
+ except OpenSSL.SSL.WantReadError:
+ rd, wd, ed = select.select(
+ [self.socket], [], [], self.socket.gettimeout())
+ if not rd:
+ raise timeout('The read operation timed out')
+ else:
+ return self.recv(*args, **kwargs)
+ else:
+ return data
def settimeout(self, timeout):
return self.socket.settimeout(timeout)
return self.connection.sendall(data)
def close(self):
- return self.connection.shutdown()
+ if self._makefile_refs < 1:
+ return self.connection.shutdown()
+ else:
+ self._makefile_refs -= 1
def getpeercert(self, binary_form=False):
x509 = self.connection.get_peer_certificate()
]
}
+ def _reuse(self):
+ self._makefile_refs += 1
+
+ def _drop(self):
+ if self._makefile_refs < 1:
+ self.close()
+ else:
+ self._makefile_refs -= 1
+
def _verify_callback(cnx, x509, err_no, err_depth, return_code):
return err_no == 0
ctx.load_verify_locations(ca_certs, None)
except OpenSSL.SSL.Error as e:
raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e)
+ else:
+ ctx.set_default_verify_paths()
+
+ # Disable TLS compression to mitigate CRIME attack (issue #309)
+ OP_NO_COMPRESSION = 0x20000
+ ctx.set_options(OP_NO_COMPRESSION)
+
+ # Set list of supported ciphersuites.
+ ctx.set_cipher_list(DEFAULT_SSL_CIPHER_LIST)
cnx = OpenSSL.SSL.Connection(ctx, sock)
cnx.set_tlsext_host_name(server_hostname)
-# urllib3/exceptions.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
## Base Exceptions
"Base exception used by this module."
pass
+class HTTPWarning(Warning):
+ "Base warning used by this module."
+ pass
+
+
class PoolError(HTTPError):
"Base exception for errors caused within a pool."
pass
+class ProtocolError(HTTPError):
+ "Raised when something unexpected happens mid-request/response."
+ pass
+
+
+#: Renamed to ProtocolError but aliased for backwards compatibility.
+ConnectionError = ProtocolError
+
+
## Leaf Exceptions
class MaxRetryError(RequestError):
- "Raised when the maximum number of retries is exceeded."
+ """Raised when the maximum number of retries is exceeded.
+
+ :param pool: The connection pool
+ :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool`
+ :param string url: The requested Url
+ :param exceptions.Exception reason: The underlying error
+
+ """
def __init__(self, pool, url, reason=None):
self.reason = reason
message = "Max retries exceeded with url: %s" % url
if reason:
- message += " (Caused by %s: %s)" % (type(reason), reason)
+ message += " (Caused by %r)" % reason
else:
message += " (Caused by redirect)"
pass
-class LocationParseError(ValueError, HTTPError):
+class LocationValueError(ValueError, HTTPError):
+ "Raised when there is something wrong with a given URL input."
+ pass
+
+
+class LocationParseError(LocationValueError):
"Raised when get_host or similar fails to parse the URL input."
def __init__(self, location):
HTTPError.__init__(self, message)
self.location = location
+
+
+class SecurityWarning(HTTPWarning):
+ "Warned when performing security reducing actions"
+ pass
+
+
+class InsecureRequestWarning(SecurityWarning):
+ "Warned when making an unverified HTTPS request."
+ pass
+
+
+class SystemTimeWarning(SecurityWarning):
+ "Warned when system time is suspected to be wrong"
+ pass
-# urllib3/fields.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
import email.utils
import mimetypes
Guess the "Content-Type" of a file.
:param filename:
- The filename to guess the "Content-Type" of using :mod:`mimetimes`.
+ The filename to guess the "Content-Type" of using :mod:`mimetypes`.
:param default:
If no "Content-Type" can be guessed, default to `default`.
"""
"""
A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters.
- Supports constructing :class:`~urllib3.fields.RequestField` from parameter
- of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type)
- tuple where the MIME type is optional. For example: ::
+ Supports constructing :class:`~urllib3.fields.RequestField` from
+ parameter of key/value strings AND key/filetuple. A filetuple is a
+ (filename, data, MIME type) tuple where the MIME type is optional.
+ For example::
'foo': 'bar',
'fakefile': ('foofile.txt', 'contents of foofile'),
'Content-Disposition' fields.
:param header_parts:
- A sequence of (k, v) typles or a :class:`dict` of (k, v) to format as
- `k1="v1"; k2="v2"; ...`.
+ A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format
+ as `k1="v1"; k2="v2"; ...`.
"""
parts = []
iterable = header_parts
lines.append('\r\n')
return '\r\n'.join(lines)
- def make_multipart(self, content_disposition=None, content_type=None, content_location=None):
+ def make_multipart(self, content_disposition=None, content_type=None,
+ content_location=None):
"""
Makes this request field into a multipart request field.
"""
self.headers['Content-Disposition'] = content_disposition or 'form-data'
- self.headers['Content-Disposition'] += '; '.join(['', self._render_parts((('name', self._name), ('filename', self._filename)))])
+ self.headers['Content-Disposition'] += '; '.join([
+ '', self._render_parts(
+ (('name', self._name), ('filename', self._filename))
+ )
+ ])
self.headers['Content-Type'] = content_type
self.headers['Content-Location'] = content_location
-# urllib3/filepost.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
import codecs
-import mimetypes
from uuid import uuid4
from io import BytesIO
i = iter(fields)
for field in i:
- if isinstance(field, RequestField):
- yield field
- else:
- yield RequestField.from_tuples(*field)
+ if isinstance(field, RequestField):
+ yield field
+ else:
+ yield RequestField.from_tuples(*field)
def iter_fields(fields):
"""
- Iterate over fields.
+ .. deprecated:: 1.6
- .. deprecated ::
+ Iterate over fields.
- The addition of `~urllib3.fields.RequestField` makes this function
- obsolete. Instead, use :func:`iter_field_objects`, which returns
- `~urllib3.fields.RequestField` objects, instead.
+ The addition of :class:`~urllib3.fields.RequestField` makes this function
+ obsolete. Instead, use :func:`iter_field_objects`, which returns
+ :class:`~urllib3.fields.RequestField` objects.
Supports list of (k, v) tuples and dicts.
-
"""
if isinstance(fields, dict):
return ((k, v) for k, v in six.iteritems(fields))
# Passes Python2.7's test suite and incorporates all the latest updates.
# Copyright 2009 Raymond Hettinger, released under the MIT License.
# http://code.activestate.com/recipes/576693/
-
try:
from thread import get_ident as _get_ident
except ImportError:
for k in self:
yield (k, self[k])
- def update(self,*args, **kwds):
+ def update(*args, **kwds):
'''od.update(E, **F) -> None. Update od from dict/iterable E and F.
If E is a dict instance, does: for k in E: od[k] = E[k]
if PY3:
- import builtins #pylint: disable=import-error
+ import builtins
exec_ = getattr(builtins, "exec")
-"""The match_hostname() function from Python 3.2, essential when using SSL."""
-
-import re
-
-__version__ = '3.2.2'
-
-class CertificateError(ValueError):
- pass
-
-def _dnsname_match(dn, hostname, max_wildcards=1):
- """Matching according to RFC 6125, section 6.4.3
-
- http://tools.ietf.org/html/rfc6125#section-6.4.3
- """
- pats = []
- if not dn:
- return False
-
- parts = dn.split(r'.')
- leftmost = parts[0]
-
- wildcards = leftmost.count('*')
- if wildcards > max_wildcards:
- # Issue #17980: avoid denials of service by refusing more
- # than one wildcard per fragment. A survery of established
- # policy among SSL implementations showed it to be a
- # reasonable choice.
- raise CertificateError(
- "too many wildcards in certificate DNS name: " + repr(dn))
-
- # speed up common case w/o wildcards
- if not wildcards:
- return dn.lower() == hostname.lower()
-
- # RFC 6125, section 6.4.3, subitem 1.
- # The client SHOULD NOT attempt to match a presented identifier in which
- # the wildcard character comprises a label other than the left-most label.
- if leftmost == '*':
- # When '*' is a fragment by itself, it matches a non-empty dotless
- # fragment.
- pats.append('[^.]+')
- elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
- # RFC 6125, section 6.4.3, subitem 3.
- # The client SHOULD NOT attempt to match a presented identifier
- # where the wildcard character is embedded within an A-label or
- # U-label of an internationalized domain name.
- pats.append(re.escape(leftmost))
- else:
- # Otherwise, '*' matches any dotless string, e.g. www*
- pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
-
- # add the remaining fragments, ignore any wildcards
- for frag in parts[1:]:
- pats.append(re.escape(frag))
-
- pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
- return pat.match(hostname)
-
-
-def match_hostname(cert, hostname):
- """Verify that *cert* (in decoded format as returned by
- SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
- rules are followed, but IP addresses are not accepted for *hostname*.
-
- CertificateError is raised on failure. On success, the function
- returns nothing.
- """
- if not cert:
- raise ValueError("empty or no certificate")
- dnsnames = []
- san = cert.get('subjectAltName', ())
- for key, value in san:
- if key == 'DNS':
- if _dnsname_match(value, hostname):
- return
- dnsnames.append(value)
- if not dnsnames:
- # The subject is only checked when there is no dNSName entry
- # in subjectAltName
- for sub in cert.get('subject', ()):
- for key, value in sub:
- # XXX according to RFC 2818, the most specific Common Name
- # must be used.
- if key == 'commonName':
- if _dnsname_match(value, hostname):
- return
- dnsnames.append(value)
- if len(dnsnames) > 1:
- raise CertificateError("hostname %r "
- "doesn't match either of %s"
- % (hostname, ', '.join(map(repr, dnsnames))))
- elif len(dnsnames) == 1:
- raise CertificateError("hostname %r "
- "doesn't match %r"
- % (hostname, dnsnames[0]))
- else:
- raise CertificateError("no appropriate commonName or "
- "subjectAltName fields were found")
+try:
+ # Python 3.2+
+ from ssl import CertificateError, match_hostname
+except ImportError:
+ try:
+ # Backport of the function from a pypi module
+ from backports.ssl_match_hostname import CertificateError, match_hostname
+ except ImportError:
+ # Our vendored copy
+ from ._implementation import CertificateError, match_hostname
+
+# Not needed, but documenting what we provide.
+__all__ = ('CertificateError', 'match_hostname')
--- /dev/null
+"""The match_hostname() function from Python 3.3.3, essential when using SSL."""
+
+# Note: This file is under the PSF license as the code comes from the python
+# stdlib. http://docs.python.org/3/license.html
+
+import re
+
+__version__ = '3.4.0.2'
+
+class CertificateError(ValueError):
+ pass
+
+
+def _dnsname_match(dn, hostname, max_wildcards=1):
+ """Matching according to RFC 6125, section 6.4.3
+
+ http://tools.ietf.org/html/rfc6125#section-6.4.3
+ """
+ pats = []
+ if not dn:
+ return False
+
+ # Ported from python3-syntax:
+ # leftmost, *remainder = dn.split(r'.')
+ parts = dn.split(r'.')
+ leftmost = parts[0]
+ remainder = parts[1:]
+
+ wildcards = leftmost.count('*')
+ if wildcards > max_wildcards:
+ # Issue #17980: avoid denials of service by refusing more
+ # than one wildcard per fragment. A survey of established
+ # policy among SSL implementations showed it to be a
+ # reasonable choice.
+ raise CertificateError(
+ "too many wildcards in certificate DNS name: " + repr(dn))
+
+ # speed up common case w/o wildcards
+ if not wildcards:
+ return dn.lower() == hostname.lower()
+
+ # RFC 6125, section 6.4.3, subitem 1.
+ # The client SHOULD NOT attempt to match a presented identifier in which
+ # the wildcard character comprises a label other than the left-most label.
+ if leftmost == '*':
+ # When '*' is a fragment by itself, it matches a non-empty dotless
+ # fragment.
+ pats.append('[^.]+')
+ elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
+ # RFC 6125, section 6.4.3, subitem 3.
+ # The client SHOULD NOT attempt to match a presented identifier
+ # where the wildcard character is embedded within an A-label or
+ # U-label of an internationalized domain name.
+ pats.append(re.escape(leftmost))
+ else:
+ # Otherwise, '*' matches any dotless string, e.g. www*
+ pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
+
+ # add the remaining fragments, ignore any wildcards
+ for frag in remainder:
+ pats.append(re.escape(frag))
+
+ pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
+ return pat.match(hostname)
+
+
+def match_hostname(cert, hostname):
+ """Verify that *cert* (in decoded format as returned by
+ SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
+ rules are followed, but IP addresses are not accepted for *hostname*.
+
+ CertificateError is raised on failure. On success, the function
+ returns nothing.
+ """
+ if not cert:
+ raise ValueError("empty or no certificate")
+ dnsnames = []
+ san = cert.get('subjectAltName', ())
+ for key, value in san:
+ if key == 'DNS':
+ if _dnsname_match(value, hostname):
+ return
+ dnsnames.append(value)
+ if not dnsnames:
+ # The subject is only checked when there is no dNSName entry
+ # in subjectAltName
+ for sub in cert.get('subject', ()):
+ for key, value in sub:
+ # XXX according to RFC 2818, the most specific Common Name
+ # must be used.
+ if key == 'commonName':
+ if _dnsname_match(value, hostname):
+ return
+ dnsnames.append(value)
+ if len(dnsnames) > 1:
+ raise CertificateError("hostname %r "
+ "doesn't match either of %s"
+ % (hostname, ', '.join(map(repr, dnsnames))))
+ elif len(dnsnames) == 1:
+ raise CertificateError("hostname %r "
+ "doesn't match %r"
+ % (hostname, dnsnames[0]))
+ else:
+ raise CertificateError("no appropriate commonName or "
+ "subjectAltName fields were found")
-# urllib3/poolmanager.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
import logging
try: # Python 3
from ._collections import RecentlyUsedContainer
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from .connectionpool import port_by_scheme
+from .exceptions import LocationValueError
from .request import RequestMethods
-from .util import parse_url
+from .util.url import parse_url
+from .util.retry import Retry
__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url']
Additional parameters are used to create fresh
:class:`urllib3.connectionpool.ConnectionPool` instances.
- Example: ::
+ Example::
>>> manager = PoolManager(num_pools=2)
>>> r = manager.request('GET', 'http://google.com/')
``urllib3.connectionpool.port_by_scheme``.
"""
- scheme = scheme or 'http'
+ if not host:
+ raise LocationValueError("No host specified.")
+ scheme = scheme or 'http'
port = port or port_by_scheme.get(scheme, 80)
-
pool_key = (scheme, host, port)
with self.pools.lock:
# Make a fresh ConnectionPool of the desired type
pool = self._new_pool(scheme, host, port)
self.pools[pool_key] = pool
+
return pool
def connection_from_url(self, url):
# Support relative URLs for redirecting.
redirect_location = urljoin(url, redirect_location)
- # RFC 2616, Section 10.3.4
+ # RFC 7231, Section 6.4.4
if response.status == 303:
method = 'GET'
- log.info("Redirecting %s -> %s" % (url, redirect_location))
- kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown
+ retries = kw.get('retries')
+ if not isinstance(retries, Retry):
+ retries = Retry.from_int(retries, redirect=redirect)
+
+ kw['retries'] = retries.increment(method, redirect_location)
kw['redirect'] = redirect
+
+ log.info("Redirecting %s -> %s" % (url, redirect_location))
return self.urlopen(method, redirect_location, **kw)
Behaves just like :class:`PoolManager`, but sends all requests through
the defined proxy, using the CONNECT method for HTTPS URLs.
- :param poxy_url:
+ :param proxy_url:
The URL of the proxy to be used.
:param proxy_headers:
if not proxy.port:
port = port_by_scheme.get(proxy.scheme, 80)
proxy = proxy._replace(port=port)
+
+ assert proxy.scheme in ("http", "https"), \
+ 'Not supported proxy scheme %s' % proxy.scheme
+
self.proxy = proxy
self.proxy_headers = proxy_headers or {}
- assert self.proxy.scheme in ("http", "https"), \
- 'Not supported proxy scheme %s' % self.proxy.scheme
+
connection_pool_kw['_proxy'] = self.proxy
connection_pool_kw['_proxy_headers'] = self.proxy_headers
+
super(ProxyManager, self).__init__(
num_pools, headers, **connection_pool_kw)
u = parse_url(url)
if u.scheme == "http":
- # It's too late to set proxy headers on per-request basis for
- # tunnelled HTTPS connections, should use
- # constructor's proxy_headers instead.
- kw['headers'] = self._set_proxy_headers(url, kw.get('headers',
- self.headers))
- kw['headers'].update(self.proxy_headers)
-
- return super(ProxyManager, self).urlopen(method, url, redirect, **kw)
+ # For proxied HTTPS requests, httplib sets the necessary headers
+ # on the CONNECT to the proxy. For HTTP, we'll definitely
+ # need to set 'Host' at the very least.
+ headers = kw.get('headers', self.headers)
+ kw['headers'] = self._set_proxy_headers(url, headers)
+
+ return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw)
def proxy_from_url(url, **kw):
-# urllib3/request.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
try:
from urllib.parse import urlencode
except ImportError:
Specifically,
- :meth:`.request_encode_url` is for sending requests whose fields are encoded
- in the URL (such as GET, HEAD, DELETE).
+ :meth:`.request_encode_url` is for sending requests whose fields are
+ encoded in the URL (such as GET, HEAD, DELETE).
:meth:`.request_encode_body` is for sending requests whose fields are
encoded in the *body* of the request using multipart or www-form-urlencoded
"""
_encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS'])
- _encode_body_methods = set(['PATCH', 'POST', 'PUT', 'TRACE'])
def __init__(self, headers=None):
self.headers = headers or {}
def urlopen(self, method, url, body=None, headers=None,
encode_multipart=True, multipart_boundary=None,
- **kw): # Abstract
- raise NotImplementedError("Classes extending RequestMethods must implement "
+ **kw): # Abstract
+ raise NotImplementedError("Classes extending RequestMethods must implement "
"their own ``urlopen`` method.")
def request(self, method, url, fields=None, headers=None, **urlopen_kw):
``fields`` based on the ``method`` used.
This is a convenience method that requires the least amount of manual
- effort. It can be used in most situations, while still having the option
- to drop down to more specific methods when necessary, such as
+ effort. It can be used in most situations, while still having the
+ option to drop down to more specific methods when necessary, such as
:meth:`request_encode_url`, :meth:`request_encode_body`,
or even the lowest level :meth:`urlopen`.
"""
if method in self._encode_url_methods:
return self.request_encode_url(method, url, fields=fields,
- headers=headers,
- **urlopen_kw)
+ headers=headers,
+ **urlopen_kw)
else:
return self.request_encode_body(method, url, fields=fields,
- headers=headers,
- **urlopen_kw)
+ headers=headers,
+ **urlopen_kw)
def request_encode_url(self, method, url, fields=None, **urlopen_kw):
"""
the body. This is useful for request methods like POST, PUT, PATCH, etc.
When ``encode_multipart=True`` (default), then
- :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode the
- payload with the appropriate content type. Otherwise
+ :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode
+ the payload with the appropriate content type. Otherwise
:meth:`urllib.urlencode` is used with the
'application/x-www-form-urlencoded' content type.
Multipart encoding must be used when posting files, and it's reasonably
- safe to use it in other times too. However, it may break request signing,
- such as with OAuth.
+ safe to use it in other times too. However, it may break request
+ signing, such as with OAuth.
Supports an optional ``fields`` parameter of key/value strings AND
key/filetuple. A filetuple is a (filename, data, MIME type) tuple where
- the MIME type is optional. For example: ::
+ the MIME type is optional. For example::
fields = {
'foo': 'bar',
When uploading a file, providing a filename (the first parameter of the
tuple) is optional but recommended to best mimick behavior of browsers.
- Note that if ``headers`` are supplied, the 'Content-Type' header will be
- overwritten because it depends on the dynamic random boundary string
+ Note that if ``headers`` are supplied, the 'Content-Type' header will
+ be overwritten because it depends on the dynamic random boundary string
which is used to compose the body of the request. The random boundary
string can be explicitly set with the ``multipart_boundary`` parameter.
"""
if encode_multipart:
- body, content_type = encode_multipart_formdata(fields or {},
- boundary=multipart_boundary)
+ body, content_type = encode_multipart_formdata(
+ fields or {}, boundary=multipart_boundary)
else:
body, content_type = (urlencode(fields or {}),
- 'application/x-www-form-urlencoded')
+ 'application/x-www-form-urlencoded')
if headers is None:
headers = self.headers
-# urllib3/response.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
-
-import logging
import zlib
import io
+from socket import timeout as SocketTimeout
-from .exceptions import DecodeError
+from ._collections import HTTPHeaderDict
+from .exceptions import ProtocolError, DecodeError, ReadTimeoutError
from .packages.six import string_types as basestring, binary_type
-from .util import is_fp_closed
+from .connection import HTTPException, BaseSSLError
+from .util.response import is_fp_closed
-log = logging.getLogger(__name__)
-
class DeflateDecoder(object):
HTTP Response container.
Backwards-compatible to httplib's HTTPResponse but the response ``body`` is
- loaded and decoded on-demand when the ``data`` property is accessed.
+ loaded and decoded on-demand when the ``data`` property is accessed. This
+ class is also compatible with the Python standard library's :mod:`io`
+ module, and can hence be treated as a readable object in the context of that
+ framework.
Extra parameters for behaviour not present in httplib.HTTPResponse:
def __init__(self, body='', headers=None, status=0, version=0, reason=None,
strict=0, preload_content=True, decode_content=True,
original_response=None, pool=None, connection=None):
- self.headers = headers or {}
+
+ self.headers = HTTPHeaderDict()
+ if headers:
+ self.headers.update(headers)
self.status = status
self.version = version
self.reason = reason
self.decode_content = decode_content
self._decoder = None
- self._body = body if body and isinstance(body, basestring) else None
+ self._body = None
self._fp = None
self._original_response = original_response
+ self._fp_bytes_read = 0
+
+ if body and isinstance(body, (basestring, binary_type)):
+ self._body = body
self._pool = pool
self._connection = connection
if self._fp:
return self.read(cache_content=True)
+ def tell(self):
+ """
+ Obtain the number of bytes pulled over the wire so far. May differ from
+ the amount of content returned by :meth:`HTTPResponse.read` if bytes
+ are encoded on the wire (e.g., compressed).
+ """
+ return self._fp_bytes_read
+
def read(self, amt=None, decode_content=None, cache_content=False):
"""
Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
after having ``.read()`` the file object. (Overridden if ``amt`` is
set.)
"""
- # Note: content-encoding value should be case-insensitive, per RFC 2616
- # Section 3.5
+ # Note: content-encoding value should be case-insensitive, per RFC 7230
+ # Section 3.2
content_encoding = self.headers.get('content-encoding', '').lower()
if self._decoder is None:
if content_encoding in self.CONTENT_DECODERS:
flush_decoder = False
try:
- if amt is None:
- # cStringIO doesn't like amt=None
- data = self._fp.read()
- flush_decoder = True
- else:
- cache_content = False
- data = self._fp.read(amt)
- if amt != 0 and not data: # Platform-specific: Buggy versions of Python.
- # Close the connection when no data is returned
- #
- # This is redundant to what httplib/http.client _should_
- # already do. However, versions of python released before
- # December 15, 2012 (http://bugs.python.org/issue16298) do not
- # properly close the connection in all cases. There is no harm
- # in redundantly calling close.
- self._fp.close()
+ try:
+ if amt is None:
+ # cStringIO doesn't like amt=None
+ data = self._fp.read()
flush_decoder = True
+ else:
+ cache_content = False
+ data = self._fp.read(amt)
+ if amt != 0 and not data: # Platform-specific: Buggy versions of Python.
+ # Close the connection when no data is returned
+ #
+ # This is redundant to what httplib/http.client _should_
+ # already do. However, versions of python released before
+ # December 15, 2012 (http://bugs.python.org/issue16298) do
+ # not properly close the connection in all cases. There is
+ # no harm in redundantly calling close.
+ self._fp.close()
+ flush_decoder = True
+
+ except SocketTimeout:
+ # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
+ # there is yet no clean way to get at it from this context.
+ raise ReadTimeoutError(self._pool, None, 'Read timed out.')
+
+ except BaseSSLError as e:
+ # FIXME: Is there a better way to differentiate between SSLErrors?
+ if not 'read operation timed out' in str(e): # Defensive:
+ # This shouldn't happen but just in case we're missing an edge
+ # case, let's avoid swallowing SSL errors.
+ raise
+
+ raise ReadTimeoutError(self._pool, None, 'Read timed out.')
+
+ except HTTPException as e:
+ # This includes IncompleteRead.
+ raise ProtocolError('Connection broken: %r' % e, e)
+
+ self._fp_bytes_read += len(data)
try:
if decode_content and self._decoder:
except (IOError, zlib.error) as e:
raise DecodeError(
"Received response with content-encoding: %s, but "
- "failed to decode it." % content_encoding,
- e)
+ "failed to decode it." % content_encoding, e)
if flush_decoder and decode_content and self._decoder:
buf = self._decoder.decompress(binary_type())
if data:
yield data
-
@classmethod
def from_httplib(ResponseCls, r, **response_kw):
"""
with ``original_response=r``.
"""
- # Normalize headers between different versions of Python
- headers = {}
+ headers = HTTPHeaderDict()
for k, v in r.getheaders():
- # Python 3: Header keys are returned capitalised
- k = k.lower()
-
- has_value = headers.get(k)
- if has_value: # Python 3: Repeating header keys are unmerged.
- v = ', '.join([has_value, v])
-
- headers[k] = v
+ headers.add(k, v)
# HTTPResponse objects in Python 3 don't have a .strict attribute
strict = getattr(r, 'strict', 0)
elif hasattr(self._fp, "fileno"):
return self._fp.fileno()
else:
- raise IOError("The file-like object this HTTPResponse is wrapped "
+ raise IOError("The file-like object this HTTPResponse is wrapped "
"around has no file descriptor")
def flush(self):
return self._fp.flush()
def readable(self):
+ # This method is required for `io` module compatibility.
return True
+
+    def readinto(self, b):
+        """
+        Read up to ``len(b)`` bytes into the writable buffer ``b``.
+
+        This method is required for `io` module compatibility.
+
+        :param b:
+            Pre-allocated, writable buffer (such as a ``bytearray``) that
+            receives the data returned by :meth:`read`.
+
+        :return:
+            The number of bytes stored in ``b``; ``0`` when :meth:`read`
+            returned no data.
+        """
+        chunk = self.read(len(b))
+        count = len(chunk)
+        if count:
+            # Only the leading ``count`` bytes are overwritten; the remainder
+            # of the buffer is left untouched.
+            b[:count] = chunk
+        return count
+++ /dev/null
-# urllib3/util.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
-
-from base64 import b64encode
-from binascii import hexlify, unhexlify
-from collections import namedtuple
-from hashlib import md5, sha1
-from socket import error as SocketError, _GLOBAL_DEFAULT_TIMEOUT
-import time
-
-try:
- from select import poll, POLLIN
-except ImportError: # `poll` doesn't exist on OSX and other platforms
- poll = False
- try:
- from select import select
- except ImportError: # `select` doesn't exist on AppEngine.
- select = False
-
-try: # Test for SSL features
- SSLContext = None
- HAS_SNI = False
-
- import ssl
- from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23
- from ssl import SSLContext # Modern SSL?
- from ssl import HAS_SNI # Has SNI?
-except ImportError:
- pass
-
-from .packages import six
-from .exceptions import LocationParseError, SSLError, TimeoutStateError
-
-
-_Default = object()
-# The default timeout to use for socket connections. This is the attribute used
-# by httplib to define the default timeout
-
-
-def current_time():
- """
- Retrieve the current time, this function is mocked out in unit testing.
- """
- return time.time()
-
-
-class Timeout(object):
- """
- Utility object for storing timeout values.
-
- Example usage:
-
- .. code-block:: python
-
- timeout = urllib3.util.Timeout(connect=2.0, read=7.0)
- pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout)
- pool.request(...) # Etc, etc
-
- :param connect:
- The maximum amount of time to wait for a connection attempt to a server
- to succeed. Omitting the parameter will default the connect timeout to
- the system default, probably `the global default timeout in socket.py
- <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
- None will set an infinite timeout for connection attempts.
-
- :type connect: integer, float, or None
-
- :param read:
- The maximum amount of time to wait between consecutive
- read operations for a response from the server. Omitting
- the parameter will default the read timeout to the system
- default, probably `the global default timeout in socket.py
- <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
- None will set an infinite timeout.
-
- :type read: integer, float, or None
-
- :param total:
- This combines the connect and read timeouts into one; the read timeout
- will be set to the time leftover from the connect attempt. In the
- event that both a connect timeout and a total are specified, or a read
- timeout and a total are specified, the shorter timeout will be applied.
-
- Defaults to None.
-
- :type total: integer, float, or None
-
- .. note::
-
- Many factors can affect the total amount of time for urllib3 to return
- an HTTP response. Specifically, Python's DNS resolver does not obey the
- timeout specified on the socket. Other factors that can affect total
- request time include high CPU load, high swap, the program running at a
- low priority level, or other behaviors. The observed running time for
- urllib3 to return a response may be greater than the value passed to
- `total`.
-
- In addition, the read and total timeouts only measure the time between
- read operations on the socket connecting the client and the server,
- not the total amount of time for the request to return a complete
- response. For most requests, the timeout is raised because the server
- has not sent the first byte in the specified time. This is not always
- the case; if a server streams one byte every fifteen seconds, a timeout
- of 20 seconds will not ever trigger, even though the request will
- take several minutes to complete.
-
- If your goal is to cut off any request after a set amount of wall clock
- time, consider having a second "watcher" thread to cut off a slow
- request.
- """
-
- #: A sentinel object representing the default timeout value
- DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT
-
- def __init__(self, total=None, connect=_Default, read=_Default):
- self._connect = self._validate_timeout(connect, 'connect')
- self._read = self._validate_timeout(read, 'read')
- self.total = self._validate_timeout(total, 'total')
- self._start_connect = None
-
- def __str__(self):
- return '%s(connect=%r, read=%r, total=%r)' % (
- type(self).__name__, self._connect, self._read, self.total)
-
-
- @classmethod
- def _validate_timeout(cls, value, name):
- """ Check that a timeout attribute is valid
-
- :param value: The timeout value to validate
- :param name: The name of the timeout attribute to validate. This is used
- for clear error messages
- :return: the value
- :raises ValueError: if the type is not an integer or a float, or if it
- is a numeric value less than zero
- """
- if value is _Default:
- return cls.DEFAULT_TIMEOUT
-
- if value is None or value is cls.DEFAULT_TIMEOUT:
- return value
-
- try:
- float(value)
- except (TypeError, ValueError):
- raise ValueError("Timeout value %s was %s, but it must be an "
- "int or float." % (name, value))
-
- try:
- if value < 0:
- raise ValueError("Attempted to set %s timeout to %s, but the "
- "timeout cannot be set to a value less "
- "than 0." % (name, value))
- except TypeError: # Python 3
- raise ValueError("Timeout value %s was %s, but it must be an "
- "int or float." % (name, value))
-
- return value
-
- @classmethod
- def from_float(cls, timeout):
- """ Create a new Timeout from a legacy timeout value.
-
- The timeout value used by httplib.py sets the same timeout on the
- connect(), and recv() socket requests. This creates a :class:`Timeout`
- object that sets the individual timeouts to the ``timeout`` value passed
- to this function.
-
- :param timeout: The legacy timeout value
- :type timeout: integer, float, sentinel default object, or None
- :return: a Timeout object
- :rtype: :class:`Timeout`
- """
- return Timeout(read=timeout, connect=timeout)
-
- def clone(self):
- """ Create a copy of the timeout object
-
- Timeout properties are stored per-pool but each request needs a fresh
- Timeout object to ensure each one has its own start/stop configured.
-
- :return: a copy of the timeout object
- :rtype: :class:`Timeout`
- """
- # We can't use copy.deepcopy because that will also create a new object
- # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to
- # detect the user default.
- return Timeout(connect=self._connect, read=self._read,
- total=self.total)
-
- def start_connect(self):
- """ Start the timeout clock, used during a connect() attempt
-
- :raises urllib3.exceptions.TimeoutStateError: if you attempt
- to start a timer that has been started already.
- """
- if self._start_connect is not None:
- raise TimeoutStateError("Timeout timer has already been started.")
- self._start_connect = current_time()
- return self._start_connect
-
- def get_connect_duration(self):
- """ Gets the time elapsed since the call to :meth:`start_connect`.
-
- :return: the elapsed time
- :rtype: float
- :raises urllib3.exceptions.TimeoutStateError: if you attempt
- to get duration for a timer that hasn't been started.
- """
- if self._start_connect is None:
- raise TimeoutStateError("Can't get connect duration for timer "
- "that has not started.")
- return current_time() - self._start_connect
-
- @property
- def connect_timeout(self):
- """ Get the value to use when setting a connection timeout.
-
- This will be a positive float or integer, the value None
- (never timeout), or the default system timeout.
-
- :return: the connect timeout
- :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
- """
- if self.total is None:
- return self._connect
-
- if self._connect is None or self._connect is self.DEFAULT_TIMEOUT:
- return self.total
-
- return min(self._connect, self.total)
-
- @property
- def read_timeout(self):
- """ Get the value for the read timeout.
-
- This assumes some time has elapsed in the connection timeout and
- computes the read timeout appropriately.
-
- If self.total is set, the read timeout is dependent on the amount of
- time taken by the connect timeout. If the connection time has not been
- established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be
- raised.
-
- :return: the value to use for the read timeout
- :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
- :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect`
- has not yet been called on this object.
- """
- if (self.total is not None and
- self.total is not self.DEFAULT_TIMEOUT and
- self._read is not None and
- self._read is not self.DEFAULT_TIMEOUT):
- # in case the connect timeout has not yet been established.
- if self._start_connect is None:
- return self._read
- return max(0, min(self.total - self.get_connect_duration(),
- self._read))
- elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT:
- return max(0, self.total - self.get_connect_duration())
- else:
- return self._read
-
-
-class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])):
- """
- Datastructure for representing an HTTP URL. Used as a return value for
- :func:`parse_url`.
- """
- slots = ()
-
- def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None):
- return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment)
-
- @property
- def hostname(self):
- """For backwards-compatibility with urlparse. We're nice like that."""
- return self.host
-
- @property
- def request_uri(self):
- """Absolute path including the query string."""
- uri = self.path or '/'
-
- if self.query is not None:
- uri += '?' + self.query
-
- return uri
-
- @property
- def netloc(self):
- """Network location including host and port"""
- if self.port:
- return '%s:%d' % (self.host, self.port)
- return self.host
-
-
-def split_first(s, delims):
- """
- Given a string and an iterable of delimiters, split on the first found
- delimiter. Return two split parts and the matched delimiter.
-
- If not found, then the first part is the full input string.
-
- Example: ::
-
- >>> split_first('foo/bar?baz', '?/=')
- ('foo', 'bar?baz', '/')
- >>> split_first('foo/bar?baz', '123')
- ('foo/bar?baz', '', None)
-
- Scales linearly with number of delims. Not ideal for large number of delims.
- """
- min_idx = None
- min_delim = None
- for d in delims:
- idx = s.find(d)
- if idx < 0:
- continue
-
- if min_idx is None or idx < min_idx:
- min_idx = idx
- min_delim = d
-
- if min_idx is None or min_idx < 0:
- return s, '', None
-
- return s[:min_idx], s[min_idx+1:], min_delim
-
-
-def parse_url(url):
- """
- Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
- performed to parse incomplete urls. Fields not provided will be None.
-
- Partly backwards-compatible with :mod:`urlparse`.
-
- Example: ::
-
- >>> parse_url('http://google.com/mail/')
- Url(scheme='http', host='google.com', port=None, path='/', ...)
- >>> parse_url('google.com:80')
- Url(scheme=None, host='google.com', port=80, path=None, ...)
- >>> parse_url('/foo?bar')
- Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
- """
-
- # While this code has overlap with stdlib's urlparse, it is much
- # simplified for our needs and less annoying.
- # Additionally, this implementations does silly things to be optimal
- # on CPython.
-
- scheme = None
- auth = None
- host = None
- port = None
- path = None
- fragment = None
- query = None
-
- # Scheme
- if '://' in url:
- scheme, url = url.split('://', 1)
-
- # Find the earliest Authority Terminator
- # (http://tools.ietf.org/html/rfc3986#section-3.2)
- url, path_, delim = split_first(url, ['/', '?', '#'])
-
- if delim:
- # Reassemble the path
- path = delim + path_
-
- # Auth
- if '@' in url:
- # Last '@' denotes end of auth part
- auth, url = url.rsplit('@', 1)
-
- # IPv6
- if url and url[0] == '[':
- host, url = url.split(']', 1)
- host += ']'
-
- # Port
- if ':' in url:
- _host, port = url.split(':', 1)
-
- if not host:
- host = _host
-
- if port:
- # If given, ports must be integers.
- if not port.isdigit():
- raise LocationParseError("Failed to parse: %s" % url)
- port = int(port)
- else:
- # Blank ports are cool, too. (rfc3986#section-3.2.3)
- port = None
-
- elif not host and url:
- host = url
-
- if not path:
- return Url(scheme, auth, host, port, path, query, fragment)
-
- # Fragment
- if '#' in path:
- path, fragment = path.split('#', 1)
-
- # Query
- if '?' in path:
- path, query = path.split('?', 1)
-
- return Url(scheme, auth, host, port, path, query, fragment)
-
-
-def get_host(url):
- """
- Deprecated. Use :func:`.parse_url` instead.
- """
- p = parse_url(url)
- return p.scheme or 'http', p.hostname, p.port
-
-
-def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
- basic_auth=None):
- """
- Shortcuts for generating request headers.
-
- :param keep_alive:
- If ``True``, adds 'connection: keep-alive' header.
-
- :param accept_encoding:
- Can be a boolean, list, or string.
- ``True`` translates to 'gzip,deflate'.
- List will get joined by comma.
- String will be used as provided.
-
- :param user_agent:
- String representing the user-agent you want, such as
- "python-urllib3/0.6"
-
- :param basic_auth:
- Colon-separated username:password string for 'authorization: basic ...'
- auth header.
-
- Example: ::
-
- >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
- {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
- >>> make_headers(accept_encoding=True)
- {'accept-encoding': 'gzip,deflate'}
- """
- headers = {}
- if accept_encoding:
- if isinstance(accept_encoding, str):
- pass
- elif isinstance(accept_encoding, list):
- accept_encoding = ','.join(accept_encoding)
- else:
- accept_encoding = 'gzip,deflate'
- headers['accept-encoding'] = accept_encoding
-
- if user_agent:
- headers['user-agent'] = user_agent
-
- if keep_alive:
- headers['connection'] = 'keep-alive'
-
- if basic_auth:
- headers['authorization'] = 'Basic ' + \
- b64encode(six.b(basic_auth)).decode('utf-8')
-
- return headers
-
-
-def is_connection_dropped(conn): # Platform-specific
- """
- Returns True if the connection is dropped and should be closed.
-
- :param conn:
- :class:`httplib.HTTPConnection` object.
-
- Note: For platforms like AppEngine, this will always return ``False`` to
- let the platform handle connection recycling transparently for us.
- """
- sock = getattr(conn, 'sock', False)
- if not sock: # Platform-specific: AppEngine
- return False
-
- if not poll:
- if not select: # Platform-specific: AppEngine
- return False
-
- try:
- return select([sock], [], [], 0.0)[0]
- except SocketError:
- return True
-
- # This version is better on platforms that support it.
- p = poll()
- p.register(sock, POLLIN)
- for (fno, ev) in p.poll(0.0):
- if fno == sock.fileno():
- # Either data is buffered (bad), or the connection is dropped.
- return True
-
-
-def resolve_cert_reqs(candidate):
- """
- Resolves the argument to a numeric constant, which can be passed to
- the wrap_socket function/method from the ssl module.
- Defaults to :data:`ssl.CERT_NONE`.
- If given a string it is assumed to be the name of the constant in the
- :mod:`ssl` module or its abbrevation.
- (So you can specify `REQUIRED` instead of `CERT_REQUIRED`.
- If it's neither `None` nor a string we assume it is already the numeric
- constant which can directly be passed to wrap_socket.
- """
- if candidate is None:
- return CERT_NONE
-
- if isinstance(candidate, str):
- res = getattr(ssl, candidate, None)
- if res is None:
- res = getattr(ssl, 'CERT_' + candidate)
- return res
-
- return candidate
-
-
-def resolve_ssl_version(candidate):
- """
- like resolve_cert_reqs
- """
- if candidate is None:
- return PROTOCOL_SSLv23
-
- if isinstance(candidate, str):
- res = getattr(ssl, candidate, None)
- if res is None:
- res = getattr(ssl, 'PROTOCOL_' + candidate)
- return res
-
- return candidate
-
-
-def assert_fingerprint(cert, fingerprint):
- """
- Checks if given fingerprint matches the supplied certificate.
-
- :param cert:
- Certificate as bytes object.
- :param fingerprint:
- Fingerprint as string of hexdigits, can be interspersed by colons.
- """
-
- # Maps the length of a digest to a possible hash function producing
- # this digest.
- hashfunc_map = {
- 16: md5,
- 20: sha1
- }
-
- fingerprint = fingerprint.replace(':', '').lower()
-
- digest_length, rest = divmod(len(fingerprint), 2)
-
- if rest or digest_length not in hashfunc_map:
- raise SSLError('Fingerprint is of invalid length.')
-
- # We need encode() here for py32; works on py2 and p33.
- fingerprint_bytes = unhexlify(fingerprint.encode())
-
- hashfunc = hashfunc_map[digest_length]
-
- cert_digest = hashfunc(cert).digest()
-
- if not cert_digest == fingerprint_bytes:
- raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".'
- .format(hexlify(fingerprint_bytes),
- hexlify(cert_digest)))
-
-def is_fp_closed(obj):
- """
- Checks whether a given file-like object is closed.
-
- :param obj:
- The file-like object to check.
- """
- if hasattr(obj, 'fp'):
- # Object is a container for another file-like object that gets released
- # on exhaustion (e.g. HTTPResponse)
- return obj.fp is None
-
- return obj.closed
-
-
-if SSLContext is not None: # Python 3.2+
- def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
- ca_certs=None, server_hostname=None,
- ssl_version=None):
- """
- All arguments except `server_hostname` have the same meaning as for
- :func:`ssl.wrap_socket`
-
- :param server_hostname:
- Hostname of the expected certificate
- """
- context = SSLContext(ssl_version)
- context.verify_mode = cert_reqs
- if ca_certs:
- try:
- context.load_verify_locations(ca_certs)
- # Py32 raises IOError
- # Py33 raises FileNotFoundError
- except Exception as e: # Reraise as SSLError
- raise SSLError(e)
- if certfile:
- # FIXME: This block needs a test.
- context.load_cert_chain(certfile, keyfile)
- if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI
- return context.wrap_socket(sock, server_hostname=server_hostname)
- return context.wrap_socket(sock)
-
-else: # Python 3.1 and earlier
- def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
- ca_certs=None, server_hostname=None,
- ssl_version=None):
- return wrap_socket(sock, keyfile=keyfile, certfile=certfile,
- ca_certs=ca_certs, cert_reqs=cert_reqs,
- ssl_version=ssl_version)
--- /dev/null
+# For backwards compatibility, provide imports that used to be here.
+from .connection import is_connection_dropped
+from .request import make_headers
+from .response import is_fp_closed
+from .ssl_ import (
+ SSLContext,
+ HAS_SNI,
+ assert_fingerprint,
+ resolve_cert_reqs,
+ resolve_ssl_version,
+ ssl_wrap_socket,
+)
+from .timeout import (
+ current_time,
+ Timeout,
+)
+
+from .retry import Retry
+from .url import (
+ get_host,
+ parse_url,
+ split_first,
+ Url,
+)
--- /dev/null
+import socket
+try:
+ from select import poll, POLLIN
+except ImportError: # `poll` doesn't exist on OSX and other platforms
+ poll = False
+ try:
+ from select import select
+ except ImportError: # `select` doesn't exist on AppEngine.
+ select = False
+
+
+def is_connection_dropped(conn):  # Platform-specific
+    """
+    Returns True if the connection is dropped and should be closed.
+
+    :param conn:
+        :class:`httplib.HTTPConnection` object.
+
+    :return:
+        ``True`` when ``conn.sock`` is ``None``, when polling reports an event
+        for the socket, or when ``select`` raises a socket error; ``False``
+        otherwise. The result is always a real ``bool``.
+
+    Note: For platforms like AppEngine, this will always return ``False`` to
+    let the platform handle connection recycling transparently for us.
+    """
+    sock = getattr(conn, 'sock', False)
+    if sock is False:  # Platform-specific: AppEngine
+        return False
+    if sock is None:  # Connection already closed (such as by httplib).
+        return True
+
+    if not poll:
+        if not select:  # Platform-specific: AppEngine
+            return False
+
+        try:
+            # ``select`` hands back the list of readable sockets; collapse it
+            # to a bool so both code paths return the same type.
+            return bool(select([sock], [], [], 0.0)[0])
+        except socket.error:
+            return True
+
+    # This version is better on platforms that support it.
+    p = poll()
+    p.register(sock, POLLIN)
+    for (fno, ev) in p.poll(0.0):
+        if fno == sock.fileno():
+            # Either data is buffered (bad), or the connection is dropped.
+            return True
+
+    # No event was reported for our socket, so it is still usable. Returning
+    # False explicitly avoids falling off the end with ``None``.
+    return False
+
+
+# This function is copied from socket.py in the Python 2.7 standard
+# library test suite. Added to its signature is only `socket_options`.
+def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+                      source_address=None, socket_options=None):
+    """Connect to *address* and return the connected socket object.
+
+    *address* is a 2-tuple ``(host, port)``. Every result returned by
+    :func:`socket.getaddrinfo` for it is tried in order, and the first socket
+    that connects successfully is returned.
+
+    :param timeout:
+        Timeout set on the socket before attempting to connect. When left at
+        the ``socket._GLOBAL_DEFAULT_TIMEOUT`` sentinel, ``settimeout`` is not
+        called at all.
+
+    :param source_address:
+        Optional ``(host, port)`` tuple for the socket to bind to as its
+        source address before connecting.
+
+    :param socket_options:
+        Optional iterable of argument tuples; each one is passed to
+        ``sock.setsockopt`` before connecting.
+
+    :raises socket.error:
+        The error of the last failed attempt, or a generic error when
+        ``getaddrinfo`` produced no results.
+    """
+    host, port = address
+    last_error = None
+    candidates = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
+
+    for af, socktype, proto, canonname, sa in candidates:
+        sock = None
+        try:
+            sock = socket.socket(af, socktype, proto)
+
+            # If provided, set socket level options before connecting.
+            # This is the only addition urllib3 makes to this function.
+            _set_socket_options(sock, socket_options)
+
+            if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
+                sock.settimeout(timeout)
+            if source_address:
+                sock.bind(source_address)
+            sock.connect(sa)
+            return sock
+
+        except socket.error as exc:
+            # Remember the failure and release the half-open socket before
+            # moving on to the next candidate address.
+            last_error = exc
+            if sock is not None:
+                sock.close()
+
+    if last_error is None:
+        raise socket.error("getaddrinfo returns an empty list")
+    raise last_error
+
+
+def _set_socket_options(sock, options):
+    """
+    Apply every entry of ``options`` to ``sock`` via ``setsockopt``.
+
+    :param sock:
+        Object exposing a ``setsockopt`` method.
+    :param options:
+        Iterable of argument tuples for ``setsockopt``, or ``None`` to leave
+        the socket untouched.
+    """
+    if options is not None:
+        for option_args in options:
+            sock.setsockopt(*option_args)
--- /dev/null
+from base64 import b64encode
+
+from ..packages.six import b
+
+ACCEPT_ENCODING = 'gzip,deflate'
+
+
+def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
+                 basic_auth=None, proxy_basic_auth=None, disable_cache=None):
+    """
+    Build a dictionary of commonly needed request headers.
+
+    Only headers whose corresponding argument is truthy are included.
+
+    :param keep_alive:
+        If ``True``, adds 'connection: keep-alive' header.
+
+    :param accept_encoding:
+        Can be a boolean, list, or string.
+        ``True`` translates to 'gzip,deflate'.
+        List will get joined by comma.
+        String will be used as provided.
+
+    :param user_agent:
+        String representing the user-agent you want, such as
+        "python-urllib3/0.6"
+
+    :param basic_auth:
+        Colon-separated username:password string for 'authorization: basic ...'
+        auth header.
+
+    :param proxy_basic_auth:
+        Colon-separated username:password string for
+        'proxy-authorization: basic ...' auth header.
+
+    :param disable_cache:
+        If ``True``, adds 'cache-control: no-cache' header.
+
+    :return:
+        A new ``dict`` mapping lowercase header names to their values.
+
+    Example::
+
+        >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
+        {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
+        >>> make_headers(accept_encoding=True)
+        {'accept-encoding': 'gzip,deflate'}
+    """
+    headers = {}
+
+    if accept_encoding:
+        if isinstance(accept_encoding, list):
+            accept_encoding = ','.join(accept_encoding)
+        elif not isinstance(accept_encoding, str):
+            # Any other truthy value (e.g. ``True``) selects the default.
+            accept_encoding = ACCEPT_ENCODING
+        headers['accept-encoding'] = accept_encoding
+
+    if user_agent:
+        headers['user-agent'] = user_agent
+
+    if keep_alive:
+        headers['connection'] = 'keep-alive'
+
+    if basic_auth:
+        credentials = b64encode(b(basic_auth)).decode('utf-8')
+        headers['authorization'] = 'Basic ' + credentials
+
+    if proxy_basic_auth:
+        proxy_credentials = b64encode(b(proxy_basic_auth)).decode('utf-8')
+        headers['proxy-authorization'] = 'Basic ' + proxy_credentials
+
+    if disable_cache:
+        headers['cache-control'] = 'no-cache'
+
+    return headers
--- /dev/null
+def is_fp_closed(obj):
+    """
+    Report whether a file-like object is closed.
+
+    :param obj:
+        The file-like object to check.
+
+    :return:
+        The object's ``closed`` attribute when it has one; otherwise whether
+        its ``fp`` attribute is ``None``.
+
+    :raises ValueError:
+        If the object exposes neither ``closed`` nor ``fp``.
+    """
+    missing = object()
+
+    # Check via the official file-like-object way first.
+    closed = getattr(obj, 'closed', missing)
+    if closed is not missing:
+        return closed
+
+    # Otherwise the object may be a container for another file-like object
+    # that gets released on exhaustion (e.g. HTTPResponse).
+    inner = getattr(obj, 'fp', missing)
+    if inner is not missing:
+        return inner is None
+
+    raise ValueError("Unable to determine whether fp is closed.")
--- /dev/null
+import time
+import logging
+
+from ..exceptions import (
+ ProtocolError,
+ ConnectTimeoutError,
+ ReadTimeoutError,
+ MaxRetryError,
+)
+from ..packages import six
+
+
+log = logging.getLogger(__name__)
+
+
+class Retry(object):
+ """ Retry configuration.
+
+ Each retry attempt will create a new Retry object with updated values, so
+ they can be safely reused.
+
+ Retries can be defined as a default for a pool::
+
+ retries = Retry(connect=5, read=2, redirect=5)
+ http = PoolManager(retries=retries)
+ response = http.request('GET', 'http://example.com/')
+
+ Or per-request (which overrides the default for the pool)::
+
+ response = http.request('GET', 'http://example.com/', retries=Retry(10))
+
+ Retries can be disabled by passing ``False``::
+
+ response = http.request('GET', 'http://example.com/', retries=False)
+
+ Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless
+ retries are disabled, in which case the causing exception will be raised.
+
+
+ :param int total:
+ Total number of retries to allow. Takes precedence over other counts.
+
+ Set to ``None`` to remove this constraint and fall back on other
+ counts. It's a good idea to set this to some sensibly-high value to
+ account for unexpected edge cases and avoid infinite retry loops.
+
+ Set to ``0`` to fail on the first retry.
+
+ Set to ``False`` to disable and imply ``raise_on_redirect=False``.
+
+ :param int connect:
+ How many connection-related errors to retry on.
+
+ These are errors raised before the request is sent to the remote server,
+ which we assume has not triggered the server to process the request.
+
+ Set to ``0`` to fail on the first retry of this type.
+
+ :param int read:
+ How many times to retry on read errors.
+
+ These errors are raised after the request was sent to the server, so the
+ request may have side-effects.
+
+ Set to ``0`` to fail on the first retry of this type.
+
+ :param int redirect:
+ How many redirects to perform. Limit this to avoid infinite redirect
+ loops.
+
+ A redirect is a HTTP response with a status code 301, 302, 303, 307 or
+ 308.
+
+ Set to ``0`` to fail on the first retry of this type.
+
+ Set to ``False`` to disable and imply ``raise_on_redirect=False``.
+
+ :param iterable method_whitelist:
+ Set of uppercased HTTP method verbs that we should retry on.
+
+ By default, we only retry on methods which are considered to be
+ indempotent (multiple requests with the same parameters end with the
+ same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`.
+
+ :param iterable status_forcelist:
+ A set of HTTP status codes that we should force a retry on.
+
+ By default, this is disabled with ``None``.
+
+ :param float backoff_factor:
+ A backoff factor to apply between attempts. urllib3 will sleep for::
+
+ {backoff factor} * (2 ^ ({number of total retries} - 1))
+
+ seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep
+ for [0.1s, 0.2s, 0.4s, ...] between retries. It will never be longer
+ than :attr:`Retry.MAX_BACKOFF`.
+
+ By default, backoff is disabled (set to 0).
+
+ :param bool raise_on_redirect: Whether, if the number of redirects is
+ exhausted, to raise a MaxRetryError, or to return a response with a
+ response code in the 3xx range.
+ """
+
+ DEFAULT_METHOD_WHITELIST = frozenset([
+ 'HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE'])
+
+ #: Maximum backoff time.
+ BACKOFF_MAX = 120
+
+ def __init__(self, total=10, connect=None, read=None, redirect=None,
+ method_whitelist=DEFAULT_METHOD_WHITELIST, status_forcelist=None,
+ backoff_factor=0, raise_on_redirect=True, _observed_errors=0):
+
+ self.total = total
+ self.connect = connect
+ self.read = read
+
+ if redirect is False or total is False:
+ redirect = 0
+ raise_on_redirect = False
+
+ self.redirect = redirect
+ self.status_forcelist = status_forcelist or set()
+ self.method_whitelist = method_whitelist
+ self.backoff_factor = backoff_factor
+ self.raise_on_redirect = raise_on_redirect
+ self._observed_errors = _observed_errors # TODO: use .history instead?
+
+ def new(self, **kw):
+ params = dict(
+ total=self.total,
+ connect=self.connect, read=self.read, redirect=self.redirect,
+ method_whitelist=self.method_whitelist,
+ status_forcelist=self.status_forcelist,
+ backoff_factor=self.backoff_factor,
+ raise_on_redirect=self.raise_on_redirect,
+ _observed_errors=self._observed_errors,
+ )
+ params.update(kw)
+ return type(self)(**params)
+
+ @classmethod
+ def from_int(cls, retries, redirect=True, default=None):
+ """ Backwards-compatibility for the old retries format."""
+ if retries is None:
+ retries = default if default is not None else cls.DEFAULT
+
+ if isinstance(retries, Retry):
+ return retries
+
+ redirect = bool(redirect) and None
+ new_retries = cls(retries, redirect=redirect)
+ log.debug("Converted retries value: %r -> %r" % (retries, new_retries))
+ return new_retries
+
+ def get_backoff_time(self):
+ """ Formula for computing the current backoff
+
+ :rtype: float
+ """
+ if self._observed_errors <= 1:
+ return 0
+
+ backoff_value = self.backoff_factor * (2 ** (self._observed_errors - 1))
+ return min(self.BACKOFF_MAX, backoff_value)
+
+ def sleep(self):
+ """ Sleep between retry attempts using an exponential backoff.
+
+ By default, the backoff factor is 0 and this method will return
+ immediately.
+ """
+ backoff = self.get_backoff_time()
+ if backoff <= 0:
+ return
+ time.sleep(backoff)
+
+ def _is_connection_error(self, err):
+ """ Errors when we're fairly sure that the server did not receive the
+ request, so it should be safe to retry.
+ """
+ return isinstance(err, ConnectTimeoutError)
+
+ def _is_read_error(self, err):
+ """ Errors that occur after the request has been started, so we can't
+ assume that the server did not process any of it.
+ """
+ return isinstance(err, (ReadTimeoutError, ProtocolError))
+
+ def is_forced_retry(self, method, status_code):
+ """ Is this method/response retryable? (Based on method/codes whitelists)
+ """
+ if self.method_whitelist and method.upper() not in self.method_whitelist:
+ return False
+
+ return self.status_forcelist and status_code in self.status_forcelist
+
+ def is_exhausted(self):
+ """ Are we out of retries?
+ """
+ retry_counts = (self.total, self.connect, self.read, self.redirect)
+ retry_counts = list(filter(None, retry_counts))
+ if not retry_counts:
+ return False
+
+ return min(retry_counts) < 0
+
+    def increment(self, method=None, url=None, response=None, error=None, _pool=None, _stacktrace=None):
+        """ Return a new Retry object with incremented retry counters.
+
+        The ``total`` counter is decremented on every call (unless it is
+        ``None``). Exactly one of the ``connect``, ``read`` or ``redirect``
+        counters is then decremented, depending on whether ``error`` is a
+        connection error, ``error`` is a read error, or ``response`` carries
+        a redirect location.
+
+        :param method: Accepted for interface compatibility; not used here.
+        :param url: URL of the request; used for the raised error and logging.
+        :param response: A response object, or None, if the server did not
+            return a response.
+        :type response: :class:`~urllib3.response.HTTPResponse`
+        :param Exception error: An error encountered during the request, or
+            None if the response was received successfully.
+        :param _pool: Passed through to :class:`MaxRetryError`.
+        :param _stacktrace: Traceback used when ``error`` is re-raised.
+
+        :return: A new ``Retry`` object.
+        :raises MaxRetryError: If the new ``Retry`` object is exhausted.
+        """
+        if self.total is False and error:
+            # Disabled, indicate to re-raise the error.
+            raise six.reraise(type(error), error, _stacktrace)
+
+        total = self.total
+        if total is not None:
+            total -= 1
+
+        _observed_errors = self._observed_errors
+        connect = self.connect
+        read = self.read
+        redirect = self.redirect
+
+        if error and self._is_connection_error(error):
+            # Connect retry?
+            if connect is False:
+                raise six.reraise(type(error), error, _stacktrace)
+            elif connect is not None:
+                connect -= 1
+            _observed_errors += 1
+
+        elif error and self._is_read_error(error):
+            # Read retry?
+            if read is False:
+                raise six.reraise(type(error), error, _stacktrace)
+            elif read is not None:
+                read -= 1
+            _observed_errors += 1
+
+        elif response and response.get_redirect_location():
+            # Redirect retry?
+            if redirect is not None:
+                redirect -= 1
+
+        else:
+            # FIXME: Nothing changed, scenario doesn't make sense.
+            _observed_errors += 1
+
+        new_retry = self.new(
+            total=total,
+            connect=connect, read=read, redirect=redirect,
+            _observed_errors=_observed_errors)
+
+        if new_retry.is_exhausted():
+            raise MaxRetryError(_pool, url, error)
+
+        # Pass the arguments to the logger instead of pre-formatting, so the
+        # message is only rendered when DEBUG logging is actually enabled.
+        log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)
+
+        return new_retry
+
+
+    def __repr__(self):
+        """ Render the class name followed by the four retry counters. """
+        template = ('{cls.__name__}(total={self.total}, connect={self.connect}, '
+                    'read={self.read}, redirect={self.redirect})')
+        return template.format(cls=type(self), self=self)
+
+
+# For backwards compatibility (equivalent to pre-v1.9):
+# a shared default instance built as ``Retry(3)``; the positional argument is
+# presumably ``total`` -- confirm against ``Retry.__init__``.
+Retry.DEFAULT = Retry(3)
--- /dev/null
+from binascii import hexlify, unhexlify
+from hashlib import md5, sha1
+
+from ..exceptions import SSLError
+
+
+try: # Test for SSL features
+ SSLContext = None
+ HAS_SNI = False
+
+ import ssl
+ from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23
+ from ssl import SSLContext # Modern SSL?
+ from ssl import HAS_SNI # Has SNI?
+except ImportError:
+ pass
+
+
+def assert_fingerprint(cert, fingerprint):
+    """
+    Checks if given fingerprint matches the supplied certificate.
+
+    The hash function is chosen from the length of the fingerprint:
+    16 bytes selects MD5 and 20 bytes selects SHA-1.
+
+    :param cert:
+        Certificate as bytes object.
+    :param fingerprint:
+        Fingerprint as string of hexdigits, can be interspersed by colons.
+    :raises SSLError:
+        If the fingerprint has an unsupported length or does not match the
+        digest of ``cert``.
+    """
+
+    # Maps the length of a digest to a possible hash function producing
+    # this digest.
+    hashfunc_map = {
+        16: md5,
+        20: sha1
+    }
+
+    fingerprint = fingerprint.replace(':', '').lower()
+    digest_length, odd = divmod(len(fingerprint), 2)
+
+    if odd or digest_length not in hashfunc_map:
+        raise SSLError('Fingerprint is of invalid length.')
+
+    # We need encode() here for py32; works on py2 and p33.
+    fingerprint_bytes = unhexlify(fingerprint.encode())
+
+    hashfunc = hashfunc_map[digest_length]
+
+    cert_digest = hashfunc(cert).digest()
+
+    if cert_digest != fingerprint_bytes:
+        # hexlify() returns bytes on Python 3; decode so the message shows
+        # plain hex digits instead of a b'...' repr. The normalised
+        # ``fingerprint`` is already the lower-case hex text we expected.
+        raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".'
+                       .format(fingerprint,
+                               hexlify(cert_digest).decode('ascii')))
+
+
+def resolve_cert_reqs(candidate):
+    """
+    Resolves the argument to a numeric constant, which can be passed to
+    the wrap_socket function/method from the ssl module.
+
+    * ``None`` resolves to :data:`ssl.CERT_NONE`.
+    * A string is taken to be the name of a constant in the :mod:`ssl`
+      module, or its abbreviation (so you can specify `REQUIRED` instead of
+      `CERT_REQUIRED`).
+    * Anything else is assumed to already be the numeric constant and is
+      returned unchanged.
+    """
+    if candidate is None:
+        return CERT_NONE
+
+    if not isinstance(candidate, str):
+        return candidate
+
+    resolved = getattr(ssl, candidate, None)
+    if resolved is not None:
+        return resolved
+    # Fall back to the abbreviated spelling, e.g. 'REQUIRED'.
+    return getattr(ssl, 'CERT_' + candidate)
+
+
+def resolve_ssl_version(candidate):
+    """
+    Resolves the argument to a protocol constant of the :mod:`ssl` module.
+
+    * ``None`` resolves to :data:`ssl.PROTOCOL_SSLv23`.
+    * A string is taken to be the name of a constant in the :mod:`ssl`
+      module, or that name without its ``PROTOCOL_`` prefix.
+    * Anything else is returned unchanged.
+    """
+    if candidate is None:
+        return PROTOCOL_SSLv23
+
+    if not isinstance(candidate, str):
+        return candidate
+
+    resolved = getattr(ssl, candidate, None)
+    if resolved is not None:
+        return resolved
+    # Fall back to the abbreviated spelling, e.g. 'SSLv23'.
+    return getattr(ssl, 'PROTOCOL_' + candidate)
+
+
+# Two interchangeable definitions of ssl_wrap_socket: one built on SSLContext
+# when the import probe above found it, and a plain ssl.wrap_socket fallback.
+if SSLContext is not None: # Python 3.2+
+ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
+ ca_certs=None, server_hostname=None,
+ ssl_version=None):
+ """
+ Wrap ``sock`` using a freshly created :class:`ssl.SSLContext`.
+
+ All arguments except `server_hostname` have the same meaning as for
+ :func:`ssl.wrap_socket`
+
+ :param server_hostname:
+ Hostname of the expected certificate
+ :raises SSLError:
+ If loading ``ca_certs`` fails for any reason.
+ """
+ # NOTE(review): cert_reqs and ssl_version are used as given; presumably
+ # callers resolve them with resolve_cert_reqs / resolve_ssl_version first.
+ context = SSLContext(ssl_version)
+ context.verify_mode = cert_reqs
+
+ # Disable TLS compression to mitigate CRIME attack (issue #309)
+ OP_NO_COMPRESSION = 0x20000
+ context.options |= OP_NO_COMPRESSION
+
+ if ca_certs:
+ try:
+ context.load_verify_locations(ca_certs)
+ # Py32 raises IOError
+ # Py33 raises FileNotFoundError
+ except Exception as e: # Reraise as SSLError
+ raise SSLError(e)
+ if certfile:
+ # FIXME: This block needs a test.
+ context.load_cert_chain(certfile, keyfile)
+ if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI
+ return context.wrap_socket(sock, server_hostname=server_hostname)
+ # Without SNI support the hostname is not passed to wrap_socket.
+ return context.wrap_socket(sock)
+
+else: # Python 3.1 and earlier
+ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
+ ca_certs=None, server_hostname=None,
+ ssl_version=None):
+ """
+ Fallback that delegates to :func:`ssl.wrap_socket`.
+
+ ``server_hostname`` is accepted for signature compatibility but is not
+ forwarded.
+ """
+ return wrap_socket(sock, keyfile=keyfile, certfile=certfile,
+ ca_certs=ca_certs, cert_reqs=cert_reqs,
+ ssl_version=ssl_version)
--- /dev/null
+# The default socket timeout, used by httplib to indicate that no timeout was
+# specified by the user
+from socket import _GLOBAL_DEFAULT_TIMEOUT
+import time
+
+from ..exceptions import TimeoutStateError
+
+# A sentinel value to indicate that no timeout was specified by the user in
+# urllib3
+_Default = object()
+
+def current_time():
+    """
+    Return the current time as reported by :func:`time.time`.
+
+    Kept as a separate function because it is mocked out in unit testing.
+    """
+    now = time.time()
+    return now
+
+
+class Timeout(object):
+    """ Timeout configuration.
+
+    Timeouts can be defined as a default for a pool::
+
+        timeout = Timeout(connect=2.0, read=7.0)
+        http = PoolManager(timeout=timeout)
+        response = http.request('GET', 'http://example.com/')
+
+    Or per-request (which overrides the default for the pool)::
+
+        response = http.request('GET', 'http://example.com/', timeout=Timeout(10))
+
+    Timeouts can be disabled by setting all the parameters to ``None``::
+
+        no_timeout = Timeout(connect=None, read=None)
+        response = http.request('GET', 'http://example.com/', timeout=no_timeout)
+
+
+    :param total:
+        This combines the connect and read timeouts into one; the read timeout
+        will be set to the time leftover from the connect attempt. In the
+        event that both a connect timeout and a total are specified, or a read
+        timeout and a total are specified, the shorter timeout will be applied.
+
+        Defaults to None.
+
+    :type total: integer, float, or None
+
+    :param connect:
+        The maximum amount of time to wait for a connection attempt to a server
+        to succeed. Omitting the parameter will default the connect timeout to
+        the system default, probably `the global default timeout in socket.py
+        <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
+        None will set an infinite timeout for connection attempts.
+
+    :type connect: integer, float, or None
+
+    :param read:
+        The maximum amount of time to wait between consecutive
+        read operations for a response from the server. Omitting
+        the parameter will default the read timeout to the system
+        default, probably `the global default timeout in socket.py
+        <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
+        None will set an infinite timeout.
+
+    :type read: integer, float, or None
+
+    .. note::
+
+        Many factors can affect the total amount of time for urllib3 to return
+        an HTTP response.
+
+        For example, Python's DNS resolver does not obey the timeout specified
+        on the socket. Other factors that can affect total request time include
+        high CPU load, high swap, the program running at a low priority level,
+        or other behaviors.
+
+        In addition, the read and total timeouts only measure the time between
+        read operations on the socket connecting the client and the server,
+        not the total amount of time for the request to return a complete
+        response. For most requests, the timeout is raised because the server
+        has not sent the first byte in the specified time. This is not always
+        the case; if a server streams one byte every fifteen seconds, a timeout
+        of 20 seconds will not trigger, even though the request will take
+        several minutes to complete.
+
+        If your goal is to cut off any request after a set amount of wall clock
+        time, consider having a second "watcher" thread to cut off a slow
+        request.
+    """
+
+    #: A sentinel object representing the default timeout value
+    DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT
+
+    def __init__(self, total=None, connect=_Default, read=_Default):
+        self._connect = self._validate_timeout(connect, 'connect')
+        self._read = self._validate_timeout(read, 'read')
+        self.total = self._validate_timeout(total, 'total')
+        # Set by start_connect(); None means the clock has not been started.
+        self._start_connect = None
+
+    def __str__(self):
+        return '%s(connect=%r, read=%r, total=%r)' % (
+            type(self).__name__, self._connect, self._read, self.total)
+
+    # Use the same readable form for repr() so instances are identifiable in
+    # logs and tracebacks instead of showing a bare object address.
+    __repr__ = __str__
+
+    @classmethod
+    def _validate_timeout(cls, value, name):
+        """ Check that a timeout attribute is valid.
+
+        :param value: The timeout value to validate
+        :param name: The name of the timeout attribute to validate. This is
+            used to specify in error messages.
+        :return: The validated value. The ``_Default`` sentinel is replaced by
+            :attr:`DEFAULT_TIMEOUT`; every other accepted value is returned
+            unchanged.
+        :raises ValueError: If the type is not an integer or a float, or if it
+            is a numeric value less than zero.
+        """
+        if value is _Default:
+            return cls.DEFAULT_TIMEOUT
+
+        if value is None or value is cls.DEFAULT_TIMEOUT:
+            return value
+
+        try:
+            float(value)
+        except (TypeError, ValueError):
+            raise ValueError("Timeout value %s was %s, but it must be an "
+                             "int or float." % (name, value))
+
+        try:
+            if value < 0:
+                raise ValueError("Attempted to set %s timeout to %s, but the "
+                                 "timeout cannot be set to a value less "
+                                 "than 0." % (name, value))
+        except TypeError:  # Python 3
+            raise ValueError("Timeout value %s was %s, but it must be an "
+                             "int or float." % (name, value))
+
+        return value
+
+    @classmethod
+    def from_float(cls, timeout):
+        """ Create a new Timeout from a legacy timeout value.
+
+        The timeout value used by httplib.py sets the same timeout on the
+        connect(), and recv() socket requests. This creates a :class:`Timeout`
+        object that sets the individual timeouts to the ``timeout`` value
+        passed to this function.
+
+        :param timeout: The legacy timeout value.
+        :type timeout: integer, float, sentinel default object, or None
+        :return: Timeout object
+        :rtype: :class:`Timeout`
+        """
+        return Timeout(read=timeout, connect=timeout)
+
+    def clone(self):
+        """ Create a copy of the timeout object
+
+        Timeout properties are stored per-pool but each request needs a fresh
+        Timeout object to ensure each one has its own start/stop configured.
+
+        :return: a copy of the timeout object
+        :rtype: :class:`Timeout`
+        """
+        # We can't use copy.deepcopy because that will also create a new object
+        # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to
+        # detect the user default.
+        return Timeout(connect=self._connect, read=self._read,
+                       total=self.total)
+
+    def start_connect(self):
+        """ Start the timeout clock, used during a connect() attempt
+
+        :return: The start time, as returned by :func:`current_time`.
+        :raises urllib3.exceptions.TimeoutStateError: if you attempt
+            to start a timer that has been started already.
+        """
+        if self._start_connect is not None:
+            raise TimeoutStateError("Timeout timer has already been started.")
+        self._start_connect = current_time()
+        return self._start_connect
+
+    def get_connect_duration(self):
+        """ Gets the time elapsed since the call to :meth:`start_connect`.
+
+        :return: Elapsed time.
+        :rtype: float
+        :raises urllib3.exceptions.TimeoutStateError: if you attempt
+            to get duration for a timer that hasn't been started.
+        """
+        if self._start_connect is None:
+            raise TimeoutStateError("Can't get connect duration for timer "
+                                    "that has not started.")
+        return current_time() - self._start_connect
+
+    @property
+    def connect_timeout(self):
+        """ Get the value to use when setting a connection timeout.
+
+        This will be a positive float or integer, the value None
+        (never timeout), or the default system timeout.
+
+        :return: Connect timeout.
+        :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
+        """
+        if self.total is None:
+            return self._connect
+
+        if self._connect is None or self._connect is self.DEFAULT_TIMEOUT:
+            return self.total
+
+        return min(self._connect, self.total)
+
+    @property
+    def read_timeout(self):
+        """ Get the value for the read timeout.
+
+        This assumes some time has elapsed in the connection timeout and
+        computes the read timeout appropriately.
+
+        If self.total is set, the read timeout is dependent on the amount of
+        time taken by the connect timeout. When an explicit read timeout is
+        also set and the connect clock has not been started, the plain read
+        timeout is returned. When only the total is set and the connect clock
+        has not been started, a :exc:`~urllib3.exceptions.TimeoutStateError`
+        is raised.
+
+        :return: Value to use for the read timeout.
+        :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
+        :raises urllib3.exceptions.TimeoutStateError: If only ``total`` is
+            set and :meth:`start_connect` has not yet been called on this
+            object.
+        """
+        if (self.total is not None and
+            self.total is not self.DEFAULT_TIMEOUT and
+            self._read is not None and
+            self._read is not self.DEFAULT_TIMEOUT):
+            # In case the connect timeout has not yet been established.
+            if self._start_connect is None:
+                return self._read
+            return max(0, min(self.total - self.get_connect_duration(),
+                              self._read))
+        elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT:
+            return max(0, self.total - self.get_connect_duration())
+        else:
+            return self._read
--- /dev/null
+from collections import namedtuple
+
+from ..exceptions import LocationParseError
+
+
+url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
+
+
+class Url(namedtuple('Url', url_attrs)):
+    """
+    Datastructure for representing an HTTP URL. Used as a return value for
+    :func:`parse_url`.
+
+    Every field defaults to ``None`` so partially-known URLs can be built.
+    """
+    # The dunder spelling is required: a plain ``slots`` attribute is ignored
+    # by Python and every instance would still carry a per-instance __dict__.
+    __slots__ = ()
+
+    def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
+                query=None, fragment=None):
+        return super(Url, cls).__new__(cls, scheme, auth, host, port, path,
+                                       query, fragment)
+
+    @property
+    def hostname(self):
+        """For backwards-compatibility with urlparse. We're nice like that."""
+        return self.host
+
+    @property
+    def request_uri(self):
+        """Absolute path including the query string.
+
+        Falls back to ``'/'`` when no path is set; the query is appended
+        (after ``'?'``) whenever it is not ``None``, even if empty.
+        """
+        uri = self.path or '/'
+
+        if self.query is not None:
+            uri += '?' + self.query
+
+        return uri
+
+    @property
+    def netloc(self):
+        """Network location including host and port"""
+        if self.port:
+            return '%s:%d' % (self.host, self.port)
+        return self.host
+
+
+def split_first(s, delims):
+    """
+    Split ``s`` on whichever of ``delims`` occurs earliest in it.
+
+    Returns a 3-tuple ``(before, after, delimiter)``. When none of the
+    delimiters occurs, ``before`` is the full input string, ``after`` is
+    empty and ``delimiter`` is ``None``.
+
+    Example::
+
+        >>> split_first('foo/bar?baz', '?/=')
+        ('foo', 'bar?baz', '/')
+        >>> split_first('foo/bar?baz', '123')
+        ('foo/bar?baz', '', None)
+
+    Scales linearly with number of delims. Not ideal for large number of delims.
+    """
+    # Position of every delimiter that actually occurs, in iteration order.
+    hits = [(pos, delim)
+            for pos, delim in ((s.find(delim), delim) for delim in delims)
+            if pos >= 0]
+
+    if not hits:
+        return s, '', None
+
+    # min() keeps the first of equally-early hits, like a strict '<' scan.
+    cut, matched = min(hits, key=lambda hit: hit[0])
+    return s[:cut], s[cut + 1:], matched
+
+
+def parse_url(url):
+    """
+    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
+    performed to parse incomplete urls. Fields not provided will be None.
+
+    Partly backwards-compatible with :mod:`urlparse`.
+
+    Example::
+
+        >>> parse_url('http://google.com/mail/')
+        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
+        >>> parse_url('google.com:80')
+        Url(scheme=None, host='google.com', port=80, path=None, ...)
+        >>> parse_url('/foo?bar')
+        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
+
+    :raises LocationParseError: If the port is not a valid integer or an
+        IPv6 literal is missing its closing bracket.
+    """
+
+    # While this code has overlap with stdlib's urlparse, it is much
+    # simplified for our needs and less annoying.
+    # Additionally, this implementations does silly things to be optimal
+    # on CPython.
+
+    if not url:
+        # Empty
+        return Url()
+
+    scheme = None
+    auth = None
+    host = None
+    port = None
+    path = None
+    fragment = None
+    query = None
+
+    # Scheme
+    if '://' in url:
+        scheme, url = url.split('://', 1)
+
+    # Find the earliest Authority Terminator
+    # (http://tools.ietf.org/html/rfc3986#section-3.2)
+    url, path_, delim = split_first(url, ['/', '?', '#'])
+
+    if delim:
+        # Reassemble the path
+        path = delim + path_
+
+    # Auth
+    if '@' in url:
+        # Last '@' denotes end of auth part
+        auth, url = url.rsplit('@', 1)
+
+    # IPv6
+    if url and url[0] == '[':
+        if ']' not in url:
+            # Unterminated literal: report it as a parse failure rather than
+            # letting the tuple unpacking below raise an opaque ValueError.
+            raise LocationParseError(url)
+        host, url = url.split(']', 1)
+        host += ']'
+
+    # Port
+    if ':' in url:
+        _host, port = url.split(':', 1)
+
+        if not host:
+            host = _host
+
+        if port:
+            # If given, ports must be integers.
+            if not port.isdigit():
+                raise LocationParseError(url)
+            try:
+                port = int(port)
+            except ValueError:
+                # isdigit() also accepts digits such as superscripts that
+                # int() cannot convert.
+                raise LocationParseError(url)
+        else:
+            # Blank ports are cool, too. (rfc3986#section-3.2.3)
+            port = None
+
+    elif not host and url:
+        host = url
+
+    if not path:
+        return Url(scheme, auth, host, port, path, query, fragment)
+
+    # Fragment
+    if '#' in path:
+        path, fragment = path.split('#', 1)
+
+    # Query
+    if '?' in path:
+        path, query = path.split('?', 1)
+
+    return Url(scheme, auth, host, port, path, query, fragment)
+
+
+def get_host(url):
+    """
+    Deprecated. Use :func:`.parse_url` instead.
+
+    Returns a ``(scheme, hostname, port)`` tuple for ``url``; the scheme
+    falls back to ``'http'`` when the url does not carry one.
+    """
+    parsed = parse_url(url)
+    scheme = parsed.scheme or 'http'
+    return scheme, parsed.hostname, parsed.port
from collections import Mapping
from datetime import datetime
-from .compat import cookielib, OrderedDict, urljoin, urlparse, urlunparse
-from .cookies import cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar
-from .models import Request, PreparedRequest
+from .auth import _basic_auth_str
+from .compat import cookielib, OrderedDict, urljoin, urlparse, builtin_str
+from .cookies import (
+ cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar, merge_cookies)
+from .models import Request, PreparedRequest, DEFAULT_REDIRECT_LIMIT
from .hooks import default_hooks, dispatch_hook
-from .utils import to_key_val_list, default_headers
-from .exceptions import TooManyRedirects, InvalidSchema
+from .utils import to_key_val_list, default_headers, to_native_string
+from .exceptions import (
+ TooManyRedirects, InvalidSchema, ChunkedEncodingError, ContentDecodingError)
from .structures import CaseInsensitiveDict
from .adapters import HTTPAdapter
-from .utils import requote_uri, get_environ_proxies, get_netrc_auth
+from .utils import (
+ requote_uri, get_environ_proxies, get_netrc_auth, should_bypass_proxies,
+ get_auth_from_url
+)
from .status_codes import codes
-REDIRECT_STATI = (
- codes.moved, # 301
- codes.found, # 302
- codes.other, # 303
- codes.temporary_moved, # 307
-)
-DEFAULT_REDIRECT_LIMIT = 30
+
+# formerly defined here, reexposed here for backward compatibility
+from .models import REDIRECT_STATI
def merge_setting(request_setting, session_setting, dict_class=OrderedDict):
if v is None:
del merged_setting[k]
+ merged_setting = dict((k, v) for (k, v) in merged_setting.items() if v is not None)
+
return merged_setting
+def merge_hooks(request_hooks, session_hooks, dict_class=OrderedDict):
+    """
+    Properly merges both requests and session hooks.
+
+    This is necessary because when request_hooks == {'response': []}, the
+    merge breaks Session hooks entirely.
+
+    If one side is ``None`` or has an empty ``'response'`` list, the other
+    side is returned as-is (the session side is checked first); otherwise
+    both are combined through :func:`merge_setting`.
+    """
+    def has_no_response_hooks(hooks):
+        return hooks is None or hooks.get('response') == []
+
+    if has_no_response_hooks(session_hooks):
+        return request_hooks
+
+    if has_no_response_hooks(request_hooks):
+        return session_hooks
+
+    return merge_setting(request_hooks, session_hooks, dict_class)
+
+
class SessionRedirectMixin(object):
def resolve_redirects(self, resp, req, stream=False, timeout=None,
verify=True, cert=None, proxies=None):
"""Receives a Response. Returns a generator of Responses."""
i = 0
+ hist = [] # keep track of history
- # ((resp.status_code is codes.see_other))
- while ('location' in resp.headers and resp.status_code in REDIRECT_STATI):
+ while resp.is_redirect:
prepared_request = req.copy()
- resp.content # Consume socket so it can be released
+ if i > 0:
+ # Update history and keep track of redirects.
+ hist.append(resp)
+ new_hist = list(hist)
+ resp.history = new_hist
+
+ try:
+ resp.content # Consume socket so it can be released
+ except (ChunkedEncodingError, ContentDecodingError, RuntimeError):
+ resp.raw.read(decode_content=False)
if i >= self.max_redirects:
raise TooManyRedirects('Exceeded %s redirects.' % self.max_redirects)
# The scheme should be lower case...
parsed = urlparse(url)
- parsed = (parsed.scheme.lower(), parsed.netloc, parsed.path,
- parsed.params, parsed.query, parsed.fragment)
- url = urlunparse(parsed)
+ url = parsed.geturl()
- # Facilitate non-RFC2616-compliant 'location' headers
+ # Facilitate relative 'location' headers, as allowed by RFC 7231.
# (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')
# Compliant with RFC3986, we percent encode the url.
if not urlparse(url).netloc:
else:
url = requote_uri(url)
- prepared_request.url = url
+ prepared_request.url = to_native_string(url)
+ # Cache the url, unless it redirects to itself.
+ if resp.is_permanent_redirect and req.url != prepared_request.url:
+ self.redirect_cache[req.url] = prepared_request.url
- # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4
+ # http://tools.ietf.org/html/rfc7231#section-6.4.4
if (resp.status_code == codes.see_other and
method != 'HEAD'):
method = 'GET'
# Do what the browsers do, despite standards...
- if (resp.status_code in (codes.moved, codes.found) and
- method not in ('GET', 'HEAD')):
+ # First, turn 302s into GETs.
+ if resp.status_code == codes.found and method != 'HEAD':
+ method = 'GET'
+
+ # Second, if a POST is responded to with a 301, turn it into a GET.
+ # This bizarre behaviour is explained in Issue 1704.
+ if resp.status_code == codes.moved and method == 'POST':
method = 'GET'
prepared_request.method = method
# https://github.com/kennethreitz/requests/issues/1084
- if resp.status_code not in (codes.temporary, codes.resume):
+ if resp.status_code not in (codes.temporary_redirect, codes.permanent_redirect):
if 'Content-Length' in prepared_request.headers:
del prepared_request.headers['Content-Length']
except KeyError:
pass
- prepared_request.prepare_cookies(self.cookies)
+ extract_cookies_to_jar(prepared_request._cookies, prepared_request, resp.raw)
+ prepared_request._cookies.update(self.cookies)
+ prepared_request.prepare_cookies(prepared_request._cookies)
+
+ # Rebuild auth and proxy information.
+ proxies = self.rebuild_proxies(prepared_request, proxies)
+ self.rebuild_auth(prepared_request, resp)
+
+ # Override the original request.
+ req = prepared_request
resp = self.send(
- prepared_request,
+ req,
stream=stream,
timeout=timeout,
verify=verify,
i += 1
yield resp
+    def rebuild_auth(self, prepared_request, response):
+        """
+        When being redirected we may want to strip authentication from the
+        request to avoid leaking credentials. This method intelligently removes
+        and reapplies authentication where possible to avoid credential loss.
+
+        The ``Authorization`` header is removed when the redirect leaves the
+        original host, or stays on it but changes scheme or port. It is kept
+        for an http -> https upgrade on the default ports, and when the only
+        difference is an implicit versus explicit default port.
+
+        :param prepared_request: The request about to be sent to the redirect
+            target; its headers are modified in place.
+        :param response: The redirect response; ``response.request.url`` is
+            taken as the original location.
+        """
+        headers = prepared_request.headers
+        url = prepared_request.url
+
+        if 'Authorization' in headers:
+            # If we get redirected to a new host, we should strip out any
+            # authentication headers.
+            original_parsed = urlparse(response.request.url)
+            redirect_parsed = urlparse(url)
+
+            if original_parsed.hostname != redirect_parsed.hostname:
+                strip_auth = True
+            elif (original_parsed.scheme == 'http' and
+                  original_parsed.port in (80, None) and
+                  redirect_parsed.scheme == 'https' and
+                  redirect_parsed.port in (443, None)):
+                # Upgrading to TLS on the standard ports is safe and common.
+                strip_auth = False
+            else:
+                scheme_changed = original_parsed.scheme != redirect_parsed.scheme
+                port_changed = original_parsed.port != redirect_parsed.port
+                default_port = (
+                    {'http': 80, 'https': 443}.get(original_parsed.scheme),
+                    None)
+                if (not scheme_changed and
+                        original_parsed.port in default_port and
+                        redirect_parsed.port in default_port):
+                    # Same endpoint, default port merely spelled differently.
+                    strip_auth = False
+                else:
+                    # Same host, but a different scheme (e.g. an https ->
+                    # http downgrade) or port: do not forward credentials.
+                    strip_auth = scheme_changed or port_changed
+
+            if strip_auth:
+                del headers['Authorization']
+
+        # .netrc might have more auth for us on our new host.
+        new_auth = get_netrc_auth(url) if self.trust_env else None
+        if new_auth is not None:
+            prepared_request.prepare_auth(new_auth)
+
+        return
+
+    def rebuild_proxies(self, prepared_request, proxies):
+        """
+        Re-evaluate the proxy configuration for the request's (new) URL.
+
+        The environment variables are considered when ``self.trust_env`` is
+        set. If we are redirected to a URL covered by NO_PROXY, no
+        environment proxy is added. Otherwise, a missing proxy key for this
+        URL's scheme is filled in from the environment (in case it was
+        stripped by a previous redirect).
+
+        This method also replaces the Proxy-Authorization header where
+        necessary.
+
+        :param prepared_request: Request whose headers are updated in place.
+        :param proxies: Mapping of scheme to proxy URL, or ``None``; it is
+            copied, never modified.
+        :return: The new proxy mapping.
+        """
+        url = prepared_request.url
+        headers = prepared_request.headers
+        scheme = urlparse(url).scheme
+
+        if proxies is None:
+            new_proxies = {}
+        else:
+            new_proxies = proxies.copy()
+
+        if self.trust_env and not should_bypass_proxies(url):
+            env_proxy = get_environ_proxies(url).get(scheme)
+            if env_proxy:
+                # Explicitly configured proxies win over the environment.
+                new_proxies.setdefault(scheme, env_proxy)
+
+        # Any existing header is stale for the new target; rebuild it below.
+        if 'Proxy-Authorization' in headers:
+            del headers['Proxy-Authorization']
+
+        username = password = None
+        try:
+            username, password = get_auth_from_url(new_proxies[scheme])
+        except KeyError:
+            # No proxy configured for this scheme.
+            username, password = None, None
+
+        if username and password:
+            headers['Proxy-Authorization'] = _basic_auth_str(username, password)
+
+        return new_proxies
+
class Session(SessionRedirectMixin):
"""A Requests session.
__attrs__ = [
'headers', 'cookies', 'auth', 'timeout', 'proxies', 'hooks',
'params', 'verify', 'cert', 'prefetch', 'adapters', 'stream',
- 'trust_env', 'max_redirects']
+ 'trust_env', 'max_redirects', 'redirect_cache']
def __init__(self):
self.mount('https://', HTTPAdapter())
self.mount('http://', HTTPAdapter())
+ self.redirect_cache = {}
+
def __enter__(self):
return self
:class:`Session`.
:param request: :class:`Request` instance to prepare with this
- session's settings.
+ session's settings.
"""
cookies = request.cookies or {}
cookies = cookiejar_from_dict(cookies)
# Merge with session cookies
- merged_cookies = RequestsCookieJar()
- merged_cookies.update(self.cookies)
- merged_cookies.update(cookies)
+ merged_cookies = merge_cookies(
+ merge_cookies(RequestsCookieJar(), self.cookies), cookies)
# Set environment's basic authentication if not explicitly set.
params=merge_setting(request.params, self.params),
auth=merge_setting(auth, self.auth),
cookies=merged_cookies,
- hooks=merge_setting(request.hooks, self.hooks),
+ hooks=merge_hooks(request.hooks, self.hooks),
)
return p
:class:`Request`.
:param cookies: (optional) Dict or CookieJar object to send with the
:class:`Request`.
- :param files: (optional) Dictionary of 'filename': file-like-objects
+ :param files: (optional) Dictionary of ``'filename': file-like-objects``
for multipart encoding upload.
:param auth: (optional) Auth tuple or callable to enable
Basic/Digest/Custom HTTP Auth.
- :param timeout: (optional) Float describing the timeout of the
- request.
- :param allow_redirects: (optional) Boolean. Set to True by default.
+ :param timeout: (optional) How long to wait for the server to send
+ data before giving up, as a float, or a (`connect timeout, read
+ timeout <user/advanced.html#timeouts>`_) tuple.
+ :type timeout: float or tuple
+ :param allow_redirects: (optional) Set to True by default.
+ :type allow_redirects: bool
:param proxies: (optional) Dictionary mapping protocol to the URL of
the proxy.
:param stream: (optional) whether to immediately download the response
:param cert: (optional) if String, path to ssl client cert file (.pem).
If Tuple, ('cert', 'key') pair.
"""
+
+ method = builtin_str(method)
+
# Create the Request.
req = Request(
method = method.upper(),
)
prep = self.prepare_request(req)
- # Add param cookies to session cookies
- self.cookies = cookiejar_from_dict(cookies, cookiejar=self.cookies, overwrite=False)
-
proxies = proxies or {}
- # Gather clues from the surrounding environment.
- if self.trust_env:
- # Set environment's proxies.
- env_proxies = get_environ_proxies(url) or {}
- for (k, v) in env_proxies.items():
- proxies.setdefault(k, v)
-
- # Look for configuration.
- if not verify and verify is not False:
- verify = os.environ.get('REQUESTS_CA_BUNDLE')
-
- # Curl compatibility.
- if not verify and verify is not False:
- verify = os.environ.get('CURL_CA_BUNDLE')
-
- # Merge all the kwargs.
- proxies = merge_setting(proxies, self.proxies)
- stream = merge_setting(stream, self.stream)
- verify = merge_setting(verify, self.verify)
- cert = merge_setting(cert, self.cert)
+ settings = self.merge_environment_settings(
+ prep.url, proxies, stream, verify, cert
+ )
# Send the request.
send_kwargs = {
- 'stream': stream,
'timeout': timeout,
- 'verify': verify,
- 'cert': cert,
- 'proxies': proxies,
'allow_redirects': allow_redirects,
}
+ send_kwargs.update(settings)
resp = self.send(prep, **send_kwargs)
return resp
if not isinstance(request, PreparedRequest):
raise ValueError('You can only send PreparedRequests.')
- # Set up variables needed for resolve_redirects and dispatching of
- # hooks
+ while request.url in self.redirect_cache:
+ request.url = self.redirect_cache.get(request.url)
+
+ # Set up variables needed for resolve_redirects and dispatching of hooks
allow_redirects = kwargs.pop('allow_redirects', True)
stream = kwargs.get('stream')
timeout = kwargs.get('timeout')
# Start time (approximately) of the request
start = datetime.utcnow()
+
# Send the request
r = adapter.send(request, **kwargs)
+
# Total elapsed time of the request (approximately)
r.elapsed = datetime.utcnow() - start
# Persist cookies
if r.history:
+
# If the hooks create history then we want those cookies too
for resp in r.history:
extract_cookies_to_jar(self.cookies, resp.request, resp.raw)
+
extract_cookies_to_jar(self.cookies, request, r.raw)
# Redirect resolving generator.
- gen = self.resolve_redirects(r, request, stream=stream,
- timeout=timeout, verify=verify, cert=cert,
- proxies=proxies)
+ gen = self.resolve_redirects(r, request,
+ stream=stream,
+ timeout=timeout,
+ verify=verify,
+ cert=cert,
+ proxies=proxies)
# Resolve redirects if allowed.
history = [resp for resp in gen] if allow_redirects else []
history.insert(0, r)
# Get the last request made
r = history.pop()
- r.history = tuple(history)
+ r.history = history
+
+ if not stream:
+ r.content
return r
+    def merge_environment_settings(self, url, proxies, stream, verify, cert):
+        """Check the environment and merge it with some settings.
+
+        When ``self.trust_env`` is set, proxies found in the environment for
+        ``url`` are added to ``proxies`` (in place, without overriding
+        existing keys), and a ``verify`` of ``True`` or ``None`` is replaced
+        by ``REQUESTS_CA_BUNDLE`` or, failing that, ``CURL_CA_BUNDLE``. Each
+        value is then merged with the matching session-level setting.
+
+        :return: dict with the keys ``'verify'``, ``'proxies'``, ``'stream'``
+            and ``'cert'``.
+        """
+        # Gather clues from the surrounding environment.
+        if self.trust_env:
+            # Set environment's proxies.
+            environment_proxies = get_environ_proxies(url) or {}
+            for scheme, proxy_url in environment_proxies.items():
+                proxies.setdefault(scheme, proxy_url)
+
+            # Look for requests environment configuration and be compatible
+            # with cURL.
+            if verify is True or verify is None:
+                ca_bundle = os.environ.get('REQUESTS_CA_BUNDLE')
+                if not ca_bundle:
+                    ca_bundle = os.environ.get('CURL_CA_BUNDLE')
+                verify = ca_bundle
+
+        # Merge all the kwargs.
+        merged_proxies = merge_setting(proxies, self.proxies)
+        merged_stream = merge_setting(stream, self.stream)
+        merged_verify = merge_setting(verify, self.verify)
+        merged_cert = merge_setting(cert, self.cert)
+
+        return {'verify': merged_verify, 'proxies': merged_proxies,
+                'stream': merged_stream, 'cert': merged_cert}
+
def get_adapter(self, url):
"""Returns the appropriate connnection adapter for the given URL."""
for (prefix, adapter) in self.adapters.items():
"""Registers a connection adapter to a prefix.
Adapters are sorted in descending order by key length."""
+
self.adapters[prefix] = adapter
keys_to_move = [k for k in self.adapters if len(k) < len(prefix)]
+
for key in keys_to_move:
self.adapters[key] = self.adapters.pop(key)
305: ('use_proxy',),
306: ('switch_proxy',),
307: ('temporary_redirect', 'temporary_moved', 'temporary'),
- 308: ('resume_incomplete', 'resume'),
+ 308: ('permanent_redirect',
+ 'resume_incomplete', 'resume',), # These 2 to be removed in 3.0
# Client Error.
400: ('bad_request', 'bad'),
"""
-import os
import collections
-from itertools import islice
-
-
-class IteratorProxy(object):
- """docstring for IteratorProxy"""
- def __init__(self, i):
- self.i = i
- # self.i = chain.from_iterable(i)
-
- def __iter__(self):
- return self.i
-
- def __len__(self):
- if hasattr(self.i, '__len__'):
- return len(self.i)
- if hasattr(self.i, 'len'):
- return self.i.len
- if hasattr(self.i, 'fileno'):
- return os.fstat(self.i.fileno()).st_size
-
- def read(self, n):
- return "".join(islice(self.i, None, n))
class CaseInsensitiveDict(collections.MutableMapping):
case of the last key to be set, and ``iter(instance)``,
``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()``
will contain case-sensitive keys. However, querying and contains
- testing is case insensitive:
+ testing is case insensitive::
cid = CaseInsensitiveDict()
cid['Accept'] = 'application/json'
return CaseInsensitiveDict(self._store.values())
def __repr__(self):
- return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
-
+ return str(dict(self.items()))
class LookupDict(dict):
"""Dictionary lookup object."""
import platform
import re
import sys
-from netrc import netrc, NetrcParseError
+import socket
+import struct
from . import __version__
from . import certs
from .compat import parse_http_list as _parse_list_header
-from .compat import (quote, urlparse, bytes, str, OrderedDict, urlunparse,
- is_py2, is_py3, builtin_str, getproxies, proxy_bypass)
+from .compat import (quote, urlparse, bytes, str, OrderedDict, unquote, is_py2,
+ builtin_str, getproxies, proxy_bypass, urlunparse)
from .cookies import RequestsCookieJar, cookiejar_from_dict
from .structures import CaseInsensitiveDict
-from .exceptions import MissingSchema, InvalidURL
+from .exceptions import InvalidURL
_hush_pyflakes = (RequestsCookieJar,)
return os.fstat(fileno).st_size
if hasattr(o, 'getvalue'):
- # e.g. BytesIO, cStringIO.StringI
+ # e.g. BytesIO, cStringIO.StringIO
return len(o.getvalue())
+
def get_netrc_auth(url):
"""Returns the Requests tuple auth for a given url from netrc."""
try:
- locations = (os.path.expanduser('~/{0}'.format(f)) for f in NETRC_FILES)
+ from netrc import netrc, NetrcParseError
+
netrc_path = None
- for loc in locations:
- if os.path.exists(loc) and not netrc_path:
+ for f in NETRC_FILES:
+ try:
+ loc = os.path.expanduser('~/{0}'.format(f))
+ except KeyError:
+ # os.path.expanduser can fail when $HOME is undefined and
+ # getpwuid fails. See http://bugs.python.org/issue20164 &
+ # https://github.com/kennethreitz/requests/issues/1846
+ return
+
+ if os.path.exists(loc):
netrc_path = loc
+ break
# Abort early if there isn't one.
if netrc_path is None:
- return netrc_path
+ return
ri = urlparse(url)
return quote(unquote_unreserved(uri), safe="!#$%&'()*+,/:;=?@[]~")
-def get_environ_proxies(url):
- """Return a dict of environment proxies."""
+def address_in_network(ip, net):
+ """
+ This function allows you to check if an IP belongs to a network subnet
+ Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
+ returns False if ip = 192.168.1.1 and net = 192.168.100.0/24
+ """
+ ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0]
+ netaddr, bits = net.split('/')
+ netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(bits))))[0]
+ network = struct.unpack('=L', socket.inet_aton(netaddr))[0] & netmask
+ return (ipaddr & netmask) == (network & netmask)
+
+def dotted_netmask(mask):
+ """
+ Converts mask from /xx format to xxx.xxx.xxx.xxx
+ Example: if mask is 24 function returns 255.255.255.0
+ """
+ bits = 0xffffffff ^ (1 << 32 - mask) - 1
+ return socket.inet_ntoa(struct.pack('>I', bits))
+
+
+def is_ipv4_address(string_ip):
+ try:
+ socket.inet_aton(string_ip)
+ except socket.error:
+ return False
+ return True
+
+
+def is_valid_cidr(string_network):
+ """Very simple check of the CIDR format in the no_proxy variable"""
+ if string_network.count('/') == 1:
+ try:
+ mask = int(string_network.split('/')[1])
+ except ValueError:
+ return False
+
+ if mask < 1 or mask > 32:
+ return False
+
+ try:
+ socket.inet_aton(string_network.split('/')[0])
+ except socket.error:
+ return False
+ else:
+ return False
+ return True
+
+
+def should_bypass_proxies(url):
+ """
+ Returns whether we should bypass proxies or not.
+ """
get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())
# First check whether no_proxy is defined. If it is, check that the URL
# the end of the netloc, both with and without the port.
no_proxy = no_proxy.replace(' ', '').split(',')
- for host in no_proxy:
- if netloc.endswith(host) or netloc.split(':')[0].endswith(host):
- # The URL does match something in no_proxy, so we don't want
- # to apply the proxies on this URL.
- return {}
+ ip = netloc.split(':')[0]
+ if is_ipv4_address(ip):
+ for proxy_ip in no_proxy:
+ if is_valid_cidr(proxy_ip):
+ if address_in_network(ip, proxy_ip):
+ return True
+ else:
+ for host in no_proxy:
+ if netloc.endswith(host) or netloc.split(':')[0].endswith(host):
+ # The URL does match something in no_proxy, so we don't want
+ # to apply the proxies on this URL.
+ return True
# If the system proxy settings indicate that this URL should be bypassed,
# don't proxy.
- if proxy_bypass(netloc):
- return {}
+ # The proxy_bypass function is incredibly buggy on OS X in early versions
+ # of Python 2.6, so allow this call to fail. Only catch the specific
+ # exceptions we've seen, though: this call failing in other ways can reveal
+ # legitimate problems.
+ try:
+ bypass = proxy_bypass(netloc)
+ except (TypeError, socket.gaierror):
+ bypass = False
+
+ if bypass:
+ return True
+
+ return False
- # If we get here, we either didn't have no_proxy set or we're not going
- # anywhere that no_proxy applies to, and the system settings don't require
- # bypassing the proxy for the current URL.
- return getproxies()
+def get_environ_proxies(url):
+ """Return a dict of environment proxies."""
+ if should_bypass_proxies(url):
+ return {}
+ else:
+ return getproxies()
-def default_user_agent():
+def default_user_agent(name="python-requests"):
"""Return a string representing the default user agent."""
_implementation = platform.python_implementation()
p_system = 'Unknown'
p_release = 'Unknown'
- return " ".join(['python-requests/%s' % __version__,
+ return " ".join(['%s/%s' % (name, __version__),
'%s/%s' % (_implementation, _implementation_version),
'%s/%s' % (p_system, p_release)])
def default_headers():
return CaseInsensitiveDict({
'User-Agent': default_user_agent(),
- 'Accept-Encoding': ', '.join(('gzip', 'deflate', 'compress')),
- 'Accept': '*/*'
+ 'Accept-Encoding': ', '.join(('gzip', 'deflate')),
+ 'Accept': '*/*',
+ 'Connection': 'keep-alive',
})
return None
-def except_on_missing_scheme(url):
- """Given a URL, raise a MissingSchema exception if the scheme is missing.
- """
- scheme, netloc, path, params, query, fragment = urlparse(url)
+def prepend_scheme_if_needed(url, new_scheme):
+ '''Given a URL that may or may not have a scheme, prepend the given scheme.
+ Does not replace a present scheme with the one provided as an argument.'''
+ scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)
- if not scheme:
- raise MissingSchema('Proxy URLs must have explicit schemes.')
+ # urlparse is a finicky beast, and sometimes decides that there isn't a
+ # netloc present. Assume that it's being over-cautious, and switch netloc
+ # and path if urlparse decided there was no netloc.
+ if not netloc:
+ netloc, path = path, netloc
+
+ return urlunparse((scheme, netloc, path, params, query, fragment))
def get_auth_from_url(url):
"""Given a url with authentication components, extract them into a tuple of
username,password."""
- if url:
- parsed = urlparse(url)
- return (parsed.username, parsed.password)
- else:
- return ('', '')
+ parsed = urlparse(url)
+
+ try:
+ auth = (unquote(parsed.username), unquote(parsed.password))
+ except (AttributeError, TypeError):
+ auth = ('', '')
+
+ return auth
def to_native_string(string, encoding='ascii'):
import requests
+from codecs import open
+
try:
from setuptools import setup
except ImportError:
packages = [
'requests',
'requests.packages',
- 'requests.packages.charade',
+ 'requests.packages.chardet',
'requests.packages.urllib3',
'requests.packages.urllib3.packages',
'requests.packages.urllib3.contrib',
- 'requests.packages.urllib3.packages.ssl_match_hostname'
+ 'requests.packages.urllib3.util',
+ 'requests.packages.urllib3.packages.ssl_match_hostname',
]
requires = []
+with open('README.rst', 'r', 'utf-8') as f:
+ readme = f.read()
+with open('HISTORY.rst', 'r', 'utf-8') as f:
+ history = f.read()
+
setup(
name='requests',
version=requests.__version__,
description='Python HTTP for Humans.',
- long_description=open('README.rst').read() + '\n\n' +
- open('HISTORY.rst').read(),
+ long_description=readme + '\n\n' + history,
author='Kenneth Reitz',
author_email='me@kennethreitz.com',
url='http://python-requests.org',
package_dir={'requests': 'requests'},
include_package_data=True,
install_requires=requires,
- license=open('LICENSE').read(),
+ license='Apache 2.0',
zip_safe=False,
classifiers=(
'Development Status :: 5 - Production/Stable',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.3',
+ 'Programming Language :: Python :: 3.4'
),
+ extras_require={
+ 'security': ['pyOpenSSL', 'ndg-httpsclient', 'pyasn1'],
+ },
)
from __future__ import division
import json
import os
-import unittest
import pickle
+import unittest
+import collections
+import io
import requests
import pytest
-from requests.auth import HTTPDigestAuth
from requests.adapters import HTTPAdapter
-from requests.compat import str, cookielib, getproxies, urljoin, urlparse
-from requests.cookies import cookiejar_from_dict
-from requests.exceptions import InvalidURL, MissingSchema
+from requests.auth import HTTPDigestAuth, _basic_auth_str
+from requests.compat import (
+ Morsel, cookielib, getproxies, str, urljoin, urlparse, is_py3, builtin_str)
+from requests.cookies import cookiejar_from_dict, morsel_to_cookie
+from requests.exceptions import (InvalidURL, MissingSchema, ConnectTimeout,
+ ReadTimeout, ConnectionError, Timeout)
+from requests.models import PreparedRequest
from requests.structures import CaseInsensitiveDict
+from requests.sessions import SessionRedirectMixin
+from requests.models import urlencode
+from requests.hooks import default_hooks
try:
import StringIO
except ImportError:
import io as StringIO
+if is_py3:
+ def u(s):
+ return s
+else:
+ def u(s):
+ return s.decode('unicode-escape')
+
+
+# Requests to this URL should always fail with a connection timeout (nothing
+# listening on that port)
+TARPIT = "http://10.255.255.1"
HTTPBIN = os.environ.get('HTTPBIN_URL', 'http://httpbin.org/')
# Issue #1483: Make sure the URL always has a trailing slash
HTTPBIN = HTTPBIN.rstrip('/') + '/'
requests.post
def test_invalid_url(self):
- self.assertRaises(MissingSchema, requests.get, 'hiwpefhipowhefopw')
- self.assertRaises(InvalidURL, requests.get, 'http://')
+ with pytest.raises(MissingSchema):
+ requests.get('hiwpefhipowhefopw')
+ with pytest.raises(InvalidURL):
+ requests.get('http://')
def test_basic_building(self):
req = requests.Request()
def test_no_content_length(self):
get_req = requests.Request('GET', httpbin('get')).prepare()
- self.assertTrue('Content-Length' not in get_req.headers)
+ assert 'Content-Length' not in get_req.headers
head_req = requests.Request('HEAD', httpbin('head')).prepare()
- self.assertTrue('Content-Length' not in head_req.headers)
+ assert 'Content-Length' not in head_req.headers
def test_path_is_not_double_encoded(self):
request = requests.Request('GET', "http://0.0.0.0/get/test case").prepare()
- self.assertEqual(request.path_url, "/get/test%20case")
+ assert request.path_url == '/get/test%20case'
def test_params_are_added_before_fragment(self):
request = requests.Request('GET',
"http://example.com/path#fragment", params={"a": "b"}).prepare()
- self.assertEqual(request.url,
- "http://example.com/path?a=b#fragment")
+ assert request.url == "http://example.com/path?a=b#fragment"
request = requests.Request('GET',
"http://example.com/path?key=value#fragment", params={"a": "b"}).prepare()
- self.assertEqual(request.url,
- "http://example.com/path?key=value&a=b#fragment")
+ assert request.url == "http://example.com/path?key=value&a=b#fragment"
def test_mixed_case_scheme_acceptable(self):
s = requests.Session()
url = scheme + parts.netloc + parts.path
r = requests.Request('GET', url)
r = s.send(r.prepare())
- self.assertEqual(r.status_code, 200,
- "failed for scheme %s" % scheme)
+ assert r.status_code == 200, 'failed for scheme {0}'.format(scheme)
def test_HTTP_200_OK_GET_ALTERNATIVE(self):
r = requests.Request('GET', httpbin('get'))
r = s.send(r.prepare())
- self.assertEqual(r.status_code, 200)
+ assert r.status_code == 200
def test_HTTP_302_ALLOW_REDIRECT_GET(self):
r = requests.get(httpbin('redirect', '1'))
- self.assertEqual(r.status_code, 200)
+ assert r.status_code == 200
+ assert r.history[0].status_code == 302
+ assert r.history[0].is_redirect
# def test_HTTP_302_ALLOW_REDIRECT_POST(self):
# r = requests.post(httpbin('status', '302'), data={'some': 'data'})
r = requests.get(httpbin('user-agent'), headers=heads)
- self.assertTrue(heads['User-agent'] in r.text)
- self.assertEqual(r.status_code, 200)
+ assert heads['User-agent'] in r.text
+ assert r.status_code == 200
def test_HTTP_200_OK_GET_WITH_MIXED_PARAMS(self):
heads = {'User-agent': 'Mozilla/5.0'}
r = requests.get(httpbin('get') + '?test=true', params={'q': 'test'}, headers=heads)
- self.assertEqual(r.status_code, 200)
+ assert r.status_code == 200
def test_set_cookie_on_301(self):
s = requests.session()
url = httpbin('cookies/set?foo=bar')
- r = s.get(url)
- self.assertTrue(s.cookies['foo'] == 'bar')
+ s.get(url)
+ assert s.cookies['foo'] == 'bar'
def test_cookie_sent_on_redirect(self):
s = requests.session()
s.get(httpbin('cookies/set?foo=bar'))
r = s.get(httpbin('redirect/1')) # redirects to httpbin('get')
- self.assertTrue("Cookie" in r.json()["headers"])
+ assert 'Cookie' in r.json()['headers']
def test_cookie_removed_on_expire(self):
s = requests.session()
s.get(httpbin('cookies/set?foo=bar'))
- self.assertTrue(s.cookies['foo'] == 'bar')
+ assert s.cookies['foo'] == 'bar'
s.get(
httpbin('response-headers'),
params={
def test_cookie_quote_wrapped(self):
s = requests.session()
s.get(httpbin('cookies/set?foo="bar:baz"'))
- self.assertTrue(s.cookies['foo'] == '"bar:baz"')
+ assert s.cookies['foo'] == '"bar:baz"'
def test_cookie_persists_via_api(self):
s = requests.session()
- r = s.get(httpbin('redirect/1'), cookies={'foo':'bar'})
- self.assertTrue('foo' in r.request.headers['Cookie'])
- self.assertTrue('foo' in r.history[0].request.headers['Cookie'])
+ r = s.get(httpbin('redirect/1'), cookies={'foo': 'bar'})
+ assert 'foo' in r.request.headers['Cookie']
+ assert 'foo' in r.history[0].request.headers['Cookie']
def test_request_cookie_overrides_session_cookie(self):
s = requests.session()
# Session cookie should not be modified
assert s.cookies['foo'] == 'bar'
+ def test_request_cookies_not_persisted(self):
+ s = requests.session()
+ s.get(httpbin('cookies'), cookies={'foo': 'baz'})
+ # Sending a request with cookies should not add cookies to the session
+ assert not s.cookies
+
def test_generic_cookiejar_works(self):
cj = cookielib.CookieJar()
cookiejar_from_dict({'foo': 'bar'}, cj)
# Make sure the session cj is still the custom one
assert s.cookies is cj
+ def test_param_cookiejar_works(self):
+ cj = cookielib.CookieJar()
+ cookiejar_from_dict({'foo': 'bar'}, cj)
+ s = requests.session()
+ r = s.get(httpbin('cookies'), cookies=cj)
+ # Make sure the cookie was sent
+ assert r.json()['cookies']['foo'] == 'bar'
+
def test_requests_in_history_are_not_overridden(self):
resp = requests.get(httpbin('redirect/3'))
urls = [r.url for r in resp.history]
req_urls = [r.request.url for r in resp.history]
- self.assertEquals(urls, req_urls)
+ assert urls == req_urls
+
+ def test_history_is_always_a_list(self):
+ """
+ Show that even with redirects, Response.history is always a list.
+ """
+ resp = requests.get(httpbin('get'))
+ assert isinstance(resp.history, list)
+ resp = requests.get(httpbin('redirect/1'))
+ assert isinstance(resp.history, list)
+ assert not isinstance(resp.history, tuple)
+
+ def test_headers_on_session_with_None_are_not_sent(self):
+ """Do not send headers in Session.headers with None values."""
+ ses = requests.Session()
+ ses.headers['Accept-Encoding'] = None
+ req = requests.Request('GET', 'http://httpbin.org/get')
+ prep = ses.prepare_request(req)
+ assert 'Accept-Encoding' not in prep.headers
def test_user_agent_transfers(self):
}
r = requests.get(httpbin('user-agent'), headers=heads)
- self.assertTrue(heads['User-agent'] in r.text)
+ assert heads['User-agent'] in r.text
heads = {
'user-agent': 'Mozilla/5.0 (github.com/kennethreitz/requests)'
}
r = requests.get(httpbin('user-agent'), headers=heads)
- self.assertTrue(heads['user-agent'] in r.text)
+ assert heads['user-agent'] in r.text
def test_HTTP_200_OK_HEAD(self):
r = requests.head(httpbin('get'))
- self.assertEqual(r.status_code, 200)
+ assert r.status_code == 200
def test_HTTP_200_OK_PUT(self):
r = requests.put(httpbin('put'))
- self.assertEqual(r.status_code, 200)
+ assert r.status_code == 200
def test_BASICAUTH_TUPLE_HTTP_200_OK_GET(self):
auth = ('user', 'pass')
url = httpbin('basic-auth', 'user', 'pass')
r = requests.get(url, auth=auth)
- self.assertEqual(r.status_code, 200)
+ assert r.status_code == 200
r = requests.get(url)
- self.assertEqual(r.status_code, 401)
+ assert r.status_code == 401
s = requests.session()
s.auth = auth
r = s.get(url)
- self.assertEqual(r.status_code, 200)
+ assert r.status_code == 200
+
+ def test_connection_error(self):
+ """Connecting to an unknown domain should raise a ConnectionError"""
+ with pytest.raises(ConnectionError):
+ requests.get("http://fooobarbangbazbing.httpbin.org")
+
+ with pytest.raises(ConnectionError):
+ requests.get("http://httpbin.org:1")
def test_basicauth_with_netrc(self):
auth = ('user', 'pass')
# Should use netrc and work.
r = requests.get(url)
- self.assertEqual(r.status_code, 200)
+ assert r.status_code == 200
# Given auth should override and fail.
r = requests.get(url, auth=wrong_auth)
- self.assertEqual(r.status_code, 401)
+ assert r.status_code == 401
s = requests.session()
# Should use netrc and work.
r = s.get(url)
- self.assertEqual(r.status_code, 200)
+ assert r.status_code == 200
# Given auth should override and fail.
s.auth = wrong_auth
r = s.get(url)
- self.assertEqual(r.status_code, 401)
+ assert r.status_code == 401
def test_DIGEST_HTTP_200_OK_GET(self):
url = httpbin('digest-auth', 'auth', 'user', 'pass')
r = requests.get(url, auth=auth)
- self.assertEqual(r.status_code, 200)
+ assert r.status_code == 200
r = requests.get(url)
- self.assertEqual(r.status_code, 401)
+ assert r.status_code == 401
s = requests.session()
s.auth = HTTPDigestAuth('user', 'pass')
r = s.get(url)
- self.assertEqual(r.status_code, 200)
+ assert r.status_code == 200
def test_DIGEST_AUTH_RETURNS_COOKIE(self):
url = httpbin('digest-auth', 'auth', 'user', 'pass')
url = httpbin('digest-auth', 'auth', 'user', 'pass')
r = requests.get(url, auth=auth, stream=True)
- self.assertNotEqual(r.raw.read(), b'')
+ assert r.raw.read() != b''
r = requests.get(url, auth=auth, stream=False)
- self.assertEqual(r.raw.read(), b'')
-
+ assert r.raw.read() == b''
def test_DIGESTAUTH_WRONG_HTTP_401_GET(self):
url = httpbin('digest-auth', 'auth', 'user', 'pass')
r = requests.get(url, auth=auth)
- self.assertEqual(r.status_code, 401)
+ assert r.status_code == 401
r = requests.get(url)
- self.assertEqual(r.status_code, 401)
+ assert r.status_code == 401
s = requests.session()
s.auth = auth
r = s.get(url)
- self.assertEqual(r.status_code, 401)
+ assert r.status_code == 401
+
+ def test_DIGESTAUTH_QUOTES_QOP_VALUE(self):
+
+ auth = HTTPDigestAuth('user', 'pass')
+ url = httpbin('digest-auth', 'auth', 'user', 'pass')
+
+ r = requests.get(url, auth=auth)
+ assert '"auth"' in r.request.headers['Authorization']
def test_POSTBIN_GET_POST_FILES(self):
post1 = requests.post(url).raise_for_status()
post1 = requests.post(url, data={'some': 'data'})
- self.assertEqual(post1.status_code, 200)
+ assert post1.status_code == 200
with open('requirements.txt') as f:
post2 = requests.post(url, files={'some': f})
- self.assertEqual(post2.status_code, 200)
+ assert post2.status_code == 200
post4 = requests.post(url, data='[{"some": "json"}]')
- self.assertEqual(post4.status_code, 200)
+ assert post4.status_code == 200
- try:
+ with pytest.raises(ValueError):
requests.post(url, files=['bad file data'])
- except ValueError:
- pass
def test_POSTBIN_GET_POST_FILES_WITH_DATA(self):
post1 = requests.post(url).raise_for_status()
post1 = requests.post(url, data={'some': 'data'})
- self.assertEqual(post1.status_code, 200)
+ assert post1.status_code == 200
with open('requirements.txt') as f:
- post2 = requests.post(url, data={'some': 'data'}, files={'some': f})
- self.assertEqual(post2.status_code, 200)
+ post2 = requests.post(url,
+ data={'some': 'data'}, files={'some': f})
+ assert post2.status_code == 200
post4 = requests.post(url, data='[{"some": "json"}]')
- self.assertEqual(post4.status_code, 200)
+ assert post4.status_code == 200
- try:
+ with pytest.raises(ValueError):
requests.post(url, files=['bad file data'])
- except ValueError:
- pass
def test_conflicting_post_params(self):
url = httpbin('post')
with open('requirements.txt') as f:
pytest.raises(ValueError, "requests.post(url, data='[{\"some\": \"data\"}]', files={'some': f})")
- pytest.raises(ValueError, "requests.post(url, data=u'[{\"some\": \"data\"}]', files={'some': f})")
+ pytest.raises(ValueError, "requests.post(url, data=u('[{\"some\": \"data\"}]'), files={'some': f})")
def test_request_ok_set(self):
r = requests.get(httpbin('status', '404'))
- self.assertEqual(r.ok, False)
+ assert not r.ok
def test_status_raising(self):
r = requests.get(httpbin('status', '404'))
- self.assertRaises(requests.exceptions.HTTPError, r.raise_for_status)
+ with pytest.raises(requests.exceptions.HTTPError):
+ r.raise_for_status()
r = requests.get(httpbin('status', '500'))
- self.assertFalse(r.ok)
+ assert not r.ok
def test_decompress_gzip(self):
r = requests.get(httpbin('gzip'))
requests.get(httpbin('ø'), params={'foo': 'foo'})
def test_unicode_header_name(self):
- requests.put(httpbin('put'), headers={str('Content-Type'): 'application/octet-stream'}, data='\xff') # compat.str is unicode.
+ requests.put(
+ httpbin('put'),
+ headers={str('Content-Type'): 'application/octet-stream'},
+ data='\xff') # compat.str is unicode.
+
+ def test_pyopenssl_redirect(self):
+ requests.get('https://httpbin.org/status/301')
def test_urlencoded_get_query_multivalued_param(self):
r = requests.get(httpbin('get'), params=dict(test=['foo', 'baz']))
- self.assertEqual(r.status_code, 200)
- self.assertEqual(r.url, httpbin('get?test=foo&test=baz'))
+ assert r.status_code == 200
+ assert r.url == httpbin('get?test=foo&test=baz')
def test_different_encodings_dont_break_post(self):
r = requests.post(httpbin('post'),
- data={'stuff': json.dumps({'a': 123})},
- params={'blah': 'asdf1234'},
- files={'file': ('test_requests.py', open(__file__, 'rb'))})
- self.assertEqual(r.status_code, 200)
+ data={'stuff': json.dumps({'a': 123})},
+ params={'blah': 'asdf1234'},
+ files={'file': ('test_requests.py', open(__file__, 'rb'))})
+ assert r.status_code == 200
def test_unicode_multipart_post(self):
r = requests.post(httpbin('post'),
- data={'stuff': u'ëlïxr'},
- files={'file': ('test_requests.py', open(__file__, 'rb'))})
- self.assertEqual(r.status_code, 200)
+ data={'stuff': u('ëlïxr')},
+ files={'file': ('test_requests.py', open(__file__, 'rb'))})
+ assert r.status_code == 200
r = requests.post(httpbin('post'),
- data={'stuff': u'ëlïxr'.encode('utf-8')},
- files={'file': ('test_requests.py', open(__file__, 'rb'))})
- self.assertEqual(r.status_code, 200)
+ data={'stuff': u('ëlïxr').encode('utf-8')},
+ files={'file': ('test_requests.py', open(__file__, 'rb'))})
+ assert r.status_code == 200
r = requests.post(httpbin('post'),
- data={'stuff': 'elixr'},
- files={'file': ('test_requests.py', open(__file__, 'rb'))})
- self.assertEqual(r.status_code, 200)
+ data={'stuff': 'elixr'},
+ files={'file': ('test_requests.py', open(__file__, 'rb'))})
+ assert r.status_code == 200
r = requests.post(httpbin('post'),
- data={'stuff': 'elixr'.encode('utf-8')},
- files={'file': ('test_requests.py', open(__file__, 'rb'))})
- self.assertEqual(r.status_code, 200)
+ data={'stuff': 'elixr'.encode('utf-8')},
+ files={'file': ('test_requests.py', open(__file__, 'rb'))})
+ assert r.status_code == 200
def test_unicode_multipart_post_fieldnames(self):
filename = os.path.splitext(__file__)[0] + '.py'
files={'file': ('test_requests.py',
open(filename, 'rb'))})
prep = r.prepare()
- self.assertTrue(b'name="stuff"' in prep.body)
- self.assertFalse(b'name="b\'stuff\'"' in prep.body)
+ assert b'name="stuff"' in prep.body
+ assert b'name="b\'stuff\'"' not in prep.body
+
+ def test_unicode_method_name(self):
+ files = {'file': open('test_requests.py', 'rb')}
+ r = requests.request(
+ method=u('POST'), url=httpbin('post'), files=files)
+ assert r.status_code == 200
def test_custom_content_type(self):
- r = requests.post(httpbin('post'),
- data={'stuff': json.dumps({'a': 123})},
- files={'file1': ('test_requests.py', open(__file__, 'rb')),
- 'file2': ('test_requests', open(__file__, 'rb'),
- 'text/py-content-type')})
- self.assertEqual(r.status_code, 200)
- self.assertTrue(b"text/py-content-type" in r.request.body)
+ r = requests.post(
+ httpbin('post'),
+ data={'stuff': json.dumps({'a': 123})},
+ files={'file1': ('test_requests.py', open(__file__, 'rb')),
+ 'file2': ('test_requests', open(__file__, 'rb'),
+ 'text/py-content-type')})
+ assert r.status_code == 200
+ assert b"text/py-content-type" in r.request.body
def test_hook_receives_request_arguments(self):
def hook(resp, **kwargs):
requests.Request('GET', HTTPBIN, hooks={'response': hook})
+ def test_session_hooks_are_used_with_no_request_hooks(self):
+ hook = lambda x, *args, **kwargs: x
+ s = requests.Session()
+ s.hooks['response'].append(hook)
+ r = requests.Request('GET', HTTPBIN)
+ prep = s.prepare_request(r)
+ assert prep.hooks['response'] != []
+ assert prep.hooks['response'] == [hook]
+
+ def test_session_hooks_are_overriden_by_request_hooks(self):
+ hook1 = lambda x, *args, **kwargs: x
+ hook2 = lambda x, *args, **kwargs: x
+ assert hook1 is not hook2
+ s = requests.Session()
+ s.hooks['response'].append(hook2)
+ r = requests.Request('GET', HTTPBIN, hooks={'response': [hook1]})
+ prep = s.prepare_request(r)
+ assert prep.hooks['response'] == [hook1]
+
def test_prepared_request_hook(self):
def hook(resp, **kwargs):
resp.hook_working = True
s.proxies = getproxies()
resp = s.send(prep)
- self.assertTrue(hasattr(resp, 'hook_working'))
+ assert hasattr(resp, 'hook_working')
def test_prepared_from_session(self):
class DummyAuth(requests.auth.AuthBase):
return r
req = requests.Request('GET', httpbin('headers'))
- self.assertEqual(req.auth, None)
+ assert not req.auth
s = requests.Session()
s.auth = DummyAuth()
prep = s.prepare_request(req)
resp = s.send(prep)
- self.assertTrue(resp.json()['headers']['Dummy-Auth-Test'], 'dummy-auth-test-ok')
+ assert resp.json()['headers'][
+ 'Dummy-Auth-Test'] == 'dummy-auth-test-ok'
def test_links(self):
r = requests.Response()
'x-ratelimit-limit': '60',
'x-ratelimit-remaining': '57'
}
- self.assertEqual(r.links['next']['rel'], 'next')
+ assert r.links['next']['rel'] == 'next'
def test_cookie_parameters(self):
key = 'some_cookie'
jar = requests.cookies.RequestsCookieJar()
jar.set(key, value, secure=secure, domain=domain, rest=rest)
- self.assertEqual(len(jar), 1)
- self.assertTrue('some_cookie' in jar)
+ assert len(jar) == 1
+ assert 'some_cookie' in jar
cookie = list(jar)[0]
- self.assertEqual(cookie.secure, secure)
- self.assertEqual(cookie.domain, domain)
- self.assertEqual(cookie._rest['HttpOnly'], rest['HttpOnly'])
+ assert cookie.secure == secure
+ assert cookie.domain == domain
+ assert cookie._rest['HttpOnly'] == rest['HttpOnly']
+
+ def test_cookie_as_dict_keeps_len(self):
+ key = 'some_cookie'
+ value = 'some_value'
+
+ key1 = 'some_cookie1'
+ value1 = 'some_value1'
+
+ jar = requests.cookies.RequestsCookieJar()
+ jar.set(key, value)
+ jar.set(key1, value1)
+
+ d1 = dict(jar)
+ d2 = dict(jar.iteritems())
+ d3 = dict(jar.items())
+
+ assert len(jar) == 2
+ assert len(d1) == 2
+ assert len(d2) == 2
+ assert len(d3) == 2
+
+ def test_cookie_as_dict_keeps_items(self):
+ key = 'some_cookie'
+ value = 'some_value'
+
+ key1 = 'some_cookie1'
+ value1 = 'some_value1'
+
+ jar = requests.cookies.RequestsCookieJar()
+ jar.set(key, value)
+ jar.set(key1, value1)
+
+ d1 = dict(jar)
+ d2 = dict(jar.iteritems())
+ d3 = dict(jar.items())
+
+ assert d1['some_cookie'] == 'some_value'
+ assert d2['some_cookie'] == 'some_value'
+ assert d3['some_cookie1'] == 'some_value1'
+
+ def test_cookie_as_dict_keys(self):
+ key = 'some_cookie'
+ value = 'some_value'
+
+ key1 = 'some_cookie1'
+ value1 = 'some_value1'
+
+ jar = requests.cookies.RequestsCookieJar()
+ jar.set(key, value)
+ jar.set(key1, value1)
+
+ keys = jar.keys()
+ assert keys == list(keys)
+ # make sure one can use keys multiple times
+ assert list(keys) == list(keys)
+
+ def test_cookie_as_dict_values(self):
+ key = 'some_cookie'
+ value = 'some_value'
+
+ key1 = 'some_cookie1'
+ value1 = 'some_value1'
+
+ jar = requests.cookies.RequestsCookieJar()
+ jar.set(key, value)
+ jar.set(key1, value1)
+
+ values = jar.values()
+ assert values == list(values)
+ # make sure one can use values multiple times
+ assert list(values) == list(values)
+
+ def test_cookie_as_dict_items(self):
+ key = 'some_cookie'
+ value = 'some_value'
+
+ key1 = 'some_cookie1'
+ value1 = 'some_value1'
+
+ jar = requests.cookies.RequestsCookieJar()
+ jar.set(key, value)
+ jar.set(key1, value1)
+
+ items = jar.items()
+ assert items == list(items)
+ # make sure one can use items multiple times
+ assert list(items) == list(items)
def test_time_elapsed_blank(self):
r = requests.get(httpbin('get'))
td = r.elapsed
total_seconds = ((td.microseconds + (td.seconds + td.days * 24 * 3600)
* 10**6) / 10**6)
- self.assertTrue(total_seconds > 0.0)
+ assert total_seconds > 0.0
def test_response_is_iterable(self):
r = requests.Response()
return read_(amt)
setattr(io, 'read', read_mock)
r.raw = io
- self.assertTrue(next(iter(r)))
+ assert next(iter(r))
io.close()
+ def test_response_decode_unicode(self):
+ """
+ When called with decode_unicode, Response.iter_content should always
+ return unicode.
+ """
+ r = requests.Response()
+ r._content_consumed = True
+ r._content = b'the content'
+ r.encoding = 'ascii'
+
+ chunks = r.iter_content(decode_unicode=True)
+ assert all(isinstance(chunk, str) for chunk in chunks)
+
+ # also for streaming
+ r = requests.Response()
+ r.raw = io.BytesIO(b'the content')
+ r.encoding = 'ascii'
+ chunks = r.iter_content(decode_unicode=True)
+ assert all(isinstance(chunk, str) for chunk in chunks)
+
+ def test_request_and_response_are_pickleable(self):
+ r = requests.get(httpbin('get'))
+
+ # verify we can pickle the original request
+ assert pickle.loads(pickle.dumps(r.request))
+
+ # verify we can pickle the response and that we have access to
+ # the original request.
+ pr = pickle.loads(pickle.dumps(r))
+ assert r.request.url == pr.request.url
+ assert r.request.headers == pr.request.headers
+
def test_get_auth_from_url(self):
url = 'http://user:pass@complex.url.com/path?query=yes'
- self.assertEqual(('user', 'pass'),
- requests.utils.get_auth_from_url(url))
+ assert ('user', 'pass') == requests.utils.get_auth_from_url(url)
+
+ def test_get_auth_from_url_encoded_spaces(self):
+ url = 'http://user:pass%20pass@complex.url.com/path?query=yes'
+ assert ('user', 'pass pass') == requests.utils.get_auth_from_url(url)
+
+ def test_get_auth_from_url_not_encoded_spaces(self):
+ url = 'http://user:pass pass@complex.url.com/path?query=yes'
+ assert ('user', 'pass pass') == requests.utils.get_auth_from_url(url)
+
+ def test_get_auth_from_url_percent_chars(self):
+ url = 'http://user%25user:pass@complex.url.com/path?query=yes'
+ assert ('user%user', 'pass') == requests.utils.get_auth_from_url(url)
+
+ def test_get_auth_from_url_encoded_hashes(self):
+ url = 'http://user:pass%23pass@complex.url.com/path?query=yes'
+ assert ('user', 'pass#pass') == requests.utils.get_auth_from_url(url)
def test_cannot_send_unprepared_requests(self):
r = requests.Request(url=HTTPBIN)
- self.assertRaises(ValueError, requests.Session().send, r)
+ with pytest.raises(ValueError):
+ requests.Session().send(r)
def test_http_error(self):
error = requests.exceptions.HTTPError()
- self.assertEqual(error.response, None)
+ assert not error.response
response = requests.Response()
error = requests.exceptions.HTTPError(response=response)
- self.assertEqual(error.response, response)
+ assert error.response == response
error = requests.exceptions.HTTPError('message', response=response)
- self.assertEqual(str(error), 'message')
- self.assertEqual(error.response, response)
+ assert str(error) == 'message'
+ assert error.response == response
def test_session_pickling(self):
r = requests.Request('GET', httpbin('get'))
s.proxies = getproxies()
r = s.send(r.prepare())
- self.assertEqual(r.status_code, 200)
+ assert r.status_code == 200
def test_fixes_1329(self):
"""
s.headers.update({'accept': 'application/json'})
r = s.get(httpbin('get'))
headers = r.request.headers
- self.assertEqual(
- headers['accept'],
- 'application/json'
- )
- self.assertEqual(
- headers['Accept'],
- 'application/json'
- )
- self.assertEqual(
- headers['ACCEPT'],
- 'application/json'
- )
+ assert headers['accept'] == 'application/json'
+ assert headers['Accept'] == 'application/json'
+ assert headers['ACCEPT'] == 'application/json'
def test_uppercase_scheme_redirect(self):
parts = urlparse(httpbin('html'))
url = "HTTP://" + parts.netloc + parts.path
r = requests.get(httpbin('redirect-to'), params={'url': url})
- self.assertEqual(r.status_code, 200)
- self.assertEqual(r.url.lower(), url.lower())
+ assert r.status_code == 200
+ assert r.url.lower() == url.lower()
def test_transport_adapter_ordering(self):
s = requests.Session()
order = ['https://', 'http://']
- self.assertEqual(order, list(s.adapters))
+ assert order == list(s.adapters)
s.mount('http://git', HTTPAdapter())
s.mount('http://github', HTTPAdapter())
s.mount('http://github.com', HTTPAdapter())
'https://',
'http://',
]
- self.assertEqual(order, list(s.adapters))
+ assert order == list(s.adapters)
s.mount('http://gittip', HTTPAdapter())
s.mount('http://gittip.com', HTTPAdapter())
s.mount('http://gittip.com/about/', HTTPAdapter())
'https://',
'http://',
]
- self.assertEqual(order, list(s.adapters))
+ assert order == list(s.adapters)
s2 = requests.Session()
s2.adapters = {'http://': HTTPAdapter()}
s2.mount('https://', HTTPAdapter())
- self.assertTrue('http://' in s2.adapters)
- self.assertTrue('https://' in s2.adapters)
+ assert 'http://' in s2.adapters
+ assert 'https://' in s2.adapters
def test_header_remove_is_case_insensitive(self):
# From issue #1321
r = s.get(httpbin('get'), params={'FOO': 'bar'})
assert r.json()['args'] == {'foo': 'bar', 'FOO': 'bar'}
-
def test_long_authinfo_in_url(self):
url = 'http://{0}:{1}@{2}:9000/path?query#frag'.format(
'E8A3BE87-9E3F-4620-8858-95478E385B5B',
'exactly-------------sixty-----------three------------characters',
)
r = requests.Request('GET', url).prepare()
- self.assertEqual(r.url, url)
+ assert r.url == url
def test_header_keys_are_native(self):
- headers = {u'unicode': 'blah', 'byte'.encode('ascii'): 'blah'}
+ headers = {u('unicode'): 'blah', 'byte'.encode('ascii'): 'blah'}
r = requests.Request('GET', httpbin('get'), headers=headers)
p = r.prepare()
# This is testing that they are builtin strings. A bit weird, but there
# we go.
- self.assertTrue('unicode' in p.headers.keys())
- self.assertTrue('byte' in p.headers.keys())
+ assert 'unicode' in p.headers.keys()
+ assert 'byte' in p.headers.keys()
def test_can_send_nonstring_objects_with_files(self):
data = {'a': 0.0}
r = requests.Request('POST', httpbin('post'), data=data, files=files)
p = r.prepare()
- self.assertTrue('multipart/form-data' in p.headers['Content-Type'])
+ assert 'multipart/form-data' in p.headers['Content-Type']
+
+ def test_autoset_header_values_are_native(self):
+ data = 'this is a string'
+ length = '16'
+ req = requests.Request('POST', httpbin('post'), data=data)
+ p = req.prepare()
+
+ assert p.headers['Content-Length'] == length
+
+ def test_oddball_schemes_dont_check_URLs(self):
+ test_urls = (
+ 'data:image/gif;base64,R0lGODlhAQABAHAAACH5BAUAAAAALAAAAAABAAEAAAICRAEAOw==',
+ 'file:///etc/passwd',
+ 'magnet:?xt=urn:btih:be08f00302bc2d1d3cfa3af02024fa647a271431',
+ )
+ for test_url in test_urls:
+ req = requests.Request('GET', test_url)
+ preq = req.prepare()
+ assert test_url == preq.url
+
+ def test_auth_is_stripped_on_redirect_off_host(self):
+ r = requests.get(
+ httpbin('redirect-to'),
+ params={'url': 'http://www.google.co.uk'},
+ auth=('user', 'pass'),
+ )
+ assert r.history[0].request.headers['Authorization']
+ assert not r.request.headers.get('Authorization', '')
+
+ def test_auth_is_retained_for_redirect_on_host(self):
+ r = requests.get(httpbin('redirect/1'), auth=('user', 'pass'))
+ h1 = r.history[0].request.headers['Authorization']
+ h2 = r.request.headers['Authorization']
+
+ assert h1 == h2
+
+ def test_manual_redirect_with_partial_body_read(self):
+ s = requests.Session()
+ r1 = s.get(httpbin('redirect/2'), allow_redirects=False, stream=True)
+ assert r1.is_redirect
+ rg = s.resolve_redirects(r1, r1.request, stream=True)
+
+ # read only the first eight bytes of the response body,
+ # then follow the redirect
+ r1.iter_content(8)
+ r2 = next(rg)
+ assert r2.is_redirect
+
+ # read all of the response via iter_content,
+ # then follow the redirect
+ for _ in r2.iter_content():
+ pass
+ r3 = next(rg)
+ assert not r3.is_redirect
+
+ def _patch_adapter_gzipped_redirect(self, session, url):
+ adapter = session.get_adapter(url=url)
+ org_build_response = adapter.build_response
+ self._patched_response = False
+
+ def build_response(*args, **kwargs):
+ resp = org_build_response(*args, **kwargs)
+ if not self._patched_response:
+ resp.raw.headers['content-encoding'] = 'gzip'
+ self._patched_response = True
+ return resp
+
+ adapter.build_response = build_response
+
+ def test_redirect_with_wrong_gzipped_header(self):
+ s = requests.Session()
+ url = httpbin('redirect/1')
+ self._patch_adapter_gzipped_redirect(s, url)
+ s.get(url)
+
+ def test_basic_auth_str_is_always_native(self):
+ s = _basic_auth_str("test", "test")
+ assert isinstance(s, builtin_str)
+ assert s == "Basic dGVzdDp0ZXN0"
+
+ def test_requests_history_is_saved(self):
+ r = requests.get('https://httpbin.org/redirect/5')
+ total = r.history[-1].history
+ i = 0
+ for item in r.history:
+ assert item.history == total[0:i]
+ i=i+1
class TestContentEncodingDetection(unittest.TestCase):
def test_none(self):
encodings = requests.utils.get_encodings_from_content('')
- self.assertEqual(len(encodings), 0)
+ assert not len(encodings)
def test_html_charset(self):
"""HTML5 meta charset attribute"""
content = '<meta charset="UTF-8">'
encodings = requests.utils.get_encodings_from_content(content)
- self.assertEqual(len(encodings), 1)
- self.assertEqual(encodings[0], 'UTF-8')
+ assert len(encodings) == 1
+ assert encodings[0] == 'UTF-8'
def test_html4_pragma(self):
"""HTML4 pragma directive"""
content = '<meta http-equiv="Content-type" content="text/html;charset=UTF-8">'
encodings = requests.utils.get_encodings_from_content(content)
- self.assertEqual(len(encodings), 1)
- self.assertEqual(encodings[0], 'UTF-8')
+ assert len(encodings) == 1
+ assert encodings[0] == 'UTF-8'
def test_xhtml_pragma(self):
"""XHTML 1.x served with text/html MIME type"""
content = '<meta http-equiv="Content-type" content="text/html;charset=UTF-8" />'
encodings = requests.utils.get_encodings_from_content(content)
- self.assertEqual(len(encodings), 1)
- self.assertEqual(encodings[0], 'UTF-8')
+ assert len(encodings) == 1
+ assert encodings[0] == 'UTF-8'
def test_xml(self):
"""XHTML 1.x served as XML"""
content = '<?xml version="1.0" encoding="UTF-8"?>'
encodings = requests.utils.get_encodings_from_content(content)
- self.assertEqual(len(encodings), 1)
- self.assertEqual(encodings[0], 'UTF-8')
+ assert len(encodings) == 1
+ assert encodings[0] == 'UTF-8'
def test_precedence(self):
content = '''
<meta http-equiv="Content-type" content="text/html;charset=HTML4" />
'''.strip()
encodings = requests.utils.get_encodings_from_content(content)
- self.assertEqual(encodings, ['HTML5', 'HTML4', 'XML'])
+ assert encodings == ['HTML5', 'HTML4', 'XML']
class TestCaseInsensitiveDict(unittest.TestCase):
def test_mapping_init(self):
- cid = CaseInsensitiveDict({'Foo': 'foo','BAr': 'bar'})
- self.assertEqual(len(cid), 2)
- self.assertTrue('foo' in cid)
- self.assertTrue('bar' in cid)
+ cid = CaseInsensitiveDict({'Foo': 'foo', 'BAr': 'bar'})
+ assert len(cid) == 2
+ assert 'foo' in cid
+ assert 'bar' in cid
def test_iterable_init(self):
cid = CaseInsensitiveDict([('Foo', 'foo'), ('BAr', 'bar')])
- self.assertEqual(len(cid), 2)
- self.assertTrue('foo' in cid)
- self.assertTrue('bar' in cid)
+ assert len(cid) == 2
+ assert 'foo' in cid
+ assert 'bar' in cid
def test_kwargs_init(self):
cid = CaseInsensitiveDict(FOO='foo', BAr='bar')
- self.assertEqual(len(cid), 2)
- self.assertTrue('foo' in cid)
- self.assertTrue('bar' in cid)
+ assert len(cid) == 2
+ assert 'foo' in cid
+ assert 'bar' in cid
def test_docstring_example(self):
cid = CaseInsensitiveDict()
cid['Accept'] = 'application/json'
- self.assertEqual(cid['aCCEPT'], 'application/json')
- self.assertEqual(list(cid), ['Accept'])
+ assert cid['aCCEPT'] == 'application/json'
+ assert list(cid) == ['Accept']
def test_len(self):
cid = CaseInsensitiveDict({'a': 'a', 'b': 'b'})
cid['A'] = 'a'
- self.assertEqual(len(cid), 2)
+ assert len(cid) == 2
def test_getitem(self):
cid = CaseInsensitiveDict({'Spam': 'blueval'})
- self.assertEqual(cid['spam'], 'blueval')
- self.assertEqual(cid['SPAM'], 'blueval')
+ assert cid['spam'] == 'blueval'
+ assert cid['SPAM'] == 'blueval'
def test_fixes_649(self):
"""__setitem__ should behave case-insensitively."""
cid['Spam'] = 'twoval'
cid['sPAM'] = 'redval'
cid['SPAM'] = 'blueval'
- self.assertEqual(cid['spam'], 'blueval')
- self.assertEqual(cid['SPAM'], 'blueval')
- self.assertEqual(list(cid.keys()), ['SPAM'])
+ assert cid['spam'] == 'blueval'
+ assert cid['SPAM'] == 'blueval'
+ assert list(cid.keys()) == ['SPAM']
def test_delitem(self):
cid = CaseInsensitiveDict()
cid['Spam'] = 'someval'
del cid['sPam']
- self.assertFalse('spam' in cid)
- self.assertEqual(len(cid), 0)
+ assert 'spam' not in cid
+ assert len(cid) == 0
def test_contains(self):
cid = CaseInsensitiveDict()
cid['Spam'] = 'someval'
- self.assertTrue('Spam' in cid)
- self.assertTrue('spam' in cid)
- self.assertTrue('SPAM' in cid)
- self.assertTrue('sPam' in cid)
- self.assertFalse('notspam' in cid)
+ assert 'Spam' in cid
+ assert 'spam' in cid
+ assert 'SPAM' in cid
+ assert 'sPam' in cid
+ assert 'notspam' not in cid
def test_get(self):
cid = CaseInsensitiveDict()
cid['spam'] = 'oneval'
cid['SPAM'] = 'blueval'
- self.assertEqual(cid.get('spam'), 'blueval')
- self.assertEqual(cid.get('SPAM'), 'blueval')
- self.assertEqual(cid.get('sPam'), 'blueval')
- self.assertEqual(cid.get('notspam', 'default'), 'default')
+ assert cid.get('spam') == 'blueval'
+ assert cid.get('SPAM') == 'blueval'
+ assert cid.get('sPam') == 'blueval'
+ assert cid.get('notspam', 'default') == 'default'
def test_update(self):
cid = CaseInsensitiveDict()
cid['spam'] = 'blueval'
cid.update({'sPam': 'notblueval'})
- self.assertEqual(cid['spam'], 'notblueval')
- cid = CaseInsensitiveDict({'Foo': 'foo','BAr': 'bar'})
+ assert cid['spam'] == 'notblueval'
+ cid = CaseInsensitiveDict({'Foo': 'foo', 'BAr': 'bar'})
cid.update({'fOO': 'anotherfoo', 'bAR': 'anotherbar'})
- self.assertEqual(len(cid), 2)
- self.assertEqual(cid['foo'], 'anotherfoo')
- self.assertEqual(cid['bar'], 'anotherbar')
+ assert len(cid) == 2
+ assert cid['foo'] == 'anotherfoo'
+ assert cid['bar'] == 'anotherbar'
def test_update_retains_unchanged(self):
cid = CaseInsensitiveDict({'foo': 'foo', 'bar': 'bar'})
cid.update({'foo': 'newfoo'})
- self.assertEquals(cid['bar'], 'bar')
+ assert cid['bar'] == 'bar'
def test_iter(self):
cid = CaseInsensitiveDict({'Spam': 'spam', 'Eggs': 'eggs'})
keys = frozenset(['Spam', 'Eggs'])
- self.assertEqual(frozenset(iter(cid)), keys)
+ assert frozenset(iter(cid)) == keys
def test_equality(self):
cid = CaseInsensitiveDict({'SPAM': 'blueval', 'Eggs': 'redval'})
othercid = CaseInsensitiveDict({'spam': 'blueval', 'eggs': 'redval'})
- self.assertEqual(cid, othercid)
+ assert cid == othercid
del othercid['spam']
- self.assertNotEqual(cid, othercid)
- self.assertEqual(cid, {'spam': 'blueval', 'eggs': 'redval'})
+ assert cid != othercid
+ assert cid == {'spam': 'blueval', 'eggs': 'redval'}
def test_setdefault(self):
cid = CaseInsensitiveDict({'Spam': 'blueval'})
- self.assertEqual(
- cid.setdefault('spam', 'notblueval'),
- 'blueval'
- )
- self.assertEqual(
- cid.setdefault('notspam', 'notblueval'),
- 'notblueval'
- )
+ assert cid.setdefault('spam', 'notblueval') == 'blueval'
+ assert cid.setdefault('notspam', 'notblueval') == 'notblueval'
def test_lower_items(self):
cid = CaseInsensitiveDict({
})
keyset = frozenset(lowerkey for lowerkey, v in cid.lower_items())
lowerkeyset = frozenset(['accept', 'user-agent'])
- self.assertEqual(keyset, lowerkeyset)
+ assert keyset == lowerkeyset
def test_preserve_key_case(self):
cid = CaseInsensitiveDict({
'user-Agent': 'requests',
})
keyset = frozenset(['Accept', 'user-Agent'])
- self.assertEqual(frozenset(i[0] for i in cid.items()), keyset)
- self.assertEqual(frozenset(cid.keys()), keyset)
- self.assertEqual(frozenset(cid), keyset)
+ assert frozenset(i[0] for i in cid.items()) == keyset
+ assert frozenset(cid.keys()) == keyset
+ assert frozenset(cid) == keyset
def test_preserve_last_key_case(self):
cid = CaseInsensitiveDict({
cid.update({'ACCEPT': 'application/json'})
cid['USER-AGENT'] = 'requests'
keyset = frozenset(['ACCEPT', 'USER-AGENT'])
- self.assertEqual(frozenset(i[0] for i in cid.items()), keyset)
- self.assertEqual(frozenset(cid.keys()), keyset)
- self.assertEqual(frozenset(cid), keyset)
+ assert frozenset(i[0] for i in cid.items()) == keyset
+ assert frozenset(cid.keys()) == keyset
+ assert frozenset(cid) == keyset
class UtilsTestCase(unittest.TestCase):
from io import BytesIO
from requests.utils import super_len
- self.assertEqual(super_len(StringIO.StringIO()), 0)
- self.assertEqual(super_len(StringIO.StringIO('with so much drama in the LBC')), 29)
+ assert super_len(StringIO.StringIO()) == 0
+ assert super_len(
+ StringIO.StringIO('with so much drama in the LBC')) == 29
- self.assertEqual(super_len(BytesIO()), 0)
- self.assertEqual(super_len(BytesIO(b"it's kinda hard bein' snoop d-o-double-g")), 40)
+ assert super_len(BytesIO()) == 0
+ assert super_len(
+ BytesIO(b"it's kinda hard bein' snoop d-o-double-g")) == 40
try:
import cStringIO
except ImportError:
pass
else:
- self.assertEqual(super_len(cStringIO.StringIO('but some how, some way...')), 25)
+ assert super_len(
+ cStringIO.StringIO('but some how, some way...')) == 25
+
+ def test_get_environ_proxies_ip_ranges(self):
+        """Ensures that IP addresses are correctly matched against ranges
+        in the no_proxy variable."""
+ from requests.utils import get_environ_proxies
+ os.environ['no_proxy'] = "192.168.0.0/24,127.0.0.1,localhost.localdomain,172.16.1.1"
+ assert get_environ_proxies('http://192.168.0.1:5000/') == {}
+ assert get_environ_proxies('http://192.168.0.1/') == {}
+ assert get_environ_proxies('http://172.16.1.1/') == {}
+ assert get_environ_proxies('http://172.16.1.1:5000/') == {}
+ assert get_environ_proxies('http://192.168.1.1:5000/') != {}
+ assert get_environ_proxies('http://192.168.1.1/') != {}
+
+ def test_get_environ_proxies(self):
+        """Ensures that host names are correctly matched against entries
+        in the no_proxy variable."""
+ from requests.utils import get_environ_proxies
+ os.environ['no_proxy'] = "127.0.0.1,localhost.localdomain,192.168.0.0/24,172.16.1.1"
+ assert get_environ_proxies(
+ 'http://localhost.localdomain:5000/v1.0/') == {}
+ assert get_environ_proxies('http://www.requests.com/') != {}
+
+ def test_is_ipv4_address(self):
+ from requests.utils import is_ipv4_address
+ assert is_ipv4_address('8.8.8.8')
+ assert not is_ipv4_address('8.8.8.8.8')
+ assert not is_ipv4_address('localhost.localdomain')
+
+ def test_is_valid_cidr(self):
+ from requests.utils import is_valid_cidr
+ assert not is_valid_cidr('8.8.8.8')
+ assert is_valid_cidr('192.168.1.0/24')
+
+ def test_dotted_netmask(self):
+ from requests.utils import dotted_netmask
+ assert dotted_netmask(8) == '255.0.0.0'
+ assert dotted_netmask(24) == '255.255.255.0'
+ assert dotted_netmask(25) == '255.255.255.128'
+
+ def test_address_in_network(self):
+ from requests.utils import address_in_network
+ assert address_in_network('192.168.1.1', '192.168.1.0/24')
+ assert not address_in_network('172.16.0.1', '192.168.1.0/24')
+
+ def test_get_auth_from_url(self):
+        """Ensures that username and password in a well-encoded URI as per
+        RFC 3986 are correctly extracted."""
+ from requests.utils import get_auth_from_url
+ from requests.compat import quote
+ percent_encoding_test_chars = "%!*'();:@&=+$,/?#[] "
+ url_address = "request.com/url.html#test"
+ url = "http://" + quote(
+ percent_encoding_test_chars, '') + ':' + quote(
+ percent_encoding_test_chars, '') + '@' + url_address
+ (username, password) = get_auth_from_url(url)
+ assert username == percent_encoding_test_chars
+ assert password == percent_encoding_test_chars
+
+
+class TestMorselToCookieExpires(unittest.TestCase):
+
+ """Tests for morsel_to_cookie when morsel contains expires."""
+
+ def test_expires_valid_str(self):
+ """Test case where we convert expires from string time."""
+
+ morsel = Morsel()
+ morsel['expires'] = 'Thu, 01-Jan-1970 00:00:01 GMT'
+ cookie = morsel_to_cookie(morsel)
+ assert cookie.expires == 1
+
+ def test_expires_invalid_int(self):
+ """Test case where an invalid type is passed for expires."""
+
+ morsel = Morsel()
+ morsel['expires'] = 100
+ with pytest.raises(TypeError):
+ morsel_to_cookie(morsel)
+
+ def test_expires_invalid_str(self):
+ """Test case where an invalid string is input."""
+
+ morsel = Morsel()
+ morsel['expires'] = 'woops'
+ with pytest.raises(ValueError):
+ morsel_to_cookie(morsel)
+
+ def test_expires_none(self):
+ """Test case where expires is None."""
+
+ morsel = Morsel()
+ morsel['expires'] = None
+ cookie = morsel_to_cookie(morsel)
+ assert cookie.expires is None
+
+
+class TestMorselToCookieMaxAge(unittest.TestCase):
+
+ """Tests for morsel_to_cookie when morsel contains max-age."""
+
+ def test_max_age_valid_int(self):
+ """Test case where a valid max age in seconds is passed."""
+
+ morsel = Morsel()
+ morsel['max-age'] = 60
+ cookie = morsel_to_cookie(morsel)
+ assert isinstance(cookie.expires, int)
+
+ def test_max_age_invalid_str(self):
+        """Test case where an invalid max age is passed."""
+
+ morsel = Morsel()
+ morsel['max-age'] = 'woops'
+ with pytest.raises(TypeError):
+ morsel_to_cookie(morsel)
+
+
+class TestTimeout:
+ def test_stream_timeout(self):
+ try:
+ requests.get('https://httpbin.org/delay/10', timeout=2.0)
+ except requests.exceptions.Timeout as e:
+ assert 'Read timed out' in e.args[0].args[0]
+
+ def test_invalid_timeout(self):
+ with pytest.raises(ValueError) as e:
+ requests.get(httpbin('get'), timeout=(3, 4, 5))
+ assert '(connect, read)' in str(e)
+
+ with pytest.raises(ValueError) as e:
+ requests.get(httpbin('get'), timeout="foo")
+ assert 'must be an int or float' in str(e)
+
+ def test_none_timeout(self):
+ """ Check that you can set None as a valid timeout value.
+
+ To actually test this behavior, we'd want to check that setting the
+ timeout to None actually lets the request block past the system default
+ timeout. However, this would make the test suite unbearably slow.
+ Instead we verify that setting the timeout to None does not prevent the
+ request from succeeding.
+ """
+ r = requests.get(httpbin('get'), timeout=None)
+ assert r.status_code == 200
+
+ def test_read_timeout(self):
+ try:
+ requests.get(httpbin('delay/10'), timeout=(None, 0.1))
+ assert False, "The recv() request should time out."
+ except ReadTimeout:
+ pass
+
+ def test_connect_timeout(self):
+ try:
+ requests.get(TARPIT, timeout=(0.1, None))
+ assert False, "The connect() request should time out."
+ except ConnectTimeout as e:
+ assert isinstance(e, ConnectionError)
+ assert isinstance(e, Timeout)
+
+ def test_total_timeout_connect(self):
+ try:
+ requests.get(TARPIT, timeout=(0.1, 0.1))
+ assert False, "The connect() request should time out."
+ except ConnectTimeout:
+ pass
+
+
+SendCall = collections.namedtuple('SendCall', ('args', 'kwargs'))
+
+
+class RedirectSession(SessionRedirectMixin):
+ def __init__(self, order_of_redirects):
+ self.redirects = order_of_redirects
+ self.calls = []
+ self.max_redirects = 30
+ self.cookies = {}
+ self.trust_env = False
+
+ def send(self, *args, **kwargs):
+ self.calls.append(SendCall(args, kwargs))
+ return self.build_response()
+
+ def build_response(self):
+ request = self.calls[-1].args[0]
+ r = requests.Response()
+
+ try:
+ r.status_code = int(self.redirects.pop(0))
+ except IndexError:
+ r.status_code = 200
+
+ r.headers = CaseInsensitiveDict({'Location': '/'})
+ r.raw = self._build_raw()
+ r.request = request
+ return r
+
+ def _build_raw(self):
+ string = StringIO.StringIO('')
+ setattr(string, 'release_conn', lambda *args: args)
+ return string
+
+
+class TestRedirects:
+ default_keyword_args = {
+ 'stream': False,
+ 'verify': True,
+ 'cert': None,
+ 'timeout': None,
+ 'allow_redirects': False,
+ 'proxies': {},
+ }
+
+ def test_requests_are_updated_each_time(self):
+ session = RedirectSession([303, 307])
+ prep = requests.Request('POST', 'http://httpbin.org/post').prepare()
+ r0 = session.send(prep)
+ assert r0.request.method == 'POST'
+ assert session.calls[-1] == SendCall((r0.request,), {})
+ redirect_generator = session.resolve_redirects(r0, prep)
+ for response in redirect_generator:
+ assert response.request.method == 'GET'
+ send_call = SendCall((response.request,),
+ TestRedirects.default_keyword_args)
+ assert session.calls[-1] == send_call
+
+
+
+@pytest.fixture
+def list_of_tuples():
+ return [
+ (('a', 'b'), ('c', 'd')),
+ (('c', 'd'), ('a', 'b')),
+ (('a', 'b'), ('c', 'd'), ('e', 'f')),
+ ]
+
+
+def test_data_argument_accepts_tuples(list_of_tuples):
+ """
+ Ensure that the data argument will accept tuples of strings
+ and properly encode them.
+ """
+ for data in list_of_tuples:
+ p = PreparedRequest()
+ p.prepare(
+ method='GET',
+ url='http://www.example.com',
+ data=data,
+ hooks=default_hooks()
+ )
+ assert p.body == urlencode(data)
+
+
+def assert_copy(p, p_copy):
+ for attr in ('method', 'url', 'headers', '_cookies', 'body', 'hooks'):
+ assert getattr(p, attr) == getattr(p_copy, attr)
+
+
+def test_prepared_request_empty_copy():
+ p = PreparedRequest()
+ assert_copy(p, p.copy())
+
+
+def test_prepared_request_no_cookies_copy():
+ p = PreparedRequest()
+ p.prepare(
+ method='GET',
+ url='http://www.example.com',
+ data='foo=bar',
+ hooks=default_hooks()
+ )
+ assert_copy(p, p.copy())
+
+
+def test_prepared_request_complete_copy():
+ p = PreparedRequest()
+ p.prepare(
+ method='GET',
+ url='http://www.example.com',
+ data='foo=bar',
+ hooks=default_hooks(),
+ cookies={'foo': 'bar'}
+ )
+ assert_copy(p, p.copy())
if __name__ == '__main__':
unittest.main()
+