chardet fixes!
author: Kenneth Reitz <me@kennethreitz.com>
Sat, 1 Dec 2012 15:55:09 +0000 (16:55 +0100)
committer: Kenneth Reitz <me@kennethreitz.com>
Sat, 1 Dec 2012 15:55:09 +0000 (16:55 +0100)
requests/compat.py
requests/packages/charade/charsetprober.py
requests/packages/charade/compat.py
requests/packages/charade/jpcntx.py
requests/packages/charade/sbcharsetprober.py
requests/packages/charade/sbcsgroupprober.py

diff --git a/requests/compat.py b/requests/compat.py
index 67e058b..5bd4fcb 100644 (file)
@@ -4,6 +4,7 @@
 pythoncompat
 """
 
+from .packages import charade as chardet
 
 import sys
 
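diff --git a/requests/packages/charade/charsetprober.py b/requests/packages/charade/charsetprober.py
index 9e46ba4..9758171 100755 (executable)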
@@ -50,11 +50,11 @@ class CharSetProber:
         return 0.0
 
     def filter_high_bit_only(self, aBuf):
-        aBuf = re.sub(b'([\x00-\x7F])+', ' ', aBuf)
+        aBuf = re.sub(b'([\x00-\x7F])+', b' ', aBuf)
         return aBuf
 
     def filter_without_english_letters(self, aBuf):
-        aBuf = re.sub(b'([A-Za-z])+', ' ', aBuf)
+        aBuf = re.sub(b'([A-Za-z])+', b' ', aBuf)
         return aBuf
 
     def filter_with_english_letters(self, aBuf):
diff --git a/requests/packages/charade/compat.py b/requests/packages/charade/compat.py
index 9aeff65..f2c3b5e 100644 (file)
 ######################### END LICENSE BLOCK #########################
 
 
-from sys import verion_info
+from sys import version_info
 
 
 def wrap_ord(a):
-    if isinstance(a, 'str') and version_info < (3, 0):
+    if isinstance(a, str) and version_info < (3, 0):
         return ord(a)
     elif isinstance(a, int) and version_info >= (3, 0):
         return a
-
-
-def cmp_corrector(s):
-    if version_info >= (3, 0):
-        if s.startswith('\x'):
-            return ord(s)
-        elif s.startswith('\\'):
-            return int(s[1:])
-    return s
diff --git a/requests/packages/charade/jpcntx.py b/requests/packages/charade/jpcntx.py
index 41f3643..b876ff2 100755 (executable)
@@ -25,7 +25,6 @@
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-from . import constants
 from .compat import wrap_ord
 
 NUM_OF_CATEGORY = 6
@@ -494,7 +493,7 @@ class JapaneseContextAnalysis:
                 if (order != -1) and (self._mLastCharOrder != -1):
                     self._mTotalRel += 1
                     if self._mTotalRel > MAX_REL_THRESHOLD:
-                        self._mDone = constants.True
+                        self._mDone = True
                         break
                     self._mRelSample[
                         jp2CharContext[self._mLastCharOrder][order]
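diff --git a/requests/packages/charade/sbcharsetprober.py b/requests/packages/charade/sbcharsetprober.py
index 29eb459..c12982f 100755 (executable)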
@@ -86,7 +86,8 @@ class SingleByteCharSetProber(CharSetProber):
                         i = (self._mLastOrder * SAMPLE_SIZE) + order
                     else:  # reverse the order of the letters in the lookup
                         i = (order * SAMPLE_SIZE) + self._mLastOrder
-                    self._mSeqCounters[self.mModel['precedenceMatrix'][i]] += 1
+                    model = self._mModel['precedenceMatrix'][i]
+                    self._mSeqCounters[model] += 1
             self._mLastOrder = order
 
         if self.get_state() == constants.eDetecting:
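diff --git a/requests/packages/charade/sbcsgroupprober.py b/requests/packages/charade/sbcsgroupprober.py
index d19160c..1b6196c 100755 (executable)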
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
-# 
+#
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-import constants, sys
-from charsetgroupprober import CharSetGroupProber
-from sbcharsetprober import SingleByteCharSetProber
-from langcyrillicmodel import Win1251CyrillicModel, Koi8rModel, Latin5CyrillicModel, MacCyrillicModel, Ibm866Model, Ibm855Model
-from langgreekmodel import Latin7GreekModel, Win1253GreekModel
-from langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
-from langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
-from langthaimodel import TIS620ThaiModel
-from langhebrewmodel import Win1255HebrewModel
-from hebrewprober import HebrewProber
+from .charsetgroupprober import CharSetGroupProber
+from .sbcharsetprober import SingleByteCharSetProber
+from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
+                                Latin5CyrillicModel, MacCyrillicModel,
+                                Ibm866Model, Ibm855Model)
+from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
+from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
+from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
+from .langthaimodel import TIS620ThaiModel
+from .langhebrewmodel import Win1255HebrewModel
+from .hebrewprober import HebrewProber
+
 
 class SBCSGroupProber(CharSetGroupProber):
     def __init__(self):
         CharSetGroupProber.__init__(self)
-        self._mProbers = [ \
+        self._mProbers = [
             SingleByteCharSetProber(Win1251CyrillicModel),
             SingleByteCharSetProber(Koi8rModel),
             SingleByteCharSetProber(Latin5CyrillicModel),
@@ -54,11 +56,14 @@ class SBCSGroupProber(CharSetGroupProber):
             SingleByteCharSetProber(Latin2HungarianModel),
             SingleByteCharSetProber(Win1250HungarianModel),
             SingleByteCharSetProber(TIS620ThaiModel),
-            ]
+        ]
         hebrewProber = HebrewProber()
-        logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, constants.False, hebrewProber)
-        visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, constants.True, hebrewProber)
+        logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel,
+                                                      False, hebrewProber)
+        visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True,
+                                                     hebrewProber)
         hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber)
-        self._mProbers.extend([hebrewProber, logicalHebrewProber, visualHebrewProber])
+        self._mProbers.extend([hebrewProber, logicalHebrewProber,
+                               visualHebrewProber])
 
         self.reset()