From: Kenneth Reitz Date: Sat, 1 Dec 2012 15:55:09 +0000 (+0100) Subject: chardet fixes! X-Git-Tag: v1.0.0~89 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2eb63be19ae9496193fd6d70aef09442f1840c74;p=services%2Fpython-requests.git chardet fixes! --- diff --git a/requests/compat.py b/requests/compat.py index 67e058b..5bd4fcb 100644 --- a/requests/compat.py +++ b/requests/compat.py @@ -4,6 +4,7 @@ pythoncompat """ +from .packages import charade as chardet import sys diff --git a/requests/packages/charade/charsetprober.py b/requests/packages/charade/charsetprober.py index 9e46ba4..9758171 100755 --- a/requests/packages/charade/charsetprober.py +++ b/requests/packages/charade/charsetprober.py @@ -50,11 +50,11 @@ class CharSetProber: return 0.0 def filter_high_bit_only(self, aBuf): - aBuf = re.sub(b'([\x00-\x7F])+', ' ', aBuf) + aBuf = re.sub(b'([\x00-\x7F])+', b' ', aBuf) return aBuf def filter_without_english_letters(self, aBuf): - aBuf = re.sub(b'([A-Za-z])+', ' ', aBuf) + aBuf = re.sub(b'([A-Za-z])+', b' ', aBuf) return aBuf def filter_with_english_letters(self, aBuf): diff --git a/requests/packages/charade/compat.py b/requests/packages/charade/compat.py index 9aeff65..f2c3b5e 100644 --- a/requests/packages/charade/compat.py +++ b/requests/packages/charade/compat.py @@ -19,20 +19,11 @@ ######################### END LICENSE BLOCK ######################### -from sys import verion_info +from sys import version_info def wrap_ord(a): - if isinstance(a, 'str') and version_info < (3, 0): + if isinstance(a, str) and version_info < (3, 0): return ord(a) elif isinstance(a, int) and version_info >= (3, 0): return a - - -def cmp_corrector(s): - if version_info >= (3, 0): - if s.startswith('\x'): - return ord(s) - elif s.startswith('\\'): - return int(s[1:]) - return s diff --git a/requests/packages/charade/jpcntx.py b/requests/packages/charade/jpcntx.py index 41f3643..b876ff2 100755 --- a/requests/packages/charade/jpcntx.py +++ b/requests/packages/charade/jpcntx.py @@ -25,7 +25,6 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from . import constants from .compat import wrap_ord NUM_OF_CATEGORY = 6 @@ -494,7 +493,7 @@ class JapaneseContextAnalysis: if (order != -1) and (self._mLastCharOrder != -1): self._mTotalRel += 1 if self._mTotalRel > MAX_REL_THRESHOLD: - self._mDone = constants.True + self._mDone = True break self._mRelSample[ jp2CharContext[self._mLastCharOrder][order] diff --git a/requests/packages/charade/sbcharsetprober.py b/requests/packages/charade/sbcharsetprober.py index 29eb459..c12982f 100755 --- a/requests/packages/charade/sbcharsetprober.py +++ b/requests/packages/charade/sbcharsetprober.py @@ -86,7 +86,8 @@ class SingleByteCharSetProber(CharSetProber): i = (self._mLastOrder * SAMPLE_SIZE) + order else: # reverse the order of the letters in the lookup i = (order * SAMPLE_SIZE) + self._mLastOrder - self._mSeqCounters[self.mModel['precedenceMatrix'][i]] += 1 + model = self._mModel['precedenceMatrix'][i] + self._mSeqCounters[model] += 1 self._mLastOrder = order if self.get_state() == constants.eDetecting: diff --git a/requests/packages/charade/sbcsgroupprober.py b/requests/packages/charade/sbcsgroupprober.py index d19160c..1b6196c 100755 --- a/requests/packages/charade/sbcsgroupprober.py +++ b/requests/packages/charade/sbcsgroupprober.py @@ -14,33 +14,35 @@ # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. -# +# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. -# +# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -import constants, sys -from charsetgroupprober import CharSetGroupProber -from sbcharsetprober import SingleByteCharSetProber -from langcyrillicmodel import Win1251CyrillicModel, Koi8rModel, Latin5CyrillicModel, MacCyrillicModel, Ibm866Model, Ibm855Model -from langgreekmodel import Latin7GreekModel, Win1253GreekModel -from langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel -from langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel -from langthaimodel import TIS620ThaiModel -from langhebrewmodel import Win1255HebrewModel -from hebrewprober import HebrewProber +from .charsetgroupprober import CharSetGroupProber +from .sbcharsetprober import SingleByteCharSetProber +from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel, + Latin5CyrillicModel, MacCyrillicModel, + Ibm866Model, Ibm855Model) +from .langgreekmodel import Latin7GreekModel, Win1253GreekModel +from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel +from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel +from .langthaimodel import TIS620ThaiModel +from .langhebrewmodel import Win1255HebrewModel +from .hebrewprober import HebrewProber + class SBCSGroupProber(CharSetGroupProber): def __init__(self): CharSetGroupProber.__init__(self) - self._mProbers = [ \ + self._mProbers = [ SingleByteCharSetProber(Win1251CyrillicModel), SingleByteCharSetProber(Koi8rModel), SingleByteCharSetProber(Latin5CyrillicModel), @@ -54,11 +56,14 @@ class SBCSGroupProber(CharSetGroupProber): SingleByteCharSetProber(Latin2HungarianModel), SingleByteCharSetProber(Win1250HungarianModel), SingleByteCharSetProber(TIS620ThaiModel), - ] + ] hebrewProber = HebrewProber() - logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, constants.False, hebrewProber) - visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, constants.True, hebrewProber) + logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, + False, hebrewProber) + visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True, + hebrewProber) hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber) - self._mProbers.extend([hebrewProber, logicalHebrewProber, visualHebrewProber]) + self._mProbers.extend([hebrewProber, logicalHebrewProber, + visualHebrewProber]) self.reset()