# modify it under the terms of the GNU Lesser General Public\r
# License as published by the Free Software Foundation; either\r
# version 2.1 of the License, or (at your option) any later version.\r
-# \r
+#\r
# This library is distributed in the hope that it will be useful,\r
# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
# Lesser General Public License for more details.\r
-# \r
+#\r
# You should have received a copy of the GNU Lesser General Public\r
# License along with this library; if not, write to the Free Software\r
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
# for each byte we get its class\r
# if it is first byte, we also get byte length\r
# PY3K: aBuf is a byte stream, so c is an int, not a byte\r
+ if hasattr(c, 'encode'):\r
+ c = int(c.encode('hex'), 16)\r
+\r
byteCls = self._mModel['classTable'][c]\r
if self._mCurrentState == eStart:\r
self._mCurrentBytePos = 0\r
# modify it under the terms of the GNU Lesser General Public\r
# License as published by the Free Software Foundation; either\r
# version 2.1 of the License, or (at your option) any later version.\r
-# \r
+#\r
# This library is distributed in the hope that it will be useful,\r
# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
# Lesser General Public License for more details.\r
-# \r
+#\r
# You should have received a copy of the GNU Lesser General Public\r
# License along with this library; if not, write to the Free Software\r
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF\r
)\r
\r
-# 0 : illegal \r
-# 1 : very unlikely \r
-# 2 : normal \r
+# 0 : illegal\r
+# 1 : very unlikely\r
+# 2 : normal\r
# 3 : very likely\r
Latin1ClassModel = ( \\r
# UDF OTH ASC ASS ACV ACO ASV ASO\r
def feed(self, aBuf):\r
aBuf = self.filter_with_english_letters(aBuf)\r
for c in aBuf:\r
+ if hasattr(c, 'encode'):\r
+ c = int(c.encode('hex'), 16)\r
charClass = Latin1_CharToClass[c]\r
freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM) + charClass]\r
if freq == 0:\r
def get_confidence(self):\r
if self.get_state() == constants.eNotMe:\r
return 0.01\r
- \r
+\r
total = sum(self._mFreqCounter)\r
if total < 0.01:\r
confidence = 0.0\r
confidence = (self._mFreqCounter[3] / total) - (self._mFreqCounter[1] * 20.0 / total)\r
if confidence < 0.0:\r
confidence = 0.0\r
- # lower the confidence of latin1 so that other more accurate detector \r
+ # lower the confidence of latin1 so that other more accurate detectors\r
# can take priority.\r
confidence = confidence * 0.5\r
return confidence\r
# modify it under the terms of the GNU Lesser General Public\r
# License as published by the Free Software Foundation; either\r
# version 2.1 of the License, or (at your option) any later version.\r
-# \r
+#\r
# This library is distributed in the hope that it will be useful,\r
# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
# Lesser General Public License for more details.\r
-# \r
+#\r
# You should have received a copy of the GNU Lesser General Public\r
# License along with this library; if not, write to the Free Software\r
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA\r
NUMBER_OF_SEQ_CAT = 4\r
POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1\r
#NEGATIVE_CAT = 0\r
- \r
+\r
class SingleByteCharSetProber(CharSetProber):\r
def __init__(self, model, reversed=False, nameProber=None):\r
CharSetProber.__init__(self)\r
if not aLen:\r
return self.get_state()\r
for c in aBuf:\r
+ if hasattr(c, 'encode'):\r
+ c = int(c.encode('hex'), 16)\r
order = self._mModel['charToOrderMap'][c]\r
if order < SYMBOL_CAT_ORDER:\r
self._mTotalChar += 1\r