if contents[:len(codecs.BOM_UTF16_BE)] == codecs.BOM_UTF16_BE:
return contents[len(codecs.BOM_UTF16_BE):].decode('utf-16-be')
try:
- return contents.decode('utf-8',errors='backslashreplace')
- except (UnicodeDecodeError, AttributeError) as e:
- return contents
+ return contents.decode('utf-8')
+ except UnicodeDecodeError as e:
+ try:
+ return contents.decode('latin-1')
+ except UnicodeDecodeError as e:
+ return contents.decode('utf-8', error='backslashreplace')
def get_content_hash(self):
# Check for string which doesn't have BOM and isn't valid
# ASCII
- test_string = b'Gan\xef\xbf\xbdauge'
+ test_string = b'Gan\xdfauge'
test.write('latin1_file', test_string)
f1 = fs.File(test.workpath("latin1_file"))
- assert f1.get_text_contents() == test_string.decode('utf-8'), \
+ assert f1.get_text_contents() == test_string.decode('latin-1'), \
f1.get_text_contents()
def nonexistent(method, s):