Updates to get_text_contents() logic and tests
author William Deegan <bill@baddogconsulting.com>
Thu, 24 Aug 2017 17:13:35 +0000 (10:13 -0700)
committer William Deegan <bill@baddogconsulting.com>
Thu, 24 Aug 2017 17:13:35 +0000 (10:13 -0700)
src/engine/SCons/Node/FS.py
src/engine/SCons/Node/FSTests.py

index 606ecfda38e8be328536f9ccc9a2dc9ada746126..c31ac6c43ead5ad392a5fbe0746d4537d0205bda 100644 (file)
@@ -2654,9 +2654,12 @@ class File(Base):
         if contents[:len(codecs.BOM_UTF16_BE)] == codecs.BOM_UTF16_BE:
             return contents[len(codecs.BOM_UTF16_BE):].decode('utf-16-be')
         try:
-            return contents.decode('utf-8',errors='backslashreplace')
-        except (UnicodeDecodeError, AttributeError) as e:
-            return contents
+            return contents.decode('utf-8')
+        except UnicodeDecodeError as e:
+            try:
+                return contents.decode('latin-1')
+            except UnicodeDecodeError as e:
+                return contents.decode('utf-8', error='backslashreplace')
 
 
     def get_content_hash(self):
index c211ee1f5dad21d6b9686044b52166ac08f04f41..273f80971425d62f9e8fcfad230485a5b5588b2d 100644 (file)
@@ -1317,10 +1317,10 @@ class FSTestCase(_tempdirTestCase):
 
         # Check for string which doesn't have BOM and isn't valid
         # ASCII
-        test_string = b'Gan\xef\xbf\xbdauge'
+        test_string = b'Gan\xdfauge'
         test.write('latin1_file', test_string)
         f1 = fs.File(test.workpath("latin1_file"))
-        assert f1.get_text_contents() == test_string.decode('utf-8'), \
+        assert f1.get_text_contents() == test_string.decode('latin-1'), \
                f1.get_text_contents()
 
         def nonexistent(method, s):