Python's built-in hash() is not guaranteed to return the same value
across interpreter runs. As a result, many separate reproducers were being
generated for the same test, even though the CWD and arguments didn't change.
Switching to an MD5 hash of those inputs should fix that.
import os
import tempfile
import subprocess
+import hashlib
def help():
help()
return 1
- # Compute a hash based on the input arguments and the current working
+ # Compute an MD5 hash based on the input arguments and the current working
# directory.
- args = ' '.join(sys.argv[2:])
- cwd = os.getcwd()
- input_hash = str(hash((cwd, args)))
+ h = hashlib.md5()
+ h.update(' '.join(sys.argv[2:]))
+ h.update(os.getcwd())
+ input_hash = h.hexdigest()
# Use the hash to "uniquely" identify a reproducer path.
reproducer_path = os.path.join(tempfile.gettempdir(), input_hash)