From f6544934b94932f1d2231238046f83ba8b083040 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Mon, 6 Jan 2020 10:54:13 -0500 Subject: [PATCH] Make check-llvm run 50% faster on macOS, 18% faster on Windows. While looking at cycle time graphs of some of my bots, I noticed that 327894859cc made check-llvm noticeably slower on macOS and Windows. As it turns out, the 5 substitutions added in that change were enough to cause lit to thrash the build-in cache in re.compile() (re.sub() is implemented as re.compile().sub()), and apparently applySubstitutions() is on the cricital path and slow when all regexes need to compile all the time. (See `_MAXCACHE = 512` in cpython/Lib/re.py) Supporting full regexes for lit substitutions seems a bit like overkill, but for now add a simple unbounded cache to recover the lost performance. No intended behavior change. --- llvm/utils/lit/lit/TestRunner.py | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py index d572af2..96411c9 100644 --- a/llvm/utils/lit/lit/TestRunner.py +++ b/llvm/utils/lit/lit/TestRunner.py @@ -1112,11 +1112,16 @@ def getDefaultSubstitutions(test, tmpDir, tmpBase, normalize_slashes=False): s = s.replace('&', '\&') return s substitutions.extend([ - ('%{/s:regex_replacement}', regex_escape(sourcepath.replace('\\', '/'))), - ('%{/S:regex_replacement}', regex_escape(sourcedir.replace('\\', '/'))), - ('%{/p:regex_replacement}', regex_escape(sourcedir.replace('\\', '/'))), - ('%{/t:regex_replacement}', regex_escape(tmpBase.replace('\\', '/')) + '.tmp'), - ('%{/T:regex_replacement}', regex_escape(tmpDir.replace('\\', '/'))), + ('%{/s:regex_replacement}', + regex_escape(sourcepath.replace('\\', '/'))), + ('%{/S:regex_replacement}', + regex_escape(sourcedir.replace('\\', '/'))), + ('%{/p:regex_replacement}', + regex_escape(sourcedir.replace('\\', '/'))), + ('%{/t:regex_replacement}', + regex_escape(tmpBase.replace('\\', '/')) + '.tmp'), + ('%{/T:regex_replacement}', + regex_escape(tmpDir.replace('\\', '/'))), ]) # "%:[STpst]" are normalized paths without colons and without a leading @@ -1130,6 +1135,18 @@ def getDefaultSubstitutions(test, tmpDir, tmpBase, normalize_slashes=False): ]) return substitutions +def _memoize(f): + cache = {} # Intentionally unbounded, see applySubstitutions() + def memoized(x): + if x not in cache: + cache[x] = f(x) + return cache[x] + return memoized + +@_memoize +def _caching_re_compile(r): + return re.compile(r) + def applySubstitutions(script, substitutions): """Apply substitutions to the script. Allow full regular expression syntax. Replace each matching occurrence of regular expression pattern a with @@ -1139,7 +1156,14 @@ def applySubstitutions(script, substitutions): for a,b in substitutions: if kIsWindows: b = b.replace("\\","\\\\") - ln = re.sub(a, b, ln) + # re.compile() has a built-in LRU cache with 512 entries. In some + # test suites lit ends up thrashing that cache, which made e.g. + # check-llvm run 50% slower. Use an explicit, unbounded cache + # to prevent that from happening. Since lit is fairly + # short-lived, since the set of substitutions is fairly small, and + # since thrashing has such bad consequences, not bounding the cache + # seems reasonable. + ln = _caching_re_compile(a).sub(b, ln) # Strip the trailing newline and any extra whitespace. return ln.strip() -- 2.7.4