From a1bcafab692dd90c1d6ec2d090f78f2aafa2ff93 Mon Sep 17 00:00:00 2001 From: "sgjesse@chromium.org" Date: Mon, 23 May 2011 10:47:43 +0000 Subject: [PATCH] Improve bit tests on IA32. Bit tests with a memory operand should be avoided on Intel processors. On Atom, bts with a memory operand has a latency of 10~11 cycles, and on Core 2 it has a latency of 5~6 cycles; it also produces multiple uops. If the bit base operand is a register, the latency is 1 cycle instead. This CL improves several cases from the Kraken 1.1 benchmark on Atom. For example, 12% for imaging-desaturate, 8% for audio-fft, and 5% for audio-beat-detection. Review URL: http://codereview.chromium.org//7048003 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@7987 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/ia32/macro-assembler-ia32.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/ia32/macro-assembler-ia32.cc b/src/ia32/macro-assembler-ia32.cc index f38e2eefa..b6e1d000b 100644 --- a/src/ia32/macro-assembler-ia32.cc +++ b/src/ia32/macro-assembler-ia32.cc @@ -73,7 +73,12 @@ void MacroAssembler::RecordWriteHelper(Register object, shr(addr, Page::kRegionSizeLog2); // Set dirty mark for region. - bts(Operand(object, Page::kDirtyFlagOffset), addr); + // Bit tests with a memory operand should be avoided on Intel processors, + // as they usually have long latency and multiple uops. We load the bit base + // operand to a register at first and store it back after bit set. + mov(scratch, Operand(object, Page::kDirtyFlagOffset)); + bts(Operand(scratch), addr); + mov(Operand(object, Page::kDirtyFlagOffset), scratch); } -- 2.34.1