This tweak (a self-move `movaps` emitted right after a floating-point
division) was already present in Crankshaft for the non-AVX case. As it
turns out, it is also relevant with AVX, so Crankshaft now emits it in both
cases. The same optimization is now applied in TurboFan as well.
R=dcarney@chromium.org
Review URL: https://codereview.chromium.org/1081033003
Cr-Commit-Position: refs/heads/master@{#27774}
break;
case kSSEFloat32Div:
__ divss(i.InputDoubleRegister(0), i.InputOperand(1));
+ // Don't delete this mov. It may improve performance on some CPUs,
+ // when there is a (v)mulss depending on the result.
+ __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
break;
case kSSEFloat32Max:
__ maxss(i.InputDoubleRegister(0), i.InputOperand(1));
break;
case kSSEFloat64Div:
__ divsd(i.InputDoubleRegister(0), i.InputOperand(1));
+ // Don't delete this mov. It may improve performance on some CPUs,
+ // when there is a (v)mulsd depending on the result.
+ __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
break;
case kSSEFloat64Max:
__ maxsd(i.InputDoubleRegister(0), i.InputOperand(1));
CpuFeatureScope avx_scope(masm(), AVX);
__ vdivss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
i.InputOperand(1));
+ // Don't delete this mov. It may improve performance on some CPUs,
+ // when there is a (v)mulss depending on the result.
+ __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
break;
}
case kAVXFloat32Max: {
CpuFeatureScope avx_scope(masm(), AVX);
__ vdivsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
i.InputOperand(1));
+ // Don't delete this mov. It may improve performance on some CPUs,
+ // when there is a (v)mulsd depending on the result.
+ __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
break;
}
case kAVXFloat64Max: {
break;
case kSSEFloat32Div:
ASSEMBLE_SSE_BINOP(divss);
+ // Don't delete this mov. It may improve performance on some CPUs,
+ // when there is a (v)mulss depending on the result.
+ __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
break;
case kSSEFloat32Abs: {
// TODO(bmeurer): Use RIP relative 128-bit constants.
break;
case kSSEFloat64Div:
ASSEMBLE_SSE_BINOP(divsd);
+ // Don't delete this mov. It may improve performance on some CPUs,
+ // when there is a (v)mulsd depending on the result.
+ __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
break;
case kSSEFloat64Mod: {
__ subq(rsp, Immediate(kDoubleSize));
break;
case kAVXFloat32Div:
ASSEMBLE_AVX_BINOP(vdivss);
+ // Don't delete this mov. It may improve performance on some CPUs,
+ // when there is a (v)mulss depending on the result.
+ __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
break;
case kAVXFloat32Max:
ASSEMBLE_AVX_BINOP(vmaxss);
break;
case kAVXFloat64Div:
ASSEMBLE_AVX_BINOP(vdivsd);
+ // Don't delete this mov. It may improve performance on some CPUs,
+ // when there is a (v)mulsd depending on the result.
+ __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
break;
case kAVXFloat64Max:
ASSEMBLE_AVX_BINOP(vmaxsd);
} else {
DCHECK(result.is(left));
__ divsd(left, right);
- // Don't delete this mov. It may improve performance on some CPUs,
- // when there is a mulsd depending on the result
- __ movaps(left, left);
}
+ // Don't delete this mov. It may improve performance on some CPUs,
+ // when there is a (v)mulsd depending on the result
+ __ movaps(result, result);
break;
case Token::MOD: {
// Pass two doubles as arguments on the stack.
} else {
DCHECK(result.is(left));
__ divsd(left, right);
- // Don't delete this mov. It may improve performance on some CPUs,
- // when there is a mulsd depending on the result
- __ movaps(left, left);
}
+ // Don't delete this mov. It may improve performance on some CPUs,
+ // when there is a (v)mulsd depending on the result
+ __ movaps(result, result);
break;
case Token::MOD: {
XMMRegister xmm_scratch = double_scratch0();