MachineInstr *
sinkPostLoadHardenedInst(MachineInstr &MI,
SmallPtrSetImpl<MachineInstr *> &HardenedLoads);
+ bool canHardenPostLoad(MachineInstr &MI);
void hardenPostLoad(MachineInstr &MI, MachineSSAUpdater &PredStateSSA);
void checkReturnInstr(MachineInstr &MI, MachineSSAUpdater &PredStateSSA);
void checkCallInstr(MachineInstr &MI, MachineSSAUpdater &PredStateSSA);
(IndexReg && LoadDepRegs.test(IndexReg)))
continue;
- // If post-load hardening is enabled, this load is known to be
- // data-invariant, and we aren't already going to harden one of the
+ // If post-load hardening is enabled, this load is compatible with
+ // post-load hardening, and we aren't already going to harden one of the
// address registers, queue it up to be hardened post-load. Notably, even
// once hardened this won't introduce a useful dependency that could prune
// out subsequent loads.
- if (EnablePostLoadHardening && isDataInvariantLoad(MI) &&
+ if (EnablePostLoadHardening && canHardenPostLoad(MI) &&
!HardenedAddrRegs.count(BaseReg) &&
!HardenedAddrRegs.count(IndexReg)) {
HardenPostLoad.insert(&MI);
return MI;
}
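+// Returns true if we can harden the value defined by MI after the load
+// executes: the load must be data-invariant and must define a GPR of at most
+// eight bytes, so the loaded value can be masked with an `or` against the
+// predicate state.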
+bool X86SpeculativeLoadHardeningPass::canHardenPostLoad(MachineInstr &MI) {
+ if (!isDataInvariantLoad(MI))
+ return false;
+
+ auto &DefOp = MI.getOperand(0);
+ unsigned OldDefReg = DefOp.getReg();
+
+ auto *DefRC = MRI->getRegClass(OldDefReg);
+ int DefRegBytes = TRI->getRegSizeInBits(*DefRC) / 8;
+ if (DefRegBytes > 8)
+ // We don't support post-load hardening of vectors.
+ return false;
+
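+  // An OR can only directly overwrite a GPR, so check that the definition's
+  // register class matches the GPR class of the corresponding width.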
+ const TargetRegisterClass *GPRRegClasses[] = {
+ &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
+ &X86::GR64RegClass};
+ return DefRC->hasSuperClassEq(GPRRegClasses[Log2_32(DefRegBytes)]);
+}
+
// We can harden non-leaking loads into a register without touching the
// address by just hiding all of the loaded bits. We use an `or` instruction
// to do this because having the poison value be all ones lets us use the
// predicate state value itself as the mask: the goal is just for the loaded
// bits not to be exposed to speculative execution, and coercing them all to
// one is sufficient.
void X86SpeculativeLoadHardeningPass::hardenPostLoad(
MachineInstr &MI, MachineSSAUpdater &PredStateSSA) {
- assert(isDataInvariantLoad(MI) &&
- "Cannot get here with a non-invariant load!");
+ assert(canHardenPostLoad(MI) &&
+ "Invalid instruction for post-load hardening!");
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc Loc = MI.getDebugLoc();
unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
unsigned OrOpCode = OrOpCodes[Log2_32(DefRegBytes)];
-#ifndef NDEBUG
- const TargetRegisterClass *OrRegClasses[] = {
- &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
- &X86::GR64RegClass};
- assert(DefRC->hasSuperClassEq(OrRegClasses[Log2_32(DefRegBytes)]) &&
- "Cannot define this register with OR instruction!");
-#endif
-
unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
auto GetStateRegInRC = [&](const TargetRegisterClass &RC) {
call void @sink(i32 %leak)
unreachable
}
+
+declare void @sink_float(float)
+declare void @sink_double(double)
+
+; Test direct and converting loads of floating point values. These loads have
+; non-GPR destinations and so can't be hardened post-load; the pass hardens
+; their address registers instead.
+define void @test_fp_loads(float* %fptr, double* %dptr, i32* %i32ptr, i64* %i64ptr) nounwind {
+; X64-LABEL: test_fp_loads:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %r15
+; X64-NEXT: pushq %r14
+; X64-NEXT: pushq %r12
+; X64-NEXT: pushq %rbx
+; X64-NEXT: pushq %rax
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: movq %rcx, %r15
+; X64-NEXT: movq %rdx, %r14
+; X64-NEXT: movq %rsi, %rbx
+; X64-NEXT: movq %rdi, %r12
+; X64-NEXT: movq $-1, %rcx
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: orq %rax, %r12
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_float
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: orq %rax, %rbx
+; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_double
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: cvtsd2ss %xmm0, %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_float
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: cvtss2sd %xmm0, %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_double
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: orq %rax, %r14
+; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: cvtsi2ssl (%r14), %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_float
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: orq %rax, %r15
+; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: cvtsi2sdq (%r15), %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_double
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: cvtsi2ssq (%r15), %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_float
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: cvtsi2sdl (%r14), %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_double
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: addq $8, %rsp
+; X64-NEXT: popq %rbx
+; X64-NEXT: popq %r12
+; X64-NEXT: popq %r14
+; X64-NEXT: popq %r15
+; X64-NEXT: retq
+;
+; X64-LFENCE-LABEL: test_fp_loads:
+; X64-LFENCE: # %bb.0: # %entry
+; X64-LFENCE-NEXT: pushq %r15
+; X64-LFENCE-NEXT: pushq %r14
+; X64-LFENCE-NEXT: pushq %r12
+; X64-LFENCE-NEXT: pushq %rbx
+; X64-LFENCE-NEXT: pushq %rax
+; X64-LFENCE-NEXT: movq %rcx, %r15
+; X64-LFENCE-NEXT: movq %rdx, %r14
+; X64-LFENCE-NEXT: movq %rsi, %rbx
+; X64-LFENCE-NEXT: movq %rdi, %r12
+; X64-LFENCE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-LFENCE-NEXT: callq sink_float
+; X64-LFENCE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-LFENCE-NEXT: callq sink_double
+; X64-LFENCE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-LFENCE-NEXT: cvtsd2ss %xmm0, %xmm0
+; X64-LFENCE-NEXT: callq sink_float
+; X64-LFENCE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-LFENCE-NEXT: cvtss2sd %xmm0, %xmm0
+; X64-LFENCE-NEXT: callq sink_double
+; X64-LFENCE-NEXT: xorps %xmm0, %xmm0
+; X64-LFENCE-NEXT: cvtsi2ssl (%r14), %xmm0
+; X64-LFENCE-NEXT: callq sink_float
+; X64-LFENCE-NEXT: xorps %xmm0, %xmm0
+; X64-LFENCE-NEXT: cvtsi2sdq (%r15), %xmm0
+; X64-LFENCE-NEXT: callq sink_double
+; X64-LFENCE-NEXT: xorps %xmm0, %xmm0
+; X64-LFENCE-NEXT: cvtsi2ssq (%r15), %xmm0
+; X64-LFENCE-NEXT: callq sink_float
+; X64-LFENCE-NEXT: xorps %xmm0, %xmm0
+; X64-LFENCE-NEXT: cvtsi2sdl (%r14), %xmm0
+; X64-LFENCE-NEXT: callq sink_double
+; X64-LFENCE-NEXT: addq $8, %rsp
+; X64-LFENCE-NEXT: popq %rbx
+; X64-LFENCE-NEXT: popq %r12
+; X64-LFENCE-NEXT: popq %r14
+; X64-LFENCE-NEXT: popq %r15
+; X64-LFENCE-NEXT: retq
+entry:
+ %f1 = load float, float* %fptr
+ call void @sink_float(float %f1)
+ %d1 = load double, double* %dptr
+ call void @sink_double(double %d1)
+ %f2.d = load double, double* %dptr
+ %f2 = fptrunc double %f2.d to float
+ call void @sink_float(float %f2)
+ %d2.f = load float, float* %fptr
+ %d2 = fpext float %d2.f to double
+ call void @sink_double(double %d2)
+ %f3.i = load i32, i32* %i32ptr
+ %f3 = sitofp i32 %f3.i to float
+ call void @sink_float(float %f3)
+ %d3.i = load i64, i64* %i64ptr
+ %d3 = sitofp i64 %d3.i to double
+ call void @sink_double(double %d3)
+ %f4.i = load i64, i64* %i64ptr
+ %f4 = sitofp i64 %f4.i to float
+ call void @sink_float(float %f4)
+ %d4.i = load i32, i32* %i32ptr
+ %d4 = sitofp i32 %d4.i to double
+ call void @sink_double(double %d4)
+ ret void
+}
+
+declare void @sink_v4f32(<4 x float>)
+declare void @sink_v2f64(<2 x double>)
+declare void @sink_v16i8(<16 x i8>)
+declare void @sink_v8i16(<8 x i16>)
+declare void @sink_v4i32(<4 x i32>)
+declare void @sink_v2i64(<2 x i64>)
+
+; Test loads of vectors. Vector loads are too wide to harden post-load, so the
+; pass hardens their address registers instead.
+define void @test_vec_loads(<4 x float>* %v4f32ptr, <2 x double>* %v2f64ptr, <16 x i8>* %v16i8ptr, <8 x i16>* %v8i16ptr, <4 x i32>* %v4i32ptr, <2 x i64>* %v2i64ptr) nounwind {
+; X64-LABEL: test_vec_loads:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %r15
+; X64-NEXT: pushq %r14
+; X64-NEXT: pushq %r13
+; X64-NEXT: pushq %r12
+; X64-NEXT: pushq %rbx
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: movq %r9, %r14
+; X64-NEXT: movq %r8, %r15
+; X64-NEXT: movq %rcx, %r12
+; X64-NEXT: movq %rdx, %r13
+; X64-NEXT: movq %rsi, %rbx
+; X64-NEXT: movq $-1, %rcx
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: orq %rax, %rdi
+; X64-NEXT: movaps (%rdi), %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_v4f32
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: orq %rax, %rbx
+; X64-NEXT: movaps (%rbx), %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_v2f64
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: orq %rax, %r13
+; X64-NEXT: movaps (%r13), %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_v16i8
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: orq %rax, %r12
+; X64-NEXT: movaps (%r12), %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_v8i16
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: orq %rax, %r15
+; X64-NEXT: movaps (%r15), %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_v4i32
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: orq %rax, %r14
+; X64-NEXT: movaps (%r14), %xmm0
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: callq sink_v2i64
+; X64-NEXT: movq %rsp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: shlq $47, %rax
+; X64-NEXT: orq %rax, %rsp
+; X64-NEXT: popq %rbx
+; X64-NEXT: popq %r12
+; X64-NEXT: popq %r13
+; X64-NEXT: popq %r14
+; X64-NEXT: popq %r15
+; X64-NEXT: retq
+;
+; X64-LFENCE-LABEL: test_vec_loads:
+; X64-LFENCE: # %bb.0: # %entry
+; X64-LFENCE-NEXT: pushq %r15
+; X64-LFENCE-NEXT: pushq %r14
+; X64-LFENCE-NEXT: pushq %r13
+; X64-LFENCE-NEXT: pushq %r12
+; X64-LFENCE-NEXT: pushq %rbx
+; X64-LFENCE-NEXT: movq %r9, %r14
+; X64-LFENCE-NEXT: movq %r8, %r15
+; X64-LFENCE-NEXT: movq %rcx, %r12
+; X64-LFENCE-NEXT: movq %rdx, %r13
+; X64-LFENCE-NEXT: movq %rsi, %rbx
+; X64-LFENCE-NEXT: movaps (%rdi), %xmm0
+; X64-LFENCE-NEXT: callq sink_v4f32
+; X64-LFENCE-NEXT: movaps (%rbx), %xmm0
+; X64-LFENCE-NEXT: callq sink_v2f64
+; X64-LFENCE-NEXT: movaps (%r13), %xmm0
+; X64-LFENCE-NEXT: callq sink_v16i8
+; X64-LFENCE-NEXT: movaps (%r12), %xmm0
+; X64-LFENCE-NEXT: callq sink_v8i16
+; X64-LFENCE-NEXT: movaps (%r15), %xmm0
+; X64-LFENCE-NEXT: callq sink_v4i32
+; X64-LFENCE-NEXT: movaps (%r14), %xmm0
+; X64-LFENCE-NEXT: callq sink_v2i64
+; X64-LFENCE-NEXT: popq %rbx
+; X64-LFENCE-NEXT: popq %r12
+; X64-LFENCE-NEXT: popq %r13
+; X64-LFENCE-NEXT: popq %r14
+; X64-LFENCE-NEXT: popq %r15
+; X64-LFENCE-NEXT: retq
+entry:
+ %x1 = load <4 x float>, <4 x float>* %v4f32ptr
+ call void @sink_v4f32(<4 x float> %x1)
+ %x2 = load <2 x double>, <2 x double>* %v2f64ptr
+ call void @sink_v2f64(<2 x double> %x2)
+ %x3 = load <16 x i8>, <16 x i8>* %v16i8ptr
+ call void @sink_v16i8(<16 x i8> %x3)
+ %x4 = load <8 x i16>, <8 x i16>* %v8i16ptr
+ call void @sink_v8i16(<8 x i16> %x4)
+ %x5 = load <4 x i32>, <4 x i32>* %v4i32ptr
+ call void @sink_v4i32(<4 x i32> %x5)
+ %x6 = load <2 x i64>, <2 x i64>* %v2i64ptr
+ call void @sink_v2i64(<2 x i64> %x6)
+ ret void
+}