--- /dev/null
+//===-- X86SpeculativeExecutionSideEffectSuppression.cpp ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file contains the X86 implementation of the speculative execution side
+/// effect suppression mitigation.
+///
+/// This must be used with the -mlvi-cfi flag in order to mitigate indirect
+/// branches and returns.
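+///
+/// Example invocation (matching the RUN lines in the accompanying test):
+///   llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable foo.ll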
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-seses"
+
+STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
+
+static cl::opt<bool> EnableSpeculativeExecutionSideEffectSuppression(
+ "x86-seses-enable",
+ cl::desc("Force enable speculative execution side effect suppresion. "
+ "(Note: User must pass -mlvi-cfi in order to mitigate indirect "
+ "branches and returns.)"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> OneLFENCEPerBasicBlock(
+ "x86-seses-one-lfence-per-bb",
+ cl::desc(
+ "Omit all lfences other than the first to be placed in a basic block."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> OnlyLFENCENonConst(
+ "x86-seses-only-lfence-non-const",
+ cl::desc("Only lfence before groups of terminators where at least one "
+ "branch instruction has an input to the addressing mode that is a "
+ "register other than %rip."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+ OmitBranchLFENCEs("x86-seses-omit-branch-lfences",
+ cl::desc("Omit all lfences before branch instructions."),
+ cl::init(false), cl::Hidden);
+
+namespace {
+
+class X86SpeculativeExecutionSideEffectSuppression
+ : public MachineFunctionPass {
+public:
+ X86SpeculativeExecutionSideEffectSuppression() : MachineFunctionPass(ID) {}
+
+ static char ID;
+ StringRef getPassName() const override {
+ return "X86 Speculative Execution Side Effect Suppression";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // namespace
+
+char X86SpeculativeExecutionSideEffectSuppression::ID = 0;
+
+// This function returns whether the passed instruction uses a memory addressing
+// mode that is constant. We treat all memory addressing modes that read
+// from a register that is not %rip as non-constant. Note that the use
+// of the EFLAGS register results in an addressing mode being considered
+// non-constant, therefore all JCC instructions will return false from this
+// function since one of their operands will always be the EFLAGS register.
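+// For example, an unconditional indirect branch through a general-purpose
+// register (e.g. `jmp *%rax`) is treated as non-constant, while a direct
+// branch to a label, which reads no registers, is treated as constant.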
+static bool hasConstantAddressingMode(const MachineInstr &MI) {
+ for (const MachineOperand &MO : MI.uses())
+ if (MO.isReg() && X86::RIP != MO.getReg())
+ return false;
+ return true;
+}
+
+bool X86SpeculativeExecutionSideEffectSuppression::runOnMachineFunction(
+ MachineFunction &MF) {
+ if (!EnableSpeculativeExecutionSideEffectSuppression)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
+ << " **********\n");
+ bool Modified = false;
+ const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
+ const X86InstrInfo *TII = Subtarget.getInstrInfo();
+ for (MachineBasicBlock &MBB : MF) {
+ MachineInstr *FirstTerminator = nullptr;
+
+ for (auto &MI : MBB) {
+ // We want to put an LFENCE before any instruction that
+ // may load or store. This LFENCE is intended to avoid leaking any secret
+ // data due to a given load or store. This results in closing the cache
+ // and memory timing side channels. We will treat terminators that load
+ // or store separately.
+ if (MI.mayLoadOrStore() && !MI.isTerminator()) {
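+ // BuildMI with an instruction as the insertion point places the new
+ // LFENCE immediately before MI.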
+ BuildMI(MBB, MI, DebugLoc(), TII->get(X86::LFENCE));
+ ++NumLFENCEsInserted;
+ Modified = true;
+ if (OneLFENCEPerBasicBlock)
+ break;
+ }
+ // The following section inserts an LFENCE before groups of terminators
+ // that include branches. This closes the branch prediction side
+ // channels, since the LFENCEs placed by this logic prevent code from
+ // executing after misspeculation.
+
+ // Keep track of the first terminator in a basic block since if we need
+ // to LFENCE the terminators in this basic block we must add the
+ // instruction before the first terminator in the basic block (as
+ // opposed to before the terminator that indicates an LFENCE is
+ // required). An example of why this is necessary is that the
+ // X86InstrInfo::analyzeBranch method assumes all terminators are grouped
+ // together and terminates its analysis once the first non-terminator
+ // instruction is found.
+ if (MI.isTerminator() && FirstTerminator == nullptr)
+ FirstTerminator = &MI;
+
+ // Look for branch instructions that will require an LFENCE to be put
+ // before this basic block's terminators.
+ if (!MI.isBranch() || OmitBranchLFENCEs)
+ // This isn't a branch or we're not putting LFENCEs before branches.
+ continue;
+
+ if (OnlyLFENCENonConst && hasConstantAddressingMode(MI))
+ // This is a branch, but it only has constant addressing mode and we're
+ // not adding LFENCEs before such branches.
+ continue;
+
+ // This branch requires adding an LFENCE.
+ BuildMI(MBB, FirstTerminator, DebugLoc(), TII->get(X86::LFENCE));
+ ++NumLFENCEsInserted;
+ Modified = true;
+ break;
+ }
+ }
+
+ return Modified;
+}
+
+FunctionPass *llvm::createX86SpeculativeExecutionSideEffectSuppression() {
+ return new X86SpeculativeExecutionSideEffectSuppression();
+}
+
+INITIALIZE_PASS(X86SpeculativeExecutionSideEffectSuppression, "x86-seses",
+ "X86 Speculative Execution Side Effect Suppresion", false,
+ false)
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable %s -o - | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable -x86-seses-one-lfence-per-bb %s -o - | FileCheck %s --check-prefix=X86-ONE-LFENCE
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable -x86-seses-omit-branch-lfences %s -o - | FileCheck %s --check-prefix=X86-OMIT-BR
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable -x86-seses-only-lfence-non-const %s -o - | FileCheck %s --check-prefix=X86-NON-CONST
+
+define void @_Z4buzzv() {
+; CHECK-LABEL: _Z4buzzv:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movl $10, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: retq
+;
+; X86-ONE-LFENCE-LABEL: _Z4buzzv:
+; X86-ONE-LFENCE: # %bb.0: # %entry
+; X86-ONE-LFENCE-NEXT: lfence
+; X86-ONE-LFENCE-NEXT: movl $10, -{{[0-9]+}}(%rsp)
+; X86-ONE-LFENCE-NEXT: retq
+;
+; X86-OMIT-BR-LABEL: _Z4buzzv:
+; X86-OMIT-BR: # %bb.0: # %entry
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movl $10, -{{[0-9]+}}(%rsp)
+; X86-OMIT-BR-NEXT: retq
+;
+; X86-NON-CONST-LABEL: _Z4buzzv:
+; X86-NON-CONST: # %bb.0: # %entry
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movl $10, -{{[0-9]+}}(%rsp)
+; X86-NON-CONST-NEXT: retq
+entry:
+ %a = alloca i32, align 4
+ store i32 10, i32* %a, align 4
+ ret void
+}
+
+define i32 @_Z3barPi(i32* %p) {
+; CHECK-LABEL: _Z3barPi:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movl $4, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: lfence
+; CHECK-NEXT: cmpl $3, (%rdi)
+; CHECK-NEXT: lfence
+; CHECK-NEXT: jg .LBB1_2
+; CHECK-NEXT: # %bb.1: # %if.then
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movslq (%rax), %rax
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movl -24(%rsp,%rax,4), %eax
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB1_2: # %if.else
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movl $-1, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+;
+; X86-ONE-LFENCE-LABEL: _Z3barPi:
+; X86-ONE-LFENCE: # %bb.0: # %entry
+; X86-ONE-LFENCE-NEXT: lfence
+; X86-ONE-LFENCE-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
+; X86-ONE-LFENCE-NEXT: movl $4, -{{[0-9]+}}(%rsp)
+; X86-ONE-LFENCE-NEXT: cmpl $3, (%rdi)
+; X86-ONE-LFENCE-NEXT: jg .LBB1_2
+; X86-ONE-LFENCE-NEXT: # %bb.1: # %if.then
+; X86-ONE-LFENCE-NEXT: lfence
+; X86-ONE-LFENCE-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; X86-ONE-LFENCE-NEXT: movslq (%rax), %rax
+; X86-ONE-LFENCE-NEXT: movl -24(%rsp,%rax,4), %eax
+; X86-ONE-LFENCE-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X86-ONE-LFENCE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X86-ONE-LFENCE-NEXT: retq
+; X86-ONE-LFENCE-NEXT: .LBB1_2: # %if.else
+; X86-ONE-LFENCE-NEXT: lfence
+; X86-ONE-LFENCE-NEXT: movl $-1, -{{[0-9]+}}(%rsp)
+; X86-ONE-LFENCE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X86-ONE-LFENCE-NEXT: retq
+;
+; X86-OMIT-BR-LABEL: _Z3barPi:
+; X86-OMIT-BR: # %bb.0: # %entry
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movl $4, -{{[0-9]+}}(%rsp)
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: cmpl $3, (%rdi)
+; X86-OMIT-BR-NEXT: jg .LBB1_2
+; X86-OMIT-BR-NEXT: # %bb.1: # %if.then
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movslq (%rax), %rax
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movl -24(%rsp,%rax,4), %eax
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X86-OMIT-BR-NEXT: retq
+; X86-OMIT-BR-NEXT: .LBB1_2: # %if.else
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movl $-1, -{{[0-9]+}}(%rsp)
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X86-OMIT-BR-NEXT: retq
+;
+; X86-NON-CONST-LABEL: _Z3barPi:
+; X86-NON-CONST: # %bb.0: # %entry
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movl $4, -{{[0-9]+}}(%rsp)
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: cmpl $3, (%rdi)
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: jg .LBB1_2
+; X86-NON-CONST-NEXT: # %bb.1: # %if.then
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movslq (%rax), %rax
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movl -24(%rsp,%rax,4), %eax
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X86-NON-CONST-NEXT: retq
+; X86-NON-CONST-NEXT: .LBB1_2: # %if.else
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movl $-1, -{{[0-9]+}}(%rsp)
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X86-NON-CONST-NEXT: retq
+entry:
+ %retval = alloca i32, align 4
+ %p.addr = alloca i32*, align 8
+ %a = alloca [4 x i32], align 16
+ %len = alloca i32, align 4
+ store i32* %p, i32** %p.addr, align 8
+ %0 = bitcast [4 x i32]* %a to i8*
+ store i32 4, i32* %len, align 4
+ %1 = load i32*, i32** %p.addr, align 8
+ %2 = load i32, i32* %1, align 4
+ %3 = load i32, i32* %len, align 4
+ %cmp = icmp slt i32 %2, %3
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %4 = load i32*, i32** %p.addr, align 8
+ %5 = load i32, i32* %4, align 4
+ %idxprom = sext i32 %5 to i64
+ %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %a, i64 0, i64 %idxprom
+ %6 = load i32, i32* %arrayidx, align 4
+ store i32 %6, i32* %retval, align 4
+ br label %return
+
+if.else: ; preds = %entry
+ store i32 -1, i32* %retval, align 4
+ br label %return
+
+return: ; preds = %if.else, %if.then
+ %7 = load i32, i32* %retval, align 4
+ ret i32 %7
+}
+
+define i32 (i32*)* @_Z3bazv() {
+; CHECK-LABEL: _Z3bazv:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movq $_Z3barPi, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: lfence
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT: retq
+;
+; X86-ONE-LFENCE-LABEL: _Z3bazv:
+; X86-ONE-LFENCE: # %bb.0: # %entry
+; X86-ONE-LFENCE-NEXT: lfence
+; X86-ONE-LFENCE-NEXT: movq $_Z3barPi, -{{[0-9]+}}(%rsp)
+; X86-ONE-LFENCE-NEXT: #APP
+; X86-ONE-LFENCE-NEXT: #NO_APP
+; X86-ONE-LFENCE-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; X86-ONE-LFENCE-NEXT: retq
+;
+; X86-OMIT-BR-LABEL: _Z3bazv:
+; X86-OMIT-BR: # %bb.0: # %entry
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movq $_Z3barPi, -{{[0-9]+}}(%rsp)
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: #APP
+; X86-OMIT-BR-NEXT: #NO_APP
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; X86-OMIT-BR-NEXT: retq
+;
+; X86-NON-CONST-LABEL: _Z3bazv:
+; X86-NON-CONST: # %bb.0: # %entry
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movq $_Z3barPi, -{{[0-9]+}}(%rsp)
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: #APP
+; X86-NON-CONST-NEXT: #NO_APP
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; X86-NON-CONST-NEXT: retq
+entry:
+ %p = alloca i32 (i32*)*, align 8
+ store i32 (i32*)* @_Z3barPi, i32 (i32*)** %p, align 8
+ call void asm sideeffect "", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(i32 (i32*)** %p, i32 (i32*)** %p) #3, !srcloc !2
+ %0 = load i32 (i32*)*, i32 (i32*)** %p, align 8
+ ret i32 (i32*)* %0
+}
+
+define void @_Z3fooPi(i32* %p) {
+; CHECK-LABEL: _Z3fooPi:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: callq _Z3bazv
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: callq *%rax
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; X86-ONE-LFENCE-LABEL: _Z3fooPi:
+; X86-ONE-LFENCE: # %bb.0: # %entry
+; X86-ONE-LFENCE-NEXT: subq $24, %rsp
+; X86-ONE-LFENCE-NEXT: .cfi_def_cfa_offset 32
+; X86-ONE-LFENCE-NEXT: lfence
+; X86-ONE-LFENCE-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
+; X86-ONE-LFENCE-NEXT: callq _Z3bazv
+; X86-ONE-LFENCE-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; X86-ONE-LFENCE-NEXT: movq {{[0-9]+}}(%rsp), %rdi
+; X86-ONE-LFENCE-NEXT: callq *%rax
+; X86-ONE-LFENCE-NEXT: addq $24, %rsp
+; X86-ONE-LFENCE-NEXT: .cfi_def_cfa_offset 8
+; X86-ONE-LFENCE-NEXT: retq
+;
+; X86-OMIT-BR-LABEL: _Z3fooPi:
+; X86-OMIT-BR: # %bb.0: # %entry
+; X86-OMIT-BR-NEXT: subq $24, %rsp
+; X86-OMIT-BR-NEXT: .cfi_def_cfa_offset 32
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
+; X86-OMIT-BR-NEXT: callq _Z3bazv
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movq {{[0-9]+}}(%rsp), %rdi
+; X86-OMIT-BR-NEXT: callq *%rax
+; X86-OMIT-BR-NEXT: addq $24, %rsp
+; X86-OMIT-BR-NEXT: .cfi_def_cfa_offset 8
+; X86-OMIT-BR-NEXT: retq
+;
+; X86-NON-CONST-LABEL: _Z3fooPi:
+; X86-NON-CONST: # %bb.0: # %entry
+; X86-NON-CONST-NEXT: subq $24, %rsp
+; X86-NON-CONST-NEXT: .cfi_def_cfa_offset 32
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
+; X86-NON-CONST-NEXT: callq _Z3bazv
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movq {{[0-9]+}}(%rsp), %rdi
+; X86-NON-CONST-NEXT: callq *%rax
+; X86-NON-CONST-NEXT: addq $24, %rsp
+; X86-NON-CONST-NEXT: .cfi_def_cfa_offset 8
+; X86-NON-CONST-NEXT: retq
+entry:
+ %p.addr = alloca i32*, align 8
+ %t = alloca i32 (i32*)*, align 8
+ store i32* %p, i32** %p.addr, align 8
+ %call = call i32 (i32*)* @_Z3bazv()
+ store i32 (i32*)* %call, i32 (i32*)** %t, align 8
+ %0 = load i32 (i32*)*, i32 (i32*)** %t, align 8
+ %1 = load i32*, i32** %p.addr, align 8
+ %call1 = call i32 %0(i32* %1)
+ ret void
+}
+
+!2 = !{i32 233}