/// Description: This pass finds Load Value Injection (LVI) gadgets consisting
/// of a load from memory (i.e., SOURCE), and any operation that may transmit
/// the value loaded from memory over a covert channel, or use the value loaded
-/// from memory to determine a branch/call target (i.e., SINK).
+/// from memory to determine a branch/call target (i.e., SINK). After finding
+/// all such gadgets in a given function, the pass minimally inserts LFENCE
+/// instructions in such a manner that the following property is satisfied: for
+/// all SOURCE+SINK pairs, all paths in the CFG from SOURCE to SINK contain at
+/// least one LFENCE instruction. The algorithm that implements this minimal
+/// insertion is influenced by an academic paper that minimally inserts memory
+/// fences for high-performance concurrent programs:
+/// http://www.cs.ucr.edu/~lesani/companion/oopsla15/OOPSLA15.pdf
+/// The algorithm implemented in this pass is as follows:
+/// 1. Build a condensed CFG (i.e., a GadgetGraph) consisting only of the
+/// following components:
+/// - SOURCE instructions (also includes function arguments)
+/// - SINK instructions
+/// - Basic block entry points
+/// - Basic block terminators
+/// - LFENCE instructions
+/// 2. Analyze the GadgetGraph to determine which SOURCE+SINK pairs (i.e.,
+/// gadgets) are already mitigated by existing LFENCEs. If all gadgets have been
+/// mitigated, go to step 6.
+/// 3. Use a heuristic or plugin to approximate minimal LFENCE insertion.
+/// 4. Insert one LFENCE along each CFG edge that was cut in step 3.
+/// 5. Go to step 2.
+/// 6. If any LFENCEs were inserted, return `true` from runOnFunction() to tell
+/// LLVM that the function was modified.
///
//===----------------------------------------------------------------------===//
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#define PASS_KEY "x86-lvi-load"
#define DEBUG_TYPE PASS_KEY
+STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation");
STATISTIC(NumFunctionsConsidered, "Number of functions analyzed");
STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations "
"were deployed");
STATISTIC(NumGadgets, "Number of LVI gadgets detected during analysis");
+static cl::opt<std::string> OptimizePluginPath(
+ PASS_KEY "-opt-plugin",
+ cl::desc("Specify a plugin to optimize LFENCE insertion"), cl::Hidden);
+
static cl::opt<bool> NoConditionalBranches(
PASS_KEY "-no-cbranch",
cl::desc("Don't treat conditional branches as disclosure gadgets. This "
"may improve performance, at the cost of security."),
cl::init(false), cl::Hidden);
+static llvm::sys::DynamicLibrary OptimizeDL{};
+typedef int (*OptimizeCutT)(unsigned int *nodes, unsigned int nodes_size,
+ unsigned int *edges, int *edge_values,
+ int *cut_edges /* out */, unsigned int edges_size);
+static OptimizeCutT OptimizeCut = nullptr;
+
#define ARG_NODE nullptr
#define GADGET_EDGE ((int)(-1))
#define WEIGHT(EdgeValue) ((double)(2 * (EdgeValue) + 1))
getGadgetGraph(MachineFunction &MF, const MachineLoopInfo &MLI,
const MachineDominatorTree &MDT,
const MachineDominanceFrontier &MDF, bool FixedLoads) const;
+ std::unique_ptr<MachineGadgetGraph>
+ elimEdges(std::unique_ptr<MachineGadgetGraph> Graph) const;
+ void cutEdges(MachineGadgetGraph &G, EdgeSet &CutEdges /* out */) const;
+ int insertFences(MachineGadgetGraph &G,
+ EdgeSet &CutEdges /* in, out */) const;
bool instrUsesRegToAccessMemory(const MachineInstr &I, unsigned Reg) const;
bool instrUsesRegToBranch(const MachineInstr &I, unsigned Reg) const;
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
LLVM_DEBUG(dbgs() << "Hardening data-dependent loads...\n");
- hardenLoads(MF, false);
+ int FencesInserted = hardenLoads(MF, false);
LLVM_DEBUG(dbgs() << "Hardening data-dependent loads... Done\n");
if (!NoFixedLoads) {
LLVM_DEBUG(dbgs() << "Hardening fixed loads...\n");
- hardenLoads(MF, true);
+ FencesInserted += hardenLoads(MF, true);
LLVM_DEBUG(dbgs() << "Hardening fixed loads... Done\n");
}
- return false;
+ if (FencesInserted > 0)
+ ++NumFunctionsMitigated;
+ NumFences += FencesInserted;
+ return (FencesInserted > 0);
}
// Apply the mitigation to `MF`, return the number of fences inserted.
// loads; otherwise, mitigation will be applied to non-fixed loads.
int X86LoadValueInjectionLoadHardeningPass::hardenLoads(MachineFunction &MF,
bool FixedLoads) const {
+ int FencesInserted = 0;
+
LLVM_DEBUG(dbgs() << "Building gadget graph...\n");
const auto &MLI = getAnalysis<MachineLoopInfo>();
const auto &MDT = getAnalysis<MachineDominatorTree>();
return 0;
}
- return 0;
+ do {
+ LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n");
+ std::unique_ptr<MachineGadgetGraph> ElimGraph = elimEdges(std::move(Graph));
+ LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n");
+ if (ElimGraph->NumGadgets == 0)
+ break;
+
+ EdgeSet CutEdges{*ElimGraph};
+ LLVM_DEBUG(dbgs() << "Cutting edges...\n");
+ cutEdges(*ElimGraph, CutEdges);
+ LLVM_DEBUG(dbgs() << "Cutting edges... Done\n");
+
+ LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n");
+ FencesInserted += insertFences(*ElimGraph, CutEdges);
+ LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n");
+
+ Graph.reset(GraphBuilder::trim(
+ *ElimGraph, MachineGadgetGraph::NodeSet{*ElimGraph}, CutEdges));
+ } while (true);
+
+ return FencesInserted;
}
std::unique_ptr<X86LoadValueInjectionLoadHardeningPass::MachineGadgetGraph>
return G;
}
+std::unique_ptr<X86LoadValueInjectionLoadHardeningPass::MachineGadgetGraph>
+X86LoadValueInjectionLoadHardeningPass::elimEdges(
+ std::unique_ptr<MachineGadgetGraph> Graph) const {
+ MachineGadgetGraph::NodeSet ElimNodes{*Graph};
+ MachineGadgetGraph::EdgeSet ElimEdges{*Graph};
+
+ if (Graph->NumFences > 0) { // eliminate fences
+ for (auto EI = Graph->edges_begin(), EE = Graph->edges_end(); EI != EE;
+ ++EI) {
+ GTraits::NodeRef Dest = GTraits::edge_dest(*EI);
+ if (isFence(Dest->value())) {
+ ElimNodes.insert(Dest);
+ ElimEdges.insert(EI);
+ std::for_each(
+ GTraits::child_edge_begin(Dest), GTraits::child_edge_end(Dest),
+ [&ElimEdges](GTraits::EdgeRef E) { ElimEdges.insert(&E); });
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Eliminated " << ElimNodes.count()
+ << " fence nodes\n");
+ }
+
+ // eliminate gadget edges that are mitigated
+ int NumGadgets = 0;
+ MachineGadgetGraph::NodeSet Visited{*Graph}, GadgetSinks{*Graph};
+ MachineGadgetGraph::EdgeSet ElimGadgets{*Graph};
+ for (auto NI = GTraits::nodes_begin(Graph.get()),
+ NE = GTraits::nodes_end(Graph.get());
+ NI != NE; ++NI) {
+ // collect the gadgets for this node
+ for (auto EI = GTraits::child_edge_begin(*NI),
+ EE = GTraits::child_edge_end(*NI);
+ EI != EE; ++EI) {
+ if (MachineGadgetGraph::isGadgetEdge(*EI)) {
+ ++NumGadgets;
+ ElimGadgets.insert(EI);
+ GadgetSinks.insert(GTraits::edge_dest(*EI));
+ }
+ }
+ if (GadgetSinks.empty())
+ continue;
+ std::function<void(GTraits::NodeRef, bool)> TraverseDFS =
+ [&](GTraits::NodeRef N, bool FirstNode) {
+ if (!FirstNode) {
+ Visited.insert(N);
+ if (GadgetSinks.contains(N)) {
+ for (auto CEI = GTraits::child_edge_begin(*NI),
+ CEE = GTraits::child_edge_end(*NI);
+ CEI != CEE; ++CEI) {
+ if (MachineGadgetGraph::isGadgetEdge(*CEI) &&
+ GTraits::edge_dest(*CEI) == N)
+ ElimGadgets.erase(CEI);
+ }
+ }
+ }
+ for (auto CEI = GTraits::child_edge_begin(N),
+ CEE = GTraits::child_edge_end(N);
+ CEI != CEE; ++CEI) {
+ GTraits::NodeRef Dest = GTraits::edge_dest(*CEI);
+ if (MachineGadgetGraph::isCFGEdge(*CEI) &&
+ !Visited.contains(Dest) && !ElimEdges.contains(CEI))
+ TraverseDFS(Dest, false);
+ }
+ };
+ TraverseDFS(*NI, true);
+ Visited.clear();
+ GadgetSinks.clear();
+ }
+ LLVM_DEBUG(dbgs() << "Eliminated " << ElimGadgets.count()
+ << " gadget edges\n");
+ ElimEdges |= ElimGadgets;
+
+ if (!(ElimEdges.empty() && ElimNodes.empty())) {
+ int NumRemainingGadgets = NumGadgets - ElimGadgets.count();
+ Graph.reset(GraphBuilder::trim(*Graph, ElimNodes, ElimEdges,
+ 0 /* NumFences */, NumRemainingGadgets));
+ } else {
+ Graph->NumFences = 0;
+ Graph->NumGadgets = NumGadgets;
+ }
+ return Graph;
+}
+
+void X86LoadValueInjectionLoadHardeningPass::cutEdges(
+ MachineGadgetGraph &G,
+ MachineGadgetGraph::EdgeSet &CutEdges /* out */) const {
+ if (!OptimizePluginPath.empty()) {
+ if (!OptimizeDL.isValid()) {
+ std::string ErrorMsg{};
+ OptimizeDL = llvm::sys::DynamicLibrary::getPermanentLibrary(
+ OptimizePluginPath.c_str(), &ErrorMsg);
+ if (!ErrorMsg.empty())
+ report_fatal_error("Failed to load opt plugin: \"" + ErrorMsg + '\"');
+ OptimizeCut = (OptimizeCutT)OptimizeDL.getAddressOfSymbol("optimize_cut");
+ if (!OptimizeCut)
+ report_fatal_error("Invalid optimization plugin");
+ }
+ auto *Nodes = new unsigned int[G.nodes_size() + 1 /* terminator node */];
+ auto *Edges = new unsigned int[G.edges_size()];
+ auto *EdgeCuts = new int[G.edges_size()];
+ auto *EdgeValues = new int[G.edges_size()];
+ for (auto *NI = G.nodes_begin(), *NE = G.nodes_end(); NI != NE; ++NI) {
+ Nodes[std::distance(G.nodes_begin(), NI)] =
+ std::distance(G.edges_begin(), GTraits::child_edge_begin(NI));
+ }
+ Nodes[G.nodes_size()] = G.edges_size(); // terminator node
+ for (auto *EI = G.edges_begin(), *EE = G.edges_end(); EI != EE; ++EI) {
+ Edges[std::distance(G.edges_begin(), EI)] =
+ std::distance(G.nodes_begin(), GTraits::edge_dest(*EI));
+ EdgeValues[std::distance(G.edges_begin(), EI)] = EI->value();
+ }
+ OptimizeCut(Nodes, G.nodes_size(), Edges, EdgeValues, EdgeCuts,
+ G.edges_size());
+ for (int I = 0; I < G.edges_size(); ++I) {
+ if (EdgeCuts[I])
+ CutEdges.set(I);
+ }
+ delete[] Nodes;
+ delete[] Edges;
+ delete[] EdgeCuts;
+ delete[] EdgeValues;
+ } else { // Use the default greedy heuristic
+ // Find the cheapest CFG edge that will eliminate a gadget (by being egress
+ // from a SOURCE node or ingress to a SINK node), and cut it.
+ MachineGadgetGraph::NodeSet GadgetSinks{G};
+ MachineGadgetGraph::Edge *CheapestSoFar = nullptr;
+ for (auto NI = GTraits::nodes_begin(&G), NE = GTraits::nodes_end(&G);
+ NI != NE; ++NI) {
+ for (auto EI = GTraits::child_edge_begin(*NI),
+ EE = GTraits::child_edge_end(*NI);
+ EI != EE; ++EI) {
+ if (MachineGadgetGraph::isGadgetEdge(*EI)) {
+ // NI is a SOURCE node. Look for a cheap egress edge
+ for (auto EEI = GTraits::child_edge_begin(*NI); EEI != EE; ++EEI) {
+ if (MachineGadgetGraph::isCFGEdge(*EEI)) {
+ if (!CheapestSoFar || EEI->value() < CheapestSoFar->value())
+ CheapestSoFar = EEI;
+ }
+ }
+ GadgetSinks.insert(GTraits::edge_dest(*EI));
+ } else { // EI is a CFG edge
+ if (GadgetSinks.contains(GTraits::edge_dest(*EI))) {
+ // The dest is a SINK node. Hence EI is an ingress edge
+ if (!CheapestSoFar || EI->value() < CheapestSoFar->value())
+ CheapestSoFar = EI;
+ }
+ }
+ }
+ }
+ assert(CheapestSoFar && "Failed to cut an edge");
+ CutEdges.insert(CheapestSoFar);
+ }
+ LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n");
+}
+
+int X86LoadValueInjectionLoadHardeningPass::insertFences(
+ MachineGadgetGraph &G, EdgeSet &CutEdges /* in, out */) const {
+ int FencesInserted = 0, AdditionalEdgesCut = 0;
+ auto CutAllCFGEdges = [&CutEdges, &AdditionalEdgesCut](GTraits::NodeRef N) {
+ for (auto CEI = GTraits::child_edge_begin(N),
+ CEE = GTraits::child_edge_end(N);
+ CEI != CEE; ++CEI) {
+ if (MachineGadgetGraph::isCFGEdge(*CEI) && !CutEdges.contains(CEI)) {
+ CutEdges.insert(CEI);
+ ++AdditionalEdgesCut;
+ }
+ }
+ };
+ for (auto NI = GTraits::nodes_begin(&G), NE = GTraits::nodes_end(&G);
+ NI != NE; ++NI) {
+ for (auto CEI = GTraits::child_edge_begin(*NI),
+ CEE = GTraits::child_edge_end(*NI);
+ CEI != CEE; ++CEI) {
+ if (CutEdges.contains(CEI)) {
+ MachineInstr *MI = (*NI)->value(), *Prev;
+ MachineBasicBlock *MBB;
+ MachineBasicBlock::iterator InsertionPt;
+ if (MI == ARG_NODE) { // insert LFENCE at beginning of entry block
+ MBB = &G.getMF().front();
+ InsertionPt = MBB->begin();
+ Prev = nullptr;
+ } else if (MI->isBranch()) { // insert the LFENCE before the branch
+ MBB = MI->getParent();
+ InsertionPt = MI;
+ Prev = MI->getPrevNode();
+ CutAllCFGEdges(*NI);
+ } else { // insert the LFENCE after the instruction
+ MBB = MI->getParent();
+ InsertionPt = MI->getNextNode() ? MI->getNextNode() : MBB->end();
+ Prev = InsertionPt == MBB->end()
+ ? (MBB->empty() ? nullptr : &MBB->back())
+ : InsertionPt->getPrevNode();
+ }
+ if ((InsertionPt == MBB->end() || !isFence(&*InsertionPt)) &&
+ (!Prev || !isFence(Prev))) {
+ BuildMI(*MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE));
+ ++FencesInserted;
+ }
+ }
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n");
+ LLVM_DEBUG(dbgs() << "Cut an additional " << AdditionalEdgesCut
+ << " edges during fence insertion\n");
+ return FencesInserted;
+}
+
bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToAccessMemory(
const MachineInstr &MI, unsigned Reg) const {
if (!MI.mayLoadOrStore() || MI.getOpcode() == X86::MFENCE ||
--- /dev/null
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-CBFX
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown --x86-lvi-load-no-fixed < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-CB
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown --x86-lvi-load-no-cbranch < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-FX
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown --x86-lvi-load-no-fixed --x86-lvi-load-no-cbranch < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-BASE
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local i32 @test(i32** %secret, i32 %secret_size) #0 {
+; X64-LABEL: test:
+entry:
+ %secret.addr = alloca i32**, align 8
+ %secret_size.addr = alloca i32, align 4
+ %ret_val = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32** %secret, i32*** %secret.addr, align 8
+ store i32 %secret_size, i32* %secret_size.addr, align 4
+ store i32 0, i32* %ret_val, align 4
+ call void @llvm.x86.sse2.lfence()
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+; X64: # %bb.0: # %entry
+; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: lfence
+; X64-NEXT: movl $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: jmp .LBB0_1
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %i, align 4
+ %1 = load i32, i32* %secret_size.addr, align 4
+ %cmp = icmp slt i32 %0, %1
+ br i1 %cmp, label %for.body, label %for.end
+
+; X64: .LBB0_1: # %for.cond
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X64-CBFX-NEXT: lfence
+; X64-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax
+; X64-CBFX-NEXT: lfence
+; X64-NEXT: jge .LBB0_5
+
+for.body: ; preds = %for.cond
+ %2 = load i32, i32* %i, align 4
+ %rem = srem i32 %2, 2
+ %cmp1 = icmp eq i32 %rem, 0
+ br i1 %cmp1, label %if.then, label %if.end
+
+; X64: # %bb.2: # %for.body
+; X64-NEXT: # in Loop: Header=BB0_1 Depth=1
+; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X64-CBFX-NEXT: lfence
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: shrl $31, %ecx
+; X64-NEXT: addl %eax, %ecx
+; X64-NEXT: andl $-2, %ecx
+; X64-NEXT: cmpl %ecx, %eax
+; X64-NEXT: jne .LBB0_4
+
+if.then: ; preds = %for.body
+ %3 = load i32**, i32*** %secret.addr, align 8
+ %4 = load i32, i32* %ret_val, align 4
+ %idxprom = sext i32 %4 to i64
+ %arrayidx = getelementptr inbounds i32*, i32** %3, i64 %idxprom
+ %5 = load i32*, i32** %arrayidx, align 8
+ %6 = load i32, i32* %5, align 4
+ store i32 %6, i32* %ret_val, align 4
+ br label %if.end
+
+; X64: # %bb.3: # %if.then
+; X64-NEXT: # in Loop: Header=BB0_1 Depth=1
+; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; X64-CBFX-NEXT: lfence
+; X64-FX-NEXT: lfence
+; X64-NEXT: movslq -{{[0-9]+}}(%rsp), %rcx
+; X64-CBFX-NEXT: lfence
+; X64-FX-NEXT: lfence
+; X64-NEXT: movq (%rax,%rcx,8), %rax
+; X64-NEXT: lfence
+; X64-NEXT: movl (%rax), %eax
+; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT: jmp .LBB0_4
+
+if.end: ; preds = %if.then, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %7 = load i32, i32* %i, align 4
+ %inc = add nsw i32 %7, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %8 = load i32, i32* %ret_val, align 4
+ ret i32 %8
+}
+
+; Function Attrs: nounwind
+declare void @llvm.x86.sse2.lfence() #1
+
+attributes #0 = { "target-features"="+lvi-load-hardening" }
+attributes #1 = { nounwind }