[BOLT] Fix AND evaluation bug in shrink wrapping

author Rafael Auler <rafaelauler@fb.com>

Sat, 21 May 2022 00:42:58 +0000 (17:42 -0700)

committer Rafael Auler <rafaelauler@fb.com>

Thu, 26 May 2022 21:59:28 +0000 (14:59 -0700)
author Rafael Auler <rafaelauler@fb.com>
Sat, 21 May 2022 00:42:58 +0000 (17:42 -0700)
committer Rafael Auler <rafaelauler@fb.com>
Thu, 26 May 2022 21:59:28 +0000 (14:59 -0700)
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h

index 1b4159d..d4f5c42 100644 (file)
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -909,12 +909,22 @@ public:
      return false;
    }
  
-  /// Use \p Input1 or Input2 as the current value for the input register and
-  /// put in \p Output the changes incurred by executing \p Inst. Return false
-  /// if it was not possible to perform the evaluation.
-  virtual bool evaluateSimple(const MCInst &Inst, int64_t &Output,
-                              std::pair<MCPhysReg, int64_t> Input1,
-                              std::pair<MCPhysReg, int64_t> Input2) const {
+  /// Use \p Input1 or Input2 as the current value for the input
+  /// register and put in \p Output the changes incurred by executing
+  /// \p Inst. Return false if it was not possible to perform the
+  /// evaluation. evaluateStackOffsetExpr is restricted to operations
+  /// that have associativity with addition. Its intended usage is for
+  /// evaluating stack offset changes. In these cases, expressions
+  /// appear in the form of (x + offset) OP constant, where x is an
+  /// unknown base (such as stack base) but offset and constant are
+  /// known. In these cases, \p Output represents the new stack offset
+  /// after executing \p Inst. Because we don't know x, we can't
+  /// evaluate operations such as multiply or AND/OR, e.g. (x +
+  /// offset) OP constant is not the same as x + (offset OP constant).
+  virtual bool
+  evaluateStackOffsetExpr(const MCInst &Inst, int64_t &Output,
+                          std::pair<MCPhysReg, int64_t> Input1,
+                          std::pair<MCPhysReg, int64_t> Input2) const {
      llvm_unreachable("not implemented");
      return false;
    }
diff --git a/bolt/include/bolt/Passes/StackPointerTracking.h b/bolt/include/bolt/Passes/StackPointerTracking.h

index 535977a..cbb4f88 100644 (file)
--- a/bolt/include/bolt/Passes/StackPointerTracking.h
+++ b/bolt/include/bolt/Passes/StackPointerTracking.h
@@ -115,7 +115,7 @@ protected:
        else
          FP = std::make_pair(0, 0);
        int64_t Output;
-      if (!MIB->evaluateSimple(Point, Output, SP, FP)) {
+      if (!MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) {
          if (SPVal == EMPTY && FPVal == EMPTY)
            return SPVal;
          return SUPERPOSITION;
@@ -150,7 +150,7 @@ protected:
        else
          SP = std::make_pair(0, 0);
        int64_t Output;
-      if (!MIB->evaluateSimple(Point, Output, SP, FP)) {
+      if (!MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) {
          if (SPVal == EMPTY && FPVal == EMPTY)
            return FPVal;
          return SUPERPOSITION;
diff --git a/bolt/lib/Passes/AllocCombiner.cpp b/bolt/lib/Passes/AllocCombiner.cpp

index 652111c..e92b74e 100644 (file)
--- a/bolt/lib/Passes/AllocCombiner.cpp
+++ b/bolt/lib/Passes/AllocCombiner.cpp
@@ -29,9 +29,9 @@ namespace {
  
  bool getStackAdjustmentSize(const BinaryContext &BC, const MCInst &Inst,
                              int64_t &Adjustment) {
-  return BC.MIB->evaluateSimple(Inst, Adjustment,
-                                std::make_pair(BC.MIB->getStackPointer(), 0LL),
-                                std::make_pair(0, 0LL));
+  return BC.MIB->evaluateStackOffsetExpr(
+      Inst, Adjustment, std::make_pair(BC.MIB->getStackPointer(), 0LL),
+      std::make_pair(0, 0LL));
  }
  
  bool isIndifferentToSP(const MCInst &Inst, const BinaryContext &BC) {
diff --git a/bolt/lib/Passes/ShrinkWrapping.cpp b/bolt/lib/Passes/ShrinkWrapping.cpp

index f150965..f032735 100644 (file)
--- a/bolt/lib/Passes/ShrinkWrapping.cpp
+++ b/bolt/lib/Passes/ShrinkWrapping.cpp
@@ -246,7 +246,7 @@ void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) {
      SP = std::make_pair(0, 0);
  
    int64_t Output;
-  if (!BC.MIB->evaluateSimple(Point, Output, SP, FP))
+  if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
      return;
  
    // Not your regular frame pointer initialization... bail
@@ -294,7 +294,7 @@ void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) {
      SP = std::make_pair(0, 0);
  
    int64_t Output;
-  if (!BC.MIB->evaluateSimple(Point, Output, SP, FP))
+  if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
      return;
  
    // If the value is the same of FP, no need to adjust it
diff --git a/bolt/lib/Passes/StackAllocationAnalysis.cpp b/bolt/lib/Passes/StackAllocationAnalysis.cpp

index 92ad2cc..2597553 100644 (file)
--- a/bolt/lib/Passes/StackAllocationAnalysis.cpp
+++ b/bolt/lib/Passes/StackAllocationAnalysis.cpp
@@ -134,7 +134,7 @@ BitVector StackAllocationAnalysis::computeNext(const MCInst &Point,
      else
        FP = std::make_pair(0, 0);
      int64_t Output;
-    if (!MIB->evaluateSimple(Point, Output, SP, FP))
+    if (!MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
        return Next;
  
      if (SPOffset < Output) {
diff --git a/bolt/lib/Passes/ValidateInternalCalls.cpp b/bolt/lib/Passes/ValidateInternalCalls.cpp

index 7faff76..d1246a2 100644 (file)
--- a/bolt/lib/Passes/ValidateInternalCalls.cpp
+++ b/bolt/lib/Passes/ValidateInternalCalls.cpp
@@ -261,8 +261,8 @@ bool ValidateInternalCalls::analyzeFunction(BinaryFunction &Function) const {
          int64_t Output;
          std::pair<MCPhysReg, int64_t> Input1 = std::make_pair(Reg, 0);
          std::pair<MCPhysReg, int64_t> Input2 = std::make_pair(0, 0);
-        if (!BC.MIB->evaluateSimple(Use, Output, Input1, Input2)) {
-          LLVM_DEBUG(dbgs() << "Evaluate simple failed.\n");
+        if (!BC.MIB->evaluateStackOffsetExpr(Use, Output, Input1, Input2)) {
+          LLVM_DEBUG(dbgs() << "Evaluate stack offset expr failed.\n");
            return false;
          }
          if (Offset + Output < 0 ||
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp

index 610eafc..d6debda 100644 (file)
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -1244,9 +1244,10 @@ public:
      return false;
    }
  
-  bool evaluateSimple(const MCInst &Inst, int64_t &Output,
-                      std::pair<MCPhysReg, int64_t> Input1,
-                      std::pair<MCPhysReg, int64_t> Input2) const override {
+  bool
+  evaluateStackOffsetExpr(const MCInst &Inst, int64_t &Output,
+                          std::pair<MCPhysReg, int64_t> Input1,
+                          std::pair<MCPhysReg, int64_t> Input2) const override {
  
      auto getOperandVal = [&](MCPhysReg Reg) -> ErrorOr<int64_t> {
        if (Reg == Input1.first)
@@ -1260,16 +1261,6 @@ public:
      default:
        return false;
  
-    case X86::AND64ri32:
-    case X86::AND64ri8:
-      if (!Inst.getOperand(2).isImm())
-        return false;
-      if (ErrorOr<int64_t> InputVal =
-              getOperandVal(Inst.getOperand(1).getReg()))
-        Output = *InputVal & Inst.getOperand(2).getImm();
-      else
-        return false;
-      break;
      case X86::SUB64ri32:
      case X86::SUB64ri8:
        if (!Inst.getOperand(2).isImm())
diff --git a/bolt/test/X86/shrinkwrapping-and-rsp.s b/bolt/test/X86/shrinkwrapping-and-rsp.s

new file mode 100644 (file)

index 0000000..2228c60
--- /dev/null
+++ b/bolt/test/X86/shrinkwrapping-and-rsp.s
@@ -0,0 +1,55 @@
+# This checks that shrink wrapping does attempt at accessing stack elements
+# using RSP when the function is aligning RSP and changing offsets.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
+# RUN:   %s -o %t.o
+# RUN: link_fdata %s %t.o %t.fdata
+# RUN: llvm-strip --strip-unneeded %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
+# RUN: llvm-bolt %t.exe -o %t.out -data %t.fdata \
+# RUN:     -frame-opt=all -simplify-conditional-tail-calls=false \
+# RUN:     -eliminate-unreachable=false | FileCheck %s
+
+# Here we have a function that aligns the stack at prologue. Stack pointer
+# analysis can't try to infer offset positions after AND because that depends
+# on the runtime value of the stack pointer of callee (whether it is misaligned
+# or not).
+  .globl _start
+  .type _start, %function
+_start:
+  .cfi_startproc
+# FDATA: 0 [unknown] 0 1 _start 0 0 1
+  push  %rbp
+  mov   %rsp, %rbp
+  push  %rbx
+  push  %r14
+  and    $0xffffffffffffffe0,%rsp
+  subq  $0x20, %rsp
+b:  je  hot_path
+# FDATA: 1 _start #b# 1 _start #hot_path# 0 1
+cold_path:
+  mov %r14, %rdi
+  mov %rbx, %rdi
+  # Block push-pop mode by using an instruction that requires the
+  # stack to be aligned to 128B. This will force the pass
+  # to try to index stack elements by using RSP +offset directly, but
+  # we do not know how to access individual elements of the stack thanks
+  # to the alignment.
+  movdqa       %xmm8, (%rsp)
+  addq  $0x20, %rsp
+  pop %r14
+  pop %rbx
+  pop %rbp
+  ret
+hot_path:
+  addq  $0x20, %rsp
+  pop %r14
+  pop %rbx
+  pop %rbp
+  ret
+  .cfi_endproc
+  .size _start, .-_start
+
+# CHECK:   BOLT-INFO: Shrink wrapping moved 0 spills inserting load/stores and 0 spills inserting push/pops
author	Rafael Auler <rafaelauler@fb.com>
	Sat, 21 May 2022 00:42:58 +0000 (17:42 -0700)
committer	Rafael Auler <rafaelauler@fb.com>
	Thu, 26 May 2022 21:59:28 +0000 (14:59 -0700)
bolt/include/bolt/Core/MCPlusBuilder.h		patch \| blob \| history
bolt/include/bolt/Passes/StackPointerTracking.h		patch \| blob \| history
bolt/lib/Passes/AllocCombiner.cpp		patch \| blob \| history
bolt/lib/Passes/ShrinkWrapping.cpp		patch \| blob \| history
bolt/lib/Passes/StackAllocationAnalysis.cpp		patch \| blob \| history
bolt/lib/Passes/ValidateInternalCalls.cpp		patch \| blob \| history
bolt/lib/Target/X86/X86MCPlusBuilder.cpp		patch \| blob \| history
bolt/test/X86/shrinkwrapping-and-rsp.s	[new file with mode: 0644]	patch \| blob