AArch64: work around how Cyclone handles "movi.2d vD, #0".

author Tim Northover <tnorthover@apple.com>
Mon, 18 Dec 2017 10:36:00 +0000 (10:36 +0000)

committer Tim Northover <tnorthover@apple.com>
Mon, 18 Dec 2017 10:36:00 +0000 (10:36 +0000)
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td

index 9cb4eaf..75fb937 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -61,6 +61,12 @@ def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
  def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
                                          "Has zero-cycle zeroing instructions">;
  
+/// ... but the floating-point version doesn't quite work in rare cases on older
+/// CPUs.
+def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround",
+    "HasZeroCycleZeroingFPWorkaround", "true",
+    "The zero-cycle floating-point zeroing instruction has a bug">;
+
  def FeatureStrictAlign : SubtargetFeature<"strict-align",
                                            "StrictAlign", "true",
                                            "Disallow all unaligned memory "
@@ -290,7 +296,8 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
                                     FeaturePerfMon,
                                     FeatureSlowMisaligned128Store,
                                     FeatureZCRegMove,
-                                   FeatureZCZeroing
+                                   FeatureZCZeroing,
+                                   FeatureZCZeroingFPWorkaround
                                     ]>;
  
  def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp

index 56fcff6..67138f4 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -523,7 +523,7 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
  
  void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
    unsigned DestReg = MI.getOperand(0).getReg();
-  if (STI->hasZeroCycleZeroing()) {
+  if (STI->hasZeroCycleZeroing() && !STI->hasZeroCycleZeroingFPWorkaround()) {
      // Convert H/S/D register to corresponding Q register
      if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31)
        DestReg = AArch64::Q0 + (DestReg - AArch64::H0);
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h

index a73ba88..5d9759d 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -86,6 +86,7 @@ protected:
  
    // HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
    bool HasZeroCycleZeroing = false;
+  bool HasZeroCycleZeroingFPWorkaround = false;
  
    // StrictAlign - Disallow unaligned memory accesses.
    bool StrictAlign = false;
@@ -197,6 +198,10 @@ public:
  
    bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
  
+  bool hasZeroCycleZeroingFPWorkaround() const {
+    return HasZeroCycleZeroingFPWorkaround;
+  }
+
    bool requiresStrictAlign() const { return StrictAlign; }
  
    bool isXRaySupported() const override { return true; }
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp

index 2763a5b..fd2ef18 100644 (file)
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3796,6 +3796,31 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
        }
      }
    }
+
+  // The Cyclone CPU and early successors didn't execute the zero-cycle zeroing
+  // instruction for FP registers correctly in some rare circumstances. Convert
+  // it to a safe instruction and warn (because silently changing someone's
+  // assembly is rude).
+  if (getSTI().getFeatureBits()[AArch64::FeatureZCZeroingFPWorkaround] &&
+      NumOperands == 4 && Tok == "movi") {
+    AArch64Operand &Op1 = static_cast<AArch64Operand &>(*Operands[1]);
+    AArch64Operand &Op2 = static_cast<AArch64Operand &>(*Operands[2]);
+    AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
+    if ((Op1.isToken() && Op2.isNeonVectorReg() && Op3.isImm()) ||
+        (Op1.isNeonVectorReg() && Op2.isToken() && Op3.isImm())) {
+      StringRef Suffix = Op1.isToken() ? Op1.getToken() : Op2.getToken();
+      if (Suffix.lower() == ".2d" &&
+          cast<MCConstantExpr>(Op3.getImm())->getValue() == 0) {
+        Warning(IDLoc, "instruction movi.2d with immediate #0 may not function"
+                " correctly on this CPU, converting to equivalent movi.16b");
+        // Switch the suffix to .16b.
+        unsigned Idx = Op1.isToken() ? 1 : 2;
+        Operands[Idx] = AArch64Operand::CreateToken(".16b", false, IDLoc,
+                                                  getContext());
+      }
+    }
+  }
+
    // FIXME: Horrible hack for sxtw and uxtw with Wn src and Xd dst operands.
    //        InstAlias can't quite handle this since the reg classes aren't
    //        subclasses.
diff --git a/llvm/test/CodeGen/AArch64/arm64-fcmp-opt.ll b/llvm/test/CodeGen/AArch64/arm64-fcmp-opt.ll

index e8b1557..5155d49 100644 (file)
--- a/llvm/test/CodeGen/AArch64/arm64-fcmp-opt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fcmp-opt.ll
@@ -41,7 +41,7 @@ entry:
  define float @fcmp_oeq(float %a, float %b) nounwind ssp {
  ; CHECK-LABEL: @fcmp_oeq
  ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
  ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
  ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], eq
  
@@ -53,7 +53,7 @@ define float @fcmp_oeq(float %a, float %b) nounwind ssp {
  define float @fcmp_ogt(float %a, float %b) nounwind ssp {
  ; CHECK-LABEL: @fcmp_ogt
  ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
  ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
  ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], gt
  
@@ -65,7 +65,7 @@ define float @fcmp_ogt(float %a, float %b) nounwind ssp {
  define float @fcmp_oge(float %a, float %b) nounwind ssp {
  ; CHECK-LABEL: @fcmp_oge
  ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
  ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
  ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], ge
  
@@ -77,7 +77,7 @@ define float @fcmp_oge(float %a, float %b) nounwind ssp {
  define float @fcmp_olt(float %a, float %b) nounwind ssp {
  ; CHECK-LABEL: @fcmp_olt
  ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
  ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
  ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], mi
  
@@ -89,7 +89,7 @@ define float @fcmp_olt(float %a, float %b) nounwind ssp {
  define float @fcmp_ole(float %a, float %b) nounwind ssp {
  ; CHECK-LABEL: @fcmp_ole
  ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
  ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
  ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], ls
  
@@ -101,7 +101,7 @@ define float @fcmp_ole(float %a, float %b) nounwind ssp {
  define float @fcmp_ord(float %a, float %b) nounwind ssp {
  ; CHECK-LABEL: @fcmp_ord
  ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
  ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
  ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], vc
    %cmp = fcmp ord float %a, %b
@@ -112,7 +112,7 @@ define float @fcmp_ord(float %a, float %b) nounwind ssp {
  define float @fcmp_uno(float %a, float %b) nounwind ssp {
  ; CHECK-LABEL: @fcmp_uno
  ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
  ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
  ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], vs
    %cmp = fcmp uno float %a, %b
@@ -123,7 +123,7 @@ define float @fcmp_uno(float %a, float %b) nounwind ssp {
  define float @fcmp_ugt(float %a, float %b) nounwind ssp {
  ; CHECK-LABEL: @fcmp_ugt
  ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
  ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
  ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], hi
    %cmp = fcmp ugt float %a, %b
@@ -134,7 +134,7 @@ define float @fcmp_ugt(float %a, float %b) nounwind ssp {
  define float @fcmp_uge(float %a, float %b) nounwind ssp {
  ; CHECK-LABEL: @fcmp_uge
  ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
  ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
  ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], pl
    %cmp = fcmp uge float %a, %b
@@ -145,7 +145,7 @@ define float @fcmp_uge(float %a, float %b) nounwind ssp {
  define float @fcmp_ult(float %a, float %b) nounwind ssp {
  ; CHECK-LABEL: @fcmp_ult
  ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
  ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
  ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], lt
    %cmp = fcmp ult float %a, %b
@@ -156,7 +156,7 @@ define float @fcmp_ult(float %a, float %b) nounwind ssp {
  define float @fcmp_ule(float %a, float %b) nounwind ssp {
  ; CHECK-LABEL: @fcmp_ule
  ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
  ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
  ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], le
    %cmp = fcmp ule float %a, %b
@@ -167,7 +167,7 @@ define float @fcmp_ule(float %a, float %b) nounwind ssp {
  define float @fcmp_une(float %a, float %b) nounwind ssp {
  ; CHECK-LABEL: @fcmp_une
  ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
  ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
  ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], ne
    %cmp = fcmp une float %a, %b
@@ -180,7 +180,7 @@ define float @fcmp_une(float %a, float %b) nounwind ssp {
  define float @fcmp_one(float %a, float %b) nounwind ssp {
  ; CHECK-LABEL: @fcmp_one
  ;      fcmp    s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
  ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
  ; CHECK: fcsel [[TMP:s[0-9]+]], s[[ONE]], s[[ZERO]], mi
  ; CHECK: fcsel s0, s[[ONE]], [[TMP]], gt
@@ -194,7 +194,7 @@ define float @fcmp_one(float %a, float %b) nounwind ssp {
  define float @fcmp_ueq(float %a, float %b) nounwind ssp {
  ; CHECK-LABEL: @fcmp_ueq
  ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
  ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
  ; CHECK: fcsel [[TMP:s[0-9]+]], s[[ONE]], s[[ZERO]], eq
  ; CHECK: fcsel s0, s[[ONE]], [[TMP]], vs
diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll

index 412651c..453334d 100644 (file)
--- a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
@@ -9,10 +9,10 @@ define void @t1() nounwind ssp {
  entry:
  ; ALL-LABEL: t1:
  ; ALL-NOT: fmov
-; CYCLONE: movi.2d v0, #0000000000000000
-; CYCLONE: movi.2d v1, #0000000000000000
-; CYCLONE: movi.2d v2, #0000000000000000
-; CYCLONE: movi.2d v3, #0000000000000000
+; CYCLONE: fmov d0, xzr
+; CYCLONE: fmov d1, xzr
+; CYCLONE: fmov d2, xzr
+; CYCLONE: fmov d3, xzr
  ; KRYO: movi v0.2d, #0000000000000000
  ; KRYO: movi v1.2d, #0000000000000000
  ; KRYO: movi v2.2d, #0000000000000000
@@ -48,8 +48,8 @@ entry:
  define void @t4() nounwind ssp {
  ; ALL-LABEL: t4:
  ; ALL-NOT: fmov
-; CYCLONE: movi.2d v0, #0000000000000000
-; CYCLONE: movi.2d v1, #0000000000000000
+; CYCLONE: fmov s0, wzr
+; CYCLONE: fmov s1, wzr
  ; KRYO: movi v0.2d, #0000000000000000
  ; KRYO: movi v1.2d, #0000000000000000
  ; FALKOR: movi v0.2d, #0000000000000000
diff --git a/llvm/test/CodeGen/AArch64/fp-cond-sel.ll b/llvm/test/CodeGen/AArch64/fp-cond-sel.ll

index 4d9cb21..f74e9c3 100644 (file)
--- a/llvm/test/CodeGen/AArch64/fp-cond-sel.ll
+++ b/llvm/test/CodeGen/AArch64/fp-cond-sel.ll
@@ -12,7 +12,7 @@ define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
    %tst1 = icmp ugt i32 %lhs32, %rhs32
    %val1 = select i1 %tst1, float 0.0, float 1.0
    store float %val1, float* @varfloat
-; CHECK-DAG: movi v[[FLT0:[0-9]+]].2d, #0
+; CHECK-DAG: fmov s[[FLT0:[0-9]+]], wzr
  ; CHECK-DAG: fmov s[[FLT1:[0-9]+]], #1.0
  ; CHECK: fcsel {{s[0-9]+}}, s[[FLT0]], s[[FLT1]], hi
  
diff --git a/llvm/test/MC/AArch64/cyclone-movi-bug.s b/llvm/test/MC/AArch64/cyclone-movi-bug.s

new file mode 100644 (file)

index 0000000..d49aea3
--- /dev/null
+++ b/llvm/test/MC/AArch64/cyclone-movi-bug.s
@@ -0,0 +1,9 @@
+; RUN: llvm-mc -triple aarch64-apple-ios -mcpu=cyclone %s 2> %t.log | FileCheck %s
+; RUN: FileCheck %s --check-prefix=CHECK-ERR < %t.log
+
+    ; CHECK: movi v3.16b, #0
+    ; CHECK: movi v7.16b, #0
+    ; CHECK-ERR: warning: instruction movi.2d with immediate #0 may not function correctly on this CPU, converting to equivalent movi.16b
+    ; CHECK-ERR: warning: instruction movi.2d with immediate #0 may not function correctly on this CPU, converting to equivalent movi.16b
+    movi.2d v3, #0
+    movi v7.2d, #0
author	Tim Northover <tnorthover@apple.com>
	Mon, 18 Dec 2017 10:36:00 +0000 (10:36 +0000)
committer	Tim Northover <tnorthover@apple.com>
	Mon, 18 Dec 2017 10:36:00 +0000 (10:36 +0000)
llvm/lib/Target/AArch64/AArch64.td		patch | blob | history
llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp		patch | blob | history
llvm/lib/Target/AArch64/AArch64Subtarget.h		patch | blob | history
llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp		patch | blob | history
llvm/test/CodeGen/AArch64/arm64-fcmp-opt.ll		patch | blob | history
llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll		patch | blob | history
llvm/test/CodeGen/AArch64/fp-cond-sel.ll		patch | blob | history
llvm/test/MC/AArch64/cyclone-movi-bug.s	[new file with mode: 0644]	patch | blob