From 28b01a51b36c782a77923156807ec0ebabd60059 Mon Sep 17 00:00:00 2001 From: Kyle Butt Date: Thu, 10 Dec 2015 21:28:40 +0000 Subject: [PATCH] PPC: Teach FMA mutate to respect register classes. This was causing bad code gen and assembly that won't assemble, as mixed altivec and vsx code would end up with a vsx high register assigned to an altivec instruction, which won't work. Constraining the classes allows the optimization to proceed. llvm-svn: 255299 --- llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp | 11 ++- .../PowerPC/fma-mutate-register-constraint.ll | 89 ++++++++++++++++++++++ 2 files changed, 98 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/fma-mutate-register-constraint.ll diff --git a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index 0bd2bd8..6b19a2f 100644 --- a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -220,6 +220,14 @@ protected: if (OldFMAReg == KilledProdReg) continue; + // If there isn't a class that fits, we can't perform the transform. + // This is needed for correctness with a mixture of VSX and Altivec + // instructions to make sure that a low VSX register is not assigned to + // the Altivec instruction. + if (!MRI.constrainRegClass(KilledProdReg, + MRI.getRegClass(OldFMAReg))) + continue; + assert(OldFMAReg == AddendMI->getOperand(0).getReg() && "Addend copy not tied to old FMA output!"); @@ -262,8 +270,7 @@ protected: if (UseMI == AddendMI) continue; - UseMO.setReg(KilledProdReg); - UseMO.setSubReg(KilledProdSubReg); + UseMO.substVirtReg(KilledProdReg, KilledProdSubReg, *TRI); } // Extend the live intervals of the killed product operand to hold the diff --git a/llvm/test/CodeGen/PowerPC/fma-mutate-register-constraint.ll b/llvm/test/CodeGen/PowerPC/fma-mutate-register-constraint.ll new file mode 100644 index 0000000..fd2ba4e --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fma-mutate-register-constraint.ll @@ -0,0 +1,89 @@ +; RUN: llc -enable-unsafe-fp-math < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +; CHECK-NOT: {{vmrg[hl]w.*(3[23456789]|[456][0-9])}} +define void @__f0() { +entry: + %0 = shufflevector <8 x float> zeroinitializer, <8 x float> undef, <16 x i32> + %1 = shufflevector <16 x float> %0, <16 x float> undef, <32 x i32> + %2 = shufflevector <8 x float> zeroinitializer, <8 x float> undef, <16 x i32> + %3 = shufflevector <16 x float> %2, <16 x float> undef, <32 x i32> + %4 = fmul <32 x float> %1, %3 + %5 = load <4 x float>, <4 x float>* undef, align 128 + %6 = load <4 x float>, <4 x float>* undef, align 128 + %7 = shufflevector <4 x float> undef, <4 x float> %5, <8 x i32> + %8 = shufflevector <4 x float> undef, <4 x float> %6, <8 x i32> + %9 = shufflevector <8 x float> %7, <8 x float> %8, <16 x i32> + %10 = shufflevector <16 x float> undef, <16 x float> %9, <32 x i32> + %11 = load <4 x float>, <4 x float>* null, align 128 + %12 = load <4 x float>, <4 x float>* undef, align 128 + %13 = shufflevector <4 x float> undef, <4 x float> %11, <8 x i32> + %14 = shufflevector <4 x float> undef, <4 x float> %12, <8 x i32> + %15 = shufflevector <8 x float> %13, <8 x float> %14, <16 x i32> + %16 = shufflevector <16 x float> undef, <16 x float> %15, <32 x i32> + %17 = fmul <32 x float> %10, %16 + %18 = fsub <32 x float> %4, %17 + %19 = shufflevector <32 x float> %18, <32 x float> undef, <64 x i32> + %20 = bitcast <64 x float> %19 to <32 x double> + %21 = shufflevector <32 x double> undef, <32 x double> %20, <64 x i32> + %22 = bitcast <64 x double> %21 to <128 x float> + %23 = shufflevector <128 x float> undef, <128 x float> %22, <256 x i32> + %24 = shufflevector <256 x float> undef, <256 x float> %23, <512 x i32> + %25 = shufflevector <512 x float> %24, <512 x float> undef, <1024 x i32> + %26 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %26, <4 x float>* undef, align 128 + %27 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %27, <4 x float>* undef, align 128 + %28 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %28, <4 x float>* undef, align 128 + %29 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %29, <4 x float>* undef, align 128 + %30 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %30, <4 x float>* undef, align 128 + %31 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %31, <4 x float>* undef, align 128 + %32 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %32, <4 x float>* undef, align 128 + %33 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %33, <4 x float>* undef, align 128 + %34 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %34, <4 x float>* undef, align 128 + %35 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %35, <4 x float>* undef, align 128 + %36 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %36, <4 x float>* undef, align 128 + %37 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %37, <4 x float>* undef, align 128 + %38 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %38, <4 x float>* undef, align 128 + %39 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %39, <4 x float>* undef, align 128 + %40 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %40, <4 x float>* undef, align 128 + %41 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %41, <4 x float>* undef, align 128 + %42 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %42, <4 x float>* undef, align 128 + %43 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %43, <4 x float>* undef, align 128 + %44 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %44, <4 x float>* undef, align 128 + %45 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %45, <4 x float>* undef, align 128 + %46 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %46, <4 x float>* undef, align 128 + %47 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32> + store <4 x float> %47, <4 x float>* undef, align 128 + %48 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32> + store <4 x float> %48, <4 x float>* undef, align 128 + %49 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32> + store <4 x float> %49, <4 x float>* undef, align 128 + %50 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32> + store <4 x float> %50, <4 x float>* undef, align 128 + %51 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32> + store <4 x float> %51, <4 x float>* undef, align 128 + %52 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32> + store <4 x float> %52, <4 x float>* undef, align 128 + ret void +} -- 2.7.4