From f8cb97a0f1a4fed7b0347f127ffe33841cf02134 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 23 Nov 2022 12:56:16 -0800 Subject: [PATCH] [Hexagon] Fix deinterleaving after vmpyh --- llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp | 2 +- llvm/test/CodeGen/Hexagon/autohvx/mulh.ll | 105 +++++++++++++++++++---- llvm/test/CodeGen/Hexagon/autohvx/qmul.ll | 25 ++++-- 3 files changed, 111 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp index 144555e..fe9fd14 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp @@ -1586,7 +1586,7 @@ auto HvxIdioms::createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const Value *P = HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {Y.Val, X.Val}); // Deinterleave - return HVC.vdeal(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P)); + return HVC.vshuff(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P)); } auto HvxIdioms::createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const diff --git a/llvm/test/CodeGen/Hexagon/autohvx/mulh.ll b/llvm/test/CodeGen/Hexagon/autohvx/mulh.ll index a4a418f..5009adf8 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/mulh.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/mulh.ll @@ -10,13 +10,28 @@ define <64 x i16> @mulhs16(<64 x i16> %a0, <64 x i16> %a1) #0 { ; V60-NEXT: v1:0.w = vmpy(v1.h,v0.h) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: r7 = #-4 +; V60-NEXT: r7:6 = combine(#64,#68) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v1:0 = vdeal(v1,v0,r7) +; V60-NEXT: r5 = #120 ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v0.h = vpacko(v1.w,v0.w) +; V60-NEXT: v1:0 = vshuff(v1,v0,r7) +; V60-NEXT: } +; V60-NEXT: { +; V60-NEXT: v3:2 = vdeal(v0,v0,r6) +; V60-NEXT: } +; V60-NEXT: { +; V60-NEXT: v31:30 = vdeal(v0,v1,r6) +; V60-NEXT: } +; V60-NEXT: { +; V60-NEXT: v3:2 = vshuff(v3,v2,r5) +; V60-NEXT: } +; V60-NEXT: { +; V60-NEXT: v1:0 = vshuff(v31,v30,r5) +; V60-NEXT: } +; V60-NEXT: { +; V60-NEXT: v0.h = vpacko(v0.w,v2.w) ; V60-NEXT: } ; V60-NEXT: { ; V60-NEXT: jumpr r31 @@ -28,13 +43,28 @@ define <64 x i16> @mulhs16(<64 x i16> %a0, <64 x i16> %a1) #0 { ; V65-NEXT: v1:0.w = vmpy(v1.h,v0.h) ; V65-NEXT: } ; V65-NEXT: { -; V65-NEXT: r7 = #-4 +; V65-NEXT: r7:6 = combine(#64,#68) ; V65-NEXT: } ; V65-NEXT: { -; V65-NEXT: v1:0 = vdeal(v1,v0,r7) +; V65-NEXT: r5 = #120 ; V65-NEXT: } ; V65-NEXT: { -; V65-NEXT: v0.h = vpacko(v1.w,v0.w) +; V65-NEXT: v1:0 = vshuff(v1,v0,r7) +; V65-NEXT: } +; V65-NEXT: { +; V65-NEXT: v3:2 = vdeal(v0,v0,r6) +; V65-NEXT: } +; V65-NEXT: { +; V65-NEXT: v31:30 = vdeal(v0,v1,r6) +; V65-NEXT: } +; V65-NEXT: { +; V65-NEXT: v3:2 = vshuff(v3,v2,r5) +; V65-NEXT: } +; V65-NEXT: { +; V65-NEXT: v1:0 = vshuff(v31,v30,r5) +; V65-NEXT: } +; V65-NEXT: { +; V65-NEXT: v0.h = vpacko(v0.w,v2.w) ; V65-NEXT: } ; V65-NEXT: { ; V65-NEXT: jumpr r31 @@ -46,13 +76,28 @@ define <64 x i16> @mulhs16(<64 x i16> %a0, <64 x i16> %a1) #0 { ; V69-NEXT: v1:0.w = vmpy(v1.h,v0.h) ; V69-NEXT: } ; V69-NEXT: { -; V69-NEXT: r7 = #-4 +; V69-NEXT: r7:6 = combine(#64,#68) +; V69-NEXT: } +; V69-NEXT: { +; V69-NEXT: r5 = #120 +; V69-NEXT: } +; V69-NEXT: { +; V69-NEXT: v1:0 = vshuff(v1,v0,r7) +; V69-NEXT: } +; V69-NEXT: { +; V69-NEXT: v3:2 = vdeal(v0,v0,r6) +; V69-NEXT: } +; V69-NEXT: { +; V69-NEXT: v31:30 = vdeal(v0,v1,r6) ; V69-NEXT: } ; V69-NEXT: { -; V69-NEXT: v1:0 = vdeal(v1,v0,r7) +; V69-NEXT: v3:2 = vshuff(v3,v2,r5) ; V69-NEXT: } ; V69-NEXT: { -; V69-NEXT: v0.h = vpacko(v1.w,v0.w) +; V69-NEXT: v1:0 = vshuff(v31,v30,r5) +; V69-NEXT: } +; V69-NEXT: { +; V69-NEXT: v0.h = vpacko(v0.w,v2.w) ; V69-NEXT: } ; V69-NEXT: { ; V69-NEXT: jumpr r31 @@ -72,13 +117,28 @@ define <64 x i16> @mulhu16(<64 x i16> %a0, <64 x i16> %a1) #0 { ; V60-NEXT: v1:0.uw = vmpy(v1.uh,v0.uh) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: r7 = #-4 +; V60-NEXT: r7:6 = combine(#64,#68) +; V60-NEXT: } +; V60-NEXT: { +; V60-NEXT: r5 = #120 +; V60-NEXT: } +; V60-NEXT: { +; V60-NEXT: v1:0 = vshuff(v1,v0,r7) +; V60-NEXT: } +; V60-NEXT: { +; V60-NEXT: v3:2 = vdeal(v0,v0,r6) +; V60-NEXT: } +; V60-NEXT: { +; V60-NEXT: v31:30 = vdeal(v0,v1,r6) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v1:0 = vdeal(v1,v0,r7) +; V60-NEXT: v3:2 = vshuff(v3,v2,r5) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v0.h = vpacko(v1.w,v0.w) +; V60-NEXT: v1:0 = vshuff(v31,v30,r5) +; V60-NEXT: } +; V60-NEXT: { +; V60-NEXT: v0.h = vpacko(v0.w,v2.w) ; V60-NEXT: } ; V60-NEXT: { ; V60-NEXT: jumpr r31 @@ -90,13 +150,28 @@ define <64 x i16> @mulhu16(<64 x i16> %a0, <64 x i16> %a1) #0 { ; V65-NEXT: v1:0.uw = vmpy(v1.uh,v0.uh) ; V65-NEXT: } ; V65-NEXT: { -; V65-NEXT: r7 = #-4 +; V65-NEXT: r7:6 = combine(#64,#68) +; V65-NEXT: } +; V65-NEXT: { +; V65-NEXT: r5 = #120 +; V65-NEXT: } +; V65-NEXT: { +; V65-NEXT: v1:0 = vshuff(v1,v0,r7) +; V65-NEXT: } +; V65-NEXT: { +; V65-NEXT: v3:2 = vdeal(v0,v0,r6) +; V65-NEXT: } +; V65-NEXT: { +; V65-NEXT: v31:30 = vdeal(v0,v1,r6) +; V65-NEXT: } +; V65-NEXT: { +; V65-NEXT: v3:2 = vshuff(v3,v2,r5) ; V65-NEXT: } ; V65-NEXT: { -; V65-NEXT: v1:0 = vdeal(v1,v0,r7) +; V65-NEXT: v1:0 = vshuff(v31,v30,r5) ; V65-NEXT: } ; V65-NEXT: { -; V65-NEXT: v0.h = vpacko(v1.w,v0.w) +; V65-NEXT: v0.h = vpacko(v0.w,v2.w) ; V65-NEXT: } ; V65-NEXT: { ; V65-NEXT: jumpr r31 diff --git a/llvm/test/CodeGen/Hexagon/autohvx/qmul.ll b/llvm/test/CodeGen/Hexagon/autohvx/qmul.ll index 6760875..13634ed4 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/qmul.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/qmul.ll @@ -75,7 +75,10 @@ define void @f2(ptr %a0, ptr %a1, ptr %a2) #0 { ; CHECK-NEXT: v0 = vmem(r1+#0) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: r7 = #-4 +; CHECK-NEXT: r7 = #64 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5:4 = combine(#68,#120) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: r3 = #15 @@ -87,16 +90,28 @@ define void @f2(ptr %a0, ptr %a1, ptr %a2) #0 { ; CHECK-NEXT: v1:0.w = vmpy(v0.h,v1.h) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1:0 = vdeal(v1,v0,r7) +; CHECK-NEXT: v1:0 = vshuff(v1,v0,r7) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v0.uw = vlsr(v0.uw,r3) +; CHECK-NEXT: v3:2 = vdeal(v0,v0,r5) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v31:30 = vdeal(v0,v1,r5) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v3:2 = vshuff(v3,v2,r4) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v1:0 = vshuff(v31,v30,r4) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1.uw = vlsr(v1.uw,r3) +; CHECK-NEXT: v1.uw = vlsr(v2.uw,r3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0.uw = vlsr(v0.uw,r3) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v0.h = vpacke(v1.w,v0.w) +; CHECK-NEXT: v0.h = vpacke(v0.w,v1.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: vmem(r2+#0) = v0 -- 2.7.4