From 83f5fa519ec5efcad200e4d9eb425e747d58088b Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Fri, 16 Apr 2021 11:39:44 +0100 Subject: [PATCH] [CodeGen] Improve code generation for clamping of constant indices with scalable vectors When trying to clamp a constant index into a scalable vector we can test if the index is less than the minimum number of elements in the vector. If so, we can simply return the index because we know it is guaranteed to fit inside the vector. Differential Revision: https://reviews.llvm.org/D100639 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3 + llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 6 + .../CodeGen/AArch64/named-vector-shuffles-sve.ll | 165 ++++++--------------- llvm/test/CodeGen/AArch64/split-vector-insert.ll | 74 +++++---- llvm/test/CodeGen/AArch64/sve-extract-vector.ll | 31 +--- llvm/test/CodeGen/AArch64/sve-insert-vector.ll | 62 +------- .../RISCV/rvv/fixed-vectors-insert-subvector.ll | 20 +-- 7 files changed, 104 insertions(+), 257 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c7d619d..0f53d8d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -23040,6 +23040,9 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit); while (StoreSDNode *Chain = dyn_cast(STChain->getChain())) { + if (Chain->getMemoryVT().isScalableVector()) + return false; + // If the chain has more than one use, then we can't reorder the mem ops. if (!SDValue(Chain, 0)->hasOneUse()) break; diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 870c4bf..e28da9c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7715,6 +7715,12 @@ static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, EVT IdxVT = Idx.getValueType(); unsigned NElts = VecVT.getVectorMinNumElements(); if (VecVT.isScalableVector()) { + // If this is a constant index and we know the value is less than the + // minimum number of elements then it's safe to return Idx. + if (auto *IdxCst = dyn_cast(Idx)) + if (IdxCst->getZExtValue() < NElts) + return Idx; + SDValue VS = DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts)); diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll index a994122..5e1b4b5 100644 --- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll +++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll @@ -12,15 +12,11 @@ define @splice_nxv16i8_first_idx( %a, @splice_nxv16i8_last_idx( %a, @splice_nxv8i16_first_idx( %a, @splice_nxv8i16_last_idx( %a, @splice_nxv4i32_first_idx( %a, @splice_nxv4i32_last_idx( %a, @splice_nxv2i64_first_idx( %a, @splice_nxv2i64_last_idx( %a, @splice_nxv8f16_first_idx( %a, @splice_nxv8f16_last_idx( %a, @splice_nxv4f32_first_idx( %a, < ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntw x9 -; CHECK-NEXT: sub x9, x9, #1 // =1 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: cmp x9, #0 // =0 ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] -; CHECK-NEXT: csel x9, x9, xzr, lo -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2] +; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -362,16 +319,12 @@ define @splice_nxv4f32_last_idx( %a, @splice_nxv2f64_first_idx( %a, ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: sub x9, x9, #1 // =1 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: cmp x9, #0 // =0 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: csel x9, x9, xzr, lo -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -428,15 +377,12 @@ define @splice_nxv2f64_last_idx( %a, ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: sub x9, x9, #1 // =1 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: cmp x9, #1 // =1 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: csinc x9, x9, xzr, lo -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3] +; CHECK-NEXT: orr x8, x8, #0x8 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -473,17 +419,14 @@ define @splice_nxv2i1_idx( %a, @splice_nxv4i1_idx( %a, @splice_nxv8i1_idx( %a, @splice_nxv16i1_idx( %a, @splice_nxv2i8_idx( %a, @splice_nxv8i32_idx( %a, @test_nxv2i64_v8i64( %a, <8 x i64> % ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: cntd x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #0 // =0 -; CHECK-NEXT: csel x10, x8, xzr, lo ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: lsl x10, x10, #3 +; CHECK-NEXT: cntd x9 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: str q1, [x9, x10] +; CHECK-NEXT: str q1, [sp] +; CHECK-NEXT: sub x9, x9, #1 // =1 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] -; CHECK-NEXT: mov w9, #2 -; CHECK-NEXT: cmp x8, #2 // =2 -; CHECK-NEXT: csel x9, x8, x9, lo +; CHECK-NEXT: mov w8, #2 +; CHECK-NEXT: cmp x9, #2 // =2 +; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: addvl x10, sp, #1 -; CHECK-NEXT: lsl x9, x9, #3 +; CHECK-NEXT: lsl x8, x8, #3 ; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl] -; CHECK-NEXT: str q2, [x10, x9] +; CHECK-NEXT: str q2, [x10, x8] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #1, mul vl] -; CHECK-NEXT: mov w9, #4 -; CHECK-NEXT: cmp x8, #4 // =4 -; CHECK-NEXT: csel x9, x8, x9, lo +; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: cmp x9, #4 // =4 +; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: addvl x10, sp, #2 -; CHECK-NEXT: lsl x9, x9, #3 +; CHECK-NEXT: lsl x8, x8, #3 ; CHECK-NEXT: st1d { z0.d }, p0, [sp, #2, mul vl] -; CHECK-NEXT: str q3, [x10, x9] +; CHECK-NEXT: str q3, [x10, x8] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #2, 
mul vl] -; CHECK-NEXT: mov w9, #6 -; CHECK-NEXT: cmp x8, #6 // =6 -; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov w8, #6 +; CHECK-NEXT: cmp x9, #6 // =6 +; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: addvl x10, sp, #3 ; CHECK-NEXT: lsl x8, x8, #3 ; CHECK-NEXT: st1d { z0.d }, p0, [sp, #3, mul vl] @@ -57,6 +53,7 @@ define @test_nxv2i64_v8i64( %a, <8 x i64> % ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret + %r = call @llvm.experimental.vector.insert.nxv2i64.v8i64( %a, <8 x i64> %b, i64 0) ret %r } @@ -73,35 +70,31 @@ define @test_nxv2f64_v8f64( %a, <8 x ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: cntd x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #0 // =0 -; CHECK-NEXT: csel x10, x8, xzr, lo ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: lsl x10, x10, #3 +; CHECK-NEXT: cntd x9 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: str q1, [x9, x10] +; CHECK-NEXT: str q1, [sp] +; CHECK-NEXT: sub x9, x9, #1 // =1 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] -; CHECK-NEXT: mov w9, #2 -; CHECK-NEXT: cmp x8, #2 // =2 -; CHECK-NEXT: csel x9, x8, x9, lo +; CHECK-NEXT: mov w8, #2 +; CHECK-NEXT: cmp x9, #2 // =2 +; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: addvl x10, sp, #1 -; CHECK-NEXT: lsl x9, x9, #3 +; CHECK-NEXT: lsl x8, x8, #3 ; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl] -; CHECK-NEXT: str q2, [x10, x9] +; CHECK-NEXT: str q2, [x10, x8] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #1, mul vl] -; CHECK-NEXT: mov w9, #4 -; CHECK-NEXT: cmp x8, #4 // =4 -; CHECK-NEXT: csel x9, x8, x9, lo +; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: cmp x9, #4 // =4 +; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: addvl x10, sp, #2 -; CHECK-NEXT: lsl x9, x9, #3 +; CHECK-NEXT: lsl x8, x8, #3 ; CHECK-NEXT: st1d { z0.d }, p0, [sp, #2, mul vl] -; CHECK-NEXT: str q3, [x10, x9] +; CHECK-NEXT: str q3, [x10, x8] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #2, mul vl] -; CHECK-NEXT: mov w9, #6 -; CHECK-NEXT: cmp x8, #6 // =6 -; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov w8, #6 +; CHECK-NEXT: cmp x9, #6 // =6 +; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: addvl x10, sp, #3 ; CHECK-NEXT: lsl x8, x8, #3 ; CHECK-NEXT: st1d { z0.d }, p0, [sp, #3, mul vl] @@ -110,6 +103,7 @@ define @test_nxv2f64_v8f64( %a, <8 x ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret + %r = call @llvm.experimental.vector.insert.nxv2f64.v8f64( %a, <8 x double> %b, i64 0) ret %r } diff --git a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll index 62832a8..5aa7eab 100644 --- a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll @@ -17,15 +17,9 @@ define <2 x i64> @extract_v2i64_nxv2i64_idx1( %vec) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntd x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #1 // =1 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: csinc x8, x8, xzr, lo ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: lsl x8, x8, #3 -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: ldr q0, [x9, x8] +; CHECK-NEXT: ldur q0, [sp, #8] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -49,15 +43,9 @@ define <4 x i32> @extract_v4i32_nxv4i32_idx1( %vec) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntw x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #1 // =1 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: csinc x8, x8, xzr, lo ; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: lsl x8, x8, #2 -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: ldr q0, [x9, x8] +; CHECK-NEXT: ldur q0, [sp, #4] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -81,15 +69,9 @@ define <8 x i16> @extract_v8i16_nxv8i16_idx1( %vec) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cnth x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #1 // =1 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: csinc x8, x8, xzr, lo ; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: lsl x8, x8, #1 -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: ldr q0, [x9, x8] +; CHECK-NEXT: ldur q0, [sp, #2] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -113,14 +95,9 @@ define <16 x i8> @extract_v16i8_nxv16i8_idx1( %vec) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: rdvl x8, #1 -; CHECK-NEXT: sub x8, x8, #1 // =1 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: cmp x8, #1 // =1 ; CHECK-NEXT: st1b { z0.b }, p0, [sp] -; CHECK-NEXT: csinc x8, x8, xzr, lo -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: ldr q0, [x9, x8] +; CHECK-NEXT: ldur q0, [sp, #1] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll index 3950a66..dd4d646 100644 --- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -6,15 +6,9 @@ define @insert_v2i64_nxv2i64( %vec, <2 x i6 ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntd x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #0 // =0 -; CHECK-NEXT: csel x8, x8, xzr, lo ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: lsl x8, x8, #3 -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: str q1, [sp] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -28,15 +22,9 @@ define @insert_v2i64_nxv2i64_idx1( %vec, <2 ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntd x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #1 // =1 -; CHECK-NEXT: csinc x8, x8, xzr, lo ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: lsl x8, x8, #3 -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: stur q1, [sp, #8] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -50,15 +38,9 @@ define @insert_v4i32_nxv4i32( %vec, <4 x i3 ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntw x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #0 // =0 -; CHECK-NEXT: csel x8, x8, xzr, lo ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: lsl x8, x8, #2 -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: str q1, [sp] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -72,15 +54,9 @@ define @insert_v4i32_nxv4i32_idx1( %vec, <4 ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntw x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #1 // =1 -; CHECK-NEXT: csinc x8, x8, xzr, lo ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: lsl x8, x8, #2 -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: stur q1, [sp, #4] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -94,15 +70,9 @@ define @insert_v8i16_nxv8i16( %vec, <8 x i1 ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cnth x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #0 // =0 -; CHECK-NEXT: csel x8, x8, xzr, lo ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: lsl x8, x8, #1 -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: str q1, [sp] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -116,15 +86,9 @@ define @insert_v8i16_nxv8i16_idx1( %vec, <8 ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cnth x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #1 // =1 -; CHECK-NEXT: csinc x8, x8, xzr, lo ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: lsl x8, x8, #1 -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: stur q1, [sp, #2] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -138,14 +102,9 @@ define @insert_v16i8_nxv16i8( %vec, <16 x i ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: rdvl x8, #1 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #0 // =0 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: csel x8, x8, xzr, lo -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1b { z0.b }, p0, [sp] -; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: str q1, [sp] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -159,14 +118,9 @@ define @insert_v16i8_nxv16i8_idx1( %vec, <1 ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: rdvl x8, #1 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #1 // =1 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: csinc x8, x8, xzr, lo -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1b { z0.b }, p0, [sp] -; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: stur q1, [sp, #1] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll index 7978fb7..f5606c4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -615,22 +615,12 @@ define void @insert_v2i64_nxv16i64_hi(<2 x i64>* %psv, * %out ; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: vsetivli a2, 2, e64,m1,ta,mu ; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: addi a0, sp, 80 +; CHECK-NEXT: vse64.v v25, (a0) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: slli a2, a0, 4 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: addi a3, zero, 8 -; CHECK-NEXT: bltu a2, a3, .LBB29_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: addi a2, zero, 8 -; CHECK-NEXT: .LBB29_2: -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: add a2, a3, a2 -; CHECK-NEXT: vsetivli a4, 2, e64,m1,ta,mu -; CHECK-NEXT: vse64.v v25, (a2) -; CHECK-NEXT: slli a0, a0, 6 -; CHECK-NEXT: add a2, a3, a0 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: add a2, a2, a0 ; CHECK-NEXT: vl8re64.v v8, (a2) ; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vl8re64.v v16, (a2) -- 2.7.4
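Note on the transformation (illustrative commentary, not part of the patch itself):
clampDynamicVectorIndex() normally clamps an index into a scalable vector with an
unsigned min against vscale * MinNumElts - 1, which is what produced the cnt*/cmp/csel
(AArch64) and branch (RISC-V) sequences removed from the CHECK lines above. Because
vscale is at least 1 at run time, a constant index that is already smaller than the
minimum element count can never exceed that bound, so the clamp can be skipped and the
index used directly. Below is a minimal standalone sketch of that rule using plain
integers instead of SelectionDAG nodes; the function and parameter names are
illustrative only, not LLVM APIs.

  #include <algorithm>
  #include <cstdint>

  // Clamp an index into a scalable vector that holds VScale * MinElts elements
  // at run time. This mirrors the arithmetic performed when lowering, not the
  // actual SelectionDAG code.
  uint64_t clampScalableIndex(uint64_t Idx, uint64_t MinElts, uint64_t VScale) {
    // Fast path added by this patch: a constant index below the minimum
    // element count is always in bounds (VScale >= 1), so keep it as-is.
    if (Idx < MinElts)
      return Idx;
    // Otherwise clamp to the last lane guaranteed to exist:
    // VScale * MinElts - 1, matching the existing unsigned-min clamp.
    return std::min(Idx, VScale * MinElts - 1);
  }

For example, inserting a <2 x i64> subvector into a <vscale x 2 x i64> vector at
index 1 has MinElts = 2, so Idx = 1 is returned unchanged for every legal VScale;
that is why insert_v2i64_nxv2i64_idx1 above now stores the subvector at a fixed
stack offset (stur q1, [sp, #8]) instead of computing the offset with cntd/cmp/csinc.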