From 314dbde12cd2ae2809cbba6de0504b034a289a40 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 19 Jun 2022 14:34:03 -0700 Subject: [PATCH] [DAGCombiner][ARM][RISCV] Teach ShrinkLoadReplaceStoreWithStore to use truncstore. The VT we want to shrink to may not be legal especially after type legalization. Fixes PR56110. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D128135 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 19 +++++- .../test/CodeGen/ARM/illegal-bitfield-loadstore.ll | 68 +++++++++------------- llvm/test/CodeGen/RISCV/pr56110.ll | 21 +++++++ 3 files changed, 65 insertions(+), 43 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/pr56110.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f5ab0c2..ccc9ad4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17369,11 +17369,19 @@ ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, // Check that it is legal on the target to do this. It is legal if the new // VT we're shrinking to (i8/i16/i32) is legal or we're still before type - // legalization (and the target doesn't explicitly think this is a bad idea). + // legalization. If the source type is legal, but the store type isn't, see + // if we can use a truncating store. MVT VT = MVT::getIntegerVT(NumBytes * 8); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!DC->isTypeLegal(VT)) + bool UseTruncStore; + if (DC->isTypeLegal(VT)) + UseTruncStore = false; + else if (TLI.isTypeLegal(IVal.getValueType()) && + TLI.isTruncStoreLegal(IVal.getValueType(), VT)) + UseTruncStore = true; + else return SDValue(); + // Check that the target doesn't think this is a bad idea. if (St->getMemOperand() && !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, *St->getMemOperand())) @@ -17401,10 +17409,15 @@ ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL); } + ++OpsNarrowed; + if (UseTruncStore) + return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr, + St->getPointerInfo().getWithOffset(StOffset), + VT, St->getOriginalAlign()); + // Truncate down to the new size. IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal); - ++OpsNarrowed; return DAG .getStore(St->getChain(), SDLoc(St), IVal, Ptr, St->getPointerInfo().getWithOffset(StOffset), diff --git a/llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll b/llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll index 2922e0e..160646b 100644 --- a/llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll +++ b/llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll @@ -90,19 +90,16 @@ define void @i56_or(i56* %a) { ; ; BE-LABEL: i56_or: ; BE: @ %bb.0: -; BE-NEXT: mov r1, r0 -; BE-NEXT: ldr r12, [r0] -; BE-NEXT: ldrh r2, [r1, #4]! -; BE-NEXT: ldrb r3, [r1, #2] +; BE-NEXT: ldr r1, [r0] +; BE-NEXT: strb r1, [r0, #3] +; BE-NEXT: ldrh r2, [r0, #4]! +; BE-NEXT: ldrb r3, [r0, #2] ; BE-NEXT: orr r2, r3, r2, lsl #8 -; BE-NEXT: orr r2, r2, r12, lsl #24 -; BE-NEXT: orr r2, r2, #384 -; BE-NEXT: strb r2, [r1, #2] -; BE-NEXT: lsr r3, r2, #8 -; BE-NEXT: strh r3, [r1] -; BE-NEXT: bic r1, r12, #255 -; BE-NEXT: orr r1, r1, r2, lsr #24 -; BE-NEXT: str r1, [r0] +; BE-NEXT: orr r1, r2, r1, lsl #24 +; BE-NEXT: orr r1, r1, #384 +; BE-NEXT: strb r1, [r0, #2] +; BE-NEXT: lsr r1, r1, #8 +; BE-NEXT: strh r1, [r0] ; BE-NEXT: mov pc, lr %aa = load i56, i56* %a %b = or i56 %aa, 384 @@ -121,20 +118,17 @@ define void @i56_and_or(i56* %a) { ; ; BE-LABEL: i56_and_or: ; BE: @ %bb.0: -; BE-NEXT: mov r1, r0 +; BE-NEXT: ldr r1, [r0] ; BE-NEXT: mov r2, #128 -; BE-NEXT: ldrh r12, [r1, #4]! -; BE-NEXT: ldrb r3, [r1, #2] -; BE-NEXT: strb r2, [r1, #2] +; BE-NEXT: strb r1, [r0, #3] +; BE-NEXT: ldrh r12, [r0, #4]! +; BE-NEXT: ldrb r3, [r0, #2] +; BE-NEXT: strb r2, [r0, #2] ; BE-NEXT: orr r2, r3, r12, lsl #8 -; BE-NEXT: ldr r12, [r0] -; BE-NEXT: orr r2, r2, r12, lsl #24 -; BE-NEXT: orr r2, r2, #384 -; BE-NEXT: lsr r3, r2, #8 -; BE-NEXT: strh r3, [r1] -; BE-NEXT: bic r1, r12, #255 -; BE-NEXT: orr r1, r1, r2, lsr #24 -; BE-NEXT: str r1, [r0] +; BE-NEXT: orr r1, r2, r1, lsl #24 +; BE-NEXT: orr r1, r1, #384 +; BE-NEXT: lsr r1, r1, #8 +; BE-NEXT: strh r1, [r0] ; BE-NEXT: mov pc, lr %b = load i56, i56* %a, align 1 @@ -155,22 +149,16 @@ define void @i56_insert_bit(i56* %a, i1 zeroext %bit) { ; ; BE-LABEL: i56_insert_bit: ; BE: @ %bb.0: -; BE-NEXT: .save {r11, lr} -; BE-NEXT: push {r11, lr} -; BE-NEXT: mov r2, r0 -; BE-NEXT: ldr lr, [r0] -; BE-NEXT: ldrh r12, [r2, #4]! -; BE-NEXT: ldrb r3, [r2, #2] -; BE-NEXT: orr r12, r3, r12, lsl #8 -; BE-NEXT: orr r3, r12, lr, lsl #24 -; BE-NEXT: bic r3, r3, #8192 -; BE-NEXT: orr r1, r3, r1, lsl #13 -; BE-NEXT: lsr r3, r1, #8 -; BE-NEXT: strh r3, [r2] -; BE-NEXT: bic r2, lr, #255 -; BE-NEXT: orr r1, r2, r1, lsr #24 -; BE-NEXT: str r1, [r0] -; BE-NEXT: pop {r11, lr} +; BE-NEXT: ldr r2, [r0] +; BE-NEXT: strb r2, [r0, #3] +; BE-NEXT: ldrh r12, [r0, #4]! +; BE-NEXT: ldrb r3, [r0, #2] +; BE-NEXT: orr r3, r3, r12, lsl #8 +; BE-NEXT: orr r2, r3, r2, lsl #24 +; BE-NEXT: bic r2, r2, #8192 +; BE-NEXT: orr r1, r2, r1, lsl #13 +; BE-NEXT: lsr r1, r1, #8 +; BE-NEXT: strh r1, [r0] ; BE-NEXT: mov pc, lr %extbit = zext i1 %bit to i56 %b = load i56, i56* %a, align 1 diff --git a/llvm/test/CodeGen/RISCV/pr56110.ll b/llvm/test/CodeGen/RISCV/pr56110.ll new file mode 100644 index 0000000..fa441f5 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/pr56110.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv32 | FileCheck %s +; RUN: llc < %s -mtriple=riscv32 -mattr=+unaligned-scalar-mem | FileCheck %s + +define void @foo_set(ptr nocapture noundef %a, i32 noundef %v) { +; CHECK-LABEL: foo_set: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: srli a2, a1, 8 +; CHECK-NEXT: sb a1, 3(a0) +; CHECK-NEXT: sb a2, 4(a0) +; CHECK-NEXT: ret +entry: + %bf.load = load i96, ptr %a, align 1 + %0 = and i32 %v, 65535 + %bf.value = zext i32 %0 to i96 + %bf.shl = shl nuw nsw i96 %bf.value, 24 + %bf.clear = and i96 %bf.load, -1099494850561 + %bf.set = or i96 %bf.clear, %bf.shl + store i96 %bf.set, ptr %a, align 1 + ret void +} -- 2.7.4