if (LoadN->getNumValues() > 2)
return false;
+ // Only allow byte offsets.
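+ // A non-zero shift is folded into the narrowed load as a byte offset from
+ // the original address, so it must cover a whole number of bytes.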
+ if (ShAmt % 8)
+ return false;
+
+ // Ensure that this isn't going to produce an unsupported unaligned access.
+ if (ShAmt && !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ ExtVT, LoadN->getAddressSpace(),
+ ShAmt / 8))
+ return false;
+
// If the load that we're shrinking is an extload and we're not just
// discarding the extension we can't simply shrink the load. Bail.
// TODO: It would be possible to merge the extensions in some cases.
unsigned ShAmt = 0;
if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
- if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- ShAmt = N01->getZExtValue();
+ SDValue SRL = N0;
+ if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
+ ShAmt = ConstShift->getZExtValue();
unsigned EVTBits = ExtVT.getSizeInBits();
// Is the shift amount a multiple of size of VT?
if ((ShAmt & (EVTBits-1)) == 0) {
  N0 = N0.getOperand(0);
  // Is the load width a multiple of size of VT?
  if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
    return SDValue();
}

// At this point, we must have a load or else we can't do the transform.
if (!isa<LoadSDNode>(N0)) return SDValue();
+ auto *LN0 = cast<LoadSDNode>(N0);
+
// Because a SRL must be assumed to *need* to zero-extend the high bits
// (as opposed to anyext the high bits), we can't combine the zextload
// lowering of SRL and an sextload.
- if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
+ if (LN0->getExtensionType() == ISD::SEXTLOAD)
return SDValue();
// If the shift amount is larger than the input type then we're not
// accessing any of the loaded bytes. If the load was a zextload/extload
// then the result of the shift+trunc is zero/undef (handled elsewhere).
- if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
+ if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
return SDValue();
+
+ // If the SRL is only used by a masking AND, we may be able to adjust
+ // the ExtVT to make the AND redundant.
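+ // For example, if (srl (load i32 p), 8) is only used by an AND with the
+ // mask 0xFF, ExtVT can shrink to i8 and the whole sequence becomes a
+ // single zero-extending byte load from p+1 on a little-endian target.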
+ SDNode *Mask = *(SRL->use_begin());
+ if (Mask->getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(Mask->getOperand(1))) {
+ const APInt &ShiftMask =
+ cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
+ if (ShiftMask.isMask()) {
+ EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
+ ShiftMask.countTrailingOnes());
+ // If the mask is smaller, recompute the type.
+ if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
+     TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
+ ExtVT = MaskedVT;
+ }
+ }
}
}
ret i32 %conv
}
-; CHECK-LABEL: test_shift8_mask8
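+; The 7-bit shift is not a whole byte offset, so the load cannot be
+; narrowed and the ubfx remains.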
+; CHECK-LABEL: test_shift7_mask8
; CHECK-BE: ldr r1, [r0]
; CHECK-COMMON: ldr r1, [r0]
-; CHECK-COMMON: ubfx r1, r1, #8, #8
+; CHECK-COMMON: ubfx r1, r1, #7, #8
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift7_mask8(i32* nocapture %p) {
+entry:
+ %0 = load i32, i32* %p, align 4
+ %shl = lshr i32 %0, 7
+ %and = and i32 %shl, 255
+ store i32 %and, i32* %p, align 4
+ ret void
+}
+
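+; The byte-aligned shift and the 8-bit mask select exactly one byte, so the
+; i32 load is narrowed to an ldrb at the matching byte offset.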
+; CHECK-LABEL: test_shift8_mask8
+; CHECK-BE: ldrb r1, [r0, #2]
+; CHECK-COMMON: ldrb r1, [r0, #1]
; CHECK-COMMON: str r1, [r0]
define arm_aapcscc void @test_shift8_mask8(i32* nocapture %p) {
entry:
  %0 = load i32, i32* %p, align 4
  %shl = lshr i32 %0, 8
  %and = and i32 %shl, 255
  store i32 %and, i32* %p, align 4
  ret void
}
-; CHECK-LABEL: test_shift8_mask16
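+; A 7-bit mask has no legal extending load type, so the mask cannot be
+; folded into the load and the ubfx remains.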
+; CHECK-LABEL: test_shift8_mask7
+; CHECK-BE: ldr r1, [r0]
+; CHECK-COMMON: ldr r1, [r0]
+; CHECK-COMMON: ubfx r1, r1, #8, #7
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift8_mask7(i32* nocapture %p) {
+entry:
+ %0 = load i32, i32* %p, align 4
+ %shl = lshr i32 %0, 8
+ %and = and i32 %shl, 127
+ store i32 %and, i32* %p, align 4
+ ret void
+}
+
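+; The 9-bit shift is not a whole byte offset, so the load is not narrowed.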
+; CHECK-LABEL: test_shift9_mask8
; CHECK-BE: ldr r1, [r0]
; CHECK-COMMON: ldr r1, [r0]
-; CHECK-COMMON: ubfx r1, r1, #8, #16
+; CHECK-COMMON: ubfx r1, r1, #9, #8
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift9_mask8(i32* nocapture %p) {
+entry:
+ %0 = load i32, i32* %p, align 4
+ %shl = lshr i32 %0, 9
+ %and = and i32 %shl, 255
+ store i32 %and, i32* %p, align 4
+ ret void
+}
+
+; CHECK-LABEL: test_shift8_mask16
+; CHECK-ALIGN: ldr r1, [r0]
+; CHECK-ALIGN: ubfx r1, r1, #8, #16
+; CHECK-BE: ldrh r1, [r0, #1]
+; CHECK-ARM: ldrh r1, [r0, #1]
+; CHECK-THUMB: ldrh.w r1, [r0, #1]
; CHECK-COMMON: str r1, [r0]
define arm_aapcscc void @test_shift8_mask16(i32* nocapture %p) {
entry:
  %0 = load i32, i32* %p, align 4
  %shl = lshr i32 %0, 8
  %and = and i32 %shl, 65535
  store i32 %and, i32* %p, align 4
  ret void
}
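+
+; The 15-bit shift is not a whole byte offset, so the load is not narrowed.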
+; CHECK-LABEL: test_shift15_mask16
+; CHECK-COMMON: ldr r1, [r0]
+; CHECK-COMMON: ubfx r1, r1, #15, #16
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift15_mask16(i32* nocapture %p) {
+entry:
+ %0 = load i32, i32* %p, align 4
+ %shl = lshr i32 %0, 15
+ %and = and i32 %shl, 65535
+ store i32 %and, i32* %p, align 4
+ ret void
+}
+
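+; The 16-bit shift is two whole bytes, so a halfword load is used, but the
+; 15-bit mask still needs a bfc to clear the top bit.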
+; CHECK-LABEL: test_shift16_mask15
+; CHECK-BE: ldrh r1, [r0]
+; CHECK-COMMON: ldrh r1, [r0, #2]
+; CHECK-COMMON: bfc r1, #15, #17
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift16_mask15(i32* nocapture %p) {
+entry:
+ %0 = load i32, i32* %p, align 4
+ %shl = lshr i32 %0, 16
+ %and = and i32 %shl, 32767
+ store i32 %and, i32* %p, align 4
+ ret void
+}
+
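+; After shifting right by 8 only 24 bits remain, so the 24-bit mask is
+; redundant and a plain shift suffices.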
+; CHECK-LABEL: test_shift8_mask24
+; CHECK-BE: ldr r1, [r0]
+; CHECK-COMMON: ldr r1, [r0]
+; CHECK-ARM: lsr r1, r1, #8
+; CHECK-THUMB: lsrs r1, r1, #8
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift8_mask24(i32* nocapture %p) {
+entry:
+ %0 = load i32, i32* %p, align 4
+ %shl = lshr i32 %0, 8
+ %and = and i32 %shl, 16777215
+ store i32 %and, i32* %p, align 4
+ ret void
+}
+
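+; After shifting right by 24 only 8 bits remain, so the 16-bit mask is
+; redundant and the load is narrowed to a single byte.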
+; CHECK-LABEL: test_shift24_mask16
+; CHECK-BE: ldrb r1, [r0]
+; CHECK-COMMON: ldrb r1, [r0, #3]
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift24_mask16(i32* nocapture %p) {
+entry:
+ %0 = load i32, i32* %p, align 4
+ %shl = lshr i32 %0, 24
+ %and = and i32 %shl, 65535
+ store i32 %and, i32* %p, align 4
+ ret void
+}
+
; CHECK-LABEL: test_sext_shift8_mask8
; CHECK-BE: ldrb r0, [r0]
; CHECK-COMMON: ldrb r0, [r0, #1]
store i32 %and, i32* %q, align 4
ret void
}
+
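+; The 32-bit shift of an i64 load plus the 16-bit mask select a single
+; halfword, which is loaded directly at the matching offset for each
+; endianness.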
+; CHECK-LABEL: trunc_i64_mask_srl
+; CHECK-ARM: ldrh r2, [r1, #4]
+; CHECK-BE: ldrh r2, [r1, #2]
+define i1 @trunc_i64_mask_srl(i32 zeroext %AttrArgNo, i64* %ptr) {
+entry:
+ %bf.load.i = load i64, i64* %ptr, align 8
+ %bf.lshr.i = lshr i64 %bf.load.i, 32
+ %0 = trunc i64 %bf.lshr.i to i32
+ %bf.cast.i = and i32 %0, 65535
+ %cmp.i = icmp ugt i32 %bf.cast.i, %AttrArgNo
+ ret i1 %cmp.i
+}
; CHECK-NEXT: movzbl %ah, %eax
; CHECK-NEXT: movq %rax, %r10
; CHECK-NEXT: movzbl %dh, %edx
-; CHECK-NEXT: movzbl %ch, %eax
-; CHECK-NEXT: movq %rax, %r11
+; CHECK-NEXT: movzbl %ch, %ebp
; CHECK-NEXT: movq %r8, %rax
; CHECK-NEXT: movzbl %ah, %ecx
; CHECK-NEXT: movq %r9, %rax
-; CHECK-NEXT: movzbl %ah, %ebp
-; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT: movzbl %ah, %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebx
-; CHECK-NEXT: movzbl %bh, %edi
+; CHECK-NEXT: movzbl %ah, %ebx
+; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-NEXT: addq %r10, %rsi
-; CHECK-NEXT: addq %r11, %rdx
+; CHECK-NEXT: addq %rbp, %rdx
; CHECK-NEXT: addq %rsi, %rdx
-; CHECK-NEXT: addq %rbp, %rcx
+; CHECK-NEXT: addq %rbx, %rcx
; CHECK-NEXT: addq %rdi, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: addq %rdx, %rax
; GNUX32-NEXT: movzbl %ah, %eax
; GNUX32-NEXT: movq %rax, %r10
; GNUX32-NEXT: movzbl %dh, %edx
-; GNUX32-NEXT: movzbl %ch, %eax
-; GNUX32-NEXT: movq %rax, %r11
+; GNUX32-NEXT: movzbl %ch, %ebp
; GNUX32-NEXT: movq %r8, %rax
; GNUX32-NEXT: movzbl %ah, %ecx
; GNUX32-NEXT: movq %r9, %rax
-; GNUX32-NEXT: movzbl %ah, %ebp
-; GNUX32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GNUX32-NEXT: movzbl %ah, %eax
-; GNUX32-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; GNUX32-NEXT: movzbl %bh, %edi
+; GNUX32-NEXT: movzbl %ah, %ebx
+; GNUX32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; GNUX32-NEXT: movzbl {{[0-9]+}}(%esp), %edi
; GNUX32-NEXT: addq %r10, %rsi
-; GNUX32-NEXT: addq %r11, %rdx
+; GNUX32-NEXT: addq %rbp, %rdx
; GNUX32-NEXT: addq %rsi, %rdx
-; GNUX32-NEXT: addq %rbp, %rcx
+; GNUX32-NEXT: addq %rbx, %rcx
; GNUX32-NEXT: addq %rdi, %rax
; GNUX32-NEXT: addq %rcx, %rax
; GNUX32-NEXT: addq %rdx, %rax