/// (X ^ Y) != X -> Y != 0
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo);
+ /// Match shifts whose shift amount is greater than or equal to the bitwidth
+ /// of the operation.
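+ /// Such a shift produces an undefined result, so the instruction can be
+ /// replaced with G_IMPLICIT_DEF.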
+ bool matchShiftsTooBig(MachineInstr &MI);
+
private:
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a post-indexing operation.
[{ return Helper.matchPtrAddImmedChain(*${d}, ${matchinfo}); }]),
(apply [{ Helper.applyPtrAddImmedChain(*${d}, ${matchinfo}); }])>;
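+// Fold shifts by an amount greater than or equal to the bitwidth to undef.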
+def shifts_too_big : GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_SHL, G_ASHR, G_LSHR):$root,
+ [{ return Helper.matchShiftsTooBig(*${root}); }]),
+ (apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
+
// Fold shift (shift base, x), y -> shift base, (x+y), if the shifts are the same.
def shift_immed_matchdata : GIDefMatchData<"RegisterImmPair">;
def shift_immed_chain : GICombineRule<
def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
extract_vec_elt_combines, combines_for_extload,
combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
- simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands,
+ simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
reassocs, ptr_add_immed_chain,
shl_ashr_to_sext_inreg, sext_inreg_of_load,
width_reduction_combines, select_combines,
return CmpInst::isEquality(Pred) && Y.isValid();
}
+bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
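+ // The shift amount is operand 2 of G_SHL/G_LSHR/G_ASHR.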
+ Register ShiftReg = MI.getOperand(2).getReg();
+ LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
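+ // An amount of at least the scalar bit width makes the result undefined.
+ // The unsigned compare also catches negative constant amounts.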
+ auto IsShiftTooBig = [&](const Constant *C) {
+ auto *CI = dyn_cast<ConstantInt>(C);
+ return CI && CI->uge(ResTy.getScalarSizeInBits());
+ };
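+ // matchUnaryPredicate succeeds for a scalar constant or when every element
+ // of a constant build vector satisfies the predicate, so a vector shift
+ // only folds when all lanes are out of range.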
+ return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+---
+name: shl_by_ge_bw
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: shl_by_ge_bw
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[DEF]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %1:_(s32) = COPY $w0
+ %0:_(s16) = G_TRUNC %1(s32)
+ %2:_(s16) = G_CONSTANT i16 20
+ %3:_(s16) = G_SHL %0, %2(s16)
+ %4:_(s32) = G_ANYEXT %3(s16)
+ $w0 = COPY %4(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: lshr_by_ge_bw
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: lshr_by_ge_bw
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[DEF]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %1:_(s32) = COPY $w0
+ %0:_(s16) = G_TRUNC %1(s32)
+ %2:_(s16) = G_CONSTANT i16 16
+ %3:_(s16) = G_LSHR %0, %2(s16)
+ %4:_(s32) = G_ANYEXT %3(s16)
+ $w0 = COPY %4(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: ashr_by_ge_bw
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0' }
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: ashr_by_ge_bw
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[DEF]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %1:_(s32) = COPY $w0
+ %0:_(s16) = G_TRUNC %1(s32)
+ %2:_(s16) = G_CONSTANT i16 20
+ %3:_(s16) = G_ASHR %0, %2(s16)
+ %4:_(s32) = G_ANYEXT %3(s16)
+ $w0 = COPY %4(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
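+# Every lane shifts by the full element width (32), so the whole vector
+# shift folds to undef.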
+name: shl_by_ge_bw_vector
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$q0' }
+body: |
+ bb.1:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: shl_by_ge_bw_vector
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %shl:_(<4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $q0 = COPY %shl(<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %1:_(<4 x s32>) = COPY $q0
+ %0:_(s32) = G_CONSTANT i32 32
+ %bv:_(<4 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0
+ %shl:_(<4 x s32>) = G_SHL %1, %bv(<4 x s32>)
+ $q0 = COPY %shl(<4 x s32>)
+ RET_ReallyLR implicit $q0
+
+...
+---
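+# One lane shifts by an in-range amount (4 < 32), so the combine must not
+# fire and the shift is left untouched.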
+name: shl_by_ge_bw_vector_partial
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$q0' }
+body: |
+ bb.1:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: shl_by_ge_bw_vector_partial
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; CHECK-NEXT: %small:_(s32) = G_CONSTANT i32 4
+ ; CHECK-NEXT: %bv:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), %small(s32)
+ ; CHECK-NEXT: %shl:_(<4 x s32>) = G_SHL [[COPY]], %bv(<4 x s32>)
+ ; CHECK-NEXT: $q0 = COPY %shl(<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %1:_(<4 x s32>) = COPY $q0
+ %0:_(s32) = G_CONSTANT i32 32
+ %small:_(s32) = G_CONSTANT i32 4
+ %bv:_(<4 x s32>) = G_BUILD_VECTOR %0, %0, %0, %small
+ %shl:_(<4 x s32>) = G_SHL %1, %bv(<4 x s32>)
+ $q0 = COPY %shl(<4 x s32>)
+ RET_ReallyLR implicit $q0
+...
; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_16
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %arg
+ ; CHECK-NEXT: %shift:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %shift(s16)
+ ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext
; CHECK-NEXT: $vgpr0 = COPY %result(s32)
%arg:_(s32) = COPY $vgpr0
%trunc:_(s16) = G_TRUNC %arg
; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_24
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %arg:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE3 %arg
+ ; CHECK-NEXT: %shift:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %shift(s16)
+ ; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext
; CHECK-NEXT: $vgpr0 = COPY %result(s32)
%arg:_(s32) = COPY $vgpr0
%trunc:_(s16) = G_TRUNC %arg
; CHECK-LABEL: name: narrow_ashr_s64_64
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
- ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = G_CONSTANT i32 64
%2:_(s64) = G_ASHR %0, %1
; CHECK-LABEL: name: narrow_ashr_s64_65
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65
- ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ASHR]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = G_CONSTANT i32 65
%2:_(s64) = G_ASHR %0, %1
; CHECK-LABEL: name: narrow_lshr_s64_64
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[C]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = G_CONSTANT i32 64
%2:_(s64) = G_LSHR %0, %1
; CHECK-LABEL: name: narrow_lshr_s64_65
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[C]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = G_CONSTANT i32 65
%2:_(s64) = G_LSHR %0, %1
; CHECK-LABEL: name: narrow_shl_s64_64
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = G_CONSTANT i32 64
%2:_(s64) = G_SHL %0, %1
; CHECK-LABEL: name: narrow_shl_s64_65
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = G_CONSTANT i32 65
%2:_(s64) = G_SHL %0, %1
;
; GFX8-LABEL: s_sext_inreg_v4i16_14:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s0, 0, 0x100000
+; GFX8-NEXT: s_bfe_u32 s0, -1, 0x100000
; GFX8-NEXT: s_mov_b32 s1, s0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_sext_inreg_v8i16_11:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_bfe_u32 s4, 0, 0x100000
+; GFX8-NEXT: s_bfe_u32 s4, -1, 0x100000
; GFX8-NEXT: v_mov_b32_e32 v0, s4
; GFX8-NEXT: v_mov_b32_e32 v1, s4
; GFX8-NEXT: v_mov_b32_e32 v2, s4