// COPY was a workaround for a tablegen bug with multiple outputs, caused by
// S_LSHL_B32's implicit scc def. The patterns below now emit S_LSHL_B32
// directly.
+// A zero low element with $src1 in the high element is $src1 shifted left
+// by 16. The shift amount is typed i32 to match the other S_LSHL_B32 patterns.
def : GCNPat <
- (v2i16 (build_vector (i16 0), i16:$src1)),
- (v2i16 (COPY (S_LSHL_B32 i16:$src1, (i16 16))))
+ (v2i16 (build_vector (i16 0), (i16 SReg_32:$src1))),
+ (S_LSHL_B32 SReg_32:$src1, (i32 16))
>;
+// When the high element is undef, the packed value is just the register
+// holding $src0. SGPR and VGPR sources get separate patterns so the copy
+// is constrained to the matching register class.
def : GCNPat <
- (v2i16 (build_vector i16:$src0, (i16 undef))),
- (v2i16 (COPY $src0))
+ (v2i16 (build_vector (i16 SReg_32:$src0), (i16 undef))),
+ (COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
+>;
+
+def : GCNPat <
+ (v2i16 (build_vector (i16 VGPR_32:$src0), (i16 undef))),
+ (COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32)
>;
+// f16 variant of the undef-high-element case: the result is a copy of $src0.
+// NOTE(review): this still emits a bare COPY with no register class, unlike
+// the COPY_TO_REGCLASS forms used for v2i16 -- confirm that is intended.
def : GCNPat <
(v2f16 (build_vector f16:$src0, (f16 undef))),
- (v2f16 (COPY $src0))
+ (COPY $src0)
>;
+// Undef low element with an SGPR $src1 in the high element: shift $src1 left
+// by 16 so it lands in the high half.
def : GCNPat <
- (v2i16 (build_vector (i16 undef), i16:$src1)),
- (v2i16 (COPY (S_LSHL_B32 $src1, (i32 16))))
+ (v2i16 (build_vector (i16 undef), (i16 SReg_32:$src1))),
+ (S_LSHL_B32 SReg_32:$src1, (i32 16))
>;
+// f16 variant of the undef-low-element case: shift the SGPR $src1 left by 16
+// so it lands in the high half.
def : GCNPat <
- (v2f16 (build_vector (f16 undef), f16:$src1)),
- (v2f16 (COPY (S_LSHL_B32 $src1, (i32 16))))
+ (v2f16 (build_vector (f16 undef), (f16 SReg_32:$src1))),
+ (S_LSHL_B32 SReg_32:$src1, (i32 16))
>;
+// Two-element 16-bit build_vector patterns that select to the
+// S_PACK_*_B32_B16 instructions; only used when HasVOP3PInsts holds.
let SubtargetPredicate = HasVOP3PInsts in {
+// Both elements are plain 16-bit SGPR values.
def : GCNPat <
- (v2i16 (build_vector i16:$src0, i16:$src1)),
- (v2i16 (S_PACK_LL_B32_B16 $src0, $src1))
+ (v2i16 (build_vector (i16 SReg_32:$src0), (i16 SReg_32:$src1))),
+ (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
>;
// With multiple uses of the shift, this will duplicate the shift and
// increase register pressure.
+// The high element is the upper half of $src1 (trunc of a one-use srl by 16),
+// so the shift is folded away and $src1 is passed unshifted.
def : GCNPat <
- (v2i16 (build_vector i16:$src0, (i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))),
- (v2i16 (S_PACK_LH_B32_B16 i16:$src0, i32:$src1))
+ (v2i16 (build_vector (i16 SReg_32:$src0), (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))),
+ (S_PACK_LH_B32_B16 SReg_32:$src0, SReg_32:$src1)
>;
+// Both elements are upper halves (trunc of a one-use srl by 16); both shifts
+// are folded away.
def : GCNPat <
- (v2i16 (build_vector (i16 (trunc (srl_oneuse i32:$src0, (i32 16)))),
- (i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))),
- (v2i16 (S_PACK_HH_B32_B16 $src0, $src1))
+ (v2i16 (build_vector (i16 (trunc (srl_oneuse SReg_32:$src0, (i32 16)))),
+ (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))),
+ (S_PACK_HH_B32_B16 SReg_32:$src0, SReg_32:$src1)
>;
// TODO: Should source modifiers be matched to v_pack_b32_f16?
def : GCNPat <
- (v2f16 (build_vector f16:$src0, f16:$src1)),
- (v2f16 (S_PACK_LL_B32_B16 $src0, $src1))
+ (v2f16 (build_vector (f16 SReg_32:$src0), (f16 SReg_32:$src1))),
+ (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
>;
} // End SubtargetPredicate = HasVOP3PInsts
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+
+# Two SGPR s16 elements, each a plain G_TRUNC of an s32 copy: expected to
+# select to a single S_PACK_LL_B32_B16 of the two copies.
+---
+name: test_build_vector_s_v2s16_s_s16_s_s16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX9-LABEL: name: test_build_vector_s_v2s16_s_s16_s_s16
+ ; GFX9: liveins: $sgpr0, $sgpr1
+ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY1]]
+ ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+
+ %2:sgpr(s16) = G_TRUNC %0
+ %3:sgpr(s16) = G_TRUNC %1
+
+ %4:sgpr(<2 x s16>) = G_BUILD_VECTOR %2, %3
+ S_ENDPGM 0, implicit %4
+...
+
+# Low element is a plain G_TRUNC; high element is G_TRUNC of (G_LSHR %1, 16).
+# The shift is expected to fold away, leaving S_PACK_LH_B32_B16 on the two
+# unshifted copies.
+---
+name: test_build_vector_s_pack_lh
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX9-LABEL: name: test_build_vector_s_pack_lh
+ ; GFX9: liveins: $sgpr0, $sgpr1
+ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX9: [[S_PACK_LH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LH_B32_B16 [[COPY]], [[COPY1]]
+ ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LH_B32_B16_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+
+ %2:sgpr(s32) = G_CONSTANT i32 16
+ %3:sgpr(s32) = G_LSHR %1, %2
+
+ %4:sgpr(s16) = G_TRUNC %0
+ %5:sgpr(s16) = G_TRUNC %3
+
+ %6:sgpr(<2 x s16>) = G_BUILD_VECTOR %4, %5
+ S_ENDPGM 0, implicit %6
+...
+
+# There is no s_pack_hl_b32
+# Same inputs as the pack_lh test but with the shifted value as the low
+# element: the S_LSHR_B32 is expected to stay and feed S_PACK_LL_B32_B16.
+---
+name: test_build_vector_s_pack_lh_swapped
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX9-LABEL: name: test_build_vector_s_pack_lh_swapped
+ ; GFX9: liveins: $sgpr0, $sgpr1
+ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+ ; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[COPY]]
+ ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+
+ %2:sgpr(s32) = G_CONSTANT i32 16
+ %3:sgpr(s32) = G_LSHR %1, %2
+
+ %4:sgpr(s16) = G_TRUNC %0
+ %5:sgpr(s16) = G_TRUNC %3
+
+ %6:sgpr(<2 x s16>) = G_BUILD_VECTOR %5, %4
+ S_ENDPGM 0, implicit %6
+...
+
+# Both elements are G_TRUNC of a G_LSHR by 16: both shifts are expected to
+# fold away, leaving S_PACK_HH_B32_B16 on the two unshifted copies.
+---
+name: test_build_vector_s_pack_hh
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX9-LABEL: name: test_build_vector_s_pack_hh
+ ; GFX9: liveins: $sgpr0, $sgpr1
+ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX9: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY1]]
+ ; GFX9: S_ENDPGM 0, implicit [[S_PACK_HH_B32_B16_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+
+ %2:sgpr(s32) = G_CONSTANT i32 16
+ %3:sgpr(s32) = G_LSHR %0, %2
+ %4:sgpr(s32) = G_LSHR %1, %2
+
+ %5:sgpr(s16) = G_TRUNC %3
+ %6:sgpr(s16) = G_TRUNC %4
+
+ %7:sgpr(<2 x s16>) = G_BUILD_VECTOR %5, %6
+ S_ENDPGM 0, implicit %7
+...
+
+# TODO: Should this use an and instead?
+# Constant 0 as the high element: currently expected to materialize the zero
+# with S_MOV_B32 and combine it via S_PACK_LL_B32_B16.
+---
+name: test_build_vector_s_v2s16_s_s16_s_0_s16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX9-LABEL: name: test_build_vector_s_v2s16_s_s16_s_0_s16
+ ; GFX9: liveins: $sgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_MOV_B32_]]
+ ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
+ %0:sgpr(s32) = COPY $sgpr0
+
+ %1:sgpr(s16) = G_TRUNC %0
+ %2:sgpr(s16) = G_CONSTANT i16 0
+
+ %3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2
+ S_ENDPGM 0, implicit %3
+...
+
+# Constant 0 as the low element: expected to select to S_LSHL_B32 of the
+# source by 16, with no pack instruction.
+---
+name: test_build_vector_s_v2s16_s_0_s16_s_s16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX9-LABEL: name: test_build_vector_s_v2s16_s_0_s16_s_s16
+ ; GFX9: liveins: $sgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc
+ ; GFX9: S_ENDPGM 0, implicit [[S_LSHL_B32_]]
+ %0:sgpr(s32) = COPY $sgpr0
+
+ %1:sgpr(s16) = G_CONSTANT i16 0
+ %2:sgpr(s16) = G_TRUNC %0
+
+ %3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2
+ S_ENDPGM 0, implicit %3
+...
+
+# VGPR result whose high element is an (SGPR-bank) G_IMPLICIT_DEF: expected
+# to select to just the vgpr_32 copy of the source, with no pack or shift.
+---
+name: test_build_vector_v_v2s16_v_s16_s_undef_s16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX9-LABEL: name: test_build_vector_v_v2s16_v_s16_s_undef_s16
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: S_ENDPGM 0, implicit [[COPY]]
+ %0:vgpr(s32) = COPY $vgpr0
+
+ %1:vgpr(s16) = G_TRUNC %0
+ %2:sgpr(s16) = G_IMPLICIT_DEF
+
+ %3:vgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2
+ S_ENDPGM 0, implicit %3
+...
+
+# SGPR result whose high element is G_IMPLICIT_DEF: expected to select to
+# just the sreg_32 copy of the source, with no pack or shift.
+---
+name: test_build_vector_s_v2s16_s_s16_s_undef_s16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX9-LABEL: name: test_build_vector_s_v2s16_s_s16_s_undef_s16
+ ; GFX9: liveins: $sgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9: S_ENDPGM 0, implicit [[COPY]]
+ %0:sgpr(s32) = COPY $sgpr0
+
+ %1:sgpr(s16) = G_TRUNC %0
+ %2:sgpr(s16) = G_IMPLICIT_DEF
+
+ %3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2
+ S_ENDPGM 0, implicit %3
+...
+
+# SGPR result whose low element is G_IMPLICIT_DEF: expected to select to
+# S_LSHL_B32 of the source by 16, placing it in the high half.
+---
+name: test_build_vector_s_v2s16_s_undef_s16_s_s16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX9-LABEL: name: test_build_vector_s_v2s16_s_undef_s16_s_s16
+ ; GFX9: liveins: $sgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc
+ ; GFX9: S_ENDPGM 0, implicit [[S_LSHL_B32_]]
+ %0:sgpr(s32) = COPY $sgpr0
+
+ %1:sgpr(s16) = G_IMPLICIT_DEF
+ %2:sgpr(s16) = G_TRUNC %0
+
+ %3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2
+ S_ENDPGM 0, implicit %3
+...