--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+
+define float @v_pow_f32(float %x, float %y) {
+; GFX6-LABEL: v_pow_f32:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_log_f32_e32 v0, v0
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_pow_f32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_log_f32_e32 v0, v0
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_pow_f32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_log_f32_e32 v0, v0
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_exp_f32_e32 v0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %pow = call float @llvm.pow.f32(float %x, float %y)
+ ret float %pow
+}
+
+define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
+; GFX6-LABEL: v_pow_v2f32:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_log_f32_e32 v0, v0
+; GFX6-NEXT: v_log_f32_e32 v1, v1
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
+; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
+; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: v_exp_f32_e32 v1, v1
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_pow_v2f32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_log_f32_e32 v0, v0
+; GFX8-NEXT: v_log_f32_e32 v1, v1
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
+; GFX8-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
+; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: v_exp_f32_e32 v1, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_pow_v2f32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_log_f32_e32 v0, v0
+; GFX9-NEXT: v_log_f32_e32 v1, v1
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
+; GFX9-NEXT: v_exp_f32_e32 v0, v0
+; GFX9-NEXT: v_exp_f32_e32 v1, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %pow = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> %y)
+ ret <2 x float> %pow
+}
+
+define half @v_pow_f16(half %x, half %y) {
+; GFX6-LABEL: v_pow_f16:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-NEXT: v_log_f32_e32 v0, v0
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_pow_f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_log_f16_e32 v0, v0
+; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT: v_exp_f16_e32 v0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_pow_f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_log_f16_e32 v0, v0
+; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT: v_exp_f16_e32 v0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %pow = call half @llvm.pow.f16(half %x, half %y)
+ ret half %pow
+}
+
+define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) {
+; GFX6-LABEL: v_pow_v2f16:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX6-NEXT: v_log_f32_e32 v0, v0
+; GFX6-NEXT: v_log_f32_e32 v1, v1
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
+; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
+; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: v_exp_f32_e32 v1, v1
+; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_pow_v2f16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_log_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX8-NEXT: v_log_f16_e32 v0, v0
+; GFX8-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT: v_mov_b32_e32 v2, 16
+; GFX8-NEXT: v_exp_f16_e32 v1, v1
+; GFX8-NEXT: v_exp_f16_e32 v0, v0
+; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_pow_v2f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_log_f16_e32 v2, v0
+; GFX9-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
+; GFX9-NEXT: v_exp_f16_e32 v1, v1
+; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y)
+ ret <2 x half> %pow
+}
+
+define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) {
+; GFX6-LABEL: v_pow_v2f16_fneg_lhs:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
+; GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX6-NEXT: v_log_f32_e32 v0, v0
+; GFX6-NEXT: v_log_f32_e32 v1, v1
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
+; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
+; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: v_exp_f32_e32 v1, v1
+; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_pow_v2f16_fneg_lhs:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
+; GFX8-NEXT: v_log_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX8-NEXT: v_log_f16_e32 v0, v0
+; GFX8-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT: v_mov_b32_e32 v2, 16
+; GFX8-NEXT: v_exp_f16_e32 v1, v1
+; GFX8-NEXT: v_exp_f16_e32 v0, v0
+; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_pow_v2f16_fneg_lhs:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
+; GFX9-NEXT: v_log_f16_e32 v2, v0
+; GFX9-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX9-NEXT: v_exp_f16_e32 v1, v2
+; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
+; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %x.fneg = fneg <2 x half> %x
+ %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y)
+ ret <2 x half> %pow
+}
+
+define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) {
+; GFX6-LABEL: v_pow_v2f16_fneg_rhs:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX6-NEXT: v_or_b32_e32 v2, v3, v2
+; GFX6-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-NEXT: v_log_f32_e32 v0, v0
+; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX6-NEXT: v_log_f32_e32 v1, v1
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2
+; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v3
+; GFX6-NEXT: v_exp_f32_e32 v1, v1
+; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_pow_v2f16_fneg_rhs:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_log_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX8-NEXT: v_log_f16_e32 v0, v0
+; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; GFX8-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
+; GFX8-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT: v_exp_f16_e32 v1, v2
+; GFX8-NEXT: v_mov_b32_e32 v2, 16
+; GFX8-NEXT: v_exp_f16_e32 v0, v0
+; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_pow_v2f16_fneg_rhs:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_log_f16_e32 v2, v0
+; GFX9-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; GFX9-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
+; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT: v_exp_f16_e32 v1, v2
+; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
+; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %y.fneg = fneg <2 x half> %y
+ %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y.fneg)
+ ret <2 x half> %pow
+}
+
+define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) {
+; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_mov_b32_e32 v4, 0xffff
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_and_b32_e32 v0, v0, v4
+; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
+; GFX6-NEXT: s_mov_b32 s4, 0x80008000
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
+; GFX6-NEXT: v_and_b32_e32 v2, v2, v4
+; GFX6-NEXT: v_xor_b32_e32 v0, s4, v0
+; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
+; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-NEXT: v_xor_b32_e32 v1, s4, v1
+; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-NEXT: v_log_f32_e32 v0, v0
+; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX6-NEXT: v_log_f32_e32 v2, v2
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v2, v3
+; GFX6-NEXT: v_exp_f32_e32 v1, v1
+; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_pow_v2f16_fneg_lhs_rhs:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_mov_b32 s4, 0x80008000
+; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
+; GFX8-NEXT: v_log_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX8-NEXT: v_log_f16_e32 v0, v0
+; GFX8-NEXT: v_xor_b32_e32 v1, s4, v1
+; GFX8-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
+; GFX8-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT: v_exp_f16_e32 v1, v2
+; GFX8-NEXT: v_mov_b32_e32 v2, 16
+; GFX8-NEXT: v_exp_f16_e32 v0, v0
+; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_pow_v2f16_fneg_lhs_rhs:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s4, 0x80008000
+; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0
+; GFX9-NEXT: v_log_f16_e32 v2, v0
+; GFX9-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1
+; GFX9-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
+; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX9-NEXT: v_exp_f16_e32 v1, v2
+; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
+; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %x.fneg = fneg <2 x half> %x
+ %y.fneg = fneg <2 x half> %y
+ %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y.fneg)
+ ret <2 x half> %pow
+}
+
+; FIXME
+; define double @v_pow_f64(double %x, double %y) {
+; %pow = call double @llvm.pow.f64(double %x, double %y)
+; ret double %pow
+; }
+
+define float @v_pow_f32_fabs_lhs(float %x, float %y) {
+; GFX6-LABEL: v_pow_f32_fabs_lhs:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_log_f32_e64 v0, |v0|
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_pow_f32_fabs_lhs:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_log_f32_e64 v0, |v0|
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_pow_f32_fabs_lhs:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_log_f32_e64 v0, |v0|
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_exp_f32_e32 v0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %fabs.x = call float @llvm.fabs.f32(float %x)
+ %pow = call float @llvm.pow.f32(float %fabs.x, float %y)
+ ret float %pow
+}
+
+define float @v_pow_f32_fabs_rhs(float %x, float %y) {
+; GFX6-LABEL: v_pow_f32_fabs_rhs:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_log_f32_e32 v0, v0
+; GFX6-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
+; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_pow_f32_fabs_rhs:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_log_f32_e32 v0, v0
+; GFX8-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
+; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_pow_f32_fabs_rhs:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_log_f32_e32 v0, v0
+; GFX9-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
+; GFX9-NEXT: v_exp_f32_e32 v0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %fabs.y = call float @llvm.fabs.f32(float %y)
+ %pow = call float @llvm.pow.f32(float %x, float %fabs.y)
+ ret float %pow
+}
+
+define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
+; GFX6-LABEL: v_pow_f32_fabs_lhs_rhs:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_log_f32_e64 v0, |v0|
+; GFX6-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
+; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_log_f32_e64 v0, |v0|
+; GFX8-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
+; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_log_f32_e64 v0, |v0|
+; GFX9-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
+; GFX9-NEXT: v_exp_f32_e32 v0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %fabs.x = call float @llvm.fabs.f32(float %x)
+ %fabs.y = call float @llvm.fabs.f32(float %y)
+ %pow = call float @llvm.pow.f32(float %fabs.x, float %fabs.y)
+ ret float %pow
+}
+
+define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
+; GFX6-LABEL: v_pow_f32_sgpr_vgpr:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: v_log_f32_e32 v1, s0
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
+; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: v_pow_f32_sgpr_vgpr:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_log_f32_e32 v1, s0
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
+; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX9-LABEL: v_pow_f32_sgpr_vgpr:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_log_f32_e32 v1, s0
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
+; GFX9-NEXT: v_exp_f32_e32 v0, v0
+; GFX9-NEXT: ; return to shader part epilog
+ %pow = call float @llvm.pow.f32(float %x, float %y)
+ ret float %pow
+}
+
+define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
+; GFX6-LABEL: v_pow_f32_vgpr_sgpr:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: v_log_f32_e32 v0, v0
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
+; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: v_pow_f32_vgpr_sgpr:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_log_f32_e32 v0, v0
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
+; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX9-LABEL: v_pow_f32_vgpr_sgpr:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_log_f32_e32 v0, v0
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
+; GFX9-NEXT: v_exp_f32_e32 v0, v0
+; GFX9-NEXT: ; return to shader part epilog
+ %pow = call float @llvm.pow.f32(float %x, float %y)
+ ret float %pow
+}
+
+define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
+; GFX6-LABEL: v_pow_f32_sgpr_sgpr:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: v_log_f32_e32 v0, s0
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
+; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: v_pow_f32_sgpr_sgpr:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_log_f32_e32 v0, s0
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
+; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX9-LABEL: v_pow_f32_sgpr_sgpr:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_log_f32_e32 v0, s0
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
+; GFX9-NEXT: v_exp_f32_e32 v0, v0
+; GFX9-NEXT: ; return to shader part epilog
+ %pow = call float @llvm.pow.f32(float %x, float %y)
+ ret float %pow
+}
+
+define float @v_pow_f32_fneg_lhs(float %x, float %y) {
+; GFX6-LABEL: v_pow_f32_fneg_lhs:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_log_f32_e64 v0, -v0
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_pow_f32_fneg_lhs:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_log_f32_e64 v0, -v0
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_pow_f32_fneg_lhs:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_log_f32_e64 v0, -v0
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_exp_f32_e32 v0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %neg.x = fneg float %x
+ %pow = call float @llvm.pow.f32(float %neg.x, float %y)
+ ret float %pow
+}
+
+define float @v_pow_f32_fneg_rhs(float %x, float %y) {
+; GFX6-LABEL: v_pow_f32_fneg_rhs:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_log_f32_e32 v0, v0
+; GFX6-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1
+; GFX6-NEXT: v_exp_f32_e32 v0, v0
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_pow_f32_fneg_rhs:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_log_f32_e32 v0, v0
+; GFX8-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1
+; GFX8-NEXT: v_exp_f32_e32 v0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_pow_f32_fneg_rhs:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_log_f32_e32 v0, v0
+; GFX9-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1
+; GFX9-NEXT: v_exp_f32_e32 v0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %neg.y = fneg float %y
+ %pow = call float @llvm.pow.f32(float %x, float %neg.y)
+ ret float %pow
+}
+
+declare half @llvm.pow.f16(half, half)
+declare float @llvm.pow.f32(float, float)
+declare double @llvm.pow.f64(double, double)
+
+declare half @llvm.fabs.f16(half)
+declare float @llvm.fabs.f32(float)
+
+declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>)
+declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>)
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
---
name: test_fpow_s32
bb.0:
liveins: $vgpr0, $vgpr1
- ; CHECK-LABEL: name: test_fpow_s32
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[FPOW:%[0-9]+]]:_(s32) = G_FPOW [[COPY]], [[COPY1]]
- ; CHECK: $vgpr0 = COPY [[FPOW]](s32)
+ ; GFX6-LABEL: name: test_fpow_s32
+ ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX6: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[COPY]]
+ ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[COPY1]](s32)
+ ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT]]
+ ; GFX6: $vgpr0 = COPY [[FEXP2_]](s32)
+ ; GFX9-LABEL: name: test_fpow_s32
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[COPY]]
+ ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[COPY1]](s32)
+ ; GFX9: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT]]
+ ; GFX9: $vgpr0 = COPY [[FEXP2_]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = G_FPOW %0, %1
bb.0.entry:
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
- ; CHECK-LABEL: name: test_fpow_v2s32
- ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
- ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
- ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
- ; CHECK: [[FPOW:%[0-9]+]]:_(s32) = G_FPOW [[UV]], [[UV2]]
- ; CHECK: [[FPOW1:%[0-9]+]]:_(s32) = G_FPOW [[UV1]], [[UV3]]
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPOW]](s32), [[FPOW1]](s32)
- ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ; GFX6-LABEL: name: test_fpow_v2s32
+ ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+ ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+ ; GFX6: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[UV]]
+ ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[UV2]](s32)
+ ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT]]
+ ; GFX6: [[FLOG2_1:%[0-9]+]]:_(s32) = G_FLOG2 [[UV1]]
+ ; GFX6: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_1]](s32), [[UV3]](s32)
+ ; GFX6: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[INT1]]
+ ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ; GFX9-LABEL: name: test_fpow_v2s32
+ ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+ ; GFX9: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[UV]]
+ ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[UV2]](s32)
+ ; GFX9: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT]]
+ ; GFX9: [[FLOG2_1:%[0-9]+]]:_(s32) = G_FLOG2 [[UV1]]
+ ; GFX9: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_1]](s32), [[UV3]](s32)
+ ; GFX9: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[INT1]]
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
%2:_(<2 x s32>) = G_FPOW %0, %1
bb.0.entry:
liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
- ; CHECK-LABEL: name: test_fpow_v3s32
- ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
- ; CHECK: [[FPOW:%[0-9]+]]:_(s32) = G_FPOW [[UV]], [[UV3]]
- ; CHECK: [[FPOW1:%[0-9]+]]:_(s32) = G_FPOW [[UV1]], [[UV4]]
- ; CHECK: [[FPOW2:%[0-9]+]]:_(s32) = G_FPOW [[UV2]], [[UV5]]
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FPOW]](s32), [[FPOW1]](s32), [[FPOW2]](s32)
- ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ; GFX6-LABEL: name: test_fpow_v3s32
+ ; GFX6: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+ ; GFX6: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+ ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; GFX6: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; GFX6: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[UV]]
+ ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[UV3]](s32)
+ ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT]]
+ ; GFX6: [[FLOG2_1:%[0-9]+]]:_(s32) = G_FLOG2 [[UV1]]
+ ; GFX6: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_1]](s32), [[UV4]](s32)
+ ; GFX6: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[INT1]]
+ ; GFX6: [[FLOG2_2:%[0-9]+]]:_(s32) = G_FLOG2 [[UV2]]
+ ; GFX6: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_2]](s32), [[UV5]](s32)
+ ; GFX6: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[INT2]]
+ ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32)
+ ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ; GFX9-LABEL: name: test_fpow_v3s32
+ ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; GFX9: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[UV]]
+ ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[UV3]](s32)
+ ; GFX9: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT]]
+ ; GFX9: [[FLOG2_1:%[0-9]+]]:_(s32) = G_FLOG2 [[UV1]]
+ ; GFX9: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_1]](s32), [[UV4]](s32)
+ ; GFX9: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[INT1]]
+ ; GFX9: [[FLOG2_2:%[0-9]+]]:_(s32) = G_FLOG2 [[UV2]]
+ ; GFX9: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_2]](s32), [[UV5]](s32)
+ ; GFX9: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[INT2]]
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32)
+ ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
%2:_(<3 x s32>) = G_FPOW %0, %1
$vgpr0_vgpr1_vgpr2 = COPY %2
...
+
+---
+name: test_fpow_s32_flags
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; GFX6-LABEL: name: test_fpow_s32_flags
+ ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX6: [[FLOG2_:%[0-9]+]]:_(s32) = nnan nsz G_FLOG2 [[COPY]]
+ ; GFX6: [[INT:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[COPY1]](s32)
+ ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = nnan nsz G_FEXP2 [[INT]]
+ ; GFX6: $vgpr0 = COPY [[FEXP2_]](s32)
+ ; GFX9-LABEL: name: test_fpow_s32_flags
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[FLOG2_:%[0-9]+]]:_(s32) = nnan nsz G_FLOG2 [[COPY]]
+ ; GFX9: [[INT:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[COPY1]](s32)
+ ; GFX9: [[FEXP2_:%[0-9]+]]:_(s32) = nnan nsz G_FEXP2 [[INT]]
+ ; GFX9: $vgpr0 = COPY [[FEXP2_]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = nnan nsz G_FPOW %0, %1
+ $vgpr0 = COPY %2
+...
+
+---
+name: test_fpow_s16
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; GFX6-LABEL: name: test_fpow_s16
+ ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
+ ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
+ ; GFX6: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[FPEXT]]
+ ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[FPEXT1]](s32)
+ ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT]]
+ ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32)
+ ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
+ ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX9-LABEL: name: test_fpow_s16
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX9: [[FLOG2_:%[0-9]+]]:_(s16) = G_FLOG2 [[TRUNC]]
+ ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[FLOG2_]](s16)
+ ; GFX9: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
+ ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT]](s32), [[FPEXT1]](s32)
+ ; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
+ ; GFX9: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FPTRUNC]]
+ ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16)
+ ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_FPOW %2, %3
+ %5:_(s32) = G_ANYEXT %4
+ $vgpr0 = COPY %5
+...
+
+---
+name: test_fpow_v2s16
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; GFX6-LABEL: name: test_fpow_v2s16
+ ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
+ ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
+ ; GFX6: [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[FPEXT]]
+ ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[FPEXT1]](s32)
+ ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[INT]]
+ ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32)
+ ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
+ ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
+ ; GFX6: [[FLOG2_1:%[0-9]+]]:_(s32) = G_FLOG2 [[FPEXT2]]
+ ; GFX6: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_1]](s32), [[FPEXT3]](s32)
+ ; GFX6: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[INT1]]
+ ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_1]](s32)
+ ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
+ ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
+ ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
+ ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ; GFX9-LABEL: name: test_fpow_v2s16
+ ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9: [[FLOG2_:%[0-9]+]]:_(s16) = G_FLOG2 [[TRUNC]]
+ ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[FLOG2_]](s16)
+ ; GFX9: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
+ ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT]](s32), [[FPEXT1]](s32)
+ ; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
+ ; GFX9: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FPTRUNC]]
+ ; GFX9: [[FLOG2_1:%[0-9]+]]:_(s16) = G_FLOG2 [[TRUNC1]]
+ ; GFX9: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FLOG2_1]](s16)
+ ; GFX9: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
+ ; GFX9: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT2]](s32), [[FPEXT3]](s32)
+ ; GFX9: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
+ ; GFX9: [[FEXP2_1:%[0-9]+]]:_(s16) = G_FEXP2 [[FPTRUNC1]]
+ ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16)
+ ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_1]](s16)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+ ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = COPY $vgpr1
+ %2:_(<2 x s16>) = G_FPOW %0, %1
+ $vgpr0 = COPY %2
+...
+
+---
+name: test_fpow_v2s16_flags
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; GFX6-LABEL: name: test_fpow_v2s16_flags
+ ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
+ ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
+ ; GFX6: [[FLOG2_:%[0-9]+]]:_(s32) = nnan nsz G_FLOG2 [[FPEXT]]
+ ; GFX6: [[INT:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_]](s32), [[FPEXT1]](s32)
+ ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = nnan nsz G_FEXP2 [[INT]]
+ ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32)
+ ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
+ ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
+ ; GFX6: [[FLOG2_1:%[0-9]+]]:_(s32) = nnan nsz G_FLOG2 [[FPEXT2]]
+ ; GFX6: [[INT1:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FLOG2_1]](s32), [[FPEXT3]](s32)
+ ; GFX6: [[FEXP2_1:%[0-9]+]]:_(s32) = nnan nsz G_FEXP2 [[INT1]]
+ ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_1]](s32)
+ ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
+ ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
+ ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
+ ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
+ ; GFX9-LABEL: name: test_fpow_v2s16_flags
+ ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9: [[FLOG2_:%[0-9]+]]:_(s16) = nnan nsz G_FLOG2 [[TRUNC]]
+ ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = nnan nsz G_FPEXT [[FLOG2_]](s16)
+ ; GFX9: [[FPEXT1:%[0-9]+]]:_(s32) = nnan nsz G_FPEXT [[TRUNC2]](s16)
+ ; GFX9: [[INT:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT]](s32), [[FPEXT1]](s32)
+ ; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
+ ; GFX9: [[FEXP2_:%[0-9]+]]:_(s16) = nnan nsz G_FEXP2 [[FPTRUNC]]
+ ; GFX9: [[FLOG2_1:%[0-9]+]]:_(s16) = nnan nsz G_FLOG2 [[TRUNC1]]
+ ; GFX9: [[FPEXT2:%[0-9]+]]:_(s32) = nnan nsz G_FPEXT [[FLOG2_1]](s16)
+ ; GFX9: [[FPEXT3:%[0-9]+]]:_(s32) = nnan nsz G_FPEXT [[TRUNC3]](s16)
+ ; GFX9: [[INT1:%[0-9]+]]:_(s32) = nnan nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[FPEXT2]](s32), [[FPEXT3]](s32)
+ ; GFX9: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
+ ; GFX9: [[FEXP2_1:%[0-9]+]]:_(s16) = nnan nsz G_FEXP2 [[FPTRUNC1]]
+ ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16)
+ ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_1]](s16)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32)
+ ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = COPY $vgpr1
+ %2:_(<2 x s16>) = nnan nsz G_FPOW %0, %1
+ $vgpr0 = COPY %2
+...