Detailed description: We currently have a set of patterns that select ISD::FNEG and ISD::FABS to bitwise operations. These patterns need to be predicated so that the VALU or SALU variant of the bitwise operation is selected according to the divergence bit of the SDNode.
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D114257
>;
}
+
/********** ================================ **********/
/********** Floating point absolute/negative **********/
/********** ================================ **********/
-// Prevent expanding both fneg and fabs.
-// TODO: Add IgnoredBySelectionDAG bit?
-let AddedComplexity = 1 in { // Prefer SALU to VALU patterns for DAG
-
def : GCNPat <
- (fneg (fabs (f32 SReg_32:$src))),
+ (UniformUnaryFrag<fneg> (fabs (f32 SReg_32:$src))),
(S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80000000))) // Set sign bit
>;
def : GCNPat <
- (fabs (f32 SReg_32:$src)),
+ (UniformUnaryFrag<fabs> (f32 SReg_32:$src)),
(S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x7fffffff)))
>;
def : GCNPat <
- (fneg (f32 SReg_32:$src)),
+ (UniformUnaryFrag<fneg> (f32 SReg_32:$src)),
(S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80000000)))
>;
def : GCNPat <
- (fneg (f16 SReg_32:$src)),
+ (UniformUnaryFrag<fneg> (f16 SReg_32:$src)),
(S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000)))
>;
def : GCNPat <
- (fneg (f16 VGPR_32:$src)),
- (V_XOR_B32_e32 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src)
->;
-
-def : GCNPat <
- (fabs (f16 SReg_32:$src)),
+ (UniformUnaryFrag<fabs> (f16 SReg_32:$src)),
(S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00007fff)))
>;
def : GCNPat <
- (fneg (fabs (f16 SReg_32:$src))),
+ (UniformUnaryFrag<fneg> (fabs (f16 SReg_32:$src))),
(S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit
>;
def : GCNPat <
- (fneg (fabs (f16 VGPR_32:$src))),
- (V_OR_B32_e32 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) // Set sign bit
->;
-
-def : GCNPat <
- (fneg (v2f16 SReg_32:$src)),
+ (UniformUnaryFrag<fneg> (v2f16 SReg_32:$src)),
(S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000)))
>;
def : GCNPat <
- (fabs (v2f16 SReg_32:$src)),
+ (UniformUnaryFrag<fabs> (v2f16 SReg_32:$src)),
(S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x7fff7fff)))
>;
// fabs is not reported as free because there is modifier for it in
// VOP3P instructions, so it is turned into the bit op.
def : GCNPat <
- (fneg (v2f16 (bitconvert (and_oneuse (i32 SReg_32:$src), 0x7fff7fff)))),
+ (UniformUnaryFrag<fneg> (v2f16 (bitconvert (and_oneuse (i32 SReg_32:$src), 0x7fff7fff)))),
(S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit
>;
def : GCNPat <
- (fneg (v2f16 (fabs SReg_32:$src))),
+ (UniformUnaryFrag<fneg> (v2f16 (fabs SReg_32:$src))),
(S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit
>;
-// FIXME: The implicit-def of scc from S_[X]OR/AND_B32 is mishandled
- // def : GCNPat <
-// (fneg (f64 SReg_64:$src)),
-// (REG_SEQUENCE SReg_64,
-// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
-// sub0,
-// (S_XOR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
-// (i32 (S_MOV_B32 (i32 0x80000000)))),
-// sub1)
-// >;
-
-// def : GCNPat <
-// (fneg (fabs (f64 SReg_64:$src))),
-// (REG_SEQUENCE SReg_64,
-// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
-// sub0,
-// (S_OR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
-// (S_MOV_B32 (i32 0x80000000))), // Set sign bit.
-// sub1)
-// >;
-
-// FIXME: Use S_BITSET0_B32/B64?
-// def : GCNPat <
-// (fabs (f64 SReg_64:$src)),
-// (REG_SEQUENCE SReg_64,
-// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
-// sub0,
-// (S_AND_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
-// (i32 (S_MOV_B32 (i32 0x7fffffff)))),
-// sub1)
-// >;
// COPY_TO_REGCLASS is needed to avoid using SCC from S_XOR_B32 instead
// of the real value.
def : GCNPat <
- (fneg (v2f32 SReg_64:$src)),
+ (UniformUnaryFrag<fneg> (v2f32 SReg_64:$src)),
(v2f32 (REG_SEQUENCE SReg_64,
(f32 (COPY_TO_REGCLASS (S_XOR_B32 (i32 (EXTRACT_SUBREG $src, sub0)),
(i32 (S_MOV_B32 (i32 0x80000000)))),
SReg_32)), sub1))
>;
-} // End let AddedComplexity = 1
+// fabs on a v2f32 held in an SReg_64 pair: clear bit 31 of each 32-bit half
+// (sub0 and sub1) with S_AND_B32 and rebuild the pair with REG_SEQUENCE.
+// NOTE(review): UniformUnaryFrag presumably restricts the match to
+// non-divergent nodes, and COPY_TO_REGCLASS presumably keeps the value result
+// (not the implicit SCC def) in use -- confirm against their definitions.
+def : GCNPat <
+ (UniformUnaryFrag<fabs> (v2f32 SReg_64:$src)),
+ (v2f32 (REG_SEQUENCE SReg_64,
+ (f32 (COPY_TO_REGCLASS (S_AND_B32 (i32 (EXTRACT_SUBREG $src, sub0)),
+ (i32 (S_MOV_B32 (i32 0x7fffffff)))),
+ SReg_32)), sub0,
+ (f32 (COPY_TO_REGCLASS (S_AND_B32 (i32 (EXTRACT_SUBREG $src, sub1)),
+ (i32 (S_MOV_B32 (i32 0x7fffffff)))),
+ SReg_32)), sub1))
+>;
+
+// fneg(fabs x) on a v2f32 held in an SReg_64 pair: set bit 31 of each 32-bit
+// half (sub0 and sub1) with S_OR_B32 and rebuild the pair with REG_SEQUENCE.
+// NOTE(review): UniformUnaryFrag presumably restricts the match to
+// non-divergent nodes -- confirm against its definition.
+def : GCNPat <
+ (UniformUnaryFrag<fneg> (fabs (v2f32 SReg_64:$src))),
+ (v2f32 (REG_SEQUENCE SReg_64,
+ (f32 (COPY_TO_REGCLASS (S_OR_B32 (i32 (EXTRACT_SUBREG $src, sub0)),
+ (i32 (S_MOV_B32 (i32 0x80000000)))),
+ SReg_32)), sub0,
+ (f32 (COPY_TO_REGCLASS (S_OR_B32 (i32 (EXTRACT_SUBREG $src, sub1)),
+ (i32 (S_MOV_B32 (i32 0x80000000)))),
+ SReg_32)), sub1))
+>;
+
+// fabs on an f64 held in an SReg_64 pair: sub0 is passed through unchanged and
+// only sub1 is masked with 0x7fffffff via S_AND_B32.
+// FIXME: Use S_BITSET0_B32/B64?
+def : GCNPat <
+ (UniformUnaryFrag<fabs> (f64 SReg_64:$src)),
+ (REG_SEQUENCE SReg_64,
+ (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
+ sub0,
+ (i32 (COPY_TO_REGCLASS (S_AND_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
+ (S_MOV_B32 (i32 0x7fffffff))), SReg_32)), // Clear sign bit.
+ sub1)
+>;
+
+// fneg on an f64 held in an SReg_64 pair: sub0 is passed through unchanged and
+// only sub1 is XORed with 0x80000000 via S_XOR_B32.
+def : GCNPat <
+ (UniformUnaryFrag<fneg> (f64 SReg_64:$src)),
+ (REG_SEQUENCE SReg_64,
+ (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
+ sub0,
+ (i32 (COPY_TO_REGCLASS (S_XOR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
+ (i32 (S_MOV_B32 (i32 0x80000000)))), SReg_32)), // Flip sign bit.
+ sub1)
+>;
+
+// fneg(fabs x) on an f64 held in an SReg_64 pair: sub0 is passed through
+// unchanged and only sub1 is ORed with 0x80000000 via S_OR_B32.
+def : GCNPat <
+ (UniformUnaryFrag<fneg> (fabs (f64 SReg_64:$src))),
+ (REG_SEQUENCE SReg_64,
+ (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
+ sub0,
+ (i32 (COPY_TO_REGCLASS (S_OR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
+ (S_MOV_B32 (i32 0x80000000))), SReg_32)),// Set sign bit.
+ sub1)
+>;
+
+
+// fneg(fabs x) on an f32 in a VGPR: OR the value with 0x80000000, with the
+// constant materialized in an SGPR by S_MOV_B32. This pattern carries no
+// UniformUnaryFrag predicate.
+def : GCNPat <
+ (fneg (fabs (f32 VGPR_32:$src))),
+ (V_OR_B32_e64 (S_MOV_B32 (i32 0x80000000)), VGPR_32:$src) // Set sign bit
+>;
def : GCNPat <
(fabs (f32 VGPR_32:$src)),
- (V_AND_B32_e32 (S_MOV_B32 (i32 0x7fffffff)), VGPR_32:$src)
+ (V_AND_B32_e64 (S_MOV_B32 (i32 0x7fffffff)), VGPR_32:$src)
>;
def : GCNPat <
(fneg (f32 VGPR_32:$src)),
- (V_XOR_B32_e32 (S_MOV_B32 (i32 0x80000000)), VGPR_32:$src)
+ (V_XOR_B32_e64 (S_MOV_B32 (i32 0x80000000)), VGPR_32:$src)
>;
def : GCNPat <
(fabs (f16 VGPR_32:$src)),
- (V_AND_B32_e32 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src)
+ (V_AND_B32_e64 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src)
+>;
+
+// fneg on an f16 in a VGPR: XOR the value with 0x00008000 (bit 15), with the
+// constant materialized in an SGPR by S_MOV_B32.
+def : GCNPat <
+ (fneg (f16 VGPR_32:$src)),
+ (V_XOR_B32_e64 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) // Flip sign bit
+>;
+
+// fneg(fabs x) on an f16 in a VGPR: OR the value with 0x00008000 (bit 15),
+// with the constant materialized in an SGPR by S_MOV_B32.
+def : GCNPat <
+ (fneg (fabs (f16 VGPR_32:$src))),
+ (V_OR_B32_e64 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) // Set sign bit
 >;
def : GCNPat <
(fneg (v2f16 VGPR_32:$src)),
- (V_XOR_B32_e32 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src)
+ (V_XOR_B32_e64 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src)
>;
def : GCNPat <
(fabs (v2f16 VGPR_32:$src)),
- (V_AND_B32_e32 (S_MOV_B32 (i32 0x7fff7fff)), VGPR_32:$src)
+ (V_AND_B32_e64 (S_MOV_B32 (i32 0x7fff7fff)), VGPR_32:$src)
>;
 def : GCNPat <
 (fneg (v2f16 (fabs VGPR_32:$src))),
- (V_OR_B32_e32 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src) // Set sign bit
+ (V_OR_B32_e64 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src) // Set sign bit
 >;
def : GCNPat <
(REG_SEQUENCE VReg_64,
(i32 (EXTRACT_SUBREG VReg_64:$src, sub0)),
sub0,
- (V_AND_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)),
- (V_MOV_B32_e32 (i32 0x7fffffff))), // Set sign bit.
+ (V_AND_B32_e64 (i32 (S_MOV_B32 (i32 0x7fffffff))),
+ (i32 (EXTRACT_SUBREG VReg_64:$src, sub1))),
sub1)
>;
-// TODO: Use SGPR for constant
def : GCNPat <
(fneg (f64 VReg_64:$src)),
(REG_SEQUENCE VReg_64,
(i32 (EXTRACT_SUBREG VReg_64:$src, sub0)),
sub0,
- (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)),
- (i32 (V_MOV_B32_e32 (i32 0x80000000)))),
+ (V_XOR_B32_e64 (i32 (S_MOV_B32 (i32 0x80000000))),
+ (i32 (EXTRACT_SUBREG VReg_64:$src, sub1))),
sub1)
>;
-// TODO: Use SGPR for constant
def : GCNPat <
(fneg (fabs (f64 VReg_64:$src))),
(REG_SEQUENCE VReg_64,
(i32 (EXTRACT_SUBREG VReg_64:$src, sub0)),
sub0,
- (V_OR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)),
- (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit.
+ (V_OR_B32_e64 (i32 (S_MOV_B32 (i32 0x80000000))),
+ (i32 (EXTRACT_SUBREG VReg_64:$src, sub1))),
sub1)
>;
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
-# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
-# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=SI %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=VI %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
---
name: fabs_s32_ss
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
; GCN: $sgpr0 = COPY [[S_AND_B32_]]
+ ; SI-LABEL: name: fabs_s32_ss
+ ; SI: liveins: $sgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; SI-NEXT: $sgpr0 = COPY [[S_AND_B32_]]
+ ; VI-LABEL: name: fabs_s32_ss
+ ; VI: liveins: $sgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; VI-NEXT: $sgpr0 = COPY [[S_AND_B32_]]
+ ; GFX9-LABEL: name: fabs_s32_ss
+ ; GFX9: liveins: $sgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX9-NEXT: $sgpr0 = COPY [[S_AND_B32_]]
+ ; GFX10-LABEL: name: fabs_s32_ss
+ ; GFX10: liveins: $sgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX10-NEXT: $sgpr0 = COPY [[S_AND_B32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = G_FABS %0
$sgpr0 = COPY %1
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
; GCN: $vgpr0 = COPY [[V_AND_B32_e32_]]
+ ; SI-LABEL: name: fabs_s32_vv
+ ; SI: liveins: $vgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; SI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]]
+ ; VI-LABEL: name: fabs_s32_vv
+ ; VI: liveins: $vgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]]
+ ; GFX9-LABEL: name: fabs_s32_vv
+ ; GFX9: liveins: $vgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX9-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]]
+ ; GFX10-LABEL: name: fabs_s32_vv
+ ; GFX10: liveins: $vgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX10-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FABS %0
$vgpr0 = COPY %1
; GCN: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; GCN: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]]
; GCN: $vgpr0 = COPY [[FABS]](s32)
+ ; SI-LABEL: name: fabs_s32_vs
+ ; SI: liveins: $sgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]]
+ ; SI-NEXT: $vgpr0 = COPY [[FABS]](s32)
+ ; VI-LABEL: name: fabs_s32_vs
+ ; VI: liveins: $sgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]]
+ ; VI-NEXT: $vgpr0 = COPY [[FABS]](s32)
+ ; GFX9-LABEL: name: fabs_s32_vs
+ ; GFX9: liveins: $sgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]]
+ ; GFX9-NEXT: $vgpr0 = COPY [[FABS]](s32)
+ ; GFX10-LABEL: name: fabs_s32_vs
+ ; GFX10: liveins: $sgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]]
+ ; GFX10-NEXT: $vgpr0 = COPY [[FABS]](s32)
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = G_FABS %0
$vgpr0 = COPY %1
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
; GCN: $sgpr0 = COPY [[S_AND_B32_]]
+ ; SI-LABEL: name: fabs_v2s16_ss
+ ; SI: liveins: $sgpr0_sgpr1
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
+ ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; SI-NEXT: $sgpr0 = COPY [[S_AND_B32_]]
+ ; VI-LABEL: name: fabs_v2s16_ss
+ ; VI: liveins: $sgpr0_sgpr1
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
+ ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; VI-NEXT: $sgpr0 = COPY [[S_AND_B32_]]
+ ; GFX9-LABEL: name: fabs_v2s16_ss
+ ; GFX9: liveins: $sgpr0_sgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
+ ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX9-NEXT: $sgpr0 = COPY [[S_AND_B32_]]
+ ; GFX10-LABEL: name: fabs_v2s16_ss
+ ; GFX10: liveins: $sgpr0_sgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX10-NEXT: $sgpr0 = COPY [[S_AND_B32_]]
%0:sgpr(<2 x s16>) = COPY $sgpr0
%1:sgpr(<2 x s16>) = G_FABS %0
$sgpr0 = COPY %1
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
; GCN: $sgpr0 = COPY [[S_AND_B32_]]
+ ; SI-LABEL: name: fabs_s16_ss
+ ; SI: liveins: $sgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767
+ ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; SI-NEXT: $sgpr0 = COPY [[S_AND_B32_]]
+ ; VI-LABEL: name: fabs_s16_ss
+ ; VI: liveins: $sgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767
+ ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; VI-NEXT: $sgpr0 = COPY [[S_AND_B32_]]
+ ; GFX9-LABEL: name: fabs_s16_ss
+ ; GFX9: liveins: $sgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767
+ ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX9-NEXT: $sgpr0 = COPY [[S_AND_B32_]]
+ ; GFX10-LABEL: name: fabs_s16_ss
+ ; GFX10: liveins: $sgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX10-NEXT: $sgpr0 = COPY [[S_AND_B32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:sgpr(s16) = G_FABS %1
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767
; GCN: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
; GCN: $vgpr0 = COPY [[V_AND_B32_e32_]]
+ ; SI-LABEL: name: fabs_s16_vv
+ ; SI: liveins: $vgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767
+ ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; SI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]]
+ ; VI-LABEL: name: fabs_s16_vv
+ ; VI: liveins: $vgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767
+ ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]]
+ ; GFX9-LABEL: name: fabs_s16_vv
+ ; GFX9: liveins: $vgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767
+ ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX9-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]]
+ ; GFX10-LABEL: name: fabs_s16_vv
+ ; GFX10: liveins: $vgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767
+ ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX10-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FABS %1
; GCN: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]]
; GCN: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16)
; GCN: $vgpr0 = COPY [[COPY1]](s32)
+ ; SI-LABEL: name: fabs_s16_vs
+ ; SI: liveins: $sgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; SI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]]
+ ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16)
+ ; SI-NEXT: $vgpr0 = COPY [[COPY1]](s32)
+ ; VI-LABEL: name: fabs_s16_vs
+ ; VI: liveins: $sgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]]
+ ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16)
+ ; VI-NEXT: $vgpr0 = COPY [[COPY1]](s32)
+ ; GFX9-LABEL: name: fabs_s16_vs
+ ; GFX9: liveins: $sgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]]
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32)
+ ; GFX10-LABEL: name: fabs_s16_vs
+ ; GFX10: liveins: $sgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]]
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16)
+ ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]](s32)
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FABS %1
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
; GCN: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
; GCN: $vgpr0 = COPY [[V_AND_B32_e32_]]
+ ; SI-LABEL: name: fabs_v2s16_vv
+ ; SI: liveins: $vgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
+ ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; SI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]]
+ ; VI-LABEL: name: fabs_v2s16_vv
+ ; VI: liveins: $vgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
+ ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]]
+ ; GFX9-LABEL: name: fabs_v2s16_vv
+ ; GFX9: liveins: $vgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
+ ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX9-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]]
+ ; GFX10-LABEL: name: fabs_v2s16_vv
+ ; GFX10: liveins: $vgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
+ ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX10-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]]
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = G_FABS %0
$vgpr0 = COPY %1
; GCN: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
; GCN: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]]
; GCN: $vgpr0 = COPY [[FABS]](<2 x s16>)
+ ; SI-LABEL: name: fabs_v2s16_vs
+ ; SI: liveins: $sgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]]
+ ; SI-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>)
+ ; VI-LABEL: name: fabs_v2s16_vs
+ ; VI: liveins: $sgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]]
+ ; VI-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>)
+ ; GFX9-LABEL: name: fabs_v2s16_vs
+ ; GFX9: liveins: $sgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]]
+ ; GFX9-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>)
+ ; GFX10-LABEL: name: fabs_v2s16_vs
+ ; GFX10: liveins: $sgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]]
+ ; GFX10-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>)
%0:sgpr(<2 x s16>) = COPY $sgpr0
%1:vgpr(<2 x s16>) = G_FABS %0
$vgpr0 = COPY %1
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; SI-LABEL: name: fabs_s64_ss
+ ; SI: liveins: $sgpr0_sgpr1
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+ ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
+ ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; VI-LABEL: name: fabs_s64_ss
+ ; VI: liveins: $sgpr0_sgpr1
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+ ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
+ ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX9-LABEL: name: fabs_s64_ss
+ ; GFX9: liveins: $sgpr0_sgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX10-LABEL: name: fabs_s64_ss
+ ; GFX10: liveins: $sgpr0_sgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_FABS %0
S_ENDPGM 0, implicit %1
; GCN-LABEL: name: fabs_s64_vv
; GCN: liveins: $vgpr0_vgpr1
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; SI-LABEL: name: fabs_s64_vv
+ ; SI: liveins: $vgpr0_vgpr1
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec
+ ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1
+ ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; VI-LABEL: name: fabs_s64_vv
+ ; VI: liveins: $vgpr0_vgpr1
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec
+ ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1
+ ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX9-LABEL: name: fabs_s64_vv
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX10-LABEL: name: fabs_s64_vv
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_FABS %0
S_ENDPGM 0, implicit %1
; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
; GCN: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]]
; GCN: S_ENDPGM 0, implicit [[FABS]](s64)
+ ; SI-LABEL: name: fabs_s64_vs
+ ; SI: liveins: $sgpr0_sgpr1
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]]
+ ; SI-NEXT: S_ENDPGM 0, implicit [[FABS]](s64)
+ ; VI-LABEL: name: fabs_s64_vs
+ ; VI: liveins: $sgpr0_sgpr1
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]]
+ ; VI-NEXT: S_ENDPGM 0, implicit [[FABS]](s64)
+ ; GFX9-LABEL: name: fabs_s64_vs
+ ; GFX9: liveins: $sgpr0_sgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]]
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[FABS]](s64)
+ ; GFX10-LABEL: name: fabs_s64_vs
+ ; GFX10: liveins: $sgpr0_sgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]]
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[FABS]](s64)
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:vgpr(s64) = G_FABS %0
S_ENDPGM 0, implicit %1
; GCN-LABEL: name: fabs_s64_vv_no_src_constraint
; GCN: liveins: $vgpr0_vgpr1
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
- ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; SI-LABEL: name: fabs_s64_vv_no_src_constraint
+ ; SI: liveins: $vgpr0_vgpr1
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
+ ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1
+ ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; VI-LABEL: name: fabs_s64_vv_no_src_constraint
+ ; VI: liveins: $vgpr0_vgpr1
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
+ ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1
+ ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX9-LABEL: name: fabs_s64_vv_no_src_constraint
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX10-LABEL: name: fabs_s64_vv_no_src_constraint
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:vgpr(s64) = IMPLICIT_DEF
%1:vgpr(s64) = G_FABS %0:vgpr(s64)
S_ENDPGM 0, implicit %1
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; SI-LABEL: name: fabs_s64_ss_no_src_constraint
+ ; SI: liveins: $sgpr0_sgpr1
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0
+ ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
+ ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; VI-LABEL: name: fabs_s64_ss_no_src_constraint
+ ; VI: liveins: $sgpr0_sgpr1
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0
+ ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
+ ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX9-LABEL: name: fabs_s64_ss_no_src_constraint
+ ; GFX9: liveins: $sgpr0_sgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX10-LABEL: name: fabs_s64_ss_no_src_constraint
+ ; GFX10: liveins: $sgpr0_sgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:sgpr(s64) = IMPLICIT_DEF
%1:sgpr(s64) = G_FABS %0:sgpr(s64)
S_ENDPGM 0, implicit %1
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX8 %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
---
liveins: $vgpr0
; GFX8-LABEL: name: fcanonicalize_f16_denorm
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX8: S_ENDPGM 0, implicit %2
+ ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit %2
; GFX9-LABEL: name: fcanonicalize_f16_denorm
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX9: S_ENDPGM 0, implicit %2
+ ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-NEXT: S_ENDPGM 0, implicit %2
+ ; GFX10-LABEL: name: fcanonicalize_f16_denorm
+ ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit %2
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FCANONICALIZE %1
liveins: $vgpr0
; GFX8-LABEL: name: fcanonicalize_f16_flush
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: %2:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 15360, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX8: S_ENDPGM 0, implicit %2
+ ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 15360, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit %2
; GFX9-LABEL: name: fcanonicalize_f16_flush
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX9: S_ENDPGM 0, implicit %2
+ ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-NEXT: S_ENDPGM 0, implicit %2
+ ; GFX10-LABEL: name: fcanonicalize_f16_flush
+ ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit %2
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FCANONICALIZE %1
; GFX8-LABEL: name: fcanonicalize_f32_denorm
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: %1:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX8: S_ENDPGM 0, implicit %1
+ ; GFX8-NEXT: %1:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit %1
; GFX9-LABEL: name: fcanonicalize_f32_denorm
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX9: S_ENDPGM 0, implicit %1
+ ; GFX9-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-NEXT: S_ENDPGM 0, implicit %1
+ ; GFX10-LABEL: name: fcanonicalize_f32_denorm
+ ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit %1
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FCANONICALIZE %0
S_ENDPGM 0, implicit %1
; GFX8-LABEL: name: fcanonicalize_f32_flush
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: %1:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX8: S_ENDPGM 0, implicit %1
+ ; GFX8-NEXT: %1:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit %1
; GFX9-LABEL: name: fcanonicalize_f32_flush
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX9: S_ENDPGM 0, implicit %1
+ ; GFX9-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-NEXT: S_ENDPGM 0, implicit %1
+ ; GFX10-LABEL: name: fcanonicalize_f32_flush
+ ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: %1:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit %1
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FCANONICALIZE %0
S_ENDPGM 0, implicit %1
; GFX8-LABEL: name: fcanonicalize_v2f16_denorm
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GFX8: S_ENDPGM 0, implicit %1
+ ; GFX8-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit %1
; GFX9-LABEL: name: fcanonicalize_v2f16_denorm
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GFX9: S_ENDPGM 0, implicit %1
+ ; GFX9-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX9-NEXT: S_ENDPGM 0, implicit %1
+ ; GFX10-LABEL: name: fcanonicalize_v2f16_denorm
+ ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit %1
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = G_FCANONICALIZE %0
S_ENDPGM 0, implicit %1
; GFX8-LABEL: name: fcanonicalize_v2f16_flush
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: %1:vgpr_32 = nofpexcept V_PK_MUL_F16 0, 15360, 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GFX8: S_ENDPGM 0, implicit %1
+ ; GFX8-NEXT: %1:vgpr_32 = nofpexcept V_PK_MUL_F16 0, 15360, 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit %1
; GFX9-LABEL: name: fcanonicalize_v2f16_flush
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GFX9: S_ENDPGM 0, implicit %1
+ ; GFX9-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX9-NEXT: S_ENDPGM 0, implicit %1
+ ; GFX10-LABEL: name: fcanonicalize_v2f16_flush
+ ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit %1
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = G_FCANONICALIZE %0
S_ENDPGM 0, implicit %1
; GFX8-LABEL: name: fcanonicalize_f64_denorm
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX8: S_ENDPGM 0, implicit %1
+ ; GFX8-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit %1
; GFX9-LABEL: name: fcanonicalize_f64_denorm
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX9: S_ENDPGM 0, implicit %1
+ ; GFX9-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-NEXT: S_ENDPGM 0, implicit %1
+ ; GFX10-LABEL: name: fcanonicalize_f64_denorm
+ ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit %1
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_FCANONICALIZE %0
S_ENDPGM 0, implicit %1
; GFX8-LABEL: name: fcanonicalize_f64_flush
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX8: %1:vreg_64 = nofpexcept V_MUL_F64_e64 0, 4607182418800017408, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX8: S_ENDPGM 0, implicit %1
+ ; GFX8-NEXT: %1:vreg_64 = nofpexcept V_MUL_F64_e64 0, 4607182418800017408, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit %1
; GFX9-LABEL: name: fcanonicalize_f64_flush
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX9: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX9: S_ENDPGM 0, implicit %1
+ ; GFX9-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-NEXT: S_ENDPGM 0, implicit %1
+ ; GFX10-LABEL: name: fcanonicalize_f64_flush
+ ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: %1:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit %1
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_FCANONICALIZE %0
S_ENDPGM 0, implicit %1
liveins: $vgpr0
; GFX8-LABEL: name: fcanonicalize_fabs_f32_denorm
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX8: S_ENDPGM 0, implicit %2
+ ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit %2
; GFX9-LABEL: name: fcanonicalize_fabs_f32_denorm
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX9: S_ENDPGM 0, implicit %2
+ ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-NEXT: S_ENDPGM 0, implicit %2
+ ; GFX10-LABEL: name: fcanonicalize_fabs_f32_denorm
+ ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit %2
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FABS %0
%2:vgpr(s32) = G_FCANONICALIZE %1
liveins: $vgpr0
; GFX8-LABEL: name: fcanonicalize_fabs_f32_flush
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX8: S_ENDPGM 0, implicit %2
+ ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit %2
; GFX9-LABEL: name: fcanonicalize_fabs_f32_flush
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX9: S_ENDPGM 0, implicit %2
+ ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-NEXT: S_ENDPGM 0, implicit %2
+ ; GFX10-LABEL: name: fcanonicalize_fabs_f32_flush
+ ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit %2
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FABS %0
%2:vgpr(s32) = G_FCANONICALIZE %1
liveins: $vgpr0
; GFX8-LABEL: name: fcanonicalize_fneg_f32_denorm
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX8: S_ENDPGM 0, implicit %2
+ ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit %2
; GFX9-LABEL: name: fcanonicalize_fneg_f32_denorm
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX9: S_ENDPGM 0, implicit %2
+ ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-NEXT: S_ENDPGM 0, implicit %2
+ ; GFX10-LABEL: name: fcanonicalize_fneg_f32_denorm
+ ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit %2
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FNEG %0
%2:vgpr(s32) = G_FCANONICALIZE %1
liveins: $vgpr0
; GFX8-LABEL: name: fcanonicalize_fneg_f32_flush
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX8: S_ENDPGM 0, implicit %2
+ ; GFX8-NEXT: %2:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit %2
; GFX9-LABEL: name: fcanonicalize_fneg_f32_flush
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX9: S_ENDPGM 0, implicit %2
+ ; GFX9-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-NEXT: S_ENDPGM 0, implicit %2
+ ; GFX10-LABEL: name: fcanonicalize_fneg_f32_flush
+ ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: %2:vgpr_32 = nofpexcept V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit %2
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FNEG %0
%2:vgpr(s32) = G_FCANONICALIZE %1
liveins: $vgpr0
; GFX8-LABEL: name: fcanonicalize_fneg_fabs_f32_denorm
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
- ; GFX8: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GFX8: %3:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e32_]], 0, 0, implicit $mode, implicit $exec
- ; GFX8: S_ENDPGM 0, implicit %3
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX8-NEXT: %3:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit %3
; GFX9-LABEL: name: fcanonicalize_fneg_fabs_f32_denorm
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
- ; GFX9: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GFX9: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e32_]], 2, [[V_XOR_B32_e32_]], 0, 0, implicit $mode, implicit $exec
- ; GFX9: S_ENDPGM 0, implicit %3
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX9-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-NEXT: S_ENDPGM 0, implicit %3
+ ; GFX10-LABEL: name: fcanonicalize_fneg_fabs_f32_denorm
+ ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX10-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit %3
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FNEG %0
%2:vgpr(s32) = G_FABS %1
liveins: $vgpr0
; GFX8-LABEL: name: fcanonicalize_fneg_fabs_f32_flush
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
- ; GFX8: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GFX8: %3:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e32_]], 0, 0, implicit $mode, implicit $exec
- ; GFX8: S_ENDPGM 0, implicit %3
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX8-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX8-NEXT: %3:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit %3
; GFX9-LABEL: name: fcanonicalize_fneg_fabs_f32_flush
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
- ; GFX9: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GFX9: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e32_]], 2, [[V_XOR_B32_e32_]], 0, 0, implicit $mode, implicit $exec
- ; GFX9: S_ENDPGM 0, implicit %3
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX9-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX9-NEXT: S_ENDPGM 0, implicit %3
+ ; GFX10-LABEL: name: fcanonicalize_fneg_fabs_f32_flush
+ ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX10-NEXT: %3:vgpr_32 = nofpexcept V_MAX_F32_e64 2, [[V_XOR_B32_e64_]], 2, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit %3
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FNEG %0
%2:vgpr(s32) = G_FABS %1
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
-# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
-# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=SI %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=VI %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
---
name: fneg_s32_ss
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
; GCN: $sgpr0 = COPY [[S_XOR_B32_]]
+ ; SI-LABEL: name: fneg_s32_ss
+ ; SI: liveins: $sgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; SI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]]
+ ; VI-LABEL: name: fneg_s32_ss
+ ; VI: liveins: $sgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; VI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]]
+ ; GFX9-LABEL: name: fneg_s32_ss
+ ; GFX9: liveins: $sgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX9-NEXT: $sgpr0 = COPY [[S_XOR_B32_]]
+ ; GFX10-LABEL: name: fneg_s32_ss
+ ; GFX10: liveins: $sgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX10-NEXT: $sgpr0 = COPY [[S_XOR_B32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = G_FNEG %0
$sgpr0 = COPY %1
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]]
+ ; SI-LABEL: name: fneg_s32_vv
+ ; SI: liveins: $vgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]]
+ ; VI-LABEL: name: fneg_s32_vv
+ ; VI: liveins: $vgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]]
+ ; GFX9-LABEL: name: fneg_s32_vv
+ ; GFX9: liveins: $vgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]]
+ ; GFX10-LABEL: name: fneg_s32_vv
+ ; GFX10: liveins: $vgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FNEG %0
$vgpr0 = COPY %1
; GCN: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; GCN: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]]
; GCN: $vgpr0 = COPY [[FNEG]](s32)
+ ; SI-LABEL: name: fneg_s32_vs
+ ; SI: liveins: $sgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]]
+ ; SI-NEXT: $vgpr0 = COPY [[FNEG]](s32)
+ ; VI-LABEL: name: fneg_s32_vs
+ ; VI: liveins: $sgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]]
+ ; VI-NEXT: $vgpr0 = COPY [[FNEG]](s32)
+ ; GFX9-LABEL: name: fneg_s32_vs
+ ; GFX9: liveins: $sgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]]
+ ; GFX9-NEXT: $vgpr0 = COPY [[FNEG]](s32)
+ ; GFX10-LABEL: name: fneg_s32_vs
+ ; GFX10: liveins: $sgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]]
+ ; GFX10-NEXT: $vgpr0 = COPY [[FNEG]](s32)
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = G_FNEG %0
$vgpr0 = COPY %1
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
; GCN: $sgpr0 = COPY [[S_XOR_B32_]]
+ ; SI-LABEL: name: fneg_s16_ss
+ ; SI: liveins: $sgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; SI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]]
+ ; VI-LABEL: name: fneg_s16_ss
+ ; VI: liveins: $sgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; VI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]]
+ ; GFX9-LABEL: name: fneg_s16_ss
+ ; GFX9: liveins: $sgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX9-NEXT: $sgpr0 = COPY [[S_XOR_B32_]]
+ ; GFX10-LABEL: name: fneg_s16_ss
+ ; GFX10: liveins: $sgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX10-NEXT: $sgpr0 = COPY [[S_XOR_B32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:sgpr(s16) = G_FNEG %1
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]]
+ ; SI-LABEL: name: fneg_s16_vv
+ ; SI: liveins: $vgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]]
+ ; VI-LABEL: name: fneg_s16_vv
+ ; VI: liveins: $vgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]]
+ ; GFX9-LABEL: name: fneg_s16_vv
+ ; GFX9: liveins: $vgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]]
+ ; GFX10-LABEL: name: fneg_s16_vv
+ ; GFX10: liveins: $vgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FNEG %1
; GCN: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]]
; GCN: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16)
; GCN: $vgpr0 = COPY [[COPY1]](s32)
+ ; SI-LABEL: name: fneg_s16_vs
+ ; SI: liveins: $sgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; SI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]]
+ ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16)
+ ; SI-NEXT: $vgpr0 = COPY [[COPY1]](s32)
+ ; VI-LABEL: name: fneg_s16_vs
+ ; VI: liveins: $sgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]]
+ ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16)
+ ; VI-NEXT: $vgpr0 = COPY [[COPY1]](s32)
+ ; GFX9-LABEL: name: fneg_s16_vs
+ ; GFX9: liveins: $sgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]]
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32)
+ ; GFX10-LABEL: name: fneg_s16_vs
+ ; GFX10: liveins: $sgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]]
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16)
+ ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]](s32)
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FNEG %1
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
; GCN: $sgpr0 = COPY [[S_XOR_B32_]]
+ ; SI-LABEL: name: fneg_v2s16_ss
+ ; SI: liveins: $sgpr0_sgpr1
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
+ ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; SI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]]
+ ; VI-LABEL: name: fneg_v2s16_ss
+ ; VI: liveins: $sgpr0_sgpr1
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
+ ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; VI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]]
+ ; GFX9-LABEL: name: fneg_v2s16_ss
+ ; GFX9: liveins: $sgpr0_sgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
+ ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX9-NEXT: $sgpr0 = COPY [[S_XOR_B32_]]
+ ; GFX10-LABEL: name: fneg_v2s16_ss
+ ; GFX10: liveins: $sgpr0_sgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
+ ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX10-NEXT: $sgpr0 = COPY [[S_XOR_B32_]]
%0:sgpr(<2 x s16>) = COPY $sgpr0
%1:sgpr(<2 x s16>) = G_FNEG %0
$sgpr0 = COPY %1
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]]
+ ; SI-LABEL: name: fneg_v2s16_vv
+ ; SI: liveins: $vgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
+ ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]]
+ ; VI-LABEL: name: fneg_v2s16_vv
+ ; VI: liveins: $vgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
+ ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]]
+ ; GFX9-LABEL: name: fneg_v2s16_vv
+ ; GFX9: liveins: $vgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
+ ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]]
+ ; GFX10-LABEL: name: fneg_v2s16_vv
+ ; GFX10: liveins: $vgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
+ ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]]
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = G_FNEG %0
$vgpr0 = COPY %1
; GCN: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
; GCN: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]]
; GCN: $vgpr0 = COPY [[FNEG]](<2 x s16>)
+ ; SI-LABEL: name: fneg_v2s16_vs
+ ; SI: liveins: $sgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]]
+ ; SI-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>)
+ ; VI-LABEL: name: fneg_v2s16_vs
+ ; VI: liveins: $sgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]]
+ ; VI-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>)
+ ; GFX9-LABEL: name: fneg_v2s16_vs
+ ; GFX9: liveins: $sgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]]
+ ; GFX9-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>)
+ ; GFX10-LABEL: name: fneg_v2s16_vs
+ ; GFX10: liveins: $sgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]]
+ ; GFX10-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>)
%0:sgpr(<2 x s16>) = COPY $sgpr0
%1:vgpr(<2 x s16>) = G_FNEG %0
$vgpr0 = COPY %1
; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; SI-LABEL: name: fneg_s64_ss
+ ; SI: liveins: $sgpr0_sgpr1
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+ ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1
+ ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; VI-LABEL: name: fneg_s64_ss
+ ; VI: liveins: $sgpr0_sgpr1
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+ ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1
+ ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX9-LABEL: name: fneg_s64_ss
+ ; GFX9: liveins: $sgpr0_sgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX10-LABEL: name: fneg_s64_ss
+ ; GFX10: liveins: $sgpr0_sgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_FNEG %0
S_ENDPGM 0, implicit %1
; GCN-LABEL: name: fneg_s64_vv
; GCN: liveins: $vgpr0_vgpr1
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483648, implicit $exec
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY1]], implicit $exec
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e32_]], %subreg.sub1
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; SI-LABEL: name: fneg_s64_vv
+ ; SI: liveins: $vgpr0_vgpr1
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec
+ ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1
+ ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; VI-LABEL: name: fneg_s64_vv
+ ; VI: liveins: $vgpr0_vgpr1
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec
+ ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1
+ ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX9-LABEL: name: fneg_s64_vv
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX10-LABEL: name: fneg_s64_vv
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_FNEG %0
S_ENDPGM 0, implicit %1
; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
; GCN: [[FNEG:%[0-9]+]]:vgpr(s64) = G_FNEG [[COPY]]
; GCN: S_ENDPGM 0, implicit [[FNEG]](s64)
+ ; SI-LABEL: name: fneg_s64_vs
+ ; SI: liveins: $sgpr0_sgpr1
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr(s64) = G_FNEG [[COPY]]
+ ; SI-NEXT: S_ENDPGM 0, implicit [[FNEG]](s64)
+ ; VI-LABEL: name: fneg_s64_vs
+ ; VI: liveins: $sgpr0_sgpr1
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr(s64) = G_FNEG [[COPY]]
+ ; VI-NEXT: S_ENDPGM 0, implicit [[FNEG]](s64)
+ ; GFX9-LABEL: name: fneg_s64_vs
+ ; GFX9: liveins: $sgpr0_sgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr(s64) = G_FNEG [[COPY]]
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[FNEG]](s64)
+ ; GFX10-LABEL: name: fneg_s64_vs
+ ; GFX10: liveins: $sgpr0_sgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[FNEG:%[0-9]+]]:vgpr(s64) = G_FNEG [[COPY]]
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[FNEG]](s64)
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:vgpr(s64) = G_FNEG %0
S_ENDPGM 0, implicit %1
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GCN: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
; GCN: S_ENDPGM 0, implicit [[S_OR_B32_]]
+ ; SI-LABEL: name: fneg_fabs_s32_ss
+ ; SI: liveins: $sgpr0_sgpr1
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; SI-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]]
+ ; VI-LABEL: name: fneg_fabs_s32_ss
+ ; VI: liveins: $sgpr0_sgpr1
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; VI-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]]
+ ; GFX9-LABEL: name: fneg_fabs_s32_ss
+ ; GFX9: liveins: $sgpr0_sgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]]
+ ; GFX10-LABEL: name: fneg_fabs_s32_ss
+ ; GFX10: liveins: $sgpr0_sgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = G_FABS %0
%2:sgpr(s32) = G_FNEG %1
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_XOR_B32_e32_]]
+ ; SI-LABEL: name: fneg_fabs_s32_vv
+ ; SI: liveins: $vgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; SI-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]]
+ ; VI-LABEL: name: fneg_fabs_s32_vv
+ ; VI: liveins: $vgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; VI-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]]
+ ; GFX9-LABEL: name: fneg_fabs_s32_vv
+ ; GFX9: liveins: $vgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]]
+ ; GFX10-LABEL: name: fneg_fabs_s32_vv
+ ; GFX10: liveins: $vgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FABS %0
%2:vgpr(s32) = G_FNEG %0
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e32 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_XOR_B32_e32_]](s32)
+ ; SI-LABEL: name: fneg_fabs_s32_vs
+ ; SI: liveins: $sgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]]
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648
+ ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec
+ ; SI-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]](s32)
+ ; VI-LABEL: name: fneg_fabs_s32_vs
+ ; VI: liveins: $sgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]]
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648
+ ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec
+ ; VI-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]](s32)
+ ; GFX9-LABEL: name: fneg_fabs_s32_vs
+ ; GFX9: liveins: $sgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]]
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648
+ ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]](s32)
+ ; GFX10-LABEL: name: fneg_fabs_s32_vs
+ ; GFX10: liveins: $sgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]]
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648
+ ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]](s32)
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = G_FABS %0
%2:vgpr(s32) = G_FNEG %1
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GCN: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
; GCN: $sgpr0 = COPY [[S_OR_B32_]]
+ ; SI-LABEL: name: fneg_fabs_s16_ss
+ ; SI: liveins: $sgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; SI-NEXT: $sgpr0 = COPY [[S_OR_B32_]]
+ ; VI-LABEL: name: fneg_fabs_s16_ss
+ ; VI: liveins: $sgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; VI-NEXT: $sgpr0 = COPY [[S_OR_B32_]]
+ ; GFX9-LABEL: name: fneg_fabs_s16_ss
+ ; GFX9: liveins: $sgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX9-NEXT: $sgpr0 = COPY [[S_OR_B32_]]
+ ; GFX10-LABEL: name: fneg_fabs_s16_ss
+ ; GFX10: liveins: $sgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX10-NEXT: $sgpr0 = COPY [[S_OR_B32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:sgpr(s16) = G_FABS %1
; GCN: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e32_]]
; GCN: $vgpr0 = COPY [[COPY1]]
+ ; SI-LABEL: name: fneg_fabs_s16_vv
+ ; SI: liveins: $vgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; SI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e64_]]
+ ; SI-NEXT: $vgpr0 = COPY [[COPY1]]
+ ; VI-LABEL: name: fneg_fabs_s16_vv
+ ; VI: liveins: $vgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; VI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e64_]]
+ ; VI-NEXT: $vgpr0 = COPY [[COPY1]]
+ ; GFX9-LABEL: name: fneg_fabs_s16_vv
+ ; GFX9: liveins: $vgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e64_]]
+ ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]]
+ ; GFX10-LABEL: name: fneg_fabs_s16_vv
+ ; GFX10: liveins: $vgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; GFX10-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e64_]]
+ ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FABS %1
; GCN: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]]
; GCN: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16)
; GCN: $vgpr0 = COPY [[COPY1]](s32)
+ ; SI-LABEL: name: fneg_fabs_s16_vs
+ ; SI: liveins: $sgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; SI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; SI-NEXT: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]]
+ ; SI-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]]
+ ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16)
+ ; SI-NEXT: $vgpr0 = COPY [[COPY1]](s32)
+ ; VI-LABEL: name: fneg_fabs_s16_vs
+ ; VI: liveins: $sgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; VI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; VI-NEXT: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]]
+ ; VI-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]]
+ ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16)
+ ; VI-NEXT: $vgpr0 = COPY [[COPY1]](s32)
+ ; GFX9-LABEL: name: fneg_fabs_s16_vs
+ ; GFX9: liveins: $sgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX9-NEXT: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]]
+ ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]]
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32)
+ ; GFX10-LABEL: name: fneg_fabs_s16_vs
+ ; GFX10: liveins: $sgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX10-NEXT: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]]
+ ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]]
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16)
+ ; GFX10-NEXT: $vgpr0 = COPY [[COPY1]](s32)
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:sgpr(s16) = G_FNEG %1
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
; GCN: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
; GCN: $sgpr0 = COPY [[S_OR_B32_]]
+ ; SI-LABEL: name: fneg_fabs_v2s16_ss
+ ; SI: liveins: $sgpr0_sgpr1
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
+ ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; SI-NEXT: $sgpr0 = COPY [[S_OR_B32_]]
+ ; VI-LABEL: name: fneg_fabs_v2s16_ss
+ ; VI: liveins: $sgpr0_sgpr1
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
+ ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; VI-NEXT: $sgpr0 = COPY [[S_OR_B32_]]
+ ; GFX9-LABEL: name: fneg_fabs_v2s16_ss
+ ; GFX9: liveins: $sgpr0_sgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
+ ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX9-NEXT: $sgpr0 = COPY [[S_OR_B32_]]
+ ; GFX10-LABEL: name: fneg_fabs_v2s16_ss
+ ; GFX10: liveins: $sgpr0_sgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
+ ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX10-NEXT: $sgpr0 = COPY [[S_OR_B32_]]
%0:sgpr(<2 x s16>) = COPY $sgpr0
%1:sgpr(<2 x s16>) = G_FABS %0
%2:sgpr(<2 x s16>) = G_FNEG %1
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]]
+ ; SI-LABEL: name: fneg_fabs_v2s16_vv
+ ; SI: liveins: $vgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
+ ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]]
+ ; VI-LABEL: name: fneg_fabs_v2s16_vv
+ ; VI: liveins: $vgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
+ ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]]
+ ; GFX9-LABEL: name: fneg_fabs_v2s16_vv
+ ; GFX9: liveins: $vgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
+ ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]]
+ ; GFX10-LABEL: name: fneg_fabs_v2s16_vv
+ ; GFX10: liveins: $vgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
+ ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]]
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(<2 x s16>) = G_FABS %0
%2:vgpr(<2 x s16>) = G_FNEG %0
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e32 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec
; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]](<2 x s16>)
+ ; SI-LABEL: name: fneg_fabs_v2s16_vs
+ ; SI: liveins: $sgpr0
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]]
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416
+ ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec
+ ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>)
+ ; VI-LABEL: name: fneg_fabs_v2s16_vs
+ ; VI: liveins: $sgpr0
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]]
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416
+ ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>)
+ ; GFX9-LABEL: name: fneg_fabs_v2s16_vs
+ ; GFX9: liveins: $sgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]]
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416
+ ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec
+ ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>)
+ ; GFX10-LABEL: name: fneg_fabs_v2s16_vs
+ ; GFX10: liveins: $sgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; GFX10-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]]
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416
+ ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec
+ ; GFX10-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>)
%0:sgpr(<2 x s16>) = COPY $sgpr0
%1:vgpr(<2 x s16>) = G_FABS %0
%2:vgpr(<2 x s16>) = G_FNEG %1
; GCN: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; SI-LABEL: name: fneg_fabs_s64_ss
+ ; SI: liveins: $sgpr0_sgpr1
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+ ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1
+ ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; VI-LABEL: name: fneg_fabs_s64_ss
+ ; VI: liveins: $sgpr0_sgpr1
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+ ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1
+ ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX9-LABEL: name: fneg_fabs_s64_ss
+ ; GFX9: liveins: $sgpr0_sgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX10-LABEL: name: fneg_fabs_s64_ss
+ ; GFX10: liveins: $sgpr0_sgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_FABS %0
%2:sgpr(s64) = G_FNEG %1
; GCN-LABEL: name: fneg_fabs_s64_vv
; GCN: liveins: $vgpr0_vgpr1
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483648, implicit $exec
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GCN: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GCN: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_MOV_B32_]], [[COPY1]], implicit $exec
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e32_]], %subreg.sub1
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; SI-LABEL: name: fneg_fabs_s64_vv
+ ; SI: liveins: $vgpr0_vgpr1
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; SI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec
+ ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1
+ ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; VI-LABEL: name: fneg_fabs_s64_vv
+ ; VI: liveins: $vgpr0_vgpr1
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; VI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec
+ ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1
+ ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX9-LABEL: name: fneg_fabs_s64_vv
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX10-LABEL: name: fneg_fabs_s64_vv
+ ; GFX10: liveins: $vgpr0_vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GFX10-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_FABS %0
%2:vgpr(s64) = G_FNEG %1
; GCN: liveins: $sgpr0_sgpr1
; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
; GCN: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]]
- ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 2147483648, implicit $exec
; GCN: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub1(s64)
- ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e32 [[COPY1]](s32), [[V_MOV_B32_e32_]](s32), implicit $exec
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 2147483648
+ ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e32 [[S_MOV_B32_]](s32), [[COPY1]](s32), implicit $exec
; GCN: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64)
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e32_]](s16), %subreg.sub1
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]](s64)
+ ; SI-LABEL: name: fneg_fabs_s64_vs
+ ; SI: liveins: $sgpr0_sgpr1
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; SI-NEXT: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]]
+ ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub1(s64)
+ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 2147483648
+ ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e64 [[S_MOV_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64)
+ ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e64_]](s16), %subreg.sub1
+ ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]](s64)
+ ; VI-LABEL: name: fneg_fabs_s64_vs
+ ; VI: liveins: $sgpr0_sgpr1
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; VI-NEXT: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]]
+ ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub1(s64)
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 2147483648
+ ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e64 [[S_MOV_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64)
+ ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e64_]](s16), %subreg.sub1
+ ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]](s64)
+ ; GFX9-LABEL: name: fneg_fabs_s64_vs
+ ; GFX9: liveins: $sgpr0_sgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; GFX9-NEXT: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]]
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub1(s64)
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 2147483648
+ ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e64 [[S_MOV_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64)
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e64_]](s16), %subreg.sub1
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]](s64)
+ ; GFX10-LABEL: name: fneg_fabs_s64_vs
+ ; GFX10: liveins: $sgpr0_sgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; GFX10-NEXT: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]]
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub1(s64)
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 2147483648
+ ; GFX10-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e64 [[S_MOV_B32_]](s32), [[COPY1]](s32), implicit $exec
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64)
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e64_]](s16), %subreg.sub1
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]](s64)
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:vgpr(s64) = G_FABS %0
%2:vgpr(s64) = G_FNEG %1
; GCN-LABEL: name: fptosi_s32_to_s32_vv
; GCN: liveins: $vgpr0
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN: $vgpr0 = COPY %1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY %1
; VI-LABEL: name: fptosi_s32_to_s32_vv
; VI: liveins: $vgpr0
- ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; VI: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; VI: $vgpr0 = COPY %1
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; VI-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY %1
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FPTOSI %0
$vgpr0 = COPY %1
; GCN-LABEL: name: fptosi_s32_to_s32_vs
; GCN: liveins: $sgpr0
- ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN: $vgpr0 = COPY %1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY %1
; VI-LABEL: name: fptosi_s32_to_s32_vs
; VI: liveins: $sgpr0
- ; VI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; VI: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; VI: $vgpr0 = COPY %1
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; VI-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY %1
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = G_FPTOSI %0
$vgpr0 = COPY %1
; GCN-LABEL: name: fptosi_s32_to_s32_fneg_vv
; GCN: liveins: $vgpr0
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN: $vgpr0 = COPY %2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY %2
; VI-LABEL: name: fptosi_s32_to_s32_fneg_vv
; VI: liveins: $vgpr0
- ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; VI: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; VI: $vgpr0 = COPY %2
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY %2
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FNEG %0
%2:vgpr(s32) = G_FPTOSI %1
; GCN-LABEL: name: fptosi_s16_to_s32_vv
; GCN: liveins: $vgpr0
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GCN: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec
- ; GCN: $vgpr0 = COPY %2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY %2
; VI-LABEL: name: fptosi_s16_to_s32_vv
; VI: liveins: $vgpr0
- ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; VI: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; VI: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec
- ; VI: $vgpr0 = COPY %2
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+ ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY %2
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_FPTOSI %1
; GCN-LABEL: name: fptosi_s16_to_s32_vs
; GCN: liveins: $sgpr0
- ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GCN: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec
- ; GCN: $vgpr0 = COPY %2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY %2
; VI-LABEL: name: fptosi_s16_to_s32_vs
; VI: liveins: $sgpr0
- ; VI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; VI: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; VI: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec
- ; VI: $vgpr0 = COPY %2
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+ ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY %2
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_FPTOSI %1
; GCN-LABEL: name: fptosi_s16_to_s32_fneg_vv
; GCN: liveins: $vgpr0
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
- ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec
- ; GCN: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
- ; GCN: $vgpr0 = COPY %3
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec
+ ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY %3
; VI-LABEL: name: fptosi_s16_to_s32_fneg_vv
; VI: liveins: $vgpr0
- ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; VI: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
- ; VI: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; VI: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec
- ; VI: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
- ; VI: $vgpr0 = COPY %3
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec
+ ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY %3
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FNEG %1
; GCN-LABEL: name: fptosi_s16_to_s1_vv
; GCN: liveins: $vgpr0
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GCN: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
- ; GCN: S_ENDPGM 0, implicit %2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit %2
; VI-LABEL: name: fptosi_s16_to_s1_vv
; VI: liveins: $vgpr0
- ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; VI: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; VI: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
- ; VI: S_ENDPGM 0, implicit %2
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+ ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
+ ; VI-NEXT: S_ENDPGM 0, implicit %2
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_FPTOSI %1
; GCN-LABEL: name: fptosi_s16_to_s1_vs
; GCN: liveins: $sgpr0
- ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GCN: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
- ; GCN: S_ENDPGM 0, implicit %2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit %2
; VI-LABEL: name: fptosi_s16_to_s1_vs
; VI: liveins: $sgpr0
- ; VI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; VI: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; VI: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
- ; VI: S_ENDPGM 0, implicit %2
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+ ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
+ ; VI-NEXT: S_ENDPGM 0, implicit %2
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_FPTOSI %1
; GCN-LABEL: name: fptosi_s16_to_s1_fneg_vv
; GCN: liveins: $vgpr0
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
- ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GCN: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec
- ; GCN: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec
- ; GCN: S_ENDPGM 0, implicit %3
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GCN-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec
+ ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit %3
; VI-LABEL: name: fptosi_s16_to_s1_fneg_vv
; VI: liveins: $vgpr0
- ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; VI: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
- ; VI: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; VI: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec
- ; VI: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec
- ; VI: S_ENDPGM 0, implicit %3
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; VI-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec
+ ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec
+ ; VI-NEXT: S_ENDPGM 0, implicit %3
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FNEG %1
; GCN-LABEL: name: fptoui
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
- ; GCN: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; GCN: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
- ; GCN: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
+ ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+ ; GCN-NEXT: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
; VI-LABEL: name: fptoui
; VI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; VI: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; VI: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
- ; VI: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; VI: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; VI: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
- ; VI: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+ ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; VI-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
+ ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+ ; VI-NEXT: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
; GCN-LABEL: name: fptoui_s16_to_s32_vv
; GCN: liveins: $vgpr0
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GCN: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec
- ; GCN: $vgpr0 = COPY %2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY %2
; VI-LABEL: name: fptoui_s16_to_s32_vv
; VI: liveins: $vgpr0
- ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; VI: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; VI: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec
- ; VI: $vgpr0 = COPY %2
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+ ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY %2
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_FPTOUI %1
; GCN-LABEL: name: fptoui_s16_to_s32_vs
; GCN: liveins: $sgpr0
- ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GCN: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec
- ; GCN: $vgpr0 = COPY %2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY %2
; VI-LABEL: name: fptoui_s16_to_s32_vs
; VI: liveins: $sgpr0
- ; VI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; VI: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; VI: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec
- ; VI: $vgpr0 = COPY %2
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+ ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY %2
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_FPTOUI %1
; GCN-LABEL: name: fptoui_s16_to_s32_fneg_vv
; GCN: liveins: $vgpr0
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
- ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec
- ; GCN: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
- ; GCN: $vgpr0 = COPY %3
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec
+ ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY %3
; VI-LABEL: name: fptoui_s16_to_s32_fneg_vv
; VI: liveins: $vgpr0
- ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; VI: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
- ; VI: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; VI: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec
- ; VI: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
- ; VI: $vgpr0 = COPY %3
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec
+ ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY %3
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FNEG %1
; GCN-LABEL: name: fptoui_s16_to_s1_vv
; GCN: liveins: $vgpr0
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GCN: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
- ; GCN: S_ENDPGM 0, implicit %2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit %2
; VI-LABEL: name: fptoui_s16_to_s1_vv
; VI: liveins: $vgpr0
- ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; VI: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; VI: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
- ; VI: S_ENDPGM 0, implicit %2
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+ ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
+ ; VI-NEXT: S_ENDPGM 0, implicit %2
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_FPTOUI %1
; GCN-LABEL: name: fptoui_s16_to_s1_vs
; GCN: liveins: $sgpr0
- ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GCN: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
- ; GCN: S_ENDPGM 0, implicit %2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit %2
; VI-LABEL: name: fptoui_s16_to_s1_vs
; VI: liveins: $sgpr0
- ; VI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; VI: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; VI: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
- ; VI: S_ENDPGM 0, implicit %2
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+ ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
+ ; VI-NEXT: S_ENDPGM 0, implicit %2
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_FPTOUI %1
; GCN-LABEL: name: fptoui_s16_to_s1_fneg_vv
; GCN: liveins: $vgpr0
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
- ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GCN: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec
- ; GCN: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec
- ; GCN: S_ENDPGM 0, implicit %3
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GCN-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec
+ ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit %3
; VI-LABEL: name: fptoui_s16_to_s1_fneg_vv
; VI: liveins: $vgpr0
- ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; VI: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
- ; VI: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; VI: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec
- ; VI: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec
- ; VI: S_ENDPGM 0, implicit %3
+ ; VI-NEXT: {{ $}}
+ ; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; VI-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e64_]], implicit $mode, implicit $exec
+ ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec
+ ; VI-NEXT: S_ENDPGM 0, implicit %3
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FNEG %1
; GCN-LABEL: name: select_s32_scc
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
- ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
- ; GCN: $scc = COPY [[COPY4]]
- ; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[COPY3]], implicit $scc
- ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GCN-NEXT: $scc = COPY [[COPY4]]
+ ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[COPY3]], implicit $scc
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(s32) = COPY $sgpr2
; GCN-LABEL: name: select_s64_scc
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
- ; GCN: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
- ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
- ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
- ; GCN: $scc = COPY [[COPY4]]
- ; GCN: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc
- ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B64_]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GCN-NEXT: $scc = COPY [[COPY4]]
+ ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(s64) = COPY $sgpr2_sgpr3
; GCN-LABEL: name: select_p0_scc
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
- ; GCN: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
- ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
- ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
- ; GCN: $scc = COPY [[COPY4]]
- ; GCN: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc
- ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B64_]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GCN-NEXT: $scc = COPY [[COPY4]]
+ ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(p0) = COPY $sgpr2_sgpr3
; GCN-LABEL: name: select_p1_scc
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
- ; GCN: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
- ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
- ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
- ; GCN: $scc = COPY [[COPY4]]
- ; GCN: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc
- ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B64_]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GCN-NEXT: $scc = COPY [[COPY4]]
+ ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(p1) = COPY $sgpr2_sgpr3
; GCN-LABEL: name: select_p999_scc
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
- ; GCN: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
- ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
- ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
- ; GCN: $scc = COPY [[COPY4]]
- ; GCN: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc
- ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B64_]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GCN-NEXT: $scc = COPY [[COPY4]]
+ ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(p999) = COPY $sgpr2_sgpr3
; GCN-LABEL: name: select_v4s16_scc
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
- ; GCN: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
- ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
- ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
- ; GCN: $scc = COPY [[COPY4]]
- ; GCN: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc
- ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B64_]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GCN-NEXT: $scc = COPY [[COPY4]]
+ ; GCN-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3
; GCN-LABEL: name: select_s16_scc
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GCN: S_CMP_EQ_U32 [[COPY2]], [[COPY3]], implicit-def $scc
- ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
- ; GCN: $scc = COPY [[COPY4]]
- ; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc
- ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[COPY3]], implicit-def $scc
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GCN-NEXT: $scc = COPY [[COPY4]]
+ ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(s32) = COPY $sgpr2
; GCN-LABEL: name: select_v2s16_scc
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
- ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
- ; GCN: $scc = COPY [[COPY4]]
- ; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[COPY3]], implicit $scc
- ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GCN-NEXT: $scc = COPY [[COPY4]]
+ ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[COPY3]], implicit $scc
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(<2 x s16>) = COPY $sgpr2
; GCN-LABEL: name: select_s32_vcc
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec
- ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
; GCN-LABEL: name: select_s16_vcc
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[COPY3]], implicit $exec
- ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_CMP_EQ_U32_e64_]], implicit $exec
- ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[COPY3]], implicit $exec
+ ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
; GCN-LABEL: name: select_v2s16_vcc
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec
- ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(<2 x s16>) = COPY $vgpr2
; GCN-LABEL: name: select_p3_vcc
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec
- ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(p3) = COPY $vgpr2
; GCN-LABEL: name: select_s32_vcc_fneg_lhs
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 1, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec
- ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 1, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
; GCN-LABEL: name: select_s32_vcc_fneg_rhs
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 1, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec
- ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 1, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
; GCN-LABEL: name: select_s32_vcc_fneg_fabs_lhs
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 3, [[COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec
- ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 3, [[COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
; GCN-LABEL: name: select_s16_vcc_fneg_lhs
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
- ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[COPY3]], implicit $exec
- ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[V_XOR_B32_e32_]], [[V_CMP_EQ_U32_e64_]], implicit $exec
- ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+ ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[COPY3]], implicit $exec
+ ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[V_XOR_B32_e64_]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
; GCN-LABEL: name: select_v2s16_vcc_fneg_lhs
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
- ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY2]], implicit $exec
- ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[V_XOR_B32_e32_]], [[V_CMP_EQ_U32_e64_]], implicit $exec
- ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
+ ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY2]], implicit $exec
+ ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[V_XOR_B32_e64_]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(<2 x s16>) = COPY $vgpr2
; GCN-LABEL: name: select_s32_scc_fneg_lhs
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
- ; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
- ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
- ; GCN: $scc = COPY [[COPY4]]
- ; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[S_XOR_B32_]], [[COPY3]], implicit $scc
- ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GCN-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+ ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GCN-NEXT: $scc = COPY [[COPY4]]
+ ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[S_XOR_B32_]], [[COPY3]], implicit $scc
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(s32) = COPY $sgpr2
; GCN-LABEL: name: select_s32_scc_fneg_rhs
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
- ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
- ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
- ; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY3]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
- ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
- ; GCN: $scc = COPY [[COPY4]]
- ; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[S_XOR_B32_]], implicit $scc
- ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GCN-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY3]], [[S_MOV_B32_]], implicit-def $scc
+ ; GCN-NEXT: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GCN-NEXT: $scc = COPY [[COPY4]]
+ ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[S_XOR_B32_]], implicit $scc
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(s32) = COPY $sgpr2
}
; FUNC-LABEL: {{^}}fabs_f64:
-; SI: v_and_b32
-; SI-NOT: v_and_b32
+; SI: s_bitset0_b32
; SI: s_endpgm
define amdgpu_kernel void @fabs_f64(double addrspace(1)* %out, double %in) {
%fabs = call double @llvm.fabs.f64(double %in)
}
; FUNC-LABEL: {{^}}fabs_v2f64:
-; SI: v_and_b32
-; SI: v_and_b32
+; SI: s_and_b32
+; SI: s_and_b32
; SI: s_endpgm
define amdgpu_kernel void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
%fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
}
; FUNC-LABEL: {{^}}fabs_v4f64:
-; SI: v_and_b32
-; SI: v_and_b32
-; SI: v_and_b32
-; SI: v_and_b32
+; SI: s_and_b32
+; SI: s_and_b32
+; SI: s_and_b32
+; SI: s_and_b32
; SI: s_endpgm
define amdgpu_kernel void @fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
%fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
}
; FUNC-LABEL: {{^}}fabs_free_f64:
-; SI: v_and_b32
+; SI: s_bitset0_b32
; SI: s_endpgm
define amdgpu_kernel void @fabs_free_f64(double addrspace(1)* %out, i64 %in) {
%bc= bitcast i64 %in to double
}
; FUNC-LABEL: {{^}}fabs_fn_free_f64:
-; SI: v_and_b32
+; SI: s_bitset0_b32
; SI: s_endpgm
define amdgpu_kernel void @fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
%bc= bitcast i64 %in to double
--- /dev/null
+; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,SI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,FP16 %s
+
+
+; fneg of an f32 loaded from an address indexed by the work-item id. The checks
+; expect the sign-bit mask (0x80000000) to be materialized with S_MOV_B32 and
+; applied with the VALU instruction V_XOR_B32_e64.
+define amdgpu_kernel void @divergent_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+; GCN-LABEL: name: divergent_fneg_f32
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+; GCN: V_XOR_B32_e64 killed %[[REG]]
+
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds float, float addrspace(1)* %in, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+  %val = load volatile float, float addrspace(1)* %in.gep
+  %fneg = fneg float %val
+  store float %fneg, float addrspace(1)* %out.gep
+  ret void
+}
+
+; fneg of an f32 loaded from an address indexed by the kernel argument %idx.
+; The checks expect the sign-bit mask (0x80000000) to be applied with the SALU
+; instruction S_XOR_B32.
+define amdgpu_kernel void @uniform_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in, i64 %idx) {
+; GCN-LABEL: name: uniform_fneg_f32
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]]
+
+  %in.gep = getelementptr inbounds float, float addrspace(1)* %in, i64 %idx
+  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %idx
+  %val = load volatile float, float addrspace(1)* %in.gep
+  %fneg = fneg float %val
+  store float %fneg, float addrspace(1)* %out.gep
+  ret void
+}
+
+; fabs of an f32 loaded from an address indexed by the work-item id. The checks
+; expect the sign-clearing mask (0x7fffffff) to be materialized with S_MOV_B32
+; and applied with the VALU instruction V_AND_B32_e64.
+define amdgpu_kernel void @divergent_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+; GCN-LABEL: name: divergent_fabs_f32
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+; GCN: V_AND_B32_e64 killed %[[REG]]
+
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds float, float addrspace(1)* %in, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+  %val = load volatile float, float addrspace(1)* %in.gep
+  %fabs = call float @llvm.fabs.f32(float %val)
+  store float %fabs, float addrspace(1)* %out.gep
+  ret void
+}
+
+; fabs of an f32 loaded from an address indexed by the kernel argument %idx.
+; The checks expect the sign-clearing mask (0x7fffffff) to be applied with the
+; SALU instruction S_AND_B32.
+define amdgpu_kernel void @uniform_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in, i64 %idx) {
+; GCN-LABEL: name: uniform_fabs_f32
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]]
+
+  %in.gep = getelementptr inbounds float, float addrspace(1)* %in, i64 %idx
+  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %idx
+  %val = load volatile float, float addrspace(1)* %in.gep
+  %fabs = call float @llvm.fabs.f32(float %val)
+  store float %fabs, float addrspace(1)* %out.gep
+  ret void
+}
+
+; fneg(fabs(x)) of an f32 loaded from an address indexed by the work-item id.
+; The checks expect the pair to be selected as a single sign-bit set: the mask
+; 0x80000000 from S_MOV_B32 combined with the VALU instruction V_OR_B32_e64.
+define amdgpu_kernel void @divergent_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+; GCN-LABEL: name: divergent_fneg_fabs_f32
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+; GCN: V_OR_B32_e64 killed %[[REG]]
+
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds float, float addrspace(1)* %in, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+  %val = load volatile float, float addrspace(1)* %in.gep
+  %fabs = call float @llvm.fabs.f32(float %val)
+  %fneg = fneg float %fabs
+  store float %fneg, float addrspace(1)* %out.gep
+  ret void
+}
+
+; fneg(fabs(x)) of an f32 loaded from an address indexed by the kernel argument
+; %idx. The checks expect the pair to be selected as a single sign-bit set: the
+; mask 0x80000000 combined with the SALU instruction S_OR_B32.
+define amdgpu_kernel void @uniform_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in, i64 %idx) {
+; GCN-LABEL: name: uniform_fneg_fabs_f32
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]]
+
+  %in.gep = getelementptr inbounds float, float addrspace(1)* %in, i64 %idx
+  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %idx
+  %val = load volatile float, float addrspace(1)* %in.gep
+  %fabs = call float @llvm.fabs.f32(float %val)
+  %fneg = fneg float %fabs
+  store float %fneg, float addrspace(1)* %out.gep
+  ret void
+}
+
+
+; fabs of an f16 loaded from an address indexed by the work-item id. The
+; instruction checks apply only to the gfx900 run: they expect the 16-bit
+; sign-clearing mask (0x7fff) applied with the VALU instruction V_AND_B32_e64.
+; Note that the parameter order here is (%in, %out) and the store goes to the
+; un-indexed %out pointer.
+define amdgpu_kernel void @divergent_fabs_f16(half addrspace(1)* %in, half addrspace(1)* %out) {
+; GCN-LABEL: name: divergent_fabs_f16
+; GCN-LABEL: bb.0 (%ir-block.0)
+; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32767
+; FP16: V_AND_B32_e64 killed %[[REG]]
+
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds half, half addrspace(1)* %in, i64 %tid.ext
+  %val = load volatile half, half addrspace(1)* %in.gep
+  %fabs = call half @llvm.fabs.f16(half %val)
+  store half %fabs, half addrspace(1)* %out
+  ret void
+}
+
+; fabs of an f16 loaded from an address indexed by the kernel argument %idx.
+; Both runs are checked: they expect the 16-bit sign-clearing mask (0x7fff)
+; applied with the SALU instruction S_AND_B32. The store goes to the
+; un-indexed %out pointer.
+define amdgpu_kernel void @uniform_fabs_f16(half addrspace(1)* %in, half addrspace(1)* %out, i64 %idx) {
+; GCN-LABEL: name: uniform_fabs_f16
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32767
+; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]]
+
+  %in.gep = getelementptr inbounds half, half addrspace(1)* %in, i64 %idx
+  %val = load volatile half, half addrspace(1)* %in.gep
+  %fabs = call half @llvm.fabs.f16(half %val)
+  store half %fabs, half addrspace(1)* %out
+  ret void
+}
+
+; fneg of an f16 loaded from an address indexed by the work-item id. The
+; instruction checks apply only to the gfx900 run: they expect the 16-bit
+; sign-bit mask (0x8000) applied with the VALU instruction V_XOR_B32_e64.
+; The store goes to the un-indexed %out pointer.
+define amdgpu_kernel void @divergent_fneg_f16(half addrspace(1)* %in, half addrspace(1)* %out) {
+; GCN-LABEL: name: divergent_fneg_f16
+; GCN-LABEL: bb.0 (%ir-block.0)
+; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
+; FP16: V_XOR_B32_e64 killed %[[REG]]
+
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds half, half addrspace(1)* %in, i64 %tid.ext
+  %val = load volatile half, half addrspace(1)* %in.gep
+  %fneg = fneg half %val
+  store half %fneg, half addrspace(1)* %out
+  ret void
+}
+
+; fneg of an f16 loaded from an address indexed by the kernel argument %idx.
+; Both runs are checked: they expect the 16-bit sign-bit mask (0x8000) applied
+; with the SALU instruction S_XOR_B32. The store goes to the un-indexed %out
+; pointer.
+define amdgpu_kernel void @uniform_fneg_f16(half addrspace(1)* %in, half addrspace(1)* %out, i64 %idx) {
+; GCN-LABEL: name: uniform_fneg_f16
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
+; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]]
+
+  %in.gep = getelementptr inbounds half, half addrspace(1)* %in, i64 %idx
+  %val = load volatile half, half addrspace(1)* %in.gep
+  %fneg = fneg half %val
+  store half %fneg, half addrspace(1)* %out
+  ret void
+}
+
+; fneg(fabs(x)) of an f16 loaded from an address indexed by the work-item id.
+; The instruction checks apply only to the gfx900 run: they expect a single
+; sign-bit set, i.e. the mask 0x8000 combined with the VALU instruction
+; V_OR_B32_e64. The store goes to the un-indexed %out pointer.
+define amdgpu_kernel void @divergent_fneg_fabs_f16(half addrspace(1)* %in, half addrspace(1)* %out) {
+; GCN-LABEL: name: divergent_fneg_fabs_f16
+; GCN-LABEL: bb.0 (%ir-block.0)
+; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
+; FP16: V_OR_B32_e64 killed %[[REG]]
+
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds half, half addrspace(1)* %in, i64 %tid.ext
+  %val = load volatile half, half addrspace(1)* %in.gep
+  %fabs = call half @llvm.fabs.f16(half %val)
+  %fneg = fneg half %fabs
+  store half %fneg, half addrspace(1)* %out
+  ret void
+}
+
+; fneg(fabs(x)) of an f16 loaded from an address indexed by the kernel argument
+; %idx. Both runs are checked: they expect a single sign-bit set, i.e. the mask
+; 0x8000 combined with the SALU instruction S_OR_B32. The store goes to the
+; un-indexed %out pointer.
+define amdgpu_kernel void @uniform_fneg_fabs_f16(half addrspace(1)* %in, half addrspace(1)* %out, i64 %idx) {
+; GCN-LABEL: name: uniform_fneg_fabs_f16
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
+; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]]
+
+  %in.gep = getelementptr inbounds half, half addrspace(1)* %in, i64 %idx
+  %val = load volatile half, half addrspace(1)* %in.gep
+  %fabs = call half @llvm.fabs.f16(half %val)
+  %fneg = fneg half %fabs
+  store half %fneg, half addrspace(1)* %out
+  ret void
+}
+
+; fneg of a <2 x half> loaded from an address indexed by the work-item id. The
+; instruction checks apply only to the gfx900 run: they expect the packed
+; sign-bit mask (0x80008000) applied with the VALU instruction V_XOR_B32_e64.
+; The result is stored to %out at the same index, so the input is not
+; overwritten.
+define amdgpu_kernel void @divergent_fneg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) {
+; GCN-LABEL: name: divergent_fneg_v2f16
+; GCN-LABEL: bb.0 (%ir-block.0)
+; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
+; FP16: V_XOR_B32_e64 killed %[[REG]]
+
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
+  %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in, align 2
+  %fneg = fneg <2 x half> %val
+  store <2 x half> %fneg, <2 x half> addrspace(1)* %gep.out
+  ret void
+}
+
+; fneg of a <2 x half> loaded from an address indexed by the kernel argument
+; %idx. Both runs are checked: they expect the packed sign-bit mask
+; (0x80008000) applied with the SALU instruction S_XOR_B32. The result is
+; stored to %out at the same index, so the input is not overwritten.
+define amdgpu_kernel void @uniform_fneg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in, i32 %idx) {
+; GCN-LABEL: name: uniform_fneg_v2f16
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
+; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]]
+
+  %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %idx
+  %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %idx
+  %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in, align 2
+  %fneg = fneg <2 x half> %val
+  store <2 x half> %fneg, <2 x half> addrspace(1)* %gep.out
+  ret void
+}
+
+; fabs on a <2 x half> element addressed by the workitem id. The checks with
+; the FP16 prefix expect the packed magnitude mask 2147450879 (0x7fff7fff) in
+; an sreg_32 and a V_AND_B32_e64 that consumes it.
+define amdgpu_kernel void @divergent_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) {
+; GCN-LABEL: name: divergent_fabs_v2f16
+; GCN-LABEL: bb.0 (%ir-block.0)
+; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
+; FP16: V_AND_B32_e64 killed %[[REG]]

+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
+ ; The destination is indexed off %out so the result does not overwrite the input.
+ %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
+ %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in, align 2
+ %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
+ store <2 x half> %fabs, <2 x half> addrspace(1)* %gep.out
+ ret void
+}
+
+; fabs on a <2 x half> element addressed by the kernel argument %idx. The
+; checks expect the packed magnitude mask 2147450879 (0x7fff7fff) in an
+; sreg_32 and an S_AND_B32 that consumes it.
+define amdgpu_kernel void @uniform_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in, i32 %idx) {
+; GCN-LABEL: name: uniform_fabs_v2f16
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
+; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]]

+ %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %idx
+ ; The destination is indexed off %out so the result does not overwrite the input.
+ %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %idx
+ %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in, align 2
+ %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
+ store <2 x half> %fabs, <2 x half> addrspace(1)* %gep.out
+ ret void
+}
+
+; fneg(fabs(x)) on a <2 x half> element addressed by the workitem id. The
+; checks with the FP16 prefix expect the packed sign mask -2147450880
+; (0x80008000) in an sreg_32 and a V_OR_B32_e64 that consumes it.
+define amdgpu_kernel void @divergent_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) {
+; GCN-LABEL: name: divergent_fneg_fabs_v2f16
+; GCN-LABEL: bb.0 (%ir-block.0)
+; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
+; FP16: V_OR_B32_e64 killed %[[REG]]

+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
+ ; The destination is indexed off %out so the result does not overwrite the input.
+ %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
+ %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in, align 2
+ %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
+ %fneg = fneg <2 x half> %fabs
+ store <2 x half> %fneg, <2 x half> addrspace(1)* %gep.out
+ ret void
+}
+
+; fneg(fabs(x)) on a <2 x half> element addressed by the kernel argument
+; %idx. The checks expect the packed sign mask -2147450880 (0x80008000) in an
+; sreg_32 and an S_OR_B32 that consumes it.
+define amdgpu_kernel void @uniform_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in, i32 %idx) {
+; GCN-LABEL: name: uniform_fneg_fabs_v2f16
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
+; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]]

+ %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %idx
+ ; The destination is indexed off %out so the result does not overwrite the input.
+ %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %idx
+ %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in, align 2
+ %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
+ %fneg = fneg <2 x half> %fabs
+ store <2 x half> %fneg, <2 x half> addrspace(1)* %gep.out
+ ret void
+}
+
+; fneg on a <2 x float> element addressed by the workitem id. The checks
+; expect one sign mask -2147483648 (0x80000000) in an sreg_32 that is used by
+; two V_XOR_B32_e64 instructions.
+define amdgpu_kernel void @divergent_fneg_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
+; GCN-LABEL: name: divergent_fneg_v2f32
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+; GCN: V_XOR_B32_e64 %[[REG]]
+; GCN: V_XOR_B32_e64 %[[REG]]

+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep.in = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid
+ ; The destination is indexed off %out so the result does not overwrite the input.
+ %gep.out = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %tid
+ %val = load <2 x float>, <2 x float> addrspace(1)* %gep.in, align 4
+ %fneg = fneg <2 x float> %val
+ store <2 x float> %fneg, <2 x float> addrspace(1)* %gep.out
+ ret void
+}
+
+; fneg on a <2 x float> element addressed by the kernel argument %idx. The
+; checks expect one sign mask -2147483648 (0x80000000) in an sreg_32 that is
+; used by two S_XOR_B32 instructions.
+define amdgpu_kernel void @uniform_fneg_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in, i32 %idx) {
+; GCN-LABEL: name: uniform_fneg_v2f32
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+; GCN: S_XOR_B32 killed %{{[0-9]+}}, %[[REG]]
+; GCN: S_XOR_B32 killed %{{[0-9]+}}, %[[REG]]

+ %gep.in = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %in, i32 %idx
+ ; The destination is indexed off %out so the result does not overwrite the input.
+ %gep.out = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %idx
+ %val = load <2 x float>, <2 x float> addrspace(1)* %gep.in, align 4
+ %fneg = fneg <2 x float> %val
+ store <2 x float> %fneg, <2 x float> addrspace(1)* %gep.out
+ ret void
+}
+
+; fabs on a <2 x float> element addressed by the workitem id. The checks
+; expect one magnitude mask 2147483647 (0x7fffffff) in an sreg_32 that is used
+; by two V_AND_B32_e64 instructions.
+define amdgpu_kernel void @divergent_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
+; GCN-LABEL: name: divergent_fabs_v2f32
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+; GCN: V_AND_B32_e64 %[[REG]]
+; GCN: V_AND_B32_e64 %[[REG]]

+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep.in = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid
+ ; The destination is indexed off %out so the result does not overwrite the input.
+ %gep.out = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %tid
+ %val = load <2 x float>, <2 x float> addrspace(1)* %gep.in, align 4
+ %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
+ store <2 x float> %fabs, <2 x float> addrspace(1)* %gep.out
+ ret void
+}
+
+; fabs on a <2 x float> element addressed by the kernel argument %idx. The
+; checks expect one magnitude mask 2147483647 (0x7fffffff) in an sreg_32 that
+; is used by two S_AND_B32 instructions.
+define amdgpu_kernel void @uniform_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in, i32 %idx) {
+; GCN-LABEL: name: uniform_fabs_v2f32
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+; GCN: S_AND_B32 killed %{{[0-9]+}}, %[[REG]]
+; GCN: S_AND_B32 killed %{{[0-9]+}}, %[[REG]]

+ %gep.in = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %in, i32 %idx
+ ; The destination is indexed off %out so the result does not overwrite the input.
+ %gep.out = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %idx
+ %val = load <2 x float>, <2 x float> addrspace(1)* %gep.in, align 4
+ %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
+ store <2 x float> %fabs, <2 x float> addrspace(1)* %gep.out
+ ret void
+}
+
+; fneg(fabs(x)) on a <2 x float> element addressed by the workitem id. The
+; checks expect one sign mask -2147483648 (0x80000000) in an sreg_32 that is
+; used by two V_OR_B32_e64 instructions.
+define amdgpu_kernel void @divergent_fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
+; GCN-LABEL: name: divergent_fneg_fabs_v2f32
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+; GCN: V_OR_B32_e64 %[[REG]]
+; GCN: V_OR_B32_e64 %[[REG]]

+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep.in = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid
+ ; The destination is indexed off %out so the result does not overwrite the input.
+ %gep.out = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %tid
+ %val = load <2 x float>, <2 x float> addrspace(1)* %gep.in, align 4
+ %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
+ %fneg = fneg <2 x float> %fabs
+ store <2 x float> %fneg, <2 x float> addrspace(1)* %gep.out
+ ret void
+}
+
+; fneg(fabs(x)) on a <2 x float> element addressed by the kernel argument
+; %idx. The checks expect one sign mask -2147483648 (0x80000000) in an
+; sreg_32 that is used by two S_OR_B32 instructions.
+define amdgpu_kernel void @uniform_fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in, i32 %idx) {
+; GCN-LABEL: name: uniform_fneg_fabs_v2f32
+; GCN-LABEL: bb.0 (%ir-block.0)
+; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+; GCN: S_OR_B32 killed %{{[0-9]+}}, %[[REG]]
+; GCN: S_OR_B32 killed %{{[0-9]+}}, %[[REG]]

+ %gep.in = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %in, i32 %idx
+ ; The destination is indexed off %out so the result does not overwrite the input.
+ %gep.out = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %idx
+ %val = load <2 x float>, <2 x float> addrspace(1)* %gep.in, align 4
+ %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
+ %fneg = fneg <2 x float> %fabs
+ store <2 x float> %fneg, <2 x float> addrspace(1)* %gep.out
+ ret void
+}
+
+; fneg on a double addressed by the workitem id. The checks expect only the
+; high dword (sub1) of the loaded vreg_64 to be XORed with -2147483648
+; (0x80000000) by V_XOR_B32_e64, and the low dword (sub0) to be copied
+; unchanged into the REG_SEQUENCE.
+define amdgpu_kernel void @divergent_fneg_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
+; GCN-LABEL: name: divergent_fneg_f64
+; GCN-LABEL: bb.0 (%ir-block.0)
+; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
+; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
+; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
+; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+; GCN: %[[XOR:[0-9]+]]:vgpr_32 = V_XOR_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
+; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
+; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[XOR]], %subreg.sub1

+ %lane = call i32 @llvm.amdgcn.workitem.id.x()
+ %lane.i64 = sext i32 %lane to i64
+ %src.ptr = getelementptr inbounds double, double addrspace(1)* %in, i64 %lane.i64
+ %dst.ptr = getelementptr inbounds double, double addrspace(1)* %out, i64 %lane.i64
+ %x = load volatile double, double addrspace(1)* %src.ptr
+ %neg = fneg double %x
+ store double %neg, double addrspace(1)* %dst.ptr
+ ret void
+}
+
+; fneg on a double addressed by the kernel argument %idx. The checks expect
+; both halves of the loaded value to be copied to sreg_32, the high dword to
+; be XORed with -2147483648 (0x80000000) by S_XOR_B32, and the result to be
+; recombined with the untouched low dword in a REG_SEQUENCE.
+define amdgpu_kernel void @uniform_fneg_f64(double addrspace(1)* %out, double addrspace(1)* %in, i64 %idx) {
+; GCN-LABEL: name: uniform_fneg_f64
+; GCN-LABEL: bb.0 (%ir-block.0)
+; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
+; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
+; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
+; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
+; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+; GCN: %[[XOR:[0-9]+]]:sreg_32 = S_XOR_B32 killed %[[HI32]], killed %[[SREG_MASK]]
+; GCN: %[[XOR_COPY:[0-9]+]]:sreg_32 = COPY %[[XOR]]
+; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[XOR_COPY]], %subreg.sub1

+ %src.ptr = getelementptr inbounds double, double addrspace(1)* %in, i64 %idx
+ %dst.ptr = getelementptr inbounds double, double addrspace(1)* %out, i64 %idx
+ %x = load volatile double, double addrspace(1)* %src.ptr
+ %neg = fneg double %x
+ store double %neg, double addrspace(1)* %dst.ptr
+ ret void
+}
+
+; fabs on a double addressed by the workitem id. The checks expect only the
+; high dword (sub1) of the loaded vreg_64 to be ANDed with 2147483647
+; (0x7fffffff) by V_AND_B32_e64, and the low dword (sub0) to be copied
+; unchanged into the REG_SEQUENCE.
+define amdgpu_kernel void @divergent_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
+; GCN-LABEL: name: divergent_fabs_f64
+; GCN-LABEL: bb.0 (%ir-block.0)
+; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
+; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
+; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
+; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+; GCN: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
+; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
+; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[AND]], %subreg.sub1

+ %lane = call i32 @llvm.amdgcn.workitem.id.x()
+ %lane.i64 = sext i32 %lane to i64
+ %src.ptr = getelementptr inbounds double, double addrspace(1)* %in, i64 %lane.i64
+ %dst.ptr = getelementptr inbounds double, double addrspace(1)* %out, i64 %lane.i64
+ %x = load volatile double, double addrspace(1)* %src.ptr
+ %abs = call double @llvm.fabs.f64(double %x)
+ store double %abs, double addrspace(1)* %dst.ptr
+ ret void
+}
+
+; fabs on a double addressed by the kernel argument %idx. The checks expect
+; both halves of the loaded value to be copied to sreg_32, the high dword to
+; be ANDed with 2147483647 (0x7fffffff) by S_AND_B32, and the result to be
+; recombined with the untouched low dword in a REG_SEQUENCE.
+define amdgpu_kernel void @uniform_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in, i64 %idx) {
+; GCN-LABEL: name: uniform_fabs_f64
+; GCN-LABEL: bb.0 (%ir-block.0)
+; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
+; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
+; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
+; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
+; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %[[HI32]], killed %[[SREG_MASK]]
+; GCN: %[[AND_COPY:[0-9]+]]:sreg_32 = COPY %[[AND]]
+; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[AND_COPY]], %subreg.sub1

+ %src.ptr = getelementptr inbounds double, double addrspace(1)* %in, i64 %idx
+ %dst.ptr = getelementptr inbounds double, double addrspace(1)* %out, i64 %idx
+ %x = load volatile double, double addrspace(1)* %src.ptr
+ %abs = call double @llvm.fabs.f64(double %x)
+ store double %abs, double addrspace(1)* %dst.ptr
+ ret void
+}
+
+; fneg(fabs(x)) on a double addressed by the workitem id. The checks expect
+; only the high dword (sub1) of the loaded vreg_64 to be ORed with
+; -2147483648 (0x80000000) by V_OR_B32_e64, and the low dword (sub0) to be
+; copied unchanged into the REG_SEQUENCE.
+define amdgpu_kernel void @divergent_fneg_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
+; GCN-LABEL: name: divergent_fneg_fabs_f64
+; GCN-LABEL: bb.0 (%ir-block.0)
+; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
+; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
+; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
+; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+; GCN: %[[OR:[0-9]+]]:vgpr_32 = V_OR_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
+; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
+; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[OR]], %subreg.sub1

+ %lane = call i32 @llvm.amdgcn.workitem.id.x()
+ %lane.i64 = sext i32 %lane to i64
+ %src.ptr = getelementptr inbounds double, double addrspace(1)* %in, i64 %lane.i64
+ %dst.ptr = getelementptr inbounds double, double addrspace(1)* %out, i64 %lane.i64
+ %x = load volatile double, double addrspace(1)* %src.ptr
+ %abs = call double @llvm.fabs.f64(double %x)
+ %neg.abs = fneg double %abs
+ store double %neg.abs, double addrspace(1)* %dst.ptr
+ ret void
+}
+
+; fneg(fabs(x)) on a double addressed by the kernel argument %idx. The checks
+; expect both halves of the loaded value to be copied to sreg_32, the high
+; dword to be ORed with -2147483648 (0x80000000) by S_OR_B32, and the result
+; to be recombined with the untouched low dword in a REG_SEQUENCE.
+define amdgpu_kernel void @uniform_fneg_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in, i64 %idx) {
+; GCN-LABEL: name: uniform_fneg_fabs_f64
+; GCN-LABEL: bb.0 (%ir-block.0)
+; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
+; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
+; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
+; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
+; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+; GCN: %[[OR:[0-9]+]]:sreg_32 = S_OR_B32 killed %[[HI32]], killed %[[SREG_MASK]]
+; GCN: %[[OR_COPY:[0-9]+]]:sreg_32 = COPY %[[OR]]
+; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[OR_COPY]], %subreg.sub1

+ %src.ptr = getelementptr inbounds double, double addrspace(1)* %in, i64 %idx
+ %dst.ptr = getelementptr inbounds double, double addrspace(1)* %out, i64 %idx
+ %x = load volatile double, double addrspace(1)* %src.ptr
+ %abs = call double @llvm.fabs.f64(double %x)
+ %neg.abs = fneg double %abs
+ store double %neg.abs, double addrspace(1)* %dst.ptr
+ ret void
+}
+
+declare float @llvm.fabs.f32(float)
+declare half @llvm.fabs.f16(half)
+declare double @llvm.fabs.f64(double)
+declare <2 x half> @llvm.fabs.v2f16(<2 x half>)
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
+
+declare i32 @llvm.amdgcn.workitem.id.x()
}
; GCN-LABEL: {{^}}fneg_fabs_fn_free_f64:
-; GCN: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}
-; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: s_bitset1_b32
+; VI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
define amdgpu_kernel void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
%bc = bitcast i64 %in to double
%fabs = call double @fabs(double %bc)
}
; GCN-LABEL: {{^}}fneg_fabs_f64:
-; GCN-DAG: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}
; SI-DAG: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x13
; VI-DAG: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x4c
-; GCN-DAG: v_or_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]], [[IMMREG]]
+; GCN-DAG: s_bitset1_b32 s[[HI_X]], 31
; GCN-DAG: v_mov_b32_e32 v[[LO_V:[0-9]+]], s[[LO_X]]
+; GCN-DAG: v_mov_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]]
; GCN: buffer_store_dwordx2 v{{\[}}[[LO_V]]:[[HI_V]]{{\]}}
define amdgpu_kernel void @fneg_fabs_f64(double addrspace(1)* %out, [8 x i32], double %in) {
%fabs = call double @llvm.fabs.f64(double %in)
}
; GCN-LABEL: {{^}}fneg_fabs_v2f64:
-; GCN: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}
+; GCN: s_brev_b32 [[IMMREG:s[0-9]+]], 1{{$}}
; GCN-NOT: 0x80000000
-; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
-; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define amdgpu_kernel void @fneg_fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
%fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
%fsub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %fabs
}
; GCN-LABEL: {{^}}fneg_fabs_v4f64:
-; GCN: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}
+; GCN: s_brev_b32 [[IMMREG:s[0-9]+]], 1{{$}}
; GCN-NOT: 0x80000000
-; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
-; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
-; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
-; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define amdgpu_kernel void @fneg_fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
%fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
%fsub = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %fabs
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}fneg_f64:
-; GCN: v_xor_b32
+; GCN: s_xor_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
define amdgpu_kernel void @fneg_f64(double addrspace(1)* %out, double %in) {
%fneg = fsub double -0.000000e+00, %in
store double %fneg, double addrspace(1)* %out
}
; FUNC-LABEL: {{^}}fneg_v2f64:
-; GCN: v_xor_b32
-; GCN: v_xor_b32
+; GCN: s_xor_b32
+; GCN: s_xor_b32
define amdgpu_kernel void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> %in) {
%fneg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %in
store <2 x double> %fneg, <2 x double> addrspace(1)* %out
; R600: -PV
; R600: -PV
-; GCN: v_xor_b32
-; GCN: v_xor_b32
-; GCN: v_xor_b32
-; GCN: v_xor_b32
+; GCN: s_xor_b32
+; GCN: s_xor_b32
+; GCN: s_xor_b32
+; GCN: s_xor_b32
define amdgpu_kernel void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> %in) {
%fneg = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %in
store <4 x double> %fneg, <4 x double> addrspace(1)* %out