The first operand of an '``extractelement``' instruction is a value of
:ref:`vector <t_vector>` type. The second operand is an index indicating
the position from which to extract the element. The index may be a
-variable of any integer type.
+variable of any integer type, and will be treated as an unsigned integer.
Semantics:
""""""""""
:ref:`vector <t_vector>` type. The second operand is a scalar value whose
type must equal the element type of the first operand. The third operand
is an index indicating the position at which to insert the value. The
-index may be a variable of any integer type.
+index may be a variable of any integer type, and will be treated as an
+unsigned integer.
Semantics:
""""""""""
Register Idx;
if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) {
if (CI->getBitWidth() != PreferredVecIdxWidth) {
- APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth);
+ APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth);
auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx);
Idx = getOrCreateVReg(*NewIdxCI);
}
Idx = getOrCreateVReg(*U.getOperand(1));
if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
- Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx).getReg(0);
+ Idx = MIRBuilder.buildZExtOrTrunc(VecIdxTy, Idx).getReg(0);
}
MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
return true;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
SDValue InVal = getValue(I.getOperand(1));
- SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
+ SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
TLI.getVectorIdxTy(DAG.getDataLayout()));
setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
TLI.getValueType(DAG.getDataLayout(), I.getType()),
void SelectionDAGBuilder::visitExtractElement(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
- SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
+ SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
TLI.getVectorIdxTy(DAG.getDataLayout()));
setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
TLI.getValueType(DAG.getDataLayout(), I.getType()),
; CHECK-LABEL: name: test_extractelement
; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK: [[IDX:%[0-9]+]]:_(s32) = COPY $w0
-; CHECK: [[IDXEXT:%[0-9]+]]:_(s64) = G_SEXT [[IDX]]
+; CHECK: [[IDXEXT:%[0-9]+]]:_(s64) = G_ZEXT [[IDX]]
; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDXEXT]](s64)
; CHECK: $w0 = COPY [[RES]](s32)
%res = extractelement <2 x i32> %vec, i32 %idx
ret i32 %res
}
+define i32 @test_extractelement_const_idx_zext_i1(<2 x i32> %vec) {
+; CHECK-LABEL: name: test_extractelement_const_idx_zext_i1
+; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+; CHECK: [[IDX:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDX]](s64)
+; CHECK: $w0 = COPY [[RES]](s32)
+ %res = extractelement <2 x i32> %vec, i1 true
+ ret i32 %res
+}
+
+define i32 @test_extractelement_const_idx_zext_i8(<2 x i32> %vec) {
+; CHECK-LABEL: name: test_extractelement_const_idx_zext_i8
+; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+; CHECK: [[IDX:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
+; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDX]](s64)
+; CHECK: $w0 = COPY [[RES]](s32)
+ %res = extractelement <2 x i32> %vec, i8 255
+ ret i32 %res
+}
+
+
define i32 @test_singleelementvector(i32 %elt){
; CHECK-LABEL: name: test_singleelementvector
; CHECK: [[ELT:%[0-9]+]]:_(s32) = COPY $w0
define i32 @load_single_extract_variable_index_v3i32_small_align(<3 x i32>* %A, i32 %idx) {
; CHECK-LABEL: load_single_extract_variable_index_v3i32_small_align:
; CHECK: ; %bb.0:
-; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: sxtw x9, w1
+; CHECK-NEXT: mov w9, w1
; CHECK-NEXT: mov w8, #2
; CHECK-NEXT: cmp x9, #2
; CHECK-NEXT: csel x8, x9, x8, lo
define i32 @load_single_extract_variable_index_v3i32_default_align(<3 x i32>* %A, i32 %idx) {
; CHECK-LABEL: load_single_extract_variable_index_v3i32_default_align:
; CHECK: ; %bb.0:
-; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: sxtw x9, w1
+; CHECK-NEXT: mov w9, w1
; CHECK-NEXT: mov w8, #2
; CHECK-NEXT: cmp x9, #2
; CHECK-NEXT: csel x8, x9, x8, lo
define i8 @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_16xi8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.b, xzr, x8
; CHECK-NEXT: lastb w0, p0, z0.b
; CHECK-NEXT: ret
define i16 @test_lanex_8xi16(<vscale x 8 x i16> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_8xi16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.h, xzr, x8
; CHECK-NEXT: lastb w0, p0, z0.h
; CHECK-NEXT: ret
define i32 @test_lanex_4xi32(<vscale x 4 x i32> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_4xi32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.s, xzr, x8
; CHECK-NEXT: lastb w0, p0, z0.s
; CHECK-NEXT: ret
define i64 @test_lanex_2xi64(<vscale x 2 x i64> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_2xi64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.d, xzr, x8
; CHECK-NEXT: lastb x0, p0, z0.d
; CHECK-NEXT: ret
define half @test_lanex_8xf16(<vscale x 8 x half> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_8xf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.h, xzr, x8
; CHECK-NEXT: lastb h0, p0, z0.h
; CHECK-NEXT: ret
define half @test_lanex_4xf16(<vscale x 4 x half> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_4xf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.s, xzr, x8
; CHECK-NEXT: lastb h0, p0, z0.h
; CHECK-NEXT: ret
define half @test_lanex_2xf16(<vscale x 2 x half> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_2xf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.d, xzr, x8
; CHECK-NEXT: lastb h0, p0, z0.h
; CHECK-NEXT: ret
define float @test_lanex_4xf32(<vscale x 4 x float> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_4xf32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.s, xzr, x8
; CHECK-NEXT: lastb s0, p0, z0.s
; CHECK-NEXT: ret
define float @test_lanex_2xf32(<vscale x 2 x float> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_2xf32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.d, xzr, x8
; CHECK-NEXT: lastb s0, p0, z0.s
; CHECK-NEXT: ret
define double @test_lanex_2xf64(<vscale x 2 x double> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_2xf64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.d, xzr, x8
; CHECK-NEXT: lastb d0, p0, z0.d
; CHECK-NEXT: ret
define i1 @test_lanex_4xi1(<vscale x 4 x i1> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_4xi1:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
; CHECK-NEXT: whilels p0.s, xzr, x8
; CHECK-NEXT: lastb w8, p0, z0.s
define <vscale x 16 x i8> @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
; CHECK-LABEL: test_lanex_16xi8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: mov w9, #30
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: ptrue p0.b
define <vscale x 8 x i1> @test_predicate_insert_8xi1_immediate (<vscale x 8 x i1> %val, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_8xi1_immediate:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: mov w9, #1
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: ptrue p1.h
define <vscale x 2 x i1> @test_predicate_insert_2xi1(<vscale x 2 x i1> %val, i1 %elt, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_2xi1:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
define <vscale x 4 x i1> @test_predicate_insert_4xi1(<vscale x 4 x i1> %val, i1 %elt, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_4xi1:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z0.s, w8
define <vscale x 8 x i1> @test_predicate_insert_8xi1(<vscale x 8 x i1> %val, i1 %elt, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_8xi1:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z0.h, w8
define <vscale x 16 x i1> @test_predicate_insert_16xi1(<vscale x 16 x i1> %val, i1 %elt, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_16xi1:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov z0.b, w8
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: mov x8, #-1
-; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: sxtw x9, w1
+; CHECK-NEXT: mov w9, w1
; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1b { z0.b }, p1, [sp, #1, mul vl]
define i32 @promote_extract_2i32_idx(<vscale x 2 x i32> %a, i32 %idx) {
; CHECK-LABEL: promote_extract_2i32_idx:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.d, xzr, x8
; CHECK-NEXT: lastb x0, p0, z0.d
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: mov x8, #-1
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x9, w0
+; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: mov x8, #-1
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x9, w0
+; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: cnth x8
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x9, w0
+; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
; CHECK-NEXT: cnth x8
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x9, w0
+; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cmp x9, x8
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -march=amdgcn -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+
+define i8 @f_i1_1() {
+ ; CHECK-LABEL: name: f_i1_1
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<256 x s8>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<256 x s8>), [[C]](s32)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %E1 = extractelement <256 x i8> undef, i1 true
+ ret i8 %E1
+}
+
+define i8 @f_i8_255() {
+ ; CHECK-LABEL: name: f_i8_255
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<256 x s8>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<256 x s8>), [[C]](s32)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %E1 = extractelement <256 x i8> undef, i8 255
+ ret i8 %E1
+}
; N64-NEXT: ld.b $w0, 0($2)
; N64-NEXT: addv.b $w0, $w0, $w0
; N64-NEXT: ld $1, %got_disp(i32)($1)
-; N64-NEXT: lw $1, 0($1)
+; N64-NEXT: lwu $1, 0($1)
; N64-NEXT: splat.b $w0, $w0[$1]
; N64-NEXT: mfc1 $1, $f0
; N64-NEXT: sra $1, $1, 24
; N64-NEXT: ld.h $w0, 0($2)
; N64-NEXT: addv.h $w0, $w0, $w0
; N64-NEXT: ld $1, %got_disp(i32)($1)
-; N64-NEXT: lw $1, 0($1)
+; N64-NEXT: lwu $1, 0($1)
; N64-NEXT: splat.h $w0, $w0[$1]
; N64-NEXT: mfc1 $1, $f0
; N64-NEXT: sra $1, $1, 16
; N64-NEXT: ld.w $w0, 0($2)
; N64-NEXT: addv.w $w0, $w0, $w0
; N64-NEXT: ld $1, %got_disp(i32)($1)
-; N64-NEXT: lw $1, 0($1)
+; N64-NEXT: lwu $1, 0($1)
; N64-NEXT: splat.w $w0, $w0[$1]
; N64-NEXT: jr $ra
; N64-NEXT: mfc1 $2, $f0
; N64-NEXT: ld.d $w0, 0($2)
; N64-NEXT: addv.d $w0, $w0, $w0
; N64-NEXT: ld $1, %got_disp(i32)($1)
-; N64-NEXT: lw $1, 0($1)
+; N64-NEXT: lwu $1, 0($1)
; N64-NEXT: splat.d $w0, $w0[$1]
; N64-NEXT: jr $ra
; N64-NEXT: dmfc1 $2, $f0
; N64-NEXT: ld.b $w0, 0($2)
; N64-NEXT: addv.b $w0, $w0, $w0
; N64-NEXT: ld $1, %got_disp(i32)($1)
-; N64-NEXT: lw $1, 0($1)
+; N64-NEXT: lwu $1, 0($1)
; N64-NEXT: splat.b $w0, $w0[$1]
; N64-NEXT: mfc1 $1, $f0
; N64-NEXT: jr $ra
; N64-NEXT: ld.h $w0, 0($2)
; N64-NEXT: addv.h $w0, $w0, $w0
; N64-NEXT: ld $1, %got_disp(i32)($1)
-; N64-NEXT: lw $1, 0($1)
+; N64-NEXT: lwu $1, 0($1)
; N64-NEXT: splat.h $w0, $w0[$1]
; N64-NEXT: mfc1 $1, $f0
; N64-NEXT: jr $ra
; N64-NEXT: ld.w $w0, 0($2)
; N64-NEXT: addv.w $w0, $w0, $w0
; N64-NEXT: ld $1, %got_disp(i32)($1)
-; N64-NEXT: lw $1, 0($1)
+; N64-NEXT: lwu $1, 0($1)
; N64-NEXT: splat.w $w0, $w0[$1]
; N64-NEXT: jr $ra
; N64-NEXT: mfc1 $2, $f0
; N64-NEXT: ld.d $w0, 0($2)
; N64-NEXT: addv.d $w0, $w0, $w0
; N64-NEXT: ld $1, %got_disp(i32)($1)
-; N64-NEXT: lw $1, 0($1)
+; N64-NEXT: lwu $1, 0($1)
; N64-NEXT: splat.d $w0, $w0[$1]
; N64-NEXT: jr $ra
; N64-NEXT: dmfc1 $2, $f0
; N64-NEXT: daddu $1, $1, $25
; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v16i8_vidx)))
; N64-NEXT: ld $2, %got_disp(i32)($1)
-; N64-NEXT: lw $2, 0($2)
+; N64-NEXT: lwu $2, 0($2)
; N64-NEXT: ld $1, %got_disp(v16i8)($1)
; N64-NEXT: ld.b $w0, 0($1)
; N64-NEXT: sld.b $w0, $w0[$2]
; N64-NEXT: daddu $1, $1, $25
; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16_vidx)))
; N64-NEXT: ld $2, %got_disp(i32)($1)
-; N64-NEXT: lw $2, 0($2)
+; N64-NEXT: lwu $2, 0($2)
; N64-NEXT: ld $1, %got_disp(v8i16)($1)
; N64-NEXT: ld.h $w0, 0($1)
; N64-NEXT: dsll $2, $2, 1
; N64-NEXT: daddu $1, $1, $25
; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32_vidx)))
; N64-NEXT: ld $2, %got_disp(i32)($1)
-; N64-NEXT: lw $2, 0($2)
+; N64-NEXT: lwu $2, 0($2)
; N64-NEXT: ld $1, %got_disp(v4i32)($1)
; N64-NEXT: ld.w $w0, 0($1)
; N64-NEXT: dsll $2, $2, 2
; N64-NEXT: daddu $1, $1, $25
; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64_vidx)))
; N64-NEXT: ld $2, %got_disp(i32)($1)
-; N64-NEXT: lw $2, 0($2)
+; N64-NEXT: lwu $2, 0($2)
; N64-NEXT: ld $1, %got_disp(v2i64)($1)
; N64-NEXT: ld.d $w0, 0($1)
; N64-NEXT: dsll $2, $2, 3
; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
- ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%4 = extractelement <4 x float> %2, i32 %3
- ; ALL-DAG: splat.w $w0, [[R1]][[[IDX]]]
+ ; ALL-DAG: splat.w $w0, [[R1]][[[PTR_I]]]
ret float %4
}
; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
- ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%4 = extractelement <2 x double> %2, i32 %3
- ; ALL-DAG: splat.d $w0, [[R1]][[[IDX]]]
+ ; ALL-DAG: splat.d $w0, [[R1]][[[PTR_I]]]
ret double %4
}
; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
- ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%3 = insertelement <4 x float> %1, float %a, i32 %2
; float argument passed in $f12
- ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
+ ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[PTR_I]], 2
; ALL-DAG: sld.b [[R1]], [[R1]][[[BIDX]]]
; ALL-DAG: insve.w [[R1]][0], $w12[0]
; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
- ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%3 = insertelement <2 x double> %1, double %a, i32 %2
; double argument passed in $f12
- ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 3
+ ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[PTR_I]], 3
; ALL-DAG: sld.b [[R1]], [[R1]][[[BIDX]]]
; ALL-DAG: insve.d [[R1]][0], $w12[0]
; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
define double @conv2dlbTestuiVar(<4 x i32> %a, i32 zeroext %elem) {
; CHECK-64-LABEL: conv2dlbTestuiVar:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-NEXT: extsw 3, 3
; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-64-NEXT: vextuwlx 3, 3, 2
; CHECK-64-NEXT: mtfprwz 0, 3
define zeroext i8 @test1(<16 x i8> %a, i32 signext %index) {
; CHECK-64-LABEL: test1:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: vextublx 3, 3, 2
; CHECK-64-NEXT: clrldi 3, 3, 56
; CHECK-64-NEXT: blr
define signext i8 @test2(<16 x i8> %a, i32 signext %index) {
; CHECK-64-LABEL: test2:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: vextublx 3, 3, 2
; CHECK-64-NEXT: extsb 3, 3
; CHECK-64-NEXT: blr
define zeroext i16 @test3(<8 x i16> %a, i32 signext %index) {
; CHECK-64-LABEL: test3:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-64-NEXT: vextuhlx 3, 3, 2
; CHECK-64-NEXT: clrldi 3, 3, 48
define signext i16 @test4(<8 x i16> %a, i32 signext %index) {
; CHECK-64-LABEL: test4:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-64-NEXT: vextuhlx 3, 3, 2
; CHECK-64-NEXT: extsh 3, 3
define zeroext i32 @test5(<4 x i32> %a, i32 signext %index) {
; CHECK-64-LABEL: test5:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-64-NEXT: vextuwlx 3, 3, 2
; CHECK-64-NEXT: blr
define signext i32 @test6(<4 x i32> %a, i32 signext %index) {
; CHECK-64-LABEL: test6:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-64-NEXT: vextuwlx 3, 3, 2
; CHECK-64-NEXT: extsw 3, 3
define zeroext i8 @test_add1(<16 x i8> %a, i32 signext %index, i8 zeroext %c) {
; CHECK-64-LABEL: test_add1:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: vextublx 3, 3, 2
; CHECK-64-NEXT: add 3, 3, 4
; CHECK-64-NEXT: clrldi 3, 3, 56
define signext i8 @test_add2(<16 x i8> %a, i32 signext %index, i8 signext %c) {
; CHECK-64-LABEL: test_add2:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: vextublx 3, 3, 2
; CHECK-64-NEXT: add 3, 3, 4
; CHECK-64-NEXT: extsb 3, 3
define zeroext i16 @test_add3(<8 x i16> %a, i32 signext %index, i16 zeroext %c) {
; CHECK-64-LABEL: test_add3:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-64-NEXT: vextuhlx 3, 3, 2
; CHECK-64-NEXT: add 3, 3, 4
define signext i16 @test_add4(<8 x i16> %a, i32 signext %index, i16 signext %c) {
; CHECK-64-LABEL: test_add4:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-64-NEXT: vextuhlx 3, 3, 2
; CHECK-64-NEXT: add 3, 3, 4
define zeroext i32 @test_add5(<4 x i32> %a, i32 signext %index, i32 zeroext %c) {
; CHECK-64-LABEL: test_add5:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-64-NEXT: vextuwlx 3, 3, 2
; CHECK-64-NEXT: add 3, 3, 4
define signext i32 @test_add6(<4 x i32> %a, i32 signext %index, i32 signext %c) {
; CHECK-64-LABEL: test_add6:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-64-NEXT: vextuwlx 3, 3, 2
; CHECK-64-NEXT: add 3, 3, 4
define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
; CHECK-64-LABEL: testFloat1:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-DAG: rlwinm 3, 4, 2, 28, 29
-; CHECK-64-DAG: addi 4, 1, -16
+; CHECK-64-NEXT: rlwinm 3, 4, 2, 28, 29
+; CHECK-64-NEXT: addi 4, 1, -16
; CHECK-64-NEXT: stxv 34, -16(1)
; CHECK-64-NEXT: stfsx 1, 4, 3
; CHECK-64-NEXT: lxv 34, -16(1)
; CHECK-64-P10-LABEL: testFloat1:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: xscvdpspn 35, 1
-; CHECK-64-P10-NEXT: extsw 3, 4
-; CHECK-64-P10-NEXT: slwi 3, 3, 2
+; CHECK-64-P10-NEXT: slwi 3, 4, 2
; CHECK-64-P10-NEXT: vinswvlx 2, 3, 3
; CHECK-64-P10-NEXT: blr
;
; CHECK-64-LABEL: testFloat2:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: lwz 6, 0(3)
-; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29
-; CHECK-64-DAG: addi 7, 1, -16
+; CHECK-64-NEXT: addi 7, 1, -16
+; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29
; CHECK-64-NEXT: stxv 34, -16(1)
+; CHECK-64-NEXT: rlwinm 5, 5, 2, 28, 29
; CHECK-64-NEXT: stwx 6, 7, 4
-; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29
-; CHECK-64-NEXT: addi 5, 1, -32
+; CHECK-64-NEXT: addi 4, 1, -32
; CHECK-64-NEXT: lxv 0, -16(1)
; CHECK-64-NEXT: lwz 3, 1(3)
; CHECK-64-NEXT: stxv 0, -32(1)
-; CHECK-64-NEXT: stwx 3, 5, 4
+; CHECK-64-NEXT: stwx 3, 4, 5
; CHECK-64-NEXT: lxv 34, -32(1)
; CHECK-64-NEXT: blr
;
; CHECK-64-P10-LABEL: testFloat2:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: lwz 6, 0(3)
-; CHECK-64-P10-NEXT: extsw 4, 4
; CHECK-64-P10-NEXT: lwz 3, 1(3)
; CHECK-64-P10-NEXT: slwi 4, 4, 2
; CHECK-64-P10-NEXT: vinswlx 2, 4, 6
-; CHECK-64-P10-NEXT: extsw 4, 5
-; CHECK-64-P10-NEXT: slwi 4, 4, 2
+; CHECK-64-P10-NEXT: slwi 4, 5, 2
; CHECK-64-P10-NEXT: vinswlx 2, 4, 3
; CHECK-64-P10-NEXT: blr
;
; CHECK-64-LABEL: testFloat3:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: lis 6, 1
-; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29
-; CHECK-64-DAG: addi 7, 1, -16
+; CHECK-64-NEXT: addi 7, 1, -16
+; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29
+; CHECK-64-NEXT: rlwinm 5, 5, 2, 28, 29
; CHECK-64-NEXT: lwzx 6, 3, 6
; CHECK-64-NEXT: stxv 34, -16(1)
; CHECK-64-NEXT: stwx 6, 7, 4
; CHECK-64-NEXT: lxv 0, -16(1)
; CHECK-64-NEXT: rldic 4, 4, 36, 27
; CHECK-64-NEXT: lwzx 3, 3, 4
-; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29
-; CHECK-64-NEXT: addi 5, 1, -32
+; CHECK-64-NEXT: addi 4, 1, -32
; CHECK-64-NEXT: stxv 0, -32(1)
-; CHECK-64-NEXT: stwx 3, 5, 4
+; CHECK-64-NEXT: stwx 3, 4, 5
; CHECK-64-NEXT: lxv 34, -32(1)
; CHECK-64-NEXT: blr
;
; CHECK-64-P10-LABEL: testFloat3:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: plwz 6, 65536(3), 0
-; CHECK-64-P10-NEXT: extsw 4, 4
; CHECK-64-P10-NEXT: slwi 4, 4, 2
; CHECK-64-P10-NEXT: vinswlx 2, 4, 6
; CHECK-64-P10-NEXT: li 4, 1
; CHECK-64-P10-NEXT: rldic 4, 4, 36, 27
; CHECK-64-P10-NEXT: lwzx 3, 3, 4
-; CHECK-64-P10-NEXT: extsw 4, 5
-; CHECK-64-P10-NEXT: slwi 4, 4, 2
+; CHECK-64-P10-NEXT: slwi 4, 5, 2
; CHECK-64-P10-NEXT: vinswlx 2, 4, 3
; CHECK-64-P10-NEXT: blr
;
define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) {
; CHECK-64-LABEL: testDouble1:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64: rlwinm 3, 4, 3, 28, 28
+; CHECK-64-NEXT: rlwinm 3, 4, 3, 28, 28
; CHECK-64-NEXT: addi 4, 1, -16
; CHECK-64-NEXT: stxv 34, -16(1)
; CHECK-64-NEXT: stfdx 1, 4, 3
;
; CHECK-64-P10-LABEL: testDouble1:
; CHECK-64-P10: # %bb.0: # %entry
-; CHECK-64-P10-NEXT: extsw 4, 4
; CHECK-64-P10-NEXT: mffprd 3, 1
; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3
;
; CHECK-32-P10-LABEL: testDouble1:
; CHECK-32-P10: # %bb.0: # %entry
-; CHECK-32-P10-DAG: addi 4, 1, -16
-; CHECK-32-P10-DAG: rlwinm 3, 5, 3, 28, 28
+; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28
+; CHECK-32-P10-NEXT: addi 4, 1, -16
; CHECK-32-P10-NEXT: stxv 34, -16(1)
; CHECK-32-P10-NEXT: stfdx 1, 4, 3
; CHECK-32-P10-NEXT: lxv 34, -16(1)
; CHECK-64-LABEL: testDouble2:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: ld 6, 0(3)
-; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28
-; CHECK-64-DAG: addi 7, 1, -32
+; CHECK-64-NEXT: addi 7, 1, -32
+; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28
; CHECK-64-NEXT: stxv 34, -32(1)
+; CHECK-64-NEXT: rlwinm 5, 5, 3, 28, 28
; CHECK-64-NEXT: stdx 6, 7, 4
; CHECK-64-NEXT: li 4, 1
; CHECK-64-NEXT: lxv 0, -32(1)
; CHECK-64-NEXT: ldx 3, 3, 4
-; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28
-; CHECK-64-NEXT: addi 5, 1, -16
+; CHECK-64-NEXT: addi 4, 1, -16
; CHECK-64-NEXT: stxv 0, -16(1)
-; CHECK-64-NEXT: stdx 3, 5, 4
+; CHECK-64-NEXT: stdx 3, 4, 5
; CHECK-64-NEXT: lxv 34, -16(1)
; CHECK-64-NEXT: blr
;
; CHECK-64-P10-LABEL: testDouble2:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: ld 6, 0(3)
-; CHECK-64-P10-NEXT: extsw 4, 4
; CHECK-64-P10-NEXT: pld 3, 1(3), 0
; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 6
-; CHECK-64-P10-NEXT: extsw 4, 5
-; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
+; CHECK-64-P10-NEXT: rlwinm 4, 5, 3, 0, 28
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testDouble2:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: lfd 0, 0(3)
-; CHECK-32-P10-DAG: addi 6, 1, -32
-; CHECK-32-P10-DAG: rlwinm 4, 4, 3, 28, 28
+; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28
+; CHECK-32-P10-NEXT: addi 6, 1, -32
; CHECK-32-P10-NEXT: stxv 34, -32(1)
; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28
; CHECK-32-P10-NEXT: stfdx 0, 6, 4
; CHECK-64-LABEL: testDouble3:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: lis 6, 1
-; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28
-; CHECK-64-DAG: addi 7, 1, -32
+; CHECK-64-NEXT: addi 7, 1, -32
+; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28
+; CHECK-64-NEXT: rlwinm 5, 5, 3, 28, 28
; CHECK-64-NEXT: ldx 6, 3, 6
; CHECK-64-NEXT: stxv 34, -32(1)
; CHECK-64-NEXT: stdx 6, 7, 4
; CHECK-64-NEXT: lxv 0, -32(1)
; CHECK-64-NEXT: rldic 4, 4, 36, 27
; CHECK-64-NEXT: ldx 3, 3, 4
-; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28
-; CHECK-64-NEXT: addi 5, 1, -16
+; CHECK-64-NEXT: addi 4, 1, -16
; CHECK-64-NEXT: stxv 0, -16(1)
-; CHECK-64-NEXT: stdx 3, 5, 4
+; CHECK-64-NEXT: stdx 3, 4, 5
; CHECK-64-NEXT: lxv 34, -16(1)
; CHECK-64-NEXT: blr
;
; CHECK-64-P10-LABEL: testDouble3:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: pld 6, 65536(3), 0
-; CHECK-64-P10-NEXT: extsw 4, 4
; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 6
; CHECK-64-P10-NEXT: li 4, 1
; CHECK-64-P10-NEXT: rldic 4, 4, 36, 27
; CHECK-64-P10-NEXT: ldx 3, 3, 4
-; CHECK-64-P10-NEXT: extsw 4, 5
-; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
+; CHECK-64-P10-NEXT: rlwinm 4, 5, 3, 0, 28
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testDouble3:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: plfd 0, 65536(3), 0
-; CHECK-32-P10-DAG: addi 6, 1, -32
-; CHECK-32-P10-DAG: rlwinm 4, 4, 3, 28, 28
+; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28
+; CHECK-32-P10-NEXT: addi 6, 1, -32
; CHECK-32-P10-NEXT: stxv 34, -32(1)
; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28
; CHECK-32-P10-NEXT: stfdx 0, 6, 4
define signext i8 @getvelsc(<16 x i8> %vsc, i32 signext %i) {
; CHECK-LABEL: getvelsc:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andi. r4, r5, 8
+; CHECK-NEXT: clrldi r4, r5, 32
; CHECK-NEXT: li r3, 7
-; CHECK-NEXT: lvsl v3, 0, r4
-; CHECK-NEXT: andc r3, r3, r5
+; CHECK-NEXT: andi. r5, r4, 8
+; CHECK-NEXT: andc r3, r3, r4
+; CHECK-NEXT: lvsl v3, 0, r5
; CHECK-NEXT: sldi r3, r3, 3
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-NEXT: mfvsrd r4, v2
; CHECK-LE-LABEL: getvelsc:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li r3, 8
-; CHECK-LE-NEXT: andc r3, r3, r5
+; CHECK-LE-NEXT: clrldi r4, r5, 32
+; CHECK-LE-NEXT: andc r3, r3, r4
; CHECK-LE-NEXT: lvsl v3, 0, r3
; CHECK-LE-NEXT: li r3, 7
-; CHECK-LE-NEXT: and r3, r3, r5
+; CHECK-LE-NEXT: and r3, r3, r4
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
; CHECK-LE-NEXT: sldi r3, r3, 3
; CHECK-LE-NEXT: mfvsrd r4, v2
;
; CHECK-AIX-LABEL: getvelsc:
; CHECK-AIX: # %bb.0: # %entry
-; CHECK-AIX-NEXT: andi. 5, 3, 8
+; CHECK-AIX-NEXT: clrldi 3, 3, 32
; CHECK-AIX-NEXT: li 4, 7
-; CHECK-AIX-NEXT: lvsl 3, 0, 5
+; CHECK-AIX-NEXT: andi. 5, 3, 8
; CHECK-AIX-NEXT: andc 3, 4, 3
+; CHECK-AIX-NEXT: lvsl 3, 0, 5
; CHECK-AIX-NEXT: sldi 3, 3, 3
; CHECK-AIX-NEXT: vperm 2, 2, 2, 3
; CHECK-AIX-NEXT: mfvsrd 4, 34
define zeroext i8 @getveluc(<16 x i8> %vuc, i32 signext %i) {
; CHECK-LABEL: getveluc:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andi. r4, r5, 8
+; CHECK-NEXT: clrldi r4, r5, 32
; CHECK-NEXT: li r3, 7
-; CHECK-NEXT: lvsl v3, 0, r4
-; CHECK-NEXT: andc r3, r3, r5
+; CHECK-NEXT: andi. r5, r4, 8
+; CHECK-NEXT: andc r3, r3, r4
+; CHECK-NEXT: lvsl v3, 0, r5
; CHECK-NEXT: sldi r3, r3, 3
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-NEXT: mfvsrd r4, v2
; CHECK-LE-LABEL: getveluc:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li r3, 8
-; CHECK-LE-NEXT: andc r3, r3, r5
+; CHECK-LE-NEXT: clrldi r4, r5, 32
+; CHECK-LE-NEXT: andc r3, r3, r4
; CHECK-LE-NEXT: lvsl v3, 0, r3
; CHECK-LE-NEXT: li r3, 7
-; CHECK-LE-NEXT: and r3, r3, r5
+; CHECK-LE-NEXT: and r3, r3, r4
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
; CHECK-LE-NEXT: sldi r3, r3, 3
; CHECK-LE-NEXT: mfvsrd r4, v2
;
; CHECK-AIX-LABEL: getveluc:
; CHECK-AIX: # %bb.0: # %entry
-; CHECK-AIX-NEXT: andi. 5, 3, 8
+; CHECK-AIX-NEXT: clrldi 3, 3, 32
; CHECK-AIX-NEXT: li 4, 7
-; CHECK-AIX-NEXT: lvsl 3, 0, 5
+; CHECK-AIX-NEXT: andi. 5, 3, 8
; CHECK-AIX-NEXT: andc 3, 4, 3
+; CHECK-AIX-NEXT: lvsl 3, 0, 5
; CHECK-AIX-NEXT: sldi 3, 3, 3
; CHECK-AIX-NEXT: vperm 2, 2, 2, 3
; CHECK-AIX-NEXT: mfvsrd 4, 34
define signext i16 @getvelss(<8 x i16> %vss, i32 signext %i) {
; CHECK-LABEL: getvelss:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andi. r4, r5, 4
+; CHECK-NEXT: clrldi r4, r5, 32
; CHECK-NEXT: li r3, 3
-; CHECK-NEXT: sldi r4, r4, 1
-; CHECK-NEXT: andc r3, r3, r5
-; CHECK-NEXT: lvsl v3, 0, r4
+; CHECK-NEXT: andi. r5, r4, 4
+; CHECK-NEXT: andc r3, r3, r4
+; CHECK-NEXT: sldi r5, r5, 1
; CHECK-NEXT: sldi r3, r3, 4
+; CHECK-NEXT: lvsl v3, 0, r5
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-NEXT: mfvsrd r4, v2
; CHECK-NEXT: srd r3, r4, r3
; CHECK-LE-LABEL: getvelss:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li r3, 4
-; CHECK-LE-NEXT: andc r3, r3, r5
+; CHECK-LE-NEXT: clrldi r4, r5, 32
+; CHECK-LE-NEXT: andc r3, r3, r4
; CHECK-LE-NEXT: sldi r3, r3, 1
; CHECK-LE-NEXT: lvsl v3, 0, r3
; CHECK-LE-NEXT: li r3, 3
-; CHECK-LE-NEXT: and r3, r3, r5
+; CHECK-LE-NEXT: and r3, r3, r4
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
; CHECK-LE-NEXT: sldi r3, r3, 4
; CHECK-LE-NEXT: mfvsrd r4, v2
;
; CHECK-AIX-LABEL: getvelss:
; CHECK-AIX: # %bb.0: # %entry
-; CHECK-AIX-NEXT: andi. 5, 3, 4
+; CHECK-AIX-NEXT: clrldi 3, 3, 32
; CHECK-AIX-NEXT: li 4, 3
-; CHECK-AIX-NEXT: sldi 5, 5, 1
+; CHECK-AIX-NEXT: andi. 5, 3, 4
; CHECK-AIX-NEXT: andc 3, 4, 3
-; CHECK-AIX-NEXT: lvsl 3, 0, 5
+; CHECK-AIX-NEXT: sldi 5, 5, 1
; CHECK-AIX-NEXT: sldi 3, 3, 4
+; CHECK-AIX-NEXT: lvsl 3, 0, 5
; CHECK-AIX-NEXT: vperm 2, 2, 2, 3
; CHECK-AIX-NEXT: mfvsrd 4, 34
; CHECK-AIX-NEXT: srd 3, 4, 3
define zeroext i16 @getvelus(<8 x i16> %vus, i32 signext %i) {
; CHECK-LABEL: getvelus:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andi. r4, r5, 4
+; CHECK-NEXT: clrldi r4, r5, 32
; CHECK-NEXT: li r3, 3
-; CHECK-NEXT: sldi r4, r4, 1
-; CHECK-NEXT: andc r3, r3, r5
-; CHECK-NEXT: lvsl v3, 0, r4
+; CHECK-NEXT: andi. r5, r4, 4
+; CHECK-NEXT: andc r3, r3, r4
+; CHECK-NEXT: sldi r5, r5, 1
; CHECK-NEXT: sldi r3, r3, 4
+; CHECK-NEXT: lvsl v3, 0, r5
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-NEXT: mfvsrd r4, v2
; CHECK-NEXT: srd r3, r4, r3
; CHECK-LE-LABEL: getvelus:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li r3, 4
-; CHECK-LE-NEXT: andc r3, r3, r5
+; CHECK-LE-NEXT: clrldi r4, r5, 32
+; CHECK-LE-NEXT: andc r3, r3, r4
; CHECK-LE-NEXT: sldi r3, r3, 1
; CHECK-LE-NEXT: lvsl v3, 0, r3
; CHECK-LE-NEXT: li r3, 3
-; CHECK-LE-NEXT: and r3, r3, r5
+; CHECK-LE-NEXT: and r3, r3, r4
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
; CHECK-LE-NEXT: sldi r3, r3, 4
; CHECK-LE-NEXT: mfvsrd r4, v2
;
; CHECK-AIX-LABEL: getvelus:
; CHECK-AIX: # %bb.0: # %entry
-; CHECK-AIX-NEXT: andi. 5, 3, 4
+; CHECK-AIX-NEXT: clrldi 3, 3, 32
; CHECK-AIX-NEXT: li 4, 3
-; CHECK-AIX-NEXT: sldi 5, 5, 1
+; CHECK-AIX-NEXT: andi. 5, 3, 4
; CHECK-AIX-NEXT: andc 3, 4, 3
-; CHECK-AIX-NEXT: lvsl 3, 0, 5
+; CHECK-AIX-NEXT: sldi 5, 5, 1
; CHECK-AIX-NEXT: sldi 3, 3, 4
+; CHECK-AIX-NEXT: lvsl 3, 0, 5
; CHECK-AIX-NEXT: vperm 2, 2, 2, 3
; CHECK-AIX-NEXT: mfvsrd 4, 34
; CHECK-AIX-NEXT: srd 3, 4, 3
define signext i32 @getvelsi(<4 x i32> %vsi, i32 signext %i) {
; CHECK-LABEL: getvelsi:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andi. r4, r5, 2
+; CHECK-NEXT: clrldi r4, r5, 32
; CHECK-NEXT: li r3, 1
-; CHECK-NEXT: sldi r4, r4, 2
-; CHECK-NEXT: andc r3, r3, r5
-; CHECK-NEXT: lvsl v3, 0, r4
+; CHECK-NEXT: andi. r5, r4, 2
+; CHECK-NEXT: andc r3, r3, r4
+; CHECK-NEXT: sldi r5, r5, 2
; CHECK-NEXT: sldi r3, r3, 5
+; CHECK-NEXT: lvsl v3, 0, r5
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-NEXT: mfvsrd r4, v2
; CHECK-NEXT: srd r3, r4, r3
; CHECK-LE-LABEL: getvelsi:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li r3, 2
-; CHECK-LE-NEXT: andc r3, r3, r5
+; CHECK-LE-NEXT: clrldi r4, r5, 32
+; CHECK-LE-NEXT: andc r3, r3, r4
; CHECK-LE-NEXT: sldi r3, r3, 2
; CHECK-LE-NEXT: lvsl v3, 0, r3
; CHECK-LE-NEXT: li r3, 1
-; CHECK-LE-NEXT: and r3, r3, r5
+; CHECK-LE-NEXT: and r3, r3, r4
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
; CHECK-LE-NEXT: sldi r3, r3, 5
; CHECK-LE-NEXT: mfvsrd r4, v2
;
; CHECK-AIX-LABEL: getvelsi:
; CHECK-AIX: # %bb.0: # %entry
-; CHECK-AIX-NEXT: andi. 5, 3, 2
+; CHECK-AIX-NEXT: clrldi 3, 3, 32
; CHECK-AIX-NEXT: li 4, 1
-; CHECK-AIX-NEXT: sldi 5, 5, 2
+; CHECK-AIX-NEXT: andi. 5, 3, 2
; CHECK-AIX-NEXT: andc 3, 4, 3
-; CHECK-AIX-NEXT: lvsl 3, 0, 5
+; CHECK-AIX-NEXT: sldi 5, 5, 2
; CHECK-AIX-NEXT: sldi 3, 3, 5
+; CHECK-AIX-NEXT: lvsl 3, 0, 5
; CHECK-AIX-NEXT: vperm 2, 2, 2, 3
; CHECK-AIX-NEXT: mfvsrd 4, 34
; CHECK-AIX-NEXT: srd 3, 4, 3
define zeroext i32 @getvelui(<4 x i32> %vui, i32 signext %i) {
; CHECK-LABEL: getvelui:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andi. r4, r5, 2
+; CHECK-NEXT: clrldi r4, r5, 32
; CHECK-NEXT: li r3, 1
-; CHECK-NEXT: sldi r4, r4, 2
-; CHECK-NEXT: andc r3, r3, r5
-; CHECK-NEXT: lvsl v3, 0, r4
+; CHECK-NEXT: andi. r5, r4, 2
+; CHECK-NEXT: andc r3, r3, r4
+; CHECK-NEXT: sldi r5, r5, 2
; CHECK-NEXT: sldi r3, r3, 5
+; CHECK-NEXT: lvsl v3, 0, r5
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-NEXT: mfvsrd r4, v2
; CHECK-NEXT: srd r3, r4, r3
; CHECK-LE-LABEL: getvelui:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li r3, 2
-; CHECK-LE-NEXT: andc r3, r3, r5
+; CHECK-LE-NEXT: clrldi r4, r5, 32
+; CHECK-LE-NEXT: andc r3, r3, r4
; CHECK-LE-NEXT: sldi r3, r3, 2
; CHECK-LE-NEXT: lvsl v3, 0, r3
; CHECK-LE-NEXT: li r3, 1
-; CHECK-LE-NEXT: and r3, r3, r5
+; CHECK-LE-NEXT: and r3, r3, r4
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
; CHECK-LE-NEXT: sldi r3, r3, 5
; CHECK-LE-NEXT: mfvsrd r4, v2
;
; CHECK-AIX-LABEL: getvelui:
; CHECK-AIX: # %bb.0: # %entry
-; CHECK-AIX-NEXT: andi. 5, 3, 2
+; CHECK-AIX-NEXT: clrldi 3, 3, 32
; CHECK-AIX-NEXT: li 4, 1
-; CHECK-AIX-NEXT: sldi 5, 5, 2
+; CHECK-AIX-NEXT: andi. 5, 3, 2
; CHECK-AIX-NEXT: andc 3, 4, 3
-; CHECK-AIX-NEXT: lvsl 3, 0, 5
+; CHECK-AIX-NEXT: sldi 5, 5, 2
; CHECK-AIX-NEXT: sldi 3, 3, 5
+; CHECK-AIX-NEXT: lvsl 3, 0, 5
; CHECK-AIX-NEXT: vperm 2, 2, 2, 3
; CHECK-AIX-NEXT: mfvsrd 4, 34
; CHECK-AIX-NEXT: srd 3, 4, 3
define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) {
; CHECK-LABEL: getvelsl:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andi. r3, r5, 1
+; CHECK-NEXT: clrldi r3, r5, 32
+; CHECK-NEXT: andi. r3, r3, 1
; CHECK-NEXT: sldi r3, r3, 3
; CHECK-NEXT: lvsl v3, 0, r3
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-LE-LABEL: getvelsl:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li r3, 1
-; CHECK-LE-NEXT: andc r3, r3, r5
+; CHECK-LE-NEXT: clrldi r4, r5, 32
+; CHECK-LE-NEXT: andc r3, r3, r4
; CHECK-LE-NEXT: sldi r3, r3, 3
; CHECK-LE-NEXT: lvsl v3, 0, r3
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
;
; CHECK-AIX-LABEL: getvelsl:
; CHECK-AIX: # %bb.0: # %entry
+; CHECK-AIX-NEXT: clrldi 3, 3, 32
; CHECK-AIX-NEXT: andi. 3, 3, 1
; CHECK-AIX-NEXT: sldi 3, 3, 3
; CHECK-AIX-NEXT: lvsl 3, 0, 3
define i64 @getvelul(<2 x i64> %vul, i32 signext %i) {
; CHECK-LABEL: getvelul:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andi. r3, r5, 1
+; CHECK-NEXT: clrldi r3, r5, 32
+; CHECK-NEXT: andi. r3, r3, 1
; CHECK-NEXT: sldi r3, r3, 3
; CHECK-NEXT: lvsl v3, 0, r3
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-LE-LABEL: getvelul:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li r3, 1
-; CHECK-LE-NEXT: andc r3, r3, r5
+; CHECK-LE-NEXT: clrldi r4, r5, 32
+; CHECK-LE-NEXT: andc r3, r3, r4
; CHECK-LE-NEXT: sldi r3, r3, 3
; CHECK-LE-NEXT: lvsl v3, 0, r3
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
;
; CHECK-AIX-LABEL: getvelul:
; CHECK-AIX: # %bb.0: # %entry
+; CHECK-AIX-NEXT: clrldi 3, 3, 32
; CHECK-AIX-NEXT: andi. 3, 3, 1
; CHECK-AIX-NEXT: sldi 3, 3, 3
; CHECK-AIX-NEXT: lvsl 3, 0, 3
define float @getvelf(<4 x float> %vf, i32 signext %i) {
; CHECK-LABEL: getvelf:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: sldi r3, r5, 2
+; CHECK-NEXT: rldic r3, r5, 2, 30
; CHECK-NEXT: lvsl v3, 0, r3
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-NEXT: xscvspdpn f1, v2
;
; CHECK-LE-LABEL: getvelf:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: xori r3, r5, 3
+; CHECK-LE-NEXT: clrldi r3, r5, 32
+; CHECK-LE-NEXT: xori r3, r3, 3
; CHECK-LE-NEXT: sldi r3, r3, 2
; CHECK-LE-NEXT: lvsl v3, 0, r3
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
;
; CHECK-AIX-LABEL: getvelf:
; CHECK-AIX: # %bb.0: # %entry
-; CHECK-AIX-NEXT: sldi 3, 3, 2
+; CHECK-AIX-NEXT: rldic 3, 3, 2, 30
; CHECK-AIX-NEXT: lvsl 3, 0, 3
; CHECK-AIX-NEXT: vperm 2, 2, 2, 3
; CHECK-AIX-NEXT: xscvspdpn 1, 34
define double @getveld(<2 x double> %vd, i32 signext %i) {
; CHECK-LABEL: getveld:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andi. r3, r5, 1
+; CHECK-NEXT: clrldi r3, r5, 32
+; CHECK-NEXT: andi. r3, r3, 1
; CHECK-NEXT: sldi r3, r3, 3
; CHECK-NEXT: lvsl v3, 0, r3
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-LE-LABEL: getveld:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li r3, 1
-; CHECK-LE-NEXT: andc r3, r3, r5
+; CHECK-LE-NEXT: clrldi r4, r5, 32
+; CHECK-LE-NEXT: andc r3, r3, r4
; CHECK-LE-NEXT: sldi r3, r3, 3
; CHECK-LE-NEXT: lvsl v3, 0, r3
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
;
; CHECK-AIX-LABEL: getveld:
; CHECK-AIX: # %bb.0: # %entry
+; CHECK-AIX-NEXT: clrldi 3, 3, 32
; CHECK-AIX-NEXT: andi. 3, 3, 1
; CHECK-AIX-NEXT: sldi 3, 3, 3
; CHECK-AIX-NEXT: lvsl 3, 0, 3
; RUN: --check-prefix=CHECK-P7
; Function Attrs: norecurse nounwind readnone
-define signext i32 @geti(<4 x i32> %a, i32 signext %b) {
+define zeroext i32 @geti(<4 x i32> %a, i32 zeroext %b) {
; CHECK-LABEL: geti:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li 3, 2
; CHECK-NEXT: sldi 3, 3, 5
; CHECK-NEXT: mfvsrd 4, 34
; CHECK-NEXT: srd 3, 4, 3
-; CHECK-NEXT: extsw 3, 3
+; CHECK-NEXT: clrldi 3, 3, 32
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: geti:
; CHECK-BE-NEXT: vperm 2, 2, 2, 3
; CHECK-BE-NEXT: mfvsrd 4, 34
; CHECK-BE-NEXT: srd 3, 4, 3
-; CHECK-BE-NEXT: extsw 3, 3
+; CHECK-BE-NEXT: clrldi 3, 3, 32
; CHECK-BE-NEXT: blr
;
; CHECK-P7-LABEL: geti:
; CHECK-P7-NEXT: addi 3, 1, -16
; CHECK-P7-NEXT: rlwinm 4, 5, 2, 28, 29
; CHECK-P7-NEXT: stxvw4x 34, 0, 3
-; CHECK-P7-NEXT: lwax 3, 3, 4
+; CHECK-P7-NEXT: lwzx 3, 3, 4
; CHECK-P7-NEXT: blr
entry:
%vecext = extractelement <4 x i32> %a, i32 %b
}
; Function Attrs: norecurse nounwind readnone
-define i64 @getl(<2 x i64> %a, i32 signext %b) {
+define i64 @getl(<2 x i64> %a, i32 zeroext %b) {
; CHECK-LABEL: getl:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li 3, 1
}
; Function Attrs: norecurse nounwind readnone
-define float @getf(<4 x float> %a, i32 signext %b) {
+define float @getf(<4 x float> %a, i32 zeroext %b) {
; CHECK-LABEL: getf:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xori 3, 5, 3
}
; Function Attrs: norecurse nounwind readnone
-define double @getd(<2 x double> %a, i32 signext %b) {
+define double @getd(<2 x double> %a, i32 zeroext %b) {
; CHECK-LABEL: getd:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li 3, 1
define zeroext i8 @test1(<16 x i8> %a, i32 signext %index) {
; CHECK-LE-LABEL: test1:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: vextubrx 3, 5, 2
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: vextubrx 3, 3, 2
; CHECK-LE-NEXT: clrldi 3, 3, 56
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test1:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: vextublx 3, 5, 2
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: vextublx 3, 3, 2
; CHECK-BE-NEXT: clrldi 3, 3, 56
; CHECK-BE-NEXT: blr
define signext i8 @test2(<16 x i8> %a, i32 signext %index) {
; CHECK-LE-LABEL: test2:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: vextubrx 3, 5, 2
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: vextubrx 3, 3, 2
; CHECK-LE-NEXT: extsb 3, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test2:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: vextublx 3, 5, 2
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: vextublx 3, 3, 2
; CHECK-BE-NEXT: extsb 3, 3
; CHECK-BE-NEXT: blr
define zeroext i16 @test3(<8 x i16> %a, i32 signext %index) {
; CHECK-LE-LABEL: test3:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-LE-NEXT: vextuhrx 3, 3, 2
; CHECK-LE-NEXT: clrldi 3, 3, 48
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test3:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-BE-NEXT: vextuhlx 3, 3, 2
; CHECK-BE-NEXT: clrldi 3, 3, 48
; CHECK-BE-NEXT: blr
define signext i16 @test4(<8 x i16> %a, i32 signext %index) {
; CHECK-LE-LABEL: test4:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-LE-NEXT: vextuhrx 3, 3, 2
; CHECK-LE-NEXT: extsh 3, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test4:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-BE-NEXT: vextuhlx 3, 3, 2
; CHECK-BE-NEXT: extsh 3, 3
; CHECK-BE-NEXT: blr
define zeroext i32 @test5(<4 x i32> %a, i32 signext %index) {
; CHECK-LE-LABEL: test5:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-LE-NEXT: vextuwrx 3, 3, 2
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test5:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-BE-NEXT: vextuwlx 3, 3, 2
; CHECK-BE-NEXT: blr
define signext i32 @test6(<4 x i32> %a, i32 signext %index) {
; CHECK-LE-LABEL: test6:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-LE-NEXT: vextuwrx 3, 3, 2
; CHECK-LE-NEXT: extsw 3, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test6:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-BE-NEXT: vextuwlx 3, 3, 2
; CHECK-BE-NEXT: extsw 3, 3
; CHECK-BE-NEXT: blr
define zeroext i8 @test_add1(<16 x i8> %a, i32 signext %index, i8 zeroext %c) {
; CHECK-LE-LABEL: test_add1:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: vextubrx 3, 5, 2
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: vextubrx 3, 3, 2
; CHECK-LE-NEXT: add 3, 3, 6
; CHECK-LE-NEXT: clrldi 3, 3, 56
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_add1:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: vextublx 3, 5, 2
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: vextublx 3, 3, 2
; CHECK-BE-NEXT: add 3, 3, 6
; CHECK-BE-NEXT: clrldi 3, 3, 56
; CHECK-BE-NEXT: blr
define signext i8 @test_add2(<16 x i8> %a, i32 signext %index, i8 signext %c) {
; CHECK-LE-LABEL: test_add2:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: vextubrx 3, 5, 2
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: vextubrx 3, 3, 2
; CHECK-LE-NEXT: add 3, 3, 6
; CHECK-LE-NEXT: extsb 3, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_add2:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: vextublx 3, 5, 2
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: vextublx 3, 3, 2
; CHECK-BE-NEXT: add 3, 3, 6
; CHECK-BE-NEXT: extsb 3, 3
; CHECK-BE-NEXT: blr
define zeroext i16 @test_add3(<8 x i16> %a, i32 signext %index, i16 zeroext %c) {
; CHECK-LE-LABEL: test_add3:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-LE-NEXT: vextuhrx 3, 3, 2
; CHECK-LE-NEXT: add 3, 3, 6
; CHECK-LE-NEXT: clrldi 3, 3, 48
;
; CHECK-BE-LABEL: test_add3:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-BE-NEXT: vextuhlx 3, 3, 2
; CHECK-BE-NEXT: add 3, 3, 6
; CHECK-BE-NEXT: clrldi 3, 3, 48
define signext i16 @test_add4(<8 x i16> %a, i32 signext %index, i16 signext %c) {
; CHECK-LE-LABEL: test_add4:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-LE-NEXT: vextuhrx 3, 3, 2
; CHECK-LE-NEXT: add 3, 3, 6
; CHECK-LE-NEXT: extsh 3, 3
;
; CHECK-BE-LABEL: test_add4:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-BE-NEXT: vextuhlx 3, 3, 2
; CHECK-BE-NEXT: add 3, 3, 6
; CHECK-BE-NEXT: extsh 3, 3
define zeroext i32 @test_add5(<4 x i32> %a, i32 signext %index, i32 zeroext %c) {
; CHECK-LE-LABEL: test_add5:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-LE-NEXT: vextuwrx 3, 3, 2
; CHECK-LE-NEXT: add 3, 3, 6
; CHECK-LE-NEXT: clrldi 3, 3, 32
;
; CHECK-BE-LABEL: test_add5:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-BE-NEXT: vextuwlx 3, 3, 2
; CHECK-BE-NEXT: add 3, 3, 6
; CHECK-BE-NEXT: clrldi 3, 3, 32
define signext i32 @test_add6(<4 x i32> %a, i32 signext %index, i32 signext %c) {
; CHECK-LE-LABEL: test_add6:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-LE-NEXT: vextuwrx 3, 3, 2
; CHECK-LE-NEXT: add 3, 3, 6
; CHECK-LE-NEXT: extsw 3, 3
;
; CHECK-BE-LABEL: test_add6:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-BE-NEXT: vextuwlx 3, 3, 2
; CHECK-BE-NEXT: add 3, 3, 6
; CHECK-BE-NEXT: extsw 3, 3
; CHECK-LABEL: testFloat1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xscvdpspn v3, f1
-; CHECK-NEXT: extsw r3, r6
-; CHECK-NEXT: slwi r3, r3, 2
+; CHECK-NEXT: slwi r3, r6, 2
; CHECK-NEXT: vinswvrx v2, r3, v3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testFloat1:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xscvdpspn v3, f1
-; CHECK-BE-NEXT: extsw r3, r6
-; CHECK-BE-NEXT: slwi r3, r3, 2
+; CHECK-BE-NEXT: slwi r3, r6, 2
; CHECK-BE-NEXT: vinswvlx v2, r3, v3
; CHECK-BE-NEXT: blr
;
; CHECK-LABEL: testFloat2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lwz r3, 0(r5)
-; CHECK-NEXT: extsw r4, r6
-; CHECK-NEXT: slwi r4, r4, 2
+; CHECK-NEXT: slwi r4, r6, 2
; CHECK-NEXT: vinswrx v2, r4, r3
; CHECK-NEXT: lwz r3, 1(r5)
-; CHECK-NEXT: extsw r4, r7
-; CHECK-NEXT: slwi r4, r4, 2
+; CHECK-NEXT: slwi r4, r7, 2
; CHECK-NEXT: vinswrx v2, r4, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testFloat2:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lwz r3, 0(r5)
-; CHECK-BE-NEXT: extsw r4, r6
-; CHECK-BE-NEXT: slwi r4, r4, 2
+; CHECK-BE-NEXT: slwi r4, r6, 2
; CHECK-BE-NEXT: vinswlx v2, r4, r3
; CHECK-BE-NEXT: lwz r3, 1(r5)
-; CHECK-BE-NEXT: extsw r4, r7
-; CHECK-BE-NEXT: slwi r4, r4, 2
+; CHECK-BE-NEXT: slwi r4, r7, 2
; CHECK-BE-NEXT: vinswlx v2, r4, r3
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testFloat2:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lwz r3, 0(r5)
; CHECK-P9-NEXT: rlwinm r4, r6, 2, 28, 29
-; CHECK-P9-NEXT: addi r6, r1, -16
+; CHECK-P9-NEXT: lwz r6, 0(r5)
+; CHECK-P9-NEXT: rlwinm r3, r7, 2, 28, 29
+; CHECK-P9-NEXT: addi r7, r1, -16
; CHECK-P9-NEXT: stxv v2, -16(r1)
-; CHECK-P9-NEXT: stwx r3, r6, r4
-; CHECK-P9-NEXT: rlwinm r4, r7, 2, 28, 29
+; CHECK-P9-NEXT: stwx r6, r7, r4
; CHECK-P9-NEXT: lxv vs0, -16(r1)
-; CHECK-P9-NEXT: lwz r3, 1(r5)
+; CHECK-P9-NEXT: lwz r4, 1(r5)
; CHECK-P9-NEXT: addi r5, r1, -32
; CHECK-P9-NEXT: stxv vs0, -32(r1)
-; CHECK-P9-NEXT: stwx r3, r5, r4
+; CHECK-P9-NEXT: stwx r4, r5, r3
; CHECK-P9-NEXT: lxv v2, -32(r1)
; CHECK-P9-NEXT: blr
;
-; AIX-P8-64-LABEL: testFloat2:
-; AIX-P8-64: # %bb.0: # %entry
-; AIX-P8-64-NEXT: lwz r7, 0(r3)
-; AIX-P8-64-NEXT: addi r6, r1, -32
-; AIX-P8-64-NEXT: rlwinm r4, r4, 2, 28, 29
-; AIX-P8-64-NEXT: rlwinm r5, r5, 2, 28, 29
-; AIX-P8-64-NEXT: stxvw4x v2, 0, r6
-; AIX-P8-64-NEXT: stwx r7, r6, r4
-; AIX-P8-64-NEXT: addi r4, r1, -16
-; AIX-P8-64-NEXT: lxvw4x vs0, 0, r6
-; AIX-P8-64-NEXT: lwz r3, 1(r3)
-; AIX-P8-64-NEXT: stxvw4x vs0, 0, r4
-; AIX-P8-64-NEXT: stwx r3, r4, r5
-; AIX-P8-64-NEXT: lxvw4x v2, 0, r4
-; AIX-P8-64-NEXT: blr
-;
-; AIX-P8-32-LABEL: testFloat2:
-; AIX-P8-32: # %bb.0: # %entry
-; AIX-P8-32-NEXT: lwz r7, 0(r3)
-; AIX-P8-32-NEXT: addi r6, r1, -32
-; AIX-P8-32-NEXT: rlwinm r4, r4, 2, 28, 29
-; AIX-P8-32-NEXT: stxvw4x v2, 0, r6
-; AIX-P8-32-NEXT: stwx r7, r6, r4
-; AIX-P8-32-NEXT: rlwinm r4, r5, 2, 28, 29
-; AIX-P8-32-NEXT: addi r5, r1, -16
-; AIX-P8-32-NEXT: lxvw4x vs0, 0, r6
-; AIX-P8-32-NEXT: lwz r3, 1(r3)
-; AIX-P8-32-NEXT: stxvw4x vs0, 0, r5
-; AIX-P8-32-NEXT: stwx r3, r5, r4
-; AIX-P8-32-NEXT: lxvw4x v2, 0, r5
-; AIX-P8-32-NEXT: blr
+; AIX-P8-LABEL: testFloat2:
+; AIX-P8: # %bb.0: # %entry
+; AIX-P8-NEXT: lwz r7, 0(r3)
+; AIX-P8-NEXT: addi r6, r1, -32
+; AIX-P8-NEXT: rlwinm r4, r4, 2, 28, 29
+; AIX-P8-NEXT: stxvw4x v2, 0, r6
+; AIX-P8-NEXT: stwx r7, r6, r4
+; AIX-P8-NEXT: rlwinm r4, r5, 2, 28, 29
+; AIX-P8-NEXT: addi r5, r1, -16
+; AIX-P8-NEXT: lxvw4x vs0, 0, r6
+; AIX-P8-NEXT: lwz r3, 1(r3)
+; AIX-P8-NEXT: stxvw4x vs0, 0, r5
+; AIX-P8-NEXT: stwx r3, r5, r4
+; AIX-P8-NEXT: lxvw4x v2, 0, r5
+; AIX-P8-NEXT: blr
entry:
%add.ptr1 = getelementptr inbounds i8, ptr %b, i64 1
%0 = load float, ptr %b, align 4
; CHECK-LABEL: testFloat3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: plwz r3, 65536(r5), 0
-; CHECK-NEXT: extsw r4, r6
-; CHECK-NEXT: slwi r4, r4, 2
+; CHECK-NEXT: slwi r4, r6, 2
; CHECK-NEXT: vinswrx v2, r4, r3
; CHECK-NEXT: li r3, 1
-; CHECK-NEXT: extsw r4, r7
+; CHECK-NEXT: slwi r4, r7, 2
; CHECK-NEXT: rldic r3, r3, 36, 27
-; CHECK-NEXT: slwi r4, r4, 2
; CHECK-NEXT: lwzx r3, r5, r3
; CHECK-NEXT: vinswrx v2, r4, r3
; CHECK-NEXT: blr
; CHECK-BE-LABEL: testFloat3:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: plwz r3, 65536(r5), 0
-; CHECK-BE-NEXT: extsw r4, r6
-; CHECK-BE-NEXT: slwi r4, r4, 2
+; CHECK-BE-NEXT: slwi r4, r6, 2
; CHECK-BE-NEXT: vinswlx v2, r4, r3
; CHECK-BE-NEXT: li r3, 1
-; CHECK-BE-NEXT: extsw r4, r7
+; CHECK-BE-NEXT: slwi r4, r7, 2
; CHECK-BE-NEXT: rldic r3, r3, 36, 27
-; CHECK-BE-NEXT: slwi r4, r4, 2
; CHECK-BE-NEXT: lwzx r3, r5, r3
; CHECK-BE-NEXT: vinswlx v2, r4, r3
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testFloat3:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lis r3, 1
; CHECK-P9-NEXT: rlwinm r4, r6, 2, 28, 29
-; CHECK-P9-NEXT: addi r6, r1, -16
-; CHECK-P9-NEXT: lwzx r3, r5, r3
+; CHECK-P9-NEXT: lis r6, 1
+; CHECK-P9-NEXT: rlwinm r3, r7, 2, 28, 29
+; CHECK-P9-NEXT: addi r7, r1, -16
+; CHECK-P9-NEXT: lwzx r6, r5, r6
; CHECK-P9-NEXT: stxv v2, -16(r1)
-; CHECK-P9-NEXT: stwx r3, r6, r4
-; CHECK-P9-NEXT: li r3, 1
-; CHECK-P9-NEXT: rlwinm r4, r7, 2, 28, 29
+; CHECK-P9-NEXT: stwx r6, r7, r4
+; CHECK-P9-NEXT: li r4, 1
; CHECK-P9-NEXT: lxv vs0, -16(r1)
-; CHECK-P9-NEXT: rldic r3, r3, 36, 27
-; CHECK-P9-NEXT: lwzx r3, r5, r3
+; CHECK-P9-NEXT: rldic r4, r4, 36, 27
+; CHECK-P9-NEXT: lwzx r4, r5, r4
; CHECK-P9-NEXT: addi r5, r1, -32
; CHECK-P9-NEXT: stxv vs0, -32(r1)
-; CHECK-P9-NEXT: stwx r3, r5, r4
+; CHECK-P9-NEXT: stwx r4, r5, r3
; CHECK-P9-NEXT: lxv v2, -32(r1)
; CHECK-P9-NEXT: blr
;
; AIX-P8-64-LABEL: testFloat3:
; AIX-P8-64: # %bb.0: # %entry
; AIX-P8-64-NEXT: lis r6, 1
-; AIX-P8-64-NEXT: addi r7, r1, -32
; AIX-P8-64-NEXT: rlwinm r4, r4, 2, 28, 29
-; AIX-P8-64-NEXT: rlwinm r5, r5, 2, 28, 29
+; AIX-P8-64-NEXT: addi r7, r1, -32
; AIX-P8-64-NEXT: lwzx r6, r3, r6
; AIX-P8-64-NEXT: stxvw4x v2, 0, r7
; AIX-P8-64-NEXT: stwx r6, r7, r4
; AIX-P8-64-NEXT: lxvw4x vs0, 0, r7
; AIX-P8-64-NEXT: rldic r4, r4, 36, 27
; AIX-P8-64-NEXT: lwzx r3, r3, r4
-; AIX-P8-64-NEXT: addi r4, r1, -16
-; AIX-P8-64-NEXT: stxvw4x vs0, 0, r4
-; AIX-P8-64-NEXT: stwx r3, r4, r5
-; AIX-P8-64-NEXT: lxvw4x v2, 0, r4
+; AIX-P8-64-NEXT: rlwinm r4, r5, 2, 28, 29
+; AIX-P8-64-NEXT: addi r5, r1, -16
+; AIX-P8-64-NEXT: stxvw4x vs0, 0, r5
+; AIX-P8-64-NEXT: stwx r3, r5, r4
+; AIX-P8-64-NEXT: lxvw4x v2, 0, r5
; AIX-P8-64-NEXT: blr
;
; AIX-P8-32-LABEL: testFloat3:
define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) {
; CHECK-LABEL: testDouble1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: extsw r4, r6
; CHECK-NEXT: mffprd r3, f1
-; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-NEXT: rlwinm r4, r6, 3, 0, 28
; CHECK-NEXT: vinsdrx v2, r4, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testDouble1:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: extsw r4, r6
; CHECK-BE-NEXT: mffprd r3, f1
-; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-BE-NEXT: rlwinm r4, r6, 3, 0, 28
; CHECK-BE-NEXT: vinsdlx v2, r4, r3
; CHECK-BE-NEXT: blr
;
; CHECK-LABEL: testDouble2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld r3, 0(r5)
-; CHECK-NEXT: extsw r4, r6
-; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-NEXT: rlwinm r4, r6, 3, 0, 28
; CHECK-NEXT: vinsdrx v2, r4, r3
; CHECK-NEXT: pld r3, 1(r5), 0
-; CHECK-NEXT: extsw r4, r7
-; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-NEXT: rlwinm r4, r7, 3, 0, 28
; CHECK-NEXT: vinsdrx v2, r4, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testDouble2:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: ld r3, 0(r5)
-; CHECK-BE-NEXT: extsw r4, r6
-; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-BE-NEXT: rlwinm r4, r6, 3, 0, 28
; CHECK-BE-NEXT: vinsdlx v2, r4, r3
; CHECK-BE-NEXT: pld r3, 1(r5), 0
-; CHECK-BE-NEXT: extsw r4, r7
-; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-BE-NEXT: rlwinm r4, r7, 3, 0, 28
; CHECK-BE-NEXT: vinsdlx v2, r4, r3
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testDouble2:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: ld r3, 0(r5)
; CHECK-P9-NEXT: rlwinm r4, r6, 3, 28, 28
-; CHECK-P9-NEXT: addi r6, r1, -32
+; CHECK-P9-NEXT: ld r6, 0(r5)
+; CHECK-P9-NEXT: rlwinm r3, r7, 3, 28, 28
+; CHECK-P9-NEXT: addi r7, r1, -32
; CHECK-P9-NEXT: stxv v2, -32(r1)
-; CHECK-P9-NEXT: stdx r3, r6, r4
-; CHECK-P9-NEXT: li r3, 1
-; CHECK-P9-NEXT: rlwinm r4, r7, 3, 28, 28
+; CHECK-P9-NEXT: stdx r6, r7, r4
+; CHECK-P9-NEXT: li r4, 1
; CHECK-P9-NEXT: lxv vs0, -32(r1)
-; CHECK-P9-NEXT: ldx r3, r5, r3
+; CHECK-P9-NEXT: ldx r4, r5, r4
; CHECK-P9-NEXT: addi r5, r1, -16
; CHECK-P9-NEXT: stxv vs0, -16(r1)
-; CHECK-P9-NEXT: stdx r3, r5, r4
+; CHECK-P9-NEXT: stdx r4, r5, r3
; CHECK-P9-NEXT: lxv v2, -16(r1)
; CHECK-P9-NEXT: blr
;
; AIX-P8-64-NEXT: ld r7, 0(r3)
; AIX-P8-64-NEXT: addi r6, r1, -32
; AIX-P8-64-NEXT: rlwinm r4, r4, 3, 28, 28
-; AIX-P8-64-NEXT: rlwinm r5, r5, 3, 28, 28
; AIX-P8-64-NEXT: stxvd2x v2, 0, r6
; AIX-P8-64-NEXT: stdx r7, r6, r4
; AIX-P8-64-NEXT: li r4, 1
; AIX-P8-64-NEXT: lxvd2x vs0, 0, r6
; AIX-P8-64-NEXT: ldx r3, r3, r4
-; AIX-P8-64-NEXT: addi r4, r1, -16
-; AIX-P8-64-NEXT: stxvd2x vs0, 0, r4
-; AIX-P8-64-NEXT: stdx r3, r4, r5
-; AIX-P8-64-NEXT: lxvd2x v2, 0, r4
+; AIX-P8-64-NEXT: rlwinm r4, r5, 3, 28, 28
+; AIX-P8-64-NEXT: addi r5, r1, -16
+; AIX-P8-64-NEXT: stxvd2x vs0, 0, r5
+; AIX-P8-64-NEXT: stdx r3, r5, r4
+; AIX-P8-64-NEXT: lxvd2x v2, 0, r5
; AIX-P8-64-NEXT: blr
;
; AIX-P8-32-LABEL: testDouble2:
; CHECK-LABEL: testDouble3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, 65536(r5), 0
-; CHECK-NEXT: extsw r4, r6
-; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-NEXT: rlwinm r4, r6, 3, 0, 28
; CHECK-NEXT: vinsdrx v2, r4, r3
; CHECK-NEXT: li r3, 1
-; CHECK-NEXT: extsw r4, r7
+; CHECK-NEXT: rlwinm r4, r7, 3, 0, 28
; CHECK-NEXT: rldic r3, r3, 36, 27
-; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28
; CHECK-NEXT: ldx r3, r5, r3
; CHECK-NEXT: vinsdrx v2, r4, r3
; CHECK-NEXT: blr
; CHECK-BE-LABEL: testDouble3:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: pld r3, 65536(r5), 0
-; CHECK-BE-NEXT: extsw r4, r6
-; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-BE-NEXT: rlwinm r4, r6, 3, 0, 28
; CHECK-BE-NEXT: vinsdlx v2, r4, r3
; CHECK-BE-NEXT: li r3, 1
-; CHECK-BE-NEXT: extsw r4, r7
+; CHECK-BE-NEXT: rlwinm r4, r7, 3, 0, 28
; CHECK-BE-NEXT: rldic r3, r3, 36, 27
-; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28
; CHECK-BE-NEXT: ldx r3, r5, r3
; CHECK-BE-NEXT: vinsdlx v2, r4, r3
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testDouble3:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lis r3, 1
; CHECK-P9-NEXT: rlwinm r4, r6, 3, 28, 28
-; CHECK-P9-NEXT: addi r6, r1, -32
-; CHECK-P9-NEXT: ldx r3, r5, r3
+; CHECK-P9-NEXT: lis r6, 1
+; CHECK-P9-NEXT: rlwinm r3, r7, 3, 28, 28
+; CHECK-P9-NEXT: addi r7, r1, -32
+; CHECK-P9-NEXT: ldx r6, r5, r6
; CHECK-P9-NEXT: stxv v2, -32(r1)
-; CHECK-P9-NEXT: stdx r3, r6, r4
-; CHECK-P9-NEXT: li r3, 1
-; CHECK-P9-NEXT: rlwinm r4, r7, 3, 28, 28
+; CHECK-P9-NEXT: stdx r6, r7, r4
+; CHECK-P9-NEXT: li r4, 1
; CHECK-P9-NEXT: lxv vs0, -32(r1)
-; CHECK-P9-NEXT: rldic r3, r3, 36, 27
-; CHECK-P9-NEXT: ldx r3, r5, r3
+; CHECK-P9-NEXT: rldic r4, r4, 36, 27
+; CHECK-P9-NEXT: ldx r4, r5, r4
; CHECK-P9-NEXT: addi r5, r1, -16
; CHECK-P9-NEXT: stxv vs0, -16(r1)
-; CHECK-P9-NEXT: stdx r3, r5, r4
+; CHECK-P9-NEXT: stdx r4, r5, r3
; CHECK-P9-NEXT: lxv v2, -16(r1)
; CHECK-P9-NEXT: blr
;
; AIX-P8-64-LABEL: testDouble3:
; AIX-P8-64: # %bb.0: # %entry
; AIX-P8-64-NEXT: lis r6, 1
-; AIX-P8-64-NEXT: addi r7, r1, -32
; AIX-P8-64-NEXT: rlwinm r4, r4, 3, 28, 28
+; AIX-P8-64-NEXT: addi r7, r1, -32
; AIX-P8-64-NEXT: li r8, 1
-; AIX-P8-64-NEXT: rlwinm r5, r5, 3, 28, 28
; AIX-P8-64-NEXT: ldx r6, r3, r6
; AIX-P8-64-NEXT: stxvd2x v2, 0, r7
; AIX-P8-64-NEXT: stdx r6, r7, r4
; AIX-P8-64-NEXT: rldic r4, r8, 36, 27
; AIX-P8-64-NEXT: lxvd2x vs0, 0, r7
; AIX-P8-64-NEXT: ldx r3, r3, r4
-; AIX-P8-64-NEXT: addi r4, r1, -16
-; AIX-P8-64-NEXT: stxvd2x vs0, 0, r4
-; AIX-P8-64-NEXT: stdx r3, r4, r5
-; AIX-P8-64-NEXT: lxvd2x v2, 0, r4
+; AIX-P8-64-NEXT: rlwinm r4, r5, 3, 28, 28
+; AIX-P8-64-NEXT: addi r5, r1, -16
+; AIX-P8-64-NEXT: stxvd2x vs0, 0, r5
+; AIX-P8-64-NEXT: stdx r3, r5, r4
+; AIX-P8-64-NEXT: lxvd2x v2, 0, r5
; AIX-P8-64-NEXT: blr
;
; AIX-P8-32-LABEL: testDouble3:
ret half %r
}
-define half @extractelt_nxv1f16_idx(<vscale x 1 x half> %v, i32 signext %idx) {
+define half @extractelt_nxv1f16_idx(<vscale x 1 x half> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv1f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
ret half %r
}
-define half @extractelt_nxv2f16_idx(<vscale x 2 x half> %v, i32 signext %idx) {
+define half @extractelt_nxv2f16_idx(<vscale x 2 x half> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv2f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
ret half %r
}
-define half @extractelt_nxv4f16_idx(<vscale x 4 x half> %v, i32 signext %idx) {
+define half @extractelt_nxv4f16_idx(<vscale x 4 x half> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv4f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
ret half %r
}
-define half @extractelt_nxv8f16_idx(<vscale x 8 x half> %v, i32 signext %idx) {
+define half @extractelt_nxv8f16_idx(<vscale x 8 x half> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv8f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, ma
ret half %r
}
-define half @extractelt_nxv16f16_idx(<vscale x 16 x half> %v, i32 signext %idx) {
+define half @extractelt_nxv16f16_idx(<vscale x 16 x half> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv16f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m4, ta, ma
ret half %r
}
-define half @extractelt_nxv32f16_idx(<vscale x 32 x half> %v, i32 signext %idx) {
+define half @extractelt_nxv32f16_idx(<vscale x 32 x half> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv32f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma
ret float %r
}
-define float @extractelt_nxv1f32_idx(<vscale x 1 x float> %v, i32 signext %idx) {
+define float @extractelt_nxv1f32_idx(<vscale x 1 x float> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv1f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
ret float %r
}
-define float @extractelt_nxv2f32_idx(<vscale x 2 x float> %v, i32 signext %idx) {
+define float @extractelt_nxv2f32_idx(<vscale x 2 x float> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv2f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
ret float %r
}
-define float @extractelt_nxv4f32_idx(<vscale x 4 x float> %v, i32 signext %idx) {
+define float @extractelt_nxv4f32_idx(<vscale x 4 x float> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv4f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
ret float %r
}
-define float @extractelt_nxv8f32_idx(<vscale x 8 x float> %v, i32 signext %idx) {
+define float @extractelt_nxv8f32_idx(<vscale x 8 x float> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv8f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m4, ta, ma
ret float %r
}
-define float @extractelt_nxv16f32_idx(<vscale x 16 x float> %v, i32 signext %idx) {
+define float @extractelt_nxv16f32_idx(<vscale x 16 x float> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv16f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma
ret double %r
}
-define double @extractelt_nxv1f64_idx(<vscale x 1 x double> %v, i32 signext %idx) {
+define double @extractelt_nxv1f64_idx(<vscale x 1 x double> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv1f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
ret double %r
}
-define double @extractelt_nxv2f64_idx(<vscale x 2 x double> %v, i32 signext %idx) {
+define double @extractelt_nxv2f64_idx(<vscale x 2 x double> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv2f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, ma
ret double %r
}
-define double @extractelt_nxv4f64_idx(<vscale x 4 x double> %v, i32 signext %idx) {
+define double @extractelt_nxv4f64_idx(<vscale x 4 x double> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv4f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, ma
ret double %r
}
-define double @extractelt_nxv8f64_idx(<vscale x 8 x double> %v, i32 signext %idx) {
+define double @extractelt_nxv8f64_idx(<vscale x 8 x double> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv8f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma
}
define double @extractelt_nxv16f64_neg1(<vscale x 16 x double> %v) {
-; CHECK-LABEL: extractelt_nxv16f64_neg1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -64
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: addi s0, sp, 64
-; CHECK-NEXT: .cfi_def_cfa s0, 0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: andi sp, sp, -64
-; CHECK-NEXT: addi a0, sp, 64
-; CHECK-NEXT: vs8r.v v8, (a0)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a2, a1, 3
-; CHECK-NEXT: add a2, a0, a2
-; CHECK-NEXT: vs8r.v v16, (a2)
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: fld fa0, -8(a0)
-; CHECK-NEXT: addi sp, s0, -64
-; CHECK-NEXT: addi sp, sp, 64
-; CHECK-NEXT: ret
%r = extractelement <vscale x 16 x double> %v, i32 -1
ret double %r
}
ret double %r
}
-define double @extractelt_nxv16f64_idx(<vscale x 16 x double> %v, i32 signext %idx) {
+define double @extractelt_nxv16f64_idx(<vscale x 16 x double> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv16f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
ret i8 %r
}
-define signext i8 @extractelt_nxv1i8_idx(<vscale x 1 x i8> %v, i32 signext %idx) {
+define signext i8 @extractelt_nxv1i8_idx(<vscale x 1 x i8> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv1i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
ret i8 %r
}
-define signext i8 @extractelt_nxv2i8_idx(<vscale x 2 x i8> %v, i32 signext %idx) {
+define signext i8 @extractelt_nxv2i8_idx(<vscale x 2 x i8> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv2i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
ret i8 %r
}
-define signext i8 @extractelt_nxv4i8_idx(<vscale x 4 x i8> %v, i32 signext %idx) {
+define signext i8 @extractelt_nxv4i8_idx(<vscale x 4 x i8> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv4i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
ret i8 %r
}
-define signext i8 @extractelt_nxv8i8_idx(<vscale x 8 x i8> %v, i32 signext %idx) {
+define signext i8 @extractelt_nxv8i8_idx(<vscale x 8 x i8> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv8i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
ret i8 %r
}
-define signext i8 @extractelt_nxv16i8_idx(<vscale x 16 x i8> %v, i32 signext %idx) {
+define signext i8 @extractelt_nxv16i8_idx(<vscale x 16 x i8> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv16i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m2, ta, ma
ret i8 %r
}
-define signext i8 @extractelt_nxv32i8_idx(<vscale x 32 x i8> %v, i32 signext %idx) {
+define signext i8 @extractelt_nxv32i8_idx(<vscale x 32 x i8> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv32i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m4, ta, ma
ret i8 %r
}
-define signext i8 @extractelt_nxv64i8_idx(<vscale x 64 x i8> %v, i32 signext %idx) {
+define signext i8 @extractelt_nxv64i8_idx(<vscale x 64 x i8> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv64i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma
ret i16 %r
}
-define signext i16 @extractelt_nxv1i16_idx(<vscale x 1 x i16> %v, i32 signext %idx) {
+define signext i16 @extractelt_nxv1i16_idx(<vscale x 1 x i16> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv1i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
ret i16 %r
}
-define signext i16 @extractelt_nxv2i16_idx(<vscale x 2 x i16> %v, i32 signext %idx) {
+define signext i16 @extractelt_nxv2i16_idx(<vscale x 2 x i16> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv2i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
ret i16 %r
}
-define signext i16 @extractelt_nxv4i16_idx(<vscale x 4 x i16> %v, i32 signext %idx) {
+define signext i16 @extractelt_nxv4i16_idx(<vscale x 4 x i16> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv4i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
ret i16 %r
}
-define signext i16 @extractelt_nxv8i16_idx(<vscale x 8 x i16> %v, i32 signext %idx) {
+define signext i16 @extractelt_nxv8i16_idx(<vscale x 8 x i16> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv8i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, ma
ret i16 %r
}
-define signext i16 @extractelt_nxv16i16_idx(<vscale x 16 x i16> %v, i32 signext %idx) {
+define signext i16 @extractelt_nxv16i16_idx(<vscale x 16 x i16> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv16i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m4, ta, ma
ret i16 %r
}
-define signext i16 @extractelt_nxv32i16_idx(<vscale x 32 x i16> %v, i32 signext %idx) {
+define signext i16 @extractelt_nxv32i16_idx(<vscale x 32 x i16> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv32i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma
ret i32 %r
}
-define signext i32 @extractelt_nxv1i32_idx(<vscale x 1 x i32> %v, i32 signext %idx) {
+define signext i32 @extractelt_nxv1i32_idx(<vscale x 1 x i32> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv1i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
ret i32 %r
}
-define signext i32 @extractelt_nxv2i32_idx(<vscale x 2 x i32> %v, i32 signext %idx) {
+define signext i32 @extractelt_nxv2i32_idx(<vscale x 2 x i32> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv2i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
ret i32 %r
}
-define signext i32 @extractelt_nxv4i32_idx(<vscale x 4 x i32> %v, i32 signext %idx) {
+define signext i32 @extractelt_nxv4i32_idx(<vscale x 4 x i32> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv4i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
ret i32 %r
}
-define signext i32 @extractelt_nxv8i32_idx(<vscale x 8 x i32> %v, i32 signext %idx) {
+define signext i32 @extractelt_nxv8i32_idx(<vscale x 8 x i32> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv8i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m4, ta, ma
ret i32 %r
}
-define signext i32 @extractelt_nxv16i32_idx(<vscale x 16 x i32> %v, i32 signext %idx) {
+define signext i32 @extractelt_nxv16i32_idx(<vscale x 16 x i32> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv16i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma
ret i64 %r
}
-define i64 @extractelt_nxv1i64_idx(<vscale x 1 x i64> %v, i32 signext %idx) {
+define i64 @extractelt_nxv1i64_idx(<vscale x 1 x i64> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv1i64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
ret i64 %r
}
-define i64 @extractelt_nxv2i64_idx(<vscale x 2 x i64> %v, i32 signext %idx) {
+define i64 @extractelt_nxv2i64_idx(<vscale x 2 x i64> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv2i64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, ma
ret i64 %r
}
-define i64 @extractelt_nxv4i64_idx(<vscale x 4 x i64> %v, i32 signext %idx) {
+define i64 @extractelt_nxv4i64_idx(<vscale x 4 x i64> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv4i64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, ma
ret i64 %r
}
-define i64 @extractelt_nxv8i64_idx(<vscale x 8 x i64> %v, i32 signext %idx) {
+define i64 @extractelt_nxv8i64_idx(<vscale x 8 x i64> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv8i64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma
; CHECK-NEXT: andi sp, sp, -64
; CHECK-NEXT: addi a0, sp, 64
; CHECK-NEXT: vs8r.v v8, (a0)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a2, a1, 3
-; CHECK-NEXT: add a2, a0, a2
-; CHECK-NEXT: vs8r.v v16, (a2)
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: ld a0, -8(a0)
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 3
+; CHECK-NEXT: add a3, a0, a1
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: srli a1, a1, 32
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: vs8r.v v16, (a3)
+; CHECK-NEXT: bltu a2, a1, .LBB72_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: .LBB72_2:
+; CHECK-NEXT: slli a1, a2, 3
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: ld a0, 0(a0)
; CHECK-NEXT: addi sp, s0, -64
; CHECK-NEXT: addi sp, sp, 64
; CHECK-NEXT: ret
ret i64 %r
}
-define i64 @extractelt_nxv16i64_idx(<vscale x 16 x i64> %v, i32 signext %idx) {
+define i64 @extractelt_nxv16i64_idx(<vscale x 16 x i64> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv16i64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
ret i64 %b
}
-define i8 @extractelt_v16i8_idx(<16 x i8>* %x, i32 signext %idx) nounwind {
+define i8 @extractelt_v16i8_idx(<16 x i8>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v16i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
ret i8 %b
}
-define i16 @extractelt_v8i16_idx(<8 x i16>* %x, i32 signext %idx) nounwind {
+define i16 @extractelt_v8i16_idx(<8 x i16>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
ret i16 %b
}
-define i32 @extractelt_v4i32_idx(<4 x i32>* %x, i32 signext %idx) nounwind {
+define i32 @extractelt_v4i32_idx(<4 x i32>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v4i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
ret i32 %c
}
-define i64 @extractelt_v2i64_idx(<2 x i64>* %x, i32 signext %idx) nounwind {
+define i64 @extractelt_v2i64_idx(<2 x i64>* %x, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v2i64_idx:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
ret i64 %c
}
-define half @extractelt_v8f16_idx(<8 x half>* %x, i32 signext %idx) nounwind {
+define half @extractelt_v8f16_idx(<8 x half>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
ret half %c
}
-define float @extractelt_v4f32_idx(<4 x float>* %x, i32 signext %idx) nounwind {
+define float @extractelt_v4f32_idx(<4 x float>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v4f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
ret float %c
}
-define double @extractelt_v2f64_idx(<2 x double>* %x, i32 signext %idx) nounwind {
+define double @extractelt_v2f64_idx(<2 x double>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v2f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
ret double %c
}
-define i8 @extractelt_v32i8_idx(<32 x i8>* %x, i32 signext %idx) nounwind {
+define i8 @extractelt_v32i8_idx(<32 x i8>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v32i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 32
ret i8 %b
}
-define i16 @extractelt_v16i16_idx(<16 x i16>* %x, i32 signext %idx) nounwind {
+define i16 @extractelt_v16i16_idx(<16 x i16>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v16i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
ret i16 %b
}
-define i32 @extractelt_v8i32_idx(<8 x i32>* %x, i32 signext %idx) nounwind {
+define i32 @extractelt_v8i32_idx(<8 x i32>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
ret i32 %c
}
-define i64 @extractelt_v4i64_idx(<4 x i64>* %x, i32 signext %idx) nounwind {
+define i64 @extractelt_v4i64_idx(<4 x i64>* %x, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v4i64_idx:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
ret i64 %c
}
-define half @extractelt_v16f16_idx(<16 x half>* %x, i32 signext %idx) nounwind {
+define half @extractelt_v16f16_idx(<16 x half>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v16f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
ret half %c
}
-define float @extractelt_v8f32_idx(<8 x float>* %x, i32 signext %idx) nounwind {
+define float @extractelt_v8f32_idx(<8 x float>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
ret float %c
}
-define double @extractelt_v4f64_idx(<4 x double>* %x, i32 signext %idx) nounwind {
+define double @extractelt_v4f64_idx(<4 x double>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v4f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
}
; This uses a non-power of 2 type so that it isn't an MVT to catch an
-; incorrect use of getSimpleValueType_idx(, i32 signext %idx).
+; incorrect use of getSimpleValueType().
; NOTE: Type legalization is bitcasting to vXi32 and doing 2 independent
; slidedowns and extracts.
-define i64 @extractelt_v3i64_idx(<3 x i64>* %x, i32 signext %idx) nounwind {
+define i64 @extractelt_v3i64_idx(<3 x i64>* %x, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v3i64_idx:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
}
define <1 x i1> @insertelt_idx_v1i1(<1 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind {
-; RV32-LABEL: insertelt_idx_v1i1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vmv.v.i v9, 0
-; RV32-NEXT: vmerge.vim v9, v9, 1, v0
-; RV32-NEXT: addi a0, a1, 1
-; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
-; RV32-NEXT: vslideup.vx v9, v8, a1
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV32-NEXT: vand.vi v8, v9, 1
-; RV32-NEXT: vmsne.vi v0, v8, 0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: insertelt_idx_v1i1:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV64-NEXT: vmv.s.x v8, a0
-; RV64-NEXT: vmv.v.i v9, 0
-; RV64-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64-NEXT: sext.w a0, a1
-; RV64-NEXT: addi a1, a0, 1
-; RV64-NEXT: vsetvli zero, a1, e8, mf8, tu, ma
-; RV64-NEXT: vslideup.vx v9, v8, a0
-; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV64-NEXT: vand.vi v8, v9, 1
-; RV64-NEXT: vmsne.vi v0, v8, 0
-; RV64-NEXT: ret
+; CHECK-LABEL: insertelt_idx_v1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
+; CHECK-NEXT: vslideup.vx v9, v8, a1
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vand.vi v8, v9, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
%y = insertelement <1 x i1> %x, i1 %elt, i32 %idx
ret <1 x i1> %y
}
}
define <2 x i1> @insertelt_idx_v2i1(<2 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind {
-; RV32-LABEL: insertelt_idx_v2i1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vmv.v.i v9, 0
-; RV32-NEXT: vmerge.vim v9, v9, 1, v0
-; RV32-NEXT: addi a0, a1, 1
-; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
-; RV32-NEXT: vslideup.vx v9, v8, a1
-; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; RV32-NEXT: vand.vi v8, v9, 1
-; RV32-NEXT: vmsne.vi v0, v8, 0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: insertelt_idx_v2i1:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; RV64-NEXT: vmv.s.x v8, a0
-; RV64-NEXT: vmv.v.i v9, 0
-; RV64-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64-NEXT: sext.w a0, a1
-; RV64-NEXT: addi a1, a0, 1
-; RV64-NEXT: vsetvli zero, a1, e8, mf8, tu, ma
-; RV64-NEXT: vslideup.vx v9, v8, a0
-; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; RV64-NEXT: vand.vi v8, v9, 1
-; RV64-NEXT: vmsne.vi v0, v8, 0
-; RV64-NEXT: ret
+; CHECK-LABEL: insertelt_idx_v2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
+; CHECK-NEXT: vslideup.vx v9, v8, a1
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vand.vi v8, v9, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
%y = insertelement <2 x i1> %x, i1 %elt, i32 %idx
ret <2 x i1> %y
}
}
define <8 x i1> @insertelt_idx_v8i1(<8 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind {
-; RV32-LABEL: insertelt_idx_v8i1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vmv.v.i v9, 0
-; RV32-NEXT: vmerge.vim v9, v9, 1, v0
-; RV32-NEXT: addi a0, a1, 1
-; RV32-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
-; RV32-NEXT: vslideup.vx v9, v8, a1
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vand.vi v8, v9, 1
-; RV32-NEXT: vmsne.vi v0, v8, 0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: insertelt_idx_v8i1:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vmv.s.x v8, a0
-; RV64-NEXT: vmv.v.i v9, 0
-; RV64-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64-NEXT: sext.w a0, a1
-; RV64-NEXT: addi a1, a0, 1
-; RV64-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
-; RV64-NEXT: vslideup.vx v9, v8, a0
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vand.vi v8, v9, 1
-; RV64-NEXT: vmsne.vi v0, v8, 0
-; RV64-NEXT: ret
+; CHECK-LABEL: insertelt_idx_v8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vx v9, v8, a1
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vand.vi v8, v9, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
%y = insertelement <8 x i1> %x, i1 %elt, i32 %idx
ret <8 x i1> %y
}
}
define <64 x i1> @insertelt_idx_v64i1(<64 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind {
-; RV32-LABEL: insertelt_idx_v64i1:
-; RV32: # %bb.0:
-; RV32-NEXT: li a2, 64
-; RV32-NEXT: vsetvli zero, a2, e8, m4, ta, ma
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vmerge.vim v12, v12, 1, v0
-; RV32-NEXT: addi a0, a1, 1
-; RV32-NEXT: vsetvli zero, a0, e8, m4, tu, ma
-; RV32-NEXT: vslideup.vx v12, v8, a1
-; RV32-NEXT: vsetvli zero, a2, e8, m4, ta, ma
-; RV32-NEXT: vand.vi v8, v12, 1
-; RV32-NEXT: vmsne.vi v0, v8, 0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: insertelt_idx_v64i1:
-; RV64: # %bb.0:
-; RV64-NEXT: li a2, 64
-; RV64-NEXT: vsetvli zero, a2, e8, m4, ta, ma
-; RV64-NEXT: vmv.s.x v8, a0
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, 1, v0
-; RV64-NEXT: sext.w a0, a1
-; RV64-NEXT: addi a1, a0, 1
-; RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma
-; RV64-NEXT: vslideup.vx v12, v8, a0
-; RV64-NEXT: vsetvli zero, a2, e8, m4, ta, ma
-; RV64-NEXT: vand.vi v8, v12, 1
-; RV64-NEXT: vmsne.vi v0, v8, 0
-; RV64-NEXT: ret
+; CHECK-LABEL: insertelt_idx_v64i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmerge.vim v12, v12, 1, v0
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma
+; CHECK-NEXT: vslideup.vx v12, v8, a1
+; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-NEXT: vand.vi v8, v12, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
%y = insertelement <64 x i1> %x, i1 %elt, i32 %idx
ret <64 x i1> %y
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
; RV64-NEXT: vsetvli zero, a3, e16, m4, ta, ma
; RV64-NEXT: vle16.v v8, (a0)
; RV64-NEXT: vmv.s.x v12, a1
-; RV64-NEXT: sext.w a1, a2
+; RV64-NEXT: slli a1, a2, 32
+; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: addi a2, a1, 1
; RV64-NEXT: vsetvli zero, a2, e16, m4, tu, ma
; RV64-NEXT: vslideup.vx v8, v12, a1
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
; RV64-NEXT: vfmv.s.f v10, fa0
-; RV64-NEXT: sext.w a1, a1
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: addi a2, a1, 1
; RV64-NEXT: vsetvli zero, a2, e32, m2, tu, ma
; RV64-NEXT: vslideup.vx v8, v10, a1
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: li a2, -1
; RV64-NEXT: vmv.s.x v12, a2
-; RV64-NEXT: sext.w a1, a1
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: addi a2, a1, 1
; RV64-NEXT: vsetvli zero, a2, e64, m4, tu, ma
; RV64-NEXT: vslideup.vx v8, v12, a1
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: li a2, 6
; RV64-NEXT: vmv.s.x v12, a2
-; RV64-NEXT: sext.w a1, a1
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: addi a2, a1, 1
; RV64-NEXT: vsetvli zero, a2, e64, m4, tu, ma
; RV64-NEXT: vslideup.vx v8, v12, a1
ret <vscale x 1 x half> %r
}
-define <vscale x 1 x half> @insertelt_nxv1f16_idx(<vscale x 1 x half> %v, half %elt, i32 signext %idx) {
+define <vscale x 1 x half> @insertelt_nxv1f16_idx(<vscale x 1 x half> %v, half %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv1f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
ret <vscale x 2 x half> %r
}
-define <vscale x 2 x half> @insertelt_nxv2f16_idx(<vscale x 2 x half> %v, half %elt, i32 signext %idx) {
+define <vscale x 2 x half> @insertelt_nxv2f16_idx(<vscale x 2 x half> %v, half %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv2f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
ret <vscale x 4 x half> %r
}
-define <vscale x 4 x half> @insertelt_nxv4f16_idx(<vscale x 4 x half> %v, half %elt, i32 signext %idx) {
+define <vscale x 4 x half> @insertelt_nxv4f16_idx(<vscale x 4 x half> %v, half %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv4f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
ret <vscale x 8 x half> %r
}
-define <vscale x 8 x half> @insertelt_nxv8f16_idx(<vscale x 8 x half> %v, half %elt, i32 signext %idx) {
+define <vscale x 8 x half> @insertelt_nxv8f16_idx(<vscale x 8 x half> %v, half %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv8f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
ret <vscale x 16 x half> %r
}
-define <vscale x 16 x half> @insertelt_nxv16f16_idx(<vscale x 16 x half> %v, half %elt, i32 signext %idx) {
+define <vscale x 16 x half> @insertelt_nxv16f16_idx(<vscale x 16 x half> %v, half %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv16f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
ret <vscale x 32 x half> %r
}
-define <vscale x 32 x half> @insertelt_nxv32f16_idx(<vscale x 32 x half> %v, half %elt, i32 signext %idx) {
+define <vscale x 32 x half> @insertelt_nxv32f16_idx(<vscale x 32 x half> %v, half %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv32f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
ret <vscale x 1 x float> %r
}
-define <vscale x 1 x float> @insertelt_nxv1f32_idx(<vscale x 1 x float> %v, float %elt, i32 signext %idx) {
+define <vscale x 1 x float> @insertelt_nxv1f32_idx(<vscale x 1 x float> %v, float %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv1f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
ret <vscale x 2 x float> %r
}
-define <vscale x 2 x float> @insertelt_nxv2f32_idx(<vscale x 2 x float> %v, float %elt, i32 signext %idx) {
+define <vscale x 2 x float> @insertelt_nxv2f32_idx(<vscale x 2 x float> %v, float %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv2f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
ret <vscale x 4 x float> %r
}
-define <vscale x 4 x float> @insertelt_nxv4f32_idx(<vscale x 4 x float> %v, float %elt, i32 signext %idx) {
+define <vscale x 4 x float> @insertelt_nxv4f32_idx(<vscale x 4 x float> %v, float %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv4f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
ret <vscale x 8 x float> %r
}
-define <vscale x 8 x float> @insertelt_nxv8f32_idx(<vscale x 8 x float> %v, float %elt, i32 signext %idx) {
+define <vscale x 8 x float> @insertelt_nxv8f32_idx(<vscale x 8 x float> %v, float %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv8f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
ret <vscale x 16 x float> %r
}
-define <vscale x 16 x float> @insertelt_nxv16f32_idx(<vscale x 16 x float> %v, float %elt, i32 signext %idx) {
+define <vscale x 16 x float> @insertelt_nxv16f32_idx(<vscale x 16 x float> %v, float %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv16f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
ret <vscale x 1 x double> %r
}
-define <vscale x 1 x double> @insertelt_nxv1f64_idx(<vscale x 1 x double> %v, double %elt, i32 signext %idx) {
+define <vscale x 1 x double> @insertelt_nxv1f64_idx(<vscale x 1 x double> %v, double %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv1f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
ret <vscale x 2 x double> %r
}
-define <vscale x 2 x double> @insertelt_nxv2f64_idx(<vscale x 2 x double> %v, double %elt, i32 signext %idx) {
+define <vscale x 2 x double> @insertelt_nxv2f64_idx(<vscale x 2 x double> %v, double %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv2f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma
ret <vscale x 4 x double> %r
}
-define <vscale x 4 x double> @insertelt_nxv4f64_idx(<vscale x 4 x double> %v, double %elt, i32 signext %idx) {
+define <vscale x 4 x double> @insertelt_nxv4f64_idx(<vscale x 4 x double> %v, double %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv4f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma
ret <vscale x 8 x double> %r
}
-define <vscale x 8 x double> @insertelt_nxv8f64_idx(<vscale x 8 x double> %v, double %elt, i32 signext %idx) {
+define <vscale x 8 x double> @insertelt_nxv8f64_idx(<vscale x 8 x double> %v, double %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv8f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
ret <vscale x 1 x i8> %r
}
-define <vscale x 1 x i8> @insertelt_nxv1i8_idx(<vscale x 1 x i8> %v, i8 signext %elt, i32 signext %idx) {
+define <vscale x 1 x i8> @insertelt_nxv1i8_idx(<vscale x 1 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv1i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma
ret <vscale x 2 x i8> %r
}
-define <vscale x 2 x i8> @insertelt_nxv2i8_idx(<vscale x 2 x i8> %v, i8 signext %elt, i32 signext %idx) {
+define <vscale x 2 x i8> @insertelt_nxv2i8_idx(<vscale x 2 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv2i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
ret <vscale x 4 x i8> %r
}
-define <vscale x 4 x i8> @insertelt_nxv4i8_idx(<vscale x 4 x i8> %v, i8 signext %elt, i32 signext %idx) {
+define <vscale x 4 x i8> @insertelt_nxv4i8_idx(<vscale x 4 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv4i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
ret <vscale x 8 x i8> %r
}
-define <vscale x 8 x i8> @insertelt_nxv8i8_idx(<vscale x 8 x i8> %v, i8 signext %elt, i32 signext %idx) {
+define <vscale x 8 x i8> @insertelt_nxv8i8_idx(<vscale x 8 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv8i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
ret <vscale x 16 x i8> %r
}
-define <vscale x 16 x i8> @insertelt_nxv16i8_idx(<vscale x 16 x i8> %v, i8 signext %elt, i32 signext %idx) {
+define <vscale x 16 x i8> @insertelt_nxv16i8_idx(<vscale x 16 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv16i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
ret <vscale x 32 x i8> %r
}
-define <vscale x 32 x i8> @insertelt_nxv32i8_idx(<vscale x 32 x i8> %v, i8 signext %elt, i32 signext %idx) {
+define <vscale x 32 x i8> @insertelt_nxv32i8_idx(<vscale x 32 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv32i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma
ret <vscale x 64 x i8> %r
}
-define <vscale x 64 x i8> @insertelt_nxv64i8_idx(<vscale x 64 x i8> %v, i8 signext %elt, i32 signext %idx) {
+define <vscale x 64 x i8> @insertelt_nxv64i8_idx(<vscale x 64 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv64i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma
ret <vscale x 1 x i16> %r
}
-define <vscale x 1 x i16> @insertelt_nxv1i16_idx(<vscale x 1 x i16> %v, i16 signext %elt, i32 signext %idx) {
+define <vscale x 1 x i16> @insertelt_nxv1i16_idx(<vscale x 1 x i16> %v, i16 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv1i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
ret <vscale x 2 x i16> %r
}
-define <vscale x 2 x i16> @insertelt_nxv2i16_idx(<vscale x 2 x i16> %v, i16 signext %elt, i32 signext %idx) {
+define <vscale x 2 x i16> @insertelt_nxv2i16_idx(<vscale x 2 x i16> %v, i16 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv2i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
ret <vscale x 4 x i16> %r
}
-define <vscale x 4 x i16> @insertelt_nxv4i16_idx(<vscale x 4 x i16> %v, i16 signext %elt, i32 signext %idx) {
+define <vscale x 4 x i16> @insertelt_nxv4i16_idx(<vscale x 4 x i16> %v, i16 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv4i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
ret <vscale x 8 x i16> %r
}
-define <vscale x 8 x i16> @insertelt_nxv8i16_idx(<vscale x 8 x i16> %v, i16 signext %elt, i32 signext %idx) {
+define <vscale x 8 x i16> @insertelt_nxv8i16_idx(<vscale x 8 x i16> %v, i16 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv8i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
ret <vscale x 16 x i16> %r
}
-define <vscale x 16 x i16> @insertelt_nxv16i16_idx(<vscale x 16 x i16> %v, i16 signext %elt, i32 signext %idx) {
+define <vscale x 16 x i16> @insertelt_nxv16i16_idx(<vscale x 16 x i16> %v, i16 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv16i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
ret <vscale x 32 x i16> %r
}
-define <vscale x 32 x i16> @insertelt_nxv32i16_idx(<vscale x 32 x i16> %v, i16 signext %elt, i32 signext %idx) {
+define <vscale x 32 x i16> @insertelt_nxv32i16_idx(<vscale x 32 x i16> %v, i16 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv32i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma
ret <vscale x 1 x i32> %r
}
-define <vscale x 1 x i32> @insertelt_nxv1i32_idx(<vscale x 1 x i32> %v, i32 signext %elt, i32 signext %idx) {
+define <vscale x 1 x i32> @insertelt_nxv1i32_idx(<vscale x 1 x i32> %v, i32 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv1i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
ret <vscale x 2 x i32> %r
}
-define <vscale x 2 x i32> @insertelt_nxv2i32_idx(<vscale x 2 x i32> %v, i32 signext %elt, i32 signext %idx) {
+define <vscale x 2 x i32> @insertelt_nxv2i32_idx(<vscale x 2 x i32> %v, i32 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv2i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
ret <vscale x 4 x i32> %r
}
-define <vscale x 4 x i32> @insertelt_nxv4i32_idx(<vscale x 4 x i32> %v, i32 signext %elt, i32 signext %idx) {
+define <vscale x 4 x i32> @insertelt_nxv4i32_idx(<vscale x 4 x i32> %v, i32 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv4i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
ret <vscale x 8 x i32> %r
}
-define <vscale x 8 x i32> @insertelt_nxv8i32_idx(<vscale x 8 x i32> %v, i32 signext %elt, i32 signext %idx) {
+define <vscale x 8 x i32> @insertelt_nxv8i32_idx(<vscale x 8 x i32> %v, i32 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv8i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, ma
ret <vscale x 16 x i32> %r
}
-define <vscale x 16 x i32> @insertelt_nxv16i32_idx(<vscale x 16 x i32> %v, i32 signext %elt, i32 signext %idx) {
+define <vscale x 16 x i32> @insertelt_nxv16i32_idx(<vscale x 16 x i32> %v, i32 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv16i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: sext.w a0, a1
+; CHECK-NEXT: slli a0, a1, 32
+; CHECK-NEXT: srli a0, a0, 32
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v9, a0
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e64, m2, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: sext.w a0, a1
+; CHECK-NEXT: slli a0, a1, 32
+; CHECK-NEXT: srli a0, a0, 32
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, ma
; CHECK-NEXT: vslideup.vx v8, v10, a0
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT: vmv.s.x v12, a0
-; CHECK-NEXT: sext.w a0, a1
+; CHECK-NEXT: slli a0, a1, 32
+; CHECK-NEXT: srli a0, a0, 32
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, ma
; CHECK-NEXT: vslideup.vx v8, v12, a0
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: sext.w a0, a1
+; CHECK-NEXT: slli a0, a1, 32
+; CHECK-NEXT: srli a0, a0, 32
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma
; CHECK-NEXT: vslideup.vx v8, v16, a0
define fastcc i64 @extract_rr_v256i64(i32 signext %idx, <256 x i64> %v) {
; CHECK-LABEL: extract_rr_v256i64:
; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lvs %s0, %v0(%s0)
; CHECK-NEXT: b.l.t (, %s10)
%ret = extractelement <256 x i64> %v, i32 %idx
define fastcc i32 @extract_rr_v256i32(i32 signext %idx, <256 x i32> %v) {
; CHECK-LABEL: extract_rr_v256i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lvs %s0, %v0(%s0)
; CHECK-NEXT: b.l.t (, %s10)
%ret = extractelement <256 x i32> %v, i32 %idx
define fastcc i32 @extract_rr_v512i32(<512 x i32> %v, i32 signext %idx) {
; CHECK-LABEL: extract_rr_v512i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: srl %s1, %s0, 1
+; CHECK-NEXT: lea %s1, -2
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: and %s1, %s0, %s1
+; CHECK-NEXT: srl %s1, %s1, 1
; CHECK-NEXT: lvs %s1, %v0(%s1)
; CHECK-NEXT: nnd %s0, %s0, (63)0
; CHECK-NEXT: sla.w.sx %s0, %s0, 5
define fastcc double @extract_rr_v256f64(i32 signext %idx, <256 x double> %v) {
; CHECK-LABEL: extract_rr_v256f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lvs %s0, %v0(%s0)
; CHECK-NEXT: b.l.t (, %s10)
%ret = extractelement <256 x double> %v, i32 %idx
define fastcc float @extract_rr_v256f32(i32 signext %idx, <256 x float> %v) {
; CHECK-LABEL: extract_rr_v256f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lvs %s0, %v0(%s0)
; CHECK-NEXT: b.l.t (, %s10)
%ret = extractelement <256 x float> %v, i32 %idx
define fastcc float @extract_rr_v512f32(<512 x float> %v, i32 signext %idx) {
; CHECK-LABEL: extract_rr_v512f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: srl %s1, %s0, 1
+; CHECK-NEXT: lea %s1, -2
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: and %s1, %s0, %s1
+; CHECK-NEXT: srl %s1, %s1, 1
; CHECK-NEXT: lvs %s1, %v0(%s1)
; CHECK-NEXT: nnd %s0, %s0, (63)0
; CHECK-NEXT: sla.w.sx %s0, %s0, 5
define fastcc <256 x i64> @insert_rr_v256i64(i32 signext %idx, i64 %s) {
; CHECK-LABEL: insert_rr_v256i64:
; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lsv %v0(%s0), %s1
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <256 x i64> undef, i64 %s, i32 %idx
; CHECK-LABEL: insert_rr_v256i32:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lsv %v0(%s0), %s1
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <256 x i32> undef, i32 %s, i32 %idx
; CHECK-NEXT: nnd %s2, %s0, (63)0
; CHECK-NEXT: sla.w.sx %s2, %s2, 5
; CHECK-NEXT: sll %s1, %s1, %s2
+; CHECK-NEXT: lea %s3, -2
+; CHECK-NEXT: and %s3, %s3, (32)0
+; CHECK-NEXT: and %s0, %s0, %s3
; CHECK-NEXT: srl %s0, %s0, 1
; CHECK-NEXT: lvs %s3, %v0(%s0)
; CHECK-NEXT: srl %s2, (32)1, %s2
define fastcc <256 x double> @insert_rr_v256f64(i32 signext %idx, double %s) {
; CHECK-LABEL: insert_rr_v256f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lsv %v0(%s0), %s1
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <256 x double> undef, double %s, i32 %idx
define fastcc <256 x float> @insert_rr_v256f32(i32 signext %idx, float %s) {
; CHECK-LABEL: insert_rr_v256f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lsv %v0(%s0), %s1
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <256 x float> undef, float %s, i32 %idx
; CHECK-LABEL: insert_rr_v512f32:
; CHECK: # %bb.0:
; CHECK-NEXT: sra.l %s1, %s1, 32
-; CHECK-NEXT: srl %s2, %s0, 1
+; CHECK-NEXT: lea %s2, -2
+; CHECK-NEXT: and %s2, %s2, (32)0
+; CHECK-NEXT: and %s2, %s0, %s2
+; CHECK-NEXT: srl %s2, %s2, 1
; CHECK-NEXT: lvs %s3, %v0(%s2)
; CHECK-NEXT: nnd %s0, %s0, (63)0
; CHECK-NEXT: sla.w.sx %s0, %s0, 5
; CHECK-LABEL: swizzle_one_i8x16:
; CHECK: .functype swizzle_one_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1
-; CHECK-NEXT: return $pop0
+; CHECK-NEXT: global.get $push5=, __stack_pointer
+; CHECK-NEXT: i32.const $push6=, 16
+; CHECK-NEXT: i32.sub $push8=, $pop5, $pop6
+; CHECK-NEXT: local.tee $push7=, $2=, $pop8
+; CHECK-NEXT: v128.store 0($pop7), $0
+; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 0
+; CHECK-NEXT: i32.const $push1=, 15
+; CHECK-NEXT: i32.and $push2=, $pop0, $pop1
+; CHECK-NEXT: i32.or $push3=, $2, $pop2
+; CHECK-NEXT: v128.load8_splat $push4=, 0($pop3)
+; CHECK-NEXT: return $pop4
%m0 = extractelement <16 x i8> %mask, i32 0
%s0 = extractelement <16 x i8> %src, i8 %m0
%v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
; CHECK-LABEL: swizzle_all_i8x16:
; CHECK: .functype swizzle_all_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1
-; CHECK-NEXT: return $pop0
+; CHECK-NEXT: global.get $push80=, __stack_pointer
+; CHECK-NEXT: i32.const $push81=, 16
+; CHECK-NEXT: i32.sub $push98=, $pop80, $pop81
+; CHECK-NEXT: local.tee $push97=, $2=, $pop98
+; CHECK-NEXT: v128.store 0($pop97), $0
+; CHECK-NEXT: i8x16.extract_lane_u $push61=, $1, 0
+; CHECK-NEXT: i32.const $push1=, 15
+; CHECK-NEXT: i32.and $push62=, $pop61, $pop1
+; CHECK-NEXT: i32.or $push63=, $2, $pop62
+; CHECK-NEXT: v128.load8_splat $push64=, 0($pop63)
+; CHECK-NEXT: i8x16.extract_lane_u $push57=, $1, 1
+; CHECK-NEXT: i32.const $push96=, 15
+; CHECK-NEXT: i32.and $push58=, $pop57, $pop96
+; CHECK-NEXT: i32.or $push59=, $2, $pop58
+; CHECK-NEXT: i32.load8_u $push60=, 0($pop59)
+; CHECK-NEXT: i8x16.replace_lane $push65=, $pop64, 1, $pop60
+; CHECK-NEXT: i8x16.extract_lane_u $push53=, $1, 2
+; CHECK-NEXT: i32.const $push95=, 15
+; CHECK-NEXT: i32.and $push54=, $pop53, $pop95
+; CHECK-NEXT: i32.or $push55=, $2, $pop54
+; CHECK-NEXT: i32.load8_u $push56=, 0($pop55)
+; CHECK-NEXT: i8x16.replace_lane $push66=, $pop65, 2, $pop56
+; CHECK-NEXT: i8x16.extract_lane_u $push49=, $1, 3
+; CHECK-NEXT: i32.const $push94=, 15
+; CHECK-NEXT: i32.and $push50=, $pop49, $pop94
+; CHECK-NEXT: i32.or $push51=, $2, $pop50
+; CHECK-NEXT: i32.load8_u $push52=, 0($pop51)
+; CHECK-NEXT: i8x16.replace_lane $push67=, $pop66, 3, $pop52
+; CHECK-NEXT: i8x16.extract_lane_u $push45=, $1, 4
+; CHECK-NEXT: i32.const $push93=, 15
+; CHECK-NEXT: i32.and $push46=, $pop45, $pop93
+; CHECK-NEXT: i32.or $push47=, $2, $pop46
+; CHECK-NEXT: i32.load8_u $push48=, 0($pop47)
+; CHECK-NEXT: i8x16.replace_lane $push68=, $pop67, 4, $pop48
+; CHECK-NEXT: i8x16.extract_lane_u $push41=, $1, 5
+; CHECK-NEXT: i32.const $push92=, 15
+; CHECK-NEXT: i32.and $push42=, $pop41, $pop92
+; CHECK-NEXT: i32.or $push43=, $2, $pop42
+; CHECK-NEXT: i32.load8_u $push44=, 0($pop43)
+; CHECK-NEXT: i8x16.replace_lane $push69=, $pop68, 5, $pop44
+; CHECK-NEXT: i8x16.extract_lane_u $push37=, $1, 6
+; CHECK-NEXT: i32.const $push91=, 15
+; CHECK-NEXT: i32.and $push38=, $pop37, $pop91
+; CHECK-NEXT: i32.or $push39=, $2, $pop38
+; CHECK-NEXT: i32.load8_u $push40=, 0($pop39)
+; CHECK-NEXT: i8x16.replace_lane $push70=, $pop69, 6, $pop40
+; CHECK-NEXT: i8x16.extract_lane_u $push33=, $1, 7
+; CHECK-NEXT: i32.const $push90=, 15
+; CHECK-NEXT: i32.and $push34=, $pop33, $pop90
+; CHECK-NEXT: i32.or $push35=, $2, $pop34
+; CHECK-NEXT: i32.load8_u $push36=, 0($pop35)
+; CHECK-NEXT: i8x16.replace_lane $push71=, $pop70, 7, $pop36
+; CHECK-NEXT: i8x16.extract_lane_u $push29=, $1, 8
+; CHECK-NEXT: i32.const $push89=, 15
+; CHECK-NEXT: i32.and $push30=, $pop29, $pop89
+; CHECK-NEXT: i32.or $push31=, $2, $pop30
+; CHECK-NEXT: i32.load8_u $push32=, 0($pop31)
+; CHECK-NEXT: i8x16.replace_lane $push72=, $pop71, 8, $pop32
+; CHECK-NEXT: i8x16.extract_lane_u $push25=, $1, 9
+; CHECK-NEXT: i32.const $push88=, 15
+; CHECK-NEXT: i32.and $push26=, $pop25, $pop88
+; CHECK-NEXT: i32.or $push27=, $2, $pop26
+; CHECK-NEXT: i32.load8_u $push28=, 0($pop27)
+; CHECK-NEXT: i8x16.replace_lane $push73=, $pop72, 9, $pop28
+; CHECK-NEXT: i8x16.extract_lane_u $push21=, $1, 10
+; CHECK-NEXT: i32.const $push87=, 15
+; CHECK-NEXT: i32.and $push22=, $pop21, $pop87
+; CHECK-NEXT: i32.or $push23=, $2, $pop22
+; CHECK-NEXT: i32.load8_u $push24=, 0($pop23)
+; CHECK-NEXT: i8x16.replace_lane $push74=, $pop73, 10, $pop24
+; CHECK-NEXT: i8x16.extract_lane_u $push17=, $1, 11
+; CHECK-NEXT: i32.const $push86=, 15
+; CHECK-NEXT: i32.and $push18=, $pop17, $pop86
+; CHECK-NEXT: i32.or $push19=, $2, $pop18
+; CHECK-NEXT: i32.load8_u $push20=, 0($pop19)
+; CHECK-NEXT: i8x16.replace_lane $push75=, $pop74, 11, $pop20
+; CHECK-NEXT: i8x16.extract_lane_u $push13=, $1, 12
+; CHECK-NEXT: i32.const $push85=, 15
+; CHECK-NEXT: i32.and $push14=, $pop13, $pop85
+; CHECK-NEXT: i32.or $push15=, $2, $pop14
+; CHECK-NEXT: i32.load8_u $push16=, 0($pop15)
+; CHECK-NEXT: i8x16.replace_lane $push76=, $pop75, 12, $pop16
+; CHECK-NEXT: i8x16.extract_lane_u $push9=, $1, 13
+; CHECK-NEXT: i32.const $push84=, 15
+; CHECK-NEXT: i32.and $push10=, $pop9, $pop84
+; CHECK-NEXT: i32.or $push11=, $2, $pop10
+; CHECK-NEXT: i32.load8_u $push12=, 0($pop11)
+; CHECK-NEXT: i8x16.replace_lane $push77=, $pop76, 13, $pop12
+; CHECK-NEXT: i8x16.extract_lane_u $push5=, $1, 14
+; CHECK-NEXT: i32.const $push83=, 15
+; CHECK-NEXT: i32.and $push6=, $pop5, $pop83
+; CHECK-NEXT: i32.or $push7=, $2, $pop6
+; CHECK-NEXT: i32.load8_u $push8=, 0($pop7)
+; CHECK-NEXT: i8x16.replace_lane $push78=, $pop77, 14, $pop8
+; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 15
+; CHECK-NEXT: i32.const $push82=, 15
+; CHECK-NEXT: i32.and $push2=, $pop0, $pop82
+; CHECK-NEXT: i32.or $push3=, $2, $pop2
+; CHECK-NEXT: i32.load8_u $push4=, 0($pop3)
+; CHECK-NEXT: i8x16.replace_lane $push79=, $pop78, 15, $pop4
+; CHECK-NEXT: return $pop79
%m0 = extractelement <16 x i8> %mask, i32 0
%s0 = extractelement <16 x i8> %src, i8 %m0
%v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
; CHECK-LABEL: mashup_swizzle_i8x16:
; CHECK: .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1
-; CHECK-NEXT: i8x16.replace_lane $push1=, $pop0, 3, $2
-; CHECK-NEXT: i32.const $push2=, 42
-; CHECK-NEXT: i8x16.replace_lane $push3=, $pop1, 4, $pop2
-; CHECK-NEXT: i8x16.replace_lane $push4=, $pop3, 12, $2
-; CHECK-NEXT: i32.const $push6=, 42
-; CHECK-NEXT: i8x16.replace_lane $push5=, $pop4, 14, $pop6
-; CHECK-NEXT: return $pop5
+; CHECK-NEXT: global.get $push12=, __stack_pointer
+; CHECK-NEXT: i32.const $push13=, 16
+; CHECK-NEXT: i32.sub $push16=, $pop12, $pop13
+; CHECK-NEXT: local.tee $push15=, $3=, $pop16
+; CHECK-NEXT: v128.store 0($pop15), $0
+; CHECK-NEXT: i8x16.extract_lane_u $push7=, $1, 7
+; CHECK-NEXT: i32.const $push1=, 15
+; CHECK-NEXT: i32.and $push8=, $pop7, $pop1
+; CHECK-NEXT: i32.or $push9=, $3, $pop8
+; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 0
+; CHECK-NEXT: i32.const $push14=, 15
+; CHECK-NEXT: i32.and $push2=, $pop0, $pop14
+; CHECK-NEXT: i32.or $push3=, $3, $pop2
+; CHECK-NEXT: v128.const $push4=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0
+; CHECK-NEXT: v128.load8_lane $push5=, 0($pop3), $pop4, 0
+; CHECK-NEXT: i8x16.replace_lane $push6=, $pop5, 3, $2
+; CHECK-NEXT: v128.load8_lane $push10=, 0($pop9), $pop6, 7
+; CHECK-NEXT: i8x16.replace_lane $push11=, $pop10, 12, $2
+; CHECK-NEXT: return $pop11
%m0 = extractelement <16 x i8> %mask, i32 0
%s0 = extractelement <16 x i8> %src, i8 %m0
%v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
define i32 @extractelt_undef_insertelt(i32 %x, i32 %y) {
; CHECK-LABEL: extractelt_undef_insertelt:
; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ret{{[l|q]}}
%b = insertelement <4 x i32> zeroinitializer, i32 %x, i64 3
%c = icmp uge i32 %y, %y
;
; AVX512-LABEL: arg_i64_v2i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: movslq %esi, %rax
+; AVX512-NEXT: movl %esi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %xmm1
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512-NEXT: vpbroadcastq %rdi, %xmm0 {%k1}
; SSE41: # %bb.0:
; SSE41-NEXT: movapd %xmm0, %xmm2
; SSE41-NEXT: movddup {{.*#+}} xmm1 = xmm1[0,0]
-; SSE41-NEXT: movslq %edi, %rax
+; SSE41-NEXT: movl %edi, %eax
; SSE41-NEXT: movq %rax, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX1-LABEL: arg_f64_v2f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
-; AVX1-NEXT: movslq %edi, %rax
+; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX2-LABEL: arg_f64_v2f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
-; AVX2-NEXT: movslq %edi, %rax
+; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
;
; AVX512-LABEL: arg_f64_v2f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: movslq %edi, %rax
+; AVX512-NEXT: movl %edi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %xmm2
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %k1
; AVX512-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
;
; AVX512-LABEL: load_i64_v2i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: movslq %esi, %rax
+; AVX512-NEXT: movl %esi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %xmm1
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1}
; SSE41: # %bb.0:
; SSE41-NEXT: movapd %xmm0, %xmm1
; SSE41-NEXT: movddup {{.*#+}} xmm2 = mem[0,0]
-; SSE41-NEXT: movslq %esi, %rax
+; SSE41-NEXT: movl %esi, %eax
; SSE41-NEXT: movq %rax, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX1-LABEL: load_f64_v2f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
-; AVX1-NEXT: movslq %esi, %rax
+; AVX1-NEXT: movl %esi, %eax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX2-LABEL: load_f64_v2f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
-; AVX2-NEXT: movslq %esi, %rax
+; AVX2-NEXT: movl %esi, %eax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
;
; AVX512-LABEL: load_f64_v2f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: movslq %esi, %rax
+; AVX512-NEXT: movl %esi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %xmm1
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0]
;
; AVX512-LABEL: arg_i64_v4i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: movslq %esi, %rax
+; AVX512-NEXT: movl %esi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %ymm1
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512-NEXT: vpbroadcastq %rdi, %ymm0 {%k1}
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
-; AVX1-NEXT: movslq %edi, %rax
+; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
; AVX2-LABEL: arg_f64_v4f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1
-; AVX2-NEXT: movslq %edi, %rax
+; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2
; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
;
; AVX512-LABEL: arg_f64_v4f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: movslq %edi, %rax
+; AVX512-NEXT: movl %edi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %ymm2
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %k1
; AVX512-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1}
;
; AVX512-LABEL: load_i64_v4i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: movslq %esi, %rax
+; AVX512-NEXT: movl %esi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %ymm1
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1}
;
; AVX1-LABEL: load_f64_v4f64:
; AVX1: # %bb.0:
-; AVX1-NEXT: movslq %esi, %rax
+; AVX1-NEXT: movl %esi, %eax
; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
; AVX2-LABEL: load_f64_v4f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastsd (%rdi), %ymm1
-; AVX2-NEXT: movslq %esi, %rax
+; AVX2-NEXT: movl %esi, %eax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2
; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
;
; AVX512-LABEL: load_f64_v4f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: movslq %esi, %rax
+; AVX512-NEXT: movl %esi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %ymm1
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1}
; SSE-LABEL: PR44139:
; SSE: # %bb.0:
; SSE-NEXT: movl (%rdi), %eax
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
+; SSE-NEXT: movdqa %xmm0, 96(%rdi)
+; SSE-NEXT: movdqa %xmm0, 112(%rdi)
+; SSE-NEXT: movdqa %xmm0, 64(%rdi)
+; SSE-NEXT: movdqa %xmm0, 80(%rdi)
+; SSE-NEXT: movdqa %xmm0, 32(%rdi)
+; SSE-NEXT: movdqa %xmm0, 48(%rdi)
+; SSE-NEXT: movdqa %xmm0, (%rdi)
+; SSE-NEXT: movdqa %xmm0, 16(%rdi)
; SSE-NEXT: leal 2147483647(%rax), %ecx
; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: cmovnsl %eax, %ecx
; SSE-NEXT: divl %ecx
; SSE-NEXT: retq
;
-; AVX-LABEL: PR44139:
-; AVX: # %bb.0:
-; AVX-NEXT: movl (%rdi), %eax
-; AVX-NEXT: leal 2147483647(%rax), %ecx
-; AVX-NEXT: testl %eax, %eax
-; AVX-NEXT: cmovnsl %eax, %ecx
-; AVX-NEXT: andl $-2147483648, %ecx # imm = 0x80000000
-; AVX-NEXT: addl %eax, %ecx
-; AVX-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: divl %ecx
-; AVX-NEXT: retq
+; AVX1OR2-LABEL: PR44139:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX1OR2-NEXT: movl (%rdi), %eax
+; AVX1OR2-NEXT: vmovaps %ymm0, 64(%rdi)
+; AVX1OR2-NEXT: vmovaps %ymm0, 96(%rdi)
+; AVX1OR2-NEXT: vmovaps %ymm0, (%rdi)
+; AVX1OR2-NEXT: vmovaps %ymm0, 32(%rdi)
+; AVX1OR2-NEXT: leal 2147483647(%rax), %ecx
+; AVX1OR2-NEXT: testl %eax, %eax
+; AVX1OR2-NEXT: cmovnsl %eax, %ecx
+; AVX1OR2-NEXT: andl $-2147483648, %ecx # imm = 0x80000000
+; AVX1OR2-NEXT: addl %eax, %ecx
+; AVX1OR2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX1OR2-NEXT: xorl %edx, %edx
+; AVX1OR2-NEXT: divl %ecx
+; AVX1OR2-NEXT: vzeroupper
+; AVX1OR2-NEXT: retq
+;
+; AVX512-LABEL: PR44139:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vbroadcastsd (%rdi), %zmm0
+; AVX512-NEXT: movl (%rdi), %eax
+; AVX512-NEXT: vmovaps %zmm0, (%rdi)
+; AVX512-NEXT: vmovaps %zmm0, 64(%rdi)
+; AVX512-NEXT: leal 2147483647(%rax), %ecx
+; AVX512-NEXT: testl %eax, %eax
+; AVX512-NEXT: cmovnsl %eax, %ecx
+; AVX512-NEXT: andl $-2147483648, %ecx # imm = 0x80000000
+; AVX512-NEXT: addl %eax, %ecx
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: xorl %edx, %edx
+; AVX512-NEXT: divl %ecx
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
;
; X86AVX2-LABEL: PR44139:
; X86AVX2: # %bb.0:
-; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86AVX2-NEXT: movl (%eax), %eax
+; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86AVX2-NEXT: movl (%ecx), %eax
+; X86AVX2-NEXT: vbroadcastsd (%ecx), %ymm0
+; X86AVX2-NEXT: vmovaps %ymm0, 64(%ecx)
+; X86AVX2-NEXT: vmovaps %ymm0, 96(%ecx)
+; X86AVX2-NEXT: vmovaps %ymm0, (%ecx)
+; X86AVX2-NEXT: vmovaps %ymm0, 32(%ecx)
; X86AVX2-NEXT: leal 2147483647(%eax), %ecx
; X86AVX2-NEXT: testl %eax, %eax
; X86AVX2-NEXT: cmovnsl %eax, %ecx
; X86AVX2-NEXT: addl %eax, %ecx
; X86AVX2-NEXT: xorl %edx, %edx
; X86AVX2-NEXT: divl %ecx
+; X86AVX2-NEXT: vzeroupper
; X86AVX2-NEXT: retl
%L = load <16 x i64>, ptr %p
%E1 = extractelement <16 x i64> %L, i64 0
define <8 x i16> @var_shuffle_v8i16(<8 x i16> %v, <8 x i16> %indices) nounwind {
; SSE3-LABEL: var_shuffle_v8i16:
; SSE3: # %bb.0:
-; SSE3-NEXT: movd %xmm1, %eax
+; SSE3-NEXT: pextrw $0, %xmm1, %eax
; SSE3-NEXT: pextrw $1, %xmm1, %ecx
; SSE3-NEXT: pextrw $2, %xmm1, %edx
; SSE3-NEXT: pextrw $3, %xmm1, %esi
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm4
-; AVX512F-NEXT: vmovd %xmm4, %eax
+; AVX512F-NEXT: vpextrw $0, %xmm4, %eax
; AVX512F-NEXT: vmovaps %zmm0, (%rsp)
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vpextrw $7, %xmm4, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0
-; AVX512F-NEXT: vmovd %xmm3, %eax
+; AVX512F-NEXT: vpextrw $0, %xmm3, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
; AVX512F-NEXT: vpextrw $7, %xmm3, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm4, %xmm3
-; AVX512F-NEXT: vmovd %xmm2, %eax
+; AVX512F-NEXT: vpextrw $0, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vpinsrw $7, %eax, %xmm4, %xmm2
-; AVX512F-NEXT: vmovd %xmm1, %eax
+; AVX512F-NEXT: vpextrw $0, %xmm1, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm4
-; AVX512F-NEXT: vmovd %xmm4, %eax
+; AVX512F-NEXT: vpextrb $0, %xmm4, %eax
; AVX512F-NEXT: vmovaps %zmm0, (%rsp)
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpextrb $15, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0
-; AVX512F-NEXT: vmovd %xmm3, %eax
+; AVX512F-NEXT: vpextrb $0, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm4, %xmm3
-; AVX512F-NEXT: vmovd %xmm2, %eax
+; AVX512F-NEXT: vpextrb $0, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm4, %xmm2
-; AVX512F-NEXT: vmovd %xmm1, %eax
+; AVX512F-NEXT: vpextrb $0, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm4
-; AVX512BW-NEXT: vmovd %xmm4, %eax
+; AVX512BW-NEXT: vpextrb $0, %xmm4, %eax
; AVX512BW-NEXT: vmovaps %zmm0, (%rsp)
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpextrb $15, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0
-; AVX512BW-NEXT: vmovd %xmm3, %eax
+; AVX512BW-NEXT: vpextrb $0, %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vmovd %eax, %xmm4
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm3
-; AVX512BW-NEXT: vmovd %xmm2, %eax
+; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vmovd %eax, %xmm4
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm2
-; AVX512BW-NEXT: vmovd %xmm1, %eax
+; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vmovd %eax, %xmm4
; X32-LABEL: ossfuzz15662:
; X32: # %bb.0:
; X32-NEXT: xorps %xmm0, %xmm0
+; X32-NEXT: movaps %xmm0, (%eax)
+; X32-NEXT: xorps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: ossfuzz15662:
; X64: # %bb.0:
; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: movaps %xmm0, (%rax)
+; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: retq
%C10 = icmp ule i1 false, false
%C3 = icmp ule i1 true, undef