In https://github.com/llvm/llvm-project/issues/57452, we found that IRTranslator is translating `i1 true` into `i32 -1`.
This is because IRTranslator uses SExt for indices.
In this fix, we change the expected behavior of extractelement's index, moving from SExt to ZExt.
This change covers the documentation, SelectionDAG, and IRTranslator.
We also added a test for AMDGPU and updated the existing tests for AArch64, Mips, PowerPC, RISCV, VE, WebAssembly, and X86.
This patch fixes issue #57452.
Differential Revision: https://reviews.llvm.org/D132978
The first operand of an '``extractelement``' instruction is a value of
:ref:`vector <t_vector>` type. The second operand is an index indicating
the position from which to extract the element. The index may be a
-variable of any integer type.
+variable of any integer type, and will be treated as an unsigned integer.
Semantics:
""""""""""
:ref:`vector <t_vector>` type. The second operand is a scalar value whose
type must equal the element type of the first operand. The third operand
is an index indicating the position at which to insert the value. The
-index may be a variable of any integer type.
+index may be a variable of any integer type, and will be treated as an
+unsigned integer.
Semantics:
""""""""""
Register Idx;
if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) {
if (CI->getBitWidth() != PreferredVecIdxWidth) {
- APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth);
+ APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth);
auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx);
Idx = getOrCreateVReg(*NewIdxCI);
}
Idx = getOrCreateVReg(*U.getOperand(1));
if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
- Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx).getReg(0);
+ Idx = MIRBuilder.buildZExtOrTrunc(VecIdxTy, Idx).getReg(0);
}
MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
return true;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
SDValue InVal = getValue(I.getOperand(1));
- SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
+ SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
TLI.getVectorIdxTy(DAG.getDataLayout()));
setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
TLI.getValueType(DAG.getDataLayout(), I.getType()),
void SelectionDAGBuilder::visitExtractElement(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
- SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
+ SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
TLI.getVectorIdxTy(DAG.getDataLayout()));
setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
TLI.getValueType(DAG.getDataLayout(), I.getType()),
; CHECK-LABEL: name: test_extractelement
; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK: [[IDX:%[0-9]+]]:_(s32) = COPY $w0
-; CHECK: [[IDXEXT:%[0-9]+]]:_(s64) = G_SEXT [[IDX]]
+; CHECK: [[IDXEXT:%[0-9]+]]:_(s64) = G_ZEXT [[IDX]]
; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDXEXT]](s64)
; CHECK: $w0 = COPY [[RES]](s32)
%res = extractelement <2 x i32> %vec, i32 %idx
ret i32 %res
}
+define i32 @test_extractelement_const_idx_zext_i1(<2 x i32> %vec) {
+; CHECK-LABEL: name: test_extractelement_const_idx_zext_i1
+; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+; CHECK: [[IDX:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDX]](s64)
+; CHECK: $w0 = COPY [[RES]](s32)
+ %res = extractelement <2 x i32> %vec, i1 true
+ ret i32 %res
+}
+
+define i32 @test_extractelement_const_idx_zext_i8(<2 x i32> %vec) {
+; CHECK-LABEL: name: test_extractelement_const_idx_zext_i8
+; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+; CHECK: [[IDX:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
+; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDX]](s64)
+; CHECK: $w0 = COPY [[RES]](s32)
+ %res = extractelement <2 x i32> %vec, i8 255
+ ret i32 %res
+}
+
+
define i32 @test_singleelementvector(i32 %elt){
; CHECK-LABEL: name: test_singleelementvector
; CHECK: [[ELT:%[0-9]+]]:_(s32) = COPY $w0
define i32 @load_single_extract_variable_index_v3i32_small_align(<3 x i32>* %A, i32 %idx) {
; CHECK-LABEL: load_single_extract_variable_index_v3i32_small_align:
; CHECK: ; %bb.0:
-; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: sxtw x9, w1
+; CHECK-NEXT: mov w9, w1
; CHECK-NEXT: mov w8, #2
; CHECK-NEXT: cmp x9, #2
; CHECK-NEXT: csel x8, x9, x8, lo
define i32 @load_single_extract_variable_index_v3i32_default_align(<3 x i32>* %A, i32 %idx) {
; CHECK-LABEL: load_single_extract_variable_index_v3i32_default_align:
; CHECK: ; %bb.0:
-; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: sxtw x9, w1
+; CHECK-NEXT: mov w9, w1
; CHECK-NEXT: mov w8, #2
; CHECK-NEXT: cmp x9, #2
; CHECK-NEXT: csel x8, x9, x8, lo
define i8 @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_16xi8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.b, xzr, x8
; CHECK-NEXT: lastb w0, p0, z0.b
; CHECK-NEXT: ret
define i16 @test_lanex_8xi16(<vscale x 8 x i16> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_8xi16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.h, xzr, x8
; CHECK-NEXT: lastb w0, p0, z0.h
; CHECK-NEXT: ret
define i32 @test_lanex_4xi32(<vscale x 4 x i32> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_4xi32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.s, xzr, x8
; CHECK-NEXT: lastb w0, p0, z0.s
; CHECK-NEXT: ret
define i64 @test_lanex_2xi64(<vscale x 2 x i64> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_2xi64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.d, xzr, x8
; CHECK-NEXT: lastb x0, p0, z0.d
; CHECK-NEXT: ret
define half @test_lanex_8xf16(<vscale x 8 x half> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_8xf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.h, xzr, x8
; CHECK-NEXT: lastb h0, p0, z0.h
; CHECK-NEXT: ret
define half @test_lanex_4xf16(<vscale x 4 x half> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_4xf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.s, xzr, x8
; CHECK-NEXT: lastb h0, p0, z0.h
; CHECK-NEXT: ret
define half @test_lanex_2xf16(<vscale x 2 x half> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_2xf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.d, xzr, x8
; CHECK-NEXT: lastb h0, p0, z0.h
; CHECK-NEXT: ret
define float @test_lanex_4xf32(<vscale x 4 x float> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_4xf32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.s, xzr, x8
; CHECK-NEXT: lastb s0, p0, z0.s
; CHECK-NEXT: ret
define float @test_lanex_2xf32(<vscale x 2 x float> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_2xf32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.d, xzr, x8
; CHECK-NEXT: lastb s0, p0, z0.s
; CHECK-NEXT: ret
define double @test_lanex_2xf64(<vscale x 2 x double> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_2xf64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.d, xzr, x8
; CHECK-NEXT: lastb d0, p0, z0.d
; CHECK-NEXT: ret
define i1 @test_lanex_4xi1(<vscale x 4 x i1> %a, i32 %x) #0 {
; CHECK-LABEL: test_lanex_4xi1:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
; CHECK-NEXT: whilels p0.s, xzr, x8
; CHECK-NEXT: lastb w8, p0, z0.s
define <vscale x 16 x i8> @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
; CHECK-LABEL: test_lanex_16xi8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: mov w9, #30
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: ptrue p0.b
define <vscale x 8 x i1> @test_predicate_insert_8xi1_immediate (<vscale x 8 x i1> %val, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_8xi1_immediate:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: mov w9, #1
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: ptrue p1.h
define <vscale x 2 x i1> @test_predicate_insert_2xi1(<vscale x 2 x i1> %val, i1 %elt, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_2xi1:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
define <vscale x 4 x i1> @test_predicate_insert_4xi1(<vscale x 4 x i1> %val, i1 %elt, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_4xi1:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z0.s, w8
define <vscale x 8 x i1> @test_predicate_insert_8xi1(<vscale x 8 x i1> %val, i1 %elt, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_8xi1:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z0.h, w8
define <vscale x 16 x i1> @test_predicate_insert_16xi1(<vscale x 16 x i1> %val, i1 %elt, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_16xi1:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov z0.b, w8
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: mov x8, #-1
-; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: sxtw x9, w1
+; CHECK-NEXT: mov w9, w1
; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1b { z0.b }, p1, [sp, #1, mul vl]
define i32 @promote_extract_2i32_idx(<vscale x 2 x i32> %a, i32 %idx) {
; CHECK-LABEL: promote_extract_2i32_idx:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: whilels p0.d, xzr, x8
; CHECK-NEXT: lastb x0, p0, z0.d
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: mov x8, #-1
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x9, w0
+; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: mov x8, #-1
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x9, w0
+; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: cnth x8
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x9, w0
+; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
; CHECK-NEXT: cnth x8
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x9, w0
+; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cmp x9, x8
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -march=amdgcn -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+
+define i8 @f_i1_1() {
+ ; CHECK-LABEL: name: f_i1_1
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<256 x s8>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<256 x s8>), [[C]](s32)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %E1 = extractelement <256 x i8> undef, i1 true
+ ret i8 %E1
+}
+
+define i8 @f_i8_255() {
+ ; CHECK-LABEL: name: f_i8_255
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<256 x s8>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<256 x s8>), [[C]](s32)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %E1 = extractelement <256 x i8> undef, i8 255
+ ret i8 %E1
+}
; N64-NEXT: ld.b $w0, 0($2)
; N64-NEXT: addv.b $w0, $w0, $w0
; N64-NEXT: ld $1, %got_disp(i32)($1)
-; N64-NEXT: lw $1, 0($1)
+; N64-NEXT: lwu $1, 0($1)
; N64-NEXT: splat.b $w0, $w0[$1]
; N64-NEXT: mfc1 $1, $f0
; N64-NEXT: sra $1, $1, 24
; N64-NEXT: ld.h $w0, 0($2)
; N64-NEXT: addv.h $w0, $w0, $w0
; N64-NEXT: ld $1, %got_disp(i32)($1)
-; N64-NEXT: lw $1, 0($1)
+; N64-NEXT: lwu $1, 0($1)
; N64-NEXT: splat.h $w0, $w0[$1]
; N64-NEXT: mfc1 $1, $f0
; N64-NEXT: sra $1, $1, 16
; N64-NEXT: ld.w $w0, 0($2)
; N64-NEXT: addv.w $w0, $w0, $w0
; N64-NEXT: ld $1, %got_disp(i32)($1)
-; N64-NEXT: lw $1, 0($1)
+; N64-NEXT: lwu $1, 0($1)
; N64-NEXT: splat.w $w0, $w0[$1]
; N64-NEXT: jr $ra
; N64-NEXT: mfc1 $2, $f0
; N64-NEXT: ld.d $w0, 0($2)
; N64-NEXT: addv.d $w0, $w0, $w0
; N64-NEXT: ld $1, %got_disp(i32)($1)
-; N64-NEXT: lw $1, 0($1)
+; N64-NEXT: lwu $1, 0($1)
; N64-NEXT: splat.d $w0, $w0[$1]
; N64-NEXT: jr $ra
; N64-NEXT: dmfc1 $2, $f0
; N64-NEXT: ld.b $w0, 0($2)
; N64-NEXT: addv.b $w0, $w0, $w0
; N64-NEXT: ld $1, %got_disp(i32)($1)
-; N64-NEXT: lw $1, 0($1)
+; N64-NEXT: lwu $1, 0($1)
; N64-NEXT: splat.b $w0, $w0[$1]
; N64-NEXT: mfc1 $1, $f0
; N64-NEXT: jr $ra
; N64-NEXT: ld.h $w0, 0($2)
; N64-NEXT: addv.h $w0, $w0, $w0
; N64-NEXT: ld $1, %got_disp(i32)($1)
-; N64-NEXT: lw $1, 0($1)
+; N64-NEXT: lwu $1, 0($1)
; N64-NEXT: splat.h $w0, $w0[$1]
; N64-NEXT: mfc1 $1, $f0
; N64-NEXT: jr $ra
; N64-NEXT: ld.w $w0, 0($2)
; N64-NEXT: addv.w $w0, $w0, $w0
; N64-NEXT: ld $1, %got_disp(i32)($1)
-; N64-NEXT: lw $1, 0($1)
+; N64-NEXT: lwu $1, 0($1)
; N64-NEXT: splat.w $w0, $w0[$1]
; N64-NEXT: jr $ra
; N64-NEXT: mfc1 $2, $f0
; N64-NEXT: ld.d $w0, 0($2)
; N64-NEXT: addv.d $w0, $w0, $w0
; N64-NEXT: ld $1, %got_disp(i32)($1)
-; N64-NEXT: lw $1, 0($1)
+; N64-NEXT: lwu $1, 0($1)
; N64-NEXT: splat.d $w0, $w0[$1]
; N64-NEXT: jr $ra
; N64-NEXT: dmfc1 $2, $f0
; N64-NEXT: daddu $1, $1, $25
; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v16i8_vidx)))
; N64-NEXT: ld $2, %got_disp(i32)($1)
-; N64-NEXT: lw $2, 0($2)
+; N64-NEXT: lwu $2, 0($2)
; N64-NEXT: ld $1, %got_disp(v16i8)($1)
; N64-NEXT: ld.b $w0, 0($1)
; N64-NEXT: sld.b $w0, $w0[$2]
; N64-NEXT: daddu $1, $1, $25
; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16_vidx)))
; N64-NEXT: ld $2, %got_disp(i32)($1)
-; N64-NEXT: lw $2, 0($2)
+; N64-NEXT: lwu $2, 0($2)
; N64-NEXT: ld $1, %got_disp(v8i16)($1)
; N64-NEXT: ld.h $w0, 0($1)
; N64-NEXT: dsll $2, $2, 1
; N64-NEXT: daddu $1, $1, $25
; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32_vidx)))
; N64-NEXT: ld $2, %got_disp(i32)($1)
-; N64-NEXT: lw $2, 0($2)
+; N64-NEXT: lwu $2, 0($2)
; N64-NEXT: ld $1, %got_disp(v4i32)($1)
; N64-NEXT: ld.w $w0, 0($1)
; N64-NEXT: dsll $2, $2, 2
; N64-NEXT: daddu $1, $1, $25
; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64_vidx)))
; N64-NEXT: ld $2, %got_disp(i32)($1)
-; N64-NEXT: lw $2, 0($2)
+; N64-NEXT: lwu $2, 0($2)
; N64-NEXT: ld $1, %got_disp(v2i64)($1)
; N64-NEXT: ld.d $w0, 0($1)
; N64-NEXT: dsll $2, $2, 3
; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
- ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%4 = extractelement <4 x float> %2, i32 %3
- ; ALL-DAG: splat.w $w0, [[R1]][[[IDX]]]
+ ; ALL-DAG: splat.w $w0, [[R1]][[[PTR_I]]]
ret float %4
}
; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
- ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%4 = extractelement <2 x double> %2, i32 %3
- ; ALL-DAG: splat.d $w0, [[R1]][[[IDX]]]
+ ; ALL-DAG: splat.d $w0, [[R1]][[[PTR_I]]]
ret double %4
}
; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
- ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%3 = insertelement <4 x float> %1, float %a, i32 %2
; float argument passed in $f12
- ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
+ ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[PTR_I]], 2
; ALL-DAG: sld.b [[R1]], [[R1]][[[BIDX]]]
; ALL-DAG: insve.w [[R1]][0], $w12[0]
; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
- ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%3 = insertelement <2 x double> %1, double %a, i32 %2
; double argument passed in $f12
- ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 3
+ ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[PTR_I]], 3
; ALL-DAG: sld.b [[R1]], [[R1]][[[BIDX]]]
; ALL-DAG: insve.d [[R1]][0], $w12[0]
; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
define double @conv2dlbTestuiVar(<4 x i32> %a, i32 zeroext %elem) {
; CHECK-64-LABEL: conv2dlbTestuiVar:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-NEXT: extsw 3, 3
; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-64-NEXT: vextuwlx 3, 3, 2
; CHECK-64-NEXT: mtfprwz 0, 3
define zeroext i8 @test1(<16 x i8> %a, i32 signext %index) {
; CHECK-64-LABEL: test1:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: vextublx 3, 3, 2
; CHECK-64-NEXT: clrldi 3, 3, 56
; CHECK-64-NEXT: blr
define signext i8 @test2(<16 x i8> %a, i32 signext %index) {
; CHECK-64-LABEL: test2:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: vextublx 3, 3, 2
; CHECK-64-NEXT: extsb 3, 3
; CHECK-64-NEXT: blr
define zeroext i16 @test3(<8 x i16> %a, i32 signext %index) {
; CHECK-64-LABEL: test3:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-64-NEXT: vextuhlx 3, 3, 2
; CHECK-64-NEXT: clrldi 3, 3, 48
define signext i16 @test4(<8 x i16> %a, i32 signext %index) {
; CHECK-64-LABEL: test4:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-64-NEXT: vextuhlx 3, 3, 2
; CHECK-64-NEXT: extsh 3, 3
define zeroext i32 @test5(<4 x i32> %a, i32 signext %index) {
; CHECK-64-LABEL: test5:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-64-NEXT: vextuwlx 3, 3, 2
; CHECK-64-NEXT: blr
define signext i32 @test6(<4 x i32> %a, i32 signext %index) {
; CHECK-64-LABEL: test6:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-64-NEXT: vextuwlx 3, 3, 2
; CHECK-64-NEXT: extsw 3, 3
define zeroext i8 @test_add1(<16 x i8> %a, i32 signext %index, i8 zeroext %c) {
; CHECK-64-LABEL: test_add1:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: vextublx 3, 3, 2
; CHECK-64-NEXT: add 3, 3, 4
; CHECK-64-NEXT: clrldi 3, 3, 56
define signext i8 @test_add2(<16 x i8> %a, i32 signext %index, i8 signext %c) {
; CHECK-64-LABEL: test_add2:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: vextublx 3, 3, 2
; CHECK-64-NEXT: add 3, 3, 4
; CHECK-64-NEXT: extsb 3, 3
define zeroext i16 @test_add3(<8 x i16> %a, i32 signext %index, i16 zeroext %c) {
; CHECK-64-LABEL: test_add3:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-64-NEXT: vextuhlx 3, 3, 2
; CHECK-64-NEXT: add 3, 3, 4
define signext i16 @test_add4(<8 x i16> %a, i32 signext %index, i16 signext %c) {
; CHECK-64-LABEL: test_add4:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-64-NEXT: vextuhlx 3, 3, 2
; CHECK-64-NEXT: add 3, 3, 4
define zeroext i32 @test_add5(<4 x i32> %a, i32 signext %index, i32 zeroext %c) {
; CHECK-64-LABEL: test_add5:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-64-NEXT: vextuwlx 3, 3, 2
; CHECK-64-NEXT: add 3, 3, 4
define signext i32 @test_add6(<4 x i32> %a, i32 signext %index, i32 signext %c) {
; CHECK-64-LABEL: test_add6:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: clrldi 3, 3, 32
; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-64-NEXT: vextuwlx 3, 3, 2
; CHECK-64-NEXT: add 3, 3, 4
define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
; CHECK-64-LABEL: testFloat1:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-DAG: rlwinm 3, 4, 2, 28, 29
-; CHECK-64-DAG: addi 4, 1, -16
+; CHECK-64-NEXT: rlwinm 3, 4, 2, 28, 29
+; CHECK-64-NEXT: addi 4, 1, -16
; CHECK-64-NEXT: stxv 34, -16(1)
; CHECK-64-NEXT: stfsx 1, 4, 3
; CHECK-64-NEXT: lxv 34, -16(1)
; CHECK-64-P10-LABEL: testFloat1:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: xscvdpspn 35, 1
-; CHECK-64-P10-NEXT: extsw 3, 4
-; CHECK-64-P10-NEXT: slwi 3, 3, 2
+; CHECK-64-P10-NEXT: slwi 3, 4, 2
; CHECK-64-P10-NEXT: vinswvlx 2, 3, 3
; CHECK-64-P10-NEXT: blr
;
; CHECK-64-LABEL: testFloat2:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: lwz 6, 0(3)
-; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29
-; CHECK-64-DAG: addi 7, 1, -16
+; CHECK-64-NEXT: addi 7, 1, -16
+; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29
; CHECK-64-NEXT: stxv 34, -16(1)
+; CHECK-64-NEXT: rlwinm 5, 5, 2, 28, 29
; CHECK-64-NEXT: stwx 6, 7, 4
-; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29
-; CHECK-64-NEXT: addi 5, 1, -32
+; CHECK-64-NEXT: addi 4, 1, -32
; CHECK-64-NEXT: lxv 0, -16(1)
; CHECK-64-NEXT: lwz 3, 1(3)
; CHECK-64-NEXT: stxv 0, -32(1)
-; CHECK-64-NEXT: stwx 3, 5, 4
+; CHECK-64-NEXT: stwx 3, 4, 5
; CHECK-64-NEXT: lxv 34, -32(1)
; CHECK-64-NEXT: blr
;
; CHECK-64-P10-LABEL: testFloat2:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: lwz 6, 0(3)
-; CHECK-64-P10-NEXT: extsw 4, 4
; CHECK-64-P10-NEXT: lwz 3, 1(3)
; CHECK-64-P10-NEXT: slwi 4, 4, 2
; CHECK-64-P10-NEXT: vinswlx 2, 4, 6
-; CHECK-64-P10-NEXT: extsw 4, 5
-; CHECK-64-P10-NEXT: slwi 4, 4, 2
+; CHECK-64-P10-NEXT: slwi 4, 5, 2
; CHECK-64-P10-NEXT: vinswlx 2, 4, 3
; CHECK-64-P10-NEXT: blr
;
; CHECK-64-LABEL: testFloat3:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: lis 6, 1
-; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29
-; CHECK-64-DAG: addi 7, 1, -16
+; CHECK-64-NEXT: addi 7, 1, -16
+; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29
+; CHECK-64-NEXT: rlwinm 5, 5, 2, 28, 29
; CHECK-64-NEXT: lwzx 6, 3, 6
; CHECK-64-NEXT: stxv 34, -16(1)
; CHECK-64-NEXT: stwx 6, 7, 4
; CHECK-64-NEXT: lxv 0, -16(1)
; CHECK-64-NEXT: rldic 4, 4, 36, 27
; CHECK-64-NEXT: lwzx 3, 3, 4
-; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29
-; CHECK-64-NEXT: addi 5, 1, -32
+; CHECK-64-NEXT: addi 4, 1, -32
; CHECK-64-NEXT: stxv 0, -32(1)
-; CHECK-64-NEXT: stwx 3, 5, 4
+; CHECK-64-NEXT: stwx 3, 4, 5
; CHECK-64-NEXT: lxv 34, -32(1)
; CHECK-64-NEXT: blr
;
; CHECK-64-P10-LABEL: testFloat3:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: plwz 6, 65536(3), 0
-; CHECK-64-P10-NEXT: extsw 4, 4
; CHECK-64-P10-NEXT: slwi 4, 4, 2
; CHECK-64-P10-NEXT: vinswlx 2, 4, 6
; CHECK-64-P10-NEXT: li 4, 1
; CHECK-64-P10-NEXT: rldic 4, 4, 36, 27
; CHECK-64-P10-NEXT: lwzx 3, 3, 4
-; CHECK-64-P10-NEXT: extsw 4, 5
-; CHECK-64-P10-NEXT: slwi 4, 4, 2
+; CHECK-64-P10-NEXT: slwi 4, 5, 2
; CHECK-64-P10-NEXT: vinswlx 2, 4, 3
; CHECK-64-P10-NEXT: blr
;
define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) {
; CHECK-64-LABEL: testDouble1:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64: rlwinm 3, 4, 3, 28, 28
+; CHECK-64-NEXT: rlwinm 3, 4, 3, 28, 28
; CHECK-64-NEXT: addi 4, 1, -16
; CHECK-64-NEXT: stxv 34, -16(1)
; CHECK-64-NEXT: stfdx 1, 4, 3
;
; CHECK-64-P10-LABEL: testDouble1:
; CHECK-64-P10: # %bb.0: # %entry
-; CHECK-64-P10-NEXT: extsw 4, 4
; CHECK-64-P10-NEXT: mffprd 3, 1
; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3
;
; CHECK-32-P10-LABEL: testDouble1:
; CHECK-32-P10: # %bb.0: # %entry
-; CHECK-32-P10-DAG: addi 4, 1, -16
-; CHECK-32-P10-DAG: rlwinm 3, 5, 3, 28, 28
+; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28
+; CHECK-32-P10-NEXT: addi 4, 1, -16
; CHECK-32-P10-NEXT: stxv 34, -16(1)
; CHECK-32-P10-NEXT: stfdx 1, 4, 3
; CHECK-32-P10-NEXT: lxv 34, -16(1)
; CHECK-64-LABEL: testDouble2:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: ld 6, 0(3)
-; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28
-; CHECK-64-DAG: addi 7, 1, -32
+; CHECK-64-NEXT: addi 7, 1, -32
+; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28
; CHECK-64-NEXT: stxv 34, -32(1)
+; CHECK-64-NEXT: rlwinm 5, 5, 3, 28, 28
; CHECK-64-NEXT: stdx 6, 7, 4
; CHECK-64-NEXT: li 4, 1
; CHECK-64-NEXT: lxv 0, -32(1)
; CHECK-64-NEXT: ldx 3, 3, 4
-; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28
-; CHECK-64-NEXT: addi 5, 1, -16
+; CHECK-64-NEXT: addi 4, 1, -16
; CHECK-64-NEXT: stxv 0, -16(1)
-; CHECK-64-NEXT: stdx 3, 5, 4
+; CHECK-64-NEXT: stdx 3, 4, 5
; CHECK-64-NEXT: lxv 34, -16(1)
; CHECK-64-NEXT: blr
;
; CHECK-64-P10-LABEL: testDouble2:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: ld 6, 0(3)
-; CHECK-64-P10-NEXT: extsw 4, 4
; CHECK-64-P10-NEXT: pld 3, 1(3), 0
; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 6
-; CHECK-64-P10-NEXT: extsw 4, 5
-; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
+; CHECK-64-P10-NEXT: rlwinm 4, 5, 3, 0, 28
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testDouble2:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: lfd 0, 0(3)
-; CHECK-32-P10-DAG: addi 6, 1, -32
-; CHECK-32-P10-DAG: rlwinm 4, 4, 3, 28, 28
+; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28
+; CHECK-32-P10-NEXT: addi 6, 1, -32
; CHECK-32-P10-NEXT: stxv 34, -32(1)
; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28
; CHECK-32-P10-NEXT: stfdx 0, 6, 4
; CHECK-64-LABEL: testDouble3:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: lis 6, 1
-; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28
-; CHECK-64-DAG: addi 7, 1, -32
+; CHECK-64-NEXT: addi 7, 1, -32
+; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28
+; CHECK-64-NEXT: rlwinm 5, 5, 3, 28, 28
; CHECK-64-NEXT: ldx 6, 3, 6
; CHECK-64-NEXT: stxv 34, -32(1)
; CHECK-64-NEXT: stdx 6, 7, 4
; CHECK-64-NEXT: lxv 0, -32(1)
; CHECK-64-NEXT: rldic 4, 4, 36, 27
; CHECK-64-NEXT: ldx 3, 3, 4
-; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28
-; CHECK-64-NEXT: addi 5, 1, -16
+; CHECK-64-NEXT: addi 4, 1, -16
; CHECK-64-NEXT: stxv 0, -16(1)
-; CHECK-64-NEXT: stdx 3, 5, 4
+; CHECK-64-NEXT: stdx 3, 4, 5
; CHECK-64-NEXT: lxv 34, -16(1)
; CHECK-64-NEXT: blr
;
; CHECK-64-P10-LABEL: testDouble3:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: pld 6, 65536(3), 0
-; CHECK-64-P10-NEXT: extsw 4, 4
; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 6
; CHECK-64-P10-NEXT: li 4, 1
; CHECK-64-P10-NEXT: rldic 4, 4, 36, 27
; CHECK-64-P10-NEXT: ldx 3, 3, 4
-; CHECK-64-P10-NEXT: extsw 4, 5
-; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
+; CHECK-64-P10-NEXT: rlwinm 4, 5, 3, 0, 28
; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testDouble3:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: plfd 0, 65536(3), 0
-; CHECK-32-P10-DAG: addi 6, 1, -32
-; CHECK-32-P10-DAG: rlwinm 4, 4, 3, 28, 28
+; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28
+; CHECK-32-P10-NEXT: addi 6, 1, -32
; CHECK-32-P10-NEXT: stxv 34, -32(1)
; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28
; CHECK-32-P10-NEXT: stfdx 0, 6, 4
define signext i8 @getvelsc(<16 x i8> %vsc, i32 signext %i) {
; CHECK-LABEL: getvelsc:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andi. r4, r5, 8
+; CHECK-NEXT: clrldi r4, r5, 32
; CHECK-NEXT: li r3, 7
-; CHECK-NEXT: lvsl v3, 0, r4
-; CHECK-NEXT: andc r3, r3, r5
+; CHECK-NEXT: andi. r5, r4, 8
+; CHECK-NEXT: andc r3, r3, r4
+; CHECK-NEXT: lvsl v3, 0, r5
; CHECK-NEXT: sldi r3, r3, 3
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-NEXT: mfvsrd r4, v2
; CHECK-LE-LABEL: getvelsc:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li r3, 8
-; CHECK-LE-NEXT: andc r3, r3, r5
+; CHECK-LE-NEXT: clrldi r4, r5, 32
+; CHECK-LE-NEXT: andc r3, r3, r4
; CHECK-LE-NEXT: lvsl v3, 0, r3
; CHECK-LE-NEXT: li r3, 7
-; CHECK-LE-NEXT: and r3, r3, r5
+; CHECK-LE-NEXT: and r3, r3, r4
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
; CHECK-LE-NEXT: sldi r3, r3, 3
; CHECK-LE-NEXT: mfvsrd r4, v2
;
; CHECK-AIX-LABEL: getvelsc:
; CHECK-AIX: # %bb.0: # %entry
-; CHECK-AIX-NEXT: andi. 5, 3, 8
+; CHECK-AIX-NEXT: clrldi 3, 3, 32
; CHECK-AIX-NEXT: li 4, 7
-; CHECK-AIX-NEXT: lvsl 3, 0, 5
+; CHECK-AIX-NEXT: andi. 5, 3, 8
; CHECK-AIX-NEXT: andc 3, 4, 3
+; CHECK-AIX-NEXT: lvsl 3, 0, 5
; CHECK-AIX-NEXT: sldi 3, 3, 3
; CHECK-AIX-NEXT: vperm 2, 2, 2, 3
; CHECK-AIX-NEXT: mfvsrd 4, 34
define zeroext i8 @getveluc(<16 x i8> %vuc, i32 signext %i) {
; CHECK-LABEL: getveluc:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andi. r4, r5, 8
+; CHECK-NEXT: clrldi r4, r5, 32
; CHECK-NEXT: li r3, 7
-; CHECK-NEXT: lvsl v3, 0, r4
-; CHECK-NEXT: andc r3, r3, r5
+; CHECK-NEXT: andi. r5, r4, 8
+; CHECK-NEXT: andc r3, r3, r4
+; CHECK-NEXT: lvsl v3, 0, r5
; CHECK-NEXT: sldi r3, r3, 3
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-NEXT: mfvsrd r4, v2
; CHECK-LE-LABEL: getveluc:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li r3, 8
-; CHECK-LE-NEXT: andc r3, r3, r5
+; CHECK-LE-NEXT: clrldi r4, r5, 32
+; CHECK-LE-NEXT: andc r3, r3, r4
; CHECK-LE-NEXT: lvsl v3, 0, r3
; CHECK-LE-NEXT: li r3, 7
-; CHECK-LE-NEXT: and r3, r3, r5
+; CHECK-LE-NEXT: and r3, r3, r4
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
; CHECK-LE-NEXT: sldi r3, r3, 3
; CHECK-LE-NEXT: mfvsrd r4, v2
;
; CHECK-AIX-LABEL: getveluc:
; CHECK-AIX: # %bb.0: # %entry
-; CHECK-AIX-NEXT: andi. 5, 3, 8
+; CHECK-AIX-NEXT: clrldi 3, 3, 32
; CHECK-AIX-NEXT: li 4, 7
-; CHECK-AIX-NEXT: lvsl 3, 0, 5
+; CHECK-AIX-NEXT: andi. 5, 3, 8
; CHECK-AIX-NEXT: andc 3, 4, 3
+; CHECK-AIX-NEXT: lvsl 3, 0, 5
; CHECK-AIX-NEXT: sldi 3, 3, 3
; CHECK-AIX-NEXT: vperm 2, 2, 2, 3
; CHECK-AIX-NEXT: mfvsrd 4, 34
define signext i16 @getvelss(<8 x i16> %vss, i32 signext %i) {
; CHECK-LABEL: getvelss:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andi. r4, r5, 4
+; CHECK-NEXT: clrldi r4, r5, 32
; CHECK-NEXT: li r3, 3
-; CHECK-NEXT: sldi r4, r4, 1
-; CHECK-NEXT: andc r3, r3, r5
-; CHECK-NEXT: lvsl v3, 0, r4
+; CHECK-NEXT: andi. r5, r4, 4
+; CHECK-NEXT: andc r3, r3, r4
+; CHECK-NEXT: sldi r5, r5, 1
; CHECK-NEXT: sldi r3, r3, 4
+; CHECK-NEXT: lvsl v3, 0, r5
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-NEXT: mfvsrd r4, v2
; CHECK-NEXT: srd r3, r4, r3
; CHECK-LE-LABEL: getvelss:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li r3, 4
-; CHECK-LE-NEXT: andc r3, r3, r5
+; CHECK-LE-NEXT: clrldi r4, r5, 32
+; CHECK-LE-NEXT: andc r3, r3, r4
; CHECK-LE-NEXT: sldi r3, r3, 1
; CHECK-LE-NEXT: lvsl v3, 0, r3
; CHECK-LE-NEXT: li r3, 3
-; CHECK-LE-NEXT: and r3, r3, r5
+; CHECK-LE-NEXT: and r3, r3, r4
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
; CHECK-LE-NEXT: sldi r3, r3, 4
; CHECK-LE-NEXT: mfvsrd r4, v2
;
; CHECK-AIX-LABEL: getvelss:
; CHECK-AIX: # %bb.0: # %entry
-; CHECK-AIX-NEXT: andi. 5, 3, 4
+; CHECK-AIX-NEXT: clrldi 3, 3, 32
; CHECK-AIX-NEXT: li 4, 3
-; CHECK-AIX-NEXT: sldi 5, 5, 1
+; CHECK-AIX-NEXT: andi. 5, 3, 4
; CHECK-AIX-NEXT: andc 3, 4, 3
-; CHECK-AIX-NEXT: lvsl 3, 0, 5
+; CHECK-AIX-NEXT: sldi 5, 5, 1
; CHECK-AIX-NEXT: sldi 3, 3, 4
+; CHECK-AIX-NEXT: lvsl 3, 0, 5
; CHECK-AIX-NEXT: vperm 2, 2, 2, 3
; CHECK-AIX-NEXT: mfvsrd 4, 34
; CHECK-AIX-NEXT: srd 3, 4, 3
define zeroext i16 @getvelus(<8 x i16> %vus, i32 signext %i) {
; CHECK-LABEL: getvelus:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andi. r4, r5, 4
+; CHECK-NEXT: clrldi r4, r5, 32
; CHECK-NEXT: li r3, 3
-; CHECK-NEXT: sldi r4, r4, 1
-; CHECK-NEXT: andc r3, r3, r5
-; CHECK-NEXT: lvsl v3, 0, r4
+; CHECK-NEXT: andi. r5, r4, 4
+; CHECK-NEXT: andc r3, r3, r4
+; CHECK-NEXT: sldi r5, r5, 1
; CHECK-NEXT: sldi r3, r3, 4
+; CHECK-NEXT: lvsl v3, 0, r5
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-NEXT: mfvsrd r4, v2
; CHECK-NEXT: srd r3, r4, r3
; CHECK-LE-LABEL: getvelus:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li r3, 4
-; CHECK-LE-NEXT: andc r3, r3, r5
+; CHECK-LE-NEXT: clrldi r4, r5, 32
+; CHECK-LE-NEXT: andc r3, r3, r4
; CHECK-LE-NEXT: sldi r3, r3, 1
; CHECK-LE-NEXT: lvsl v3, 0, r3
; CHECK-LE-NEXT: li r3, 3
-; CHECK-LE-NEXT: and r3, r3, r5
+; CHECK-LE-NEXT: and r3, r3, r4
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
; CHECK-LE-NEXT: sldi r3, r3, 4
; CHECK-LE-NEXT: mfvsrd r4, v2
;
; CHECK-AIX-LABEL: getvelus:
; CHECK-AIX: # %bb.0: # %entry
-; CHECK-AIX-NEXT: andi. 5, 3, 4
+; CHECK-AIX-NEXT: clrldi 3, 3, 32
; CHECK-AIX-NEXT: li 4, 3
-; CHECK-AIX-NEXT: sldi 5, 5, 1
+; CHECK-AIX-NEXT: andi. 5, 3, 4
; CHECK-AIX-NEXT: andc 3, 4, 3
-; CHECK-AIX-NEXT: lvsl 3, 0, 5
+; CHECK-AIX-NEXT: sldi 5, 5, 1
; CHECK-AIX-NEXT: sldi 3, 3, 4
+; CHECK-AIX-NEXT: lvsl 3, 0, 5
; CHECK-AIX-NEXT: vperm 2, 2, 2, 3
; CHECK-AIX-NEXT: mfvsrd 4, 34
; CHECK-AIX-NEXT: srd 3, 4, 3
define signext i32 @getvelsi(<4 x i32> %vsi, i32 signext %i) {
; CHECK-LABEL: getvelsi:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andi. r4, r5, 2
+; CHECK-NEXT: clrldi r4, r5, 32
; CHECK-NEXT: li r3, 1
-; CHECK-NEXT: sldi r4, r4, 2
-; CHECK-NEXT: andc r3, r3, r5
-; CHECK-NEXT: lvsl v3, 0, r4
+; CHECK-NEXT: andi. r5, r4, 2
+; CHECK-NEXT: andc r3, r3, r4
+; CHECK-NEXT: sldi r5, r5, 2
; CHECK-NEXT: sldi r3, r3, 5
+; CHECK-NEXT: lvsl v3, 0, r5
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-NEXT: mfvsrd r4, v2
; CHECK-NEXT: srd r3, r4, r3
; CHECK-LE-LABEL: getvelsi:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li r3, 2
-; CHECK-LE-NEXT: andc r3, r3, r5
+; CHECK-LE-NEXT: clrldi r4, r5, 32
+; CHECK-LE-NEXT: andc r3, r3, r4
; CHECK-LE-NEXT: sldi r3, r3, 2
; CHECK-LE-NEXT: lvsl v3, 0, r3
; CHECK-LE-NEXT: li r3, 1
-; CHECK-LE-NEXT: and r3, r3, r5
+; CHECK-LE-NEXT: and r3, r3, r4
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
; CHECK-LE-NEXT: sldi r3, r3, 5
; CHECK-LE-NEXT: mfvsrd r4, v2
;
; CHECK-AIX-LABEL: getvelsi:
; CHECK-AIX: # %bb.0: # %entry
-; CHECK-AIX-NEXT: andi. 5, 3, 2
+; CHECK-AIX-NEXT: clrldi 3, 3, 32
; CHECK-AIX-NEXT: li 4, 1
-; CHECK-AIX-NEXT: sldi 5, 5, 2
+; CHECK-AIX-NEXT: andi. 5, 3, 2
; CHECK-AIX-NEXT: andc 3, 4, 3
-; CHECK-AIX-NEXT: lvsl 3, 0, 5
+; CHECK-AIX-NEXT: sldi 5, 5, 2
; CHECK-AIX-NEXT: sldi 3, 3, 5
+; CHECK-AIX-NEXT: lvsl 3, 0, 5
; CHECK-AIX-NEXT: vperm 2, 2, 2, 3
; CHECK-AIX-NEXT: mfvsrd 4, 34
; CHECK-AIX-NEXT: srd 3, 4, 3
define zeroext i32 @getvelui(<4 x i32> %vui, i32 signext %i) {
; CHECK-LABEL: getvelui:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andi. r4, r5, 2
+; CHECK-NEXT: clrldi r4, r5, 32
; CHECK-NEXT: li r3, 1
-; CHECK-NEXT: sldi r4, r4, 2
-; CHECK-NEXT: andc r3, r3, r5
-; CHECK-NEXT: lvsl v3, 0, r4
+; CHECK-NEXT: andi. r5, r4, 2
+; CHECK-NEXT: andc r3, r3, r4
+; CHECK-NEXT: sldi r5, r5, 2
; CHECK-NEXT: sldi r3, r3, 5
+; CHECK-NEXT: lvsl v3, 0, r5
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-NEXT: mfvsrd r4, v2
; CHECK-NEXT: srd r3, r4, r3
; CHECK-LE-LABEL: getvelui:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li r3, 2
-; CHECK-LE-NEXT: andc r3, r3, r5
+; CHECK-LE-NEXT: clrldi r4, r5, 32
+; CHECK-LE-NEXT: andc r3, r3, r4
; CHECK-LE-NEXT: sldi r3, r3, 2
; CHECK-LE-NEXT: lvsl v3, 0, r3
; CHECK-LE-NEXT: li r3, 1
-; CHECK-LE-NEXT: and r3, r3, r5
+; CHECK-LE-NEXT: and r3, r3, r4
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
; CHECK-LE-NEXT: sldi r3, r3, 5
; CHECK-LE-NEXT: mfvsrd r4, v2
;
; CHECK-AIX-LABEL: getvelui:
; CHECK-AIX: # %bb.0: # %entry
-; CHECK-AIX-NEXT: andi. 5, 3, 2
+; CHECK-AIX-NEXT: clrldi 3, 3, 32
; CHECK-AIX-NEXT: li 4, 1
-; CHECK-AIX-NEXT: sldi 5, 5, 2
+; CHECK-AIX-NEXT: andi. 5, 3, 2
; CHECK-AIX-NEXT: andc 3, 4, 3
-; CHECK-AIX-NEXT: lvsl 3, 0, 5
+; CHECK-AIX-NEXT: sldi 5, 5, 2
; CHECK-AIX-NEXT: sldi 3, 3, 5
+; CHECK-AIX-NEXT: lvsl 3, 0, 5
; CHECK-AIX-NEXT: vperm 2, 2, 2, 3
; CHECK-AIX-NEXT: mfvsrd 4, 34
; CHECK-AIX-NEXT: srd 3, 4, 3
define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) {
; CHECK-LABEL: getvelsl:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andi. r3, r5, 1
+; CHECK-NEXT: clrldi r3, r5, 32
+; CHECK-NEXT: andi. r3, r3, 1
; CHECK-NEXT: sldi r3, r3, 3
; CHECK-NEXT: lvsl v3, 0, r3
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-LE-LABEL: getvelsl:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li r3, 1
-; CHECK-LE-NEXT: andc r3, r3, r5
+; CHECK-LE-NEXT: clrldi r4, r5, 32
+; CHECK-LE-NEXT: andc r3, r3, r4
; CHECK-LE-NEXT: sldi r3, r3, 3
; CHECK-LE-NEXT: lvsl v3, 0, r3
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
;
; CHECK-AIX-LABEL: getvelsl:
; CHECK-AIX: # %bb.0: # %entry
+; CHECK-AIX-NEXT: clrldi 3, 3, 32
; CHECK-AIX-NEXT: andi. 3, 3, 1
; CHECK-AIX-NEXT: sldi 3, 3, 3
; CHECK-AIX-NEXT: lvsl 3, 0, 3
define i64 @getvelul(<2 x i64> %vul, i32 signext %i) {
; CHECK-LABEL: getvelul:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andi. r3, r5, 1
+; CHECK-NEXT: clrldi r3, r5, 32
+; CHECK-NEXT: andi. r3, r3, 1
; CHECK-NEXT: sldi r3, r3, 3
; CHECK-NEXT: lvsl v3, 0, r3
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-LE-LABEL: getvelul:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li r3, 1
-; CHECK-LE-NEXT: andc r3, r3, r5
+; CHECK-LE-NEXT: clrldi r4, r5, 32
+; CHECK-LE-NEXT: andc r3, r3, r4
; CHECK-LE-NEXT: sldi r3, r3, 3
; CHECK-LE-NEXT: lvsl v3, 0, r3
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
;
; CHECK-AIX-LABEL: getvelul:
; CHECK-AIX: # %bb.0: # %entry
+; CHECK-AIX-NEXT: clrldi 3, 3, 32
; CHECK-AIX-NEXT: andi. 3, 3, 1
; CHECK-AIX-NEXT: sldi 3, 3, 3
; CHECK-AIX-NEXT: lvsl 3, 0, 3
define float @getvelf(<4 x float> %vf, i32 signext %i) {
; CHECK-LABEL: getvelf:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: sldi r3, r5, 2
+; CHECK-NEXT: rldic r3, r5, 2, 30
; CHECK-NEXT: lvsl v3, 0, r3
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-NEXT: xscvspdpn f1, v2
;
; CHECK-LE-LABEL: getvelf:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: xori r3, r5, 3
+; CHECK-LE-NEXT: clrldi r3, r5, 32
+; CHECK-LE-NEXT: xori r3, r3, 3
; CHECK-LE-NEXT: sldi r3, r3, 2
; CHECK-LE-NEXT: lvsl v3, 0, r3
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
;
; CHECK-AIX-LABEL: getvelf:
; CHECK-AIX: # %bb.0: # %entry
-; CHECK-AIX-NEXT: sldi 3, 3, 2
+; CHECK-AIX-NEXT: rldic 3, 3, 2, 30
; CHECK-AIX-NEXT: lvsl 3, 0, 3
; CHECK-AIX-NEXT: vperm 2, 2, 2, 3
; CHECK-AIX-NEXT: xscvspdpn 1, 34
define double @getveld(<2 x double> %vd, i32 signext %i) {
; CHECK-LABEL: getveld:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andi. r3, r5, 1
+; CHECK-NEXT: clrldi r3, r5, 32
+; CHECK-NEXT: andi. r3, r3, 1
; CHECK-NEXT: sldi r3, r3, 3
; CHECK-NEXT: lvsl v3, 0, r3
; CHECK-NEXT: vperm v2, v2, v2, v3
; CHECK-LE-LABEL: getveld:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: li r3, 1
-; CHECK-LE-NEXT: andc r3, r3, r5
+; CHECK-LE-NEXT: clrldi r4, r5, 32
+; CHECK-LE-NEXT: andc r3, r3, r4
; CHECK-LE-NEXT: sldi r3, r3, 3
; CHECK-LE-NEXT: lvsl v3, 0, r3
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
;
; CHECK-AIX-LABEL: getveld:
; CHECK-AIX: # %bb.0: # %entry
+; CHECK-AIX-NEXT: clrldi 3, 3, 32
; CHECK-AIX-NEXT: andi. 3, 3, 1
; CHECK-AIX-NEXT: sldi 3, 3, 3
; CHECK-AIX-NEXT: lvsl 3, 0, 3
; RUN: --check-prefix=CHECK-P7
; Function Attrs: norecurse nounwind readnone
-define signext i32 @geti(<4 x i32> %a, i32 signext %b) {
+define zeroext i32 @geti(<4 x i32> %a, i32 zeroext %b) {
; CHECK-LABEL: geti:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li 3, 2
; CHECK-NEXT: sldi 3, 3, 5
; CHECK-NEXT: mfvsrd 4, 34
; CHECK-NEXT: srd 3, 4, 3
-; CHECK-NEXT: extsw 3, 3
+; CHECK-NEXT: clrldi 3, 3, 32
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: geti:
; CHECK-BE-NEXT: vperm 2, 2, 2, 3
; CHECK-BE-NEXT: mfvsrd 4, 34
; CHECK-BE-NEXT: srd 3, 4, 3
-; CHECK-BE-NEXT: extsw 3, 3
+; CHECK-BE-NEXT: clrldi 3, 3, 32
; CHECK-BE-NEXT: blr
;
; CHECK-P7-LABEL: geti:
; CHECK-P7-NEXT: addi 3, 1, -16
; CHECK-P7-NEXT: rlwinm 4, 5, 2, 28, 29
; CHECK-P7-NEXT: stxvw4x 34, 0, 3
-; CHECK-P7-NEXT: lwax 3, 3, 4
+; CHECK-P7-NEXT: lwzx 3, 3, 4
; CHECK-P7-NEXT: blr
entry:
%vecext = extractelement <4 x i32> %a, i32 %b
}
; Function Attrs: norecurse nounwind readnone
-define i64 @getl(<2 x i64> %a, i32 signext %b) {
+define i64 @getl(<2 x i64> %a, i32 zeroext %b) {
; CHECK-LABEL: getl:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li 3, 1
}
; Function Attrs: norecurse nounwind readnone
-define float @getf(<4 x float> %a, i32 signext %b) {
+define float @getf(<4 x float> %a, i32 zeroext %b) {
; CHECK-LABEL: getf:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xori 3, 5, 3
}
; Function Attrs: norecurse nounwind readnone
-define double @getd(<2 x double> %a, i32 signext %b) {
+define double @getd(<2 x double> %a, i32 zeroext %b) {
; CHECK-LABEL: getd:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li 3, 1
define zeroext i8 @test1(<16 x i8> %a, i32 signext %index) {
; CHECK-LE-LABEL: test1:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: vextubrx 3, 5, 2
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: vextubrx 3, 3, 2
; CHECK-LE-NEXT: clrldi 3, 3, 56
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test1:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: vextublx 3, 5, 2
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: vextublx 3, 3, 2
; CHECK-BE-NEXT: clrldi 3, 3, 56
; CHECK-BE-NEXT: blr
define signext i8 @test2(<16 x i8> %a, i32 signext %index) {
; CHECK-LE-LABEL: test2:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: vextubrx 3, 5, 2
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: vextubrx 3, 3, 2
; CHECK-LE-NEXT: extsb 3, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test2:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: vextublx 3, 5, 2
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: vextublx 3, 3, 2
; CHECK-BE-NEXT: extsb 3, 3
; CHECK-BE-NEXT: blr
define zeroext i16 @test3(<8 x i16> %a, i32 signext %index) {
; CHECK-LE-LABEL: test3:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-LE-NEXT: vextuhrx 3, 3, 2
; CHECK-LE-NEXT: clrldi 3, 3, 48
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test3:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-BE-NEXT: vextuhlx 3, 3, 2
; CHECK-BE-NEXT: clrldi 3, 3, 48
; CHECK-BE-NEXT: blr
define signext i16 @test4(<8 x i16> %a, i32 signext %index) {
; CHECK-LE-LABEL: test4:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-LE-NEXT: vextuhrx 3, 3, 2
; CHECK-LE-NEXT: extsh 3, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test4:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-BE-NEXT: vextuhlx 3, 3, 2
; CHECK-BE-NEXT: extsh 3, 3
; CHECK-BE-NEXT: blr
define zeroext i32 @test5(<4 x i32> %a, i32 signext %index) {
; CHECK-LE-LABEL: test5:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-LE-NEXT: vextuwrx 3, 3, 2
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test5:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-BE-NEXT: vextuwlx 3, 3, 2
; CHECK-BE-NEXT: blr
define signext i32 @test6(<4 x i32> %a, i32 signext %index) {
; CHECK-LE-LABEL: test6:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-LE-NEXT: vextuwrx 3, 3, 2
; CHECK-LE-NEXT: extsw 3, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test6:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-BE-NEXT: vextuwlx 3, 3, 2
; CHECK-BE-NEXT: extsw 3, 3
; CHECK-BE-NEXT: blr
define zeroext i8 @test_add1(<16 x i8> %a, i32 signext %index, i8 zeroext %c) {
; CHECK-LE-LABEL: test_add1:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: vextubrx 3, 5, 2
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: vextubrx 3, 3, 2
; CHECK-LE-NEXT: add 3, 3, 6
; CHECK-LE-NEXT: clrldi 3, 3, 56
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_add1:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: vextublx 3, 5, 2
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: vextublx 3, 3, 2
; CHECK-BE-NEXT: add 3, 3, 6
; CHECK-BE-NEXT: clrldi 3, 3, 56
; CHECK-BE-NEXT: blr
define signext i8 @test_add2(<16 x i8> %a, i32 signext %index, i8 signext %c) {
; CHECK-LE-LABEL: test_add2:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: vextubrx 3, 5, 2
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: vextubrx 3, 3, 2
; CHECK-LE-NEXT: add 3, 3, 6
; CHECK-LE-NEXT: extsb 3, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_add2:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: vextublx 3, 5, 2
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: vextublx 3, 3, 2
; CHECK-BE-NEXT: add 3, 3, 6
; CHECK-BE-NEXT: extsb 3, 3
; CHECK-BE-NEXT: blr
define zeroext i16 @test_add3(<8 x i16> %a, i32 signext %index, i16 zeroext %c) {
; CHECK-LE-LABEL: test_add3:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-LE-NEXT: vextuhrx 3, 3, 2
; CHECK-LE-NEXT: add 3, 3, 6
; CHECK-LE-NEXT: clrldi 3, 3, 48
;
; CHECK-BE-LABEL: test_add3:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-BE-NEXT: vextuhlx 3, 3, 2
; CHECK-BE-NEXT: add 3, 3, 6
; CHECK-BE-NEXT: clrldi 3, 3, 48
define signext i16 @test_add4(<8 x i16> %a, i32 signext %index, i16 signext %c) {
; CHECK-LE-LABEL: test_add4:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: rlwinm 3, 5, 1, 28, 30
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-LE-NEXT: vextuhrx 3, 3, 2
; CHECK-LE-NEXT: add 3, 3, 6
; CHECK-LE-NEXT: extsh 3, 3
;
; CHECK-BE-LABEL: test_add4:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: rlwinm 3, 5, 1, 28, 30
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: rlwinm 3, 3, 1, 28, 30
; CHECK-BE-NEXT: vextuhlx 3, 3, 2
; CHECK-BE-NEXT: add 3, 3, 6
; CHECK-BE-NEXT: extsh 3, 3
define zeroext i32 @test_add5(<4 x i32> %a, i32 signext %index, i32 zeroext %c) {
; CHECK-LE-LABEL: test_add5:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-LE-NEXT: vextuwrx 3, 3, 2
; CHECK-LE-NEXT: add 3, 3, 6
; CHECK-LE-NEXT: clrldi 3, 3, 32
;
; CHECK-BE-LABEL: test_add5:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-BE-NEXT: vextuwlx 3, 3, 2
; CHECK-BE-NEXT: add 3, 3, 6
; CHECK-BE-NEXT: clrldi 3, 3, 32
define signext i32 @test_add6(<4 x i32> %a, i32 signext %index, i32 signext %c) {
; CHECK-LE-LABEL: test_add6:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-LE-NEXT: clrldi 3, 5, 32
+; CHECK-LE-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-LE-NEXT: vextuwrx 3, 3, 2
; CHECK-LE-NEXT: add 3, 3, 6
; CHECK-LE-NEXT: extsw 3, 3
;
; CHECK-BE-LABEL: test_add6:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29
+; CHECK-BE-NEXT: clrldi 3, 5, 32
+; CHECK-BE-NEXT: rlwinm 3, 3, 2, 28, 29
; CHECK-BE-NEXT: vextuwlx 3, 3, 2
; CHECK-BE-NEXT: add 3, 3, 6
; CHECK-BE-NEXT: extsw 3, 3
; CHECK-LABEL: testFloat1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xscvdpspn v3, f1
-; CHECK-NEXT: extsw r3, r6
-; CHECK-NEXT: slwi r3, r3, 2
+; CHECK-NEXT: slwi r3, r6, 2
; CHECK-NEXT: vinswvrx v2, r3, v3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testFloat1:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xscvdpspn v3, f1
-; CHECK-BE-NEXT: extsw r3, r6
-; CHECK-BE-NEXT: slwi r3, r3, 2
+; CHECK-BE-NEXT: slwi r3, r6, 2
; CHECK-BE-NEXT: vinswvlx v2, r3, v3
; CHECK-BE-NEXT: blr
;
; CHECK-LABEL: testFloat2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lwz r3, 0(r5)
-; CHECK-NEXT: extsw r4, r6
-; CHECK-NEXT: slwi r4, r4, 2
+; CHECK-NEXT: slwi r4, r6, 2
; CHECK-NEXT: vinswrx v2, r4, r3
; CHECK-NEXT: lwz r3, 1(r5)
-; CHECK-NEXT: extsw r4, r7
-; CHECK-NEXT: slwi r4, r4, 2
+; CHECK-NEXT: slwi r4, r7, 2
; CHECK-NEXT: vinswrx v2, r4, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testFloat2:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lwz r3, 0(r5)
-; CHECK-BE-NEXT: extsw r4, r6
-; CHECK-BE-NEXT: slwi r4, r4, 2
+; CHECK-BE-NEXT: slwi r4, r6, 2
; CHECK-BE-NEXT: vinswlx v2, r4, r3
; CHECK-BE-NEXT: lwz r3, 1(r5)
-; CHECK-BE-NEXT: extsw r4, r7
-; CHECK-BE-NEXT: slwi r4, r4, 2
+; CHECK-BE-NEXT: slwi r4, r7, 2
; CHECK-BE-NEXT: vinswlx v2, r4, r3
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testFloat2:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lwz r3, 0(r5)
; CHECK-P9-NEXT: rlwinm r4, r6, 2, 28, 29
-; CHECK-P9-NEXT: addi r6, r1, -16
+; CHECK-P9-NEXT: lwz r6, 0(r5)
+; CHECK-P9-NEXT: rlwinm r3, r7, 2, 28, 29
+; CHECK-P9-NEXT: addi r7, r1, -16
; CHECK-P9-NEXT: stxv v2, -16(r1)
-; CHECK-P9-NEXT: stwx r3, r6, r4
-; CHECK-P9-NEXT: rlwinm r4, r7, 2, 28, 29
+; CHECK-P9-NEXT: stwx r6, r7, r4
; CHECK-P9-NEXT: lxv vs0, -16(r1)
-; CHECK-P9-NEXT: lwz r3, 1(r5)
+; CHECK-P9-NEXT: lwz r4, 1(r5)
; CHECK-P9-NEXT: addi r5, r1, -32
; CHECK-P9-NEXT: stxv vs0, -32(r1)
-; CHECK-P9-NEXT: stwx r3, r5, r4
+; CHECK-P9-NEXT: stwx r4, r5, r3
; CHECK-P9-NEXT: lxv v2, -32(r1)
; CHECK-P9-NEXT: blr
;
-; AIX-P8-64-LABEL: testFloat2:
-; AIX-P8-64: # %bb.0: # %entry
-; AIX-P8-64-NEXT: lwz r7, 0(r3)
-; AIX-P8-64-NEXT: addi r6, r1, -32
-; AIX-P8-64-NEXT: rlwinm r4, r4, 2, 28, 29
-; AIX-P8-64-NEXT: rlwinm r5, r5, 2, 28, 29
-; AIX-P8-64-NEXT: stxvw4x v2, 0, r6
-; AIX-P8-64-NEXT: stwx r7, r6, r4
-; AIX-P8-64-NEXT: addi r4, r1, -16
-; AIX-P8-64-NEXT: lxvw4x vs0, 0, r6
-; AIX-P8-64-NEXT: lwz r3, 1(r3)
-; AIX-P8-64-NEXT: stxvw4x vs0, 0, r4
-; AIX-P8-64-NEXT: stwx r3, r4, r5
-; AIX-P8-64-NEXT: lxvw4x v2, 0, r4
-; AIX-P8-64-NEXT: blr
-;
-; AIX-P8-32-LABEL: testFloat2:
-; AIX-P8-32: # %bb.0: # %entry
-; AIX-P8-32-NEXT: lwz r7, 0(r3)
-; AIX-P8-32-NEXT: addi r6, r1, -32
-; AIX-P8-32-NEXT: rlwinm r4, r4, 2, 28, 29
-; AIX-P8-32-NEXT: stxvw4x v2, 0, r6
-; AIX-P8-32-NEXT: stwx r7, r6, r4
-; AIX-P8-32-NEXT: rlwinm r4, r5, 2, 28, 29
-; AIX-P8-32-NEXT: addi r5, r1, -16
-; AIX-P8-32-NEXT: lxvw4x vs0, 0, r6
-; AIX-P8-32-NEXT: lwz r3, 1(r3)
-; AIX-P8-32-NEXT: stxvw4x vs0, 0, r5
-; AIX-P8-32-NEXT: stwx r3, r5, r4
-; AIX-P8-32-NEXT: lxvw4x v2, 0, r5
-; AIX-P8-32-NEXT: blr
+; AIX-P8-LABEL: testFloat2:
+; AIX-P8: # %bb.0: # %entry
+; AIX-P8-NEXT: lwz r7, 0(r3)
+; AIX-P8-NEXT: addi r6, r1, -32
+; AIX-P8-NEXT: rlwinm r4, r4, 2, 28, 29
+; AIX-P8-NEXT: stxvw4x v2, 0, r6
+; AIX-P8-NEXT: stwx r7, r6, r4
+; AIX-P8-NEXT: rlwinm r4, r5, 2, 28, 29
+; AIX-P8-NEXT: addi r5, r1, -16
+; AIX-P8-NEXT: lxvw4x vs0, 0, r6
+; AIX-P8-NEXT: lwz r3, 1(r3)
+; AIX-P8-NEXT: stxvw4x vs0, 0, r5
+; AIX-P8-NEXT: stwx r3, r5, r4
+; AIX-P8-NEXT: lxvw4x v2, 0, r5
+; AIX-P8-NEXT: blr
entry:
%add.ptr1 = getelementptr inbounds i8, ptr %b, i64 1
%0 = load float, ptr %b, align 4
; CHECK-LABEL: testFloat3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: plwz r3, 65536(r5), 0
-; CHECK-NEXT: extsw r4, r6
-; CHECK-NEXT: slwi r4, r4, 2
+; CHECK-NEXT: slwi r4, r6, 2
; CHECK-NEXT: vinswrx v2, r4, r3
; CHECK-NEXT: li r3, 1
-; CHECK-NEXT: extsw r4, r7
+; CHECK-NEXT: slwi r4, r7, 2
; CHECK-NEXT: rldic r3, r3, 36, 27
-; CHECK-NEXT: slwi r4, r4, 2
; CHECK-NEXT: lwzx r3, r5, r3
; CHECK-NEXT: vinswrx v2, r4, r3
; CHECK-NEXT: blr
; CHECK-BE-LABEL: testFloat3:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: plwz r3, 65536(r5), 0
-; CHECK-BE-NEXT: extsw r4, r6
-; CHECK-BE-NEXT: slwi r4, r4, 2
+; CHECK-BE-NEXT: slwi r4, r6, 2
; CHECK-BE-NEXT: vinswlx v2, r4, r3
; CHECK-BE-NEXT: li r3, 1
-; CHECK-BE-NEXT: extsw r4, r7
+; CHECK-BE-NEXT: slwi r4, r7, 2
; CHECK-BE-NEXT: rldic r3, r3, 36, 27
-; CHECK-BE-NEXT: slwi r4, r4, 2
; CHECK-BE-NEXT: lwzx r3, r5, r3
; CHECK-BE-NEXT: vinswlx v2, r4, r3
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testFloat3:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lis r3, 1
; CHECK-P9-NEXT: rlwinm r4, r6, 2, 28, 29
-; CHECK-P9-NEXT: addi r6, r1, -16
-; CHECK-P9-NEXT: lwzx r3, r5, r3
+; CHECK-P9-NEXT: lis r6, 1
+; CHECK-P9-NEXT: rlwinm r3, r7, 2, 28, 29
+; CHECK-P9-NEXT: addi r7, r1, -16
+; CHECK-P9-NEXT: lwzx r6, r5, r6
; CHECK-P9-NEXT: stxv v2, -16(r1)
-; CHECK-P9-NEXT: stwx r3, r6, r4
-; CHECK-P9-NEXT: li r3, 1
-; CHECK-P9-NEXT: rlwinm r4, r7, 2, 28, 29
+; CHECK-P9-NEXT: stwx r6, r7, r4
+; CHECK-P9-NEXT: li r4, 1
; CHECK-P9-NEXT: lxv vs0, -16(r1)
-; CHECK-P9-NEXT: rldic r3, r3, 36, 27
-; CHECK-P9-NEXT: lwzx r3, r5, r3
+; CHECK-P9-NEXT: rldic r4, r4, 36, 27
+; CHECK-P9-NEXT: lwzx r4, r5, r4
; CHECK-P9-NEXT: addi r5, r1, -32
; CHECK-P9-NEXT: stxv vs0, -32(r1)
-; CHECK-P9-NEXT: stwx r3, r5, r4
+; CHECK-P9-NEXT: stwx r4, r5, r3
; CHECK-P9-NEXT: lxv v2, -32(r1)
; CHECK-P9-NEXT: blr
;
; AIX-P8-64-LABEL: testFloat3:
; AIX-P8-64: # %bb.0: # %entry
; AIX-P8-64-NEXT: lis r6, 1
-; AIX-P8-64-NEXT: addi r7, r1, -32
; AIX-P8-64-NEXT: rlwinm r4, r4, 2, 28, 29
-; AIX-P8-64-NEXT: rlwinm r5, r5, 2, 28, 29
+; AIX-P8-64-NEXT: addi r7, r1, -32
; AIX-P8-64-NEXT: lwzx r6, r3, r6
; AIX-P8-64-NEXT: stxvw4x v2, 0, r7
; AIX-P8-64-NEXT: stwx r6, r7, r4
; AIX-P8-64-NEXT: lxvw4x vs0, 0, r7
; AIX-P8-64-NEXT: rldic r4, r4, 36, 27
; AIX-P8-64-NEXT: lwzx r3, r3, r4
-; AIX-P8-64-NEXT: addi r4, r1, -16
-; AIX-P8-64-NEXT: stxvw4x vs0, 0, r4
-; AIX-P8-64-NEXT: stwx r3, r4, r5
-; AIX-P8-64-NEXT: lxvw4x v2, 0, r4
+; AIX-P8-64-NEXT: rlwinm r4, r5, 2, 28, 29
+; AIX-P8-64-NEXT: addi r5, r1, -16
+; AIX-P8-64-NEXT: stxvw4x vs0, 0, r5
+; AIX-P8-64-NEXT: stwx r3, r5, r4
+; AIX-P8-64-NEXT: lxvw4x v2, 0, r5
; AIX-P8-64-NEXT: blr
;
; AIX-P8-32-LABEL: testFloat3:
define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) {
; CHECK-LABEL: testDouble1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: extsw r4, r6
; CHECK-NEXT: mffprd r3, f1
-; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-NEXT: rlwinm r4, r6, 3, 0, 28
; CHECK-NEXT: vinsdrx v2, r4, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testDouble1:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: extsw r4, r6
; CHECK-BE-NEXT: mffprd r3, f1
-; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-BE-NEXT: rlwinm r4, r6, 3, 0, 28
; CHECK-BE-NEXT: vinsdlx v2, r4, r3
; CHECK-BE-NEXT: blr
;
; CHECK-LABEL: testDouble2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld r3, 0(r5)
-; CHECK-NEXT: extsw r4, r6
-; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-NEXT: rlwinm r4, r6, 3, 0, 28
; CHECK-NEXT: vinsdrx v2, r4, r3
; CHECK-NEXT: pld r3, 1(r5), 0
-; CHECK-NEXT: extsw r4, r7
-; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-NEXT: rlwinm r4, r7, 3, 0, 28
; CHECK-NEXT: vinsdrx v2, r4, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testDouble2:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: ld r3, 0(r5)
-; CHECK-BE-NEXT: extsw r4, r6
-; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-BE-NEXT: rlwinm r4, r6, 3, 0, 28
; CHECK-BE-NEXT: vinsdlx v2, r4, r3
; CHECK-BE-NEXT: pld r3, 1(r5), 0
-; CHECK-BE-NEXT: extsw r4, r7
-; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-BE-NEXT: rlwinm r4, r7, 3, 0, 28
; CHECK-BE-NEXT: vinsdlx v2, r4, r3
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testDouble2:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: ld r3, 0(r5)
; CHECK-P9-NEXT: rlwinm r4, r6, 3, 28, 28
-; CHECK-P9-NEXT: addi r6, r1, -32
+; CHECK-P9-NEXT: ld r6, 0(r5)
+; CHECK-P9-NEXT: rlwinm r3, r7, 3, 28, 28
+; CHECK-P9-NEXT: addi r7, r1, -32
; CHECK-P9-NEXT: stxv v2, -32(r1)
-; CHECK-P9-NEXT: stdx r3, r6, r4
-; CHECK-P9-NEXT: li r3, 1
-; CHECK-P9-NEXT: rlwinm r4, r7, 3, 28, 28
+; CHECK-P9-NEXT: stdx r6, r7, r4
+; CHECK-P9-NEXT: li r4, 1
; CHECK-P9-NEXT: lxv vs0, -32(r1)
-; CHECK-P9-NEXT: ldx r3, r5, r3
+; CHECK-P9-NEXT: ldx r4, r5, r4
; CHECK-P9-NEXT: addi r5, r1, -16
; CHECK-P9-NEXT: stxv vs0, -16(r1)
-; CHECK-P9-NEXT: stdx r3, r5, r4
+; CHECK-P9-NEXT: stdx r4, r5, r3
; CHECK-P9-NEXT: lxv v2, -16(r1)
; CHECK-P9-NEXT: blr
;
; AIX-P8-64-NEXT: ld r7, 0(r3)
; AIX-P8-64-NEXT: addi r6, r1, -32
; AIX-P8-64-NEXT: rlwinm r4, r4, 3, 28, 28
-; AIX-P8-64-NEXT: rlwinm r5, r5, 3, 28, 28
; AIX-P8-64-NEXT: stxvd2x v2, 0, r6
; AIX-P8-64-NEXT: stdx r7, r6, r4
; AIX-P8-64-NEXT: li r4, 1
; AIX-P8-64-NEXT: lxvd2x vs0, 0, r6
; AIX-P8-64-NEXT: ldx r3, r3, r4
-; AIX-P8-64-NEXT: addi r4, r1, -16
-; AIX-P8-64-NEXT: stxvd2x vs0, 0, r4
-; AIX-P8-64-NEXT: stdx r3, r4, r5
-; AIX-P8-64-NEXT: lxvd2x v2, 0, r4
+; AIX-P8-64-NEXT: rlwinm r4, r5, 3, 28, 28
+; AIX-P8-64-NEXT: addi r5, r1, -16
+; AIX-P8-64-NEXT: stxvd2x vs0, 0, r5
+; AIX-P8-64-NEXT: stdx r3, r5, r4
+; AIX-P8-64-NEXT: lxvd2x v2, 0, r5
; AIX-P8-64-NEXT: blr
;
; AIX-P8-32-LABEL: testDouble2:
; CHECK-LABEL: testDouble3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, 65536(r5), 0
-; CHECK-NEXT: extsw r4, r6
-; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-NEXT: rlwinm r4, r6, 3, 0, 28
; CHECK-NEXT: vinsdrx v2, r4, r3
; CHECK-NEXT: li r3, 1
-; CHECK-NEXT: extsw r4, r7
+; CHECK-NEXT: rlwinm r4, r7, 3, 0, 28
; CHECK-NEXT: rldic r3, r3, 36, 27
-; CHECK-NEXT: rlwinm r4, r4, 3, 0, 28
; CHECK-NEXT: ldx r3, r5, r3
; CHECK-NEXT: vinsdrx v2, r4, r3
; CHECK-NEXT: blr
; CHECK-BE-LABEL: testDouble3:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: pld r3, 65536(r5), 0
-; CHECK-BE-NEXT: extsw r4, r6
-; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28
+; CHECK-BE-NEXT: rlwinm r4, r6, 3, 0, 28
; CHECK-BE-NEXT: vinsdlx v2, r4, r3
; CHECK-BE-NEXT: li r3, 1
-; CHECK-BE-NEXT: extsw r4, r7
+; CHECK-BE-NEXT: rlwinm r4, r7, 3, 0, 28
; CHECK-BE-NEXT: rldic r3, r3, 36, 27
-; CHECK-BE-NEXT: rlwinm r4, r4, 3, 0, 28
; CHECK-BE-NEXT: ldx r3, r5, r3
; CHECK-BE-NEXT: vinsdlx v2, r4, r3
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testDouble3:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lis r3, 1
; CHECK-P9-NEXT: rlwinm r4, r6, 3, 28, 28
-; CHECK-P9-NEXT: addi r6, r1, -32
-; CHECK-P9-NEXT: ldx r3, r5, r3
+; CHECK-P9-NEXT: lis r6, 1
+; CHECK-P9-NEXT: rlwinm r3, r7, 3, 28, 28
+; CHECK-P9-NEXT: addi r7, r1, -32
+; CHECK-P9-NEXT: ldx r6, r5, r6
; CHECK-P9-NEXT: stxv v2, -32(r1)
-; CHECK-P9-NEXT: stdx r3, r6, r4
-; CHECK-P9-NEXT: li r3, 1
-; CHECK-P9-NEXT: rlwinm r4, r7, 3, 28, 28
+; CHECK-P9-NEXT: stdx r6, r7, r4
+; CHECK-P9-NEXT: li r4, 1
; CHECK-P9-NEXT: lxv vs0, -32(r1)
-; CHECK-P9-NEXT: rldic r3, r3, 36, 27
-; CHECK-P9-NEXT: ldx r3, r5, r3
+; CHECK-P9-NEXT: rldic r4, r4, 36, 27
+; CHECK-P9-NEXT: ldx r4, r5, r4
; CHECK-P9-NEXT: addi r5, r1, -16
; CHECK-P9-NEXT: stxv vs0, -16(r1)
-; CHECK-P9-NEXT: stdx r3, r5, r4
+; CHECK-P9-NEXT: stdx r4, r5, r3
; CHECK-P9-NEXT: lxv v2, -16(r1)
; CHECK-P9-NEXT: blr
;
; AIX-P8-64-LABEL: testDouble3:
; AIX-P8-64: # %bb.0: # %entry
; AIX-P8-64-NEXT: lis r6, 1
-; AIX-P8-64-NEXT: addi r7, r1, -32
; AIX-P8-64-NEXT: rlwinm r4, r4, 3, 28, 28
+; AIX-P8-64-NEXT: addi r7, r1, -32
; AIX-P8-64-NEXT: li r8, 1
-; AIX-P8-64-NEXT: rlwinm r5, r5, 3, 28, 28
; AIX-P8-64-NEXT: ldx r6, r3, r6
; AIX-P8-64-NEXT: stxvd2x v2, 0, r7
; AIX-P8-64-NEXT: stdx r6, r7, r4
; AIX-P8-64-NEXT: rldic r4, r8, 36, 27
; AIX-P8-64-NEXT: lxvd2x vs0, 0, r7
; AIX-P8-64-NEXT: ldx r3, r3, r4
-; AIX-P8-64-NEXT: addi r4, r1, -16
-; AIX-P8-64-NEXT: stxvd2x vs0, 0, r4
-; AIX-P8-64-NEXT: stdx r3, r4, r5
-; AIX-P8-64-NEXT: lxvd2x v2, 0, r4
+; AIX-P8-64-NEXT: rlwinm r4, r5, 3, 28, 28
+; AIX-P8-64-NEXT: addi r5, r1, -16
+; AIX-P8-64-NEXT: stxvd2x vs0, 0, r5
+; AIX-P8-64-NEXT: stdx r3, r5, r4
+; AIX-P8-64-NEXT: lxvd2x v2, 0, r5
; AIX-P8-64-NEXT: blr
;
; AIX-P8-32-LABEL: testDouble3:
ret half %r
}
-define half @extractelt_nxv1f16_idx(<vscale x 1 x half> %v, i32 signext %idx) {
+define half @extractelt_nxv1f16_idx(<vscale x 1 x half> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv1f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
ret half %r
}
-define half @extractelt_nxv2f16_idx(<vscale x 2 x half> %v, i32 signext %idx) {
+define half @extractelt_nxv2f16_idx(<vscale x 2 x half> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv2f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
ret half %r
}
-define half @extractelt_nxv4f16_idx(<vscale x 4 x half> %v, i32 signext %idx) {
+define half @extractelt_nxv4f16_idx(<vscale x 4 x half> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv4f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
ret half %r
}
-define half @extractelt_nxv8f16_idx(<vscale x 8 x half> %v, i32 signext %idx) {
+define half @extractelt_nxv8f16_idx(<vscale x 8 x half> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv8f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, ma
ret half %r
}
-define half @extractelt_nxv16f16_idx(<vscale x 16 x half> %v, i32 signext %idx) {
+define half @extractelt_nxv16f16_idx(<vscale x 16 x half> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv16f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m4, ta, ma
ret half %r
}
-define half @extractelt_nxv32f16_idx(<vscale x 32 x half> %v, i32 signext %idx) {
+define half @extractelt_nxv32f16_idx(<vscale x 32 x half> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv32f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma
ret float %r
}
-define float @extractelt_nxv1f32_idx(<vscale x 1 x float> %v, i32 signext %idx) {
+define float @extractelt_nxv1f32_idx(<vscale x 1 x float> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv1f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
ret float %r
}
-define float @extractelt_nxv2f32_idx(<vscale x 2 x float> %v, i32 signext %idx) {
+define float @extractelt_nxv2f32_idx(<vscale x 2 x float> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv2f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
ret float %r
}
-define float @extractelt_nxv4f32_idx(<vscale x 4 x float> %v, i32 signext %idx) {
+define float @extractelt_nxv4f32_idx(<vscale x 4 x float> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv4f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
ret float %r
}
-define float @extractelt_nxv8f32_idx(<vscale x 8 x float> %v, i32 signext %idx) {
+define float @extractelt_nxv8f32_idx(<vscale x 8 x float> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv8f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m4, ta, ma
ret float %r
}
-define float @extractelt_nxv16f32_idx(<vscale x 16 x float> %v, i32 signext %idx) {
+define float @extractelt_nxv16f32_idx(<vscale x 16 x float> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv16f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma
ret double %r
}
-define double @extractelt_nxv1f64_idx(<vscale x 1 x double> %v, i32 signext %idx) {
+define double @extractelt_nxv1f64_idx(<vscale x 1 x double> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv1f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
ret double %r
}
-define double @extractelt_nxv2f64_idx(<vscale x 2 x double> %v, i32 signext %idx) {
+define double @extractelt_nxv2f64_idx(<vscale x 2 x double> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv2f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, ma
ret double %r
}
-define double @extractelt_nxv4f64_idx(<vscale x 4 x double> %v, i32 signext %idx) {
+define double @extractelt_nxv4f64_idx(<vscale x 4 x double> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv4f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, ma
ret double %r
}
-define double @extractelt_nxv8f64_idx(<vscale x 8 x double> %v, i32 signext %idx) {
+define double @extractelt_nxv8f64_idx(<vscale x 8 x double> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv8f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma
}
define double @extractelt_nxv16f64_neg1(<vscale x 16 x double> %v) {
-; CHECK-LABEL: extractelt_nxv16f64_neg1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -64
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: addi s0, sp, 64
-; CHECK-NEXT: .cfi_def_cfa s0, 0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: andi sp, sp, -64
-; CHECK-NEXT: addi a0, sp, 64
-; CHECK-NEXT: vs8r.v v8, (a0)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a2, a1, 3
-; CHECK-NEXT: add a2, a0, a2
-; CHECK-NEXT: vs8r.v v16, (a2)
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: fld fa0, -8(a0)
-; CHECK-NEXT: addi sp, s0, -64
-; CHECK-NEXT: addi sp, sp, 64
-; CHECK-NEXT: ret
%r = extractelement <vscale x 16 x double> %v, i32 -1
ret double %r
}
ret double %r
}
-define double @extractelt_nxv16f64_idx(<vscale x 16 x double> %v, i32 signext %idx) {
+define double @extractelt_nxv16f64_idx(<vscale x 16 x double> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv16f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
ret i8 %r
}
-define signext i8 @extractelt_nxv1i8_idx(<vscale x 1 x i8> %v, i32 signext %idx) {
+define signext i8 @extractelt_nxv1i8_idx(<vscale x 1 x i8> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv1i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
ret i8 %r
}
-define signext i8 @extractelt_nxv2i8_idx(<vscale x 2 x i8> %v, i32 signext %idx) {
+define signext i8 @extractelt_nxv2i8_idx(<vscale x 2 x i8> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv2i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
ret i8 %r
}
-define signext i8 @extractelt_nxv4i8_idx(<vscale x 4 x i8> %v, i32 signext %idx) {
+define signext i8 @extractelt_nxv4i8_idx(<vscale x 4 x i8> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv4i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
ret i8 %r
}
-define signext i8 @extractelt_nxv8i8_idx(<vscale x 8 x i8> %v, i32 signext %idx) {
+define signext i8 @extractelt_nxv8i8_idx(<vscale x 8 x i8> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv8i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
ret i8 %r
}
-define signext i8 @extractelt_nxv16i8_idx(<vscale x 16 x i8> %v, i32 signext %idx) {
+define signext i8 @extractelt_nxv16i8_idx(<vscale x 16 x i8> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv16i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m2, ta, ma
ret i8 %r
}
-define signext i8 @extractelt_nxv32i8_idx(<vscale x 32 x i8> %v, i32 signext %idx) {
+define signext i8 @extractelt_nxv32i8_idx(<vscale x 32 x i8> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv32i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m4, ta, ma
ret i8 %r
}
-define signext i8 @extractelt_nxv64i8_idx(<vscale x 64 x i8> %v, i32 signext %idx) {
+define signext i8 @extractelt_nxv64i8_idx(<vscale x 64 x i8> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv64i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma
ret i16 %r
}
-define signext i16 @extractelt_nxv1i16_idx(<vscale x 1 x i16> %v, i32 signext %idx) {
+define signext i16 @extractelt_nxv1i16_idx(<vscale x 1 x i16> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv1i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
ret i16 %r
}
-define signext i16 @extractelt_nxv2i16_idx(<vscale x 2 x i16> %v, i32 signext %idx) {
+define signext i16 @extractelt_nxv2i16_idx(<vscale x 2 x i16> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv2i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
ret i16 %r
}
-define signext i16 @extractelt_nxv4i16_idx(<vscale x 4 x i16> %v, i32 signext %idx) {
+define signext i16 @extractelt_nxv4i16_idx(<vscale x 4 x i16> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv4i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
ret i16 %r
}
-define signext i16 @extractelt_nxv8i16_idx(<vscale x 8 x i16> %v, i32 signext %idx) {
+define signext i16 @extractelt_nxv8i16_idx(<vscale x 8 x i16> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv8i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, ma
ret i16 %r
}
-define signext i16 @extractelt_nxv16i16_idx(<vscale x 16 x i16> %v, i32 signext %idx) {
+define signext i16 @extractelt_nxv16i16_idx(<vscale x 16 x i16> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv16i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m4, ta, ma
ret i16 %r
}
-define signext i16 @extractelt_nxv32i16_idx(<vscale x 32 x i16> %v, i32 signext %idx) {
+define signext i16 @extractelt_nxv32i16_idx(<vscale x 32 x i16> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv32i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma
ret i32 %r
}
-define signext i32 @extractelt_nxv1i32_idx(<vscale x 1 x i32> %v, i32 signext %idx) {
+define signext i32 @extractelt_nxv1i32_idx(<vscale x 1 x i32> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv1i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
ret i32 %r
}
-define signext i32 @extractelt_nxv2i32_idx(<vscale x 2 x i32> %v, i32 signext %idx) {
+define signext i32 @extractelt_nxv2i32_idx(<vscale x 2 x i32> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv2i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
ret i32 %r
}
-define signext i32 @extractelt_nxv4i32_idx(<vscale x 4 x i32> %v, i32 signext %idx) {
+define signext i32 @extractelt_nxv4i32_idx(<vscale x 4 x i32> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv4i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
ret i32 %r
}
-define signext i32 @extractelt_nxv8i32_idx(<vscale x 8 x i32> %v, i32 signext %idx) {
+define signext i32 @extractelt_nxv8i32_idx(<vscale x 8 x i32> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv8i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m4, ta, ma
ret i32 %r
}
-define signext i32 @extractelt_nxv16i32_idx(<vscale x 16 x i32> %v, i32 signext %idx) {
+define signext i32 @extractelt_nxv16i32_idx(<vscale x 16 x i32> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv16i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma
ret i64 %r
}
-define i64 @extractelt_nxv1i64_idx(<vscale x 1 x i64> %v, i32 signext %idx) {
+define i64 @extractelt_nxv1i64_idx(<vscale x 1 x i64> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv1i64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
ret i64 %r
}
-define i64 @extractelt_nxv2i64_idx(<vscale x 2 x i64> %v, i32 signext %idx) {
+define i64 @extractelt_nxv2i64_idx(<vscale x 2 x i64> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv2i64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, ma
ret i64 %r
}
-define i64 @extractelt_nxv4i64_idx(<vscale x 4 x i64> %v, i32 signext %idx) {
+define i64 @extractelt_nxv4i64_idx(<vscale x 4 x i64> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv4i64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, ma
ret i64 %r
}
-define i64 @extractelt_nxv8i64_idx(<vscale x 8 x i64> %v, i32 signext %idx) {
+define i64 @extractelt_nxv8i64_idx(<vscale x 8 x i64> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv8i64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma
; CHECK-NEXT: andi sp, sp, -64
; CHECK-NEXT: addi a0, sp, 64
; CHECK-NEXT: vs8r.v v8, (a0)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a2, a1, 3
-; CHECK-NEXT: add a2, a0, a2
-; CHECK-NEXT: vs8r.v v16, (a2)
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: ld a0, -8(a0)
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 3
+; CHECK-NEXT: add a3, a0, a1
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: srli a1, a1, 32
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: vs8r.v v16, (a3)
+; CHECK-NEXT: bltu a2, a1, .LBB72_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: .LBB72_2:
+; CHECK-NEXT: slli a1, a2, 3
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: ld a0, 0(a0)
; CHECK-NEXT: addi sp, s0, -64
; CHECK-NEXT: addi sp, sp, 64
; CHECK-NEXT: ret
ret i64 %r
}
-define i64 @extractelt_nxv16i64_idx(<vscale x 16 x i64> %v, i32 signext %idx) {
+define i64 @extractelt_nxv16i64_idx(<vscale x 16 x i64> %v, i32 zeroext %idx) {
; CHECK-LABEL: extractelt_nxv16i64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
ret i64 %b
}
-define i8 @extractelt_v16i8_idx(<16 x i8>* %x, i32 signext %idx) nounwind {
+define i8 @extractelt_v16i8_idx(<16 x i8>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v16i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
ret i8 %b
}
-define i16 @extractelt_v8i16_idx(<8 x i16>* %x, i32 signext %idx) nounwind {
+define i16 @extractelt_v8i16_idx(<8 x i16>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
ret i16 %b
}
-define i32 @extractelt_v4i32_idx(<4 x i32>* %x, i32 signext %idx) nounwind {
+define i32 @extractelt_v4i32_idx(<4 x i32>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v4i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
ret i32 %c
}
-define i64 @extractelt_v2i64_idx(<2 x i64>* %x, i32 signext %idx) nounwind {
+define i64 @extractelt_v2i64_idx(<2 x i64>* %x, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v2i64_idx:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
ret i64 %c
}
-define half @extractelt_v8f16_idx(<8 x half>* %x, i32 signext %idx) nounwind {
+define half @extractelt_v8f16_idx(<8 x half>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
ret half %c
}
-define float @extractelt_v4f32_idx(<4 x float>* %x, i32 signext %idx) nounwind {
+define float @extractelt_v4f32_idx(<4 x float>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v4f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
ret float %c
}
-define double @extractelt_v2f64_idx(<2 x double>* %x, i32 signext %idx) nounwind {
+define double @extractelt_v2f64_idx(<2 x double>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v2f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
ret double %c
}
-define i8 @extractelt_v32i8_idx(<32 x i8>* %x, i32 signext %idx) nounwind {
+define i8 @extractelt_v32i8_idx(<32 x i8>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v32i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 32
ret i8 %b
}
-define i16 @extractelt_v16i16_idx(<16 x i16>* %x, i32 signext %idx) nounwind {
+define i16 @extractelt_v16i16_idx(<16 x i16>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v16i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
ret i16 %b
}
-define i32 @extractelt_v8i32_idx(<8 x i32>* %x, i32 signext %idx) nounwind {
+define i32 @extractelt_v8i32_idx(<8 x i32>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
ret i32 %c
}
-define i64 @extractelt_v4i64_idx(<4 x i64>* %x, i32 signext %idx) nounwind {
+define i64 @extractelt_v4i64_idx(<4 x i64>* %x, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v4i64_idx:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
ret i64 %c
}
-define half @extractelt_v16f16_idx(<16 x half>* %x, i32 signext %idx) nounwind {
+define half @extractelt_v16f16_idx(<16 x half>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v16f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
ret half %c
}
-define float @extractelt_v8f32_idx(<8 x float>* %x, i32 signext %idx) nounwind {
+define float @extractelt_v8f32_idx(<8 x float>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
ret float %c
}
-define double @extractelt_v4f64_idx(<4 x double>* %x, i32 signext %idx) nounwind {
+define double @extractelt_v4f64_idx(<4 x double>* %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v4f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
}
; This uses a non-power of 2 type so that it isn't an MVT to catch an
-; incorrect use of getSimpleValueType_idx(, i32 signext %idx).
+; incorrect use of getSimpleValueType().
; NOTE: Type legalization is bitcasting to vXi32 and doing 2 independent
; slidedowns and extracts.
-define i64 @extractelt_v3i64_idx(<3 x i64>* %x, i32 signext %idx) nounwind {
+define i64 @extractelt_v3i64_idx(<3 x i64>* %x, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v3i64_idx:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
}
define <1 x i1> @insertelt_idx_v1i1(<1 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind {
-; RV32-LABEL: insertelt_idx_v1i1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vmv.v.i v9, 0
-; RV32-NEXT: vmerge.vim v9, v9, 1, v0
-; RV32-NEXT: addi a0, a1, 1
-; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
-; RV32-NEXT: vslideup.vx v9, v8, a1
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV32-NEXT: vand.vi v8, v9, 1
-; RV32-NEXT: vmsne.vi v0, v8, 0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: insertelt_idx_v1i1:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV64-NEXT: vmv.s.x v8, a0
-; RV64-NEXT: vmv.v.i v9, 0
-; RV64-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64-NEXT: sext.w a0, a1
-; RV64-NEXT: addi a1, a0, 1
-; RV64-NEXT: vsetvli zero, a1, e8, mf8, tu, ma
-; RV64-NEXT: vslideup.vx v9, v8, a0
-; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV64-NEXT: vand.vi v8, v9, 1
-; RV64-NEXT: vmsne.vi v0, v8, 0
-; RV64-NEXT: ret
+; CHECK-LABEL: insertelt_idx_v1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
+; CHECK-NEXT: vslideup.vx v9, v8, a1
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vand.vi v8, v9, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
%y = insertelement <1 x i1> %x, i1 %elt, i32 %idx
ret <1 x i1> %y
}
}
define <2 x i1> @insertelt_idx_v2i1(<2 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind {
-; RV32-LABEL: insertelt_idx_v2i1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vmv.v.i v9, 0
-; RV32-NEXT: vmerge.vim v9, v9, 1, v0
-; RV32-NEXT: addi a0, a1, 1
-; RV32-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
-; RV32-NEXT: vslideup.vx v9, v8, a1
-; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; RV32-NEXT: vand.vi v8, v9, 1
-; RV32-NEXT: vmsne.vi v0, v8, 0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: insertelt_idx_v2i1:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; RV64-NEXT: vmv.s.x v8, a0
-; RV64-NEXT: vmv.v.i v9, 0
-; RV64-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64-NEXT: sext.w a0, a1
-; RV64-NEXT: addi a1, a0, 1
-; RV64-NEXT: vsetvli zero, a1, e8, mf8, tu, ma
-; RV64-NEXT: vslideup.vx v9, v8, a0
-; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; RV64-NEXT: vand.vi v8, v9, 1
-; RV64-NEXT: vmsne.vi v0, v8, 0
-; RV64-NEXT: ret
+; CHECK-LABEL: insertelt_idx_v2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
+; CHECK-NEXT: vslideup.vx v9, v8, a1
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vand.vi v8, v9, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
%y = insertelement <2 x i1> %x, i1 %elt, i32 %idx
ret <2 x i1> %y
}
}
define <8 x i1> @insertelt_idx_v8i1(<8 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind {
-; RV32-LABEL: insertelt_idx_v8i1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vmv.v.i v9, 0
-; RV32-NEXT: vmerge.vim v9, v9, 1, v0
-; RV32-NEXT: addi a0, a1, 1
-; RV32-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
-; RV32-NEXT: vslideup.vx v9, v8, a1
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vand.vi v8, v9, 1
-; RV32-NEXT: vmsne.vi v0, v8, 0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: insertelt_idx_v8i1:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vmv.s.x v8, a0
-; RV64-NEXT: vmv.v.i v9, 0
-; RV64-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64-NEXT: sext.w a0, a1
-; RV64-NEXT: addi a1, a0, 1
-; RV64-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
-; RV64-NEXT: vslideup.vx v9, v8, a0
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vand.vi v8, v9, 1
-; RV64-NEXT: vmsne.vi v0, v8, 0
-; RV64-NEXT: ret
+; CHECK-LABEL: insertelt_idx_v8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vx v9, v8, a1
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vand.vi v8, v9, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
%y = insertelement <8 x i1> %x, i1 %elt, i32 %idx
ret <8 x i1> %y
}
}
define <64 x i1> @insertelt_idx_v64i1(<64 x i1> %x, i1 %elt, i32 zeroext %idx) nounwind {
-; RV32-LABEL: insertelt_idx_v64i1:
-; RV32: # %bb.0:
-; RV32-NEXT: li a2, 64
-; RV32-NEXT: vsetvli zero, a2, e8, m4, ta, ma
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vmerge.vim v12, v12, 1, v0
-; RV32-NEXT: addi a0, a1, 1
-; RV32-NEXT: vsetvli zero, a0, e8, m4, tu, ma
-; RV32-NEXT: vslideup.vx v12, v8, a1
-; RV32-NEXT: vsetvli zero, a2, e8, m4, ta, ma
-; RV32-NEXT: vand.vi v8, v12, 1
-; RV32-NEXT: vmsne.vi v0, v8, 0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: insertelt_idx_v64i1:
-; RV64: # %bb.0:
-; RV64-NEXT: li a2, 64
-; RV64-NEXT: vsetvli zero, a2, e8, m4, ta, ma
-; RV64-NEXT: vmv.s.x v8, a0
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, 1, v0
-; RV64-NEXT: sext.w a0, a1
-; RV64-NEXT: addi a1, a0, 1
-; RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma
-; RV64-NEXT: vslideup.vx v12, v8, a0
-; RV64-NEXT: vsetvli zero, a2, e8, m4, ta, ma
-; RV64-NEXT: vand.vi v8, v12, 1
-; RV64-NEXT: vmsne.vi v0, v8, 0
-; RV64-NEXT: ret
+; CHECK-LABEL: insertelt_idx_v64i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmerge.vim v12, v12, 1, v0
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma
+; CHECK-NEXT: vslideup.vx v12, v8, a1
+; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-NEXT: vand.vi v8, v12, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
%y = insertelement <64 x i1> %x, i1 %elt, i32 %idx
ret <64 x i1> %y
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
; RV64-NEXT: vsetvli zero, a3, e16, m4, ta, ma
; RV64-NEXT: vle16.v v8, (a0)
; RV64-NEXT: vmv.s.x v12, a1
-; RV64-NEXT: sext.w a1, a2
+; RV64-NEXT: slli a1, a2, 32
+; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: addi a2, a1, 1
; RV64-NEXT: vsetvli zero, a2, e16, m4, tu, ma
; RV64-NEXT: vslideup.vx v8, v12, a1
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
; RV64-NEXT: vfmv.s.f v10, fa0
-; RV64-NEXT: sext.w a1, a1
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: addi a2, a1, 1
; RV64-NEXT: vsetvli zero, a2, e32, m2, tu, ma
; RV64-NEXT: vslideup.vx v8, v10, a1
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: li a2, -1
; RV64-NEXT: vmv.s.x v12, a2
-; RV64-NEXT: sext.w a1, a1
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: addi a2, a1, 1
; RV64-NEXT: vsetvli zero, a2, e64, m4, tu, ma
; RV64-NEXT: vslideup.vx v8, v12, a1
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: li a2, 6
; RV64-NEXT: vmv.s.x v12, a2
-; RV64-NEXT: sext.w a1, a1
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: addi a2, a1, 1
; RV64-NEXT: vsetvli zero, a2, e64, m4, tu, ma
; RV64-NEXT: vslideup.vx v8, v12, a1
ret <vscale x 1 x half> %r
}
-define <vscale x 1 x half> @insertelt_nxv1f16_idx(<vscale x 1 x half> %v, half %elt, i32 signext %idx) {
+define <vscale x 1 x half> @insertelt_nxv1f16_idx(<vscale x 1 x half> %v, half %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv1f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
ret <vscale x 2 x half> %r
}
-define <vscale x 2 x half> @insertelt_nxv2f16_idx(<vscale x 2 x half> %v, half %elt, i32 signext %idx) {
+define <vscale x 2 x half> @insertelt_nxv2f16_idx(<vscale x 2 x half> %v, half %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv2f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
ret <vscale x 4 x half> %r
}
-define <vscale x 4 x half> @insertelt_nxv4f16_idx(<vscale x 4 x half> %v, half %elt, i32 signext %idx) {
+define <vscale x 4 x half> @insertelt_nxv4f16_idx(<vscale x 4 x half> %v, half %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv4f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
ret <vscale x 8 x half> %r
}
-define <vscale x 8 x half> @insertelt_nxv8f16_idx(<vscale x 8 x half> %v, half %elt, i32 signext %idx) {
+define <vscale x 8 x half> @insertelt_nxv8f16_idx(<vscale x 8 x half> %v, half %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv8f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
ret <vscale x 16 x half> %r
}
-define <vscale x 16 x half> @insertelt_nxv16f16_idx(<vscale x 16 x half> %v, half %elt, i32 signext %idx) {
+define <vscale x 16 x half> @insertelt_nxv16f16_idx(<vscale x 16 x half> %v, half %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv16f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
ret <vscale x 32 x half> %r
}
-define <vscale x 32 x half> @insertelt_nxv32f16_idx(<vscale x 32 x half> %v, half %elt, i32 signext %idx) {
+define <vscale x 32 x half> @insertelt_nxv32f16_idx(<vscale x 32 x half> %v, half %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv32f16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
ret <vscale x 1 x float> %r
}
-define <vscale x 1 x float> @insertelt_nxv1f32_idx(<vscale x 1 x float> %v, float %elt, i32 signext %idx) {
+define <vscale x 1 x float> @insertelt_nxv1f32_idx(<vscale x 1 x float> %v, float %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv1f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
ret <vscale x 2 x float> %r
}
-define <vscale x 2 x float> @insertelt_nxv2f32_idx(<vscale x 2 x float> %v, float %elt, i32 signext %idx) {
+define <vscale x 2 x float> @insertelt_nxv2f32_idx(<vscale x 2 x float> %v, float %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv2f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
ret <vscale x 4 x float> %r
}
-define <vscale x 4 x float> @insertelt_nxv4f32_idx(<vscale x 4 x float> %v, float %elt, i32 signext %idx) {
+define <vscale x 4 x float> @insertelt_nxv4f32_idx(<vscale x 4 x float> %v, float %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv4f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
ret <vscale x 8 x float> %r
}
-define <vscale x 8 x float> @insertelt_nxv8f32_idx(<vscale x 8 x float> %v, float %elt, i32 signext %idx) {
+define <vscale x 8 x float> @insertelt_nxv8f32_idx(<vscale x 8 x float> %v, float %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv8f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
ret <vscale x 16 x float> %r
}
-define <vscale x 16 x float> @insertelt_nxv16f32_idx(<vscale x 16 x float> %v, float %elt, i32 signext %idx) {
+define <vscale x 16 x float> @insertelt_nxv16f32_idx(<vscale x 16 x float> %v, float %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv16f32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
ret <vscale x 1 x double> %r
}
-define <vscale x 1 x double> @insertelt_nxv1f64_idx(<vscale x 1 x double> %v, double %elt, i32 signext %idx) {
+define <vscale x 1 x double> @insertelt_nxv1f64_idx(<vscale x 1 x double> %v, double %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv1f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
ret <vscale x 2 x double> %r
}
-define <vscale x 2 x double> @insertelt_nxv2f64_idx(<vscale x 2 x double> %v, double %elt, i32 signext %idx) {
+define <vscale x 2 x double> @insertelt_nxv2f64_idx(<vscale x 2 x double> %v, double %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv2f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma
ret <vscale x 4 x double> %r
}
-define <vscale x 4 x double> @insertelt_nxv4f64_idx(<vscale x 4 x double> %v, double %elt, i32 signext %idx) {
+define <vscale x 4 x double> @insertelt_nxv4f64_idx(<vscale x 4 x double> %v, double %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv4f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma
ret <vscale x 8 x double> %r
}
-define <vscale x 8 x double> @insertelt_nxv8f64_idx(<vscale x 8 x double> %v, double %elt, i32 signext %idx) {
+define <vscale x 8 x double> @insertelt_nxv8f64_idx(<vscale x 8 x double> %v, double %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv8f64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
ret <vscale x 1 x i8> %r
}
-define <vscale x 1 x i8> @insertelt_nxv1i8_idx(<vscale x 1 x i8> %v, i8 signext %elt, i32 signext %idx) {
+define <vscale x 1 x i8> @insertelt_nxv1i8_idx(<vscale x 1 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv1i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma
ret <vscale x 2 x i8> %r
}
-define <vscale x 2 x i8> @insertelt_nxv2i8_idx(<vscale x 2 x i8> %v, i8 signext %elt, i32 signext %idx) {
+define <vscale x 2 x i8> @insertelt_nxv2i8_idx(<vscale x 2 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv2i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
ret <vscale x 4 x i8> %r
}
-define <vscale x 4 x i8> @insertelt_nxv4i8_idx(<vscale x 4 x i8> %v, i8 signext %elt, i32 signext %idx) {
+define <vscale x 4 x i8> @insertelt_nxv4i8_idx(<vscale x 4 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv4i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
ret <vscale x 8 x i8> %r
}
-define <vscale x 8 x i8> @insertelt_nxv8i8_idx(<vscale x 8 x i8> %v, i8 signext %elt, i32 signext %idx) {
+define <vscale x 8 x i8> @insertelt_nxv8i8_idx(<vscale x 8 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv8i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
ret <vscale x 16 x i8> %r
}
-define <vscale x 16 x i8> @insertelt_nxv16i8_idx(<vscale x 16 x i8> %v, i8 signext %elt, i32 signext %idx) {
+define <vscale x 16 x i8> @insertelt_nxv16i8_idx(<vscale x 16 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv16i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
ret <vscale x 32 x i8> %r
}
-define <vscale x 32 x i8> @insertelt_nxv32i8_idx(<vscale x 32 x i8> %v, i8 signext %elt, i32 signext %idx) {
+define <vscale x 32 x i8> @insertelt_nxv32i8_idx(<vscale x 32 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv32i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma
ret <vscale x 64 x i8> %r
}
-define <vscale x 64 x i8> @insertelt_nxv64i8_idx(<vscale x 64 x i8> %v, i8 signext %elt, i32 signext %idx) {
+define <vscale x 64 x i8> @insertelt_nxv64i8_idx(<vscale x 64 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv64i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma
ret <vscale x 1 x i16> %r
}
-define <vscale x 1 x i16> @insertelt_nxv1i16_idx(<vscale x 1 x i16> %v, i16 signext %elt, i32 signext %idx) {
+define <vscale x 1 x i16> @insertelt_nxv1i16_idx(<vscale x 1 x i16> %v, i16 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv1i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
ret <vscale x 2 x i16> %r
}
-define <vscale x 2 x i16> @insertelt_nxv2i16_idx(<vscale x 2 x i16> %v, i16 signext %elt, i32 signext %idx) {
+define <vscale x 2 x i16> @insertelt_nxv2i16_idx(<vscale x 2 x i16> %v, i16 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv2i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
ret <vscale x 4 x i16> %r
}
-define <vscale x 4 x i16> @insertelt_nxv4i16_idx(<vscale x 4 x i16> %v, i16 signext %elt, i32 signext %idx) {
+define <vscale x 4 x i16> @insertelt_nxv4i16_idx(<vscale x 4 x i16> %v, i16 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv4i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
ret <vscale x 8 x i16> %r
}
-define <vscale x 8 x i16> @insertelt_nxv8i16_idx(<vscale x 8 x i16> %v, i16 signext %elt, i32 signext %idx) {
+define <vscale x 8 x i16> @insertelt_nxv8i16_idx(<vscale x 8 x i16> %v, i16 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv8i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
ret <vscale x 16 x i16> %r
}
-define <vscale x 16 x i16> @insertelt_nxv16i16_idx(<vscale x 16 x i16> %v, i16 signext %elt, i32 signext %idx) {
+define <vscale x 16 x i16> @insertelt_nxv16i16_idx(<vscale x 16 x i16> %v, i16 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv16i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
ret <vscale x 32 x i16> %r
}
-define <vscale x 32 x i16> @insertelt_nxv32i16_idx(<vscale x 32 x i16> %v, i16 signext %elt, i32 signext %idx) {
+define <vscale x 32 x i16> @insertelt_nxv32i16_idx(<vscale x 32 x i16> %v, i16 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv32i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma
ret <vscale x 1 x i32> %r
}
-define <vscale x 1 x i32> @insertelt_nxv1i32_idx(<vscale x 1 x i32> %v, i32 signext %elt, i32 signext %idx) {
+define <vscale x 1 x i32> @insertelt_nxv1i32_idx(<vscale x 1 x i32> %v, i32 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv1i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
ret <vscale x 2 x i32> %r
}
-define <vscale x 2 x i32> @insertelt_nxv2i32_idx(<vscale x 2 x i32> %v, i32 signext %elt, i32 signext %idx) {
+define <vscale x 2 x i32> @insertelt_nxv2i32_idx(<vscale x 2 x i32> %v, i32 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv2i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
ret <vscale x 4 x i32> %r
}
-define <vscale x 4 x i32> @insertelt_nxv4i32_idx(<vscale x 4 x i32> %v, i32 signext %elt, i32 signext %idx) {
+define <vscale x 4 x i32> @insertelt_nxv4i32_idx(<vscale x 4 x i32> %v, i32 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv4i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
ret <vscale x 8 x i32> %r
}
-define <vscale x 8 x i32> @insertelt_nxv8i32_idx(<vscale x 8 x i32> %v, i32 signext %elt, i32 signext %idx) {
+define <vscale x 8 x i32> @insertelt_nxv8i32_idx(<vscale x 8 x i32> %v, i32 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv8i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, ma
ret <vscale x 16 x i32> %r
}
-define <vscale x 16 x i32> @insertelt_nxv16i32_idx(<vscale x 16 x i32> %v, i32 signext %elt, i32 signext %idx) {
+define <vscale x 16 x i32> @insertelt_nxv16i32_idx(<vscale x 16 x i32> %v, i32 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv16i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: sext.w a0, a1
+; CHECK-NEXT: slli a0, a1, 32
+; CHECK-NEXT: srli a0, a0, 32
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v9, a0
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e64, m2, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: sext.w a0, a1
+; CHECK-NEXT: slli a0, a1, 32
+; CHECK-NEXT: srli a0, a0, 32
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, ma
; CHECK-NEXT: vslideup.vx v8, v10, a0
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT: vmv.s.x v12, a0
-; CHECK-NEXT: sext.w a0, a1
+; CHECK-NEXT: slli a0, a1, 32
+; CHECK-NEXT: srli a0, a0, 32
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, ma
; CHECK-NEXT: vslideup.vx v8, v12, a0
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: sext.w a0, a1
+; CHECK-NEXT: slli a0, a1, 32
+; CHECK-NEXT: srli a0, a0, 32
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma
; CHECK-NEXT: vslideup.vx v8, v16, a0
define fastcc i64 @extract_rr_v256i64(i32 signext %idx, <256 x i64> %v) {
; CHECK-LABEL: extract_rr_v256i64:
; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lvs %s0, %v0(%s0)
; CHECK-NEXT: b.l.t (, %s10)
%ret = extractelement <256 x i64> %v, i32 %idx
define fastcc i32 @extract_rr_v256i32(i32 signext %idx, <256 x i32> %v) {
; CHECK-LABEL: extract_rr_v256i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lvs %s0, %v0(%s0)
; CHECK-NEXT: b.l.t (, %s10)
%ret = extractelement <256 x i32> %v, i32 %idx
define fastcc i32 @extract_rr_v512i32(<512 x i32> %v, i32 signext %idx) {
; CHECK-LABEL: extract_rr_v512i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: srl %s1, %s0, 1
+; CHECK-NEXT: lea %s1, -2
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: and %s1, %s0, %s1
+; CHECK-NEXT: srl %s1, %s1, 1
; CHECK-NEXT: lvs %s1, %v0(%s1)
; CHECK-NEXT: nnd %s0, %s0, (63)0
; CHECK-NEXT: sla.w.sx %s0, %s0, 5
define fastcc double @extract_rr_v256f64(i32 signext %idx, <256 x double> %v) {
; CHECK-LABEL: extract_rr_v256f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lvs %s0, %v0(%s0)
; CHECK-NEXT: b.l.t (, %s10)
%ret = extractelement <256 x double> %v, i32 %idx
define fastcc float @extract_rr_v256f32(i32 signext %idx, <256 x float> %v) {
; CHECK-LABEL: extract_rr_v256f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lvs %s0, %v0(%s0)
; CHECK-NEXT: b.l.t (, %s10)
%ret = extractelement <256 x float> %v, i32 %idx
define fastcc float @extract_rr_v512f32(<512 x float> %v, i32 signext %idx) {
; CHECK-LABEL: extract_rr_v512f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: srl %s1, %s0, 1
+; CHECK-NEXT: lea %s1, -2
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: and %s1, %s0, %s1
+; CHECK-NEXT: srl %s1, %s1, 1
; CHECK-NEXT: lvs %s1, %v0(%s1)
; CHECK-NEXT: nnd %s0, %s0, (63)0
; CHECK-NEXT: sla.w.sx %s0, %s0, 5
define fastcc <256 x i64> @insert_rr_v256i64(i32 signext %idx, i64 %s) {
; CHECK-LABEL: insert_rr_v256i64:
; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lsv %v0(%s0), %s1
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <256 x i64> undef, i64 %s, i32 %idx
; CHECK-LABEL: insert_rr_v256i32:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lsv %v0(%s0), %s1
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <256 x i32> undef, i32 %s, i32 %idx
; CHECK-NEXT: nnd %s2, %s0, (63)0
; CHECK-NEXT: sla.w.sx %s2, %s2, 5
; CHECK-NEXT: sll %s1, %s1, %s2
+; CHECK-NEXT: lea %s3, -2
+; CHECK-NEXT: and %s3, %s3, (32)0
+; CHECK-NEXT: and %s0, %s0, %s3
; CHECK-NEXT: srl %s0, %s0, 1
; CHECK-NEXT: lvs %s3, %v0(%s0)
; CHECK-NEXT: srl %s2, (32)1, %s2
define fastcc <256 x double> @insert_rr_v256f64(i32 signext %idx, double %s) {
; CHECK-LABEL: insert_rr_v256f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lsv %v0(%s0), %s1
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <256 x double> undef, double %s, i32 %idx
define fastcc <256 x float> @insert_rr_v256f32(i32 signext %idx, float %s) {
; CHECK-LABEL: insert_rr_v256f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lsv %v0(%s0), %s1
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <256 x float> undef, float %s, i32 %idx
; CHECK-LABEL: insert_rr_v512f32:
; CHECK: # %bb.0:
; CHECK-NEXT: sra.l %s1, %s1, 32
-; CHECK-NEXT: srl %s2, %s0, 1
+; CHECK-NEXT: lea %s2, -2
+; CHECK-NEXT: and %s2, %s2, (32)0
+; CHECK-NEXT: and %s2, %s0, %s2
+; CHECK-NEXT: srl %s2, %s2, 1
; CHECK-NEXT: lvs %s3, %v0(%s2)
; CHECK-NEXT: nnd %s0, %s0, (63)0
; CHECK-NEXT: sla.w.sx %s0, %s0, 5
; CHECK-LABEL: swizzle_one_i8x16:
; CHECK: .functype swizzle_one_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1
-; CHECK-NEXT: return $pop0
+; CHECK-NEXT: global.get $push5=, __stack_pointer
+; CHECK-NEXT: i32.const $push6=, 16
+; CHECK-NEXT: i32.sub $push8=, $pop5, $pop6
+; CHECK-NEXT: local.tee $push7=, $2=, $pop8
+; CHECK-NEXT: v128.store 0($pop7), $0
+; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 0
+; CHECK-NEXT: i32.const $push1=, 15
+; CHECK-NEXT: i32.and $push2=, $pop0, $pop1
+; CHECK-NEXT: i32.or $push3=, $2, $pop2
+; CHECK-NEXT: v128.load8_splat $push4=, 0($pop3)
+; CHECK-NEXT: return $pop4
%m0 = extractelement <16 x i8> %mask, i32 0
%s0 = extractelement <16 x i8> %src, i8 %m0
%v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
; CHECK-LABEL: swizzle_all_i8x16:
; CHECK: .functype swizzle_all_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1
-; CHECK-NEXT: return $pop0
+; CHECK-NEXT: global.get $push80=, __stack_pointer
+; CHECK-NEXT: i32.const $push81=, 16
+; CHECK-NEXT: i32.sub $push98=, $pop80, $pop81
+; CHECK-NEXT: local.tee $push97=, $2=, $pop98
+; CHECK-NEXT: v128.store 0($pop97), $0
+; CHECK-NEXT: i8x16.extract_lane_u $push61=, $1, 0
+; CHECK-NEXT: i32.const $push1=, 15
+; CHECK-NEXT: i32.and $push62=, $pop61, $pop1
+; CHECK-NEXT: i32.or $push63=, $2, $pop62
+; CHECK-NEXT: v128.load8_splat $push64=, 0($pop63)
+; CHECK-NEXT: i8x16.extract_lane_u $push57=, $1, 1
+; CHECK-NEXT: i32.const $push96=, 15
+; CHECK-NEXT: i32.and $push58=, $pop57, $pop96
+; CHECK-NEXT: i32.or $push59=, $2, $pop58
+; CHECK-NEXT: i32.load8_u $push60=, 0($pop59)
+; CHECK-NEXT: i8x16.replace_lane $push65=, $pop64, 1, $pop60
+; CHECK-NEXT: i8x16.extract_lane_u $push53=, $1, 2
+; CHECK-NEXT: i32.const $push95=, 15
+; CHECK-NEXT: i32.and $push54=, $pop53, $pop95
+; CHECK-NEXT: i32.or $push55=, $2, $pop54
+; CHECK-NEXT: i32.load8_u $push56=, 0($pop55)
+; CHECK-NEXT: i8x16.replace_lane $push66=, $pop65, 2, $pop56
+; CHECK-NEXT: i8x16.extract_lane_u $push49=, $1, 3
+; CHECK-NEXT: i32.const $push94=, 15
+; CHECK-NEXT: i32.and $push50=, $pop49, $pop94
+; CHECK-NEXT: i32.or $push51=, $2, $pop50
+; CHECK-NEXT: i32.load8_u $push52=, 0($pop51)
+; CHECK-NEXT: i8x16.replace_lane $push67=, $pop66, 3, $pop52
+; CHECK-NEXT: i8x16.extract_lane_u $push45=, $1, 4
+; CHECK-NEXT: i32.const $push93=, 15
+; CHECK-NEXT: i32.and $push46=, $pop45, $pop93
+; CHECK-NEXT: i32.or $push47=, $2, $pop46
+; CHECK-NEXT: i32.load8_u $push48=, 0($pop47)
+; CHECK-NEXT: i8x16.replace_lane $push68=, $pop67, 4, $pop48
+; CHECK-NEXT: i8x16.extract_lane_u $push41=, $1, 5
+; CHECK-NEXT: i32.const $push92=, 15
+; CHECK-NEXT: i32.and $push42=, $pop41, $pop92
+; CHECK-NEXT: i32.or $push43=, $2, $pop42
+; CHECK-NEXT: i32.load8_u $push44=, 0($pop43)
+; CHECK-NEXT: i8x16.replace_lane $push69=, $pop68, 5, $pop44
+; CHECK-NEXT: i8x16.extract_lane_u $push37=, $1, 6
+; CHECK-NEXT: i32.const $push91=, 15
+; CHECK-NEXT: i32.and $push38=, $pop37, $pop91
+; CHECK-NEXT: i32.or $push39=, $2, $pop38
+; CHECK-NEXT: i32.load8_u $push40=, 0($pop39)
+; CHECK-NEXT: i8x16.replace_lane $push70=, $pop69, 6, $pop40
+; CHECK-NEXT: i8x16.extract_lane_u $push33=, $1, 7
+; CHECK-NEXT: i32.const $push90=, 15
+; CHECK-NEXT: i32.and $push34=, $pop33, $pop90
+; CHECK-NEXT: i32.or $push35=, $2, $pop34
+; CHECK-NEXT: i32.load8_u $push36=, 0($pop35)
+; CHECK-NEXT: i8x16.replace_lane $push71=, $pop70, 7, $pop36
+; CHECK-NEXT: i8x16.extract_lane_u $push29=, $1, 8
+; CHECK-NEXT: i32.const $push89=, 15
+; CHECK-NEXT: i32.and $push30=, $pop29, $pop89
+; CHECK-NEXT: i32.or $push31=, $2, $pop30
+; CHECK-NEXT: i32.load8_u $push32=, 0($pop31)
+; CHECK-NEXT: i8x16.replace_lane $push72=, $pop71, 8, $pop32
+; CHECK-NEXT: i8x16.extract_lane_u $push25=, $1, 9
+; CHECK-NEXT: i32.const $push88=, 15
+; CHECK-NEXT: i32.and $push26=, $pop25, $pop88
+; CHECK-NEXT: i32.or $push27=, $2, $pop26
+; CHECK-NEXT: i32.load8_u $push28=, 0($pop27)
+; CHECK-NEXT: i8x16.replace_lane $push73=, $pop72, 9, $pop28
+; CHECK-NEXT: i8x16.extract_lane_u $push21=, $1, 10
+; CHECK-NEXT: i32.const $push87=, 15
+; CHECK-NEXT: i32.and $push22=, $pop21, $pop87
+; CHECK-NEXT: i32.or $push23=, $2, $pop22
+; CHECK-NEXT: i32.load8_u $push24=, 0($pop23)
+; CHECK-NEXT: i8x16.replace_lane $push74=, $pop73, 10, $pop24
+; CHECK-NEXT: i8x16.extract_lane_u $push17=, $1, 11
+; CHECK-NEXT: i32.const $push86=, 15
+; CHECK-NEXT: i32.and $push18=, $pop17, $pop86
+; CHECK-NEXT: i32.or $push19=, $2, $pop18
+; CHECK-NEXT: i32.load8_u $push20=, 0($pop19)
+; CHECK-NEXT: i8x16.replace_lane $push75=, $pop74, 11, $pop20
+; CHECK-NEXT: i8x16.extract_lane_u $push13=, $1, 12
+; CHECK-NEXT: i32.const $push85=, 15
+; CHECK-NEXT: i32.and $push14=, $pop13, $pop85
+; CHECK-NEXT: i32.or $push15=, $2, $pop14
+; CHECK-NEXT: i32.load8_u $push16=, 0($pop15)
+; CHECK-NEXT: i8x16.replace_lane $push76=, $pop75, 12, $pop16
+; CHECK-NEXT: i8x16.extract_lane_u $push9=, $1, 13
+; CHECK-NEXT: i32.const $push84=, 15
+; CHECK-NEXT: i32.and $push10=, $pop9, $pop84
+; CHECK-NEXT: i32.or $push11=, $2, $pop10
+; CHECK-NEXT: i32.load8_u $push12=, 0($pop11)
+; CHECK-NEXT: i8x16.replace_lane $push77=, $pop76, 13, $pop12
+; CHECK-NEXT: i8x16.extract_lane_u $push5=, $1, 14
+; CHECK-NEXT: i32.const $push83=, 15
+; CHECK-NEXT: i32.and $push6=, $pop5, $pop83
+; CHECK-NEXT: i32.or $push7=, $2, $pop6
+; CHECK-NEXT: i32.load8_u $push8=, 0($pop7)
+; CHECK-NEXT: i8x16.replace_lane $push78=, $pop77, 14, $pop8
+; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 15
+; CHECK-NEXT: i32.const $push82=, 15
+; CHECK-NEXT: i32.and $push2=, $pop0, $pop82
+; CHECK-NEXT: i32.or $push3=, $2, $pop2
+; CHECK-NEXT: i32.load8_u $push4=, 0($pop3)
+; CHECK-NEXT: i8x16.replace_lane $push79=, $pop78, 15, $pop4
+; CHECK-NEXT: return $pop79
%m0 = extractelement <16 x i8> %mask, i32 0
%s0 = extractelement <16 x i8> %src, i8 %m0
%v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
; CHECK-LABEL: mashup_swizzle_i8x16:
; CHECK: .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1
-; CHECK-NEXT: i8x16.replace_lane $push1=, $pop0, 3, $2
-; CHECK-NEXT: i32.const $push2=, 42
-; CHECK-NEXT: i8x16.replace_lane $push3=, $pop1, 4, $pop2
-; CHECK-NEXT: i8x16.replace_lane $push4=, $pop3, 12, $2
-; CHECK-NEXT: i32.const $push6=, 42
-; CHECK-NEXT: i8x16.replace_lane $push5=, $pop4, 14, $pop6
-; CHECK-NEXT: return $pop5
+; CHECK-NEXT: global.get $push12=, __stack_pointer
+; CHECK-NEXT: i32.const $push13=, 16
+; CHECK-NEXT: i32.sub $push16=, $pop12, $pop13
+; CHECK-NEXT: local.tee $push15=, $3=, $pop16
+; CHECK-NEXT: v128.store 0($pop15), $0
+; CHECK-NEXT: i8x16.extract_lane_u $push7=, $1, 7
+; CHECK-NEXT: i32.const $push1=, 15
+; CHECK-NEXT: i32.and $push8=, $pop7, $pop1
+; CHECK-NEXT: i32.or $push9=, $3, $pop8
+; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 0
+; CHECK-NEXT: i32.const $push14=, 15
+; CHECK-NEXT: i32.and $push2=, $pop0, $pop14
+; CHECK-NEXT: i32.or $push3=, $3, $pop2
+; CHECK-NEXT: v128.const $push4=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0
+; CHECK-NEXT: v128.load8_lane $push5=, 0($pop3), $pop4, 0
+; CHECK-NEXT: i8x16.replace_lane $push6=, $pop5, 3, $2
+; CHECK-NEXT: v128.load8_lane $push10=, 0($pop9), $pop6, 7
+; CHECK-NEXT: i8x16.replace_lane $push11=, $pop10, 12, $2
+; CHECK-NEXT: return $pop11
%m0 = extractelement <16 x i8> %mask, i32 0
%s0 = extractelement <16 x i8> %src, i8 %m0
%v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
define i32 @extractelt_undef_insertelt(i32 %x, i32 %y) {
; CHECK-LABEL: extractelt_undef_insertelt:
; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ret{{[l|q]}}
%b = insertelement <4 x i32> zeroinitializer, i32 %x, i64 3
%c = icmp uge i32 %y, %y
;
; AVX512-LABEL: arg_i64_v2i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: movslq %esi, %rax
+; AVX512-NEXT: movl %esi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %xmm1
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512-NEXT: vpbroadcastq %rdi, %xmm0 {%k1}
; SSE41: # %bb.0:
; SSE41-NEXT: movapd %xmm0, %xmm2
; SSE41-NEXT: movddup {{.*#+}} xmm1 = xmm1[0,0]
-; SSE41-NEXT: movslq %edi, %rax
+; SSE41-NEXT: movl %edi, %eax
; SSE41-NEXT: movq %rax, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX1-LABEL: arg_f64_v2f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
-; AVX1-NEXT: movslq %edi, %rax
+; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX2-LABEL: arg_f64_v2f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
-; AVX2-NEXT: movslq %edi, %rax
+; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
;
; AVX512-LABEL: arg_f64_v2f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: movslq %edi, %rax
+; AVX512-NEXT: movl %edi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %xmm2
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %k1
; AVX512-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
;
; AVX512-LABEL: load_i64_v2i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: movslq %esi, %rax
+; AVX512-NEXT: movl %esi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %xmm1
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1}
; SSE41: # %bb.0:
; SSE41-NEXT: movapd %xmm0, %xmm1
; SSE41-NEXT: movddup {{.*#+}} xmm2 = mem[0,0]
-; SSE41-NEXT: movslq %esi, %rax
+; SSE41-NEXT: movl %esi, %eax
; SSE41-NEXT: movq %rax, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX1-LABEL: load_f64_v2f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
-; AVX1-NEXT: movslq %esi, %rax
+; AVX1-NEXT: movl %esi, %eax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX2-LABEL: load_f64_v2f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
-; AVX2-NEXT: movslq %esi, %rax
+; AVX2-NEXT: movl %esi, %eax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
;
; AVX512-LABEL: load_f64_v2f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: movslq %esi, %rax
+; AVX512-NEXT: movl %esi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %xmm1
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0]
;
; AVX512-LABEL: arg_i64_v4i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: movslq %esi, %rax
+; AVX512-NEXT: movl %esi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %ymm1
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512-NEXT: vpbroadcastq %rdi, %ymm0 {%k1}
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
-; AVX1-NEXT: movslq %edi, %rax
+; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
; AVX2-LABEL: arg_f64_v4f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1
-; AVX2-NEXT: movslq %edi, %rax
+; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2
; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
;
; AVX512-LABEL: arg_f64_v4f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: movslq %edi, %rax
+; AVX512-NEXT: movl %edi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %ymm2
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %k1
; AVX512-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1}
;
; AVX512-LABEL: load_i64_v4i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: movslq %esi, %rax
+; AVX512-NEXT: movl %esi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %ymm1
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1}
;
; AVX1-LABEL: load_f64_v4f64:
; AVX1: # %bb.0:
-; AVX1-NEXT: movslq %esi, %rax
+; AVX1-NEXT: movl %esi, %eax
; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
; AVX2-LABEL: load_f64_v4f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastsd (%rdi), %ymm1
-; AVX2-NEXT: movslq %esi, %rax
+; AVX2-NEXT: movl %esi, %eax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2
; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
;
; AVX512-LABEL: load_f64_v4f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: movslq %esi, %rax
+; AVX512-NEXT: movl %esi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %ymm1
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1}
; SSE-LABEL: PR44139:
; SSE: # %bb.0:
; SSE-NEXT: movl (%rdi), %eax
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
+; SSE-NEXT: movdqa %xmm0, 96(%rdi)
+; SSE-NEXT: movdqa %xmm0, 112(%rdi)
+; SSE-NEXT: movdqa %xmm0, 64(%rdi)
+; SSE-NEXT: movdqa %xmm0, 80(%rdi)
+; SSE-NEXT: movdqa %xmm0, 32(%rdi)
+; SSE-NEXT: movdqa %xmm0, 48(%rdi)
+; SSE-NEXT: movdqa %xmm0, (%rdi)
+; SSE-NEXT: movdqa %xmm0, 16(%rdi)
; SSE-NEXT: leal 2147483647(%rax), %ecx
; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: cmovnsl %eax, %ecx
; SSE-NEXT: divl %ecx
; SSE-NEXT: retq
;
-; AVX-LABEL: PR44139:
-; AVX: # %bb.0:
-; AVX-NEXT: movl (%rdi), %eax
-; AVX-NEXT: leal 2147483647(%rax), %ecx
-; AVX-NEXT: testl %eax, %eax
-; AVX-NEXT: cmovnsl %eax, %ecx
-; AVX-NEXT: andl $-2147483648, %ecx # imm = 0x80000000
-; AVX-NEXT: addl %eax, %ecx
-; AVX-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: divl %ecx
-; AVX-NEXT: retq
+; AVX1OR2-LABEL: PR44139:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX1OR2-NEXT: movl (%rdi), %eax
+; AVX1OR2-NEXT: vmovaps %ymm0, 64(%rdi)
+; AVX1OR2-NEXT: vmovaps %ymm0, 96(%rdi)
+; AVX1OR2-NEXT: vmovaps %ymm0, (%rdi)
+; AVX1OR2-NEXT: vmovaps %ymm0, 32(%rdi)
+; AVX1OR2-NEXT: leal 2147483647(%rax), %ecx
+; AVX1OR2-NEXT: testl %eax, %eax
+; AVX1OR2-NEXT: cmovnsl %eax, %ecx
+; AVX1OR2-NEXT: andl $-2147483648, %ecx # imm = 0x80000000
+; AVX1OR2-NEXT: addl %eax, %ecx
+; AVX1OR2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX1OR2-NEXT: xorl %edx, %edx
+; AVX1OR2-NEXT: divl %ecx
+; AVX1OR2-NEXT: vzeroupper
+; AVX1OR2-NEXT: retq
+;
+; AVX512-LABEL: PR44139:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vbroadcastsd (%rdi), %zmm0
+; AVX512-NEXT: movl (%rdi), %eax
+; AVX512-NEXT: vmovaps %zmm0, (%rdi)
+; AVX512-NEXT: vmovaps %zmm0, 64(%rdi)
+; AVX512-NEXT: leal 2147483647(%rax), %ecx
+; AVX512-NEXT: testl %eax, %eax
+; AVX512-NEXT: cmovnsl %eax, %ecx
+; AVX512-NEXT: andl $-2147483648, %ecx # imm = 0x80000000
+; AVX512-NEXT: addl %eax, %ecx
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: xorl %edx, %edx
+; AVX512-NEXT: divl %ecx
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
;
; X86AVX2-LABEL: PR44139:
; X86AVX2: # %bb.0:
-; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86AVX2-NEXT: movl (%eax), %eax
+; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86AVX2-NEXT: movl (%ecx), %eax
+; X86AVX2-NEXT: vbroadcastsd (%ecx), %ymm0
+; X86AVX2-NEXT: vmovaps %ymm0, 64(%ecx)
+; X86AVX2-NEXT: vmovaps %ymm0, 96(%ecx)
+; X86AVX2-NEXT: vmovaps %ymm0, (%ecx)
+; X86AVX2-NEXT: vmovaps %ymm0, 32(%ecx)
; X86AVX2-NEXT: leal 2147483647(%eax), %ecx
; X86AVX2-NEXT: testl %eax, %eax
; X86AVX2-NEXT: cmovnsl %eax, %ecx
; X86AVX2-NEXT: addl %eax, %ecx
; X86AVX2-NEXT: xorl %edx, %edx
; X86AVX2-NEXT: divl %ecx
+; X86AVX2-NEXT: vzeroupper
; X86AVX2-NEXT: retl
%L = load <16 x i64>, ptr %p
%E1 = extractelement <16 x i64> %L, i64 0
define <8 x i16> @var_shuffle_v8i16(<8 x i16> %v, <8 x i16> %indices) nounwind {
; SSE3-LABEL: var_shuffle_v8i16:
; SSE3: # %bb.0:
-; SSE3-NEXT: movd %xmm1, %eax
+; SSE3-NEXT: pextrw $0, %xmm1, %eax
; SSE3-NEXT: pextrw $1, %xmm1, %ecx
; SSE3-NEXT: pextrw $2, %xmm1, %edx
; SSE3-NEXT: pextrw $3, %xmm1, %esi
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm4
-; AVX512F-NEXT: vmovd %xmm4, %eax
+; AVX512F-NEXT: vpextrw $0, %xmm4, %eax
; AVX512F-NEXT: vmovaps %zmm0, (%rsp)
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vpextrw $7, %xmm4, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0
-; AVX512F-NEXT: vmovd %xmm3, %eax
+; AVX512F-NEXT: vpextrw $0, %xmm3, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
; AVX512F-NEXT: vpextrw $7, %xmm3, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm4, %xmm3
-; AVX512F-NEXT: vmovd %xmm2, %eax
+; AVX512F-NEXT: vpextrw $0, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vpinsrw $7, %eax, %xmm4, %xmm2
-; AVX512F-NEXT: vmovd %xmm1, %eax
+; AVX512F-NEXT: vpextrw $0, %xmm1, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm4
-; AVX512F-NEXT: vmovd %xmm4, %eax
+; AVX512F-NEXT: vpextrb $0, %xmm4, %eax
; AVX512F-NEXT: vmovaps %zmm0, (%rsp)
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpextrb $15, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0
-; AVX512F-NEXT: vmovd %xmm3, %eax
+; AVX512F-NEXT: vpextrb $0, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm4, %xmm3
-; AVX512F-NEXT: vmovd %xmm2, %eax
+; AVX512F-NEXT: vpextrb $0, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm4, %xmm2
-; AVX512F-NEXT: vmovd %xmm1, %eax
+; AVX512F-NEXT: vpextrb $0, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm4
-; AVX512BW-NEXT: vmovd %xmm4, %eax
+; AVX512BW-NEXT: vpextrb $0, %xmm4, %eax
; AVX512BW-NEXT: vmovaps %zmm0, (%rsp)
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpextrb $15, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0
-; AVX512BW-NEXT: vmovd %xmm3, %eax
+; AVX512BW-NEXT: vpextrb $0, %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vmovd %eax, %xmm4
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm3
-; AVX512BW-NEXT: vmovd %xmm2, %eax
+; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vmovd %eax, %xmm4
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm2
-; AVX512BW-NEXT: vmovd %xmm1, %eax
+; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vmovd %eax, %xmm4
; X32-LABEL: ossfuzz15662:
; X32: # %bb.0:
; X32-NEXT: xorps %xmm0, %xmm0
+; X32-NEXT: movaps %xmm0, (%eax)
+; X32-NEXT: xorps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: ossfuzz15662:
; X64: # %bb.0:
; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: movaps %xmm0, (%rax)
+; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: retq
%C10 = icmp ule i1 false, false
%C3 = icmp ule i1 true, undef