// We limit this transform to power-of-2 types because we expect that the
// backend can convert the simplified IR patterns to identical nodes as the
// original IR.
- // TODO: If we can verify that behavior for arbitrary types, the power-of-2
- // checks can be removed.
+ // TODO: If we can verify the same behavior for arbitrary types, the
+ // power-of-2 checks can be removed.
Value *X = Shuffle0->getOperand(0);
Value *Y = Shuffle1->getOperand(0);
if (X->getType() != Y->getType() ||
for (int i = 0, e = Mask.size(); i != e; ++i) {
if (Mask[i] == -1)
continue;
- if (Mask[i] < WideElts)
+
+ // If this shuffle is choosing an undef element from 1 of the sources, that
+ // element is undef.
+ if (Mask[i] < WideElts) {
+ if (Shuffle0->getMaskValue(Mask[i]) == -1)
+ continue;
+ } else {
+ if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
+ continue;
+ }
+
+ // If this shuffle is choosing from the 1st narrow op, the mask element is
+ // the same. If this shuffle is choosing from the 2nd narrow op, the mask
+ // element is offset down to adjust for the narrow vector widths.
+ if (Mask[i] < WideElts) {
+ assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
NewMask[i] = ConstantInt::get(I32Ty, Mask[i]);
- else
+ } else {
+ assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
NewMask[i] = ConstantInt::get(I32Ty, Mask[i] - (WideElts - NarrowElts));
+ }
}
return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
}
%s3 = shufflevector <4 x double> %s2, <4 x double> %s1, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
ret <4 x double> %s3
}
+
+; Demanded-vector-elements simplification may not be able to simplify a
+; shuffle mask before we try to narrow the shuffle. This used to crash.
+
+; Test input: widens %x to four lanes, re-inserts lane 0 of %x into lane 0 of
+; the widened vector, stores that vector through %p, and returns the widened
+; value. The CHECK lines expect the insertelement to be rewritten as the same
+; widening shuffle of %x.
+define <4 x float> @insert_subvector_crash_invalid_mask_elt(<2 x float> %x, <4 x float>* %p) {
+; CHECK-LABEL: @insert_subvector_crash_invalid_mask_elt(
+; CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[I:%.*]] = shufflevector <2 x float> [[X]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: store <4 x float> [[I]], <4 x float>* [[P:%.*]], align 16
+; CHECK-NEXT: ret <4 x float> [[WIDEN]]
+;
+ ; Widen %x from <2 x float> to <4 x float>; result lanes 2 and 3 are undef.
+ %widen = shufflevector <2 x float> %x, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ ; Extract lane 0 of the narrow source vector.
+ %ext2 = extractelement <2 x float> %x, i32 0
+ ; Insert that scalar into lane 0 of the widened vector (the index is an i16).
+ %I = insertelement <4 x float> %widen, float %ext2, i16 0
+ ; Both the inserted vector (via the store) and %widen (via the return) are used.
+ store <4 x float> %I, <4 x float>* %p
+ ret <4 x float> %widen
+}