We already perform some basic folds (add/sub with zero, etc.) on scalar types; this patch adds basic support for constant splats as well in a few cases (we can add more with future test coverage).
In the cases I've enabled, we can handle buildvector implicit truncation, as we're not creating new constant nodes from the vector types — we're just returning existing nodes. This allows us to fold a number of extra cases in the AArch64 tests.
I haven't enabled support for undefs in buildvector splats: we're often checking for zero/all-ones patterns that return the original constant, and we shouldn't be returning undef elements in some of these cases. We can enable this later if we're OK with creating new constants.
Differential Revision: https://reviews.llvm.org/D118264
std::swap(N1, N2);
}
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ auto *N1C = dyn_cast<ConstantSDNode>(N1);
+ auto *N2C = dyn_cast<ConstantSDNode>(N2);
+
+ // Don't allow undefs in vector splats - we might be returning N2 when folding
+ // to zero etc.
+ ConstantSDNode *N2CV =
+ isConstOrConstSplat(N2, /*AllowUndefs*/ false, /*AllowTruncation*/ true);
switch (Opcode) {
default: break;
N1.getValueType() == VT && "Binary operator types must match!");
// (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
// worth handling here.
- if (N2C && N2C->isZero())
+ if (N2CV && N2CV->isZero())
return N2;
- if (N2C && N2C->isAllOnes()) // X & -1 -> X
+ if (N2CV && N2CV->isAllOnes()) // X & -1 -> X
return N1;
break;
case ISD::OR:
N1.getValueType() == VT && "Binary operator types must match!");
// (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
// it's worth handling here.
- if (N2C && N2C->isZero())
+ if (N2CV && N2CV->isZero())
return N1;
if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() &&
VT.getVectorElementType() == MVT::i1)
// size of the value, the shift/rotate count is guaranteed to be zero.
if (VT == MVT::i1)
return N1;
- if (N2C && N2C->isZero())
+ if (N2CV && N2CV->isZero())
return N1;
break;
case ISD::FP_ROUND:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0x00000000ff00ff
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: orr v0.2s, #0
; CHECK-NEXT: ret
%b = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i8> %a, <8 x i8> zeroinitializer
ret <8 x i8> %b
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0xffff00000000ffff
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: orr v0.2s, #0
; CHECK-NEXT: ret
%b = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i16> %a, <4 x i16> zeroinitializer
ret <4 x i16> %b
; CHECK-NEXT: adrp x8, .LCPI85_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI85_0]
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: orr v0.4s, #0
; CHECK-NEXT: ret
%b = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> zeroinitializer
ret <4 x i32> %b
define void @function() {
; CHECK: cmp r0, #0
; CHECK: bxne lr
-; CHECK: vmov.i32 q8, #0xff0000
entry:
br i1 undef, label %vector.body, label %for.end
-; CHECK: vld1.32 {d18, d19}, [r0]
-; CHECK: vand q10, q9, q8
-; CHECK: vbic.i16 q9, #0xff
-; CHECK: vorr q9, q9, q10
-; CHECK: vst1.32 {d18, d19}, [r0]
+; CHECK: vld1.32 {d16, d17}, [r0]
+; CHECK: vbic.i32 q8, #0xff
+; CHECK: vorr q8, q8, q9
+; CHECK: vst1.32 {d16, d17}, [r0]
vector.body:
%wide.load = load <4 x i32>, <4 x i32>* undef, align 4
%0 = and <4 x i32> %wide.load, <i32 -16711936, i32 -16711936, i32 -16711936, i32 -16711936>
; AVX-LABEL: smulo_v4i1:
; AVX: # %bb.0:
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX-NEXT: vmovmskps %xmm0, %eax
-; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
+; AVX-NEXT: vpslld $31, %xmm0, %xmm1
+; AVX-NEXT: vpsrad $31, %xmm1, %xmm0
+; AVX-NEXT: vmovmskps %xmm1, %eax
; AVX-NEXT: movb %al, (%rdi)
; AVX-NEXT: retq
;