{ ISD::CTTZ, MVT::v64i8, 9 },
{ ISD::SADDSAT, MVT::v32i16, 1 },
{ ISD::SADDSAT, MVT::v64i8, 1 },
+ { ISD::SMAX, MVT::v32i16, 1 },
+ { ISD::SMAX, MVT::v64i8, 1 },
+ { ISD::SMIN, MVT::v32i16, 1 },
+ { ISD::SMIN, MVT::v64i8, 1 },
{ ISD::SSUBSAT, MVT::v32i16, 1 },
{ ISD::SSUBSAT, MVT::v64i8, 1 },
{ ISD::UADDSAT, MVT::v32i16, 1 },
{ ISD::UADDSAT, MVT::v64i8, 1 },
+ { ISD::UMAX, MVT::v32i16, 1 },
+ { ISD::UMAX, MVT::v64i8, 1 },
+ { ISD::UMIN, MVT::v32i16, 1 },
+ { ISD::UMIN, MVT::v64i8, 1 },
{ ISD::USUBSAT, MVT::v32i16, 1 },
{ ISD::USUBSAT, MVT::v64i8, 1 },
};
{ ISD::CTTZ, MVT::v16i32, 28 },
{ ISD::CTTZ, MVT::v32i16, 24 },
{ ISD::CTTZ, MVT::v64i8, 18 },
+ { ISD::SMAX, MVT::v8i64, 1 },
+ { ISD::SMAX, MVT::v16i32, 1 },
+ { ISD::SMAX, MVT::v32i16, 2 }, // FIXME: include split
+ { ISD::SMAX, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::SMAX, MVT::v4i64, 1 },
+ { ISD::SMAX, MVT::v2i64, 1 },
+ { ISD::SMIN, MVT::v8i64, 1 },
+ { ISD::SMIN, MVT::v16i32, 1 },
+ { ISD::SMIN, MVT::v32i16, 2 }, // FIXME: include split
+ { ISD::SMIN, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::SMIN, MVT::v4i64, 1 },
+ { ISD::SMIN, MVT::v2i64, 1 },
+ { ISD::UMAX, MVT::v8i64, 1 },
+ { ISD::UMAX, MVT::v16i32, 1 },
+ { ISD::UMAX, MVT::v32i16, 2 }, // FIXME: include split
+ { ISD::UMAX, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::UMAX, MVT::v4i64, 1 },
+ { ISD::UMAX, MVT::v2i64, 1 },
+ { ISD::UMIN, MVT::v8i64, 1 },
+ { ISD::UMIN, MVT::v16i32, 1 },
+ { ISD::UMIN, MVT::v32i16, 2 }, // FIXME: include split
+ { ISD::UMIN, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::UMIN, MVT::v4i64, 1 },
+ { ISD::UMIN, MVT::v2i64, 1 },
{ ISD::USUBSAT, MVT::v16i32, 2 }, // pmaxud + psubd
{ ISD::USUBSAT, MVT::v2i64, 2 }, // pmaxuq + psubq
{ ISD::USUBSAT, MVT::v4i64, 2 }, // pmaxuq + psubq
{ ISD::CTTZ, MVT::v32i8, 9 },
{ ISD::SADDSAT, MVT::v16i16, 1 },
{ ISD::SADDSAT, MVT::v32i8, 1 },
+ { ISD::SMAX, MVT::v8i32, 1 },
+ { ISD::SMAX, MVT::v16i16, 1 },
+ { ISD::SMAX, MVT::v32i8, 1 },
+ { ISD::SMIN, MVT::v8i32, 1 },
+ { ISD::SMIN, MVT::v16i16, 1 },
+ { ISD::SMIN, MVT::v32i8, 1 },
{ ISD::SSUBSAT, MVT::v16i16, 1 },
{ ISD::SSUBSAT, MVT::v32i8, 1 },
{ ISD::UADDSAT, MVT::v16i16, 1 },
{ ISD::UADDSAT, MVT::v32i8, 1 },
{ ISD::UADDSAT, MVT::v8i32, 3 }, // not + pminud + paddd
+ { ISD::UMAX, MVT::v8i32, 1 },
+ { ISD::UMAX, MVT::v16i16, 1 },
+ { ISD::UMAX, MVT::v32i8, 1 },
+ { ISD::UMIN, MVT::v8i32, 1 },
+ { ISD::UMIN, MVT::v16i16, 1 },
+ { ISD::UMIN, MVT::v32i8, 1 },
{ ISD::USUBSAT, MVT::v16i16, 1 },
{ ISD::USUBSAT, MVT::v32i8, 1 },
{ ISD::USUBSAT, MVT::v8i32, 2 }, // pmaxud + psubd
{ ISD::CTTZ, MVT::v32i8, 20 }, // 2 x 128-bit Op + extract/insert
{ ISD::SADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::SADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SMAX, MVT::v8i32, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SMAX, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SMAX, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SMIN, MVT::v8i32, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SMIN, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SMIN, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::SSUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::SSUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::UADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::UADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::UADDSAT, MVT::v8i32, 8 }, // 2 x 128-bit Op + extract/insert
+ { ISD::UMAX, MVT::v8i32, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::UMAX, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::UMAX, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::UMIN, MVT::v8i32, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::UMIN, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::UMIN, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::USUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::USUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::USUBSAT, MVT::v8i32, 6 }, // 2 x 128-bit Op + extract/insert
{ ISD::FSQRT, MVT::f32, 18 }, // Nehalem from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/
};
+ static const CostTblEntry SSE41CostTbl[] = {
+ { ISD::SMAX, MVT::v4i32, 1 },
+ { ISD::SMAX, MVT::v16i8, 1 },
+ { ISD::SMIN, MVT::v4i32, 1 },
+ { ISD::SMIN, MVT::v16i8, 1 },
+ { ISD::UMAX, MVT::v4i32, 1 },
+ { ISD::UMAX, MVT::v8i16, 1 },
+ { ISD::UMIN, MVT::v4i32, 1 },
+ { ISD::UMIN, MVT::v8i16, 1 },
+ };
static const CostTblEntry SSSE3CostTbl[] = {
{ ISD::ABS, MVT::v4i32, 1 },
{ ISD::ABS, MVT::v8i16, 1 },
{ ISD::CTTZ, MVT::v16i8, 13 },
{ ISD::SADDSAT, MVT::v8i16, 1 },
{ ISD::SADDSAT, MVT::v16i8, 1 },
+ { ISD::SMAX, MVT::v8i16, 1 },
+ { ISD::SMIN, MVT::v8i16, 1 },
{ ISD::SSUBSAT, MVT::v8i16, 1 },
{ ISD::SSUBSAT, MVT::v16i8, 1 },
{ ISD::UADDSAT, MVT::v8i16, 1 },
{ ISD::UADDSAT, MVT::v16i8, 1 },
+ { ISD::UMAX, MVT::v16i8, 1 },
+ { ISD::UMIN, MVT::v16i8, 1 },
{ ISD::USUBSAT, MVT::v8i16, 1 },
{ ISD::USUBSAT, MVT::v16i8, 1 },
{ ISD::FMAXNUM, MVT::f64, 4 },
case Intrinsic::sadd_sat:
ISD = ISD::SADDSAT;
break;
+ case Intrinsic::smax:
+ ISD = ISD::SMAX;
+ break;
+ case Intrinsic::smin:
+ ISD = ISD::SMIN;
+ break;
case Intrinsic::ssub_sat:
ISD = ISD::SSUBSAT;
break;
case Intrinsic::uadd_sat:
ISD = ISD::UADDSAT;
break;
+ case Intrinsic::umax:
+ ISD = ISD::UMAX;
+ break;
+ case Intrinsic::umin:
+ ISD = ISD::UMIN;
+ break;
case Intrinsic::usub_sat:
ISD = ISD::USUBSAT;
break;
if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
return LT.first * Entry->Cost;
+ if (ST->hasSSE41())
+ if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
if (ST->hasSSSE3())
if (const auto *Entry = CostTableLookup(SSSE3CostTbl, ISD, MTy))
return LT.first * Entry->Cost;
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'smax'
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'smax'
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'smax'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'smax'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'smax'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'smin'
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'smin'
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'smin'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'smin'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'smin'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'umax'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'umax'
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'umax'
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'umax'
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'umax'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'umax'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'umax'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'umin'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'umin'
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'umin'
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'umin'
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'umin'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'umin'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'umin'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)