Tweak cost model to match what lowering actually does.
llvm-svn: 303013
if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41())
return LT.first * 15;
- if (const auto *Entry = CostTableLookup(SSE2UniformConstCostTable, ISD,
- LT.second))
- return LT.first * Entry->Cost;
+ // XOP has faster vXi8 shifts.
+ if ((ISD != ISD::SHL && ISD != ISD::SRL && ISD != ISD::SRA) ||
+ !ST->hasXOP())
+ if (const auto *Entry =
+ CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
}
static const CostTblEntry AVX2UniformCostTable[] = {
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
; AVX512: Found an estimated cost of 4 for instruction: %shift
-; XOP: Found an estimated cost of 4 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <16 x i8> %shift
}
; AVX: Found an estimated cost of 10 for instruction: %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
; AVX512: Found an estimated cost of 4 for instruction: %shift
-; XOPAVX: Found an estimated cost of 10 for instruction: %shift
+; XOPAVX: Found an estimated cost of 6 for instruction: %shift
; XOPAVX2: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <32 x i8> %shift
; AVX2: Found an estimated cost of 8 for instruction: %shift
; AVX512F: Found an estimated cost of 8 for instruction: %shift
; AVX512BW: Found an estimated cost of 4 for instruction: %shift
-; XOPAVX: Found an estimated cost of 20 for instruction: %shift
+; XOPAVX: Found an estimated cost of 12 for instruction: %shift
; XOPAVX2: Found an estimated cost of 8 for instruction: %shift
%shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <64 x i8> %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 2 for instruction: %shift
; AVX512: Found an estimated cost of 2 for instruction: %shift
-; XOP: Found an estimated cost of 2 for instruction: %shift
+; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <16 x i8> %shift
}
; AVX: Found an estimated cost of 6 for instruction: %shift
; AVX2: Found an estimated cost of 2 for instruction: %shift
; AVX512: Found an estimated cost of 2 for instruction: %shift
-; XOPAVX: Found an estimated cost of 6 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
%shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <32 x i8> %shift
; AVX2: Found an estimated cost of 4 for instruction: %shift
; AVX512F: Found an estimated cost of 4 for instruction: %shift
; AVX512BW: Found an estimated cost of 2 for instruction: %shift
-; XOPAVX: Found an estimated cost of 12 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
; XOPAVX2: Found an estimated cost of 4 for instruction: %shift
%shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <64 x i8> %shift