[X86][AVX] Fixed v16i16/v32i8 ADD/SUB costs on AVX1 subtargets

author Simon Pilgrim <llvm-dev@redking.me.uk>
Mon, 14 Nov 2016 14:45:16 +0000 (14:45 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Mon, 14 Nov 2016 14:45:16 +0000 (14:45 +0000)
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp

index 7029a02e6826b27d76fa07c37ed4d8034394670f..5b3091eed3c1188fd93b3cd5bf064a448c582e08 100644 (file)
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -526,6 +526,10 @@ int X86TTIImpl::getArithmeticInstrCost(
      // Two ops + 1 extract + 1 insert = 4.
      { ISD::MUL,     MVT::v16i16,   4 },
      { ISD::MUL,     MVT::v8i32,    4 },
+    { ISD::SUB,     MVT::v32i8,    4 },
+    { ISD::ADD,     MVT::v32i8,    4 },
+    { ISD::SUB,     MVT::v16i16,   4 },
+    { ISD::ADD,     MVT::v16i16,   4 },
      { ISD::SUB,     MVT::v8i32,    4 },
      { ISD::ADD,     MVT::v8i32,    4 },
      { ISD::SUB,     MVT::v4i64,    4 },
diff --git a/llvm/test/Analysis/CostModel/X86/arith.ll b/llvm/test/Analysis/CostModel/X86/arith.ll

index 2f3f0f9aa736c06d04797567b3c9ef6dbe04bff7..aa204db3042f6a6a06ce9d4493a5e198e55da0ff 100644 (file)
--- a/llvm/test/Analysis/CostModel/X86/arith.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith.ll
@@ -57,13 +57,13 @@ define i32 @add(i32 %arg) {
    %G = add <8 x i16> undef, undef
    ; SSSE3: cost of 2 {{.*}} %H = add
    ; SSE42: cost of 2 {{.*}} %H = add
-  ; AVX: cost of 2 {{.*}} %H = add
+  ; AVX: cost of 4 {{.*}} %H = add
    ; AVX2: cost of 1 {{.*}} %H = add
    ; AVX512: cost of 1 {{.*}} %H = add
    %H = add <16 x i16> undef, undef
    ; SSSE3: cost of 4 {{.*}} %I = add
    ; SSE42: cost of 4 {{.*}} %I = add
-  ; AVX: cost of 4 {{.*}} %I = add
+  ; AVX: cost of 8 {{.*}} %I = add
    ; AVX2: cost of 2 {{.*}} %I = add
    ; AVX512F: cost of 2 {{.*}} %I = add
    ; AVX512BW: cost of 1 {{.*}} %I = add
@@ -77,13 +77,13 @@ define i32 @add(i32 %arg) {
    %J = add <16 x i8> undef, undef
    ; SSSE3: cost of 2 {{.*}} %K = add
    ; SSE42: cost of 2 {{.*}} %K = add
-  ; AVX: cost of 2 {{.*}} %K = add
+  ; AVX: cost of 4 {{.*}} %K = add
    ; AVX2: cost of 1 {{.*}} %K = add
    ; AVX512: cost of 1 {{.*}} %K = add
    %K = add <32 x i8> undef, undef
    ; SSSE3: cost of 4 {{.*}} %L = add
    ; SSE42: cost of 4 {{.*}} %L = add
-  ; AVX: cost of 4 {{.*}} %L = add
+  ; AVX: cost of 8 {{.*}} %L = add
    ; AVX2: cost of 2 {{.*}} %L = add
    ; AVX512F: cost of 2 {{.*}} %L = add
    ; AVX512BW: cost of 1 {{.*}} %L = add
@@ -140,13 +140,13 @@ define i32 @sub(i32 %arg) {
    %G = sub <8 x i16> undef, undef
    ; SSSE3: cost of 2 {{.*}} %H = sub
    ; SSE42: cost of 2 {{.*}} %H = sub
-  ; AVX: cost of 2 {{.*}} %H = sub
+  ; AVX: cost of 4 {{.*}} %H = sub
    ; AVX2: cost of 1 {{.*}} %H = sub
    ; AVX512: cost of 1 {{.*}} %H = sub
    %H = sub <16 x i16> undef, undef
    ; SSSE3: cost of 4 {{.*}} %I = sub
    ; SSE42: cost of 4 {{.*}} %I = sub
-  ; AVX: cost of 4 {{.*}} %I = sub
+  ; AVX: cost of 8 {{.*}} %I = sub
    ; AVX2: cost of 2 {{.*}} %I = sub
    ; AVX512F: cost of 2 {{.*}} %I = sub
    ; AVX512BW: cost of 1 {{.*}} %I = sub
@@ -160,13 +160,13 @@ define i32 @sub(i32 %arg) {
    %J = sub <16 x i8> undef, undef
    ; SSSE3: cost of 2 {{.*}} %K = sub
    ; SSE42: cost of 2 {{.*}} %K = sub
-  ; AVX: cost of 2 {{.*}} %K = sub
+  ; AVX: cost of 4 {{.*}} %K = sub
    ; AVX2: cost of 1 {{.*}} %K = sub
    ; AVX512: cost of 1 {{.*}} %K = sub
    %K = sub <32 x i8> undef, undef
    ; SSSE3: cost of 4 {{.*}} %L = sub
    ; SSE42: cost of 4 {{.*}} %L = sub
-  ; AVX: cost of 4 {{.*}} %L = sub
+  ; AVX: cost of 8 {{.*}} %L = sub
    ; AVX2: cost of 2 {{.*}} %L = sub
    ; AVX512F: cost of 2 {{.*}} %L = sub
    ; AVX512BW: cost of 1 {{.*}} %L = sub
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll b/llvm/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll

index fe9d59efc8b37b6a8cb8d15d5aabde5dbc0b01b2..a32cc46e913543b933d6ed8b35878c2f863d13a8 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
@@ -1,4 +1,5 @@
-; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -mcpu=corei7-avx -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s
+; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -mcpu=corei7-avx -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-AVX1
+; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -mcpu=core-avx2 -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-AVX2
  ; REQUIRES: asserts
  
  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -16,7 +17,8 @@ target triple = "x86_64-unknown-linux-gnu"
  ; -vectorizer-maximize-bandwidth is indicated.
  ;
  ; CHECK-label: foo
-; CHECK: LV: Selecting VF: 32.
+; CHECK-AVX1: LV: Selecting VF: 16.
+; CHECK-AVX2: LV: Selecting VF: 32.
  define void @foo() {
  entry:
    br label %for.body
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Mon, 14 Nov 2016 14:45:16 +0000 (14:45 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Mon, 14 Nov 2016 14:45:16 +0000 (14:45 +0000)
llvm/lib/Target/X86/X86TargetTransformInfo.cpp		patch | blob | history
llvm/test/Analysis/CostModel/X86/arith.ll		patch | blob | history
llvm/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll		patch | blob | history