From cf362ff4cab3a3bef5f32f0445526823998eb72f Mon Sep 17 00:00:00 2001 From: Daniil Fukalov Date: Thu, 30 Sep 2021 18:13:17 +0300 Subject: [PATCH] [NFC][AMDGPU] Improve cost model tests coverage. --- llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll | 97 +++- .../Analysis/CostModel/AMDGPU/addrspacecast.ll | 153 +++++ llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll | 225 +++++++- .../Analysis/CostModel/AMDGPU/extractelement.ll | 321 ++++++++++- llvm/test/Analysis/CostModel/AMDGPU/fabs.ll | 16 + llvm/test/Analysis/CostModel/AMDGPU/fadd.ll | 22 + llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll | 357 ++++++++++-- llvm/test/Analysis/CostModel/AMDGPU/fma.ll | 33 ++ llvm/test/Analysis/CostModel/AMDGPU/fmul.ll | 22 + llvm/test/Analysis/CostModel/AMDGPU/fneg.ll | 15 + llvm/test/Analysis/CostModel/AMDGPU/fsub.ll | 22 + llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll | 45 ++ .../Analysis/CostModel/AMDGPU/insertelement.ll | 245 ++++++-- llvm/test/Analysis/CostModel/AMDGPU/logicalop.ll | 39 +- llvm/test/Analysis/CostModel/AMDGPU/mul.ll | 13 + llvm/test/Analysis/CostModel/AMDGPU/shifts.ll | 300 +++++++++- .../Analysis/CostModel/AMDGPU/shufflevector.ll | 614 +++++++++++++++++---- 17 files changed, 2268 insertions(+), 271 deletions(-) diff --git a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll index 930ebc8..7b70eb2 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll @@ -12,6 +12,10 @@ define amdgpu_kernel void @add_i32() #0 { ; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = add <3 x i32> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = add <4 x i32> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = add <5 x i32> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef +; ALL-NEXT: Cost Model: Found an 
estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v32i32 = add <32 x i32> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'add_i32' @@ -20,6 +24,10 @@ define amdgpu_kernel void @add_i32() #0 { ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = add <3 x i32> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = add <4 x i32> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = add <5 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v32i32 = add <32 x i32> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %i32 = add i32 undef, undef @@ -27,6 +35,10 @@ define amdgpu_kernel void @add_i32() #0 { %v3i32 = add <3 x i32> undef, undef %v4i32 = add <4 x i32> undef, undef %v5i32 = add <5 x i32> undef, undef + %v6i32 = add <6 x i32> undef, undef + %v7i32 = add <7 x i32> undef, undef + %v8i32 = add <8 x i32> undef, undef + %v32i32 = add <32 x i32> undef, undef ret void } @@ -36,6 +48,10 @@ define amdgpu_kernel void @add_i64() #0 { ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef ; ALL-NEXT: Cost Model: Found 
an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = add <5 x i64> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v6i64 = add <6 x i64> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v7i64 = add <7 x i64> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = add <8 x i64> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16i64 = add <16 x i64> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; @@ -44,6 +60,10 @@ define amdgpu_kernel void @add_i64() #0 { ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = add <5 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v6i64 = add <6 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v7i64 = add <7 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = add <8 x i64> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16i64 = add <16 x i64> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; @@ -51,6 +71,10 @@ define amdgpu_kernel void @add_i64() #0 { %v2i64 = add <2 x i64> undef, undef %v3i64 = add <3 x i64> undef, undef %v4i64 = add <4 x i64> undef, undef + %v5i64 = add <5 x i64> undef, undef + %v6i64 = add <6 x i64> undef, undef + 
%v7i64 = add <7 x i64> undef, undef + %v8i64 = add <8 x i64> undef, undef %v16i64 = add <16 x i64> undef, undef ret void } @@ -59,61 +83,124 @@ define amdgpu_kernel void @add_i16() #0 { ; FAST16-LABEL: 'add_i16' ; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = add i16 undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = add <2 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = add <3 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = add <4 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW16-LABEL: 'add_i16' ; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = add i16 undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = add <2 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = add <3 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = add <4 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = add <5 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i16 = add <6 x i16> undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FAST16-SIZE-LABEL: 'add_i16' ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = add i16 undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = add <2 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found 
an estimated cost of 2 for instruction: %v3i16 = add <3 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = add <4 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLOW16-SIZE-LABEL: 'add_i16' ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = add i16 undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = add <2 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = add <3 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = add <4 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = add <5 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i16 = add <6 x i16> undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %i16 = add i16 undef, undef %v2i16 = add <2 x i16> undef, undef + %v3i16 = add <3 x i16> undef, undef + %v4i16 = add <4 x i16> undef, undef + %v5i16 = add <5 x i16> undef, undef + %v6i16 = add <6 x i16> undef, undef + ret void +} + +define amdgpu_kernel void @add_i8() #0 { +; ALL-LABEL: 'add_i8' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = add i8 undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = add <2 x i8> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = add <3 x i8> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = add <4 
x i8> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = add <5 x i8> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i8 = add <6 x i8> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; ALL-SIZE-LABEL: 'add_i8' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = add i8 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = add <2 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = add <3 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = add <4 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = add <5 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i8 = add <6 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %i8 = add i8 undef, undef + %v2i8 = add <2 x i8> undef, undef + %v3i8 = add <3 x i8> undef, undef + %v4i8 = add <4 x i8> undef, undef + %v5i8 = add <5 x i8> undef, undef + %v6i8 = add <6 x i8> undef, undef ret void } define amdgpu_kernel void @sub() #0 { ; FAST16-LABEL: 'sub' +; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef -; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = sub <2 x i16> undef, undef +; FAST16-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %v3i16 = sub <3 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = sub <4 x i16> undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW16-LABEL: 'sub' +; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef -; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = sub <2 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = sub <3 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = sub <4 x i16> undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FAST16-SIZE-LABEL: 'sub' +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = sub <2 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = sub <3 x 
i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = sub <4 x i16> undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLOW16-SIZE-LABEL: 'sub' +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = sub <2 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = sub <3 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = sub <4 x i16> undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; + %i8 = sub i8 undef, undef + %i16 = sub i16 undef, undef %i32 = sub i32 undef, undef %i64 = sub i64 undef, undef - %i16 = sub i16 undef, undef %v2i16 = sub <2 x i16> undef, undef + %v3i16 = sub <3 x i16> undef, undef + %v4i16 = sub <4 x i16> undef, undef ret void } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll b/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll index 8d558e4..95b996b 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll @@ -7,18 +7,69 @@ define void @addrspacecast_global_to_flat() #0 { ; ALL-LABEL: 'addrspacecast_global_to_flat' ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8ptr = addrspacecast i8 addrspace(1)* undef to i8* ; ALL-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: %v2i8ptr = addrspacecast <2 x i8 addrspace(1)*> undef to <2 x i8*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i8ptr = addrspacecast <3 x i8 addrspace(1)*> undef to <3 x i8*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i8ptr = addrspacecast <4 x i8 addrspace(1)*> undef to <4 x i8*> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i8ptr = addrspacecast <32 x i8 addrspace(1)*> undef to <32 x i8*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16ptr = addrspacecast i16 addrspace(1)* undef to i16* +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16ptr = addrspacecast <2 x i16 addrspace(1)*> undef to <2 x i16*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i16ptr = addrspacecast <3 x i16 addrspace(1)*> undef to <3 x i16*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16ptr = addrspacecast <4 x i16 addrspace(1)*> undef to <4 x i16*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i16ptr = addrspacecast <32 x i16 addrspace(1)*> undef to <32 x i16*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32ptr = addrspacecast i32 addrspace(1)* undef to i32* +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32ptr = addrspacecast <2 x i32 addrspace(1)*> undef to <2 x i32*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32ptr = addrspacecast <3 x i32 addrspace(1)*> undef to <3 x i32*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32ptr = addrspacecast <4 x i32 addrspace(1)*> undef to <4 x i32*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i32ptr = addrspacecast <32 x i32 addrspace(1)*> undef to <32 x i32*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%i64ptr = addrspacecast i64 addrspace(1)* undef to i64* +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64ptr = addrspacecast <2 x i64 addrspace(1)*> undef to <2 x i64*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64ptr = addrspacecast <3 x i64 addrspace(1)*> undef to <3 x i64*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64ptr = addrspacecast <4 x i64 addrspace(1)*> undef to <4 x i64*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i64ptr = addrspacecast <32 x i64 addrspace(1)*> undef to <32 x i64*> ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'addrspacecast_global_to_flat' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8ptr = addrspacecast i8 addrspace(1)* undef to i8* ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i8ptr = addrspacecast <2 x i8 addrspace(1)*> undef to <2 x i8*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i8ptr = addrspacecast <3 x i8 addrspace(1)*> undef to <3 x i8*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i8ptr = addrspacecast <4 x i8 addrspace(1)*> undef to <4 x i8*> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i8ptr = addrspacecast <32 x i8 addrspace(1)*> undef to <32 x i8*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16ptr = addrspacecast i16 addrspace(1)* undef to i16* +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16ptr = addrspacecast <2 x i16 addrspace(1)*> undef to <2 x i16*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i16ptr = addrspacecast <3 x i16 addrspace(1)*> undef to <3 x i16*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16ptr = addrspacecast <4 x i16 
addrspace(1)*> undef to <4 x i16*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i16ptr = addrspacecast <32 x i16 addrspace(1)*> undef to <32 x i16*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32ptr = addrspacecast i32 addrspace(1)* undef to i32* +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32ptr = addrspacecast <2 x i32 addrspace(1)*> undef to <2 x i32*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32ptr = addrspacecast <3 x i32 addrspace(1)*> undef to <3 x i32*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32ptr = addrspacecast <4 x i32 addrspace(1)*> undef to <4 x i32*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i32ptr = addrspacecast <32 x i32 addrspace(1)*> undef to <32 x i32*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i64ptr = addrspacecast i64 addrspace(1)* undef to i64* +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64ptr = addrspacecast <2 x i64 addrspace(1)*> undef to <2 x i64*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64ptr = addrspacecast <3 x i64 addrspace(1)*> undef to <3 x i64*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64ptr = addrspacecast <4 x i64 addrspace(1)*> undef to <4 x i64*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i64ptr = addrspacecast <32 x i64 addrspace(1)*> undef to <32 x i64*> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %i8ptr = addrspacecast i8 addrspace(1)* undef to i8* %v2i8ptr = addrspacecast <2 x i8 addrspace(1)*> undef to <2 x i8*> + %v3i8ptr = addrspacecast <3 x i8 addrspace(1)*> undef to <3 x i8*> + %v4i8ptr = addrspacecast <4 x i8 addrspace(1)*> undef to <4 x i8*> %v32i8ptr = addrspacecast <32 x 
i8 addrspace(1)*> undef to <32 x i8*> + %i16ptr = addrspacecast i16 addrspace(1)* undef to i16* + %v2i16ptr = addrspacecast <2 x i16 addrspace(1)*> undef to <2 x i16*> + %v3i16ptr = addrspacecast <3 x i16 addrspace(1)*> undef to <3 x i16*> + %v4i16ptr = addrspacecast <4 x i16 addrspace(1)*> undef to <4 x i16*> + %v32i16ptr = addrspacecast <32 x i16 addrspace(1)*> undef to <32 x i16*> + %i32ptr = addrspacecast i32 addrspace(1)* undef to i32* + %v2i32ptr = addrspacecast <2 x i32 addrspace(1)*> undef to <2 x i32*> + %v3i32ptr = addrspacecast <3 x i32 addrspace(1)*> undef to <3 x i32*> + %v4i32ptr = addrspacecast <4 x i32 addrspace(1)*> undef to <4 x i32*> + %v32i32ptr = addrspacecast <32 x i32 addrspace(1)*> undef to <32 x i32*> + %i64ptr = addrspacecast i64 addrspace(1)* undef to i64* + %v2i64ptr = addrspacecast <2 x i64 addrspace(1)*> undef to <2 x i64*> + %v3i64ptr = addrspacecast <3 x i64 addrspace(1)*> undef to <3 x i64*> + %v4i64ptr = addrspacecast <4 x i64 addrspace(1)*> undef to <4 x i64*> + %v32i64ptr = addrspacecast <32 x i64 addrspace(1)*> undef to <32 x i64*> ret void } @@ -26,18 +77,69 @@ define void @addrspacecast_local_to_flat() #0 { ; ALL-LABEL: 'addrspacecast_local_to_flat' ; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8ptr = addrspacecast i8 addrspace(3)* undef to i8* ; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8ptr = addrspacecast <2 x i8 addrspace(3)*> undef to <2 x i8*> +; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i8ptr = addrspacecast <3 x i8 addrspace(3)*> undef to <3 x i8*> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8ptr = addrspacecast <4 x i8 addrspace(3)*> undef to <4 x i8*> ; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i8ptr = addrspacecast <32 x i8 addrspace(3)*> undef to <32 x i8*> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16ptr = addrspacecast i16 addrspace(3)* 
undef to i16* +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16ptr = addrspacecast <2 x i16 addrspace(3)*> undef to <2 x i16*> +; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i16ptr = addrspacecast <3 x i16 addrspace(3)*> undef to <3 x i16*> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16ptr = addrspacecast <4 x i16 addrspace(3)*> undef to <4 x i16*> +; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i16ptr = addrspacecast <32 x i16 addrspace(3)*> undef to <32 x i16*> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32ptr = addrspacecast i32 addrspace(3)* undef to i32* +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32ptr = addrspacecast <2 x i32 addrspace(3)*> undef to <2 x i32*> +; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32ptr = addrspacecast <3 x i32 addrspace(3)*> undef to <3 x i32*> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32ptr = addrspacecast <4 x i32 addrspace(3)*> undef to <4 x i32*> +; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i32ptr = addrspacecast <32 x i32 addrspace(3)*> undef to <32 x i32*> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i64ptr = addrspacecast i64 addrspace(3)* undef to i64* +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64ptr = addrspacecast <2 x i64 addrspace(3)*> undef to <2 x i64*> +; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i64ptr = addrspacecast <3 x i64 addrspace(3)*> undef to <3 x i64*> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i64ptr = addrspacecast <4 x i64 addrspace(3)*> undef to <4 x i64*> +; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i64ptr = addrspacecast <32 x i64 addrspace(3)*> undef to <32 x i64*> ; ALL-NEXT: Cost Model: 
Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'addrspacecast_local_to_flat' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8ptr = addrspacecast i8 addrspace(3)* undef to i8* ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8ptr = addrspacecast <2 x i8 addrspace(3)*> undef to <2 x i8*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i8ptr = addrspacecast <3 x i8 addrspace(3)*> undef to <3 x i8*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8ptr = addrspacecast <4 x i8 addrspace(3)*> undef to <4 x i8*> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i8ptr = addrspacecast <32 x i8 addrspace(3)*> undef to <32 x i8*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16ptr = addrspacecast i16 addrspace(3)* undef to i16* +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16ptr = addrspacecast <2 x i16 addrspace(3)*> undef to <2 x i16*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i16ptr = addrspacecast <3 x i16 addrspace(3)*> undef to <3 x i16*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16ptr = addrspacecast <4 x i16 addrspace(3)*> undef to <4 x i16*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i16ptr = addrspacecast <32 x i16 addrspace(3)*> undef to <32 x i16*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32ptr = addrspacecast i32 addrspace(3)* undef to i32* +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32ptr = addrspacecast <2 x i32 addrspace(3)*> undef to <2 x i32*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32ptr = addrspacecast <3 x i32 addrspace(3)*> undef to <3 x i32*> +; ALL-SIZE-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %v4i32ptr = addrspacecast <4 x i32 addrspace(3)*> undef to <4 x i32*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i32ptr = addrspacecast <32 x i32 addrspace(3)*> undef to <32 x i32*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i64ptr = addrspacecast i64 addrspace(3)* undef to i64* +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64ptr = addrspacecast <2 x i64 addrspace(3)*> undef to <2 x i64*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i64ptr = addrspacecast <3 x i64 addrspace(3)*> undef to <3 x i64*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i64ptr = addrspacecast <4 x i64 addrspace(3)*> undef to <4 x i64*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i64ptr = addrspacecast <32 x i64 addrspace(3)*> undef to <32 x i64*> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %i8ptr = addrspacecast i8 addrspace(3)* undef to i8* %v2i8ptr = addrspacecast <2 x i8 addrspace(3)*> undef to <2 x i8*> + %v3i8ptr = addrspacecast <3 x i8 addrspace(3)*> undef to <3 x i8*> + %v4i8ptr = addrspacecast <4 x i8 addrspace(3)*> undef to <4 x i8*> %v32i8ptr = addrspacecast <32 x i8 addrspace(3)*> undef to <32 x i8*> + %i16ptr = addrspacecast i16 addrspace(3)* undef to i16* + %v2i16ptr = addrspacecast <2 x i16 addrspace(3)*> undef to <2 x i16*> + %v3i16ptr = addrspacecast <3 x i16 addrspace(3)*> undef to <3 x i16*> + %v4i16ptr = addrspacecast <4 x i16 addrspace(3)*> undef to <4 x i16*> + %v32i16ptr = addrspacecast <32 x i16 addrspace(3)*> undef to <32 x i16*> + %i32ptr = addrspacecast i32 addrspace(3)* undef to i32* + %v2i32ptr = addrspacecast <2 x i32 addrspace(3)*> undef to <2 x i32*> + %v3i32ptr = addrspacecast <3 x i32 addrspace(3)*> undef to <3 x i32*> + %v4i32ptr = addrspacecast <4 x i32 addrspace(3)*> undef 
to <4 x i32*> + %v32i32ptr = addrspacecast <32 x i32 addrspace(3)*> undef to <32 x i32*> + %i64ptr = addrspacecast i64 addrspace(3)* undef to i64* + %v2i64ptr = addrspacecast <2 x i64 addrspace(3)*> undef to <2 x i64*> + %v3i64ptr = addrspacecast <3 x i64 addrspace(3)*> undef to <3 x i64*> + %v4i64ptr = addrspacecast <4 x i64 addrspace(3)*> undef to <4 x i64*> + %v32i64ptr = addrspacecast <32 x i64 addrspace(3)*> undef to <32 x i64*> ret void } @@ -45,18 +147,69 @@ define void @addrspacecast_flat_to_local() #0 { ; ALL-LABEL: 'addrspacecast_flat_to_local' ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8ptr = addrspacecast i8* undef to i8 addrspace(3)* ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i8ptr = addrspacecast <2 x i8*> undef to <2 x i8 addrspace(3)*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i8ptr = addrspacecast <3 x i8*> undef to <3 x i8 addrspace(3)*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i8ptr = addrspacecast <4 x i8*> undef to <4 x i8 addrspace(3)*> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i8ptr = addrspacecast <32 x i8*> undef to <32 x i8 addrspace(3)*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16ptr = addrspacecast i16* undef to i16 addrspace(3)* +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16ptr = addrspacecast <2 x i16*> undef to <2 x i16 addrspace(3)*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i16ptr = addrspacecast <3 x i16*> undef to <3 x i16 addrspace(3)*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16ptr = addrspacecast <4 x i16*> undef to <4 x i16 addrspace(3)*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i16ptr = addrspacecast <32 x i16*> undef to <32 x i16 addrspace(3)*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %i32ptr = addrspacecast i32* undef to i32 addrspace(3)* +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32ptr = addrspacecast <2 x i32*> undef to <2 x i32 addrspace(3)*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32ptr = addrspacecast <3 x i32*> undef to <3 x i32 addrspace(3)*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32ptr = addrspacecast <4 x i32*> undef to <4 x i32 addrspace(3)*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i32ptr = addrspacecast <32 x i32*> undef to <32 x i32 addrspace(3)*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i64ptr = addrspacecast i64* undef to i64 addrspace(3)* +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64ptr = addrspacecast <2 x i64*> undef to <2 x i64 addrspace(3)*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64ptr = addrspacecast <3 x i64*> undef to <3 x i64 addrspace(3)*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64ptr = addrspacecast <4 x i64*> undef to <4 x i64 addrspace(3)*> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i64ptr = addrspacecast <32 x i64*> undef to <32 x i64 addrspace(3)*> ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'addrspacecast_flat_to_local' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8ptr = addrspacecast i8* undef to i8 addrspace(3)* ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i8ptr = addrspacecast <2 x i8*> undef to <2 x i8 addrspace(3)*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i8ptr = addrspacecast <3 x i8*> undef to <3 x i8 addrspace(3)*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i8ptr = addrspacecast <4 x i8*> undef to <4 x 
i8 addrspace(3)*> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i8ptr = addrspacecast <32 x i8*> undef to <32 x i8 addrspace(3)*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16ptr = addrspacecast i16* undef to i16 addrspace(3)* +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16ptr = addrspacecast <2 x i16*> undef to <2 x i16 addrspace(3)*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i16ptr = addrspacecast <3 x i16*> undef to <3 x i16 addrspace(3)*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16ptr = addrspacecast <4 x i16*> undef to <4 x i16 addrspace(3)*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i16ptr = addrspacecast <32 x i16*> undef to <32 x i16 addrspace(3)*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32ptr = addrspacecast i32* undef to i32 addrspace(3)* +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32ptr = addrspacecast <2 x i32*> undef to <2 x i32 addrspace(3)*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32ptr = addrspacecast <3 x i32*> undef to <3 x i32 addrspace(3)*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32ptr = addrspacecast <4 x i32*> undef to <4 x i32 addrspace(3)*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i32ptr = addrspacecast <32 x i32*> undef to <32 x i32 addrspace(3)*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i64ptr = addrspacecast i64* undef to i64 addrspace(3)* +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64ptr = addrspacecast <2 x i64*> undef to <2 x i64 addrspace(3)*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64ptr = addrspacecast <3 x i64*> undef 
to <3 x i64 addrspace(3)*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64ptr = addrspacecast <4 x i64*> undef to <4 x i64 addrspace(3)*> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i64ptr = addrspacecast <32 x i64*> undef to <32 x i64 addrspace(3)*> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %i8ptr = addrspacecast i8* undef to i8 addrspace(3)* %v2i8ptr = addrspacecast <2 x i8*> undef to <2 x i8 addrspace(3)*> + %v3i8ptr = addrspacecast <3 x i8*> undef to <3 x i8 addrspace(3)*> + %v4i8ptr = addrspacecast <4 x i8*> undef to <4 x i8 addrspace(3)*> %v32i8ptr = addrspacecast <32 x i8*> undef to <32 x i8 addrspace(3)*> + %i16ptr = addrspacecast i16* undef to i16 addrspace(3)* + %v2i16ptr = addrspacecast <2 x i16*> undef to <2 x i16 addrspace(3)*> + %v3i16ptr = addrspacecast <3 x i16*> undef to <3 x i16 addrspace(3)*> + %v4i16ptr = addrspacecast <4 x i16*> undef to <4 x i16 addrspace(3)*> + %v32i16ptr = addrspacecast <32 x i16*> undef to <32 x i16 addrspace(3)*> + %i32ptr = addrspacecast i32* undef to i32 addrspace(3)* + %v2i32ptr = addrspacecast <2 x i32*> undef to <2 x i32 addrspace(3)*> + %v3i32ptr = addrspacecast <3 x i32*> undef to <3 x i32 addrspace(3)*> + %v4i32ptr = addrspacecast <4 x i32*> undef to <4 x i32 addrspace(3)*> + %v32i32ptr = addrspacecast <32 x i32*> undef to <32 x i32 addrspace(3)*> + %i64ptr = addrspacecast i64* undef to i64 addrspace(3)* + %v2i64ptr = addrspacecast <2 x i64*> undef to <2 x i64 addrspace(3)*> + %v3i64ptr = addrspacecast <3 x i64*> undef to <3 x i64 addrspace(3)*> + %v4i64ptr = addrspacecast <4 x i64*> undef to <4 x i64 addrspace(3)*> + %v32i64ptr = addrspacecast <32 x i64*> undef to <32 x i64 addrspace(3)*> ret void } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll b/llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll index 042b43b..e57e6d0 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll +++ 
b/llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll @@ -7,94 +7,289 @@ define amdgpu_kernel void @or() #0 { ; SLOW16-LABEL: 'or' +; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = or i8 undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = or <2 x i8> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = or <3 x i8> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = or <4 x i8> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = or i16 undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = or <2 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = or <3 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = or <4 x i16> undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = or i32 undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = or <2 x i32> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = or <3 x i32> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = or <4 x i32> undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = or i64 undef, undef -; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = or <2 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = or <2 x i64> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = or <3 x i64> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = or <4 x i64> undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for 
instruction: ret void ; ; FAST16-LABEL: 'or' +; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = or i8 undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = or <2 x i8> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = or <3 x i8> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = or <4 x i8> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = or i16 undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = or <2 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = or <3 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = or <4 x i16> undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = or i32 undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = or <2 x i32> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = or <3 x i32> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = or <4 x i32> undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = or i64 undef, undef -; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = or <2 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = or <2 x i64> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = or <3 x i64> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = or <4 x i64> undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW16-SIZE-LABEL: 'or' +; SLOW16-SIZE-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %i8 = or i8 undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = or <2 x i8> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = or <3 x i8> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = or <4 x i8> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = or i16 undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = or <2 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = or <3 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = or <4 x i16> undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = or i32 undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = or <2 x i32> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = or <3 x i32> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = or <4 x i32> undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = or i64 undef, undef -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = or <2 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = or <2 x i64> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = or <3 x i64> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = or <4 x i64> undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; FAST16-SIZE-LABEL: 'or' +; FAST16-SIZE-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %i8 = or i8 undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = or <2 x i8> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = or <3 x i8> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = or <4 x i8> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = or i16 undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = or <2 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = or <3 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = or <4 x i16> undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = or i32 undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = or <2 x i32> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = or <3 x i32> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = or <4 x i32> undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = or i64 undef, undef -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = or <2 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = or <2 x i64> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = or <3 x i64> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = or <4 x i64> undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; + %i8 = or i8 undef, undef + %v2i8 
= or <2 x i8> undef, undef + %v3i8 = or <3 x i8> undef, undef + %v4i8 = or <4 x i8> undef, undef + %i16 = or i16 undef, undef + %v2i16 = or <2 x i16> undef, undef + %v3i16 = or <3 x i16> undef, undef + %v4i16 = or <4 x i16> undef, undef %i32 = or i32 undef, undef + %v2i32 = or <2 x i32> undef, undef + %v3i32 = or <3 x i32> undef, undef + %v4i32 = or <4 x i32> undef, undef %i64 = or i64 undef, undef - %v2i16 = or <2 x i16> undef, undef + %v2i64 = or <2 x i64> undef, undef + %v3i64 = or <3 x i64> undef, undef + %v4i64 = or <4 x i64> undef, undef ret void } define amdgpu_kernel void @xor() #0 { ; SLOW16-LABEL: 'xor' +; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = xor i8 undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = xor <2 x i8> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = xor <3 x i8> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = xor <4 x i8> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = xor i16 undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = xor <2 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = xor <3 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = xor <4 x i16> undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = xor i32 undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = xor <2 x i32> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = xor <3 x i32> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = xor <4 x i32> undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = xor i64 
undef, undef -; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = xor <2 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = xor <2 x i64> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = xor <3 x i64> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = xor <4 x i64> undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FAST16-LABEL: 'xor' +; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = xor i8 undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = xor <2 x i8> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = xor <3 x i8> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = xor <4 x i8> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = xor i16 undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = xor <2 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = xor <3 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = xor <4 x i16> undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = xor i32 undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = xor <2 x i32> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = xor <3 x i32> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = xor <4 x i32> undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = xor i64 undef, undef -; FAST16-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %v2i16 = xor <2 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = xor <2 x i64> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = xor <3 x i64> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = xor <4 x i64> undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW16-SIZE-LABEL: 'xor' +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = xor i8 undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = xor <2 x i8> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = xor <3 x i8> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = xor <4 x i8> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = xor i16 undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = xor <2 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = xor <3 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = xor <4 x i16> undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = xor i32 undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = xor <2 x i32> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = xor <3 x i32> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = xor <4 x i32> undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = xor i64 undef, undef -; SLOW16-SIZE-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %v2i16 = xor <2 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = xor <2 x i64> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = xor <3 x i64> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = xor <4 x i64> undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; FAST16-SIZE-LABEL: 'xor' +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = xor i8 undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = xor <2 x i8> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = xor <3 x i8> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = xor <4 x i8> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = xor i16 undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = xor <2 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = xor <3 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = xor <4 x i16> undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = xor i32 undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = xor <2 x i32> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = xor <3 x i32> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = xor <4 x i32> undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = xor i64 undef, undef -; 
FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = xor <2 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = xor <2 x i64> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = xor <3 x i64> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = xor <4 x i64> undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; + %i8 = xor i8 undef, undef + %v2i8 = xor <2 x i8> undef, undef + %v3i8 = xor <3 x i8> undef, undef + %v4i8 = xor <4 x i8> undef, undef + %i16 = xor i16 undef, undef + %v2i16 = xor <2 x i16> undef, undef + %v3i16 = xor <3 x i16> undef, undef + %v4i16 = xor <4 x i16> undef, undef %i32 = xor i32 undef, undef + %v2i32 = xor <2 x i32> undef, undef + %v3i32 = xor <3 x i32> undef, undef + %v4i32 = xor <4 x i32> undef, undef %i64 = xor i64 undef, undef - %v2i16 = xor <2 x i16> undef, undef + %v2i64 = xor <2 x i64> undef, undef + %v3i64 = xor <3 x i64> undef, undef + %v4i64 = xor <4 x i64> undef, undef ret void } define amdgpu_kernel void @and() #0 { ; SLOW16-LABEL: 'and' +; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = and i8 undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = and <2 x i8> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = and <3 x i8> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = and <4 x i8> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = and i16 undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = and <2 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = and <3 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found 
an estimated cost of 4 for instruction: %v4i16 = and <4 x i16> undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = and i32 undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = and <2 x i32> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = and <3 x i32> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = and <4 x i32> undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = and i64 undef, undef -; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = and <2 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = and <2 x i64> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = and <3 x i64> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = and <4 x i64> undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FAST16-LABEL: 'and' +; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = and i8 undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = and <2 x i8> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = and <3 x i8> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = and <4 x i8> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = and i16 undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = and <2 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = and <3 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = and <4 x i16> 
undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = and i32 undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = and <2 x i32> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = and <3 x i32> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = and <4 x i32> undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = and i64 undef, undef -; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = and <2 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = and <2 x i64> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = and <3 x i64> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = and <4 x i64> undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW16-SIZE-LABEL: 'and' +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = and i8 undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = and <2 x i8> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = and <3 x i8> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = and <4 x i8> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = and i16 undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = and <2 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = and <3 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = and <4 x i16> undef, undef ; 
SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = and i32 undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = and <2 x i32> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = and <3 x i32> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = and <4 x i32> undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = and i64 undef, undef -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = and <2 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = and <2 x i64> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = and <3 x i64> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = and <4 x i64> undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; FAST16-SIZE-LABEL: 'and' +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = and i8 undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = and <2 x i8> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = and <3 x i8> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = and <4 x i8> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = and i16 undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = and <2 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = and <3 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = 
and <4 x i16> undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = and i32 undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = and <2 x i32> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = and <3 x i32> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = and <4 x i32> undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = and i64 undef, undef -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = and <2 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = and <2 x i64> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = and <3 x i64> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = and <4 x i64> undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; + %i8 = and i8 undef, undef + %v2i8 = and <2 x i8> undef, undef + %v3i8 = and <3 x i8> undef, undef + %v4i8 = and <4 x i8> undef, undef + %i16 = and i16 undef, undef + %v2i16 = and <2 x i16> undef, undef + %v3i16 = and <3 x i16> undef, undef + %v4i16 = and <4 x i16> undef, undef %i32 = and i32 undef, undef + %v2i32 = and <2 x i32> undef, undef + %v3i32 = and <3 x i32> undef, undef + %v4i32 = and <4 x i32> undef, undef %i64 = and i64 undef, undef - %v2i16 = and <2 x i16> undef, undef + %v2i64 = and <2 x i64> undef, undef + %v3i64 = and <3 x i64> undef, undef + %v4i64 = and <4 x i64> undef, undef ret void } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll b/llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll index f907148..0843430 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll +++ 
b/llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll @@ -9,97 +9,364 @@ define amdgpu_kernel void @extractelement_32(i32 %arg) { ; GCN-LABEL: 'extractelement_32' +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = extractelement <2 x float> undef, i32 0 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32_0 = extractelement <3 x i32> undef, i32 0 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i32_0 = extractelement <5 x i32> undef, i32 0 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 ; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 ; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_1 = extractelement <2 x float> undef, i32 1 ; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32_1 = extractelement <3 x i32> undef, i32 1 ; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_1 = extractelement <4 x i32> undef, i32 1 ; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i32_1 = extractelement <5 x i32> undef, i32 1 ; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_1 = extractelement <8 x i32> undef, i32 1 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32_2 = extractelement <3 x i32> undef, i32 2 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_2 = extractelement <4 x i32> undef, i32 2 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i32_2 = extractelement <5 x i32> undef, i32 2 +; GCN-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %v8i32_2 = extractelement <8 x i32> undef, i32 2 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i32_3 = extractelement <5 x i32> undef, i32 3 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; GCN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; GCN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg ; GCN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg ; GCN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GCN-SIZE-LABEL: 'extractelement_32' +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = extractelement <2 x float> undef, i32 0 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32_0 = extractelement <3 x i32> undef, i32 0 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i32_0 = extractelement <5 x i32> undef, i32 0 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 ; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 ; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_1 = extractelement <2 x float> undef, i32 1 ; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %v3i32_1 = extractelement <3 x i32> undef, i32 1 ; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_1 = extractelement <4 x i32> undef, i32 1 ; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i32_1 = extractelement <5 x i32> undef, i32 1 ; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_1 = extractelement <8 x i32> undef, i32 1 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32_2 = extractelement <3 x i32> undef, i32 2 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_2 = extractelement <4 x i32> undef, i32 2 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i32_2 = extractelement <5 x i32> undef, i32 2 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_2 = extractelement <8 x i32> undef, i32 2 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i32_3 = extractelement <5 x i32> undef, i32 3 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg ; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg ; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; + %v2i32_0 = extractelement <2 x i32> undef, i32 0 + %v2f32_0 = extractelement <2 x float> undef, i32 0 + %v3i32_0 = extractelement <3 x i32> undef, i32 0 + %v4i32_0 = extractelement <4 x i32> undef, i32 0 + %v5i32_0 = 
extractelement <5 x i32> undef, i32 0 + %v8i32_0 = extractelement <8 x i32> undef, i32 0 + %v2i32_1 = extractelement <2 x i32> undef, i32 1 %v2f32_1 = extractelement <2 x float> undef, i32 1 %v3i32_1 = extractelement <3 x i32> undef, i32 1 %v4i32_1 = extractelement <4 x i32> undef, i32 1 %v5i32_1 = extractelement <5 x i32> undef, i32 1 %v8i32_1 = extractelement <8 x i32> undef, i32 1 + + %v3i32_2 = extractelement <3 x i32> undef, i32 2 + %v4i32_2 = extractelement <4 x i32> undef, i32 2 + %v5i32_2 = extractelement <5 x i32> undef, i32 2 + %v8i32_2 = extractelement <8 x i32> undef, i32 2 + + %v4i32_3 = extractelement <4 x i32> undef, i32 3 + %v5i32_3 = extractelement <5 x i32> undef, i32 3 + %v8i32_3 = extractelement <8 x i32> undef, i32 3 + + %v2i32_a = extractelement <2 x i32> undef, i32 %arg + %v4i32_a = extractelement <4 x i32> undef, i32 %arg %v8i32_a = extractelement <8 x i32> undef, i32 %arg ret void } -define amdgpu_kernel void @extractelement_64() { +define amdgpu_kernel void @extractelement_64(i32 %arg) { ; GCN-LABEL: 'extractelement_64' -; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i64 1 -; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64_1 = extractelement <3 x i64> undef, i64 1 -; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_1 = extractelement <4 x i64> undef, i64 1 -; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_1 = extractelement <8 x i64> undef, i64 1 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = extractelement <2 x double> undef, i32 0 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64_0 = extractelement <3 x i64> undef, i32 0 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_0 = 
extractelement <4 x i64> undef, i32 0 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i64_0 = extractelement <5 x i64> undef, i32 0 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_1 = extractelement <2 x double> undef, i32 1 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64_1 = extractelement <3 x i64> undef, i32 1 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_1 = extractelement <4 x i64> undef, i32 1 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i64_1 = extractelement <5 x i64> undef, i32 1 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_1 = extractelement <8 x i64> undef, i32 1 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64_2 = extractelement <3 x i64> undef, i32 2 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_2 = extractelement <4 x i64> undef, i32 2 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i64_2 = extractelement <5 x i64> undef, i32 2 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_2 = extractelement <8 x i64> undef, i32 2 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i64_3 = extractelement <5 x i64> undef, i32 3 +; GCN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; GCN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; GCN-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; GCN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg ; GCN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GCN-SIZE-LABEL: 'extractelement_64' -; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i64 1 -; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64_1 = extractelement <3 x i64> undef, i64 1 -; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_1 = extractelement <4 x i64> undef, i64 1 -; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_1 = extractelement <8 x i64> undef, i64 1 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = extractelement <2 x double> undef, i32 0 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64_0 = extractelement <3 x i64> undef, i32 0 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i64_0 = extractelement <5 x i64> undef, i32 0 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_1 = extractelement <2 x double> undef, i32 1 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64_1 = extractelement <3 x i64> undef, i32 1 +; GCN-SIZE-NEXT: Cost Model: Found 
an estimated cost of 0 for instruction: %v4i64_1 = extractelement <4 x i64> undef, i32 1 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i64_1 = extractelement <5 x i64> undef, i32 1 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_1 = extractelement <8 x i64> undef, i32 1 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64_2 = extractelement <3 x i64> undef, i32 2 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_2 = extractelement <4 x i64> undef, i32 2 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i64_2 = extractelement <5 x i64> undef, i32 2 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_2 = extractelement <8 x i64> undef, i32 2 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i64_3 = extractelement <5 x i64> undef, i32 3 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg ; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; - %v2i64_1 = extractelement <2 x i64> undef, i64 1 - %v3i64_1 = extractelement <3 x i64> undef, i64 1 - %v4i64_1 = extractelement <4 x i64> undef, i64 1 - %v8i64_1 = extractelement <8 x i64> undef, i64 1 + %v2i64_0 = extractelement <2 x i64> undef, i32 0 + %v2f64_0 = extractelement <2 x double> undef, i32 0 + 
%v3i64_0 = extractelement <3 x i64> undef, i32 0 + %v4i64_0 = extractelement <4 x i64> undef, i32 0 + %v5i64_0 = extractelement <5 x i64> undef, i32 0 + %v8i64_0 = extractelement <8 x i64> undef, i32 0 + + %v2i64_1 = extractelement <2 x i64> undef, i32 1 + %v2f64_1 = extractelement <2 x double> undef, i32 1 + %v3i64_1 = extractelement <3 x i64> undef, i32 1 + %v4i64_1 = extractelement <4 x i64> undef, i32 1 + %v5i64_1 = extractelement <5 x i64> undef, i32 1 + %v8i64_1 = extractelement <8 x i64> undef, i32 1 + + %v3i64_2 = extractelement <3 x i64> undef, i32 2 + %v4i64_2 = extractelement <4 x i64> undef, i32 2 + %v5i64_2 = extractelement <5 x i64> undef, i32 2 + %v8i64_2 = extractelement <8 x i64> undef, i32 2 + + %v4i64_3 = extractelement <4 x i64> undef, i32 3 + %v5i64_3 = extractelement <5 x i64> undef, i32 3 + %v8i64_3 = extractelement <8 x i64> undef, i32 3 + + %v2i64_a = extractelement <2 x i64> undef, i32 %arg + %v4i64_a = extractelement <4 x i64> undef, i32 %arg + %v8i64_a = extractelement <8 x i64> undef, i32 %arg ret void } -define amdgpu_kernel void @extractelement_8() { +define amdgpu_kernel void @extractelement_8(i32 %arg) { ; GCN-LABEL: 'extractelement_8' -; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_1 = extractelement <4 x i8> undef, i8 1 +; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0 +; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i8_0 = extractelement <3 x i8> undef, i32 0 +; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = extractelement <4 x i8> undef, i32 0 +; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i8_0 = extractelement <5 x i8> undef, i32 0 +; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = extractelement <8 x i8> undef, i32 0 +; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = extractelement <2 x i8> 
undef, i32 1 +; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i8_1 = extractelement <3 x i8> undef, i32 1 +; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_1 = extractelement <4 x i8> undef, i32 1 +; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i8_1 = extractelement <5 x i8> undef, i32 1 +; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_1 = extractelement <8 x i8> undef, i32 1 +; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i8_2 = extractelement <3 x i8> undef, i32 2 +; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_2 = extractelement <4 x i8> undef, i32 2 +; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i8_2 = extractelement <5 x i8> undef, i32 2 +; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_2 = extractelement <8 x i8> undef, i32 2 +; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = extractelement <4 x i8> undef, i32 3 +; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i8_3 = extractelement <5 x i8> undef, i32 3 +; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_3 = extractelement <8 x i8> undef, i32 3 +; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg +; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg +; GCN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = extractelement <8 x i8> undef, i32 %arg ; GCN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GCN-SIZE-LABEL: 'extractelement_8' -; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_1 = extractelement <4 x i8> undef, i8 1 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 
= extractelement <2 x i8> undef, i32 0 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i8_0 = extractelement <3 x i8> undef, i32 0 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = extractelement <4 x i8> undef, i32 0 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i8_0 = extractelement <5 x i8> undef, i32 0 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = extractelement <8 x i8> undef, i32 0 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i8_1 = extractelement <3 x i8> undef, i32 1 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_1 = extractelement <4 x i8> undef, i32 1 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i8_1 = extractelement <5 x i8> undef, i32 1 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_1 = extractelement <8 x i8> undef, i32 1 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i8_2 = extractelement <3 x i8> undef, i32 2 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_2 = extractelement <4 x i8> undef, i32 2 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i8_2 = extractelement <5 x i8> undef, i32 2 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_2 = extractelement <8 x i8> undef, i32 2 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = extractelement <4 x i8> undef, i32 3 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i8_3 = extractelement <5 x i8> undef, i32 3 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_3 = extractelement <8 x i8> 
undef, i32 3 +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg +; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = extractelement <8 x i8> undef, i32 %arg ; GCN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; - %v4i8_1 = extractelement <4 x i8> undef, i8 1 + %v2i8_0 = extractelement <2 x i8> undef, i32 0 + %v3i8_0 = extractelement <3 x i8> undef, i32 0 + %v4i8_0 = extractelement <4 x i8> undef, i32 0 + %v5i8_0 = extractelement <5 x i8> undef, i32 0 + %v8i8_0 = extractelement <8 x i8> undef, i32 0 + + %v2i8_1 = extractelement <2 x i8> undef, i32 1 + %v3i8_1 = extractelement <3 x i8> undef, i32 1 + %v4i8_1 = extractelement <4 x i8> undef, i32 1 + %v5i8_1 = extractelement <5 x i8> undef, i32 1 + %v8i8_1 = extractelement <8 x i8> undef, i32 1 + + %v3i8_2 = extractelement <3 x i8> undef, i32 2 + %v4i8_2 = extractelement <4 x i8> undef, i32 2 + %v5i8_2 = extractelement <5 x i8> undef, i32 2 + %v8i8_2 = extractelement <8 x i8> undef, i32 2 + + %v4i8_3 = extractelement <4 x i8> undef, i32 3 + %v5i8_3 = extractelement <5 x i8> undef, i32 3 + %v8i8_3 = extractelement <8 x i8> undef, i32 3 + + %v2i8_a = extractelement <2 x i8> undef, i32 %arg + %v4i8_a = extractelement <4 x i8> undef, i32 %arg + %v8i8_a = extractelement <8 x i8> undef, i32 %arg ret void } define amdgpu_kernel void @extractelement_16(i32 %arg) { ; CI-LABEL: 'extractelement_16' -; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i16 0 -; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i16 1 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i32 0 +; CI-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %v2f16_0 = extractelement <2 x half> undef, i32 0 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i16_0 = extractelement <3 x i16> undef, i32 0 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = extractelement <4 x i16> undef, i32 0 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_0 = extractelement <5 x i16> undef, i32 0 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i32 1 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_1 = extractelement <2 x half> undef, i32 1 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i16_1 = extractelement <3 x i16> undef, i32 1 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = extractelement <4 x i16> undef, i32 1 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_1 = extractelement <5 x i16> undef, i32 1 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_1 = extractelement <8 x i16> undef, i32 1 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i16_2 = extractelement <3 x i16> undef, i32 2 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_2 = extractelement <4 x i16> undef, i32 2 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_2 = extractelement <5 x i16> undef, i32 2 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2 = extractelement <8 x i16> undef, i32 2 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = extractelement <4 x i16> undef, i32 3 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_3 = extractelement <5 x i16> 
undef, i32 3 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_3 = extractelement <8 x i16> undef, i32 3 ; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg ; CI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX89-LABEL: 'extractelement_16' -; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i16 0 -; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i16 1 +; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i32 0 +; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f16_0 = extractelement <2 x half> undef, i32 0 +; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i16_0 = extractelement <3 x i16> undef, i32 0 +; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16_0 = extractelement <4 x i16> undef, i32 0 +; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i16_0 = extractelement <5 x i16> undef, i32 0 +; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i32 1 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_1 = extractelement <2 x half> undef, i32 1 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i16_1 = extractelement <3 x i16> undef, i32 1 +; GFX89-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v4i16_1 = extractelement <4 x i16> undef, i32 1 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_1 = extractelement <5 x i16> undef, i32 1 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_1 = extractelement <8 x i16> undef, i32 1 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i16_2 = extractelement <3 x i16> undef, i32 2 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_2 = extractelement <4 x i16> undef, i32 2 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_2 = extractelement <5 x i16> undef, i32 2 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2 = extractelement <8 x i16> undef, i32 2 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = extractelement <4 x i16> undef, i32 3 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_3 = extractelement <5 x i16> undef, i32 3 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_3 = extractelement <8 x i16> undef, i32 3 ; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg ; GFX89-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CI-SIZE-LABEL: 'extractelement_16' -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i16 0 -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i16 1 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v2i16_0 = extractelement <2 x i16> undef, i32 0 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_0 = extractelement <2 x half> undef, i32 0 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i16_0 = extractelement <3 x i16> undef, i32 0 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = extractelement <4 x i16> undef, i32 0 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_0 = extractelement <5 x i16> undef, i32 0 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i32 1 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_1 = extractelement <2 x half> undef, i32 1 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i16_1 = extractelement <3 x i16> undef, i32 1 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = extractelement <4 x i16> undef, i32 1 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_1 = extractelement <5 x i16> undef, i32 1 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_1 = extractelement <8 x i16> undef, i32 1 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i16_2 = extractelement <3 x i16> undef, i32 2 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_2 = extractelement <4 x i16> undef, i32 2 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_2 = extractelement <5 x i16> undef, i32 2 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2 = extractelement <8 x i16> undef, i32 2 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v4i16_3 = extractelement <4 x i16> undef, i32 3 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_3 = extractelement <5 x i16> undef, i32 3 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_3 = extractelement <8 x i16> undef, i32 3 ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GFX89-SIZE-LABEL: 'extractelement_16' -; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i16 0 -; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i16 1 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i32 0 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f16_0 = extractelement <2 x half> undef, i32 0 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i16_0 = extractelement <3 x i16> undef, i32 0 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16_0 = extractelement <4 x i16> undef, i32 0 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i16_0 = extractelement <5 x i16> undef, i32 0 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i32 1 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %v2f16_1 = extractelement <2 x half> undef, i32 1 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i16_1 = extractelement <3 x i16> undef, i32 1 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = extractelement <4 x i16> undef, i32 1 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_1 = extractelement <5 x i16> undef, i32 1 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_1 = extractelement <8 x i16> undef, i32 1 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i16_2 = extractelement <3 x i16> undef, i32 2 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_2 = extractelement <4 x i16> undef, i32 2 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_2 = extractelement <5 x i16> undef, i32 2 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2 = extractelement <8 x i16> undef, i32 2 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = extractelement <4 x i16> undef, i32 3 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_3 = extractelement <5 x i16> undef, i32 3 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_3 = extractelement <8 x i16> undef, i32 3 ; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg ; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; - %v2i16_0 = extractelement <2 x i16> undef, i16 
0 - %v2i16_1 = extractelement <2 x i16> undef, i16 1 + %v2i16_0 = extractelement <2 x i16> undef, i32 0 + %v2f16_0 = extractelement <2 x half> undef, i32 0 + %v3i16_0 = extractelement <3 x i16> undef, i32 0 + %v4i16_0 = extractelement <4 x i16> undef, i32 0 + %v5i16_0 = extractelement <5 x i16> undef, i32 0 + %v8i16_0 = extractelement <8 x i16> undef, i32 0 + + %v2i16_1 = extractelement <2 x i16> undef, i32 1 + %v2f16_1 = extractelement <2 x half> undef, i32 1 + %v3i16_1 = extractelement <3 x i16> undef, i32 1 + %v4i16_1 = extractelement <4 x i16> undef, i32 1 + %v5i16_1 = extractelement <5 x i16> undef, i32 1 + %v8i16_1 = extractelement <8 x i16> undef, i32 1 + + %v3i16_2 = extractelement <3 x i16> undef, i32 2 + %v4i16_2 = extractelement <4 x i16> undef, i32 2 + %v5i16_2 = extractelement <5 x i16> undef, i32 2 + %v8i16_2 = extractelement <8 x i16> undef, i32 2 + + %v4i16_3 = extractelement <4 x i16> undef, i32 3 + %v5i16_3 = extractelement <5 x i16> undef, i32 3 + %v8i16_3 = extractelement <8 x i16> undef, i32 3 + %v2i16_a = extractelement <2 x i16> undef, i32 %arg + %v4i16_a = extractelement <4 x i16> undef, i32 %arg + %v8i16_a = extractelement <8 x i16> undef, i32 %arg ret void } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll b/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll index 7475a4d..4a01b1b 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll @@ -8,6 +8,7 @@ define amdgpu_kernel void @fabs_f32() #0 { ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f32 = call float @llvm.fabs.f32(float undef) #2 ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef) #2 ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = call <3 x float> @llvm.fabs.v3f32(<3 x float> undef) #2 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = call <4 x float> 
@llvm.fabs.v4f32(<4 x float> undef) #2 ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = call <5 x float> @llvm.fabs.v5f32(<5 x float> undef) #2 ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; @@ -15,12 +16,14 @@ define amdgpu_kernel void @fabs_f32() #0 { ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f32 = call float @llvm.fabs.f32(float undef) #2 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef) #2 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = call <3 x float> @llvm.fabs.v3f32(<3 x float> undef) #2 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) #2 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = call <5 x float> @llvm.fabs.v5f32(<5 x float> undef) #2 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f32 = call float @llvm.fabs.f32(float undef) #1 %v2f32 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef) #1 %v3f32 = call <3 x float> @llvm.fabs.v3f32(<3 x float> undef) #1 + %v4f32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) #1 %v5f32 = call <5 x float> @llvm.fabs.v5f32(<5 x float> undef) #1 ret void } @@ -30,17 +33,20 @@ define amdgpu_kernel void @fabs_f64() #0 { ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f64 = call double @llvm.fabs.f64(double undef) #2 ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) #2 ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f64 = call <3 x double> @llvm.fabs.v3f64(<3 x double> undef) #2 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> 
undef) #2 ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'fabs_f64' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f64 = call double @llvm.fabs.f64(double undef) #2 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) #2 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f64 = call <3 x double> @llvm.fabs.v3f64(<3 x double> undef) #2 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) #2 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f64 = call double @llvm.fabs.f64(double undef) #1 %v2f64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) #1 %v3f64 = call <3 x double> @llvm.fabs.v3f64(<3 x double> undef) #1 + %v4f64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) #1 ret void } @@ -49,32 +55,42 @@ define amdgpu_kernel void @fabs_f16() #0 { ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f16 = call half @llvm.fabs.f16(half undef) #2 ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef) #2 ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = call <3 x half> @llvm.fabs.v3f16(<3 x half> undef) #2 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef) #2 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f16 = call <5 x half> @llvm.fabs.v5f16(<5 x half> undef) #2 ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'fabs_f16' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f16 = call half @llvm.fabs.f16(half undef) #2 ; ALL-SIZE-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: %v2f16 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef) #2 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = call <3 x half> @llvm.fabs.v3f16(<3 x half> undef) #2 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef) #2 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f16 = call <5 x half> @llvm.fabs.v5f16(<5 x half> undef) #2 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = call half @llvm.fabs.f16(half undef) #1 %v2f16 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef) #1 %v3f16 = call <3 x half> @llvm.fabs.v3f16(<3 x half> undef) #1 + %v4f16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef) #1 + %v5f16 = call <5 x half> @llvm.fabs.v5f16(<5 x half> undef) #1 ret void } declare float @llvm.fabs.f32(float) #1 declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #1 declare <3 x float> @llvm.fabs.v3f32(<3 x float>) #1 +declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #1 declare <5 x float> @llvm.fabs.v5f32(<5 x float>) #1 declare double @llvm.fabs.f64(double) #1 declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #1 declare <3 x double> @llvm.fabs.v3f64(<3 x double>) #1 +declare <4 x double> @llvm.fabs.v4f64(<4 x double>) #1 declare half @llvm.fabs.f16(half) #1 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1 declare <3 x half> @llvm.fabs.v3f16(<3 x half>) #1 +declare <4 x half> @llvm.fabs.v4f16(<4 x half>) #1 +declare <5 x half> @llvm.fabs.v5f16(<5 x half>) #1 attributes #0 = { nounwind } attributes #1 = { nounwind readnone } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll b/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll index a733f4a..2decec0 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll @@ -12,6 +12,7 @@ define amdgpu_kernel void @fadd_f32() #0 { ; 
GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fadd float undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fadd <2 x float> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fadd <3 x float> undef, undef +; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fadd <4 x float> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fadd <5 x float> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; @@ -19,6 +20,7 @@ define amdgpu_kernel void @fadd_f32() #0 { ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fadd float undef, undef ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fadd <2 x float> undef, undef ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fadd <3 x float> undef, undef +; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fadd <4 x float> undef, undef ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fadd <5 x float> undef, undef ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; @@ -26,6 +28,7 @@ define amdgpu_kernel void @fadd_f32() #0 { ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fadd float undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fadd <2 x float> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fadd <3 x float> undef, undef +; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fadd <4 x float> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost 
Model: Found an estimated cost of 3 for instruction: %v5f32 = fadd <5 x float> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; @@ -33,12 +36,14 @@ define amdgpu_kernel void @fadd_f32() #0 { ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fadd float undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fadd <2 x float> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fadd <3 x float> undef, undef +; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fadd <4 x float> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fadd <5 x float> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f32 = fadd float undef, undef %v2f32 = fadd <2 x float> undef, undef %v3f32 = fadd <3 x float> undef, undef + %v4f32 = fadd <4 x float> undef, undef %v5f32 = fadd <5 x float> undef, undef ret void } @@ -48,35 +53,47 @@ define amdgpu_kernel void @fadd_f64() #0 { ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = fadd double undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fadd <2 x double> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fadd <3 x double> undef, undef +; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fadd <4 x double> undef, undef +; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fadd <5 x double> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FASTF64-LABEL: 'fadd_f64' ; FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %f64 = fadd double undef, undef ; FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fadd <2 x double> undef, undef ; FASTF64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fadd <3 x double> undef, undef +; FASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fadd <4 x double> undef, undef +; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fadd <5 x double> undef, undef ; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOWF64-LABEL: 'fadd_f64' ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64 = fadd double undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fadd <2 x double> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fadd <3 x double> undef, undef +; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fadd <4 x double> undef, undef +; SLOWF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f64 = fadd <5 x double> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX90A-FASTF64-SIZE-LABEL: 'fadd_f64' ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = fadd double undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fadd <2 x double> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fadd <3 x double> undef, undef +; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fadd <4 x double> undef, undef +; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fadd <5 x double> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: ret void ; ; NOPACKEDF32-SIZE-LABEL: 'fadd_f64' ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f64 = fadd double undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fadd <2 x double> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fadd <3 x double> undef, undef +; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fadd <4 x double> undef, undef +; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fadd <5 x double> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f64 = fadd double undef, undef %v2f64 = fadd <2 x double> undef, undef %v3f64 = fadd <3 x double> undef, undef + %v4f64 = fadd <4 x double> undef, undef + %v5f64 = fadd <5 x double> undef, undef ret void } @@ -86,6 +103,7 @@ define amdgpu_kernel void @fadd_f16() #0 { ; FASTF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fadd <2 x half> undef, undef ; FASTF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fadd <3 x half> undef, undef ; FASTF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fadd <4 x half> undef, undef +; FASTF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fadd <5 x half> undef, undef ; FASTF16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOWF64-LABEL: 'fadd_f16' @@ -93,6 +111,7 @@ define amdgpu_kernel void @fadd_f16() #0 { ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fadd <2 x half> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fadd <3 x half> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fadd <4 x half> undef, 
undef +; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fadd <5 x half> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FASTF16-SIZE-LABEL: 'fadd_f16' @@ -100,6 +119,7 @@ define amdgpu_kernel void @fadd_f16() #0 { ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fadd <2 x half> undef, undef ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fadd <3 x half> undef, undef ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fadd <4 x half> undef, undef +; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fadd <5 x half> undef, undef ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLOWF64-SIZE-LABEL: 'fadd_f16' @@ -107,12 +127,14 @@ define amdgpu_kernel void @fadd_f16() #0 { ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fadd <2 x half> undef, undef ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fadd <3 x half> undef, undef ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fadd <4 x half> undef, undef +; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fadd <5 x half> undef, undef ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = fadd half undef, undef %v2f16 = fadd <2 x half> undef, undef %v3f16 = fadd <3 x half> undef, undef %v4f16 = fadd <4 x half> undef, undef + %v5f16 = fadd <5 x half> undef, undef ret void } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll index fc641bb..ff300dd 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll @@ -17,6 +17,7 @@ define amdgpu_kernel void 
@fdiv_f32_ieee() #0 { ; ALL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; @@ -24,12 +25,14 @@ define amdgpu_kernel void @fdiv_f32_ieee() #0 { ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f32 = fdiv float undef, undef %v2f32 = fdiv <2 x float> undef, undef %v3f32 = fdiv <3 x float> undef, undef + %v4f32 = fdiv <4 x float> undef, undef %v5f32 = fdiv <5 x float> undef, undef ret void } @@ -39,6 +42,7 @@ define amdgpu_kernel void @fdiv_f32_ftzdaz() #1 { ; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f32 = fdiv float undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v2f32 = fdiv <2 x float> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f32 = fdiv <3 x float> undef, undef +; ALL-NEXT: Cost Model: Found an 
estimated cost of 64 for instruction: %v4f32 = fdiv <4 x float> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v5f32 = fdiv <5 x float> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; @@ -46,12 +50,14 @@ define amdgpu_kernel void @fdiv_f32_ftzdaz() #1 { ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f32 = fdiv float undef, undef %v2f32 = fdiv <2 x float> undef, undef %v3f32 = fdiv <3 x float> undef, undef + %v4f32 = fdiv <4 x float> undef, undef %v5f32 = fdiv <5 x float> undef, undef ret void } @@ -61,53 +67,71 @@ define amdgpu_kernel void @fdiv_f64() #0 { ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f64 = fdiv double undef, undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> undef, undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> undef, undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> undef, undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v5f64 = fdiv <5 x double> undef, undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CISLOWF64-LABEL: 'fdiv_f64' ; CISLOWF64-NEXT: Cost Model: Found an 
estimated cost of 38 for instruction: %f64 = fdiv double undef, undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> undef, undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> undef, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> undef, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> undef, undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SIFASTF64-LABEL: 'fdiv_f64' ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %f64 = fdiv double undef, undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> undef, undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> undef, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> undef, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v5f64 = fdiv <5 x double> undef, undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SISLOWF64-LABEL: 'fdiv_f64' ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %f64 = fdiv double undef, undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> undef, undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> undef, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> undef, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 205 for instruction: %v5f64 = fdiv <5 x double> undef, undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 
10 for instruction: ret void ; ; FP16-LABEL: 'fdiv_f64' ; FP16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double undef, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> undef, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> undef, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CI-SIZE-LABEL: 'fdiv_f64' ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double undef, undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> undef, undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> undef, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> undef, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> undef, undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SI-SIZE-LABEL: 'fdiv_f64' ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %f64 = fdiv double undef, undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> undef, undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> undef, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> undef, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %v5f64 = fdiv <5 x double> undef, undef ; SI-SIZE-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: ret void ; ; FP16-SIZE-LABEL: 'fdiv_f64' ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double undef, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> undef, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> undef, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f64 = fdiv double undef, undef %v2f64 = fdiv <2 x double> undef, undef %v3f64 = fdiv <3 x double> undef, undef + %v4f64 = fdiv <4 x double> undef, undef + %v5f64 = fdiv <5 x double> undef, undef ret void } @@ -115,30 +139,40 @@ define amdgpu_kernel void @fdiv_f16_f32ieee() #0 { ; NOFP16-LABEL: 'fdiv_f16_f32ieee' ; NOFP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half undef, undef ; NOFP16-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> undef, undef +; NOFP16-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> undef, undef ; NOFP16-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> undef, undef +; NOFP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> undef, undef ; NOFP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FP16-LABEL: 'fdiv_f16_f32ieee' ; FP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half undef, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> undef, undef +; FP16-NEXT: Cost Model: Found an 
estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> undef, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v5f16 = fdiv <5 x half> undef, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; NOFP16-SIZE-LABEL: 'fdiv_f16_f32ieee' ; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half undef, undef ; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> undef, undef +; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> undef, undef ; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef +; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> undef, undef ; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; FP16-SIZE-LABEL: 'fdiv_f16_f32ieee' ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f16 = fdiv half undef, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f16 = fdiv <2 x half> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v3f16 = fdiv <3 x half> undef, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v4f16 = fdiv <4 x half> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v5f16 = fdiv <5 x half> undef, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = fdiv half undef, undef %v2f16 = fdiv <2 x half> undef, undef + %v3f16 = fdiv <3 x half> undef, undef %v4f16 = fdiv <4 x half> undef, undef + %v5f16 = fdiv <5 x half> undef, undef ret void } @@ -146,125 +180,366 
@@ define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 { ; NOFP16-LABEL: 'fdiv_f16_f32ftzdaz' ; NOFP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f16 = fdiv half undef, undef ; NOFP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v2f16 = fdiv <2 x half> undef, undef +; NOFP16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v3f16 = fdiv <3 x half> undef, undef ; NOFP16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f16 = fdiv <4 x half> undef, undef +; NOFP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> undef, undef ; NOFP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FP16-LABEL: 'fdiv_f16_f32ftzdaz' ; FP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half undef, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> undef, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v5f16 = fdiv <5 x half> undef, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; NOFP16-SIZE-LABEL: 'fdiv_f16_f32ftzdaz' ; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half undef, undef ; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> undef, undef +; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> undef, undef ; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> undef, undef +; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x 
half> undef, undef ; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; FP16-SIZE-LABEL: 'fdiv_f16_f32ftzdaz' ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f16 = fdiv half undef, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f16 = fdiv <2 x half> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v3f16 = fdiv <3 x half> undef, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v4f16 = fdiv <4 x half> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v5f16 = fdiv <5 x half> undef, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = fdiv half undef, undef %v2f16 = fdiv <2 x half> undef, undef + %v3f16 = fdiv <3 x half> undef, undef %v4f16 = fdiv <4 x half> undef, undef + %v5f16 = fdiv <5 x half> undef, undef ret void } define amdgpu_kernel void @rcp_ieee() #0 { ; CIFASTF64-LABEL: 'rcp_ieee' -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f64 = fdiv double 1.000000e+00, undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; CIFASTF64-NEXT: Cost 
Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f64 = fdiv double 1.000000e+00, undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v5f64 = fdiv <5 x double> , undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CISLOWF64-LABEL: 'rcp_ieee' -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x 
half> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> , undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SIFASTF64-LABEL: 'rcp_ieee' -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %f64 = fdiv double 1.000000e+00, undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost 
of 56 for instruction: %v3f16 = fdiv <3 x half> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %f64 = fdiv double 1.000000e+00, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v5f64 = fdiv <5 x double> , undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SISLOWF64-LABEL: 'rcp_ieee' -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %f64 = fdiv double 1.000000e+00, undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef ; 
SISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %f64 = fdiv double 1.000000e+00, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 205 for instruction: %v5f64 = fdiv <5 x double> , undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FP16-LABEL: 'rcp_ieee' -; FP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef -; FP16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 
1.000000e+00, undef -; FP16-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef ; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f16 = fdiv <5 x half> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> , undef ; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CI-SIZE-LABEL: 'rcp_ieee' -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half 0xH3C00, undef -; CI-SIZE-NEXT: Cost Model: 
Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> , undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> , undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SI-SIZE-LABEL: 'rcp_ieee' -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef ; SI-SIZE-NEXT: Cost Model: 
Found an estimated cost of 12 for instruction: %f16 = fdiv half 0xH3C00, undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %f64 = fdiv double 1.000000e+00, undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> , undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %f64 = fdiv double 1.000000e+00, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %v5f64 = fdiv <5 x double> , undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; FP16-SIZE-LABEL: 'rcp_ieee' -; FP16-SIZE-NEXT: Cost Model: 
Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> , undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v5f16 = fdiv <5 x half> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> , undef ; 
FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; - %f32 = fdiv float 1.0, undef %f16 = fdiv half 1.0, undef - %f64 = fdiv double 1.0, undef - %v2f32 = fdiv <2 x float> , undef %v2f16 = fdiv <2 x half> , undef + %v3f16 = fdiv <3 x half> , undef + %v4f16 = fdiv <4 x half> , undef + %v5f16 = fdiv <5 x half> , undef + %f32 = fdiv float 1.0, undef + %v2f32 = fdiv <2 x float> , undef + %v3f32 = fdiv <3 x float> , undef + %v4f32 = fdiv <4 x float> , undef + %v5f32 = fdiv <5 x float> , undef + %f64 = fdiv double 1.0, undef + %v2f64 = fdiv <2 x double> , undef + %v3f64 = fdiv <3 x double> , undef + %v4f64 = fdiv <4 x double> , undef + %v5f64 = fdiv <5 x double> , undef ret void } define amdgpu_kernel void @rcp_ftzdaz() #1 { -; ALL-LABEL: 'rcp_ftzdaz' -; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef -; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef -; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> , undef -; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef -; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; CIFASTF64-LABEL: 'rcp_ftzdaz' +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef +; 
CIFASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f64 = fdiv double 1.000000e+00, undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v5f64 = fdiv <5 x double> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; -; ALL-SIZE-LABEL: 'rcp_ftzdaz' -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> , undef -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> , undef -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CISLOWF64-LABEL: 'rcp_ftzdaz' +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = 
fdiv <3 x half> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; SIFASTF64-LABEL: 'rcp_ftzdaz' +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost 
of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %f64 = fdiv double 1.000000e+00, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v5f64 = fdiv <5 x double> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; SISLOWF64-LABEL: 'rcp_ftzdaz' +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef +; SISLOWF64-NEXT: 
Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %f64 = fdiv double 1.000000e+00, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 205 for instruction: %v5f64 = fdiv <5 x double> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; FP16-LABEL: 'rcp_ftzdaz' +; FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f16 = fdiv <5 x half> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> , undef +; FP16-NEXT: Cost Model: Found 
an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; CI-SIZE-LABEL: 'rcp_ftzdaz' +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for 
instruction: %f64 = fdiv double 1.000000e+00, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SI-SIZE-LABEL: 'rcp_ftzdaz' +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %f64 = fdiv double 1.000000e+00, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for 
instruction: %v3f64 = fdiv <3 x double> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %v5f64 = fdiv <5 x double> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; FP16-SIZE-LABEL: 'rcp_ftzdaz' +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v5f16 = fdiv <5 x half> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> , undef +; FP16-SIZE-NEXT: Cost Model: Found an 
estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; - %f32 = fdiv float 1.0, undef %f16 = fdiv half 1.0, undef - %v2f32 = fdiv <2 x float> , undef %v2f16 = fdiv <2 x half> , undef + %v3f16 = fdiv <3 x half> , undef + %v4f16 = fdiv <4 x half> , undef + %v5f16 = fdiv <5 x half> , undef + %f32 = fdiv float 1.0, undef + %v2f32 = fdiv <2 x float> , undef + %v3f32 = fdiv <3 x float> , undef + %v4f32 = fdiv <4 x float> , undef + %v5f32 = fdiv <5 x float> , undef + %f64 = fdiv double 1.0, undef + %v2f64 = fdiv <2 x double> , undef + %v3f64 = fdiv <3 x double> , undef + %v4f64 = fdiv <4 x double> , undef + %v5f64 = fdiv <5 x double> , undef ret void } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fma.ll b/llvm/test/Analysis/CostModel/AMDGPU/fma.ll index d9e4e45..55ea145 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fma.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fma.ll @@ -12,6 +12,7 @@ define amdgpu_kernel void @fma_f32() #0 { ; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2 ; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2 ; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2 ; GFX90A-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2 ; GFX90A-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; @@ -19,6 +20,7 
@@ define amdgpu_kernel void @fma_f32() #0 { ; GFX900-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2 ; GFX900-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2 ; GFX900-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2 +; GFX900-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2 ; GFX900-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2 ; GFX900-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; @@ -26,6 +28,7 @@ define amdgpu_kernel void @fma_f32() #0 { ; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2 ; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2 ; SLOW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2 ; SLOW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2 ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; @@ -33,6 +36,7 @@ define 
amdgpu_kernel void @fma_f32() #0 { ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2 ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2 ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2 +; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2 ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2 ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; @@ -40,12 +44,14 @@ define amdgpu_kernel void @fma_f32() #0 { ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2 ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2 ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2 +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2 ; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2 ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f32 = call float 
@llvm.fma.f32(float undef, float undef, float undef) #1 %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #1 %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #1 + %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #1 %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #1 ret void } @@ -55,35 +61,47 @@ define amdgpu_kernel void @fma_f64() #0 { ; GFX90A-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.fma.f64(double undef, double undef, double undef) #2 ; GFX90A-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) #2 ; GFX90A-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = call <3 x double> @llvm.fma.v3f64(<3 x double> undef, <3 x double> undef, <3 x double> undef) #2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) #2 +; GFX90A-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = call <5 x double> @llvm.fma.v5f64(<5 x double> undef, <5 x double> undef, <5 x double> undef) #2 ; GFX90A-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX900-LABEL: 'fma_f64' ; GFX900-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f64 = call double @llvm.fma.f64(double undef, double undef, double undef) #2 ; GFX900-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) #2 ; GFX900-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = call <3 x double> @llvm.fma.v3f64(<3 x double> undef, <3 x double> 
undef, <3 x double> undef) #2 +; GFX900-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) #2 +; GFX900-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = call <5 x double> @llvm.fma.v5f64(<5 x double> undef, <5 x double> undef, <5 x double> undef) #2 ; GFX900-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW-LABEL: 'fma_f64' ; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64 = call double @llvm.fma.f64(double undef, double undef, double undef) #2 ; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) #2 ; SLOW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = call <3 x double> @llvm.fma.v3f64(<3 x double> undef, <3 x double> undef, <3 x double> undef) #2 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) #2 +; SLOW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f64 = call <5 x double> @llvm.fma.v5f64(<5 x double> undef, <5 x double> undef, <5 x double> undef) #2 ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX90A-SIZE-LABEL: 'fma_f64' ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.fma.f64(double undef, double undef, double undef) #2 ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) #2 ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = call <3 x double> @llvm.fma.v3f64(<3 x double> undef, <3 x double> undef, <3 x double> undef) #2 
+; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) #2 +; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = call <5 x double> @llvm.fma.v5f64(<5 x double> undef, <5 x double> undef, <5 x double> undef) #2 ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'fma_f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f64 = call double @llvm.fma.f64(double undef, double undef, double undef) #2 ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) #2 ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = call <3 x double> @llvm.fma.v3f64(<3 x double> undef, <3 x double> undef, <3 x double> undef) #2 +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) #2 +; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = call <5 x double> @llvm.fma.v5f64(<5 x double> undef, <5 x double> undef, <5 x double> undef) #2 ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f64 = call double @llvm.fma.f64(double undef, double undef, double undef) #1 %v2f64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) #1 %v3f64 = call <3 x double> @llvm.fma.v3f64(<3 x double> undef, <3 x double> undef, <3 x double> undef) #1 + %v4f64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) #1 + %v5f64 = call <5 x double> @llvm.fma.v5f64(<5 x double> undef, <5 x double> undef, <5 x double> undef) #1 ret void } @@ -92,44 +110,59 @@ define amdgpu_kernel void @fma_f16() 
#0 { ; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.fma.f16(half undef, half undef, half undef) #2 ; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef) #2 ; GFX9-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = call <3 x half> @llvm.fma.v3f16(<3 x half> undef, <3 x half> undef, <3 x half> undef) #2 +; GFX9-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef) #2 +; GFX9-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = call <5 x half> @llvm.fma.v5f16(<5 x half> undef, <5 x half> undef, <5 x half> undef) #2 ; GFX9-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW-LABEL: 'fma_f16' ; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = call half @llvm.fma.f16(half undef, half undef, half undef) #2 ; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef) #2 ; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = call <3 x half> @llvm.fma.v3f16(<3 x half> undef, <3 x half> undef, <3 x half> undef) #2 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef) #2 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = call <5 x half> @llvm.fma.v5f16(<5 x half> undef, <5 x half> undef, <5 x half> undef) #2 ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX9-SIZE-LABEL: 'fma_f16' ; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.fma.f16(half undef, half undef, 
half undef) #2 ; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef) #2 ; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = call <3 x half> @llvm.fma.v3f16(<3 x half> undef, <3 x half> undef, <3 x half> undef) #2 +; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef) #2 +; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = call <5 x half> @llvm.fma.v5f16(<5 x half> undef, <5 x half> undef, <5 x half> undef) #2 ; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLOW-SIZE-LABEL: 'fma_f16' ; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.fma.f16(half undef, half undef, half undef) #2 ; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef) #2 ; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.fma.v3f16(<3 x half> undef, <3 x half> undef, <3 x half> undef) #2 +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef) #2 +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = call <5 x half> @llvm.fma.v5f16(<5 x half> undef, <5 x half> undef, <5 x half> undef) #2 ; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = call half @llvm.fma.f16(half undef, half undef, half undef) #1 %v2f16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef) #1 %v3f16 = call <3 x half> @llvm.fma.v3f16(<3 x half> undef, <3 x half> 
undef, <3 x half> undef) #1 + %v4f16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef) #1 + %v5f16 = call <5 x half> @llvm.fma.v5f16(<5 x half> undef, <5 x half> undef, <5 x half> undef) #1 ret void } declare float @llvm.fma.f32(float, float, float) #1 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) #1 declare <3 x float> @llvm.fma.v3f32(<3 x float>, <3 x float>, <3 x float>) #1 +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1 declare <5 x float> @llvm.fma.v5f32(<5 x float>, <5 x float>, <5 x float>) #1 declare double @llvm.fma.f64(double, double, double) #1 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #1 declare <3 x double> @llvm.fma.v3f64(<3 x double>, <3 x double>, <3 x double>) #1 +declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) #1 +declare <5 x double> @llvm.fma.v5f64(<5 x double>, <5 x double>, <5 x double>) #1 declare half @llvm.fma.f16(half, half, half) #1 declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #1 declare <3 x half> @llvm.fma.v3f16(<3 x half>, <3 x half>, <3 x half>) #1 +declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>) #1 +declare <5 x half> @llvm.fma.v5f16(<5 x half>, <5 x half>, <5 x half>) #1 attributes #0 = { nounwind } attributes #1 = { nounwind readnone } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll b/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll index 82e8f0f..5eb4aa7 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll @@ -12,6 +12,7 @@ define amdgpu_kernel void @fmul_f32() #0 { ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fmul <2 x float> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %v3f32 = fmul <3 x float> undef, undef +; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fmul <4 x float> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fmul <5 x float> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; @@ -19,6 +20,7 @@ define amdgpu_kernel void @fmul_f32() #0 { ; F32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef ; F32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fmul <2 x float> undef, undef ; F32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fmul <3 x float> undef, undef +; F32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fmul <4 x float> undef, undef ; F32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fmul <5 x float> undef, undef ; F32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; @@ -26,6 +28,7 @@ define amdgpu_kernel void @fmul_f32() #0 { ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fmul <2 x float> undef, undef ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fmul <3 x float> undef, undef +; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fmul <4 x float> undef, undef ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fmul <5 x float> undef, undef ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; @@ -33,12 +36,14 @@ define amdgpu_kernel void @fmul_f32() #0 { ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef ; SIZE-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %v2f32 = fmul <2 x float> undef, undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fmul <3 x float> undef, undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fmul <4 x float> undef, undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fmul <5 x float> undef, undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f32 = fmul float undef, undef %v2f32 = fmul <2 x float> undef, undef %v3f32 = fmul <3 x float> undef, undef + %v4f32 = fmul <4 x float> undef, undef %v5f32 = fmul <5 x float> undef, undef ret void } @@ -48,35 +53,47 @@ define amdgpu_kernel void @fmul_f64() #0 { ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = fmul double undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fmul <2 x double> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fmul <3 x double> undef, undef +; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fmul <4 x double> undef, undef +; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fmul <5 x double> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FASTF64-LABEL: 'fmul_f64' ; FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f64 = fmul double undef, undef ; FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fmul <2 x double> undef, undef ; FASTF64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fmul <3 x double> undef, undef +; FASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fmul <4 x double> undef, undef +; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: 
%v5f64 = fmul <5 x double> undef, undef ; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW-LABEL: 'fmul_f64' ; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64 = fmul double undef, undef ; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fmul <2 x double> undef, undef ; SLOW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fmul <3 x double> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fmul <4 x double> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f64 = fmul <5 x double> undef, undef ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX90A-SIZE-LABEL: 'fmul_f64' ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = fmul double undef, undef ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fmul <2 x double> undef, undef ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fmul <3 x double> undef, undef +; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fmul <4 x double> undef, undef +; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fmul <5 x double> undef, undef ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'fmul_f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f64 = fmul double undef, undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fmul <2 x double> undef, undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fmul <3 x double> undef, undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fmul <4 x double> undef, undef +; SIZE-NEXT: Cost Model: Found an estimated 
cost of 10 for instruction: %v5f64 = fmul <5 x double> undef, undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f64 = fmul double undef, undef %v2f64 = fmul <2 x double> undef, undef %v3f64 = fmul <3 x double> undef, undef + %v4f64 = fmul <4 x double> undef, undef + %v5f64 = fmul <5 x double> undef, undef ret void } @@ -86,6 +103,7 @@ define amdgpu_kernel void @fmul_f16() #0 { ; GFX9-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fmul <2 x half> undef, undef ; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fmul <3 x half> undef, undef ; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fmul <4 x half> undef, undef +; GFX9-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fmul <5 x half> undef, undef ; GFX9-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW-LABEL: 'fmul_f16' @@ -93,6 +111,7 @@ define amdgpu_kernel void @fmul_f16() #0 { ; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fmul <2 x half> undef, undef ; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fmul <3 x half> undef, undef ; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fmul <4 x half> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fmul <5 x half> undef, undef ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX9-SIZE-LABEL: 'fmul_f16' @@ -100,6 +119,7 @@ define amdgpu_kernel void @fmul_f16() #0 { ; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fmul <2 x half> undef, undef ; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fmul <3 x half> undef, undef ; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fmul <4 x half> undef, undef +; 
GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fmul <5 x half> undef, undef ; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLOW-SIZE-LABEL: 'fmul_f16' @@ -107,12 +127,14 @@ define amdgpu_kernel void @fmul_f16() #0 { ; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fmul <2 x half> undef, undef ; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fmul <3 x half> undef, undef ; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fmul <4 x half> undef, undef +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fmul <5 x half> undef, undef ; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = fmul half undef, undef %v2f16 = fmul <2 x half> undef, undef %v3f16 = fmul <3 x half> undef, undef %v4f16 = fmul <4 x half> undef, undef + %v5f16 = fmul <5 x half> undef, undef ret void } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll index 970c8d9..b448290 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll @@ -8,6 +8,7 @@ define amdgpu_kernel void @fneg_f32() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f32 = fneg float undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32 = fneg <2 x float> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = fneg <3 x float> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = fneg <4 x float> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = fneg <5 x float> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; @@ -15,12 +16,14 @@ define amdgpu_kernel void @fneg_f32() { ; SIZE-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: %f32 = fneg float undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32 = fneg <2 x float> undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = fneg <3 x float> undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = fneg <4 x float> undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = fneg <5 x float> undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f32 = fneg float undef %v2f32 = fneg <2 x float> undef %v3f32 = fneg <3 x float> undef + %v4f32 = fneg <4 x float> undef %v5f32 = fneg <5 x float> undef ret void } @@ -30,17 +33,23 @@ define amdgpu_kernel void @fneg_f64() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f64 = fneg double undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = fneg <2 x double> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f64 = fneg <3 x double> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = fneg <4 x double> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f64 = fneg <5 x double> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SIZE-LABEL: 'fneg_f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f64 = fneg double undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = fneg <2 x double> undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f64 = fneg <3 x double> undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = fneg <4 x double> undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f64 = fneg <5 x double> undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret void ; %f64 = fneg double undef %v2f64 = fneg <2 x double> undef %v3f64 = fneg <3 x double> undef + %v4f64 = fneg <4 x double> undef + %v5f64 = fneg <5 x double> undef ret void } @@ -49,16 +58,22 @@ define amdgpu_kernel void @fneg_f16() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f16 = fneg half undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = fneg <2 x half> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = fneg <3 x half> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = fneg <4 x half> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fneg <5 x half> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SIZE-LABEL: 'fneg_f16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f16 = fneg half undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = fneg <2 x half> undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = fneg <3 x half> undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = fneg <4 x half> undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fneg <5 x half> undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = fneg half undef %v2f16 = fneg <2 x half> undef %v3f16 = fneg <3 x half> undef + %v4f16 = fneg <4 x half> undef + %v5f16 = fneg <5 x half> undef ret void } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll b/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll index 2340b377..a10ef8e 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll @@ -12,6 +12,7 @@ define amdgpu_kernel void @fsub_f32() #0 { ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%f32 = fsub float undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fsub <2 x float> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fsub <3 x float> undef, undef +; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fsub <4 x float> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fsub <5 x float> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; @@ -19,6 +20,7 @@ define amdgpu_kernel void @fsub_f32() #0 { ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fsub <2 x float> undef, undef ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fsub <3 x float> undef, undef +; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fsub <4 x float> undef, undef ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; @@ -26,6 +28,7 @@ define amdgpu_kernel void @fsub_f32() #0 { ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fsub <2 x float> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fsub <3 x float> undef, undef +; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fsub <4 x float> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fsub <5 x float> 
undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; @@ -33,12 +36,14 @@ define amdgpu_kernel void @fsub_f32() #0 { ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fsub <2 x float> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fsub <3 x float> undef, undef +; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fsub <4 x float> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f32 = fsub float undef, undef %v2f32 = fsub <2 x float> undef, undef %v3f32 = fsub <3 x float> undef, undef + %v4f32 = fsub <4 x float> undef, undef %v5f32 = fsub <5 x float> undef, undef ret void } @@ -48,35 +53,47 @@ define amdgpu_kernel void @fsub_f64() #0 { ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = fsub double undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fsub <2 x double> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fsub <3 x double> undef, undef +; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fsub <4 x double> undef, undef +; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fsub <5 x double> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FASTF64-LABEL: 'fsub_f64' ; FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f64 = fsub double undef, undef ; FASTF64-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %v2f64 = fsub <2 x double> undef, undef ; FASTF64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fsub <3 x double> undef, undef +; FASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fsub <4 x double> undef, undef +; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fsub <5 x double> undef, undef ; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOWF64-LABEL: 'fsub_f64' ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64 = fsub double undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fsub <2 x double> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fsub <3 x double> undef, undef +; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fsub <4 x double> undef, undef +; SLOWF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f64 = fsub <5 x double> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX90A-FASTF64-SIZE-LABEL: 'fsub_f64' ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = fsub double undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fsub <2 x double> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fsub <3 x double> undef, undef +; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fsub <4 x double> undef, undef +; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fsub <5 x double> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; NOPACKEDF32-SIZE-LABEL: 'fsub_f64' ; 
NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f64 = fsub double undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fsub <2 x double> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fsub <3 x double> undef, undef +; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fsub <4 x double> undef, undef +; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fsub <5 x double> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f64 = fsub double undef, undef %v2f64 = fsub <2 x double> undef, undef %v3f64 = fsub <3 x double> undef, undef + %v4f64 = fsub <4 x double> undef, undef + %v5f64 = fsub <5 x double> undef, undef ret void } @@ -86,6 +103,7 @@ define amdgpu_kernel void @fsub_f16() #0 { ; FASTF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fsub <2 x half> undef, undef ; FASTF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fsub <3 x half> undef, undef ; FASTF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fsub <4 x half> undef, undef +; FASTF16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = fsub <5 x half> undef, undef ; FASTF16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOWF64-LABEL: 'fsub_f16' @@ -93,6 +111,7 @@ define amdgpu_kernel void @fsub_f16() #0 { ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fsub <2 x half> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fsub <3 x half> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fsub <4 x half> undef, undef +; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: %v5f16 = fsub <5 x half> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FASTF16-SIZE-LABEL: 'fsub_f16' @@ -100,6 +119,7 @@ define amdgpu_kernel void @fsub_f16() #0 { ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fsub <2 x half> undef, undef ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fsub <3 x half> undef, undef ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fsub <4 x half> undef, undef +; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = fsub <5 x half> undef, undef ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLOWF64-SIZE-LABEL: 'fsub_f16' @@ -107,11 +127,13 @@ define amdgpu_kernel void @fsub_f16() #0 { ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fsub <2 x half> undef, undef ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fsub <3 x half> undef, undef ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fsub <4 x half> undef, undef +; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fsub <5 x half> undef, undef ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = fsub half undef, undef %v2f16 = fsub <2 x half> undef, undef %v3f16 = fsub <3 x half> undef, undef %v4f16 = fsub <4 x half> undef, undef + %v5f16 = fsub <5 x half> undef, undef ret void } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll b/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll index 50fccb9..86db730 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll @@ -22,6 +22,8 @@ define void @fmul_fadd_f32() #0 { ; SLOWF32-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %v2f32add = fadd <2 x float> %v2f32, undef ; SLOWF32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f32_2 = fmul float undef, undef ; SLOWF32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32sub = fsub float %f32_2, undef +; SLOWF32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f32c_2 = fmul contract float undef, undef +; SLOWF32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32csub = fsub contract float %f32c_2, undef ; SLOWF32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_2 = fmul <2 x float> undef, undef ; SLOWF32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32sub = fsub <2 x float> %v2f32_2, undef ; SLOWF32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void @@ -35,6 +37,8 @@ define void @fmul_fadd_f32() #0 { ; FASTF32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32add = fadd <2 x float> %v2f32, undef ; FASTF32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32_2 = fmul float undef, undef ; FASTF32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32sub = fsub float %f32_2, undef +; FASTF32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f32c_2 = fmul contract float undef, undef +; FASTF32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32csub = fsub contract float %f32c_2, undef ; FASTF32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_2 = fmul <2 x float> undef, undef ; FASTF32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32sub = fsub <2 x float> %v2f32_2, undef ; FASTF32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void @@ -48,6 +52,8 @@ define void @fmul_fadd_f32() #0 { ; SLOWF32-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32add = fadd <2 x float> %v2f32, undef ; SLOWF32-SIZE-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: %f32_2 = fmul float undef, undef ; SLOWF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32sub = fsub float %f32_2, undef +; SLOWF32-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f32c_2 = fmul contract float undef, undef +; SLOWF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32csub = fsub contract float %f32c_2, undef ; SLOWF32-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_2 = fmul <2 x float> undef, undef ; SLOWF32-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32sub = fsub <2 x float> %v2f32_2, undef ; SLOWF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -61,18 +67,27 @@ define void @fmul_fadd_f32() #0 { ; FASTF32-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32add = fadd <2 x float> %v2f32, undef ; FASTF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32_2 = fmul float undef, undef ; FASTF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32sub = fsub float %f32_2, undef +; FASTF32-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f32c_2 = fmul contract float undef, undef +; FASTF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32csub = fsub contract float %f32c_2, undef ; FASTF32-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_2 = fmul <2 x float> undef, undef ; FASTF32-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32sub = fsub <2 x float> %v2f32_2, undef ; FASTF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f32 = fmul float undef, undef %f32add = fadd float %f32, undef + %f32c = fmul contract float undef, undef %f32cadd = fadd contract float %f32c, undef + %v2f32 = fmul <2 x float> undef, undef %v2f32add = fadd <2 x float> %v2f32, 
undef + %f32_2 = fmul float undef, undef %f32sub = fsub float %f32_2, undef + + %f32c_2 = fmul contract float undef, undef + %f32csub = fsub contract float %f32c_2, undef + %v2f32_2 = fmul <2 x float> undef, undef %v2f32sub = fsub <2 x float> %v2f32_2, undef ret void @@ -88,6 +103,8 @@ define void @fmul_fadd_f16() #0 { ; FUSED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16add = fadd <2 x half> %v2f16, undef ; FUSED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f16_2 = fmul half undef, undef ; FUSED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16sub = fsub half %f16_2, undef +; FUSED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f16c_2 = fmul contract half undef, undef +; FUSED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f15csub = fsub contract half %f16c_2, undef ; FUSED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f16_2 = fmul <2 x half> undef, undef ; FUSED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16sub = fsub <2 x half> %v2f16_2, undef ; FUSED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void @@ -101,6 +118,8 @@ define void @fmul_fadd_f16() #0 { ; GFX9SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16add = fadd <2 x half> %v2f16, undef ; GFX9SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16_2 = fmul half undef, undef ; GFX9SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16sub = fsub half %f16_2, undef +; GFX9SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f16c_2 = fmul contract half undef, undef +; GFX9SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f15csub = fsub contract half %f16c_2, undef ; GFX9SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_2 = fmul <2 x half> undef, undef ; GFX9SLOW-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v2f16sub = fsub <2 x half> %v2f16_2, undef ; GFX9SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void @@ -114,6 +133,8 @@ define void @fmul_fadd_f16() #0 { ; FUSED-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16add = fadd <2 x half> %v2f16, undef ; FUSED-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f16_2 = fmul half undef, undef ; FUSED-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16sub = fsub half %f16_2, undef +; FUSED-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f16c_2 = fmul contract half undef, undef +; FUSED-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f15csub = fsub contract half %f16c_2, undef ; FUSED-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f16_2 = fmul <2 x half> undef, undef ; FUSED-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16sub = fsub <2 x half> %v2f16_2, undef ; FUSED-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -127,18 +148,27 @@ define void @fmul_fadd_f16() #0 { ; GFX9SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16add = fadd <2 x half> %v2f16, undef ; GFX9SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16_2 = fmul half undef, undef ; GFX9SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16sub = fsub half %f16_2, undef +; GFX9SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f16c_2 = fmul contract half undef, undef +; GFX9SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f15csub = fsub contract half %f16c_2, undef ; GFX9SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_2 = fmul <2 x half> undef, undef ; GFX9SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16sub = fsub <2 x half> %v2f16_2, undef 
; GFX9SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = fmul half undef, undef %f16add = fadd half %f16, undef + %f16c = fmul contract half undef, undef %f15cadd = fadd contract half %f16c, undef + %v2f16 = fmul <2 x half> undef, undef %v2f16add = fadd <2 x half> %v2f16, undef + %f16_2 = fmul half undef, undef %f16sub = fsub half %f16_2, undef + + %f16c_2 = fmul contract half undef, undef + %f15csub = fsub contract half %f16c_2, undef + %v2f16_2 = fmul <2 x half> undef, undef %v2f16sub = fsub <2 x half> %v2f16_2, undef ret void @@ -154,6 +184,8 @@ define void @fmul_fadd_f64() #0 { ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64add = fadd <2 x double> %v2f64, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64_2 = fmul double undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64sub = fsub double %f64_2, undef +; SLOWF64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f64c_2 = fmul contract double undef, undef +; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64csub = fsub contract double %f64c_2, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64_2 = fmul <2 x double> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64sub = fsub <2 x double> %v2f64_2, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void @@ -167,6 +199,8 @@ define void @fmul_fadd_f64() #0 { ; GFX9FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64add = fadd <2 x double> %v2f64, undef ; GFX9FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f64_2 = fmul double undef, undef ; GFX9FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64sub = fsub double %f64_2, undef +; GFX9FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%f64c_2 = fmul contract double undef, undef +; GFX9FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64csub = fsub contract double %f64c_2, undef ; GFX9FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_2 = fmul <2 x double> undef, undef ; GFX9FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64sub = fsub <2 x double> %v2f64_2, undef ; GFX9FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void @@ -180,6 +214,8 @@ define void @fmul_fadd_f64() #0 { ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64add = fadd <2 x double> %v2f64, undef ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f64_2 = fmul double undef, undef ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f64sub = fsub double %f64_2, undef +; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f64c_2 = fmul contract double undef, undef +; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f64csub = fsub contract double %f64c_2, undef ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64_2 = fmul <2 x double> undef, undef ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64sub = fsub <2 x double> %v2f64_2, undef ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -193,18 +229,27 @@ define void @fmul_fadd_f64() #0 { ; GFX9FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64add = fadd <2 x double> %v2f64, undef ; GFX9FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f64_2 = fmul double undef, undef ; GFX9FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f64sub = fsub double %f64_2, undef +; GFX9FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f64c_2 = fmul contract 
double undef, undef +; GFX9FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f64csub = fsub contract double %f64c_2, undef ; GFX9FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_2 = fmul <2 x double> undef, undef ; GFX9FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64sub = fsub <2 x double> %v2f64_2, undef ; GFX9FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f64 = fmul double undef, undef %f64add = fadd double %f64, undef + %f64c = fmul contract double undef, undef %f64cadd = fadd contract double %f64c, undef + %v2f64 = fmul <2 x double> undef, undef %v2f64add = fadd <2 x double> %v2f64, undef + %f64_2 = fmul double undef, undef %f64sub = fsub double %f64_2, undef + + %f64c_2 = fmul contract double undef, undef + %f64csub = fsub contract double %f64c_2, undef + %v2f64_2 = fmul <2 x double> undef, undef %v2f64sub = fsub <2 x double> %v2f64_2, undef ret void diff --git a/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll b/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll index e0f6420..1bdbb5b 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll @@ -1,49 +1,222 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=CI %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GFX89 %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GFX89 %s -; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=CI-SIZE %s -; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GFX89-SIZE %s -; RUN: opt -cost-model 
-cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GFX89-SIZE %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=ALL,CI %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=ALL,GFX89 %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=ALL,GFX89 %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=ALL-SIZE,CI-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=ALL-SIZE,GFX89-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=ALL-SIZE,GFX89-SIZE %s ; END. -define amdgpu_kernel void @insertelement_v2() { -; CI-LABEL: 'insertelement_v2' -; CI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 123, i32 1 -; CI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 123, i64 1 -; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 123, i16 0 -; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 123, i16 1 -; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 123, i8 1 +define amdgpu_kernel void @insertelement_i8(i32 %arg) { +; ALL-LABEL: 'insertelement_i8' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 42, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i8_0 = insertelement <3 x i8> undef, i8 42, i32 0 +; ALL-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 42, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i8_0 = insertelement <5 x i8> undef, i8 42, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 42, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i8_1 = insertelement <3 x i8> undef, i8 42, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_1 = insertelement <4 x i8> undef, i8 42, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i8_1 = insertelement <5 x i8> undef, i8 42, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 42, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i8_a = insertelement <3 x i8> undef, i8 42, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 42, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i8_a = insertelement <5 x i8> undef, i8 42, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; ALL-SIZE-LABEL: 'insertelement_i8' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 42, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i8_0 = insertelement <3 x i8> undef, i8 42, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 42, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i8_0 = insertelement <5 x i8> undef, i8 42, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 42, i32 1 +; 
ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i8_1 = insertelement <3 x i8> undef, i8 42, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_1 = insertelement <4 x i8> undef, i8 42, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i8_1 = insertelement <5 x i8> undef, i8 42, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 42, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i8_a = insertelement <3 x i8> undef, i8 42, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 42, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i8_a = insertelement <5 x i8> undef, i8 42, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %v2i8_0 = insertelement <2 x i8> undef, i8 42, i32 0 + %v3i8_0 = insertelement <3 x i8> undef, i8 42, i32 0 + %v4i8_0 = insertelement <4 x i8> undef, i8 42, i32 0 + %v5i8_0 = insertelement <5 x i8> undef, i8 42, i32 0 + %v2i8_1 = insertelement <2 x i8> undef, i8 42, i32 1 + %v3i8_1 = insertelement <3 x i8> undef, i8 42, i32 1 + %v4i8_1 = insertelement <4 x i8> undef, i8 42, i32 1 + %v5i8_1 = insertelement <5 x i8> undef, i8 42, i32 1 + %v2i8_a = insertelement <2 x i8> undef, i8 42, i32 %arg + %v3i8_a = insertelement <3 x i8> undef, i8 42, i32 %arg + %v4i8_a = insertelement <4 x i8> undef, i8 42, i32 %arg + %v5i8_a = insertelement <5 x i8> undef, i8 42, i32 %arg + ret void +} + +define amdgpu_kernel void @insertelement_i16(i32 %arg) { +; CI-LABEL: 'insertelement_i16' +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 42, i32 0 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i16_0 = insertelement 
<3 x i16> undef, i16 42, i32 0 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> undef, i16 42, i32 0 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_0 = insertelement <5 x i16> undef, i16 42, i32 0 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 42, i32 1 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i16_1 = insertelement <3 x i16> undef, i16 42, i32 1 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = insertelement <4 x i16> undef, i16 42, i32 1 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_1 = insertelement <5 x i16> undef, i16 42, i32 1 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> undef, i16 42, i32 %arg +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i16_a = insertelement <3 x i16> undef, i16 42, i32 %arg +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> undef, i16 42, i32 %arg +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_a = insertelement <5 x i16> undef, i16 42, i32 %arg ; CI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; -; GFX89-LABEL: 'insertelement_v2' -; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 123, i32 1 -; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 123, i64 1 -; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 123, i16 0 -; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 123, i16 1 -; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 123, i8 1 +; GFX89-LABEL: 'insertelement_i16' +; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 42, i32 0 +; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i16_0 = insertelement <3 x i16> undef, i16 42, i32 0 +; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16_0 = insertelement <4 x i16> undef, i16 42, i32 0 +; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i16_0 = insertelement <5 x i16> undef, i16 42, i32 0 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 42, i32 1 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i16_1 = insertelement <3 x i16> undef, i16 42, i32 1 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = insertelement <4 x i16> undef, i16 42, i32 1 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_1 = insertelement <5 x i16> undef, i16 42, i32 1 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> undef, i16 42, i32 %arg +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i16_a = insertelement <3 x i16> undef, i16 42, i32 %arg +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> undef, i16 42, i32 %arg +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_a = insertelement <5 x i16> undef, i16 42, i32 %arg ; GFX89-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; -; CI-SIZE-LABEL: 'insertelement_v2' -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 123, i32 1 -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 
= insertelement <2 x i64> undef, i64 123, i64 1 -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 123, i16 0 -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 123, i16 1 -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 123, i8 1 +; CI-SIZE-LABEL: 'insertelement_i16' +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 42, i32 0 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i16_0 = insertelement <3 x i16> undef, i16 42, i32 0 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> undef, i16 42, i32 0 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_0 = insertelement <5 x i16> undef, i16 42, i32 0 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 42, i32 1 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i16_1 = insertelement <3 x i16> undef, i16 42, i32 1 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = insertelement <4 x i16> undef, i16 42, i32 1 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_1 = insertelement <5 x i16> undef, i16 42, i32 1 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> undef, i16 42, i32 %arg +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i16_a = insertelement <3 x i16> undef, i16 42, i32 %arg +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> undef, i16 42, i32 %arg +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v5i16_a = insertelement <5 x i16> undef, i16 42, i32 %arg ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; GFX89-SIZE-LABEL: 'insertelement_v2' -; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 123, i32 1 -; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 123, i64 1 -; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 123, i16 0 -; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 123, i16 1 -; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 123, i8 1 +; GFX89-SIZE-LABEL: 'insertelement_i16' +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 42, i32 0 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i16_0 = insertelement <3 x i16> undef, i16 42, i32 0 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16_0 = insertelement <4 x i16> undef, i16 42, i32 0 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i16_0 = insertelement <5 x i16> undef, i16 42, i32 0 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 42, i32 1 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i16_1 = insertelement <3 x i16> undef, i16 42, i32 1 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = insertelement <4 x i16> undef, i16 42, i32 1 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_1 = insertelement <5 x i16> undef, i16 42, i32 1 +; 
GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> undef, i16 42, i32 %arg +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3i16_a = insertelement <3 x i16> undef, i16 42, i32 %arg +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> undef, i16 42, i32 %arg +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5i16_a = insertelement <5 x i16> undef, i16 42, i32 %arg ; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; - %v2i32_1 = insertelement <2 x i32> undef, i32 123, i32 1 - %v2i64_1 = insertelement <2 x i64> undef, i64 123, i64 1 - %v2i16_0 = insertelement <2 x i16> undef, i16 123, i16 0 - %v2i16_1 = insertelement <2 x i16> undef, i16 123, i16 1 - %v2i8_1 = insertelement <2 x i8> undef, i8 123, i8 1 + %v2i16_0 = insertelement <2 x i16> undef, i16 42, i32 0 + %v3i16_0 = insertelement <3 x i16> undef, i16 42, i32 0 + %v4i16_0 = insertelement <4 x i16> undef, i16 42, i32 0 + %v5i16_0 = insertelement <5 x i16> undef, i16 42, i32 0 + %v2i16_1 = insertelement <2 x i16> undef, i16 42, i32 1 + %v3i16_1 = insertelement <3 x i16> undef, i16 42, i32 1 + %v4i16_1 = insertelement <4 x i16> undef, i16 42, i32 1 + %v5i16_1 = insertelement <5 x i16> undef, i16 42, i32 1 + %v2i16_a = insertelement <2 x i16> undef, i16 42, i32 %arg + %v3i16_a = insertelement <3 x i16> undef, i16 42, i32 %arg + %v4i16_a = insertelement <4 x i16> undef, i16 42, i32 %arg + %v5i16_a = insertelement <5 x i16> undef, i16 42, i32 %arg + ret void +} + +define amdgpu_kernel void @insertelement_i32(i32 %arg) { +; ALL-LABEL: 'insertelement_i32' +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 42, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32_0 = insertelement <3 x i32> undef, i32 42, i32 0 +; 
ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 42, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i32_0 = insertelement <5 x i32> undef, i32 42, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 42, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32_1 = insertelement <3 x i32> undef, i32 42, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_1 = insertelement <4 x i32> undef, i32 42, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i32_1 = insertelement <5 x i32> undef, i32 42, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 42, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i32_a = insertelement <3 x i32> undef, i32 42, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 42, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v5i32_a = insertelement <5 x i32> undef, i32 42, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; ALL-SIZE-LABEL: 'insertelement_i32' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 42, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32_0 = insertelement <3 x i32> undef, i32 42, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 42, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i32_0 = insertelement <5 x i32> undef, i32 42, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 42, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32_1 = insertelement <3 x i32> undef, i32 42, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_1 = insertelement <4 x i32> undef, i32 42, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i32_1 = insertelement <5 x i32> undef, i32 42, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 42, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i32_a = insertelement <3 x i32> undef, i32 42, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 42, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v5i32_a = insertelement <5 x i32> undef, i32 42, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %v2i32_0 = insertelement <2 x i32> undef, i32 42, i32 0 + %v3i32_0 = insertelement <3 x i32> undef, i32 42, i32 0 + %v4i32_0 = insertelement <4 x i32> undef, i32 42, i32 0 + %v5i32_0 = insertelement <5 x i32> undef, i32 42, i32 0 + %v2i32_1 = insertelement <2 x i32> undef, i32 42, i32 1 + %v3i32_1 = insertelement <3 x i32> undef, i32 42, i32 1 + %v4i32_1 = insertelement <4 x i32> undef, i32 42, i32 1 + %v5i32_1 = insertelement <5 x i32> undef, i32 42, i32 1 + %v2i32_a = insertelement <2 x i32> undef, i32 42, i32 %arg + %v3i32_a = insertelement <3 x i32> undef, i32 42, i32 %arg + %v4i32_a = insertelement <4 x i32> undef, i32 42, i32 %arg + %v5i32_a = insertelement <5 x i32> undef, i32 42, i32 %arg + ret void +} + +define amdgpu_kernel void @insertelement_i64(i32 %arg) { +; ALL-LABEL: 'insertelement_i64' +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_0 = 
insertelement <2 x i64> undef, i64 42, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64_0 = insertelement <3 x i64> undef, i64 42, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 42, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i64_0 = insertelement <5 x i64> undef, i64 42, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 42, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64_1 = insertelement <3 x i64> undef, i64 42, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_1 = insertelement <4 x i64> undef, i64 42, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i64_1 = insertelement <5 x i64> undef, i64 42, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 42, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i64_a = insertelement <3 x i64> undef, i64 42, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 42, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v5i64_a = insertelement <5 x i64> undef, i64 42, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; ALL-SIZE-LABEL: 'insertelement_i64' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 42, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64_0 = insertelement <3 x i64> undef, i64 42, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 42, i32 0 +; ALL-SIZE-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: %v5i64_0 = insertelement <5 x i64> undef, i64 42, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 42, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64_1 = insertelement <3 x i64> undef, i64 42, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_1 = insertelement <4 x i64> undef, i64 42, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5i64_1 = insertelement <5 x i64> undef, i64 42, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 42, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i64_a = insertelement <3 x i64> undef, i64 42, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 42, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v5i64_a = insertelement <5 x i64> undef, i64 42, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %v2i64_0 = insertelement <2 x i64> undef, i64 42, i32 0 + %v3i64_0 = insertelement <3 x i64> undef, i64 42, i32 0 + %v4i64_0 = insertelement <4 x i64> undef, i64 42, i32 0 + %v5i64_0 = insertelement <5 x i64> undef, i64 42, i32 0 + %v2i64_1 = insertelement <2 x i64> undef, i64 42, i32 1 + %v3i64_1 = insertelement <3 x i64> undef, i64 42, i32 1 + %v4i64_1 = insertelement <4 x i64> undef, i64 42, i32 1 + %v5i64_1 = insertelement <5 x i64> undef, i64 42, i32 1 + %v2i64_a = insertelement <2 x i64> undef, i64 42, i32 %arg + %v3i64_a = insertelement <3 x i64> undef, i64 42, i32 %arg + %v4i64_a = insertelement <4 x i64> undef, i64 42, i32 %arg + %v5i64_a = insertelement <5 x i64> undef, i64 42, i32 %arg ret void } diff --git 
a/llvm/test/Analysis/CostModel/AMDGPU/logicalop.ll b/llvm/test/Analysis/CostModel/AMDGPU/logicalop.ll index 0eedf867..c838dc7 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/logicalop.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/logicalop.ll @@ -3,8 +3,9 @@ ; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s --check-prefix=CHECK-SIZE ; END. +; Logical and/or - select's cost must be equivalent to that of binop + define amdgpu_kernel void @op() { - ; Logical and/or - select's cost must be equivalent to that of binop ; CHECK-THROUGHPUT-LABEL: 'op' ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select i1 undef, i1 undef, i1 false ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and i1 undef, undef @@ -29,23 +30,35 @@ define amdgpu_kernel void @op() { define void @vecop() { ; CHECK-THROUGHPUT-LABEL: 'vecop' -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2sand = select <2 x i1> undef, <2 x i1> undef, <2 x i1> zeroinitializer +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2band = and <2 x i1> undef, undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2sor = select <2 x i1> undef, <2 x i1> <i1 true, i1 true>, <2 x i1> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2bor = or 
<2 x i1> undef, undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4band = and <4 x i1> undef, undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4bor = or <4 x i1> undef, undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CHECK-SIZE-LABEL: 'vecop' -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2sand = select <2 x i1> undef, <2 x i1> undef, <2 x i1> zeroinitializer +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2band = and <2 x i1> undef, undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2sor = select <2 x i1> undef, <2 x i1> <i1 true, i1 true>, <2 x i1> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2bor = or <2 x i1> undef, undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4band = and <4 x i1> undef, undef +; CHECK-SIZE-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %v4sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4bor = or <4 x i1> undef, undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; - %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> <i1 false, i1 false, i1 false, i1 false> - %band = and <4 x i1> undef, undef - %sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef - %bor = or <4 x i1> undef, undef + %v2sand = select <2 x i1> undef, <2 x i1> undef, <2 x i1> <i1 false, i1 false> + %v2band = and <2 x i1> undef, undef + %v2sor = select <2 x i1> undef, <2 x i1> <i1 true, i1 true>, <2 x i1> undef + %v2bor = or <2 x i1> undef, undef + %v4sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> <i1 false, i1 false, i1 false, i1 false> + %v4band = and <4 x i1> undef, undef + %v4sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef + %v4bor = or <4 x i1> undef, undef ret void } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll index 6d702ae..3c5e3da 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll @@ -36,6 +36,7 @@ define amdgpu_kernel void @mul_i64() #0 { ; ALL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2i64 = mul <2 x i64> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v3i64 = mul <3 x i64> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v4i64 = mul <4 x i64> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v5i64 = mul <4 x i64> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v8i64 = mul <8 x i64> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; @@ -44,6 +45,7 @@ define amdgpu_kernel void @mul_i64() #0 { ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2i64 = mul <2 x i64> undef, undef ; ALL-SIZE-NEXT: Cost 
Model: Found an estimated cost of 36 for instruction: %v3i64 = mul <3 x i64> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4i64 = mul <4 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = mul <4 x i64> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v8i64 = mul <8 x i64> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; @@ -51,6 +53,7 @@ define amdgpu_kernel void @mul_i64() #0 { %v2i64 = mul <2 x i64> undef, undef %v3i64 = mul <3 x i64> undef, undef %v4i64 = mul <4 x i64> undef, undef + %v5i64 = mul <4 x i64> undef, undef %v8i64 = mul <8 x i64> undef, undef ret void } @@ -60,29 +63,39 @@ define amdgpu_kernel void @mul_i16() #0 { ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i16 = mul i16 undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16 = mul <2 x i16> undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3i16 = mul <3 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i16 = mul <4 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = mul <5 x i16> undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FAST16-LABEL: 'mul_i16' ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i16 = mul i16 undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i16 = mul <2 x i16> undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3i16 = mul <3 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i16 = mul <4 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = mul <5 
x i16> undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW16-SIZE-LABEL: 'mul_i16' ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i16 = mul i16 undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i16 = mul <2 x i16> undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3i16 = mul <3 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i16 = mul <4 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = mul <5 x i16> undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; FAST16-SIZE-LABEL: 'mul_i16' ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i16 = mul i16 undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = mul <2 x i16> undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = mul <3 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = mul <4 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = mul <5 x i16> undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %i16 = mul i16 undef, undef %v2i16 = mul <2 x i16> undef, undef %v3i16 = mul <3 x i16> undef, undef + %v4i16 = mul <4 x i16> undef, undef + %v5i16 = mul <5 x i16> undef, undef ret void } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll b/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll index f2a6a79..3ab46ad 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll @@ -7,109 +7,349 @@ define amdgpu_kernel void @shl() #0 { ; FAST64-LABEL: 'shl' -; FAST64-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %i32 = shl i32 undef, undef -; FAST64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = shl i64 undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = shl i8 undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = shl <2 x i8> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = shl <3 x i8> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = shl <4 x i8> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = shl <5 x i8> undef, undef ; FAST64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = shl i16 undef, undef ; FAST64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = shl <2 x i16> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = shl <3 x i16> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shl <4 x i16> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = shl <5 x i16> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = shl i32 undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = shl <2 x i32> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = shl <3 x i32> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = shl <4 x i32> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = shl <5 x i32> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = shl i64 undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = shl <2 x 
i64> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = shl <3 x i64> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = shl <4 x i64> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = shl <5 x i64> undef, undef ; FAST64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW64-LABEL: 'shl' -; SLOW64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = shl i32 undef, undef -; SLOW64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i64 = shl i64 undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = shl i8 undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = shl <2 x i8> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = shl <3 x i8> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = shl <4 x i8> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = shl <5 x i8> undef, undef ; SLOW64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = shl i16 undef, undef ; SLOW64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = shl <2 x i16> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = shl <3 x i16> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = shl <4 x i16> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = shl <5 x i16> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = shl i32 undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = shl <2 x i32> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 
3 for instruction: %v3i32 = shl <3 x i32> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = shl <4 x i32> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = shl <5 x i32> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i64 = shl i64 undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = shl <2 x i64> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3i64 = shl <3 x i64> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = shl <4 x i64> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5i64 = shl <5 x i64> undef, undef ; SLOW64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FAST64-SIZE-LABEL: 'shl' -; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = shl i32 undef, undef -; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = shl i64 undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = shl i8 undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = shl <2 x i8> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = shl <3 x i8> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = shl <4 x i8> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = shl <5 x i8> undef, undef ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = shl i16 undef, undef ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = shl <2 x i16> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %v3i16 = shl <3 x i16> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shl <4 x i16> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = shl <5 x i16> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = shl i32 undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = shl <2 x i32> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = shl <3 x i32> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = shl <4 x i32> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = shl <5 x i32> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = shl i64 undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = shl <2 x i64> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = shl <3 x i64> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = shl <4 x i64> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = shl <5 x i64> undef, undef ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLOW64-SIZE-LABEL: 'shl' -; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = shl i32 undef, undef -; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = shl i64 undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = shl i8 undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = shl <2 x i8> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: 
Found an estimated cost of 4 for instruction: %v3i8 = shl <3 x i8> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = shl <4 x i8> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = shl <5 x i8> undef, undef ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = shl i16 undef, undef ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = shl <2 x i16> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = shl <3 x i16> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = shl <4 x i16> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = shl <5 x i16> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = shl i32 undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = shl <2 x i32> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = shl <3 x i32> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = shl <4 x i32> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = shl <5 x i32> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = shl i64 undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = shl <2 x i64> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = shl <3 x i64> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = shl <4 x i64> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = shl <5 x i64> undef, 
undef ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; - %i32 = shl i32 undef, undef - %i64 = shl i64 undef, undef + %i8 = shl i8 undef, undef + %v2i8 = shl <2 x i8> undef, undef + %v3i8 = shl <3 x i8> undef, undef + %v4i8 = shl <4 x i8> undef, undef + %v5i8 = shl <5 x i8> undef, undef %i16 = shl i16 undef, undef %v2i16 = shl <2 x i16> undef, undef + %v3i16 = shl <3 x i16> undef, undef + %v4i16 = shl <4 x i16> undef, undef + %v5i16 = shl <5 x i16> undef, undef + %i32 = shl i32 undef, undef + %v2i32 = shl <2 x i32> undef, undef + %v3i32 = shl <3 x i32> undef, undef + %v4i32 = shl <4 x i32> undef, undef + %v5i32 = shl <5 x i32> undef, undef + %i64 = shl i64 undef, undef + %v2i64 = shl <2 x i64> undef, undef + %v3i64 = shl <3 x i64> undef, undef + %v4i64 = shl <4 x i64> undef, undef + %v5i64 = shl <5 x i64> undef, undef ret void } define amdgpu_kernel void @lshr() #0 { ; FAST64-LABEL: 'lshr' -; FAST64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = lshr i32 undef, undef -; FAST64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = lshr i64 undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = lshr i8 undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = lshr <2 x i8> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = lshr <3 x i8> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = lshr <4 x i8> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = lshr <5 x i8> undef, undef ; FAST64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = lshr i16 undef, undef ; FAST64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = lshr <2 x i16> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = lshr <3 x i16> 
undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = lshr <4 x i16> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = lshr <5 x i16> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = lshr i32 undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = lshr <2 x i32> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = lshr <3 x i32> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = lshr <4 x i32> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = lshr <5 x i32> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = lshr i64 undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = lshr <2 x i64> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = lshr <3 x i64> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = lshr <4 x i64> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = lshr <5 x i64> undef, undef ; FAST64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW64-LABEL: 'lshr' -; SLOW64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = lshr i32 undef, undef -; SLOW64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i64 = lshr i64 undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = lshr i8 undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = lshr <2 x i8> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = lshr <3 x i8> undef, undef +; SLOW64-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %v4i8 = lshr <4 x i8> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = lshr <5 x i8> undef, undef ; SLOW64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = lshr i16 undef, undef ; SLOW64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = lshr <2 x i16> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = lshr <3 x i16> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = lshr <4 x i16> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = lshr <5 x i16> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = lshr i32 undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = lshr <2 x i32> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = lshr <3 x i32> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = lshr <4 x i32> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = lshr <5 x i32> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i64 = lshr i64 undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = lshr <2 x i64> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3i64 = lshr <3 x i64> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = lshr <4 x i64> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5i64 = lshr <5 x i64> undef, undef ; SLOW64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FAST64-SIZE-LABEL: 'lshr' -; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %i32 = lshr i32 undef, undef -; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = lshr i64 undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = lshr i8 undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = lshr <2 x i8> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = lshr <3 x i8> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = lshr <4 x i8> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = lshr <5 x i8> undef, undef ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = lshr i16 undef, undef ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = lshr <2 x i16> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = lshr <3 x i16> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = lshr <4 x i16> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = lshr <5 x i16> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = lshr i32 undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = lshr <2 x i32> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = lshr <3 x i32> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = lshr <4 x i32> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = lshr <5 x i32> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = lshr i64 undef, undef +; FAST64-SIZE-NEXT: Cost Model: 
Found an estimated cost of 4 for instruction: %v2i64 = lshr <2 x i64> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = lshr <3 x i64> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = lshr <4 x i64> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = lshr <5 x i64> undef, undef ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLOW64-SIZE-LABEL: 'lshr' -; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = lshr i32 undef, undef -; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = lshr i64 undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = lshr i8 undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = lshr <2 x i8> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = lshr <3 x i8> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = lshr <4 x i8> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = lshr <5 x i8> undef, undef ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = lshr i16 undef, undef ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = lshr <2 x i16> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = lshr <3 x i16> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = lshr <4 x i16> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = lshr <5 x i16> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = lshr i32 undef, 
undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = lshr <2 x i32> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = lshr <3 x i32> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = lshr <4 x i32> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = lshr <5 x i32> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = lshr i64 undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = lshr <2 x i64> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = lshr <3 x i64> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = lshr <4 x i64> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = lshr <5 x i64> undef, undef ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; - %i32 = lshr i32 undef, undef - %i64 = lshr i64 undef, undef + %i8 = lshr i8 undef, undef + %v2i8 = lshr <2 x i8> undef, undef + %v3i8 = lshr <3 x i8> undef, undef + %v4i8 = lshr <4 x i8> undef, undef + %v5i8 = lshr <5 x i8> undef, undef %i16 = lshr i16 undef, undef %v2i16 = lshr <2 x i16> undef, undef + %v3i16 = lshr <3 x i16> undef, undef + %v4i16 = lshr <4 x i16> undef, undef + %v5i16 = lshr <5 x i16> undef, undef + %i32 = lshr i32 undef, undef + %v2i32 = lshr <2 x i32> undef, undef + %v3i32 = lshr <3 x i32> undef, undef + %v4i32 = lshr <4 x i32> undef, undef + %v5i32 = lshr <5 x i32> undef, undef + %i64 = lshr i64 undef, undef + %v2i64 = lshr <2 x i64> undef, undef + %v3i64 = lshr <3 x i64> undef, undef + %v4i64 = lshr <4 x i64> undef, undef + %v5i64 = lshr <5 x i64> undef, undef ret void } define amdgpu_kernel void @ashr() #0 { ; FAST64-LABEL: 
'ashr' -; FAST64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = ashr i32 undef, undef -; FAST64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = ashr i64 undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = ashr i8 undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = ashr <2 x i8> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = ashr <3 x i8> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = ashr <4 x i8> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = ashr <5 x i8> undef, undef ; FAST64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = ashr i16 undef, undef ; FAST64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = ashr <2 x i16> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = ashr <3 x i16> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = ashr <4 x i16> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = ashr <5 x i16> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = ashr i32 undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = ashr <2 x i32> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = ashr <3 x i32> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = ashr <4 x i32> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = ashr <5 x i32> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = ashr i64 undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 
4 for instruction: %v2i64 = ashr <2 x i64> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = ashr <3 x i64> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = ashr <4 x i64> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = ashr <5 x i64> undef, undef ; FAST64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW64-LABEL: 'ashr' -; SLOW64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = ashr i32 undef, undef -; SLOW64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i64 = ashr i64 undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = ashr i8 undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = ashr <2 x i8> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = ashr <3 x i8> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = ashr <4 x i8> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = ashr <5 x i8> undef, undef ; SLOW64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = ashr i16 undef, undef ; SLOW64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = ashr <2 x i16> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = ashr <3 x i16> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = ashr <4 x i16> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = ashr <5 x i16> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = ashr i32 undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = ashr <2 x i32> undef, undef 
+; SLOW64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = ashr <3 x i32> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = ashr <4 x i32> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = ashr <5 x i32> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i64 = ashr i64 undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = ashr <2 x i64> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3i64 = ashr <3 x i64> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = ashr <4 x i64> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5i64 = ashr <5 x i64> undef, undef ; SLOW64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FAST64-SIZE-LABEL: 'ashr' -; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = ashr i32 undef, undef -; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = ashr i64 undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = ashr i8 undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = ashr <2 x i8> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = ashr <3 x i8> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = ashr <4 x i8> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = ashr <5 x i8> undef, undef ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = ashr i16 undef, undef ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = ashr <2 x i16> undef, undef +; 
FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = ashr <3 x i16> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = ashr <4 x i16> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = ashr <5 x i16> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = ashr i32 undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = ashr <2 x i32> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = ashr <3 x i32> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = ashr <4 x i32> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = ashr <5 x i32> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = ashr i64 undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = ashr <2 x i64> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = ashr <3 x i64> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = ashr <4 x i64> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = ashr <5 x i64> undef, undef ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLOW64-SIZE-LABEL: 'ashr' -; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = ashr i32 undef, undef -; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = ashr i64 undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = ashr i8 undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %v2i8 = ashr <2 x i8> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = ashr <3 x i8> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = ashr <4 x i8> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = ashr <5 x i8> undef, undef ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = ashr i16 undef, undef ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = ashr <2 x i16> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = ashr <3 x i16> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = ashr <4 x i16> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = ashr <5 x i16> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = ashr i32 undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = ashr <2 x i32> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = ashr <3 x i32> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = ashr <4 x i32> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = ashr <5 x i32> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = ashr i64 undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = ashr <2 x i64> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = ashr <3 x i64> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = ashr <4 x i64> undef, undef +; 
SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = ashr <5 x i64> undef, undef ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; - %i32 = ashr i32 undef, undef - %i64 = ashr i64 undef, undef + %i8 = ashr i8 undef, undef + %v2i8 = ashr <2 x i8> undef, undef + %v3i8 = ashr <3 x i8> undef, undef + %v4i8 = ashr <4 x i8> undef, undef + %v5i8 = ashr <5 x i8> undef, undef %i16 = ashr i16 undef, undef %v2i16 = ashr <2 x i16> undef, undef + %v3i16 = ashr <3 x i16> undef, undef + %v4i16 = ashr <4 x i16> undef, undef + %v5i16 = ashr <5 x i16> undef, undef + %i32 = ashr i32 undef, undef + %v2i32 = ashr <2 x i32> undef, undef + %v3i32 = ashr <3 x i32> undef, undef + %v4i32 = ashr <4 x i32> undef, undef + %v5i32 = ashr <5 x i32> undef, undef + %i64 = ashr i64 undef, undef + %v2i64 = ashr <2 x i64> undef, undef + %v3i64 = ashr <3 x i64> undef, undef + %v4i64 = ashr <4 x i64> undef, undef + %v5i64 = ashr <5 x i64> undef, undef ret void } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll index 17c21af..4e5b868 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll @@ -7,143 +7,549 @@ ; RUN: opt < %s -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji -cost-kind=code-size -S | FileCheck -check-prefixes=ALL-SIZE,VI-SIZE %s ; END. 
-define amdgpu_kernel void @shufflevector_00_v2i16(<2 x i16> %vec0, <2 x i16> %vec1) { -; GFX9-10-LABEL: 'shufflevector_00_v2i16' -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf00 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> zeroinitializer -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> +define amdgpu_kernel void @shufflevector_i16() { +; GFX9-10-LABEL: 'shufflevector_i16' +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf00 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf03 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf33 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf21 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf13 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; 
GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf121 = shufflevector <2 x i16> undef, 
<2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; -; VI-LABEL: 'shufflevector_00_v2i16' -; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf00 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> zeroinitializer -; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf10 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf11 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf22 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> +; VI-LABEL: 'shufflevector_i16' +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf00 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer +; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x 
i16> undef, <2 x i16> undef, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf10 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf22 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf03 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf33 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf21 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf13 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-NEXT: Cost Model: 
Found an estimated cost of -1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost 
of -1 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; -; GFX9-10-SIZE-LABEL: 'shufflevector_00_v2i16' -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf00 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> zeroinitializer -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> +; GFX9-10-SIZE-LABEL: 'shufflevector_i16' +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf00 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf03 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf33 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf21 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %shuf13 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = 
shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; VI-SIZE-LABEL: 'shufflevector_00_v2i16' -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf00 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> zeroinitializer -; 
VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf10 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf11 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf22 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> +; VI-SIZE-LABEL: 'shufflevector_i16' +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf00 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf10 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf22 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf03 = 
shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf33 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf21 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf13 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; - %shuf00 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> zeroinitializer - %shuf01 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> - %shuf10 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> - %shuf11 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> - %shuf02 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> - %shuf20 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> - %shuf22 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> + %shuf00 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer + %shuf01 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + %shuf10 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + %shuf11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + %shuf02 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + %shuf20 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + %shuf22 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + %shuf03 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + %shuf30 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + %shuf33 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + %shuf12 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + %shuf21 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + %shuf13 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + %shuf000 = 
shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> + %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> + %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> + %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> + %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> + %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> + %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> + %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> + %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> + %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> + %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> + %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> + %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> + %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> + %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> + %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> + %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> + %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> + %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> + %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> + %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> ret void } ; Should not assert -define amdgpu_kernel void @shufflevector_xxx(<2 x i8> %vec0) { -; ALL-LABEL: 'shufflevector_xxx' -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf10uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> +define amdgpu_kernel void @shufflevector_i8() { +; ALL-LABEL: 'shufflevector_i8' +; ALL-NEXT: Cost Model: Found 
an estimated cost of 3 for instruction: %shuf00 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf10 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf11 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf02 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf20 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf22 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf03 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf30 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf33 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf12 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf21 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf13 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf31 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector 
<2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf32 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf000 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> zeroinitializer +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf001 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf010 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf011 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf100 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf101 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf110 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf111 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf002 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf020 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf022 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf200 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf202 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x 
i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf220 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf222 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf112 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf121 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf122 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf211 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf212 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf221 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; -; ALL-SIZE-LABEL: 'shufflevector_xxx' -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf10uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> +; ALL-SIZE-LABEL: 'shufflevector_i8' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf00 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf10 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 
x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf11 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf02 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf20 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf22 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf03 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf30 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf33 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf12 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf21 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf13 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf31 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf32 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> 
zeroinitializer +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf101 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i8> undef, <2 x 
i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf121 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf211 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf221 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; - %shuf01uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> - %shuf10uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> + %shuf00 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer + %shuf01 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> + %shuf10 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> + %shuf11 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> + %shuf02 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> + %shuf20 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> + %shuf22 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> + %shuf03 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> + %shuf30 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> + %shuf33 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> + %shuf12 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> + %shuf21 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> + %shuf13 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x 
i32> + %shuf31 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> + %shuf23 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> + %shuf32 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> + %shuf000 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + %shuf001 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + %shuf010 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + %shuf011 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + %shuf100 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + %shuf101 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + %shuf110 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + %shuf111 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + %shuf002 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + %shuf020 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + %shuf022 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + %shuf200 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + %shuf202 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + %shuf220 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + %shuf222 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + %shuf112 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + %shuf121 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + %shuf122 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + %shuf211 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + %shuf212 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + %shuf221 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> + ret void +} + +define amdgpu_kernel void @shufflevector_i32() { +; ALL-LABEL: 'shufflevector_i32' +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf00 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = 
shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf02 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf20 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf30 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf33 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf12 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf13 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf31 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x 
i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf000 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> zeroinitializer +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf001 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf010 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf011 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf100 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf101 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf110 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf111 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf002 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf020 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf022 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf200 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf202 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf220 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; 
ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf222 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf112 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf121 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf122 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf211 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf212 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf221 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; ALL-SIZE-LABEL: 'shufflevector_i32' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf00 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf02 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf20 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; 
ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf30 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf33 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf12 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf13 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf31 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> zeroinitializer +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i32> 
undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf101 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf121 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%shuf122 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf211 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf221 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %shuf00 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer + %shuf01 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + %shuf10 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + %shuf11 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + %shuf02 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + %shuf20 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + %shuf22 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + %shuf03 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + %shuf30 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + %shuf33 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + %shuf12 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + %shuf21 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + %shuf13 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + %shuf31 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + %shuf23 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + %shuf32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + %shuf000 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> + %shuf001 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> + %shuf010 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> + %shuf011 = shufflevector <2 x i32> undef, <2 x i32> 
undef, <3 x i32> + %shuf100 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> + %shuf101 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> + %shuf110 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> + %shuf111 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> + %shuf002 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> + %shuf020 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> + %shuf022 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> + %shuf200 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> + %shuf202 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> + %shuf220 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> + %shuf222 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> + %shuf112 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> + %shuf121 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> + %shuf122 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> + %shuf211 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> + %shuf212 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> + %shuf221 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> ret void } ; Other shuffle cases define void @shuffle() { ; GFX9-10-LABEL: 'shuffle' -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x 
i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_2 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_4 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i8_8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i8_16 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; 
GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_2 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i16_4 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v8i16_8 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_4 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_2 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_4 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_2 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_2 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; VI-LABEL: 'shuffle' -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> -; VI-NEXT: Cost Model: Found an estimated cost 
of 2 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_2 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_4 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i8_8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i8_16 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x 
i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_2 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i16_4 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v8i16_8 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_4 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_2 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_4 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_2 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_2 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX9-10-SIZE-LABEL: 'shuffle' -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found 
an estimated cost of 0 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_2 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_4 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i8_8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 
x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i8_16 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_2 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i16_4 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v8i16_8 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_4 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_2 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_4 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_2 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_2 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; VI-SIZE-LABEL: 'shuffle' -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9 = 
shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_2 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_4 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6i8_8 = shufflevector <6 x i8> 
undef, <6 x i8> undef, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i8_8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i8_16 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_2 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i16_4 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v8i16_8 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_4 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_2 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_4 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_2 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_2 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; - %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> - %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> - %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> - %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> - %v11 = 
shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> - %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> - %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> - %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> - %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> - %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> - %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> + %v2i8_2 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> + %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> + %v4i8_4 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> + %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> + %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> + %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> + %v8i8_8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> + %v16i8_16 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> + %v2i16_2 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + %v4i16_4 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> + %v8i16_8 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> + %v2i32_2 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + %v4i32_4 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> + %v2f32_2 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> + %v4f32_4 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> + %v2i64_2 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> + %v2f64_2 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> ret void } -- 2.7.4