From e7f261116095a5124d23d6068099b91ff15c5c2e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 29 Sep 2016 05:54:39 +0000 Subject: [PATCH] [X86] Add EVEX encoded VBROADCASTSS/SD and VPBROADCASTD/Q to execution domain fixing table. llvm-svn: 282687 --- llvm/lib/Target/X86/X86InstrInfo.cpp | 10 ++++ llvm/test/CodeGen/X86/avx512-bugfix-25270.ll | 8 ++-- .../CodeGen/X86/avx512-intrinsics-fast-isel.ll | 8 ++-- llvm/test/CodeGen/X86/avx512-vbroadcast.ll | 6 +-- .../CodeGen/X86/avx512vl-intrinsics-fast-isel.ll | 12 ++--- llvm/test/CodeGen/X86/masked_gather_scatter.ll | 4 +- llvm/test/CodeGen/X86/vec_fabs.ll | 4 +- llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll | 56 +++++----------------- llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll | 2 +- llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll | 4 +- .../X86/vector-shuffle-combining-avx512bw.ll | 6 +-- 11 files changed, 50 insertions(+), 70 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 6976a6f..a6e80d4 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -7512,6 +7512,16 @@ static const uint16_t ReplaceableInstrs[][3] = { { X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr }, { X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr }, { X86::VMOVNTPSZmr, X86::VMOVNTPDZmr, X86::VMOVNTDQZmr }, + { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128r, X86::VPBROADCASTDZ128r }, + { X86::VBROADCASTSSZ128m, X86::VBROADCASTSSZ128m, X86::VPBROADCASTDZ128m }, + { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256r, X86::VPBROADCASTDZ256r }, + { X86::VBROADCASTSSZ256m, X86::VBROADCASTSSZ256m, X86::VPBROADCASTDZ256m }, + { X86::VBROADCASTSSZr, X86::VBROADCASTSSZr, X86::VPBROADCASTDZr }, + { X86::VBROADCASTSSZm, X86::VBROADCASTSSZm, X86::VPBROADCASTDZm }, + { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256r, X86::VPBROADCASTQZ256r }, + { X86::VBROADCASTSDZ256m, X86::VBROADCASTSDZ256m, X86::VPBROADCASTQZ256m }, + { 
X86::VBROADCASTSDZr, X86::VBROADCASTSDZr, X86::VPBROADCASTQZr }, + { X86::VBROADCASTSDZm, X86::VBROADCASTSDZm, X86::VPBROADCASTQZm }, }; static const uint16_t ReplaceableInstrsAVX2[][3] = { diff --git a/llvm/test/CodeGen/X86/avx512-bugfix-25270.ll b/llvm/test/CodeGen/X86/avx512-bugfix-25270.ll index 7e8b521..47384fa 100644 --- a/llvm/test/CodeGen/X86/avx512-bugfix-25270.ll +++ b/llvm/test/CodeGen/X86/avx512-bugfix-25270.ll @@ -11,13 +11,13 @@ define void @bar__512(<16 x i32>* %var) #0 { ; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: vmovups (%rbx), %zmm0 ; CHECK-NEXT: vmovups %zmm0, (%rsp) ## 64-byte Spill -; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1 -; CHECK-NEXT: vmovdqa32 %zmm1, (%rbx) +; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %zmm1 +; CHECK-NEXT: vmovaps %zmm1, (%rbx) ; CHECK-NEXT: callq _Print__512 ; CHECK-NEXT: vmovups (%rsp), %zmm0 ## 64-byte Reload ; CHECK-NEXT: callq _Print__512 -; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 -; CHECK-NEXT: vmovdqa32 %zmm0, (%rbx) +; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %zmm0 +; CHECK-NEXT: vmovaps %zmm0, (%rbx) ; CHECK-NEXT: addq $112, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll index 7a0424b..2d4bf6e 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll @@ -7,12 +7,12 @@ define <8 x i64> @test_mm512_broadcastd_epi32(<2 x i64> %a0) { ; X32-LABEL: test_mm512_broadcastd_epi32: ; X32: # BB#0: -; X32-NEXT: vpbroadcastd %xmm0, %zmm0 +; X32-NEXT: vbroadcastss %xmm0, %zmm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_broadcastd_epi32: ; X64: # BB#0: -; X64-NEXT: vpbroadcastd %xmm0, %zmm0 +; X64-NEXT: vbroadcastss %xmm0, %zmm0 ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %res0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <16 x i32> zeroinitializer @@ -66,12 +66,12 @@ define <8 x i64> 
@test_mm512_maskz_broadcastd_epi32(i16 %a0, <2 x i64> %a1) { define <8 x i64> @test_mm512_broadcastq_epi64(<2 x i64> %a0) { ; X32-LABEL: test_mm512_broadcastq_epi64: ; X32: # BB#0: -; X32-NEXT: vpbroadcastq %xmm0, %zmm0 +; X32-NEXT: vbroadcastsd %xmm0, %zmm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_broadcastq_epi64: ; X64: # BB#0: -; X64-NEXT: vpbroadcastq %xmm0, %zmm0 +; X64-NEXT: vbroadcastsd %xmm0, %zmm0 ; X64-NEXT: retq %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <8 x i32> zeroinitializer ret <8 x i64> %res diff --git a/llvm/test/CodeGen/X86/avx512-vbroadcast.ll b/llvm/test/CodeGen/X86/avx512-vbroadcast.ll index 3cb9ea2..8d02173 100644 --- a/llvm/test/CodeGen/X86/avx512-vbroadcast.ll +++ b/llvm/test/CodeGen/X86/avx512-vbroadcast.ll @@ -198,7 +198,7 @@ define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) define <16 x i32> @_xmm16xi32(<16 x i32> %a) { ; ALL-LABEL: _xmm16xi32: ; ALL: # BB#0: -; ALL-NEXT: vpbroadcastd %xmm0, %zmm0 +; ALL-NEXT: vbroadcastss %xmm0, %zmm0 ; ALL-NEXT: retq %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer ret <16 x i32> %b @@ -388,7 +388,7 @@ define <32 x i16> @_invec16xi16(<16 x i16>%a) { define <16 x i32> @_invec8xi32(<8 x i32>%a) { ; ALL-LABEL: _invec8xi32: ; ALL: # BB#0: -; ALL-NEXT: vpbroadcastd %xmm0, %zmm0 +; ALL-NEXT: vbroadcastss %xmm0, %zmm0 ; ALL-NEXT: retq %res = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> zeroinitializer ret <16 x i32>%res @@ -397,7 +397,7 @@ define <16 x i32> @_invec8xi32(<8 x i32>%a) { define <8 x i64> @_invec4xi64(<4 x i64>%a) { ; ALL-LABEL: _invec4xi64: ; ALL: # BB#0: -; ALL-NEXT: vpbroadcastq %xmm0, %zmm0 +; ALL-NEXT: vbroadcastsd %xmm0, %zmm0 ; ALL-NEXT: retq %res = shufflevector <4 x i64> %a, <4 x i64> undef, <8 x i32> zeroinitializer ret <8 x i64>%res diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll index 310ed8f..ce61139 100644 --- 
a/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll @@ -7,12 +7,12 @@ define <2 x i64> @test_mm_broadcastd_epi32(<2 x i64> %a0) { ; X32-LABEL: test_mm_broadcastd_epi32: ; X32: # BB#0: -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 +; X32-NEXT: vbroadcastss %xmm0, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm_broadcastd_epi32: ; X64: # BB#0: -; X64-NEXT: vpbroadcastd %xmm0, %xmm0 +; X64-NEXT: vbroadcastss %xmm0, %xmm0 ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %res0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer @@ -88,12 +88,12 @@ define <2 x i64> @test_mm_maskz_broadcastd_epi32(i8 %a0, <2 x i64> %a1) { define <4 x i64> @test_mm256_broadcastd_epi32(<2 x i64> %a0) { ; X32-LABEL: test_mm256_broadcastd_epi32: ; X32: # BB#0: -; X32-NEXT: vpbroadcastd %xmm0, %ymm0 +; X32-NEXT: vbroadcastss %xmm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_broadcastd_epi32: ; X64: # BB#0: -; X64-NEXT: vpbroadcastd %xmm0, %ymm0 +; X64-NEXT: vbroadcastss %xmm0, %ymm0 ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %res0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <8 x i32> zeroinitializer @@ -221,12 +221,12 @@ define <2 x i64> @test_mm_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) { define <4 x i64> @test_mm256_broadcastq_epi64(<2 x i64> %a0) { ; X32-LABEL: test_mm256_broadcastq_epi64: ; X32: # BB#0: -; X32-NEXT: vpbroadcastq %xmm0, %ymm0 +; X32-NEXT: vbroadcastsd %xmm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_broadcastq_epi64: ; X64: # BB#0: -; X64-NEXT: vpbroadcastq %xmm0, %ymm0 +; X64-NEXT: vbroadcastsd %xmm0, %ymm0 ; X64-NEXT: retq %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32> zeroinitializer ret <4 x i64> %res diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll index 29352bb..68b1ebc 100644 --- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll +++ 
b/llvm/test/CodeGen/X86/masked_gather_scatter.ll @@ -613,7 +613,7 @@ define <16 x float> @test11(float* %base, i32 %ind) { ; KNL_32-LABEL: test11: ; KNL_32: # BB#0: ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %zmm1 +; KNL_32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %zmm1 ; KNL_32-NEXT: kxnorw %k0, %k0, %k1 ; KNL_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1} ; KNL_32-NEXT: retl @@ -628,7 +628,7 @@ define <16 x float> @test11(float* %base, i32 %ind) { ; SKX_32-LABEL: test11: ; SKX_32: # BB#0: ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %zmm1 +; SKX_32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %zmm1 ; SKX_32-NEXT: kxnorw %k0, %k0, %k1 ; SKX_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1} ; SKX_32-NEXT: retl diff --git a/llvm/test/CodeGen/X86/vec_fabs.ll b/llvm/test/CodeGen/X86/vec_fabs.ll index 35d6da9..15941f7 100644 --- a/llvm/test/CodeGen/X86/vec_fabs.ll +++ b/llvm/test/CodeGen/X86/vec_fabs.ll @@ -70,7 +70,7 @@ define <4 x double> @fabs_v4f64(<4 x double> %p) { ; X32_AVX512VL-LABEL: fabs_v4f64: ; X32_AVX512VL: # BB#0: ; X32_AVX512VL-NEXT: vbroadcastsd {{\.LCPI.*}}, %ymm1 -; X32_AVX512VL-NEXT: vandpd %ymm1, %ymm0, %ymm0 +; X32_AVX512VL-NEXT: vandps %ymm1, %ymm0, %ymm0 ; X32_AVX512VL-NEXT: retl ; ; X32_AVX512VLDQ-LABEL: fabs_v4f64: @@ -86,7 +86,7 @@ define <4 x double> @fabs_v4f64(<4 x double> %p) { ; X64_AVX512VL-LABEL: fabs_v4f64: ; X64_AVX512VL: # BB#0: ; X64_AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %ymm1 -; X64_AVX512VL-NEXT: vandpd %ymm1, %ymm0, %ymm0 +; X64_AVX512VL-NEXT: vandps %ymm1, %ymm0, %ymm0 ; X64_AVX512VL-NEXT: retq ; ; X64_AVX512VLDQ-LABEL: fabs_v4f64: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll index 7c554bbe..4dc72c1 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll @@ -524,7 +524,7 @@ define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x 
i64> %b) { ; ; AVX512VL-LABEL: shuffle_v4i64_0000: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpbroadcastq %xmm0, %ymm0 +; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0> ret <4 x i64> %shuffle @@ -1244,20 +1244,10 @@ define <4 x double> @splat_mem_v4f64(double* %ptr) { } define <4 x i64> @splat_mem_v4i64(i64* %ptr) { -; AVX1-LABEL: splat_mem_v4i64: -; AVX1: # BB#0: -; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: splat_mem_v4i64: -; AVX2: # BB#0: -; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: splat_mem_v4i64: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpbroadcastq (%rdi), %ymm0 -; AVX512VL-NEXT: retq +; ALL-LABEL: splat_mem_v4i64: +; ALL: # BB#0: +; ALL-NEXT: vbroadcastsd (%rdi), %ymm0 +; ALL-NEXT: retq %a = load i64, i64* %ptr %v = insertelement <4 x i64> undef, i64 %a, i64 0 %shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> @@ -1296,20 +1286,10 @@ define <4 x double> @splat_v4f64(<2 x double> %r) { } define <4 x i64> @splat_mem_v4i64_from_v2i64(<2 x i64>* %ptr) { -; AVX1-LABEL: splat_mem_v4i64_from_v2i64: -; AVX1: # BB#0: -; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: splat_mem_v4i64_from_v2i64: -; AVX2: # BB#0: -; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: splat_mem_v4i64_from_v2i64: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpbroadcastq (%rdi), %ymm0 -; AVX512VL-NEXT: retq +; ALL-LABEL: splat_mem_v4i64_from_v2i64: +; ALL: # BB#0: +; ALL-NEXT: vbroadcastsd (%rdi), %ymm0 +; ALL-NEXT: retq %v = load <2 x i64>, <2 x i64>* %ptr %shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> ret <4 x i64> %shuffle @@ -1457,20 +1437,10 @@ define <4 x i64> @concat_v4i64_0145_bc(<4 x i64> %a0, <4 x i64> %a1) { } define <4 x i64> @insert_dup_mem_v4i64(i64* %ptr) { -; AVX1-LABEL: insert_dup_mem_v4i64: -; AVX1: # BB#0: -; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0 
-; AVX1-NEXT: retq -; -; AVX2-LABEL: insert_dup_mem_v4i64: -; AVX2: # BB#0: -; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: insert_dup_mem_v4i64: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpbroadcastq (%rdi), %ymm0 -; AVX512VL-NEXT: retq +; ALL-LABEL: insert_dup_mem_v4i64: +; ALL: # BB#0: +; ALL-NEXT: vbroadcastsd (%rdi), %ymm0 +; ALL-NEXT: retq %tmp = load i64, i64* %ptr, align 1 %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0 %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <4 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll index 321543f..fd5c367 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll @@ -137,7 +137,7 @@ define <16 x float> @shuffle_v16f32_03_uu_uu_uu_uu_04_uu_uu_uu_uu_11_uu_uu_uu_uu define <16 x i32> @shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i32> %a, <16 x i32> %b) { ; ALL-LABEL: shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; ALL: # BB#0: -; ALL-NEXT: vpbroadcastd %xmm0, %zmm0 +; ALL-NEXT: vbroadcastss %xmm0, %zmm0 ; ALL-NEXT: retq %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> ret <16 x i32> %shuffle diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll index b1de0656..fbe414b 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll @@ -962,12 +962,12 @@ define <8 x i64> @shuffle_v8i64_00000000(<8 x i64> %a, <8 x i64> %b) { ; ; AVX512F-LABEL: shuffle_v8i64_00000000: ; AVX512F: # BB#0: -; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0 +; AVX512F-NEXT: vbroadcastsd %xmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8i64_00000000: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vpbroadcastq %xmm0, %zmm0 +; AVX512F-32-NEXT: vbroadcastsd %xmm0, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = 
shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> ret <8 x i64> %shuffle diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll index 3f5608f..14a870f 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll @@ -642,12 +642,12 @@ define <32 x i16> @combine_permvar_as_vpbroadcastw512(<32 x i16> %x0) { define <16 x i32> @combine_permvar_as_vpbroadcastd512(<16 x i32> %x0) { ; X32-LABEL: combine_permvar_as_vpbroadcastd512: ; X32: # BB#0: -; X32-NEXT: vpbroadcastd %xmm0, %zmm0 +; X32-NEXT: vbroadcastss %xmm0, %zmm0 ; X32-NEXT: retl ; ; X64-LABEL: combine_permvar_as_vpbroadcastd512: ; X64: # BB#0: -; X64-NEXT: vpbroadcastd %xmm0, %zmm0 +; X64-NEXT: vbroadcastss %xmm0, %zmm0 ; X64-NEXT: retq %1 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> zeroinitializer, <16 x i32> undef, i16 -1) ret <16 x i32> %1 @@ -662,7 +662,7 @@ define <8 x i64> @combine_permvar_as_vpbroadcastq512(<8 x i64> %x0) { ; ; X64-LABEL: combine_permvar_as_vpbroadcastq512: ; X64: # BB#0: -; X64-NEXT: vpbroadcastq %xmm0, %zmm0 +; X64-NEXT: vbroadcastsd %xmm0, %zmm0 ; X64-NEXT: retq %1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> zeroinitializer, <8 x i64> undef, i8 -1) ret <8 x i64> %1 -- 2.7.4