From 7a112356e4a19c0e63f533548fa95a03a5537fb9 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sat, 26 Jun 2021 19:02:26 -0700
Subject: [PATCH] [X86] Correct the conversion of VALIGND/Q intrinsics to shufflevector.

We need to mask the immediate to the width of a single vector rather
than 2 vectors. If we use the width of 2 vectors then any shift larger
than the length of 1 vector is going to overflow the shuffle indices.

Fixes PR50895.
---
 clang/lib/CodeGen/CGBuiltin.cpp            | 4 ++--
 clang/test/CodeGen/X86/avx512vl-builtins.c | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e8ad4e0..97f2db9 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -13376,8 +13376,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
 
-    // Mask the shift amount to width of two vectors.
-    ShiftVal &= (2 * NumElts) - 1;
+    // Mask the shift amount to width of a vector.
+    ShiftVal &= NumElts - 1;
 
     int Indices[16];
     for (unsigned i = 0; i != NumElts; ++i)
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index ef27542..4a72264 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -9426,7 +9426,7 @@ __m128i test_mm_mask_alignr_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128
   // CHECK-LABEL: @test_mm_mask_alignr_epi32
   // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32>
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
-  return _mm_mask_alignr_epi32(__W, __U, __A, __B, 1);
+  return _mm_mask_alignr_epi32(__W, __U, __A, __B, 5);
 }
 
 __m128i test_mm_maskz_alignr_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
@@ -9446,7 +9446,7 @@ __m256i test_mm256_mask_alignr_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m
   // CHECK-LABEL: @test_mm256_mask_alignr_epi32
   // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32>
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
-  return _mm256_mask_alignr_epi32(__W, __U, __A, __B, 1);
+  return _mm256_mask_alignr_epi32(__W, __U, __A, __B, 9);
 }
 
 __m256i test_mm256_maskz_alignr_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
@@ -9466,7 +9466,7 @@ __m128i test_mm_mask_alignr_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128
   // CHECK-LABEL: @test_mm_mask_alignr_epi64
   // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32>
   // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
-  return _mm_mask_alignr_epi64(__W, __U, __A, __B, 1);
+  return _mm_mask_alignr_epi64(__W, __U, __A, __B, 3);
 }
 
 __m128i test_mm_maskz_alignr_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
@@ -9486,7 +9486,7 @@ __m256i test_mm256_mask_alignr_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m
   // CHECK-LABEL: @test_mm256_mask_alignr_epi64
   // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32>
   // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
-  return _mm256_mask_alignr_epi64(__W, __U, __A, __B, 1);
+  return _mm256_mask_alignr_epi64(__W, __U, __A, __B, 5);
 }
 
 __m256i test_mm256_maskz_alignr_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
-- 
2.7.4
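
Illustrative note (not part of the patch): the commit message says that masking the immediate to the width of two vectors lets shifts longer than one vector overflow the shuffle indices. The standalone C++ sketch below mirrors the index computation done for the valign builtins in EmitX86BuiltinExpr; valignIndices and its parameters are hypothetical names used only for this illustration, assuming the index loop Indices[i] = i + ShiftVal shown in the hunk above.

    // Sketch: compare the old masking (width of two vectors) with the
    // fixed masking (width of one vector) for a 4-element VALIGND with
    // immediate 5, the case from PR50895. Valid indices into the two
    // concatenated 4-element sources are 0..7.
    #include <cstdio>
    #include <vector>

    static std::vector<int> valignIndices(unsigned NumElts, unsigned Imm,
                                          bool maskToTwoVectors) {
      unsigned ShiftVal = Imm & 0xff;
      // Old behavior masked to the width of two vectors; the fix masks
      // to a single vector so the indices stay in range.
      ShiftVal &= maskToTwoVectors ? (2 * NumElts) - 1 : NumElts - 1;
      std::vector<int> Indices(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Indices[i] = i + ShiftVal;
      return Indices;
    }

    int main() {
      for (bool oldMask : {true, false}) {
        std::vector<int> Idx = valignIndices(/*NumElts=*/4, /*Imm=*/5, oldMask);
        std::printf("%s:", oldMask ? "old mask" : "new mask");
        for (int I : Idx)
          std::printf(" %d", I); // old: 5 6 7 8 (8 is out of range); new: 1 2 3 4
        std::printf("\n");
      }
      return 0;
    }

With the old masking, an immediate of 5 survives the AND with (2*4)-1 and produces index 8, which exceeds the 0..7 range of the two-source shuffle; with the fix, 5 & 3 == 1 and the indices 1 2 3 4 match the updated tests.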