From 868351f894c3e4b36254efc3b09f20d1d479a8e3 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Mon, 26 Jun 2023 16:50:03 +0100
Subject: [PATCH] [X86] combineMul - ensure getTargetConstantFromNode splat extraction is the correct element width

The extracted Constant and Constant::getSplatValue can both be any bitwidth - they don't necessarily match the original ConstantSDNode type

Fixes #63507
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +-
 llvm/test/CodeGen/X86/pr63507.ll | 17 +++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/X86/pr63507.ll

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 888a277..b59bcc4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48766,7 +48766,7 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
  if (auto *SplatC = RawC->getSplatValue())
  C = &(SplatC->getUniqueInteger());
 
- if (!C)
+ if (!C || C->getBitWidth() != VT.getScalarSizeInBits())
  return SDValue();
  } else {
  C = &(CNode->getAPIntValue());
diff --git a/llvm/test/CodeGen/X86/pr63507.ll b/llvm/test/CodeGen/X86/pr63507.ll
new file mode 100644
index 0000000..4016f1c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr63507.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s
+
+define <4 x i32> @PR63507() {
+; CHECK-LABEL: PR63507:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm0 = [4294967295,4294967295]
+; CHECK-NEXT: vpmulld %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %psll.i = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer)
+ %cmp.i = icmp eq <4 x i32> %psll.i, zeroinitializer
+ %sext.i = sext <4 x i1> %cmp.i to <4 x i32>
+ %shuffle.i101 = shufflevector <4 x i32> %sext.i, <4 x i32> zeroinitializer, <4 x i32>
+ %mul.i = mul <4 x i32> %shuffle.i101, %shuffle.i101
+ ret <4 x i32> %mul.i
+}
+declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>)
--
2.7.4