From: Sanjay Patel Date: Tue, 10 May 2022 18:20:43 +0000 (-0400) Subject: [InstCombine] fold shuffles with FP<->Int cast operands X-Git-Tag: upstream/15.0.7~8105 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0353c2c996c5863463c356de97c9852f9330ed11;p=platform%2Fupstream%2Fllvm.git [InstCombine] fold shuffles with FP<->Int cast operands shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask) This is similar to a recent transform with fneg ( b331a7ebc1e0 ), but this is intentionally the most conservative first step to try to avoid regressions in codegen. There are several restrictions that could be removed as follow-up enhancements. Note that a cast with a unary shuffle is currently canonicalized in the other direction (shuffle after cast - D103038 ). We might want to invert that to be consistent with this patch. --- diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 8908739..1e81655 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -2275,6 +2275,54 @@ static Instruction *foldFNegShuffle(ShuffleVectorInst &Shuf, return nullptr; } +/// Canonicalize casts after shuffle. +static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf, + InstCombiner::BuilderTy &Builder) { + // Do we have 2 matching cast operands? + auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0)); + auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1)); + if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() || + Cast0->getSrcTy() != Cast1->getSrcTy()) + return nullptr; + + // TODO: Allow other opcodes? That would require easing the type restrictions + // below here. 
+ CastInst::CastOps CastOpcode = Cast0->getOpcode(); + switch (CastOpcode) { + case Instruction::FPToSI: + case Instruction::FPToUI: + case Instruction::SIToFP: + case Instruction::UIToFP: + break; + default: + return nullptr; + } + + VectorType *ShufTy = Shuf.getType(); + VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType()); + VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy()); + + // TODO: Allow length-changing shuffles? + if (ShufTy != ShufOpTy) + return nullptr; + + // TODO: Allow element-size-changing casts? + assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) && + "Expected fixed vector operands for casts and binary shuffle"); + if (CastSrcTy->getPrimitiveSizeInBits() != ShufOpTy->getPrimitiveSizeInBits()) + return nullptr; + + // At least one of the operands must have only one use (the shuffle). + if (!Cast0->hasOneUse() && !Cast1->hasOneUse()) + return nullptr; + + // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask) + Value *X = Cast0->getOperand(0); + Value *Y = Cast1->getOperand(0); + Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask()); + return CastInst::Create(CastOpcode, NewShuf, ShufTy); +} + /// Try to fold an extract subvector operation. 
static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) { Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1); @@ -2573,6 +2621,9 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (Instruction *I = foldFNegShuffle(SVI, Builder)) return I; + if (Instruction *I = foldCastShuffle(SVI, Builder)) + return I; + APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnes(VWidth)); if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll index 47ed28d..746966a 100644 --- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll @@ -2038,9 +2038,8 @@ define <2 x half> @uitofp_unary_shuf_narrow_narrow_elts(<4 x i32> %x) { define <4 x i32> @fptosi_shuf(<4 x float> %x, <4 x float> %y) { ; CHECK-LABEL: @fptosi_shuf( -; CHECK-NEXT: [[NX:%.*]] = fptosi <4 x float> [[X:%.*]] to <4 x i32> -; CHECK-NEXT: [[NY:%.*]] = fptosi <4 x float> [[Y:%.*]] to <4 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[NX]], <4 x i32> [[NY]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; %nx = fptosi <4 x float> %x to <4 x i32> @@ -2051,9 +2050,8 @@ define <4 x i32> @fptosi_shuf(<4 x float> %x, <4 x float> %y) { define <3 x i16> @fptoui_shuf(<3 x half> %x, <3 x half> %y) { ; CHECK-LABEL: @fptoui_shuf( -; CHECK-NEXT: [[NX:%.*]] = fptoui <3 x half> [[X:%.*]] to <3 x i16> -; CHECK-NEXT: [[NY:%.*]] = fptoui <3 x half> [[Y:%.*]] to <3 x i16> -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i16> [[NX]], <3 x i16> [[NY]], <3 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x half> [[X:%.*]], <3 x half> [[Y:%.*]], <3 x i32> +; CHECK-NEXT: [[R:%.*]] = fptoui <3 x half> [[TMP1]] to <3 x i16> ; 
CHECK-NEXT: ret <3 x i16> [[R]] ; %nx = fptoui <3 x half> %x to <3 x i16> @@ -2062,6 +2060,8 @@ define <3 x i16> @fptoui_shuf(<3 x half> %x, <3 x half> %y) { ret <3 x i16> %r } +; negative test - must have same source types + define <3 x i16> @fptoui_shuf_different_source_types(<3 x float> %x, <3 x half> %y) { ; CHECK-LABEL: @fptoui_shuf_different_source_types( ; CHECK-NEXT: [[NX:%.*]] = fptoui <3 x float> [[X:%.*]] to <3 x i16> @@ -2075,6 +2075,8 @@ define <3 x i16> @fptoui_shuf_different_source_types(<3 x float> %x, <3 x half> ret <3 x i16> %r } +; negative test - must have same size elements + define <4 x i32> @fptoui_shuf_widen_elts(<4 x half> %x, <4 x half> %y) { ; CHECK-LABEL: @fptoui_shuf_widen_elts( ; CHECK-NEXT: [[NX:%.*]] = fptosi <4 x half> [[X:%.*]] to <4 x i32> @@ -2088,6 +2090,8 @@ define <4 x i32> @fptoui_shuf_widen_elts(<4 x half> %x, <4 x half> %y) { ret <4 x i32> %r } +; negative test - must have same size elements + define <4 x float> @sitofp_shuf_narrow_elts(<4 x i64> %x, <4 x i64> %y) { ; CHECK-LABEL: @sitofp_shuf_narrow_elts( ; CHECK-NEXT: [[NX:%.*]] = sitofp <4 x i64> [[X:%.*]] to <4 x float> @@ -2101,12 +2105,14 @@ define <4 x float> @sitofp_shuf_narrow_elts(<4 x i64> %x, <4 x i64> %y) { ret <4 x float> %r } +; one extra use is ok + define <4 x float> @uitofp_shuf_extra_use1(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @uitofp_shuf_extra_use1( ; CHECK-NEXT: [[NX:%.*]] = uitofp <4 x i32> [[X:%.*]] to <4 x float> ; CHECK-NEXT: call void @use4(<4 x float> [[NX]]) -; CHECK-NEXT: [[NY:%.*]] = uitofp <4 x i32> [[Y:%.*]] to <4 x float> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[NX]], <4 x float> [[NY]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float> ; CHECK-NEXT: ret <4 x float> [[R]] ; %nx = uitofp <4 x i32> %x to <4 x float> @@ -2116,12 +2122,14 @@ define <4 x float> @uitofp_shuf_extra_use1(<4 x i32> %x, <4 x i32> 
%y) { ret <4 x float> %r } +; one extra use is ok + define <4 x float> @sitofp_shuf_extra_use2(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @sitofp_shuf_extra_use2( -; CHECK-NEXT: [[NX:%.*]] = sitofp <4 x i32> [[X:%.*]] to <4 x float> ; CHECK-NEXT: [[NY:%.*]] = sitofp <4 x i32> [[Y:%.*]] to <4 x float> ; CHECK-NEXT: call void @use4(<4 x float> [[NY]]) -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[NX]], <4 x float> [[NY]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float> ; CHECK-NEXT: ret <4 x float> [[R]] ; %nx = sitofp <4 x i32> %x to <4 x float> @@ -2131,6 +2139,8 @@ define <4 x float> @sitofp_shuf_extra_use2(<4 x i32> %x, <4 x i32> %y) { ret <4 x float> %r } +; negative test - both ops have extra uses + define <2 x float> @sitofp_shuf_extra_use3(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @sitofp_shuf_extra_use3( ; CHECK-NEXT: [[NX:%.*]] = sitofp <2 x i32> [[X:%.*]] to <2 x float> @@ -2148,6 +2158,8 @@ define <2 x float> @sitofp_shuf_extra_use3(<2 x i32> %x, <2 x i32> %y) { ret <2 x float> %r } +; negative test - mismatched casts + define <4 x i32> @fptoi_shuf(<4 x float> %x, <4 x float> %y) { ; CHECK-LABEL: @fptoi_shuf( ; CHECK-NEXT: [[NX:%.*]] = fptoui <4 x float> [[X:%.*]] to <4 x i32> @@ -2161,7 +2173,7 @@ define <4 x i32> @fptoi_shuf(<4 x float> %x, <4 x float> %y) { ret <4 x i32> %r } -; length-changing shuffle +; negative test - length-changing shuffle define <4 x float> @sitofp_shuf_widen(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @sitofp_shuf_widen( @@ -2176,7 +2188,7 @@ define <4 x float> @sitofp_shuf_widen(<2 x i32> %x, <2 x i32> %y) { ret <4 x float> %r } -; length-changing shuffle +; negative test - length-changing shuffle define <2 x float> @uitofp_shuf_narrow(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @uitofp_shuf_narrow(