[InstCombine] relax masking requirement for truncated funnel/rotate match

author Sanjay Patel <spatel@rotateright.com>

Wed, 28 Apr 2021 20:13:32 +0000 (16:13 -0400)

committer Sanjay Patel <spatel@rotateright.com>

Wed, 28 Apr 2021 20:49:50 +0000 (16:49 -0400)
author Sanjay Patel <spatel@rotateright.com>
Wed, 28 Apr 2021 20:13:32 +0000 (16:13 -0400)
committer Sanjay Patel <spatel@rotateright.com>
Wed, 28 Apr 2021 20:49:50 +0000 (16:49 -0400)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp

index 21ca15c..e6b6936 100644 (file)
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -589,16 +589,16 @@ Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
    if (!ShAmt)
      return nullptr;
  
-  // The shifted value must have high zeros in the wide type. Typically, this
-  // will be a zext, but it could also be the result of an 'and' or 'shift'.
+  // The right-shifted value must have high zeros in the wide type (for example
+  // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
+  // truncated, so those do not matter.
    unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
    APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
-  if (!MaskedValueIsZero(ShVal0, HiBitMask, 0, &Trunc) ||
-      !MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
+  if (!MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
      return nullptr;
  
    // We have an unnecessarily wide rotate!
-  // trunc (or (lshr ShVal0, ShAmt), (shl ShVal1, BitWidth - ShAmt))
+  // trunc (or (shl ShVal0, ShAmt), (lshr ShVal1, BitWidth - ShAmt))
    // Narrow the inputs and convert to funnel shift intrinsic:
    // llvm.fshl.i8(trunc(ShVal), trunc(ShVal), trunc(ShAmt))
    Value *NarrowShAmt = Builder.CreateTrunc(ShAmt, DestTy);
diff --git a/llvm/test/Transforms/InstCombine/funnel.ll b/llvm/test/Transforms/InstCombine/funnel.ll

index 4d2d6ef..42e289f 100644 (file)
--- a/llvm/test/Transforms/InstCombine/funnel.ll
+++ b/llvm/test/Transforms/InstCombine/funnel.ll
@@ -281,18 +281,15 @@ define i8 @fshr_commute_8bit(i32 %x, i32 %y, i32 %shift) {
    ret i8 %conv2
  }
  
-; TODO:
  ; The left-shifted value does not need to be masked at all.
  
  define i8 @fshr_commute_8bit_unmasked_shl(i32 %x, i32 %y, i32 %shift) {
  ; CHECK-LABEL: @fshr_commute_8bit_unmasked_shl(
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHIFT:%.*]], 3
-; CHECK-NEXT:    [[CONVX:%.*]] = and i32 [[X:%.*]], 255
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONVX]], [[AND]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i32 8, [[AND]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[Y:%.*]], [[SUB]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
-; CHECK-NEXT:    [[CONV2:%.*]] = trunc i32 [[OR]] to i8
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 3
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[Y:%.*]] to i8
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[X:%.*]] to i8
+; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP3]], i8 [[TMP4]], i8 [[TMP2]])
  ; CHECK-NEXT:    ret i8 [[CONV2]]
  ;
    %and = and i32 %shift, 3
diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll

index 4c77d44..7a1066f 100644 (file)
--- a/llvm/test/Transforms/InstCombine/rotate.ll
+++ b/llvm/test/Transforms/InstCombine/rotate.ll
@@ -456,18 +456,15 @@ define i8 @rotate_right_commute_8bit_unmasked_shl(i32 %v, i32 %shift) {
    ret i8 %conv2
  }
  
-; TODO:
  ; The left-shifted value does not need to be masked at all.
  
  define i8 @rotate_right_commute_8bit(i32 %v, i32 %shift) {
  ; CHECK-LABEL: @rotate_right_commute_8bit(
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHIFT:%.*]], 3
-; CHECK-NEXT:    [[CONV:%.*]] = and i32 [[V:%.*]], 255
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONV]], [[AND]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i32 8, [[AND]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[V]], [[SUB]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
-; CHECK-NEXT:    [[CONV2:%.*]] = trunc i32 [[OR]] to i8
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 3
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[V:%.*]] to i8
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[V]] to i8
+; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP3]], i8 [[TMP4]], i8 [[TMP2]])
  ; CHECK-NEXT:    ret i8 [[CONV2]]
  ;
    %and = and i32 %shift, 3
author	Sanjay Patel <spatel@rotateright.com>
	Wed, 28 Apr 2021 20:13:32 +0000 (16:13 -0400)
committer	Sanjay Patel <spatel@rotateright.com>
	Wed, 28 Apr 2021 20:49:50 +0000 (16:49 -0400)
llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp		patch | blob | history
llvm/test/Transforms/InstCombine/funnel.ll		patch | blob | history
llvm/test/Transforms/InstCombine/rotate.ll		patch | blob | history