}
}
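+/// Helper for the min/max nodes: the result is always one of the two
+/// operands, so a bit is known in the result only when it is known to the
+/// same value in both operands.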
+static void computeMaskedBitsForMinMax(const SDValue Op0,
+                                       const SDValue Op1,
+                                       APInt &KnownZero,
+                                       APInt &KnownOne,
+                                       const SelectionDAG &DAG,
+                                       unsigned Depth) {
+  APInt Op0Zero, Op0One;
+  APInt Op1Zero, Op1One;
+  DAG.ComputeMaskedBits(Op0, Op0Zero, Op0One, Depth);
+  DAG.ComputeMaskedBits(Op1, Op1Zero, Op1One, Depth);
+
+  KnownZero = Op0Zero & Op1Zero;
+  KnownOne = Op0One & Op1One;
+}
+
void AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
+
KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
+  unsigned Opc = Op.getOpcode();
+  switch (Opc) {
+  case ISD::INTRINSIC_WO_CHAIN: {
+    // FIXME: The intrinsic should just use the node.
+    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
+    case AMDGPUIntrinsic::AMDGPU_imax:
+    case AMDGPUIntrinsic::AMDGPU_umax:
+    case AMDGPUIntrinsic::AMDGPU_imin:
+    case AMDGPUIntrinsic::AMDGPU_umin:
+      computeMaskedBitsForMinMax(Op.getOperand(1), Op.getOperand(2),
+                                 KnownZero, KnownOne, DAG, Depth);
+      break;
+    default:
+      break;
+    }
+
+    break;
+  }
+  case AMDGPUISD::SMAX:
+  case AMDGPUISD::UMAX:
+  case AMDGPUISD::SMIN:
+  case AMDGPUISD::UMIN:
+    computeMaskedBitsForMinMax(Op.getOperand(0), Op.getOperand(1),
+                               KnownZero, KnownOne, DAG, Depth);
+    break;
+  default:
+    break;
+  }
}
ret void
}
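+; The umax operand is zero-extended from i8, so the high bits of the result
+; are known zero and the trunc/zext of the result should not need an AND.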
+; SI-LABEL: @trunc_zext_umax
+; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]],
+; SI: V_MAX_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
+; SI-NOT: AND
+; SI: BUFFER_STORE_SHORT [[RESULT]],
+define void @trunc_zext_umax(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
+ %tmp5 = load i8 addrspace(1)* %src, align 1
+ %tmp2 = zext i8 %tmp5 to i32
+ %tmp3 = tail call i32 @llvm.AMDGPU.umax(i32 %tmp2, i32 0) nounwind readnone
+ %tmp4 = trunc i32 %tmp3 to i8
+ %tmp6 = zext i8 %tmp4 to i16
+ store i16 %tmp6, i16 addrspace(1)* %out, align 2
+ ret void
+}
+
; Function Attrs: readnone
declare i32 @llvm.AMDGPU.umax(i32, i32) #1
ret void
}
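+; Same as above, but for umin: the known-zero high bits mean the trunc/zext
+; of the result should not need an AND.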
+; SI-LABEL: @trunc_zext_umin
+; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]],
+; SI: V_MIN_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
+; SI-NOT: AND
+; SI: BUFFER_STORE_SHORT [[RESULT]],
+define void @trunc_zext_umin(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
+ %tmp5 = load i8 addrspace(1)* %src, align 1
+ %tmp2 = zext i8 %tmp5 to i32
+ %tmp3 = tail call i32 @llvm.AMDGPU.umin(i32 %tmp2, i32 0) nounwind readnone
+ %tmp4 = trunc i32 %tmp3 to i8
+ %tmp6 = zext i8 %tmp4 to i16
+ store i16 %tmp6, i16 addrspace(1)* %out, align 2
+ ret void
+}
+
; Function Attrs: readnone
declare i32 @llvm.AMDGPU.umin(i32, i32) #1