From f15a05623e5263f9232f8796587ee1cedcf7b15c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 22 May 2014 18:00:20 +0000 Subject: [PATCH] R600: Expand mad24 for GPUs without it llvm-svn: 209457 --- llvm/lib/Target/R600/AMDGPUInstructions.td | 10 ++++++++++ llvm/lib/Target/R600/CaymanInstructions.td | 1 + llvm/lib/Target/R600/EvergreenInstructions.td | 8 +++++++- llvm/lib/Target/R600/R600Instructions.td | 6 ++++++ llvm/test/CodeGen/R600/llvm.AMDGPU.imad24.ll | 7 +++++++ llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll | 7 +++++++ 6 files changed, 38 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/R600/AMDGPUInstructions.td b/llvm/lib/Target/R600/AMDGPUInstructions.td index 127b74a..8a9ab54 100644 --- a/llvm/lib/Target/R600/AMDGPUInstructions.td +++ b/llvm/lib/Target/R600/AMDGPUInstructions.td @@ -433,6 +433,16 @@ class UMad24Pat : Pat < (Inst $src0, $src1, $src2) >; +class IMad24ExpandPat : Pat < + (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2), + (AddInst (MulInst $src0, $src1), $src2) +>; + +class UMad24ExpandPat : Pat < + (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2), + (AddInst (MulInst $src0, $src1), $src2) +>; + include "R600Instructions.td" include "R700Instructions.td" diff --git a/llvm/lib/Target/R600/CaymanInstructions.td b/llvm/lib/Target/R600/CaymanInstructions.td index 6dded24c..58424a6 100644 --- a/llvm/lib/Target/R600/CaymanInstructions.td +++ b/llvm/lib/Target/R600/CaymanInstructions.td @@ -49,6 +49,7 @@ def COS_cm : COS_Common<0x8E>; def : POW_Common ; defm DIV_cm : DIV_Common; +def : UMad24ExpandPat; // RECIP_UINT emulation for Cayman // The multiplication scales from [0,1] to the unsigned integer range diff --git a/llvm/lib/Target/R600/EvergreenInstructions.td b/llvm/lib/Target/R600/EvergreenInstructions.td index d9931c8..7741667 100644 --- a/llvm/lib/Target/R600/EvergreenInstructions.td +++ b/llvm/lib/Target/R600/EvergreenInstructions.td @@ -75,6 +75,9 @@ def COS_eg : COS_Common<0x8E>; def : POW_Common ; def : Pat<(fsqrt 
f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; +def : IMad24ExpandPat; +def : UMad24ExpandPat; + //===----------------------------------------------------------------------===// // Memory read/write instructions //===----------------------------------------------------------------------===// @@ -301,8 +304,11 @@ def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT", >; def MULADD_UINT24_eg : R600_3OP <0x10, "MULADD_UINT24", - [(set i32:$dst, (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2))], VecALU + [(set i32:$dst, (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2))], VecALU >; + +def : UMad24Pat; + def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>; def : ROTRPattern ; def MULADD_eg : MULADD_Common<0x14>; diff --git a/llvm/lib/Target/R600/R600Instructions.td b/llvm/lib/Target/R600/R600Instructions.td index d2075c0..0c804ff 100644 --- a/llvm/lib/Target/R600/R600Instructions.td +++ b/llvm/lib/Target/R600/R600Instructions.td @@ -1625,6 +1625,12 @@ def : DwordAddrPat ; } // End isR600toCayman Predicate +let Predicates = [isR600] in { +// Intrinsic patterns +def : IMad24ExpandPat; +def : UMad24ExpandPat; +} // End isR600 + def getLDSNoRetOp : InstrMapping { let FilterClass = "R600_LDS_1A1D"; let RowFields = ["BaseOp"]; diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.imad24.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.imad24.ll index c7a3660..95795ea 100644 --- a/llvm/test/CodeGen/R600/llvm.AMDGPU.imad24.ll +++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.imad24.ll @@ -1,11 +1,18 @@ ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=rv770 
-verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s + +; FIXME: Store of i32 seems to be broken pre-EG somehow? declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) nounwind readnone ; FUNC-LABEL: @test_imad24 ; SI: V_MAD_I32_I24 ; CM: MULADD_INT24 +; R600: MULLO_INT +; R600: ADD_INT define void @test_imad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind { %mad = call i32 @llvm.AMDGPU.imad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone store i32 %mad, i32 addrspace(1)* %out, align 4 diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll index 08daac5..afdfb18 100644 --- a/llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll +++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll @@ -1,9 +1,16 @@ ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=rv770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s declare i32 @llvm.AMDGPU.umad24(i32, i32, i32) nounwind readnone ; FUNC-LABEL: @test_umad24 ; SI: V_MAD_U32_U24 +; EG: MULADD_UINT24 +; R600: MULLO_UINT +; R600: ADD_INT define void @test_umad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind { %mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone store i32 %mad, i32 addrspace(1)* %out, align 4 -- 2.7.4