From 05d9e6a2a3d297260495aa51afb7ca5c8bbe3b4b Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Fri, 19 Jul 2019 21:43:42 +0000 Subject: [PATCH] [AMDGPU] Autogenerate register sequences in tuples Differential Revision: https://reviews.llvm.org/D65007 llvm-svn: 366619 --- llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 319 +++++-------------------------- 1 file changed, 47 insertions(+), 272 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 2fbc2cb..389e65a 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -39,16 +39,37 @@ class getSubRegs { // Generates list of sequential register tuple names. // E.g. RegSeq<3,2,2,"s">.ret -> [ "s[0:1]", "s[2:3]" ] -class RegSeq { +class RegSeqNames { int next = !add(start, stride); int end_reg = !add(!add(start, size), -1); list ret = !if(!le(end_reg, last_reg), !listconcat([prefix # "[" # start # ":" # end_reg # "]"], - RegSeq.ret), + RegSeqNames.ret), []); } +// Generates list of dags for register tuples. 
+class RegSeqDags { + dag trunc_rc = (trunc RC, + !if(!and(!eq(stride, 1), !eq(start, 0)), + !add(!add(last_reg, 2), !mul(size, -1)), + !add(last_reg, 1))); + list ret = + !if(!lt(start, size), + !listconcat([(add (decimate (shl trunc_rc, start), stride))], + RegSeqDags.ret), + []); +} + +class SIRegisterTuples Indices, RegisterClass RC, + int last_reg, int stride, int size, string prefix> : + RegisterTuples.ret, + RegSeqNames.ret>; + //===----------------------------------------------------------------------===// // Declarations that describe the SI registers //===----------------------------------------------------------------------===// @@ -214,102 +235,25 @@ def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, } // SGPR 64-bit registers -def SGPR_64Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 2)), - (add (decimate (shl SGPR_32, 1), 2))], - RegSeq<105, 2, 2, "s">.ret>; +def SGPR_64Regs : SIRegisterTuples.ret, SGPR_32, 105, 2, 2, "s">; // SGPR 96-bit registers. No operations use these, but for symmetry with 96-bit VGPRs. -def SGPR_96Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 3)), - (add (decimate (shl SGPR_32, 1), 3)), - (add (decimate (shl SGPR_32, 2), 3))], - RegSeq<105, 3, 3, "s">.ret>; +def SGPR_96Regs : SIRegisterTuples.ret, SGPR_32, 105, 3, 3, "s">; // SGPR 128-bit registers -def SGPR_128Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 4)), - (add (decimate (shl SGPR_32, 1), 4)), - (add (decimate (shl SGPR_32, 2), 4)), - (add (decimate (shl SGPR_32, 3), 4))], - RegSeq<105, 4, 4, "s">.ret>; +def SGPR_128Regs : SIRegisterTuples.ret, SGPR_32, 105, 4, 4, "s">; // SGPR 160-bit registers. No operations use these, but for symmetry with 160-bit VGPRs. 
-def SGPR_160Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 4)), - (add (decimate (shl SGPR_32, 1), 4)), - (add (decimate (shl SGPR_32, 2), 4)), - (add (decimate (shl SGPR_32, 3), 4)), - (add (decimate (shl SGPR_32, 4), 4))], - RegSeq<105, 4, 5, "s">.ret>; +def SGPR_160Regs : SIRegisterTuples.ret, SGPR_32, 105, 4, 5, "s">; // SGPR 256-bit registers -def SGPR_256Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 4)), - (add (decimate (shl SGPR_32, 1), 4)), - (add (decimate (shl SGPR_32, 2), 4)), - (add (decimate (shl SGPR_32, 3), 4)), - (add (decimate (shl SGPR_32, 4), 4)), - (add (decimate (shl SGPR_32, 5), 4)), - (add (decimate (shl SGPR_32, 6), 4)), - (add (decimate (shl SGPR_32, 7), 4))], - RegSeq<105, 4, 8, "s">.ret>; +def SGPR_256Regs : SIRegisterTuples.ret, SGPR_32, 105, 4, 8, "s">; // SGPR 512-bit registers -def SGPR_512Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 4)), - (add (decimate (shl SGPR_32, 1), 4)), - (add (decimate (shl SGPR_32, 2), 4)), - (add (decimate (shl SGPR_32, 3), 4)), - (add (decimate (shl SGPR_32, 4), 4)), - (add (decimate (shl SGPR_32, 5), 4)), - (add (decimate (shl SGPR_32, 6), 4)), - (add (decimate (shl SGPR_32, 7), 4)), - (add (decimate (shl SGPR_32, 8), 4)), - (add (decimate (shl SGPR_32, 9), 4)), - (add (decimate (shl SGPR_32, 10), 4)), - (add (decimate (shl SGPR_32, 11), 4)), - (add (decimate (shl SGPR_32, 12), 4)), - (add (decimate (shl SGPR_32, 13), 4)), - (add (decimate (shl SGPR_32, 14), 4)), - (add (decimate (shl SGPR_32, 15), 4))], - RegSeq<105, 4, 16, "s">.ret>; +def SGPR_512Regs : SIRegisterTuples.ret, SGPR_32, 105, 4, 16, "s">; // SGPR 1024-bit registers -def SGPR_1024Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 4)), - (add (decimate (shl SGPR_32, 1), 4)), - (add (decimate (shl SGPR_32, 2), 4)), - (add (decimate (shl SGPR_32, 3), 4)), - (add (decimate (shl SGPR_32, 4), 4)), - (add (decimate (shl SGPR_32, 5), 4)), - (add (decimate (shl SGPR_32, 6), 4)), - (add (decimate (shl SGPR_32, 7), 4)), - 
(add (decimate (shl SGPR_32, 8), 4)), - (add (decimate (shl SGPR_32, 9), 4)), - (add (decimate (shl SGPR_32, 10), 4)), - (add (decimate (shl SGPR_32, 11), 4)), - (add (decimate (shl SGPR_32, 12), 4)), - (add (decimate (shl SGPR_32, 13), 4)), - (add (decimate (shl SGPR_32, 14), 4)), - (add (decimate (shl SGPR_32, 15), 4)), - (add (decimate (shl SGPR_32, 16), 4)), - (add (decimate (shl SGPR_32, 17), 4)), - (add (decimate (shl SGPR_32, 18), 4)), - (add (decimate (shl SGPR_32, 19), 4)), - (add (decimate (shl SGPR_32, 20), 4)), - (add (decimate (shl SGPR_32, 21), 4)), - (add (decimate (shl SGPR_32, 22), 4)), - (add (decimate (shl SGPR_32, 23), 4)), - (add (decimate (shl SGPR_32, 24), 4)), - (add (decimate (shl SGPR_32, 25), 4)), - (add (decimate (shl SGPR_32, 26), 4)), - (add (decimate (shl SGPR_32, 27), 4)), - (add (decimate (shl SGPR_32, 28), 4)), - (add (decimate (shl SGPR_32, 29), 4)), - (add (decimate (shl SGPR_32, 30), 4)), - (add (decimate (shl SGPR_32, 31), 4))], - RegSeq<105, 4, 32, "s">.ret>; +def SGPR_1024Regs : SIRegisterTuples.ret, SGPR_32, 105, 4, 32, "s">; // Trap handler TMP 32-bit registers def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32, @@ -318,48 +262,14 @@ def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32, } // Trap handler TMP 64-bit registers -def TTMP_64Regs : RegisterTuples.ret, - [(add (decimate TTMP_32, 2)), - (add (decimate (shl TTMP_32, 1), 2))], - RegSeq<15, 2, 2, "ttmp">.ret>; +def TTMP_64Regs : SIRegisterTuples.ret, TTMP_32, 15, 2, 2, "ttmp">; // Trap handler TMP 128-bit registers -def TTMP_128Regs : RegisterTuples.ret, - [(add (decimate TTMP_32, 4)), - (add (decimate (shl TTMP_32, 1), 4)), - (add (decimate (shl TTMP_32, 2), 4)), - (add (decimate (shl TTMP_32, 3), 4))], - RegSeq<15, 4, 4, "ttmp">.ret>; - -def TTMP_256Regs : RegisterTuples.ret, - [(add (decimate TTMP_32, 4)), - (add (decimate (shl TTMP_32, 1), 4)), - (add (decimate (shl TTMP_32, 2), 4)), - (add (decimate (shl TTMP_32, 3), 4)), - (add 
(decimate (shl TTMP_32, 4), 4)), - (add (decimate (shl TTMP_32, 5), 4)), - (add (decimate (shl TTMP_32, 6), 4)), - (add (decimate (shl TTMP_32, 7), 4))], - RegSeq<15, 4, 8, "ttmp">.ret>; - -def TTMP_512Regs : RegisterTuples.ret, - [(add (decimate TTMP_32, 4)), - (add (decimate (shl TTMP_32, 1), 4)), - (add (decimate (shl TTMP_32, 2), 4)), - (add (decimate (shl TTMP_32, 3), 4)), - (add (decimate (shl TTMP_32, 4), 4)), - (add (decimate (shl TTMP_32, 5), 4)), - (add (decimate (shl TTMP_32, 6), 4)), - (add (decimate (shl TTMP_32, 7), 4)), - (add (decimate (shl TTMP_32, 8), 4)), - (add (decimate (shl TTMP_32, 9), 4)), - (add (decimate (shl TTMP_32, 10), 4)), - (add (decimate (shl TTMP_32, 11), 4)), - (add (decimate (shl TTMP_32, 12), 4)), - (add (decimate (shl TTMP_32, 13), 4)), - (add (decimate (shl TTMP_32, 14), 4)), - (add (decimate (shl TTMP_32, 15), 4))], - RegSeq<15, 4, 16, "ttmp">.ret>; +def TTMP_128Regs : SIRegisterTuples.ret, TTMP_32, 15, 4, 4, "ttmp">; + +def TTMP_256Regs : SIRegisterTuples.ret, TTMP_32, 15, 4, 8, "ttmp">; + +def TTMP_512Regs : SIRegisterTuples.ret, TTMP_32, 15, 4, 16, "ttmp">; class TmpRegTuplesBase subRegs, @@ -449,102 +359,25 @@ def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, } // VGPR 64-bit registers -def VGPR_64 : RegisterTuples.ret, - [(add (trunc VGPR_32, 255)), - (add (shl VGPR_32, 1))], - RegSeq<255, 1, 2, "v">.ret>; +def VGPR_64 : SIRegisterTuples.ret, VGPR_32, 255, 1, 2, "v">; // VGPR 96-bit registers -def VGPR_96 : RegisterTuples.ret, - [(add (trunc VGPR_32, 254)), - (add (shl VGPR_32, 1)), - (add (shl VGPR_32, 2))], - RegSeq<255, 1, 3, "v">.ret>; +def VGPR_96 : SIRegisterTuples.ret, VGPR_32, 255, 1, 3, "v">; // VGPR 128-bit registers -def VGPR_128 : RegisterTuples.ret, - [(add (trunc VGPR_32, 253)), - (add (shl VGPR_32, 1)), - (add (shl VGPR_32, 2)), - (add (shl VGPR_32, 3))], - RegSeq<255, 1, 4, "v">.ret>; +def VGPR_128 : SIRegisterTuples.ret, VGPR_32, 255, 1, 4, "v">; // VGPR 160-bit registers -def 
VGPR_160 : RegisterTuples.ret, - [(add (trunc VGPR_32, 252)), - (add (shl VGPR_32, 1)), - (add (shl VGPR_32, 2)), - (add (shl VGPR_32, 3)), - (add (shl VGPR_32, 4))], - RegSeq<255, 1, 5, "v">.ret>; +def VGPR_160 : SIRegisterTuples.ret, VGPR_32, 255, 1, 5, "v">; // VGPR 256-bit registers -def VGPR_256 : RegisterTuples.ret, - [(add (trunc VGPR_32, 249)), - (add (shl VGPR_32, 1)), - (add (shl VGPR_32, 2)), - (add (shl VGPR_32, 3)), - (add (shl VGPR_32, 4)), - (add (shl VGPR_32, 5)), - (add (shl VGPR_32, 6)), - (add (shl VGPR_32, 7))], - RegSeq<255, 1, 8, "v">.ret>; +def VGPR_256 : SIRegisterTuples.ret, VGPR_32, 255, 1, 8, "v">; // VGPR 512-bit registers -def VGPR_512 : RegisterTuples.ret, - [(add (trunc VGPR_32, 241)), - (add (shl VGPR_32, 1)), - (add (shl VGPR_32, 2)), - (add (shl VGPR_32, 3)), - (add (shl VGPR_32, 4)), - (add (shl VGPR_32, 5)), - (add (shl VGPR_32, 6)), - (add (shl VGPR_32, 7)), - (add (shl VGPR_32, 8)), - (add (shl VGPR_32, 9)), - (add (shl VGPR_32, 10)), - (add (shl VGPR_32, 11)), - (add (shl VGPR_32, 12)), - (add (shl VGPR_32, 13)), - (add (shl VGPR_32, 14)), - (add (shl VGPR_32, 15))], - RegSeq<255, 1, 16, "v">.ret>; +def VGPR_512 : SIRegisterTuples.ret, VGPR_32, 255, 1, 16, "v">; // VGPR 1024-bit registers -def VGPR_1024 : RegisterTuples.ret, - [(add (trunc VGPR_32, 225)), - (add (shl VGPR_32, 1)), - (add (shl VGPR_32, 2)), - (add (shl VGPR_32, 3)), - (add (shl VGPR_32, 4)), - (add (shl VGPR_32, 5)), - (add (shl VGPR_32, 6)), - (add (shl VGPR_32, 7)), - (add (shl VGPR_32, 8)), - (add (shl VGPR_32, 9)), - (add (shl VGPR_32, 10)), - (add (shl VGPR_32, 11)), - (add (shl VGPR_32, 12)), - (add (shl VGPR_32, 13)), - (add (shl VGPR_32, 14)), - (add (shl VGPR_32, 15)), - (add (shl VGPR_32, 16)), - (add (shl VGPR_32, 17)), - (add (shl VGPR_32, 18)), - (add (shl VGPR_32, 19)), - (add (shl VGPR_32, 20)), - (add (shl VGPR_32, 21)), - (add (shl VGPR_32, 22)), - (add (shl VGPR_32, 23)), - (add (shl VGPR_32, 24)), - (add (shl VGPR_32, 25)), - (add (shl 
VGPR_32, 26)), - (add (shl VGPR_32, 27)), - (add (shl VGPR_32, 28)), - (add (shl VGPR_32, 29)), - (add (shl VGPR_32, 30)), - (add (shl VGPR_32, 31))], - RegSeq<255, 1, 32, "v">.ret>; +def VGPR_1024 : SIRegisterTuples.ret, VGPR_32, 255, 1, 32, "v">; // AccVGPR 32-bit registers def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, @@ -554,74 +387,16 @@ def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, } // AGPR 64-bit registers -def AGPR_64 : RegisterTuples.ret, - [(add (trunc AGPR_32, 255)), - (add (shl AGPR_32, 1))], - RegSeq<255, 1, 2, "a">.ret>; +def AGPR_64 : SIRegisterTuples.ret, AGPR_32, 255, 1, 2, "a">; // AGPR 128-bit registers -def AGPR_128 : RegisterTuples.ret, - [(add (trunc AGPR_32, 253)), - (add (shl AGPR_32, 1)), - (add (shl AGPR_32, 2)), - (add (shl AGPR_32, 3))], - RegSeq<255, 1, 4, "a">.ret>; +def AGPR_128 : SIRegisterTuples.ret, AGPR_32, 255, 1, 4, "a">; // AGPR 512-bit registers -def AGPR_512 : RegisterTuples.ret, - [(add (trunc AGPR_32, 241)), - (add (shl AGPR_32, 1)), - (add (shl AGPR_32, 2)), - (add (shl AGPR_32, 3)), - (add (shl AGPR_32, 4)), - (add (shl AGPR_32, 5)), - (add (shl AGPR_32, 6)), - (add (shl AGPR_32, 7)), - (add (shl AGPR_32, 8)), - (add (shl AGPR_32, 9)), - (add (shl AGPR_32, 10)), - (add (shl AGPR_32, 11)), - (add (shl AGPR_32, 12)), - (add (shl AGPR_32, 13)), - (add (shl AGPR_32, 14)), - (add (shl AGPR_32, 15))], - RegSeq<255, 1, 16, "a">.ret>; +def AGPR_512 : SIRegisterTuples.ret, AGPR_32, 255, 1, 16, "a">; // AGPR 1024-bit registers -def AGPR_1024 : RegisterTuples.ret, - [(add (trunc AGPR_32, 225)), - (add (shl AGPR_32, 1)), - (add (shl AGPR_32, 2)), - (add (shl AGPR_32, 3)), - (add (shl AGPR_32, 4)), - (add (shl AGPR_32, 5)), - (add (shl AGPR_32, 6)), - (add (shl AGPR_32, 7)), - (add (shl AGPR_32, 8)), - (add (shl AGPR_32, 9)), - (add (shl AGPR_32, 10)), - (add (shl AGPR_32, 11)), - (add (shl AGPR_32, 12)), - (add (shl AGPR_32, 13)), - (add (shl AGPR_32, 14)), - (add (shl 
AGPR_32, 15)), - (add (shl AGPR_32, 16)), - (add (shl AGPR_32, 17)), - (add (shl AGPR_32, 18)), - (add (shl AGPR_32, 19)), - (add (shl AGPR_32, 20)), - (add (shl AGPR_32, 21)), - (add (shl AGPR_32, 22)), - (add (shl AGPR_32, 23)), - (add (shl AGPR_32, 24)), - (add (shl AGPR_32, 25)), - (add (shl AGPR_32, 26)), - (add (shl AGPR_32, 27)), - (add (shl AGPR_32, 28)), - (add (shl AGPR_32, 29)), - (add (shl AGPR_32, 30)), - (add (shl AGPR_32, 31))], - RegSeq<255, 1, 32, "a">.ret>; +def AGPR_1024 : SIRegisterTuples.ret, AGPR_32, 255, 1, 32, "a">; //===----------------------------------------------------------------------===// // Register classes used as source and destination -- 2.7.4