From c69849ef803bee8296c961df355499b42c76e973 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 16 Oct 2020 08:14:11 -0400 Subject: [PATCH] amd: update addrlib All Mesa-specific includes and definitions have been moved to addrcommon.h. Instead of suppressing warnings in the code, they are suppressed in meson.build. Acked-by: Samuel Pitoiset Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/addrlib/inc/addrinterface.h | 108 ++- src/amd/addrlib/inc/addrtypes.h | 45 +- src/amd/addrlib/meson.build | 3 + src/amd/addrlib/src/addrinterface.cpp | 39 +- src/amd/addrlib/src/amdgpu_asic_addr.h | 5 +- src/amd/addrlib/src/chip/gfx10/gfx10_gb_reg.h | 10 +- src/amd/addrlib/src/chip/gfx9/gfx9_gb_reg.h | 10 +- src/amd/addrlib/src/chip/r800/si_gb_reg.h | 24 +- src/amd/addrlib/src/core/addrcommon.h | 92 ++- src/amd/addrlib/src/core/addrelemlib.cpp | 3 +- src/amd/addrlib/src/core/addrlib.cpp | 9 +- src/amd/addrlib/src/core/addrlib.h | 2 - src/amd/addrlib/src/core/addrlib1.cpp | 17 +- src/amd/addrlib/src/core/addrlib1.h | 1 + src/amd/addrlib/src/core/addrlib2.cpp | 14 +- src/amd/addrlib/src/core/addrlib2.h | 102 ++- src/amd/addrlib/src/gfx10/gfx10SwizzlePattern.h | 226 +++---- src/amd/addrlib/src/gfx10/gfx10addrlib.cpp | 850 +++++++++++++----------- src/amd/addrlib/src/gfx10/gfx10addrlib.h | 74 +-- src/amd/addrlib/src/gfx9/gfx9addrlib.cpp | 118 ++-- src/amd/addrlib/src/gfx9/gfx9addrlib.h | 17 +- src/amd/addrlib/src/r800/ciaddrlib.cpp | 28 +- src/amd/addrlib/src/r800/ciaddrlib.h | 5 + src/amd/addrlib/src/r800/egbaddrlib.cpp | 20 +- src/amd/addrlib/src/r800/siaddrlib.cpp | 3 +- 25 files changed, 1057 insertions(+), 768 deletions(-) diff --git a/src/amd/addrlib/inc/addrinterface.h b/src/amd/addrlib/inc/addrinterface.h index 98581e0..c05e680 100644 --- a/src/amd/addrlib/inc/addrinterface.h +++ b/src/amd/addrlib/inc/addrinterface.h @@ -177,6 +177,7 @@ typedef struct _ADDR_EQUATION ///< stacked vertically prior to swizzling } ADDR_EQUATION; + /** **************************************************************************************************** * @brief Alloc system memory flags. @@ -308,8 +309,9 @@ typedef union _ADDR_CREATE_FLAGS UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize UINT_32 forceDccAndTcCompat : 1; ///< Force enable DCC and TC compatibility - UINT_32 nonPower2MemConfig : 1; ///< Physical video memory size is not power of 2 - UINT_32 reserved : 23; ///< Reserved bits for future use + UINT_32 nonPower2MemConfig : 1; ///< Video memory bit width is not power of 2 + UINT_32 enableAltTiling : 1; ///< Enable alt tile mode + UINT_32 reserved : 22; ///< Reserved bits for future use }; UINT_32 value; @@ -407,6 +409,8 @@ ADDR_E_RETURNCODE ADDR_API AddrCreate( const ADDR_CREATE_INPUT* pAddrCreateIn, ADDR_CREATE_OUTPUT* pAddrCreateOut); + + /** **************************************************************************************************** * AddrDestroy @@ -421,6 +425,8 @@ ADDR_E_RETURNCODE ADDR_API AddrCreate( ADDR_E_RETURNCODE ADDR_API AddrDestroy( ADDR_HANDLE hLib); + + //////////////////////////////////////////////////////////////////////////////////////////////////// // Surface functions //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -652,6 +658,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceInfo( const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT @@ -740,6 +748,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceAddrFromCoord( const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT @@ -921,6 +931,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeHtileInfo( const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT @@ -983,6 +995,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeHtileAddrFromCoord( const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT @@ -1043,6 +1057,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeHtileCoordFromAddr( const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); + + //////////////////////////////////////////////////////////////////////////////////////////////////// // C-mask functions //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1130,6 +1146,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskInfo( const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT @@ -1190,6 +1208,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskAddrFromCoord( const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT @@ -1248,6 +1268,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskCoordFromAddr( const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut); + + //////////////////////////////////////////////////////////////////////////////////////////////////// // F-mask functions //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1328,6 +1350,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskInfo( const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT @@ -1404,6 +1428,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskAddrFromCoord( const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT @@ -1477,6 +1503,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr( const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut); + + //////////////////////////////////////////////////////////////////////////////////////////////////// // Element/utility functions //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1565,6 +1593,7 @@ ADDR_E_RETURNCODE ADDR_API AddrExtractBankPipeSwizzle( const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut); + /** **************************************************************************************************** * ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT @@ -1622,6 +1651,8 @@ ADDR_E_RETURNCODE ADDR_API AddrCombineBankPipeSwizzle( const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn, ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR_COMPUTE_SLICESWIZZLE_INPUT @@ -1648,6 +1679,8 @@ typedef struct _ADDR_COMPUTE_SLICESWIZZLE_INPUT ///< README: When tileIndex is not -1, this must be valid } ADDR_COMPUTE_SLICESWIZZLE_INPUT; + + /** **************************************************************************************************** * ADDR_COMPUTE_SLICESWIZZLE_OUTPUT @@ -1678,6 +1711,7 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeSliceSwizzle( const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut); + /** **************************************************************************************************** * AddrSwizzleGenOption @@ -1768,6 +1802,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeBaseSwizzle( const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut); + + /** **************************************************************************************************** * ELEM_GETEXPORTNORM_INPUT @@ -1808,6 +1844,8 @@ BOOL_32 ADDR_API ElemGetExportNorm( ADDR_HANDLE hLib, const ELEM_GETEXPORTNORM_INPUT* pIn); + + /** **************************************************************************************************** * ELEM_FLT32TODEPTHPIXEL_INPUT @@ -1863,6 +1901,8 @@ ADDR_E_RETURNCODE ADDR_API ElemFlt32ToDepthPixel( const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut); + + /** **************************************************************************************************** * ELEM_FLT32TOCOLORPIXEL_INPUT @@ -1989,6 +2029,8 @@ ADDR_E_RETURNCODE ADDR_API AddrConvertTileInfoToHW( const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR_CONVERT_TILEINDEX_INPUT @@ -2113,6 +2155,8 @@ ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex1( const ADDR_CONVERT_TILEINDEX1_INPUT* pIn, ADDR_CONVERT_TILEINDEX_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR_GET_TILEINDEX_INPUT @@ -2158,6 +2202,8 @@ ADDR_E_RETURNCODE ADDR_API AddrGetTileIndex( const ADDR_GET_TILEINDEX_INPUT* pIn, ADDR_GET_TILEINDEX_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR_PRT_INFO_INPUT @@ -2202,6 +2248,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo( const ADDR_PRT_INFO_INPUT* pIn, ADDR_PRT_INFO_OUTPUT* pOut); + + //////////////////////////////////////////////////////////////////////////////////////////////////// // DCC key functions //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -2262,6 +2310,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo( const ADDR_COMPUTE_DCCINFO_INPUT* pIn, ADDR_COMPUTE_DCCINFO_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR_GET_MAX_ALIGNMENTS_OUTPUT @@ -2325,6 +2375,7 @@ ADDR_E_RETURNCODE ADDR_API AddrGetMaxMetaAlignments( * **/ + //////////////////////////////////////////////////////////////////////////////////////////////////// // Surface functions for Gfx9 //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -2488,6 +2539,8 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceInfo( const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT @@ -2554,6 +2607,8 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceAddrFromCoord( const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT @@ -2619,6 +2674,8 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceCoordFromAddr( const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut); + + //////////////////////////////////////////////////////////////////////////////////////////////////// // HTile functions for Gfx9 //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -2740,6 +2797,8 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileInfo( const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT @@ -2797,6 +2856,8 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileAddrFromCoord( const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT @@ -2855,6 +2916,8 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileCoordFromAddr( const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); + + //////////////////////////////////////////////////////////////////////////////////////////////////// // C-mask functions for Gfx9 //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -2924,6 +2987,8 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskInfo( const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT @@ -2984,6 +3049,8 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskAddrFromCoord( const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT @@ -3041,6 +3108,8 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskCoordFromAddr( const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut); + + //////////////////////////////////////////////////////////////////////////////////////////////////// // F-mask functions for Gfx9 //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -3123,6 +3192,8 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskInfo( const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn, ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT @@ -3182,6 +3253,8 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskAddrFromCoord( const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut); + + /** **************************************************************************************************** * ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT @@ -3240,6 +3313,8 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskCoordFromAddr( const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut); + + //////////////////////////////////////////////////////////////////////////////////////////////////// // DCC key functions for Gfx9 //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -3297,7 +3372,7 @@ typedef struct _ADDR2_COMPUTE_DCCINFO_OUTPUT UINT_32 metaBlkWidth; ///< DCC meta block width UINT_32 metaBlkHeight; ///< DCC meta block height UINT_32 metaBlkDepth; ///< DCC meta block depth - + UINT_32 metaBlkSize; ///< DCC meta block size in bytes UINT_32 metaBlkNumPerSlice; ///< Number of metablock within one slice union @@ -3325,6 +3400,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccInfo( const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut); + /** **************************************************************************************************** * ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT @@ -3458,6 +3534,7 @@ typedef struct _ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT UINT_32 size; ///< Size of this structure in bytes AddrSwizzleMode swizzleMode; ///< Surface swizzle mode AddrResourceType resourceType; ///< Surface resource type + UINT_32 bpe; ///< bits per element (e.g. block size for BCn format) UINT_32 basePipeBankXor; ///< Base pipe bank xor UINT_32 slice; ///< Slice id UINT_32 numSamples; ///< Number of samples @@ -3555,8 +3632,9 @@ typedef union _ADDR2_BLOCK_SET UINT_32 macroThin64KB : 1; // Thin 64KB for 2D/3D resource UINT_32 macroThick64KB : 1; // Thick 64KB for 3D resource UINT_32 var : 1; // VAR block + UINT_32 : 1; UINT_32 linear : 1; // Linear block - UINT_32 reserved : 25; + UINT_32 reserved : 24; }; UINT_32 value; @@ -3608,10 +3686,10 @@ typedef union _ADDR2_SWMODE_SET UINT_32 sw64KB_S : 1; UINT_32 sw64KB_D : 1; UINT_32 sw64KB_R : 1; - UINT_32 swReserved0 : 1; - UINT_32 swReserved1 : 1; - UINT_32 swReserved2 : 1; - UINT_32 swReserved3 : 1; + UINT_32 swMiscDef12 : 1; + UINT_32 swMiscDef13 : 1; + UINT_32 swMiscDef14 : 1; + UINT_32 swMiscDef15 : 1; UINT_32 sw64KB_Z_T : 1; UINT_32 sw64KB_S_T : 1; UINT_32 sw64KB_D_T : 1; @@ -3624,11 +3702,19 @@ typedef union _ADDR2_SWMODE_SET UINT_32 sw64KB_S_X : 1; UINT_32 sw64KB_D_X : 1; UINT_32 sw64KB_R_X : 1; + UINT_32 swMiscDef28 : 1; + UINT_32 swMiscDef29 : 1; + UINT_32 swMiscDef30 : 1; + UINT_32 swMiscDef31 : 1; + }; + + struct + { + UINT_32 : 28; UINT_32 swVar_Z_X : 1; - UINT_32 swReserved4 : 1; - UINT_32 swReserved5 : 1; + UINT_32 : 2; UINT_32 swVar_R_X : 1; - }; + } gfx10; UINT_32 value; } ADDR2_SWMODE_SET; diff --git a/src/amd/addrlib/inc/addrtypes.h b/src/amd/addrlib/inc/addrtypes.h index c17e817..8d552fd 100644 --- a/src/amd/addrlib/inc/addrtypes.h +++ b/src/amd/addrlib/inc/addrtypes.h @@ -88,7 +88,8 @@ typedef int INT; #ifndef ADDR_FASTCALL #if defined(__GNUC__) - #define ADDR_FASTCALL __attribute__((regparm(0))) + // We don't care about the performance of call instructions in addrlib + #define ADDR_FASTCALL #else #define ADDR_FASTCALL __fastcall #endif @@ -106,6 +107,7 @@ typedef int INT; #define GC_FASTCALL ADDR_FASTCALL #endif + #if defined(__GNUC__) #define ADDR_INLINE static inline // inline needs to be static to link #else @@ -113,11 +115,7 @@ typedef int INT; #define ADDR_INLINE __inline #endif // #if defined(__GNUC__) -#if defined(__amd64__) || defined(__x86_64__) || defined(__i386__) - #define ADDR_API ADDR_FASTCALL // default call convention is fast call -#else - #define ADDR_API -#endif +#define ADDR_API ADDR_FASTCALL //default call convention is fast call /** **************************************************************************************************** @@ -205,9 +203,10 @@ typedef enum _AddrTileMode * @note * * ADDR_SW_LINEAR linear aligned addressing mode, for 1D/2D/3D resource -* ADDR_SW_256B_* addressing block aligned size is 256B, for 2D/3D resource +* ADDR_SW_256B_* addressing block aligned size is 256B, for 2D resource * ADDR_SW_4KB_* addressing block aligned size is 4KB, for 2D/3D resource -* ADDR_SW_64KB_* addressing block aligned size is 64KB, for 2D/3D resource +* ADDR_SW_64KB_* addressing block aligned size is 64KB, for 1D/2D/3D resource +* ADDR_SW_VAR_* addressing block aligned size is ASIC specific * * ADDR_SW_*_Z For GFX9: - for 2D resource, represents Z-order swizzle mode for depth/stencil/FMask @@ -244,10 +243,10 @@ typedef enum _AddrSwizzleMode ADDR_SW_64KB_S = 9, ADDR_SW_64KB_D = 10, ADDR_SW_64KB_R = 11, - ADDR_SW_RESERVED0 = 12, - ADDR_SW_RESERVED1 = 13, - ADDR_SW_RESERVED2 = 14, - ADDR_SW_RESERVED3 = 15, + ADDR_SW_MISCDEF12 = 12, + ADDR_SW_MISCDEF13 = 13, + ADDR_SW_MISCDEF14 = 14, + ADDR_SW_MISCDEF15 = 15, ADDR_SW_64KB_Z_T = 16, ADDR_SW_64KB_S_T = 17, ADDR_SW_64KB_D_T = 18, @@ -260,12 +259,23 @@ typedef enum _AddrSwizzleMode ADDR_SW_64KB_S_X = 25, ADDR_SW_64KB_D_X = 26, ADDR_SW_64KB_R_X = 27, - ADDR_SW_VAR_Z_X = 28, - ADDR_SW_RESERVED4 = 29, - ADDR_SW_RESERVED5 = 30, - ADDR_SW_VAR_R_X = 31, + ADDR_SW_MISCDEF28 = 28, + ADDR_SW_MISCDEF29 = 29, + ADDR_SW_MISCDEF30 = 30, + ADDR_SW_MISCDEF31 = 31, ADDR_SW_LINEAR_GENERAL = 32, ADDR_SW_MAX_TYPE = 33, + + ADDR_SW_RESERVED0 = ADDR_SW_MISCDEF12, + ADDR_SW_RESERVED1 = ADDR_SW_MISCDEF13, + ADDR_SW_RESERVED2 = ADDR_SW_MISCDEF14, + ADDR_SW_RESERVED3 = ADDR_SW_MISCDEF15, + ADDR_SW_RESERVED4 = ADDR_SW_MISCDEF29, + ADDR_SW_RESERVED5 = ADDR_SW_MISCDEF30, + + ADDR_SW_VAR_Z_X = ADDR_SW_MISCDEF28, + ADDR_SW_VAR_R_X = ADDR_SW_MISCDEF31, + } AddrSwizzleMode; /** @@ -553,6 +563,7 @@ typedef enum _AddrHtileBlockSize ADDR_HTILE_BLOCKSIZE_8 = 8, } AddrHtileBlockSize; + /** **************************************************************************************************** * AddrPipeCfg @@ -715,6 +726,7 @@ typedef enum _AddrTileType #define ADDR64D "lld" OR "I64d" #endif + /// @brief Union for storing a 32-bit float or 32-bit integer /// @ingroup type /// @@ -730,6 +742,7 @@ typedef union { float f; } ADDR_FLT_32; + //////////////////////////////////////////////////////////////////////////////////////////////////// // // Macros for controlling linking and building on multiple systems diff --git a/src/amd/addrlib/meson.build b/src/amd/addrlib/meson.build index 6add6dc..db20504 100644 --- a/src/amd/addrlib/meson.build +++ b/src/amd/addrlib/meson.build @@ -55,6 +55,9 @@ files_addrlib = files( cpp_args_addrlib = [] if cpp.get_id() != 'msvc' cpp_args_addrlib += '-Wno-unused-variable' + cpp_args_addrlib += '-Wno-unused-local-typedefs' + cpp_args_addrlib += '-Wno-unused-but-set-variable' + cpp_args_addrlib += '-Wno-maybe-uninitialized' endif libamdgpu_addrlib = static_library( diff --git a/src/amd/addrlib/src/addrinterface.cpp b/src/amd/addrlib/src/addrinterface.cpp index 5256499..72f2ebe 100644 --- a/src/amd/addrlib/src/addrinterface.cpp +++ b/src/amd/addrlib/src/addrinterface.cpp @@ -36,8 +36,6 @@ #include "addrcommon.h" -#include "util/macros.h" - using namespace Addr; //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -67,6 +65,8 @@ ADDR_E_RETURNCODE ADDR_API AddrCreate( return returnCode; } + + /** **************************************************************************************************** * AddrDestroy @@ -96,6 +96,8 @@ ADDR_E_RETURNCODE ADDR_API AddrDestroy( return returnCode; } + + //////////////////////////////////////////////////////////////////////////////////////////////////// // Surface functions //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -132,6 +134,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceInfo( return returnCode; } + + /** **************************************************************************************************** * AddrComputeSurfaceAddrFromCoord @@ -196,6 +200,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceCoordFromAddr( return returnCode; } + + //////////////////////////////////////////////////////////////////////////////////////////////////// // HTile functions //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -297,6 +303,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeHtileCoordFromAddr( return returnCode; } + + //////////////////////////////////////////////////////////////////////////////////////////////////// // C-mask functions //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -399,6 +407,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskCoordFromAddr( return returnCode; } + + //////////////////////////////////////////////////////////////////////////////////////////////////// // F-mask functions //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -499,6 +509,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr( return returnCode; } + + //////////////////////////////////////////////////////////////////////////////////////////////////// // DCC key functions //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -533,6 +545,8 @@ ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo( return returnCode; } + + /////////////////////////////////////////////////////////////////////////////// // Below functions are element related or helper functions /////////////////////////////////////////////////////////////////////////////// @@ -819,7 +833,7 @@ BOOL_32 ADDR_API ElemGetExportNorm( Addr::Lib* pLib = Lib::GetLib(hLib); BOOL_32 enabled = FALSE; - ASSERTED ADDR_E_RETURNCODE returnCode = ADDR_OK; + ADDR_E_RETURNCODE returnCode = ADDR_OK; if (pLib != NULL) { @@ -1118,6 +1132,7 @@ ADDR_E_RETURNCODE ADDR_API AddrGetMaxMetaAlignments( return returnCode; } + //////////////////////////////////////////////////////////////////////////////////////////////////// // Surface functions for Addr2 //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1154,6 +1169,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceInfo( return returnCode; } + /** **************************************************************************************************** * Addr2ComputeSurfaceAddrFromCoord @@ -1186,6 +1202,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceAddrFromCoord( return returnCode; } + /** **************************************************************************************************** * Addr2ComputeSurfaceCoordFromAddr @@ -1218,6 +1235,8 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceCoordFromAddr( return returnCode; } + + //////////////////////////////////////////////////////////////////////////////////////////////////// // HTile functions for Addr2 //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1254,6 +1273,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileInfo( return returnCode; } + /** **************************************************************************************************** * Addr2ComputeHtileAddrFromCoord @@ -1286,6 +1306,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileAddrFromCoord( return returnCode; } + /** **************************************************************************************************** * Addr2ComputeHtileCoordFromAddr @@ -1319,6 +1340,8 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileCoordFromAddr( return returnCode; } + + //////////////////////////////////////////////////////////////////////////////////////////////////// // C-mask functions for Addr2 //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1356,6 +1379,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskInfo( return returnCode; } + /** **************************************************************************************************** * Addr2ComputeCmaskAddrFromCoord @@ -1388,6 +1412,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskAddrFromCoord( return returnCode; } + /** **************************************************************************************************** * Addr2ComputeCmaskCoordFromAddr @@ -1421,6 +1446,8 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskCoordFromAddr( return returnCode; } + + //////////////////////////////////////////////////////////////////////////////////////////////////// // F-mask functions for Addr2 //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1457,6 +1484,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskInfo( return returnCode; } + /** **************************************************************************************************** * Addr2ComputeFmaskAddrFromCoord @@ -1489,6 +1517,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskAddrFromCoord( return returnCode; } + /** **************************************************************************************************** * Addr2ComputeFmaskCoordFromAddr @@ -1521,6 +1550,8 @@ ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskCoordFromAddr( return returnCode; } + + //////////////////////////////////////////////////////////////////////////////////////////////////// // DCC key functions for Addr2 //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1723,7 +1754,7 @@ ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode( if (pLib != NULL) { - ADDR2_COMPUTE_SURFACE_INFO_INPUT in; + ADDR2_COMPUTE_SURFACE_INFO_INPUT in = {0}; in.resourceType = ADDR_RSRC_TEX_2D; in.swizzleMode = swizzleMode; in.bpp = bpp; diff --git a/src/amd/addrlib/src/amdgpu_asic_addr.h b/src/amd/addrlib/src/amdgpu_asic_addr.h index 0c74673..02b241e 100644 --- a/src/amd/addrlib/src/amdgpu_asic_addr.h +++ b/src/amd/addrlib/src/amdgpu_asic_addr.h @@ -108,6 +108,7 @@ #define AMDGPU_RANGE_HELPER(val, min, max) ((val >= min) && (val < max)) #define AMDGPU_IN_RANGE(val, ...) AMDGPU_EXPAND_FIX(AMDGPU_RANGE_HELPER(val, __VA_ARGS__)) + // ASICREV_IS(eRevisionId, revisionName) #define ASICREV_IS(r, rn) AMDGPU_IN_RANGE(r, AMDGPU_##rn##_RANGE) #define ASICREV_IS_TAHITI_P(r) ASICREV_IS(r, TAHITI) @@ -148,8 +149,8 @@ #define ASICREV_IS_RENOIR(r) ASICREV_IS(r, RENOIR) #define ASICREV_IS_NAVI10_P(r) ASICREV_IS(r, NAVI10) -#define ASICREV_IS_NAVI12(r) ASICREV_IS(r, NAVI12) -#define ASICREV_IS_NAVI14(r) ASICREV_IS(r, NAVI14) +#define ASICREV_IS_NAVI12_P(r) ASICREV_IS(r, NAVI12) +#define ASICREV_IS_NAVI14_M(r) ASICREV_IS(r, NAVI14) #define ASICREV_IS_SIENNA_CICHLID(r) ASICREV_IS(r, SIENNA_CICHLID) #define ASICREV_IS_NAVY_FLOUNDER(r) ASICREV_IS(r, NAVY_FLOUNDER) #define ASICREV_IS_DIMGREY_CAVEFISH(r) ASICREV_IS(r, DIMGREY_CAVEFISH) diff --git a/src/amd/addrlib/src/chip/gfx10/gfx10_gb_reg.h b/src/amd/addrlib/src/chip/gfx10/gfx10_gb_reg.h index 93f2507..9a7f715 100644 --- a/src/amd/addrlib/src/chip/gfx10/gfx10_gb_reg.h +++ b/src/amd/addrlib/src/chip/gfx10/gfx10_gb_reg.h @@ -37,12 +37,10 @@ // // Make sure the necessary endian defines are there. // -#include "util/u_endian.h" - -#if UTIL_ARCH_LITTLE_ENDIAN -#define LITTLEENDIAN_CPU -#elif UTIL_ARCH_BIG_ENDIAN -#define BIGENDIAN_CPU +#if defined(LITTLEENDIAN_CPU) +#elif defined(BIGENDIAN_CPU) +#else +#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined" #endif union GB_ADDR_CONFIG_gfx10 diff --git a/src/amd/addrlib/src/chip/gfx9/gfx9_gb_reg.h b/src/amd/addrlib/src/chip/gfx9/gfx9_gb_reg.h index 43e3738..988b7c0 100644 --- a/src/amd/addrlib/src/chip/gfx9/gfx9_gb_reg.h +++ b/src/amd/addrlib/src/chip/gfx9/gfx9_gb_reg.h @@ -37,12 +37,10 @@ // // Make sure the necessary endian defines are there. // -#include "util/u_endian.h" - -#if UTIL_ARCH_LITTLE_ENDIAN -#define LITTLEENDIAN_CPU -#elif UTIL_ARCH_BIG_ENDIAN -#define BIGENDIAN_CPU +#if defined(LITTLEENDIAN_CPU) +#elif defined(BIGENDIAN_CPU) +#else +#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined" #endif union GB_ADDR_CONFIG_gfx9 { diff --git a/src/amd/addrlib/src/chip/r800/si_gb_reg.h b/src/amd/addrlib/src/chip/r800/si_gb_reg.h index e5af4fd..1a02335 100644 --- a/src/amd/addrlib/src/chip/r800/si_gb_reg.h +++ b/src/amd/addrlib/src/chip/r800/si_gb_reg.h @@ -38,12 +38,10 @@ // // Make sure the necessary endian defines are there. // -#include "util/u_endian.h" - -#if UTIL_ARCH_LITTLE_ENDIAN -#define LITTLEENDIAN_CPU -#elif UTIL_ARCH_BIG_ENDIAN -#define BIGENDIAN_CPU +#if defined(LITTLEENDIAN_CPU) +#elif defined(BIGENDIAN_CPU) +#else +#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined" #endif /* @@ -114,7 +112,7 @@ typedef union { unsigned int num_banks : 2; unsigned int micro_tile_mode_new : 3; unsigned int sample_split : 2; - unsigned int : 5; + unsigned int alt_pipe_config : 5; } GB_TILE_MODE_T; typedef struct _GB_MACROTILE_MODE_T { @@ -122,13 +120,16 @@ typedef union { unsigned int bank_height : 2; unsigned int macro_tile_aspect : 2; unsigned int num_banks : 2; - unsigned int : 24; + unsigned int alt_bank_height : 2; + unsigned int alt_macro_tile_aspect : 2; + unsigned int alt_num_banks : 2; + unsigned int : 18; } GB_MACROTILE_MODE_T; #elif defined(BIGENDIAN_CPU) typedef struct _GB_TILE_MODE_T { - unsigned int : 5; + unsigned int alt_pipe_config : 5; unsigned int sample_split : 2; unsigned int micro_tile_mode_new : 3; unsigned int num_banks : 2; @@ -142,7 +143,10 @@ typedef union { } GB_TILE_MODE_T; typedef struct _GB_MACROTILE_MODE_T { - unsigned int : 24; + unsigned int : 18; + unsigned int alt_num_banks : 2; + unsigned int alt_macro_tile_aspect : 2; + unsigned int alt_bank_height : 2; unsigned int num_banks : 2; unsigned int macro_tile_aspect : 2; unsigned int bank_height : 2; diff --git a/src/amd/addrlib/src/core/addrcommon.h b/src/amd/addrlib/src/core/addrcommon.h index f2f8c6a..9b76c53 100644 --- a/src/amd/addrlib/src/core/addrcommon.h +++ b/src/amd/addrlib/src/core/addrcommon.h @@ -36,14 +36,6 @@ #include "addrinterface.h" -#if !defined(DEBUG) -#ifdef NDEBUG -#define DEBUG 0 -#else -#define DEBUG 1 -#endif -#endif - // ADDR_LNX_KERNEL_BUILD is for internal build // Moved from addrinterface.h so __KERNEL__ is not needed any more #if ADDR_LNX_KERNEL_BUILD // || (defined(__GNUC__) && defined(__KERNEL__)) @@ -55,6 +47,22 @@ #include #include "util/macros.h" +#include "util/u_endian.h" + +#if !defined(DEBUG) +#ifdef NDEBUG +#define DEBUG 0 +#else +#define DEBUG 1 +#endif +#endif + +#if UTIL_ARCH_LITTLE_ENDIAN +#define LITTLEENDIAN_CPU +#elif UTIL_ARCH_BIG_ENDIAN +#define BIGENDIAN_CPU +#endif + //////////////////////////////////////////////////////////////////////////////////////////////////// // Platform specific debug break defines @@ -68,7 +76,7 @@ #define ADDR_DBG_BREAK() { __debugbreak(); } #endif #else - #define ADDR_DBG_BREAK() do {} while(0) + #define ADDR_DBG_BREAK() #endif //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -81,10 +89,29 @@ #define ADDR_ANALYSIS_ASSUME(expr) do { (void)(expr); } while (0) #endif -#define ADDR_ASSERT(__e) assert(__e) -#define ADDR_ASSERT_ALWAYS() ADDR_DBG_BREAK() -#define ADDR_UNHANDLED_CASE() ADDR_ASSERT(!"Unhandled case") -#define ADDR_NOT_IMPLEMENTED() ADDR_ASSERT(!"Not implemented"); +#if DEBUG + #if defined( _WIN32 ) + #define ADDR_ASSERT(__e) \ + { \ + ADDR_ANALYSIS_ASSUME(__e); \ + if ( !((__e) ? TRUE : FALSE)) { ADDR_DBG_BREAK(); } \ + } + #else + #define ADDR_ASSERT(__e) if ( !((__e) ? TRUE : FALSE)) { ADDR_DBG_BREAK(); } + #endif + #define ADDR_ASSERT_ALWAYS() ADDR_DBG_BREAK() + #define ADDR_UNHANDLED_CASE() ADDR_ASSERT(!"Unhandled case") + #define ADDR_NOT_IMPLEMENTED() ADDR_ASSERT(!"Not implemented"); +#else //DEBUG + #if defined( _WIN32 ) + #define ADDR_ASSERT(__e) { ADDR_ANALYSIS_ASSUME(__e); } + #else + #define ADDR_ASSERT(__e) + #endif + #define ADDR_ASSERT_ALWAYS() + #define ADDR_UNHANDLED_CASE() + #define ADDR_NOT_IMPLEMENTED() +#endif //DEBUG //////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -107,6 +134,7 @@ #define ADDR_INFO(cond, a) \ { if (!(cond)) { ADDR_PRNT(a); } } + /// @brief Macro for reporting error warning messages /// @ingroup util /// @@ -125,6 +153,7 @@ ADDR_PRNT((" WARNING in file %s, line %d\n", __FILE__, __LINE__)); \ } } + /// @brief Macro for reporting fatal error conditions /// @ingroup util /// @@ -147,20 +176,24 @@ #define ADDRDPF 1 ? (void)0 : (void) -#define ADDR_PRNT(a) do {} while(0) +#define ADDR_PRNT(a) -#define ADDR_DBG_BREAK() do {} while(0) +#define ADDR_DBG_BREAK() -#define ADDR_INFO(cond, a) do {} while(0) +#define ADDR_INFO(cond, a) -#define ADDR_WARN(cond, a) do {} while(0) +#define ADDR_WARN(cond, a) -#define ADDR_EXIT(cond, a) do {} while(0) +#define ADDR_EXIT(cond, a) #endif // DEBUG //////////////////////////////////////////////////////////////////////////////////////////////////// -#define ADDR_C_ASSERT(__e) STATIC_ASSERT(__e) +#if defined(static_assert) +#define ADDR_C_ASSERT(__e) static_assert(__e, "") +#else +#define ADDR_C_ASSERT(__e) typedef char __ADDR_C_ASSERT__[(__e) ? 1 : -1] +#endif namespace Addr { @@ -208,21 +241,6 @@ static const UINT_32 MaxSurfaceHeight = 16384; /** **************************************************************************************************** -* @brief Enums to identify AddrLib type -**************************************************************************************************** -*/ -enum LibClass -{ - BASE_ADDRLIB = 0x0, - R600_ADDRLIB = 0x6, - R800_ADDRLIB = 0x8, - SI_ADDRLIB = 0xa, - CI_ADDRLIB = 0xb, - AI_ADDRLIB = 0xd, -}; - -/** -**************************************************************************************************** * ChipFamily * * @brief @@ -270,8 +288,9 @@ union ConfigFlags UINT_32 disableLinearOpt : 1; ///< Disallow tile modes to be optimized to linear UINT_32 use32bppFor422Fmt : 1; ///< View 422 formats as 32 bits per pixel element UINT_32 forceDccAndTcCompat : 1; ///< Force enable DCC and TC compatibility - UINT_32 nonPower2MemConfig : 1; ///< Physical video memory size is not power of 2 - UINT_32 reserved : 19; ///< Reserved bits for future use + UINT_32 nonPower2MemConfig : 1; ///< Video memory bit width is not power of 2 + UINT_32 enableAltTiling : 1; ///< Enable alt tile mode + UINT_32 reserved : 18; ///< Reserved bits for future use }; UINT_32 value; @@ -855,6 +874,7 @@ static inline VOID InitChannel( pChanSet->index = index; } + /** **************************************************************************************************** * InitChannel diff --git a/src/amd/addrlib/src/core/addrelemlib.cpp b/src/amd/addrlib/src/core/addrelemlib.cpp index 27afb59..9279aff 100644 --- a/src/amd/addrlib/src/core/addrelemlib.cpp +++ b/src/amd/addrlib/src/core/addrelemlib.cpp @@ -347,6 +347,7 @@ VOID ElemLib::Int32sToPixel( UINT_32 elemMask=0; UINT_32 elementXor = 0; // address xor when reading bytes from elements + // @@ NOTE: assert if called on a compressed format! if (properties.byteAligned) // Components are all byte-sized @@ -1272,7 +1273,6 @@ VOID ElemLib::RestoreSurfaceInfo( UINT_32 bpp; BOOL_32 bBCnFormat = FALSE; - (void)bBCnFormat; ADDR_ASSERT(pBpp != NULL); ADDR_ASSERT(pWidth != NULL && pHeight != NULL); @@ -1745,6 +1745,7 @@ BOOL_32 ElemLib::IsBlockCompressed( ((format >= ADDR_FMT_ASTC_4x4) && (format <= ADDR_FMT_ETC2_128BPP))); } + /** **************************************************************************************************** * ElemLib::IsCompressed diff --git a/src/amd/addrlib/src/core/addrlib.cpp b/src/amd/addrlib/src/core/addrlib.cpp index 6696a97..bb431c8 100644 --- a/src/amd/addrlib/src/core/addrlib.cpp +++ b/src/amd/addrlib/src/core/addrlib.cpp @@ -98,7 +98,6 @@ namespace Addr **************************************************************************************************** */ Lib::Lib() : - m_class(BASE_ADDRLIB), m_chipFamily(ADDR_CHIP_FAMILY_IVLD), m_chipRevision(0), m_version(ADDRLIB_VERSION), @@ -124,7 +123,6 @@ Lib::Lib() : */ Lib::Lib(const Client* pClient) : Object(pClient), - m_class(BASE_ADDRLIB), m_chipFamily(ADDR_CHIP_FAMILY_IVLD), m_chipRevision(0), m_version(ADDRLIB_VERSION), @@ -157,6 +155,7 @@ Lib::~Lib() } } + //////////////////////////////////////////////////////////////////////////////////////////////////// // Initialization/Helper //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -224,8 +223,6 @@ ADDR_E_RETURNCODE Lib::Create( pLib = Gfx9HwlInit(&client); break; case FAMILY_NV: - pLib = Gfx10HwlInit(&client); - break; case FAMILY_VGH: pLib = Gfx10HwlInit(&client); break; @@ -254,6 +251,7 @@ ADDR_E_RETURNCODE Lib::Create( pLib->m_configFlags.allowLargeThickTile = pCreateIn->createFlags.allowLargeThickTile; pLib->m_configFlags.forceDccAndTcCompat = pCreateIn->createFlags.forceDccAndTcCompat; pLib->m_configFlags.nonPower2MemConfig = pCreateIn->createFlags.nonPower2MemConfig; + pLib->m_configFlags.enableAltTiling = pCreateIn->createFlags.enableAltTiling; pLib->m_configFlags.disableLinearOpt = FALSE; pLib->SetChipFamily(pCreateIn->chipFamily, pCreateIn->chipRevision); @@ -493,10 +491,12 @@ UINT_32 Lib::Bits2Number( return number; } + //////////////////////////////////////////////////////////////////////////////////////////////////// // Element lib //////////////////////////////////////////////////////////////////////////////////////////////////// + /** **************************************************************************************************** * Lib::Flt32ToColorPixel @@ -612,6 +612,7 @@ ADDR_E_RETURNCODE Lib::Flt32ToColorPixel( return returnCode; } + /** **************************************************************************************************** * Lib::GetExportNorm diff --git a/src/amd/addrlib/src/core/addrlib.h b/src/amd/addrlib/src/core/addrlib.h index c1510d6..79895ac 100644 --- a/src/amd/addrlib/src/core/addrlib.h +++ b/src/amd/addrlib/src/core/addrlib.h @@ -370,8 +370,6 @@ private: VOID SetMaxAlignments(); protected: - LibClass m_class; ///< Store class type (HWL type) - ChipFamily m_chipFamily; ///< Chip family translated from the one in atiid.h UINT_32 m_chipRevision; ///< Revision id from xxx_id.h diff --git a/src/amd/addrlib/src/core/addrlib1.cpp b/src/amd/addrlib/src/core/addrlib1.cpp index 0704e0f..1622c5b 100644 --- a/src/amd/addrlib/src/core/addrlib1.cpp +++ b/src/amd/addrlib/src/core/addrlib1.cpp @@ -147,10 +147,12 @@ Lib* Lib::GetLib( return static_cast(hLib); } + //////////////////////////////////////////////////////////////////////////////////////////////////// // Surface Methods //////////////////////////////////////////////////////////////////////////////////////////////////// + /** **************************************************************************************************** * Lib::ComputeSurfaceInfo @@ -1228,6 +1230,8 @@ UINT_32 Lib::Thickness( return ModeFlags[tileMode].thickness; } + + //////////////////////////////////////////////////////////////////////////////////////////////////// // CMASK/HTILE //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -2008,6 +2012,7 @@ ADDR_E_RETURNCODE Lib::ComputeCmaskInfo( *pPitchOut = (pitchIn + macroWidth - 1) & ~(macroWidth - 1); *pHeightOut = (heightIn + macroHeight - 1) & ~(macroHeight - 1); + sliceBytes = ComputeCmaskBytes(*pPitchOut, *pHeightOut, 1); @@ -2186,7 +2191,6 @@ VOID Lib::HwlComputeXmaskCoordFromAddr( UINT_32 pipe; UINT_32 numPipes; UINT_32 numGroupBits; - (void)numGroupBits; UINT_32 numPipeBits; UINT_32 macroTilePitch; UINT_32 macroTileHeight; @@ -2235,6 +2239,7 @@ VOID Lib::HwlComputeXmaskCoordFromAddr( UINT_32 groupBits = 8 * m_pipeInterleaveBytes; UINT_32 pipes = numPipes; + // // Compute the micro tile size, in bits. And macro tile pitch and height. // @@ -2287,17 +2292,20 @@ VOID Lib::HwlComputeXmaskCoordFromAddr( pitch = pitchAligned; height = heightAligned; + // // Convert byte address to bit address. // bitAddr = BYTES_TO_BITS(addr) + bitPosition; + // // Remove pipe bits from address. // bitAddr = (bitAddr % groupBits) + ((bitAddr/groupBits/pipes)*groupBits); + elemOffset = bitAddr / elemBits; tilesPerMacro = (macroTilePitch/factor) * macroTileHeight / MicroTilePixels >> numPipeBits; @@ -2315,6 +2323,7 @@ VOID Lib::HwlComputeXmaskCoordFromAddr( macroY = static_cast((macroNumber % macrosPerSlice) / macrosPerPitch); macroZ = static_cast((macroNumber / macrosPerSlice)); + microX = microNumber % (macroTilePitch / factor / MicroTileWidth); microY = (microNumber / (macroTilePitch / factor / MicroTileHeight)); @@ -2325,6 +2334,7 @@ VOID Lib::HwlComputeXmaskCoordFromAddr( microTileCoordY = ComputeXmaskCoordYFromPipe(pipe, *pX/MicroTileWidth); + // // Assemble final coordinates. // @@ -2384,6 +2394,7 @@ UINT_64 Lib::HwlComputeXmaskAddrFromCoord( UINT_64 offsetHi; UINT_64 groupMask; + UINT_32 elemBits = 0; UINT_32 numPipes = m_pipes; // This function is accessed prior to si only @@ -3346,6 +3357,7 @@ VOID Lib::PadDimensions( heightAlign); } + /** **************************************************************************************************** * Lib::HwlPreHandleBaseLvl3xPitch @@ -3407,6 +3419,7 @@ UINT_32 Lib::HwlPostHandleBaseLvl3xPitch( return expPitch; } + /** **************************************************************************************************** * Lib::IsMacroTiled @@ -3523,7 +3536,6 @@ VOID Lib::ComputeMipLevel( { // Check if HWL has handled BOOL_32 hwlHandled = FALSE; - (void)hwlHandled; if (ElemLib::IsBlockCompressed(pIn->format)) { @@ -3927,6 +3939,7 @@ VOID Lib::ComputeQbStereoInfo( // 1D surface on SI may break this rule, but we can force it to meet by checking .qbStereo. } + /** **************************************************************************************************** * Lib::ComputePrtInfo diff --git a/src/amd/addrlib/src/core/addrlib1.h b/src/amd/addrlib/src/core/addrlib1.h index 5411d1c..07bc54a 100644 --- a/src/amd/addrlib/src/core/addrlib1.h +++ b/src/amd/addrlib/src/core/addrlib1.h @@ -354,6 +354,7 @@ protected: const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32* pPitchAlign, UINT_32* pHeightAlign, UINT_32* pSizeAlign) const = 0; + virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const { // not supported in hwl layer diff --git a/src/amd/addrlib/src/core/addrlib2.cpp b/src/amd/addrlib/src/core/addrlib2.cpp index 0007e2b..f83e3a4 100644 --- a/src/amd/addrlib/src/core/addrlib2.cpp +++ b/src/amd/addrlib/src/core/addrlib2.cpp @@ -142,10 +142,12 @@ Lib* Lib::GetLib( return static_cast(hLib); } + //////////////////////////////////////////////////////////////////////////////////////////////////// // Surface Methods //////////////////////////////////////////////////////////////////////////////////////////////////// + /** ************************************************************************************************************************ * Lib::ComputeSurfaceInfo @@ -437,6 +439,7 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddr( return returnCode; } + //////////////////////////////////////////////////////////////////////////////////////////////////// // CMASK/HTILE //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -889,6 +892,15 @@ ADDR_E_RETURNCODE Lib::ComputeSlicePipeBankXor( { returnCode = ADDR_NOTSUPPORTED; } + else if ((pIn->bpe != 0) && + (pIn->bpe != 8) && + (pIn->bpe != 16) && + (pIn->bpe != 32) && + (pIn->bpe != 64) && + (pIn->bpe != 128)) + { + returnCode = ADDR_INVALIDPARAMS; + } else { returnCode = HwlComputeSlicePipeBankXor(pIn, pOut); @@ -1540,11 +1552,11 @@ Dim3d Lib::GetMipTailDim( { ADDR_ASSERT(IsThin(resourceType, swizzleMode)); +#if DEBUG // GFX9/GFX10 use different dimension shrinking logic for mipmap tail: say for 128KB block + 2BPE, the maximum // dimension of mipmap tail level will be [256W * 128H] on GFX9 ASICs and [128W * 256H] on GFX10 ASICs. Since // GFX10 is newer HWL so we make its implementation into base class, in order to save future change on new HWLs. // And assert log2BlkSize will always be an even value on GFX9, so we never need the logic wrapped by DEBUG... -#if DEBUG if ((log2BlkSize & 1) && (m_chipFamily == ADDR_CHIP_FAMILY_AI)) { // Should never go here... diff --git a/src/amd/addrlib/src/core/addrlib2.h b/src/amd/addrlib/src/core/addrlib2.h index 75fe644..7780458 100644 --- a/src/amd/addrlib/src/core/addrlib2.h +++ b/src/amd/addrlib/src/core/addrlib2.h @@ -88,15 +88,17 @@ struct Dim3d // Macro define resource block type enum AddrBlockType { - AddrBlockMicro = 0, // Resource uses 256B block - AddrBlockThin4KB = 1, // Resource uses thin 4KB block - AddrBlockThick4KB = 2, // Resource uses thick 4KB block - AddrBlockThin64KB = 3, // Resource uses thin 64KB block - AddrBlockThick64KB = 4, // Resource uses thick 64KB block - AddrBlockVar = 5, // Resource uses var block, only valid for GFX9 - AddrBlockLinear = 6, // Resource uses linear swizzle mode - - AddrBlockMaxTiledType = AddrBlockVar + 1, + AddrBlockMicro = 0, // Resource uses 256B block + AddrBlockThin4KB = 1, // Resource uses thin 4KB block + AddrBlockThick4KB = 2, // Resource uses thick 4KB block + AddrBlockThin64KB = 3, // Resource uses thin 64KB block + AddrBlockThick64KB = 4, // Resource uses thick 64KB block + AddrBlockThinVar = 5, // Resource uses thin var block + AddrBlockThickVar = 6, // Resource uses thick var block + AddrBlockLinear, // Resource uses linear swizzle mode + + AddrBlockMaxTiledType = AddrBlockLinear, + }; enum AddrSwSet @@ -119,6 +121,87 @@ const UINT_32 Log2Size64K = 16u; /** ************************************************************************************************************************ +* @brief Bit setting for swizzle pattern +************************************************************************************************************************ +*/ +union ADDR_BIT_SETTING +{ + struct + { + UINT_16 x; + UINT_16 y; + UINT_16 z; + UINT_16 s; + }; + UINT_64 value; +}; + +/** +************************************************************************************************************************ +* @brief Swizzle pattern information +************************************************************************************************************************ +*/ +struct ADDR_SW_PATINFO +{ + UINT_8 maxItemCount; + UINT_8 nibble01Idx; + UINT_16 nibble2Idx; + UINT_16 nibble3Idx; + UINT_8 nibble4Idx; +}; + +/** +************************************************************************************************************************ +* InitBit +* +* @brief +* Initialize bit setting value via a return value +************************************************************************************************************************ +*/ +#define InitBit(c, index) (1ull << ((c << 4) + index)) + +const UINT_64 X0 = InitBit(0, 0); +const UINT_64 X1 = InitBit(0, 1); +const UINT_64 X2 = InitBit(0, 2); +const UINT_64 X3 = InitBit(0, 3); +const UINT_64 X4 = InitBit(0, 4); +const UINT_64 X5 = InitBit(0, 5); +const UINT_64 X6 = InitBit(0, 6); +const UINT_64 X7 = InitBit(0, 7); +const UINT_64 X8 = InitBit(0, 8); +const UINT_64 X9 = InitBit(0, 9); +const UINT_64 X10 = InitBit(0, 10); +const UINT_64 X11 = InitBit(0, 11); + +const UINT_64 Y0 = InitBit(1, 0); +const UINT_64 Y1 = InitBit(1, 1); +const UINT_64 Y2 = InitBit(1, 2); +const UINT_64 Y3 = InitBit(1, 3); +const UINT_64 Y4 = InitBit(1, 4); +const UINT_64 Y5 = InitBit(1, 5); +const UINT_64 Y6 = InitBit(1, 6); +const UINT_64 Y7 = InitBit(1, 7); +const UINT_64 Y8 = InitBit(1, 8); +const UINT_64 Y9 = InitBit(1, 9); +const UINT_64 Y10 = InitBit(1, 10); +const UINT_64 Y11 = InitBit(1, 11); + +const UINT_64 Z0 = InitBit(2, 0); +const UINT_64 Z1 = InitBit(2, 1); +const UINT_64 Z2 = InitBit(2, 2); +const UINT_64 Z3 = InitBit(2, 3); +const UINT_64 Z4 = InitBit(2, 4); +const UINT_64 Z5 = InitBit(2, 5); +const UINT_64 Z6 = InitBit(2, 6); +const UINT_64 Z7 = InitBit(2, 7); +const UINT_64 Z8 = InitBit(2, 8); + +const UINT_64 S0 = InitBit(3, 0); +const UINT_64 S1 = InitBit(3, 1); +const UINT_64 S2 = InitBit(3, 2); + +/** +************************************************************************************************************************ * @brief This class contains asic independent address lib functionalities ************************************************************************************************************************ */ @@ -550,6 +633,7 @@ protected: return ADDR_NOTSUPPORTED; } + virtual ADDR_E_RETURNCODE HwlComputeSubResourceOffsetForSwizzlePattern( const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const diff --git a/src/amd/addrlib/src/gfx10/gfx10SwizzlePattern.h b/src/amd/addrlib/src/gfx10/gfx10SwizzlePattern.h index 45d3809..49d3b2f 100644 --- a/src/amd/addrlib/src/gfx10/gfx10SwizzlePattern.h +++ b/src/amd/addrlib/src/gfx10/gfx10SwizzlePattern.h @@ -34,92 +34,12 @@ #ifndef __GFX10_SWIZZLE_PATTERN_H__ #define __GFX10_SWIZZLE_PATTERN_H__ + namespace Addr { namespace V2 { -/** -************************************************************************************************************************ -* @brief Bit setting for swizzle pattern -************************************************************************************************************************ -*/ -union ADDR_BIT_SETTING -{ - struct - { - UINT_16 x; - UINT_16 y; - UINT_16 z; - UINT_16 s; - }; - UINT_64 value; -}; - -/** -************************************************************************************************************************ -* @brief Swizzle pattern information -************************************************************************************************************************ -*/ -struct ADDR_SW_PATINFO -{ - UINT_8 maxItemCount; - UINT_8 nibble01Idx; - UINT_16 nibble2Idx; - UINT_16 nibble3Idx; - UINT_8 nibble4Idx; -}; - -/** -************************************************************************************************************************ -* InitBit -* -* @brief -* Initialize bit setting value via a return value -************************************************************************************************************************ -*/ -#define InitBit(c, index) (1ull << ((c << 4) + index)) - -const UINT_64 X0 = InitBit(0, 0); -const UINT_64 X1 = InitBit(0, 1); -const UINT_64 X2 = InitBit(0, 2); -const UINT_64 X3 = InitBit(0, 3); -const UINT_64 X4 = InitBit(0, 4); -const UINT_64 X5 = InitBit(0, 5); -const UINT_64 X6 = InitBit(0, 6); -const UINT_64 X7 = InitBit(0, 7); -const UINT_64 X8 = InitBit(0, 8); -const UINT_64 X9 = InitBit(0, 9); -const UINT_64 X10 = InitBit(0, 10); -const UINT_64 X11 = InitBit(0, 11); - -const UINT_64 Y0 = InitBit(1, 0); -const UINT_64 Y1 = InitBit(1, 1); -const UINT_64 Y2 = InitBit(1, 2); -const UINT_64 Y3 = InitBit(1, 3); -const UINT_64 Y4 = InitBit(1, 4); -const UINT_64 Y5 = InitBit(1, 5); -const UINT_64 Y6 = InitBit(1, 6); -const UINT_64 Y7 = InitBit(1, 7); -const UINT_64 Y8 = InitBit(1, 8); -const UINT_64 Y9 = InitBit(1, 9); -const UINT_64 Y10 = InitBit(1, 10); -const UINT_64 Y11 = InitBit(1, 11); - -const UINT_64 Z0 = InitBit(2, 0); -const UINT_64 Z1 = InitBit(2, 1); -const UINT_64 Z2 = InitBit(2, 2); -const UINT_64 Z3 = InitBit(2, 3); -const UINT_64 Z4 = InitBit(2, 4); -const UINT_64 Z5 = InitBit(2, 5); -const UINT_64 Z6 = InitBit(2, 6); -const UINT_64 Z7 = InitBit(2, 7); -const UINT_64 Z8 = InitBit(2, 8); - -const UINT_64 S0 = InitBit(3, 0); -const UINT_64 S1 = InitBit(3, 1); -const UINT_64 S2 = InitBit(3, 2); - -const ADDR_SW_PATINFO SW_256_S_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_256_S_PATINFO[] = { { 1, 0, 0, 0, 0, } , // 1 pipes 1 bpe @ SW_256_S @ Navi1x { 1, 1, 0, 0, 0, } , // 1 pipes 2 bpe @ SW_256_S @ Navi1x @@ -158,7 +78,7 @@ const ADDR_SW_PATINFO SW_256_S_PATINFO[] = { 1, 4, 0, 0, 0, } , // 64 pipes 16 bpe @ SW_256_S @ Navi1x }; -const ADDR_SW_PATINFO SW_256_D_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_256_D_PATINFO[] = { { 1, 5, 0, 0, 0, } , // 1 pipes 1 bpe @ SW_256_D @ Navi1x { 1, 1, 0, 0, 0, } , // 1 pipes 2 bpe @ SW_256_D @ Navi1x @@ -197,7 +117,7 @@ const ADDR_SW_PATINFO SW_256_D_PATINFO[] = { 1, 7, 0, 0, 0, } , // 64 pipes 16 bpe @ SW_256_D @ Navi1x }; -const ADDR_SW_PATINFO SW_4K_S_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_4K_S_PATINFO[] = { { 1, 0, 1, 0, 0, } , // 1 pipes 1 bpe @ SW_4K_S @ Navi1x { 1, 1, 2, 0, 0, } , // 1 pipes 2 bpe @ SW_4K_S @ Navi1x @@ -236,7 +156,7 @@ const ADDR_SW_PATINFO SW_4K_S_PATINFO[] = { 1, 4, 5, 0, 0, } , // 64 pipes 16 bpe @ SW_4K_S @ Navi1x }; -const ADDR_SW_PATINFO SW_4K_D_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_4K_D_PATINFO[] = { { 1, 5, 1, 0, 0, } , // 1 pipes 1 bpe @ SW_4K_D @ Navi1x { 1, 1, 2, 0, 0, } , // 1 pipes 2 bpe @ SW_4K_D @ Navi1x @@ -275,7 +195,7 @@ const ADDR_SW_PATINFO SW_4K_D_PATINFO[] = { 1, 7, 5, 0, 0, } , // 64 pipes 16 bpe @ SW_4K_D @ Navi1x }; -const ADDR_SW_PATINFO SW_4K_S_X_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_4K_S_X_PATINFO[] = { { 1, 0, 1, 0, 0, } , // 1 pipes 1 bpe @ SW_4K_S_X @ Navi1x { 1, 1, 2, 0, 0, } , // 1 pipes 2 bpe @ SW_4K_S_X @ Navi1x @@ -314,7 +234,7 @@ const ADDR_SW_PATINFO SW_4K_S_X_PATINFO[] = { 3, 4, 25, 0, 0, } , // 64 pipes 16 bpe @ SW_4K_S_X @ Navi1x }; -const ADDR_SW_PATINFO SW_4K_D_X_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_4K_D_X_PATINFO[] = { { 1, 5, 1, 0, 0, } , // 1 pipes 1 bpe @ SW_4K_D_X @ Navi1x { 1, 1, 2, 0, 0, } , // 1 pipes 2 bpe @ SW_4K_D_X @ Navi1x @@ -353,7 +273,7 @@ const ADDR_SW_PATINFO SW_4K_D_X_PATINFO[] = { 3, 7, 25, 0, 0, } , // 64 pipes 16 bpe @ SW_4K_D_X @ Navi1x }; -const ADDR_SW_PATINFO SW_4K_S3_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_4K_S3_PATINFO[] = { { 1, 29, 131, 0, 0, } , // 1 pipes 1 bpe @ SW_4K_S3 @ Navi1x { 1, 30, 132, 0, 0, } , // 1 pipes 2 bpe @ SW_4K_S3 @ Navi1x @@ -392,7 +312,7 @@ const ADDR_SW_PATINFO SW_4K_S3_PATINFO[] = { 1, 33, 135, 0, 0, } , // 64 pipes 16 bpe @ SW_4K_S3 @ Navi1x }; -const ADDR_SW_PATINFO SW_4K_S3_X_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_4K_S3_X_PATINFO[] = { { 1, 29, 131, 0, 0, } , // 1 pipes 1 bpe @ SW_4K_S3_X @ Navi1x { 1, 30, 132, 0, 0, } , // 1 pipes 2 bpe @ SW_4K_S3_X @ Navi1x @@ -431,7 +351,7 @@ const ADDR_SW_PATINFO SW_4K_S3_X_PATINFO[] = { 3, 33, 155, 0, 0, } , // 64 pipes 16 bpe @ SW_4K_S3_X @ Navi1x }; -const ADDR_SW_PATINFO SW_64K_S_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_S_PATINFO[] = { { 1, 0, 1, 1, 0, } , // 1 pipes 1 bpe @ SW_64K_S @ Navi1x { 1, 1, 2, 2, 0, } , // 1 pipes 2 bpe @ SW_64K_S @ Navi1x @@ -470,7 +390,7 @@ const ADDR_SW_PATINFO SW_64K_S_PATINFO[] = { 1, 4, 5, 5, 0, } , // 64 pipes 16 bpe @ SW_64K_S @ Navi1x }; -const ADDR_SW_PATINFO SW_64K_D_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_D_PATINFO[] = { { 1, 5, 1, 1, 0, } , // 1 pipes 1 bpe @ SW_64K_D @ Navi1x { 1, 1, 2, 2, 0, } , // 1 pipes 2 bpe @ SW_64K_D @ Navi1x @@ -509,7 +429,7 @@ const ADDR_SW_PATINFO SW_64K_D_PATINFO[] = { 1, 7, 5, 5, 0, } , // 64 pipes 16 bpe @ SW_64K_D @ Navi1x }; -const ADDR_SW_PATINFO SW_64K_S_T_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_S_T_PATINFO[] = { { 1, 0, 1, 1, 0, } , // 1 pipes 1 bpe @ SW_64K_S_T @ Navi1x { 1, 1, 2, 2, 0, } , // 1 pipes 2 bpe @ SW_64K_S_T @ Navi1x @@ -548,7 +468,7 @@ const ADDR_SW_PATINFO SW_64K_S_T_PATINFO[] = { 2, 4, 5, 25, 0, } , // 64 pipes 16 bpe @ SW_64K_S_T @ Navi1x }; -const ADDR_SW_PATINFO SW_64K_D_T_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_D_T_PATINFO[] = { { 1, 5, 1, 1, 0, } , // 1 pipes 1 bpe @ SW_64K_D_T @ Navi1x { 1, 1, 2, 2, 0, } , // 1 pipes 2 bpe @ SW_64K_D_T @ Navi1x @@ -587,7 +507,7 @@ const ADDR_SW_PATINFO SW_64K_D_T_PATINFO[] = { 2, 7, 5, 25, 0, } , // 64 pipes 16 bpe @ SW_64K_D_T @ Navi1x }; -const ADDR_SW_PATINFO SW_64K_S_X_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_S_X_PATINFO[] = { { 1, 0, 1, 1, 0, } , // 1 pipes 1 bpe @ SW_64K_S_X @ Navi1x { 1, 1, 2, 2, 0, } , // 1 pipes 2 bpe @ SW_64K_S_X @ Navi1x @@ -626,7 +546,7 @@ const ADDR_SW_PATINFO SW_64K_S_X_PATINFO[] = { 3, 4, 35, 15, 0, } , // 64 pipes 16 bpe @ SW_64K_S_X @ Navi1x }; -const ADDR_SW_PATINFO SW_64K_D_X_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_D_X_PATINFO[] = { { 1, 5, 1, 1, 0, } , // 1 pipes 1 bpe @ SW_64K_D_X @ Navi1x { 1, 1, 2, 2, 0, } , // 1 pipes 2 bpe @ SW_64K_D_X @ Navi1x @@ -665,7 +585,7 @@ const ADDR_SW_PATINFO SW_64K_D_X_PATINFO[] = { 3, 7, 35, 15, 0, } , // 64 pipes 16 bpe @ SW_64K_D_X @ Navi1x }; -const ADDR_SW_PATINFO SW_64K_R_X_1xaa_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_R_X_1xaa_PATINFO[] = { { 1, 5, 1, 1, 0, } , // 1 pipes 1 bpe @ SW_64K_R_X 1xaa @ Navi1x { 1, 1, 2, 2, 0, } , // 1 pipes 2 bpe @ SW_64K_R_X 1xaa @ Navi1x @@ -704,7 +624,7 @@ const ADDR_SW_PATINFO SW_64K_R_X_1xaa_PATINFO[] = { 3, 7, 79, 41, 0, } , // 64 pipes 16 bpe @ SW_64K_R_X 1xaa @ Navi1x }; -const ADDR_SW_PATINFO SW_64K_R_X_2xaa_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_R_X_2xaa_PATINFO[] = { { 2, 5, 1, 99, 0, } , // 1 pipes 1 bpe @ SW_64K_R_X 2xaa @ Navi1x { 2, 1, 2, 100, 0, } , // 1 pipes 2 bpe @ SW_64K_R_X 2xaa @ Navi1x @@ -743,7 +663,7 @@ const ADDR_SW_PATINFO SW_64K_R_X_2xaa_PATINFO[] = { 3, 7, 115, 41, 0, } , // 64 pipes 16 bpe @ SW_64K_R_X 2xaa @ Navi1x }; -const ADDR_SW_PATINFO SW_64K_R_X_4xaa_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_R_X_4xaa_PATINFO[] = { { 2, 5, 1, 118, 0, } , // 1 pipes 1 bpe @ SW_64K_R_X 4xaa @ Navi1x { 2, 1, 2, 119, 0, } , // 1 pipes 2 bpe @ SW_64K_R_X 4xaa @ Navi1x @@ -782,7 +702,7 @@ const ADDR_SW_PATINFO SW_64K_R_X_4xaa_PATINFO[] = { 3, 7, 122, 41, 0, } , // 64 pipes 16 bpe @ SW_64K_R_X 4xaa @ Navi1x }; -const ADDR_SW_PATINFO SW_64K_R_X_8xaa_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_R_X_8xaa_PATINFO[] = { { 2, 5, 1, 134, 0, } , // 1 pipes 1 bpe @ SW_64K_R_X 8xaa @ Navi1x { 2, 1, 2, 135, 0, } , // 1 pipes 2 bpe @ SW_64K_R_X 8xaa @ Navi1x @@ -821,7 +741,7 @@ const ADDR_SW_PATINFO SW_64K_R_X_8xaa_PATINFO[] = { 3, 7, 130, 147, 0, } , // 64 pipes 16 bpe @ SW_64K_R_X 8xaa @ Navi1x }; -const ADDR_SW_PATINFO SW_64K_Z_X_1xaa_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_Z_X_1xaa_PATINFO[] = { { 1, 8, 1, 1, 0, } , // 1 pipes 1 bpe @ SW_64K_Z_X 1xaa @ Navi1x { 1, 9, 2, 2, 0, } , // 1 pipes 2 bpe @ SW_64K_Z_X 1xaa @ Navi1x @@ -860,7 +780,7 @@ const ADDR_SW_PATINFO SW_64K_Z_X_1xaa_PATINFO[] = { 3, 7, 79, 41, 0, } , // 64 pipes 16 bpe @ SW_64K_Z_X 1xaa @ Navi1x }; -const ADDR_SW_PATINFO SW_64K_Z_X_2xaa_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_Z_X_2xaa_PATINFO[] = { { 1, 13, 80, 42, 0, } , // 1 pipes 1 bpe @ SW_64K_Z_X 2xaa @ Navi1x { 1, 14, 3, 3, 0, } , // 1 pipes 2 bpe @ SW_64K_Z_X 2xaa @ Navi1x @@ -899,7 +819,7 @@ const ADDR_SW_PATINFO SW_64K_Z_X_2xaa_PATINFO[] = { 3, 17, 89, 61, 0, } , // 64 pipes 16 bpe @ SW_64K_Z_X 2xaa @ Navi1x }; -const ADDR_SW_PATINFO SW_64K_Z_X_4xaa_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_Z_X_4xaa_PATINFO[] = { { 1, 18, 3, 3, 0, } , // 1 pipes 1 bpe @ SW_64K_Z_X 4xaa @ Navi1x { 2, 19, 90, 62, 0, } , // 1 pipes 2 bpe @ SW_64K_Z_X 4xaa @ Navi1x @@ -938,7 +858,7 @@ const ADDR_SW_PATINFO SW_64K_Z_X_4xaa_PATINFO[] = { 3, 22, 100, 82, 0, } , // 64 pipes 16 bpe @ SW_64K_Z_X 4xaa @ Navi1x }; -const ADDR_SW_PATINFO SW_64K_Z_X_8xaa_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_Z_X_8xaa_PATINFO[] = { { 2, 23, 3, 43, 0, } , // 1 pipes 1 bpe @ SW_64K_Z_X 8xaa @ Navi1x { 2, 24, 3, 63, 0, } , // 1 pipes 2 bpe @ SW_64K_Z_X 8xaa @ Navi1x @@ -977,7 +897,7 @@ const ADDR_SW_PATINFO SW_64K_Z_X_8xaa_PATINFO[] = { 3, 27, 112, 98, 0, } , // 64 pipes 16 bpe @ SW_64K_Z_X 8xaa @ Navi1x }; -const ADDR_SW_PATINFO SW_64K_S3_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_S3_PATINFO[] = { { 1, 29, 131, 148, 0, } , // 1 pipes 1 bpe @ SW_64K_S3 @ Navi1x { 1, 30, 132, 149, 0, } , // 1 pipes 2 bpe @ SW_64K_S3 @ Navi1x @@ -1016,7 +936,7 @@ const ADDR_SW_PATINFO SW_64K_S3_PATINFO[] = { 1, 33, 135, 152, 0, } , // 64 pipes 16 bpe @ SW_64K_S3 @ Navi1x }; -const ADDR_SW_PATINFO SW_64K_S3_X_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_S3_X_PATINFO[] = { { 1, 29, 131, 148, 0, } , // 1 pipes 1 bpe @ SW_64K_S3_X @ Navi1x { 1, 30, 132, 149, 0, } , // 1 pipes 2 bpe @ SW_64K_S3_X @ Navi1x @@ -1055,7 +975,7 @@ const ADDR_SW_PATINFO SW_64K_S3_X_PATINFO[] = { 3, 33, 165, 162, 0, } , // 64 pipes 16 bpe @ SW_64K_S3_X @ Navi1x }; -const ADDR_SW_PATINFO SW_64K_S3_T_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_S3_T_PATINFO[] = { { 1, 29, 131, 148, 0, } , // 1 pipes 1 bpe @ SW_64K_S3_T @ Navi1x { 1, 30, 132, 149, 0, } , // 1 pipes 2 bpe @ SW_64K_S3_T @ Navi1x @@ -1094,7 +1014,7 @@ const ADDR_SW_PATINFO SW_64K_S3_T_PATINFO[] = { 3, 33, 135, 167, 0, } , // 64 pipes 16 bpe @ SW_64K_S3_T @ Navi1x }; -const ADDR_SW_PATINFO SW_64K_D3_X_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_D3_X_PATINFO[] = { { 1, 34, 131, 148, 0, } , // 1 pipes 1 bpe @ SW_64K_D3_X @ Navi1x { 1, 35, 132, 149, 0, } , // 1 pipes 2 bpe @ SW_64K_D3_X @ Navi1x @@ -1133,7 +1053,7 @@ const ADDR_SW_PATINFO SW_64K_D3_X_PATINFO[] = { 3, 38, 209, 182, 0, } , // 64 pipes 16 bpe @ SW_64K_D3_X @ Navi1x }; -const ADDR_SW_PATINFO SW_256_S_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_256_S_RBPLUS_PATINFO[] = { { 1, 0, 0, 0, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_256_S @ RbPlus { 1, 1, 0, 0, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_256_S @ RbPlus @@ -1212,7 +1132,7 @@ const ADDR_SW_PATINFO SW_256_S_RBPLUS_PATINFO[] = { 1, 4, 0, 0, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_256_S @ RbPlus }; -const ADDR_SW_PATINFO SW_256_D_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_256_D_RBPLUS_PATINFO[] = { { 1, 5, 0, 0, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_256_D @ RbPlus { 1, 1, 0, 0, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_256_D @ RbPlus @@ -1291,7 +1211,7 @@ const ADDR_SW_PATINFO SW_256_D_RBPLUS_PATINFO[] = { 1, 7, 0, 0, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_256_D @ RbPlus }; -const ADDR_SW_PATINFO SW_4K_S_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_4K_S_RBPLUS_PATINFO[] = { { 1, 0, 1, 0, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_4K_S @ RbPlus { 1, 1, 2, 0, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_4K_S @ RbPlus @@ -1370,7 +1290,7 @@ const ADDR_SW_PATINFO SW_4K_S_RBPLUS_PATINFO[] = { 1, 4, 5, 0, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_4K_S @ RbPlus }; -const ADDR_SW_PATINFO SW_4K_D_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_4K_D_RBPLUS_PATINFO[] = { { 1, 5, 1, 0, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_4K_D @ RbPlus { 1, 1, 2, 0, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_4K_D @ RbPlus @@ -1449,7 +1369,7 @@ const ADDR_SW_PATINFO SW_4K_D_RBPLUS_PATINFO[] = { 1, 7, 5, 0, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_4K_D @ RbPlus }; -const ADDR_SW_PATINFO SW_4K_S_X_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_4K_S_X_RBPLUS_PATINFO[] = { { 1, 0, 1, 0, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_4K_S_X @ RbPlus { 1, 1, 2, 0, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_4K_S_X @ RbPlus @@ -1528,7 +1448,7 @@ const ADDR_SW_PATINFO SW_4K_S_X_RBPLUS_PATINFO[] = { 3, 4, 244, 0, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_4K_S_X @ RbPlus }; -const ADDR_SW_PATINFO SW_4K_D_X_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_4K_D_X_RBPLUS_PATINFO[] = { { 1, 5, 1, 0, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_4K_D_X @ RbPlus { 1, 1, 2, 0, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_4K_D_X @ RbPlus @@ -1607,7 +1527,7 @@ const ADDR_SW_PATINFO SW_4K_D_X_RBPLUS_PATINFO[] = { 3, 7, 244, 0, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_4K_D_X @ RbPlus }; -const ADDR_SW_PATINFO SW_4K_S3_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_4K_S3_RBPLUS_PATINFO[] = { { 1, 29, 131, 0, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_4K_S3 @ RbPlus { 1, 30, 132, 0, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_4K_S3 @ RbPlus @@ -1686,7 +1606,7 @@ const ADDR_SW_PATINFO SW_4K_S3_RBPLUS_PATINFO[] = { 1, 33, 135, 0, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_4K_S3 @ RbPlus }; -const ADDR_SW_PATINFO SW_4K_S3_X_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_4K_S3_X_RBPLUS_PATINFO[] = { { 1, 29, 131, 0, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_4K_S3_X @ RbPlus { 1, 30, 132, 0, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_4K_S3_X @ RbPlus @@ -1765,7 +1685,7 @@ const ADDR_SW_PATINFO SW_4K_S3_X_RBPLUS_PATINFO[] = { 3, 33, 155, 0, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_4K_S3_X @ RbPlus }; -const ADDR_SW_PATINFO SW_64K_S_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_S_RBPLUS_PATINFO[] = { { 1, 0, 1, 1, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_S @ RbPlus { 1, 1, 2, 2, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_S @ RbPlus @@ -1844,7 +1764,7 @@ const ADDR_SW_PATINFO SW_64K_S_RBPLUS_PATINFO[] = { 1, 4, 5, 5, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_S @ RbPlus }; -const ADDR_SW_PATINFO SW_64K_D_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_D_RBPLUS_PATINFO[] = { { 1, 5, 1, 1, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_D @ RbPlus { 1, 1, 2, 2, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_D @ RbPlus @@ -1923,7 +1843,7 @@ const ADDR_SW_PATINFO SW_64K_D_RBPLUS_PATINFO[] = { 1, 7, 5, 5, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_D @ RbPlus }; -const ADDR_SW_PATINFO SW_64K_S_T_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_S_T_RBPLUS_PATINFO[] = { { 1, 0, 1, 1, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_S_T @ RbPlus { 1, 1, 2, 2, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_S_T @ RbPlus @@ -2002,7 +1922,7 @@ const ADDR_SW_PATINFO SW_64K_S_T_RBPLUS_PATINFO[] = { 2, 4, 5, 25, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_S_T @ RbPlus }; -const ADDR_SW_PATINFO SW_64K_D_T_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_D_T_RBPLUS_PATINFO[] = { { 1, 5, 1, 1, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_D_T @ RbPlus { 1, 1, 2, 2, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_D_T @ RbPlus @@ -2081,7 +2001,7 @@ const ADDR_SW_PATINFO SW_64K_D_T_RBPLUS_PATINFO[] = { 2, 7, 5, 25, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_D_T @ RbPlus }; -const ADDR_SW_PATINFO SW_64K_S_X_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_S_X_RBPLUS_PATINFO[] = { { 1, 0, 1, 1, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_S_X @ RbPlus { 1, 1, 2, 2, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_S_X @ RbPlus @@ -2160,7 +2080,7 @@ const ADDR_SW_PATINFO SW_64K_S_X_RBPLUS_PATINFO[] = { 3, 4, 269, 15, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_S_X @ RbPlus }; -const ADDR_SW_PATINFO SW_64K_D_X_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_D_X_RBPLUS_PATINFO[] = { { 1, 5, 1, 1, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_D_X @ RbPlus { 1, 1, 2, 2, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_D_X @ RbPlus @@ -2239,7 +2159,7 @@ const ADDR_SW_PATINFO SW_64K_D_X_RBPLUS_PATINFO[] = { 3, 7, 269, 15, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_D_X @ RbPlus }; -const ADDR_SW_PATINFO SW_64K_R_X_1xaa_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO[] = { { 2, 0, 347, 193, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_R_X 1xaa @ RbPlus { 2, 1, 348, 366, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_R_X 1xaa @ RbPlus @@ -2318,7 +2238,7 @@ const ADDR_SW_PATINFO SW_64K_R_X_1xaa_RBPLUS_PATINFO[] = { 3, 7, 324, 414, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_R_X 1xaa @ RbPlus }; -const ADDR_SW_PATINFO SW_64K_R_X_2xaa_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO[] = { { 3, 0, 424, 526, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_R_X 2xaa @ RbPlus { 3, 1, 348, 527, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_R_X 2xaa @ RbPlus @@ -2397,7 +2317,7 @@ const ADDR_SW_PATINFO SW_64K_R_X_2xaa_RBPLUS_PATINFO[] = { 3, 7, 429, 414, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_R_X 2xaa @ RbPlus }; -const ADDR_SW_PATINFO SW_64K_R_X_4xaa_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO[] = { { 3, 0, 347, 566, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_R_X 4xaa @ RbPlus { 3, 1, 348, 733, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_R_X 4xaa @ RbPlus @@ -2476,7 +2396,7 @@ const ADDR_SW_PATINFO SW_64K_R_X_4xaa_RBPLUS_PATINFO[] = { 3, 7, 441, 414, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_R_X 4xaa @ RbPlus }; -const ADDR_SW_PATINFO SW_64K_R_X_8xaa_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO[] = { { 3, 0, 424, 619, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_R_X 8xaa @ RbPlus { 3, 1, 348, 620, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_R_X 8xaa @ RbPlus @@ -2555,7 +2475,7 @@ const ADDR_SW_PATINFO SW_64K_R_X_8xaa_RBPLUS_PATINFO[] = { 3, 7, 458, 414, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_R_X 8xaa @ RbPlus }; -const ADDR_SW_PATINFO SW_64K_Z_X_1xaa_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO[] = { { 2, 8, 347, 193, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_Z_X 1xaa @ RbPlus { 2, 9, 348, 366, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_Z_X 1xaa @ RbPlus @@ -2634,7 +2554,7 @@ const ADDR_SW_PATINFO SW_64K_Z_X_1xaa_RBPLUS_PATINFO[] = { 3, 7, 324, 414, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_Z_X 1xaa @ RbPlus }; -const ADDR_SW_PATINFO SW_64K_Z_X_2xaa_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO[] = { { 2, 13, 357, 415, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_Z_X 2xaa @ RbPlus { 2, 14, 349, 195, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_Z_X 2xaa @ RbPlus @@ -2713,7 +2633,7 @@ const ADDR_SW_PATINFO SW_64K_Z_X_2xaa_RBPLUS_PATINFO[] = { 3, 17, 367, 414, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_Z_X 2xaa @ RbPlus }; -const ADDR_SW_PATINFO SW_64K_Z_X_4xaa_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO[] = { { 2, 18, 349, 195, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_Z_X 4xaa @ RbPlus { 3, 19, 349, 447, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_Z_X 4xaa @ RbPlus @@ -2792,7 +2712,7 @@ const ADDR_SW_PATINFO SW_64K_Z_X_4xaa_RBPLUS_PATINFO[] = { 3, 22, 377, 414, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_Z_X 4xaa @ RbPlus }; -const ADDR_SW_PATINFO SW_64K_Z_X_8xaa_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO[] = { { 3, 23, 358, 263, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_Z_X 8xaa @ RbPlus { 3, 24, 349, 448, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_Z_X 8xaa @ RbPlus @@ -2871,7 +2791,7 @@ const ADDR_SW_PATINFO SW_64K_Z_X_8xaa_RBPLUS_PATINFO[] = { 3, 27, 393, 414, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_Z_X 8xaa @ RbPlus }; -const ADDR_SW_PATINFO SW_64K_S3_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_S3_RBPLUS_PATINFO[] = { { 1, 29, 131, 148, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_S3 @ RbPlus { 1, 30, 132, 149, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_S3 @ RbPlus @@ -2950,7 +2870,7 @@ const ADDR_SW_PATINFO SW_64K_S3_RBPLUS_PATINFO[] = { 1, 33, 135, 152, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_S3 @ RbPlus }; -const ADDR_SW_PATINFO SW_64K_S3_X_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_S3_X_RBPLUS_PATINFO[] = { { 1, 29, 131, 148, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_S3_X @ RbPlus { 1, 30, 132, 149, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_S3_X @ RbPlus @@ -3029,7 +2949,7 @@ const ADDR_SW_PATINFO SW_64K_S3_X_RBPLUS_PATINFO[] = { 3, 33, 165, 162, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_S3_X @ RbPlus }; -const ADDR_SW_PATINFO SW_64K_S3_T_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_S3_T_RBPLUS_PATINFO[] = { { 1, 29, 131, 148, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_S3_T @ RbPlus { 1, 30, 132, 149, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_S3_T @ RbPlus @@ -3108,7 +3028,7 @@ const ADDR_SW_PATINFO SW_64K_S3_T_RBPLUS_PATINFO[] = { 3, 33, 135, 167, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_S3_T @ RbPlus }; -const ADDR_SW_PATINFO SW_64K_D3_X_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_64K_D3_X_RBPLUS_PATINFO[] = { { 1, 34, 131, 148, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_64K_D3_X @ RbPlus { 1, 35, 132, 149, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_64K_D3_X @ RbPlus @@ -3187,7 +3107,7 @@ const ADDR_SW_PATINFO SW_64K_D3_X_RBPLUS_PATINFO[] = { 4, 38, 509, 841, 0, } , // 64 pipes (32 PKRs) 16 bpe @ SW_64K_D3_X @ RbPlus }; -const ADDR_SW_PATINFO SW_VAR_R_X_1xaa_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO[] = { { 2, 0, 270, 183, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_VAR_R_X 1xaa @ RbPlus { 2, 1, 271, 184, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_VAR_R_X 1xaa @ RbPlus @@ -3266,7 +3186,7 @@ const ADDR_SW_PATINFO SW_VAR_R_X_1xaa_RBPLUS_PATINFO[] = { 3, 7, 311, 254, 44, } , // 64 pipes (32 PKRs) 16 bpe @ SW_VAR_R_X 1xaa @ RbPlus }; -const ADDR_SW_PATINFO SW_VAR_R_X_2xaa_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO[] = { { 3, 0, 403, 516, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_VAR_R_X 2xaa @ RbPlus { 3, 1, 271, 517, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_VAR_R_X 2xaa @ RbPlus @@ -3345,7 +3265,7 @@ const ADDR_SW_PATINFO SW_VAR_R_X_2xaa_RBPLUS_PATINFO[] = { 3, 7, 318, 290, 158, } , // 64 pipes (32 PKRs) 16 bpe @ SW_VAR_R_X 2xaa @ RbPlus }; -const ADDR_SW_PATINFO SW_VAR_R_X_4xaa_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO[] = { { 3, 0, 270, 556, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_VAR_R_X 4xaa @ RbPlus { 3, 1, 271, 557, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_VAR_R_X 4xaa @ RbPlus @@ -3424,7 +3344,7 @@ const ADDR_SW_PATINFO SW_VAR_R_X_4xaa_RBPLUS_PATINFO[] = { 3, 7, 324, 328, 204, } , // 64 pipes (32 PKRs) 16 bpe @ SW_VAR_R_X 4xaa @ RbPlus }; -const ADDR_SW_PATINFO SW_VAR_R_X_8xaa_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO[] = { { 3, 0, 407, 610, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_VAR_R_X 8xaa @ RbPlus { 3, 1, 408, 611, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_VAR_R_X 8xaa @ RbPlus @@ -3503,7 +3423,7 @@ const ADDR_SW_PATINFO SW_VAR_R_X_8xaa_RBPLUS_PATINFO[] = { 3, 7, 344, 668, 204, } , // 64 pipes (32 PKRs) 16 bpe @ SW_VAR_R_X 8xaa @ RbPlus }; -const ADDR_SW_PATINFO SW_VAR_Z_X_1xaa_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO[] = { { 2, 8, 270, 183, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_VAR_Z_X 1xaa @ RbPlus { 2, 9, 271, 184, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_VAR_Z_X 1xaa @ RbPlus @@ -3582,7 +3502,7 @@ const ADDR_SW_PATINFO SW_VAR_Z_X_1xaa_RBPLUS_PATINFO[] = { 3, 7, 311, 254, 44, } , // 64 pipes (32 PKRs) 16 bpe @ SW_VAR_Z_X 1xaa @ RbPlus }; -const ADDR_SW_PATINFO SW_VAR_Z_X_2xaa_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO[] = { { 2, 13, 312, 255, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_VAR_Z_X 2xaa @ RbPlus { 2, 14, 272, 185, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_VAR_Z_X 2xaa @ RbPlus @@ -3661,7 +3581,7 @@ const ADDR_SW_PATINFO SW_VAR_Z_X_2xaa_RBPLUS_PATINFO[] = { 3, 17, 318, 290, 65, } , // 64 pipes (32 PKRs) 16 bpe @ SW_VAR_Z_X 2xaa @ RbPlus }; -const ADDR_SW_PATINFO SW_VAR_Z_X_4xaa_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO[] = { { 2, 18, 272, 185, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_VAR_Z_X 4xaa @ RbPlus { 3, 19, 272, 291, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_VAR_Z_X 4xaa @ RbPlus @@ -3740,7 +3660,7 @@ const ADDR_SW_PATINFO SW_VAR_Z_X_4xaa_RBPLUS_PATINFO[] = { 3, 22, 324, 328, 100, } , // 64 pipes (32 PKRs) 16 bpe @ SW_VAR_Z_X 4xaa @ RbPlus }; -const ADDR_SW_PATINFO SW_VAR_Z_X_8xaa_RBPLUS_PATINFO[] = +const ADDR_SW_PATINFO GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO[] = { { 3, 23, 313, 256, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_VAR_Z_X 8xaa @ RbPlus { 3, 24, 272, 292, 0, } , // 1 pipes (1 PKRs) 2 bpe @ SW_VAR_Z_X 8xaa @ RbPlus @@ -3819,6 +3739,7 @@ const ADDR_SW_PATINFO SW_VAR_Z_X_8xaa_RBPLUS_PATINFO[] = { 3, 27, 344, 365, 124, } , // 64 pipes (32 PKRs) 16 bpe @ SW_VAR_Z_X 8xaa @ RbPlus }; + const UINT_64 GFX10_SW_PATTERN_NIBBLE01[][8] = { {X0, X1, X2, X3, Y0, Y1, Y2, Y3, }, // 0 @@ -5466,7 +5387,7 @@ const UINT_64 GFX10_SW_PATTERN_NIBBLE4[][4] = {X3^Y8, S0^X7^Y7, S1^X6^Y7, 0, }, // 238 }; -const UINT_8 DCC_64K_R_X_PATIDX[] = +const UINT_8 GFX10_DCC_64K_R_X_PATIDX[] = { 0, // 1 pipes 1 bpe ua @ SW_64K_R_X 1xaa @ Navi1x 1, // 1 pipes 2 bpe ua @ SW_64K_R_X 1xaa @ Navi1x @@ -5520,7 +5441,7 @@ const UINT_8 DCC_64K_R_X_PATIDX[] = 37, // 64 pipes 16 bpe pa @ SW_64K_R_X 1xaa @ Navi1x }; -const UINT_8 HTILE_PATIDX[] = +const UINT_8 GFX10_HTILE_PATIDX[] = { 0, // 1xaa ua @ HTILE_64K @ Navi1x 0, // 2xaa ua @ HTILE_64K @ Navi1x @@ -5556,7 +5477,7 @@ const UINT_8 HTILE_PATIDX[] = 12, // 64 pipes 8xaa pa @ HTILE_64K @ Navi1x }; -const UINT_8 CMASK_64K_PATIDX[] = +const UINT_8 GFX10_CMASK_64K_PATIDX[] = { 0, // 1 bpe ua @ CMASK_64K @ Navi1x 0, // 2 bpe ua @ CMASK_64K @ Navi1x @@ -5592,7 +5513,7 @@ const UINT_8 CMASK_64K_PATIDX[] = 7, // 64 pipes 8 bpe pa @ CMASK_64K @ Navi1x }; -const UINT_8 DCC_64K_R_X_RBPLUS_PATIDX[] = +const UINT_8 GFX10_DCC_64K_R_X_RBPLUS_PATIDX[] = { 0, // 1 bpe ua @ SW_64K_R_X 1xaa @ RbPlus 1, // 2 bpe ua @ SW_64K_R_X 1xaa @ RbPlus @@ -5676,7 +5597,7 @@ const UINT_8 DCC_64K_R_X_RBPLUS_PATIDX[] = 107, // 64 pipes (32 PKRs) 16 bpe pa @ SW_64K_R_X 1xaa @ RbPlus }; -const UINT_8 HTILE_RBPLUS_PATIDX[] = +const UINT_8 GFX10_HTILE_RBPLUS_PATIDX[] = { 0, // 1xaa ua @ HTILE_64K @ RbPlus 0, // 2xaa ua @ HTILE_64K @ RbPlus @@ -5760,7 +5681,7 @@ const UINT_8 HTILE_RBPLUS_PATIDX[] = 29, // 64 pipes (32 PKRs) 8xaa pa @ HTILE_64K @ RbPlus }; -const UINT_8 CMASK_64K_RBPLUS_PATIDX[] = +const UINT_8 GFX10_CMASK_64K_RBPLUS_PATIDX[] = { 0, // 1 bpe ua @ CMASK_64K @ RbPlus 0, // 2 bpe ua @ CMASK_64K @ RbPlus @@ -5844,7 +5765,7 @@ const UINT_8 CMASK_64K_RBPLUS_PATIDX[] = 34, // 64 pipes (32 PKRs) 8 bpe pa @ CMASK_64K @ RbPlus }; -const UINT_8 CMASK_VAR_RBPLUS_PATIDX[] = +const UINT_8 GFX10_CMASK_VAR_RBPLUS_PATIDX[] = { 0, // 1 bpe ua @ CMASK_VAR @ RbPlus 0, // 2 bpe ua @ CMASK_VAR @ RbPlus @@ -5928,7 +5849,8 @@ const UINT_8 CMASK_VAR_RBPLUS_PATIDX[] = 31, // 64 pipes (32 PKRs) 8 bpe pa @ CMASK_VAR @ RbPlus }; -const UINT_64 DCC_64K_R_X_SW_PATTERN[][17] = + +const UINT_64 GFX10_DCC_64K_R_X_SW_PATTERN[][17] = { {0, X4, Y4, X5, Y5, X6, Y6, X7, Y7, X8, Y8, X9, Y9, 0, 0, 0, 0, }, //0 {0, Y3, X4, Y4, X5, Y5, X6, Y6, X7, Y7, X8, Y8, X9, 0, 0, 0, 0, }, //1 @@ -6040,7 +5962,7 @@ const UINT_64 DCC_64K_R_X_SW_PATTERN[][17] = {0, X3, Y3, X7, Y7, X8, Y8, X2, Y2, Y4^X9^Y9, Z2^X4^Y4, Z1^Y5^X8, Z0^X5^Y8, Y2^Y6^X7, X2^X6^Y7, 0, 0, }, //107 }; -const UINT_64 HTILE_SW_PATTERN[][18] = +const UINT_64 GFX10_HTILE_SW_PATTERN[][18] = { {0, 0, 0, X3, Y3, X4, Y4, X5, Y5, X6, Y6, X7, Y7, 0, 0, 0, 0, 0, }, //0 {0, 0, 0, X3, Y4, X4, X5, Y5, X6, Z0^X3^Y3, Y6, X7, Y7, 0, 0, 0, 0, 0, }, //1 @@ -6074,7 +5996,7 @@ const UINT_64 HTILE_SW_PATTERN[][18] = {0, 0, 0, X3, Y3, X7, Y7, X8, Y8, Y4^X9^Y9, Z1^X4^Y4, Z0^Y5^X8, X5^Y8, Y6^X7, X6^Y7, X9, Y9, X10, }, //29 }; -const UINT_64 CMASK_SW_PATTERN[][17] = +const UINT_64 GFX10_CMASK_SW_PATTERN[][17] = { {X3, Y3, X4, Y4, X5, Y5, X6, Y6, X7, Y7, X8, Y8, X9, 0, 0, 0, 0, }, //0 {X3, Y4, X4, X5, Y5, X6, Y6, X7, Y7, Z0^X3^Y3, X8, Y8, X9, 0, 0, 0, 0, }, //1 @@ -6113,7 +6035,7 @@ const UINT_64 CMASK_SW_PATTERN[][17] = {X3, Y3, X7, Y7, X8, Y8, X9, Y9, X10, Y4^X9^Y9, Z3^X4^Y4, Z2^Y5^X8, Z1^X5^Y8, Y6^X7, Z0^X6^Y7, 0, 0, }, //34 }; -} // V2 +}// V2 } // Addr #endif diff --git a/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp b/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp index 5583ce5..6f44e37 100644 --- a/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp +++ b/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp @@ -129,7 +129,6 @@ Gfx10Lib::Gfx10Lib(const Client* pClient) m_xmaskBaseIndex(0), m_dccBaseIndex(0) { - m_class = AI_ADDRLIB; memset(&m_settings, 0, sizeof(m_settings)); memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable)); } @@ -395,96 +394,104 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo( } else { - // only SW_*_R_X surfaces may be DCC compressed when attached to the CB - ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode)); - - Dim3d metaBlk = {0}; - const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); - const UINT_32 numFragLog2 = Log2(pIn->numFrags); - const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor, - pIn->resourceType, - pIn->swizzleMode, - elemLog2, - numFragLog2, - pIn->dccKeyFlags.pipeAligned, - &metaBlk); - const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode); + const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); - pOut->compressBlkWidth = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w; - pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h; - pOut->compressBlkDepth = isThick ? Block256_3d[elemLog2].d : 1; + { + // only SW_*_R_X surfaces may be DCC compressed when attached to the CB + ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode)); - pOut->dccRamBaseAlign = metaBlkSize; - pOut->metaBlkWidth = metaBlk.w; - pOut->metaBlkHeight = metaBlk.h; - pOut->metaBlkDepth = metaBlk.d; + const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode); - pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w); - pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h); - pOut->depth = PowTwoAlign(pIn->numSlices, metaBlk.d); + pOut->compressBlkWidth = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w; + pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h; + pOut->compressBlkDepth = isThick ? Block256_3d[elemLog2].d : 1; + } - if (pIn->numMipLevels > 1) + if (ret == ADDR_OK) { - ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels); + Dim3d metaBlk = {0}; + const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u)); + const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor, + pIn->resourceType, + pIn->swizzleMode, + elemLog2, + numFragLog2, + pIn->dccKeyFlags.pipeAligned, + &metaBlk); + + pOut->dccRamBaseAlign = metaBlkSize; + pOut->metaBlkWidth = metaBlk.w; + pOut->metaBlkHeight = metaBlk.h; + pOut->metaBlkDepth = metaBlk.d; + pOut->metaBlkSize = metaBlkSize; + + pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w); + pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h); + pOut->depth = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d); - UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize; - - for (INT_32 i = static_cast(pIn->firstMipIdInTail) - 1; i >= 0; i--) + if (pIn->numMipLevels > 1) { - UINT_32 mipWidth, mipHeight; + ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels); - GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight); + UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize; - mipWidth = PowTwoAlign(mipWidth, metaBlk.w); - mipHeight = PowTwoAlign(mipHeight, metaBlk.h); + for (INT_32 i = static_cast(pIn->firstMipIdInTail) - 1; i >= 0; i--) + { + UINT_32 mipWidth, mipHeight; - const UINT_32 pitchInM = mipWidth / metaBlk.w; - const UINT_32 heightInM = mipHeight / metaBlk.h; - const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize; + GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight); - if (pOut->pMipInfo != NULL) - { - pOut->pMipInfo[i].inMiptail = FALSE; - pOut->pMipInfo[i].offset = offset; - pOut->pMipInfo[i].sliceSize = mipSliceSize; - } + mipWidth = PowTwoAlign(mipWidth, metaBlk.w); + mipHeight = PowTwoAlign(mipHeight, metaBlk.h); - offset += mipSliceSize; - } + const UINT_32 pitchInM = mipWidth / metaBlk.w; + const UINT_32 heightInM = mipHeight / metaBlk.h; + const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize; - pOut->dccRamSliceSize = offset; - pOut->metaBlkNumPerSlice = offset / metaBlkSize; - pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d); + if (pOut->pMipInfo != NULL) + { + pOut->pMipInfo[i].inMiptail = FALSE; + pOut->pMipInfo[i].offset = offset; + pOut->pMipInfo[i].sliceSize = mipSliceSize; + } - if (pOut->pMipInfo != NULL) - { - for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++) - { - pOut->pMipInfo[i].inMiptail = TRUE; - pOut->pMipInfo[i].offset = 0; - pOut->pMipInfo[i].sliceSize = 0; + offset += mipSliceSize; } - if (pIn->firstMipIdInTail != pIn->numMipLevels) + pOut->dccRamSliceSize = offset; + pOut->metaBlkNumPerSlice = offset / metaBlkSize; + pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d); + + if (pOut->pMipInfo != NULL) { - pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize; + for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++) + { + pOut->pMipInfo[i].inMiptail = TRUE; + pOut->pMipInfo[i].offset = 0; + pOut->pMipInfo[i].sliceSize = 0; + } + + if (pIn->firstMipIdInTail != pIn->numMipLevels) + { + pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize; + } } } - } - else - { - const UINT_32 pitchInM = pOut->pitch / metaBlk.w; - const UINT_32 heightInM = pOut->height / metaBlk.h; + else + { + const UINT_32 pitchInM = pOut->pitch / metaBlk.w; + const UINT_32 heightInM = pOut->height / metaBlk.h; - pOut->metaBlkNumPerSlice = pitchInM * heightInM; - pOut->dccRamSliceSize = pOut->metaBlkNumPerSlice * metaBlkSize; - pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d); + pOut->metaBlkNumPerSlice = pitchInM * heightInM; + pOut->dccRamSliceSize = pOut->metaBlkNumPerSlice * metaBlkSize; + pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d); - if (pOut->pMipInfo != NULL) - { - pOut->pMipInfo[0].inMiptail = FALSE; - pOut->pMipInfo[0].offset = 0; - pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize; + if (pOut->pMipInfo != NULL) + { + pOut->pMipInfo[0].inMiptail = FALSE; + pOut->pMipInfo[0].offset = 0; + pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize; + } } } } @@ -510,7 +517,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord( // Only support pipe aligned CMask ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE); - ADDR2_COMPUTE_CMASK_INFO_INPUT input = {}; + ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0}; input.size = sizeof(input); input.cMaskFlags = pIn->cMaskFlags; input.unalignedWidth = Max(pIn->unalignedWidth, 1u); @@ -519,7 +526,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord( input.swizzleMode = pIn->swizzleMode; input.resourceType = pIn->resourceType; - ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {}; + ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0}; output.size = sizeof(output); ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output); @@ -530,12 +537,14 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord( const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3); const UINT_32 pipeMask = (1 << m_pipesLog2) - 1; const UINT_32 index = m_xmaskBaseIndex + fmaskElemLog2; - const UINT_8* patIdxTable = (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? CMASK_VAR_RBPLUS_PATIDX : - (m_settings.supportRbPlus ? CMASK_64K_RBPLUS_PATIDX : CMASK_64K_PATIDX); + const UINT_8* patIdxTable = + (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX : + (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX); + const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7; const UINT_32 blkMask = (1 << blkSizeLog2) - 1; - const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(CMASK_SW_PATTERN[patIdxTable[index]], + const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_CMASK_SW_PATTERN[patIdxTable[index]], blkSizeLog2 + 1, // +1 for nibble offset pIn->x, pIn->y, @@ -599,11 +608,12 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord( const UINT_32 numSampleLog2 = Log2(pIn->numSamples); const UINT_32 pipeMask = (1 << m_pipesLog2) - 1; const UINT_32 index = m_xmaskBaseIndex + numSampleLog2; - const UINT_8* patIdxTable = m_settings.supportRbPlus ? HTILE_RBPLUS_PATIDX : HTILE_PATIDX; + const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX; + const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4; const UINT_32 blkMask = (1 << blkSizeLog2) - 1; - const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(HTILE_SW_PATTERN[patIdxTable[index]], + const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_HTILE_SW_PATTERN[patIdxTable[index]], blkSizeLog2 + 1, // +1 for nibble offset pIn->x, pIn->y, @@ -680,7 +690,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccAddrFromCoord( if (m_settings.supportRbPlus) { - patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX; + patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX; if (pIn->dccKeyFlags.pipeAligned) { @@ -704,7 +714,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccAddrFromCoord( } else { - patIdxTable = DCC_64K_R_X_PATIDX; + patIdxTable = GFX10_DCC_64K_R_X_PATIDX; if (pIn->dccKeyFlags.pipeAligned) { @@ -718,12 +728,13 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccAddrFromCoord( const UINT_32 blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8; const UINT_32 blkMask = (1 << blkSizeLog2) - 1; - const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]], - blkSizeLog2 + 1, // +1 for nibble offset - pIn->x, - pIn->y, - pIn->slice, - 0); + const UINT_32 blkOffset = + ComputeOffsetFromSwizzlePattern(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]], + blkSizeLog2 + 1, // +1 for nibble offset + pIn->x, + pIn->y, + pIn->slice, + 0); const UINT_32 xb = pIn->x / pIn->metaBlkWidth; const UINT_32 yb = pIn->y / pIn->metaBlkHeight; const UINT_32 pb = pIn->pitch / pIn->metaBlkWidth; @@ -862,8 +873,8 @@ BOOL_32 Gfx10Lib::HwlInitGlobalParams( ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2)); - ADDR_C_ASSERT(sizeof(HTILE_RBPLUS_PATIDX) / sizeof(HTILE_RBPLUS_PATIDX[0]) == - sizeof(CMASK_64K_RBPLUS_PATIDX) / sizeof(CMASK_64K_RBPLUS_PATIDX[0])); + ADDR_C_ASSERT(sizeof(GFX10_HTILE_RBPLUS_PATIDX) / sizeof(GFX10_HTILE_RBPLUS_PATIDX[0]) == + sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX) / sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX[0])); if (m_numPkrLog2 >= 2) { @@ -877,10 +888,10 @@ BOOL_32 Gfx10Lib::HwlInitGlobalParams( static_cast(ADDR_CONFIG_1_PIPE) + 1; - ADDR_C_ASSERT(sizeof(HTILE_PATIDX) / sizeof(HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA); + ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA); - ADDR_C_ASSERT(sizeof(HTILE_PATIDX) / sizeof(HTILE_PATIDX[0]) == - sizeof(CMASK_64K_PATIDX) / sizeof(CMASK_64K_PATIDX[0])); + ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == + sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0])); } } @@ -891,6 +902,7 @@ BOOL_32 Gfx10Lib::HwlInitGlobalParams( m_blockVarSizeLog2 = m_pipesLog2 + 14; } + if (valid) { InitEquationTable(); @@ -915,12 +927,27 @@ ChipFamily Gfx10Lib::HwlConvertChipFamily( { ChipFamily family = ADDR_CHIP_FAMILY_NAVI; - m_settings.dccUnsup3DSwDis = 1; + m_settings.dccUnsup3DSwDis = 1; + m_settings.dsMipmapHtileFix = 1; switch (chipFamily) { case FAMILY_NV: - m_settings.isDcn2 = 1; + if (ASICREV_IS_NAVI10_P(chipRevision)) + { + m_settings.dsMipmapHtileFix = 0; + m_settings.isDcn20 = 1; + } + + if (ASICREV_IS_NAVI12_P(chipRevision)) + { + m_settings.isDcn20 = 1; + } + + if (ASICREV_IS_NAVI14_M(chipRevision)) + { + m_settings.isDcn20 = 1; + } if (ASICREV_IS_SIENNA_CICHLID(chipRevision)) { @@ -942,13 +969,15 @@ ChipFamily Gfx10Lib::HwlConvertChipFamily( break; case FAMILY_VGH: - m_settings.isDcn2 = 1; - if (ASICREV_IS_VANGOGH(chipRevision)) { m_settings.supportRbPlus = 1; m_settings.dccUnsup3DSwDis = 0; } + else + { + ADDR_ASSERT(!"Unknown chip revision"); + } break; default: @@ -956,13 +985,6 @@ ChipFamily Gfx10Lib::HwlConvertChipFamily( break; } - m_settings.dsMipmapHtileFix = 1; - - if (ASICREV_IS_NAVI10_P(chipRevision)) - { - m_settings.dsMipmapHtileFix = 0; - } - m_configFlags.use32bppFor422Fmt = TRUE; return family; @@ -1173,123 +1195,126 @@ UINT_32 Gfx10Lib::GetMetaBlkSize( Dim3d* pBlock ///< [out] block size ) const { - INT_32 metablkSizeLog2; - const INT_32 metaElemSizeLog2 = GetMetaElementSizeLog2(dataType); - const INT_32 metaCacheSizeLog2 = GetMetaCacheSizeLog2(dataType); - const INT_32 compBlkSizeLog2 = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2; - const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ? - numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2); - const INT_32 dataBlkSizeLog2 = GetBlockSizeLog2(swizzleMode); - INT_32 numPipesLog2 = m_pipesLog2; + INT_32 metablkSizeLog2; - if (IsThin(resourceType, swizzleMode)) { - if ((pipeAlign == FALSE) || - (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) || - (IsDisplaySwizzle(resourceType, swizzleMode) == TRUE)) + const INT_32 metaElemSizeLog2 = GetMetaElementSizeLog2(dataType); + const INT_32 metaCacheSizeLog2 = GetMetaCacheSizeLog2(dataType); + const INT_32 compBlkSizeLog2 = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2; + const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ? + numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2); + const INT_32 dataBlkSizeLog2 = GetBlockSizeLog2(swizzleMode); + INT_32 numPipesLog2 = m_pipesLog2; + + if (IsThin(resourceType, swizzleMode)) { - if (pipeAlign) + if ((pipeAlign == FALSE) || + (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) || + (IsDisplaySwizzle(resourceType, swizzleMode) == TRUE)) { - metablkSizeLog2 = Max(static_cast(m_pipeInterleaveLog2) + numPipesLog2, 12); - metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2); + if (pipeAlign) + { + metablkSizeLog2 = Max(static_cast(m_pipeInterleaveLog2) + numPipesLog2, 12); + metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2); + } + else + { + metablkSizeLog2 = Min(dataBlkSizeLog2, 12); + } } else { - metablkSizeLog2 = Min(dataBlkSizeLog2, 12); - } - } - else - { - if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1)) - { - numPipesLog2++; - } - - INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode); + if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1)) + { + numPipesLog2++; + } - if (numPipesLog2 >= 4) - { - INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2); + INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode); - // In 16Bpe 8xaa, we have an extra overlap bit - if ((pipeRotateLog2 > 0) && - (elemLog2 == 4) && - (numSamplesLog2 == 3) && - (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3))) + if (numPipesLog2 >= 4) { - overlapLog2++; - } + INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2); - metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2; - metablkSizeLog2 = Max(metablkSizeLog2, static_cast(m_pipeInterleaveLog2) + numPipesLog2); + // In 16Bpe 8xaa, we have an extra overlap bit + if ((pipeRotateLog2 > 0) && + (elemLog2 == 4) && + (numSamplesLog2 == 3) && + (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3))) + { + overlapLog2++; + } - if (m_settings.supportRbPlus && - IsRtOptSwizzle(swizzleMode) && - (numPipesLog2 == 6) && - (numSamplesLog2 == 3) && - (m_maxCompFragLog2 == 3) && - (metablkSizeLog2 < 15)) + metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2; + metablkSizeLog2 = Max(metablkSizeLog2, static_cast(m_pipeInterleaveLog2) + numPipesLog2); + + if (m_settings.supportRbPlus && + IsRtOptSwizzle(swizzleMode) && + (numPipesLog2 == 6) && + (numSamplesLog2 == 3) && + (m_maxCompFragLog2 == 3) && + (metablkSizeLog2 < 15)) + { + metablkSizeLog2 = 15; + } + } + else { - metablkSizeLog2 = 15; + metablkSizeLog2 = Max(static_cast(m_pipeInterleaveLog2) + numPipesLog2, 12); } - } - else - { - metablkSizeLog2 = Max(static_cast(m_pipeInterleaveLog2) + numPipesLog2, 12); - } - if (dataType == Gfx10DataDepthStencil) - { - // For htile surfaces, pad meta block size to 2K * num_pipes - metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2); - } + if (dataType == Gfx10DataDepthStencil) + { + // For htile surfaces, pad meta block size to 2K * num_pipes + metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2); + } - const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2); + const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2); - if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1)) - { - const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1); + if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1)) + { + const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1); - metablkSizeLog2 = Max(metablkSizeLog2, tmp); + metablkSizeLog2 = Max(metablkSizeLog2, tmp); + } } + + const INT_32 metablkBitsLog2 = + metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2; + pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1)); + pBlock->h = 1 << (metablkBitsLog2 >> 1); + pBlock->d = 1; } + else + { + ADDR_ASSERT(IsThick(resourceType, swizzleMode)); - const INT_32 metablkBitsLog2 = - metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2; - pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1)); - pBlock->h = 1 << (metablkBitsLog2 >> 1); - pBlock->d = 1; - } - else - { - ADDR_ASSERT(IsThick(resourceType, swizzleMode)); + if (pipeAlign) + { + if (m_settings.supportRbPlus && + (m_pipesLog2 == m_numSaLog2 + 1) && + (m_pipesLog2 > 1) && + IsRbAligned(resourceType, swizzleMode)) + { + numPipesLog2++; + } - if (pipeAlign) - { - if (m_settings.supportRbPlus && - (m_pipesLog2 == m_numSaLog2 + 1) && - (m_pipesLog2 > 1) && - IsRbAligned(resourceType, swizzleMode)) + const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2); + + metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2; + metablkSizeLog2 = Max(metablkSizeLog2, static_cast(m_pipeInterleaveLog2) + numPipesLog2); + metablkSizeLog2 = Max(metablkSizeLog2, 12); + } + else { - numPipesLog2++; + metablkSizeLog2 = 12; } - const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2); - - metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2; - metablkSizeLog2 = Max(metablkSizeLog2, static_cast(m_pipeInterleaveLog2) + numPipesLog2); - metablkSizeLog2 = Max(metablkSizeLog2, 12); + const INT_32 metablkBitsLog2 = + metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2; + pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0)); + pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0)); + pBlock->d = 1 << (metablkBitsLog2 / 3); } - else - { - metablkSizeLog2 = 12; - } - - const INT_32 metablkBitsLog2 = - metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2; - pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0)); - pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0)); - pBlock->d = 1 << (metablkBitsLog2 / 3); } return (1 << static_cast(metablkSizeLog2)); @@ -1867,7 +1892,7 @@ VOID Gfx10Lib::InitEquationTable() if (pPatInfo->maxItemCount <= 3) { - ADDR_EQUATION equation = {}; + ADDR_EQUATION equation = {0}; ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation); @@ -1935,6 +1960,38 @@ UINT_32 Gfx10Lib::HwlGetEquationIndex( /** ************************************************************************************************************************ +* Gfx10Lib::GetValidDisplaySwizzleModes +* +* @brief +* Get valid swizzle modes mask for displayable surface +* +* @return +* Valid swizzle modes mask for displayable surface +************************************************************************************************************************ +*/ +UINT_32 Gfx10Lib::GetValidDisplaySwizzleModes( + UINT_32 bpp + ) const +{ + UINT_32 swModeMask = 0; + + if (bpp <= 64) + { + if (m_settings.isDcn20) + { + swModeMask = (bpp == 64) ? Dcn20Bpp64SwModeMask : Dcn20NonBpp64SwModeMask; + } + else + { + swModeMask = (bpp == 64) ? Dcn21Bpp64SwModeMask : Dcn21NonBpp64SwModeMask; + } + } + + return swModeMask; +} + +/** +************************************************************************************************************************ * Gfx10Lib::IsValidDisplaySwizzleMode * * @brief @@ -1950,40 +2007,7 @@ BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode( { ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D); - BOOL_32 support = FALSE; - - if (m_settings.isDcn2) - { - switch (pIn->swizzleMode) - { - case ADDR_SW_4KB_D: - case ADDR_SW_4KB_D_X: - case ADDR_SW_64KB_D: - case ADDR_SW_64KB_D_T: - case ADDR_SW_64KB_D_X: - support = (pIn->bpp == 64); - break; - - case ADDR_SW_LINEAR: - case ADDR_SW_4KB_S: - case ADDR_SW_4KB_S_X: - case ADDR_SW_64KB_S: - case ADDR_SW_64KB_S_T: - case ADDR_SW_64KB_S_X: - case ADDR_SW_64KB_R_X: - support = (pIn->bpp <= 64); - break; - - default: - break; - } - } - else - { - ADDR_NOT_IMPLEMENTED(); - } - - return support; + return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? TRUE : FALSE; } /** @@ -2030,42 +2054,32 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor( { if (IsNonPrtXor(pIn->swizzleMode)) { - const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode); - const UINT_32 pipeBits = GetPipeXorBits(blockBits); - const UINT_32 bankBits = GetBankXorBits(blockBits); + const UINT_32 bankBits = GetBankXorBits(GetBlockSizeLog2(pIn->swizzleMode)); - UINT_32 pipeXor = 0; - UINT_32 bankXor = 0; + // No pipe xor... + const UINT_32 pipeXor = 0; + UINT_32 bankXor = 0; - if (bankBits != 0) - { - if (blockBits == 16) - { - const UINT_32 XorPatternLen = 8; - static const UINT_32 XorBank1b[XorPatternLen] = {0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80}; - static const UINT_32 XorBank2b[XorPatternLen] = {0x00, 0x80, 0x40, 0xC0, 0x80, 0x00, 0xC0, 0x40}; - static const UINT_32 XorBank3b[XorPatternLen] = {0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0}; - - const UINT_32 index = pIn->surfIndex % XorPatternLen; - - if (bankBits == 1) - { - bankXor = XorBank1b[index]; - } - else if (bankBits == 2) - { - bankXor = XorBank2b[index]; - } - else - { - bankXor = XorBank3b[index]; + const UINT_32 XorPatternLen = 8; + static const UINT_32 XorBankRot1b[XorPatternLen] = {0, 1, 0, 1, 0, 1, 0, 1}; + static const UINT_32 XorBankRot2b[XorPatternLen] = {0, 2, 1, 3, 2, 0, 3, 1}; + static const UINT_32 XorBankRot3b[XorPatternLen] = {0, 4, 2, 6, 1, 5, 3, 7}; + static const UINT_32 XorBankRot4b[XorPatternLen] = {0, 8, 4, 12, 2, 10, 6, 14}; + static const UINT_32* XorBankRotPat[] = {XorBankRot1b, XorBankRot2b, XorBankRot3b, XorBankRot4b}; - if (bankBits == 4) - { - bankXor >>= (2 - pipeBits); - } - } - } + switch (bankBits) + { + case 1: + case 2: + case 3: + case 4: + bankXor = XorBankRotPat[bankBits - 1][pIn->surfIndex % XorPatternLen] << (m_pipesLog2 + ColumnBits); + break; + default: + // valid bank bits should be 0~4 + ADDR_ASSERT_ALWAYS(); + case 0: + break; } pOut->pipeBankXor = bankXor | pipeXor; @@ -2101,6 +2115,38 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor( const UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits); pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor; + + if (pIn->bpe != 0) + { + const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode, + pIn->resourceType, + Log2(pIn->bpe >> 3), + 1); + + if (pPatInfo != NULL) + { + ADDR_BIT_SETTING fullSwizzlePattern[20]; + GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern); + + const UINT_32 pipeBankXorOffset = + ComputeOffsetFromSwizzlePattern(reinterpret_cast(fullSwizzlePattern), + blockBits, + 0, + 0, + pIn->slice, + 0); + + const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2; + + // Should have no bit set under pipe interleave + ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset); + + // This assertion firing means old approach doesn't calculate a correct sliceXor value... + ADDR_ASSERT(pipeBankXor == pipeXor); + + pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor; + } + } } else { @@ -2171,6 +2217,7 @@ BOOL_32 Gfx10Lib::ValidateNonSwModeParams( const BOOL_32 tex1d = IsTex1d(rsrcType); const BOOL_32 stereo = flags.qbStereo; + // Resource type check if (tex1d) { @@ -2221,11 +2268,18 @@ BOOL_32 Gfx10Lib::ValidateSwModeParams( { BOOL_32 valid = TRUE; - if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE)) + if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE) { ADDR_ASSERT_ALWAYS(); valid = FALSE; } + else if (IsValidSwMode(pIn->swizzleMode) == FALSE) + { + { + ADDR_ASSERT_ALWAYS(); + valid = FALSE; + } + } const ADDR2_SURFACE_FLAGS flags = pIn->flags; const AddrResourceType rsrcType = pIn->resourceType; @@ -2279,13 +2333,20 @@ BOOL_32 Gfx10Lib::ValidateSwModeParams( } else if (tex2d) { - if (((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0) || - (prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) || - (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0))) + if ((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0) + { + { + ADDR_ASSERT_ALWAYS(); + valid = FALSE; + } + } + else if ((prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) || + (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0))) { ADDR_ASSERT_ALWAYS(); valid = FALSE; } + } else if (tex3d) { @@ -2344,8 +2405,10 @@ BOOL_32 Gfx10Lib::ValidateSwModeParams( } else { - ADDR_ASSERT_ALWAYS(); - valid = FALSE; + { + ADDR_ASSERT_ALWAYS(); + valid = FALSE; + } } // Block type check @@ -2418,16 +2481,16 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( } else { - pOut->resourceType = ADDR_RSRC_TEX_2D; - pOut->validBlockSet.value = 0; - pOut->validBlockSet.macroThin64KB = forbid64KbBlockType ? 0 : 1; - pOut->validBlockSet.var = forbidVarBlockType ? 0 : 1; - pOut->validSwModeSet.value = 0; - pOut->validSwModeSet.sw64KB_Z_X = forbid64KbBlockType ? 0 : 1; - pOut->validSwModeSet.swVar_Z_X = forbidVarBlockType ? 0 : 1; - pOut->canXor = TRUE; - pOut->validSwTypeSet.value = AddrSwSetZ; - pOut->clientPreferredSwSet = pOut->validSwTypeSet; + pOut->resourceType = ADDR_RSRC_TEX_2D; + pOut->validBlockSet.value = 0; + pOut->validBlockSet.macroThin64KB = forbid64KbBlockType ? 0 : 1; + pOut->validBlockSet.var = forbidVarBlockType ? 0 : 1; + pOut->validSwModeSet.value = 0; + pOut->validSwModeSet.sw64KB_Z_X = forbid64KbBlockType ? 0 : 1; + pOut->validSwModeSet.gfx10.swVar_Z_X = forbidVarBlockType ? 0 : 1; + pOut->canXor = TRUE; + pOut->validSwTypeSet.value = AddrSwSetZ; + pOut->clientPreferredSwSet = pOut->validSwTypeSet; BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE); @@ -2443,8 +2506,8 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u); AddrSwizzleMode swMode[maxFmaskSwizzleModeType] = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X}; - Dim3d blkDim[maxFmaskSwizzleModeType] = {{0}, {0}}; - Dim3d padDim[maxFmaskSwizzleModeType] = {{0}, {0}}; + Dim3d blkDim[maxFmaskSwizzleModeType] = {{}, {}}; + Dim3d padDim[maxFmaskSwizzleModeType] = {{}, {}}; UINT_64 padSize[maxFmaskSwizzleModeType] = {0}; for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++) @@ -2526,7 +2589,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1); // Pre sanity check on non swizzle mode parameters - ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {}; + ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; localIn.flags = pIn->flags; localIn.resourceType = pIn->resourceType; localIn.format = pIn->format; @@ -2541,7 +2604,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( if (ValidateNonSwModeParams(&localIn)) { // Forbid swizzle mode(s) by client setting - ADDR2_SWMODE_SET allowedSwModeSet = {}; + ADDR2_SWMODE_SET allowedSwModeSet = {0}; allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask; allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx10Blk256BSwModeMask; allowedSwModeSet.value |= @@ -2604,6 +2667,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( case ADDR_RSRC_TEX_2D: allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask; + break; case ADDR_RSRC_TEX_3D: @@ -2646,14 +2710,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( if (pIn->flags.display) { - if (m_settings.isDcn2) - { - allowedSwModeSet.value &= (bpp == 64) ? Dcn2Bpp64SwModeMask : Dcn2NonBpp64SwModeMask; - } - else - { - ADDR_NOT_IMPLEMENTED(); - } + allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp); } if (allowedSwModeSet.value != 0) @@ -2720,14 +2777,14 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType); - // Determine block size if there is 2 or more block type candidates + // Determine block size if there are 2 or more block type candidates if (IsPow2(allowedBlockSet.value) == FALSE) { - AddrSwizzleMode swMode[AddrBlockMaxTiledType] = { ADDR_SW_LINEAR }; + AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {ADDR_SW_LINEAR}; if (m_blockVarSizeLog2 != 0) { - swMode[AddrBlockVar] = ADDR_SW_VAR_R_X; + swMode[AddrBlockThinVar] = ADDR_SW_VAR_R_X; } if (pOut->resourceType == ADDR_RSRC_TEX_3D) @@ -2743,8 +2800,8 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S; } - Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}}; - Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}}; + Dim3d blkDim[AddrBlockMaxTiledType] = {0}; + Dim3d padDim[AddrBlockMaxTiledType] = {0}; UINT_64 padSize[AddrBlockMaxTiledType] = {0}; const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2); @@ -2834,7 +2891,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( } else { - ADDR_ASSERT(minSizeBlk == AddrBlockVar); + ADDR_ASSERT(minSizeBlk == AddrBlockThinVar); allowedSwModeSet.value &= Gfx10BlkVarSwModeMask; } } @@ -2844,7 +2901,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet); - // Determine swizzle type if there is 2 or more swizzle type candidates + // Determine swizzle type if there are 2 or more swizzle type candidates if (IsPow2(allowedSwSet.value) == FALSE) { if (ElemLib::IsBlockCompressed(pIn->format)) @@ -3045,6 +3102,16 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled( { ADDR_E_RETURNCODE ret; + // Mip chain dimesion and epitch has no meaning in GFX10, set to default value + pOut->mipChainPitch = 0; + pOut->mipChainHeight = 0; + pOut->mipChainSlice = 0; + pOut->epitchIsHeight = FALSE; + + // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary + pOut->mipChainInTail = FALSE; + pOut->firstMipIdInTail = pIn->numMipLevels; + if (IsBlock256b(pIn->swizzleMode)) { ret = ComputeSurfaceInfoMicroTiled(pIn, pOut); @@ -3057,6 +3124,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled( return ret; } + /** ************************************************************************************************************************ * Gfx10Lib::ComputeSurfaceInfoMicroTiled @@ -3083,13 +3151,6 @@ ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled( if (ret == ADDR_OK) { - pOut->mipChainPitch = 0; - pOut->mipChainHeight = 0; - pOut->mipChainSlice = 0; - pOut->epitchIsHeight = FALSE; - pOut->mipChainInTail = FALSE; - pOut->firstMipIdInTail = pIn->numMipLevels; - const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode); pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth); @@ -3194,14 +3255,6 @@ ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled( if (returnCode == ADDR_OK) { - // Mip chain dimesion and epitch has no meaning in GFX10, set to default value - pOut->mipChainPitch = 0; - pOut->mipChainHeight = 0; - pOut->mipChainSlice = 0; - pOut->epitchIsHeight = FALSE; - pOut->mipChainInTail = FALSE; - pOut->firstMipIdInTail = pIn->numMipLevels; - const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode); const UINT_32 blockSize = 1 << blockSizeLog2; @@ -3263,7 +3316,7 @@ ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled( { pOut->pMipInfo[i].pitch = pitch; pOut->pMipInfo[i].height = height; - pOut->pMipInfo[i].depth = depth; + pOut->pMipInfo[i].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1; } } } @@ -3317,7 +3370,7 @@ ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled( pOut->pMipInfo[i].pitch = pitch; pOut->pMipInfo[i].height = height; - pOut->pMipInfo[i].depth = depth; + pOut->pMipInfo[i].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1; UINT_32 mipX = ((mipOffset >> 9) & 1) | ((mipOffset >> 10) & 2) | @@ -3353,7 +3406,6 @@ ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled( pitch = Max(pitch >> 1, Block256_2d[index].w); height = Max(height >> 1, Block256_2d[index].h); - depth = 1; } else { @@ -3363,7 +3415,6 @@ ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled( pitch = Max(pitch >> 1, Block256_3d[index].w); height = Max(height >> 1, Block256_3d[index].h); - depth = PowTwoAlign(Max(depth >> 1, 1u), Block256_3d[index].d); } } } @@ -3377,7 +3428,7 @@ ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled( { pOut->pMipInfo[0].pitch = pOut->pitch; pOut->pMipInfo[0].height = pOut->height; - pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1; + pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1; pOut->pMipInfo[0].offset = 0; pOut->pMipInfo[0].mipTailOffset = 0; pOut->pMipInfo[0].macroBlockOffset = 0; @@ -3626,57 +3677,57 @@ const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo( const ADDR_SW_PATINFO* patInfo = NULL; const UINT_32 swizzleMask = 1 << swizzleMode; - if (IsLinear(swizzleMode) == FALSE) + if (IsBlockVariable(swizzleMode)) { - if (IsBlockVariable(swizzleMode)) + if (m_blockVarSizeLog2 != 0) { - if (m_blockVarSizeLog2 != 0) - { - ADDR_ASSERT(m_settings.supportRbPlus); + ADDR_ASSERT(m_settings.supportRbPlus); - if (IsRtOptSwizzle(swizzleMode)) + if (IsRtOptSwizzle(swizzleMode)) + { + if (numFrag == 1) { - if (numFrag == 1) - { - patInfo = SW_VAR_R_X_1xaa_RBPLUS_PATINFO; - } - else if (numFrag == 2) - { - patInfo = SW_VAR_R_X_2xaa_RBPLUS_PATINFO; - } - else if (numFrag == 4) - { - patInfo = SW_VAR_R_X_4xaa_RBPLUS_PATINFO; - } - else - { - ADDR_ASSERT(numFrag == 8); - patInfo = SW_VAR_R_X_8xaa_RBPLUS_PATINFO; - } + patInfo = GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO; } - else if (IsZOrderSwizzle(swizzleMode)) + else if (numFrag == 2) { - if (numFrag == 1) - { - patInfo = SW_VAR_Z_X_1xaa_RBPLUS_PATINFO; - } - else if (numFrag == 2) - { - patInfo = SW_VAR_Z_X_2xaa_RBPLUS_PATINFO; - } - else if (numFrag == 4) - { - patInfo = SW_VAR_Z_X_4xaa_RBPLUS_PATINFO; - } - else - { - ADDR_ASSERT(numFrag == 8); - patInfo = SW_VAR_Z_X_8xaa_RBPLUS_PATINFO; - } + patInfo = GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO; + } + else if (numFrag == 4) + { + patInfo = GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO; + } + else + { + ADDR_ASSERT(numFrag == 8); + patInfo = GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO; + } + } + else if (IsZOrderSwizzle(swizzleMode)) + { + if (numFrag == 1) + { + patInfo = GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO; + } + else if (numFrag == 2) + { + patInfo = GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO; + } + else if (numFrag == 4) + { + patInfo = GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO; + } + else + { + ADDR_ASSERT(numFrag == 8); + patInfo = GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO; } } } - else if (resourceType == ADDR_RSRC_TEX_3D) + } + else if (IsLinear(swizzleMode) == FALSE) + { + if (resourceType == ADDR_RSRC_TEX_3D) { ADDR_ASSERT(numFrag == 1); @@ -3684,16 +3735,19 @@ const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo( { if (IsRtOptSwizzle(swizzleMode)) { - patInfo = m_settings.supportRbPlus ? SW_64K_R_X_1xaa_RBPLUS_PATINFO : SW_64K_R_X_1xaa_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO; } else if (IsZOrderSwizzle(swizzleMode)) { - patInfo = m_settings.supportRbPlus ? SW_64K_Z_X_1xaa_RBPLUS_PATINFO : SW_64K_Z_X_1xaa_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO; } else if (IsDisplaySwizzle(resourceType, swizzleMode)) { ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X); - patInfo = m_settings.supportRbPlus ? SW_64K_D3_X_RBPLUS_PATINFO : SW_64K_D3_X_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_D3_X_RBPLUS_PATINFO : GFX10_SW_64K_D3_X_PATINFO; } else { @@ -3703,28 +3757,33 @@ const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo( { if (swizzleMode == ADDR_SW_4KB_S) { - patInfo = m_settings.supportRbPlus ? SW_4K_S3_RBPLUS_PATINFO : SW_4K_S3_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_4K_S3_RBPLUS_PATINFO : GFX10_SW_4K_S3_PATINFO; } else { ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X); - patInfo = m_settings.supportRbPlus ? SW_4K_S3_X_RBPLUS_PATINFO : SW_4K_S3_X_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_4K_S3_X_RBPLUS_PATINFO : GFX10_SW_4K_S3_X_PATINFO; } } else { if (swizzleMode == ADDR_SW_64KB_S) { - patInfo = m_settings.supportRbPlus ? SW_64K_S3_RBPLUS_PATINFO : SW_64K_S3_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_S3_RBPLUS_PATINFO : GFX10_SW_64K_S3_PATINFO; } else if (swizzleMode == ADDR_SW_64KB_S_X) { - patInfo = m_settings.supportRbPlus ? SW_64K_S3_X_RBPLUS_PATINFO : SW_64K_S3_X_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_S3_X_RBPLUS_PATINFO : GFX10_SW_64K_S3_X_PATINFO; } else { ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T); - patInfo = m_settings.supportRbPlus ? SW_64K_S3_T_RBPLUS_PATINFO : SW_64K_S3_T_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_S3_T_RBPLUS_PATINFO : GFX10_SW_64K_S3_T_PATINFO; } } } @@ -3738,12 +3797,14 @@ const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo( { if (swizzleMode == ADDR_SW_256B_S) { - patInfo = m_settings.supportRbPlus ? SW_256_S_RBPLUS_PATINFO : SW_256_S_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_256_S_RBPLUS_PATINFO : GFX10_SW_256_S_PATINFO; } else { ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D); - patInfo = m_settings.supportRbPlus ? SW_256_D_RBPLUS_PATINFO : SW_256_D_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_256_D_RBPLUS_PATINFO : GFX10_SW_256_D_PATINFO; } } else if (IsBlock4kb(swizzleMode)) @@ -3752,24 +3813,28 @@ const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo( { if (swizzleMode == ADDR_SW_4KB_S) { - patInfo = m_settings.supportRbPlus ? SW_4K_S_RBPLUS_PATINFO : SW_4K_S_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_4K_S_RBPLUS_PATINFO : GFX10_SW_4K_S_PATINFO; } else { ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X); - patInfo = m_settings.supportRbPlus ? SW_4K_S_X_RBPLUS_PATINFO : SW_4K_S_X_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_4K_S_X_RBPLUS_PATINFO : GFX10_SW_4K_S_X_PATINFO; } } else { if (swizzleMode == ADDR_SW_4KB_D) { - patInfo = m_settings.supportRbPlus ? SW_4K_D_RBPLUS_PATINFO : SW_4K_D_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO; } else { ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X); - patInfo = m_settings.supportRbPlus ? SW_4K_D_X_RBPLUS_PATINFO : SW_4K_D_X_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_4K_D_X_RBPLUS_PATINFO : GFX10_SW_4K_D_X_PATINFO; } } } @@ -3779,72 +3844,86 @@ const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo( { if (numFrag == 1) { - patInfo = m_settings.supportRbPlus ? SW_64K_R_X_1xaa_RBPLUS_PATINFO : SW_64K_R_X_1xaa_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO; } else if (numFrag == 2) { - patInfo = m_settings.supportRbPlus ? SW_64K_R_X_2xaa_RBPLUS_PATINFO : SW_64K_R_X_2xaa_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_2xaa_PATINFO; } else if (numFrag == 4) { - patInfo = m_settings.supportRbPlus ? SW_64K_R_X_4xaa_RBPLUS_PATINFO : SW_64K_R_X_4xaa_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_4xaa_PATINFO; } else { ADDR_ASSERT(numFrag == 8); - patInfo = m_settings.supportRbPlus ? SW_64K_R_X_8xaa_RBPLUS_PATINFO : SW_64K_R_X_8xaa_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_8xaa_PATINFO; } } else if (IsZOrderSwizzle(swizzleMode)) { if (numFrag == 1) { - patInfo = m_settings.supportRbPlus ? SW_64K_Z_X_1xaa_RBPLUS_PATINFO : SW_64K_Z_X_1xaa_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO; } else if (numFrag == 2) { - patInfo = m_settings.supportRbPlus ? SW_64K_Z_X_2xaa_RBPLUS_PATINFO : SW_64K_Z_X_2xaa_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_2xaa_PATINFO; } else if (numFrag == 4) { - patInfo = m_settings.supportRbPlus ? SW_64K_Z_X_4xaa_RBPLUS_PATINFO : SW_64K_Z_X_4xaa_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_4xaa_PATINFO; } else { ADDR_ASSERT(numFrag == 8); - patInfo = m_settings.supportRbPlus ? SW_64K_Z_X_8xaa_RBPLUS_PATINFO : SW_64K_Z_X_8xaa_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_8xaa_PATINFO; } } else if (IsDisplaySwizzle(resourceType, swizzleMode)) { if (swizzleMode == ADDR_SW_64KB_D) { - patInfo = m_settings.supportRbPlus ? SW_64K_D_RBPLUS_PATINFO : SW_64K_D_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_D_RBPLUS_PATINFO : GFX10_SW_64K_D_PATINFO; } else if (swizzleMode == ADDR_SW_64KB_D_X) { - patInfo = m_settings.supportRbPlus ? SW_64K_D_X_RBPLUS_PATINFO : SW_64K_D_X_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_D_X_RBPLUS_PATINFO : GFX10_SW_64K_D_X_PATINFO; } else { ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T); - patInfo = m_settings.supportRbPlus ? SW_64K_D_T_RBPLUS_PATINFO : SW_64K_D_T_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_D_T_RBPLUS_PATINFO : GFX10_SW_64K_D_T_PATINFO; } } else { if (swizzleMode == ADDR_SW_64KB_S) { - patInfo = m_settings.supportRbPlus ? SW_64K_S_RBPLUS_PATINFO : SW_64K_S_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_S_RBPLUS_PATINFO : GFX10_SW_64K_S_PATINFO; } else if (swizzleMode == ADDR_SW_64KB_S_X) { - patInfo = m_settings.supportRbPlus ? SW_64K_S_X_RBPLUS_PATINFO : SW_64K_S_X_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_S_X_RBPLUS_PATINFO : GFX10_SW_64K_S_X_PATINFO; } else { ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T); - patInfo = m_settings.supportRbPlus ? SW_64K_S_T_RBPLUS_PATINFO : SW_64K_S_T_PATINFO; + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_S_T_RBPLUS_PATINFO : GFX10_SW_64K_S_T_PATINFO; } } } @@ -3855,6 +3934,7 @@ const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo( return (patInfo != NULL) ? &patInfo[index] : NULL; } + /** ************************************************************************************************************************ * Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled diff --git a/src/amd/addrlib/src/gfx10/gfx10addrlib.h b/src/amd/addrlib/src/gfx10/gfx10addrlib.h index c9a24c0..b2a5832 100644 --- a/src/amd/addrlib/src/gfx10/gfx10addrlib.h +++ b/src/amd/addrlib/src/gfx10/gfx10addrlib.h @@ -55,11 +55,12 @@ struct Gfx10ChipSettings UINT_32 reserved1 : 32; // Misc configuration bits - UINT_32 isDcn2 : 1; + UINT_32 isDcn20 : 1; UINT_32 supportRbPlus : 1; UINT_32 dsMipmapHtileFix : 1; UINT_32 dccUnsup3DSwDis : 1; - UINT_32 reserved2 : 28; + UINT_32 : 2; + UINT_32 reserved2 : 26; }; }; @@ -170,20 +171,32 @@ const UINT_32 Gfx10Rsrc3dThick64KBSwModeMask = Gfx10Rsrc3dThickSwModeMask & Gfx1 const UINT_32 Gfx10MsaaSwModeMask = Gfx10ZSwModeMask | Gfx10RenderSwModeMask; -const UINT_32 Dcn2NonBpp64SwModeMask = (1u << ADDR_SW_LINEAR) | - (1u << ADDR_SW_4KB_S) | - (1u << ADDR_SW_64KB_S) | - (1u << ADDR_SW_64KB_S_T) | - (1u << ADDR_SW_4KB_S_X) | - (1u << ADDR_SW_64KB_S_X) | - (1u << ADDR_SW_64KB_R_X); +const UINT_32 Dcn20NonBpp64SwModeMask = (1u << ADDR_SW_LINEAR) | + (1u << ADDR_SW_4KB_S) | + (1u << ADDR_SW_64KB_S) | + (1u << ADDR_SW_64KB_S_T) | + (1u << ADDR_SW_4KB_S_X) | + (1u << ADDR_SW_64KB_S_X) | + (1u << ADDR_SW_64KB_R_X); + +const UINT_32 Dcn20Bpp64SwModeMask = (1u << ADDR_SW_4KB_D) | + (1u << ADDR_SW_64KB_D) | + (1u << ADDR_SW_64KB_D_T) | + (1u << ADDR_SW_4KB_D_X) | + (1u << ADDR_SW_64KB_D_X) | + Dcn20NonBpp64SwModeMask; + +const UINT_32 Dcn21NonBpp64SwModeMask = (1u << ADDR_SW_LINEAR) | + (1u << ADDR_SW_64KB_S) | + (1u << ADDR_SW_64KB_S_T) | + (1u << ADDR_SW_64KB_S_X) | + (1u << ADDR_SW_64KB_R_X); + +const UINT_32 Dcn21Bpp64SwModeMask = (1u << ADDR_SW_64KB_D) | + (1u << ADDR_SW_64KB_D_T) | + (1u << ADDR_SW_64KB_D_X) | + Dcn21NonBpp64SwModeMask; -const UINT_32 Dcn2Bpp64SwModeMask = (1u << ADDR_SW_4KB_D) | - (1u << ADDR_SW_64KB_D) | - (1u << ADDR_SW_64KB_D_T) | - (1u << ADDR_SW_4KB_D_X) | - (1u << ADDR_SW_64KB_D_X) | - Dcn2NonBpp64SwModeMask; /** ************************************************************************************************************************ * @brief This class is the GFX10 specific address library @@ -315,6 +328,7 @@ protected: virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision); +private: // Initialize equation table VOID InitEquationTable(); @@ -334,7 +348,7 @@ protected: const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; -private: + UINT_32 ComputeOffsetFromSwizzlePattern( const UINT_64* pPattern, UINT_32 numBits, @@ -355,28 +369,6 @@ private: UINT_32* pAlignY, UINT_32* pRightXor) const; - Dim3d GetDccCompressBlk( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode, - UINT_32 bpp) const - { - UINT_32 index = Log2(bpp >> 3); - Dim3d compressBlkDim; - - if (IsThin(resourceType, swizzleMode)) - { - compressBlkDim.w = Block256_2d[index].w; - compressBlkDim.h = Block256_2d[index].h; - compressBlkDim.d = 1; - } - else - { - compressBlkDim = Block256_3d[index]; - } - - return compressBlkDim; - } - static void GetMipSize( UINT_32 mip0Width, UINT_32 mip0Height, @@ -492,13 +484,15 @@ private: } + UINT_32 GetValidDisplaySwizzleModes(UINT_32 bpp) const; + BOOL_32 IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; UINT_32 GetMaxNumMipsInTail(UINT_32 blockSizeLog2, BOOL_32 isThin) const; static ADDR2_BLOCK_SET GetAllowedBlockSet(ADDR2_SWMODE_SET allowedSwModeSet, AddrResourceType rsrcType) { - ADDR2_BLOCK_SET allowedBlockSet = {}; + ADDR2_BLOCK_SET allowedBlockSet = {0}; allowedBlockSet.micro = (allowedSwModeSet.value & Gfx10Blk256BSwModeMask) ? TRUE : FALSE; allowedBlockSet.linear = (allowedSwModeSet.value & Gfx10LinearSwModeMask) ? TRUE : FALSE; @@ -521,7 +515,7 @@ private: static ADDR2_SWTYPE_SET GetAllowedSwSet(ADDR2_SWMODE_SET allowedSwModeSet) { - ADDR2_SWTYPE_SET allowedSwSet = {}; + ADDR2_SWTYPE_SET allowedSwSet = {0}; allowedSwSet.sw_Z = (allowedSwModeSet.value & Gfx10ZSwModeMask) ? TRUE : FALSE; allowedSwSet.sw_S = (allowedSwModeSet.value & Gfx10StandardSwModeMask) ? TRUE : FALSE; diff --git a/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp b/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp index 122471f..4deb55a 100644 --- a/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp +++ b/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp @@ -37,8 +37,6 @@ #include "amdgpu_asic_addr.h" -#include "util/macros.h" - //////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -131,7 +129,6 @@ Gfx9Lib::Gfx9Lib(const Client* pClient) : Lib(pClient) { - m_class = AI_ADDRLIB; memset(&m_settings, 0, sizeof(m_settings)); memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable)); memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey)); @@ -663,6 +660,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo( pOut->metaBlkWidth = metaBlkDim.w; pOut->metaBlkHeight = metaBlkDim.h; pOut->metaBlkDepth = metaBlkDim.d; + pOut->metaBlkSize = numCompressBlkPerMetaBlk * numFrags; pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY; pOut->fastClearSizePerSlice = @@ -804,6 +802,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord( pOut->addr = address >> 1; pOut->bitPosition = static_cast((address & 1) << 2); + UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned, pIn->swizzleMode); @@ -1205,6 +1204,7 @@ BOOL_32 Gfx9Lib::HwlInitGlobalParams( ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2))))) { ADDR_ASSERT(m_settings.isVega10 == FALSE); + ADDR_ASSERT(m_settings.isRaven == FALSE); ADDR_ASSERT(m_settings.isVega20 == FALSE); @@ -1288,18 +1288,19 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily( m_settings.applyAliasFix = 1; } + m_settings.isDcn1 = m_settings.isRaven; + if (ASICREV_IS_RENOIR(uChipRevision)) { m_settings.isRaven = 1; + m_settings.isDcn2 = 1; } - m_settings.isDcn1 = m_settings.isRaven; - m_settings.metaBaseAlignFix = 1; break; default: - ADDR_ASSERT(!"This should be a Fusion"); + ADDR_ASSERT(!"No Chip found"); break; } @@ -1975,7 +1976,7 @@ VOID Gfx9Lib::GenMetaEquation( } } - bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {}; + bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {0}; // Loop through each bit of the channel, get the smallest coordinate, // and remove it from the metaaddr, and rb_equation @@ -2302,8 +2303,8 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation( ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2]; const UINT_32 maxBitsUsed = 4; - ADDR_CHANNEL_SETTING x[maxBitsUsed] = {}; - ADDR_CHANNEL_SETTING y[maxBitsUsed] = {}; + ADDR_CHANNEL_SETTING x[maxBitsUsed] = {0}; + ADDR_CHANNEL_SETTING y[maxBitsUsed] = {0}; for (i = 0; i < maxBitsUsed; i++) { @@ -2465,7 +2466,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation( // Post validation if (ret == ADDR_OK) { - ASSERTED Dim2d microBlockDim = Block256_2d[elementBytesLog2]; + Dim2d microBlockDim = Block256_2d[elementBytesLog2]; ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) == (microBlockDim.w * (1 << elementBytesLog2))); ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h); @@ -2512,12 +2513,12 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation( const UINT_32 maxBitsUsed = 14; ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits); - ADDR_CHANNEL_SETTING x[maxBitsUsed] = {}; - ADDR_CHANNEL_SETTING y[maxBitsUsed] = {}; + ADDR_CHANNEL_SETTING x[maxBitsUsed] = {0}; + ADDR_CHANNEL_SETTING y[maxBitsUsed] = {0}; const UINT_32 extraXorBits = 16; ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2); - ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {}; + ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {0}; for (UINT_32 i = 0; i < maxBitsUsed; i++) { @@ -2671,13 +2672,13 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation( const UINT_32 maxBitsUsed = 12; ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits); - ADDR_CHANNEL_SETTING x[maxBitsUsed] = {}; - ADDR_CHANNEL_SETTING y[maxBitsUsed] = {}; - ADDR_CHANNEL_SETTING z[maxBitsUsed] = {}; + ADDR_CHANNEL_SETTING x[maxBitsUsed] = {0}; + ADDR_CHANNEL_SETTING y[maxBitsUsed] = {0}; + ADDR_CHANNEL_SETTING z[maxBitsUsed] = {0}; const UINT_32 extraXorBits = 24; ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2); - ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {}; + ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {0}; for (UINT_32 i = 0; i < maxBitsUsed; i++) { @@ -2917,54 +2918,39 @@ BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode( { BOOL_32 support = FALSE; + const UINT_32 swizzleMask = 1 << pIn->swizzleMode; + if (m_settings.isDce12) { - switch (pIn->swizzleMode) + if (pIn->bpp == 32) { - case ADDR_SW_256B_D: - case ADDR_SW_256B_R: - support = (pIn->bpp == 32); - break; - - case ADDR_SW_LINEAR: - case ADDR_SW_4KB_D: - case ADDR_SW_4KB_R: - case ADDR_SW_64KB_D: - case ADDR_SW_64KB_R: - case ADDR_SW_4KB_D_X: - case ADDR_SW_4KB_R_X: - case ADDR_SW_64KB_D_X: - case ADDR_SW_64KB_R_X: - support = (pIn->bpp <= 64); - break; - - default: - break; + support = (Dce12Bpp32SwModeMask & swizzleMask) ? TRUE : FALSE; + } + else if (pIn->bpp <= 64) + { + support = (Dce12NonBpp32SwModeMask & swizzleMask) ? TRUE : FALSE; } } else if (m_settings.isDcn1) { - switch (pIn->swizzleMode) + if (pIn->bpp < 64) { - case ADDR_SW_4KB_D: - case ADDR_SW_64KB_D: - case ADDR_SW_64KB_D_T: - case ADDR_SW_4KB_D_X: - case ADDR_SW_64KB_D_X: - support = (pIn->bpp == 64); - break; - - case ADDR_SW_LINEAR: - case ADDR_SW_4KB_S: - case ADDR_SW_64KB_S: - case ADDR_SW_64KB_S_T: - case ADDR_SW_4KB_S_X: - case ADDR_SW_64KB_S_X: - support = (pIn->bpp <= 64); - break; - - default: - break; + support = (Dcn1NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE; + } + else if (pIn->bpp == 64) + { + support = (Dcn1Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE; + } + } + else if (m_settings.isDcn2) + { + if (pIn->bpp < 64) + { + support = (Dcn2NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE; + } + else if (pIn->bpp == 64) + { + support = (Dcn2Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE; } } else @@ -3393,7 +3379,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( const BOOL_32 displayRsrc = pIn->flags.display || pIn->flags.rotated; // Pre sanity check on non swizzle mode parameters - ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {}; + ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; localIn.flags = pIn->flags; localIn.resourceType = pOut->resourceType; localIn.format = pIn->format; @@ -3408,7 +3394,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( if (ValidateNonSwModeParams(&localIn)) { // Forbid swizzle mode(s) by client setting - ADDR2_SWMODE_SET allowedSwModeSet = {}; + ADDR2_SWMODE_SET allowedSwModeSet = {0}; allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask; allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask; allowedSwModeSet.value |= @@ -3577,6 +3563,10 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( { allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask; } + else if (m_settings.isDcn2) + { + allowedSwModeSet.value &= (bpp == 64) ? Dcn2Bpp64SwModeMask : Dcn2NonBpp64SwModeMask; + } else { ADDR_NOT_IMPLEMENTED(); @@ -3630,10 +3620,10 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType); - // Determine block size if there is 2 or more block type candidates + // Determine block size if there are 2 or more block type candidates if (IsPow2(allowedBlockSet.value) == FALSE) { - AddrSwizzleMode swMode[AddrBlockMaxTiledType] = { ADDR_SW_LINEAR }; + AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {ADDR_SW_LINEAR}; swMode[AddrBlockMicro] = ADDR_SW_256B_D; swMode[AddrBlockThin4KB] = ADDR_SW_4KB_D; @@ -3645,8 +3635,8 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S; } - Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}}; - Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}, {0}, {0}, {0}}; + Dim3d blkDim[AddrBlockMaxTiledType] = {0}; + Dim3d padDim[AddrBlockMaxTiledType] = {0}; UINT_64 padSize[AddrBlockMaxTiledType] = {0}; const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2); @@ -3725,7 +3715,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet); - // Determine swizzle type if there is 2 or more swizzle type candidates + // Determine swizzle type if there are 2 or more swizzle type candidates if (IsPow2(allowedSwSet.value) == FALSE) { if (ElemLib::IsBlockCompressed(pIn->format)) diff --git a/src/amd/addrlib/src/gfx9/gfx9addrlib.h b/src/amd/addrlib/src/gfx9/gfx9addrlib.h index 93e6515..e433bb0 100644 --- a/src/amd/addrlib/src/gfx9/gfx9addrlib.h +++ b/src/amd/addrlib/src/gfx9/gfx9addrlib.h @@ -62,7 +62,8 @@ struct Gfx9ChipSettings // Display engine IP version name UINT_32 isDce12 : 1; UINT_32 isDcn1 : 1; - UINT_32 reserved1 : 30; + UINT_32 isDcn2 : 1; + UINT_32 reserved1 : 29; // Misc configuration bits UINT_32 metaBaseAlignFix : 1; @@ -215,6 +216,16 @@ const UINT_32 Dcn1Bpp64SwModeMask = (1u << ADDR_SW_4KB_D) | (1u << ADDR_SW_64KB_D_X) | Dcn1NonBpp64SwModeMask; +const UINT_32 Dcn2NonBpp64SwModeMask = (1u << ADDR_SW_LINEAR) | + (1u << ADDR_SW_64KB_S) | + (1u << ADDR_SW_64KB_S_T) | + (1u << ADDR_SW_64KB_S_X); + +const UINT_32 Dcn2Bpp64SwModeMask = (1u << ADDR_SW_64KB_D) | + (1u << ADDR_SW_64KB_D_T) | + (1u << ADDR_SW_64KB_D_X) | + Dcn2NonBpp64SwModeMask; + /** ************************************************************************************************************************ * @brief GFX9 meta equation parameters @@ -438,7 +449,7 @@ private: static ADDR2_BLOCK_SET GetAllowedBlockSet(ADDR2_SWMODE_SET allowedSwModeSet, AddrResourceType rsrcType) { - ADDR2_BLOCK_SET allowedBlockSet = {}; + ADDR2_BLOCK_SET allowedBlockSet = {0}; allowedBlockSet.micro = (allowedSwModeSet.value & Gfx9Blk256BSwModeMask) ? TRUE : FALSE; allowedBlockSet.linear = (allowedSwModeSet.value & Gfx9LinearSwModeMask) ? TRUE : FALSE; @@ -461,7 +472,7 @@ private: static ADDR2_SWTYPE_SET GetAllowedSwSet(ADDR2_SWMODE_SET allowedSwModeSet) { - ADDR2_SWTYPE_SET allowedSwSet = {}; + ADDR2_SWTYPE_SET allowedSwSet = {0}; allowedSwSet.sw_Z = (allowedSwModeSet.value & Gfx9ZSwModeMask) ? TRUE : FALSE; allowedSwSet.sw_S = (allowedSwModeSet.value & Gfx9StandardSwModeMask) ? TRUE : FALSE; diff --git a/src/amd/addrlib/src/r800/ciaddrlib.cpp b/src/amd/addrlib/src/r800/ciaddrlib.cpp index 5a83e71..51718ed 100644 --- a/src/amd/addrlib/src/r800/ciaddrlib.cpp +++ b/src/amd/addrlib/src/r800/ciaddrlib.cpp @@ -179,7 +179,6 @@ CiLib::CiLib(const Client* pClient) m_noOfMacroEntries(0), m_allowNonDispThickModes(FALSE) { - m_class = CI_ADDRLIB; } /** @@ -410,7 +409,7 @@ ChipFamily CiLib::HwlConvertChipFamily( family = ADDR_CHIP_FAMILY_VI; break; default: - ADDR_ASSERT(!"This should be a unexpected Fusion"); + ADDR_ASSERT(!"No Chip found"); break; } @@ -1588,7 +1587,14 @@ VOID CiLib::ReadGbTileMode( gbTileMode.val = regValue; pCfg->type = static_cast(gbTileMode.f.micro_tile_mode_new); - pCfg->info.pipeConfig = static_cast(gbTileMode.f.pipe_config + 1); + if (AltTilingEnabled() == TRUE) + { + pCfg->info.pipeConfig = static_cast(gbTileMode.f.alt_pipe_config + 1); + } + else + { + pCfg->info.pipeConfig = static_cast(gbTileMode.f.pipe_config + 1); + } if (pCfg->type == ADDR_DEPTH_SAMPLE_ORDER) { @@ -1730,10 +1736,19 @@ VOID CiLib::ReadGbMacroTileCfg( GB_MACROTILE_MODE gbTileMode; gbTileMode.val = regValue; - pCfg->bankHeight = 1 << gbTileMode.f.bank_height; + if (AltTilingEnabled() == TRUE) + { + pCfg->bankHeight = 1 << gbTileMode.f.alt_bank_height; + pCfg->banks = 1 << (gbTileMode.f.alt_num_banks + 1); + pCfg->macroAspectRatio = 1 << gbTileMode.f.alt_macro_tile_aspect; + } + else + { + pCfg->bankHeight = 1 << gbTileMode.f.bank_height; + pCfg->banks = 1 << (gbTileMode.f.num_banks + 1); + pCfg->macroAspectRatio = 1 << gbTileMode.f.macro_tile_aspect; + } pCfg->bankWidth = 1 << gbTileMode.f.bank_width; - pCfg->banks = 1 << (gbTileMode.f.num_banks + 1); - pCfg->macroAspectRatio = 1 << gbTileMode.f.macro_tile_aspect; } /** @@ -2032,6 +2047,7 @@ UINT_64 CiLib::HwlComputeMetadataNibbleAddress( /// NOTE *2 because we are converting to Nibble address in this step UINT_64 metaAddressInPipe = blockInBankpipeWithBankBits * 2 * metadataBitSize / 8; + ///-------------------------------------------------------------------------------------------- /// Reinsert pipe bits back into the final address ///-------------------------------------------------------------------------------------------- diff --git a/src/amd/addrlib/src/r800/ciaddrlib.h b/src/amd/addrlib/src/r800/ciaddrlib.h index 6461389..2e72ec4 100644 --- a/src/amd/addrlib/src/r800/ciaddrlib.h +++ b/src/amd/addrlib/src/r800/ciaddrlib.h @@ -188,6 +188,11 @@ private: return ((m_settings.isVolcanicIslands == TRUE) || (m_configFlags.forceDccAndTcCompat == TRUE)); } + BOOL_32 AltTilingEnabled() const + { + return (m_configFlags.enableAltTiling == TRUE); + } + static const UINT_32 MacroTileTableSize = 16; static const UINT_32 PrtMacroModeOffset = MacroTileTableSize / 2; static const INT_32 MinDepth2DThinIndex = 0; diff --git a/src/amd/addrlib/src/r800/egbaddrlib.cpp b/src/amd/addrlib/src/r800/egbaddrlib.cpp index bdeda5f..84c7b20 100644 --- a/src/amd/addrlib/src/r800/egbaddrlib.cpp +++ b/src/amd/addrlib/src/r800/egbaddrlib.cpp @@ -32,8 +32,6 @@ #include "egbaddrlib.h" -#include "util/macros.h" - namespace Addr { namespace V1 @@ -435,6 +433,7 @@ BOOL_32 EgBasedLib::ComputeSurfaceInfoMicroTiled( &expPitch, &expHeight); + pOut->pitch = expPitch; pOut->height = expHeight; pOut->depth = expNumSlices; @@ -446,6 +445,7 @@ BOOL_32 EgBasedLib::ComputeSurfaceInfoMicroTiled( return valid; } + /** **************************************************************************************************** * EgBasedLib::ComputeSurfaceInfoMacroTiled @@ -750,6 +750,7 @@ BOOL_32 EgBasedLib::ComputeSurfaceAlignmentsMicroTiled( return valid; } + /** **************************************************************************************************** * EgBasedLib::HwlReduceBankWidthHeight @@ -975,7 +976,7 @@ BOOL_32 EgBasedLib::SanityCheckMacroTiled( ) const { BOOL_32 valid = TRUE; - ASSERTED UINT_32 numPipes = HwlGetPipes(pTileInfo); + UINT_32 numPipes = HwlGetPipes(pTileInfo); switch (pTileInfo->banks) { @@ -1089,7 +1090,6 @@ AddrTileMode EgBasedLib::ComputeSurfaceMipLevelTileMode( ) const { UINT_64 bytesPerSlice; - (void)bytesPerSlice; UINT_32 bytesPerTile; AddrTileMode expTileMode = baseTileMode; @@ -1811,6 +1811,7 @@ UINT_64 EgBasedLib::ComputeSurfaceAddrFromCoordMacroTiled( tileSplitSlice, pTileInfo); + // // Split the offset to put some bits below the pipe+bank bits and some above. // @@ -2154,6 +2155,7 @@ VOID EgBasedLib::HwlComputePixelCoordFromOffset( *pSlice += z; } + /** **************************************************************************************************** * EgBasedLib::DispatchComputeSurfaceCoordFromAddrDispatch @@ -2298,6 +2300,7 @@ VOID EgBasedLib::DispatchComputeSurfaceCoordFromAddr( } } + /** **************************************************************************************************** * EgBasedLib::ComputeSurfaceCoordFromAddrMacroTiled @@ -2342,6 +2345,7 @@ VOID EgBasedLib::ComputeSurfaceCoordFromAddrMacroTiled( UINT_32 tileIndex; UINT_64 totalOffset; + UINT_32 bank; UINT_32 pipe; UINT_32 groupBits = m_pipeInterleaveBytes << 3; @@ -2663,6 +2667,7 @@ ADDR_E_RETURNCODE EgBasedLib::HwlExtractBankPipeSwizzle( return ADDR_OK; } + /** **************************************************************************************************** * EgBasedLib::HwlCombineBankPipeSwizzle @@ -2724,7 +2729,6 @@ ADDR_E_RETURNCODE EgBasedLib::HwlComputeBaseSwizzle( }; UINT_32 pipes = HwlGetPipes(pTileInfo); - (void)pipes; UINT_32 banks = pTileInfo ? pTileInfo->banks : 2; UINT_32 hwNumBanks; @@ -3032,6 +3036,7 @@ UINT_32 EgBasedLib::ComputeBankFromCoord( break; } + // // Compute bank rotation for the tile split slice. // @@ -3129,6 +3134,8 @@ UINT_32 EgBasedLib::ComputePipeRotation( return rotation; } + + /** **************************************************************************************************** * EgBasedLib::ComputeBankRotation @@ -3171,6 +3178,7 @@ UINT_32 EgBasedLib::ComputeBankRotation( return rotation; } + /** **************************************************************************************************** * EgBasedLib::ComputeHtileBytes @@ -4095,7 +4103,7 @@ UINT_64 EgBasedLib::HwlGetSizeAdjustmentMicroTiled( ) const { UINT_64 logicalSliceSize; - ASSERTED UINT_64 physicalSliceSize; + UINT_64 physicalSliceSize; UINT_32 pitch = *pPitch; UINT_32 height = *pHeight; diff --git a/src/amd/addrlib/src/r800/siaddrlib.cpp b/src/amd/addrlib/src/r800/siaddrlib.cpp index c91f726..59c481f 100644 --- a/src/amd/addrlib/src/r800/siaddrlib.cpp +++ b/src/amd/addrlib/src/r800/siaddrlib.cpp @@ -112,7 +112,6 @@ SiLib::SiLib(const Client* pClient) m_noOfEntries(0), m_numEquations(0) { - m_class = SI_ADDRLIB; memset(&m_settings, 0, sizeof(m_settings)); } @@ -1930,7 +1929,7 @@ ChipFamily SiLib::HwlConvertChipFamily( m_settings.isHainan = ASICREV_IS_HAINAN_V(uChipRevision); break; default: - ADDR_ASSERT(!"This should be a Fusion"); + ADDR_ASSERT(!"No Chip found"); break; } -- 2.7.4