amdgpu/addrlib: Adjust bank equation bit order based on macro tile aspect ratio settings
author Frans Gu <frans.gu@amd.com>
Fri, 4 Mar 2016 10:04:23 +0000 (05:04 -0500)
committer Marek Olšák <marek.olsak@amd.com>
Thu, 30 Mar 2017 12:44:33 +0000 (14:44 +0200)
This way, we can have a valid equation for the 2D_THIN1 tile mode.
Add the flag "preferEquation" to return the equation index without adjusting
the input tile mode.

src/amd/addrlib/addrinterface.h
src/amd/addrlib/r800/ciaddrlib.cpp
src/amd/addrlib/r800/siaddrlib.cpp
src/amd/addrlib/r800/siaddrlib.h

index 065545e..c68cacf 100644 (file)
@@ -146,10 +146,12 @@ typedef union _ADDR_EQUATION_KEY
         UINT_32 tileMode         : 5; ///< Tile mode
         UINT_32 microTileType    : 3; ///< Micro tile type
         UINT_32 pipeConfig       : 5; ///< pipe config
-        UINT_32 numBanks         : 5; ///< Number of banks
+        UINT_32 numBanksLog2     : 3; ///< Number of banks log2
         UINT_32 bankWidth        : 4; ///< Bank width
         UINT_32 bankHeight       : 4; ///< Bank height
         UINT_32 macroAspectRatio : 3; ///< Macro tile aspect ratio
+        UINT_32 prt              : 1; ///< SI only, indicate whether this equation is for prt
+        UINT_32 reserved         : 1; ///< Reserved bit
     } fields;
     UINT_32 value;
 } ADDR_EQUATION_KEY;
@@ -516,7 +518,8 @@ typedef union _ADDR_SURFACE_FLAGS
         UINT_32 skipIndicesOutput    : 1; ///< Skipping indices in output.
         UINT_32 rotateDisplay        : 1; ///< Rotate micro tile type
         UINT_32 minimizeAlignment    : 1; ///< Minimize alignment
-        UINT_32 reserved             : 5; ///< Reserved bits
+        UINT_32 preferEquation       : 1; ///< Return equation index without adjusting tile mode
+        UINT_32 reserved             : 4; ///< Reserved bits
     };
 
     UINT_32 value;
index cf08566..2c62979 100644 (file)
@@ -889,48 +889,54 @@ VOID CiLib::HwlOptimizeTileMode(
 
     // Override 2D/3D macro tile mode to PRT_* tile mode if
     // client driver requests this surface is equation compatible
-    if ((pInOut->flags.needEquation == TRUE) &&
-        (pInOut->numSamples <= 1) &&
-        (IsMacroTiled(tileMode) == TRUE) &&
-        (IsPrtTileMode(tileMode) == FALSE))
+    if (IsMacroTiled(tileMode) == TRUE)
     {
-        UINT_32 thickness = Thickness(tileMode);
-
-        if ((pInOut->maxBaseAlign != 0) && (pInOut->maxBaseAlign < Block64K))
-        {
-            tileMode = (thickness == 1) ? ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK;
-        }
-        else if (thickness == 1)
+        if ((pInOut->flags.needEquation == TRUE) &&
+            (pInOut->numSamples <= 1) &&
+            (IsPrtTileMode(tileMode) == FALSE))
         {
-            tileMode = ADDR_TM_PRT_TILED_THIN1;
-        }
-        else
-        {
-            static const UINT_32 PrtTileBytes = 0x10000;
-            // First prt thick tile index in the tile mode table
-            static const UINT_32 PrtThickTileIndex = 22;
-            ADDR_TILEINFO tileInfo = {0};
-
-            HwlComputeMacroModeIndex(PrtThickTileIndex,
-                                     pInOut->flags,
-                                     pInOut->bpp,
-                                     pInOut->numSamples,
-                                     &tileInfo);
-
-            UINT_32 macroTileBytes = ((pInOut->bpp) >> 3) * 64 * pInOut->numSamples *
-                                     thickness * HwlGetPipes(&tileInfo) *
-                                     tileInfo.banks * tileInfo.bankWidth *
-                                     tileInfo.bankHeight;
-
-            if (macroTileBytes <= PrtTileBytes)
-            {
-                tileMode = ADDR_TM_PRT_TILED_THICK;
-            }
-            else
+            if ((pInOut->numSlices > 1) && ((pInOut->maxBaseAlign == 0) || (pInOut->maxBaseAlign >= Block64K)))
             {
-                tileMode = ADDR_TM_PRT_TILED_THIN1;
+                UINT_32 thickness = Thickness(tileMode);
+
+                if (thickness == 1)
+                {
+                    tileMode = ADDR_TM_PRT_TILED_THIN1;
+                }
+                else
+                {
+                    static const UINT_32 PrtTileBytes = 0x10000;
+                    // First prt thick tile index in the tile mode table
+                    static const UINT_32 PrtThickTileIndex = 22;
+                    ADDR_TILEINFO tileInfo = {0};
+
+                    HwlComputeMacroModeIndex(PrtThickTileIndex,
+                                             pInOut->flags,
+                                             pInOut->bpp,
+                                             pInOut->numSamples,
+                                             &tileInfo);
+
+                    UINT_32 macroTileBytes = ((pInOut->bpp) >> 3) * 64 * pInOut->numSamples *
+                                             thickness * HwlGetPipes(&tileInfo) *
+                                             tileInfo.banks * tileInfo.bankWidth *
+                                             tileInfo.bankHeight;
+
+                    if (macroTileBytes <= PrtTileBytes)
+                    {
+                        tileMode = ADDR_TM_PRT_TILED_THICK;
+                    }
+                    else
+                    {
+                        tileMode = ADDR_TM_PRT_TILED_THIN1;
+                    }
+                }
             }
         }
+
+        if (pInOut->maxBaseAlign != 0)
+        {
+            pInOut->flags.dccCompatible = FALSE;
+        }
     }
 
     if (tileMode != pInOut->tileMode)
index c1f6dac..d3e9486 100644 (file)
@@ -67,6 +67,43 @@ Lib* SiHwlInit(const Client* pClient)
 namespace V1
 {
 
+// We don't support MSAA for equation
+const BOOL_32 SiLib::m_EquationSupport[SiLib::TileTableSize][SiLib::MaxNumElementBytes] =
+{
+    {TRUE,  TRUE,  TRUE,  FALSE, FALSE},    //  0, non-AA compressed depth or any stencil
+    {FALSE, FALSE, FALSE, FALSE, FALSE},    //  1, 2xAA/4xAA compressed depth with or without stencil
+    {FALSE, FALSE, FALSE, FALSE, FALSE},    //  2, 8xAA compressed depth with or without stencil
+    {FALSE, TRUE,  FALSE, FALSE, FALSE},    //  3, 16 bpp depth PRT (non-MSAA), don't support uncompressed depth
+    {TRUE,  TRUE,  TRUE,  FALSE, FALSE},    //  4, 1D depth
+    {FALSE, FALSE, FALSE, FALSE, FALSE},    //  5, 16 bpp depth PRT (4xMSAA)
+    {FALSE, FALSE, TRUE,  FALSE, FALSE},    //  6, 32 bpp depth PRT (non-MSAA)
+    {FALSE, FALSE, FALSE, FALSE, FALSE},    //  7, 32 bpp depth PRT (4xMSAA)
+    {TRUE,  TRUE,  TRUE,  TRUE,  TRUE },    //  8, Linear
+    {TRUE,  TRUE,  TRUE,  TRUE,  TRUE },    //  9, 1D display
+    {TRUE,  FALSE, FALSE, FALSE, FALSE},    // 10, 8 bpp color (displayable)
+    {FALSE, TRUE,  FALSE, FALSE, FALSE},    // 11, 16 bpp color (displayable)
+    {FALSE, FALSE, TRUE,  TRUE,  FALSE},    // 12, 32/64 bpp color (displayable)
+    {TRUE,  TRUE,  TRUE,  TRUE,  TRUE },    // 13, 1D thin
+    {TRUE,  FALSE, FALSE, FALSE, FALSE},    // 14, 8 bpp color non-displayable
+    {FALSE, TRUE,  FALSE, FALSE, FALSE},    // 15, 16 bpp color non-displayable
+    {FALSE, FALSE, TRUE,  FALSE, FALSE},    // 16, 32 bpp color non-displayable
+    {FALSE, FALSE, FALSE, TRUE,  TRUE },    // 17, 64/128 bpp color non-displayable
+    {TRUE,  TRUE,  TRUE,  TRUE,  TRUE },    // 18, 1D THICK
+    {FALSE, FALSE, FALSE, FALSE, FALSE},    // 19, 2D XTHICK
+    {FALSE, FALSE, FALSE, FALSE, FALSE},    // 20, 2D THICK
+    {TRUE,  FALSE, FALSE, FALSE, FALSE},    // 21, 8 bpp 2D PRTs (non-MSAA)
+    {FALSE, TRUE,  FALSE, FALSE, FALSE},    // 22, 16 bpp 2D PRTs (non-MSAA)
+    {FALSE, FALSE, TRUE,  FALSE, FALSE},    // 23, 32 bpp 2D PRTs (non-MSAA)
+    {FALSE, FALSE, FALSE, TRUE,  FALSE},    // 24, 64 bpp 2D PRTs (non-MSAA)
+    {FALSE, FALSE, FALSE, FALSE, TRUE },    // 25, 128bpp 2D PRTs (non-MSAA)
+    {FALSE, FALSE, FALSE, FALSE, FALSE},    // 26, none
+    {FALSE, FALSE, FALSE, FALSE, FALSE},    // 27, none
+    {FALSE, FALSE, FALSE, FALSE, FALSE},    // 28, none
+    {FALSE, FALSE, FALSE, FALSE, FALSE},    // 29, none
+    {FALSE, FALSE, FALSE, FALSE, FALSE},    // 30, 64bpp 2D PRTs (4xMSAA)
+    {FALSE, FALSE, FALSE, FALSE, FALSE},    // 31, none
+};
+
 /**
 ****************************************************************************************************
 *   SiLib::SiLib
@@ -219,37 +256,132 @@ ADDR_E_RETURNCODE SiLib::ComputeBankEquation(
     switch (pTileInfo->banks)
     {
         case 16:
-            pEquation->addr[0] = y6;
-            pEquation->xor1[0] = x3;
-            pEquation->addr[1] = y5;
-            pEquation->xor1[1] = y6;
-            pEquation->xor2[1] = x4;
-            pEquation->addr[2] = y4;
-            pEquation->xor1[2] = x5;
-            pEquation->addr[3] = y3;
-            pEquation->xor1[3] = x6;
+            if (pTileInfo->macroAspectRatio == 1)
+            {
+                pEquation->addr[0] = y6;
+                pEquation->xor1[0] = x3;
+                pEquation->addr[1] = y5;
+                pEquation->xor1[1] = y6;
+                pEquation->xor2[1] = x4;
+                pEquation->addr[2] = y4;
+                pEquation->xor1[2] = x5;
+                pEquation->addr[3] = y3;
+                pEquation->xor1[3] = x6;
+            }
+            else if (pTileInfo->macroAspectRatio == 2)
+            {
+                pEquation->addr[0] = x3;
+                pEquation->xor1[0] = y6;
+                pEquation->addr[1] = y5;
+                pEquation->xor1[1] = y6;
+                pEquation->xor2[1] = x4;
+                pEquation->addr[2] = y4;
+                pEquation->xor1[2] = x5;
+                pEquation->addr[3] = y3;
+                pEquation->xor1[3] = x6;
+            }
+            else if (pTileInfo->macroAspectRatio == 4)
+            {
+                pEquation->addr[0] = x3;
+                pEquation->xor1[0] = y6;
+                pEquation->addr[1] = x4;
+                pEquation->xor1[1] = y5;
+                pEquation->xor2[1] = y6;
+                pEquation->addr[2] = y4;
+                pEquation->xor1[2] = x5;
+                pEquation->addr[3] = y3;
+                pEquation->xor1[3] = x6;
+            }
+            else if (pTileInfo->macroAspectRatio == 8)
+            {
+                pEquation->addr[0] = x3;
+                pEquation->xor1[0] = y6;
+                pEquation->addr[1] = x4;
+                pEquation->xor1[1] = y5;
+                pEquation->xor2[1] = y6;
+                pEquation->addr[2] = x5;
+                pEquation->xor1[2] = y4;
+                pEquation->addr[3] = y3;
+                pEquation->xor1[3] = x6;
+            }
+            else
+            {
+                ADDR_ASSERT_ALWAYS();
+            }
             pEquation->numBits = 4;
             break;
         case 8:
-            pEquation->addr[0] = y5;
-            pEquation->xor1[0] = x3;
-            pEquation->addr[1] = y4;
-            pEquation->xor1[1] = y5;
-            pEquation->xor2[1] = x4;
-            pEquation->addr[2] = y3;
-            pEquation->xor1[2] = x5;
+            if (pTileInfo->macroAspectRatio == 1)
+            {
+                pEquation->addr[0] = y5;
+                pEquation->xor1[0] = x3;
+                pEquation->addr[1] = y4;
+                pEquation->xor1[1] = y5;
+                pEquation->xor2[1] = x4;
+                pEquation->addr[2] = y3;
+                pEquation->xor1[2] = x5;
+            }
+            else if (pTileInfo->macroAspectRatio == 2)
+            {
+                pEquation->addr[0] = x3;
+                pEquation->xor1[0] = y5;
+                pEquation->addr[1] = y4;
+                pEquation->xor1[1] = y5;
+                pEquation->xor2[1] = x4;
+                pEquation->addr[2] = y3;
+                pEquation->xor1[2] = x5;
+            }
+            else if (pTileInfo->macroAspectRatio == 4)
+            {
+                pEquation->addr[0] = x3;
+                pEquation->xor1[0] = y5;
+                pEquation->addr[1] = x4;
+                pEquation->xor1[1] = y4;
+                pEquation->xor2[1] = y5;
+                pEquation->addr[2] = y3;
+                pEquation->xor1[2] = x5;
+            }
+            else
+            {
+                ADDR_ASSERT_ALWAYS();
+            }
             pEquation->numBits = 3;
             break;
         case 4:
-            pEquation->addr[0] = y4;
-            pEquation->xor1[0] = x3;
-            pEquation->addr[1] = y3;
-            pEquation->xor1[1] = x4;
+            if (pTileInfo->macroAspectRatio == 1)
+            {
+                pEquation->addr[0] = y4;
+                pEquation->xor1[0] = x3;
+                pEquation->addr[1] = y3;
+                pEquation->xor1[1] = x4;
+            }
+            else if (pTileInfo->macroAspectRatio == 2)
+            {
+                pEquation->addr[0] = x3;
+                pEquation->xor1[0] = y4;
+                pEquation->addr[1] = y3;
+                pEquation->xor1[1] = x4;
+            }
+            else
+            {
+                pEquation->addr[0] = x3;
+                pEquation->xor1[0] = y4;
+                pEquation->addr[1] = x4;
+                pEquation->xor1[1] = y3;
+            }
             pEquation->numBits = 2;
             break;
         case 2:
-            pEquation->addr[0] = y3;
-            pEquation->xor1[0] = x3;
+            if (pTileInfo->macroAspectRatio == 1)
+            {
+                pEquation->addr[0] = y3;
+                pEquation->xor1[0] = x3;
+            }
+            else
+            {
+                pEquation->addr[0] = x3;
+                pEquation->xor1[0] = y3;
+            }
             pEquation->numBits = 1;
             break;
         default:
@@ -2522,11 +2654,24 @@ ADDR_E_RETURNCODE SiLib::HwlComputeSurfaceInfo(
 
     UINT_32 tileIndex = static_cast<UINT_32>(pOut->tileIndex);
 
-    if ((pIn->flags.needEquation == TRUE) &&
+    if (((pIn->flags.needEquation   == TRUE) ||
+         (pIn->flags.preferEquation == TRUE)) &&
         (pIn->numSamples <= 1) &&
         (tileIndex < TileTableSize))
     {
-        pOut->equationIndex = m_equationLookupTable[Log2(pIn->bpp >> 3)][tileIndex];
+        static const UINT_32 SiUncompressDepthTileIndex = 3;
+
+        if ((pIn->flags.prt == FALSE) &&
+            (m_uncompressDepthEqIndex != 0) &&
+            (tileIndex == SiUncompressDepthTileIndex))
+        {
+            pOut->equationIndex = m_uncompressDepthEqIndex + Log2(pIn->bpp >> 3);
+        }
+        else
+        {
+
+            pOut->equationIndex = m_equationLookupTable[Log2(pIn->bpp >> 3)][tileIndex];
+        }
 
         if (pOut->equationIndex != ADDR_INVALID_EQUATION_INDEX)
         {
@@ -3157,8 +3302,6 @@ VOID SiLib::HwlOptimizeTileMode(
     {
         UINT_32 thickness = Thickness(tileMode);
 
-        pInOut->flags.prt = TRUE;
-
         if (thickness > 1)
         {
             tileMode = ADDR_TM_1D_TILED_THICK;
@@ -3449,7 +3592,7 @@ VOID SiLib::InitEquationTable()
             HwlComputeMacroModeIndex(tileIndex, flags, bpp, 1, &tileConfig.info, NULL, NULL);
 
             // Check if the input is supported
-            if (IsEquationSupported(bpp, tileConfig, tileIndex) == TRUE)
+            if (IsEquationSupported(bpp, tileConfig, tileIndex, log2ElementBytes) == TRUE)
             {
                 ADDR_EQUATION_KEY  key   = {{0}};
 
@@ -3461,10 +3604,12 @@ VOID SiLib::InitEquationTable()
                 key.fields.microTileType    = (tileConfig.type == ADDR_DEPTH_SAMPLE_ORDER) ?
                                               ADDR_NON_DISPLAYABLE : tileConfig.type;
                 key.fields.pipeConfig       = tileConfig.info.pipeConfig;
-                key.fields.numBanks         = tileConfig.info.banks;
+                key.fields.numBanksLog2     = Log2(tileConfig.info.banks);
                 key.fields.bankWidth        = tileConfig.info.bankWidth;
                 key.fields.bankHeight       = tileConfig.info.bankHeight;
                 key.fields.macroAspectRatio = tileConfig.info.macroAspectRatio;
+                key.fields.prt              = ((m_chipFamily == ADDR_CHIP_FAMILY_SI) &&
+                                               ((1 << tileIndex) & SiPrtTileIndexMask)) ? 1 : 0;
 
                 // Find in the table if the equation has been built based on the key
                 for (UINT_32 i = 0; i < m_numEquations; i++)
@@ -3528,7 +3673,7 @@ VOID SiLib::InitEquationTable()
                                 MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks /
                                 pTileInfo->macroAspectRatio;
 
-                            if (m_chipFamily == ADDR_CHIP_FAMILY_SI)
+                            if (key.fields.prt)
                             {
                                 UINT_32 macroTileSize =
                                     m_blockWidth[equationIndex] * m_blockHeight[equationIndex] *
@@ -3571,6 +3716,48 @@ VOID SiLib::InitEquationTable()
             // fill the invalid equation index
             m_equationLookupTable[log2ElementBytes][tileIndex] = equationIndex;
         }
+
+        if (m_chipFamily == ADDR_CHIP_FAMILY_SI)
+        {
+            // For tile index 3 which is shared between PRT depth and uncompressed depth
+            m_uncompressDepthEqIndex = m_numEquations;
+
+            for (UINT_32 log2ElemBytes = 0; log2ElemBytes < MaxNumElementBytes; log2ElemBytes++)
+            {
+                TileConfig        tileConfig = m_tileTable[3];
+                ADDR_EQUATION     equation;
+                ADDR_E_RETURNCODE retCode;
+
+                memset(&equation, 0, sizeof(ADDR_EQUATION));
+
+                retCode = ComputeMacroTileEquation(log2ElemBytes,
+                                                   tileConfig.mode,
+                                                   tileConfig.type,
+                                                   &tileConfig.info,
+                                                   &equation);
+
+                if (retCode == ADDR_OK)
+                {
+                    UINT_32 equationIndex = m_numEquations;
+                    ADDR_ASSERT(equationIndex < EquationTableSize);
+
+                    m_blockSlices[equationIndex] = 1;
+
+                    const ADDR_TILEINFO* pTileInfo = &tileConfig.info;
+
+                    m_blockWidth[equationIndex]  =
+                        HwlGetPipes(pTileInfo) * MicroTileWidth * pTileInfo->bankWidth *
+                        pTileInfo->macroAspectRatio;
+                    m_blockHeight[equationIndex] =
+                        MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks /
+                        pTileInfo->macroAspectRatio;
+
+                    m_equationTable[equationIndex] = equation;
+
+                    m_numEquations++;
+                }
+            }
+        }
     }
 }
 
@@ -3586,9 +3773,10 @@ VOID SiLib::InitEquationTable()
 ****************************************************************************************************
 */
 BOOL_32 SiLib::IsEquationSupported(
-    UINT_32    bpp,         ///< Bits per pixel
-    TileConfig tileConfig,  ///< Tile config
-    INT_32     tileIndex    ///< Tile index
+    UINT_32    bpp,             ///< Bits per pixel
+    TileConfig tileConfig,      ///< Tile config
+    INT_32     tileIndex,       ///< Tile index
+    UINT_32    elementBytesLog2 ///< Log2 of element bytes
     ) const
 {
     BOOL_32 supported = TRUE;
@@ -3624,24 +3812,7 @@ BOOL_32 SiLib::IsEquationSupported(
 
         if ((supported == TRUE) && (m_chipFamily == ADDR_CHIP_FAMILY_SI))
         {
-            // Please refer to SiLib::HwlSetupTileInfo for PRT tile index selecting
-            // Tile index 3, 6, 21-25 are for PRT single sample
-            if (tileIndex == 3)
-            {
-                supported = (bpp == 16);
-            }
-            else if (tileIndex == 6)
-            {
-                supported = (bpp == 32);
-            }
-            else if ((tileIndex >= 21) && (tileIndex <= 25))
-            {
-                supported = (bpp == 8u * (1u << (static_cast<UINT_32>(tileIndex) - 21u)));
-            }
-            else
-            {
-                supported = FALSE;
-            }
+            supported = m_EquationSupport[tileIndex][elementBytesLog2];
         }
     }
 
index 6daaa23..37e26ff 100644 (file)
@@ -261,7 +261,7 @@ protected:
 
     // Check if it is supported for given bpp and tile config to generate an equation
     BOOL_32 IsEquationSupported(
-        UINT_32 bpp, TileConfig tileConfig, INT_32 tileIndex) const;
+        UINT_32 bpp, TileConfig tileConfig, INT_32 tileIndex, UINT_32 elementBytesLog2) const;
 
     // Protected non-virtual functions
     VOID ComputeTileCoordFromPipeAndElemIdx(
@@ -289,10 +289,19 @@ protected:
 
     // Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp)
     static const UINT_32    MaxNumElementBytes  = 5;
+
+    static const BOOL_32    m_EquationSupport[TileTableSize][MaxNumElementBytes];
+
+    // Prt tile mode index mask
+    static const UINT_32    SiPrtTileIndexMask = ((1 << 3)  | (1 << 5)  | (1 << 6)  | (1 << 7)  |
+                                                  (1 << 21) | (1 << 22) | (1 << 23) | (1 << 24) |
+                                                  (1 << 25) | (1 << 30));
+
     // More than half slots in tile mode table can't support equation
-    static const UINT_32    EquationTableSize = (MaxNumElementBytes * TileTableSize) / 2;
+    static const UINT_32    EquationTableSize   = (MaxNumElementBytes * TileTableSize) / 2;
     // Equation table
     ADDR_EQUATION           m_equationTable[EquationTableSize];
+    UINT_32                 m_numMacroBits[EquationTableSize];
     UINT_32                 m_blockWidth[EquationTableSize];
     UINT_32                 m_blockHeight[EquationTableSize];
     UINT_32                 m_blockSlices[EquationTableSize];
@@ -301,6 +310,8 @@ protected:
     // Equation lookup table according to bpp and tile index
     UINT_32                 m_equationLookupTable[MaxNumElementBytes][TileTableSize];
 
+    UINT_32                 m_uncompressDepthEqIndex;
+
 private:
 
     VOID ReadGbTileMode(UINT_32 regValue, TileConfig* pCfg) const;