Only the render backends of the first shader engine were enabled. The others
were erroneously disabled. Enabling the other render backends improves
performance a lot.
Unigine Sanctuary on Bonaire:
Before: 15 fps
After: 90 fps
Judging from the fan noise, the GPU was also underclocked when the other
render backends were disabled, resulting in horrible performance. The fan is
a lot noisy under load now.
Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
* Returns the disabled RB bitmask.
*/
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
* Returns the disabled RB bitmask.
*/
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
- u32 max_rb_num, u32 se_num,
u32 sh_per_se)
{
u32 data, mask;
u32 sh_per_se)
{
u32 data, mask;
data >>= BACKEND_DISABLE_SHIFT;
data >>= BACKEND_DISABLE_SHIFT;
- mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
+ mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
*/
static void cik_setup_rb(struct radeon_device *rdev,
u32 se_num, u32 sh_per_se,
*/
static void cik_setup_rb(struct radeon_device *rdev,
u32 se_num, u32 sh_per_se,
{
int i, j;
u32 data, mask;
{
int i, j;
u32 data, mask;
for (i = 0; i < se_num; i++) {
for (j = 0; j < sh_per_se; j++) {
cik_select_se_sh(rdev, i, j);
for (i = 0; i < se_num; i++) {
for (j = 0; j < sh_per_se; j++) {
cik_select_se_sh(rdev, i, j);
- data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
+ data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
if (rdev->family == CHIP_HAWAII)
disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
else
if (rdev->family == CHIP_HAWAII)
disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
else
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
mask = 1;
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
mask = 1;
- for (i = 0; i < max_rb_num; i++) {
+ for (i = 0; i < max_rb_num_per_se * se_num; i++) {
if (!(disabled_rbs & mask))
enabled_rbs |= mask;
mask <<= 1;
if (!(disabled_rbs & mask))
enabled_rbs |= mask;
mask <<= 1;
}
static u32 si_get_rb_disabled(struct radeon_device *rdev,
}
static u32 si_get_rb_disabled(struct radeon_device *rdev,
- u32 max_rb_num, u32 se_num,
u32 sh_per_se)
{
u32 data, mask;
u32 sh_per_se)
{
u32 data, mask;
data >>= BACKEND_DISABLE_SHIFT;
data >>= BACKEND_DISABLE_SHIFT;
- mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
+ mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
return data & mask;
}
static void si_setup_rb(struct radeon_device *rdev,
u32 se_num, u32 sh_per_se,
return data & mask;
}
static void si_setup_rb(struct radeon_device *rdev,
u32 se_num, u32 sh_per_se,
{
int i, j;
u32 data, mask;
{
int i, j;
u32 data, mask;
for (i = 0; i < se_num; i++) {
for (j = 0; j < sh_per_se; j++) {
si_select_se_sh(rdev, i, j);
for (i = 0; i < se_num; i++) {
for (j = 0; j < sh_per_se; j++) {
si_select_se_sh(rdev, i, j);
- data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
+ data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
}
}
si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
mask = 1;
disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
}
}
si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
mask = 1;
- for (i = 0; i < max_rb_num; i++) {
+ for (i = 0; i < max_rb_num_per_se * se_num; i++) {
if (!(disabled_rbs & mask))
enabled_rbs |= mask;
mask <<= 1;
if (!(disabled_rbs & mask))
enabled_rbs |= mask;
mask <<= 1;