drm/radeon: add query for number of active CUs
authorAlex Deucher <alexander.deucher@amd.com>
Mon, 2 Jun 2014 20:13:21 +0000 (16:13 -0400)
committerAlex Deucher <alexander.deucher@amd.com>
Tue, 10 Jun 2014 02:06:55 +0000 (22:06 -0400)
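Add an INFO ioctl query that reports how many compute units (CUs) are
active on a GPU.  On pre-SI parts the number of active SIMDs is reported
instead.  Useful for OpenCL usermode drivers.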

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/evergreen.c
drivers/gpu/drm/radeon/ni.c
drivers/gpu/drm/radeon/r600.c
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_drv.c
drivers/gpu/drm/radeon/radeon_kms.c
drivers/gpu/drm/radeon/rv770.c
drivers/gpu/drm/radeon/si.c
include/uapi/drm/radeon_drm.h

index e4b2f2b..dcd4518 100644 (file)
@@ -80,6 +80,7 @@ extern int sumo_rlc_init(struct radeon_device *rdev);
 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
 extern void si_rlc_reset(struct radeon_device *rdev);
 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
+static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 extern int cik_sdma_resume(struct radeon_device *rdev);
 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
 extern void cik_sdma_fini(struct radeon_device *rdev);
@@ -3446,6 +3447,16 @@ static void cik_gpu_init(struct radeon_device *rdev)
                     rdev->config.cik.max_sh_per_se,
                     rdev->config.cik.max_backends_per_se);
 
+       /* start from zero so the total cannot accumulate across repeated init */
+       rdev->config.cik.active_cus = 0;
+       for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
+               for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
+                       /* the bitmap is per (se, sh): count its bits exactly once */
+                       rdev->config.cik.active_cus +=
+                               hweight32(cik_get_cu_active_bitmap(rdev, i, j));
+               }
+       }
+
        /* set HW defaults for 3D engine */
        WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
 
index 653eff8..e2f6052 100644 (file)
@@ -3337,6 +3337,18 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
                        disabled_rb_mask &= ~(1 << i);
        }
 
+       for (i = 0; i < rdev->config.evergreen.num_ses; i++) {
+               u32 simd_disable_bitmap;
+
+               WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+               WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+               simd_disable_bitmap = (RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffff0000) >> 16;
+               simd_disable_bitmap |= 0xffffffff << rdev->config.evergreen.max_simds;
+               tmp <<= 16;
+               tmp |= simd_disable_bitmap;
+       }
+       rdev->config.evergreen.active_simds = hweight32(~tmp);
+
        WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
        WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
 
index c0fd8f6..5a33ca6 100644 (file)
@@ -1057,6 +1057,18 @@ static void cayman_gpu_init(struct radeon_device *rdev)
                        disabled_rb_mask &= ~(1 << i);
        }
 
+       for (i = 0; i < rdev->config.cayman.max_shader_engines; i++) {
+               u32 simd_disable_bitmap;
+
+               WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+               WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+               simd_disable_bitmap = (RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffff0000) >> 16;
+               simd_disable_bitmap |= 0xffffffff << rdev->config.cayman.max_simds_per_se;
+               tmp <<= 16;
+               tmp |= simd_disable_bitmap;
+       }
+       rdev->config.cayman.active_simds = hweight32(~tmp);
+
        WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
        WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
 
index c2ff17c..c66952d 100644 (file)
@@ -1958,6 +1958,9 @@ static void r600_gpu_init(struct radeon_device *rdev)
        if (tmp < rdev->config.r600.max_simds) {
                rdev->config.r600.max_simds = tmp;
        }
+       tmp = rdev->config.r600.max_simds -
+               r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R6XX_MAX_SIMDS_MASK);
+       rdev->config.r600.active_simds = tmp;
 
        disabled_rb_mask = (RREG32(CC_RB_BACKEND_DISABLE) >> 16) & R6XX_MAX_BACKENDS_MASK;
        tmp = (tiling_config & PIPE_TILING__MASK) >> PIPE_TILING__SHIFT;
index dd77111..4b0bbf8 100644 (file)
@@ -1932,6 +1932,7 @@ struct r600_asic {
        unsigned                tiling_group_size;
        unsigned                tile_config;
        unsigned                backend_map;
+       unsigned                active_simds;
 };
 
 struct rv770_asic {
@@ -1957,6 +1958,7 @@ struct rv770_asic {
        unsigned                tiling_group_size;
        unsigned                tile_config;
        unsigned                backend_map;
+       unsigned                active_simds;
 };
 
 struct evergreen_asic {
@@ -1983,6 +1985,7 @@ struct evergreen_asic {
        unsigned tiling_group_size;
        unsigned tile_config;
        unsigned backend_map;
+       unsigned active_simds;
 };
 
 struct cayman_asic {
@@ -2021,6 +2024,7 @@ struct cayman_asic {
        unsigned multi_gpu_tile_size;
 
        unsigned tile_config;
+       unsigned active_simds;
 };
 
 struct si_asic {
@@ -2051,6 +2055,7 @@ struct si_asic {
 
        unsigned tile_config;
        uint32_t tile_mode_array[32];
+       uint32_t active_cus;
 };
 
 struct cik_asic {
@@ -2082,6 +2087,7 @@ struct cik_asic {
        unsigned tile_config;
        uint32_t tile_mode_array[32];
        uint32_t macrotile_mode_array[16];
+       uint32_t active_cus;
 };
 
 union radeon_asic_config {
index b7a2ec2..6e30174 100644 (file)
  *   2.37.0 - allow GS ring setup on r6xx/r7xx
  *   2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN),
  *            CIK: 1D and linear tiling modes contain valid PIPE_CONFIG
+ *   2.39.0 - Add INFO query for number of active CUs
  */
 #define KMS_DRIVER_MAJOR       2
-#define KMS_DRIVER_MINOR       38
+#define KMS_DRIVER_MINOR       39
 #define KMS_DRIVER_PATCHLEVEL  0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
index eaaedba..5cd70f9 100644 (file)
@@ -513,6 +513,22 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file
                value_size = sizeof(uint64_t);
                value64 = atomic64_read(&rdev->gtt_usage);
                break;
+       case RADEON_INFO_ACTIVE_CU_COUNT:
+               if (rdev->family >= CHIP_BONAIRE)
+                       *value = rdev->config.cik.active_cus;
+               else if (rdev->family >= CHIP_TAHITI)
+                       *value = rdev->config.si.active_cus;
+               else if (rdev->family >= CHIP_CAYMAN)
+                       *value = rdev->config.cayman.active_simds;
+               else if (rdev->family >= CHIP_CEDAR)
+                       *value = rdev->config.evergreen.active_simds;
+               else if (rdev->family >= CHIP_RV770)
+                       *value = rdev->config.rv770.active_simds;
+               else if (rdev->family >= CHIP_R600)
+                       *value = rdev->config.r600.active_simds;
+               else
+                       *value = 1;
+               break;
        default:
                DRM_DEBUG_KMS("Invalid request %d\n", info->request);
                return -EINVAL;
index 97b7766..da8703d 100644 (file)
@@ -1327,6 +1327,9 @@ static void rv770_gpu_init(struct radeon_device *rdev)
        if (tmp < rdev->config.rv770.max_simds) {
                rdev->config.rv770.max_simds = tmp;
        }
+       tmp = rdev->config.rv770.max_simds -
+               r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R7XX_MAX_SIMDS_MASK);
+       rdev->config.rv770.active_simds = tmp;
 
        switch (rdev->config.rv770.max_tile_pipes) {
        case 1:
index ec13e8d..730cee2 100644 (file)
@@ -71,6 +71,7 @@ MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
 
+static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 static void si_pcie_gen3_enable(struct radeon_device *rdev);
 static void si_program_aspm(struct radeon_device *rdev);
 extern void sumo_rlc_fini(struct radeon_device *rdev);
@@ -3098,6 +3099,15 @@ static void si_gpu_init(struct radeon_device *rdev)
                     rdev->config.si.max_sh_per_se,
                     rdev->config.si.max_cu_per_sh);
 
+       /* start from zero so the total cannot accumulate across repeated init */
+       rdev->config.si.active_cus = 0;
+       for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
+               for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
+                       /* the bitmap is per (se, sh): count its bits exactly once */
+                       rdev->config.si.active_cus +=
+                               hweight32(si_get_cu_active_bitmap(rdev, i, j));
+               }
+       }
 
        /* set HW defaults for 3D engine */
        WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
index aefa2f6..1cc0b61 100644 (file)
@@ -1007,7 +1007,7 @@ struct drm_radeon_cs {
 #define RADEON_INFO_NUM_BYTES_MOVED    0x1d
 #define RADEON_INFO_VRAM_USAGE         0x1e
 #define RADEON_INFO_GTT_USAGE          0x1f
-
+#define RADEON_INFO_ACTIVE_CU_COUNT    0x20
 
 struct drm_radeon_info {
        uint32_t                request;