LOCAL / GPU: ARM: add MALI R12P0_04REL0 drivers
[platform/kernel/linux-exynos.git] / drivers / gpu / arm / midgard / r12p0_04rel0 / mali_kbase_gpuprops.c
1 /*
2  *
3  * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
4  *
5  * This program is free software and is provided to you under the terms of the
6  * GNU General Public License version 2 as published by the Free Software
7  * Foundation, and any use by you of this program is subject to the terms
8  * of such GNU licence.
9  *
10  * A copy of the licence is included with the program, and can also be obtained
11  * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
12  * Boston, MA  02110-1301, USA.
13  *
14  */
15
16
17
18
19
20 /*
21  * Base kernel property query APIs
22  */
23
24 #include <mali_kbase.h>
25 #include <mali_midg_regmap.h>
26 #include <mali_kbase_gpuprops.h>
27 #include <mali_kbase_config_defaults.h>
28 #include <mali_kbase_hwaccess_gpuprops.h>
29 #include <linux/clk.h>
30
/**
 * KBASE_UBFX32 - Unsigned bitfield extract from a 32-bit word.
 * @value:  Word holding the field; it is cast to u32 before being shifted.
 * @offset: Bit index of the field's least significant bit (bit 0 is the LSB).
 * @size:   Width of the field, in bits.
 *
 * Context: @offset + @size <= 32.
 *
 * The mask is built in 64-bit arithmetic (1ULL) and only then narrowed to
 * u32, so a @size of 32 produces a full 0xFFFFFFFF mask.
 *
 * Return: Bits [@offset, @offset + @size) of @value, moved down to bit 0.
 */
/* from mali_cdsb.h */
#define KBASE_UBFX32(value, offset, size) \
	((u32)((1ULL << (u32)(size)) - 1) & ((u32)(value) >> (u32)(offset)))
44
45 int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props)
46 {
47         kbase_gpu_clk_speed_func get_gpu_speed_mhz;
48         u32 gpu_speed_mhz;
49         int rc = 1;
50
51         KBASE_DEBUG_ASSERT(NULL != kctx);
52         KBASE_DEBUG_ASSERT(NULL != kbase_props);
53
54         /* Current GPU speed is requested from the system integrator via the GPU_SPEED_FUNC function.
55          * If that function fails, or the function is not provided by the system integrator, we report the maximum
56          * GPU speed as specified by GPU_FREQ_KHZ_MAX.
57          */
58         get_gpu_speed_mhz = (kbase_gpu_clk_speed_func) GPU_SPEED_FUNC;
59         if (get_gpu_speed_mhz != NULL) {
60                 rc = get_gpu_speed_mhz(&gpu_speed_mhz);
61 #ifdef CONFIG_MALI_DEBUG
62                 /* Issue a warning message when the reported GPU speed falls outside the min/max range */
63                 if (rc == 0) {
64                         u32 gpu_speed_khz = gpu_speed_mhz * 1000;
65
66                         if (gpu_speed_khz < kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min ||
67                                         gpu_speed_khz > kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max)
68                                 dev_warn(kctx->kbdev->dev, "GPU Speed is outside of min/max range (got %lu Khz, min %lu Khz, max %lu Khz)\n",
69                                                 (unsigned long)gpu_speed_khz,
70                                                 (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min,
71                                                 (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max);
72                 }
73 #endif                          /* CONFIG_MALI_DEBUG */
74         }
75         if (kctx->kbdev->clock) {
76                 gpu_speed_mhz = clk_get_rate(kctx->kbdev->clock) / 1000000;
77                 rc = 0;
78         }
79         if (rc != 0)
80                 gpu_speed_mhz = kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max / 1000;
81
82         kctx->kbdev->gpu_props.props.core_props.gpu_speed_mhz = gpu_speed_mhz;
83
84         memcpy(&kbase_props->props, &kctx->kbdev->gpu_props.props, sizeof(kbase_props->props));
85
86         /* Before API 8.2 they expect L3 cache info here, which was always 0 */
87         if (kctx->api_version < KBASE_API_VERSION(8, 2))
88                 kbase_props->props.raw_props.suspend_size = 0;
89
90         return 0;
91 }
92
93 static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
94 {
95         struct mali_base_gpu_coherent_group *current_group;
96         u64 group_present;
97         u64 group_mask;
98         u64 first_set, first_set_prev;
99         u32 num_groups = 0;
100
101         KBASE_DEBUG_ASSERT(NULL != props);
102
103         props->coherency_info.coherency = props->raw_props.mem_features;
104         props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
105
106         if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
107                 /* Group is l2 coherent */
108                 group_present = props->raw_props.l2_present;
109         } else {
110                 /* Group is l1 coherent */
111                 group_present = props->raw_props.shader_present;
112         }
113
114         /*
115          * The coherent group mask can be computed from the l2 present
116          * register.
117          *
118          * For the coherent group n:
119          * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
120          * where first_set is group_present with only its nth set-bit kept
121          * (i.e. the position from where a new group starts).
122          *
123          * For instance if the groups are l2 coherent and l2_present=0x0..01111:
124          * The first mask is:
125          * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
126          *               = (0x0..010     - 1) & ~(0x0..01      - 1)
127          *               =  0x0..00f
128          * The second mask is:
129          * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
130          *               = (0x0..100     - 1) & ~(0x0..010     - 1)
131          *               =  0x0..0f0
132          * And so on until all the bits from group_present have been cleared
133          * (i.e. there is no group left).
134          */
135
136         current_group = props->coherency_info.group;
137         first_set = group_present & ~(group_present - 1);
138
139         while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
140                 group_present -= first_set;     /* Clear the current group bit */
141                 first_set_prev = first_set;
142
143                 first_set = group_present & ~(group_present - 1);
144                 group_mask = (first_set - 1) & ~(first_set_prev - 1);
145
146                 /* Populate the coherent_group structure for each group */
147                 current_group->core_mask = group_mask & props->raw_props.shader_present;
148                 current_group->num_cores = hweight64(current_group->core_mask);
149
150                 num_groups++;
151                 current_group++;
152         }
153
154         if (group_present != 0)
155                 pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);
156
157         props->coherency_info.num_groups = num_groups;
158 }
159
160 /**
161  * kbase_gpuprops_get_props - Get the GPU configuration
162  * @gpu_props: The &base_gpu_props structure
163  * @kbdev: The &struct kbase_device structure for the device
164  *
165  * Fill the &base_gpu_props structure with values from the GPU configuration
166  * registers. Only the raw properties are filled in this function
167  */
168 static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
169 {
170         struct kbase_gpuprops_regdump regdump;
171         int i;
172
173         KBASE_DEBUG_ASSERT(NULL != kbdev);
174         KBASE_DEBUG_ASSERT(NULL != gpu_props);
175
176         /* Dump relevant registers */
177         kbase_backend_gpuprops_get(kbdev, &regdump);
178
179         gpu_props->raw_props.gpu_id = regdump.gpu_id;
180         gpu_props->raw_props.tiler_features = regdump.tiler_features;
181         gpu_props->raw_props.mem_features = regdump.mem_features;
182         gpu_props->raw_props.mmu_features = regdump.mmu_features;
183         gpu_props->raw_props.l2_features = regdump.l2_features;
184         gpu_props->raw_props.suspend_size = regdump.suspend_size;
185
186         gpu_props->raw_props.as_present = regdump.as_present;
187         gpu_props->raw_props.js_present = regdump.js_present;
188         gpu_props->raw_props.shader_present = ((u64) regdump.shader_present_hi << 32) + regdump.shader_present_lo;
189         gpu_props->raw_props.tiler_present = ((u64) regdump.tiler_present_hi << 32) + regdump.tiler_present_lo;
190         gpu_props->raw_props.l2_present = ((u64) regdump.l2_present_hi << 32) + regdump.l2_present_lo;
191
192         for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
193                 gpu_props->raw_props.js_features[i] = regdump.js_features[i];
194
195         for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
196                 gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
197
198         gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
199         gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
200         gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
201         gpu_props->raw_props.thread_features = regdump.thread_features;
202 }
203
204 /**
205  * kbase_gpuprops_calculate_props - Calculate the derived properties
206  * @gpu_props: The &base_gpu_props structure
207  * @kbdev:     The &struct kbase_device structure for the device
208  *
209  * Fill the &base_gpu_props structure with values derived from the GPU
210  * configuration registers
211  */
212 static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
213 {
214         int i;
215
216         /* Populate the base_gpu_props structure */
217         gpu_props->core_props.version_status = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
218         gpu_props->core_props.minor_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
219         gpu_props->core_props.major_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
220         gpu_props->core_props.product_id = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
221         gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
222         gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
223
224         for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
225                 gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
226
227         gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
228         gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
229
230         /* Field with number of l2 slices is added to MEM_FEATURES register
231          * since t76x. Below code assumes that for older GPU reserved bits will
232          * be read as zero. */
233         gpu_props->l2_props.num_l2_slices =
234                 KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;
235
236         gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
237         gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);
238
239         if (gpu_props->raw_props.thread_max_threads == 0)
240                 gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
241         else
242                 gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;
243
244         if (gpu_props->raw_props.thread_max_workgroup_size == 0)
245                 gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
246         else
247                 gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;
248
249         if (gpu_props->raw_props.thread_max_barrier_size == 0)
250                 gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
251         else
252                 gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
253
254         gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
255         gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
256         gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
257         gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
258
259         /* If values are not specified, then use defaults */
260         if (gpu_props->thread_props.max_registers == 0) {
261                 gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
262                 gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
263                 gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
264         }
265         /* Initialize the coherent_group structure for each group */
266         kbase_gpuprops_construct_coherent_groups(gpu_props);
267 }
268
269 void kbase_gpuprops_set(struct kbase_device *kbdev)
270 {
271         struct kbase_gpu_props *gpu_props;
272         struct gpu_raw_gpu_props *raw;
273
274         KBASE_DEBUG_ASSERT(NULL != kbdev);
275         gpu_props = &kbdev->gpu_props;
276         raw = &gpu_props->props.raw_props;
277
278         /* Initialize the base_gpu_props structure from the hardware */
279         kbase_gpuprops_get_props(&gpu_props->props, kbdev);
280
281         /* Populate the derived properties */
282         kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);
283
284         /* Populate kbase-only fields */
285         gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
286         gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);
287
288         gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
289
290         gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
291         gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
292
293         gpu_props->num_cores = hweight64(raw->shader_present);
294         gpu_props->num_core_groups = hweight64(raw->l2_present);
295         gpu_props->num_address_spaces = hweight32(raw->as_present);
296         gpu_props->num_job_slots = hweight32(raw->js_present);
297 }
298
299 void kbase_gpuprops_set_features(struct kbase_device *kbdev)
300 {
301         base_gpu_props *gpu_props;
302         struct kbase_gpuprops_regdump regdump;
303
304         gpu_props = &kbdev->gpu_props.props;
305
306         /* Dump relevant registers */
307         kbase_backend_gpuprops_get_features(kbdev, &regdump);
308
309         /*
310          * Copy the raw value from the register, later this will get turned
311          * into the selected coherency mode.
312          */
313         gpu_props->raw_props.coherency_mode = regdump.coherency_features;
314 }