Initial commit
[kernel/linux-3.0.git] / drivers / gpu / vithar / kbase / src / common / mali_kbase_gpuprops.c
1 /*
2  *
3  * (C) COPYRIGHT 2011-2012 ARM Limited. All rights reserved.
4  *
5  * This program is free software and is provided to you under the terms of the GNU General Public License version 2
6  * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
7  * 
8  * A copy of the licence is included with the program, and can also be obtained from Free Software
9  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
10  * 
11  */
12
13
14
15 /**
16  * @file mali_kbase_gpuprops.c
17  * Base kernel property query APIs
18  */
19
20 #include <kbase/src/common/mali_kbase.h>
21 #include <kbase/src/common/mali_midg_regmap.h>
22 #include <kbase/src/common/mali_kbase_gpuprops.h>
23
/**
 * @brief Extracts bits from a 32-bit bitfield.
 * @hideinitializer
 *
 * @param[in]    value       The value from which to extract bits (converted to u32 first).
 * @param[in]    offset      The first bit to extract (0 being the LSB).
 * @param[in]    size        The number of bits to extract.
 * @return                   Bits [@a offset, @a offset + @a size) from @a value, as a u32.
 *
 * @pre offset + size <= 32.
 *
 * @note The right shift is performed on a 64-bit copy of @a value. The
 *       precondition permits offset == 32 (with size == 0); shifting a 32-bit
 *       operand by its full width is undefined in C, whereas the 64-bit shift
 *       is well defined and yields 0. Each argument is evaluated exactly once.
 */
/* from mali_cdsb.h */
#define KBASE_UBFX32(value, offset, size) \
	((u32)(((u64)(u32)(value) >> (u32)(offset)) & ((1ULL << (u32)(size)) - 1)))
38
39 mali_error kbase_gpuprops_uk_get_props(kbase_context *kctx, kbase_uk_gpuprops * kbase_props)
40 {
41         OSK_ASSERT(NULL != kctx);
42         OSK_ASSERT(NULL != kbase_props);
43
44         OSK_MEMCPY(&kbase_props->props, &kctx->kbdev->gpu_props.props, sizeof(kbase_props->props));
45
46         return MALI_ERROR_NONE;
47 }
48
49 STATIC void kbase_gpuprops_dump_registers(kbase_device * kbdev, kbase_gpuprops_regdump * regdump)
50 {
51         int i;
52
53         OSK_ASSERT(NULL != kbdev);
54         OSK_ASSERT(NULL != regdump);
55
56         /* Ensure that the GPU is powered */
57         kbase_pm_context_active(kbdev);
58
59         /* Fill regdump with the content of the relevant registers */
60
61         regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);
62
63         regdump->l2_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_FEATURES), NULL);
64         regdump->l3_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(L3_FEATURES), NULL);
65         regdump->tiler_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_FEATURES), NULL);
66         regdump->mem_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(MEM_FEATURES), NULL);
67         regdump->mmu_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(MMU_FEATURES), NULL);
68         regdump->as_present = kbase_reg_read(kbdev, GPU_CONTROL_REG(AS_PRESENT), NULL);
69         regdump->js_present = kbase_reg_read(kbdev, GPU_CONTROL_REG(JS_PRESENT), NULL);
70
71         for(i = 0; i < MIDG_MAX_JOB_SLOTS; i++)
72         {
73                 regdump->js_features[i] = kbase_reg_read(kbdev, GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL);
74         }
75
76         for(i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
77         {
78                 regdump->texture_features[i] =  kbase_reg_read(kbdev, GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL); 
79         }
80
81         regdump->shader_present_lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL);
82         regdump->shader_present_hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL);
83
84         regdump->tiler_present_lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_PRESENT_LO), NULL);
85         regdump->tiler_present_hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_PRESENT_HI), NULL);
86
87         regdump->l2_present_lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_PRESENT_LO), NULL);
88         regdump->l2_present_hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_PRESENT_HI), NULL);
89
90         regdump->l3_present_lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(L3_PRESENT_LO), NULL);
91         regdump->l3_present_hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(L3_PRESENT_HI), NULL);
92
93         kbase_pm_context_idle(kbdev);
94 }
95
96 STATIC void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
97 {
98         struct mali_base_gpu_coherent_group *current_group;
99         u64 group_present;
100         u64 group_mask;
101         u64 first_set, first_set_prev;
102         u32 num_groups = 0;
103
104         OSK_ASSERT(NULL != props);
105
106         props->coherency_info.coherency = props->raw_props.mem_features;
107         props->coherency_info.num_core_groups = osk_count_set_bits64(props->raw_props.l2_present);
108
109         if (props->coherency_info.coherency & GROUPS_L3_COHERENT)
110         {
111                 /* Group is l3 coherent */
112                 group_present = props->raw_props.l3_present;
113         }
114         else if (props->coherency_info.coherency & GROUPS_L2_COHERENT)
115         {
116                 /* Group is l2 coherent */
117                 group_present = props->raw_props.l2_present;
118         }
119         else
120         {
121                 /* Group is l1 coherent */
122                 group_present = props->raw_props.shader_present;
123         }
124
125         /*
126          * The coherent group mask can be computed from the l2/l3 present 
127          * register.
128          *
129          * For the coherent group n:
130          * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
131          * where first_set is group_present with only its nth set-bit kept 
132          * (i.e. the position from where a new group starts).
133          *
134          * For instance if the groups are l2 coherent and l2_present=0x0..01111:
135          * The first mask is:
136          * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
137          *               = (0x0..010     - 1) & ~(0x0..01      - 1)
138          *               =  0x0..00f
139          * The second mask is:
140          * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
141          *               = (0x0..100     - 1) & ~(0x0..010     - 1)
142          *               =  0x0..0f0
143          * And so on until all the bits from group_present have been cleared
144          * (i.e. there is no group left).
145          */
146
147         current_group = props->coherency_info.group;
148         first_set = group_present & ~(group_present - 1); 
149
150         while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS)
151         {
152                 group_present -= first_set; /* Clear the current group bit */
153                 first_set_prev = first_set;
154
155                 first_set = group_present & ~(group_present - 1);
156                 group_mask = (first_set - 1) & ~(first_set_prev - 1);
157
158                 /* Populate the coherent_group structure for each group */ 
159                 current_group->core_mask = group_mask & props->raw_props.shader_present; 
160                 current_group->num_cores = osk_count_set_bits64(current_group->core_mask);
161
162                 num_groups++;
163                 current_group++;
164         }
165
166         if (group_present != 0)
167         {
168                 OSK_PRINT_WARN(OSK_BASE_CORE, "Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);        
169         }
170
171         props->coherency_info.num_groups = num_groups;
172 }
173
174 /**
175  * @brief Get the GPU configuration
176  *
177  * Fill the base_gpu_props structure with values from the GPU configuration registers
178  *
179  * @param gpu_props  The base_gpu_props structure
180  * @param kbdev      The kbase_device structure for the device
181  */
182 static void kbase_gpuprops_get_props(base_gpu_props * gpu_props, kbase_device * kbdev)
183 {
184         kbase_gpuprops_regdump regdump;
185         int i;
186
187         OSK_ASSERT(NULL != kbdev);
188         OSK_ASSERT(NULL != gpu_props);
189
190         /* Dump relevant registers */
191         kbase_gpuprops_dump_registers(kbdev, &regdump);
192
193         /* Populate the base_gpu_props structure */
194         gpu_props->core_props.version_status = KBASE_UBFX32(regdump.gpu_id, 0U, 4);
195         gpu_props->core_props.minor_revision = KBASE_UBFX32(regdump.gpu_id, 4U, 8);
196         gpu_props->core_props.major_revision = KBASE_UBFX32(regdump.gpu_id, 12U, 4);
197         gpu_props->core_props.product_id = KBASE_UBFX32(regdump.gpu_id, 16U, 16);
198         gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
199         gpu_props->core_props.gpu_speed_mhz = KBASE_GPU_SPEED_MHZ;
200         gpu_props->core_props.gpu_available_memory_size = OSK_MEM_PAGES << OSK_PAGE_SHIFT;
201
202         for(i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
203         {
204                 gpu_props->core_props.texture_features[i] = regdump.texture_features[i];
205         }
206
207         gpu_props->l2_props.log2_line_size = KBASE_UBFX32(regdump.l2_features, 0U, 8);
208         gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(regdump.l2_features, 16U, 8);
209
210         gpu_props->l3_props.log2_line_size = KBASE_UBFX32(regdump.l3_features, 0U, 8);
211         gpu_props->l3_props.log2_cache_size = KBASE_UBFX32(regdump.l3_features, 16U, 8);
212
213         gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(regdump.tiler_features, 0U, 6);
214         gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(regdump.tiler_features, 8U, 4);
215
216         gpu_props->raw_props.tiler_features = regdump.tiler_features;
217         gpu_props->raw_props.mem_features = regdump.mem_features;
218         gpu_props->raw_props.mmu_features = regdump.mmu_features;
219         gpu_props->raw_props.l2_features = regdump.l2_features;
220         gpu_props->raw_props.l3_features = regdump.l3_features;
221
222         gpu_props->raw_props.as_present = regdump.as_present;
223         gpu_props->raw_props.js_present = regdump.js_present;
224         gpu_props->raw_props.shader_present = ((u64)regdump.shader_present_hi << 32) + regdump.shader_present_lo;
225         gpu_props->raw_props.tiler_present = ((u64)regdump.tiler_present_hi << 32) + regdump.tiler_present_lo;
226         gpu_props->raw_props.l2_present = ((u64)regdump.l2_present_hi << 32) + regdump.l2_present_lo;
227         gpu_props->raw_props.l3_present = ((u64)regdump.l3_present_hi << 32) + regdump.l3_present_lo;
228
229         for(i = 0; i < MIDG_MAX_JOB_SLOTS; i++)
230         {
231                 gpu_props->raw_props.js_features[i] = regdump.js_features[i];
232         }
233
234         /* Initialize the coherent_group structure for each group */
235         kbase_gpuprops_construct_coherent_groups(gpu_props);
236 }
237
238 void kbase_gpuprops_set(kbase_device *kbdev)
239 {
240         kbase_gpu_props *gpu_props;
241         struct midg_raw_gpu_props *raw;
242
243         OSK_ASSERT(NULL != kbdev);
244         gpu_props = &kbdev->gpu_props;
245         raw = &gpu_props->props.raw_props;
246
247         /* Initialize the base_gpu_props structure */
248         kbase_gpuprops_get_props(&gpu_props->props, kbdev);
249
250         /* Populate kbase-only fields */
251         gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
252         gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);
253
254         gpu_props->l3_props.associativity = KBASE_UBFX32(raw->l3_features, 8U, 8);
255         gpu_props->l3_props.external_bus_width = KBASE_UBFX32(raw->l3_features, 24U, 8);
256
257         gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
258         gpu_props->mem.supergroup = KBASE_UBFX32(raw->mem_features, 1U, 1);
259
260         gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
261         gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
262
263         gpu_props->num_cores = osk_count_set_bits64(raw->shader_present);
264         gpu_props->num_core_groups = osk_count_set_bits64(raw->l2_present);
265         gpu_props->num_supergroups = osk_count_set_bits64(raw->l3_present);
266 }
267