Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
[platform/kernel/linux-rpi.git] / drivers / xen / xen-acpi-processor.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright 2012 by Oracle Inc
4  * Author: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
5  *
6  * This code borrows ideas from
7  * https://lore.kernel.org/lkml/1322673664-14642-6-git-send-email-konrad.wilk@oracle.com
8  * so many thanks go to Kevin Tian <kevin.tian@intel.com>
9  * and Yu Ke <ke.yu@intel.com>.
10  */
11
12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
14 #include <linux/cpumask.h>
15 #include <linux/cpufreq.h>
16 #include <linux/freezer.h>
17 #include <linux/kernel.h>
18 #include <linux/kthread.h>
19 #include <linux/init.h>
20 #include <linux/module.h>
21 #include <linux/types.h>
22 #include <linux/syscore_ops.h>
23 #include <linux/acpi.h>
24 #include <acpi/processor.h>
25 #include <xen/xen.h>
26 #include <xen/interface/platform.h>
27 #include <asm/xen/hypercall.h>
28
29 static int no_hypercall;
30 MODULE_PARM_DESC(off, "Inhibit the hypercall.");
31 module_param_named(off, no_hypercall, int, 0400);
32
/*
 * Note: the acpi_id* bitmaps below must not be turned into cpumask_var_t or
 * be handled with the cpumask helpers. Those are sized by nr_cpu_bits (which
 * depends on the possible CPUs) and that can be smaller than the range of
 * ACPI IDs we have to track. The bitmaps are sized by 'nr_acpi_bits' instead,
 * which is computed once at init time (see get_max_acpi_id()).
 */
static unsigned int nr_acpi_bits;
/* Serializes updates of acpi_ids_done - needed for CPU hotplug. */
static DEFINE_MUTEX(acpi_ids_mutex);
/* ACPI IDs whose 'struct acpi_processor' data has already been processed. */
static unsigned long *acpi_ids_done;
/* ACPI IDs that exist in the SSDT/DSDT processor definitions. */
static unsigned long *acpi_id_present;
/* ACPI IDs that additionally have a _CST definition (or a PBLK). */
static unsigned long *acpi_id_cst_present;
/* ACPI P-state dependencies (_PSD) of each enumerated processor. */
static struct acpi_psd_package *acpi_psd;
50
51 static int push_cxx_to_hypervisor(struct acpi_processor *_pr)
52 {
53         struct xen_platform_op op = {
54                 .cmd                    = XENPF_set_processor_pminfo,
55                 .interface_version      = XENPF_INTERFACE_VERSION,
56                 .u.set_pminfo.id        = _pr->acpi_id,
57                 .u.set_pminfo.type      = XEN_PM_CX,
58         };
59         struct xen_processor_cx *dst_cx, *dst_cx_states = NULL;
60         struct acpi_processor_cx *cx;
61         unsigned int i, ok;
62         int ret = 0;
63
64         dst_cx_states = kcalloc(_pr->power.count,
65                                 sizeof(struct xen_processor_cx), GFP_KERNEL);
66         if (!dst_cx_states)
67                 return -ENOMEM;
68
69         for (ok = 0, i = 1; i <= _pr->power.count; i++) {
70                 cx = &_pr->power.states[i];
71                 if (!cx->valid)
72                         continue;
73
74                 dst_cx = &(dst_cx_states[ok++]);
75
76                 dst_cx->reg.space_id = ACPI_ADR_SPACE_SYSTEM_IO;
77                 if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
78                         dst_cx->reg.bit_width = 8;
79                         dst_cx->reg.bit_offset = 0;
80                         dst_cx->reg.access_size = 1;
81                 } else {
82                         dst_cx->reg.space_id = ACPI_ADR_SPACE_FIXED_HARDWARE;
83                         if (cx->entry_method == ACPI_CSTATE_FFH) {
84                                 /* NATIVE_CSTATE_BEYOND_HALT */
85                                 dst_cx->reg.bit_offset = 2;
86                                 dst_cx->reg.bit_width = 1; /* VENDOR_INTEL */
87                         }
88                         dst_cx->reg.access_size = 0;
89                 }
90                 dst_cx->reg.address = cx->address;
91
92                 dst_cx->type = cx->type;
93                 dst_cx->latency = cx->latency;
94
95                 dst_cx->dpcnt = 0;
96                 set_xen_guest_handle(dst_cx->dp, NULL);
97         }
98         if (!ok) {
99                 pr_debug("No _Cx for ACPI CPU %u\n", _pr->acpi_id);
100                 kfree(dst_cx_states);
101                 return -EINVAL;
102         }
103         op.u.set_pminfo.power.count = ok;
104         op.u.set_pminfo.power.flags.bm_control = _pr->flags.bm_control;
105         op.u.set_pminfo.power.flags.bm_check = _pr->flags.bm_check;
106         op.u.set_pminfo.power.flags.has_cst = _pr->flags.has_cst;
107         op.u.set_pminfo.power.flags.power_setup_done =
108                 _pr->flags.power_setup_done;
109
110         set_xen_guest_handle(op.u.set_pminfo.power.states, dst_cx_states);
111
112         if (!no_hypercall)
113                 ret = HYPERVISOR_platform_op(&op);
114
115         if (!ret) {
116                 pr_debug("ACPI CPU%u - C-states uploaded.\n", _pr->acpi_id);
117                 for (i = 1; i <= _pr->power.count; i++) {
118                         cx = &_pr->power.states[i];
119                         if (!cx->valid)
120                                 continue;
121                         pr_debug("     C%d: %s %d uS\n",
122                                  cx->type, cx->desc, (u32)cx->latency);
123                 }
124         } else if ((ret != -EINVAL) && (ret != -ENOSYS))
125                 /* EINVAL means the ACPI ID is incorrect - meaning the ACPI
126                  * table is referencing a non-existing CPU - which can happen
127                  * with broken ACPI tables. */
128                 pr_err("(CX): Hypervisor error (%d) for ACPI CPU%u\n",
129                        ret, _pr->acpi_id);
130
131         kfree(dst_cx_states);
132
133         return ret;
134 }
135 static struct xen_processor_px *
136 xen_copy_pss_data(struct acpi_processor *_pr,
137                   struct xen_processor_performance *dst_perf)
138 {
139         struct xen_processor_px *dst_states = NULL;
140         unsigned int i;
141
142         BUILD_BUG_ON(sizeof(struct xen_processor_px) !=
143                      sizeof(struct acpi_processor_px));
144
145         dst_states = kcalloc(_pr->performance->state_count,
146                              sizeof(struct xen_processor_px), GFP_KERNEL);
147         if (!dst_states)
148                 return ERR_PTR(-ENOMEM);
149
150         dst_perf->state_count = _pr->performance->state_count;
151         for (i = 0; i < _pr->performance->state_count; i++) {
152                 /* Fortunatly for us, they are both the same size */
153                 memcpy(&(dst_states[i]), &(_pr->performance->states[i]),
154                        sizeof(struct acpi_processor_px));
155         }
156         return dst_states;
157 }
158 static int xen_copy_psd_data(struct acpi_processor *_pr,
159                              struct xen_processor_performance *dst)
160 {
161         struct acpi_psd_package *pdomain;
162
163         BUILD_BUG_ON(sizeof(struct xen_psd_package) !=
164                      sizeof(struct acpi_psd_package));
165
166         /* This information is enumerated only if acpi_processor_preregister_performance
167          * has been called.
168          */
169         dst->shared_type = _pr->performance->shared_type;
170
171         pdomain = &(_pr->performance->domain_info);
172
173         /* 'acpi_processor_preregister_performance' does not parse if the
174          * num_processors <= 1, but Xen still requires it. Do it manually here.
175          */
176         if (pdomain->num_processors <= 1) {
177                 if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ALL)
178                         dst->shared_type = CPUFREQ_SHARED_TYPE_ALL;
179                 else if (pdomain->coord_type == DOMAIN_COORD_TYPE_HW_ALL)
180                         dst->shared_type = CPUFREQ_SHARED_TYPE_HW;
181                 else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY)
182                         dst->shared_type = CPUFREQ_SHARED_TYPE_ANY;
183
184         }
185         memcpy(&(dst->domain_info), pdomain, sizeof(struct acpi_psd_package));
186         return 0;
187 }
188 static int xen_copy_pct_data(struct acpi_pct_register *pct,
189                              struct xen_pct_register *dst_pct)
190 {
191         /* It would be nice if you could just do 'memcpy(pct, dst_pct') but
192          * sadly the Xen structure did not have the proper padding so the
193          * descriptor field takes two (dst_pct) bytes instead of one (pct).
194          */
195         dst_pct->descriptor = pct->descriptor;
196         dst_pct->length = pct->length;
197         dst_pct->space_id = pct->space_id;
198         dst_pct->bit_width = pct->bit_width;
199         dst_pct->bit_offset = pct->bit_offset;
200         dst_pct->reserved = pct->reserved;
201         dst_pct->address = pct->address;
202         return 0;
203 }
204 static int push_pxx_to_hypervisor(struct acpi_processor *_pr)
205 {
206         int ret = 0;
207         struct xen_platform_op op = {
208                 .cmd                    = XENPF_set_processor_pminfo,
209                 .interface_version      = XENPF_INTERFACE_VERSION,
210                 .u.set_pminfo.id        = _pr->acpi_id,
211                 .u.set_pminfo.type      = XEN_PM_PX,
212         };
213         struct xen_processor_performance *dst_perf;
214         struct xen_processor_px *dst_states = NULL;
215
216         dst_perf = &op.u.set_pminfo.perf;
217
218         dst_perf->platform_limit = _pr->performance_platform_limit;
219         dst_perf->flags |= XEN_PX_PPC;
220         xen_copy_pct_data(&(_pr->performance->control_register),
221                           &dst_perf->control_register);
222         xen_copy_pct_data(&(_pr->performance->status_register),
223                           &dst_perf->status_register);
224         dst_perf->flags |= XEN_PX_PCT;
225         dst_states = xen_copy_pss_data(_pr, dst_perf);
226         if (!IS_ERR_OR_NULL(dst_states)) {
227                 set_xen_guest_handle(dst_perf->states, dst_states);
228                 dst_perf->flags |= XEN_PX_PSS;
229         }
230         if (!xen_copy_psd_data(_pr, dst_perf))
231                 dst_perf->flags |= XEN_PX_PSD;
232
233         if (dst_perf->flags != (XEN_PX_PSD | XEN_PX_PSS | XEN_PX_PCT | XEN_PX_PPC)) {
234                 pr_warn("ACPI CPU%u missing some P-state data (%x), skipping\n",
235                         _pr->acpi_id, dst_perf->flags);
236                 ret = -ENODEV;
237                 goto err_free;
238         }
239
240         if (!no_hypercall)
241                 ret = HYPERVISOR_platform_op(&op);
242
243         if (!ret) {
244                 struct acpi_processor_performance *perf;
245                 unsigned int i;
246
247                 perf = _pr->performance;
248                 pr_debug("ACPI CPU%u - P-states uploaded.\n", _pr->acpi_id);
249                 for (i = 0; i < perf->state_count; i++) {
250                         pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
251                         (i == perf->state ? '*' : ' '), i,
252                         (u32) perf->states[i].core_frequency,
253                         (u32) perf->states[i].power,
254                         (u32) perf->states[i].transition_latency);
255                 }
256         } else if ((ret != -EINVAL) && (ret != -ENOSYS))
257                 /* EINVAL means the ACPI ID is incorrect - meaning the ACPI
258                  * table is referencing a non-existing CPU - which can happen
259                  * with broken ACPI tables. */
260                 pr_warn("(_PXX): Hypervisor error (%d) for ACPI CPU%u\n",
261                         ret, _pr->acpi_id);
262 err_free:
263         if (!IS_ERR_OR_NULL(dst_states))
264                 kfree(dst_states);
265
266         return ret;
267 }
268 static int upload_pm_data(struct acpi_processor *_pr)
269 {
270         int err = 0;
271
272         mutex_lock(&acpi_ids_mutex);
273         if (__test_and_set_bit(_pr->acpi_id, acpi_ids_done)) {
274                 mutex_unlock(&acpi_ids_mutex);
275                 return -EBUSY;
276         }
277         if (_pr->flags.power)
278                 err = push_cxx_to_hypervisor(_pr);
279
280         if (_pr->performance && _pr->performance->states)
281                 err |= push_pxx_to_hypervisor(_pr);
282
283         mutex_unlock(&acpi_ids_mutex);
284         return err;
285 }
286 static unsigned int __init get_max_acpi_id(void)
287 {
288         struct xenpf_pcpuinfo *info;
289         struct xen_platform_op op = {
290                 .cmd = XENPF_get_cpuinfo,
291                 .interface_version = XENPF_INTERFACE_VERSION,
292         };
293         int ret = 0;
294         unsigned int i, last_cpu, max_acpi_id = 0;
295
296         info = &op.u.pcpu_info;
297         info->xen_cpuid = 0;
298
299         ret = HYPERVISOR_platform_op(&op);
300         if (ret)
301                 return NR_CPUS;
302
303         /* The max_present is the same irregardless of the xen_cpuid */
304         last_cpu = op.u.pcpu_info.max_present;
305         for (i = 0; i <= last_cpu; i++) {
306                 info->xen_cpuid = i;
307                 ret = HYPERVISOR_platform_op(&op);
308                 if (ret)
309                         continue;
310                 max_acpi_id = max(info->acpi_id, max_acpi_id);
311         }
312         max_acpi_id *= 2; /* Slack for CPU hotplug support. */
313         pr_debug("Max ACPI ID: %u\n", max_acpi_id);
314         return max_acpi_id;
315 }
316 /*
317  * The read_acpi_id and check_acpi_ids are there to support the Xen
318  * oddity of virtual CPUs != physical CPUs in the initial domain.
319  * The user can supply 'xen_max_vcpus=X' on the Xen hypervisor line
320  * which will band the amount of CPUs the initial domain can see.
321  * In general that is OK, except it plays havoc with any of the
322  * for_each_[present|online]_cpu macros which are banded to the virtual
323  * CPU amount.
324  */
325 static acpi_status
326 read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv)
327 {
328         u32 acpi_id;
329         acpi_status status;
330         acpi_object_type acpi_type;
331         unsigned long long tmp;
332         union acpi_object object = { 0 };
333         struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
334         acpi_io_address pblk = 0;
335
336         status = acpi_get_type(handle, &acpi_type);
337         if (ACPI_FAILURE(status))
338                 return AE_OK;
339
340         switch (acpi_type) {
341         case ACPI_TYPE_PROCESSOR:
342                 status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
343                 if (ACPI_FAILURE(status))
344                         return AE_OK;
345                 acpi_id = object.processor.proc_id;
346                 pblk = object.processor.pblk_address;
347                 break;
348         case ACPI_TYPE_DEVICE:
349                 status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp);
350                 if (ACPI_FAILURE(status))
351                         return AE_OK;
352                 acpi_id = tmp;
353                 break;
354         default:
355                 return AE_OK;
356         }
357         if (invalid_phys_cpuid(acpi_get_phys_id(handle,
358                                                 acpi_type == ACPI_TYPE_DEVICE,
359                                                 acpi_id))) {
360                 pr_debug("CPU with ACPI ID %u is unavailable\n", acpi_id);
361                 return AE_OK;
362         }
363         /* There are more ACPI Processor objects than in x2APIC or MADT.
364          * This can happen with incorrect ACPI SSDT declerations. */
365         if (acpi_id >= nr_acpi_bits) {
366                 pr_debug("max acpi id %u, trying to set %u\n",
367                          nr_acpi_bits - 1, acpi_id);
368                 return AE_OK;
369         }
370         /* OK, There is a ACPI Processor object */
371         __set_bit(acpi_id, acpi_id_present);
372
373         pr_debug("ACPI CPU%u w/ PBLK:0x%lx\n", acpi_id, (unsigned long)pblk);
374
375         /* It has P-state dependencies */
376         if (!acpi_processor_get_psd(handle, &acpi_psd[acpi_id])) {
377                 pr_debug("ACPI CPU%u w/ PST:coord_type = %llu domain = %llu\n",
378                          acpi_id, acpi_psd[acpi_id].coord_type,
379                          acpi_psd[acpi_id].domain);
380         }
381
382         status = acpi_evaluate_object(handle, "_CST", NULL, &buffer);
383         if (ACPI_FAILURE(status)) {
384                 if (!pblk)
385                         return AE_OK;
386         }
387         /* .. and it has a C-state */
388         __set_bit(acpi_id, acpi_id_cst_present);
389
390         return AE_OK;
391 }
392 static int check_acpi_ids(struct acpi_processor *pr_backup)
393 {
394
395         if (!pr_backup)
396                 return -ENODEV;
397
398         if (acpi_id_present && acpi_id_cst_present)
399                 /* OK, done this once .. skip to uploading */
400                 goto upload;
401
402         /* All online CPUs have been processed at this stage. Now verify
403          * whether in fact "online CPUs" == physical CPUs.
404          */
405         acpi_id_present = bitmap_zalloc(nr_acpi_bits, GFP_KERNEL);
406         if (!acpi_id_present)
407                 return -ENOMEM;
408
409         acpi_id_cst_present = bitmap_zalloc(nr_acpi_bits, GFP_KERNEL);
410         if (!acpi_id_cst_present) {
411                 bitmap_free(acpi_id_present);
412                 return -ENOMEM;
413         }
414
415         acpi_psd = kcalloc(nr_acpi_bits, sizeof(struct acpi_psd_package),
416                            GFP_KERNEL);
417         if (!acpi_psd) {
418                 bitmap_free(acpi_id_present);
419                 bitmap_free(acpi_id_cst_present);
420                 return -ENOMEM;
421         }
422
423         acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
424                             ACPI_UINT32_MAX,
425                             read_acpi_id, NULL, NULL, NULL);
426         acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID, read_acpi_id, NULL, NULL);
427
428 upload:
429         if (!bitmap_equal(acpi_id_present, acpi_ids_done, nr_acpi_bits)) {
430                 unsigned int i;
431                 for_each_set_bit(i, acpi_id_present, nr_acpi_bits) {
432                         pr_backup->acpi_id = i;
433                         /* Mask out C-states if there are no _CST or PBLK */
434                         pr_backup->flags.power = test_bit(i, acpi_id_cst_present);
435                         /* num_entries is non-zero if we evaluated _PSD */
436                         if (acpi_psd[i].num_entries) {
437                                 memcpy(&pr_backup->performance->domain_info,
438                                        &acpi_psd[i],
439                                        sizeof(struct acpi_psd_package));
440                         }
441                         (void)upload_pm_data(pr_backup);
442                 }
443         }
444
445         return 0;
446 }
447
448 /* acpi_perf_data is a pointer to percpu data. */
449 static struct acpi_processor_performance __percpu *acpi_perf_data;
450
451 static void free_acpi_perf_data(void)
452 {
453         int i;
454
455         /* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
456         for_each_possible_cpu(i)
457                 free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
458                                  ->shared_cpu_map);
459         free_percpu(acpi_perf_data);
460 }
461
462 static int xen_upload_processor_pm_data(void)
463 {
464         struct acpi_processor *pr_backup = NULL;
465         int i;
466         int rc = 0;
467
468         pr_info("Uploading Xen processor PM info\n");
469
470         for_each_possible_cpu(i) {
471                 struct acpi_processor *_pr;
472                 _pr = per_cpu(processors, i /* APIC ID */);
473                 if (!_pr)
474                         continue;
475
476                 if (!pr_backup)
477                         pr_backup = kmemdup(_pr, sizeof(*_pr), GFP_KERNEL);
478                 (void)upload_pm_data(_pr);
479         }
480
481         rc = check_acpi_ids(pr_backup);
482         kfree(pr_backup);
483
484         return rc;
485 }
486
487 static void xen_acpi_processor_resume_worker(struct work_struct *dummy)
488 {
489         int rc;
490
491         bitmap_zero(acpi_ids_done, nr_acpi_bits);
492
493         rc = xen_upload_processor_pm_data();
494         if (rc != 0)
495                 pr_info("ACPI data upload failed, error = %d\n", rc);
496 }
497
498 static void xen_acpi_processor_resume(void)
499 {
500         static DECLARE_WORK(wq, xen_acpi_processor_resume_worker);
501
502         /*
503          * xen_upload_processor_pm_data() calls non-atomic code.
504          * However, the context for xen_acpi_processor_resume is syscore
505          * with only the boot CPU online and in an atomic context.
506          *
507          * So defer the upload for some point safer.
508          */
509         schedule_work(&wq);
510 }
511
512 static struct syscore_ops xap_syscore_ops = {
513         .resume = xen_acpi_processor_resume,
514 };
515
516 static int __init xen_acpi_processor_init(void)
517 {
518         int i;
519         int rc;
520
521         if (!xen_initial_domain())
522                 return -ENODEV;
523
524         nr_acpi_bits = get_max_acpi_id() + 1;
525         acpi_ids_done = bitmap_zalloc(nr_acpi_bits, GFP_KERNEL);
526         if (!acpi_ids_done)
527                 return -ENOMEM;
528
529         acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
530         if (!acpi_perf_data) {
531                 pr_debug("Memory allocation error for acpi_perf_data\n");
532                 bitmap_free(acpi_ids_done);
533                 return -ENOMEM;
534         }
535         for_each_possible_cpu(i) {
536                 if (!zalloc_cpumask_var_node(
537                         &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
538                         GFP_KERNEL, cpu_to_node(i))) {
539                         rc = -ENOMEM;
540                         goto err_out;
541                 }
542         }
543
544         /* Do initialization in ACPI core. It is OK to fail here. */
545         (void)acpi_processor_preregister_performance(acpi_perf_data);
546
547         for_each_possible_cpu(i) {
548                 struct acpi_processor *pr;
549                 struct acpi_processor_performance *perf;
550
551                 pr = per_cpu(processors, i);
552                 perf = per_cpu_ptr(acpi_perf_data, i);
553                 if (!pr)
554                         continue;
555
556                 pr->performance = perf;
557                 rc = acpi_processor_get_performance_info(pr);
558                 if (rc)
559                         goto err_out;
560         }
561
562         rc = xen_upload_processor_pm_data();
563         if (rc)
564                 goto err_unregister;
565
566         register_syscore_ops(&xap_syscore_ops);
567
568         return 0;
569 err_unregister:
570         for_each_possible_cpu(i)
571                 acpi_processor_unregister_performance(i);
572
573 err_out:
574         /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
575         free_acpi_perf_data();
576         bitmap_free(acpi_ids_done);
577         return rc;
578 }
579 static void __exit xen_acpi_processor_exit(void)
580 {
581         int i;
582
583         unregister_syscore_ops(&xap_syscore_ops);
584         bitmap_free(acpi_ids_done);
585         bitmap_free(acpi_id_present);
586         bitmap_free(acpi_id_cst_present);
587         kfree(acpi_psd);
588         for_each_possible_cpu(i)
589                 acpi_processor_unregister_performance(i);
590
591         free_acpi_perf_data();
592 }
593
594 MODULE_AUTHOR("Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>");
595 MODULE_DESCRIPTION("Xen ACPI Processor P-states (and Cx) driver which uploads PM data to Xen hypervisor");
596 MODULE_LICENSE("GPL");
597
598 /* We want to be loaded before the CPU freq scaling drivers are loaded.
599  * They are loaded in late_initcall. */
600 device_initcall(xen_acpi_processor_init);
601 module_exit(xen_acpi_processor_exit);