powerpc/mm: Avoid calling arch_enter/leave_lazy_mmu() in set_ptes
[platform/kernel/linux-starfive.git] / arch / loongarch / kernel / smp.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
4  *
5  * Derived from MIPS:
6  * Copyright (C) 2000, 2001 Kanoj Sarcar
7  * Copyright (C) 2000, 2001 Ralf Baechle
8  * Copyright (C) 2000, 2001 Silicon Graphics, Inc.
9  * Copyright (C) 2000, 2001, 2003 Broadcom Corporation
10  */
11 #include <linux/acpi.h>
12 #include <linux/cpu.h>
13 #include <linux/cpumask.h>
14 #include <linux/init.h>
15 #include <linux/interrupt.h>
16 #include <linux/seq_file.h>
17 #include <linux/smp.h>
18 #include <linux/threads.h>
19 #include <linux/export.h>
20 #include <linux/syscore_ops.h>
21 #include <linux/time.h>
22 #include <linux/tracepoint.h>
23 #include <linux/sched/hotplug.h>
24 #include <linux/sched/task_stack.h>
25
26 #include <asm/cpu.h>
27 #include <asm/idle.h>
28 #include <asm/loongson.h>
29 #include <asm/mmu_context.h>
30 #include <asm/numa.h>
31 #include <asm/processor.h>
32 #include <asm/setup.h>
33 #include <asm/time.h>
34
35 int __cpu_number_map[NR_CPUS];   /* Map physical to logical */
36 EXPORT_SYMBOL(__cpu_number_map);
37
38 int __cpu_logical_map[NR_CPUS];         /* Map logical to physical */
39 EXPORT_SYMBOL(__cpu_logical_map);
40
41 /* Representing the threads (siblings) of each logical CPU */
42 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
43 EXPORT_SYMBOL(cpu_sibling_map);
44
45 /* Representing the core map of multi-core chips of each logical CPU */
46 cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
47 EXPORT_SYMBOL(cpu_core_map);
48
49 static DECLARE_COMPLETION(cpu_starting);
50 static DECLARE_COMPLETION(cpu_running);
51
52 /*
53  * A logcal cpu mask containing only one VPE per core to
54  * reduce the number of IPIs on large MT systems.
55  */
56 cpumask_t cpu_foreign_map[NR_CPUS] __read_mostly;
57 EXPORT_SYMBOL(cpu_foreign_map);
58
59 /* representing cpus for which sibling maps can be computed */
60 static cpumask_t cpu_sibling_setup_map;
61
62 /* representing cpus for which core maps can be computed */
63 static cpumask_t cpu_core_setup_map;
64
65 struct secondary_data cpuboot_data;
66 static DEFINE_PER_CPU(int, cpu_state);
67
/* Index of each IPI kind in the ipi_irqs[] counters and in ipi_types[] */
enum ipi_msg_type {
	IPI_RESCHEDULE = 0,
	IPI_CALL_FUNCTION = 1,
};
72
73 static const char *ipi_types[NR_IPI] __tracepoint_string = {
74         [IPI_RESCHEDULE] = "Rescheduling interrupts",
75         [IPI_CALL_FUNCTION] = "Function call interrupts",
76 };
77
78 void show_ipi_list(struct seq_file *p, int prec)
79 {
80         unsigned int cpu, i;
81
82         for (i = 0; i < NR_IPI; i++) {
83                 seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : "");
84                 for_each_online_cpu(cpu)
85                         seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).ipi_irqs[i]);
86                 seq_printf(p, " LoongArch  %d  %s\n", i + 1, ipi_types[i]);
87         }
88 }
89
90 /* Send mailbox buffer via Mail_Send */
91 static void csr_mail_send(uint64_t data, int cpu, int mailbox)
92 {
93         uint64_t val;
94
95         /* Send high 32 bits */
96         val = IOCSR_MBUF_SEND_BLOCKING;
97         val |= (IOCSR_MBUF_SEND_BOX_HI(mailbox) << IOCSR_MBUF_SEND_BOX_SHIFT);
98         val |= (cpu << IOCSR_MBUF_SEND_CPU_SHIFT);
99         val |= (data & IOCSR_MBUF_SEND_H32_MASK);
100         iocsr_write64(val, LOONGARCH_IOCSR_MBUF_SEND);
101
102         /* Send low 32 bits */
103         val = IOCSR_MBUF_SEND_BLOCKING;
104         val |= (IOCSR_MBUF_SEND_BOX_LO(mailbox) << IOCSR_MBUF_SEND_BOX_SHIFT);
105         val |= (cpu << IOCSR_MBUF_SEND_CPU_SHIFT);
106         val |= (data << IOCSR_MBUF_SEND_BUF_SHIFT);
107         iocsr_write64(val, LOONGARCH_IOCSR_MBUF_SEND);
108 };
109
110 static u32 ipi_read_clear(int cpu)
111 {
112         u32 action;
113
114         /* Load the ipi register to figure out what we're supposed to do */
115         action = iocsr_read32(LOONGARCH_IOCSR_IPI_STATUS);
116         /* Clear the ipi register to clear the interrupt */
117         iocsr_write32(action, LOONGARCH_IOCSR_IPI_CLEAR);
118         wbflush();
119
120         return action;
121 }
122
123 static void ipi_write_action(int cpu, u32 action)
124 {
125         unsigned int irq = 0;
126
127         while ((irq = ffs(action))) {
128                 uint32_t val = IOCSR_IPI_SEND_BLOCKING;
129
130                 val |= (irq - 1);
131                 val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT);
132                 iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND);
133                 action &= ~BIT(irq - 1);
134         }
135 }
136
137 void loongson_send_ipi_single(int cpu, unsigned int action)
138 {
139         ipi_write_action(cpu_logical_map(cpu), (u32)action);
140 }
141
142 void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
143 {
144         unsigned int i;
145
146         for_each_cpu(i, mask)
147                 ipi_write_action(cpu_logical_map(i), (u32)action);
148 }
149
150 /*
151  * This function sends a 'reschedule' IPI to another CPU.
152  * it goes straight through and wastes no time serializing
153  * anything. Worst case is that we lose a reschedule ...
154  */
155 void arch_smp_send_reschedule(int cpu)
156 {
157         loongson_send_ipi_single(cpu, SMP_RESCHEDULE);
158 }
159 EXPORT_SYMBOL_GPL(arch_smp_send_reschedule);
160
161 irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
162 {
163         unsigned int action;
164         unsigned int cpu = smp_processor_id();
165
166         action = ipi_read_clear(cpu_logical_map(cpu));
167
168         if (action & SMP_RESCHEDULE) {
169                 scheduler_ipi();
170                 per_cpu(irq_stat, cpu).ipi_irqs[IPI_RESCHEDULE]++;
171         }
172
173         if (action & SMP_CALL_FUNCTION) {
174                 generic_smp_call_function_interrupt();
175                 per_cpu(irq_stat, cpu).ipi_irqs[IPI_CALL_FUNCTION]++;
176         }
177
178         return IRQ_HANDLED;
179 }
180
181 static void __init fdt_smp_setup(void)
182 {
183 #ifdef CONFIG_OF
184         unsigned int cpu, cpuid;
185         struct device_node *node = NULL;
186
187         for_each_of_cpu_node(node) {
188                 if (!of_device_is_available(node))
189                         continue;
190
191                 cpuid = of_get_cpu_hwid(node, 0);
192                 if (cpuid >= nr_cpu_ids)
193                         continue;
194
195                 if (cpuid == loongson_sysconf.boot_cpu_id) {
196                         cpu = 0;
197                         numa_add_cpu(cpu);
198                 } else {
199                         cpu = cpumask_next_zero(-1, cpu_present_mask);
200                 }
201
202                 num_processors++;
203                 set_cpu_possible(cpu, true);
204                 set_cpu_present(cpu, true);
205                 __cpu_number_map[cpuid] = cpu;
206                 __cpu_logical_map[cpu] = cpuid;
207         }
208
209         loongson_sysconf.nr_cpus = num_processors;
210         set_bit(0, &(loongson_sysconf.cores_io_master));
211 #endif
212 }
213
214 void __init loongson_smp_setup(void)
215 {
216         fdt_smp_setup();
217
218         cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package;
219         cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package;
220
221         iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN);
222         pr_info("Detected %i available CPU(s)\n", loongson_sysconf.nr_cpus);
223 }
224
225 void __init loongson_prepare_cpus(unsigned int max_cpus)
226 {
227         int i = 0;
228
229         parse_acpi_topology();
230
231         for (i = 0; i < loongson_sysconf.nr_cpus; i++) {
232                 set_cpu_present(i, true);
233                 csr_mail_send(0, __cpu_logical_map[i], 0);
234                 cpu_data[i].global_id = __cpu_logical_map[i];
235         }
236
237         per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
238 }
239
240 /*
241  * Setup the PC, SP, and TP of a secondary processor and start it running!
242  */
243 void loongson_boot_secondary(int cpu, struct task_struct *idle)
244 {
245         unsigned long entry;
246
247         pr_info("Booting CPU#%d...\n", cpu);
248
249         entry = __pa_symbol((unsigned long)&smpboot_entry);
250         cpuboot_data.stack = (unsigned long)__KSTK_TOS(idle);
251         cpuboot_data.thread_info = (unsigned long)task_thread_info(idle);
252
253         csr_mail_send(entry, cpu_logical_map(cpu), 0);
254
255         loongson_send_ipi_single(cpu, SMP_BOOT_CPU);
256 }
257
258 /*
259  * SMP init and finish on secondary CPUs
260  */
261 void loongson_init_secondary(void)
262 {
263         unsigned int cpu = smp_processor_id();
264         unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
265                              ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER;
266
267         change_csr_ecfg(ECFG0_IM, imask);
268
269         iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN);
270
271 #ifdef CONFIG_NUMA
272         numa_add_cpu(cpu);
273 #endif
274         per_cpu(cpu_state, cpu) = CPU_ONLINE;
275         cpu_data[cpu].package =
276                      cpu_logical_map(cpu) / loongson_sysconf.cores_per_package;
277         cpu_data[cpu].core = pptt_enabled ? cpu_data[cpu].core :
278                      cpu_logical_map(cpu) % loongson_sysconf.cores_per_package;
279 }
280
281 void loongson_smp_finish(void)
282 {
283         local_irq_enable();
284         iocsr_write64(0, LOONGARCH_IOCSR_MBUF0);
285         pr_info("CPU#%d finished\n", smp_processor_id());
286 }
287
288 #ifdef CONFIG_HOTPLUG_CPU
289
290 int loongson_cpu_disable(void)
291 {
292         unsigned long flags;
293         unsigned int cpu = smp_processor_id();
294
295         if (io_master(cpu))
296                 return -EBUSY;
297
298 #ifdef CONFIG_NUMA
299         numa_remove_cpu(cpu);
300 #endif
301         set_cpu_online(cpu, false);
302         calculate_cpu_foreign_map();
303         local_irq_save(flags);
304         irq_migrate_all_off_this_cpu();
305         clear_csr_ecfg(ECFG0_IM);
306         local_irq_restore(flags);
307         local_flush_tlb_all();
308
309         return 0;
310 }
311
312 void loongson_cpu_die(unsigned int cpu)
313 {
314         while (per_cpu(cpu_state, cpu) != CPU_DEAD)
315                 cpu_relax();
316
317         mb();
318 }
319
320 void __noreturn arch_cpu_idle_dead(void)
321 {
322         register uint64_t addr;
323         register void (*init_fn)(void);
324
325         idle_task_exit();
326         local_irq_enable();
327         set_csr_ecfg(ECFGF_IPI);
328         __this_cpu_write(cpu_state, CPU_DEAD);
329
330         __smp_mb();
331         do {
332                 __asm__ __volatile__("idle 0\n\t");
333                 addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0);
334         } while (addr == 0);
335
336         init_fn = (void *)TO_CACHE(addr);
337         iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR);
338
339         init_fn();
340         BUG();
341 }
342
343 #endif
344
345 /*
346  * Power management
347  */
348 #ifdef CONFIG_PM
349
/* Syscore suspend hook: nothing to do for the IPI state; always returns 0. */
static int loongson_ipi_suspend(void)
{
	return 0;
}
354
355 static void loongson_ipi_resume(void)
356 {
357         iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN);
358 }
359
360 static struct syscore_ops loongson_ipi_syscore_ops = {
361         .resume         = loongson_ipi_resume,
362         .suspend        = loongson_ipi_suspend,
363 };
364
365 /*
366  * Enable boot cpu ipi before enabling nonboot cpus
367  * during syscore_resume.
368  */
369 static int __init ipi_pm_init(void)
370 {
371         register_syscore_ops(&loongson_ipi_syscore_ops);
372         return 0;
373 }
374
375 core_initcall(ipi_pm_init);
376 #endif
377
378 static inline void set_cpu_sibling_map(int cpu)
379 {
380         int i;
381
382         cpumask_set_cpu(cpu, &cpu_sibling_setup_map);
383
384         for_each_cpu(i, &cpu_sibling_setup_map) {
385                 if (cpus_are_siblings(cpu, i)) {
386                         cpumask_set_cpu(i, &cpu_sibling_map[cpu]);
387                         cpumask_set_cpu(cpu, &cpu_sibling_map[i]);
388                 }
389         }
390 }
391
392 static inline void set_cpu_core_map(int cpu)
393 {
394         int i;
395
396         cpumask_set_cpu(cpu, &cpu_core_setup_map);
397
398         for_each_cpu(i, &cpu_core_setup_map) {
399                 if (cpu_data[cpu].package == cpu_data[i].package) {
400                         cpumask_set_cpu(i, &cpu_core_map[cpu]);
401                         cpumask_set_cpu(cpu, &cpu_core_map[i]);
402                 }
403         }
404 }
405
406 /*
407  * Calculate a new cpu_foreign_map mask whenever a
408  * new cpu appears or disappears.
409  */
410 void calculate_cpu_foreign_map(void)
411 {
412         int i, k, core_present;
413         cpumask_t temp_foreign_map;
414
415         /* Re-calculate the mask */
416         cpumask_clear(&temp_foreign_map);
417         for_each_online_cpu(i) {
418                 core_present = 0;
419                 for_each_cpu(k, &temp_foreign_map)
420                         if (cpus_are_siblings(i, k))
421                                 core_present = 1;
422                 if (!core_present)
423                         cpumask_set_cpu(i, &temp_foreign_map);
424         }
425
426         for_each_online_cpu(i)
427                 cpumask_andnot(&cpu_foreign_map[i],
428                                &temp_foreign_map, &cpu_sibling_map[i]);
429 }
430
431 /* Preload SMP state for boot cpu */
432 void smp_prepare_boot_cpu(void)
433 {
434         unsigned int cpu, node, rr_node;
435
436         set_cpu_possible(0, true);
437         set_cpu_online(0, true);
438         set_my_cpu_offset(per_cpu_offset(0));
439
440         rr_node = first_node(node_online_map);
441         for_each_possible_cpu(cpu) {
442                 node = early_cpu_to_node(cpu);
443
444                 /*
445                  * The mapping between present cpus and nodes has been
446                  * built during MADT and SRAT parsing.
447                  *
448                  * If possible cpus = present cpus here, early_cpu_to_node
449                  * will return valid node.
450                  *
451                  * If possible cpus > present cpus here (e.g. some possible
452                  * cpus will be added by cpu-hotplug later), for possible but
453                  * not present cpus, early_cpu_to_node will return NUMA_NO_NODE,
454                  * and we just map them to online nodes in round-robin way.
455                  * Once hotplugged, new correct mapping will be built for them.
456                  */
457                 if (node != NUMA_NO_NODE)
458                         set_cpu_numa_node(cpu, node);
459                 else {
460                         set_cpu_numa_node(cpu, rr_node);
461                         rr_node = next_node_in(rr_node, node_online_map);
462                 }
463         }
464 }
465
466 /* called from main before smp_init() */
467 void __init smp_prepare_cpus(unsigned int max_cpus)
468 {
469         init_new_context(current, &init_mm);
470         current_thread_info()->cpu = 0;
471         loongson_prepare_cpus(max_cpus);
472         set_cpu_sibling_map(0);
473         set_cpu_core_map(0);
474         calculate_cpu_foreign_map();
475 #ifndef CONFIG_HOTPLUG_CPU
476         init_cpu_present(cpu_possible_mask);
477 #endif
478 }
479
480 int __cpu_up(unsigned int cpu, struct task_struct *tidle)
481 {
482         loongson_boot_secondary(cpu, tidle);
483
484         /* Wait for CPU to start and be ready to sync counters */
485         if (!wait_for_completion_timeout(&cpu_starting,
486                                          msecs_to_jiffies(5000))) {
487                 pr_crit("CPU%u: failed to start\n", cpu);
488                 return -EIO;
489         }
490
491         /* Wait for CPU to finish startup & mark itself online before return */
492         wait_for_completion(&cpu_running);
493
494         return 0;
495 }
496
497 /*
498  * First C code run on the secondary CPUs after being started up by
499  * the master.
500  */
501 asmlinkage void start_secondary(void)
502 {
503         unsigned int cpu;
504
505         sync_counter();
506         cpu = smp_processor_id();
507         set_my_cpu_offset(per_cpu_offset(cpu));
508
509         cpu_probe();
510         constant_clockevent_init();
511         loongson_init_secondary();
512
513         set_cpu_sibling_map(cpu);
514         set_cpu_core_map(cpu);
515
516         notify_cpu_starting(cpu);
517
518         /* Notify boot CPU that we're starting */
519         complete(&cpu_starting);
520
521         /* The CPU is running, now mark it online */
522         set_cpu_online(cpu, true);
523
524         calculate_cpu_foreign_map();
525
526         /*
527          * Notify boot CPU that we're up & online and it can safely return
528          * from __cpu_up()
529          */
530         complete(&cpu_running);
531
532         /*
533          * irq will be enabled in loongson_smp_finish(), enabling it too
534          * early is dangerous.
535          */
536         WARN_ON_ONCE(!irqs_disabled());
537         loongson_smp_finish();
538
539         cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
540 }
541
542 void __init smp_cpus_done(unsigned int max_cpus)
543 {
544 }
545
546 static void stop_this_cpu(void *dummy)
547 {
548         set_cpu_online(smp_processor_id(), false);
549         calculate_cpu_foreign_map();
550         local_irq_disable();
551         while (true);
552 }
553
554 void smp_send_stop(void)
555 {
556         smp_call_function(stop_this_cpu, NULL, 0);
557 }
558
/*
 * setup_profiling_timer - no-op
 * @multiplier: ignored
 *
 * Always returns 0.
 */
int setup_profiling_timer(unsigned int multiplier)
{
	(void)multiplier;

	return 0;
}
563
/* Cross-call handler: flush the whole local TLB; @info is unused. */
static void flush_tlb_all_ipi(void *info)
{
	local_flush_tlb_all();
}
568
569 void flush_tlb_all(void)
570 {
571         on_each_cpu(flush_tlb_all_ipi, NULL, 1);
572 }
573
/* Cross-call handler: @mm is the struct mm_struct to flush locally. */
static void flush_tlb_mm_ipi(void *mm)
{
	struct mm_struct *target = mm;

	local_flush_tlb_mm(target);
}
578
579 void flush_tlb_mm(struct mm_struct *mm)
580 {
581         if (atomic_read(&mm->mm_users) == 0)
582                 return;         /* happens as a result of exit_mmap() */
583
584         preempt_disable();
585
586         if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) {
587                 on_each_cpu_mask(mm_cpumask(mm), flush_tlb_mm_ipi, mm, 1);
588         } else {
589                 unsigned int cpu;
590
591                 for_each_online_cpu(cpu) {
592                         if (cpu != smp_processor_id() && cpu_context(cpu, mm))
593                                 cpu_context(cpu, mm) = 0;
594                 }
595                 local_flush_tlb_mm(mm);
596         }
597
598         preempt_enable();
599 }
600
/* Argument bundle passed to the flush_tlb_*_ipi() cross-call handlers */
struct flush_tlb_data {
	struct vm_area_struct *vma;	/* left unset by the kernel-range flush */
	unsigned long addr1;		/* range start, or the single page address */
	unsigned long addr2;		/* range end; left unset by the page flush */
};
606
607 static void flush_tlb_range_ipi(void *info)
608 {
609         struct flush_tlb_data *fd = info;
610
611         local_flush_tlb_range(fd->vma, fd->addr1, fd->addr2);
612 }
613
614 void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
615 {
616         struct mm_struct *mm = vma->vm_mm;
617
618         preempt_disable();
619         if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) {
620                 struct flush_tlb_data fd = {
621                         .vma = vma,
622                         .addr1 = start,
623                         .addr2 = end,
624                 };
625
626                 on_each_cpu_mask(mm_cpumask(mm), flush_tlb_range_ipi, &fd, 1);
627         } else {
628                 unsigned int cpu;
629
630                 for_each_online_cpu(cpu) {
631                         if (cpu != smp_processor_id() && cpu_context(cpu, mm))
632                                 cpu_context(cpu, mm) = 0;
633                 }
634                 local_flush_tlb_range(vma, start, end);
635         }
636         preempt_enable();
637 }
638
639 static void flush_tlb_kernel_range_ipi(void *info)
640 {
641         struct flush_tlb_data *fd = info;
642
643         local_flush_tlb_kernel_range(fd->addr1, fd->addr2);
644 }
645
646 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
647 {
648         struct flush_tlb_data fd = {
649                 .addr1 = start,
650                 .addr2 = end,
651         };
652
653         on_each_cpu(flush_tlb_kernel_range_ipi, &fd, 1);
654 }
655
656 static void flush_tlb_page_ipi(void *info)
657 {
658         struct flush_tlb_data *fd = info;
659
660         local_flush_tlb_page(fd->vma, fd->addr1);
661 }
662
663 void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
664 {
665         preempt_disable();
666         if ((atomic_read(&vma->vm_mm->mm_users) != 1) || (current->mm != vma->vm_mm)) {
667                 struct flush_tlb_data fd = {
668                         .vma = vma,
669                         .addr1 = page,
670                 };
671
672                 on_each_cpu_mask(mm_cpumask(vma->vm_mm), flush_tlb_page_ipi, &fd, 1);
673         } else {
674                 unsigned int cpu;
675
676                 for_each_online_cpu(cpu) {
677                         if (cpu != smp_processor_id() && cpu_context(cpu, vma->vm_mm))
678                                 cpu_context(cpu, vma->vm_mm) = 0;
679                 }
680                 local_flush_tlb_page(vma, page);
681         }
682         preempt_enable();
683 }
684 EXPORT_SYMBOL(flush_tlb_page);
685
/* Cross-call handler: @info carries the virtual address itself, not a pointer. */
static void flush_tlb_one_ipi(void *info)
{
	local_flush_tlb_one((unsigned long)info);
}
692
693 void flush_tlb_one(unsigned long vaddr)
694 {
695         on_each_cpu(flush_tlb_one_ipi, (void *)vaddr, 1);
696 }
697 EXPORT_SYMBOL(flush_tlb_one);