drivers/xen/events/events_base.c (platform/kernel/linux-starfive.git)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Xen event channels
4  *
5  * Xen models interrupts with abstract event channels.  Because each
6  * domain gets 1024 event channels, but NR_IRQS is not that large, we
7  * must dynamically map irqs<->event channels.  The event channels
8  * interface with the rest of the kernel by defining a xen interrupt
9  * chip.  When an event is received, it is mapped to an irq and sent
10  * through the normal interrupt processing path.
11  *
12  * There are four kinds of events which can be mapped to an event
13  * channel:
14  *
15  * 1. Inter-domain notifications.  This includes all the virtual
16  *    device events, since they're driven by front-ends in another domain
17  *    (typically dom0).
18  * 2. VIRQs, typically used for timers.  These are per-cpu events.
19  * 3. IPIs.
20  * 4. PIRQs - Hardware interrupts.
21  *
22  * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
23  */
24
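/*
 * Illustrative usage sketch (not part of this file's code): a typical
 * frontend driver binds an event channel port it obtained (e.g. via
 * xenbus) to an irq handler and kicks the remote end through the irq.
 * Handler and variable names below are invented for the example.
 *
 *	static irqreturn_t my_evtchn_handler(int irq, void *dev_id)
 *	{
 *		return IRQ_HANDLED;	// consume the work signalled here
 *	}
 *
 *	irq = bind_evtchn_to_irqhandler(evtchn, my_evtchn_handler, 0,
 *					"my-frontend", my_dev);
 *	if (irq < 0)
 *		return irq;
 *	...
 *	notify_remote_via_irq(irq);		// poke the other end
 *	...
 *	unbind_from_irqhandler(irq, my_dev);	// tear everything down
 */
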
25 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
26
27 #include <linux/linkage.h>
28 #include <linux/interrupt.h>
29 #include <linux/irq.h>
30 #include <linux/moduleparam.h>
31 #include <linux/string.h>
32 #include <linux/memblock.h>
33 #include <linux/slab.h>
34 #include <linux/irqnr.h>
35 #include <linux/pci.h>
36 #include <linux/spinlock.h>
37 #include <linux/cpuhotplug.h>
38 #include <linux/atomic.h>
39 #include <linux/ktime.h>
40
41 #ifdef CONFIG_X86
42 #include <asm/desc.h>
43 #include <asm/ptrace.h>
44 #include <asm/idtentry.h>
45 #include <asm/irq.h>
46 #include <asm/io_apic.h>
47 #include <asm/i8259.h>
48 #include <asm/xen/cpuid.h>
49 #include <asm/xen/pci.h>
50 #endif
51 #include <asm/sync_bitops.h>
52 #include <asm/xen/hypercall.h>
53 #include <asm/xen/hypervisor.h>
54 #include <xen/page.h>
55
56 #include <xen/xen.h>
57 #include <xen/hvm.h>
58 #include <xen/xen-ops.h>
59 #include <xen/events.h>
60 #include <xen/interface/xen.h>
61 #include <xen/interface/event_channel.h>
62 #include <xen/interface/hvm/hvm_op.h>
63 #include <xen/interface/hvm/params.h>
64 #include <xen/interface/physdev.h>
65 #include <xen/interface/sched.h>
66 #include <xen/interface/vcpu.h>
67 #include <xen/xenbus.h>
68 #include <asm/hw_irq.h>
69
70 #include "events_internal.h"
71
72 #undef MODULE_PARAM_PREFIX
73 #define MODULE_PARAM_PREFIX "xen."
74
75 /* Interrupt types. */
76 enum xen_irq_type {
77         IRQT_UNBOUND = 0,
78         IRQT_PIRQ,
79         IRQT_VIRQ,
80         IRQT_IPI,
81         IRQT_EVTCHN
82 };
83
84 /*
85  * Packed IRQ information:
86  * type - enum xen_irq_type
87  * event channel - irq->event channel mapping
88  * cpu - cpu this event channel is bound to
89  * index - type-specific information:
90  *    PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM
91  *           guest, or GSI (real passthrough IRQ) of the device.
92  *    VIRQ - virq number
93  *    IPI - IPI vector
94  *    EVTCHN -
95  */
96 struct irq_info {
97         struct list_head list;
98         struct list_head eoi_list;
99         short refcnt;
100         u8 spurious_cnt;
101         u8 is_accounted;
102         short type;             /* type: IRQT_* */
103         u8 mask_reason;         /* Why is event channel masked */
104 #define EVT_MASK_REASON_EXPLICIT        0x01
105 #define EVT_MASK_REASON_TEMPORARY       0x02
106 #define EVT_MASK_REASON_EOI_PENDING     0x04
107         u8 is_active;           /* Is event just being handled? */
108         unsigned irq;
109         evtchn_port_t evtchn;   /* event channel */
110         unsigned short cpu;     /* cpu bound */
111         unsigned short eoi_cpu; /* EOI must happen on this cpu-1 */
112         unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
113         u64 eoi_time;           /* Time in jiffies when to EOI. */
114         raw_spinlock_t lock;
115         bool is_static;           /* Is event channel static */
116
117         union {
118                 unsigned short virq;
119                 enum ipi_vector ipi;
120                 struct {
121                         unsigned short pirq;
122                         unsigned short gsi;
123                         unsigned char vector;
124                         unsigned char flags;
125                         uint16_t domid;
126                 } pirq;
127                 struct xenbus_device *interdomain;
128         } u;
129 };
130
131 #define PIRQ_NEEDS_EOI  (1 << 0)
132 #define PIRQ_SHAREABLE  (1 << 1)
133 #define PIRQ_MSI_GROUP  (1 << 2)
134
135 static uint __read_mostly event_loop_timeout = 2;
136 module_param(event_loop_timeout, uint, 0644);
137
138 static uint __read_mostly event_eoi_delay = 10;
139 module_param(event_eoi_delay, uint, 0644);
140
141 const struct evtchn_ops *evtchn_ops;
142
143 /*
144  * This lock protects updates to the following mapping and reference-count
145  * arrays. The lock does not need to be acquired to read the mapping tables.
146  */
147 static DEFINE_MUTEX(irq_mapping_update_lock);
148
149 /*
150  * Lock protecting event handling loop against removing event channels.
151  * Adding of event channels is no issue as the associated IRQ becomes active
152  * only after everything is setup (before request_[threaded_]irq() the handler
153  * can't be entered for an event, as the event channel will be unmasked only
154  * then).
155  */
156 static DEFINE_RWLOCK(evtchn_rwlock);
157
158 /*
159  * Lock hierarchy:
160  *
161  * irq_mapping_update_lock
162  *   evtchn_rwlock
163  *     IRQ-desc lock
164  *       percpu eoi_list_lock
165  *         irq_info->lock
166  */
167
168 static LIST_HEAD(xen_irq_list_head);
169
170 /* IRQ <-> VIRQ mapping. */
171 static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
172
173 /* IRQ <-> IPI mapping */
174 static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
175
176 /* Event channel distribution data */
177 static atomic_t channels_on_cpu[NR_CPUS];
178
179 static int **evtchn_to_irq;
180 #ifdef CONFIG_X86
181 static unsigned long *pirq_eoi_map;
182 #endif
183 static bool (*pirq_needs_eoi)(unsigned irq);
184
185 #define EVTCHN_ROW(e)  (e / (PAGE_SIZE/sizeof(**evtchn_to_irq)))
186 #define EVTCHN_COL(e)  (e % (PAGE_SIZE/sizeof(**evtchn_to_irq)))
187 #define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq))
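/*
 * Worked example (assuming 4 KiB pages and 4-byte int entries): each row
 * holds PAGE_SIZE / sizeof(int) = 1024 entries, so event channel 1500 is
 * found at row EVTCHN_ROW(1500) = 1, column EVTCHN_COL(1500) = 476.
 */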
188
189 /* Xen will never allocate port zero for any purpose. */
190 #define VALID_EVTCHN(chn)       ((chn) != 0)
191
192 static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY];
193
194 static struct irq_chip xen_dynamic_chip;
195 static struct irq_chip xen_lateeoi_chip;
196 static struct irq_chip xen_percpu_chip;
197 static struct irq_chip xen_pirq_chip;
198 static void enable_dynirq(struct irq_data *data);
199 static void disable_dynirq(struct irq_data *data);
200
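/*
 * Per-cpu epoch counter, incremented at the end of each event handling
 * upcall (see __xen_evtchn_do_upcall()).  The lateeoi code compares it
 * against the epoch recorded when an event was delivered in order to
 * decide whether an EOI is being issued from within the same upcall and
 * therefore has to be deferred.
 */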
201 static DEFINE_PER_CPU(unsigned int, irq_epoch);
202
203 static void clear_evtchn_to_irq_row(int *evtchn_row)
204 {
205         unsigned col;
206
207         for (col = 0; col < EVTCHN_PER_ROW; col++)
208                 WRITE_ONCE(evtchn_row[col], -1);
209 }
210
211 static void clear_evtchn_to_irq_all(void)
212 {
213         unsigned row;
214
215         for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) {
216                 if (evtchn_to_irq[row] == NULL)
217                         continue;
218                 clear_evtchn_to_irq_row(evtchn_to_irq[row]);
219         }
220 }
221
222 static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
223 {
224         unsigned row;
225         unsigned col;
226         int *evtchn_row;
227
228         if (evtchn >= xen_evtchn_max_channels())
229                 return -EINVAL;
230
231         row = EVTCHN_ROW(evtchn);
232         col = EVTCHN_COL(evtchn);
233
234         if (evtchn_to_irq[row] == NULL) {
235                 /* Unallocated irq entries return -1 anyway */
236                 if (irq == -1)
237                         return 0;
238
239                 evtchn_row = (int *) __get_free_pages(GFP_KERNEL, 0);
240                 if (evtchn_row == NULL)
241                         return -ENOMEM;
242
243                 clear_evtchn_to_irq_row(evtchn_row);
244
245                 /*
246                  * We've prepared an empty row for the mapping. If a different
247                  * thread was faster inserting it, we can drop ours.
248                  */
249                 if (cmpxchg(&evtchn_to_irq[row], NULL, evtchn_row) != NULL)
250                         free_page((unsigned long) evtchn_row);
251         }
252
253         WRITE_ONCE(evtchn_to_irq[row][col], irq);
254         return 0;
255 }
256
257 int get_evtchn_to_irq(evtchn_port_t evtchn)
258 {
259         if (evtchn >= xen_evtchn_max_channels())
260                 return -1;
261         if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
262                 return -1;
263         return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
264 }
265
266 /* Get info for IRQ */
267 static struct irq_info *info_for_irq(unsigned irq)
268 {
269         if (irq < nr_legacy_irqs())
270                 return legacy_info_ptrs[irq];
271         else
272                 return irq_get_chip_data(irq);
273 }
274
275 static void set_info_for_irq(unsigned int irq, struct irq_info *info)
276 {
277         if (irq < nr_legacy_irqs())
278                 legacy_info_ptrs[irq] = info;
279         else
280                 irq_set_chip_data(irq, info);
281 }
282
283 /* Per CPU channel accounting */
284 static void channels_on_cpu_dec(struct irq_info *info)
285 {
286         if (!info->is_accounted)
287                 return;
288
289         info->is_accounted = 0;
290
291         if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
292                 return;
293
294         WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1 , 0));
295 }
296
297 static void channels_on_cpu_inc(struct irq_info *info)
298 {
299         if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
300                 return;
301
302         if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1,
303                                             INT_MAX)))
304                 return;
305
306         info->is_accounted = 1;
307 }
308
309 /* Constructors for packed IRQ information. */
310 static int xen_irq_info_common_setup(struct irq_info *info,
311                                      unsigned irq,
312                                      enum xen_irq_type type,
313                                      evtchn_port_t evtchn,
314                                      unsigned short cpu)
315 {
316         int ret;
317
318         BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
319
320         info->type = type;
321         info->irq = irq;
322         info->evtchn = evtchn;
323         info->cpu = cpu;
324         info->mask_reason = EVT_MASK_REASON_EXPLICIT;
325         raw_spin_lock_init(&info->lock);
326
327         ret = set_evtchn_to_irq(evtchn, irq);
328         if (ret < 0)
329                 return ret;
330
331         irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
332
333         return xen_evtchn_port_setup(evtchn);
334 }
335
336 static int xen_irq_info_evtchn_setup(unsigned irq,
337                                      evtchn_port_t evtchn,
338                                      struct xenbus_device *dev)
339 {
340         struct irq_info *info = info_for_irq(irq);
341         int ret;
342
343         ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
344         info->u.interdomain = dev;
345         if (dev)
346                 atomic_inc(&dev->event_channels);
347
348         return ret;
349 }
350
351 static int xen_irq_info_ipi_setup(unsigned cpu,
352                                   unsigned irq,
353                                   evtchn_port_t evtchn,
354                                   enum ipi_vector ipi)
355 {
356         struct irq_info *info = info_for_irq(irq);
357
358         info->u.ipi = ipi;
359
360         per_cpu(ipi_to_irq, cpu)[ipi] = irq;
361
362         return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
363 }
364
365 static int xen_irq_info_virq_setup(unsigned cpu,
366                                    unsigned irq,
367                                    evtchn_port_t evtchn,
368                                    unsigned virq)
369 {
370         struct irq_info *info = info_for_irq(irq);
371
372         info->u.virq = virq;
373
374         per_cpu(virq_to_irq, cpu)[virq] = irq;
375
376         return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
377 }
378
379 static int xen_irq_info_pirq_setup(unsigned irq,
380                                    evtchn_port_t evtchn,
381                                    unsigned pirq,
382                                    unsigned gsi,
383                                    uint16_t domid,
384                                    unsigned char flags)
385 {
386         struct irq_info *info = info_for_irq(irq);
387
388         info->u.pirq.pirq = pirq;
389         info->u.pirq.gsi = gsi;
390         info->u.pirq.domid = domid;
391         info->u.pirq.flags = flags;
392
393         return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
394 }
395
396 static void xen_irq_info_cleanup(struct irq_info *info)
397 {
398         set_evtchn_to_irq(info->evtchn, -1);
399         xen_evtchn_port_remove(info->evtchn, info->cpu);
400         info->evtchn = 0;
401         channels_on_cpu_dec(info);
402 }
403
404 /*
405  * Accessors for packed IRQ information.
406  */
407 evtchn_port_t evtchn_from_irq(unsigned irq)
408 {
409         const struct irq_info *info = NULL;
410
411         if (likely(irq < nr_irqs))
412                 info = info_for_irq(irq);
413         if (!info)
414                 return 0;
415
416         return info->evtchn;
417 }
418
419 unsigned int irq_from_evtchn(evtchn_port_t evtchn)
420 {
421         return get_evtchn_to_irq(evtchn);
422 }
423 EXPORT_SYMBOL_GPL(irq_from_evtchn);
424
425 int irq_from_virq(unsigned int cpu, unsigned int virq)
426 {
427         return per_cpu(virq_to_irq, cpu)[virq];
428 }
429
430 static enum ipi_vector ipi_from_irq(unsigned irq)
431 {
432         struct irq_info *info = info_for_irq(irq);
433
434         BUG_ON(info == NULL);
435         BUG_ON(info->type != IRQT_IPI);
436
437         return info->u.ipi;
438 }
439
440 static unsigned virq_from_irq(unsigned irq)
441 {
442         struct irq_info *info = info_for_irq(irq);
443
444         BUG_ON(info == NULL);
445         BUG_ON(info->type != IRQT_VIRQ);
446
447         return info->u.virq;
448 }
449
450 static unsigned pirq_from_irq(unsigned irq)
451 {
452         struct irq_info *info = info_for_irq(irq);
453
454         BUG_ON(info == NULL);
455         BUG_ON(info->type != IRQT_PIRQ);
456
457         return info->u.pirq.pirq;
458 }
459
460 static enum xen_irq_type type_from_irq(unsigned irq)
461 {
462         return info_for_irq(irq)->type;
463 }
464
465 static unsigned cpu_from_irq(unsigned irq)
466 {
467         return info_for_irq(irq)->cpu;
468 }
469
470 unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
471 {
472         int irq = get_evtchn_to_irq(evtchn);
473         unsigned ret = 0;
474
475         if (irq != -1)
476                 ret = cpu_from_irq(irq);
477
478         return ret;
479 }
480
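/*
 * Mask handling note: an event channel stays masked for as long as at
 * least one EVT_MASK_REASON_* bit is set in mask_reason.  For example,
 * masking for EVT_MASK_REASON_EXPLICIT and then EVT_MASK_REASON_EOI_PENDING
 * followed by unmasking only the EOI_PENDING reason leaves the channel
 * masked until the EXPLICIT reason is dropped as well.
 */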
481 static void do_mask(struct irq_info *info, u8 reason)
482 {
483         unsigned long flags;
484
485         raw_spin_lock_irqsave(&info->lock, flags);
486
487         if (!info->mask_reason)
488                 mask_evtchn(info->evtchn);
489
490         info->mask_reason |= reason;
491
492         raw_spin_unlock_irqrestore(&info->lock, flags);
493 }
494
495 static void do_unmask(struct irq_info *info, u8 reason)
496 {
497         unsigned long flags;
498
499         raw_spin_lock_irqsave(&info->lock, flags);
500
501         info->mask_reason &= ~reason;
502
503         if (!info->mask_reason)
504                 unmask_evtchn(info->evtchn);
505
506         raw_spin_unlock_irqrestore(&info->lock, flags);
507 }
508
509 #ifdef CONFIG_X86
510 static bool pirq_check_eoi_map(unsigned irq)
511 {
512         return test_bit(pirq_from_irq(irq), pirq_eoi_map);
513 }
514 #endif
515
516 static bool pirq_needs_eoi_flag(unsigned irq)
517 {
518         struct irq_info *info = info_for_irq(irq);
519         BUG_ON(info->type != IRQT_PIRQ);
520
521         return info->u.pirq.flags & PIRQ_NEEDS_EOI;
522 }
523
524 static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
525                                bool force_affinity)
526 {
527         int irq = get_evtchn_to_irq(evtchn);
528         struct irq_info *info = info_for_irq(irq);
529
530         BUG_ON(irq == -1);
531
532         if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
533                 struct irq_data *data = irq_get_irq_data(irq);
534
535                 irq_data_update_affinity(data, cpumask_of(cpu));
536                 irq_data_update_effective_affinity(data, cpumask_of(cpu));
537         }
538
539         xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
540
541         channels_on_cpu_dec(info);
542         info->cpu = cpu;
543         channels_on_cpu_inc(info);
544 }
545
546 /**
547  * notify_remote_via_irq - send event to remote end of event channel via irq
548  * @irq: irq of event channel to send event to
549  *
550  * Unlike notify_remote_via_evtchn(), this is safe to use across
551  * save/restore. Notifications on a broken connection are silently
552  * dropped.
553  */
554 void notify_remote_via_irq(int irq)
555 {
556         evtchn_port_t evtchn = evtchn_from_irq(irq);
557
558         if (VALID_EVTCHN(evtchn))
559                 notify_remote_via_evtchn(evtchn);
560 }
561 EXPORT_SYMBOL_GPL(notify_remote_via_irq);
562
563 struct lateeoi_work {
564         struct delayed_work delayed;
565         spinlock_t eoi_list_lock;
566         struct list_head eoi_list;
567 };
568
569 static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
570
571 static void lateeoi_list_del(struct irq_info *info)
572 {
573         struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
574         unsigned long flags;
575
576         spin_lock_irqsave(&eoi->eoi_list_lock, flags);
577         list_del_init(&info->eoi_list);
578         spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
579 }
580
581 static void lateeoi_list_add(struct irq_info *info)
582 {
583         struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
584         struct irq_info *elem;
585         u64 now = get_jiffies_64();
586         unsigned long delay;
587         unsigned long flags;
588
589         if (now < info->eoi_time)
590                 delay = info->eoi_time - now;
591         else
592                 delay = 1;
593
594         spin_lock_irqsave(&eoi->eoi_list_lock, flags);
595
596         if (list_empty(&eoi->eoi_list)) {
597                 list_add(&info->eoi_list, &eoi->eoi_list);
598                 mod_delayed_work_on(info->eoi_cpu, system_wq,
599                                     &eoi->delayed, delay);
600         } else {
601                 list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
602                         if (elem->eoi_time <= info->eoi_time)
603                                 break;
604                 }
605                 list_add(&info->eoi_list, &elem->eoi_list);
606         }
607
608         spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
609 }
610
611 static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
612 {
613         evtchn_port_t evtchn;
614         unsigned int cpu;
615         unsigned int delay = 0;
616
617         evtchn = info->evtchn;
618         if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
619                 return;
620
621         if (spurious) {
622                 struct xenbus_device *dev = info->u.interdomain;
623                 unsigned int threshold = 1;
624
625                 if (dev && dev->spurious_threshold)
626                         threshold = dev->spurious_threshold;
627
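                /*
                 * Exponential back-off (descriptive note): once spurious_cnt
                 * exceeds the threshold, the EOI below is delayed by
                 * 1 << (spurious_cnt - 1 - threshold) jiffies, i.e. the
                 * delay doubles with each further spurious event, capped
                 * at HZ (roughly one second).
                 */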
628                 if ((1 << info->spurious_cnt) < (HZ << 2)) {
629                         if (info->spurious_cnt != 0xFF)
630                                 info->spurious_cnt++;
631                 }
632                 if (info->spurious_cnt > threshold) {
633                         delay = 1 << (info->spurious_cnt - 1 - threshold);
634                         if (delay > HZ)
635                                 delay = HZ;
636                         if (!info->eoi_time)
637                                 info->eoi_cpu = smp_processor_id();
638                         info->eoi_time = get_jiffies_64() + delay;
639                         if (dev)
640                                 atomic_add(delay, &dev->jiffies_eoi_delayed);
641                 }
642                 if (dev)
643                         atomic_inc(&dev->spurious_events);
644         } else {
645                 info->spurious_cnt = 0;
646         }
647
648         cpu = info->eoi_cpu;
649         if (info->eoi_time &&
650             (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) {
651                 lateeoi_list_add(info);
652                 return;
653         }
654
655         info->eoi_time = 0;
656
657         /* is_active hasn't been reset yet, do it now. */
658         smp_store_release(&info->is_active, 0);
659         do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
660 }
661
662 static void xen_irq_lateeoi_worker(struct work_struct *work)
663 {
664         struct lateeoi_work *eoi;
665         struct irq_info *info;
666         u64 now = get_jiffies_64();
667         unsigned long flags;
668
669         eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
670
671         read_lock_irqsave(&evtchn_rwlock, flags);
672
673         while (true) {
674                 spin_lock(&eoi->eoi_list_lock);
675
676                 info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
677                                                 eoi_list);
678
679                 if (info == NULL || now < info->eoi_time) {
680                         spin_unlock(&eoi->eoi_list_lock);
681                         break;
682                 }
683
684                 list_del_init(&info->eoi_list);
685
686                 spin_unlock(&eoi->eoi_list_lock);
687
688                 info->eoi_time = 0;
689
690                 xen_irq_lateeoi_locked(info, false);
691         }
692
693         if (info)
694                 mod_delayed_work_on(info->eoi_cpu, system_wq,
695                                     &eoi->delayed, info->eoi_time - now);
696
697         read_unlock_irqrestore(&evtchn_rwlock, flags);
698 }
699
700 static void xen_cpu_init_eoi(unsigned int cpu)
701 {
702         struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);
703
704         INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
705         spin_lock_init(&eoi->eoi_list_lock);
706         INIT_LIST_HEAD(&eoi->eoi_list);
707 }
708
709 void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
710 {
711         struct irq_info *info;
712         unsigned long flags;
713
714         read_lock_irqsave(&evtchn_rwlock, flags);
715
716         info = info_for_irq(irq);
717
718         if (info)
719                 xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
720
721         read_unlock_irqrestore(&evtchn_rwlock, flags);
722 }
723 EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
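/*
 * Illustrative lateeoi usage sketch (handler and variable names invented):
 * backends that want to be able to throttle misbehaving frontends bind with
 * the lateeoi variant and signal the EOI themselves once the event has
 * really been processed:
 *
 *	irq = bind_evtchn_to_irqhandler_lateeoi(evtchn, my_handler, 0,
 *						"my-backend", my_dev);
 *	...
 *	// in the handler, when processing is finished; pass
 *	// XEN_EOI_FLAG_SPURIOUS if the event carried no work:
 *	xen_irq_lateeoi(irq, spurious ? XEN_EOI_FLAG_SPURIOUS : 0);
 */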
724
725 static void xen_irq_init(unsigned irq)
726 {
727         struct irq_info *info;
728
729         info = kzalloc(sizeof(*info), GFP_KERNEL);
730         if (info == NULL)
731                 panic("Unable to allocate metadata for IRQ%d\n", irq);
732
733         info->type = IRQT_UNBOUND;
734         info->refcnt = -1;
735
736         set_info_for_irq(irq, info);
737         /*
738          * Interrupt affinity setting can be immediate. No point
739          * in delaying it until an interrupt is handled.
740          */
741         irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
742
743         INIT_LIST_HEAD(&info->eoi_list);
744         list_add_tail(&info->list, &xen_irq_list_head);
745 }
746
747 static int __must_check xen_allocate_irqs_dynamic(int nvec)
748 {
749         int i, irq = irq_alloc_descs(-1, 0, nvec, -1);
750
751         if (irq >= 0) {
752                 for (i = 0; i < nvec; i++)
753                         xen_irq_init(irq + i);
754         }
755
756         return irq;
757 }
758
759 static inline int __must_check xen_allocate_irq_dynamic(void)
760 {
761
762         return xen_allocate_irqs_dynamic(1);
763 }
764
765 static int __must_check xen_allocate_irq_gsi(unsigned gsi)
766 {
767         int irq;
768
769         /*
770          * A PV guest has no concept of a GSI (since it has no ACPI
771          * nor access to/knowledge of the physical APICs). Therefore
772          * all IRQs are dynamically allocated from the entire IRQ
773          * space.
774          */
775         if (xen_pv_domain() && !xen_initial_domain())
776                 return xen_allocate_irq_dynamic();
777
778         /* Legacy IRQ descriptors are already allocated by the arch. */
779         if (gsi < nr_legacy_irqs())
780                 irq = gsi;
781         else
782                 irq = irq_alloc_desc_at(gsi, -1);
783
784         xen_irq_init(irq);
785
786         return irq;
787 }
788
789 static void xen_free_irq(unsigned irq)
790 {
791         struct irq_info *info = info_for_irq(irq);
792         unsigned long flags;
793
794         if (WARN_ON(!info))
795                 return;
796
797         write_lock_irqsave(&evtchn_rwlock, flags);
798
799         if (!list_empty(&info->eoi_list))
800                 lateeoi_list_del(info);
801
802         list_del(&info->list);
803
804         set_info_for_irq(irq, NULL);
805
806         WARN_ON(info->refcnt > 0);
807
808         write_unlock_irqrestore(&evtchn_rwlock, flags);
809
810         kfree(info);
811
812         /* Legacy IRQ descriptors are managed by the arch. */
813         if (irq < nr_legacy_irqs())
814                 return;
815
816         irq_free_desc(irq);
817 }
818
819 /* Not called for lateeoi events. */
820 static void event_handler_exit(struct irq_info *info)
821 {
822         smp_store_release(&info->is_active, 0);
823         clear_evtchn(info->evtchn);
824 }
825
826 static void pirq_query_unmask(int irq)
827 {
828         struct physdev_irq_status_query irq_status;
829         struct irq_info *info = info_for_irq(irq);
830
831         BUG_ON(info->type != IRQT_PIRQ);
832
833         irq_status.irq = pirq_from_irq(irq);
834         if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
835                 irq_status.flags = 0;
836
837         info->u.pirq.flags &= ~PIRQ_NEEDS_EOI;
838         if (irq_status.flags & XENIRQSTAT_needs_eoi)
839                 info->u.pirq.flags |= PIRQ_NEEDS_EOI;
840 }
841
842 static void eoi_pirq(struct irq_data *data)
843 {
844         struct irq_info *info = info_for_irq(data->irq);
845         evtchn_port_t evtchn = info ? info->evtchn : 0;
846         struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
847         int rc = 0;
848
849         if (!VALID_EVTCHN(evtchn))
850                 return;
851
852         event_handler_exit(info);
853
854         if (pirq_needs_eoi(data->irq)) {
855                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
856                 WARN_ON(rc);
857         }
858 }
859
860 static void mask_ack_pirq(struct irq_data *data)
861 {
862         disable_dynirq(data);
863         eoi_pirq(data);
864 }
865
866 static unsigned int __startup_pirq(unsigned int irq)
867 {
868         struct evtchn_bind_pirq bind_pirq;
869         struct irq_info *info = info_for_irq(irq);
870         evtchn_port_t evtchn = evtchn_from_irq(irq);
871         int rc;
872
873         BUG_ON(info->type != IRQT_PIRQ);
874
875         if (VALID_EVTCHN(evtchn))
876                 goto out;
877
878         bind_pirq.pirq = pirq_from_irq(irq);
879         /* NB. We are happy to share unless we are probing. */
880         bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
881                                         BIND_PIRQ__WILL_SHARE : 0;
882         rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
883         if (rc != 0) {
884                 pr_warn("Failed to obtain physical IRQ %d\n", irq);
885                 return 0;
886         }
887         evtchn = bind_pirq.port;
888
889         pirq_query_unmask(irq);
890
891         rc = set_evtchn_to_irq(evtchn, irq);
892         if (rc)
893                 goto err;
894
895         info->evtchn = evtchn;
896         bind_evtchn_to_cpu(evtchn, 0, false);
897
898         rc = xen_evtchn_port_setup(evtchn);
899         if (rc)
900                 goto err;
901
902 out:
903         do_unmask(info, EVT_MASK_REASON_EXPLICIT);
904
905         eoi_pirq(irq_get_irq_data(irq));
906
907         return 0;
908
909 err:
910         pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc);
911         xen_evtchn_close(evtchn);
912         return 0;
913 }
914
915 static unsigned int startup_pirq(struct irq_data *data)
916 {
917         return __startup_pirq(data->irq);
918 }
919
920 static void shutdown_pirq(struct irq_data *data)
921 {
922         unsigned int irq = data->irq;
923         struct irq_info *info = info_for_irq(irq);
924         evtchn_port_t evtchn = evtchn_from_irq(irq);
925
926         BUG_ON(info->type != IRQT_PIRQ);
927
928         if (!VALID_EVTCHN(evtchn))
929                 return;
930
931         do_mask(info, EVT_MASK_REASON_EXPLICIT);
932         xen_evtchn_close(evtchn);
933         xen_irq_info_cleanup(info);
934 }
935
936 static void enable_pirq(struct irq_data *data)
937 {
938         enable_dynirq(data);
939 }
940
941 static void disable_pirq(struct irq_data *data)
942 {
943         disable_dynirq(data);
944 }
945
946 int xen_irq_from_gsi(unsigned gsi)
947 {
948         struct irq_info *info;
949
950         list_for_each_entry(info, &xen_irq_list_head, list) {
951                 if (info->type != IRQT_PIRQ)
952                         continue;
953
954                 if (info->u.pirq.gsi == gsi)
955                         return info->irq;
956         }
957
958         return -1;
959 }
960 EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
961
962 static void __unbind_from_irq(unsigned int irq)
963 {
964         evtchn_port_t evtchn = evtchn_from_irq(irq);
965         struct irq_info *info = info_for_irq(irq);
966
967         if (info->refcnt > 0) {
968                 info->refcnt--;
969                 if (info->refcnt != 0)
970                         return;
971         }
972
973         if (VALID_EVTCHN(evtchn)) {
974                 unsigned int cpu = cpu_from_irq(irq);
975                 struct xenbus_device *dev;
976
977                 if (!info->is_static)
978                         xen_evtchn_close(evtchn);
979
980                 switch (type_from_irq(irq)) {
981                 case IRQT_VIRQ:
982                         per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
983                         break;
984                 case IRQT_IPI:
985                         per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
986                         break;
987                 case IRQT_EVTCHN:
988                         dev = info->u.interdomain;
989                         if (dev)
990                                 atomic_dec(&dev->event_channels);
991                         break;
992                 default:
993                         break;
994                 }
995
996                 xen_irq_info_cleanup(info);
997         }
998
999         xen_free_irq(irq);
1000 }
1001
1002 /*
1003  * Do not make any assumptions regarding the relationship between the
1004  * IRQ number returned here and the Xen pirq argument.
1005  *
1006  * Note: We don't assign an event channel until the irq has actually been
1007  * started up.  Return an existing irq if we've already got one for the gsi.
1008  *
1009  * Shareable implies level triggered, not shareable implies edge
1010  * triggered here.
1011  */
1012 int xen_bind_pirq_gsi_to_irq(unsigned gsi,
1013                              unsigned pirq, int shareable, char *name)
1014 {
1015         int irq;
1016         struct physdev_irq irq_op;
1017         int ret;
1018
1019         mutex_lock(&irq_mapping_update_lock);
1020
1021         irq = xen_irq_from_gsi(gsi);
1022         if (irq != -1) {
1023                 pr_info("%s: returning irq %d for gsi %u\n",
1024                         __func__, irq, gsi);
1025                 goto out;
1026         }
1027
1028         irq = xen_allocate_irq_gsi(gsi);
1029         if (irq < 0)
1030                 goto out;
1031
1032         irq_op.irq = irq;
1033         irq_op.vector = 0;
1034
1035         /* Only the privileged domain can do this. For non-priv, the pcifront
1036          * driver provides a PCI bus that does the call to do exactly
1037          * this in the priv domain. */
1038         if (xen_initial_domain() &&
1039             HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
1040                 xen_free_irq(irq);
1041                 irq = -ENOSPC;
1042                 goto out;
1043         }
1044
1045         ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
1046                                shareable ? PIRQ_SHAREABLE : 0);
1047         if (ret < 0) {
1048                 __unbind_from_irq(irq);
1049                 irq = ret;
1050                 goto out;
1051         }
1052
1053         pirq_query_unmask(irq);
1054         /* We try to use the handler with the appropriate semantic for the
1055          * type of interrupt: if the interrupt is an edge triggered
1056          * interrupt we use handle_edge_irq.
1057          *
1058          * On the other hand if the interrupt is level triggered we use
1059          * handle_fasteoi_irq like the native code does for this kind of
1060          * interrupts.
1061          *
1062          * Depending on the Xen version, pirq_needs_eoi might return true
1063          * not only for level triggered interrupts but for edge triggered
1064          * interrupts too. In any case Xen always honors the eoi mechanism,
1065          * not injecting any more pirqs of the same kind if the first one
1066          * hasn't received an eoi yet. Therefore using the fasteoi handler
1067          * is the right choice either way.
1068          */
1069         if (shareable)
1070                 irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
1071                                 handle_fasteoi_irq, name);
1072         else
1073                 irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
1074                                 handle_edge_irq, name);
1075
1076 out:
1077         mutex_unlock(&irq_mapping_update_lock);
1078
1079         return irq;
1080 }
1081
1082 #ifdef CONFIG_PCI_MSI
1083 int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
1084 {
1085         int rc;
1086         struct physdev_get_free_pirq op_get_free_pirq;
1087
1088         op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
1089         rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
1090
1091         WARN_ONCE(rc == -ENOSYS,
1092                   "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
1093
1094         return rc ? -1 : op_get_free_pirq.pirq;
1095 }
1096
1097 int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
1098                              int pirq, int nvec, const char *name, domid_t domid)
1099 {
1100         int i, irq, ret;
1101
1102         mutex_lock(&irq_mapping_update_lock);
1103
1104         irq = xen_allocate_irqs_dynamic(nvec);
1105         if (irq < 0)
1106                 goto out;
1107
1108         for (i = 0; i < nvec; i++) {
1109                 irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);
1110
1111                 ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid,
1112                                               i == 0 ? 0 : PIRQ_MSI_GROUP);
1113                 if (ret < 0)
1114                         goto error_irq;
1115         }
1116
1117         ret = irq_set_msi_desc(irq, msidesc);
1118         if (ret < 0)
1119                 goto error_irq;
1120 out:
1121         mutex_unlock(&irq_mapping_update_lock);
1122         return irq;
1123 error_irq:
1124         while (nvec--)
1125                 __unbind_from_irq(irq + nvec);
1126         mutex_unlock(&irq_mapping_update_lock);
1127         return ret;
1128 }
1129 #endif
1130
1131 int xen_destroy_irq(int irq)
1132 {
1133         struct physdev_unmap_pirq unmap_irq;
1134         struct irq_info *info = info_for_irq(irq);
1135         int rc = -ENOENT;
1136
1137         mutex_lock(&irq_mapping_update_lock);
1138
1139         /*
1140          * Only the first vector of an MSI group unmaps the PIRQ; the
1141          * remaining vectors in the group carry PIRQ_MSI_GROUP and
1142          * therefore skip the unmap.
1143          */
1144         if (xen_initial_domain() && !(info->u.pirq.flags & PIRQ_MSI_GROUP)) {
1145                 unmap_irq.pirq = info->u.pirq.pirq;
1146                 unmap_irq.domid = info->u.pirq.domid;
1147                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
1148                 /* If another domain quits without making the pci_disable_msix
1149                  * call, the Xen hypervisor takes care of freeing the PIRQs
1150                  * (free_domain_pirqs).
1151                  */
1152                 if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
1153                         pr_info("domain %d does not have %d anymore\n",
1154                                 info->u.pirq.domid, info->u.pirq.pirq);
1155                 else if (rc) {
1156                         pr_warn("unmap irq failed %d\n", rc);
1157                         goto out;
1158                 }
1159         }
1160
1161         xen_free_irq(irq);
1162
1163 out:
1164         mutex_unlock(&irq_mapping_update_lock);
1165         return rc;
1166 }
1167
1168 int xen_irq_from_pirq(unsigned pirq)
1169 {
1170         int irq;
1171
1172         struct irq_info *info;
1173
1174         mutex_lock(&irq_mapping_update_lock);
1175
1176         list_for_each_entry(info, &xen_irq_list_head, list) {
1177                 if (info->type != IRQT_PIRQ)
1178                         continue;
1179                 irq = info->irq;
1180                 if (info->u.pirq.pirq == pirq)
1181                         goto out;
1182         }
1183         irq = -1;
1184 out:
1185         mutex_unlock(&irq_mapping_update_lock);
1186
1187         return irq;
1188 }
1189
1190
1191 int xen_pirq_from_irq(unsigned irq)
1192 {
1193         return pirq_from_irq(irq);
1194 }
1195 EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
1196
1197 static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
1198                                    struct xenbus_device *dev)
1199 {
1200         int irq;
1201         int ret;
1202
1203         if (evtchn >= xen_evtchn_max_channels())
1204                 return -ENOMEM;
1205
1206         mutex_lock(&irq_mapping_update_lock);
1207
1208         irq = get_evtchn_to_irq(evtchn);
1209
1210         if (irq == -1) {
1211                 irq = xen_allocate_irq_dynamic();
1212                 if (irq < 0)
1213                         goto out;
1214
1215                 irq_set_chip_and_handler_name(irq, chip,
1216                                               handle_edge_irq, "event");
1217
1218                 ret = xen_irq_info_evtchn_setup(irq, evtchn, dev);
1219                 if (ret < 0) {
1220                         __unbind_from_irq(irq);
1221                         irq = ret;
1222                         goto out;
1223                 }
1224                 /*
1225                  * New interdomain events are initially bound to vCPU0. This
1226                  * is required to set up the event channel in the first
1227                  * place and also important for UP guests because the
1228                  * affinity setting is not invoked on them so nothing would
1229                  * bind the channel.
1230                  */
1231                 bind_evtchn_to_cpu(evtchn, 0, false);
1232         } else {
1233                 struct irq_info *info = info_for_irq(irq);
1234                 WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
1235         }
1236
1237 out:
1238         mutex_unlock(&irq_mapping_update_lock);
1239
1240         return irq;
1241 }
1242
1243 int bind_evtchn_to_irq(evtchn_port_t evtchn)
1244 {
1245         return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip, NULL);
1246 }
1247 EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
1248
1249 int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
1250 {
1251         return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip, NULL);
1252 }
1253 EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
1254
1255 static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
1256 {
1257         struct evtchn_bind_ipi bind_ipi;
1258         evtchn_port_t evtchn;
1259         int ret, irq;
1260
1261         mutex_lock(&irq_mapping_update_lock);
1262
1263         irq = per_cpu(ipi_to_irq, cpu)[ipi];
1264
1265         if (irq == -1) {
1266                 irq = xen_allocate_irq_dynamic();
1267                 if (irq < 0)
1268                         goto out;
1269
1270                 irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
1271                                               handle_percpu_irq, "ipi");
1272
1273                 bind_ipi.vcpu = xen_vcpu_nr(cpu);
1274                 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
1275                                                 &bind_ipi) != 0)
1276                         BUG();
1277                 evtchn = bind_ipi.port;
1278
1279                 ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
1280                 if (ret < 0) {
1281                         __unbind_from_irq(irq);
1282                         irq = ret;
1283                         goto out;
1284                 }
1285                 /*
1286                  * Force the affinity mask to the target CPU so proc shows
1287                  * the correct target.
1288                  */
1289                 bind_evtchn_to_cpu(evtchn, cpu, true);
1290         } else {
1291                 struct irq_info *info = info_for_irq(irq);
1292                 WARN_ON(info == NULL || info->type != IRQT_IPI);
1293         }
1294
1295  out:
1296         mutex_unlock(&irq_mapping_update_lock);
1297         return irq;
1298 }
1299
1300 static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev,
1301                                                evtchn_port_t remote_port,
1302                                                struct irq_chip *chip)
1303 {
1304         struct evtchn_bind_interdomain bind_interdomain;
1305         int err;
1306
1307         bind_interdomain.remote_dom  = dev->otherend_id;
1308         bind_interdomain.remote_port = remote_port;
1309
1310         err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
1311                                           &bind_interdomain);
1312
1313         return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port,
1314                                                chip, dev);
1315 }
1316
1317 int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
1318                                            evtchn_port_t remote_port)
1319 {
1320         return bind_interdomain_evtchn_to_irq_chip(dev, remote_port,
1321                                                    &xen_lateeoi_chip);
1322 }
1323 EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
1324
1325 static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
1326 {
1327         struct evtchn_status status;
1328         evtchn_port_t port;
1329         int rc = -ENOENT;
1330
1331         memset(&status, 0, sizeof(status));
1332         for (port = 0; port < xen_evtchn_max_channels(); port++) {
1333                 status.dom = DOMID_SELF;
1334                 status.port = port;
1335                 rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
1336                 if (rc < 0)
1337                         continue;
1338                 if (status.status != EVTCHNSTAT_virq)
1339                         continue;
1340                 if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
1341                         *evtchn = port;
1342                         break;
1343                 }
1344         }
1345         return rc;
1346 }
1347
1348 /**
1349  * xen_evtchn_nr_channels - number of usable event channel ports
1350  *
1351  * This may be less than the maximum supported by the current
1352  * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum
1353  * supported.
1354  */
1355 unsigned xen_evtchn_nr_channels(void)
1356 {
1357         return evtchn_ops->nr_channels();
1358 }
1359 EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels);
1360
1361 int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
1362 {
1363         struct evtchn_bind_virq bind_virq;
1364         evtchn_port_t evtchn = 0;
1365         int irq, ret;
1366
1367         mutex_lock(&irq_mapping_update_lock);
1368
1369         irq = per_cpu(virq_to_irq, cpu)[virq];
1370
1371         if (irq == -1) {
1372                 irq = xen_allocate_irq_dynamic();
1373                 if (irq < 0)
1374                         goto out;
1375
1376                 if (percpu)
1377                         irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
1378                                                       handle_percpu_irq, "virq");
1379                 else
1380                         irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
1381                                                       handle_edge_irq, "virq");
1382
1383                 bind_virq.virq = virq;
1384                 bind_virq.vcpu = xen_vcpu_nr(cpu);
1385                 ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
1386                                                 &bind_virq);
1387                 if (ret == 0)
1388                         evtchn = bind_virq.port;
1389                 else {
1390                         if (ret == -EEXIST)
1391                                 ret = find_virq(virq, cpu, &evtchn);
1392                         BUG_ON(ret < 0);
1393                 }
1394
1395                 ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
1396                 if (ret < 0) {
1397                         __unbind_from_irq(irq);
1398                         irq = ret;
1399                         goto out;
1400                 }
1401
1402                 /*
1403                  * Force the affinity mask for percpu interrupts so proc
1404                  * shows the correct target.
1405                  */
1406                 bind_evtchn_to_cpu(evtchn, cpu, percpu);
1407         } else {
1408                 struct irq_info *info = info_for_irq(irq);
1409                 WARN_ON(info == NULL || info->type != IRQT_VIRQ);
1410         }
1411
1412 out:
1413         mutex_unlock(&irq_mapping_update_lock);
1414
1415         return irq;
1416 }
1417
1418 static void unbind_from_irq(unsigned int irq)
1419 {
1420         mutex_lock(&irq_mapping_update_lock);
1421         __unbind_from_irq(irq);
1422         mutex_unlock(&irq_mapping_update_lock);
1423 }
1424
1425 static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn,
1426                                           irq_handler_t handler,
1427                                           unsigned long irqflags,
1428                                           const char *devname, void *dev_id,
1429                                           struct irq_chip *chip)
1430 {
1431         int irq, retval;
1432
1433         irq = bind_evtchn_to_irq_chip(evtchn, chip, NULL);
1434         if (irq < 0)
1435                 return irq;
1436         retval = request_irq(irq, handler, irqflags, devname, dev_id);
1437         if (retval != 0) {
1438                 unbind_from_irq(irq);
1439                 return retval;
1440         }
1441
1442         return irq;
1443 }
1444
1445 int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
1446                               irq_handler_t handler,
1447                               unsigned long irqflags,
1448                               const char *devname, void *dev_id)
1449 {
1450         return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
1451                                               devname, dev_id,
1452                                               &xen_dynamic_chip);
1453 }
1454 EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
1455
1456 int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
1457                                       irq_handler_t handler,
1458                                       unsigned long irqflags,
1459                                       const char *devname, void *dev_id)
1460 {
1461         return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
1462                                               devname, dev_id,
1463                                               &xen_lateeoi_chip);
1464 }
1465 EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi);
1466
1467 static int bind_interdomain_evtchn_to_irqhandler_chip(
1468                 struct xenbus_device *dev, evtchn_port_t remote_port,
1469                 irq_handler_t handler, unsigned long irqflags,
1470                 const char *devname, void *dev_id, struct irq_chip *chip)
1471 {
1472         int irq, retval;
1473
1474         irq = bind_interdomain_evtchn_to_irq_chip(dev, remote_port, chip);
1475         if (irq < 0)
1476                 return irq;
1477
1478         retval = request_irq(irq, handler, irqflags, devname, dev_id);
1479         if (retval != 0) {
1480                 unbind_from_irq(irq);
1481                 return retval;
1482         }
1483
1484         return irq;
1485 }
1486
1487 int bind_interdomain_evtchn_to_irqhandler_lateeoi(struct xenbus_device *dev,
1488                                                   evtchn_port_t remote_port,
1489                                                   irq_handler_t handler,
1490                                                   unsigned long irqflags,
1491                                                   const char *devname,
1492                                                   void *dev_id)
1493 {
1494         return bind_interdomain_evtchn_to_irqhandler_chip(dev,
1495                                 remote_port, handler, irqflags, devname,
1496                                 dev_id, &xen_lateeoi_chip);
1497 }
1498 EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi);
1499
1500 int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
1501                             irq_handler_t handler,
1502                             unsigned long irqflags, const char *devname, void *dev_id)
1503 {
1504         int irq, retval;
1505
1506         irq = bind_virq_to_irq(virq, cpu, irqflags & IRQF_PERCPU);
1507         if (irq < 0)
1508                 return irq;
1509         retval = request_irq(irq, handler, irqflags, devname, dev_id);
1510         if (retval != 0) {
1511                 unbind_from_irq(irq);
1512                 return retval;
1513         }
1514
1515         return irq;
1516 }
1517 EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
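/*
 * Illustrative sketch (handler name invented): per-cpu VIRQs such as
 * VIRQ_DEBUG are typically bound per CPU with IRQF_PERCPU, e.g. from a
 * CPU online hook:
 *
 *	irq = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, my_debug_handler,
 *				      IRQF_PERCPU, "debug", NULL);
 */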
1518
1519 int bind_ipi_to_irqhandler(enum ipi_vector ipi,
1520                            unsigned int cpu,
1521                            irq_handler_t handler,
1522                            unsigned long irqflags,
1523                            const char *devname,
1524                            void *dev_id)
1525 {
1526         int irq, retval;
1527
1528         irq = bind_ipi_to_irq(ipi, cpu);
1529         if (irq < 0)
1530                 return irq;
1531
1532         irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME;
1533         retval = request_irq(irq, handler, irqflags, devname, dev_id);
1534         if (retval != 0) {
1535                 unbind_from_irq(irq);
1536                 return retval;
1537         }
1538
1539         return irq;
1540 }
1541
1542 void unbind_from_irqhandler(unsigned int irq, void *dev_id)
1543 {
1544         struct irq_info *info = info_for_irq(irq);
1545
1546         if (WARN_ON(!info))
1547                 return;
1548         free_irq(irq, dev_id);
1549         unbind_from_irq(irq);
1550 }
1551 EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
1552
1553 /**
1554  * xen_set_irq_priority() - set an event channel priority.
1555  * @irq: irq bound to an event channel.
1556  * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN.
1557  */
1558 int xen_set_irq_priority(unsigned irq, unsigned priority)
1559 {
1560         struct evtchn_set_priority set_priority;
1561
1562         set_priority.port = evtchn_from_irq(irq);
1563         set_priority.priority = priority;
1564
1565         return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority,
1566                                            &set_priority);
1567 }
1568 EXPORT_SYMBOL_GPL(xen_set_irq_priority);
1569
1570 int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static)
1571 {
1572         int irq = get_evtchn_to_irq(evtchn);
1573         struct irq_info *info;
1574
1575         if (irq == -1)
1576                 return -ENOENT;
1577
1578         info = info_for_irq(irq);
1579
1580         if (!info)
1581                 return -ENOENT;
1582
1583         WARN_ON(info->refcnt != -1);
1584
1585         info->refcnt = 1;
1586         info->is_static = is_static;
1587
1588         return 0;
1589 }
1590 EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
1591
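/*
 * Reference counting note: refcnt starts out as -1 ("not refcounted") in
 * xen_irq_init().  evtchn_make_refcounted() switches it to 1, after which
 * evtchn_get()/evtchn_put() take and drop references; the event channel is
 * only torn down once the count drops back to zero in __unbind_from_irq().
 */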
1592 int evtchn_get(evtchn_port_t evtchn)
1593 {
1594         int irq;
1595         struct irq_info *info;
1596         int err = -ENOENT;
1597
1598         if (evtchn >= xen_evtchn_max_channels())
1599                 return -EINVAL;
1600
1601         mutex_lock(&irq_mapping_update_lock);
1602
1603         irq = get_evtchn_to_irq(evtchn);
1604         if (irq == -1)
1605                 goto done;
1606
1607         info = info_for_irq(irq);
1608
1609         if (!info)
1610                 goto done;
1611
1612         err = -EINVAL;
1613         if (info->refcnt <= 0 || info->refcnt == SHRT_MAX)
1614                 goto done;
1615
1616         info->refcnt++;
1617         err = 0;
1618  done:
1619         mutex_unlock(&irq_mapping_update_lock);
1620
1621         return err;
1622 }
1623 EXPORT_SYMBOL_GPL(evtchn_get);
1624
1625 void evtchn_put(evtchn_port_t evtchn)
1626 {
1627         int irq = get_evtchn_to_irq(evtchn);
1628         if (WARN_ON(irq == -1))
1629                 return;
1630         unbind_from_irq(irq);
1631 }
1632 EXPORT_SYMBOL_GPL(evtchn_put);
1633
1634 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
1635 {
1636         int irq;
1637
1638 #ifdef CONFIG_X86
1639         if (unlikely(vector == XEN_NMI_VECTOR)) {
1640                 int rc =  HYPERVISOR_vcpu_op(VCPUOP_send_nmi, xen_vcpu_nr(cpu),
1641                                              NULL);
1642                 if (rc < 0)
1643                         printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
1644                 return;
1645         }
1646 #endif
1647         irq = per_cpu(ipi_to_irq, cpu)[vector];
1648         BUG_ON(irq < 0);
1649         notify_remote_via_irq(irq);
1650 }
1651
1652 struct evtchn_loop_ctrl {
1653         ktime_t timeout;
1654         unsigned count;
1655         bool defer_eoi;
1656 };
1657
1658 void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
1659 {
1660         int irq;
1661         struct irq_info *info;
1662         struct xenbus_device *dev;
1663
1664         irq = get_evtchn_to_irq(port);
1665         if (irq == -1)
1666                 return;
1667
1668         /*
1669          * Check for timeout every 256 events.
1670          * We are setting the timeout value only after the first 256
1671          * events in order to not hurt the common case of few loop
1672          * iterations. The 256 is basically an arbitrary value.
1673          *
1674          * In case we are hitting the timeout we need to defer all further
1675          * EOIs in order to ensure to leave the event handling loop rather
1676          * sooner than later.
1677          */
1678         if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
1679                 ktime_t kt = ktime_get();
1680
1681                 if (!ctrl->timeout) {
1682                         kt = ktime_add_ms(kt,
1683                                           jiffies_to_msecs(event_loop_timeout));
1684                         ctrl->timeout = kt;
1685                 } else if (kt > ctrl->timeout) {
1686                         ctrl->defer_eoi = true;
1687                 }
1688         }
1689
1690         info = info_for_irq(irq);
1691         if (xchg_acquire(&info->is_active, 1))
1692                 return;
1693
1694         dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL;
1695         if (dev)
1696                 atomic_inc(&dev->events);
1697
1698         if (ctrl->defer_eoi) {
1699                 info->eoi_cpu = smp_processor_id();
1700                 info->irq_epoch = __this_cpu_read(irq_epoch);
1701                 info->eoi_time = get_jiffies_64() + event_eoi_delay;
1702         }
1703
1704         generic_handle_irq(irq);
1705 }
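/*
 * For context, a simplified sketch of how the event channel backend drives
 * the helper above: the per-upcall ctrl is shared across the whole scan, so
 * the count/timeout throttling applies to the loop as a whole rather than to
 * a single port.  for_each_pending_port() is a made-up iterator standing in
 * for the 2-level/FIFO specific scanning code:
 *
 *	void xen_evtchn_handle_events(unsigned int cpu,
 *				      struct evtchn_loop_ctrl *ctrl)
 *	{
 *		evtchn_port_t port;
 *
 *		for_each_pending_port(cpu, port)
 *			handle_irq_for_port(port, ctrl);
 *	}
 */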
1706
1707 static int __xen_evtchn_do_upcall(void)
1708 {
1709         struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
1710         int ret = vcpu_info->evtchn_upcall_pending ? IRQ_HANDLED : IRQ_NONE;
1711         int cpu = smp_processor_id();
1712         struct evtchn_loop_ctrl ctrl = { 0 };
1713
1714         read_lock(&evtchn_rwlock);
1715
1716         do {
1717                 vcpu_info->evtchn_upcall_pending = 0;
1718
1719                 xen_evtchn_handle_events(cpu, &ctrl);
1720
1721                 BUG_ON(!irqs_disabled());
1722
1723                 virt_rmb(); /* Hypervisor can set upcall pending. */
1724
1725         } while (vcpu_info->evtchn_upcall_pending);
1726
1727         read_unlock(&evtchn_rwlock);
1728
1729         /*
1730          * Increment irq_epoch only now to defer EOIs only for
1731          * xen_irq_lateeoi() invocations occurring from inside the loop
1732          * above.
1733          */
1734         __this_cpu_inc(irq_epoch);
1735
1736         return ret;
1737 }
1738
1739 void xen_evtchn_do_upcall(struct pt_regs *regs)
1740 {
1741         struct pt_regs *old_regs = set_irq_regs(regs);
1742
1743         irq_enter();
1744
1745         __xen_evtchn_do_upcall();
1746
1747         irq_exit();
1748         set_irq_regs(old_regs);
1749 }
1750
1751 int xen_hvm_evtchn_do_upcall(void)
1752 {
1753         return __xen_evtchn_do_upcall();
1754 }
1755 EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
1756
1757 /* Rebind a new event channel to an existing irq. */
1758 void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
1759 {
1760         struct irq_info *info = info_for_irq(irq);
1761
1762         if (WARN_ON(!info))
1763                 return;
1764
1765         /* Make sure the irq is masked, since the new event channel
1766            will also be masked. */
1767         disable_irq(irq);
1768
1769         mutex_lock(&irq_mapping_update_lock);
1770
1771         /* After resume the irq<->evtchn mappings are all cleared out */
1772         BUG_ON(get_evtchn_to_irq(evtchn) != -1);
1773         /* Expect irq to have been bound before,
1774            so there should be a proper type */
1775         BUG_ON(info->type == IRQT_UNBOUND);
1776
1777         (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL);
1778
1779         mutex_unlock(&irq_mapping_update_lock);
1780
1781         bind_evtchn_to_cpu(evtchn, info->cpu, false);
1782
1783         /* Unmask the event channel. */
1784         enable_irq(irq);
1785 }
1786
1787 /* Rebind an evtchn so that it gets delivered to a specific cpu */
1788 static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu)
1789 {
1790         struct evtchn_bind_vcpu bind_vcpu;
1791         evtchn_port_t evtchn = info ? info->evtchn : 0;
1792
1793         if (!VALID_EVTCHN(evtchn))
1794                 return -1;
1795
1796         if (!xen_support_evtchn_rebind())
1797                 return -1;
1798
1799         /* Send future instances of this interrupt to the target vcpu. */
1800         bind_vcpu.port = evtchn;
1801         bind_vcpu.vcpu = xen_vcpu_nr(tcpu);
1802
1803         /*
1804          * Mask the event while changing the VCPU binding to prevent
1805          * it being delivered on an unexpected VCPU.
1806          */
1807         do_mask(info, EVT_MASK_REASON_TEMPORARY);
1808
1809         /*
1810          * If this fails, it usually just indicates that we're dealing with a
1811          * virq or IPI channel, which don't actually need to be rebound. Ignore
1812          * it, but don't do the xenlinux-level rebind in that case.
1813          */
1814         if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
1815                 bind_evtchn_to_cpu(evtchn, tcpu, false);
1816
1817         do_unmask(info, EVT_MASK_REASON_TEMPORARY);
1818
1819         return 0;
1820 }
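/*
 * Drivers do not call the rebind above directly; they go through the generic
 * IRQ affinity API, which ends up in set_affinity_irq() below, e.g. (sketch):
 *
 *	// steer an already bound event channel irq towards CPU 2
 *	int err = irq_set_affinity(irq, cpumask_of(2));
 *
 * The EVT_MASK_REASON_TEMPORARY masking around EVTCHNOP_bind_vcpu keeps the
 * event from being delivered on an unexpected vCPU while Xen updates the
 * binding.
 */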
1821
1822 /*
1823  * Find the CPU within @dest mask which has the least number of channels
1824  * assigned. This is not precise as the per cpu counts can be modified
1825  * concurrently.
1826  */
1827 static unsigned int select_target_cpu(const struct cpumask *dest)
1828 {
1829         unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX;
1830
1831         for_each_cpu_and(cpu, dest, cpu_online_mask) {
1832                 unsigned int curch = atomic_read(&channels_on_cpu[cpu]);
1833
1834                 if (curch < minch) {
1835                         minch = curch;
1836                         best_cpu = cpu;
1837                 }
1838         }
1839
1840         /*
1841          * Catch the unlikely case that dest contains no online CPUs. The
1842          * retry below can't recurse further, as some CPU is always online.
1843          */
1844         if (best_cpu == UINT_MAX)
1845                 return select_target_cpu(cpu_online_mask);
1846
1847         return best_cpu;
1848 }
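/*
 * Worked example for the selection above: with dest = {2,5,7}, CPU 5 offline,
 * channels_on_cpu[2] == 12 and channels_on_cpu[7] == 3, CPU 7 is returned.
 * If dest happens to contain no online CPU at all, the function retries once
 * with cpu_online_mask and picks the least loaded online CPU instead.
 */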
1849
1850 static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
1851                             bool force)
1852 {
1853         unsigned int tcpu = select_target_cpu(dest);
1854         int ret;
1855
1856         ret = xen_rebind_evtchn_to_cpu(info_for_irq(data->irq), tcpu);
1857         if (!ret)
1858                 irq_data_update_effective_affinity(data, cpumask_of(tcpu));
1859
1860         return ret;
1861 }
1862
1863 static void enable_dynirq(struct irq_data *data)
1864 {
1865         struct irq_info *info = info_for_irq(data->irq);
1866         evtchn_port_t evtchn = info ? info->evtchn : 0;
1867
1868         if (VALID_EVTCHN(evtchn))
1869                 do_unmask(info, EVT_MASK_REASON_EXPLICIT);
1870 }
1871
1872 static void disable_dynirq(struct irq_data *data)
1873 {
1874         struct irq_info *info = info_for_irq(data->irq);
1875         evtchn_port_t evtchn = info ? info->evtchn : 0;
1876
1877         if (VALID_EVTCHN(evtchn))
1878                 do_mask(info, EVT_MASK_REASON_EXPLICIT);
1879 }
1880
1881 static void ack_dynirq(struct irq_data *data)
1882 {
1883         struct irq_info *info = info_for_irq(data->irq);
1884         evtchn_port_t evtchn = info ? info->evtchn : 0;
1885
1886         if (VALID_EVTCHN(evtchn))
1887                 event_handler_exit(info);
1888 }
1889
1890 static void mask_ack_dynirq(struct irq_data *data)
1891 {
1892         disable_dynirq(data);
1893         ack_dynirq(data);
1894 }
1895
1896 static void lateeoi_ack_dynirq(struct irq_data *data)
1897 {
1898         struct irq_info *info = info_for_irq(data->irq);
1899         evtchn_port_t evtchn = info ? info->evtchn : 0;
1900
1901         if (VALID_EVTCHN(evtchn)) {
1902                 do_mask(info, EVT_MASK_REASON_EOI_PENDING);
1903                 /*
1904                  * Don't call event_handler_exit().
1905                  * Need to keep is_active non-zero in order to ignore re-raised
1906                  * events after cpu affinity changes while a lateeoi is pending.
1907                  */
1908                 clear_evtchn(evtchn);
1909         }
1910 }
1911
1912 static void lateeoi_mask_ack_dynirq(struct irq_data *data)
1913 {
1914         struct irq_info *info = info_for_irq(data->irq);
1915         evtchn_port_t evtchn = info ? info->evtchn : 0;
1916
1917         if (VALID_EVTCHN(evtchn)) {
1918                 do_mask(info, EVT_MASK_REASON_EXPLICIT);
1919                 event_handler_exit(info);
1920         }
1921 }
1922
1923 static int retrigger_dynirq(struct irq_data *data)
1924 {
1925         struct irq_info *info = info_for_irq(data->irq);
1926         evtchn_port_t evtchn = info ? info->evtchn : 0;
1927
1928         if (!VALID_EVTCHN(evtchn))
1929                 return 0;
1930
1931         do_mask(info, EVT_MASK_REASON_TEMPORARY);
1932         set_evtchn(evtchn);
1933         do_unmask(info, EVT_MASK_REASON_TEMPORARY);
1934
1935         return 1;
1936 }
1937
1938 static void restore_pirqs(void)
1939 {
1940         int pirq, rc, irq, gsi;
1941         struct physdev_map_pirq map_irq;
1942         struct irq_info *info;
1943
1944         list_for_each_entry(info, &xen_irq_list_head, list) {
1945                 if (info->type != IRQT_PIRQ)
1946                         continue;
1947
1948                 pirq = info->u.pirq.pirq;
1949                 gsi = info->u.pirq.gsi;
1950                 irq = info->irq;
1951
1952                 /* save/restore of PT devices doesn't work, so at this point the
1953                  * only devices present are GSI based emulated devices */
1954                 if (!gsi)
1955                         continue;
1956
1957                 map_irq.domid = DOMID_SELF;
1958                 map_irq.type = MAP_PIRQ_TYPE_GSI;
1959                 map_irq.index = gsi;
1960                 map_irq.pirq = pirq;
1961
1962                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
1963                 if (rc) {
1964                         pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
1965                                 gsi, irq, pirq, rc);
1966                         xen_free_irq(irq);
1967                         continue;
1968                 }
1969
1970                 printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
1971
1972                 __startup_pirq(irq);
1973         }
1974 }
1975
1976 static void restore_cpu_virqs(unsigned int cpu)
1977 {
1978         struct evtchn_bind_virq bind_virq;
1979         evtchn_port_t evtchn;
1980         int virq, irq;
1981
1982         for (virq = 0; virq < NR_VIRQS; virq++) {
1983                 if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
1984                         continue;
1985
1986                 BUG_ON(virq_from_irq(irq) != virq);
1987
1988                 /* Get a new binding from Xen. */
1989                 bind_virq.virq = virq;
1990                 bind_virq.vcpu = xen_vcpu_nr(cpu);
1991                 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
1992                                                 &bind_virq) != 0)
1993                         BUG();
1994                 evtchn = bind_virq.port;
1995
1996                 /* Record the new mapping. */
1997                 (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
1998                 /* The affinity mask is still valid */
1999                 bind_evtchn_to_cpu(evtchn, cpu, false);
2000         }
2001 }
2002
2003 static void restore_cpu_ipis(unsigned int cpu)
2004 {
2005         struct evtchn_bind_ipi bind_ipi;
2006         evtchn_port_t evtchn;
2007         int ipi, irq;
2008
2009         for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
2010                 if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
2011                         continue;
2012
2013                 BUG_ON(ipi_from_irq(irq) != ipi);
2014
2015                 /* Get a new binding from Xen. */
2016                 bind_ipi.vcpu = xen_vcpu_nr(cpu);
2017                 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
2018                                                 &bind_ipi) != 0)
2019                         BUG();
2020                 evtchn = bind_ipi.port;
2021
2022                 /* Record the new mapping. */
2023                 (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
2024                 /* The affinity mask is still valid */
2025                 bind_evtchn_to_cpu(evtchn, cpu, false);
2026         }
2027 }
2028
2029 /* Clear an irq's pending state, in preparation for polling on it */
2030 void xen_clear_irq_pending(int irq)
2031 {
2032         struct irq_info *info = info_for_irq(irq);
2033         evtchn_port_t evtchn = info ? info->evtchn : 0;
2034
2035         if (VALID_EVTCHN(evtchn))
2036                 event_handler_exit(info);
2037 }
2038 EXPORT_SYMBOL(xen_clear_irq_pending);
2039 void xen_set_irq_pending(int irq)
2040 {
2041         evtchn_port_t evtchn = evtchn_from_irq(irq);
2042
2043         if (VALID_EVTCHN(evtchn))
2044                 set_evtchn(evtchn);
2045 }
2046
2047 bool xen_test_irq_pending(int irq)
2048 {
2049         evtchn_port_t evtchn = evtchn_from_irq(irq);
2050         bool ret = false;
2051
2052         if (VALID_EVTCHN(evtchn))
2053                 ret = test_evtchn(evtchn);
2054
2055         return ret;
2056 }
2057
2058 /* Poll waiting for an irq to become pending with timeout.  In the usual case,
2059  * the irq will be disabled so it won't deliver an interrupt. */
2060 void xen_poll_irq_timeout(int irq, u64 timeout)
2061 {
2062         evtchn_port_t evtchn = evtchn_from_irq(irq);
2063
2064         if (VALID_EVTCHN(evtchn)) {
2065                 struct sched_poll poll;
2066
2067                 poll.nr_ports = 1;
2068                 poll.timeout = timeout;
2069                 set_xen_guest_handle(poll.ports, &evtchn);
2070
2071                 if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
2072                         BUG();
2073         }
2074 }
2075 EXPORT_SYMBOL(xen_poll_irq_timeout);
2076 /* Poll waiting for an irq to become pending.  In the usual case, the
2077  * irq will be disabled so it won't deliver an interrupt. */
2078 void xen_poll_irq(int irq)
2079 {
2080         xen_poll_irq_timeout(irq, 0 /* no timeout */);
2081 }
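/*
 * Typical polling pattern built from the helpers above (a sketch, loosely
 * modelled on how the Xen PV spinlock code waits for a kick; "irq" is a
 * dedicated poll irq that is kept disabled so it never delivers a normal
 * interrupt, and condition_we_wait_for() is a hypothetical predicate):
 *
 *	xen_clear_irq_pending(irq);		// forget any stale kick
 *	barrier();
 *	if (!condition_we_wait_for())
 *		xen_poll_irq(irq);		// block in Xen until kicked
 *
 * The timeout passed to xen_poll_irq_timeout() is handed straight to
 * SCHEDOP_poll, with 0 meaning "no timeout" as used by xen_poll_irq().
 */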
2082
2083 /* Check whether the IRQ line is shared with other guests. */
2084 int xen_test_irq_shared(int irq)
2085 {
2086         struct irq_info *info = info_for_irq(irq);
2087         struct physdev_irq_status_query irq_status;
2088
2089         if (WARN_ON(!info))
2090                 return -ENOENT;
2091
2092         irq_status.irq = info->u.pirq.pirq;
2093
2094         if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
2095                 return 0;
2096         return !(irq_status.flags & XENIRQSTAT_shared);
2097 }
2098 EXPORT_SYMBOL_GPL(xen_test_irq_shared);
2099
2100 void xen_irq_resume(void)
2101 {
2102         unsigned int cpu;
2103         struct irq_info *info;
2104
2105         /* New event-channel space is not 'live' yet. */
2106         xen_evtchn_resume();
2107
2108         /* No IRQ <-> event-channel mappings. */
2109         list_for_each_entry(info, &xen_irq_list_head, list) {
2110                 /* Zap event-channel binding */
2111                 info->evtchn = 0;
2112                 /* Adjust accounting */
2113                 channels_on_cpu_dec(info);
2114         }
2115
2116         clear_evtchn_to_irq_all();
2117
2118         for_each_possible_cpu(cpu) {
2119                 restore_cpu_virqs(cpu);
2120                 restore_cpu_ipis(cpu);
2121         }
2122
2123         restore_pirqs();
2124 }
2125
2126 static struct irq_chip xen_dynamic_chip __read_mostly = {
2127         .name                   = "xen-dyn",
2128
2129         .irq_disable            = disable_dynirq,
2130         .irq_mask               = disable_dynirq,
2131         .irq_unmask             = enable_dynirq,
2132
2133         .irq_ack                = ack_dynirq,
2134         .irq_mask_ack           = mask_ack_dynirq,
2135
2136         .irq_set_affinity       = set_affinity_irq,
2137         .irq_retrigger          = retrigger_dynirq,
2138 };
2139
2140 static struct irq_chip xen_lateeoi_chip __read_mostly = {
2141         /* The chip name needs to contain "xen-dyn" for irqbalance to work. */
2142         .name                   = "xen-dyn-lateeoi",
2143
2144         .irq_disable            = disable_dynirq,
2145         .irq_mask               = disable_dynirq,
2146         .irq_unmask             = enable_dynirq,
2147
2148         .irq_ack                = lateeoi_ack_dynirq,
2149         .irq_mask_ack           = lateeoi_mask_ack_dynirq,
2150
2151         .irq_set_affinity       = set_affinity_irq,
2152         .irq_retrigger          = retrigger_dynirq,
2153 };
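/*
 * The lateeoi chip is meant for interdomain events whose handler wants to
 * signal completion explicitly, so that a misbehaving remote end can be
 * throttled.  Sketch of the expected driver pattern (my_handler, my_dev and
 * the spurious flag handling are placeholders):
 *
 *	irq = bind_evtchn_to_irqhandler_lateeoi(evtchn, my_handler, 0,
 *						"my-dev", my_dev);
 *	...
 *	// in my_handler(), once the event has really been processed:
 *	xen_irq_lateeoi(irq, spurious ? XEN_EOI_FLAG_SPURIOUS : 0);
 *
 * Until xen_irq_lateeoi() is called the channel stays masked with
 * EVT_MASK_REASON_EOI_PENDING, see lateeoi_ack_dynirq() above.
 */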
2154
2155 static struct irq_chip xen_pirq_chip __read_mostly = {
2156         .name                   = "xen-pirq",
2157
2158         .irq_startup            = startup_pirq,
2159         .irq_shutdown           = shutdown_pirq,
2160         .irq_enable             = enable_pirq,
2161         .irq_disable            = disable_pirq,
2162
2163         .irq_mask               = disable_dynirq,
2164         .irq_unmask             = enable_dynirq,
2165
2166         .irq_ack                = eoi_pirq,
2167         .irq_eoi                = eoi_pirq,
2168         .irq_mask_ack           = mask_ack_pirq,
2169
2170         .irq_set_affinity       = set_affinity_irq,
2171
2172         .irq_retrigger          = retrigger_dynirq,
2173 };
2174
2175 static struct irq_chip xen_percpu_chip __read_mostly = {
2176         .name                   = "xen-percpu",
2177
2178         .irq_disable            = disable_dynirq,
2179         .irq_mask               = disable_dynirq,
2180         .irq_unmask             = enable_dynirq,
2181
2182         .irq_ack                = ack_dynirq,
2183 };
2184
2185 #ifdef CONFIG_X86
2186 #ifdef CONFIG_XEN_PVHVM
2187 /* Vector callbacks are better than PCI interrupts to receive event
2188  * channel notifications because we can receive vector callbacks on any
2189  * vcpu and we don't need PCI support or APIC interactions. */
2190 void xen_setup_callback_vector(void)
2191 {
2192         uint64_t callback_via;
2193
2194         if (xen_have_vector_callback) {
2195                 callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR);
2196                 if (xen_set_callback_via(callback_via)) {
2197                         pr_err("Request for Xen HVM callback vector failed\n");
2198                         xen_have_vector_callback = false;
2199                 }
2200         }
2201 }
2202
2203 /*
2204  * Setup per-vCPU vector-type callbacks. If this setup is unavailable,
2205  * Set up per-vCPU vector-type callbacks. If this setup is unavailable,
2206  * fall back to the global vector-type callback.
2207 static __init void xen_init_setup_upcall_vector(void)
2208 {
2209         if (!xen_have_vector_callback)
2210                 return;
2211
2212         if ((cpuid_eax(xen_cpuid_base() + 4) & XEN_HVM_CPUID_UPCALL_VECTOR) &&
2213             !xen_set_upcall_vector(0))
2214                 xen_percpu_upcall = true;
2215         else if (xen_feature(XENFEAT_hvm_callback_vector))
2216                 xen_setup_callback_vector();
2217         else
2218                 xen_have_vector_callback = false;
2219 }
2220
2221 int xen_set_upcall_vector(unsigned int cpu)
2222 {
2223         int rc;
2224         xen_hvm_evtchn_upcall_vector_t op = {
2225                 .vector = HYPERVISOR_CALLBACK_VECTOR,
2226                 .vcpu = per_cpu(xen_vcpu_id, cpu),
2227         };
2228
2229         rc = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &op);
2230         if (rc)
2231                 return rc;
2232
2233         /* Trick the toolstack into thinking we are enlightened. */
2234         if (!cpu)
2235                 rc = xen_set_callback_via(1);
2236
2237         return rc;
2238 }
2239
2240 static __init void xen_alloc_callback_vector(void)
2241 {
2242         if (!xen_have_vector_callback)
2243                 return;
2244
2245         pr_info("Xen HVM callback vector for event delivery is enabled\n");
2246         alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_xen_hvm_callback);
2247 }
2248 #else
2249 void xen_setup_callback_vector(void) {}
2250 static inline void xen_init_setup_upcall_vector(void) {}
2251 int xen_set_upcall_vector(unsigned int cpu) { return 0; }
2252 static inline void xen_alloc_callback_vector(void) {}
2253 #endif /* CONFIG_XEN_PVHVM */
2254 #endif /* CONFIG_X86 */
2255
2256 bool xen_fifo_events = true;
2257 module_param_named(fifo_events, xen_fifo_events, bool, 0);
2258
2259 static int xen_evtchn_cpu_prepare(unsigned int cpu)
2260 {
2261         int ret = 0;
2262
2263         xen_cpu_init_eoi(cpu);
2264
2265         if (evtchn_ops->percpu_init)
2266                 ret = evtchn_ops->percpu_init(cpu);
2267
2268         return ret;
2269 }
2270
2271 static int xen_evtchn_cpu_dead(unsigned int cpu)
2272 {
2273         int ret = 0;
2274
2275         if (evtchn_ops->percpu_deinit)
2276                 ret = evtchn_ops->percpu_deinit(cpu);
2277
2278         return ret;
2279 }
2280
2281 void __init xen_init_IRQ(void)
2282 {
2283         int ret = -EINVAL;
2284         evtchn_port_t evtchn;
2285
2286         if (xen_fifo_events)
2287                 ret = xen_evtchn_fifo_init();
2288         if (ret < 0) {
2289                 xen_evtchn_2l_init();
2290                 xen_fifo_events = false;
2291         }
2292
2293         xen_cpu_init_eoi(smp_processor_id());
2294
2295         cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
2296                                   "xen/evtchn:prepare",
2297                                   xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
2298
2299         evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
2300                                 sizeof(*evtchn_to_irq), GFP_KERNEL);
2301         BUG_ON(!evtchn_to_irq);
2302
2303         /* No event channels are 'live' right now. */
2304         for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++)
2305                 mask_evtchn(evtchn);
2306
2307         pirq_needs_eoi = pirq_needs_eoi_flag;
2308
2309 #ifdef CONFIG_X86
2310         if (xen_pv_domain()) {
2311                 if (xen_initial_domain())
2312                         pci_xen_initial_domain();
2313         }
2314         xen_init_setup_upcall_vector();
2315         xen_alloc_callback_vector();
2316
2318         if (xen_hvm_domain()) {
2319                 native_init_IRQ();
2320                 /* pci_xen_hvm_init must be called after native_init_IRQ so that
2321                  * __acpi_register_gsi can point at the right function */
2322                 pci_xen_hvm_init();
2323         } else {
2324                 int rc;
2325                 struct physdev_pirq_eoi_gmfn eoi_gmfn;
2326
2327                 pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
2328                 eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map);
2329                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
2330                 if (rc != 0) {
2331                         free_page((unsigned long) pirq_eoi_map);
2332                         pirq_eoi_map = NULL;
2333                 } else
2334                         pirq_needs_eoi = pirq_check_eoi_map;
2335         }
2336 #endif
2337 }