x86/xen: Add support for HVMOP_set_evtchn_upcall_vector
[platform/kernel/linux-starfive.git] drivers/xen/events/events_base.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Xen event channels
4  *
5  * Xen models interrupts with abstract event channels.  Because each
6  * domain gets 1024 event channels, but NR_IRQS is not that large, we
7  * must dynamically map irqs<->event channels.  The event channels
8  * interface with the rest of the kernel by defining a xen interrupt
9  * chip.  When an event is received, it is mapped to an irq and sent
10  * through the normal interrupt processing path.
11  *
12  * There are four kinds of events which can be mapped to an event
13  * channel:
14  *
15  * 1. Inter-domain notifications.  This includes all the virtual
16  *    device events, since they're driven by front-ends in another domain
17  *    (typically dom0).
18  * 2. VIRQs, typically used for timers.  These are per-cpu events.
19  * 3. IPIs.
20  * 4. PIRQs - Hardware interrupts.
21  *
22  * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
23  */
24
25 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
26
27 #include <linux/linkage.h>
28 #include <linux/interrupt.h>
29 #include <linux/irq.h>
30 #include <linux/moduleparam.h>
31 #include <linux/string.h>
32 #include <linux/memblock.h>
33 #include <linux/slab.h>
34 #include <linux/irqnr.h>
35 #include <linux/pci.h>
36 #include <linux/spinlock.h>
37 #include <linux/cpuhotplug.h>
38 #include <linux/atomic.h>
39 #include <linux/ktime.h>
40
41 #ifdef CONFIG_X86
42 #include <asm/desc.h>
43 #include <asm/ptrace.h>
44 #include <asm/idtentry.h>
45 #include <asm/irq.h>
46 #include <asm/io_apic.h>
47 #include <asm/i8259.h>
48 #include <asm/xen/cpuid.h>
49 #include <asm/xen/pci.h>
50 #endif
51 #include <asm/sync_bitops.h>
52 #include <asm/xen/hypercall.h>
53 #include <asm/xen/hypervisor.h>
54 #include <xen/page.h>
55
56 #include <xen/xen.h>
57 #include <xen/hvm.h>
58 #include <xen/xen-ops.h>
59 #include <xen/events.h>
60 #include <xen/interface/xen.h>
61 #include <xen/interface/event_channel.h>
62 #include <xen/interface/hvm/hvm_op.h>
63 #include <xen/interface/hvm/params.h>
64 #include <xen/interface/physdev.h>
65 #include <xen/interface/sched.h>
66 #include <xen/interface/vcpu.h>
67 #include <xen/xenbus.h>
68 #include <asm/hw_irq.h>
69
70 #include "events_internal.h"
71
72 #undef MODULE_PARAM_PREFIX
73 #define MODULE_PARAM_PREFIX "xen."
74
75 /* Interrupt types. */
76 enum xen_irq_type {
77         IRQT_UNBOUND = 0,
78         IRQT_PIRQ,
79         IRQT_VIRQ,
80         IRQT_IPI,
81         IRQT_EVTCHN
82 };
83
84 /*
85  * Packed IRQ information:
86  * type - enum xen_irq_type
87  * event channel - irq->event channel mapping
88  * cpu - cpu this event channel is bound to
89  * index - type-specific information:
90  *    PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM
91  *           guest, or GSI (real passthrough IRQ) of the device.
92  *    VIRQ - virq number
93  *    IPI - IPI vector
94  *    EVTCHN - the xenbus device backing an interdomain channel, if any
95  */
96 struct irq_info {
97         struct list_head list;
98         struct list_head eoi_list;
99         short refcnt;
100         u8 spurious_cnt;
101         u8 is_accounted;
102         short type;             /* type: IRQT_* */
103         u8 mask_reason;         /* Why is event channel masked */
104 #define EVT_MASK_REASON_EXPLICIT        0x01
105 #define EVT_MASK_REASON_TEMPORARY       0x02
106 #define EVT_MASK_REASON_EOI_PENDING     0x04
107         u8 is_active;           /* Is event just being handled? */
108         unsigned irq;
109         evtchn_port_t evtchn;   /* event channel */
110         unsigned short cpu;     /* cpu bound */
111         unsigned short eoi_cpu; /* EOI must happen on this cpu-1 */
112         unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
113         u64 eoi_time;           /* Time in jiffies when to EOI. */
114         raw_spinlock_t lock;
115
116         union {
117                 unsigned short virq;
118                 enum ipi_vector ipi;
119                 struct {
120                         unsigned short pirq;
121                         unsigned short gsi;
122                         unsigned char vector;
123                         unsigned char flags;
124                         uint16_t domid;
125                 } pirq;
126                 struct xenbus_device *interdomain;
127         } u;
128 };
129
130 #define PIRQ_NEEDS_EOI  (1 << 0)
131 #define PIRQ_SHAREABLE  (1 << 1)
132 #define PIRQ_MSI_GROUP  (1 << 2)
133
134 static uint __read_mostly event_loop_timeout = 2;
135 module_param(event_loop_timeout, uint, 0644);
136
137 static uint __read_mostly event_eoi_delay = 10;
138 module_param(event_eoi_delay, uint, 0644);
139
140 const struct evtchn_ops *evtchn_ops;
141
142 /*
143  * This lock protects updates to the following mapping and reference-count
144  * arrays. The lock does not need to be acquired to read the mapping tables.
145  */
146 static DEFINE_MUTEX(irq_mapping_update_lock);
147
148 /*
149  * Lock protecting event handling loop against removing event channels.
150  * Adding event channels is not an issue, as the associated IRQ becomes
151  * active only after everything is set up (the handler cannot be entered
152  * for an event before request_[threaded_]irq(), since the event channel
153  * will be unmasked only then).
154  */
155 static DEFINE_RWLOCK(evtchn_rwlock);
156
157 /*
158  * Lock hierarchy:
159  *
160  * irq_mapping_update_lock
161  *   evtchn_rwlock
162  *     IRQ-desc lock
163  *       percpu eoi_list_lock
164  *         irq_info->lock
165  */
166
167 static LIST_HEAD(xen_irq_list_head);
168
169 /* IRQ <-> VIRQ mapping. */
170 static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
171
172 /* IRQ <-> IPI mapping */
173 static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
174
175 /* Event channel distribution data */
176 static atomic_t channels_on_cpu[NR_CPUS];
177
178 static int **evtchn_to_irq;
179 #ifdef CONFIG_X86
180 static unsigned long *pirq_eoi_map;
181 #endif
182 static bool (*pirq_needs_eoi)(unsigned irq);
183
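/*
 * The evtchn -> irq mapping is kept in a two-level table: evtchn_to_irq
 * is an array of pointers to page-sized rows of ints, with rows allocated
 * on demand in set_evtchn_to_irq().  EVTCHN_ROW()/EVTCHN_COL() translate
 * an event channel port into the row/column of that table.
 */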
184 #define EVTCHN_ROW(e)  (e / (PAGE_SIZE/sizeof(**evtchn_to_irq)))
185 #define EVTCHN_COL(e)  (e % (PAGE_SIZE/sizeof(**evtchn_to_irq)))
186 #define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq))
187
188 /* Xen will never allocate port zero for any purpose. */
189 #define VALID_EVTCHN(chn)       ((chn) != 0)
190
191 static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY];
192
193 static struct irq_chip xen_dynamic_chip;
194 static struct irq_chip xen_lateeoi_chip;
195 static struct irq_chip xen_percpu_chip;
196 static struct irq_chip xen_pirq_chip;
197 static void enable_dynirq(struct irq_data *data);
198 static void disable_dynirq(struct irq_data *data);
199
200 static DEFINE_PER_CPU(unsigned int, irq_epoch);
201
202 static void clear_evtchn_to_irq_row(int *evtchn_row)
203 {
204         unsigned col;
205
206         for (col = 0; col < EVTCHN_PER_ROW; col++)
207                 WRITE_ONCE(evtchn_row[col], -1);
208 }
209
210 static void clear_evtchn_to_irq_all(void)
211 {
212         unsigned row;
213
214         for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) {
215                 if (evtchn_to_irq[row] == NULL)
216                         continue;
217                 clear_evtchn_to_irq_row(evtchn_to_irq[row]);
218         }
219 }
220
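/*
 * Record the evtchn -> irq mapping, allocating the table row on demand.
 * Returns -EINVAL for out-of-range ports and -ENOMEM if no row can be
 * allocated; storing -1 into a not yet allocated row is a no-op.
 */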
221 static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
222 {
223         unsigned row;
224         unsigned col;
225         int *evtchn_row;
226
227         if (evtchn >= xen_evtchn_max_channels())
228                 return -EINVAL;
229
230         row = EVTCHN_ROW(evtchn);
231         col = EVTCHN_COL(evtchn);
232
233         if (evtchn_to_irq[row] == NULL) {
234                 /* Unallocated irq entries return -1 anyway */
235                 if (irq == -1)
236                         return 0;
237
238                 evtchn_row = (int *) __get_free_pages(GFP_KERNEL, 0);
239                 if (evtchn_row == NULL)
240                         return -ENOMEM;
241
242                 clear_evtchn_to_irq_row(evtchn_row);
243
244                 /*
245                  * We've prepared an empty row for the mapping. If a different
246                  * thread was faster inserting it, we can drop ours.
247                  */
248                 if (cmpxchg(&evtchn_to_irq[row], NULL, evtchn_row) != NULL)
249                         free_page((unsigned long) evtchn_row);
250         }
251
252         WRITE_ONCE(evtchn_to_irq[row][col], irq);
253         return 0;
254 }
255
256 int get_evtchn_to_irq(evtchn_port_t evtchn)
257 {
258         if (evtchn >= xen_evtchn_max_channels())
259                 return -1;
260         if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
261                 return -1;
262         return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
263 }
264
265 /* Get info for IRQ */
266 static struct irq_info *info_for_irq(unsigned irq)
267 {
268         if (irq < nr_legacy_irqs())
269                 return legacy_info_ptrs[irq];
270         else
271                 return irq_get_chip_data(irq);
272 }
273
274 static void set_info_for_irq(unsigned int irq, struct irq_info *info)
275 {
276         if (irq < nr_legacy_irqs())
277                 legacy_info_ptrs[irq] = info;
278         else
279                 irq_set_chip_data(irq, info);
280 }
281
282 /* Per CPU channel accounting */
283 static void channels_on_cpu_dec(struct irq_info *info)
284 {
285         if (!info->is_accounted)
286                 return;
287
288         info->is_accounted = 0;
289
290         if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
291                 return;
292
293         WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1, 0));
294 }
295
296 static void channels_on_cpu_inc(struct irq_info *info)
297 {
298         if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
299                 return;
300
301         if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1,
302                                             INT_MAX)))
303                 return;
304
305         info->is_accounted = 1;
306 }
307
308 /* Constructors for packed IRQ information. */
309 static int xen_irq_info_common_setup(struct irq_info *info,
310                                      unsigned irq,
311                                      enum xen_irq_type type,
312                                      evtchn_port_t evtchn,
313                                      unsigned short cpu)
314 {
315         int ret;
316
317         BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
318
319         info->type = type;
320         info->irq = irq;
321         info->evtchn = evtchn;
322         info->cpu = cpu;
323         info->mask_reason = EVT_MASK_REASON_EXPLICIT;
324         raw_spin_lock_init(&info->lock);
325
326         ret = set_evtchn_to_irq(evtchn, irq);
327         if (ret < 0)
328                 return ret;
329
330         irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
331
332         return xen_evtchn_port_setup(evtchn);
333 }
334
335 static int xen_irq_info_evtchn_setup(unsigned irq,
336                                      evtchn_port_t evtchn,
337                                      struct xenbus_device *dev)
338 {
339         struct irq_info *info = info_for_irq(irq);
340         int ret;
341
342         ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
343         info->u.interdomain = dev;
344         if (dev)
345                 atomic_inc(&dev->event_channels);
346
347         return ret;
348 }
349
350 static int xen_irq_info_ipi_setup(unsigned cpu,
351                                   unsigned irq,
352                                   evtchn_port_t evtchn,
353                                   enum ipi_vector ipi)
354 {
355         struct irq_info *info = info_for_irq(irq);
356
357         info->u.ipi = ipi;
358
359         per_cpu(ipi_to_irq, cpu)[ipi] = irq;
360
361         return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
362 }
363
364 static int xen_irq_info_virq_setup(unsigned cpu,
365                                    unsigned irq,
366                                    evtchn_port_t evtchn,
367                                    unsigned virq)
368 {
369         struct irq_info *info = info_for_irq(irq);
370
371         info->u.virq = virq;
372
373         per_cpu(virq_to_irq, cpu)[virq] = irq;
374
375         return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
376 }
377
378 static int xen_irq_info_pirq_setup(unsigned irq,
379                                    evtchn_port_t evtchn,
380                                    unsigned pirq,
381                                    unsigned gsi,
382                                    uint16_t domid,
383                                    unsigned char flags)
384 {
385         struct irq_info *info = info_for_irq(irq);
386
387         info->u.pirq.pirq = pirq;
388         info->u.pirq.gsi = gsi;
389         info->u.pirq.domid = domid;
390         info->u.pirq.flags = flags;
391
392         return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
393 }
394
395 static void xen_irq_info_cleanup(struct irq_info *info)
396 {
397         set_evtchn_to_irq(info->evtchn, -1);
398         xen_evtchn_port_remove(info->evtchn, info->cpu);
399         info->evtchn = 0;
400         channels_on_cpu_dec(info);
401 }
402
403 /*
404  * Accessors for packed IRQ information.
405  */
406 evtchn_port_t evtchn_from_irq(unsigned irq)
407 {
408         const struct irq_info *info = NULL;
409
410         if (likely(irq < nr_irqs))
411                 info = info_for_irq(irq);
412         if (!info)
413                 return 0;
414
415         return info->evtchn;
416 }
417
418 unsigned int irq_from_evtchn(evtchn_port_t evtchn)
419 {
420         return get_evtchn_to_irq(evtchn);
421 }
422 EXPORT_SYMBOL_GPL(irq_from_evtchn);
423
424 int irq_from_virq(unsigned int cpu, unsigned int virq)
425 {
426         return per_cpu(virq_to_irq, cpu)[virq];
427 }
428
429 static enum ipi_vector ipi_from_irq(unsigned irq)
430 {
431         struct irq_info *info = info_for_irq(irq);
432
433         BUG_ON(info == NULL);
434         BUG_ON(info->type != IRQT_IPI);
435
436         return info->u.ipi;
437 }
438
439 static unsigned virq_from_irq(unsigned irq)
440 {
441         struct irq_info *info = info_for_irq(irq);
442
443         BUG_ON(info == NULL);
444         BUG_ON(info->type != IRQT_VIRQ);
445
446         return info->u.virq;
447 }
448
449 static unsigned pirq_from_irq(unsigned irq)
450 {
451         struct irq_info *info = info_for_irq(irq);
452
453         BUG_ON(info == NULL);
454         BUG_ON(info->type != IRQT_PIRQ);
455
456         return info->u.pirq.pirq;
457 }
458
459 static enum xen_irq_type type_from_irq(unsigned irq)
460 {
461         return info_for_irq(irq)->type;
462 }
463
464 static unsigned cpu_from_irq(unsigned irq)
465 {
466         return info_for_irq(irq)->cpu;
467 }
468
469 unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
470 {
471         int irq = get_evtchn_to_irq(evtchn);
472         unsigned ret = 0;
473
474         if (irq != -1)
475                 ret = cpu_from_irq(irq);
476
477         return ret;
478 }
479
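/*
 * Masking of an event channel is tracked per reason (EVT_MASK_REASON_*):
 * the channel is masked when the first reason bit gets set and is only
 * unmasked again once all reason bits have been cleared.
 */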
480 static void do_mask(struct irq_info *info, u8 reason)
481 {
482         unsigned long flags;
483
484         raw_spin_lock_irqsave(&info->lock, flags);
485
486         if (!info->mask_reason)
487                 mask_evtchn(info->evtchn);
488
489         info->mask_reason |= reason;
490
491         raw_spin_unlock_irqrestore(&info->lock, flags);
492 }
493
494 static void do_unmask(struct irq_info *info, u8 reason)
495 {
496         unsigned long flags;
497
498         raw_spin_lock_irqsave(&info->lock, flags);
499
500         info->mask_reason &= ~reason;
501
502         if (!info->mask_reason)
503                 unmask_evtchn(info->evtchn);
504
505         raw_spin_unlock_irqrestore(&info->lock, flags);
506 }
507
508 #ifdef CONFIG_X86
509 static bool pirq_check_eoi_map(unsigned irq)
510 {
511         return test_bit(pirq_from_irq(irq), pirq_eoi_map);
512 }
513 #endif
514
515 static bool pirq_needs_eoi_flag(unsigned irq)
516 {
517         struct irq_info *info = info_for_irq(irq);
518         BUG_ON(info->type != IRQT_PIRQ);
519
520         return info->u.pirq.flags & PIRQ_NEEDS_EOI;
521 }
522
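/*
 * Rebind an event channel to a (possibly different) cpu: inform the
 * low-level evtchn code and keep the per-cpu channel accounting in sync.
 * With force_affinity the IRQ affinity masks are updated as well; this is
 * used for percpu bindings (IPIs, percpu VIRQs) so that /proc/interrupts
 * shows the real target cpu.
 */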
523 static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
524                                bool force_affinity)
525 {
526         int irq = get_evtchn_to_irq(evtchn);
527         struct irq_info *info = info_for_irq(irq);
528
529         BUG_ON(irq == -1);
530
531         if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
532                 cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
533                 cpumask_copy(irq_get_effective_affinity_mask(irq),
534                              cpumask_of(cpu));
535         }
536
537         xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
538
539         channels_on_cpu_dec(info);
540         info->cpu = cpu;
541         channels_on_cpu_inc(info);
542 }
543
544 /**
545  * notify_remote_via_irq - send event to remote end of event channel via irq
546  * @irq: irq of event channel to send event to
547  *
548  * Unlike notify_remote_via_evtchn(), this is safe to use across
549  * save/restore. Notifications on a broken connection are silently
550  * dropped.
551  */
552 void notify_remote_via_irq(int irq)
553 {
554         evtchn_port_t evtchn = evtchn_from_irq(irq);
555
556         if (VALID_EVTCHN(evtchn))
557                 notify_remote_via_evtchn(evtchn);
558 }
559 EXPORT_SYMBOL_GPL(notify_remote_via_irq);
560
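/*
 * Lateeoi handling: events using the lateeoi irq chip are not EOI-ed
 * until the driver signals completion via xen_irq_lateeoi() (with
 * XEN_EOI_FLAG_SPURIOUS if the event turned out to be spurious).  If the
 * EOI has to be delayed, either due to spurious events or because the
 * event loop hit its timeout, the irq_info is queued, ordered by
 * eoi_time, on a per-cpu list which is drained by delayed work.
 */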
561 struct lateeoi_work {
562         struct delayed_work delayed;
563         spinlock_t eoi_list_lock;
564         struct list_head eoi_list;
565 };
566
567 static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
568
569 static void lateeoi_list_del(struct irq_info *info)
570 {
571         struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
572         unsigned long flags;
573
574         spin_lock_irqsave(&eoi->eoi_list_lock, flags);
575         list_del_init(&info->eoi_list);
576         spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
577 }
578
579 static void lateeoi_list_add(struct irq_info *info)
580 {
581         struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
582         struct irq_info *elem;
583         u64 now = get_jiffies_64();
584         unsigned long delay;
585         unsigned long flags;
586
587         if (now < info->eoi_time)
588                 delay = info->eoi_time - now;
589         else
590                 delay = 1;
591
592         spin_lock_irqsave(&eoi->eoi_list_lock, flags);
593
594         if (list_empty(&eoi->eoi_list)) {
595                 list_add(&info->eoi_list, &eoi->eoi_list);
596                 mod_delayed_work_on(info->eoi_cpu, system_wq,
597                                     &eoi->delayed, delay);
598         } else {
599                 list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
600                         if (elem->eoi_time <= info->eoi_time)
601                                 break;
602                 }
603                 list_add(&info->eoi_list, &elem->eoi_list);
604         }
605
606         spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
607 }
608
609 static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
610 {
611         evtchn_port_t evtchn;
612         unsigned int cpu;
613         unsigned int delay = 0;
614
615         evtchn = info->evtchn;
616         if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
617                 return;
618
619         if (spurious) {
620                 struct xenbus_device *dev = info->u.interdomain;
621                 unsigned int threshold = 1;
622
623                 if (dev && dev->spurious_threshold)
624                         threshold = dev->spurious_threshold;
625
626                 if ((1 << info->spurious_cnt) < (HZ << 2)) {
627                         if (info->spurious_cnt != 0xFF)
628                                 info->spurious_cnt++;
629                 }
630                 if (info->spurious_cnt > threshold) {
631                         delay = 1 << (info->spurious_cnt - 1 - threshold);
632                         if (delay > HZ)
633                                 delay = HZ;
634                         if (!info->eoi_time)
635                                 info->eoi_cpu = smp_processor_id();
636                         info->eoi_time = get_jiffies_64() + delay;
637                         if (dev)
638                                 atomic_add(delay, &dev->jiffies_eoi_delayed);
639                 }
640                 if (dev)
641                         atomic_inc(&dev->spurious_events);
642         } else {
643                 info->spurious_cnt = 0;
644         }
645
646         cpu = info->eoi_cpu;
647         if (info->eoi_time &&
648             (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) {
649                 lateeoi_list_add(info);
650                 return;
651         }
652
653         info->eoi_time = 0;
654
655         /* is_active hasn't been reset yet, do it now. */
656         smp_store_release(&info->is_active, 0);
657         do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
658 }
659
660 static void xen_irq_lateeoi_worker(struct work_struct *work)
661 {
662         struct lateeoi_work *eoi;
663         struct irq_info *info;
664         u64 now = get_jiffies_64();
665         unsigned long flags;
666
667         eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
668
669         read_lock_irqsave(&evtchn_rwlock, flags);
670
671         while (true) {
672                 spin_lock(&eoi->eoi_list_lock);
673
674                 info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
675                                                 eoi_list);
676
677                 if (info == NULL || now < info->eoi_time) {
678                         spin_unlock(&eoi->eoi_list_lock);
679                         break;
680                 }
681
682                 list_del_init(&info->eoi_list);
683
684                 spin_unlock(&eoi->eoi_list_lock);
685
686                 info->eoi_time = 0;
687
688                 xen_irq_lateeoi_locked(info, false);
689         }
690
691         if (info)
692                 mod_delayed_work_on(info->eoi_cpu, system_wq,
693                                     &eoi->delayed, info->eoi_time - now);
694
695         read_unlock_irqrestore(&evtchn_rwlock, flags);
696 }
697
698 static void xen_cpu_init_eoi(unsigned int cpu)
699 {
700         struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);
701
702         INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
703         spin_lock_init(&eoi->eoi_list_lock);
704         INIT_LIST_HEAD(&eoi->eoi_list);
705 }
706
707 void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
708 {
709         struct irq_info *info;
710         unsigned long flags;
711
712         read_lock_irqsave(&evtchn_rwlock, flags);
713
714         info = info_for_irq(irq);
715
716         if (info)
717                 xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
718
719         read_unlock_irqrestore(&evtchn_rwlock, flags);
720 }
721 EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
722
723 static void xen_irq_init(unsigned irq)
724 {
725         struct irq_info *info;
726
727         info = kzalloc(sizeof(*info), GFP_KERNEL);
728         if (info == NULL)
729                 panic("Unable to allocate metadata for IRQ%d\n", irq);
730
731         info->type = IRQT_UNBOUND;
732         info->refcnt = -1;
733
734         set_info_for_irq(irq, info);
735         /*
736          * Interrupt affinity setting can be immediate. No point
737          * in delaying it until an interrupt is handled.
738          */
739         irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
740
741         INIT_LIST_HEAD(&info->eoi_list);
742         list_add_tail(&info->list, &xen_irq_list_head);
743 }
744
745 static int __must_check xen_allocate_irqs_dynamic(int nvec)
746 {
747         int i, irq = irq_alloc_descs(-1, 0, nvec, -1);
748
749         if (irq >= 0) {
750                 for (i = 0; i < nvec; i++)
751                         xen_irq_init(irq + i);
752         }
753
754         return irq;
755 }
756
757 static inline int __must_check xen_allocate_irq_dynamic(void)
758 {
759
760         return xen_allocate_irqs_dynamic(1);
761 }
762
763 static int __must_check xen_allocate_irq_gsi(unsigned gsi)
764 {
765         int irq;
766
767         /*
768          * A PV guest has no concept of a GSI (since it has no ACPI
769          * nor access to/knowledge of the physical APICs). Therefore
770          * all IRQs are dynamically allocated from the entire IRQ
771          * space.
772          */
773         if (xen_pv_domain() && !xen_initial_domain())
774                 return xen_allocate_irq_dynamic();
775
776         /* Legacy IRQ descriptors are already allocated by the arch. */
777         if (gsi < nr_legacy_irqs())
778                 irq = gsi;
779         else
780                 irq = irq_alloc_desc_at(gsi, -1);
781
782         xen_irq_init(irq);
783
784         return irq;
785 }
786
787 static void xen_free_irq(unsigned irq)
788 {
789         struct irq_info *info = info_for_irq(irq);
790         unsigned long flags;
791
792         if (WARN_ON(!info))
793                 return;
794
795         write_lock_irqsave(&evtchn_rwlock, flags);
796
797         if (!list_empty(&info->eoi_list))
798                 lateeoi_list_del(info);
799
800         list_del(&info->list);
801
802         set_info_for_irq(irq, NULL);
803
804         WARN_ON(info->refcnt > 0);
805
806         write_unlock_irqrestore(&evtchn_rwlock, flags);
807
808         kfree(info);
809
810         /* Legacy IRQ descriptors are managed by the arch. */
811         if (irq < nr_legacy_irqs())
812                 return;
813
814         irq_free_desc(irq);
815 }
816
817 static void xen_evtchn_close(evtchn_port_t port)
818 {
819         struct evtchn_close close;
820
821         close.port = port;
822         if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
823                 BUG();
824 }
825
826 /* Not called for lateeoi events. */
827 static void event_handler_exit(struct irq_info *info)
828 {
829         smp_store_release(&info->is_active, 0);
830         clear_evtchn(info->evtchn);
831 }
832
833 static void pirq_query_unmask(int irq)
834 {
835         struct physdev_irq_status_query irq_status;
836         struct irq_info *info = info_for_irq(irq);
837
838         BUG_ON(info->type != IRQT_PIRQ);
839
840         irq_status.irq = pirq_from_irq(irq);
841         if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
842                 irq_status.flags = 0;
843
844         info->u.pirq.flags &= ~PIRQ_NEEDS_EOI;
845         if (irq_status.flags & XENIRQSTAT_needs_eoi)
846                 info->u.pirq.flags |= PIRQ_NEEDS_EOI;
847 }
848
849 static void eoi_pirq(struct irq_data *data)
850 {
851         struct irq_info *info = info_for_irq(data->irq);
852         evtchn_port_t evtchn = info ? info->evtchn : 0;
853         struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
854         int rc = 0;
855
856         if (!VALID_EVTCHN(evtchn))
857                 return;
858
859         event_handler_exit(info);
860
861         if (pirq_needs_eoi(data->irq)) {
862                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
863                 WARN_ON(rc);
864         }
865 }
866
867 static void mask_ack_pirq(struct irq_data *data)
868 {
869         disable_dynirq(data);
870         eoi_pirq(data);
871 }
872
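/*
 * First startup of a PIRQ: bind it to an event channel via
 * EVTCHNOP_bind_pirq, record the evtchn <-> irq mapping and unmask the
 * channel.  If the event channel is already valid (re-startup), only the
 * unmask and the initial EOI are performed.
 */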
873 static unsigned int __startup_pirq(unsigned int irq)
874 {
875         struct evtchn_bind_pirq bind_pirq;
876         struct irq_info *info = info_for_irq(irq);
877         evtchn_port_t evtchn = evtchn_from_irq(irq);
878         int rc;
879
880         BUG_ON(info->type != IRQT_PIRQ);
881
882         if (VALID_EVTCHN(evtchn))
883                 goto out;
884
885         bind_pirq.pirq = pirq_from_irq(irq);
886         /* NB. We are happy to share unless we are probing. */
887         bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
888                                         BIND_PIRQ__WILL_SHARE : 0;
889         rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
890         if (rc != 0) {
891                 pr_warn("Failed to obtain physical IRQ %d\n", irq);
892                 return 0;
893         }
894         evtchn = bind_pirq.port;
895
896         pirq_query_unmask(irq);
897
898         rc = set_evtchn_to_irq(evtchn, irq);
899         if (rc)
900                 goto err;
901
902         info->evtchn = evtchn;
903         bind_evtchn_to_cpu(evtchn, 0, false);
904
905         rc = xen_evtchn_port_setup(evtchn);
906         if (rc)
907                 goto err;
908
909 out:
910         do_unmask(info, EVT_MASK_REASON_EXPLICIT);
911
912         eoi_pirq(irq_get_irq_data(irq));
913
914         return 0;
915
916 err:
917         pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc);
918         xen_evtchn_close(evtchn);
919         return 0;
920 }
921
922 static unsigned int startup_pirq(struct irq_data *data)
923 {
924         return __startup_pirq(data->irq);
925 }
926
927 static void shutdown_pirq(struct irq_data *data)
928 {
929         unsigned int irq = data->irq;
930         struct irq_info *info = info_for_irq(irq);
931         evtchn_port_t evtchn = evtchn_from_irq(irq);
932
933         BUG_ON(info->type != IRQT_PIRQ);
934
935         if (!VALID_EVTCHN(evtchn))
936                 return;
937
938         do_mask(info, EVT_MASK_REASON_EXPLICIT);
939         xen_evtchn_close(evtchn);
940         xen_irq_info_cleanup(info);
941 }
942
943 static void enable_pirq(struct irq_data *data)
944 {
945         enable_dynirq(data);
946 }
947
948 static void disable_pirq(struct irq_data *data)
949 {
950         disable_dynirq(data);
951 }
952
953 int xen_irq_from_gsi(unsigned gsi)
954 {
955         struct irq_info *info;
956
957         list_for_each_entry(info, &xen_irq_list_head, list) {
958                 if (info->type != IRQT_PIRQ)
959                         continue;
960
961                 if (info->u.pirq.gsi == gsi)
962                         return info->irq;
963         }
964
965         return -1;
966 }
967 EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
968
969 static void __unbind_from_irq(unsigned int irq)
970 {
971         evtchn_port_t evtchn = evtchn_from_irq(irq);
972         struct irq_info *info = info_for_irq(irq);
973
974         if (info->refcnt > 0) {
975                 info->refcnt--;
976                 if (info->refcnt != 0)
977                         return;
978         }
979
980         if (VALID_EVTCHN(evtchn)) {
981                 unsigned int cpu = cpu_from_irq(irq);
982                 struct xenbus_device *dev;
983
984                 xen_evtchn_close(evtchn);
985
986                 switch (type_from_irq(irq)) {
987                 case IRQT_VIRQ:
988                         per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
989                         break;
990                 case IRQT_IPI:
991                         per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
992                         break;
993                 case IRQT_EVTCHN:
994                         dev = info->u.interdomain;
995                         if (dev)
996                                 atomic_dec(&dev->event_channels);
997                         break;
998                 default:
999                         break;
1000                 }
1001
1002                 xen_irq_info_cleanup(info);
1003         }
1004
1005         xen_free_irq(irq);
1006 }
1007
1008 /*
1009  * Do not make any assumptions regarding the relationship between the
1010  * IRQ number returned here and the Xen pirq argument.
1011  *
1012  * Note: We don't assign an event channel until the irq is actually started
1013  * up.  Return an existing irq if we've already got one for the gsi.
1014  *
1015  * Shareable implies level triggered, not shareable implies edge
1016  * triggered here.
1017  */
1018 int xen_bind_pirq_gsi_to_irq(unsigned gsi,
1019                              unsigned pirq, int shareable, char *name)
1020 {
1021         int irq;
1022         struct physdev_irq irq_op;
1023         int ret;
1024
1025         mutex_lock(&irq_mapping_update_lock);
1026
1027         irq = xen_irq_from_gsi(gsi);
1028         if (irq != -1) {
1029                 pr_info("%s: returning irq %d for gsi %u\n",
1030                         __func__, irq, gsi);
1031                 goto out;
1032         }
1033
1034         irq = xen_allocate_irq_gsi(gsi);
1035         if (irq < 0)
1036                 goto out;
1037
1038         irq_op.irq = irq;
1039         irq_op.vector = 0;
1040
1041         /* Only the privileged domain can do this. For non-privileged domains
1042          * the pcifront driver provides a PCI bus that makes this call on
1043          * their behalf in the privileged domain. */
1044         if (xen_initial_domain() &&
1045             HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
1046                 xen_free_irq(irq);
1047                 irq = -ENOSPC;
1048                 goto out;
1049         }
1050
1051         ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
1052                                shareable ? PIRQ_SHAREABLE : 0);
1053         if (ret < 0) {
1054                 __unbind_from_irq(irq);
1055                 irq = ret;
1056                 goto out;
1057         }
1058
1059         pirq_query_unmask(irq);
1060         /* We try to use the handler with the appropriate semantic for the
1061          * type of interrupt: if the interrupt is an edge triggered
1062          * interrupt we use handle_edge_irq.
1063          *
1064          * On the other hand if the interrupt is level triggered we use
1065          * handle_fasteoi_irq like the native code does for this kind of
1066          * interrupts.
1067          *
1068          * Depending on the Xen version, pirq_needs_eoi might return true
1069          * not only for level triggered interrupts but for edge triggered
1070          * interrupts too. In any case Xen always honors the eoi mechanism,
1071          * not injecting any more pirqs of the same kind if the first one
1072          * hasn't received an eoi yet. Therefore using the fasteoi handler
1073          * is the right choice either way.
1074          */
1075         if (shareable)
1076                 irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
1077                                 handle_fasteoi_irq, name);
1078         else
1079                 irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
1080                                 handle_edge_irq, name);
1081
1082 out:
1083         mutex_unlock(&irq_mapping_update_lock);
1084
1085         return irq;
1086 }
1087
1088 #ifdef CONFIG_PCI_MSI
1089 int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
1090 {
1091         int rc;
1092         struct physdev_get_free_pirq op_get_free_pirq;
1093
1094         op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
1095         rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
1096
1097         WARN_ONCE(rc == -ENOSYS,
1098                   "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
1099
1100         return rc ? -1 : op_get_free_pirq.pirq;
1101 }
1102
1103 int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
1104                              int pirq, int nvec, const char *name, domid_t domid)
1105 {
1106         int i, irq, ret;
1107
1108         mutex_lock(&irq_mapping_update_lock);
1109
1110         irq = xen_allocate_irqs_dynamic(nvec);
1111         if (irq < 0)
1112                 goto out;
1113
1114         for (i = 0; i < nvec; i++) {
1115                 irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);
1116
1117                 ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid,
1118                                               i == 0 ? 0 : PIRQ_MSI_GROUP);
1119                 if (ret < 0)
1120                         goto error_irq;
1121         }
1122
1123         ret = irq_set_msi_desc(irq, msidesc);
1124         if (ret < 0)
1125                 goto error_irq;
1126 out:
1127         mutex_unlock(&irq_mapping_update_lock);
1128         return irq;
1129 error_irq:
1130         while (nvec--)
1131                 __unbind_from_irq(irq + nvec);
1132         mutex_unlock(&irq_mapping_update_lock);
1133         return ret;
1134 }
1135 #endif
1136
1137 int xen_destroy_irq(int irq)
1138 {
1139         struct physdev_unmap_pirq unmap_irq;
1140         struct irq_info *info = info_for_irq(irq);
1141         int rc = -ENOENT;
1142
1143         mutex_lock(&irq_mapping_update_lock);
1144
1145         /*
1146          * Only the first vector of an MSI group (the one without
1147          * PIRQ_MSI_GROUP set) does the PIRQ unmap; skip it when removing
1148          * any of the other vectors of the group.
1149          */
1150         if (xen_initial_domain() && !(info->u.pirq.flags & PIRQ_MSI_GROUP)) {
1151                 unmap_irq.pirq = info->u.pirq.pirq;
1152                 unmap_irq.domid = info->u.pirq.domid;
1153                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
1154                 /* If another domain quits without making the pci_disable_msix
1155                  * call, the Xen hypervisor takes care of freeing the PIRQs
1156                  * (free_domain_pirqs).
1157                  */
1158                 if (rc == -ESRCH && info->u.pirq.domid != DOMID_SELF)
1159                         pr_info("domain %d does not have %d anymore\n",
1160                                 info->u.pirq.domid, info->u.pirq.pirq);
1161                 else if (rc) {
1162                         pr_warn("unmap irq failed %d\n", rc);
1163                         goto out;
1164                 }
1165         }
1166
1167         xen_free_irq(irq);
1168
1169 out:
1170         mutex_unlock(&irq_mapping_update_lock);
1171         return rc;
1172 }
1173
1174 int xen_irq_from_pirq(unsigned pirq)
1175 {
1176         int irq;
1177
1178         struct irq_info *info;
1179
1180         mutex_lock(&irq_mapping_update_lock);
1181
1182         list_for_each_entry(info, &xen_irq_list_head, list) {
1183                 if (info->type != IRQT_PIRQ)
1184                         continue;
1185                 irq = info->irq;
1186                 if (info->u.pirq.pirq == pirq)
1187                         goto out;
1188         }
1189         irq = -1;
1190 out:
1191         mutex_unlock(&irq_mapping_update_lock);
1192
1193         return irq;
1194 }
1195
1196
1197 int xen_pirq_from_irq(unsigned irq)
1198 {
1199         return pirq_from_irq(irq);
1200 }
1201 EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
1202
1203 static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
1204                                    struct xenbus_device *dev)
1205 {
1206         int irq;
1207         int ret;
1208
1209         if (evtchn >= xen_evtchn_max_channels())
1210                 return -ENOMEM;
1211
1212         mutex_lock(&irq_mapping_update_lock);
1213
1214         irq = get_evtchn_to_irq(evtchn);
1215
1216         if (irq == -1) {
1217                 irq = xen_allocate_irq_dynamic();
1218                 if (irq < 0)
1219                         goto out;
1220
1221                 irq_set_chip_and_handler_name(irq, chip,
1222                                               handle_edge_irq, "event");
1223
1224                 ret = xen_irq_info_evtchn_setup(irq, evtchn, dev);
1225                 if (ret < 0) {
1226                         __unbind_from_irq(irq);
1227                         irq = ret;
1228                         goto out;
1229                 }
1230                 /*
1231                  * New interdomain events are initially bound to vCPU0.  This
1232                  * is required to set up the event channel in the first
1233                  * place and is also important for UP guests, because the
1234                  * affinity setting is not invoked on them so nothing else
1235                  * would bind the channel.
1236                  */
1237                 bind_evtchn_to_cpu(evtchn, 0, false);
1238         } else {
1239                 struct irq_info *info = info_for_irq(irq);
1240                 WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
1241         }
1242
1243 out:
1244         mutex_unlock(&irq_mapping_update_lock);
1245
1246         return irq;
1247 }
1248
1249 int bind_evtchn_to_irq(evtchn_port_t evtchn)
1250 {
1251         return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip, NULL);
1252 }
1253 EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
1254
1255 int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
1256 {
1257         return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip, NULL);
1258 }
1259 EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
1260
1261 static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
1262 {
1263         struct evtchn_bind_ipi bind_ipi;
1264         evtchn_port_t evtchn;
1265         int ret, irq;
1266
1267         mutex_lock(&irq_mapping_update_lock);
1268
1269         irq = per_cpu(ipi_to_irq, cpu)[ipi];
1270
1271         if (irq == -1) {
1272                 irq = xen_allocate_irq_dynamic();
1273                 if (irq < 0)
1274                         goto out;
1275
1276                 irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
1277                                               handle_percpu_irq, "ipi");
1278
1279                 bind_ipi.vcpu = xen_vcpu_nr(cpu);
1280                 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
1281                                                 &bind_ipi) != 0)
1282                         BUG();
1283                 evtchn = bind_ipi.port;
1284
1285                 ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
1286                 if (ret < 0) {
1287                         __unbind_from_irq(irq);
1288                         irq = ret;
1289                         goto out;
1290                 }
1291                 /*
1292                  * Force the affinity mask to the target CPU so proc shows
1293                  * the correct target.
1294                  */
1295                 bind_evtchn_to_cpu(evtchn, cpu, true);
1296         } else {
1297                 struct irq_info *info = info_for_irq(irq);
1298                 WARN_ON(info == NULL || info->type != IRQT_IPI);
1299         }
1300
1301  out:
1302         mutex_unlock(&irq_mapping_update_lock);
1303         return irq;
1304 }
1305
1306 static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev,
1307                                                evtchn_port_t remote_port,
1308                                                struct irq_chip *chip)
1309 {
1310         struct evtchn_bind_interdomain bind_interdomain;
1311         int err;
1312
1313         bind_interdomain.remote_dom  = dev->otherend_id;
1314         bind_interdomain.remote_port = remote_port;
1315
1316         err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
1317                                           &bind_interdomain);
1318
1319         return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port,
1320                                                chip, dev);
1321 }
1322
1323 int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
1324                                            evtchn_port_t remote_port)
1325 {
1326         return bind_interdomain_evtchn_to_irq_chip(dev, remote_port,
1327                                                    &xen_lateeoi_chip);
1328 }
1329 EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
1330
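/*
 * Scan all event channel ports via EVTCHNOP_status to find the port
 * already bound to the given VIRQ on the given vcpu.  Used when
 * EVTCHNOP_bind_virq returns -EEXIST, so the existing binding can be
 * reused instead of failing.
 */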
1331 static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
1332 {
1333         struct evtchn_status status;
1334         evtchn_port_t port;
1335         int rc = -ENOENT;
1336
1337         memset(&status, 0, sizeof(status));
1338         for (port = 0; port < xen_evtchn_max_channels(); port++) {
1339                 status.dom = DOMID_SELF;
1340                 status.port = port;
1341                 rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
1342                 if (rc < 0)
1343                         continue;
1344                 if (status.status != EVTCHNSTAT_virq)
1345                         continue;
1346                 if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
1347                         *evtchn = port;
1348                         break;
1349                 }
1350         }
1351         return rc;
1352 }
1353
1354 /**
1355  * xen_evtchn_nr_channels - number of usable event channel ports
1356  *
1357  * This may be less than the maximum supported by the current
1358  * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum
1359  * supported.
1360  */
1361 unsigned xen_evtchn_nr_channels(void)
1362 {
1363         return evtchn_ops->nr_channels();
1364 }
1365 EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels);
1366
1367 int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
1368 {
1369         struct evtchn_bind_virq bind_virq;
1370         evtchn_port_t evtchn = 0;
1371         int irq, ret;
1372
1373         mutex_lock(&irq_mapping_update_lock);
1374
1375         irq = per_cpu(virq_to_irq, cpu)[virq];
1376
1377         if (irq == -1) {
1378                 irq = xen_allocate_irq_dynamic();
1379                 if (irq < 0)
1380                         goto out;
1381
1382                 if (percpu)
1383                         irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
1384                                                       handle_percpu_irq, "virq");
1385                 else
1386                         irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
1387                                                       handle_edge_irq, "virq");
1388
1389                 bind_virq.virq = virq;
1390                 bind_virq.vcpu = xen_vcpu_nr(cpu);
1391                 ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
1392                                                 &bind_virq);
1393                 if (ret == 0)
1394                         evtchn = bind_virq.port;
1395                 else {
1396                         if (ret == -EEXIST)
1397                                 ret = find_virq(virq, cpu, &evtchn);
1398                         BUG_ON(ret < 0);
1399                 }
1400
1401                 ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
1402                 if (ret < 0) {
1403                         __unbind_from_irq(irq);
1404                         irq = ret;
1405                         goto out;
1406                 }
1407
1408                 /*
1409                  * Force the affinity mask for percpu interrupts so proc
1410                  * shows the correct target.
1411                  */
1412                 bind_evtchn_to_cpu(evtchn, cpu, percpu);
1413         } else {
1414                 struct irq_info *info = info_for_irq(irq);
1415                 WARN_ON(info == NULL || info->type != IRQT_VIRQ);
1416         }
1417
1418 out:
1419         mutex_unlock(&irq_mapping_update_lock);
1420
1421         return irq;
1422 }
1423
1424 static void unbind_from_irq(unsigned int irq)
1425 {
1426         mutex_lock(&irq_mapping_update_lock);
1427         __unbind_from_irq(irq);
1428         mutex_unlock(&irq_mapping_update_lock);
1429 }
1430
1431 static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn,
1432                                           irq_handler_t handler,
1433                                           unsigned long irqflags,
1434                                           const char *devname, void *dev_id,
1435                                           struct irq_chip *chip)
1436 {
1437         int irq, retval;
1438
1439         irq = bind_evtchn_to_irq_chip(evtchn, chip, NULL);
1440         if (irq < 0)
1441                 return irq;
1442         retval = request_irq(irq, handler, irqflags, devname, dev_id);
1443         if (retval != 0) {
1444                 unbind_from_irq(irq);
1445                 return retval;
1446         }
1447
1448         return irq;
1449 }
1450
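/*
 * Illustrative usage (sketch only, handler and device names are made up):
 * a frontend driver typically binds an event channel obtained via xenbus
 * and requests the irq in one go, e.g.
 *
 *	irq = bind_evtchn_to_irqhandler(evtchn, my_interrupt, 0,
 *					"my-frontend", my_dev);
 *	if (irq < 0)
 *		return irq;
 *	...
 *	unbind_from_irqhandler(irq, my_dev);
 *
 * The _lateeoi variant below is meant for events coming from another,
 * possibly misbehaving, domain.
 */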
1451 int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
1452                               irq_handler_t handler,
1453                               unsigned long irqflags,
1454                               const char *devname, void *dev_id)
1455 {
1456         return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
1457                                               devname, dev_id,
1458                                               &xen_dynamic_chip);
1459 }
1460 EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
1461
1462 int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
1463                                       irq_handler_t handler,
1464                                       unsigned long irqflags,
1465                                       const char *devname, void *dev_id)
1466 {
1467         return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
1468                                               devname, dev_id,
1469                                               &xen_lateeoi_chip);
1470 }
1471 EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi);
1472
1473 static int bind_interdomain_evtchn_to_irqhandler_chip(
1474                 struct xenbus_device *dev, evtchn_port_t remote_port,
1475                 irq_handler_t handler, unsigned long irqflags,
1476                 const char *devname, void *dev_id, struct irq_chip *chip)
1477 {
1478         int irq, retval;
1479
1480         irq = bind_interdomain_evtchn_to_irq_chip(dev, remote_port, chip);
1481         if (irq < 0)
1482                 return irq;
1483
1484         retval = request_irq(irq, handler, irqflags, devname, dev_id);
1485         if (retval != 0) {
1486                 unbind_from_irq(irq);
1487                 return retval;
1488         }
1489
1490         return irq;
1491 }
1492
1493 int bind_interdomain_evtchn_to_irqhandler_lateeoi(struct xenbus_device *dev,
1494                                                   evtchn_port_t remote_port,
1495                                                   irq_handler_t handler,
1496                                                   unsigned long irqflags,
1497                                                   const char *devname,
1498                                                   void *dev_id)
1499 {
1500         return bind_interdomain_evtchn_to_irqhandler_chip(dev,
1501                                 remote_port, handler, irqflags, devname,
1502                                 dev_id, &xen_lateeoi_chip);
1503 }
1504 EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi);
1505
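/*
 * Illustrative usage (sketch only, handler name is made up): per-cpu
 * virtual interrupts such as the timer are typically bound with
 *
 *	bind_virq_to_irqhandler(VIRQ_TIMER, cpu, my_timer_interrupt,
 *				IRQF_PERCPU, "timer", NULL);
 *
 * Passing IRQF_PERCPU in irqflags selects the percpu irq chip and handler
 * in bind_virq_to_irq().
 */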
1506 int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
1507                             irq_handler_t handler,
1508                             unsigned long irqflags, const char *devname, void *dev_id)
1509 {
1510         int irq, retval;
1511
1512         irq = bind_virq_to_irq(virq, cpu, irqflags & IRQF_PERCPU);
1513         if (irq < 0)
1514                 return irq;
1515         retval = request_irq(irq, handler, irqflags, devname, dev_id);
1516         if (retval != 0) {
1517                 unbind_from_irq(irq);
1518                 return retval;
1519         }
1520
1521         return irq;
1522 }
1523 EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
1524
1525 int bind_ipi_to_irqhandler(enum ipi_vector ipi,
1526                            unsigned int cpu,
1527                            irq_handler_t handler,
1528                            unsigned long irqflags,
1529                            const char *devname,
1530                            void *dev_id)
1531 {
1532         int irq, retval;
1533
1534         irq = bind_ipi_to_irq(ipi, cpu);
1535         if (irq < 0)
1536                 return irq;
1537
1538         irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME;
1539         retval = request_irq(irq, handler, irqflags, devname, dev_id);
1540         if (retval != 0) {
1541                 unbind_from_irq(irq);
1542                 return retval;
1543         }
1544
1545         return irq;
1546 }
1547
1548 void unbind_from_irqhandler(unsigned int irq, void *dev_id)
1549 {
1550         struct irq_info *info = info_for_irq(irq);
1551
1552         if (WARN_ON(!info))
1553                 return;
1554         free_irq(irq, dev_id);
1555         unbind_from_irq(irq);
1556 }
1557 EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
1558
1559 /**
1560  * xen_set_irq_priority() - set an event channel priority.
1561  * @irq:irq bound to an event channel.
1562  * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN.
1563  */
1564 int xen_set_irq_priority(unsigned irq, unsigned priority)
1565 {
1566         struct evtchn_set_priority set_priority;
1567
1568         set_priority.port = evtchn_from_irq(irq);
1569         set_priority.priority = priority;
1570
1571         return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority,
1572                                            &set_priority);
1573 }
1574 EXPORT_SYMBOL_GPL(xen_set_irq_priority);
1575
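/*
 * Event channel reference counting: refcnt == -1 means the irq is not
 * refcounted and lives until it is explicitly unbound.
 * evtchn_make_refcounted() switches a port to refcounted mode with an
 * initial count of 1; evtchn_get()/evtchn_put() then take and drop
 * references, and the channel is closed once the last reference is
 * dropped (see __unbind_from_irq()).
 */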
1576 int evtchn_make_refcounted(evtchn_port_t evtchn)
1577 {
1578         int irq = get_evtchn_to_irq(evtchn);
1579         struct irq_info *info;
1580
1581         if (irq == -1)
1582                 return -ENOENT;
1583
1584         info = info_for_irq(irq);
1585
1586         if (!info)
1587                 return -ENOENT;
1588
1589         WARN_ON(info->refcnt != -1);
1590
1591         info->refcnt = 1;
1592
1593         return 0;
1594 }
1595 EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
1596
1597 int evtchn_get(evtchn_port_t evtchn)
1598 {
1599         int irq;
1600         struct irq_info *info;
1601         int err = -ENOENT;
1602
1603         if (evtchn >= xen_evtchn_max_channels())
1604                 return -EINVAL;
1605
1606         mutex_lock(&irq_mapping_update_lock);
1607
1608         irq = get_evtchn_to_irq(evtchn);
1609         if (irq == -1)
1610                 goto done;
1611
1612         info = info_for_irq(irq);
1613
1614         if (!info)
1615                 goto done;
1616
1617         err = -EINVAL;
1618         if (info->refcnt <= 0 || info->refcnt == SHRT_MAX)
1619                 goto done;
1620
1621         info->refcnt++;
1622         err = 0;
1623  done:
1624         mutex_unlock(&irq_mapping_update_lock);
1625
1626         return err;
1627 }
1628 EXPORT_SYMBOL_GPL(evtchn_get);
1629
1630 void evtchn_put(evtchn_port_t evtchn)
1631 {
1632         int irq = get_evtchn_to_irq(evtchn);
1633         if (WARN_ON(irq == -1))
1634                 return;
1635         unbind_from_irq(irq);
1636 }
1637 EXPORT_SYMBOL_GPL(evtchn_put);
1638
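/*
 * Send an IPI to a remote vcpu by kicking the event channel bound for
 * this vector via bind_ipi_to_irq().  On x86 the NMI vector is special
 * cased and delivered through VCPUOP_send_nmi instead of an event
 * channel.
 */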
1639 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
1640 {
1641         int irq;
1642
1643 #ifdef CONFIG_X86
1644         if (unlikely(vector == XEN_NMI_VECTOR)) {
1645                 int rc =  HYPERVISOR_vcpu_op(VCPUOP_send_nmi, xen_vcpu_nr(cpu),
1646                                              NULL);
1647                 if (rc < 0)
1648                         printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
1649                 return;
1650         }
1651 #endif
1652         irq = per_cpu(ipi_to_irq, cpu)[vector];
1653         BUG_ON(irq < 0);
1654         notify_remote_via_irq(irq);
1655 }
1656
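/*
 * Per-invocation state of the event handling loop: @timeout is the ktime
 * after which further EOIs are deferred, @count is the number of events
 * handled so far and @defer_eoi flags that the timeout has been hit.
 */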
1657 struct evtchn_loop_ctrl {
1658         ktime_t timeout;
1659         unsigned count;
1660         bool defer_eoi;
1661 };
1662
1663 void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
1664 {
1665         int irq;
1666         struct irq_info *info;
1667         struct xenbus_device *dev;
1668
1669         irq = get_evtchn_to_irq(port);
1670         if (irq == -1)
1671                 return;
1672
1673         /*
1674          * Check for timeout every 256 events.
1675          * We are setting the timeout value only after the first 256
1676          * events in order to not hurt the common case of few loop
1677          * iterations. The 256 is basically an arbitrary value.
1678          *
1679          * In case we are hitting the timeout we need to defer all further
1680          * EOIs in order to ensure that we leave the event handling loop
1681          * sooner rather than later.
1682          */
1683         if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
1684                 ktime_t kt = ktime_get();
1685
1686                 if (!ctrl->timeout) {
1687                         kt = ktime_add_ms(kt,
1688                                           jiffies_to_msecs(event_loop_timeout));
1689                         ctrl->timeout = kt;
1690                 } else if (kt > ctrl->timeout) {
1691                         ctrl->defer_eoi = true;
1692                 }
1693         }
1694
1695         info = info_for_irq(irq);
1696         if (xchg_acquire(&info->is_active, 1))
1697                 return;
1698
1699         dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL;
1700         if (dev)
1701                 atomic_inc(&dev->events);
1702
1703         if (ctrl->defer_eoi) {
1704                 info->eoi_cpu = smp_processor_id();
1705                 info->irq_epoch = __this_cpu_read(irq_epoch);
1706                 info->eoi_time = get_jiffies_64() + event_eoi_delay;
1707         }
1708
1709         generic_handle_irq(irq);
1710 }
1711
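/*
 * Editorial note: the 0xff mask above means the (comparatively expensive)
 * ktime_get() is done only once per 256 handled events, e.g.:
 *
 *      events 1..255  -> (++ctrl->count & 0xff) != 0 -> no timeout check
 *      event  256     -> (++ctrl->count & 0xff) == 0 -> arm/check the timeout
 */
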
1712 static void __xen_evtchn_do_upcall(void)
1713 {
1714         struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
1715         int cpu = smp_processor_id();
1716         struct evtchn_loop_ctrl ctrl = { 0 };
1717
1718         read_lock(&evtchn_rwlock);
1719
1720         do {
1721                 vcpu_info->evtchn_upcall_pending = 0;
1722
1723                 xen_evtchn_handle_events(cpu, &ctrl);
1724
1725                 BUG_ON(!irqs_disabled());
1726
1727                 virt_rmb(); /* Hypervisor can set upcall pending. */
1728
1729         } while (vcpu_info->evtchn_upcall_pending);
1730
1731         read_unlock(&evtchn_rwlock);
1732
1733         /*
1734          * Increment irq_epoch only now to defer EOIs only for
1735          * xen_irq_lateeoi() invocations occurring from inside the loop
1736          * above.
1737          */
1738         __this_cpu_inc(irq_epoch);
1739 }
1740
1741 void xen_evtchn_do_upcall(struct pt_regs *regs)
1742 {
1743         struct pt_regs *old_regs = set_irq_regs(regs);
1744
1745         irq_enter();
1746
1747         __xen_evtchn_do_upcall();
1748
1749         irq_exit();
1750         set_irq_regs(old_regs);
1751 }
1752
1753 void xen_hvm_evtchn_do_upcall(void)
1754 {
1755         __xen_evtchn_do_upcall();
1756 }
1757 EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
1758
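/*
 * Editorial sketch, not part of the original file: an HVM guest that receives
 * event notifications through a regular interrupt (e.g. the platform PCI
 * device) is expected to forward it roughly like this; the handler name and
 * its registration are hypothetical.
 */
#if 0
static irqreturn_t example_evtchn_interrupt(int irq, void *dev_id)
{
        xen_hvm_evtchn_do_upcall();
        return IRQ_HANDLED;
}
#endif
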
1759 /* Rebind a new event channel to an existing irq. */
1760 void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
1761 {
1762         struct irq_info *info = info_for_irq(irq);
1763
1764         if (WARN_ON(!info))
1765                 return;
1766
1767         /* Make sure the irq is masked, since the new event channel
1768            will also be masked. */
1769         disable_irq(irq);
1770
1771         mutex_lock(&irq_mapping_update_lock);
1772
1773         /* After resume the irq<->evtchn mappings are all cleared out */
1774         BUG_ON(get_evtchn_to_irq(evtchn) != -1);
1775         /* Expect irq to have been bound before,
1776            so there should be a proper type */
1777         BUG_ON(info->type == IRQT_UNBOUND);
1778
1779         (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL);
1780
1781         mutex_unlock(&irq_mapping_update_lock);
1782
1783         bind_evtchn_to_cpu(evtchn, info->cpu, false);
1784
1785         /* Unmask the event channel. */
1786         enable_irq(irq);
1787 }
1788
1789 /* Rebind an evtchn so that it gets delivered to a specific cpu */
1790 static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu)
1791 {
1792         struct evtchn_bind_vcpu bind_vcpu;
1793         evtchn_port_t evtchn = info ? info->evtchn : 0;
1794
1795         if (!VALID_EVTCHN(evtchn))
1796                 return -1;
1797
1798         if (!xen_support_evtchn_rebind())
1799                 return -1;
1800
1801         /* Send future instances of this interrupt to the target vcpu. */
1802         bind_vcpu.port = evtchn;
1803         bind_vcpu.vcpu = xen_vcpu_nr(tcpu);
1804
1805         /*
1806          * Mask the event while changing the VCPU binding to prevent
1807          * it being delivered on an unexpected VCPU.
1808          */
1809         do_mask(info, EVT_MASK_REASON_TEMPORARY);
1810
1811         /*
1812          * If this fails, it usually just indicates that we're dealing with a
1813          * virq or IPI channel, which don't actually need to be rebound. Ignore
1814          * it, but don't do the xenlinux-level rebind in that case.
1815          */
1816         if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
1817                 bind_evtchn_to_cpu(evtchn, tcpu, false);
1818
1819         do_unmask(info, EVT_MASK_REASON_TEMPORARY);
1820
1821         return 0;
1822 }
1823
1824 /*
1825  * Find the CPU within @dest mask which has the least number of channels
1826  * assigned. This is not precise as the per cpu counts can be modified
1827  * concurrently.
1828  */
1829 static unsigned int select_target_cpu(const struct cpumask *dest)
1830 {
1831         unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX;
1832
1833         for_each_cpu_and(cpu, dest, cpu_online_mask) {
1834                 unsigned int curch = atomic_read(&channels_on_cpu[cpu]);
1835
1836                 if (curch < minch) {
1837                         minch = curch;
1838                         best_cpu = cpu;
1839                 }
1840         }
1841
1842         /*
1843          * Catch the unlikely case that dest contains no online CPUs. The
1844          * recursive call below can't hit this path again.
1845          */
1846         if (best_cpu == UINT_MAX)
1847                 return select_target_cpu(cpu_online_mask);
1848
1849         return best_cpu;
1850 }
1851
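/*
 * Editorial example: with channels_on_cpu = { cpu0: 10, cpu1: 3, cpu2: 7 }
 * and dest = { cpu1, cpu2 }, select_target_cpu() returns cpu1, the online
 * CPU in dest with the fewest event channels currently bound to it.
 */
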
1852 static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
1853                             bool force)
1854 {
1855         unsigned int tcpu = select_target_cpu(dest);
1856         int ret;
1857
1858         ret = xen_rebind_evtchn_to_cpu(info_for_irq(data->irq), tcpu);
1859         if (!ret)
1860                 irq_data_update_effective_affinity(data, cpumask_of(tcpu));
1861
1862         return ret;
1863 }
1864
1865 static void enable_dynirq(struct irq_data *data)
1866 {
1867         struct irq_info *info = info_for_irq(data->irq);
1868         evtchn_port_t evtchn = info ? info->evtchn : 0;
1869
1870         if (VALID_EVTCHN(evtchn))
1871                 do_unmask(info, EVT_MASK_REASON_EXPLICIT);
1872 }
1873
1874 static void disable_dynirq(struct irq_data *data)
1875 {
1876         struct irq_info *info = info_for_irq(data->irq);
1877         evtchn_port_t evtchn = info ? info->evtchn : 0;
1878
1879         if (VALID_EVTCHN(evtchn))
1880                 do_mask(info, EVT_MASK_REASON_EXPLICIT);
1881 }
1882
1883 static void ack_dynirq(struct irq_data *data)
1884 {
1885         struct irq_info *info = info_for_irq(data->irq);
1886         evtchn_port_t evtchn = info ? info->evtchn : 0;
1887
1888         if (VALID_EVTCHN(evtchn))
1889                 event_handler_exit(info);
1890 }
1891
1892 static void mask_ack_dynirq(struct irq_data *data)
1893 {
1894         disable_dynirq(data);
1895         ack_dynirq(data);
1896 }
1897
1898 static void lateeoi_ack_dynirq(struct irq_data *data)
1899 {
1900         struct irq_info *info = info_for_irq(data->irq);
1901         evtchn_port_t evtchn = info ? info->evtchn : 0;
1902
1903         if (VALID_EVTCHN(evtchn)) {
1904                 do_mask(info, EVT_MASK_REASON_EOI_PENDING);
1905                 /*
1906                  * Don't call event_handler_exit().
1907                  * Need to keep is_active non-zero in order to ignore re-raised
1908                  * events after cpu affinity changes while a lateeoi is pending.
1909                  */
1910                 clear_evtchn(evtchn);
1911         }
1912 }
1913
1914 static void lateeoi_mask_ack_dynirq(struct irq_data *data)
1915 {
1916         struct irq_info *info = info_for_irq(data->irq);
1917         evtchn_port_t evtchn = info ? info->evtchn : 0;
1918
1919         if (VALID_EVTCHN(evtchn)) {
1920                 do_mask(info, EVT_MASK_REASON_EXPLICIT);
1921                 event_handler_exit(info);
1922         }
1923 }
1924
1925 static int retrigger_dynirq(struct irq_data *data)
1926 {
1927         struct irq_info *info = info_for_irq(data->irq);
1928         evtchn_port_t evtchn = info ? info->evtchn : 0;
1929
1930         if (!VALID_EVTCHN(evtchn))
1931                 return 0;
1932
1933         do_mask(info, EVT_MASK_REASON_TEMPORARY);
1934         set_evtchn(evtchn);
1935         do_unmask(info, EVT_MASK_REASON_TEMPORARY);
1936
1937         return 1;
1938 }
1939
1940 static void restore_pirqs(void)
1941 {
1942         int pirq, rc, irq, gsi;
1943         struct physdev_map_pirq map_irq;
1944         struct irq_info *info;
1945
1946         list_for_each_entry(info, &xen_irq_list_head, list) {
1947                 if (info->type != IRQT_PIRQ)
1948                         continue;
1949
1950                 pirq = info->u.pirq.pirq;
1951                 gsi = info->u.pirq.gsi;
1952                 irq = info->irq;
1953
1954                 /* save/restore of PT devices doesn't work, so at this point the
1955                  * only devices present are GSI-based emulated devices */
1956                 if (!gsi)
1957                         continue;
1958
1959                 map_irq.domid = DOMID_SELF;
1960                 map_irq.type = MAP_PIRQ_TYPE_GSI;
1961                 map_irq.index = gsi;
1962                 map_irq.pirq = pirq;
1963
1964                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
1965                 if (rc) {
1966                         pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
1967                                 gsi, irq, pirq, rc);
1968                         xen_free_irq(irq);
1969                         continue;
1970                 }
1971
1972                 printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
1973
1974                 __startup_pirq(irq);
1975         }
1976 }
1977
1978 static void restore_cpu_virqs(unsigned int cpu)
1979 {
1980         struct evtchn_bind_virq bind_virq;
1981         evtchn_port_t evtchn;
1982         int virq, irq;
1983
1984         for (virq = 0; virq < NR_VIRQS; virq++) {
1985                 if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
1986                         continue;
1987
1988                 BUG_ON(virq_from_irq(irq) != virq);
1989
1990                 /* Get a new binding from Xen. */
1991                 bind_virq.virq = virq;
1992                 bind_virq.vcpu = xen_vcpu_nr(cpu);
1993                 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
1994                                                 &bind_virq) != 0)
1995                         BUG();
1996                 evtchn = bind_virq.port;
1997
1998                 /* Record the new mapping. */
1999                 (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
2000                 /* The affinity mask is still valid */
2001                 bind_evtchn_to_cpu(evtchn, cpu, false);
2002         }
2003 }
2004
2005 static void restore_cpu_ipis(unsigned int cpu)
2006 {
2007         struct evtchn_bind_ipi bind_ipi;
2008         evtchn_port_t evtchn;
2009         int ipi, irq;
2010
2011         for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
2012                 if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
2013                         continue;
2014
2015                 BUG_ON(ipi_from_irq(irq) != ipi);
2016
2017                 /* Get a new binding from Xen. */
2018                 bind_ipi.vcpu = xen_vcpu_nr(cpu);
2019                 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
2020                                                 &bind_ipi) != 0)
2021                         BUG();
2022                 evtchn = bind_ipi.port;
2023
2024                 /* Record the new mapping. */
2025                 (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
2026                 /* The affinity mask is still valid */
2027                 bind_evtchn_to_cpu(evtchn, cpu, false);
2028         }
2029 }
2030
2031 /* Clear an irq's pending state, in preparation for polling on it */
2032 void xen_clear_irq_pending(int irq)
2033 {
2034         struct irq_info *info = info_for_irq(irq);
2035         evtchn_port_t evtchn = info ? info->evtchn : 0;
2036
2037         if (VALID_EVTCHN(evtchn))
2038                 event_handler_exit(info);
2039 }
2040 EXPORT_SYMBOL(xen_clear_irq_pending);
2041 void xen_set_irq_pending(int irq)
2042 {
2043         evtchn_port_t evtchn = evtchn_from_irq(irq);
2044
2045         if (VALID_EVTCHN(evtchn))
2046                 set_evtchn(evtchn);
2047 }
2048
2049 bool xen_test_irq_pending(int irq)
2050 {
2051         evtchn_port_t evtchn = evtchn_from_irq(irq);
2052         bool ret = false;
2053
2054         if (VALID_EVTCHN(evtchn))
2055                 ret = test_evtchn(evtchn);
2056
2057         return ret;
2058 }
2059
2060 /* Poll waiting for an irq to become pending with timeout.  In the usual case,
2061  * the irq will be disabled so it won't deliver an interrupt. */
2062 void xen_poll_irq_timeout(int irq, u64 timeout)
2063 {
2064         evtchn_port_t evtchn = evtchn_from_irq(irq);
2065
2066         if (VALID_EVTCHN(evtchn)) {
2067                 struct sched_poll poll;
2068
2069                 poll.nr_ports = 1;
2070                 poll.timeout = timeout;
2071                 set_xen_guest_handle(poll.ports, &evtchn);
2072
2073                 if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
2074                         BUG();
2075         }
2076 }
2077 EXPORT_SYMBOL(xen_poll_irq_timeout);
2078 /* Poll waiting for an irq to become pending.  In the usual case, the
2079  * irq will be disabled so it won't deliver an interrupt. */
2080 void xen_poll_irq(int irq)
2081 {
2082         xen_poll_irq_timeout(irq, 0 /* no timeout */);
2083 }
2084
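/*
 * Editorial sketch, not part of the original file: the typical caller (e.g. a
 * paravirtualized wait loop) combines the helpers above roughly like this,
 * for an irq that is normally kept disabled.
 */
#if 0
static void example_wait_for_event(int irq)
{
        /* Discard any stale pending state before blocking. */
        xen_clear_irq_pending(irq);

        /* Block in the hypervisor until the event becomes pending again. */
        xen_poll_irq(irq);

        /* xen_test_irq_pending(irq) can now be used to check for a wakeup. */
}
#endif
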
2085 /* Check whether the IRQ line is shared with other guests. */
2086 int xen_test_irq_shared(int irq)
2087 {
2088         struct irq_info *info = info_for_irq(irq);
2089         struct physdev_irq_status_query irq_status;
2090
2091         if (WARN_ON(!info))
2092                 return -ENOENT;
2093
2094         irq_status.irq = info->u.pirq.pirq;
2095
2096         if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
2097                 return 0;
2098         return !(irq_status.flags & XENIRQSTAT_shared);
2099 }
2100 EXPORT_SYMBOL_GPL(xen_test_irq_shared);
2101
2102 void xen_irq_resume(void)
2103 {
2104         unsigned int cpu;
2105         struct irq_info *info;
2106
2107         /* New event-channel space is not 'live' yet. */
2108         xen_evtchn_resume();
2109
2110         /* No IRQ <-> event-channel mappings. */
2111         list_for_each_entry(info, &xen_irq_list_head, list) {
2112                 /* Zap event-channel binding */
2113                 info->evtchn = 0;
2114                 /* Adjust accounting */
2115                 channels_on_cpu_dec(info);
2116         }
2117
2118         clear_evtchn_to_irq_all();
2119
2120         for_each_possible_cpu(cpu) {
2121                 restore_cpu_virqs(cpu);
2122                 restore_cpu_ipis(cpu);
2123         }
2124
2125         restore_pirqs();
2126 }
2127
2128 static struct irq_chip xen_dynamic_chip __read_mostly = {
2129         .name                   = "xen-dyn",
2130
2131         .irq_disable            = disable_dynirq,
2132         .irq_mask               = disable_dynirq,
2133         .irq_unmask             = enable_dynirq,
2134
2135         .irq_ack                = ack_dynirq,
2136         .irq_mask_ack           = mask_ack_dynirq,
2137
2138         .irq_set_affinity       = set_affinity_irq,
2139         .irq_retrigger          = retrigger_dynirq,
2140 };
2141
2142 static struct irq_chip xen_lateeoi_chip __read_mostly = {
2143         /* The chip name needs to contain "xen-dyn" for irqbalance to work. */
2144         .name                   = "xen-dyn-lateeoi",
2145
2146         .irq_disable            = disable_dynirq,
2147         .irq_mask               = disable_dynirq,
2148         .irq_unmask             = enable_dynirq,
2149
2150         .irq_ack                = lateeoi_ack_dynirq,
2151         .irq_mask_ack           = lateeoi_mask_ack_dynirq,
2152
2153         .irq_set_affinity       = set_affinity_irq,
2154         .irq_retrigger          = retrigger_dynirq,
2155 };
2156
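/*
 * Editorial sketch, not part of the original file: a handler running on the
 * lateeoi chip must signal completion explicitly, otherwise the event channel
 * stays masked; the handler name and its binding are hypothetical.
 */
#if 0
static irqreturn_t example_backend_handler(int irq, void *dev_id)
{
        /* ... process the request from the other domain ... */

        /* Only now tell the core that the event has really been handled. */
        xen_irq_lateeoi(irq, 0);
        return IRQ_HANDLED;
}
#endif
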
2157 static struct irq_chip xen_pirq_chip __read_mostly = {
2158         .name                   = "xen-pirq",
2159
2160         .irq_startup            = startup_pirq,
2161         .irq_shutdown           = shutdown_pirq,
2162         .irq_enable             = enable_pirq,
2163         .irq_disable            = disable_pirq,
2164
2165         .irq_mask               = disable_dynirq,
2166         .irq_unmask             = enable_dynirq,
2167
2168         .irq_ack                = eoi_pirq,
2169         .irq_eoi                = eoi_pirq,
2170         .irq_mask_ack           = mask_ack_pirq,
2171
2172         .irq_set_affinity       = set_affinity_irq,
2173
2174         .irq_retrigger          = retrigger_dynirq,
2175 };
2176
2177 static struct irq_chip xen_percpu_chip __read_mostly = {
2178         .name                   = "xen-percpu",
2179
2180         .irq_disable            = disable_dynirq,
2181         .irq_mask               = disable_dynirq,
2182         .irq_unmask             = enable_dynirq,
2183
2184         .irq_ack                = ack_dynirq,
2185 };
2186
2187 #ifdef CONFIG_X86
2188 #ifdef CONFIG_XEN_PVHVM
2189 /* Vector callbacks are better than PCI interrupts for receiving event
2190  * channel notifications because we can receive vector callbacks on any
2191  * vcpu and we don't need PCI support or APIC interactions. */
2192 void xen_setup_callback_vector(void)
2193 {
2194         uint64_t callback_via;
2195
2196         if (xen_have_vector_callback) {
2197                 callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR);
2198                 if (xen_set_callback_via(callback_via)) {
2199                         pr_err("Request for Xen HVM callback vector failed\n");
2200                         xen_have_vector_callback = false;
2201                 }
2202         }
2203 }
2204
2205 /*
2206  * Set up per-vCPU vector-type callbacks. If this setup is unavailable,
2207  * fall back to the global vector-type callback.
2208  */
2209 static __init void xen_init_setup_upcall_vector(void)
2210 {
2211         if (!xen_have_vector_callback)
2212                 return;
2213
2214         if ((cpuid_eax(xen_cpuid_base() + 4) & XEN_HVM_CPUID_UPCALL_VECTOR) &&
2215             !xen_set_upcall_vector(0))
2216                 xen_percpu_upcall = true;
2217         else if (xen_feature(XENFEAT_hvm_callback_vector))
2218                 xen_setup_callback_vector();
2219         else
2220                 xen_have_vector_callback = false;
2221 }
2222
2223 int xen_set_upcall_vector(unsigned int cpu)
2224 {
2225         int rc;
2226         xen_hvm_evtchn_upcall_vector_t op = {
2227                 .vector = HYPERVISOR_CALLBACK_VECTOR,
2228                 .vcpu = per_cpu(xen_vcpu_id, cpu),
2229         };
2230
2231         rc = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &op);
2232         if (rc)
2233                 return rc;
2234
2235         /* Trick the toolstack into thinking we are enlightened. */
2236         if (!cpu)
2237                 rc = xen_set_callback_via(1);
2238
2239         return rc;
2240 }
2241
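/*
 * Editorial sketch, not part of the original file: when xen_percpu_upcall has
 * been set, CPU-hotplug code is expected to register the vector for each new
 * vCPU as well, roughly like this (the prepare callback is hypothetical).
 */
#if 0
static int example_cpu_up_prepare(unsigned int cpu)
{
        if (xen_percpu_upcall)
                return xen_set_upcall_vector(cpu);
        return 0;
}
#endif
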
2242 static __init void xen_alloc_callback_vector(void)
2243 {
2244         if (!xen_have_vector_callback)
2245                 return;
2246
2247         pr_info("Xen HVM callback vector for event delivery is enabled\n");
2248         alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_xen_hvm_callback);
2249 }
2250 #else
2251 void xen_setup_callback_vector(void) {}
2252 static inline void xen_init_setup_upcall_vector(void) {}
2253 int xen_set_upcall_vector(unsigned int cpu) { return 0; }
2254 static inline void xen_alloc_callback_vector(void) {}
2255 #endif /* CONFIG_XEN_PVHVM */
2256 #endif /* CONFIG_X86 */
2257
2258 bool xen_fifo_events = true;
2259 module_param_named(fifo_events, xen_fifo_events, bool, 0);
2260
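/*
 * Editorial note: combined with MODULE_PARAM_PREFIX "xen." this parameter can
 * be set on the kernel command line, e.g.
 *
 *      xen.fifo_events=0
 *
 * which makes xen_init_IRQ() below fall back to the 2-level event ABI.
 */
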
2261 static int xen_evtchn_cpu_prepare(unsigned int cpu)
2262 {
2263         int ret = 0;
2264
2265         xen_cpu_init_eoi(cpu);
2266
2267         if (evtchn_ops->percpu_init)
2268                 ret = evtchn_ops->percpu_init(cpu);
2269
2270         return ret;
2271 }
2272
2273 static int xen_evtchn_cpu_dead(unsigned int cpu)
2274 {
2275         int ret = 0;
2276
2277         if (evtchn_ops->percpu_deinit)
2278                 ret = evtchn_ops->percpu_deinit(cpu);
2279
2280         return ret;
2281 }
2282
2283 void __init xen_init_IRQ(void)
2284 {
2285         int ret = -EINVAL;
2286         evtchn_port_t evtchn;
2287
2288         if (xen_fifo_events)
2289                 ret = xen_evtchn_fifo_init();
2290         if (ret < 0) {
2291                 xen_evtchn_2l_init();
2292                 xen_fifo_events = false;
2293         }
2294
2295         xen_cpu_init_eoi(smp_processor_id());
2296
2297         cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
2298                                   "xen/evtchn:prepare",
2299                                   xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
2300
2301         evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
2302                                 sizeof(*evtchn_to_irq), GFP_KERNEL);
2303         BUG_ON(!evtchn_to_irq);
2304
2305         /* No event channels are 'live' right now. */
2306         for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++)
2307                 mask_evtchn(evtchn);
2308
2309         pirq_needs_eoi = pirq_needs_eoi_flag;
2310
2311 #ifdef CONFIG_X86
2312         if (xen_pv_domain()) {
2313                 if (xen_initial_domain())
2314                         pci_xen_initial_domain();
2315         }
2316         xen_init_setup_upcall_vector();
2317         xen_alloc_callback_vector();
2318
2319
2320         if (xen_hvm_domain()) {
2321                 native_init_IRQ();
2322                 /* pci_xen_hvm_init must be called after native_init_IRQ so that
2323                  * __acpi_register_gsi can point at the right function */
2324                 pci_xen_hvm_init();
2325         } else {
2326                 int rc;
2327                 struct physdev_pirq_eoi_gmfn eoi_gmfn;
2328
2329                 pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
2330                 eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map);
2331                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
2332                 if (rc != 0) {
2333                         free_page((unsigned long) pirq_eoi_map);
2334                         pirq_eoi_map = NULL;
2335                 } else
2336                         pirq_needs_eoi = pirq_check_eoi_map;
2337         }
2338 #endif
2339 }