1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36 #include <linux/tboot.h>
37 #include <linux/dmi.h>
38 #include <linux/slab.h>
39 #include <asm/irq_remapping.h>
40 #include <asm/iommu_table.h>
41
42 #define PREFIX "DMAR: "
43
44 /* No locks are needed as the DMA remapping hardware unit
45  * list is constructed at boot time and hotplug of
46  * these units is not supported by the architecture.
47  */
48 LIST_HEAD(dmar_drhd_units);
49
50 struct acpi_table_header * __initdata dmar_tbl;
51 static acpi_size dmar_tbl_size;
52
53 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
54 {
55         /*
56          * add INCLUDE_ALL units at the tail, so that scanning the list
57          * will find them at the very end.
58          */
59         if (drhd->include_all)
60                 list_add_tail(&drhd->list, &dmar_drhd_units);
61         else
62                 list_add(&drhd->list, &dmar_drhd_units);
63 }
64
65 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
66                                            struct pci_dev **dev, u16 segment)
67 {
68         struct pci_bus *bus;
69         struct pci_dev *pdev = NULL;
70         struct acpi_dmar_pci_path *path;
71         int count;
72
73         bus = pci_find_bus(segment, scope->bus);
74         path = (struct acpi_dmar_pci_path *)(scope + 1);
75         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
76                 / sizeof(struct acpi_dmar_pci_path);
77
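        /*
         * Walk the scope path: each acpi_dmar_pci_path entry gives a
         * (device, function) step under the current bus; descend through
         * any bridges until the final device named by the scope is found.
         */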
78         while (count) {
79                 if (pdev)
80                         pci_dev_put(pdev);
81                 /*
82                  * Some BIOSes list non-existent devices in the DMAR
83                  * table; just ignore them.
84                  */
85                 if (!bus) {
86                         printk(KERN_WARNING
87                         PREFIX "Device scope bus [%d] not found\n",
88                         scope->bus);
89                         break;
90                 }
91                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
92                 if (!pdev) {
93                         printk(KERN_WARNING PREFIX
94                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
95                                 segment, bus->number, path->dev, path->fn);
96                         break;
97                 }
98                 path++;
99                 count--;
100                 bus = pdev->subordinate;
101         }
102         if (!pdev) {
103                 printk(KERN_WARNING PREFIX
104                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
105                 segment, scope->bus, path->dev, path->fn);
106                 *dev = NULL;
107                 return 0;
108         }
109         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
110                         pdev->subordinate) || (scope->entry_type ==
111                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
112                 printk(KERN_WARNING PREFIX
113                         "Device scope type does not match for %s\n",
114                          pci_name(pdev));
115                 pci_dev_put(pdev);
116                 return -EINVAL;
117         }
118         *dev = pdev;
119         return 0;
120 }
121
122 int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
123                                 struct pci_dev ***devices, u16 segment)
124 {
125         struct acpi_dmar_device_scope *scope;
126         void *tmp = start;
127         int index;
128         int ret;
129
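        /*
         * Two passes over the scope entries: first count the endpoint and
         * bridge entries so the array can be sized, then resolve each one
         * to a struct pci_dev below.
         */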
130         *cnt = 0;
131         while (start < end) {
132                 scope = start;
133                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
134                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
135                         (*cnt)++;
136                 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
137                         printk(KERN_WARNING PREFIX
138                                "Unsupported device scope\n");
139                 }
140                 start += scope->length;
141         }
142         if (*cnt == 0)
143                 return 0;
144
145         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
146         if (!*devices)
147                 return -ENOMEM;
148
149         start = tmp;
150         index = 0;
151         while (start < end) {
152                 scope = start;
153                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
154                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
155                         ret = dmar_parse_one_dev_scope(scope,
156                                 &(*devices)[index], segment);
157                         if (ret) {
158                                 kfree(*devices);
159                                 return ret;
160                         }
161                         index++;
162                 }
163                 start += scope->length;
164         }
165
166         return 0;
167 }
168
169 /**
170  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
171  * structure which uniquely represents one DMA remapping hardware unit
172  * present in the platform
173  */
174 static int __init
175 dmar_parse_one_drhd(struct acpi_dmar_header *header)
176 {
177         struct acpi_dmar_hardware_unit *drhd;
178         struct dmar_drhd_unit *dmaru;
179         int ret = 0;
180
181         drhd = (struct acpi_dmar_hardware_unit *)header;
182         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
183         if (!dmaru)
184                 return -ENOMEM;
185
186         dmaru->hdr = header;
187         dmaru->reg_base_addr = drhd->address;
188         dmaru->segment = drhd->segment;
189         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
190
191         ret = alloc_iommu(dmaru);
192         if (ret) {
193                 kfree(dmaru);
194                 return ret;
195         }
196         dmar_register_drhd_unit(dmaru);
197         return 0;
198 }
199
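/*
 * Device scopes are resolved in a second pass, from dmar_dev_scope_init(),
 * once the PCI core can look the devices up; dmar_parse_one_drhd() above only
 * records the register base, segment and flags.
 */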
200 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
201 {
202         struct acpi_dmar_hardware_unit *drhd;
203         int ret = 0;
204
205         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
206
207         if (dmaru->include_all)
208                 return 0;
209
210         ret = dmar_parse_dev_scope((void *)(drhd + 1),
211                                 ((void *)drhd) + drhd->header.length,
212                                 &dmaru->devices_cnt, &dmaru->devices,
213                                 drhd->segment);
214         if (ret) {
215                 list_del(&dmaru->list);
216                 kfree(dmaru);
217         }
218         return ret;
219 }
220
221 #ifdef CONFIG_ACPI_NUMA
222 static int __init
223 dmar_parse_one_rhsa(struct acpi_dmar_header *header)
224 {
225         struct acpi_dmar_rhsa *rhsa;
226         struct dmar_drhd_unit *drhd;
227
228         rhsa = (struct acpi_dmar_rhsa *)header;
229         for_each_drhd_unit(drhd) {
230                 if (drhd->reg_base_addr == rhsa->base_address) {
231                         int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
232
233                         if (!node_online(node))
234                                 node = -1;
235                         drhd->iommu->node = node;
236                         return 0;
237                 }
238         }
239         WARN_TAINT(
240                 1, TAINT_FIRMWARE_WORKAROUND,
241                 "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
242                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
243                 rhsa->base_address,
244                 dmi_get_system_info(DMI_BIOS_VENDOR),
245                 dmi_get_system_info(DMI_BIOS_VERSION),
246                 dmi_get_system_info(DMI_PRODUCT_VERSION));
247
248         return 0;
249 }
250 #endif
251
252 static void __init
253 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
254 {
255         struct acpi_dmar_hardware_unit *drhd;
256         struct acpi_dmar_reserved_memory *rmrr;
257         struct acpi_dmar_atsr *atsr;
258         struct acpi_dmar_rhsa *rhsa;
259
260         switch (header->type) {
261         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
262                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
263                                     header);
264                 printk(KERN_INFO PREFIX
265                         "DRHD base: %#016Lx flags: %#x\n",
266                         (unsigned long long)drhd->address, drhd->flags);
267                 break;
268         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
269                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
270                                     header);
271                 printk(KERN_INFO PREFIX
272                         "RMRR base: %#016Lx end: %#016Lx\n",
273                         (unsigned long long)rmrr->base_address,
274                         (unsigned long long)rmrr->end_address);
275                 break;
276         case ACPI_DMAR_TYPE_ATSR:
277                 atsr = container_of(header, struct acpi_dmar_atsr, header);
278                 printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
279                 break;
280         case ACPI_DMAR_HARDWARE_AFFINITY:
281                 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
282                 printk(KERN_INFO PREFIX "RHSA base: %#016Lx proximity domain: %#x\n",
283                        (unsigned long long)rhsa->base_address,
284                        rhsa->proximity_domain);
285                 break;
286         }
287 }
288
289 /**
290  * dmar_table_detect - checks to see if the platform supports DMAR devices
291  */
292 static int __init dmar_table_detect(void)
293 {
294         acpi_status status = AE_OK;
295
296         /* if we can find the DMAR table, then there are DMAR devices */
297         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
298                                 (struct acpi_table_header **)&dmar_tbl,
299                                 &dmar_tbl_size);
300
301         if (ACPI_SUCCESS(status) && !dmar_tbl) {
302                 printk(KERN_WARNING PREFIX "Unable to map DMAR\n");
303                 status = AE_NOT_FOUND;
304         }
305
306         return (ACPI_SUCCESS(status) ? 1 : 0);
307 }
308
309 /**
310  * parse_dmar_table - parses the DMA remapping reporting (DMAR) table
311  */
312 static int __init
313 parse_dmar_table(void)
314 {
315         struct acpi_table_dmar *dmar;
316         struct acpi_dmar_header *entry_header;
317         int ret = 0;
318
319         /*
320          * Do it again; the earlier dmar_tbl mapping may have used the
321          * fixed map.
322          */
323         dmar_table_detect();
324
325         /*
326          * ACPI tables may not be DMA protected by tboot, so use the DMAR
327          * copy saved by SINIT in SinitMleData in the TXT heap (DMA protected).
328          */
329         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
330
331         dmar = (struct acpi_table_dmar *)dmar_tbl;
332         if (!dmar)
333                 return -ENODEV;
334
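        /* The DMAR "haw" field holds the host address width minus one. */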
335         if (dmar->width < PAGE_SHIFT - 1) {
336                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
337                 return -EINVAL;
338         }
339
340         printk(KERN_INFO PREFIX "Host address width %d\n",
341                 dmar->width + 1);
342
343         entry_header = (struct acpi_dmar_header *)(dmar + 1);
344         while (((unsigned long)entry_header) <
345                         (((unsigned long)dmar) + dmar_tbl->length)) {
346                 /* Avoid looping forever on bad ACPI tables */
347                 if (entry_header->length == 0) {
348                         printk(KERN_WARNING PREFIX
349                                 "Invalid 0-length structure\n");
350                         ret = -EINVAL;
351                         break;
352                 }
353
354                 dmar_table_print_dmar_entry(entry_header);
355
356                 switch (entry_header->type) {
357                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
358                         ret = dmar_parse_one_drhd(entry_header);
359                         break;
360                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
361                         ret = dmar_parse_one_rmrr(entry_header);
362                         break;
363                 case ACPI_DMAR_TYPE_ATSR:
364                         ret = dmar_parse_one_atsr(entry_header);
365                         break;
366                 case ACPI_DMAR_HARDWARE_AFFINITY:
367 #ifdef CONFIG_ACPI_NUMA
368                         ret = dmar_parse_one_rhsa(entry_header);
369 #endif
370                         break;
371                 default:
372                         printk(KERN_WARNING PREFIX
373                                 "Unknown DMAR structure type %d\n",
374                                 entry_header->type);
375                         ret = 0; /* for forward compatibility */
376                         break;
377                 }
378                 if (ret)
379                         break;
380
381                 entry_header = ((void *)entry_header + entry_header->length);
382         }
383         return ret;
384 }
385
386 static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
387                           struct pci_dev *dev)
388 {
389         int index;
390
391         while (dev) {
392                 for (index = 0; index < cnt; index++)
393                         if (dev == devices[index])
394                                 return 1;
395
396                 /* Check our parent */
397                 dev = dev->bus->self;
398         }
399
400         return 0;
401 }
402
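/*
 * Find the DRHD unit that covers @dev (a VF is mapped to its PF first).
 * Units with an explicit device scope are listed before any INCLUDE_ALL
 * unit (see dmar_register_drhd_unit()), so specific scopes match ahead of
 * the catch-all for the segment.
 */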
403 struct dmar_drhd_unit *
404 dmar_find_matched_drhd_unit(struct pci_dev *dev)
405 {
406         struct dmar_drhd_unit *dmaru = NULL;
407         struct acpi_dmar_hardware_unit *drhd;
408
409         dev = pci_physfn(dev);
410
411         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
412                 drhd = container_of(dmaru->hdr,
413                                     struct acpi_dmar_hardware_unit,
414                                     header);
415
416                 if (dmaru->include_all &&
417                     drhd->segment == pci_domain_nr(dev->bus))
418                         return dmaru;
419
420                 if (dmar_pci_device_match(dmaru->devices,
421                                           dmaru->devices_cnt, dev))
422                         return dmaru;
423         }
424
425         return NULL;
426 }
427
428 int __init dmar_dev_scope_init(void)
429 {
430         static int dmar_dev_scope_initialized;
431         struct dmar_drhd_unit *drhd, *drhd_n;
432         int ret = -ENODEV;
433
434         if (dmar_dev_scope_initialized)
435                 return dmar_dev_scope_initialized;
436
437         if (list_empty(&dmar_drhd_units))
438                 goto fail;
439
440         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
441                 ret = dmar_parse_dev(drhd);
442                 if (ret)
443                         goto fail;
444         }
445
446         ret = dmar_parse_rmrr_atsr_dev();
447         if (ret)
448                 goto fail;
449
450         dmar_dev_scope_initialized = 1;
451         return 0;
452
453 fail:
454         dmar_dev_scope_initialized = ret;
455         return ret;
456 }
457
458
459 int __init dmar_table_init(void)
460 {
461         static int dmar_table_initialized;
462         int ret;
463
464         if (dmar_table_initialized)
465                 return 0;
466
467         dmar_table_initialized = 1;
468
469         ret = parse_dmar_table();
470         if (ret) {
471                 if (ret != -ENODEV)
472                         printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
473                 return ret;
474         }
475
476         if (list_empty(&dmar_drhd_units)) {
477                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
478                 return -ENODEV;
479         }
480
481         return 0;
482 }
483
484 static void warn_invalid_dmar(u64 addr, const char *message)
485 {
486         WARN_TAINT_ONCE(
487                 1, TAINT_FIRMWARE_WORKAROUND,
488                 "Your BIOS is broken; DMAR reported at address %llx%s!\n"
489                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
490                 addr, message,
491                 dmi_get_system_info(DMI_BIOS_VENDOR),
492                 dmi_get_system_info(DMI_BIOS_VERSION),
493                 dmi_get_system_info(DMI_PRODUCT_VERSION));
494 }
495
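/*
 * Sanity-check every DRHD entry before the table is trusted: a register base
 * of zero, or one whose capability registers read back as all ones, means the
 * BIOS reported a bogus remapping unit.
 */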
496 int __init check_zero_address(void)
497 {
498         struct acpi_table_dmar *dmar;
499         struct acpi_dmar_header *entry_header;
500         struct acpi_dmar_hardware_unit *drhd;
501
502         dmar = (struct acpi_table_dmar *)dmar_tbl;
503         entry_header = (struct acpi_dmar_header *)(dmar + 1);
504
505         while (((unsigned long)entry_header) <
506                         (((unsigned long)dmar) + dmar_tbl->length)) {
507                 /* Avoid looping forever on bad ACPI tables */
508                 if (entry_header->length == 0) {
509                         printk(KERN_WARNING PREFIX
510                                 "Invalid 0-length structure\n");
511                         return 0;
512                 }
513
514                 if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
515                         void __iomem *addr;
516                         u64 cap, ecap;
517
518                         drhd = (void *)entry_header;
519                         if (!drhd->address) {
520                                 warn_invalid_dmar(0, "");
521                                 goto failed;
522                         }
523
524                         addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
525                         if (!addr) {
526                                 printk("IOMMU: can't validate: %llx\n", drhd->address);
527                                 goto failed;
528                         }
529                         cap = dmar_readq(addr + DMAR_CAP_REG);
530                         ecap = dmar_readq(addr + DMAR_ECAP_REG);
531                         early_iounmap(addr, VTD_PAGE_SIZE);
532                         if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
533                                 warn_invalid_dmar(drhd->address,
534                                                   " returns all ones");
535                                 goto failed;
536                         }
537                 }
538
539                 entry_header = ((void *)entry_header + entry_header->length);
540         }
541         return 1;
542
543 failed:
544         return 0;
545 }
546
547 int __init detect_intel_iommu(void)
548 {
549         int ret;
550
551         ret = dmar_table_detect();
552         if (ret)
553                 ret = check_zero_address();
554         {
555                 struct acpi_table_dmar *dmar;
556
557                 dmar = (struct acpi_table_dmar *) dmar_tbl;
558
559                 if (ret && irq_remapping_enabled && cpu_has_x2apic &&
560                     dmar->flags & 0x1)
561                         printk(KERN_INFO
562                                "Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
563
564                 if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
565                         iommu_detected = 1;
566                         /* Make sure ACS will be enabled */
567                         pci_request_acs();
568                 }
569
570 #ifdef CONFIG_X86
571                 if (ret)
572                         x86_init.iommu.iommu_init = intel_iommu_init;
573 #endif
574         }
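        /*
         * Drop the early mapping of the table; parse_dmar_table() maps it
         * again later via dmar_table_detect().
         */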
575         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
576         dmar_tbl = NULL;
577
578         return ret ? 1 : -ENODEV;
579 }
580
581
582 int alloc_iommu(struct dmar_drhd_unit *drhd)
583 {
584         struct intel_iommu *iommu;
585         int map_size;
586         u32 ver;
587         static int iommu_allocated = 0;
588         int agaw = 0;
589         int msagaw = 0;
590
591         if (!drhd->reg_base_addr) {
592                 warn_invalid_dmar(0, "");
593                 return -EINVAL;
594         }
595
596         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
597         if (!iommu)
598                 return -ENOMEM;
599
600         iommu->seq_id = iommu_allocated++;
601         sprintf(iommu->name, "dmar%d", iommu->seq_id);
602
603         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
604         if (!iommu->reg) {
605                 printk(KERN_ERR "IOMMU: can't map the region\n");
606                 goto error;
607         }
608         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
609         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
610
611         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
612                 warn_invalid_dmar(drhd->reg_base_addr, " returns all ones");
613                 goto err_unmap;
614         }
615
616         agaw = iommu_calculate_agaw(iommu);
617         if (agaw < 0) {
618                 printk(KERN_ERR
619                        "Cannot get a valid agaw for iommu (seq_id = %d)\n",
620                        iommu->seq_id);
621                 goto err_unmap;
622         }
623         msagaw = iommu_calculate_max_sagaw(iommu);
624         if (msagaw < 0) {
625                 printk(KERN_ERR
626                         "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
627                         iommu->seq_id);
628                 goto err_unmap;
629         }
630         iommu->agaw = agaw;
631         iommu->msagaw = msagaw;
632
633         iommu->node = -1;
634
635         /* the registers might be more than one page */
636         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
637                 cap_max_fault_reg_offset(iommu->cap));
638         map_size = VTD_PAGE_ALIGN(map_size);
639         if (map_size > VTD_PAGE_SIZE) {
640                 iounmap(iommu->reg);
641                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
642                 if (!iommu->reg) {
643                         printk(KERN_ERR "IOMMU: can't map the region\n");
644                         goto error;
645                 }
646         }
647
648         ver = readl(iommu->reg + DMAR_VER_REG);
649         pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
650                 iommu->seq_id,
651                 (unsigned long long)drhd->reg_base_addr,
652                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
653                 (unsigned long long)iommu->cap,
654                 (unsigned long long)iommu->ecap);
655
656         raw_spin_lock_init(&iommu->register_lock);
657
658         drhd->iommu = iommu;
659         return 0;
660
661  err_unmap:
662         iounmap(iommu->reg);
663  error:
664         kfree(iommu);
665         return -1;
666 }
667
668 void free_iommu(struct intel_iommu *iommu)
669 {
670         if (!iommu)
671                 return;
672
673         free_dmar_iommu(iommu);
674
675         if (iommu->reg)
676                 iounmap(iommu->reg);
677         kfree(iommu);
678 }
679
680 /*
681  * Reclaim all the submitted descriptors which have completed their work.
682  */
683 static inline void reclaim_free_desc(struct q_inval *qi)
684 {
685         while (qi->desc_status[qi->free_tail] == QI_DONE ||
686                qi->desc_status[qi->free_tail] == QI_ABORT) {
687                 qi->desc_status[qi->free_tail] = QI_FREE;
688                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
689                 qi->free_cnt++;
690         }
691 }
692
693 static int qi_check_fault(struct intel_iommu *iommu, int index)
694 {
695         u32 fault;
696         int head, tail;
697         struct q_inval *qi = iommu->qi;
698         int wait_index = (index + 1) % QI_LENGTH;
699
700         if (qi->desc_status[wait_index] == QI_ABORT)
701                 return -EAGAIN;
702
703         fault = readl(iommu->reg + DMAR_FSTS_REG);
704
705         /*
706          * If IQE happens, the head points to the descriptor associated
707          * with the error. No new descriptors are fetched until the IQE
708          * is cleared.
709          */
710         if (fault & DMA_FSTS_IQE) {
711                 head = readl(iommu->reg + DMAR_IQH_REG);
712                 if ((head >> DMAR_IQ_SHIFT) == index) {
713                         printk(KERN_ERR "VT-d detected invalid descriptor: "
714                                 "low=%llx, high=%llx\n",
715                                 (unsigned long long)qi->desc[index].low,
716                                 (unsigned long long)qi->desc[index].high);
717                         memcpy(&qi->desc[index], &qi->desc[wait_index],
718                                         sizeof(struct qi_desc));
719                         __iommu_flush_cache(iommu, &qi->desc[index],
720                                         sizeof(struct qi_desc));
721                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
722                         return -EINVAL;
723                 }
724         }
725
726         /*
727          * If ITE happens, all pending wait_desc commands are aborted.
728          * No new descriptors are fetched until the ITE is cleared.
729          */
730         if (fault & DMA_FSTS_ITE) {
731                 head = readl(iommu->reg + DMAR_IQH_REG);
732                 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
733                 head |= 1;
734                 tail = readl(iommu->reg + DMAR_IQT_REG);
735                 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
736
737                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
738
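                /*
                 * Each submission queues a descriptor/wait-descriptor pair,
                 * so wait descriptors sit at odd indices; head was forced
                 * odd above, and the walk below steps back two at a time,
                 * aborting every wait descriptor that is still in use.
                 */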
739                 do {
740                         if (qi->desc_status[head] == QI_IN_USE)
741                                 qi->desc_status[head] = QI_ABORT;
742                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
743                 } while (head != tail);
744
745                 if (qi->desc_status[wait_index] == QI_ABORT)
746                         return -EAGAIN;
747         }
748
749         if (fault & DMA_FSTS_ICE)
750                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
751
752         return 0;
753 }
754
755 /*
756  * Submit the queued invalidation descriptor to the remapping
757  * hardware unit and wait for its completion.
758  */
759 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
760 {
761         int rc;
762         struct q_inval *qi = iommu->qi;
763         struct qi_desc *hw, wait_desc;
764         int wait_index, index;
765         unsigned long flags;
766
767         if (!qi)
768                 return 0;
769
770         hw = qi->desc;
771
772 restart:
773         rc = 0;
774
775         raw_spin_lock_irqsave(&qi->q_lock, flags);
776         while (qi->free_cnt < 3) {
777                 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
778                 cpu_relax();
779                 raw_spin_lock_irqsave(&qi->q_lock, flags);
780         }
781
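        /*
         * Each submission consumes two slots: the caller's descriptor at
         * index and a wait descriptor at wait_index.  Hardware writes
         * QI_DONE into desc_status[wait_index] once both have executed,
         * which is what the polling loop below waits for.
         */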
782         index = qi->free_head;
783         wait_index = (index + 1) % QI_LENGTH;
784
785         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
786
787         hw[index] = *desc;
788
789         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
790                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
791         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
792
793         hw[wait_index] = wait_desc;
794
795         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
796         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
797
798         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
799         qi->free_cnt -= 2;
800
801         /*
802          * update the HW tail register indicating the presence of
803          * new descriptors.
804          */
805         writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
806
807         while (qi->desc_status[wait_index] != QI_DONE) {
808                 /*
809                  * We will leave the interrupts disabled, to prevent the
810                  * interrupt context from queueing another cmd while a cmd is
811                  * already submitted and waiting for completion on this cpu.
812                  * This is to avoid a deadlock where the interrupt context can
813                  * wait indefinitely for free slots in the queue.
814                  */
815                 rc = qi_check_fault(iommu, index);
816                 if (rc)
817                         break;
818
819                 raw_spin_unlock(&qi->q_lock);
820                 cpu_relax();
821                 raw_spin_lock(&qi->q_lock);
822         }
823
824         qi->desc_status[index] = QI_DONE;
825
826         reclaim_free_desc(qi);
827         raw_spin_unlock_irqrestore(&qi->q_lock, flags);
828
829         if (rc == -EAGAIN)
830                 goto restart;
831
832         return rc;
833 }
834
835 /*
836  * Flush the global interrupt entry cache.
837  */
838 void qi_global_iec(struct intel_iommu *iommu)
839 {
840         struct qi_desc desc;
841
842         desc.low = QI_IEC_TYPE;
843         desc.high = 0;
844
845         /* should never fail */
846         qi_submit_sync(&desc, iommu);
847 }
848
849 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
850                       u64 type)
851 {
852         struct qi_desc desc;
853
854         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
855                         | QI_CC_GRAN(type) | QI_CC_TYPE;
856         desc.high = 0;
857
858         qi_submit_sync(&desc, iommu);
859 }
860
861 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
862                     unsigned int size_order, u64 type)
863 {
864         u8 dw = 0, dr = 0;
865
866         struct qi_desc desc;
867         int ih = 0;
868
869         if (cap_write_drain(iommu->cap))
870                 dw = 1;
871
872         if (cap_read_drain(iommu->cap))
873                 dr = 1;
874
875         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
876                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
877         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
878                 | QI_IOTLB_AM(size_order);
879
880         qi_submit_sync(&desc, iommu);
881 }
882
883 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
884                         u64 addr, unsigned mask)
885 {
886         struct qi_desc desc;
887
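        /*
         * A non-zero mask asks for a 2^mask page invalidation: the low
         * address bits are filled in to encode the span and the size bit
         * (QI_DEV_IOTLB_SIZE) is set, per the device-IOTLB invalidate
         * descriptor format.
         */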
888         if (mask) {
889                 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
890                 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
891                 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
892         } else
893                 desc.high = QI_DEV_IOTLB_ADDR(addr);
894
895         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
896                 qdep = 0;
897
898         desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
899                    QI_DIOTLB_TYPE;
900
901         qi_submit_sync(&desc, iommu);
902 }
903
904 /*
905  * Disable Queued Invalidation interface.
906  */
907 void dmar_disable_qi(struct intel_iommu *iommu)
908 {
909         unsigned long flags;
910         u32 sts;
911         cycles_t start_time = get_cycles();
912
913         if (!ecap_qis(iommu->ecap))
914                 return;
915
916         raw_spin_lock_irqsave(&iommu->register_lock, flags);
917
918         sts = readl(iommu->reg + DMAR_GSTS_REG);
919         if (!(sts & DMA_GSTS_QIES))
920                 goto end;
921
922         /*
923          * Give the HW a chance to complete the pending invalidation requests.
924          */
925         while ((readl(iommu->reg + DMAR_IQT_REG) !=
926                 readl(iommu->reg + DMAR_IQH_REG)) &&
927                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
928                 cpu_relax();
929
930         iommu->gcmd &= ~DMA_GCMD_QIE;
931         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
932
933         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
934                       !(sts & DMA_GSTS_QIES), sts);
935 end:
936         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
937 }
938
939 /*
940  * Enable queued invalidation.
941  */
942 static void __dmar_enable_qi(struct intel_iommu *iommu)
943 {
944         u32 sts;
945         unsigned long flags;
946         struct q_inval *qi = iommu->qi;
947
948         qi->free_head = qi->free_tail = 0;
949         qi->free_cnt = QI_LENGTH;
950
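        /*
         * Programming order matters: clear the tail pointer, point IQA_REG
         * at the descriptor page, then set QIE and wait for the status bit
         * to confirm the hardware has enabled the queue.
         */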
951         raw_spin_lock_irqsave(&iommu->register_lock, flags);
952
953         /* write zero to the tail reg */
954         writel(0, iommu->reg + DMAR_IQT_REG);
955
956         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
957
958         iommu->gcmd |= DMA_GCMD_QIE;
959         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
960
961         /* Make sure the hardware has completed it */
962         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
963
964         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
965 }
966
967 /*
968  * Enable the Queued Invalidation interface. This is required to support
969  * interrupt remapping. It is also used by DMA remapping, where it replaces
970  * register-based IOTLB invalidation.
971  */
972 int dmar_enable_qi(struct intel_iommu *iommu)
973 {
974         struct q_inval *qi;
975         struct page *desc_page;
976
977         if (!ecap_qis(iommu->ecap))
978                 return -ENOENT;
979
980         /*
981          * queued invalidation is already set up and enabled.
982          */
983         if (iommu->qi)
984                 return 0;
985
986         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
987         if (!iommu->qi)
988                 return -ENOMEM;
989
990         qi = iommu->qi;
991
992
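        /*
         * The invalidation queue is a single page of descriptors (QI_LENGTH
         * entries), allocated on the IOMMU's home node so hardware fetches
         * of the descriptors stay local.
         */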
993         desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
994         if (!desc_page) {
995                 kfree(qi);
996                 iommu->qi = NULL;
997                 return -ENOMEM;
998         }
999
1000         qi->desc = page_address(desc_page);
1001
1002         qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1003         if (!qi->desc_status) {
1004                 free_page((unsigned long) qi->desc);
1005                 kfree(qi);
1006                 iommu->qi = NULL;
1007                 return -ENOMEM;
1008         }
1009
1010         qi->free_head = qi->free_tail = 0;
1011         qi->free_cnt = QI_LENGTH;
1012
1013         raw_spin_lock_init(&qi->q_lock);
1014
1015         __dmar_enable_qi(iommu);
1016
1017         return 0;
1018 }
1019
1020 /* iommu interrupt handling. Most of it is MSI-like. */
1021
1022 enum faulttype {
1023         DMA_REMAP,
1024         INTR_REMAP,
1025         UNKNOWN,
1026 };
1027
1028 static const char *dma_remap_fault_reasons[] =
1029 {
1030         "Software",
1031         "Present bit in root entry is clear",
1032         "Present bit in context entry is clear",
1033         "Invalid context entry",
1034         "Access beyond MGAW",
1035         "PTE Write access is not set",
1036         "PTE Read access is not set",
1037         "Next page table ptr is invalid",
1038         "Root table address invalid",
1039         "Context table ptr is invalid",
1040         "non-zero reserved fields in RTP",
1041         "non-zero reserved fields in CTP",
1042         "non-zero reserved fields in PTE",
1043 };
1044
1045 static const char *irq_remap_fault_reasons[] =
1046 {
1047         "Detected reserved fields in the decoded interrupt-remapped request",
1048         "Interrupt index exceeded the interrupt-remapping table size",
1049         "Present field in the IRTE entry is clear",
1050         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1051         "Detected reserved fields in the IRTE entry",
1052         "Blocked a compatibility format interrupt request",
1053         "Blocked an interrupt request due to source-id verification failure",
1054 };
1055
1056 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(dma_remap_fault_reasons) - 1)
1057
1058 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1059 {
1060         if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1061                                         ARRAY_SIZE(irq_remap_fault_reasons))) {
1062                 *fault_type = INTR_REMAP;
1063                 return irq_remap_fault_reasons[fault_reason - 0x20];
1064         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1065                 *fault_type = DMA_REMAP;
1066                 return dma_remap_fault_reasons[fault_reason];
1067         } else {
1068                 *fault_type = UNKNOWN;
1069                 return "Unknown";
1070         }
1071 }
1072
1073 void dmar_msi_unmask(struct irq_data *data)
1074 {
1075         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1076         unsigned long flag;
1077
1078         /* unmask it */
1079         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1080         writel(0, iommu->reg + DMAR_FECTL_REG);
1081         /* Read a reg to force flush the post write */
1082         readl(iommu->reg + DMAR_FECTL_REG);
1083         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1084 }
1085
1086 void dmar_msi_mask(struct irq_data *data)
1087 {
1088         unsigned long flag;
1089         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1090
1091         /* mask it */
1092         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1093         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1094         /* Read a reg to force flush the post write */
1095         readl(iommu->reg + DMAR_FECTL_REG);
1096         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1097 }
1098
1099 void dmar_msi_write(int irq, struct msi_msg *msg)
1100 {
1101         struct intel_iommu *iommu = irq_get_handler_data(irq);
1102         unsigned long flag;
1103
1104         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1105         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1106         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1107         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1108         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1109 }
1110
1111 void dmar_msi_read(int irq, struct msi_msg *msg)
1112 {
1113         struct intel_iommu *iommu = irq_get_handler_data(irq);
1114         unsigned long flag;
1115
1116         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1117         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1118         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1119         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1120         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1121 }
1122
1123 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1124                 u8 fault_reason, u16 source_id, unsigned long long addr)
1125 {
1126         const char *reason;
1127         int fault_type;
1128
1129         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1130
1131         if (fault_type == INTR_REMAP)
1132                 printk(KERN_ERR "INTR-REMAP: Request device [%02x:%02x.%d] "
1133                        "fault index %llx\n"
1134                         "INTR-REMAP:[fault reason %02d] %s\n",
1135                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1136                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1137                         fault_reason, reason);
1138         else
1139                 printk(KERN_ERR
1140                        "DMAR:[%s] Request device [%02x:%02x.%d] "
1141                        "fault addr %llx\n"
1142                        "DMAR:[fault reason %02d] %s\n",
1143                        (type ? "DMA Read" : "DMA Write"),
1144                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1145                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1146         return 0;
1147 }
1148
1149 #define PRIMARY_FAULT_REG_LEN (16)
1150 irqreturn_t dmar_fault(int irq, void *dev_id)
1151 {
1152         struct intel_iommu *iommu = dev_id;
1153         int reg, fault_index;
1154         u32 fault_status;
1155         unsigned long flag;
1156
1157         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1158         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1159         if (fault_status)
1160                 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1161                        fault_status);
1162
1163         /* TBD: ignore advanced fault log currently */
1164         if (!(fault_status & DMA_FSTS_PPF))
1165                 goto clear_rest;
1166
1167         fault_index = dma_fsts_fault_record_index(fault_status);
1168         reg = cap_fault_reg_offset(iommu->cap);
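        /*
         * Walk the fault recording registers: each record is 16 bytes
         * (PRIMARY_FAULT_REG_LEN).  The high dword carries the F bit, fault
         * reason and request type, the dword at offset 8 the source-id, and
         * the low qword the faulting page address; writing DMA_FRCD_F back
         * clears the record.
         */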
1169         while (1) {
1170                 u8 fault_reason;
1171                 u16 source_id;
1172                 u64 guest_addr;
1173                 int type;
1174                 u32 data;
1175
1176                 /* highest 32 bits */
1177                 data = readl(iommu->reg + reg +
1178                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1179                 if (!(data & DMA_FRCD_F))
1180                         break;
1181
1182                 fault_reason = dma_frcd_fault_reason(data);
1183                 type = dma_frcd_type(data);
1184
1185                 data = readl(iommu->reg + reg +
1186                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1187                 source_id = dma_frcd_source_id(data);
1188
1189                 guest_addr = dmar_readq(iommu->reg + reg +
1190                                 fault_index * PRIMARY_FAULT_REG_LEN);
1191                 guest_addr = dma_frcd_page_addr(guest_addr);
1192                 /* clear the fault */
1193                 writel(DMA_FRCD_F, iommu->reg + reg +
1194                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1195
1196                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1197
1198                 dmar_fault_do_one(iommu, type, fault_reason,
1199                                 source_id, guest_addr);
1200
1201                 fault_index++;
1202                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1203                         fault_index = 0;
1204                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1205         }
1206 clear_rest:
1207         /* clear all the other faults */
1208         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1209         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1210
1211         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1212         return IRQ_HANDLED;
1213 }
1214
1215 int dmar_set_interrupt(struct intel_iommu *iommu)
1216 {
1217         int irq, ret;
1218
1219         /*
1220          * Check if the fault interrupt is already initialized.
1221          */
1222         if (iommu->irq)
1223                 return 0;
1224
1225         irq = create_irq();
1226         if (!irq) {
1227                 printk(KERN_ERR "IOMMU: no free vectors\n");
1228                 return -EINVAL;
1229         }
1230
1231         irq_set_handler_data(irq, iommu);
1232         iommu->irq = irq;
1233
1234         ret = arch_setup_dmar_msi(irq);
1235         if (ret) {
1236                 irq_set_handler_data(irq, NULL);
1237                 iommu->irq = 0;
1238                 destroy_irq(irq);
1239                 return ret;
1240         }
1241
1242         ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1243         if (ret)
1244                 printk(KERN_ERR "IOMMU: can't request irq\n");
1245         return ret;
1246 }
1247
1248 int __init enable_drhd_fault_handling(void)
1249 {
1250         struct dmar_drhd_unit *drhd;
1251
1252         /*
1253          * Enable fault control interrupt.
1254          */
1255         for_each_drhd_unit(drhd) {
1256                 int ret;
1257                 struct intel_iommu *iommu = drhd->iommu;
1258                 ret = dmar_set_interrupt(iommu);
1259
1260                 if (ret) {
1261                         printk(KERN_ERR "DRHD %Lx: failed to enable fault "
1262                                "interrupt, ret %d\n",
1263                                (unsigned long long)drhd->reg_base_addr, ret);
1264                         return -1;
1265                 }
1266
1267                 /*
1268                  * Clear any previous faults.
1269                  */
1270                 dmar_fault(iommu->irq, iommu);
1271         }
1272
1273         return 0;
1274 }
1275
1276 /*
1277  * Re-enable Queued Invalidation interface.
1278  */
1279 int dmar_reenable_qi(struct intel_iommu *iommu)
1280 {
1281         if (!ecap_qis(iommu->ecap))
1282                 return -ENOENT;
1283
1284         if (!iommu->qi)
1285                 return -ENOENT;
1286
1287         /*
1288          * First disable queued invalidation.
1289          */
1290         dmar_disable_qi(iommu);
1291         /*
1292          * Then enable queued invalidation again. Since there is no pending
1293          * invalidation requests now, it's safe to re-enable queued
1294          * invalidation.
1295          */
1296         __dmar_enable_qi(iommu);
1297
1298         return 0;
1299 }
1300
1301 /*
1302  * Check interrupt remapping support in DMAR table description.
1303  */
1304 int __init dmar_ir_support(void)
1305 {
1306         struct acpi_table_dmar *dmar;
1307         dmar = (struct acpi_table_dmar *)dmar_tbl;
1308         if (!dmar)
1309                 return 0;
1310         return dmar->flags & 0x1;
1311 }
1312 IOMMU_INIT_POST(detect_intel_iommu);