1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36 #include <linux/tboot.h>
37 #include <linux/dmi.h>
38
39 #define PREFIX "DMAR: "
40
41 /* No locks are needed as the DMA remapping hardware unit
42  * list is constructed at boot time and hotplug of
43  * these units is not supported by the architecture.
44  */
45 LIST_HEAD(dmar_drhd_units);
46
47 static struct acpi_table_header * __initdata dmar_tbl;
48 static acpi_size dmar_tbl_size;
49
50 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
51 {
52         /*
53          * Add INCLUDE_ALL units at the tail, so a scan of the list will
54          * find them at the very end.
55          */
56         if (drhd->include_all)
57                 list_add_tail(&drhd->list, &dmar_drhd_units);
58         else
59                 list_add(&drhd->list, &dmar_drhd_units);
60 }
61
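/*
 * Resolve one ACPI device-scope entry to a pci_dev by walking its
 * (device, function) path entries downward from the entry's start bus.
 * On success *dev holds a reference taken with pci_get_slot().
 */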
62 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
63                                            struct pci_dev **dev, u16 segment)
64 {
65         struct pci_bus *bus;
66         struct pci_dev *pdev = NULL;
67         struct acpi_dmar_pci_path *path;
68         int count;
69
70         bus = pci_find_bus(segment, scope->bus);
71         path = (struct acpi_dmar_pci_path *)(scope + 1);
72         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
73                 / sizeof(struct acpi_dmar_pci_path);
74
75         while (count) {
76                 if (pdev)
77                         pci_dev_put(pdev);
78                 /*
79                  * Some BIOSes list non-existent devices in the DMAR table;
80                  * just ignore them.
81                  */
82                 if (!bus) {
83                         printk(KERN_WARNING
84                         PREFIX "Device scope bus [%d] not found\n",
85                         scope->bus);
86                         break;
87                 }
88                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
89                 if (!pdev) {
90                         printk(KERN_WARNING PREFIX
91                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
92                                 segment, bus->number, path->dev, path->fn);
93                         break;
94                 }
95                 path++;
96                 count--;
97                 bus = pdev->subordinate;
98         }
99         if (!pdev) {
100                 printk(KERN_WARNING PREFIX
101                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
102                 segment, scope->bus, path->dev, path->fn);
103                 *dev = NULL;
104                 return 0;
105         }
106         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
107                         pdev->subordinate) || (scope->entry_type ==
108                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
109                 printk(KERN_WARNING PREFIX
110                         "Device scope type does not match for %s\n",
111                         pci_name(pdev));
112                 pci_dev_put(pdev);
113                 return -EINVAL;
114         }
115         *dev = pdev;
116         return 0;
117 }
118
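/*
 * Parse all device-scope entries in [start, end): first count the
 * endpoint and bridge entries, then allocate the array and resolve
 * each entry to a pci_dev.
 */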
119 static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
120                                        struct pci_dev ***devices, u16 segment)
121 {
122         struct acpi_dmar_device_scope *scope;
123         void * tmp = start;
124         int index;
125         int ret;
126
127         *cnt = 0;
128         while (start < end) {
129                 scope = start;
130                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
131                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
132                         (*cnt)++;
133                 else
134                         printk(KERN_WARNING PREFIX
135                                 "Unsupported device scope\n");
136                 start += scope->length;
137         }
138         if (*cnt == 0)
139                 return 0;
140
141         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
142         if (!*devices)
143                 return -ENOMEM;
144
145         start = tmp;
146         index = 0;
147         while (start < end) {
148                 scope = start;
149                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
150                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
151                         ret = dmar_parse_one_dev_scope(scope,
152                                 &(*devices)[index], segment);
153                         if (ret) {
154                                 kfree(*devices);
155                                 return ret;
156                         }
157                         index++;
158                 }
159                 start += scope->length;
160         }
161
162         return 0;
163 }
164
165 /**
166  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
167  * structure which uniquely represents one DMA remapping hardware unit
168  * present in the platform
169  */
170 static int __init
171 dmar_parse_one_drhd(struct acpi_dmar_header *header)
172 {
173         struct acpi_dmar_hardware_unit *drhd;
174         struct dmar_drhd_unit *dmaru;
175         int ret = 0;
176
177         drhd = (struct acpi_dmar_hardware_unit *)header;
178         if (!drhd->address) {
179                 /* Promote an attitude of violence to a BIOS engineer today */
180                 WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n"
181                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
182                      dmi_get_system_info(DMI_BIOS_VENDOR),
183                      dmi_get_system_info(DMI_BIOS_VERSION),
184                      dmi_get_system_info(DMI_PRODUCT_VERSION));
185                 return -ENODEV;
186         }
187         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
188         if (!dmaru)
189                 return -ENOMEM;
190
191         dmaru->hdr = header;
192         dmaru->reg_base_addr = drhd->address;
193         dmaru->segment = drhd->segment;
194         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
195
196         ret = alloc_iommu(dmaru);
197         if (ret) {
198                 kfree(dmaru);
199                 return ret;
200         }
201         dmar_register_drhd_unit(dmaru);
202         return 0;
203 }
204
205 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
206 {
207         struct acpi_dmar_hardware_unit *drhd;
208         int ret = 0;
209
210         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
211
212         if (dmaru->include_all)
213                 return 0;
214
215         ret = dmar_parse_dev_scope((void *)(drhd + 1),
216                                 ((void *)drhd) + drhd->header.length,
217                                 &dmaru->devices_cnt, &dmaru->devices,
218                                 drhd->segment);
219         if (ret) {
220                 list_del(&dmaru->list);
221                 kfree(dmaru);
222         }
223         return ret;
224 }
225
226 #ifdef CONFIG_DMAR
227 LIST_HEAD(dmar_rmrr_units);
228
229 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
230 {
231         list_add(&rmrr->list, &dmar_rmrr_units);
232 }
233
234
235 static int __init
236 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
237 {
238         struct acpi_dmar_reserved_memory *rmrr;
239         struct dmar_rmrr_unit *rmrru;
240
241         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
242         if (!rmrru)
243                 return -ENOMEM;
244
245         rmrru->hdr = header;
246         rmrr = (struct acpi_dmar_reserved_memory *)header;
247         rmrru->base_address = rmrr->base_address;
248         rmrru->end_address = rmrr->end_address;
249
250         dmar_register_rmrr_unit(rmrru);
251         return 0;
252 }
253
254 static int __init
255 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
256 {
257         struct acpi_dmar_reserved_memory *rmrr;
258         int ret;
259
260         rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
261         ret = dmar_parse_dev_scope((void *)(rmrr + 1),
262                 ((void *)rmrr) + rmrr->header.length,
263                 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
264
265         if (ret || (rmrru->devices_cnt == 0)) {
266                 list_del(&rmrru->list);
267                 kfree(rmrru);
268         }
269         return ret;
270 }
271
272 static LIST_HEAD(dmar_atsr_units);
273
274 static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
275 {
276         struct acpi_dmar_atsr *atsr;
277         struct dmar_atsr_unit *atsru;
278
279         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
280         atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
281         if (!atsru)
282                 return -ENOMEM;
283
284         atsru->hdr = hdr;
285         atsru->include_all = atsr->flags & 0x1;
286
287         list_add(&atsru->list, &dmar_atsr_units);
288
289         return 0;
290 }
291
292 static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
293 {
294         int rc;
295         struct acpi_dmar_atsr *atsr;
296
297         if (atsru->include_all)
298                 return 0;
299
300         atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
301         rc = dmar_parse_dev_scope((void *)(atsr + 1),
302                                 (void *)atsr + atsr->header.length,
303                                 &atsru->devices_cnt, &atsru->devices,
304                                 atsr->segment);
305         if (rc || !atsru->devices_cnt) {
306                 list_del(&atsru->list);
307                 kfree(atsru);
308         }
309
310         return rc;
311 }
312
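/*
 * Check whether @dev sits below a PCIe root port that is listed in an
 * ATSR unit for its segment; an INCLUDE_ALL ATSR covers every device
 * in the segment.
 */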
313 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
314 {
315         int i;
316         struct pci_bus *bus;
317         struct acpi_dmar_atsr *atsr;
318         struct dmar_atsr_unit *atsru;
319
320         list_for_each_entry(atsru, &dmar_atsr_units, list) {
321                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
322                 if (atsr->segment == pci_domain_nr(dev->bus))
323                         goto found;
324         }
325
326         return 0;
327
328 found:
329         for (bus = dev->bus; bus; bus = bus->parent) {
330                 struct pci_dev *bridge = bus->self;
331
332                 if (!bridge || !bridge->is_pcie ||
333                     bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
334                         return 0;
335
336                 if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
337                         for (i = 0; i < atsru->devices_cnt; i++)
338                                 if (atsru->devices[i] == bridge)
339                                         return 1;
340                         break;
341                 }
342         }
343
344         if (atsru->include_all)
345                 return 1;
346
347         return 0;
348 }
349 #endif
350
351 static void __init
352 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
353 {
354         struct acpi_dmar_hardware_unit *drhd;
355         struct acpi_dmar_reserved_memory *rmrr;
356         struct acpi_dmar_atsr *atsr;
357
358         switch (header->type) {
359         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
360                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
361                                     header);
362                 printk(KERN_INFO PREFIX
363                         "DRHD base: %#016Lx flags: %#x\n",
364                         (unsigned long long)drhd->address, drhd->flags);
365                 break;
366         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
367                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
368                                     header);
369                 printk(KERN_INFO PREFIX
370                         "RMRR base: %#016Lx end: %#016Lx\n",
371                         (unsigned long long)rmrr->base_address,
372                         (unsigned long long)rmrr->end_address);
373                 break;
374         case ACPI_DMAR_TYPE_ATSR:
375                 atsr = container_of(header, struct acpi_dmar_atsr, header);
376                 printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
377                 break;
378         }
379 }
380
381 /**
382  * dmar_table_detect - checks to see if the platform supports DMAR devices
383  */
384 static int __init dmar_table_detect(void)
385 {
386         acpi_status status = AE_OK;
387
388         /* if we can find the DMAR table, then there are DMAR devices */
389         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
390                                 (struct acpi_table_header **)&dmar_tbl,
391                                 &dmar_tbl_size);
392
393         if (ACPI_SUCCESS(status) && !dmar_tbl) {
394                 printk(KERN_WARNING PREFIX "Unable to map DMAR\n");
395                 status = AE_NOT_FOUND;
396         }
397
398         return (ACPI_SUCCESS(status) ? 1 : 0);
399 }
400
401 /**
402  * parse_dmar_table - parses the DMA reporting table
403  */
404 static int __init
405 parse_dmar_table(void)
406 {
407         struct acpi_table_dmar *dmar;
408         struct acpi_dmar_header *entry_header;
409         int ret = 0;
410
411         /*
412          * Do it again; the earlier dmar_tbl mapping may have been done
413          * with the fixed map.
414          */
415         dmar_table_detect();
416
417         /*
418          * ACPI tables may not be DMA protected by tboot, so use the DMAR
419          * copy that SINIT saved in SinitMleData in the TXT heap (DMA protected)
420          */
421         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
422
423         dmar = (struct acpi_table_dmar *)dmar_tbl;
424         if (!dmar)
425                 return -ENODEV;
426
427         if (dmar->width < PAGE_SHIFT - 1) {
428                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
429                 return -EINVAL;
430         }
431
432         printk(KERN_INFO PREFIX "Host address width %d\n",
433                 dmar->width + 1);
434
435         entry_header = (struct acpi_dmar_header *)(dmar + 1);
436         while (((unsigned long)entry_header) <
437                         (((unsigned long)dmar) + dmar_tbl->length)) {
438                 /* Avoid looping forever on bad ACPI tables */
439                 if (entry_header->length == 0) {
440                         printk(KERN_WARNING PREFIX
441                                 "Invalid 0-length structure\n");
442                         ret = -EINVAL;
443                         break;
444                 }
445
446                 dmar_table_print_dmar_entry(entry_header);
447
448                 switch (entry_header->type) {
449                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
450                         ret = dmar_parse_one_drhd(entry_header);
451                         break;
452                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
453 #ifdef CONFIG_DMAR
454                         ret = dmar_parse_one_rmrr(entry_header);
455 #endif
456                         break;
457                 case ACPI_DMAR_TYPE_ATSR:
458 #ifdef CONFIG_DMAR
459                         ret = dmar_parse_one_atsr(entry_header);
460 #endif
461                         break;
462                 default:
463                         printk(KERN_WARNING PREFIX
464                                 "Unknown DMAR structure type\n");
465                         ret = 0; /* for forward compatibility */
466                         break;
467                 }
468                 if (ret)
469                         break;
470
471                 entry_header = ((void *)entry_header + entry_header->length);
472         }
473         return ret;
474 }
475
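/* Return 1 if @dev, or any bridge above it, appears in @devices. */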
476 int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
477                           struct pci_dev *dev)
478 {
479         int index;
480
481         while (dev) {
482                 for (index = 0; index < cnt; index++)
483                         if (dev == devices[index])
484                                 return 1;
485
486                 /* Check our parent */
487                 dev = dev->bus->self;
488         }
489
490         return 0;
491 }
492
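/*
 * Return the DRHD unit whose device scope contains @dev (or one of its
 * parent bridges), falling back to the segment's INCLUDE_ALL unit,
 * which dmar_register_drhd_unit() keeps at the tail of the list.
 */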
493 struct dmar_drhd_unit *
494 dmar_find_matched_drhd_unit(struct pci_dev *dev)
495 {
496         struct dmar_drhd_unit *dmaru = NULL;
497         struct acpi_dmar_hardware_unit *drhd;
498
499         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
500                 drhd = container_of(dmaru->hdr,
501                                     struct acpi_dmar_hardware_unit,
502                                     header);
503
504                 if (dmaru->include_all &&
505                     drhd->segment == pci_domain_nr(dev->bus))
506                         return dmaru;
507
508                 if (dmar_pci_device_match(dmaru->devices,
509                                           dmaru->devices_cnt, dev))
510                         return dmaru;
511         }
512
513         return NULL;
514 }
515
516 int __init dmar_dev_scope_init(void)
517 {
518         struct dmar_drhd_unit *drhd, *drhd_n;
519         int ret = -ENODEV;
520
521         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
522                 ret = dmar_parse_dev(drhd);
523                 if (ret)
524                         return ret;
525         }
526
527 #ifdef CONFIG_DMAR
528         {
529                 struct dmar_rmrr_unit *rmrr, *rmrr_n;
530                 struct dmar_atsr_unit *atsr, *atsr_n;
531
532                 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
533                         ret = rmrr_parse_dev(rmrr);
534                         if (ret)
535                                 return ret;
536                 }
537
538                 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
539                         ret = atsr_parse_dev(atsr);
540                         if (ret)
541                                 return ret;
542                 }
543         }
544 #endif
545
546         return ret;
547 }
548
549
550 int __init dmar_table_init(void)
551 {
552         static int dmar_table_initialized;
553         int ret;
554
555         if (dmar_table_initialized)
556                 return 0;
557
558         dmar_table_initialized = 1;
559
560         ret = parse_dmar_table();
561         if (ret) {
562                 if (ret != -ENODEV)
563                         printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
564                 return ret;
565         }
566
567         if (list_empty(&dmar_drhd_units)) {
568                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
569                 return -ENODEV;
570         }
571
572 #ifdef CONFIG_DMAR
573         if (list_empty(&dmar_rmrr_units))
574                 printk(KERN_INFO PREFIX "No RMRR found\n");
575
576         if (list_empty(&dmar_atsr_units))
577                 printk(KERN_INFO PREFIX "No ATSR found\n");
578 #endif
579
580         return 0;
581 }
582
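/*
 * Early detection: check only for the presence of the DMAR table (and
 * flag that an IOMMU was detected when DMA remapping is usable).  The
 * early mapping is dropped again here; dmar_table_init() re-reads the
 * table later.
 */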
583 void __init detect_intel_iommu(void)
584 {
585         int ret;
586
587         ret = dmar_table_detect();
588
589         {
590 #ifdef CONFIG_INTR_REMAP
591                 struct acpi_table_dmar *dmar;
592                 /*
593                  * for now we will disable dma-remapping when interrupt
594                  * remapping is enabled.
595                  * When support for queued invalidation for IOTLB invalidation
596                  * is added, we will not need this any more.
597                  */
598                 dmar = (struct acpi_table_dmar *) dmar_tbl;
599                 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
600                         printk(KERN_INFO
601                                "Queued invalidation will be enabled to support "
602                                "x2apic and Intr-remapping.\n");
603 #endif
604 #ifdef CONFIG_DMAR
605                 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
606                     !dmar_disabled)
607                         iommu_detected = 1;
608 #endif
609         }
610         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
611         dmar_tbl = NULL;
612 }
613
614
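/*
 * Map the unit's register set, read its capabilities and set up the
 * per-IOMMU state for @drhd.  The initial mapping covers one VT-d
 * page and is redone if the fault and IOTLB registers extend past it.
 */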
615 int alloc_iommu(struct dmar_drhd_unit *drhd)
616 {
617         struct intel_iommu *iommu;
618         int map_size;
619         u32 ver;
620         static int iommu_allocated = 0;
621         int agaw = 0;
622         int msagaw = 0;
623
624         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
625         if (!iommu)
626                 return -ENOMEM;
627
628         iommu->seq_id = iommu_allocated++;
629         sprintf(iommu->name, "dmar%d", iommu->seq_id);
630
631         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
632         if (!iommu->reg) {
633                 printk(KERN_ERR "IOMMU: can't map the region\n");
634                 goto error;
635         }
636         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
637         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
638
639         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
640                 /* Promote an attitude of violence to a BIOS engineer today */
641                 WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n"
642                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
643                      drhd->reg_base_addr,
644                      dmi_get_system_info(DMI_BIOS_VENDOR),
645                      dmi_get_system_info(DMI_BIOS_VERSION),
646                      dmi_get_system_info(DMI_PRODUCT_VERSION));
647                 goto err_unmap;
648         }
649
650 #ifdef CONFIG_DMAR
651         agaw = iommu_calculate_agaw(iommu);
652         if (agaw < 0) {
653                 printk(KERN_ERR
654                        "Cannot get a valid agaw for iommu (seq_id = %d)\n",
655                        iommu->seq_id);
656                 goto err_unmap;
657         }
658         msagaw = iommu_calculate_max_sagaw(iommu);
659         if (msagaw < 0) {
660                 printk(KERN_ERR
661                         "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
662                         iommu->seq_id);
663                 goto err_unmap;
664         }
665 #endif
666         iommu->agaw = agaw;
667         iommu->msagaw = msagaw;
668
669         /* the registers might be more than one page */
670         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
671                 cap_max_fault_reg_offset(iommu->cap));
672         map_size = VTD_PAGE_ALIGN(map_size);
673         if (map_size > VTD_PAGE_SIZE) {
674                 iounmap(iommu->reg);
675                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
676                 if (!iommu->reg) {
677                         printk(KERN_ERR "IOMMU: can't map the region\n");
678                         goto error;
679                 }
680         }
681
682         ver = readl(iommu->reg + DMAR_VER_REG);
683         pr_info("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
684                 (unsigned long long)drhd->reg_base_addr,
685                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
686                 (unsigned long long)iommu->cap,
687                 (unsigned long long)iommu->ecap);
688
689         spin_lock_init(&iommu->register_lock);
690
691         drhd->iommu = iommu;
692         return 0;
693
694  err_unmap:
695         iounmap(iommu->reg);
696  error:
697         kfree(iommu);
698         return -1;
699 }
700
701 void free_iommu(struct intel_iommu *iommu)
702 {
703         if (!iommu)
704                 return;
705
706 #ifdef CONFIG_DMAR
707         free_dmar_iommu(iommu);
708 #endif
709
710         if (iommu->reg)
711                 iounmap(iommu->reg);
712         kfree(iommu);
713 }
714
715 /*
716  * Reclaim all the submitted descriptors which have completed their work.
717  */
718 static inline void reclaim_free_desc(struct q_inval *qi)
719 {
720         while (qi->desc_status[qi->free_tail] == QI_DONE ||
721                qi->desc_status[qi->free_tail] == QI_ABORT) {
722                 qi->desc_status[qi->free_tail] = QI_FREE;
723                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
724                 qi->free_cnt++;
725         }
726 }
727
728 static int qi_check_fault(struct intel_iommu *iommu, int index)
729 {
730         u32 fault;
731         int head, tail;
732         struct q_inval *qi = iommu->qi;
733         int wait_index = (index + 1) % QI_LENGTH;
734
735         if (qi->desc_status[wait_index] == QI_ABORT)
736                 return -EAGAIN;
737
738         fault = readl(iommu->reg + DMAR_FSTS_REG);
739
740         /*
741          * If IQE happens, the head points to the descriptor associated
742          * with the error. No new descriptors are fetched until the IQE
743          * is cleared.
744          */
745         if (fault & DMA_FSTS_IQE) {
746                 head = readl(iommu->reg + DMAR_IQH_REG);
747                 if ((head >> DMAR_IQ_SHIFT) == index) {
748                         printk(KERN_ERR "VT-d detected invalid descriptor: "
749                                 "low=%llx, high=%llx\n",
750                                 (unsigned long long)qi->desc[index].low,
751                                 (unsigned long long)qi->desc[index].high);
752                         memcpy(&qi->desc[index], &qi->desc[wait_index],
753                                         sizeof(struct qi_desc));
754                         __iommu_flush_cache(iommu, &qi->desc[index],
755                                         sizeof(struct qi_desc));
756                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
757                         return -EINVAL;
758                 }
759         }
760
761         /*
762          * If ITE happens, all pending wait_desc commands are aborted.
763          * No new descriptors are fetched until the ITE is cleared.
764          */
765         if (fault & DMA_FSTS_ITE) {
766                 head = readl(iommu->reg + DMAR_IQH_REG);
767                 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
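                /*
                 * Descriptors are queued in pairs (see qi_submit_sync()),
                 * so wait descriptors occupy the odd slots; round up to
                 * the nearest wait slot before marking pending waits as
                 * aborted.
                 */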
768                 head |= 1;
769                 tail = readl(iommu->reg + DMAR_IQT_REG);
770                 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
771
772                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
773
774                 do {
775                         if (qi->desc_status[head] == QI_IN_USE)
776                                 qi->desc_status[head] = QI_ABORT;
777                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
778                 } while (head != tail);
779
780                 if (qi->desc_status[wait_index] == QI_ABORT)
781                         return -EAGAIN;
782         }
783
784         if (fault & DMA_FSTS_ICE)
785                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
786
787         return 0;
788 }
789
790 /*
791  * Submit the queued invalidation descriptor to the remapping
792  * hardware unit and wait for its completion.
793  */
794 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
795 {
796         int rc;
797         struct q_inval *qi = iommu->qi;
798         struct qi_desc *hw, wait_desc;
799         int wait_index, index;
800         unsigned long flags;
801
802         if (!qi)
803                 return 0;
804
805         hw = qi->desc;
806
807 restart:
808         rc = 0;
809
810         spin_lock_irqsave(&qi->q_lock, flags);
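        /*
         * Each submission takes two slots: the descriptor itself plus a
         * wait descriptor used to detect its completion; spin until the
         * queue has room.
         */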
811         while (qi->free_cnt < 3) {
812                 spin_unlock_irqrestore(&qi->q_lock, flags);
813                 cpu_relax();
814                 spin_lock_irqsave(&qi->q_lock, flags);
815         }
816
817         index = qi->free_head;
818         wait_index = (index + 1) % QI_LENGTH;
819
820         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
821
822         hw[index] = *desc;
823
824         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
825                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
826         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
827
828         hw[wait_index] = wait_desc;
829
830         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
831         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
832
833         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
834         qi->free_cnt -= 2;
835
836         /*
837          * update the HW tail register indicating the presence of
838          * new descriptors.
839          */
840         writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
841
842         while (qi->desc_status[wait_index] != QI_DONE) {
843                 /*
844                  * We leave the interrupts disabled to prevent interrupt
845                  * context from queueing another cmd while a cmd is already
846                  * submitted and waiting for completion on this cpu. This
847                  * avoids a deadlock where the interrupt context could wait
848                  * indefinitely for free slots in the queue.
849                  */
850                 rc = qi_check_fault(iommu, index);
851                 if (rc)
852                         break;
853
854                 spin_unlock(&qi->q_lock);
855                 cpu_relax();
856                 spin_lock(&qi->q_lock);
857         }
858
859         qi->desc_status[index] = QI_DONE;
860
861         reclaim_free_desc(qi);
862         spin_unlock_irqrestore(&qi->q_lock, flags);
863
864         if (rc == -EAGAIN)
865                 goto restart;
866
867         return rc;
868 }
869
870 /*
871  * Flush the global interrupt entry cache.
872  */
873 void qi_global_iec(struct intel_iommu *iommu)
874 {
875         struct qi_desc desc;
876
877         desc.low = QI_IEC_TYPE;
878         desc.high = 0;
879
880         /* should never fail */
881         qi_submit_sync(&desc, iommu);
882 }
883
884 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
885                       u64 type)
886 {
887         struct qi_desc desc;
888
889         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
890                         | QI_CC_GRAN(type) | QI_CC_TYPE;
891         desc.high = 0;
892
893         qi_submit_sync(&desc, iommu);
894 }
895
896 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
897                     unsigned int size_order, u64 type)
898 {
899         u8 dw = 0, dr = 0;
900
901         struct qi_desc desc;
902         int ih = 0;
903
904         if (cap_write_drain(iommu->cap))
905                 dw = 1;
906
907         if (cap_read_drain(iommu->cap))
908                 dr = 1;
909
910         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
911                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
912         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
913                 | QI_IOTLB_AM(size_order);
914
915         qi_submit_sync(&desc, iommu);
916 }
917
918 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
919                         u64 addr, unsigned mask)
920 {
921         struct qi_desc desc;
922
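        /*
         * For a size-based invalidation the size is encoded in the low
         * address bits: set a run of 1s below the size bit and mark the
         * descriptor with QI_DEV_IOTLB_SIZE.
         */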
923         if (mask) {
924                 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
925                 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
926                 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
927         } else
928                 desc.high = QI_DEV_IOTLB_ADDR(addr);
929
930         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
931                 qdep = 0;
932
933         desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
934                    QI_DIOTLB_TYPE;
935
936         qi_submit_sync(&desc, iommu);
937 }
938
939 /*
940  * Disable Queued Invalidation interface.
941  */
942 void dmar_disable_qi(struct intel_iommu *iommu)
943 {
944         unsigned long flags;
945         u32 sts;
946         cycles_t start_time = get_cycles();
947
948         if (!ecap_qis(iommu->ecap))
949                 return;
950
951         spin_lock_irqsave(&iommu->register_lock, flags);
952
953         sts = readl(iommu->reg + DMAR_GSTS_REG);    /* GSTS is 32 bits wide */
954         if (!(sts & DMA_GSTS_QIES))
955                 goto end;
956
957         /*
958          * Give the HW a chance to complete the pending invalidation requests.
959          */
960         while ((readl(iommu->reg + DMAR_IQT_REG) !=
961                 readl(iommu->reg + DMAR_IQH_REG)) &&
962                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
963                 cpu_relax();
964
965         iommu->gcmd &= ~DMA_GCMD_QIE;
966         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
967
968         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
969                       !(sts & DMA_GSTS_QIES), sts);
970 end:
971         spin_unlock_irqrestore(&iommu->register_lock, flags);
972 }
973
974 /*
975  * Enable queued invalidation.
976  */
977 static void __dmar_enable_qi(struct intel_iommu *iommu)
978 {
979         u32 sts;
980         unsigned long flags;
981         struct q_inval *qi = iommu->qi;
982
983         qi->free_head = qi->free_tail = 0;
984         qi->free_cnt = QI_LENGTH;
985
986         spin_lock_irqsave(&iommu->register_lock, flags);
987
988         /* write zero to the tail reg */
989         writel(0, iommu->reg + DMAR_IQT_REG);
990
991         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
992
993         iommu->gcmd |= DMA_GCMD_QIE;
994         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
995
996         /* Make sure hardware completes it */
997         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
998
999         spin_unlock_irqrestore(&iommu->register_lock, flags);
1000 }
1001
1002 /*
1003  * Enable Queued Invalidation interface. This is a must to support
1004  * interrupt-remapping. Also used by DMA-remapping, which replaces
1005  * register based IOTLB invalidation.
1006  */
1007 int dmar_enable_qi(struct intel_iommu *iommu)
1008 {
1009         struct q_inval *qi;
1010
1011         if (!ecap_qis(iommu->ecap))
1012                 return -ENOENT;
1013
1014         /*
1015          * queued invalidation is already set up and enabled.
1016          */
1017         if (iommu->qi)
1018                 return 0;
1019
1020         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1021         if (!iommu->qi)
1022                 return -ENOMEM;
1023
1024         qi = iommu->qi;
1025
1026         qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
1027         if (!qi->desc) {
1028                 kfree(qi);
1029                 iommu->qi = NULL;
1030                 return -ENOMEM;
1031         }
1032
1033         qi->desc_status = kzalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1034         if (!qi->desc_status) {
1035                 free_page((unsigned long) qi->desc);
1036                 kfree(qi);
1037                 iommu->qi = NULL;
1038                 return -ENOMEM;
1039         }
1040
1041         qi->free_head = qi->free_tail = 0;
1042         qi->free_cnt = QI_LENGTH;
1043
1044         spin_lock_init(&qi->q_lock);
1045
1046         __dmar_enable_qi(iommu);
1047
1048         return 0;
1049 }
1050
1051 /* iommu interrupt handling. Most of it is MSI-like. */
1052
1053 enum faulttype {
1054         DMA_REMAP,
1055         INTR_REMAP,
1056         UNKNOWN,
1057 };
1058
1059 static const char *dma_remap_fault_reasons[] =
1060 {
1061         "Software",
1062         "Present bit in root entry is clear",
1063         "Present bit in context entry is clear",
1064         "Invalid context entry",
1065         "Access beyond MGAW",
1066         "PTE Write access is not set",
1067         "PTE Read access is not set",
1068         "Next page table ptr is invalid",
1069         "Root table address invalid",
1070         "Context table ptr is invalid",
1071         "non-zero reserved fields in RTP",
1072         "non-zero reserved fields in CTP",
1073         "non-zero reserved fields in PTE",
1074 };
1075
1076 static const char *intr_remap_fault_reasons[] =
1077 {
1078         "Detected reserved fields in the decoded interrupt-remapped request",
1079         "Interrupt index exceeded the interrupt-remapping table size",
1080         "Present field in the IRTE entry is clear",
1081         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1082         "Detected reserved fields in the IRTE entry",
1083         "Blocked a compatibility format interrupt request",
1084         "Blocked an interrupt request due to source-id verification failure",
1085 };
1086
1087 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(dma_remap_fault_reasons) - 1)
1088
1089 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1090 {
1091         if (fault_reason >= 0x20 && (fault_reason < 0x20 +
1092                                      ARRAY_SIZE(intr_remap_fault_reasons))) {
1093                 *fault_type = INTR_REMAP;
1094                 return intr_remap_fault_reasons[fault_reason - 0x20];
1095         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1096                 *fault_type = DMA_REMAP;
1097                 return dma_remap_fault_reasons[fault_reason];
1098         } else {
1099                 *fault_type = UNKNOWN;
1100                 return "Unknown";
1101         }
1102 }
1103
1104 void dmar_msi_unmask(unsigned int irq)
1105 {
1106         struct intel_iommu *iommu = get_irq_data(irq);
1107         unsigned long flag;
1108
1109         /* unmask it */
1110         spin_lock_irqsave(&iommu->register_lock, flag);
1111         writel(0, iommu->reg + DMAR_FECTL_REG);
1112         /* Read a reg to force flush the post write */
1113         readl(iommu->reg + DMAR_FECTL_REG);
1114         spin_unlock_irqrestore(&iommu->register_lock, flag);
1115 }
1116
1117 void dmar_msi_mask(unsigned int irq)
1118 {
1119         unsigned long flag;
1120         struct intel_iommu *iommu = get_irq_data(irq);
1121
1122         /* mask it */
1123         spin_lock_irqsave(&iommu->register_lock, flag);
1124         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1125         /* Read a reg to force flush the post write */
1126         readl(iommu->reg + DMAR_FECTL_REG);
1127         spin_unlock_irqrestore(&iommu->register_lock, flag);
1128 }
1129
1130 void dmar_msi_write(int irq, struct msi_msg *msg)
1131 {
1132         struct intel_iommu *iommu = get_irq_data(irq);
1133         unsigned long flag;
1134
1135         spin_lock_irqsave(&iommu->register_lock, flag);
1136         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1137         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1138         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1139         spin_unlock_irqrestore(&iommu->register_lock, flag);
1140 }
1141
1142 void dmar_msi_read(int irq, struct msi_msg *msg)
1143 {
1144         struct intel_iommu *iommu = get_irq_data(irq);
1145         unsigned long flag;
1146
1147         spin_lock_irqsave(&iommu->register_lock, flag);
1148         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1149         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1150         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1151         spin_unlock_irqrestore(&iommu->register_lock, flag);
1152 }
1153
1154 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1155                 u8 fault_reason, u16 source_id, unsigned long long addr)
1156 {
1157         const char *reason;
1158         int fault_type;
1159
1160         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1161
1162         if (fault_type == INTR_REMAP)
1163                 printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] "
1164                        "fault index %llx\n"
1165                         "INTR-REMAP:[fault reason %02d] %s\n",
1166                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1167                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1168                         fault_reason, reason);
1169         else
1170                 printk(KERN_ERR
1171                        "DMAR:[%s] Request device [%02x:%02x.%d] "
1172                        "fault addr %llx \n"
1173                        "DMAR:[fault reason %02d] %s\n",
1174                        (type ? "DMA Read" : "DMA Write"),
1175                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1176                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1177         return 0;
1178 }
1179
1180 #define PRIMARY_FAULT_REG_LEN (16)
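/*
 * Fault interrupt handler: walk the primary fault recording registers,
 * report each recorded fault via dmar_fault_do_one() and clear it, then
 * clear any remaining bits in the fault status register.
 */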
1181 irqreturn_t dmar_fault(int irq, void *dev_id)
1182 {
1183         struct intel_iommu *iommu = dev_id;
1184         int reg, fault_index;
1185         u32 fault_status;
1186         unsigned long flag;
1187
1188         spin_lock_irqsave(&iommu->register_lock, flag);
1189         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1190         if (fault_status)
1191                 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1192                        fault_status);
1193
1194         /* TBD: ignore advanced fault log currently */
1195         if (!(fault_status & DMA_FSTS_PPF))
1196                 goto clear_rest;
1197
1198         fault_index = dma_fsts_fault_record_index(fault_status);
1199         reg = cap_fault_reg_offset(iommu->cap);
1200         while (1) {
1201                 u8 fault_reason;
1202                 u16 source_id;
1203                 u64 guest_addr;
1204                 int type;
1205                 u32 data;
1206
1207                 /* highest 32 bits */
1208                 data = readl(iommu->reg + reg +
1209                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1210                 if (!(data & DMA_FRCD_F))
1211                         break;
1212
1213                 fault_reason = dma_frcd_fault_reason(data);
1214                 type = dma_frcd_type(data);
1215
1216                 data = readl(iommu->reg + reg +
1217                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1218                 source_id = dma_frcd_source_id(data);
1219
1220                 guest_addr = dmar_readq(iommu->reg + reg +
1221                                 fault_index * PRIMARY_FAULT_REG_LEN);
1222                 guest_addr = dma_frcd_page_addr(guest_addr);
1223                 /* clear the fault */
1224                 writel(DMA_FRCD_F, iommu->reg + reg +
1225                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1226
1227                 spin_unlock_irqrestore(&iommu->register_lock, flag);
1228
1229                 dmar_fault_do_one(iommu, type, fault_reason,
1230                                 source_id, guest_addr);
1231
1232                 fault_index++;
1233                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1234                         fault_index = 0;
1235                 spin_lock_irqsave(&iommu->register_lock, flag);
1236         }
1237 clear_rest:
1238         /* clear all the other faults */
1239         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1240         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1241
1242         spin_unlock_irqrestore(&iommu->register_lock, flag);
1243         return IRQ_HANDLED;
1244 }
1245
1246 int dmar_set_interrupt(struct intel_iommu *iommu)
1247 {
1248         int irq, ret;
1249
1250         /*
1251          * Check if the fault interrupt is already initialized.
1252          */
1253         if (iommu->irq)
1254                 return 0;
1255
1256         irq = create_irq();
1257         if (!irq) {
1258                 printk(KERN_ERR "IOMMU: no free vectors\n");
1259                 return -EINVAL;
1260         }
1261
1262         set_irq_data(irq, iommu);
1263         iommu->irq = irq;
1264
1265         ret = arch_setup_dmar_msi(irq);
1266         if (ret) {
1267                 set_irq_data(irq, NULL);
1268                 iommu->irq = 0;
1269                 destroy_irq(irq);
1270                 return ret;
1271         }
1272
1273         ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1274         if (ret)
1275                 printk(KERN_ERR "IOMMU: can't request irq\n");
1276         return ret;
1277 }
1278
1279 int __init enable_drhd_fault_handling(void)
1280 {
1281         struct dmar_drhd_unit *drhd;
1282
1283         /*
1284          * Enable fault control interrupt.
1285          */
1286         for_each_drhd_unit(drhd) {
1287                 int ret;
1288                 struct intel_iommu *iommu = drhd->iommu;
1289                 ret = dmar_set_interrupt(iommu);
1290
1291                 if (ret) {
1292                         printk(KERN_ERR "DRHD %Lx: failed to enable fault, "
1293                                " interrupt, ret %d\n",
1294                                (unsigned long long)drhd->reg_base_addr, ret);
1295                         return -1;
1296                 }
1297         }
1298
1299         return 0;
1300 }
1301
1302 /*
1303  * Re-enable Queued Invalidation interface.
1304  */
1305 int dmar_reenable_qi(struct intel_iommu *iommu)
1306 {
1307         if (!ecap_qis(iommu->ecap))
1308                 return -ENOENT;
1309
1310         if (!iommu->qi)
1311                 return -ENOENT;
1312
1313         /*
1314          * First disable queued invalidation.
1315          */
1316         dmar_disable_qi(iommu);
1317         /*
1318          * Then enable queued invalidation again. Since there is no pending
1319          * invalidation requests now, it's safe to re-enable queued
1320          * invalidation.
1321          */
1322         __dmar_enable_qi(iommu);
1323
1324         return 0;
1325 }
1326
1327 /*
1328  * Check interrupt remapping support in DMAR table description.
1329  */
1330 int dmar_ir_support(void)
1331 {
1332         struct acpi_table_dmar *dmar;
1333         dmar = (struct acpi_table_dmar *)dmar_tbl;
1334         return dmar ? (dmar->flags & 0x1) : 0;
1335 }