drivers/iommu/dmar.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28
29 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* has to precede printk.h */
30
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/iova.h>
34 #include <linux/intel-iommu.h>
35 #include <linux/timer.h>
36 #include <linux/irq.h>
37 #include <linux/interrupt.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/slab.h>
41 #include <asm/irq_remapping.h>
42 #include <asm/iommu_table.h>
43
44 #include "irq_remapping.h"
45
46 /* No locks are needed as the DMA remapping hardware unit
47  * list is constructed at boot time and hotplug of
48  * these units is not supported by the architecture.
49  */
50 LIST_HEAD(dmar_drhd_units);
51
52 struct acpi_table_header * __initdata dmar_tbl;
53 static acpi_size dmar_tbl_size;
54
55 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
56 {
57         /*
58          * Add INCLUDE_ALL units at the tail, so a scan of the list will
59          * find them at the very end.
60          */
61         if (drhd->include_all)
62                 list_add_tail(&drhd->list, &dmar_drhd_units);
63         else
64                 list_add(&drhd->list, &dmar_drhd_units);
65 }
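/*
 * A note on why the ordering above matters (see dmar_find_matched_drhd_unit()
 * later in this file): a walk of dmar_drhd_units tries the explicitly scoped
 * DRHDs first and only falls back to an INCLUDE_ALL unit at the tail.  A
 * minimal consumer looks roughly like the sketch below, where drhd_matches()
 * is a hypothetical stand-in for the per-unit checks:
 *
 *	struct dmar_drhd_unit *drhd;
 *
 *	list_for_each_entry(drhd, &dmar_drhd_units, list)
 *		if (drhd_matches(drhd, dev))
 *			return drhd;
 */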
66
67 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
68                                            struct pci_dev **dev, u16 segment)
69 {
70         struct pci_bus *bus;
71         struct pci_dev *pdev = NULL;
72         struct acpi_dmar_pci_path *path;
73         int count;
74
75         bus = pci_find_bus(segment, scope->bus);
76         path = (struct acpi_dmar_pci_path *)(scope + 1);
77         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
78                 / sizeof(struct acpi_dmar_pci_path);
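        /*
         * Layout sketch, as assumed by the arithmetic above (the sizes come
         * from the ACPICA definitions: a 6-byte scope header followed by
         * 2-byte {dev, fn} path entries, one per bridge hop below the start
         * bus).  For example, a scope entry of length 8 carries a single
         * path entry ((8 - 6) / 2 == 1), i.e. a device directly on
         * scope->bus, while a length of 10 describes a device behind one
         * bridge.
         */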
79
80         while (count) {
81                 if (pdev)
82                         pci_dev_put(pdev);
83                 /*
84                  * Some BIOSes list nonexistent devices in the DMAR table;
85                  * just ignore them.
86                  */
87                 if (!bus) {
88                         pr_warn("Device scope bus [%d] not found\n", scope->bus);
89                         break;
90                 }
91                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
92                 if (!pdev) {
93                         /* warning will be printed below */
94                         break;
95                 }
96                 path++;
97                 count--;
98                 bus = pdev->subordinate;
99         }
100         if (!pdev) {
101                 pr_warn("Device scope device [%04x:%02x:%02x.%02x] not found\n",
102                         segment, scope->bus, path->dev, path->fn);
103                 *dev = NULL;
104                 return 0;
105         }
106         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
107                         pdev->subordinate) || (scope->entry_type ==
108                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
109                 pr_warn("Device scope type does not match for %s\n",
110                         pci_name(pdev));
111                 pci_dev_put(pdev);
112                 return -EINVAL;
113         }
114         *dev = pdev;
115         return 0;
116 }
117
118 int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
119                                 struct pci_dev ***devices, u16 segment)
120 {
121         struct acpi_dmar_device_scope *scope;
122         void *tmp = start;
123         int index;
124         int ret;
125
126         *cnt = 0;
127         while (start < end) {
128                 scope = start;
129                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
130                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
131                         (*cnt)++;
132                 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
133                         scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
134                         pr_warn("Unsupported device scope\n");
135                 }
136                 start += scope->length;
137         }
138         if (*cnt == 0)
139                 return 0;
140
141         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
142         if (!*devices)
143                 return -ENOMEM;
144
145         start = tmp;
146         index = 0;
147         while (start < end) {
148                 scope = start;
149                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
150                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
151                         ret = dmar_parse_one_dev_scope(scope,
152                                 &(*devices)[index], segment);
153                         if (ret) {
154                                 kfree(*devices);
155                                 return ret;
156                         }
157                         index++;
158                 }
159                 start += scope->length;
160         }
161
162         return 0;
163 }
164
165 /**
166  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
167  * structure, which uniquely represents one DMA remapping hardware unit
168  * present in the platform
169  */
170 static int __init
171 dmar_parse_one_drhd(struct acpi_dmar_header *header)
172 {
173         struct acpi_dmar_hardware_unit *drhd;
174         struct dmar_drhd_unit *dmaru;
175         int ret = 0;
176
177         drhd = (struct acpi_dmar_hardware_unit *)header;
178         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
179         if (!dmaru)
180                 return -ENOMEM;
181
182         dmaru->hdr = header;
183         dmaru->reg_base_addr = drhd->address;
184         dmaru->segment = drhd->segment;
185         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
186
187         ret = alloc_iommu(dmaru);
188         if (ret) {
189                 kfree(dmaru);
190                 return ret;
191         }
192         dmar_register_drhd_unit(dmaru);
193         return 0;
194 }
195
196 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
197 {
198         struct acpi_dmar_hardware_unit *drhd;
199         int ret = 0;
200
201         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
202
203         if (dmaru->include_all)
204                 return 0;
205
206         ret = dmar_parse_dev_scope((void *)(drhd + 1),
207                                 ((void *)drhd) + drhd->header.length,
208                                 &dmaru->devices_cnt, &dmaru->devices,
209                                 drhd->segment);
210         if (ret) {
211                 list_del(&dmaru->list);
212                 kfree(dmaru);
213         }
214         return ret;
215 }
216
217 #ifdef CONFIG_ACPI_NUMA
218 static int __init
219 dmar_parse_one_rhsa(struct acpi_dmar_header *header)
220 {
221         struct acpi_dmar_rhsa *rhsa;
222         struct dmar_drhd_unit *drhd;
223
224         rhsa = (struct acpi_dmar_rhsa *)header;
225         for_each_drhd_unit(drhd) {
226                 if (drhd->reg_base_addr == rhsa->base_address) {
227                         int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
228
229                         if (!node_online(node))
230                                 node = -1;
231                         drhd->iommu->node = node;
232                         return 0;
233                 }
234         }
235         WARN_TAINT(
236                 1, TAINT_FIRMWARE_WORKAROUND,
237                 "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
238                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
239                 drhd->reg_base_addr,
240                 dmi_get_system_info(DMI_BIOS_VENDOR),
241                 dmi_get_system_info(DMI_BIOS_VERSION),
242                 dmi_get_system_info(DMI_PRODUCT_VERSION));
243
244         return 0;
245 }
246 #endif
247
248 static void __init
249 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
250 {
251         struct acpi_dmar_hardware_unit *drhd;
252         struct acpi_dmar_reserved_memory *rmrr;
253         struct acpi_dmar_atsr *atsr;
254         struct acpi_dmar_rhsa *rhsa;
255
256         switch (header->type) {
257         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
258                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
259                                     header);
260                 pr_info("DRHD base: %#016Lx flags: %#x\n",
261                         (unsigned long long)drhd->address, drhd->flags);
262                 break;
263         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
264                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
265                                     header);
266                 pr_info("RMRR base: %#016Lx end: %#016Lx\n",
267                         (unsigned long long)rmrr->base_address,
268                         (unsigned long long)rmrr->end_address);
269                 break;
270         case ACPI_DMAR_TYPE_ATSR:
271                 atsr = container_of(header, struct acpi_dmar_atsr, header);
272                 pr_info("ATSR flags: %#x\n", atsr->flags);
273                 break;
274         case ACPI_DMAR_HARDWARE_AFFINITY:
275                 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
276                 pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
277                        (unsigned long long)rhsa->base_address,
278                        rhsa->proximity_domain);
279                 break;
280         }
281 }
282
283 /**
284  * dmar_table_detect - checks to see if the platform supports DMAR devices
285  */
286 static int __init dmar_table_detect(void)
287 {
288         acpi_status status = AE_OK;
289
290         /* if we can find the DMAR table, then there are DMAR devices */
291         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
292                                 (struct acpi_table_header **)&dmar_tbl,
293                                 &dmar_tbl_size);
294
295         if (ACPI_SUCCESS(status) && !dmar_tbl) {
296                 pr_warn("Unable to map DMAR\n");
297                 status = AE_NOT_FOUND;
298         }
299
300         return (ACPI_SUCCESS(status) ? 1 : 0);
301 }
302
303 /**
304  * parse_dmar_table - parses the DMA reporting table
305  */
306 static int __init
307 parse_dmar_table(void)
308 {
309         struct acpi_table_dmar *dmar;
310         struct acpi_dmar_header *entry_header;
311         int ret = 0;
312
313         /*
314          * Do the detection again; the earlier dmar_tbl mapping may have
315          * used the fixed map.
316          */
317         dmar_table_detect();
318
319         /*
320          * ACPI tables may not be DMA protected by tboot, so use the DMAR
321          * copy that SINIT saved in SinitMleData in the TXT heap (DMA protected).
322          */
323         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
324
325         dmar = (struct acpi_table_dmar *)dmar_tbl;
326         if (!dmar)
327                 return -ENODEV;
328
329         if (dmar->width < PAGE_SHIFT - 1) {
330                 pr_warn("Invalid DMAR haw\n");
331                 return -EINVAL;
332         }
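        /*
         * Note: per the DMAR table definition, dmar->width holds the host
         * address width minus one, hence the "+ 1" in the message below; a
         * machine with a 39-bit HAW, for instance, reports width == 38.
         * The check above rejects an encoded width below PAGE_SHIFT - 1,
         * i.e. less than one page of addressable space.
         */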
333
334         pr_info("Host address width %d\n", dmar->width + 1);
335
336         entry_header = (struct acpi_dmar_header *)(dmar + 1);
337         while (((unsigned long)entry_header) <
338                         (((unsigned long)dmar) + dmar_tbl->length)) {
339                 /* Avoid looping forever on bad ACPI tables */
340                 if (entry_header->length == 0) {
341                         pr_warn("Invalid 0-length structure\n");
342                         ret = -EINVAL;
343                         break;
344                 }
345
346                 dmar_table_print_dmar_entry(entry_header);
347
348                 switch (entry_header->type) {
349                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
350                         ret = dmar_parse_one_drhd(entry_header);
351                         break;
352                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
353                         ret = dmar_parse_one_rmrr(entry_header);
354                         break;
355                 case ACPI_DMAR_TYPE_ATSR:
356                         ret = dmar_parse_one_atsr(entry_header);
357                         break;
358                 case ACPI_DMAR_HARDWARE_AFFINITY:
359 #ifdef CONFIG_ACPI_NUMA
360                         ret = dmar_parse_one_rhsa(entry_header);
361 #endif
362                         break;
363                 default:
364                         pr_warn("Unknown DMAR structure type %d\n",
365                                 entry_header->type);
366                         ret = 0; /* for forward compatibility */
367                         break;
368                 }
369                 if (ret)
370                         break;
371
372                 entry_header = ((void *)entry_header + entry_header->length);
373         }
374         return ret;
375 }
376
377 static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
378                           struct pci_dev *dev)
379 {
380         int index;
381
382         while (dev) {
383                 for (index = 0; index < cnt; index++)
384                         if (dev == devices[index])
385                                 return 1;
386
387                 /* Check our parent */
388                 dev = dev->bus->self;
389         }
390
391         return 0;
392 }
393
394 struct dmar_drhd_unit *
395 dmar_find_matched_drhd_unit(struct pci_dev *dev)
396 {
397         struct dmar_drhd_unit *dmaru = NULL;
398         struct acpi_dmar_hardware_unit *drhd;
399
400         dev = pci_physfn(dev);
401
402         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
403                 drhd = container_of(dmaru->hdr,
404                                     struct acpi_dmar_hardware_unit,
405                                     header);
406
407                 if (dmaru->include_all &&
408                     drhd->segment == pci_domain_nr(dev->bus))
409                         return dmaru;
410
411                 if (dmar_pci_device_match(dmaru->devices,
412                                           dmaru->devices_cnt, dev))
413                         return dmaru;
414         }
415
416         return NULL;
417 }
418
419 int __init dmar_dev_scope_init(void)
420 {
421         static int dmar_dev_scope_initialized;
422         struct dmar_drhd_unit *drhd, *drhd_n;
423         int ret = -ENODEV;
424
425         if (dmar_dev_scope_initialized)
426                 return dmar_dev_scope_initialized;
427
428         if (list_empty(&dmar_drhd_units))
429                 goto fail;
430
431         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
432                 ret = dmar_parse_dev(drhd);
433                 if (ret)
434                         goto fail;
435         }
436
437         ret = dmar_parse_rmrr_atsr_dev();
438         if (ret)
439                 goto fail;
440
441         dmar_dev_scope_initialized = 1;
442         return 0;
443
444 fail:
445         dmar_dev_scope_initialized = ret;
446         return ret;
447 }
448
449
450 int __init dmar_table_init(void)
451 {
452         static int dmar_table_initialized;
453         int ret;
454
455         if (dmar_table_initialized)
456                 return 0;
457
458         dmar_table_initialized = 1;
459
460         ret = parse_dmar_table();
461         if (ret) {
462                 if (ret != -ENODEV)
463                         pr_info("Failed to parse DMAR table\n");
464                 return ret;
465         }
466
467         if (list_empty(&dmar_drhd_units)) {
468                 pr_info("No DMAR devices found\n");
469                 return -ENODEV;
470         }
471
472         return 0;
473 }
474
475 static void warn_invalid_dmar(u64 addr, const char *message)
476 {
477         WARN_TAINT_ONCE(
478                 1, TAINT_FIRMWARE_WORKAROUND,
479                 "Your BIOS is broken; DMAR reported at address %llx%s!\n"
480                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
481                 addr, message,
482                 dmi_get_system_info(DMI_BIOS_VENDOR),
483                 dmi_get_system_info(DMI_BIOS_VERSION),
484                 dmi_get_system_info(DMI_PRODUCT_VERSION));
485 }
486
487 int __init check_zero_address(void)
488 {
489         struct acpi_table_dmar *dmar;
490         struct acpi_dmar_header *entry_header;
491         struct acpi_dmar_hardware_unit *drhd;
492
493         dmar = (struct acpi_table_dmar *)dmar_tbl;
494         entry_header = (struct acpi_dmar_header *)(dmar + 1);
495
496         while (((unsigned long)entry_header) <
497                         (((unsigned long)dmar) + dmar_tbl->length)) {
498                 /* Avoid looping forever on bad ACPI tables */
499                 if (entry_header->length == 0) {
500                         pr_warn("Invalid 0-length structure\n");
501                         return 0;
502                 }
503
504                 if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
505                         void __iomem *addr;
506                         u64 cap, ecap;
507
508                         drhd = (void *)entry_header;
509                         if (!drhd->address) {
510                                 warn_invalid_dmar(0, "");
511                                 goto failed;
512                         }
513
514                         addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
515                         if (!addr) {
516                                 pr_warn("IOMMU: can't validate: %llx\n", drhd->address);
517                                 goto failed;
518                         }
519                         cap = dmar_readq(addr + DMAR_CAP_REG);
520                         ecap = dmar_readq(addr + DMAR_ECAP_REG);
521                         early_iounmap(addr, VTD_PAGE_SIZE);
522                         if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
523                                 warn_invalid_dmar(drhd->address,
524                                                   " returns all ones");
525                                 goto failed;
526                         }
527                 }
528
529                 entry_header = ((void *)entry_header + entry_header->length);
530         }
531         return 1;
532
533 failed:
534         return 0;
535 }
536
537 int __init detect_intel_iommu(void)
538 {
539         int ret;
540
541         ret = dmar_table_detect();
542         if (ret)
543                 ret = check_zero_address();
544         {
545                 struct acpi_table_dmar *dmar;
546
547                 dmar = (struct acpi_table_dmar *) dmar_tbl;
548
549                 if (ret && irq_remapping_enabled && cpu_has_x2apic &&
550                     dmar->flags & 0x1)
551                         pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
552
553                 if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
554                         iommu_detected = 1;
555                         /* Make sure ACS will be enabled */
556                         pci_request_acs();
557                 }
558
559 #ifdef CONFIG_X86
560                 if (ret)
561                         x86_init.iommu.iommu_init = intel_iommu_init;
562 #endif
563         }
564         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
565         dmar_tbl = NULL;
566
567         return ret ? 1 : -ENODEV;
568 }
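/*
 * Return convention, for reference: 1 when a usable DMAR table was found,
 * -ENODEV otherwise.  The IOMMU_INIT_POST(detect_intel_iommu) line at the
 * bottom of this file hooks this detector into the x86 IOMMU init table,
 * which is how it is expected to be invoked during boot.
 */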
569
570
571 static void unmap_iommu(struct intel_iommu *iommu)
572 {
573         iounmap(iommu->reg);
574         release_mem_region(iommu->reg_phys, iommu->reg_size);
575 }
576
577 /**
578  * map_iommu: map the iommu's registers
579  * @iommu: the iommu to map
580  * @phys_addr: the physical address of the base register
581  *
582  * Memory map the iommu's registers.  Start with a single page, and
583  * possibly expand if that turns out to be insufficient.
584  */
585 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
586 {
587         int map_size, err = 0;
588
589         iommu->reg_phys = phys_addr;
590         iommu->reg_size = VTD_PAGE_SIZE;
591
592         if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
593                 pr_err("IOMMU: can't reserve memory\n");
594                 err = -EBUSY;
595                 goto out;
596         }
597
598         iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
599         if (!iommu->reg) {
600                 pr_err("IOMMU: can't map the region\n");
601                 err = -ENOMEM;
602                 goto release;
603         }
604
605         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
606         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
607
608         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
609                 err = -EINVAL;
610                 warn_invalid_dmar(phys_addr, " returns all ones");
611                 goto unmap;
612         }
613
614         /* the registers might be more than one page */
615         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
616                          cap_max_fault_reg_offset(iommu->cap));
617         map_size = VTD_PAGE_ALIGN(map_size);
618         if (map_size > iommu->reg_size) {
619                 iounmap(iommu->reg);
620                 release_mem_region(iommu->reg_phys, iommu->reg_size);
621                 iommu->reg_size = map_size;
622                 if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
623                                         iommu->name)) {
624                         pr_err("IOMMU: can't reserve memory\n");
625                         err = -EBUSY;
626                         goto out;
627                 }
628                 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
629                 if (!iommu->reg) {
630                         pr_err("IOMMU: can't map the region\n");
631                         err = -ENOMEM;
632                         goto release;
633                 }
634         }
635         err = 0;
636         goto out;
637
638 unmap:
639         iounmap(iommu->reg);
640 release:
641         release_mem_region(iommu->reg_phys, iommu->reg_size);
642 out:
643         return err;
644 }
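/*
 * A worked example of the resizing above (the numbers are purely
 * illustrative): with VTD_PAGE_SIZE == 4KiB, if ecap_max_iotlb_offset()
 * reported 0x1000 and cap_max_fault_reg_offset() reported 0x2080, then
 * map_size would become VTD_PAGE_ALIGN(0x2080) == 0x3000 and the initial
 * one-page mapping would be torn down and redone at 12KiB so that the
 * fault recording registers are reachable through iommu->reg.
 */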
645
646 int alloc_iommu(struct dmar_drhd_unit *drhd)
647 {
648         struct intel_iommu *iommu;
649         u32 ver;
650         static int iommu_allocated = 0;
651         int agaw = 0;
652         int msagaw = 0;
653         int err;
654
655         if (!drhd->reg_base_addr) {
656                 warn_invalid_dmar(0, "");
657                 return -EINVAL;
658         }
659
660         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
661         if (!iommu)
662                 return -ENOMEM;
663
664         iommu->seq_id = iommu_allocated++;
665         sprintf(iommu->name, "dmar%d", iommu->seq_id);
666
667         err = map_iommu(iommu, drhd->reg_base_addr);
668         if (err) {
669                 pr_err("IOMMU: failed to map %s\n", iommu->name);
670                 goto error;
671         }
672
673         err = -EINVAL;
674         agaw = iommu_calculate_agaw(iommu);
675         if (agaw < 0) {
676                 pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
677                         iommu->seq_id);
678                 goto err_unmap;
679         }
680         msagaw = iommu_calculate_max_sagaw(iommu);
681         if (msagaw < 0) {
682                 pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
683                         iommu->seq_id);
684                 goto err_unmap;
685         }
686         iommu->agaw = agaw;
687         iommu->msagaw = msagaw;
688
689         iommu->node = -1;
690
691         ver = readl(iommu->reg + DMAR_VER_REG);
692         pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
693                 iommu->seq_id,
694                 (unsigned long long)drhd->reg_base_addr,
695                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
696                 (unsigned long long)iommu->cap,
697                 (unsigned long long)iommu->ecap);
698
699         raw_spin_lock_init(&iommu->register_lock);
700
701         drhd->iommu = iommu;
702         return 0;
703
704  err_unmap:
705         unmap_iommu(iommu);
706  error:
707         kfree(iommu);
708         return err;
709 }
710
711 void free_iommu(struct intel_iommu *iommu)
712 {
713         if (!iommu)
714                 return;
715
716         free_dmar_iommu(iommu);
717
718         if (iommu->reg)
719                 unmap_iommu(iommu);
720
721         kfree(iommu);
722 }
723
724 /*
725  * Reclaim all the submitted descriptors which have completed their work.
726  */
727 static inline void reclaim_free_desc(struct q_inval *qi)
728 {
729         while (qi->desc_status[qi->free_tail] == QI_DONE ||
730                qi->desc_status[qi->free_tail] == QI_ABORT) {
731                 qi->desc_status[qi->free_tail] = QI_FREE;
732                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
733                 qi->free_cnt++;
734         }
735 }
736
737 static int qi_check_fault(struct intel_iommu *iommu, int index)
738 {
739         u32 fault;
740         int head, tail;
741         struct q_inval *qi = iommu->qi;
742         int wait_index = (index + 1) % QI_LENGTH;
743
744         if (qi->desc_status[wait_index] == QI_ABORT)
745                 return -EAGAIN;
746
747         fault = readl(iommu->reg + DMAR_FSTS_REG);
748
749         /*
750          * If IQE happens, the head points to the descriptor associated
751          * with the error. No new descriptors are fetched until the IQE
752          * is cleared.
753          */
754         if (fault & DMA_FSTS_IQE) {
755                 head = readl(iommu->reg + DMAR_IQH_REG);
756                 if ((head >> DMAR_IQ_SHIFT) == index) {
757                         pr_err("VT-d detected invalid descriptor: "
758                                 "low=%llx, high=%llx\n",
759                                 (unsigned long long)qi->desc[index].low,
760                                 (unsigned long long)qi->desc[index].high);
761                         memcpy(&qi->desc[index], &qi->desc[wait_index],
762                                         sizeof(struct qi_desc));
763                         __iommu_flush_cache(iommu, &qi->desc[index],
764                                         sizeof(struct qi_desc));
765                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
766                         return -EINVAL;
767                 }
768         }
769
770         /*
771          * If ITE happens, all pending wait_desc commands are aborted.
772          * No new descriptors are fetched until the ITE is cleared.
773          */
774         if (fault & DMA_FSTS_ITE) {
775                 head = readl(iommu->reg + DMAR_IQH_REG);
776                 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
777                 head |= 1;
778                 tail = readl(iommu->reg + DMAR_IQT_REG);
779                 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
780
781                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
782
783                 do {
784                         if (qi->desc_status[head] == QI_IN_USE)
785                                 qi->desc_status[head] = QI_ABORT;
786                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
787                 } while (head != tail);
788
789                 if (qi->desc_status[wait_index] == QI_ABORT)
790                         return -EAGAIN;
791         }
792
793         if (fault & DMA_FSTS_ICE)
794                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
795
796         return 0;
797 }
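/*
 * How the invalidation queue is driven by qi_submit_sync() below, for
 * reference: each submission consumes two ring slots -- the caller's
 * descriptor plus a wait descriptor whose status write signals that the
 * hardware has processed both.  A minimal caller therefore only fills in
 * a struct qi_desc and submits it, just as the flush helpers further down
 * do (here the global interrupt-entry-cache flush used by qi_global_iec()):
 *
 *	struct qi_desc desc;
 *
 *	desc.low = QI_IEC_TYPE;
 *	desc.high = 0;
 *	qi_submit_sync(&desc, iommu);
 */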
798
799 /*
800  * Submit the queued invalidation descriptor to the remapping
801  * hardware unit and wait for its completion.
802  */
803 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
804 {
805         int rc;
806         struct q_inval *qi = iommu->qi;
807         struct qi_desc *hw, wait_desc;
808         int wait_index, index;
809         unsigned long flags;
810
811         if (!qi)
812                 return 0;
813
814         hw = qi->desc;
815
816 restart:
817         rc = 0;
818
819         raw_spin_lock_irqsave(&qi->q_lock, flags);
820         while (qi->free_cnt < 3) {
821                 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
822                 cpu_relax();
823                 raw_spin_lock_irqsave(&qi->q_lock, flags);
824         }
825
826         index = qi->free_head;
827         wait_index = (index + 1) % QI_LENGTH;
828
829         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
830
831         hw[index] = *desc;
832
833         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
834                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
835         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
836
837         hw[wait_index] = wait_desc;
838
839         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
840         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
841
842         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
843         qi->free_cnt -= 2;
844
845         /*
846          * update the HW tail register indicating the presence of
847          * new descriptors.
848          */
849         writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
850
851         while (qi->desc_status[wait_index] != QI_DONE) {
852                 /*
853                  * We leave interrupts disabled to prevent interrupt context
854                  * from queueing another command while one is already submitted
855                  * and waiting for completion on this CPU. This avoids a
856                  * deadlock where the interrupt context could wait indefinitely
857                  * for free slots in the queue.
858                  */
859                 rc = qi_check_fault(iommu, index);
860                 if (rc)
861                         break;
862
863                 raw_spin_unlock(&qi->q_lock);
864                 cpu_relax();
865                 raw_spin_lock(&qi->q_lock);
866         }
867
868         qi->desc_status[index] = QI_DONE;
869
870         reclaim_free_desc(qi);
871         raw_spin_unlock_irqrestore(&qi->q_lock, flags);
872
873         if (rc == -EAGAIN)
874                 goto restart;
875
876         return rc;
877 }
878
879 /*
880  * Flush the global interrupt entry cache.
881  */
882 void qi_global_iec(struct intel_iommu *iommu)
883 {
884         struct qi_desc desc;
885
886         desc.low = QI_IEC_TYPE;
887         desc.high = 0;
888
889         /* should never fail */
890         qi_submit_sync(&desc, iommu);
891 }
892
893 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
894                       u64 type)
895 {
896         struct qi_desc desc;
897
898         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
899                         | QI_CC_GRAN(type) | QI_CC_TYPE;
900         desc.high = 0;
901
902         qi_submit_sync(&desc, iommu);
903 }
904
905 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
906                     unsigned int size_order, u64 type)
907 {
908         u8 dw = 0, dr = 0;
909
910         struct qi_desc desc;
911         int ih = 0;
912
913         if (cap_write_drain(iommu->cap))
914                 dw = 1;
915
916         if (cap_read_drain(iommu->cap))
917                 dr = 1;
918
919         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
920                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
921         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
922                 | QI_IOTLB_AM(size_order);
923
924         qi_submit_sync(&desc, iommu);
925 }
926
927 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
928                         u64 addr, unsigned mask)
929 {
930         struct qi_desc desc;
931
932         if (mask) {
933                 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
934                 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
935                 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
936         } else
937                 desc.high = QI_DEV_IOTLB_ADDR(addr);
938
939         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
940                 qdep = 0;
941
942         desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
943                    QI_DIOTLB_TYPE;
944
945         qi_submit_sync(&desc, iommu);
946 }
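/*
 * A note on the mask handling above, as read from the code: a non-zero
 * @mask requests invalidation of 2^mask pages, so @addr must be aligned
 * to that whole region (the BUG_ON), and the region size is then encoded
 * into the address by setting the low-order bits below the region's top
 * bit before QI_DEV_IOTLB_SIZE is or'ed in.  For example, with
 * VTD_PAGE_SHIFT == 12 and mask == 2 (a 16KiB region), addr must be
 * 16KiB-aligned and bits [12:0] get set.
 */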
947
948 /*
949  * Disable Queued Invalidation interface.
950  */
951 void dmar_disable_qi(struct intel_iommu *iommu)
952 {
953         unsigned long flags;
954         u32 sts;
955         cycles_t start_time = get_cycles();
956
957         if (!ecap_qis(iommu->ecap))
958                 return;
959
960         raw_spin_lock_irqsave(&iommu->register_lock, flags);
961
962         sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
963         if (!(sts & DMA_GSTS_QIES))
964                 goto end;
965
966         /*
967          * Give a chance to HW to complete the pending invalidation requests.
968          */
969         while ((readl(iommu->reg + DMAR_IQT_REG) !=
970                 readl(iommu->reg + DMAR_IQH_REG)) &&
971                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
972                 cpu_relax();
973
974         iommu->gcmd &= ~DMA_GCMD_QIE;
975         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
976
977         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
978                       !(sts & DMA_GSTS_QIES), sts);
979 end:
980         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
981 }
982
983 /*
984  * Enable queued invalidation.
985  */
986 static void __dmar_enable_qi(struct intel_iommu *iommu)
987 {
988         u32 sts;
989         unsigned long flags;
990         struct q_inval *qi = iommu->qi;
991
992         qi->free_head = qi->free_tail = 0;
993         qi->free_cnt = QI_LENGTH;
994
995         raw_spin_lock_irqsave(&iommu->register_lock, flags);
996
997         /* write zero to the tail reg */
998         writel(0, iommu->reg + DMAR_IQT_REG);
999
1000         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
1001
1002         iommu->gcmd |= DMA_GCMD_QIE;
1003         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1004
1005         /* Make sure hardware completes it */
1006         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1007
1008         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1009 }
1010
1011 /*
1012  * Enable Queued Invalidation interface. This is a must to support
1013  * interrupt-remapping. Also used by DMA-remapping, which replaces
1014  * register-based IOTLB invalidation.
1015  */
1016 int dmar_enable_qi(struct intel_iommu *iommu)
1017 {
1018         struct q_inval *qi;
1019         struct page *desc_page;
1020
1021         if (!ecap_qis(iommu->ecap))
1022                 return -ENOENT;
1023
1024         /*
1025          * queued invalidation is already set up and enabled.
1026          */
1027         if (iommu->qi)
1028                 return 0;
1029
1030         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1031         if (!iommu->qi)
1032                 return -ENOMEM;
1033
1034         qi = iommu->qi;
1035
1036
1037         desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
1038         if (!desc_page) {
1039                 kfree(qi);
1040                 iommu->qi = NULL;
1041                 return -ENOMEM;
1042         }
1043
1044         qi->desc = page_address(desc_page);
1045
1046         qi->desc_status = kzalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1047         if (!qi->desc_status) {
1048                 free_page((unsigned long) qi->desc);
1049                 kfree(qi);
1050                 iommu->qi = NULL;
1051                 return -ENOMEM;
1052         }
1053
1054         qi->free_head = qi->free_tail = 0;
1055         qi->free_cnt = QI_LENGTH;
1056
1057         raw_spin_lock_init(&qi->q_lock);
1058
1059         __dmar_enable_qi(iommu);
1060
1061         return 0;
1062 }
1063
1064 /* iommu interrupt handling. Most of it is MSI-like. */
1065
1066 enum faulttype {
1067         DMA_REMAP,
1068         INTR_REMAP,
1069         UNKNOWN,
1070 };
1071
1072 static const char *dma_remap_fault_reasons[] =
1073 {
1074         "Software",
1075         "Present bit in root entry is clear",
1076         "Present bit in context entry is clear",
1077         "Invalid context entry",
1078         "Access beyond MGAW",
1079         "PTE Write access is not set",
1080         "PTE Read access is not set",
1081         "Next page table ptr is invalid",
1082         "Root table address invalid",
1083         "Context table ptr is invalid",
1084         "non-zero reserved fields in RTP",
1085         "non-zero reserved fields in CTP",
1086         "non-zero reserved fields in PTE",
1087         "PCE for translation request specifies blocking",
1088 };
1089
1090 static const char *irq_remap_fault_reasons[] =
1091 {
1092         "Detected reserved fields in the decoded interrupt-remapped request",
1093         "Interrupt index exceeded the interrupt-remapping table size",
1094         "Present field in the IRTE entry is clear",
1095         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1096         "Detected reserved fields in the IRTE entry",
1097         "Blocked a compatibility format interrupt request",
1098         "Blocked an interrupt request due to source-id verification failure",
1099 };
1100
1101 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(dma_remap_fault_reasons) - 1)
1102
1103 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1104 {
1105         if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1106                                         ARRAY_SIZE(irq_remap_fault_reasons))) {
1107                 *fault_type = INTR_REMAP;
1108                 return irq_remap_fault_reasons[fault_reason - 0x20];
1109         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1110                 *fault_type = DMA_REMAP;
1111                 return dma_remap_fault_reasons[fault_reason];
1112         } else {
1113                 *fault_type = UNKNOWN;
1114                 return "Unknown";
1115         }
1116 }
1117
1118 void dmar_msi_unmask(struct irq_data *data)
1119 {
1120         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1121         unsigned long flag;
1122
1123         /* unmask it */
1124         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1125         writel(0, iommu->reg + DMAR_FECTL_REG);
1126         /* Read a reg to force flush the post write */
1127         readl(iommu->reg + DMAR_FECTL_REG);
1128         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1129 }
1130
1131 void dmar_msi_mask(struct irq_data *data)
1132 {
1133         unsigned long flag;
1134         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1135
1136         /* mask it */
1137         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1138         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1139         /* Read a reg to force flush the post write */
1140         readl(iommu->reg + DMAR_FECTL_REG);
1141         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1142 }
1143
1144 void dmar_msi_write(int irq, struct msi_msg *msg)
1145 {
1146         struct intel_iommu *iommu = irq_get_handler_data(irq);
1147         unsigned long flag;
1148
1149         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1150         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1151         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1152         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1153         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1154 }
1155
1156 void dmar_msi_read(int irq, struct msi_msg *msg)
1157 {
1158         struct intel_iommu *iommu = irq_get_handler_data(irq);
1159         unsigned long flag;
1160
1161         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1162         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1163         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1164         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1165         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1166 }
1167
1168 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1169                 u8 fault_reason, u16 source_id, unsigned long long addr)
1170 {
1171         const char *reason;
1172         int fault_type;
1173
1174         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1175
1176         if (fault_type == INTR_REMAP)
1177                 pr_err("INTR-REMAP: Request device [%02x:%02x.%d] "
1178                        "fault index %llx\n"
1179                        "INTR-REMAP:[fault reason %02d] %s\n",
1180                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1181                        PCI_FUNC(source_id & 0xFF), addr >> 48,
1182                        fault_reason, reason);
1183         else
1184                 pr_err("DMAR:[%s] Request device [%02x:%02x.%d] "
1185                        "fault addr %llx\n"
1186                        "DMAR:[fault reason %02d] %s\n",
1187                        (type ? "DMA Read" : "DMA Write"),
1188                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1189                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1190         return 0;
1191 }
1192
1193 #define PRIMARY_FAULT_REG_LEN (16)
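/*
 * Layout of one primary fault recording register, as consumed by
 * dmar_fault() below (16 bytes per record; the offsets match the
 * readl()/dmar_readq() calls in the handler):
 *
 *	+0	64-bit faulting address; the page-aligned part is extracted
 *		with dma_frcd_page_addr()
 *	+8	dword holding the source-id of the faulting request
 *	+12	dword holding the fault reason, the request type and the
 *		F bit that marks the record as valid
 */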
1194 irqreturn_t dmar_fault(int irq, void *dev_id)
1195 {
1196         struct intel_iommu *iommu = dev_id;
1197         int reg, fault_index;
1198         u32 fault_status;
1199         unsigned long flag;
1200
1201         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1202         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1203         if (fault_status)
1204                 pr_err("DRHD: handling fault status reg %x\n", fault_status);
1205
1206         /* TBD: ignore advanced fault log currently */
1207         if (!(fault_status & DMA_FSTS_PPF))
1208                 goto clear_rest;
1209
1210         fault_index = dma_fsts_fault_record_index(fault_status);
1211         reg = cap_fault_reg_offset(iommu->cap);
1212         while (1) {
1213                 u8 fault_reason;
1214                 u16 source_id;
1215                 u64 guest_addr;
1216                 int type;
1217                 u32 data;
1218
1219                 /* highest 32 bits */
1220                 data = readl(iommu->reg + reg +
1221                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1222                 if (!(data & DMA_FRCD_F))
1223                         break;
1224
1225                 fault_reason = dma_frcd_fault_reason(data);
1226                 type = dma_frcd_type(data);
1227
1228                 data = readl(iommu->reg + reg +
1229                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1230                 source_id = dma_frcd_source_id(data);
1231
1232                 guest_addr = dmar_readq(iommu->reg + reg +
1233                                 fault_index * PRIMARY_FAULT_REG_LEN);
1234                 guest_addr = dma_frcd_page_addr(guest_addr);
1235                 /* clear the fault */
1236                 writel(DMA_FRCD_F, iommu->reg + reg +
1237                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1238
1239                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1240
1241                 dmar_fault_do_one(iommu, type, fault_reason,
1242                                 source_id, guest_addr);
1243
1244                 fault_index++;
1245                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1246                         fault_index = 0;
1247                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1248         }
1249 clear_rest:
1250         /* clear all the other faults */
1251         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1252         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1253
1254         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1255         return IRQ_HANDLED;
1256 }
1257
1258 int dmar_set_interrupt(struct intel_iommu *iommu)
1259 {
1260         int irq, ret;
1261
1262         /*
1263          * Check if the fault interrupt is already initialized.
1264          */
1265         if (iommu->irq)
1266                 return 0;
1267
1268         irq = create_irq();
1269         if (!irq) {
1270                 pr_err("IOMMU: no free vectors\n");
1271                 return -EINVAL;
1272         }
1273
1274         irq_set_handler_data(irq, iommu);
1275         iommu->irq = irq;
1276
1277         ret = arch_setup_dmar_msi(irq);
1278         if (ret) {
1279                 irq_set_handler_data(irq, NULL);
1280                 iommu->irq = 0;
1281                 destroy_irq(irq);
1282                 return ret;
1283         }
1284
1285         ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1286         if (ret)
1287                 pr_err("IOMMU: can't request irq\n");
1288         return ret;
1289 }
1290
1291 int __init enable_drhd_fault_handling(void)
1292 {
1293         struct dmar_drhd_unit *drhd;
1294
1295         /*
1296          * Enable fault control interrupt.
1297          */
1298         for_each_drhd_unit(drhd) {
1299                 int ret;
1300                 struct intel_iommu *iommu = drhd->iommu;
1301                 ret = dmar_set_interrupt(iommu);
1302
1303                 if (ret) {
1304                         pr_err("DRHD %Lx: failed to enable fault interrupt, ret %d\n",
1305                                (unsigned long long)drhd->reg_base_addr, ret);
1306                         return -1;
1307                 }
1308
1309                 /*
1310                  * Clear any previous faults.
1311                  */
1312                 dmar_fault(iommu->irq, iommu);
1313         }
1314
1315         return 0;
1316 }
1317
1318 /*
1319  * Re-enable Queued Invalidation interface.
1320  */
1321 int dmar_reenable_qi(struct intel_iommu *iommu)
1322 {
1323         if (!ecap_qis(iommu->ecap))
1324                 return -ENOENT;
1325
1326         if (!iommu->qi)
1327                 return -ENOENT;
1328
1329         /*
1330          * First disable queued invalidation.
1331          */
1332         dmar_disable_qi(iommu);
1333         /*
1334          * Then enable queued invalidation again. Since there is no pending
1335          * invalidation requests now, it's safe to re-enable queued
1336          * invalidation.
1337          */
1338         __dmar_enable_qi(iommu);
1339
1340         return 0;
1341 }
1342
1343 /*
1344  * Check interrupt remapping support in DMAR table description.
1345  */
1346 int __init dmar_ir_support(void)
1347 {
1348         struct acpi_table_dmar *dmar;
1349         dmar = (struct acpi_table_dmar *)dmar_tbl;
1350         if (!dmar)
1351                 return 0;
1352         return dmar->flags & 0x1;
1353 }
1354 IOMMU_INIT_POST(detect_intel_iommu);