Record and propagate NUMA information for devices.
Signed-off-by: David S. Miller <davem@davemloft.net>
sd->op = &dev->ofdev;
sd->iommu = dev->bus->ofdev.dev.parent->archdata.iommu;
sd->stc = dev->bus->ofdev.dev.parent->archdata.stc;
+ sd->numa_node = dev->bus->ofdev.dev.parent->archdata.numa_node;
dev->ofdev.node = dp;
dev->ofdev.dev.parent = &dev->bus->ofdev.dev;
}
int iommu_table_init(struct iommu *iommu, int tsbsize,
- u32 dma_offset, u32 dma_addr_mask)
+ u32 dma_offset, u32 dma_addr_mask,
+ int numa_node)
{
- unsigned long i, tsbbase, order, sz, num_tsb_entries;
+ unsigned long i, order, sz, num_tsb_entries;
+ struct page *page;
num_tsb_entries = tsbsize / sizeof(iopte_t);
/* Allocate and initialize the free area map. */
sz = num_tsb_entries / 8;
sz = (sz + 7UL) & ~7UL;
- iommu->arena.map = kzalloc(sz, GFP_KERNEL);
+ iommu->arena.map = kmalloc_node(sz, GFP_KERNEL, numa_node);
if (!iommu->arena.map) {
printk(KERN_ERR "IOMMU: Error, kmalloc(arena.map) failed.\n");
return -ENOMEM;
}
+ memset(iommu->arena.map, 0, sz);
iommu->arena.limit = num_tsb_entries;
if (tlb_type != hypervisor)
/* Allocate and initialize the dummy page which we
* set inactive IO PTEs to point to.
*/
- iommu->dummy_page = get_zeroed_page(GFP_KERNEL);
- if (!iommu->dummy_page) {
+ page = alloc_pages_node(numa_node, GFP_KERNEL, 0);
+ if (!page) {
printk(KERN_ERR "IOMMU: Error, gfp(dummy_page) failed.\n");
goto out_free_map;
}
+ iommu->dummy_page = (unsigned long) page_address(page);
+ memset((void *)iommu->dummy_page, 0, PAGE_SIZE);
iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page);
/* Now allocate and setup the IOMMU page table itself. */
order = get_order(tsbsize);
- tsbbase = __get_free_pages(GFP_KERNEL, order);
- if (!tsbbase) {
+ page = alloc_pages_node(numa_node, GFP_KERNEL, order);
+ if (!page) {
printk(KERN_ERR "IOMMU: Error, gfp(tsb) failed.\n");
goto out_free_dummy_page;
}
- iommu->page_table = (iopte_t *)tsbbase;
+ iommu->page_table = (iopte_t *)page_address(page);
for (i = 0; i < num_tsb_entries; i++)
iopte_make_dummy(iommu, &iommu->page_table[i]);
static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addrp, gfp_t gfp)
{
+ unsigned long flags, order, first_page;
struct iommu *iommu;
+ struct page *page;
+ int npages, nid;
iopte_t *iopte;
- unsigned long flags, order, first_page;
void *ret;
- int npages;
size = IO_PAGE_ALIGN(size);
order = get_order(size);
if (order >= 10)
return NULL;
- first_page = __get_free_pages(gfp, order);
- if (first_page == 0UL)
+ nid = dev->archdata.numa_node;
+ page = alloc_pages_node(nid, gfp, order);
+ if (unlikely(!page))
return NULL;
+
+ first_page = (unsigned long) page_address(page);
memset((char *)first_page, 0, PAGE_SIZE << order);
iommu = dev->archdata.iommu;
sd->op = &isa_dev->ofdev;
sd->iommu = isa_br->ofdev.dev.parent->archdata.iommu;
sd->stc = isa_br->ofdev.dev.parent->archdata.stc;
+ sd->numa_node = isa_br->ofdev.dev.parent->archdata.numa_node;
isa_dev->ofdev.node = dp;
isa_dev->ofdev.dev.parent = &isa_br->ofdev.dev;
#include <linux/mod_devicetable.h>
#include <linux/slab.h>
#include <linux/errno.h>
+#include <linux/irq.h>
#include <linux/of_device.h>
#include <linux/of_platform.h>
struct device_node *dp = op->node;
struct device_node *pp, *ip;
unsigned int orig_irq = irq;
+ int nid;
if (irq == 0xffffffff)
return irq;
printk("%s: direct translate %x --> %x\n",
dp->full_name, orig_irq, irq);
- return irq;
+ goto out;
}
/* Something more complicated. Walk up to the root, applying
printk("%s: Apply IRQ trans [%s] %x --> %x\n",
op->node->full_name, ip->full_name, orig_irq, irq);
+out:
+ nid = of_node_to_nid(dp);
+ if (nid != -1) {
+ cpumask_t numa_mask = node_to_cpumask(nid);
+
+ irq_set_affinity(irq, numa_mask);
+ }
+
return irq;
}
sd->host_controller = pbm;
sd->prom_node = node;
sd->op = of_find_device_by_node(node);
+ sd->numa_node = pbm->numa_node;
sd = &sd->op->dev.archdata;
sd->iommu = pbm->iommu;
sd->stc = &pbm->stc;
+ sd->numa_node = pbm->numa_node;
type = of_get_property(node, "device_type", NULL);
if (type == NULL)
return 0;
}
+#ifdef CONFIG_NUMA
+int pcibus_to_node(struct pci_bus *pbus)
+{
+ struct pci_pbm_info *pbm = pbus->sysdata;
+
+ return pbm->numa_node;
+}
+EXPORT_SYMBOL(pcibus_to_node);
+#endif
+
/* Return the domain nuber for this pci bus */
int pci_domain_nr(struct pci_bus *pbus)
*/
fire_write(iommu->iommu_flushinv, ~(u64)0);
- err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask);
+ err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask,
+ pbm->numa_node);
if (err)
return err;
pbm->next = pci_pbm_root;
pci_pbm_root = pbm;
+ pbm->numa_node = -1;
+
pbm->scan_bus = pci_fire_scan_bus;
pbm->pci_ops = &sun4u_pci_ops;
pbm->config_space_reg_bits = 12;
struct pci_bus *pci_bus;
void (*scan_bus)(struct pci_pbm_info *);
struct pci_ops *pci_ops;
+
+ int numa_node;
};
struct pci_controller_info {
unsigned long devino)
{
int irq = ops->msiq_build_irq(pbm, msiqid, devino);
- int err;
+ int err, nid;
if (irq < 0)
return irq;
+ nid = pbm->numa_node;
+ if (nid != -1) {
+ cpumask_t numa_mask = node_to_cpumask(nid);
+
+ irq_set_affinity(irq, numa_mask);
+ }
err = request_irq(irq, sparc64_msiq_interrupt, 0,
"MSIQ",
&pbm->msiq_irq_cookies[msiqid - pbm->msiq_first]);
/* Leave diag mode enabled for full-flushing done
* in pci_iommu.c
*/
- err = iommu_table_init(iommu, IO_TSB_SIZE, 0xc0000000, 0xffffffff);
+ err = iommu_table_init(iommu, IO_TSB_SIZE, 0xc0000000, 0xffffffff,
+ pbm->numa_node);
if (err)
return err;
pbm->next = pci_pbm_root;
pci_pbm_root = pbm;
+ pbm->numa_node = -1;
+
pbm->scan_bus = psycho_scan_bus;
pbm->pci_ops = &sun4u_pci_ops;
pbm->config_space_reg_bits = 8;
* in pci_iommu.c
*/
err = iommu_table_init(iommu, tsbsize * 1024 * 8,
- dvma_offset, dma_mask);
+ dvma_offset, dma_mask, pbm->numa_node);
if (err)
return err;
pbm->name = dp->full_name;
printk("%s: SABRE PCI Bus Module\n", pbm->name);
+ pbm->numa_node = -1;
+
pbm->scan_bus = sabre_scan_bus;
pbm->pci_ops = &sun4u_pci_ops;
pbm->config_space_reg_bits = 8;
/* Leave diag mode enabled for full-flushing done
* in pci_iommu.c
*/
- err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask);
+ err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask,
+ pbm->numa_node);
if (err)
return err;
pbm->next = pci_pbm_root;
pci_pbm_root = pbm;
+ pbm->numa_node = -1;
+
pbm->scan_bus = schizo_scan_bus;
pbm->pci_ops = &sun4u_pci_ops;
pbm->config_space_reg_bits = 8;
static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addrp, gfp_t gfp)
{
- struct iommu *iommu;
unsigned long flags, order, first_page, npages, n;
+ struct iommu *iommu;
+ struct page *page;
void *ret;
long entry;
+ int nid;
size = IO_PAGE_ALIGN(size);
order = get_order(size);
npages = size >> IO_PAGE_SHIFT;
- first_page = __get_free_pages(gfp, order);
- if (unlikely(first_page == 0UL))
+ nid = dev->archdata.numa_node;
+ page = alloc_pages_node(nid, gfp, order);
+ if (unlikely(!page))
return NULL;
+ first_page = (unsigned long) page_address(page);
memset((char *)first_page, 0, PAGE_SIZE << order);
iommu = dev->archdata.iommu;
pbm->next = pci_pbm_root;
pci_pbm_root = pbm;
+ pbm->numa_node = of_node_to_nid(dp);
+
pbm->scan_bus = pci_sun4v_scan_bus;
pbm->pci_ops = &sun4v_pci_ops;
pbm->config_space_reg_bits = 12;
pbm->name = dp->full_name;
printk("%s: SUN4V PCI Bus Module\n", pbm->name);
+ printk("%s: On NUMA node %d\n", pbm->name, pbm->numa_node);
pci_determine_mem_io_space(pbm);
sbus->ofdev.dev.archdata.iommu = iommu;
sbus->ofdev.dev.archdata.stc = strbuf;
+ sbus->ofdev.dev.archdata.numa_node = -1;
reg_base = regs + SYSIO_IOMMUREG_BASE;
iommu->iommu_control = reg_base + IOMMU_CONTROL;
sbus->portid, regs);
/* Setup for TSB_SIZE=7, TBW_SIZE=0, MMU_DE=1, MMU_EN=1 */
- if (iommu_table_init(iommu, IO_TSB_SIZE, MAP_BASE, 0xffffffff))
+ if (iommu_table_init(iommu, IO_TSB_SIZE, MAP_BASE, 0xffffffff, -1))
goto fatal_memory_error;
control = upa_readq(iommu->iommu_control);
struct device_node *prom_node;
struct of_device *op;
+
+ int numa_node;
};
#endif /* _ASM_SPARC_DEVICE_H */
const char *name,
int def);
extern int of_find_in_proplist(const char *list, const char *match, int len);
+#ifdef CONFIG_NUMA
+extern int of_node_to_nid(struct device_node *dp);
+#else
+#define of_node_to_nid(dp) (-1)
+#endif
extern void prom_build_devicetree(void);
};
extern int iommu_table_init(struct iommu *iommu, int tsbsize,
- u32 dma_offset, u32 dma_addr_mask);
+ u32 dma_offset, u32 dma_addr_mask,
+ int numa_node);
#endif /* !(_SPARC64_IOMMU_H) */