* The ITS structure - contains most of the infrastructure, with the
* top-level MSI domain, the command queue, the collections, and the
* list of devices writing to it.
+ *
+ * dev_alloc_lock has to be taken for device allocations, while the
+ * spinlock must be taken to parse data structures such as the device
+ * list.
*/
struct its_node {
raw_spinlock_t lock;
+ struct mutex dev_alloc_lock;
struct list_head entry;
void __iomem *base;
phys_addr_t phys_base;
#define ITS_ITT_ALIGN SZ_256
+/* The maximum number of VPEID bits supported by VLPI commands */
+#define ITS_MAX_VPEID_BITS (16)
+#define ITS_MAX_VPEID (1 << (ITS_MAX_VPEID_BITS))
+
/* Convert page order to size in bytes */
#define PAGE_ORDER_TO_SIZE(o) (PAGE_SIZE << (o))
void *itt;
u32 nr_ites;
u32 device_id;
+ bool shared;
};
static struct {
static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr)
{
- its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 50, 8);
+ its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8);
}
static void its_encode_valid(struct its_cmd_block *cmd, int valid)
static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr)
{
- its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 50, 16);
+ its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16);
}
static void its_encode_collection(struct its_cmd_block *cmd, u16 col)
static void its_encode_vpt_addr(struct its_cmd_block *cmd, u64 vpt_pa)
{
- its_mask_encode(&cmd->raw_cmd[3], vpt_pa >> 16, 50, 16);
+ its_mask_encode(&cmd->raw_cmd[3], vpt_pa >> 16, 51, 16);
}
static void its_encode_vpt_size(struct its_cmd_block *cmd, u8 vpt_size)
* This gives us (((1UL << id_bits) - 8192) >> 5) possible allocations.
*/
#define IRQS_PER_CHUNK_SHIFT 5
-#define IRQS_PER_CHUNK (1 << IRQS_PER_CHUNK_SHIFT)
+#define IRQS_PER_CHUNK (1UL << IRQS_PER_CHUNK_SHIFT)
#define ITS_MAX_LPI_NRBITS 16 /* 64K LPIs */
static unsigned long *lpi_bitmap;
u64 val = its_read_baser(its, baser);
u64 esz = GITS_BASER_ENTRY_SIZE(val);
u64 type = GITS_BASER_TYPE(val);
+ u64 baser_phys, tmp;
u32 alloc_pages;
void *base;
- u64 tmp;
retry_alloc_baser:
alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz);
if (!base)
return -ENOMEM;
+ baser_phys = virt_to_phys(base);
+
+ /* Check if the physical address of the memory is above 48bits */
+ if (IS_ENABLED(CONFIG_ARM64_64K_PAGES) && (baser_phys >> 48)) {
+
+ /* 52bit PA is supported only when PageSize=64K */
+ if (psz != SZ_64K) {
+ pr_err("ITS: no 52bit PA support when psz=%d\n", psz);
+ free_pages((unsigned long)base, order);
+ return -ENXIO;
+ }
+
+ /* Convert 52bit PA to 48bit field */
+ baser_phys = GITS_BASER_PHYS_52_to_48(baser_phys);
+ }
+
retry_baser:
- val = (virt_to_phys(base) |
+ val = (baser_phys |
(type << GITS_BASER_TYPE_SHIFT) |
((esz - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) |
((alloc_pages - 1) << GITS_BASER_PAGES_SHIFT) |
static bool its_parse_indirect_baser(struct its_node *its,
struct its_baser *baser,
- u32 psz, u32 *order)
+ u32 psz, u32 *order, u32 ids)
{
u64 tmp = its_read_baser(its, baser);
u64 type = GITS_BASER_TYPE(tmp);
u64 esz = GITS_BASER_ENTRY_SIZE(tmp);
u64 val = GITS_BASER_InnerShareable | GITS_BASER_RaWaWb;
- u32 ids = its->device_ids;
u32 new_order = *order;
bool indirect = false;
continue;
case GITS_BASER_TYPE_DEVICE:
+ indirect = its_parse_indirect_baser(its, baser,
+ psz, &order,
+ its->device_ids);
case GITS_BASER_TYPE_VCPU:
indirect = its_parse_indirect_baser(its, baser,
- psz, &order);
+ psz, &order,
+ ITS_MAX_VPEID_BITS);
break;
}
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
/*
- * At least one bit of EventID is being used, hence a minimum
- * of two entries. No, the architecture doesn't let you
- * express an ITT with a single entry.
+ * We allocate at least one chunk worth of LPIs bet device,
+ * and thus that many ITEs. The device may require less though.
*/
- nr_ites = max(2UL, roundup_pow_of_two(nvecs));
+ nr_ites = max(IRQS_PER_CHUNK, roundup_pow_of_two(nvecs));
sz = nr_ites * its->ite_size;
sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1;
itt = kzalloc(sz, GFP_KERNEL);
kfree(its_dev);
}
-static int its_alloc_device_irq(struct its_device *dev, irq_hw_number_t *hwirq)
+static int its_alloc_device_irq(struct its_device *dev, int nvecs, irq_hw_number_t *hwirq)
{
int idx;
- idx = find_first_zero_bit(dev->event_map.lpi_map,
- dev->event_map.nr_lpis);
- if (idx == dev->event_map.nr_lpis)
+ idx = bitmap_find_free_region(dev->event_map.lpi_map,
+ dev->event_map.nr_lpis,
+ get_count_order(nvecs));
+ if (idx < 0)
return -ENOSPC;
*hwirq = dev->event_map.lpi_base + idx;
struct its_device *its_dev;
struct msi_domain_info *msi_info;
u32 dev_id;
+ int err = 0;
/*
* We ignore "dev" entierely, and rely on the dev_id that has
return -EINVAL;
}
+ mutex_lock(&its->dev_alloc_lock);
its_dev = its_find_device(its, dev_id);
if (its_dev) {
/*
* another alias (PCI bridge of some sort). No need to
* create the device.
*/
+ its_dev->shared = true;
pr_debug("Reusing ITT for devID %x\n", dev_id);
goto out;
}
its_dev = its_create_device(its, dev_id, nvec, true);
- if (!its_dev)
- return -ENOMEM;
+ if (!its_dev) {
+ err = -ENOMEM;
+ goto out;
+ }
pr_debug("ITT %d entries, %d bits\n", nvec, ilog2(nvec));
out:
+ mutex_unlock(&its->dev_alloc_lock);
info->scratchpad[0].ptr = its_dev;
- return 0;
+ return err;
}
static struct msi_domain_ops its_msi_domain_ops = {
int err;
int i;
- for (i = 0; i < nr_irqs; i++) {
- err = its_alloc_device_irq(its_dev, &hwirq);
- if (err)
- return err;
+ err = its_alloc_device_irq(its_dev, nr_irqs, &hwirq);
+ if (err)
+ return err;
- err = its_irq_gic_domain_alloc(domain, virq + i, hwirq);
+ for (i = 0; i < nr_irqs; i++) {
+ err = its_irq_gic_domain_alloc(domain, virq + i, hwirq + i);
if (err)
return err;
irq_domain_set_hwirq_and_chip(domain, virq + i,
- hwirq, &its_irq_chip, its_dev);
+ hwirq + i, &its_irq_chip, its_dev);
irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(virq + i)));
pr_debug("ID:%d pID:%d vID:%d\n",
- (int)(hwirq - its_dev->event_map.lpi_base),
- (int) hwirq, virq + i);
+ (int)(hwirq + i - its_dev->event_map.lpi_base),
+ (int)(hwirq + i), virq + i);
}
return 0;
cpu_mask = cpumask_of_node(its_dev->its->numa_node);
/* Bind the LPI to the first possible CPU */
- cpu = cpumask_first(cpu_mask);
+ cpu = cpumask_first_and(cpu_mask, cpu_online_mask);
+ if (cpu >= nr_cpu_ids) {
+ if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144)
+ return;
+
+ cpu = cpumask_first(cpu_online_mask);
+ }
+
its_dev->event_map.col_map[event] = cpu;
irq_data_update_effective_affinity(d, cpumask_of(cpu));
{
struct irq_data *d = irq_domain_get_irq_data(domain, virq);
struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+ struct its_node *its = its_dev->its;
int i;
for (i = 0; i < nr_irqs; i++) {
irq_domain_reset_irq_data(data);
}
- /* If all interrupts have been freed, start mopping the floor */
- if (bitmap_empty(its_dev->event_map.lpi_map,
+ mutex_lock(&its->dev_alloc_lock);
+
+ /*
+ * If all interrupts have been freed, start mopping the
+ * floor. This is conditionned on the device not being shared.
+ */
+ if (!its_dev->shared &&
+ bitmap_empty(its_dev->event_map.lpi_map,
its_dev->event_map.nr_lpis)) {
its_lpi_free_chunks(its_dev->event_map.lpi_map,
its_dev->event_map.lpi_base,
its_free_device(its_dev);
}
+ mutex_unlock(&its->dev_alloc_lock);
+
irq_domain_free_irqs_parent(domain, virq, nr_irqs);
}
static int its_vpe_id_alloc(void)
{
- return ida_simple_get(&its_vpeid_ida, 0, 1 << 16, GFP_KERNEL);
+ return ida_simple_get(&its_vpeid_ida, 0, ITS_MAX_VPEID, GFP_KERNEL);
}
static void its_vpe_id_free(u16 id)
return -ENOMEM;
}
- BUG_ON(entries != vpe_proxy.dev->nr_ites);
+ BUG_ON(entries > vpe_proxy.dev->nr_ites);
raw_spin_lock_init(&vpe_proxy.lock);
vpe_proxy.next_victim = 0;
}
raw_spin_lock_init(&its->lock);
+ mutex_init(&its->dev_alloc_lock);
INIT_LIST_HEAD(&its->entry);
INIT_LIST_HEAD(&its->its_device_list);
typer = gic_read_typer(its_base + GITS_TYPER);
for (np = of_find_matching_node(node, its_device_id); np;
np = of_find_matching_node(np, its_device_id)) {
+ if (!of_device_is_available(np))
+ continue;
if (!of_property_read_bool(np, "msi-controller")) {
pr_warn("%pOF: no msi-controller property, ITS ignored\n",
np);