iommu/arm-smmu-v3: Add second level of context descriptor table
author    Jean-Philippe Brucker <jean-philippe@linaro.org>
          Wed, 15 Jan 2020 12:52:36 +0000 (13:52 +0100)
committer Will Deacon <will@kernel.org>
          Wed, 15 Jan 2020 16:06:50 +0000 (16:06 +0000)
The SMMU can support up to 20 bits of SSID. Add a second level of
context descriptor tables to accommodate this. Devices that support
more than 1024 SSIDs now
have a table of 1024 L1 entries (8kB), pointing to tables of 1024 context
descriptors (64kB), allocated on demand.
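
A quick back-of-the-envelope for the sizes quoted above (a standalone C
sketch, not part of the patch; a CD is 8 dwords = 64 bytes, an L1
descriptor is 1 dword = 8 bytes):

    #include <stdio.h>

    int main(void)
    {
        printf("L1 table:   %d B = 8kB\n", 1024 * 8);   /* 1024 L1 entries */
        printf("leaf table: %d B = 64kB\n", 1024 * 64); /* 1024 CDs */
        return 0;
    }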

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Will Deacon <will@kernel.org>
drivers/iommu/arm-smmu-v3.c

index a224b91..c2d1325 100644
 
 #define STRTAB_STE_0_S1FMT             GENMASK_ULL(5, 4)
 #define STRTAB_STE_0_S1FMT_LINEAR      0
+#define STRTAB_STE_0_S1FMT_64K_L2      2
 #define STRTAB_STE_0_S1CTXPTR_MASK     GENMASK_ULL(51, 6)
 #define STRTAB_STE_0_S1CDMAX           GENMASK_ULL(63, 59)
 
 
 #define STRTAB_STE_3_S2TTB_MASK                GENMASK_ULL(51, 4)
 
-/* Context descriptor (stage-1 only) */
+/*
+ * Context descriptors.
+ *
+ * Linear: when at most 1024 SSIDs are supported
+ * 2lvl: at most 1024 L1 entries,
+ *       1024 lazy entries per table.
+ */
+#define CTXDESC_SPLIT                  10
+#define CTXDESC_L2_ENTRIES             (1 << CTXDESC_SPLIT)
+
+#define CTXDESC_L1_DESC_DWORDS         1
+#define CTXDESC_L1_DESC_V              (1UL << 0)
+#define CTXDESC_L1_DESC_L2PTR_MASK     GENMASK_ULL(51, 12)
+
 #define CTXDESC_CD_DWORDS              8
 #define CTXDESC_CD_0_TCR_T0SZ          GENMASK_ULL(5, 0)
 #define CTXDESC_CD_0_TCR_TG0           GENMASK_ULL(7, 6)
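
An aside on the L1 descriptor format implied by the masks above: it is
just a valid bit (bit 0) plus a 4kB-aligned pointer to the leaf table
(bits [51:12]). A minimal standalone sketch; pack_l1_desc() is a
hypothetical name, not a driver function:

    #include <stdint.h>
    #include <stdio.h>

    #define L1_DESC_V          (1ULL << 0)           /* CTXDESC_L1_DESC_V */
    #define L1_DESC_L2PTR_MASK 0x000ffffffffff000ULL /* GENMASK_ULL(51, 12) */

    /* Build an L1 entry from the DMA address of a leaf table */
    static uint64_t pack_l1_desc(uint64_t l2ptr_dma)
    {
        return (l2ptr_dma & L1_DESC_L2PTR_MASK) | L1_DESC_V;
    }

    int main(void)
    {
        printf("0x%016llx\n", (unsigned long long)pack_l1_desc(0x80042000ULL));
        return 0;
    }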
@@ -558,9 +572,15 @@ struct arm_smmu_ctx_desc {
        u64                             mair;
 };
 
+struct arm_smmu_l1_ctx_desc {
+       __le64                          *l2ptr;
+       dma_addr_t                      l2ptr_dma;
+};
+
 struct arm_smmu_ctx_desc_cfg {
        __le64                          *cdtab;
        dma_addr_t                      cdtab_dma;
+       struct arm_smmu_l1_ctx_desc     *l1_desc;
        unsigned int                    num_l1_ents;
 };
 
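
A note on arm_smmu_l1_ctx_desc: each leaf table needs both a CPU pointer
(l2ptr, which the driver dereferences to write CDs) and a bus address
(l2ptr_dma, which is what gets packed into the hardware L1 entry), since
dmam_alloc_coherent() returns that pair and the two are generally
different. A rough standalone sketch of the pairing, with a hypothetical
stand-in allocator that identity-maps:

    #include <stdint.h>
    #include <stdlib.h>

    struct l1_ctx_desc {
        uint64_t *l2ptr;     /* CPU view: driver writes CDs here */
        uint64_t  l2ptr_dma; /* device view: goes into the L1 entry */
    };

    /* Stand-in for dmam_alloc_coherent(); coherent DMA memory is zeroed */
    static int alloc_leaf(struct l1_ctx_desc *l, size_t bytes)
    {
        l->l2ptr = calloc(1, bytes);
        if (!l->l2ptr)
            return -1;
        l->l2ptr_dma = (uintptr_t)l->l2ptr; /* identity map, sketch only */
        return 0;
    }

    int main(void)
    {
        struct l1_ctx_desc leaf;
        return alloc_leaf(&leaf, 1024 * 64); /* one 64kB leaf table */
    }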
@@ -1490,6 +1510,57 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
        arm_smmu_cmdq_issue_sync(smmu);
 }
 
+static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
+                                       struct arm_smmu_l1_ctx_desc *l1_desc)
+{
+       size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
+
+       l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
+                                            &l1_desc->l2ptr_dma, GFP_KERNEL);
+       if (!l1_desc->l2ptr) {
+               dev_warn(smmu->dev,
+                        "failed to allocate context descriptor table\n");
+               return -ENOMEM;
+       }
+       return 0;
+}
+
+static void arm_smmu_write_cd_l1_desc(__le64 *dst,
+                                     struct arm_smmu_l1_ctx_desc *l1_desc)
+{
+       u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
+                 CTXDESC_L1_DESC_V;
+
+       WRITE_ONCE(*dst, cpu_to_le64(val));
+}
+
+static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
+                                  u32 ssid)
+{
+       __le64 *l1ptr;
+       unsigned int idx;
+       struct arm_smmu_l1_ctx_desc *l1_desc;
+       struct arm_smmu_device *smmu = smmu_domain->smmu;
+       struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
+
+       if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
+               return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
+
+       idx = ssid >> CTXDESC_SPLIT;
+       l1_desc = &cdcfg->l1_desc[idx];
+       if (!l1_desc->l2ptr) {
+               if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
+                       return NULL;
+
+               l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
+               arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
+               /* An invalid L1CD can be cached */
+               arm_smmu_sync_cd(smmu_domain, ssid, false);
+       }
+       idx = ssid & (CTXDESC_L2_ENTRIES - 1);
+       return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
+}
+
 static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
                                   int ssid, struct arm_smmu_ctx_desc *cd)
 {
@@ -1504,9 +1575,15 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
         */
        u64 val;
        bool cd_live;
+       __le64 *cdptr;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
-       __le64 *cdptr = smmu_domain->s1_cfg.cdcfg.cdtab + ssid *
-                       CTXDESC_CD_DWORDS;
+
+       if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
+               return -E2BIG;
+
+       cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
+       if (!cdptr)
+               return -ENOMEM;
 
        val = le64_to_cpu(cdptr[0]);
        cd_live = !!(val & CTXDESC_CD_0_V);
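
The WARN_ON() above rejects SSIDs beyond what arm_smmu_alloc_cd_tables()
sized the tables for: with s1cdmax = 16, for instance, SSID 0x10000 is
out of range. A trivial standalone check of the same bound (check_ssid()
is a hypothetical mirror, not a driver function):

    #include <assert.h>

    #define E2BIG 7 /* as in the kernel's errno values */

    /* Valid SSIDs are 0 .. 2^s1cdmax - 1 */
    static int check_ssid(unsigned int ssid, unsigned int s1cdmax)
    {
        return ssid >= (1u << s1cdmax) ? -E2BIG : 0;
    }

    int main(void)
    {
        assert(check_ssid(0xffff, 16) == 0);
        assert(check_ssid(0x10000, 16) == -E2BIG);
        return 0;
    }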
@@ -1562,29 +1639,78 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
 
 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
 {
+       int ret;
        size_t l1size;
+       size_t max_contexts;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
        struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
 
-       cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
+       max_contexts = 1 << cfg->s1cdmax;
+
+       if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
+           max_contexts <= CTXDESC_L2_ENTRIES) {
+               cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
+               cdcfg->num_l1_ents = max_contexts;
+
+               l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
+       } else {
+               cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
+               cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
+                                                 CTXDESC_L2_ENTRIES);
+
+               cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
+                                             sizeof(*cdcfg->l1_desc),
+                                             GFP_KERNEL);
+               if (!cdcfg->l1_desc)
+                       return -ENOMEM;
+
+               l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
+       }
 
-       cdcfg->num_l1_ents = 1UL << cfg->s1cdmax;
-       l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
        cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
                                           GFP_KERNEL);
        if (!cdcfg->cdtab) {
                dev_warn(smmu->dev, "failed to allocate context descriptor\n");
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto err_free_l1;
        }
+
        return 0;
+
+err_free_l1:
+       if (cdcfg->l1_desc) {
+               devm_kfree(smmu->dev, cdcfg->l1_desc);
+               cdcfg->l1_desc = NULL;
+       }
+       return ret;
 }
 
 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
 {
+       int i;
+       size_t size, l1size;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
-       size_t l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
+
+       if (cdcfg->l1_desc) {
+               size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
+
+               for (i = 0; i < cdcfg->num_l1_ents; i++) {
+                       if (!cdcfg->l1_desc[i].l2ptr)
+                               continue;
+
+                       dmam_free_coherent(smmu->dev, size,
+                                          cdcfg->l1_desc[i].l2ptr,
+                                          cdcfg->l1_desc[i].l2ptr_dma);
+               }
+               devm_kfree(smmu->dev, cdcfg->l1_desc);
+               cdcfg->l1_desc = NULL;
+
+               l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
+       } else {
+               l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
+       }
 
        dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
        cdcfg->cdtab_dma = 0;
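
For completeness, the arithmetic behind the linear-vs-2lvl decision in
arm_smmu_alloc_cd_tables(): at the maximum s1cdmax of 20, a linear table
would be a 64MB contiguous allocation up front, whereas the two-level
format commits only an 8kB L1 table and defers each 64kB leaf until a
matching SSID is used. A standalone sketch of the two l1size
computations:

    #include <stdio.h>

    int main(void)
    {
        unsigned int s1cdmax = 20;
        size_t max_contexts = 1ul << s1cdmax;

        /* linear: one CD (8 dwords = 64 B) per context */
        size_t linear = max_contexts * 64;
        /* 2lvl: one L1 descriptor (1 dword = 8 B) per 1024 contexts */
        size_t two_lvl = ((max_contexts + 1023) / 1024) * 8;

        printf("linear: %zu MB, 2lvl L1: %zu kB\n",
               linear >> 20, two_lvl >> 10); /* 64 MB vs 8 kB */
        return 0;
    }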