Say Y here if you intend to run this kernel as a guest.
+config SR_NPU_IOMMU
+ bool "SR Tinity Vision 2 IOMMU Support"
+ depends on ARM || ARM64
+ select ARM_DMA_USE_IOMMU
+ select IOMMU_API
+ select IOMMU_DMA
+ help
+ Support for the IOMMU on the Samsung Research (SR) Neural
+ Processing Unit (NPU), Trinity Vision 2, family. This enables the
+ NPUs to see scattered, non-contiguous physical memory chunks as a
+ contiguous range in their own address space.
+
+ If unsure, say N here.
endif # IOMMU_SUPPORT
obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
+obj-$(CONFIG_SR_NPU_IOMMU) += srnpu-iommu.o
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * IOMMU (ARM/ARM64) driver for Samsung Research NPU device family
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Wook Song <wook16.song@samsung.com>
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#include <linux/bitmap.h>
+#include <linux/dma-iommu.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/iommu.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_iommu.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+
+#include <asm/cacheflush.h>
+#ifdef CONFIG_ARM
+#include <asm/dma-iommu.h>
+#define MASK_HIGH_BIT (31)
+#else /* CONFIG_ARM64 */
+#define MASK_HIGH_BIT (35)
+#endif
+
+#define CHECK_BITS(val, mask) (((val) & (mask)) == (mask))
+
+/* Register offsets for CBOX */
+#define MMREG_CBOX_IDX (2)
+#define OFFSET_CBOX_IOMMU_DLA (0x1000)
+#define OFFSET_CBOX_IOMMU_DSP (0x2000)
+#define LENGTH_CBOX_IOMMU (0x1000)
+
+/* Register offsets for SRNPU-IOMMU */
+#define OFFSET_IOMMU_CTRL (0x0)
+#define OFFSET_IOMMU_STATUS (0x4)
+#define OFFSET_IOMMU_FLPT_BASE (0x8)
+#define OFFSET_IOMMU_ALL_INVALIDATION (0xC)
+#define OFFSET_IOMMU_VPN_INVALIDATION (0x10)
+#define OFFSET_IOMMU_ICH_FAULT_STATUS (0x14)
+#define OFFSET_IOMMU_ICH_FAULT_VA (0x18)
+#define OFFSET_IOMMU_OCH_FAULT_STATUS (0x1C)
+#define OFFSET_IOMMU_OCH_FAULT_VA (0x20)
+#define OFFSET_IOMMU_TLB_READ (0x24)
+#define OFFSET_IOMMU_TLB_TAG (0x28)
+#define OFFSET_IOMMU_TLB_PPN (0x2C)
+#define OFFSET_IOMMU_PTW_BIF_CFG (0x30)
+#define OFFSET_IOMMU_DBG (0xFC)
+
+#define BIT_MMU_ENABLE_IOMMU_CTRL (0x0)
+#define BIT_MMU_BLOCK_IOMMU_CTRL (0x1)
+
+/* Bitmap of the page sizes currently supported */
+#define SRNPU_IOMMU_PGSIZE_BITMAP (SZ_4K | SZ_64K | SZ_1M | SZ_2M | SZ_16M)
+
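+/*
+ * PTE encoding, as used by the helpers below: bit 0 marks a valid entry.
+ * In a first-level PTE (FLPTE), bits [2:1] select the section size
+ * (01b = 1M, 10b = 2M, 11b = 16M, 00b = pointer to a second-level table)
+ * and the PPN sits at a size-dependent offset (the LSHFT_* values). In a
+ * second-level PTE (SLPTE), bit 1 distinguishes 64K from 4K pages.
+ */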
+#define MASK_PTE_MAPPED BIT_MASK(0)
+#define MASK_FLPTE_MAP_1M BIT_MASK(1)
+#define MASK_FLPTE_PPN_MASK_1M GENMASK(MASK_HIGH_BIT, 20)
+#define RSHFT_FLPTE_PPN_1M (20)
+#define LSHFT_FLPTE_PPN_1M (16)
+#define MASK_FLPTE_MAP_2M BIT_MASK(2)
+#define MASK_FLPTE_PPN_MASK_2M GENMASK(MASK_HIGH_BIT, 21)
+#define RSHFT_FLPTE_PPN_2M (21)
+#define LSHFT_FLPTE_PPN_2M (17)
+#define MASK_FLPTE_MAP_16M GENMASK(2, 1)
+#define MASK_FLPTE_PPN_MASK_16M GENMASK(MASK_HIGH_BIT, 24)
+#define RSHFT_FLPTE_PPN_16M (24)
+#define LSHFT_FLPTE_PPN_16M (20)
+#define MASK_SLPT_BASE_TO_FLPTE GENMASK(MASK_HIGH_BIT, 10)
+#define RSHFT_SLPT_BASE_TO_FLPTE (10)
+#define LSHFT_SLPT_BASE_TO_FLPTE (6)
+#define MASK_FLPTE_TO_SLPT_BASE GENMASK(31, 6)
+#define RSHFT_FLPTE_TO_SLPT_BASE (6)
+#define LSHFT_FLPTE_TO_SLPT_BASE (10)
+
+#define RSHFT_SIZE_TO_NUM_PTE (20)
+#define RSHFT_SIZE_TO_NUM_LV2PTE (12)
+#define MASK_IOVA_TO_VPN GENMASK(31, 20)
+#define RSHFT_IOVA_TO_VPN (20)
+#define MASK_IOVA_TO_LV2VPN GENMASK(19, 12)
+#define RSHFT_IOVA_TO_LV2VPN (12)
+#define MASK_IOVA_TO_TLBVPN GENMASK(31, 14)
+#define RSHFT_IOVA_TO_TLBVPN (14)
+#define LSHFT_TLBVPN_TO_REGVAL (14)
+#define MASK_IOVA_TO_PAGEOFFSET_4K GENMASK(11, 0)
+#define MASK_IOVA_TO_PAGEOFFSET_64K GENMASK(15, 0)
+#define MASK_IOVA_TO_PAGEOFFSET_1M GENMASK(19, 0)
+#define MASK_IOVA_TO_PAGEOFFSET_2M GENMASK(20, 0)
+#define MASK_IOVA_TO_PAGEOFFSET_16M GENMASK(23, 0)
+
+#define MASK_SLPTE_MAP_4K 0
+#define MASK_SLPTE_MAP_64K BIT_MASK(1)
+#define MASK_SLPTE_PPN_MASK_4K GENMASK(MASK_HIGH_BIT, 12)
+#define MASK_SLPTE_PPN_MASK_64K GENMASK(MASK_HIGH_BIT, 16)
+#define RSHFT_SLPTE_PPN_4K (12)
+#define LSHFT_SLPTE_PPN_4K (8)
+#define RSHFT_SLPTE_PPN_64K (16)
+#define LSHFT_SLPTE_PPN_64K (12)
+
+#define FLPT_PHYS_TO_REGVAL(x) ((((x) & GENMASK(MASK_HIGH_BIT, 14)) >> 14) << 10)
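+/*
+ * Table geometry: the FLPT holds 4096 four-byte entries (16 KiB), one per
+ * 1 MiB section of a 4 GiB IOVA space; each SLPT holds 256 entries (1 KiB),
+ * one per 4 KiB page of a section.
+ */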
+#define FLPT_NUM_PTES BIT(12)
+#define FLPT_PTE_SIZE (SZ_4)
+#define FLPT_SIZE (FLPT_NUM_PTES * FLPT_PTE_SIZE)
+#define SLPT_NUM_PTES BIT(8)
+#define SLPT_PTE_SIZE (SZ_4)
+#define SLPT_SIZE (SLPT_NUM_PTES * SLPT_PTE_SIZE)
+
+#define FLPT_PAGE_FAULT BIT(3)
+#define SLPT_PAGE_FAULT BIT(2)
+#define PTW_ACCESS_FAULT BIT(1)
+#define ATU_ACCESS_FAULT BIT(0)
+
+static void *srnpu_iommu_flpt;
+static struct kmem_cache *srnpu_iommu_slpt_cache;
+
+struct srnpu_iommu_domain {
+ spinlock_t lock;
+ struct platform_device *pdev;
+ struct iommu_domain domain;
+ uint32_t *flpt;
+};
+
+/* global iommu-shared data */
+struct srnpu_iommu_drvdata {
+ struct device *dev; /* physical device */
+ struct iommu_device iommu;
+ struct srnpu_iommu_domain *domain; /* domain we belong to */
+#ifdef CONFIG_ARM
+ struct dma_iommu_mapping *mapping;
+#else
+ struct iommu_group *group;
+#endif
+ struct { /* optional reserved memory */
+ phys_addr_t paddr;
+ size_t size;
+ } resv;
+};
+
+/* per-device iommu data */
+struct srnpu_iommu {
+ struct device *dev; /* trinity device */
+ struct srnpu_iommu_drvdata *data;
+ void __iomem *regbase_dla;
+ void __iomem *regbase_dsp;
+};
+
+static struct kref srnpu_iommu_slpt_refcnts[FLPT_NUM_PTES];
+
+static inline struct srnpu_iommu_domain *
+domain_to_srnpu_domain(struct iommu_domain *dm)
+{
+ if (!dm)
+ return NULL;
+
+ return container_of(dm, struct srnpu_iommu_domain, domain);
+}
+
+static inline struct srnpu_iommu *dev_to_srnpu_iommu(struct device *dev)
+{
+ if (!dev)
+ return NULL;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 9, 0))
+ if (!dev->iommu_fwspec)
+ return NULL;
+
+ return dev->iommu_fwspec->iommu_priv;
+#else
+ return dev_iommu_priv_get(dev);
+#endif
+}
+
+static inline void dev_set_srnpu_iommu(struct device *dev, void *priv)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 9, 0))
+ dev->iommu_fwspec->iommu_priv = priv;
+#else
+ dev_iommu_priv_set(dev, priv);
+#endif
+}
+
+static inline struct iommu_fwspec *dev_to_iommu_fwspec(struct device *dev)
+{
+ if (!dev)
+ return NULL;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 7, 0))
+ return dev->iommu_fwspec;
+#else
+ return dev_iommu_fwspec_get(dev);
+#endif
+}
+
+static inline struct srnpu_iommu_drvdata *
+dev_to_srnpu_iommu_drvdata(struct device *dev)
+{
+ struct srnpu_iommu *iommu;
+
+ iommu = dev_to_srnpu_iommu(dev);
+ if (!iommu)
+ return NULL;
+
+ return iommu->data;
+}
+
+static inline uint32_t iova_to_vpn(unsigned long iova)
+{
+ uint32_t ret = iova;
+
+ ret &= MASK_IOVA_TO_VPN;
+ ret >>= RSHFT_IOVA_TO_VPN;
+
+ return ret;
+}
+
+static inline uint32_t iova_to_lv2vpn(unsigned long iova)
+{
+ uint32_t ret = iova;
+
+ ret &= MASK_IOVA_TO_LV2VPN;
+ ret >>= RSHFT_IOVA_TO_LV2VPN;
+
+ return ret;
+}
+
+static inline uint32_t iova_to_tlbvpn(unsigned long iova)
+{
+ uint32_t ret = iova;
+
+ ret &= MASK_IOVA_TO_TLBVPN;
+ ret >>= RSHFT_IOVA_TO_TLBVPN;
+
+ return ret;
+}
+
+static inline uint32_t srnpu_iommu_read_reg(void __iomem *base, uint32_t offset)
+{
+ return ioread32(base + offset);
+}
+
+static inline void srnpu_iommu_write_reg(void __iomem *base, uint32_t offset,
+ uint32_t val)
+{
+ iowrite32(val, base + offset);
+}
+
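+/*
+ * Each NPU exposes two MMU register blocks, one for the DLA core and one
+ * for the DSP. Both walk the same globally shared FLPT, so enable/disable
+ * always programs the pair together.
+ */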
+static inline void srnpu_iommu_enable(struct srnpu_iommu *_iommu)
+{
+ phys_addr_t paddr = virt_to_phys(srnpu_iommu_flpt);
+
+ /* Set FLPT base */
+ srnpu_iommu_write_reg(_iommu->regbase_dla, OFFSET_IOMMU_FLPT_BASE,
+ FLPT_PHYS_TO_REGVAL(paddr));
+ srnpu_iommu_write_reg(_iommu->regbase_dsp, OFFSET_IOMMU_FLPT_BASE,
+ FLPT_PHYS_TO_REGVAL(paddr));
+
+ /* Enable IOMMU */
+ srnpu_iommu_write_reg(_iommu->regbase_dla, OFFSET_IOMMU_CTRL, 0x1U);
+ srnpu_iommu_write_reg(_iommu->regbase_dsp, OFFSET_IOMMU_CTRL, 0x1U);
+}
+
+static inline void srnpu_iommu_disable(struct srnpu_iommu *_iommu)
+{
+ /* Disable IOMMU */
+ srnpu_iommu_write_reg(_iommu->regbase_dla, OFFSET_IOMMU_CTRL, 0x0U);
+ srnpu_iommu_write_reg(_iommu->regbase_dsp, OFFSET_IOMMU_CTRL, 0x0U);
+}
+
+static inline uint32_t srnpu_iommu_get_slpte(phys_addr_t paddr, uint32_t nents)
+{
+ phys_addr_t mask_ppn;
+ uint32_t mask_map;
+ uint32_t lshft_ppn;
+ uint32_t rshft_ppn;
+ uint32_t val;
+
+ switch (nents) {
+ case 1:
+ mask_map = MASK_SLPTE_MAP_4K;
+ mask_ppn = MASK_SLPTE_PPN_MASK_4K;
+ rshft_ppn = RSHFT_SLPTE_PPN_4K;
+ lshft_ppn = LSHFT_SLPTE_PPN_4K;
+ break;
+ case 16:
+ mask_map = MASK_SLPTE_MAP_64K;
+ mask_ppn = MASK_SLPTE_PPN_MASK_64K;
+ rshft_ppn = RSHFT_SLPTE_PPN_64K;
+ lshft_ppn = LSHFT_SLPTE_PPN_64K;
+ break;
+ default:
+ return 0;
+ }
+
+ val = 0;
+ val |= mask_map;
+ paddr &= mask_ppn;
+ val |= ((paddr >> rshft_ppn) << lshft_ppn);
+ val |= MASK_PTE_MAPPED;
+
+ return val;
+}
+
+static inline uint32_t srnpu_iommu_get_flpte(phys_addr_t paddr, uint32_t nents)
+{
+ phys_addr_t mask_ppn;
+ uint32_t mask_map;
+ uint32_t lshft_ppn;
+ uint32_t rshft_ppn;
+ uint32_t val;
+
+ switch (nents) {
+ case 1:
+ mask_map = MASK_FLPTE_MAP_1M;
+ mask_ppn = MASK_FLPTE_PPN_MASK_1M;
+ rshft_ppn = RSHFT_FLPTE_PPN_1M;
+ lshft_ppn = LSHFT_FLPTE_PPN_1M;
+ break;
+ case 2:
+ mask_map = MASK_FLPTE_MAP_2M;
+ mask_ppn = MASK_FLPTE_PPN_MASK_2M;
+ rshft_ppn = RSHFT_FLPTE_PPN_2M;
+ lshft_ppn = LSHFT_FLPTE_PPN_2M;
+ break;
+ case 16:
+ mask_map = MASK_FLPTE_MAP_16M;
+ mask_ppn = MASK_FLPTE_PPN_MASK_16M;
+ rshft_ppn = RSHFT_FLPTE_PPN_16M;
+ lshft_ppn = LSHFT_FLPTE_PPN_16M;
+ break;
+ default:
+ return 0;
+ }
+
+ val = 0;
+ val |= mask_map;
+ paddr &= mask_ppn;
+ val |= ((paddr >> rshft_ppn) << lshft_ppn);
+ val |= MASK_PTE_MAPPED;
+
+ return val;
+}
+static inline uint32_t flpte_to_slpt_base(const uint32_t flpte)
+{
+ uint32_t ret = 0;
+
+ ret = flpte & MASK_FLPTE_TO_SLPT_BASE;
+ ret >>= RSHFT_FLPTE_TO_SLPT_BASE;
+ ret <<= LSHFT_FLPTE_TO_SLPT_BASE;
+
+ return ret;
+}
+
+static inline void *alloc_slpt_and_get_flpte(uint32_t *flpte)
+{
+ phys_addr_t pa;
+ void *slpt;
+
+ slpt = kmem_cache_zalloc(srnpu_iommu_slpt_cache, GFP_ATOMIC);
+ if (!slpt)
+ return ERR_PTR(-ENOMEM);
+
+ pa = virt_to_phys(slpt);
+ if (!IS_ALIGNED(pa, SZ_1K)) {
+ kmem_cache_free(srnpu_iommu_slpt_cache, slpt);
+ return ERR_PTR(-EINVAL);
+ }
+
+ pa &= MASK_SLPT_BASE_TO_FLPTE;
+ pa >>= RSHFT_SLPT_BASE_TO_FLPTE;
+ pa <<= LSHFT_SLPT_BASE_TO_FLPTE;
+
+ *flpte = (uint32_t)pa;
+
+ return slpt;
+}
+
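+/*
+ * kref release callback: the SLPT itself is freed by the kref_put() caller
+ * in srnpu_iommu_unmap_internal(), so this only asserts that the refcount
+ * has really dropped to zero.
+ */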
+static void release_slpt(struct kref *kref)
+{
+ BUG_ON(kref_read(kref));
+}
+
+static void flush_dcache_area(void *addr, size_t len)
+{
+#ifdef CONFIG_ARM
+ __cpuc_flush_dcache_area(addr, len);
+#else
+ __flush_dcache_area(addr, len);
+#endif
+}
+
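+/*
+ * Install one mapping in the shared tables: 4K/64K requests go through a
+ * second-level table, allocated on first use and reference-counted per
+ * FLPT slot; 1M/2M/16M requests are written directly as first-level
+ * section entries. Each update is followed by a dcache flush so the NPU's
+ * table walker observes it.
+ */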
+static int srnpu_iommu_map_internal(unsigned long iova, phys_addr_t paddr,
+ size_t size, int prot)
+{
+ uint32_t vpn = iova_to_vpn(iova);
+ uint32_t *pte;
+ uint32_t num_pte;
+ uint32_t val;
+ uint32_t i;
+ int err = 0;
+
+ pte = &(((uint32_t *)srnpu_iommu_flpt)[vpn]);
+
+ switch (size) {
+ case SZ_4K:
+ case SZ_64K: {
+ uint32_t flpte;
+ uint32_t lv2_vpn;
+ uint32_t *slpt;
+
+ if (*pte & MASK_PTE_MAPPED) {
+ slpt = phys_to_virt(flpte_to_slpt_base(*pte));
+ kref_get(&srnpu_iommu_slpt_refcnts[vpn]);
+ } else {
+ slpt = alloc_slpt_and_get_flpte(&flpte);
+ if (IS_ERR(slpt)) {
+ pr_err("%s: Failed to alloc slpt (iova 0x%lx, paddr 0x%lx)\n",
+ __func__, iova, (unsigned long)paddr);
+ return PTR_ERR(slpt);
+ }
+ /* bitlock here */
+ kref_init(&srnpu_iommu_slpt_refcnts[vpn]);
+ *pte = (flpte | MASK_PTE_MAPPED);
+ flush_dcache_area(pte, sizeof(*pte));
+ }
+
+ lv2_vpn = iova_to_lv2vpn(iova);
+ slpt = &slpt[lv2_vpn];
+
+ num_pte = size >> RSHFT_SIZE_TO_NUM_LV2PTE;
+ val = srnpu_iommu_get_slpte(paddr, num_pte);
+ if (!(val & MASK_PTE_MAPPED)) {
+ pr_err("%s: Failed to check slpt (iova 0x%lx, paddr 0x%lx)\n",
+ __func__, iova, (unsigned long)paddr);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < num_pte; ++i) {
+ slpt[i] = val;
+ paddr = paddr + SZ_4K;
+ }
+ flush_dcache_area(slpt, sizeof(*slpt) * num_pte);
+
+ break;
+ }
+ case SZ_1M:
+ case SZ_2M:
+ case SZ_16M:
+ if (*pte & MASK_PTE_MAPPED) {
+ pr_err("%s: iova 0x%lx is already mapped to phys 0x%lx\n",
+ __func__, iova, (unsigned long)paddr);
+ return -EADDRINUSE;
+ }
+
+ num_pte = size >> RSHFT_SIZE_TO_NUM_PTE;
+ val = srnpu_iommu_get_flpte(paddr, num_pte);
+ if (!(val & MASK_PTE_MAPPED)) {
+ pr_err("%s: Failed to check flpt (iova 0x%lx, paddr 0x%lx)\n",
+ __func__, iova, (unsigned long)paddr);
+ return -EINVAL;
+ }
+
+ /* TODO: alignment check here */
+ for (i = 0; i < num_pte; ++i) {
+ pte[i] = val;
+ paddr = paddr + SZ_1M;
+ }
+ flush_dcache_area(pte, sizeof(*pte) * num_pte);
+
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 5, 0))
+static int srnpu_iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot)
+#else
+static int srnpu_iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+#endif
+{
+ struct srnpu_iommu_domain *_domain = domain_to_srnpu_domain(domain);
+ unsigned long flags;
+ int err = 0;
+
+ if (!_domain)
+ return -ENODEV;
+
+ spin_lock_irqsave(&_domain->lock, flags);
+ err = srnpu_iommu_map_internal(iova, paddr, size, prot);
+ spin_unlock_irqrestore(&_domain->lock, flags);
+
+ return err;
+}
+
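+/*
+ * Tear down the mapping at @iova: section mappings are recognized from the
+ * FLPTE map bits and cleared in place; otherwise the SLPT entries are
+ * cleared and the SLPT itself is freed once its refcount drops to zero.
+ * Returns the number of bytes unmapped by this step.
+ */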
+static size_t srnpu_iommu_unmap_internal(unsigned long iova, size_t size)
+{
+ uint32_t vpn = iova_to_vpn(iova);
+ size_t unmapped = 0;
+ uint32_t *pte;
+ uint32_t num_pte;
+ uint32_t i;
+
+ pte = &(((uint32_t *)srnpu_iommu_flpt)[vpn]);
+
+ if (CHECK_BITS(*pte, MASK_FLPTE_MAP_16M) ||
+ CHECK_BITS(*pte, MASK_FLPTE_MAP_2M) ||
+ CHECK_BITS(*pte, MASK_FLPTE_MAP_1M)) {
+ num_pte = size >> RSHFT_SIZE_TO_NUM_PTE;
+ for (i = 0; i < num_pte; ++i) {
+ if (!((pte[i]) & MASK_PTE_MAPPED)) {
+ pr_err("%s: Unmapped flpt (iova 0x%lx)\n",
+ __func__, iova);
+ }
+
+ pte[i] &= ~MASK_PTE_MAPPED;
+ unmapped += SZ_1M;
+ }
+ flush_dcache_area(pte, sizeof(*pte) * num_pte);
+ } else {
+ void *slpt_base = phys_to_virt(flpte_to_slpt_base(*pte));
+ uint32_t lv2vpn = iova_to_lv2vpn(iova);
+ uint32_t *slpt;
+
+ slpt = &((uint32_t *)slpt_base)[lv2vpn];
+ if (*slpt & MASK_SLPTE_MAP_64K)
+ size = SZ_64K;
+ else
+ size = SZ_4K;
+
+ num_pte = size >> RSHFT_SIZE_TO_NUM_LV2PTE;
+
+ for (i = 0; i < num_pte; ++i) {
+ if (!((slpt[i]) & MASK_PTE_MAPPED))
+ pr_err("%s: Unmapped slpt (iova 0x%lx)\n",
+ __func__, iova);
+
+ slpt[i] &= ~MASK_PTE_MAPPED;
+ unmapped += SZ_4K;
+ }
+ flush_dcache_area(slpt, sizeof(*slpt) * num_pte);
+
+ if (kref_put(&srnpu_iommu_slpt_refcnts[vpn], release_slpt)) {
+ kmem_cache_free(srnpu_iommu_slpt_cache, slpt_base);
+ *pte &= ~MASK_PTE_MAPPED;
+ flush_dcache_area(pte, sizeof(*pte));
+ }
+ }
+
+ return unmapped;
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0))
+static size_t srnpu_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
+ size_t size)
+#else
+static size_t srnpu_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
+ size_t size, struct iommu_iotlb_gather *gather)
+#endif
+{
+ struct srnpu_iommu_domain *_domain = domain_to_srnpu_domain(domain);
+ unsigned long flags;
+ size_t unmapped = 0;
+
+ if (!_domain)
+ return 0;
+
+ spin_lock_irqsave(&_domain->lock, flags);
+ while (unmapped < size)
+ unmapped += srnpu_iommu_unmap_internal(iova + unmapped,
+ size - unmapped);
+ spin_unlock_irqrestore(&_domain->lock, flags);
+
+ return unmapped;
+}
+
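+/* Rebuild a device address from page-table indices: each first-level slot
+ * covers 1 MiB and each second-level slot covers 4 KiB.
+ */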
+static dma_addr_t srnpu_iommu_get_daddr(u32 first, u32 second)
+{
+ return (first << RSHFT_SIZE_TO_NUM_PTE) +
+ (second << RSHFT_SIZE_TO_NUM_LV2PTE);
+}
+
+static void srnpu_iommu_dump_page_table(struct device *dev)
+{
+ u32 i, *flpt_base = srnpu_iommu_flpt;
+
+ dev_warn(dev, "IOMMU PAGE TABLE DUMP");
+ for (i = 0; i < FLPT_NUM_PTES; i++) {
+ u32 ppn, pte = flpt_base[i];
+ if (!(pte & MASK_PTE_MAPPED))
+ continue;
+
+ if (CHECK_BITS(pte, MASK_FLPTE_MAP_16M)) {
+ ppn = (pte >> LSHFT_FLPTE_PPN_16M);
+ dev_warn(
+ dev,
+ "\t[%u] DADDR (0x%08lx): 16M PTE(0x%08x) PADDR (0x%08x)",
+ i, (unsigned long)srnpu_iommu_get_daddr(i, 0),
+ pte, ppn << RSHFT_FLPTE_PPN_16M);
+ } else if (CHECK_BITS(pte, MASK_FLPTE_MAP_2M)) {
+ ppn = (pte >> LSHFT_FLPTE_PPN_2M);
+ dev_warn(
+ dev,
+ "\t[%u] DADDR (0x%08lx): 2M PTE(0x%08x) PADDR (0x%08x)",
+ i, (unsigned long)srnpu_iommu_get_daddr(i, 0),
+ pte, ppn << RSHFT_FLPTE_PPN_2M);
+ } else if (CHECK_BITS(pte, MASK_FLPTE_MAP_1M)) {
+ ppn = (pte >> LSHFT_FLPTE_PPN_1M);
+ dev_warn(
+ dev,
+ "\t[%u] DADDR (0x%08lx): 1M PTE(0x%08x) PADDR (0x%08x)",
+ i, (unsigned long)srnpu_iommu_get_daddr(i, 0),
+ pte, ppn << RSHFT_FLPTE_PPN_1M);
+ } else {
+ u32 slpt_base = flpte_to_slpt_base(pte);
+ u32 *slpt_base_vaddr = phys_to_virt(slpt_base);
+ u32 j;
+
+ dev_warn(
+ dev,
+ "\t[%u] DADDR (0x%08lx): 4K/64K PTE(0x%08x) SLPT_BASE (0x%08x)",
+ i, (unsigned long)srnpu_iommu_get_daddr(i, 0),
+ pte, slpt_base);
+
+ for (j = 0; j < SLPT_NUM_PTES; j++) {
+ pte = slpt_base_vaddr[j];
+ if (!(pte & MASK_PTE_MAPPED))
+ continue;
+
+ if (CHECK_BITS(pte, MASK_SLPTE_MAP_64K)) {
+ ppn = (pte >> LSHFT_SLPTE_PPN_64K);
+ dev_warn(
+ dev,
+ "\t\t[%u] DADDR (0x%08lx): 64K SLPT PTE (0x%08x) PADDR (0x%08x)",
+ j,
+ (unsigned long)
+ srnpu_iommu_get_daddr(
+ i, j),
+ pte,
+ ppn << RSHFT_SLPTE_PPN_64K);
+ } else {
+ ppn = (pte >> LSHFT_SLPTE_PPN_4K);
+ dev_warn(
+ dev,
+ "\t\t[%u] DADDR (0x%08lx): 4K SLPT PTE (0x%08x) PADDR (0x%08x)",
+ j,
+ (unsigned long)
+ srnpu_iommu_get_daddr(
+ i, j),
+ pte, ppn << RSHFT_SLPTE_PPN_4K);
+ }
+ }
+ }
+ }
+}
+
+static int srnpu_iommu_fault_mmreg(struct device *dev, void __iomem *mmreg,
+ bool dla)
+{
+ const char *type = dla ? "DLA" : "DSP";
+ unsigned int ICH_FAULT, OCH_FAULT;
+ int ret = 0;
+
+ ICH_FAULT = ioread32(mmreg + OFFSET_IOMMU_ICH_FAULT_STATUS);
+ if (ICH_FAULT & FLPT_PAGE_FAULT) {
+ ret++;
+ dev_err(dev,
+ "[%s][IN] First-level page table fault detected at 0x%x",
+ type, ioread32(mmreg + OFFSET_IOMMU_ICH_FAULT_VA));
+ }
+ if (ICH_FAULT & SLPT_PAGE_FAULT) {
+ ret++;
+ dev_err(dev,
+ "[%s][IN] Second-level page table fault detected at 0x%x",
+ type, ioread32(mmreg + OFFSET_IOMMU_ICH_FAULT_VA));
+ }
+ if (ICH_FAULT & PTW_ACCESS_FAULT) {
+ ret++;
+ dev_err(dev, "[%s][IN] Page Table Walker (PTW) access fault",
+ type);
+ }
+ if (ICH_FAULT & ATU_ACCESS_FAULT) {
+ ret++;
+ dev_err(dev,
+ "[%s][IN] Address Translation Unit (PTU) access fault",
+ type);
+ }
+
+ OCH_FAULT = ioread32(mmreg + OFFSET_IOMMU_OCH_FAULT_STATUS);
+ if (OCH_FAULT & FLPT_PAGE_FAULT) {
+ ret++;
+ dev_err(dev,
+ "[%s][OUT] First-level page table fault detected at 0x%x",
+ type, ioread32(mmreg + OFFSET_IOMMU_OCH_FAULT_VA));
+ }
+ if (OCH_FAULT & SLPT_PAGE_FAULT) {
+ ret++;
+ dev_err(dev,
+ "[%s][OUT] Second-level page table fault detected at 0x%x",
+ type, ioread32(mmreg + OFFSET_IOMMU_OCH_FAULT_VA));
+ }
+ if (OCH_FAULT & PTW_ACCESS_FAULT) {
+ ret++;
+ dev_err(dev, "[%s][OUT] Page Table Walker (PTW) access fault",
+ type);
+ }
+ if (OCH_FAULT & ATU_ACCESS_FAULT) {
+ ret++;
+ dev_err(dev,
+ "[%s][OUT] Address Translation Unit (PTU) access fault",
+ type);
+ }
+
+ return ret;
+}
+
+static int srnpu_iommu_fault_handler(struct iommu_domain *domain,
+ struct device *dev, unsigned long iova,
+ int flags, void *token)
+{
+ struct srnpu_iommu *_iommu;
+ int verbose = flags;
+ int num_fault = 0;
+
+ if (!dev)
+ return -EINVAL;
+
+ _iommu = dev_to_srnpu_iommu(dev);
+ if (!_iommu)
+ return -EINVAL;
+
+ num_fault += srnpu_iommu_fault_mmreg(dev, _iommu->regbase_dla, true);
+ num_fault += srnpu_iommu_fault_mmreg(dev, _iommu->regbase_dsp, false);
+
+ if (verbose > 0 && num_fault > 0)
+ srnpu_iommu_dump_page_table(dev);
+
+ return num_fault;
+}
+
+static struct iommu_domain *srnpu_iommu_domain_alloc(unsigned type)
+{
+ struct srnpu_iommu_domain *_domain;
+
+#ifdef CONFIG_ARM64
+ if ((type != IOMMU_DOMAIN_UNMANAGED) && (type != IOMMU_DOMAIN_DMA))
+#else
+ if (type != IOMMU_DOMAIN_UNMANAGED)
+#endif
+ return NULL;
+
+ _domain = kzalloc(sizeof(*_domain), GFP_KERNEL);
+ if (!_domain)
+ return NULL;
+
+ spin_lock_init(&_domain->lock);
+
+#ifdef CONFIG_ARM64
+ if (type == IOMMU_DOMAIN_DMA &&
+     iommu_get_dma_cookie(&_domain->domain)) {
+ kfree(_domain);
+ return NULL;
+ }
+#endif
+
+ _domain->flpt = srnpu_iommu_flpt;
+
+ iommu_set_fault_handler(&_domain->domain, srnpu_iommu_fault_handler,
+ NULL);
+
+ return &_domain->domain;
+}
+
+static void srnpu_iommu_domain_free(struct iommu_domain *domain)
+{
+ struct srnpu_iommu_domain *_domain = domain_to_srnpu_domain(domain);
+
+ if (!_domain)
+ return;
+
+ kfree(_domain);
+}
+
+static int srnpu_iommu_attach_dev(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct srnpu_iommu *_iommu;
+
+ _iommu = dev_to_srnpu_iommu(dev);
+ if (!_iommu)
+ return -ENODEV;
+
+ srnpu_iommu_enable(_iommu);
+
+ return 0;
+}
+
+static void srnpu_iommu_detach_dev(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct srnpu_iommu *_iommu;
+
+ _iommu = dev_to_srnpu_iommu(dev);
+ if (!_iommu)
+ return;
+
+ srnpu_iommu_disable(_iommu);
+}
+
+#ifdef CONFIG_ARM64
+static struct iommu_group *srnpu_iommu_device_group(struct device *dev)
+{
+ struct srnpu_iommu_drvdata *_data;
+
+ _data = dev_to_srnpu_iommu_drvdata(dev);
+ if (!_data)
+ return ERR_PTR(-ENODEV);
+
+ if (!_data->group) {
+ _data->group = iommu_group_alloc();
+ if (IS_ERR(_data->group)) {
+ dev_err(dev,
+ "failed to allocate a srnpu IOMMU group\n");
+ return _data->group;
+ }
+ } else {
+ iommu_group_ref_get(_data->group);
+ }
+
+ return _data->group;
+}
+#endif
+
+static struct iommu_ops srnpu_iommu_ops;
+static struct platform_driver srnpu_iommu_driver;
+
+static int srnpu_iommu_xlate(struct device *dev, struct of_phandle_args *spec)
+{
+ struct platform_device *pdev;
+ struct srnpu_iommu_drvdata *_data;
+ struct iommu_fwspec *fwspec;
+
+ pdev = of_find_device_by_node(spec->np);
+ if (!pdev)
+ return -ENODEV;
+
+ _data = platform_get_drvdata(pdev);
+ if (!_data)
+ return -ENODEV;
+
+#ifdef CONFIG_ARM
+ if (!_data->mapping) {
+ struct dma_iommu_mapping *mapping;
+
+ /* Support 2GB iova address space */
+ mapping = arm_iommu_create_mapping(&platform_bus_type, SZ_1G,
+ SZ_2G);
+ if (IS_ERR(mapping)) {
+ return PTR_ERR(mapping);
+ }
+
+ _data->mapping = mapping;
+ }
+#endif
+
+ fwspec = dev_to_iommu_fwspec(dev);
+ if (!fwspec) {
+ int err = iommu_fwspec_init(dev, &spec->np->fwnode,
+ &srnpu_iommu_ops);
+
+ if (err)
+ return err;
+
+ fwspec = dev_to_iommu_fwspec(dev);
+ } else if (fwspec->ops != &srnpu_iommu_ops) {
+ return -EINVAL;
+ }
+
+ if (!dev_to_srnpu_iommu(dev)) {
+ struct srnpu_iommu *_iommu;
+
+ _iommu = kzalloc(sizeof(*_iommu), GFP_KERNEL);
+ if (!_iommu)
+ return -ENOMEM;
+
+ _iommu->data = _data;
+ _iommu->dev = dev;
+
+ dev_set_srnpu_iommu(dev, _iommu);
+ }
+
+ return iommu_fwspec_add_ids(dev, spec->args, 1);
+}
+
+static void srnpu_iommu_setup_mmreg(struct device *dev)
+{
+ struct srnpu_iommu *_iommu;
+ struct device_node *np;
+ struct resource mmreg;
+ phys_addr_t paddr, base;
+
+ _iommu = dev_to_srnpu_iommu(dev);
+ if (!_iommu)
+ return;
+
+ paddr = virt_to_phys(srnpu_iommu_flpt);
+ np = dev->of_node;
+ if (of_address_to_resource(np, MMREG_CBOX_IDX, &mmreg) < 0) {
+ dev_err(dev, "Unable to find CBOX mmreg\n");
+ return;
+ }
+ base = mmreg.start;
+
+ mmreg.start = base + OFFSET_CBOX_IOMMU_DLA;
+ mmreg.end = mmreg.start + LENGTH_CBOX_IOMMU - 1;
+
+ _iommu->regbase_dla = devm_ioremap_resource(dev, &mmreg);
+ if (IS_ERR(_iommu->regbase_dla)) {
+ dev_err(dev, "Unable to ioremap DLA mmreg\n");
+ return;
+ }
+
+ mmreg.start = base + OFFSET_CBOX_IOMMU_DSP;
+ mmreg.end = mmreg.start + LENGTH_CBOX_IOMMU - 1;
+
+ _iommu->regbase_dsp = devm_ioremap_resource(dev, &mmreg);
+ if (IS_ERR(_iommu->regbase_dsp))
+ dev_err(dev, "Unable to ioremap DSP mmreg\n");
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0))
+static int srnpu_iommu_add_device(struct device *dev)
+{
+ struct iommu_fwspec *fwspec = dev_to_iommu_fwspec(dev);
+ struct srnpu_iommu_drvdata *_data;
+ struct iommu_group *group;
+ int err;
+
+ if (!fwspec || fwspec->ops != &srnpu_iommu_ops)
+ return -ENODEV;
+
+ _data = dev_to_srnpu_iommu_drvdata(dev);
+ err = iommu_device_link(&_data->iommu, dev);
+ if (err < 0)
+ return -ENODEV;
+
+ srnpu_iommu_setup_mmreg(dev);
+
+#ifdef CONFIG_ARM
+ /* FIXME: ARM DMA code doesn't understand multi-device groups */
+ group = iommu_group_alloc();
+ if (IS_ERR(group)) {
+ err = PTR_ERR(group);
+ goto err_unlink;
+ }
+
+ err = iommu_group_add_device(group, dev);
+ iommu_group_put(group);
+ if (err)
+ goto err_unlink;
+
+ err = arm_iommu_attach_device(dev, _data->mapping);
+ if (err) {
+ iommu_group_remove_device(dev);
+ goto err_unlink;
+ }
+#else
+ group = iommu_group_get_for_dev(dev);
+ if (IS_ERR(group)) {
+ err = PTR_ERR(group);
+ goto err_unlink;
+ }
+ iommu_group_put(group);
+#endif
+ return 0;
+
+err_unlink:
+ iommu_device_unlink(&_data->iommu, dev);
+ return err;
+}
+
+static void srnpu_iommu_remove_device(struct device *dev)
+{
+ struct iommu_fwspec *fwspec = dev_to_iommu_fwspec(dev);
+ struct srnpu_iommu_drvdata *_data;
+
+ if (!fwspec || fwspec->ops != &srnpu_iommu_ops)
+ return;
+
+ _data = dev_to_srnpu_iommu_drvdata(dev);
+ iommu_device_unlink(&_data->iommu, dev);
+ iommu_group_remove_device(dev);
+ iommu_fwspec_free(dev);
+}
+#else
+static struct iommu_device *srnpu_iommu_probe_device(struct device *dev)
+{
+ struct iommu_fwspec *fwspec = dev_to_iommu_fwspec(dev);
+ struct srnpu_iommu_drvdata *_data;
+
+ if (!fwspec || fwspec->ops != &srnpu_iommu_ops)
+ return ERR_PTR(-ENODEV);
+
+ _data = dev_to_srnpu_iommu_drvdata(dev);
+ if (!_data)
+ return ERR_PTR(-EINVAL);
+
+ srnpu_iommu_setup_mmreg(dev);
+ return &_data->iommu;
+}
+
+#ifdef CONFIG_ARM
+static void srnpu_iommu_probe_finalize(struct device *dev)
+{
+ struct srnpu_iommu_drvdata *_data;
+ struct dma_iommu_mapping *mapping;
+
+ _data = dev_to_srnpu_iommu_drvdata(dev);
+ if (!_data || !_data->mapping)
+ return;
+
+ mapping = _data->mapping;
+ if (arm_iommu_attach_device(dev, mapping))
+ dev_err(dev, "Unable to attach iommu mapping to the device");
+}
+#endif
+
+static void srnpu_iommu_release_device(struct device *dev)
+{
+ struct iommu_fwspec *fwspec = dev_to_iommu_fwspec(dev);
+
+ if (!fwspec || fwspec->ops != &srnpu_iommu_ops)
+ return;
+
+ iommu_fwspec_free(dev);
+}
+
+#ifdef CONFIG_ARM
+static int srnpu_iommu_def_domain_type(struct device *dev)
+{
+ return IOMMU_DOMAIN_UNMANAGED;
+}
+#endif
+#endif
+
+static phys_addr_t srnpu_iommu_iova_to_phys(struct iommu_domain *domain,
+ dma_addr_t iova)
+{
+ struct srnpu_iommu_domain *_domain;
+ phys_addr_t paddr = 0;
+ uint32_t vpn;
+ uint32_t *pte;
+
+ _domain = domain_to_srnpu_domain(domain);
+ if (!_domain)
+ return 0;
+
+ vpn = iova_to_vpn(iova);
+ pte = &_domain->flpt[vpn];
+
+ if (!pte || !(*pte & MASK_PTE_MAPPED))
+ return 0;
+
+ if (CHECK_BITS(*pte, MASK_FLPTE_MAP_16M) ||
+ CHECK_BITS(*pte, MASK_FLPTE_MAP_2M) ||
+ CHECK_BITS(*pte, MASK_FLPTE_MAP_1M)) {
+ paddr |= *pte;
+ if (CHECK_BITS(*pte, MASK_FLPTE_MAP_16M)) {
+ paddr >>= LSHFT_FLPTE_PPN_16M;
+ paddr <<= RSHFT_FLPTE_PPN_16M;
+ paddr |= (iova & MASK_IOVA_TO_PAGEOFFSET_16M);
+ } else if (CHECK_BITS(*pte, MASK_FLPTE_MAP_2M)) {
+ paddr >>= LSHFT_FLPTE_PPN_2M;
+ paddr <<= RSHFT_FLPTE_PPN_2M;
+ paddr |= (iova & MASK_IOVA_TO_PAGEOFFSET_2M);
+ } else {
+ paddr >>= LSHFT_FLPTE_PPN_1M;
+ paddr <<= RSHFT_FLPTE_PPN_1M;
+ paddr |= (iova & MASK_IOVA_TO_PAGEOFFSET_1M);
+ }
+ } else {
+ uint32_t *slpt_base = phys_to_virt(flpte_to_slpt_base(*pte));
+ uint32_t lv2vpn = iova_to_lv2vpn(iova);
+
+ paddr |= slpt_base[lv2vpn];
+ if (CHECK_BITS(*pte, MASK_SLPTE_MAP_64K)) {
+ paddr >>= LSHFT_SLPTE_PPN_64K;
+ paddr <<= RSHFT_SLPTE_PPN_64K;
+ paddr &= MASK_SLPTE_PPN_MASK_64K;
+ paddr |= (iova & MASK_IOVA_TO_PAGEOFFSET_64K);
+ } else {
+ paddr >>= LSHFT_SLPTE_PPN_4K;
+ paddr <<= RSHFT_SLPTE_PPN_4K;
+ paddr &= MASK_SLPTE_PPN_MASK_4K;
+ paddr |= (iova & MASK_IOVA_TO_PAGEOFFSET_4K);
+ }
+ }
+
+ return paddr;
+}
+
+static void srnpu_iommu_get_resv_regions(struct device *dev,
+ struct list_head *head)
+{
+ struct srnpu_iommu_drvdata *_data;
+ struct iommu_resv_region *region;
+
+ _data = dev_to_srnpu_iommu_drvdata(dev);
+ region = iommu_alloc_resv_region(_data->resv.paddr, _data->resv.size,
+ IOMMU_WRITE | IOMMU_READ,
+ IOMMU_RESV_DIRECT);
+ if (!region)
+ return;
+
+ list_add_tail(&region->list, head);
+}
+
+static void srnpu_iommu_put_resv_regions(struct device *dev,
+ struct list_head *head)
+{
+ struct iommu_resv_region *entry, *next;
+
+ list_for_each_entry_safe (entry, next, head, list)
+ kfree(entry);
+}
+
+static struct iommu_ops srnpu_iommu_ops = {
+ .domain_alloc = srnpu_iommu_domain_alloc,
+ .domain_free = srnpu_iommu_domain_free,
+ .attach_dev = srnpu_iommu_attach_dev,
+ .detach_dev = srnpu_iommu_detach_dev,
+ .map = srnpu_iommu_map,
+ .unmap = srnpu_iommu_unmap,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0))
+ .map_sg = default_iommu_map_sg,
+#endif
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0))
+ .add_device = srnpu_iommu_add_device,
+ .remove_device = srnpu_iommu_remove_device,
+#else
+ .probe_device = srnpu_iommu_probe_device,
+ .release_device = srnpu_iommu_release_device,
+#ifdef CONFIG_ARM
+ .probe_finalize = srnpu_iommu_probe_finalize,
+ .def_domain_type = srnpu_iommu_def_domain_type,
+#endif
+#endif
+#ifdef CONFIG_ARM64
+ .device_group = srnpu_iommu_device_group,
+#endif
+ .iova_to_phys = srnpu_iommu_iova_to_phys,
+ .of_xlate = srnpu_iommu_xlate,
+ .get_resv_regions = srnpu_iommu_get_resv_regions,
+ .put_resv_regions = srnpu_iommu_put_resv_regions,
+ .pgsize_bitmap = SRNPU_IOMMU_PGSIZE_BITMAP,
+};
+
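+/*
+ * Pick up the optional reserved (direct-mapped) region from the device
+ * tree: either an "sdp_mmap" node whose "samsung,npu" property carries
+ * <paddr_hi paddr_lo size>, or a "samsung,npu" node whose first address
+ * entry describes the region.
+ */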
+static void srnpu_iommu_set_resv(struct srnpu_iommu_drvdata *_data)
+{
+ struct device_node *np;
+
+ np = of_find_node_by_name(NULL, "sdp_mmap");
+ if (np) {
+ u32 info[3];
+
+ if (of_property_read_u32_array(np, "samsung,npu", info, 3) < 0)
+ return;
+
+ _data->resv.paddr = info[1];
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+ _data->resv.paddr += ((u64)info[0]) << 32;
+#endif
+ _data->resv.size = info[2];
+ } else {
+ struct resource res;
+
+ np = of_find_node_by_name(NULL, "samsung,npu");
+ if (!np)
+ return;
+
+ if (of_address_to_resource(np, 0, &res) < 0)
+ return;
+
+ _data->resv.paddr = res.start;
+ _data->resv.size = resource_size(&res);
+ }
+}
+
+static int srnpu_iommu_probe(struct platform_device *pdev)
+{
+ struct srnpu_iommu_drvdata *_data;
+ struct device *dev = &pdev->dev;
+ struct device_node *of = dev->of_node;
+ int err;
+
+ _data = devm_kzalloc(dev, sizeof(*_data), GFP_KERNEL);
+ if (!_data)
+ return -ENOMEM;
+
+ _data->dev = dev;
+
+ err = iommu_device_sysfs_add(&_data->iommu, dev, NULL, dev_name(dev));
+ if (err)
+ return err;
+
+ iommu_device_set_ops(&_data->iommu, &srnpu_iommu_ops);
+ iommu_device_set_fwnode(&_data->iommu, &of->fwnode);
+
+ err = iommu_device_register(&_data->iommu);
+ if (err)
+ goto err_sysfs;
+
+ platform_set_drvdata(pdev, _data);
+ srnpu_iommu_set_resv(_data);
+
+ err = bus_set_iommu(&platform_bus_type, &srnpu_iommu_ops);
+ if (err)
+ dev_warn(dev,
+ "failed to set iommu-callbacks for the platform_bus");
+
+ dev_info(dev, "probed\n");
+
+ return 0;
+err_sysfs:
+ iommu_device_sysfs_remove(&_data->iommu);
+
+ return err;
+}
+
+static int srnpu_iommu_remove(struct platform_device *pdev)
+{
+ struct srnpu_iommu_drvdata *_data = platform_get_drvdata(pdev);
+
+ if (!_data)
+ return 0;
+
+ iommu_device_sysfs_remove(&_data->iommu);
+ iommu_device_unregister(&_data->iommu);
+
+ if (iommu_present(&platform_bus_type))
+ bus_set_iommu(&platform_bus_type, NULL);
+
+ dev_info(&pdev->dev, "removed\n");
+
+ return 0;
+}
+
+static const struct of_device_id srnpu_iommu_of_match[] = {
+ { .compatible = "samsung,srnpu-iommu" },
+ { /* sentinel */ }
+};
+
+static struct platform_driver srnpu_iommu_driver = {
+ .probe = srnpu_iommu_probe,
+ .remove = srnpu_iommu_remove,
+ .driver =
+ {
+ .name = "srnpu_iommu",
+ .of_match_table = of_match_ptr(srnpu_iommu_of_match),
+ },
+};
+
+static int __init srnpu_iommu_init(void)
+{
+ struct device_node *np;
+ int ret;
+
+ np = of_find_matching_node(NULL, srnpu_iommu_of_match);
+ if (!np)
+ return 0;
+ of_node_put(np);
+
+ /* The base address of the FLPT must be aligned to 16 KiB. */
+ srnpu_iommu_flpt = alloc_pages_exact(FLPT_SIZE, GFP_KERNEL);
+ if (!srnpu_iommu_flpt)
+ return -ENOMEM;
+ if (!IS_ALIGNED(virt_to_phys(srnpu_iommu_flpt), SZ_16K)) {
+ free_pages_exact(srnpu_iommu_flpt, FLPT_SIZE);
+ srnpu_iommu_flpt = NULL;
+ return -ENOMEM;
+ }
+ memset(srnpu_iommu_flpt, 0x0, FLPT_SIZE);
+
+ /* The base address of each SLPT must be aligned to 1 KiB. */
+ srnpu_iommu_slpt_cache =
+ kmem_cache_create("srnpu_iommu_slpt_cache", SLPT_SIZE, BIT(10),
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!srnpu_iommu_slpt_cache) {
+ ret = -ENOMEM;
+ goto err_cleanup_flpt;
+ }
+
+ ret = platform_driver_register(&srnpu_iommu_driver);
+ if (ret)
+ goto err_cleanup_cache;
+
+ return 0;
+
+err_cleanup_cache:
+ kmem_cache_destroy(srnpu_iommu_slpt_cache);
+ srnpu_iommu_slpt_cache = NULL;
+err_cleanup_flpt:
+ free_pages_exact(srnpu_iommu_flpt, FLPT_SIZE);
+ srnpu_iommu_flpt = NULL;
+
+ return ret;
+}
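+
+/* Registered at subsys_initcall time so the shared page tables exist before
+ * any NPU device is probed.
+ */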
+subsys_initcall(srnpu_iommu_init);
source "drivers/misc/cardreader/Kconfig"
source "drivers/misc/habanalabs/Kconfig"
source "drivers/misc/uacce/Kconfig"
+source "drivers/misc/trinity/Kconfig"
endmenu
obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o
obj-$(CONFIG_HISI_HIKEY_USB) += hisi_hikey_usb.o
obj-$(CONFIG_TIZEN_INFORM_REBOOT) += tizen-inform-reboot.o
+obj-$(CONFIG_TRINITY) += trinity/
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+
+config TRINITY
+ bool "SR Neural Processing Unit Driver"
+ default n
+ help
+ Select this option to enable driver support for the Samsung
+ Research (SR) Neural Processing Unit (NPU). This is the base
+ driver for the other Trinity device family drivers, so it must
+ be enabled to support Trinity Vision (TRIV), Trinity Vision 2
+ (TRIV2), and Trinity Audio (TRIA).
+
+config TRINITY_HWMEM
+ bool "Trinity DMA Buffer Manager"
+ depends on TRINITY
+ default n
+ help
+ Choose this option to enable the Trinity DMA buffer manager,
+ used by Trinity device drivers to allocate DMA buffers. This
+ enables userspace programs to allocate DMA buffers via the
+ Trinity device nodes such as /dev/triv-N and /dev/triv2-N.
+
+config TRINITY_SCHED
+ bool "Trinity Task Scheduler"
+ depends on TRINITY
+ default n
+ help
+ Choose this option to enable the Trinity Task Scheduler.
+
+config TRINITY_SCHED_SR
+ bool "Trinity Task Schduler by Samsung Research"
+ depends on TRINITY_SCHED
+ default n
+ help
+ Choose this option to enable SR Task Scheduler via the
+ Trinity Task Scheduler.
+
+config TRINITY_VISION2
+ tristate "SR Tinity Vision 2 Driver"
+ depends on TRINITY && TRINITY_HWMEM
+ default n
+ help
+ Select this option to enable driver support for a Samsung
+ Research (SR) Neural Processing Unit (NPU), Trinity Vision 2.
+ This driver enables the userspace system library to access the
+ device via /dev/triv2-N.
+
+config TRINITY_SYSFS
+ bool "Trinity SYSFS support"
+ depends on TRINITY && SYSFS
+ default n
+ help
+ Choose this option to enable the Trinity SYSFS interface.
+
+config TRINITY_DEBUG
+ bool "Debugging capability for trinity drivers including debugfs"
+ depends on TRINITY && DEBUG_FS
+ default n
+ help
+ Select this option to enable debugging support for a Samsung
+ Research (SR) Neural Processing Unit (NPU).
+
+config TRINITY_MONITOR
+ bool "Device status monitor for trinity drivers"
+ depends on TRINITY
+ default n
+ help
+ Select this option to enable device monitor support for a Samsung
+ Research (SR) Neural Processing Unit (NPU).
+
+config TRINITY_FPGA
+ bool "Enable some workaround codes for FPGA envionment"
+ depends on TRINITY_VISION2 && TRINITY_HWMEM
+ default n
+ help
+ Select this option to enable the FPGA workaround code.
+
+config TRINITY_FPGA_DEBUG
+ bool "Enable debugging mode for FPGA envionment"
+ depends on TRINITY_FPGA
+ default n
+ help
+ Select this option to enable FPGA debugging mode.
+
+choice
+ prompt "Select supported platform"
+ depends on TRINITY
+ default TRINITY_SUPPORT_FVP
+
+config TRINITY_SUPPORT_FVP
+ bool "Arm FVP support"
+
+config TRINITY_SUPPORT_FPGA
+ depends on TRINITY_FPGA
+ bool "Xilinx FPGA support"
+
+endchoice
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_TRINITY_VISION2) += trinity_vision2.o
+
+CFLAGS_trinity_trace.o += -I$(src)
+trinity-y := trinity.o trinity_vision2_drv.o trinity_stat.o
+trinity-$(CONFIG_TRINITY_SCHED) += trinity_sched.o
+trinity-$(CONFIG_TRINITY_SCHED_SR) += trinity_sched_sr.o
+trinity-$(CONFIG_TRINITY_HWMEM) += trinity_hwmem.o trinity_resv_mem.o
+trinity-$(CONFIG_TRINITY_FPGA) += trinity_hwmem_iommu_helper.o
+trinity-$(CONFIG_TRINITY_SYSFS) += trinity_sysfs.o
+trinity-$(CONFIG_TRINITY_DEBUG) += trinity_debug.o trinity_trace.o
+trinity-$(CONFIG_TRINITY_MONITOR) += trinity_monitor.o
+trinity-$(CONFIG_TRINITY_SUPPORT_FVP) += support/arm_fvp.o
+trinity-$(CONFIG_TRINITY_SUPPORT_FPGA) += support/xilinx_fpga.o
+
+trinity_vision2-objs := $(trinity-y)
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "../trinity_common.h"
+#include <linux/of_address.h>
+#include <linux/pm_runtime.h>
+
+int trinity_get_dma_memory(struct device *dev, phys_addr_t *paddr,
+ dma_addr_t *daddr, size_t *size)
+{
+ struct device_node *np;
+ struct resource res;
+ int err;
+
+ if (!dev || !paddr || !daddr || !size)
+ return -EINVAL;
+
+ np = of_find_node_by_name(NULL, "samsung,npu");
+ if (!np)
+ return -ENOENT;
+
+ err = of_address_to_resource(np, 0, &res);
+ if (err < 0)
+ return err;
+
+ *paddr = res.start;
+ *daddr = *paddr; /* direct mapping */
+ *size = resource_size(&res);
+
+ dev_info(dev, "Detected DMA memory region: %lx-%lx",
+ (unsigned long)*paddr, (unsigned long)(*paddr + *size));
+ return 0;
+}
+
+int trinity_get_extern_memory(struct device *dev, phys_addr_t *paddr,
+ dma_addr_t *daddr, size_t *size)
+{
+ return -ENOENT;
+}
+
+int trinity_pm_runtime_init(struct device *dev)
+{
+ return 0;
+}
+
+int trinity_pm_runtime_forbid(struct device *dev)
+{
+ pm_runtime_forbid(dev);
+ return 0;
+}
+
+void trinity_pm_runtime_allow(struct device *dev)
+{
+ pm_runtime_allow(dev);
+}
+
+void trinity_pm_runtime_attach(struct trinity_driver *drv)
+{
+}
+
+void trinity_reset_device(struct device *dev, bool do_test)
+{
+}
+
+void trinity_set_irq_affinity(int irq)
+{
+}
+
+void trinity_monitor_invalid_access(void)
+{
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "../trinity_common.h"
+#include <linux/pm_runtime.h>
+
+int trinity_get_dma_memory(struct device *dev, phys_addr_t *paddr,
+ dma_addr_t *daddr, size_t *size)
+{
+ struct device_node *np;
+ u64 info[3];
+ int err;
+
+ if (!dev || !paddr || !daddr || !size)
+ return -EINVAL;
+
+ np = of_find_node_by_name(NULL, "trinity");
+ if (!np)
+ return -ENOENT;
+
+ err = of_property_read_u64_array(np, "samsung,dma", info, 3);
+ if (err < 0)
+ return err;
+
+ *paddr = info[0];
+ *daddr = info[1];
+ *size = info[2];
+
+ dev_info(dev, "Detected DMA memory region: %lx-%lx",
+ (unsigned long)*paddr, (unsigned long)(*paddr + *size));
+ return 0;
+}
+
+int trinity_get_extern_memory(struct device *dev, phys_addr_t *paddr,
+ dma_addr_t *daddr, size_t *size)
+{
+ return -ENOENT;
+}
+
+int trinity_pm_runtime_init(struct trinity_driver *drv)
+{
+ return 0;
+}
+
+int trinity_pm_runtime_forbid(struct device *dev)
+{
+ pm_runtime_forbid(dev);
+ return 0;
+}
+
+void trinity_pm_runtime_allow(struct device *dev)
+{
+ pm_runtime_allow(dev);
+}
+
+void trinity_pm_runtime_attach(struct trinity_driver *drv)
+{
+}
+
+void trinity_reset_device(struct device *dev, bool do_test)
+{
+}
+
+void trinity_set_irq_affinity(int irq)
+{
+}
+
+void trinity_monitor_invalid_access(void)
+{
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Base device driver for Samsung Research Trinity device family support
+ *
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Wook Song <wook16.song@samsung.com>
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#include <asm/cacheflush.h>
+#include <linux/bitmap.h>
+#include <linux/device.h>
+#include <linux/dma-buf.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/iommu.h>
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_iommu.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/stddef.h>
+#include <linux/uaccess.h>
+
+#include "trinity_common.h"
+#include "trinity_monitor.h"
+#include "trinity_resv_mem.h"
+#include "trinity_stat.h"
+#include "trinity_trace.h"
+
+#define BASE_DEV_NAME "trinity"
+
+#ifndef TRUE
+#define TRUE 1
+#endif /* TRUE */
+
+#ifndef FALSE
+#define FALSE 0
+#endif /* FALSE */
+
+#ifdef CONFIG_TRINITY_FPGA
+#define TRINITY_PADDR_BASE (0x400000000)
+#else
+#define TRINITY_PADDR_BASE (0x0)
+#endif
+
+#define TRINITY_MODEL_HASH_BITS 10
+#define TRINITY_MODEL_HASH_SIZE (1 << TRINITY_MODEL_HASH_BITS)
+
+#define TRINITY_DEVVER(drv) ((drv)->desc->ver >> TRINITY_SHIFT_DEV)
+
+static struct hlist_bl_head trinity_model_node_hlist[TRINITY_MODEL_HASH_SIZE];
+
+/* A global lock for shared static variables such as dev_bitmap */
+static DEFINE_SPINLOCK(trinity_lock);
+
+/* A bitmap to keep track of active Trinity devices */
+static unsigned long dev_bitmap[TRINITY_DEV_END];
+
+static void trinity_model_get(struct trinity_model *model);
+static void trinity_model_put(struct trinity_model *model);
+
+phys_addr_t trinity_get_paddr(struct iommu_domain *domain, dma_addr_t daddr)
+{
+ if (domain)
+ return iommu_iova_to_phys(domain, daddr);
+
+ return TRINITY_PADDR_BASE + daddr;
+}
+
+void trinity_finish_req(struct trinity_driver *drv, struct trinity_req *req)
+{
+ if (drv->desc->check_profile(drv, req) < 0)
+ dev_warn(drv_to_dev_ptr(drv),
+ "Unable to get profile data from NPU\n");
+ trinity_hwmem_import_dmabuf_end(&req->input.import_info);
+ trinity_stat_finish_req(drv, req);
+ trinity_model_put(req->model);
+}
+
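+/*
+ * Model IDs pack the model's dmabuf fd above TRINITY_SHIFT_MODEL_ID with a
+ * wrapping counter in the low bits, so trinity_model_id_to_dbuf_fd() can
+ * later recover the fd.
+ */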
+static uint64_t trinity_gen_model_id(int32_t dbuf_fd)
+{
+ static uint32_t id;
+ uint64_t mid = 0;
+
+ spin_lock(&trinity_lock);
+ if (++id >= (1 << TRINITY_SHIFT_MODEL_ID))
+ id = 0;
+ mid = id;
+ spin_unlock(&trinity_lock);
+
+ mid |= ((uint64_t)dbuf_fd << TRINITY_SHIFT_MODEL_ID);
+
+ return mid;
+}
+
+static int32_t trinity_model_id_to_dbuf_fd(uint64_t id)
+{
+ return (id >> TRINITY_SHIFT_MODEL_ID) & UINT_MAX;
+}
+
+static void trinity_model_htable_init(void)
+{
+ int i;
+
+ for (i = 0; i < TRINITY_MODEL_HASH_SIZE; ++i)
+ INIT_HLIST_BL_HEAD(&trinity_model_node_hlist[i]);
+}
+
+void trinity_init_model_htable(const struct trinity_driver *drv,
+ struct trinity_model_htable *ht)
+{
+ ht->ht_heads = trinity_model_node_hlist;
+ ht->hash_size = TRINITY_MODEL_HASH_SIZE;
+ ht->hash_bits = TRINITY_MODEL_HASH_BITS;
+}
+
+static struct trinity_model *
+trinity_get_model_by_id(const struct trinity_driver *drv, const uint64_t id)
+{
+ struct trinity_model_htable ht;
+ struct hlist_bl_node *hn;
+ struct trinity_model *hm;
+ unsigned long key;
+ int32_t dbuf_fd;
+ bool found = false;
+
+ trinity_init_model_htable(drv, &ht);
+
+ dbuf_fd = trinity_model_id_to_dbuf_fd(id);
+ key = hash_long(dbuf_fd, ht.hash_bits);
+ hm = NULL;
+
+ hlist_bl_lock(&(ht.ht_heads[key]));
+ hlist_bl_for_each_entry (hm, hn, &(ht.ht_heads[key]), hnode) {
+ if (hm->config.id == id) {
+ found = true;
+ break;
+ }
+ }
+ hlist_bl_unlock(&(ht.ht_heads[key]));
+
+ return found ? hm : NULL;
+}
+
+/**
+ * trinity_register_model - Registers a model to the internal hashtable. Note
+ * that the driver is responsible for maintaining the hashtable.
+ *
+ * @drv: An instance of the trinity driver
+ * @model: Model information to be registered
+ *
+ * Returns 0 and sets model->id with a valid value, which is unique system-wide,
+ * on success. Otherwise, returns a negative error.
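+ *
+ * A minimal in-kernel usage sketch (the caller code here is hypothetical):
+ *
+ *	model = kzalloc(sizeof(*model), GFP_KERNEL);
+ *	...fill model->config, e.g. from userspace...
+ *	if (trinity_register_model(drv, model) < 0)
+ *		kfree(model);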
+ */
+int32_t trinity_register_model(struct trinity_driver *drv,
+ struct trinity_model *model)
+{
+ struct trinity_model_htable ht;
+ unsigned long key;
+ int32_t ret;
+
+ ret = trinity_hwmem_import_dmabuf_begin(drv_to_dev_ptr(drv),
+ model->config.dbuf_fd,
+ &model->import_info);
+ if (ret)
+ return ret;
+
+#ifdef CONFIG_ARM
+ /* sync model program data */
+ __cpuc_flush_dcache_area(model->import_info.addr,
+ model->import_info.buf->size);
+#endif
+
+ model->config.id = trinity_gen_model_id(model->config.dbuf_fd);
+ model->owner_id = trinity_get_app_id();
+
+ INIT_HLIST_BL_NODE(&model->hnode);
+
+ trinity_init_model_htable(drv, &ht);
+
+ key = hash_long(model->config.dbuf_fd, ht.hash_bits);
+
+ hlist_bl_lock(&(ht.ht_heads[key]));
+ hlist_bl_add_head(&model->hnode, &ht.ht_heads[key]);
+ hlist_bl_unlock(&(ht.ht_heads[key]));
+
+ kref_init(&model->refcnt);
+
+ return 0;
+}
+
+static void trinity_destroy_model(struct kref *refcnt)
+{
+ struct trinity_model *model =
+ container_of(refcnt, struct trinity_model, refcnt);
+
+ trinity_hwmem_import_dmabuf_end(&model->import_info);
+ kfree(model);
+}
+
+static void trinity_model_get(struct trinity_model *model)
+{
+ if (!model)
+ return;
+
+ kref_get(&model->refcnt);
+}
+
+static void trinity_model_put(struct trinity_model *model)
+{
+ if (!model)
+ return;
+
+ kref_put(&model->refcnt, trinity_destroy_model);
+}
+
+/**
+ * trinity_deregister_model - Deregisters the model with a given id from the
+ * table
+ *
+ * @drv: An instance of the trinity driver
+ * @id: An id of the model to be deregistered
+ *
+ * Returns 0 on success. Otherwise, returns a negative error.
+ */
+int32_t trinity_deregister_model(const struct trinity_driver *drv,
+ const uint64_t id)
+{
+ int32_t dbuf_fd = trinity_model_id_to_dbuf_fd(id);
+ struct trinity_model_htable ht;
+ unsigned long key;
+ struct hlist_bl_node *hn;
+ struct trinity_model *hm = NULL;
+
+ trinity_init_model_htable(drv, &ht);
+
+ key = hash_long(dbuf_fd, ht.hash_bits);
+ hlist_bl_lock(&(ht.ht_heads[key]));
+ hlist_bl_for_each_entry (hm, hn, &(ht.ht_heads[key]), hnode) {
+ if (hm->config.id == id) {
+ hlist_bl_del_init(&hm->hnode);
+ break;
+ }
+ }
+ hlist_bl_unlock(&(ht.ht_heads[key]));
+
+ if (!hm)
+ return -ENOENT;
+
+ trinity_model_put(hm);
+
+ return 0;
+}
+
+/**
+ * trinity_deregister_models_owned - Deregisters models owned
+ *
+ * @drv: An instance of the trinity driver
+ */
+void trinity_deregister_models_owned(struct trinity_driver *drv)
+{
+ struct trinity_model_htable ht;
+ struct trinity_model *hm;
+ struct hlist_bl_node *hn;
+ int i = 0, app_id = trinity_get_app_id();
+
+ trinity_init_model_htable(drv, &ht);
+
+retry:
+ for (; i < TRINITY_MODEL_HASH_SIZE; i++) {
+ hlist_bl_lock(&(ht.ht_heads[i]));
+ hlist_bl_for_each_entry (hm, hn, &(ht.ht_heads[i]), hnode) {
+ if (hm->owner_id == app_id) {
+ hlist_bl_del_init(&hm->hnode);
+ hlist_bl_unlock(&(ht.ht_heads[i]));
+
+ trinity_model_put(hm);
+
+ goto retry;
+ }
+ }
+ hlist_bl_unlock(&(ht.ht_heads[i]));
+ }
+}
+
+struct trinity_sched_desc *get_trinity_sched(struct trinity_req *req)
+{
+ struct trinity_sched_desc *sched;
+
+ if (req->input.config.task_handle != UINT_MAX)
+ sched = trinity_sched_find(SCHED_VD);
+ else
+ sched = trinity_sched_find(SCHED_SR);
+
+ return sched;
+}
+
+static int32_t trinity_submit_req(struct trinity_driver *drv,
+ struct trinity_req *req)
+{
+ struct trinity_sched_desc *sched;
+ struct device *dev;
+ wait_queue_head_t wq;
+ unsigned long timeout, timeout_ms;
+ unsigned long retry, max_retry = 10;
+ int ret = 0;
+
+ dev = drv_to_dev_ptr(drv);
+ sched = get_trinity_sched(req);
+ if (!sched) {
+ dev_err(dev, "Unable to find the target scheduler");
+ return -EINVAL;
+ }
+
+ /* optional req setup before submission */
+ if (drv->desc->prepare_req) {
+ ret = drv->desc->prepare_req(drv, req);
+ if (ret < 0) {
+ dev_err(dev, "Unable to prepare req submission: %d",
+ ret);
+ return ret;
+ }
+ }
+
+ req->submit_retry = 0;
+ timeout_ms = req->input.config.timeout_ms;
+ /* use the default timeout if the user didn't set one */
+ if (timeout_ms == 0)
+ timeout_ms = TRINITY_RUN_TIMEOUT_MSEC;
+
+ retry = 0;
+ init_waitqueue_head(&wq);
+ init_completion(&req->complete);
+
+ timeout = msecs_to_jiffies(timeout_ms);
+ while (wait_event_interruptible_timeout(wq, sched->ready(),
+ timeout / 10) == 0) {
+ if (retry == max_retry) {
+ ret = -ETIMEDOUT;
+ break;
+ }
+ retry++;
+ }
+
+ if (ret == 0) {
+ ret = trinity_stat_append_req(drv, req);
+ if (ret < 0) {
+ dev_err(dev, "Unable to append request stat: %d", ret);
+ return ret;
+ }
+
+ ret = sched->submit(req);
+ if (ret < 0)
+ trinity_stat_remove_req(drv, req, true);
+ }
+
+ if (ret < 0) {
+ dev_err(dev, "Unable to submit req to scheduler: %d", ret);
+ return ret;
+ }
+
+ if (req->input.config.output_mode != TRINITY_OUTPUT_HW) {
+ timeout = wait_for_completion_timeout(&req->complete, timeout);
+ /* Check and handle the timeout if its handler exists */
+ if (timeout == 0) {
+ bool canceled = false;
+
+ dev_warn(dev, "The request timeout reached: %lu ms",
+ timeout_ms);
+
+ if (sched->cancel) {
+ canceled = sched->cancel(req);
+ if (!canceled)
+ dev_warn(dev, "Unable to cancel req");
+ }
+
+ if (!canceled)
+ drv->desc->handle_timeout(drv, req);
+
+ req->stat->status = TRINITY_REQ_STATUS_ERROR;
+ ret = -ECANCELED;
+ } else if (req->stat->status == TRINITY_REQ_STATUS_ERROR) {
+ ret = -ECANCELED;
+ } else if (drv->verbose) {
+ dev_info(dev,
+ "Execution Cycles: %u, Elapsed Time (us): %u",
+ req->stat->prev_cycles, req->stat->prev_time);
+ }
+ trinity_finish_req(drv, req);
+ }
+
+ return ret;
+}
+
+static int32_t trinity_run_input(struct trinity_driver *drv,
+ struct trinity_input *input,
+ struct trinity_req *req)
+{
+ struct trinity_model *model;
+ int32_t err;
+
+ model = trinity_get_model_by_id(drv, input->config.model_id);
+ if (!model) {
+ dev_info(drv_to_dev_ptr(drv), "Unable to find the model");
+ return -EINVAL;
+ }
+
+ /* skip to submit this req */
+ if (model->config.program_size == 0 &&
+ input->config.output_mode != TRINITY_OUTPUT_HW)
+ return 0;
+
+ trinity_model_get(model);
+
+ err = trinity_hwmem_import_dmabuf_begin(drv_to_dev_ptr(drv),
+ input->config.dbuf_fd,
+ &input->import_info);
+ if (err < 0) {
+ trinity_model_put(model);
+ return err;
+ }
+
+ req->model = model;
+ err = trinity_submit_req(drv, req);
+ if (err == 0)
+ return 0;
+
+ if (err != -ECANCELED)
+ trinity_hwmem_import_dmabuf_end(&input->import_info);
+ return err;
+}
+
+static void trinity_remove_req_cb(void *data_drv, void *data_req)
+{
+ struct trinity_driver *drv = data_drv;
+ struct trinity_req *req = data_req;
+
+ if (!drv || !req)
+ return;
+
+ trinity_finish_req(drv, req);
+ trinity_stat_remove_req(drv, req, false);
+ drv->desc->dealloc_req(drv, req);
+}
+
+void trinity_sched_remove_requests(struct trinity_driver *drv)
+{
+ struct trinity_sched_desc *sched;
+
+ sched = trinity_sched_find(SCHED_VD);
+ if (!sched) {
+ dev_err(drv_to_dev_ptr(drv), "Unable to find VD scheduler");
+ return;
+ }
+ sched->remove_reqs(drv, trinity_remove_req_cb);
+}
+
+/**
+ * trinity_ioctl - A common callback for unlocked_ioctl() in file_operations for
+ * a Trinity device node.
+ *
+ * @f: A file instance of the opened device node
+ * @cmd: The target IOCTL command to be handled
+ * @arg: A user argument
+ *
+ * Returns 0 on success. Otherwise, returns a negative error.
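+ *
+ * A minimal userspace usage sketch (the device node name and the uapi
+ * header providing the TRINITY_IOCTL_* definitions are assumed):
+ *
+ *	int fd = open("/dev/triv2-0", O_RDWR);
+ *	uint32_t ver;
+ *
+ *	if (fd >= 0 && ioctl(fd, TRINITY_IOCTL_GET_VERSION, &ver) == 0)
+ *		printf("device version: 0x%08x\n", ver);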
+ */
+long trinity_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+ struct trinity_driver *drv = f->private_data;
+ const struct trinity_desc *desc = drv->desc;
+ ssize_t err = 0L;
+
+ switch (cmd) {
+ case TRINITY_IOCTL_GET_VERSION: {
+ if (copy_to_user((uint32_t __user *)arg, &(desc->ver),
+ sizeof((desc->ver))))
+ return -EFAULT;
+
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_trinity_ioctl_msg(drv->dev_id, trinity_get_app_id(),
+ "TRINITY_IOCTL_GET_VERSION");
+#endif
+ break;
+ }
+ case TRINITY_IOCTL_GET_API_LEVEL: {
+ uint32_t api_level = TRINITY_API_LEVEL;
+
+ if (copy_to_user((uint32_t __user *)arg, &api_level,
+ sizeof(api_level)))
+ return -EFAULT;
+
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_trinity_ioctl_msg(drv->dev_id, trinity_get_app_id(),
+ "TRINITY_IOCTL_GET_API_LEVEL");
+#endif
+ break;
+ }
+ case TRINITY_IOCTL_GET_STATE: {
+ enum trinity_state ready;
+
+ ready = drv->desc->get_state(drv);
+ if (copy_to_user((enum trinity_state __user *)arg, &ready,
+ sizeof(ready)))
+ return -EFAULT;
+
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_trinity_ioctl_msg(drv->dev_id, trinity_get_app_id(),
+ "TRINITY_IOCTL_GET_STATE");
+#endif
+ break;
+ }
+ case TRINITY_IOCTL_GET_TOPS: {
+ if (copy_to_user((uint32_t __user *)arg, &(drv->tops),
+ sizeof((drv->tops))))
+ return -EFAULT;
+
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_trinity_ioctl_msg(drv->dev_id, trinity_get_app_id(),
+ "TRINITY_IOCTL_GET_TOPS");
+#endif
+ break;
+ }
+ case TRINITY_IOCTL_GET_DSPM: {
+ if (copy_to_user((uint32_t __user *)arg, &(drv->dspm),
+ sizeof((drv->dspm))))
+ return -EFAULT;
+
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_trinity_ioctl_msg(drv->dev_id, trinity_get_app_id(),
+ "TRINITY_IOCTL_GET_DSPM");
+#endif
+ break;
+ }
+ case TRINITY_IOCTL_GET_NEXT_REQUEST: {
+ int32_t req_id = atomic_inc_return(&drv->global_req_id);
+
+ if (copy_to_user((int32_t __user *)arg, &req_id,
+ sizeof(req_id)))
+ return -EFAULT;
+
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_trinity_ioctl_next_req(drv->dev_id, trinity_get_app_id(),
+ req_id);
+#endif
+ break;
+ }
+ case TRINITY_IOCTL_HWMEM_ALLOC: {
+ struct trinity_ioctl_hwmem hwmem;
+
+ if (copy_from_user(&hwmem, (size_t __user *)arg, sizeof(hwmem)))
+ return -EFAULT;
+
+ err = trinity_hwmem_alloc(drv_to_dev_ptr(drv), hwmem.size,
+ hwmem.type);
+ if (err >= 0)
+ trinity_stat_app_total_alloc(drv, hwmem.size);
+
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_trinity_ioctl_hwmem_alloc(
+ drv->dev_id, trinity_get_app_id(), hwmem.size, err);
+#endif
+ break;
+ }
+ case TRINITY_IOCTL_HWMEM_DEALLOC: {
+ struct trinity_ioctl_hwmem hwmem;
+ struct dma_buf *dbuf;
+
+ if (copy_from_user(&hwmem, (size_t __user *)arg, sizeof(hwmem)))
+ return -EFAULT;
+
+ dbuf = dma_buf_get(hwmem.dbuf_fd);
+ if (IS_ERR(dbuf))
+ return PTR_ERR(dbuf);
+
+ err = trinity_hwmem_free(drv_to_dev_ptr(drv), hwmem.dbuf_fd);
+ if (err == 0)
+ trinity_stat_app_total_freed(drv, dbuf->size);
+
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_trinity_ioctl_hwmem_dealloc(
+ drv->dev_id, trinity_get_app_id(), hwmem.dbuf_fd);
+#endif
+ break;
+ }
+ case TRINITY_IOCTL_REGISTER_MODEL: {
+ struct trinity_model *model =
+ kzalloc(sizeof(struct trinity_model), GFP_KERNEL);
+
+ if (!model)
+ return -ENOMEM;
+
+ if (copy_from_user(&model->config,
+ (struct trinity_model __user *)arg,
+ sizeof(model->config))) {
+ kfree(model);
+ return -EFAULT;
+ }
+
+ err = trinity_register_model(drv, model);
+ if (err < 0)
+ break;
+
+ if (copy_to_user((struct trinity_model __user *)arg,
+ &model->config, sizeof(model->config))) {
+ trinity_deregister_model(drv, model->config.id);
+ return -EFAULT;
+ }
+
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_trinity_ioctl_register_model(
+ drv->dev_id, trinity_get_app_id(), model->config.id,
+ model->config.dbuf_fd,
+ model->config.program_offset_addr,
+ model->config.program_size);
+
+ if (TRINITY_DEVVER(drv) == 1)
+ trace_trinity_ioctl_register_model_drv_ver1(
+ model->config.weight_offset_addr);
+ else if (TRINITY_DEVVER(drv) == 2)
+ trace_trinity_ioctl_register_model_drv_ver2(
+ model->config.metadata_dbuf_fd,
+ model->config.metadata_ext_dbuf_fd,
+ model->config.metadata_ext_size);
+#endif
+ break;
+ }
+ case TRINITY_IOCTL_DEREGISTER_MODEL: {
+ uint64_t id;
+
+ if (copy_from_user(&id, (uint64_t __user *)arg, sizeof(id)))
+ return -EFAULT;
+
+ err = trinity_deregister_model(drv, id);
+
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_trinity_ioctl_msg(drv->dev_id, trinity_get_app_id(),
+ "TRINITY_IOCTL_DEREGISTER_MODEL");
+#endif
+ break;
+ }
+ case TRINITY_IOCTL_RUN_INPUT: {
+ struct trinity_req *req;
+ struct trinity_input *input;
+
+ req = drv->desc->alloc_req(drv);
+ if (!req)
+ return -ENOMEM;
+ req->drv = drv;
+ req->time_started = ktime_get();
+
+ input = &(req->input);
+ /* run the input based on the config received from the user */
+ if (copy_from_user(&input->config,
+ (struct trinity_input __user *)arg,
+ sizeof(input->config))) {
+ drv->desc->dealloc_req(drv, req);
+ return -EACCES;
+ }
+
+ err = trinity_run_input(drv, input, req);
+ if (err < 0) {
+ drv->desc->dealloc_req(drv, req);
+ return err;
+ }
+
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_trinity_ioctl_run_input(drv->dev_id, trinity_get_app_id(),
+ input->config.dbuf_fd,
+ input->config.model_id);
+
+ if (TRINITY_DEVVER(drv) == 1)
+ trace_trinity_ioctl_run_input_drv_ver1(
+ input->config.activation_offset_addr0,
+ input->config.activation_offset_addr1);
+ else if (TRINITY_DEVVER(drv) == 2)
+ trace_trinity_ioctl_run_input_drv_ver2(
+ input->config.timeout_ms,
+ input->config.priority,
+ input->config.num_segments,
+ input->config.input_mode,
+ input->config.output_mode);
+#endif
+
+ if (copy_to_user((struct trinity_input __user *)arg,
+ &input->config, sizeof(input->config))) {
+ drv->desc->dealloc_req(drv, req);
+ return -EACCES;
+ }
+
+ /* kernel-owned requests are freed later, when a stop request arrives */
+ if (!req->is_kernel)
+ drv->desc->dealloc_req(drv, req);
+
+ break;
+ }
+ case TRINITY_IOCTL_STOP_REQUESTS: {
+ if (drv->desc->stop_reqs) {
+ schedule_work(&drv->work_stop);
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_trinity_ioctl_msg(drv->dev_id,
+ trinity_get_app_id(),
+ "TRINITY_IOCTL_STOP_REQUESTS");
+#endif
+ } else {
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_trinity_ioctl_msg(
+ drv->dev_id, trinity_get_app_id(),
+ "TRINITY_IOCTL_STOP_REQUESTS: not supported");
+#endif
+ }
+ break;
+ }
+ case TRINITY_IOCTL_STOP_REQUEST: {
+ struct trinity_sched_desc *sched;
+ struct trinity_req *req;
+ int32_t req_id;
+
+ if (copy_from_user(&req_id, (int32_t __user *)arg,
+ sizeof(req_id)))
+ return -EFAULT;
+
+ sched = trinity_sched_find(SCHED_VD);
+ if (!sched) {
+ dev_err(drv_to_dev_ptr(drv),
+ "Unable to find VD scheduler");
+ return -EINVAL;
+ }
+
+ req = sched->find_req(drv->dev_id, req_id);
+ if (!req || !req->is_kernel) {
+ dev_err(drv_to_dev_ptr(drv),
+ "Unable to find the kernel request with ID %d",
+ req_id);
+ return -ENOENT;
+ }
+
+ sched->cancel(req);
+
+ trinity_remove_req_cb(drv, req);
+
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_trinity_ioctl_stop_req(drv->dev_id, trinity_get_app_id(),
+ req_id);
+#endif
+ break;
+ }
+ case TRINITY_IOCTL_STAT_CURRENT_APP: {
+ struct trinity_ioctl_stat_app ioctl_stat_app;
+
+ if (copy_from_user(&ioctl_stat_app,
+ (struct trinity_ioctl_stat_app __user *)arg,
+ sizeof(ioctl_stat_app)))
+ return -EFAULT;
+
+ trinity_stat_app_copy_ioctl(drv, &ioctl_stat_app);
+
+ if (copy_to_user((struct trinity_ioctl_stat_app __user *)arg,
+ &ioctl_stat_app, sizeof(ioctl_stat_app)))
+ return -EFAULT;
+
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_trinity_ioctl_msg(drv->dev_id, trinity_get_app_id(),
+ "TRINITY_IOCTL_STAT_CURRENT_APP");
+#endif
+ break;
+ }
+ case TRINITY_IOCTL_STAT_APPS: {
+ struct trinity_ioctl_stat_apps ioctl_stat_apps;
+
+ if (copy_from_user(&ioctl_stat_apps,
+ (struct trinity_ioctl_stat_apps __user *)arg,
+ sizeof(ioctl_stat_apps)))
+ return -EFAULT;
+
+ trinity_stat_apps_copy_ioctl(drv, &ioctl_stat_apps);
+
+ if (copy_to_user((struct trinity_ioctl_stat_apps __user *)arg,
+ &ioctl_stat_apps, sizeof(ioctl_stat_apps)))
+ return -EFAULT;
+
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_trinity_ioctl_msg(drv->dev_id, trinity_get_app_id(),
+ "TRINITY_IOCTL_STAT_APPS");
+#endif
+ break;
+ }
+ case TRINITY_IOCTL_STAT_REQS: {
+ struct trinity_ioctl_stat_reqs ioctl_stat_reqs;
+
+ if (copy_from_user(&ioctl_stat_reqs,
+ (struct trinity_ioctl_stat_reqs __user *)arg,
+ sizeof(ioctl_stat_reqs)))
+ return -EFAULT;
+
+ if (ioctl_stat_reqs.app_id == 0)
+ ioctl_stat_reqs.app_id = trinity_get_app_id();
+
+ trinity_stat_reqs_copy_ioctl(drv, &ioctl_stat_reqs);
+
+ if (copy_to_user((struct trinity_ioctl_stat_reqs __user *)arg,
+ &ioctl_stat_reqs, sizeof(ioctl_stat_reqs)))
+ return -EFAULT;
+
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_trinity_ioctl_msg(drv->dev_id, trinity_get_app_id(),
+ "TRINITY_IOCTL_STAT_REQS");
+#endif
+ break;
+ }
+ case TRINITY_IOCTL_GET_PROFILE_META: {
+ struct trinity_ioctl_profile_meta profile;
+
+ if (copy_from_user(
+ &profile,
+ (struct trinity_ioctl_profile_meta __user *)arg,
+ sizeof(profile)))
+ return -EFAULT;
+
+ if (drv->desc->get_profile_meta) {
+ err = drv->desc->get_profile_meta(drv, &profile);
+ } else {
+ profile.total_cycles = -1;
+ profile.total_ops = 0;
+ profile.profile_size = 0;
+ profile.input_footprint = -1;
+ profile.output_footprint = -1;
+ }
+
+ if (copy_to_user((struct trinity_ioctl_profile_meta __user *)arg,
+ &profile, sizeof(profile)))
+ return -EFAULT;
+
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_trinity_ioctl_get_profile_meta(drv->dev_id,
+ trinity_get_app_id(),
+ profile.req_id,
+ profile.profile_size);
+#endif
+ break;
+ }
+ case TRINITY_IOCTL_GET_PROFILE_BUFF: {
+ struct trinity_ioctl_profile_buff profile;
+
+ if (copy_from_user(
+ &profile,
+ (struct trinity_ioctl_profile_buff __user *)arg,
+ sizeof(profile)))
+ return -EFAULT;
+
+ if (drv->desc->get_profile_buff)
+ err = drv->desc->get_profile_buff(drv, &profile);
+
+ if (copy_to_user((struct trinity_ioctl_profile_buff __user *)arg,
+ &profile, sizeof(profile)))
+ return -EFAULT;
+
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_trinity_ioctl_get_profile_buff(
+ drv->dev_id, trinity_get_app_id(), profile.req_id,
+ profile.profile_pos, profile.profile_size);
+#endif
+ break;
+ }
+ case TRINITY_IOCTL_FPGA_MEMCPY: {
+ struct trinity_ioctl_fpga_memcpy fpga;
+ struct trinity_hwmem_import import_info;
+ struct iommu_domain *domain;
+ phys_addr_t paddr;
+ void __iomem *vaddr;
+ uint32_t val;
+ uint64_t i;
+
+ if (copy_from_user(
+ &fpga,
+ (struct trinity_ioctl_fpga_memcpy __user *)arg,
+ sizeof(fpga)))
+ return -EFAULT;
+
+ /* make sure that dbuf_off is PAGE_SIZE aligned */
+ if (!IS_ALIGNED(fpga.dbuf_off, PAGE_SIZE)) {
+ dev_err(drv->dev, "Unaligned dmabuf offset: 0x%x\n",
+ fpga.dbuf_off);
+ return -EINVAL;
+ }
+
+ err = trinity_hwmem_import_dmabuf_begin(
+ drv_to_dev_ptr(drv), fpga.dbuf_fd, &import_info);
+ if (err)
+ return err;
+
+ domain = iommu_get_domain_for_dev(drv->dev);
+ paddr = trinity_get_paddr(domain, import_info.dma_addr);
+
+ trinity_hwmem_import_dmabuf_end(&import_info);
+
+ vaddr = ioremap(paddr + fpga.dbuf_off,
+ PAGE_ALIGN(fpga.user_size));
+ if (vaddr == NULL) {
+ dev_err(drv->dev, "Unable to ioremap %lx",
+ (unsigned long)paddr);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < fpga.user_size; i += sizeof(uint32_t)) {
+ val = ioread32((char *)vaddr + i);
+ if (copy_to_user(((char __user *)fpga.user_addr) + i,
+ &val, sizeof(uint32_t))) {
+ err = -EFAULT;
+ break;
+ }
+ }
+
+ iounmap(vaddr);
+
+ break;
+ }
+ default:
+ return -ENOTTY;
+ }
+
+ return err;
+}
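+
+/*
+ * Illustrative user-space sketch (not part of the driver) of the hwmem
+ * alloc/dealloc flow handled above. The device node name is hypothetical;
+ * TRINITY_IOCTL_HWMEM_ALLOC returns the dma-buf fd on success (see
+ * trinity_hwmem_alloc()), which can be mmap()-ed and is later passed back
+ * for deallocation:
+ *
+ *   struct trinity_ioctl_hwmem hwmem = { .size = 4096 };
+ *   int fd = open("/dev/triv2-0", O_RDWR);
+ *   int dbuf_fd = ioctl(fd, TRINITY_IOCTL_HWMEM_ALLOC, &hwmem);
+ *
+ *   if (dbuf_fd >= 0) {
+ *       void *buf = mmap(NULL, hwmem.size, PROT_READ | PROT_WRITE,
+ *                        MAP_SHARED, dbuf_fd, 0);
+ *       ...
+ *       munmap(buf, hwmem.size);
+ *       hwmem.dbuf_fd = dbuf_fd;
+ *       ioctl(fd, TRINITY_IOCTL_HWMEM_DEALLOC, &hwmem);
+ *   }
+ */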
+
+/**
+ * trinity_release - A common callback for close() in file_operations for a
+ * Trinity device node. If there is device-specific data to be
+ * cleaned up, it must be cleaned up before invoking this
+ * callback.
+ *
+ * @inode: Inode to be closed
+ * @file: File to be closed
+ *
+ * Returns 0 on success. Otherwise, returns a negative error.
+ */
+int trinity_release(struct inode *inode, struct file *file)
+{
+ struct trinity_driver *drv;
+
+ drv = file->private_data;
+
+ if (drv->verbose)
+ dev_info(drv_to_dev_ptr(drv), "%s\n", "Device closed");
+
+ trinity_stat_app_set_status(drv, TRINITY_APP_STATUS_TERMINATED);
+
+ mutex_lock(&drv->lock);
+ drv->opened--;
+ if (drv->opened == 0) {
+ /* block newly incoming requests */
+ trinity_sched_suspend();
+
+ /* wait already submitted requests */
+ if (drv->desc->drain_reqs)
+ drv->desc->drain_reqs(drv);
+
+ /* deregister models owned by this device handle */
+ trinity_deregister_models_owned(drv);
+ /* remove all kernel requests submitted before */
+ trinity_sched_remove_requests(drv);
+
+ drv->desc->set_state(drv, TRINITY_STATE_PAUSE);
+
+ trinity_sched_resume();
+ }
+ mutex_unlock(&drv->lock);
+
+ return 0;
+}
+
+static bool trinity_is_empty(void)
+{
+ enum trinity_dev_type type;
+ bool empty = true;
+
+ spin_lock(&trinity_lock);
+ for (type = TRINITY_DEV_UNKNOWN + 1; type < TRINITY_DEV_END;
+ type++) {
+ if (find_first_bit(&dev_bitmap[type], TRINITY_DEV_EACH_MAX) !=
+ TRINITY_DEV_EACH_MAX) {
+ empty = false;
+ break;
+ }
+ }
+ spin_unlock(&trinity_lock);
+
+ return empty;
+}
+
+int trinity_wait_ready(struct trinity_driver *drv)
+{
+ const unsigned long time_out = HZ / 100UL; /* 10 ms */
+ const unsigned int max_retry = 10;
+ unsigned int retry = 0;
+ wait_queue_head_t wq;
+
+ drv->desc->set_state(drv, TRINITY_STATE_READY);
+
+ init_waitqueue_head(&wq);
+ /*
+ * Poll until the NPU reaches the ready state; the local waitqueue is
+ * never woken, so each wait_event_timeout() call simply sleeps for
+ * 'time_out' jiffies before re-checking the state.
+ */
+ while (wait_event_timeout(
+ wq, drv->desc->get_state(drv) == TRINITY_STATE_READY,
+ time_out) == 0) {
+ /* regarded as failure */
+ if (retry == max_retry)
+ return -ETIMEDOUT;
+ retry++;
+ }
+
+ return 0;
+}
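+
+/*
+ * Timing note (illustrative): time_out above is HZ / 100 = 10 ms per
+ * attempt, so with max_retry = 10 the device has up to ~110 ms to reach
+ * the ready state before -ETIMEDOUT is returned.
+ */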
+
+/**
+ * trinity_open - A common callback for open() in file_operations for a Trinity
+ * device node. If a device-specific open() is required, this
+ * callback should be invoked by that open().
+ *
+ * @inode: Inode to be opened
+ * @f: File to be opened
+ *
+ * Returns 0 on success. Otherwise, returns a negative error.
+ */
+int trinity_open(struct inode *inode, struct file *f)
+{
+ struct miscdevice *miscdev;
+ struct trinity_driver *drv;
+ int ret = 0;
+
+ miscdev = (struct miscdevice *)f->private_data;
+ drv = container_of(miscdev, struct trinity_driver, mdev);
+ f->private_data = drv;
+
+ mutex_lock(&drv->lock);
+ /** remove PAUSE set on the CP of the NPU */
+ if (drv->opened == 0) {
+ ret = trinity_wait_ready(drv);
+ if (ret != 0)
+ goto out;
+ }
+ drv->opened++;
+
+ if (drv->verbose)
+ dev_info(drv_to_dev_ptr(drv), "%s\n", "Device opened");
+
+ trinity_stat_app_set_status(drv, TRINITY_APP_STATUS_STARTED);
+
+out:
+ mutex_unlock(&drv->lock);
+
+ return ret;
+}
+
+static int trinity_declare_dma_memory(struct device *dev)
+{
+ phys_addr_t paddr;
+ dma_addr_t daddr;
+ size_t size;
+ int err;
+
+ err = trinity_get_dma_memory(dev, &paddr, &daddr, &size);
+ if (err < 0) {
+ dev_info(dev, "No available dma memory, skipping");
+ return 0;
+ }
+
+ err = trinity_declare_resv_mem(paddr, daddr, size);
+ if (err < 0) {
+ dev_err(dev, "Failed to declare reserved memory: %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+static void trinity_release_dma_memory(void)
+{
+ return trinity_release_resv_mem();
+}
+
+static void trinity_common_init(struct device *dev)
+{
+ if (!trinity_is_empty())
+ return;
+
+ trinity_reset_device(dev, true);
+ trinity_model_htable_init();
+
+ if (trinity_monitor_init(dev) < 0)
+ dev_warn(dev, "Failed to initialize monitor\n");
+
+ if (trinity_pm_runtime_init(dev) < 0)
+ dev_warn(dev, "Unable to initialize runtime pm\n");
+
+ if (trinity_debug_init() < 0)
+ dev_warn(dev, "Unable to initialize debugfs\n");
+
+ if (trinity_sched_init(dev) < 0)
+ dev_warn(dev, "Unable to initialize scheduler\n");
+
+ if (trinity_declare_dma_memory(dev) < 0)
+ dev_warn(dev, "Failed to declare DMA memory\n");
+}
+
+static void trinity_common_exit(void)
+{
+ if (!trinity_is_empty())
+ return;
+
+ trinity_release_dma_memory();
+ trinity_debug_exit();
+ trinity_sched_exit();
+}
+
+static int trinity_set_device_id(struct trinity_driver *drv)
+{
+ const struct trinity_desc *desc = drv->desc;
+ struct device *dev = drv_to_dev_ptr(drv);
+ int err = -EEXIST;
+
+ spin_lock(&trinity_lock);
+ drv->dev_id =
+ find_first_zero_bit(&dev_bitmap[dev->id], TRINITY_DEV_EACH_MAX);
+ if (drv->dev_id < TRINITY_DEV_EACH_MAX) {
+ set_bit(drv->dev_id, &dev_bitmap[dev->id]);
+ err = 0;
+ }
+ spin_unlock(&trinity_lock);
+
+ if (err == 0) {
+ drv->name = devm_kasprintf(dev, GFP_KERNEL, "%s-%u", desc->type,
+ drv->dev_id);
+ err = IS_ERR_OR_NULL(drv->name) ? -ENOMEM : 0;
+ }
+
+ return err;
+}
+
+int trinity_create_node(struct trinity_driver *drv)
+{
+ struct device *dev = drv_to_dev_ptr(drv);
+ int err;
+
+ /** register as a misc device */
+ drv->mdev.minor = MISC_DYNAMIC_MINOR;
+ drv->mdev.parent = NULL;
+ drv->mdev.name = drv->name;
+
+ err = misc_register(&drv->mdev);
+ if (err < 0)
+ dev_err(dev, "failed to register as a misc device");
+ else
+ dev_info(dev, "misc device created!");
+
+ return err;
+}
+
+void trinity_destroy_node(struct trinity_driver *drv)
+{
+ misc_deregister(&drv->mdev);
+}
+
+/**
+ * trinity_probe - Probes a new Trinity device. This is a standard interface to
+ * probe a Trinity family device.
+ *
+ * @pdev: Platform device structure to probe
+ * @desc: Device description to probe
+ *
+ * Returns 0 on success. Otherwise, returns a negative error.
+ */
+int trinity_probe(struct platform_device *pdev, const struct trinity_desc *desc)
+{
+ struct device_node *np;
+ struct device *dev;
+ struct trinity_driver *drv;
+ int irq_out;
+ int i, err;
+
+ dev = &pdev->dev;
+ dev->id = ((desc->ver & TRINITY_MASK_DEV) >> TRINITY_SHIFT_DEV);
+
+ /* set private data */
+ drv = devm_kzalloc(dev, sizeof(*drv), GFP_KERNEL);
+ if (drv == NULL)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, drv);
+ dev_set_drvdata(dev, drv);
+
+ drv->dev = dev;
+ drv->desc = desc;
+
+ np = dev->of_node;
+ if (of_property_match_string(np, "samsung,trinity-type",
+ desc->type) < 0)
+ return -EPROBE_DEFER;
+
+ /* get reg info for MMREG_BASE */
+ for (i = 0; i < TRINITY_MAX_MMREGS; i++) {
+ struct resource mmreg;
+
+ err = of_address_to_resource(np, i, &mmreg);
+ if (err < 0) {
+ if (i == 0) {
+ dev_err(dev, "failed to get %d-th mmreg info",
+ i);
+ return -EINVAL;
+ }
+ break;
+ }
+
+ drv->mmreg_vaddr[i] = devm_ioremap_resource(dev, &mmreg);
+ if (IS_ERR(drv->mmreg_vaddr[i])) {
+ dev_err(dev,
+ "failed to remap %d-th mmreg resource info", i);
+ return PTR_ERR(drv->mmreg_vaddr[i]);
+ }
+ drv->mmreg_paddr[i] = mmreg.start;
+ }
+
+ /** get a TOPS property */
+ err = of_property_read_u32(np, "samsung,tops", &drv->tops);
+ if (err < 0) {
+ dev_err(dev, "failed to read 'tops' property: %d\n", err);
+ return err;
+ }
+
+ /** get a DSPM property */
+ err = of_property_read_u32(np, "samsung,dspm", &drv->dspm);
+ if (err < 0) {
+ dev_info(dev, "Setting the size of DPSM to 0\n");
+ drv->dspm = 0;
+ }
+
+ /* Set IRQ handlers */
+ irq_out = platform_get_irq(pdev, 0);
+ if (irq_out < 0) {
+ dev_err(dev, "IRQ is not supported");
+ return irq_out;
+ }
+ trinity_set_irq_affinity(irq_out);
+
+ /* get the IRQ number from DT and set handlers for it */
+ err = devm_request_irq(dev, irq_out, desc->handle_irq,
+ IRQF_TRIGGER_HIGH, desc->type, &drv->mdev);
+ if (err < 0) {
+ dev_err(dev, "failed to register handlers for IRQ %d", irq_out);
+ return err;
+ }
+
+ /** Initialize device-specific variables */
+ init_completion(&drv->complete);
+ mutex_init(&drv->lock);
+ INIT_WORK(&drv->work_stop, desc->stop_reqs);
+ drv->mdev.fops = desc->fops;
+
+ trinity_common_init(dev);
+
+ err = trinity_set_device_id(drv);
+ if (err < 0) {
+ dev_err(dev, "Please unload old devices first (max: %d)\n",
+ TRINITY_DEV_EACH_MAX);
+ goto err_cleanup;
+ }
+
+ err = trinity_sysfs_init(drv);
+ if (err < 0) {
+ dev_err(dev, "failed to initialize sysfs for a trinity device");
+ goto err_cleanup;
+ }
+
+ err = trinity_debug_add(drv);
+ if (err < 0) {
+ dev_err(dev,
+ "failed to add a debugging feature to the trinity device");
+ goto err_cleanup_sysfs;
+ }
+
+ trinity_stat_init(drv);
+
+ return 0;
+
+err_cleanup_sysfs:
+ trinity_sysfs_cleanup(drv);
+
+err_cleanup:
+ spin_lock(&trinity_lock);
+ clear_bit(drv->dev_id, &dev_bitmap[dev->id]);
+ spin_unlock(&trinity_lock);
+
+ trinity_common_exit();
+
+ return err;
+}
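+
+/*
+ * Illustrative wiring sketch (the triv2_* names are hypothetical): a
+ * device-specific platform driver is expected to forward its probe() and
+ * remove() callbacks into the helpers above, e.g.:
+ *
+ *   static int triv2_probe(struct platform_device *pdev)
+ *   {
+ *       return trinity_probe(pdev, &triv2_desc);
+ *   }
+ *
+ *   static int triv2_remove(struct platform_device *pdev)
+ *   {
+ *       return trinity_remove(pdev, &triv2_desc);
+ *   }
+ */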
+
+/**
+ * trinity_remove - Cleans up the device driver. This is a standard interface to
+ * remove a Trinity family device.
+ *
+ * @pdev: Platform device structure to remove
+ * @desc: Device description of the device to remove
+ *
+ * Always returns 0.
+ */
+int trinity_remove(struct platform_device *pdev,
+ const struct trinity_desc *desc)
+{
+ struct trinity_driver *drv;
+ struct device *dev;
+
+ drv = (struct trinity_driver *)platform_get_drvdata(pdev);
+ dev = drv_to_dev_ptr(drv);
+
+ trinity_stat_fini(drv);
+ trinity_debug_remove(drv);
+ trinity_sysfs_cleanup(drv);
+
+ spin_lock(&trinity_lock);
+ clear_bit(drv->dev_id, &dev_bitmap[dev->id]);
+ spin_unlock(&trinity_lock);
+
+ trinity_common_exit();
+
+ return 0;
+}
--- /dev/null
+../../../include/uapi/misc/trinity.h
\ No newline at end of file
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * trinity/trinity_common.h: Common header for trinity devices
+ *
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ * Copyright (C) 2020 Parichay Kapoor <pk.kapoor@samsung.com>
+ * Copyright (C) 2020 Wook Song <wook16.song@samsung.com>
+ */
+
+#ifndef __TRINITY_COMMON_H__
+#define __TRINITY_COMMON_H__
+
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iommu.h>
+#include <linux/irqreturn.h>
+#include <linux/kernel.h>
+#include <linux/list_bl.h>
+#include <linux/miscdevice.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "trinity.h"
+
+#include "trinity_hwmem.h"
+#include "trinity_sched.h"
+
+/** Default timeout to wait for opening device (msec and jiffies) */
+#define TRINITY_DEV_TIMEOUT_MSEC (3000)
+#define TRINITY_DEV_TIMEOUT (msecs_to_jiffies(TRINITY_DEV_TIMEOUT_MSEC))
+
+/** Default timeout to wait for running input (msec and jiffies) */
+#define TRINITY_RUN_TIMEOUT_MSEC (4000)
+#define TRINITY_RUN_TIMEOUT (msecs_to_jiffies(TRINITY_RUN_TIMEOUT_MSEC))
+
+#define TRINITY_DEV_TYPE_LEN (16)
+#define TRINITY_DEV_EACH_MAX (2)
+#define TRINITY_MAX_MMREGS (3)
+
+/** A helper macro to generate the version code of the device driver */
+#define GENVER(dev, mj, mn, ex) \
+ ((dev << TRINITY_SHIFT_DEV) | (mj << TRINITY_SHIFT_MAJOR_VER) | \
+ (mn << TRINITY_SHIFT_MINOR_VER) | (ex << TRINITY_SHIFT_EXTRA_VER))
+
+#define trinity_get_iomem_addr(base, offset) ((base) + (offset))
+#define drv_to_dev_ptr(d) ((d)->dev)
+#define drv_to_priv(drv) ((drv)->desc->pdata)
+
+#define TRINITY_STAT_HASH_BITS (10)
+#define TRINITY_STAT_HASH_SIZE (1 << TRINITY_STAT_HASH_BITS)
+
+#define TIME_DIFF(t1, t2) ktime_to_ms(ktime_sub(t1, t2))
+#define TIME_DIFF_US(t1, t2) ktime_to_us(ktime_sub(t1, t2))
+
+enum cpu_acc_control {
+ BEGIN,
+ END,
+};
+
+struct trinity_desc;
+struct trinity_driver;
+struct trinity_req;
+struct trinity_stat;
+struct trinity_stat_app;
+struct trinity_stat_req;
+struct trinity_model_htable;
+
+/**
+ * struct trinity_desc - a structure for device description
+ * @type: A string that indicates the type of this device.
+ * @ver: Coded version information generated via GENVER().
+ * @fops: Device-specific file_operations.
+ * @prepare_req: Optional device-specific function to configure a req before
+ * it is invoked. This requires a model registered to the
+ * driver beforehand.
+ * @get_state: Device-specific helper function to get the state of the device.
+ * @set_state: Device-specific helper function to set the state of the device.
+ * @alloc_req: Device-specific function to allocate a req instance.
+ * @dealloc_req: Device-specific function to deallocate a req instance.
+ * @invoke_req: Device-specific function to run the input with the given
+ * model. Any configuration required to run the device should be
+ * done before invoking it. Note that if there are req scheduling
+ * policies, the req may be queued until the policy decides to
+ * run it rather than running immediately.
+ * @handle_irq: Device-specific IRQ handler.
+ */
+struct trinity_desc {
+ char *type;
+ uint32_t ver;
+
+ const struct file_operations *fops;
+
+ /* Optional */
+ void (*reset)(struct trinity_driver *);
+ int32_t (*prepare_req)(struct trinity_driver *, struct trinity_req *);
+ void (*handle_timeout)(struct trinity_driver *,
+ struct trinity_req *req);
+ void (*stop_reqs)(struct work_struct *);
+ void (*drain_reqs)(struct trinity_driver *);
+ void (*init_profile)(struct trinity_driver *, unsigned long);
+ int32_t (*check_profile)(struct trinity_driver *, struct trinity_req *);
+ int32_t (*get_profile_meta)(const struct trinity_driver *,
+ struct trinity_ioctl_profile_meta *);
+ int32_t (*get_profile_buff)(const struct trinity_driver *,
+ struct trinity_ioctl_profile_buff *);
+ void (*show_profile)(const struct trinity_driver *, int);
+ void (*destroy_profile)(const struct trinity_driver *, void *);
+
+ /* Mandatory */
+ int32_t (*idu_load)(struct trinity_driver *, const char *, bool);
+ int32_t (*idu_version)(struct trinity_driver *, uint32_t *, uint32_t *,
+ uint32_t *);
+ int32_t (*get_state)(const struct trinity_driver *);
+ void (*set_state)(const struct trinity_driver *, enum trinity_state);
+ struct trinity_req *(*alloc_req)(struct trinity_driver *drv);
+ void (*dealloc_req)(struct trinity_driver *drv,
+ struct trinity_req *req);
+ int32_t (*invoke_req)(struct trinity_driver *, struct trinity_req *,
+ void *);
+ irq_handler_t handle_irq;
+};
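+
+/*
+ * Illustrative example (hypothetical triv2_* symbols) of how a
+ * device-specific front end might fill this descriptor before handing it
+ * to trinity_probe():
+ *
+ *   static const struct trinity_desc triv2_desc = {
+ *       .type        = "triv2",
+ *       .ver         = GENVER(2, 1, 0, 0),
+ *       .fops        = &triv2_fops,
+ *       .get_state   = triv2_get_state,
+ *       .set_state   = triv2_set_state,
+ *       .alloc_req   = triv2_alloc_req,
+ *       .dealloc_req = triv2_dealloc_req,
+ *       .invoke_req  = triv2_invoke_req,
+ *       .idu_load    = triv2_idu_load,
+ *       .idu_version = triv2_idu_version,
+ *       .handle_irq  = triv2_handle_irq,
+ *   };
+ */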
+
+/**
+ * struct trinity_stat - A structure for representing a device's statistics.
+ * @lock: A lock protecting the hash table and list below.
+ * @hlist: Hash-table buckets holding per-app statistics.
+ * @list: A list of per-app statistic entries.
+ * @pdata: Private data.
+ */
+struct trinity_stat {
+ spinlock_t lock;
+
+ struct hlist_bl_head hlist[TRINITY_STAT_HASH_SIZE];
+ struct list_head list;
+
+ void *pdata;
+};
+
+/**
+ * struct trinity_stat_app - A structure for representing statistics for each app
+ * @app_id: Identifier for each app
+ * @hnode: Hash node
+ * @total_alloc_mem: Total allocated memory size
+ * @total_freed_mem: Total freed memory size
+ */
+struct trinity_stat_app {
+ int32_t app_id; /* app identifier */
+ char name[TASK_COMM_LEN];
+ enum trinity_app_status status;
+
+ struct trinity_stat *parent;
+
+ uint64_t total_alloc_mem; /* total allocated memory */
+ uint64_t total_freed_mem; /* total freed memory */
+
+ struct list_head reqs;
+ uint32_t num_total_reqs;
+ uint32_t num_kept_reqs;
+ uint32_t num_active_reqs;
+
+ struct hlist_bl_node hnode; /* hash node */
+ struct list_head lnode; /* list node */
+
+ unsigned long slot;
+};
+
+/**
+ * struct trinity_stat_req - A structure for representing statistics for each req
+ * @status: req status
+ * @req_id: req identifier
+ * @model_id: model identifier
+ * @submitted: submitted time (i.e., when req is submitted to global queue)
+ * @scheduled: scheduled time (i.e., when req is scheduled to device)
+ * @completed: completed time (i.e., when output notification arrives)
+ * @list: list node managed by trinity_stat_app
+ * @profile: profile data
+ *
+ * Even if a req is freed, its stat could be kept for a while.
+ */
+struct trinity_stat_req {
+ enum trinity_req_status status; /* status of submit result */
+ enum trinity_req_priority priority;
+
+ struct trinity_stat_app *parent;
+
+ int32_t app_id;
+ int32_t req_id;
+ uint64_t model_id;
+
+ bool is_kernel;
+
+ ktime_t submitted;
+ ktime_t scheduled;
+ ktime_t completed;
+
+ uint32_t num_runs;
+ uint32_t total_time;
+
+ uint32_t prev_time;
+ uint32_t prev_cycles;
+
+ struct list_head list;
+ void *profile;
+
+ unsigned long slot;
+};
+
+/**
+ * struct trinity_driver - A private data structure for Trinity device driver
+ * @desc: A pointer to the device description.
+ * @name: The id-annotated name of the device.
+ * @mdev: A copy of &struct miscdevice to which the device is registered.
+ * @dev: A pointer to &struct device of the device.
+ * @complete: A &struct completion variable to maintain events from the device.
+ * @lock: A lock for access control to driver-level static variables
+ * @mmreg_vaddr: The iomapped base address of memory-mapped registers.
+ * @mmreg_paddr: The physical base address of memory-mapped registers.
+ * @opened: The number of clients which open the device.
+ * @tops: Tera Operations Per Second (TOPS) of this device.
+ * @dspm: The size of Data Scratch-Pad Memory (DSPM) in the DSP.
+ */
+struct trinity_driver {
+ const struct trinity_desc *desc;
+ const char *name;
+ void *pdata;
+
+ uint32_t dev_id;
+ struct miscdevice mdev;
+ struct device *dev;
+ struct completion complete;
+ struct mutex lock;
+
+ atomic_t global_req_id;
+
+ void __iomem *mmreg_vaddr[TRINITY_MAX_MMREGS];
+ phys_addr_t mmreg_paddr[TRINITY_MAX_MMREGS];
+
+ int32_t opened;
+ unsigned long verbose;
+
+ struct work_struct work_stop;
+
+ uint32_t tops;
+ uint32_t dspm;
+
+ /* statistics */
+ struct trinity_stat stat;
+
+ /* debugfs */
+ void *debugfs_pdata;
+
+ struct list_head rpm_list;
+ void *resv_pool;
+};
+
+/**
+ * struct trinity_model - A structure for representing model data
+ * @config: model configuration
+ * @hnode: hash node for indexing
+ * @import_info: Cached hwmem import info.
+ * @owner_id: Identifier for owner app
+ */
+struct trinity_model {
+ struct trinity_ioctl_model config;
+ struct trinity_hwmem_import import_info;
+ struct hlist_bl_node hnode;
+ int32_t owner_id;
+ struct kref refcnt;
+} __attribute__((packed));
+
+/**
+ * struct trinity_input - A structure for representing input data
+ * @config: input configuration
+ * @import_info: Cached hwmem import info.
+ */
+struct trinity_input {
+ struct trinity_ioctl_input config;
+ struct trinity_hwmem_import import_info;
+} __attribute__((packed));
+
+/**
+ * struct trinity_req - A structure for representing a req
+ * @drv: An instance of the driver.
+ * @input: Information of the input configuration to be run by this req.
+ * @status: Status of the submitted req.
+ * @priv: A handle of private data
+ */
+struct trinity_req {
+ /** context where the req belongs */
+ struct trinity_driver *drv;
+
+ struct trinity_input input; /* the req's input argument */
+ struct trinity_model *model;
+
+ struct trinity_stat_req *stat;
+
+ uint64_t submit_retry;
+ struct completion complete;
+ struct llist_node llist;
+
+#ifdef CONFIG_TRINITY_SCHED_VD
+ struct hlist_node hlist;
+#endif
+
+ ktime_t time_started;
+ bool is_kernel;
+ bool skip_iommu_mapping;
+ uint32_t poll_magic;
+
+ bool scheduled;
+
+ void *priv;
+};
+
+/**
+ * struct trinity_model_htable - A common hashtable to maintain models
+ * @ht_heads: A pointer to heads of this hashtable
+ * @hash_bits: The number of bits to use in hashing.
+ * @hash_size: The number of hash buckets.
+ */
+struct trinity_model_htable {
+ struct hlist_bl_head *ht_heads;
+ int hash_bits;
+ int hash_size;
+};
+
+static inline void trinity_set_bit(uint32_t bit, void __iomem *addr)
+{
+ uint32_t reg = 0;
+
+ reg |= bit;
+ iowrite32(reg, addr);
+}
+
+/**
+ * trinity_get_app_id - get an app_id for the currently opened device
+ *
+ * Returns app_id (just returns its tgid for now).
+ */
+static inline int32_t trinity_get_app_id(void)
+{
+ return task_tgid_vnr(current);
+}
+
+/*
+ * Extern support functions
+ */
+extern int trinity_pm_runtime_init(struct device *dev);
+extern int trinity_pm_runtime_forbid(struct device *dev);
+extern void trinity_pm_runtime_allow(struct device *dev);
+extern void trinity_pm_runtime_attach(struct trinity_driver *drv);
+extern int trinity_get_dma_memory(struct device *dev, phys_addr_t *paddr,
+ dma_addr_t *daddr, size_t *size);
+extern int trinity_get_extern_memory(struct device *dev, phys_addr_t *paddr,
+ dma_addr_t *daddr, size_t *size);
+extern void trinity_reset_device(struct device *dev, bool do_test);
+extern void trinity_set_irq_affinity(int irq);
+extern void trinity_monitor_invalid_access(void);
+/*
+ * Trinity common functions
+ */
+int trinity_create_node(struct trinity_driver *drv);
+void trinity_destroy_node(struct trinity_driver *drv);
+int trinity_idu_load(struct trinity_driver *drv, const char *dirpath);
+void trinity_init_model_htable(const struct trinity_driver *drv,
+ struct trinity_model_htable *ht);
+void trinity_finish_req(struct trinity_driver *drv, struct trinity_req *req);
+phys_addr_t trinity_get_paddr(struct iommu_domain *domain, dma_addr_t daddr);
+struct trinity_sched_desc *get_trinity_sched(struct trinity_req *req);
+int trinity_wait_ready(struct trinity_driver *drv);
+
+/* File operations */
+int trinity_open(struct inode *inode, struct file *f);
+int trinity_release(struct inode *inode, struct file *f);
+long trinity_ioctl(struct file *f, unsigned int cmd, unsigned long arg);
+
+/* Device probing and removing */
+int trinity_probe(struct platform_device *pdev,
+ const struct trinity_desc *desc);
+int trinity_remove(struct platform_device *pdev,
+ const struct trinity_desc *desc);
+
+#ifdef CONFIG_TRINITY_SYSFS
+int trinity_sysfs_init(struct trinity_driver *drv);
+int trinity_sysfs_cleanup(struct trinity_driver *drv);
+#else
+static inline int trinity_sysfs_init(struct trinity_driver *drv)
+{
+ return 0;
+}
+
+static inline int trinity_sysfs_cleanup(struct trinity_driver *drv)
+{
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_TRINITY_DEBUG
+int trinity_debug_init(void);
+void trinity_debug_exit(void);
+
+int trinity_debug_add(struct trinity_driver *drv);
+void trinity_debug_remove(struct trinity_driver *drv);
+void trinity_debug_clear(struct trinity_driver *drv, unsigned long msg_max);
+unsigned long trinity_debug_get_max(struct trinity_driver *drv);
+void trinity_debug_dump_msg(struct trinity_driver *drv, const char *fmt, ...);
+void trinity_debug_dump_model(struct trinity_driver *drv,
+ const struct trinity_model *model,
+ const char *fmt, ...);
+void trinity_debug_dump_input(struct trinity_driver *drv,
+ const struct trinity_input *input,
+ const char *fmt, ...);
+#else
+static inline int trinity_debug_init(void)
+{
+ return 0;
+}
+static inline void trinity_debug_exit(void)
+{
+}
+
+static inline int trinity_debug_add(struct trinity_driver *drv)
+{
+ return 0;
+}
+static inline void trinity_debug_remove(struct trinity_driver *drv)
+{
+}
+static inline void trinity_debug_clear(struct trinity_driver *drv,
+ unsigned long msg_max)
+{
+}
+static inline unsigned long trinity_debug_get_max(struct trinity_driver *drv)
+{
+ return 0;
+}
+static inline void trinity_debug_dump_msg(struct trinity_driver *drv,
+ const char *fmt, ...)
+{
+}
+static inline void trinity_debug_dump_model(struct trinity_driver *drv,
+ const struct trinity_model *model,
+ const char *fmt, ...)
+{
+}
+static inline void trinity_debug_dump_input(struct trinity_driver *drv,
+ const struct trinity_input *input,
+ const char *fmt, ...)
+{
+}
+#endif
+
+#endif /* __TRINITY_COMMON_H__ */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ */
+/**
+ * @file trinity_debug.c
+ * @brief Implementation of debug functions for trinity drivers
+ * @date 19 Mar 2020
+ * @author Dongju Chae <dongju.chae@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "trinity_common.h"
+#include "trinity_resv_mem.h"
+
+#define TRINITY_DEVVER(drv) (drv->desc->ver >> TRINITY_SHIFT_DEV)
+#define TRINITY_DEBUGFS_DIR ("trinity")
+#define TRINITY_DEBUGFS_MAX (1024UL)
+#define TRINITY_DEBUGFS_LENGTH (255)
+
+struct trinity_debugfs_msg {
+ char msg[TRINITY_DEBUGFS_LENGTH + 1]; /* including the NUL terminator */
+};
+
+struct trinity_debugfs_entry {
+ struct dentry *dentry;
+ spinlock_t lock;
+
+ unsigned long msg_max;
+ unsigned long msg_num;
+ unsigned long msg_off;
+
+ struct trinity_resv_mem msg_buf;
+};
+
+static struct dentry *trinity_debugfs;
+
+static size_t trinity_debug_append_app_id(struct trinity_driver *drv, char *msg)
+{
+ return scnprintf(msg, TRINITY_DEBUGFS_LENGTH, "[%d] ",
+ trinity_get_app_id());
+}
+
+static char *trinity_debug_get_msg_buf(struct trinity_driver *drv)
+{
+ struct trinity_debugfs_entry *entry = drv->debugfs_pdata;
+ struct trinity_debugfs_msg *buf;
+
+ if (!entry || entry->msg_max == 0)
+ return NULL;
+
+ spin_lock(&entry->lock);
+ if (entry->msg_num == entry->msg_max) {
+ buf = &((struct trinity_debugfs_msg *)
+ entry->msg_buf.vaddr)[entry->msg_off];
+ entry->msg_off = (entry->msg_off + 1) % entry->msg_max;
+ } else {
+ buf = &((struct trinity_debugfs_msg *)
+ entry->msg_buf.vaddr)[entry->msg_num++];
+ }
+ spin_unlock(&entry->lock);
+
+ memset(buf, 0, sizeof(*buf));
+ return buf->msg;
+}
+
+void trinity_debug_dump_msg(struct trinity_driver *drv, const char *fmt, ...)
+{
+ char *msg;
+ size_t len;
+ va_list args;
+
+ msg = trinity_debug_get_msg_buf(drv);
+ if (msg == NULL)
+ return;
+
+ len = trinity_debug_append_app_id(drv, msg);
+
+ va_start(args, fmt);
+ len += vscnprintf(msg + len, TRINITY_DEBUGFS_LENGTH - len, fmt, args);
+ va_end(args);
+
+ if (drv->verbose > 0)
+ dev_info(drv_to_dev_ptr(drv), "%s\n", msg);
+}
+
+void trinity_debug_dump_model(struct trinity_driver *drv,
+ const struct trinity_model *model,
+ const char *fmt, ...)
+{
+ char *msg;
+ size_t len;
+ va_list args;
+
+ msg = trinity_debug_get_msg_buf(drv);
+ if (msg == NULL)
+ return;
+
+ len = trinity_debug_append_app_id(drv, msg);
+
+ va_start(args, fmt);
+ len += vscnprintf(msg + len, TRINITY_DEBUGFS_LENGTH - len, fmt, args);
+ va_end(args);
+
+ len += scnprintf(
+ msg + len, TRINITY_DEBUGFS_LENGTH - len,
+ "\n\tid(0x%llx) dbuf_fd(%d) program_offset_addr(0x%llx) program_size(0x%llx)\n",
+ model->config.id, model->config.dbuf_fd,
+ model->config.program_offset_addr, model->config.program_size);
+ if (TRINITY_DEVVER(drv) == 1) {
+ len += scnprintf(msg + len, TRINITY_DEBUGFS_LENGTH - len,
+ "\tweight_offset_addr(0x%llx)",
+ model->config.weight_offset_addr);
+ } else if (TRINITY_DEVVER(drv) == 2) {
+ len += scnprintf(
+ msg + len, TRINITY_DEBUGFS_LENGTH - len,
+ "\tmetadata_dbuf_fd(%d) metadata_ext_dbuf_fd(%d) metadata_ext_size(0x%llx)",
+ model->config.metadata_dbuf_fd,
+ model->config.metadata_ext_dbuf_fd,
+ model->config.metadata_ext_size);
+ }
+
+ if (drv->verbose > 0)
+ dev_info(drv_to_dev_ptr(drv), "%s\n", msg);
+}
+
+void trinity_debug_dump_input(struct trinity_driver *drv,
+ const struct trinity_input *input,
+ const char *fmt, ...)
+{
+ char *msg;
+ size_t len;
+ va_list args;
+
+ msg = trinity_debug_get_msg_buf(drv);
+ if (msg == NULL)
+ return;
+
+ len = trinity_debug_append_app_id(drv, msg);
+
+ va_start(args, fmt);
+ len += vscnprintf(msg + len, TRINITY_DEBUGFS_LENGTH - len, fmt, args);
+ va_end(args);
+
+ len += scnprintf(msg + len, TRINITY_DEBUGFS_LENGTH - len,
+ "\n\tdbuf_fd(%d) model_id(0x%llx)\n",
+ input->config.dbuf_fd, input->config.model_id);
+ if (TRINITY_DEVVER(drv) == 1) {
+ len += scnprintf(
+ msg + len, TRINITY_DEBUGFS_LENGTH - len,
+ "\tactivation_offset_addr0(0x%llx) activation_offset_addr1(0x%llx)",
+ input->config.activation_offset_addr0,
+ input->config.activation_offset_addr1);
+ } else if (TRINITY_DEVVER(drv) == 2) {
+ len += scnprintf(
+ msg + len, TRINITY_DEBUGFS_LENGTH - len,
+ "\ttimeout_ms(%lld) priority(%u) num_segments(%u) input_mode(%d) output_mode(%d)",
+ input->config.timeout_ms, input->config.priority,
+ input->config.num_segments, input->config.input_mode,
+ input->config.output_mode);
+ }
+
+ if (drv->verbose > 0)
+ dev_info(drv_to_dev_ptr(drv), "%s\n", msg);
+}
+
+static int trinity_debugfs_show(struct seq_file *s, void *unused)
+{
+ struct trinity_driver *drv = s->private;
+ struct trinity_debugfs_entry *entry = drv->debugfs_pdata;
+ struct trinity_debugfs_msg *msg;
+ unsigned long i, offset;
+
+ spin_lock(&entry->lock);
+ for (i = 0; i < entry->msg_num; i++) {
+ offset = (entry->msg_off + i) % entry->msg_max;
+ msg = &((struct trinity_debugfs_msg *)
+ entry->msg_buf.vaddr)[offset];
+
+ seq_puts(s, msg->msg);
+ seq_puts(s, "\n");
+ }
+ spin_unlock(&entry->lock);
+
+ return 0;
+}
+
+static int trinity_debugfs_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, trinity_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations trinity_debugfs_fops = {
+ .open = trinity_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+int trinity_debug_add(struct trinity_driver *drv)
+{
+ struct trinity_debugfs_entry *entry;
+ struct dentry *dentry;
+ const char *name = drv->name;
+
+ if (name == NULL)
+ return -EINVAL;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ dentry = debugfs_create_file_unsafe(name, 0400, trinity_debugfs, drv,
+ &trinity_debugfs_fops);
+ if (IS_ERR(dentry)) {
+ kfree(entry);
+ return PTR_ERR(dentry);
+ }
+
+ entry->dentry = dentry;
+ spin_lock_init(&entry->lock);
+
+ drv->debugfs_pdata = entry;
+
+ return 0;
+}
+
+void trinity_debug_remove(struct trinity_driver *drv)
+{
+ struct trinity_debugfs_entry *entry = drv->debugfs_pdata;
+
+ trinity_debug_clear(drv, 0);
+
+ debugfs_remove(entry->dentry);
+ kfree(entry);
+
+ drv->debugfs_pdata = NULL;
+}
+
+void trinity_debug_clear(struct trinity_driver *drv, unsigned long msg_max)
+{
+ struct trinity_debugfs_entry *entry = drv->debugfs_pdata;
+ struct device *dev = drv_to_dev_ptr(drv);
+ size_t size;
+
+ /* maximum size limit: 256KiB */
+ if (msg_max > TRINITY_DEBUGFS_MAX) {
+ dev_err(dev, "Too much debugfs entries (limit: %lu)",
+ TRINITY_DEBUGFS_MAX);
+ return;
+ }
+
+ spin_lock(&entry->lock);
+
+ /* temporarily disable debugfs while resizing */
+ trinity_free_from_resv_mem(&entry->msg_buf, false);
+ entry->msg_max = 0;
+ entry->msg_num = 0;
+ entry->msg_off = 0;
+
+ if (msg_max == 0)
+ goto out;
+
+ /* reallocate debugfs buffer */
+ size = PAGE_ALIGN(msg_max * sizeof(struct trinity_debugfs_msg));
+ if (trinity_alloc_from_resv_mem(size, &entry->msg_buf, false) < 0) {
+ dev_warn(dev, "No available reserved memory for debugfs");
+ goto out;
+ }
+ /* more available entries due to page size alignment */
+ entry->msg_max = size / sizeof(struct trinity_debugfs_msg);
+
+out:
+ spin_unlock(&entry->lock);
+}
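+
+/*
+ * Sizing example (illustrative): with msg_max = 100 and 256-byte entries,
+ * size = PAGE_ALIGN(100 * 256) = 28672 on 4 KiB pages, so msg_max is
+ * raised to 28672 / 256 = 112 entries.
+ */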
+
+unsigned long trinity_debug_get_max(struct trinity_driver *drv)
+{
+ struct trinity_debugfs_entry *entry = drv->debugfs_pdata;
+ unsigned long msg_max;
+
+ spin_lock(&entry->lock);
+ msg_max = entry->msg_max;
+ spin_unlock(&entry->lock);
+
+ return msg_max;
+}
+
+int trinity_debug_init(void)
+{
+ struct dentry *entry;
+
+ entry = debugfs_create_dir(TRINITY_DEBUGFS_DIR, NULL);
+ if (IS_ERR(entry))
+ return PTR_ERR(entry);
+
+ trinity_debugfs = entry;
+
+ return 0;
+}
+
+void trinity_debug_exit(void)
+{
+ debugfs_remove_recursive(trinity_debugfs);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * An abstraction layer to handle DMA memory buffers for Trinity device driver
+ *
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Wook Song <wook16.song@samsung.com>
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/version.h>
+
+#include "trinity_hwmem.h"
+#include "trinity_hwmem_iommu_helper.h"
+#include "trinity_resv_mem.h"
+
+#define dbuf_to_trnt_hwmem(d) ((struct trinity_hwmem *)(d)->priv)
+#define vma_to_trnt_hwmem(v) ((struct trinity_hwmem *)(v)->vm_private_data)
+
+/**
+ * struct trinity_hwmem - A data structure for Trinity DMA buffer management
+ * @dev: A pointer to device which this hwmem belongs to.
+ * @dbuf: The dma_buf instance.
+ * @refcnt: Reference counts.
+ * @direction: The DMA data direction used when allocating this dma_buf.
+ * @type: The hwmem type requested at allocation time.
+ * @attrs: Attributes used in allocating this dma_buf.
+ * @req_size: The size of the DMA buffer that the user requested to allocate.
+ * @alc_size: The size of the DMA buffer which is actually allocated.
+ * @is_cont: Whether the buffer must be physically contiguous.
+ * @addr: The DMA (physical) address of this dma_buf.
+ * @cookie: The DMA cookie (kernel virtual address) of this dma_buf.
+ */
+struct trinity_hwmem {
+ struct device *dev;
+ struct dma_buf *dbuf;
+ struct kref refcnt;
+
+ enum dma_data_direction direction;
+ enum trinity_hwmem_type type;
+
+ unsigned long attrs;
+ size_t req_size;
+ size_t alc_size;
+
+ bool is_cont;
+ dma_addr_t addr;
+ void *cookie;
+};
+
+static void __trinity_hwmem_free(struct kref *refcnt)
+{
+ struct trinity_hwmem *mem =
+ container_of(refcnt, struct trinity_hwmem, refcnt);
+ /**
+ * when the dmabuf reference counter becomes zero,
+ * trinity_hwmem_dbuf_ops_release() is triggered.
+ */
+ dma_buf_put(mem->dbuf);
+}
+
+static void __trinity_hwmem_put(struct trinity_hwmem *mem)
+{
+ kref_put(&mem->refcnt, __trinity_hwmem_free);
+}
+
+static void __trinity_hwmem_put_dmabuf(struct dma_buf *dbuf)
+{
+ __trinity_hwmem_put(dbuf_to_trnt_hwmem(dbuf));
+}
+
+static struct trinity_hwmem *__trinity_hwmem_get(struct trinity_hwmem *mem)
+{
+ kref_get(&mem->refcnt);
+
+ return mem;
+}
+
+static void trinity_hwmem_dbuf_ops_detach(struct dma_buf *dbuf,
+ struct dma_buf_attachment *attachment)
+{
+ struct trinity_hwmem *mem = dbuf_to_trnt_hwmem(dbuf);
+
+ /* Decrease ref count of the backing storage */
+ __trinity_hwmem_put(mem);
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0))
+static int trinity_hwmem_dbuf_ops_attach(struct dma_buf *dbuf,
+ struct device *dev,
+ struct dma_buf_attachment *attachment)
+#else
+static int trinity_hwmem_dbuf_ops_attach(struct dma_buf *dbuf,
+ struct dma_buf_attachment *attachment)
+#endif
+{
+ struct trinity_hwmem *mem = dbuf_to_trnt_hwmem(dbuf);
+
+ /* Increase ref count of the backing storage */
+ mem = __trinity_hwmem_get(mem);
+ attachment->priv = mem;
+
+ return 0;
+}
+
+static struct sg_table *
+trinity_hwmem_dbuf_ops_map_dma_buf(struct dma_buf_attachment *attachment,
+ enum dma_data_direction dir)
+{
+ return NULL;
+}
+
+static void
+trinity_hwmem_dbuf_ops_unmap_dma_buf(struct dma_buf_attachment *attachment,
+ struct sg_table *sgt,
+ enum dma_data_direction dir)
+{
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0))
+/**
+ * trinity_hwmem_dbuf_ops_map_atomic() - Implementation of the .map_atomic
+ * callback in &struct dma_buf_ops
+ * @dbuf: A pointer to the instance of &struct dma_buf to map.
+ * @pgnum: The number of pages to map.
+ *
+ * This is a mandatory callback to be implemented in the current kernel version
+ * (v4.12.0), but not used by Trinity internally and deprecated since v4.19.
+ */
+static inline void *trinity_hwmem_dbuf_ops_map_atomic(struct dma_buf *dbuf,
+ unsigned long pgnum)
+{
+ return NULL;
+}
+
+/**
+ * trinity_hwmem_dbuf_ops_map() - Implementation of the .map callback in &struct
+ * dma_buf_ops
+ * @dbuf: A pointer to the instance of &struct dma_buf to map.
+ * @pgnum: The number of pages to map.
+ *
+ * This is a mandatory callback to be implemented in the current kernel version
+ * (v4.12.0), but not used by Trinity internally and deprecated since v4.19.
+ */
+static inline void *trinity_hwmem_dbuf_ops_map(struct dma_buf *dbuf,
+ unsigned long pgnum)
+{
+ return NULL;
+}
+#endif
+
+static void trinity_hwmem_vm_ops_open(struct vm_area_struct *vma)
+{
+ struct trinity_hwmem *mem = vma_to_trnt_hwmem(vma);
+
+ __trinity_hwmem_get(mem);
+}
+
+static void trinity_hwmem_vm_ops_close(struct vm_area_struct *vma)
+{
+ struct trinity_hwmem *mem = vma_to_trnt_hwmem(vma);
+
+ __trinity_hwmem_put(mem);
+}
+
+static const struct vm_operations_struct trinity_hwmem_vm_ops = {
+ .open = trinity_hwmem_vm_ops_open,
+ .close = trinity_hwmem_vm_ops_close,
+};
+
+static int32_t trinity_hwmem_dbuf_ops_mmap(struct dma_buf *dbuf,
+ struct vm_area_struct *vma)
+{
+ struct trinity_hwmem *mem;
+ int32_t ret;
+
+ if (!dbuf)
+ return -EINVAL;
+
+ mem = dbuf_to_trnt_hwmem(dbuf);
+ if (!mem)
+ return -EINVAL;
+
+ vma->vm_pgoff = 0;
+ if (mem->type == TRINITY_HWMEM_DMA_CONT)
+ ret = trinity_mmap_from_resv_mem(vma, mem->cookie,
+ mem->alc_size, mem->is_cont);
+ else
+ ret = dma_mmap_attrs(mem->dev, vma, mem->cookie, mem->addr,
+ mem->alc_size, mem->attrs);
+ if (ret)
+ return ret;
+
+ vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+ vma->vm_private_data = mem;
+ vma->vm_ops = &trinity_hwmem_vm_ops;
+
+ vma->vm_ops->open(vma);
+
+ return 0;
+}
+
+static void trinity_hwmem_dbuf_ops_release(struct dma_buf *dbuf)
+{
+ struct trinity_hwmem *mem = dbuf_to_trnt_hwmem(dbuf);
+
+#ifdef CONFIG_TRINITY_FPGA
+ if (trinity_hwmem_iommu_unmap(mem->dev, mem->addr, mem->alc_size) < 0)
+ dev_warn(mem->dev, "Unable to unmap iommu mapping for 0x%llx",
+ mem->addr);
+#endif
+
+ if (mem->type == TRINITY_HWMEM_DMA_CONT) {
+ struct trinity_resv_mem resv_mem;
+
+ resv_mem.vaddr = mem->cookie;
+ resv_mem.daddr = mem->addr;
+ resv_mem.size = mem->alc_size;
+
+ trinity_free_from_resv_mem(&resv_mem, mem->is_cont);
+ } else {
+ dma_free_attrs(mem->dev, mem->alc_size, mem->cookie, mem->addr,
+ mem->attrs);
+ }
+ put_device(mem->dev);
+
+ mem->dbuf->priv = NULL;
+
+ kfree(mem);
+}
+
+static void *trinity_hwmem_dbuf_ops_vmap(struct dma_buf *dbuf)
+{
+ struct trinity_hwmem *mem;
+
+ if (!dbuf)
+ return NULL;
+
+ mem = dbuf_to_trnt_hwmem(dbuf);
+ if (!mem)
+ return NULL;
+
+ return mem->cookie;
+}
+
+static const struct dma_buf_ops trinity_hwmem_dbuf_ops = {
+ .vmap = trinity_hwmem_dbuf_ops_vmap,
+ .attach = trinity_hwmem_dbuf_ops_attach,
+ .detach = trinity_hwmem_dbuf_ops_detach,
+ .map_dma_buf = trinity_hwmem_dbuf_ops_map_dma_buf,
+ .unmap_dma_buf = trinity_hwmem_dbuf_ops_unmap_dma_buf,
+ .release = trinity_hwmem_dbuf_ops_release,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0))
+ .map = trinity_hwmem_dbuf_ops_map,
+ .map_atomic = trinity_hwmem_dbuf_ops_map_atomic,
+#endif
+ .mmap = trinity_hwmem_dbuf_ops_mmap,
+};
+
+static struct trinity_hwmem *
+__trinity_hwmem_alloc(struct device *dev, const size_t size,
+ const enum dma_data_direction dir,
+ const enum trinity_hwmem_type type)
+{
+ size_t aligned_size = ALIGN(size, PAGE_SIZE);
+ struct trinity_hwmem *mem;
+ struct trinity_resv_mem resv_mem;
+ int ret;
+
+ if (WARN_ON(!dev))
+ return ERR_PTR(-EINVAL);
+
+ mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+ if (!mem)
+ return ERR_PTR(-ENOMEM);
+
+ mem->dev = get_device(dev);
+ mem->req_size = size;
+ mem->alc_size = aligned_size;
+ mem->direction = dir;
+ mem->type = TRINITY_HWMEM_DMA_IOMMU;
+ mem->is_cont = (type == TRINITY_HWMEM_DMA_CONT);
+
+ mem->attrs |= DMA_ATTR_WRITE_COMBINE;
+ mem->attrs |= DMA_ATTR_SKIP_CPU_SYNC;
+
+ /*
+ * Try to allocate memory from the reserved region first, regardless of
+ * the hwmem type. However, the resv allocator should preserve a
+ * minimum space for vISA programs because they must be physically
+ * contiguous.
+ */
+ ret = trinity_alloc_from_resv_mem(aligned_size, &resv_mem,
+ mem->is_cont);
+ if (ret == 0) {
+ mem->addr = resv_mem.daddr;
+ mem->cookie = resv_mem.vaddr;
+ mem->type = TRINITY_HWMEM_DMA_CONT;
+ } else if (!mem->is_cont) {
+ mem->cookie = dma_alloc_attrs(dev, aligned_size, &mem->addr,
+ GFP_KERNEL, mem->attrs);
+ } else {
+ dev_err(mem->dev,
+ "Unable alloc contiguous memory for program: %zu\n",
+ size);
+ }
+
+ if (!mem->cookie) {
+ ret = -ENOMEM;
+ goto free_mem;
+ }
+
+ kref_init(&mem->refcnt);
+
+#ifdef CONFIG_TRINITY_FPGA
+ if (trinity_hwmem_iommu_map(mem->dev, mem->addr, mem->alc_size) < 0)
+ dev_warn(mem->dev, "Unable to map iommu mapping for 0x%llx",
+ mem->addr);
+#endif
+
+ return mem;
+
+free_mem:
+ /* balance the get_device() above */
+ put_device(mem->dev);
+ kfree(mem);
+
+ return ERR_PTR(ret);
+}
+
+static struct dma_buf *__trinity_hwmem_get_dmabuf(struct trinity_hwmem *mem,
+ unsigned long flags)
+{
+ DEFINE_DMA_BUF_EXPORT_INFO(einfo);
+ struct dma_buf *dbuf;
+
+ einfo.ops = &trinity_hwmem_dbuf_ops;
+ einfo.size = mem->alc_size;
+ einfo.flags = flags;
+ einfo.priv = (void *)mem;
+
+ dbuf = dma_buf_export(&einfo);
+ if (IS_ERR(dbuf))
+ return dbuf;
+
+ /* Increase ref count of the backing storage */
+ dbuf->priv = (void *)__trinity_hwmem_get(mem);
+ mem->dbuf = dbuf;
+
+ return dbuf;
+}
+
+int32_t trinity_hwmem_alloc(struct device *dev, const size_t size,
+ enum trinity_hwmem_type type)
+{
+ struct trinity_hwmem *mem;
+ struct dma_buf *dbuf;
+ int32_t ret;
+
+ mem = __trinity_hwmem_alloc(dev, size, DMA_BIDIRECTIONAL, type);
+ if (IS_ERR(mem))
+ return PTR_ERR(mem);
+
+ dbuf = __trinity_hwmem_get_dmabuf(mem, O_CLOEXEC | O_RDWR);
+ if (IS_ERR(dbuf)) {
+ ret = PTR_ERR(dbuf);
+ goto err_put_mem;
+ }
+
+ ret = dma_buf_fd(dbuf, O_CLOEXEC);
+ if (ret < 0)
+ goto err_put_mem;
+
+ return ret;
+
+err_put_mem:
+ __trinity_hwmem_put(mem);
+
+ return ret;
+}
+
+int32_t trinity_hwmem_free(struct device *dev, const int32_t fd)
+{
+ struct dma_buf *dbuf;
+
+ dbuf = dma_buf_get(fd);
+ if (!IS_ERR(dbuf)) {
+ struct trinity_hwmem *mem = dbuf_to_trnt_hwmem(dbuf);
+
+ /* Counter part of __trinity_hwmem_get() in __trinity_hwmem_get_dmabuf() */
+ __trinity_hwmem_put_dmabuf(dbuf);
+ /* Counter part of __trinity_hwmem_get() in __trinity_hwmem_alloc() */
+ __trinity_hwmem_put(mem);
+
+ dma_buf_put(dbuf);
+
+ return 0;
+ }
+
+ dev_err(dev,
+ "failed to free the dma_buf structure realted to fd with %ld\n",
+ PTR_ERR(dbuf));
+
+ return PTR_ERR(dbuf);
+}
+
+/**
+ * trinity_hwmem_import_dmabuf_begin() - Defines the beginning of a section to
+ * import a given DMA buffer file descriptor.
+ * @dev: A pointer to the instance of the device to be attached the DMA buffer
+ * @dbuf_fd: The file descriptor of the DMA buffer to be imported.
+ * @import_info: If importing is successful, information such as the DMA
+ * address, the virtual address which is mapped to the DMA address,
+ * &struct dma_buf_attachment, a scatter-gather table, and &struct
+ * dma_buf corresponding to the file descriptor will be passed
+ * using this parameter.
+ *
+ * Return: 0 on success. Otherwise, returns a negative error.
+ */
+int32_t
+trinity_hwmem_import_dmabuf_begin(struct device *dev, const int32_t dbuf_fd,
+ struct trinity_hwmem_import *import_info)
+{
+ struct dma_buf_attachment *attachment;
+ struct dma_buf *buf;
+ struct trinity_hwmem *mem;
+ int32_t ret;
+
+ if (!import_info)
+ return -EINVAL;
+
+ buf = dma_buf_get(dbuf_fd);
+ if (IS_ERR(buf))
+ return PTR_ERR(buf);
+
+ attachment = dma_buf_attach(buf, dev);
+ if (IS_ERR(attachment)) {
+ ret = PTR_ERR(attachment);
+ goto err_dbuf_put;
+ }
+
+ mem = attachment->priv;
+ import_info->dma_addr = mem->addr;
+ import_info->addr = dma_buf_vmap(buf);
+ import_info->attachment = attachment;
+ import_info->buf = buf;
+
+ return 0;
+
+err_dbuf_put:
+ dma_buf_put(buf);
+
+ return ret;
+}
+
+/**
+ * trinity_hwmem_import_dmabuf_end() - Defines the end of the section related
+ * to the given pointer to &struct trinity_hwmem_import.
+ * @import_info: Importing information related to the section to be ended.
+ */
+void trinity_hwmem_import_dmabuf_end(struct trinity_hwmem_import *import_info)
+{
+ if (!import_info || !import_info->buf)
+ return;
+ dma_buf_vunmap(import_info->buf, import_info->addr);
+ dma_buf_detach(import_info->buf, import_info->attachment);
+ dma_buf_put(import_info->buf);
+}
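+
+/*
+ * Illustrative usage of the begin/end section above (this mirrors the
+ * TRINITY_IOCTL_FPGA_MEMCPY path in trinity_common.c):
+ *
+ *   struct trinity_hwmem_import info;
+ *
+ *   if (trinity_hwmem_import_dmabuf_begin(dev, dbuf_fd, &info) == 0) {
+ *       ... info.dma_addr and info.addr are valid only here ...
+ *       trinity_hwmem_import_dmabuf_end(&info);
+ *   }
+ */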
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Wook Song <wook16.song@samsung.com>
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#ifndef __DRIVERS_MISC_TRINITY_HWMEM_H__
+#define __DRIVERS_MISC_TRINITY_HWMEM_H__
+
+#include <linux/dma-buf.h>
+#include <linux/dma-mapping.h>
+#include <linux/kref.h>
+
+#include "trinity.h"
+
+/**
+ * struct trinity_hwmem_import - A data structure to maintain imported hwmem
+ * (that is Trinity DMA buffer).
+ * @dma_addr: The physical DMA address of this DMA buffer.
+ * @addr: A virtual address of this DMA buffer.
+ * @attachment: A pointer to &struct dma_buf_attachment.
+ * @buf: &struct dma_buf that this hwmem wrapped.
+ */
+struct trinity_hwmem_import {
+ dma_addr_t dma_addr;
+ void *addr;
+ struct dma_buf_attachment *attachment;
+ struct dma_buf *buf;
+};
+
+int32_t trinity_hwmem_import_dmabuf_begin(struct device *dev,
+ const int32_t dbuf_fd,
+ struct trinity_hwmem_import *import_info);
+void trinity_hwmem_import_dmabuf_end(struct trinity_hwmem_import *import_info);
+
+int32_t trinity_hwmem_alloc(struct device *dev, const size_t size,
+ enum trinity_hwmem_type type);
+int32_t trinity_hwmem_free(struct device *dev, const int32_t fd);
+
+#endif /* __DRIVERS_MISC_TRINITY_HWMEM_H__ */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * IOMMU device driver for Samsung Research NPU device family
+ *
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Wook Song <wook16.song@samsung.com>
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#include <linux/bitmap.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/memory.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock_types.h>
+
+#include "trinity_hwmem_iommu_helper.h"
+
+#define CHECK_BITS(val, mask) ((val & mask) == mask)
+
+/* Register offsets for SRNPU-IOMMU (only for Triv2) */
+#define BASE_OFFSET_IOMMU_DLA (0x1000)
+#define BASE_OFFSET_IOMMU_DSP (0x2000)
+
+#define OFFSET_IOMMU_CTRL (0x00)
+#define OFFSET_IOMMU_STATUS (0x04)
+#define OFFSET_IOMMU_FLPT_BASE (0x08)
+#define OFFSET_IOMMU_ALL_INVALIDATION (0x0C)
+#define OFFSET_IOMMU_VPN_INVALIDATION (0x10)
+#define OFFSET_IOMMU_IFLT_STAT (0x14)
+#define OFFSET_IOMMU_IFLT_VA (0x18)
+#define OFFSET_IOMMU_OFLT_STAT (0x1C)
+#define OFFSET_IOMMU_OFLT_VA (0x20)
+#define OFFSET_IOMMU_TLB_READ (0x24)
+#define OFFSET_IOMMU_TLB_TAG (0x28)
+#define OFFSET_IOMMU_TLB_PPN (0x2C)
+
+#define MASK_PTE_MAPPED BIT_MASK(0)
+
+#define MASK_FLPTE_MAP_1M BIT_MASK(1)
+#define MASK_FLPTE_PPN_MASK_1M GENMASK(35, 20)
+#define RSHFT_FLPTE_PPN_1M (20)
+#define LSHFT_FLPTE_PPN_1M (16)
+
+#define MASK_FLPTE_MAP_2M BIT_MASK(2)
+#define MASK_FLPTE_PPN_MASK_2M GENMASK(35, 21)
+#define RSHFT_FLPTE_PPN_2M (21)
+#define LSHFT_FLPTE_PPN_2M (17)
+
+#define MASK_FLPTE_MAP_16M GENMASK(2, 1)
+#define MASK_FLPTE_PPN_MASK_16M GENMASK(35, 24)
+#define RSHFT_FLPTE_PPN_16M (24)
+#define LSHFT_FLPTE_PPN_16M (20)
+
+#define MASK_SLPTE_MAP_4K 0
+#define MASK_SLPTE_PPN_MASK_4K GENMASK(35, 12)
+#define RSHFT_SLPTE_PPN_4K (12)
+#define LSHFT_SLPTE_PPN_4K (8)
+
+#define MASK_SLPTE_MAP_64K BIT_MASK(1)
+#define MASK_SLPTE_PPN_MASK_64K GENMASK(35, 16)
+#define RSHFT_SLPTE_PPN_64K (16)
+#define LSHFT_SLPTE_PPN_64K (12)
+
+#define MASK_SLPT_BASE_TO_FLPTE GENMASK(35, 10)
+#define RSHFT_SLPT_BASE_TO_FLPTE (10)
+#define LSHFT_SLPT_BASE_TO_FLPTE (6)
+#define MASK_FLPTE_TO_SLPT_BASE GENMASK(31, 6)
+#define RSHFT_FLPTE_TO_SLPT_BASE (6)
+#define LSHFT_FLPTE_TO_SLPT_BASE (10)
+
+#define FLPT_DMAADDR_TO_REGVAL(x) (((x) >> 14) << 10)
+#define FLPT_NUM_PTES BIT(12)
+#define FLPT_PTE_SIZE (SZ_4)
+#define FLPT_SIZE (FLPT_NUM_PTES * FLPT_PTE_SIZE)
+#define SLPT_NUM_PTES BIT(8)
+#define SLPT_PTE_SIZE (SZ_4)
+#define SLPT_SIZE (SLPT_NUM_PTES * SLPT_PTE_SIZE)
+#define RSHFT_SIZE_TO_NUM_PTE (20)
+#define RSHFT_SIZE_TO_NUM_LV2PTE (12)
+#define MASK_IOVA_TO_VPN GENMASK(31, 20)
+#define RSHFT_IOVA_TO_VPN (20)
+#define MASK_IOVA_TO_LV2VPN GENMASK(19, 12)
+#define RSHFT_IOVA_TO_LV2VPN (12)
+#define MASK_IOVA_TO_TLBVPN GENMASK(31, 14)
+#define MASK_IOVA_TO_PAGEOFFSET_4K GENMASK(11, 0)
+#define MASK_IOVA_TO_PAGEOFFSET_1M GENMASK(19, 0)
+#define RSHFT_IOVA_TO_TLBVPN (14)
+#define LSHFT_TLBVPN_TO_REGVAL (14)
+
+#define FLPT_PAGE_FAULT BIT(3)
+#define SLPT_PAGE_FAULT BIT(2)
+#define PTW_ACCESS_FAULT BIT(1)
+#define ATU_ACCESS_FAULT BIT(0)
+
+#define MASK_TLB_READ_BUF BIT_MASK(13)
+#define MASK_TLB_READ_CH BIT_MASK(12)
+#define MASK_TLB_READ_LANE GENMASK(9, 8)
+#define LSHFT_TLB_READ_LANE (8)
+#define MASK_TLB_READ_LINE GENMASK(7, 0)
+#define LSHFT_TLB_READ_LINE (0)
+
+#define MASK_TLB_PPN_PPN GENMASK(31, 8)
+#define RSHFT_TLB_PPN_PPN (8)
+#define MASK_TLB_TAG_VPN GENMASK(31, 12)
+#define RSHFT_TLB_TAG_VPN (12)
+#define MASK_TLB_TAG_PS GENMASK(6, 4)
+#define RSHFT_TLB_TAG_PS (4)
+#define MASK_TLB_TAG_LANE_MAPPED BIT_MASK(1)
+#define RSHFT_TLB_TAG_LANE_MAPPED (1)
+#define MASK_TLB_TAG_LINE_VALID BIT_MASK(0)
+#define RSHFT_TLB_TAG_LINE_VALID (0)
+
+struct trinity_hwmem_iommu_info {
+ struct device *dev;
+ spinlock_t lock;
+ void __iomem *regbase;
+ void *flpt;
+ dma_addr_t flpt_dma_addr;
+ struct dma_pool *slpt_cache;
+ void *dma_region_virt_base;
+ phys_addr_t dma_region_phys_base;
+ dma_addr_t dma_region_dma_base;
+ struct kref *slpt_refcnts;
+ struct list_head node;
+};
+
+static LIST_HEAD(iommu_info_list);
+static DEFINE_SPINLOCK(iommu_info_list_lock);
+
+static inline void enable_iommu(struct trinity_hwmem_iommu_info *info)
+{
+ uint32_t val = 0x1;
+
+ iowrite32(val,
+ info->regbase + BASE_OFFSET_IOMMU_DLA + OFFSET_IOMMU_CTRL);
+ iowrite32(val,
+ info->regbase + BASE_OFFSET_IOMMU_DSP + OFFSET_IOMMU_CTRL);
+}
+
+static inline uint32_t iova_to_vpn(dma_addr_t iova)
+{
+ uint32_t ret = iova;
+
+ ret &= MASK_IOVA_TO_VPN;
+ ret >>= RSHFT_IOVA_TO_VPN;
+
+ return ret;
+}
+
+static inline uint32_t iova_to_lv2vpn(dma_addr_t iova)
+{
+ iova &= MASK_IOVA_TO_LV2VPN;
+ iova >>= RSHFT_IOVA_TO_LV2VPN;
+
+ return iova;
+}
+
+static inline uint32_t iova_to_tlbvpn(dma_addr_t iova)
+{
+ iova &= MASK_IOVA_TO_TLBVPN;
+ iova >>= RSHFT_IOVA_TO_TLBVPN;
+
+ return iova;
+}
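+
+/*
+ * Worked example (illustrative): for iova 0x12345678,
+ * vpn = 0x123 (bits 31:20), lv2vpn = 0x45 (bits 19:12),
+ * tlbvpn = 0x48d1 (bits 31:14), and the 4 KiB page offset is 0x678.
+ */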
+
+static inline dma_addr_t virt_to_dma(void *virt_base, dma_addr_t da_base,
+ void *va)
+{
+ return da_base + ((size_t)va - (size_t)virt_base);
+}
+
+static inline void *dma_to_virt(void *virt_base, dma_addr_t da_base,
+ dma_addr_t da)
+{
+ return virt_base + (da - da_base);
+}
+
+static inline uint32_t srnpu_iommu_get_pte(dma_addr_t addr, size_t size)
+{
+ uint32_t val = 0;
+
+ switch (size) {
+ case SZ_16M:
+ val |= MASK_FLPTE_MAP_16M;
+ addr &= MASK_FLPTE_PPN_MASK_16M;
+ val |= ((addr >> RSHFT_FLPTE_PPN_16M) << LSHFT_FLPTE_PPN_16M);
+ break;
+ case SZ_2M:
+ val |= MASK_FLPTE_MAP_2M;
+ addr &= MASK_FLPTE_PPN_MASK_2M;
+ val |= ((addr >> RSHFT_FLPTE_PPN_2M) << LSHFT_FLPTE_PPN_2M);
+ break;
+ case SZ_1M:
+ val |= MASK_FLPTE_MAP_1M;
+ addr &= MASK_FLPTE_PPN_MASK_1M;
+ val |= ((addr >> RSHFT_FLPTE_PPN_1M) << LSHFT_FLPTE_PPN_1M);
+ break;
+ case SZ_64K:
+ val |= MASK_SLPTE_MAP_64K;
+ addr &= MASK_SLPTE_PPN_MASK_64K;
+ val |= ((addr >> RSHFT_SLPTE_PPN_64K) << LSHFT_SLPTE_PPN_64K);
+ break;
+ case SZ_4K:
+ val |= MASK_SLPTE_MAP_4K;
+ addr &= MASK_SLPTE_PPN_MASK_4K;
+ val |= ((addr >> RSHFT_SLPTE_PPN_4K) << LSHFT_SLPTE_PPN_4K);
+ break;
+ default:
+ return 0;
+ }
+
+ val |= MASK_PTE_MAPPED;
+
+ return val;
+}
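+
+/*
+ * Encoding example (illustrative): a 1 MiB mapping of address 0x80100000
+ * gives ppn = 0x801, so the returned FLPT entry is
+ * (0x801 << 16) | MASK_FLPTE_MAP_1M | MASK_PTE_MAPPED = 0x08010003.
+ */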
+
+static inline uint32_t flpte_to_slpt_base(const uint32_t flpte)
+{
+ uint32_t ret = 0;
+
+ ret = flpte & MASK_FLPTE_TO_SLPT_BASE;
+ ret >>= RSHFT_FLPTE_TO_SLPT_BASE;
+ ret <<= LSHFT_FLPTE_TO_SLPT_BASE;
+
+ return ret;
+}
+
+static inline phys_addr_t iova_to_phys(struct trinity_hwmem_iommu_info *info,
+ const dma_addr_t iova)
+{
+ phys_addr_t paddr = 0;
+ uint32_t vpn = iova_to_vpn(iova);
+ uint32_t *pte;
+
+ pte = &((uint32_t *)info->flpt)[vpn];
+
+ /* pte points into the FLPT array and cannot be NULL */
+ if (!(*pte & MASK_PTE_MAPPED))
+ return 0;
+
+ if (*pte & MASK_FLPTE_MAP_1M) {
+ paddr |= *pte;
+ paddr >>= LSHFT_FLPTE_PPN_1M;
+ paddr <<= RSHFT_FLPTE_PPN_1M;
+ paddr |= (iova & MASK_IOVA_TO_PAGEOFFSET_1M);
+ } else {
+ uint32_t *slpt_base = dma_to_virt(info->dma_region_virt_base,
+ info->dma_region_dma_base,
+ flpte_to_slpt_base(*pte));
+ uint32_t lv2vpn = iova_to_lv2vpn(iova);
+
+ paddr |= ioread32(&slpt_base[lv2vpn]);
+ paddr >>= LSHFT_SLPTE_PPN_4K;
+ paddr <<= RSHFT_SLPTE_PPN_4K;
+ paddr &= MASK_SLPTE_PPN_MASK_4K;
+ paddr |= (iova & MASK_IOVA_TO_PAGEOFFSET_4K);
+ }
+
+ return paddr;
+}
+
+static inline void *
+alloc_slpt_and_get_flpte(struct trinity_hwmem_iommu_info *info, uint32_t *flpte)
+{
+ dma_addr_t da;
+ void *slpt;
+
+ slpt = dma_alloc_wc(info->dev, SLPT_SIZE, &da, GFP_KERNEL);
+ if (!slpt)
+ return ERR_PTR(-ENOMEM);
+ /* the FLPTE can only encode an SLPT base aligned to 1 KiB */
+ if (!IS_ALIGNED(da, SZ_1K)) {
+ dma_free_wc(info->dev, SLPT_SIZE, slpt, da);
+ return ERR_PTR(-EINVAL);
+ }
+
+ da &= MASK_SLPT_BASE_TO_FLPTE;
+ da >>= RSHFT_SLPT_BASE_TO_FLPTE;
+ da <<= LSHFT_SLPT_BASE_TO_FLPTE;
+
+ *flpte = da;
+ memset(slpt, 0, SLPT_SIZE);
+
+ return slpt;
+}
+
+static struct trinity_hwmem_iommu_info *find_iommu_info(struct device *dev)
+{
+ struct trinity_hwmem_iommu_info *info, *found = NULL;
+
+ spin_lock(&iommu_info_list_lock);
+ list_for_each_entry (info, &iommu_info_list, node) {
+ if (info->dev == dev) {
+ found = info;
+ break;
+ }
+ }
+ spin_unlock(&iommu_info_list_lock);
+
+ return found;
+}
+
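+/*
+ * Map one naturally sized chunk (4K/64K/1M/2M/16M) with an identity
+ * IOVA-to-physical translation; the caller must hold info->lock.
+ */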
+static int iommu_map(struct trinity_hwmem_iommu_info *info, dma_addr_t daddr,
+ size_t size)
+{
+ uint32_t vpn = iova_to_vpn(daddr);
+ uint32_t lv2_vpn;
+ uint32_t flpte;
+ uint32_t val;
+ uint32_t *pte;
+ uint32_t *slpt;
+ uint32_t i, num_pte;
+
+ pte = &(((uint32_t *)info->flpt)[vpn]);
+
+ switch (size) {
+ case SZ_16M:
+ case SZ_2M:
+ case SZ_1M:
+ if (*pte & MASK_PTE_MAPPED) {
+ phys_addr_t paddr = iova_to_phys(info, daddr);
+
+ dev_err(info->dev,
+ "%s: iova %pad is already mapped to phys %pap\n",
+ __func__, &daddr, &paddr);
+ return -EADDRINUSE;
+ }
+
+ val = srnpu_iommu_get_pte(daddr, size);
+ if (!(val & MASK_PTE_MAPPED))
+ return -EINVAL;
+
+ num_pte = size >> RSHFT_SIZE_TO_NUM_PTE;
+ for (i = 0; i < num_pte; ++i)
+ iowrite32(val, &pte[i]);
+
+ break;
+ case SZ_64K:
+ case SZ_4K:
+ if (*pte & MASK_PTE_MAPPED) {
+ /* an SLPT already backs this section; take a reference */
+ slpt = dma_to_virt(info->dma_region_virt_base,
+ info->dma_region_dma_base,
+ flpte_to_slpt_base(*pte));
+ kref_get(&info->slpt_refcnts[vpn]);
+ } else {
+ slpt = alloc_slpt_and_get_flpte(info, &flpte);
+ if (IS_ERR(slpt))
+ return PTR_ERR(slpt);
+
+ kref_init(&info->slpt_refcnts[vpn]);
+ iowrite32((flpte | MASK_PTE_MAPPED), pte);
+ }
+ lv2_vpn = iova_to_lv2vpn(daddr);
+ slpt = &slpt[lv2_vpn];
+
+ val = srnpu_iommu_get_pte(daddr, size);
+ if (!(val & MASK_PTE_MAPPED))
+ return -EINVAL;
+
+ num_pte = size >> RSHFT_SIZE_TO_NUM_LV2PTE;
+ for (i = 0; i < num_pte; i++)
+ iowrite32(val, &slpt[i]);
+
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * kref release callback: the SLPT page itself is freed by iommu_unmap()
+ * after kref_put() returns, so only sanity-check that the refcount has
+ * really dropped to zero here.
+ */
+static void release_slpt(struct kref *kref)
+{
+ BUG_ON(kref_read(kref));
+}
+
+static int32_t iommu_unmap(struct trinity_hwmem_iommu_info *info,
+ dma_addr_t daddr, size_t size)
+{
+ uint32_t vpn = iova_to_vpn(daddr);
+ uint32_t *pte;
+ uint32_t val;
+ uint32_t i, num_pte;
+
+ pte = &(((uint32_t *)info->flpt)[vpn]);
+
+ switch (size) {
+ case SZ_16M:
+ case SZ_2M:
+ case SZ_1M:
+ num_pte = size >> RSHFT_SIZE_TO_NUM_PTE;
+ for (i = 0; i < num_pte; ++i) {
+ /* clear (not toggle) the valid bit of each section entry */
+ val = pte[i] & ~MASK_PTE_MAPPED;
+ iowrite32(val, &pte[i]);
+ }
+ break;
+ case SZ_64K:
+ case SZ_4K: {
+ void *slpt_base = dma_to_virt(info->dma_region_virt_base,
+ info->dma_region_dma_base,
+ flpte_to_slpt_base(*pte));
+
+ uint32_t lv2vpn = iova_to_lv2vpn(daddr);
+ uint32_t *slpt;
+
+ slpt = &((uint32_t *)slpt_base)[lv2vpn];
+ num_pte = size >> RSHFT_SIZE_TO_NUM_LV2PTE;
+ for (i = 0; i < num_pte; i++) {
+ val = slpt[i] & ~MASK_PTE_MAPPED;
+ iowrite32(val, &slpt[i]);
+ }
+
+ /* drop the SLPT reference; free it on the last unmapping */
+ if (kref_put(&info->slpt_refcnts[vpn], release_slpt)) {
+ dma_free_wc(info->dev, SLPT_SIZE, slpt_base,
+ flpte_to_slpt_base(*pte));
+ val = (*pte) & ~MASK_PTE_MAPPED;
+ iowrite32(val, pte);
+ }
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int32_t trinity_hwmem_iommu_map(struct device *dev, dma_addr_t addr,
+ size_t size)
+{
+ struct trinity_hwmem_iommu_info *info = NULL;
+ size_t size_remain = size;
+ size_t size_strd;
+
+ info = find_iommu_info(dev);
+ if (info == NULL) {
+ dev_err(dev, "Unable to find the iommu info");
+ return -ENOENT;
+ }
+
+ spin_lock(&info->lock);
+
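+ /* greedily split the region into the largest supported page sizes */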
+ while (size_remain > 0) {
+ if (size_remain >= SZ_16M)
+ size_strd = SZ_16M;
+ else if (size_remain >= SZ_2M)
+ size_strd = SZ_2M;
+ else if (size_remain >= SZ_1M)
+ size_strd = SZ_1M;
+ else if (size_remain >= SZ_64K)
+ size_strd = SZ_64K;
+ else
+ size_strd = SZ_4K;
+
+ if (iommu_map(info, addr, size_strd) < 0)
+ dev_warn(dev, "Unable to create iommu mapping for %pad",
+ &addr);
+
+ size_remain -= size_strd;
+ addr += size_strd;
+ }
+
+ spin_unlock(&info->lock);
+
+ return 0;
+}
+
+int32_t trinity_hwmem_iommu_unmap(struct device *dev, dma_addr_t addr,
+ size_t size)
+{
+ struct trinity_hwmem_iommu_info *info;
+ size_t size_remain = size;
+ size_t size_strd;
+
+ info = find_iommu_info(dev);
+ if (info == NULL) {
+ dev_err(dev, "Unable to find the iommu info");
+ return -ENOENT;
+ }
+
+ spin_lock(&info->lock);
+
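+ /* walk the region with the same chunking as the map path */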
+ while (size_remain > 0) {
+ if (size_remain >= SZ_16M)
+ size_strd = SZ_16M;
+ else if (size_remain >= SZ_2M)
+ size_strd = SZ_2M;
+ else if (size_remain >= SZ_1M)
+ size_strd = SZ_1M;
+ else if (size_remain >= SZ_64K)
+ size_strd = SZ_64K;
+ else
+ size_strd = SZ_4K;
+
+ if (iommu_unmap(info, addr, size_strd) < 0)
+ dev_warn(dev,
+ "Unable to remove iommu mapping for %pad",
+ &addr);
+
+ size_remain -= size_strd;
+ addr += size_strd;
+ }
+
+ spin_unlock(&info->lock);
+
+ return 0;
+}
+
+int32_t trinity_hwmem_iommu_init(struct device *dev, void __iomem *regbase)
+{
+ struct trinity_hwmem_iommu_info *info;
+ struct device_node *np = dev->of_node;
+ struct property *prop;
+ uint64_t dma_info[3];
+ int32_t err;
+
+ /*
+ * In the case of the FPGA development board, assume triv2 has its
+ * own DMA memory region, described by the "samsung,dma" property
+ * as <phys_base dma_base size>.
+ */
+ prop = of_find_property(np, "samsung,dma", NULL);
+ if (!prop)
+ return -ENODEV;
+
+ err = of_property_read_u64_array(np, "samsung,dma", dma_info, 3);
+ if (err < 0)
+ return -EINVAL;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ info->regbase = regbase;
+ info->dma_region_phys_base = dma_info[0];
+ info->dma_region_dma_base = dma_info[1];
+
+ spin_lock_init(&info->lock);
+
+ info->flpt = dma_alloc_wc(dev, PAGE_ALIGN(FLPT_SIZE),
+ &info->flpt_dma_addr, GFP_KERNEL);
+ if (!info->flpt) {
+ err = -ENOMEM;
+ goto err_free;
+ } else if (!IS_ALIGNED(info->flpt_dma_addr, SZ_16K)) {
+ /* the FLPT base register requires 16 KiB alignment */
+ err = -EINVAL;
+ goto err_free_flpt;
+ }
+
+ info->slpt_refcnts = devm_kcalloc(
+ dev, FLPT_NUM_PTES, sizeof(*info->slpt_refcnts), GFP_KERNEL);
+ if (!info->slpt_refcnts) {
+ err = -ENOMEM;
+ goto err_free_flpt;
+ }
+ info->dma_region_virt_base =
+ (void *)((size_t)info->flpt -
+ (info->flpt_dma_addr - info->dma_region_dma_base));
+
+ info->dev = dev;
+
+ spin_lock(&iommu_info_list_lock);
+ list_add_tail(&info->node, &iommu_info_list);
+ spin_unlock(&iommu_info_list_lock);
+
+ /** Set FLPT base */
+ iowrite32(FLPT_DMAADDR_TO_REGVAL(info->flpt_dma_addr),
+ info->regbase + BASE_OFFSET_IOMMU_DLA +
+ OFFSET_IOMMU_FLPT_BASE);
+ iowrite32(FLPT_DMAADDR_TO_REGVAL(info->flpt_dma_addr),
+ info->regbase + BASE_OFFSET_IOMMU_DSP +
+ OFFSET_IOMMU_FLPT_BASE);
+
+ /** Enable IOMMU */
+ enable_iommu(info);
+
+ return 0;
+
+err_free_flpt:
+ dma_free_wc(dev, PAGE_ALIGN(FLPT_SIZE), info->flpt,
+ info->flpt_dma_addr);
+err_free:
+ kfree(info);
+
+ return err;
+}
+
+void trinity_hwmem_iommu_flush(struct device *dev)
+{
+ struct trinity_hwmem_iommu_info *info;
+ u32 val = 1;
+
+ info = find_iommu_info(dev);
+ if (info == NULL)
+ return;
+
+ spin_lock(&info->lock);
+ iowrite32(val, info->regbase + BASE_OFFSET_IOMMU_DLA +
+ OFFSET_IOMMU_ALL_INVALIDATION);
+ iowrite32(val, info->regbase + BASE_OFFSET_IOMMU_DSP +
+ OFFSET_IOMMU_ALL_INVALIDATION);
+ spin_unlock(&info->lock);
+}
+
+#ifdef CONFIG_TRINITY_FPGA_DEBUG
+static void iommu_dump_mapped(struct trinity_hwmem_iommu_info *info)
+{
+ void *slpt_base_vaddr;
+ u32 pte, ppn, slpt_base;
+ u32 i, j;
+
+ if (!info)
+ return;
+
+ dev_info(info->dev, "[IOMMU] PAGE TABLE DUMP");
+
+ for (i = 0; i < FLPT_NUM_PTES; i++) {
+ pte = ((u32 *)info->flpt)[i];
+
+ if (!(pte & MASK_PTE_MAPPED))
+ continue;
+
+ if (CHECK_BITS(pte, MASK_FLPTE_MAP_16M)) {
+ ppn = (pte >> LSHFT_FLPTE_PPN_16M);
+ dev_info(info->dev, "\t[%u] 16M PTE(0x%x) PADDR (0x%x)",
+ i, pte, ppn << RSHFT_FLPTE_PPN_16M);
+ } else if (CHECK_BITS(pte, MASK_FLPTE_MAP_2M)) {
+ ppn = (pte >> LSHFT_FLPTE_PPN_2M);
+ dev_info(info->dev, "\t[%u] 2M PTE(0x%x) PADDR (0x%x)",
+ i, pte, ppn << RSHFT_FLPTE_PPN_2M);
+ } else if (CHECK_BITS(pte, MASK_FLPTE_MAP_1M)) {
+ ppn = (pte >> LSHFT_FLPTE_PPN_1M);
+ dev_info(info->dev, "\t[%u] 1M PTE(0x%x) PADDR (0x%x)",
+ i, pte, ppn << RSHFT_FLPTE_PPN_1M);
+ } else {
+ slpt_base = flpte_to_slpt_base(pte);
+ slpt_base_vaddr =
+ dma_to_virt(info->dma_region_virt_base,
+ info->dma_region_dma_base,
+ slpt_base);
+
+ dev_info(info->dev,
+ "\t[%u] 4K/64K PTE(0x%x) SLPT_BASE (0x%x)", i,
+ pte, slpt_base);
+
+ for (j = 0; j < SLPT_NUM_PTES; j++) {
+ pte = ((u32 *)slpt_base_vaddr)[j];
+ if (!(pte & MASK_PTE_MAPPED))
+ continue;
+
+ if (CHECK_BITS(pte, MASK_SLPTE_MAP_64K)) {
+ ppn = (pte >> LSHFT_SLPTE_PPN_64K);
+ dev_info(
+ info->dev,
+ "\t\t[%u] 64K slpt pte (0x%x) paddr (0x%x)",
+ j, pte,
+ ppn << RSHFT_SLPTE_PPN_64K);
+ } else {
+ ppn = (pte >> LSHFT_SLPTE_PPN_4K);
+ dev_info(
+ info->dev,
+ "\t\t[%u] 4K slpt pte (0x%x) paddr (0x%x)",
+ j, pte,
+ ppn << RSHFT_SLPTE_PPN_4K);
+ }
+ }
+ }
+ }
+}
+
+static void iommu_dump_tlb_set(struct trinity_hwmem_iommu_info *info,
+ const char *name, u32 flags)
+{
+ u32 val, tag, ppn;
+ u32 line, lane;
+
+ dev_info(info->dev, "[IOMMU] IOMMU TLB (%s) DUMP", name);
+
+ for (line = 0; line < 8; line++) {
+ for (lane = 0; lane < 4; lane++) {
+ val = flags;
+ val |= ((lane << LSHFT_TLB_READ_LANE) &
+ MASK_TLB_READ_LANE);
+ val |= ((line << LSHFT_TLB_READ_LINE) &
+ MASK_TLB_READ_LINE);
+
+ iowrite32(val, info->regbase + BASE_OFFSET_IOMMU_DLA +
+ OFFSET_IOMMU_TLB_READ);
+
+ ppn = ioread32(info->regbase + BASE_OFFSET_IOMMU_DLA +
+ OFFSET_IOMMU_TLB_PPN);
+ tag = ioread32(info->regbase + BASE_OFFSET_IOMMU_DLA +
+ OFFSET_IOMMU_TLB_TAG);
+
+ dev_info(
+ info->dev,
+ "\t[%u][%u] PPN (0x%x) VPN(0x%x) PS(%x) LANE_MAPPED(%u) LINE_VALID(%u)",
+ line, lane,
+ (u32)((ppn & MASK_TLB_PPN_PPN) >>
+ RSHFT_TLB_PPN_PPN),
+ (u32)((tag & MASK_TLB_TAG_VPN) >>
+ RSHFT_TLB_TAG_VPN),
+ (u32)((tag & MASK_TLB_TAG_PS) >>
+ RSHFT_TLB_TAG_PS),
+ (u32)((tag & MASK_TLB_TAG_LANE_MAPPED) >>
+ RSHFT_TLB_TAG_LANE_MAPPED),
+ (u32)((tag & MASK_TLB_TAG_LINE_VALID) >>
+ RSHFT_TLB_TAG_LINE_VALID));
+ }
+ }
+}
+
+static void iommu_dump_tlb(struct trinity_hwmem_iommu_info *info)
+{
+ if (!info)
+ return;
+
+ /* the three TLB sets share the same dump format */
+ iommu_dump_tlb_set(info, "PBUF/INPUT", MASK_TLB_READ_BUF);
+ iommu_dump_tlb_set(info, "PBUF/OUTPUT",
+ MASK_TLB_READ_BUF | MASK_TLB_READ_CH);
+ iommu_dump_tlb_set(info, "VBUF", 0);
+}
+
+void trinity_hwmem_iommu_print_status(struct device *dev)
+{
+ struct trinity_hwmem_iommu_info *info;
+ u32 iflt_stat, oflt_stat;
+
+ info = find_iommu_info(dev);
+ if (!info) {
+ dev_err(dev, "Unable to find the iommu info");
+ return;
+ }
+
+ spin_lock(&info->lock);
+
+ iflt_stat = ioread32(info->regbase + BASE_OFFSET_IOMMU_DLA +
+ OFFSET_IOMMU_IFLT_STAT);
+ if (iflt_stat & FLPT_PAGE_FAULT)
+ dev_err(dev,
+ "[IOMMU] [IN] First-level page table fault detected at 0x%x",
+ ioread32(info->regbase + BASE_OFFSET_IOMMU_DLA +
+ OFFSET_IOMMU_IFLT_VA));
+ if (iflt_stat & SLPT_PAGE_FAULT)
+ dev_err(dev,
+ "[IOMMU] [IN] Second-level page table fault detected at 0x%x",
+ ioread32(info->regbase + BASE_OFFSET_IOMMU_DLA +
+ OFFSET_IOMMU_IFLT_VA));
+ if (iflt_stat & PTW_ACCESS_FAULT)
+ dev_err(dev,
+ "[IOMMU] [IN] Page Table Walker (PTW) access fault");
+ if (iflt_stat & ATU_ACCESS_FAULT)
+ dev_err(dev,
+ "[IOMMU] [IN] Address Translation Unit (ATU) access fault");
+
+ oflt_stat = ioread32(info->regbase + BASE_OFFSET_IOMMU_DLA +
+ OFFSET_IOMMU_OFLT_STAT);
+ if (oflt_stat & FLPT_PAGE_FAULT)
+ dev_err(dev,
+ "[IOMMU] [OUT] First-level page table fault detected at 0x%x",
+ ioread32(info->regbase + BASE_OFFSET_IOMMU_DLA +
+ OFFSET_IOMMU_OFLT_VA));
+ if (oflt_stat & SLPT_PAGE_FAULT)
+ dev_err(dev,
+ "[IOMMU] [OUT] Second-level page table fault detected at 0x%x",
+ ioread32(info->regbase + BASE_OFFSET_IOMMU_DLA +
+ OFFSET_IOMMU_OFLT_VA));
+ if (oflt_stat & PTW_ACCESS_FAULT)
+ dev_err(dev,
+ "[IOMMU] [OUT] Page Table Walker (PTW) access fault");
+ if (oflt_stat & ATU_ACCESS_FAULT)
+ dev_err(dev,
+ "[IOMMU] [OUT] Address Translation Unit (ATU) access fault");
+
+ iommu_dump_mapped(info);
+ iommu_dump_tlb(info);
+
+ spin_unlock(&info->lock);
+}
+#endif
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Wook Song <wook16.song@samsung.com>
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#ifndef __DRIVERS_MISC_TRINITY_HWMEM_IOMMU_HELPER_H__
+#define __DRIVERS_MISC_TRINITY_HWMEM_IOMMU_HELPER_H__
+
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+int32_t trinity_hwmem_iommu_init(struct device *dev, void __iomem *regbase);
+int32_t trinity_hwmem_iommu_map(struct device *dev, dma_addr_t addr,
+ size_t size);
+int32_t trinity_hwmem_iommu_unmap(struct device *dev, dma_addr_t addr,
+ size_t size);
+void trinity_hwmem_iommu_flush(struct device *dev);
+
+#ifdef CONFIG_TRINITY_FPGA_DEBUG
+void trinity_hwmem_iommu_print_status(struct device *dev);
+#endif
+
+#endif /* __DRIVERS_MISC_TRINITY_HWMEM_IOMMU_HELPER_H__ */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * trinity/trinity_monitor.c: Device status monitor
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#include <linux/bitmap.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <uapi/linux/sched/types.h>
+
+#include "trinity_common.h"
+#include "trinity_monitor.h"
+
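+/* fixed-size pool of event slots; free slots are tracked in the bitmap */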
+#define EVENT_POOL_SIZE (4096)
+#define EVENT_TOTAL_NUM (EVENT_POOL_SIZE / sizeof(struct trinity_monitor_event))
+
+struct trinity_monitor {
+ DECLARE_BITMAP(bitmap, EVENT_TOTAL_NUM);
+ spinlock_t lock;
+ void *pool;
+
+ struct task_struct *thread;
+ struct device *dev;
+ struct llist_head event_queue;
+ wait_queue_head_t wait_queue;
+};
+
+static struct trinity_monitor monitor;
+
+static void trinity_monitor_clear_event(struct trinity_monitor_event *event)
+{
+ if (!event)
+ return;
+
+ spin_lock(&monitor.lock);
+ clear_bit(event->slot, monitor.bitmap);
+ spin_unlock(&monitor.lock);
+}
+
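+/*
+ * An event whose marker was set to 1 by its owner no longer needs its
+ * timeout checked; just return its slot. Pending events that exceed
+ * their timeout fire the registered callback.
+ */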
+static void trinity_monitor_handle_event(struct trinity_monitor_event *event)
+{
+ if (!event)
+ return;
+
+ if (atomic_read(&event->marker) != 1) {
+ ktime_t elapsed_time;
+
+ /* check event timeout */
+ elapsed_time =
+ ktime_to_ms(ktime_sub(ktime_get(), event->start_time));
+ BUG_ON(elapsed_time < 0);
+
+ if (elapsed_time > event->timeout_ms) {
+ if (event->cb)
+ event->cb(event->cb_data);
+ trinity_monitor_clear_event(event);
+ return;
+ }
+ } else {
+ /* don't need to handle event callback */
+ trinity_monitor_clear_event(event);
+ return;
+ }
+
+ /* push back to the queue */
+ llist_add(&event->llist, &monitor.event_queue);
+}
+
+/* lock-less thread worker */
+static int trinity_monitor_worker(void *data)
+{
+ struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
+ struct trinity_monitor_event *event;
+ struct llist_node *first, *next;
+
+ sched_setscheduler(current, SCHED_RR, &param);
+repeat:
+ if (kthread_should_stop())
+ return 0;
+
+ /* check any incoming events */
+ first = llist_del_all(&monitor.event_queue);
+ for (; first; first = next) {
+ next = llist_next(first);
+ event = llist_entry(first, typeof(*event), llist);
+ trinity_monitor_handle_event(event);
+ }
+
+ /* check any invalid memory access from devices */
+ trinity_monitor_invalid_access();
+
+ /* wake up the worker thread per 100 ms */
+ wait_event_interruptible_timeout(monitor.wait_queue,
+ kthread_should_stop(), HZ / 10);
+ goto repeat;
+}
+
+struct trinity_monitor_event *trinity_monitor_get_event(void)
+{
+ struct trinity_monitor_event *event = NULL;
+ int slot;
+
+ spin_lock(&monitor.lock);
+ slot = find_first_zero_bit(monitor.bitmap, EVENT_TOTAL_NUM);
+ if (slot < EVENT_TOTAL_NUM) {
+ set_bit(slot, monitor.bitmap);
+ event = &((struct trinity_monitor_event *)monitor.pool)[slot];
+ }
+ spin_unlock(&monitor.lock);
+
+ if (event) {
+ memset(event, '\x00', sizeof(*event));
+ event->slot = slot;
+ }
+
+ return event;
+}
+
+int trinity_monitor_add_event(struct trinity_monitor_event *event)
+{
+ if (!event)
+ return -EINVAL;
+
+ llist_add(&event->llist, &monitor.event_queue);
+ return 0;
+}
+
+int trinity_monitor_init(struct device *dev)
+{
+ struct task_struct *thread;
+
+ if (!dev)
+ return -EINVAL;
+
+ monitor.dev = dev;
+ /* the pool size must match the range covered by the slot bitmap */
+ monitor.pool = devm_kzalloc(dev, EVENT_POOL_SIZE, GFP_KERNEL);
+ if (!monitor.pool)
+ return -ENOMEM;
+
+ bitmap_zero(monitor.bitmap, EVENT_TOTAL_NUM);
+ spin_lock_init(&monitor.lock);
+
+ init_llist_head(&monitor.event_queue);
+ init_waitqueue_head(&monitor.wait_queue);
+
+ thread = kthread_run(trinity_monitor_worker, NULL, "trinity_monitor");
+ if (IS_ERR(thread)) {
+ dev_err(dev, "Unable to create kthread");
+ return PTR_ERR(thread);
+ }
+ monitor.thread = thread;
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * trinity/trinity_monitor.h: Device status monitor
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#ifndef __TRINITY_MONITOR_H__
+#define __TRINITY_MONITOR_H__
+
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/ktime.h>
+#include <linux/llist.h>
+#include <linux/types.h>
+
+typedef void (*trinity_monitor_cb)(void *data);
+
+struct trinity_monitor_event {
+ struct llist_node llist;
+ unsigned long timeout_ms;
+ trinity_monitor_cb cb;
+ void *cb_data;
+ ktime_t start_time;
+ atomic_t marker;
+ int slot;
+};
+
+#ifdef CONFIG_TRINITY_MONITOR
+struct trinity_monitor_event *trinity_monitor_get_event(void);
+int trinity_monitor_add_event(struct trinity_monitor_event *event);
+int trinity_monitor_init(struct device *dev);
+#else
+static inline struct trinity_monitor_event *trinity_monitor_get_event(void)
+{
+ return NULL;
+}
+static inline int trinity_monitor_add_event(struct trinity_monitor_event *event)
+{
+ return 0;
+}
+static inline int trinity_monitor_init(struct device *dev)
+{
+ return 0;
+}
+#endif
+
+#endif /* __TRINITY_MONITOR_H__ */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * Reserved memory allocator for Trinity device drivers
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#include "trinity_resv_mem.h"
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#define TRINITY_DRV_TO_RESV_POOL(drv) \
+ ((struct trinity_resv_mem_pool *)drv->resv_pool)
+#define PROG_POOL_SIZE (6 * 1024 * 1024) /* FIXME: 6MB */
+#define IS_INITIALIZED(pool) (atomic_read(&((pool)->initialized)) == 1)
+#define SET_INITIALIZED(pool) \
+ do { \
+ atomic_set(&((pool)->initialized), 1); \
+ } while (0)
+
+#define UNSET_INITIALIZED(pool) \
+ do { \
+ atomic_set(&((pool)->initialized), 0); \
+ } while (0)
+
+struct trinity_resv_mem_pool {
+ phys_addr_t paddr_base;
+ dma_addr_t daddr_base;
+ void *vaddr_base;
+
+ size_t total_size;
+ size_t total_used;
+
+ unsigned int num_bits;
+ unsigned long *bitmap;
+
+ spinlock_t lock;
+ atomic_t initialized;
+};
+
+/*
+ * Trinity devices share these reserved memory pools: a small contiguous
+ * pool for program data and a second pool for normal buffers.
+ */
+static struct trinity_resv_mem_pool resv_pool_cont;
+static struct trinity_resv_mem_pool resv_pool_norm;
+
+static int init_resv_mem(phys_addr_t paddr, dma_addr_t daddr, size_t size,
+ struct trinity_resv_mem_pool *pool)
+{
+ unsigned int num_bits = size >> PAGE_SHIFT;
+ int bitmap_size = BITS_TO_LONGS(num_bits) * sizeof(long);
+ void *vaddr;
+
+ vaddr = ioremap_wc(paddr, size);
+ if (unlikely(!vaddr))
+ return -EINVAL;
+
+ pool->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+ if (unlikely(!pool->bitmap)) {
+ iounmap(vaddr);
+ return -ENOMEM;
+ }
+
+ pool->paddr_base = paddr;
+ pool->daddr_base = daddr;
+ pool->vaddr_base = vaddr;
+ pool->total_size = size;
+ pool->total_used = 0;
+ pool->num_bits = num_bits;
+
+ spin_lock_init(&pool->lock);
+ SET_INITIALIZED(pool);
+
+ return 0;
+}
+
+static void fini_resv_mem(struct trinity_resv_mem_pool *pool)
+{
+ if (!pool || unlikely(!IS_INITIALIZED(pool)))
+ return;
+
+ UNSET_INITIALIZED(pool);
+
+ iounmap(pool->vaddr_base);
+ kfree(pool->bitmap);
+ memset(pool, '\x00', sizeof(*pool));
+}
+
+int trinity_declare_resv_mem(phys_addr_t paddr, dma_addr_t daddr, size_t size)
+{
+ int ret;
+
+ /* skip if initialized before */
+ if (unlikely(IS_INITIALIZED(&resv_pool_cont) ||
+ IS_INITIALIZED(&resv_pool_norm)))
+ return 0;
+
+ ret = init_resv_mem(paddr, daddr, PROG_POOL_SIZE, &resv_pool_cont);
+ if (ret != 0)
+ return ret;
+
+ /* FIXME: reserve the first page (not used) */
+ set_bit(0, resv_pool_cont.bitmap);
+ resv_pool_cont.total_used = PAGE_SIZE;
+
+ ret = init_resv_mem(paddr + PROG_POOL_SIZE, daddr + PROG_POOL_SIZE,
+ size - PROG_POOL_SIZE, &resv_pool_norm);
+ if (ret != 0) {
+ fini_resv_mem(&resv_pool_cont);
+ return ret;
+ }
+
+ return 0;
+}
+
+void trinity_release_resv_mem(void)
+{
+ fini_resv_mem(&resv_pool_cont);
+ fini_resv_mem(&resv_pool_norm);
+}
+
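+/* find 'nr' consecutive clear bits; returns the first index or -ERANGE */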
+static int find_free_region(unsigned long *bitmap, unsigned long num_bits,
+ unsigned long nr)
+{
+ unsigned long index, start, end, i;
+
+ start = 0;
+retry:
+ index = find_next_zero_bit(bitmap, num_bits, start);
+ end = index + nr;
+ if (end > num_bits)
+ return -ERANGE;
+
+ i = find_next_bit(bitmap, end, index);
+ if (i < end) {
+ start = i + 1;
+ goto retry;
+ }
+ return index;
+}
+
+int trinity_alloc_from_resv_mem(const size_t size, struct trinity_resv_mem *mem,
+ bool is_cont)
+{
+ struct trinity_resv_mem_pool *pool;
+ dma_addr_t offset;
+ int pageno, err = 0;
+
+ pool = is_cont ? &resv_pool_cont : &resv_pool_norm;
+
+ if (unlikely(!IS_INITIALIZED(pool)))
+ return -EPERM;
+
+ if (unlikely(!IS_ALIGNED(size, PAGE_SIZE)))
+ return -EINVAL;
+
+ spin_lock(&pool->lock);
+
+ if (unlikely(size > pool->total_size)) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ pageno = find_free_region(pool->bitmap, pool->num_bits,
+ size >> PAGE_SHIFT);
+ if (unlikely(pageno < 0)) {
+ err = pageno;
+ goto out;
+ }
+ bitmap_set(pool->bitmap, pageno, size >> PAGE_SHIFT);
+ offset = (dma_addr_t)pageno << PAGE_SHIFT;
+
+ mem->daddr = pool->daddr_base + offset;
+ mem->vaddr = pool->vaddr_base + offset;
+ mem->size = size;
+
+ memset(mem->vaddr, '\x00', size);
+
+ pool->total_used += mem->size;
+out:
+ spin_unlock(&pool->lock);
+
+ return err;
+}
+
+void trinity_free_from_resv_mem(struct trinity_resv_mem *mem, bool is_cont)
+{
+ struct trinity_resv_mem_pool *pool;
+
+ pool = is_cont ? &resv_pool_cont : &resv_pool_norm;
+
+ if (unlikely(!IS_INITIALIZED(pool)))
+ return;
+
+ if (likely(mem->vaddr != NULL)) {
+ int page = (mem->vaddr - pool->vaddr_base) >> PAGE_SHIFT;
+ int len = mem->size >> PAGE_SHIFT;
+
+ spin_lock(&pool->lock);
+
+ bitmap_clear(pool->bitmap, page, len);
+ pool->total_used -= mem->size;
+
+ spin_unlock(&pool->lock);
+ }
+}
+
+int trinity_mmap_from_resv_mem(struct vm_area_struct *vma, void *vaddr,
+ size_t size, bool is_cont)
+{
+ struct trinity_resv_mem_pool *pool;
+
+ pool = is_cont ? &resv_pool_cont : &resv_pool_norm;
+
+ if (likely(IS_INITIALIZED(pool))) {
+ unsigned long off = vma->vm_pgoff;
+ unsigned long pfn_base = PFN_DOWN(pool->paddr_base);
+ int start = (vaddr - pool->vaddr_base) >> PAGE_SHIFT;
+ int user_count = vma_pages(vma);
+ int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ if (off < count && user_count <= count - off) {
+ unsigned long pfn = pfn_base + start + off;
+ return remap_pfn_range(vma, vma->vm_start, pfn,
+ user_count << PAGE_SHIFT,
+ vma->vm_page_prot);
+ }
+ }
+
+ return -ENXIO;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * Reserved memory allocator for Trinity device drivers
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#ifndef __DRIVERS_MISC_TRINITY_RESV_MEM_H__
+#define __DRIVERS_MISC_TRINITY_RESV_MEM_H__
+
+#include <linux/mm_types.h>
+#include <linux/types.h>
+
+struct trinity_resv_mem {
+ dma_addr_t daddr;
+ void *vaddr;
+ size_t size;
+ size_t orig_size;
+};
+
+int trinity_declare_resv_mem(phys_addr_t paddr, dma_addr_t daddr, size_t size);
+
+void trinity_release_resv_mem(void);
+
+int trinity_alloc_from_resv_mem(const size_t size, struct trinity_resv_mem *mem,
+ bool is_prog);
+void trinity_free_from_resv_mem(struct trinity_resv_mem *mem, bool is_prog);
+int trinity_mmap_from_resv_mem(struct vm_area_struct *vma, void *vaddr,
+ size_t size, bool is_prog);
+
+#endif /* __DRIVERS_MISC_TRINITY_RESV_MEM_H__ */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * NPU req scheduler interface
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#include <linux/spinlock.h>
+
+#include "trinity_common.h"
+#include "trinity_sched.h"
+
+#ifdef CONFIG_TRINITY_SCHED_SR
+extern int trinity_sched_init_sr(struct device *);
+extern void trinity_sched_exit_sr(void);
+#endif
+#ifdef CONFIG_TRINITY_SCHED_VD
+extern int trinity_sched_init_vd(struct device *);
+extern void trinity_sched_exit_vd(void);
+#endif
+
+static struct trinity_sched_desc *sched_table[SCHED_END];
+static DEFINE_SPINLOCK(sched_lock);
+
+void trinity_sched_register(enum trinity_sched_type type,
+ struct trinity_sched_desc *desc)
+{
+ if (type >= SCHED_END)
+ return;
+
+ spin_lock(&sched_lock);
+ if (!sched_table[type])
+ sched_table[type] = desc;
+ spin_unlock(&sched_lock);
+}
+EXPORT_SYMBOL(trinity_sched_register);
+
+void trinity_sched_unregister(enum trinity_sched_type type,
+ struct trinity_sched_desc *desc)
+{
+ if (type >= SCHED_END)
+ return;
+
+ spin_lock(&sched_lock);
+ if (sched_table[type] == desc)
+ sched_table[type] = NULL;
+ spin_unlock(&sched_lock);
+}
+EXPORT_SYMBOL(trinity_sched_unregister);
+
+struct trinity_sched_desc *trinity_sched_find(enum trinity_sched_type type)
+{
+ struct trinity_sched_desc *desc;
+ unsigned long flags;
+
+ if (type >= SCHED_END)
+ return NULL;
+
+ spin_lock_irqsave(&sched_lock, flags);
+ desc = sched_table[type];
+ spin_unlock_irqrestore(&sched_lock, flags);
+
+ return desc;
+}
+EXPORT_SYMBOL(trinity_sched_find);
+
+/**
+ * trinity_sched_run_req() - Schedules a req to the target from the req queue.
+ * @req_data: The data ptr to hold req information to be submitted.
+ *
+ * Return: 0 on success. Otherwise, returns negative error. Additional status of
+ * the submitted req could be passed by req->status.
+ */
+int32_t trinity_sched_run_req(void *req_data, void *sched_data)
+{
+ struct trinity_req *req = (struct trinity_req *)req_data;
+ struct trinity_driver *drv = req->drv;
+ int32_t err = 0;
+ int32_t ready;
+
+ /** setup is only allowed in ready state */
+ ready = drv->desc->get_state(drv);
+ if (ready != TRINITY_STATE_READY) {
+ dev_err(drv_to_dev_ptr(drv),
+ "Cannot setup NPU when it's in a non-ready state");
+ err = -EPERM;
+ goto out;
+ }
+
+ if (req->stat->status != TRINITY_REQ_STATUS_PENDING &&
+ req->stat->status != TRINITY_REQ_STATUS_FINISHED) {
+ dev_err(drv_to_dev_ptr(drv), "Invalid req status: %d",
+ req->stat->status);
+ err = -EINVAL;
+ goto out;
+ }
+
+ req->stat->status = TRINITY_REQ_STATUS_RUNNING;
+ err = drv->desc->invoke_req(drv, req, sched_data);
+out:
+ if (err != 0)
+ req->stat->status = TRINITY_REQ_STATUS_ERROR;
+
+ return err;
+}
+EXPORT_SYMBOL(trinity_sched_run_req);
+
+void trinity_sched_suspend(void)
+{
+ enum trinity_sched_type type;
+ struct trinity_sched_desc *desc;
+
+ for (type = SCHED_SR; type < SCHED_END; type++) {
+ desc = sched_table[type];
+ if (desc)
+ desc->suspend();
+ }
+}
+
+void trinity_sched_resume(void)
+{
+ enum trinity_sched_type type;
+ struct trinity_sched_desc *desc;
+
+ for (type = SCHED_SR; type < SCHED_END; type++) {
+ desc = sched_table[type];
+ if (desc)
+ desc->resume();
+ }
+}
+
+int32_t trinity_sched_init(struct device *dev)
+{
+#ifdef CONFIG_TRINITY_SCHED_SR
+ if (trinity_sched_init_sr(dev) < 0)
+ dev_warn(dev, "Unable to initialize SR task scheduler");
+#endif
+#ifdef CONFIG_TRINITY_SCHED_VD
+ if (trinity_sched_init_vd(dev) < 0)
+ dev_warn(dev, "Unable to initialize VD task scheduler");
+#endif
+ return 0;
+}
+
+void trinity_sched_exit(void)
+{
+#ifdef CONFIG_TRINITY_SCHED_SR
+ trinity_sched_exit_sr();
+#endif
+#ifdef CONFIG_TRINITY_SCHED_VD
+ trinity_sched_exit_vd();
+#endif
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * trinity/trinity_sched.h: Scheduler I/F header for trinity devices
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#ifndef __TRINITY_SCHED_H__
+#define __TRINITY_SCHED_H__
+
+#include <linux/device.h>
+#include <linux/types.h>
+
+enum trinity_sched_type { SCHED_SR = 0, SCHED_VD, SCHED_END };
+typedef void (*remove_req_cb)(void *data, void *req);
+
+/**
+ * struct trinity_sched_desc - a structure for scheduler description
+ */
+struct trinity_sched_desc {
+ bool (*ready)(void);
+ int32_t (*submit)(void *data);
+ bool (*cancel)(void *data);
+ void (*suspend)(void);
+ void (*resume)(void);
+ void (*notify)(void *data, bool error);
+
+ struct trinity_req *(*find_req)(uint32_t dev_id, int req_id);
+ void (*remove_reqs)(void *data, remove_req_cb cb);
+ void (*test_run)(void *data, int req_id);
+};
+
+struct trinity_sched_desc *trinity_sched_find(enum trinity_sched_type type);
+void trinity_sched_register(enum trinity_sched_type type,
+ struct trinity_sched_desc *desc);
+void trinity_sched_unregister(enum trinity_sched_type type,
+ struct trinity_sched_desc *desc);
+int32_t trinity_sched_run_req(void *req_data, void *sched_data);
+void trinity_sched_suspend(void);
+void trinity_sched_resume(void);
+int32_t trinity_sched_init(struct device *dev);
+void trinity_sched_exit(void);
+#endif
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * SR's NPU req scheduler for Trinity device family
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+
+#include "trinity_common.h"
+#include "trinity_sched.h"
+
+#define get_dev_ptr() (g_sched_priv.dev)
+
+struct trinity_sched_priv {
+ struct device *dev;
+ struct llist_head req_queue;
+ wait_queue_head_t wait_queue;
+ struct task_struct *sched_thread;
+ struct mutex lock;
+ unsigned long suspended;
+};
+
+static struct trinity_sched_priv g_sched_priv;
+
+/**
+ * @brief calculate priority using timeout
+ */
+static unsigned long trinity_sched_calc_priority(struct trinity_req *req)
+{
+ ktime_t elapsed_time;
+ int64_t priority;
+
+ if (req->input.config.timeout_ms == 0)
+ return 0; /** @todo need preemption */
+
+ elapsed_time = ktime_to_ms(ktime_sub(ktime_get(), req->time_started));
+ BUG_ON(elapsed_time < 0);
+
+ /**
+ * if the elapsed time exceeds the timeout of req,
+ * its priority value is set to the minimum (highest).
+ */
+ priority = req->input.config.timeout_ms - elapsed_time;
+ if (priority < 0)
+ priority = 0;
+
+ return priority;
+}
+
+/**
+ * @brief pick the top-priority req from req queue
+ */
+static struct trinity_req *trinity_sched_pick_req(struct llist_head *queue)
+{
+ struct trinity_req *req, *req_prev;
+ struct trinity_req *top_req, *top_req_prev;
+ int64_t top_priority = S64_MAX;
+ unsigned long priority;
+
+ if (llist_empty(queue))
+ return NULL;
+
+ req = req_prev = NULL;
+ top_req = top_req_prev = NULL;
+
+ /**
+ * llist is not a double linked list, and sorting is not easy
+ * because llist provides only limited APIs.
+ * it could be better than sorting if there are a few pending reqs.
+ * Note that each user application can submit only one req at once.
+ */
+ llist_for_each_entry (req, queue->first, llist) {
+ priority = trinity_sched_calc_priority(req);
+ if (top_priority > priority) {
+ top_priority = priority;
+ top_req = req;
+ top_req_prev = req_prev;
+ }
+
+ req_prev = req;
+ }
+
+ if (top_req_prev) {
+ BUG_ON(!top_req);
+ top_req_prev->llist.next = top_req->llist.next;
+ } else {
+ /** it's first entry */
+ top_req = llist_entry(llist_del_first(queue), typeof(*(req)),
+ llist);
+ }
+
+ return top_req;
+}
+
+static struct llist_node *llist_last(struct llist_node *first)
+{
+ struct llist_node *last = first;
+
+ while (first && first->next) {
+ last = first->next;
+ first = last;
+ }
+
+ return last;
+}
+
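+/*
+ * Scheduler main loop: drain the lock-free global queue into a local
+ * list, dispatch reqs in priority order, then sleep until new work or
+ * a stop request arrives.
+ */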
+static int trinity_sched_thread_func(void *data)
+{
+ const unsigned long MAX_RETRY_COUNT = 100; /** around 100 ms */
+
+ struct llist_head local_queue;
+ struct llist_node *new_first;
+
+ init_llist_head(&local_queue);
+repeat:
+ if (kthread_should_stop())
+ return 0;
+
+ /** extract reqs from global queue without locking */
+ new_first = llist_del_all(&g_sched_priv.req_queue);
+ /** new and pending reqs could be located together */
+ if (new_first) {
+ struct llist_node *new_last = llist_last(new_first);
+ llist_add_batch(new_first, new_last, &local_queue);
+ }
+
+ /** flush reqs in the queue */
+ while (!llist_empty(&local_queue)) {
+ struct trinity_req *req;
+ int32_t ret;
+
+ /**
+ * pick the top-priority req from the queue.
+ * first and last node pointers are updated
+ */
+ req = trinity_sched_pick_req(&local_queue);
+ if (!req)
+ goto repeat;
+
+ mutex_lock(&g_sched_priv.lock);
+ ret = trinity_sched_run_req(req, NULL);
+ if (ret == 0)
+ req->scheduled = true;
+ mutex_unlock(&g_sched_priv.lock);
+
+ if (ret == -EBUSY) {
+ if (req->submit_retry >= MAX_RETRY_COUNT) {
+ /** give up handling this req */
+ complete_all(&req->complete);
+ } else {
+ req->submit_retry++;
+ /** push again and restart the loop */
+ llist_add(&req->llist, &local_queue);
+ }
+ goto repeat;
+ } else if (ret != 0) {
+ /** let's notify this unknown error */
+ complete_all(&req->complete);
+ }
+ }
+
+ /** ensure the local queue is empty */
+ BUG_ON(!llist_empty(&local_queue));
+
+ wait_event_interruptible(
+ g_sched_priv.wait_queue,
+ kthread_should_stop() ||
+ !llist_empty(&(g_sched_priv.req_queue)));
+ goto repeat;
+}
+
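+/* bit 1 of g_sched_priv.suspended serves as the "suspended" flag */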
+static bool sr_sched_ready(void)
+{
+ return (test_bit(1, &g_sched_priv.suspended) != 1);
+}
+
+static int32_t sr_sched_submit(void *data)
+{
+ struct trinity_req *req = data;
+
+ if (!req)
+ return -EINVAL;
+
+ if (!sr_sched_ready())
+ return -EAGAIN;
+
+ llist_add(&req->llist, &g_sched_priv.req_queue);
+ wake_up(&g_sched_priv.wait_queue);
+
+ return 0;
+}
+
+static void sr_sched_notify(void *data, bool error)
+{
+ struct trinity_req *req = data;
+
+ req->scheduled = false;
+}
+
+static void sr_sched_suspend(void)
+{
+ if (!test_and_set_bit(1, &g_sched_priv.suspended))
+ mutex_lock(&g_sched_priv.lock);
+}
+
+static void sr_sched_resume(void)
+{
+ if (test_and_clear_bit(1, &g_sched_priv.suspended))
+ mutex_unlock(&g_sched_priv.lock);
+}
+
+static struct trinity_sched_desc trinity_sched_sr = {
+ .ready = sr_sched_ready,
+ .submit = sr_sched_submit,
+ .notify = sr_sched_notify,
+ .suspend = sr_sched_suspend,
+ .resume = sr_sched_resume,
+};
+
+static int trinity_sched_open(struct inode *inodep, struct file *filp)
+{
+ return 0;
+}
+
+static int trinity_sched_release(struct inode *inodep, struct file *filp)
+{
+ return 0;
+}
+
+static const struct file_operations trinity_sched_fops = {
+ .owner = THIS_MODULE,
+ .open = trinity_sched_open,
+ .release = trinity_sched_release,
+ .llseek = no_llseek,
+};
+
+static struct miscdevice trinity_sched_device = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "trinity_sched",
+ .fops = &trinity_sched_fops,
+};
+
+static int trinity_sched_init_priv(void)
+{
+ g_sched_priv.dev = trinity_sched_device.this_device;
+
+ init_llist_head(&g_sched_priv.req_queue);
+ init_waitqueue_head(&g_sched_priv.wait_queue);
+
+ /* initialize the lock before starting the thread that takes it */
+ mutex_init(&g_sched_priv.lock);
+ clear_bit(1, &g_sched_priv.suspended);
+
+ g_sched_priv.sched_thread = kthread_run(trinity_sched_thread_func, NULL,
+ "trinity_sched_thread");
+ if (IS_ERR(g_sched_priv.sched_thread)) {
+ dev_err(get_dev_ptr(),
+ "Failed to create a thread for scheduling reqs");
+ misc_deregister(&trinity_sched_device);
+ return PTR_ERR(g_sched_priv.sched_thread);
+ }
+
+ return 0;
+}
+
+int trinity_sched_init_sr(struct device *dev)
+{
+ int err;
+
+ err = misc_register(&trinity_sched_device);
+ if (err) {
+ dev_err(dev,
+ "Failed to register a misc device for scheduler\n");
+ return err;
+ }
+
+ trinity_sched_register(SCHED_SR, &trinity_sched_sr);
+
+ err = trinity_sched_init_priv();
+ if (err)
+ trinity_sched_unregister(SCHED_SR, &trinity_sched_sr);
+
+ return err;
+}
+
+void trinity_sched_exit_sr(void)
+{
+ trinity_sched_unregister(SCHED_SR, &trinity_sched_sr);
+ misc_deregister(&trinity_sched_device);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Providing statistics for Samsung Research Trinity device family support
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#include "trinity_stat.h"
+#include "trinity_common.h"
+#include "trinity_resv_mem.h"
+
+#include <linux/bitmap.h>
+#include <linux/list_bl.h>
+
+/* maximum number of stats configurable from sysfs */
+#define TRINITY_STAT_MAX_APPS (128UL)
+#define TRINITY_STAT_MAX_REQS (4096UL)
+#define TRINITY_STAT_MAX_REQS_PER_APP (128UL)
+
+/* default number of stats */
+#define TRINITY_STAT_DEF_APPS (32UL)
+#define TRINITY_STAT_DEF_REQS (128UL)
+#define TRINITY_STAT_DEF_REQS_PER_APP (32UL)
+
+/* per-device stat pool (drv->stat.pdata)*/
+struct trinity_stat_pool {
+ DECLARE_BITMAP(bitmap_app, TRINITY_STAT_MAX_APPS);
+ DECLARE_BITMAP(bitmap_req, TRINITY_STAT_MAX_REQS);
+
+ struct trinity_resv_mem mem_app;
+ struct trinity_resv_mem mem_req;
+
+ unsigned long max_stat_apps;
+ unsigned long max_stat_reqs;
+ unsigned long max_stat_reqs_per_app;
+
+ unsigned long cur_stat_apps;
+ unsigned long cur_stat_reqs;
+
+ struct trinity_driver *drv;
+};
+
+int trinity_stat_pool_init(struct trinity_driver *drv)
+{
+ struct device *dev = drv_to_dev_ptr(drv);
+ struct trinity_stat_pool *pool;
+
+ /* initialize stat pool */
+ pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+ if (!pool) {
+ dev_err(dev, "Unable to allocate a stat pool for requests");
+ return -ENOMEM;
+ }
+ pool->drv = drv;
+
+ drv->stat.pdata = pool;
+
+ return 0;
+}
+
+void trinity_stat_pool_fini(struct trinity_driver *drv)
+{
+ struct trinity_stat_pool *pool = drv->stat.pdata;
+
+ if (!pool)
+ return;
+
+ trinity_free_from_resv_mem(&pool->mem_app, false);
+ trinity_free_from_resv_mem(&pool->mem_req, false);
+ kfree(pool);
+
+ drv->stat.pdata = NULL;
+}
+
+static void trinity_stat_pool_resize_apps(struct trinity_stat_pool *pool,
+ unsigned long num_apps)
+{
+ struct device *dev = drv_to_dev_ptr(pool->drv);
+ struct trinity_resv_mem mem;
+ unsigned long size;
+
+ if (num_apps > TRINITY_STAT_MAX_APPS) {
+ dev_err(dev, "The maximum number of stat apps: %lu",
+ TRINITY_STAT_MAX_APPS);
+ return;
+ }
+
+ size = PAGE_ALIGN(sizeof(struct trinity_stat_app) * num_apps);
+ if (trinity_alloc_from_resv_mem(size, &mem, false) == 0) {
+ trinity_free_from_resv_mem(&pool->mem_app, false);
+
+ bitmap_fill(pool->bitmap_app, TRINITY_STAT_MAX_APPS);
+ bitmap_zero(pool->bitmap_app, num_apps);
+
+ pool->max_stat_apps = num_apps;
+ pool->mem_app = mem;
+ } else {
+ dev_warn(dev, "Unable to allocate stats for apps");
+ }
+}
+
+static void trinity_stat_pool_resize_reqs(struct trinity_stat_pool *pool,
+ unsigned long num_reqs)
+{
+ struct device *dev = drv_to_dev_ptr(pool->drv);
+ struct trinity_resv_mem mem;
+ unsigned long size;
+
+ if (num_reqs > TRINITY_STAT_MAX_REQS) {
+ dev_err(dev, "The maximum number of stat reqs: %lu",
+ TRINITY_STAT_MAX_REQS);
+ return;
+ }
+
+ size = PAGE_ALIGN(sizeof(struct trinity_stat_req) * num_reqs);
+ if (trinity_alloc_from_resv_mem(size, &mem, false) == 0) {
+ trinity_free_from_resv_mem(&pool->mem_req, false);
+
+ bitmap_fill(pool->bitmap_req, TRINITY_STAT_MAX_REQS);
+ bitmap_zero(pool->bitmap_req, num_reqs);
+
+ pool->max_stat_reqs = num_reqs;
+ pool->mem_req = mem;
+ } else {
+ dev_warn(dev, "Unable to allocate stats for reqs");
+ }
+}
+
+static struct trinity_stat_app *
+trinity_stat_pool_get_app(struct trinity_driver *drv)
+{
+ struct trinity_stat *stat = &drv->stat;
+ struct trinity_stat_pool *pool = stat->pdata;
+ struct trinity_stat_app *app = NULL;
+ unsigned long slot;
+ bool retried = false;
+
+ /* ensured that the lock is acquired */
+retry:
+ slot = find_first_zero_bit(pool->bitmap_app, TRINITY_STAT_MAX_APPS);
+ if (slot < TRINITY_STAT_MAX_APPS) {
+ app = &((struct trinity_stat_app *)pool->mem_app.vaddr)[slot];
+ memset(app, '\x00', sizeof(*app));
+ set_bit(slot, pool->bitmap_app);
+ app->slot = slot;
+ } else if (!retried) {
+ /* retry after destroy old stats */
+ retried = true;
+ trinity_destroy_stats(stat, true);
+ goto retry;
+ } else {
+ dev_warn(drv_to_dev_ptr(pool->drv),
+ "Please increase stat pool limit for apps");
+ }
+
+ return app;
+}
+
+static void trinity_stat_pool_put_app(struct trinity_driver *drv,
+ struct trinity_stat_app *app)
+{
+ struct trinity_stat_pool *pool = drv->stat.pdata;
+
+ /* ensured that the lock is acquired */
+ clear_bit(app->slot, pool->bitmap_app);
+}
+
+static struct trinity_stat_req *
+trinity_stat_pool_get_req(struct trinity_driver *drv)
+{
+ struct trinity_stat *stat = &drv->stat;
+ struct trinity_stat_pool *pool = stat->pdata;
+ struct trinity_stat_req *req = NULL;
+ unsigned long slot;
+ bool retried = false;
+
+ /* ensured that the lock is acquired */
+retry:
+ slot = find_first_zero_bit(pool->bitmap_req, TRINITY_STAT_MAX_REQS);
+ if (slot < TRINITY_STAT_MAX_REQS) {
+ req = &((struct trinity_stat_req *)pool->mem_req.vaddr)[slot];
+ memset(req, '\x00', sizeof(*req));
+ set_bit(slot, pool->bitmap_req);
+ req->slot = slot;
+ } else if (!retried) {
+ /* retry after destroy old stats */
+ retried = true;
+ trinity_destroy_stats(stat, true);
+ goto retry;
+ } else {
+ dev_warn(drv_to_dev_ptr(pool->drv),
+ "Please increase stat pool limit for reqs");
+ }
+
+ return req;
+}
+
+static void trinity_stat_pool_put_req(struct trinity_driver *drv,
+ struct trinity_stat_req *req)
+{
+ struct trinity_stat_pool *pool = drv->stat.pdata;
+
+ /* ensured that the lock is acquired */
+ clear_bit(req->slot, pool->bitmap_req);
+}
+
+void trinity_stat_init(struct trinity_driver *drv)
+{
+ unsigned long i;
+
+ spin_lock_init(&drv->stat.lock);
+
+ INIT_LIST_HEAD(&drv->stat.list);
+ for (i = 0; i < TRINITY_STAT_HASH_SIZE; ++i)
+ INIT_HLIST_BL_HEAD(&drv->stat.hlist[i]);
+
+ trinity_stat_pool_init(drv);
+ /* initialize to default values */
+ trinity_stat_resize(drv, TRINITY_STAT_DEF_APPS, TRINITY_STAT_DEF_REQS,
+ TRINITY_STAT_DEF_REQS_PER_APP);
+}
+
+void trinity_stat_fini(struct trinity_driver *drv)
+{
+ trinity_stat_resize(drv, 0, 0, 0);
+ trinity_stat_pool_fini(drv);
+}
+
+void trinity_stat_resize(struct trinity_driver *drv, unsigned long num_apps,
+ unsigned long num_reqs, unsigned long num_reqs_per_app)
+{
+ struct trinity_stat *stat = &drv->stat;
+ struct trinity_stat_pool *pool = stat->pdata;
+ unsigned long i;
+
+ if (!pool)
+ return;
+
+ trinity_stat_lock(&drv->stat);
+
+ for (i = 0; i < TRINITY_STAT_HASH_SIZE; i++) {
+ struct trinity_stat_app *stat_app;
+ struct hlist_bl_node *hn;
+
+ hlist_bl_lock(&(stat->hlist[i]));
+ hlist_bl_for_each_entry (stat_app, hn, &(stat->hlist[i]),
+ hnode) {
+ if (stat_app->status != TRINITY_APP_STATUS_TERMINATED) {
+ dev_warn(drv_to_dev_ptr(drv),
+ "Still busy apps detected.. waiting");
+ hlist_bl_unlock(&(stat->hlist[i]));
+ goto unlock;
+ }
+ }
+ hlist_bl_unlock(&(stat->hlist[i]));
+ }
+
+ trinity_destroy_stats(stat, true);
+
+ /* re-allocate each stat buffer */
+ if (num_apps > 0)
+ trinity_stat_pool_resize_apps(pool, num_apps);
+
+ if (num_reqs > 0)
+ trinity_stat_pool_resize_reqs(pool, num_reqs);
+
+ if (num_reqs_per_app > 0)
+ pool->max_stat_reqs_per_app = num_reqs_per_app;
+
+unlock:
+ trinity_stat_unlock(&drv->stat);
+}
+
+unsigned long trinity_stat_get_max_apps(struct trinity_driver *drv)
+{
+ struct trinity_stat_pool *pool = drv->stat.pdata;
+ unsigned long num;
+
+ if (!pool)
+ return 0;
+
+ trinity_stat_lock(&drv->stat);
+ num = pool->max_stat_apps;
+ trinity_stat_unlock(&drv->stat);
+
+ return num;
+}
+
+unsigned long trinity_stat_get_max_reqs(struct trinity_driver *drv)
+{
+ struct trinity_stat_pool *pool = drv->stat.pdata;
+ unsigned long num;
+
+ if (!pool)
+ return 0;
+
+ trinity_stat_lock(&drv->stat);
+ num = pool->max_stat_reqs;
+ trinity_stat_unlock(&drv->stat);
+
+ return num;
+}
+
+unsigned long trinity_stat_get_max_reqs_per_app(struct trinity_driver *drv)
+{
+ struct trinity_stat_pool *pool = drv->stat.pdata;
+ unsigned long num;
+
+ if (!pool)
+ return 0;
+
+ trinity_stat_lock(&drv->stat);
+ num = pool->max_stat_reqs_per_app;
+ trinity_stat_unlock(&drv->stat);
+
+ return num;
+}
+
+void trinity_stat_lock(struct trinity_stat *stat)
+{
+ if (stat)
+ spin_lock(&stat->lock);
+}
+
+void trinity_stat_unlock(struct trinity_stat *stat)
+{
+ if (stat)
+ spin_unlock(&stat->lock);
+}
+
+/**
+ * trinity_create_stat_app - create a stat structure for the opened app
+ *
+ * @drv: An instance of the trinity driver.
+ *
+ * Returns 0 on success. Otherwise, returns a negative error.
+ */
+static int trinity_create_stat_app(struct trinity_driver *drv)
+{
+ struct trinity_stat *stat = &drv->stat;
+ struct trinity_stat_pool *pool = stat->pdata;
+ struct trinity_stat_app *stat_app;
+ unsigned long key;
+
+ trinity_stat_lock(stat);
+ stat_app = trinity_stat_pool_get_app(drv);
+ if (IS_ERR_OR_NULL(stat_app)) {
+ trinity_stat_unlock(stat);
+ dev_err(drv_to_dev_ptr(drv),
+ "Unable to allocate stat of request");
+ return -ENOMEM;
+ }
+
+ stat_app->parent = stat;
+ stat_app->app_id = trinity_get_app_id();
+ stat_app->total_alloc_mem = 0;
+ stat_app->total_freed_mem = 0;
+ stat_app->num_total_reqs = 0;
+ stat_app->num_kept_reqs = 0;
+ stat_app->num_active_reqs = 0;
+ stat_app->status = TRINITY_APP_STATUS_STARTED;
+
+ strncpy(stat_app->name, current->comm, TASK_COMM_LEN);
+ stat_app->name[TASK_COMM_LEN - 1] = '\x00';
+
+ INIT_HLIST_BL_NODE(&stat_app->hnode);
+ INIT_LIST_HEAD(&stat_app->reqs);
+
+ /* hash table for fast lookup */
+ key = hash_long(stat_app->app_id, TRINITY_STAT_HASH_BITS);
+
+ hlist_bl_lock(&(stat->hlist[key]));
+ hlist_bl_add_head(&stat_app->hnode, &(stat->hlist[key]));
+ hlist_bl_unlock(&(stat->hlist[key]));
+
+ /* list for ordered management */
+ list_add_tail(&stat_app->lnode, &stat->list);
+ pool->cur_stat_apps++;
+
+ /* Remove terminated stats if the number reaches the maximum */
+ trinity_destroy_stats(stat, false);
+
+ trinity_stat_unlock(stat);
+
+ return 0;
+}
+
+static void trinity_destroy_stat_req(struct trinity_stat_req *stat_req)
+{
+ struct trinity_stat_app *stat_app = stat_req->parent;
+ struct trinity_stat *stat = stat_app->parent;
+ struct trinity_driver *drv =
+ container_of(stat, struct trinity_driver, stat);
+
+ if (stat_req->profile)
+ drv->desc->destroy_profile(drv, stat_req->profile);
+ list_del(&stat_req->list);
+ trinity_stat_pool_put_req(drv, stat_req);
+}
+
+static void trinity_destroy_stat_reqs(struct trinity_stat_app *stat_app)
+{
+ struct trinity_stat_req *stat_req, *tmp;
+
+ list_for_each_entry_safe (stat_req, tmp, &stat_app->reqs, list)
+ trinity_destroy_stat_req(stat_req);
+}
+
+/**
+ * trinity_destroy_stats - destroy terminated stat structures
+ *
+ * @stat: The stat bookkeeping structure of a trinity device.
+ * @force: Destroy terminated stats even if the pool limit is not reached.
+ *
+ * The caller must hold the stat lock.
+ */
+void trinity_destroy_stats(struct trinity_stat *stat, bool force)
+{
+ struct trinity_driver *drv =
+ container_of(stat, struct trinity_driver, stat);
+ struct trinity_stat_pool *pool = stat->pdata;
+ struct trinity_stat_app *stat_app;
+ struct hlist_bl_node *hn, *tmp;
+ int i;
+
+ /* lock should be acquired before */
+ if (!force && pool->cur_stat_apps <= pool->max_stat_apps)
+ return;
+
+ for (i = 0; i < TRINITY_STAT_HASH_SIZE; i++) {
+ hlist_bl_lock(&stat->hlist[i]);
+ hlist_bl_for_each_entry_safe (stat_app, hn, tmp,
+ &(stat->hlist[i]), hnode) {
+ enum trinity_app_status status = stat_app->status;
+
+ if (status == TRINITY_APP_STATUS_TERMINATED) {
+ hlist_bl_del(&stat_app->hnode);
+ list_del(&stat_app->lnode);
+
+ pool->cur_stat_apps--;
+
+ trinity_destroy_stat_reqs(stat_app);
+ trinity_stat_pool_put_app(drv, stat_app);
+ }
+ }
+ hlist_bl_unlock(&stat->hlist[i]);
+ }
+}
+
+static struct trinity_stat_app *
+trinity_get_stat_by_id(struct trinity_driver *drv, int32_t app_id)
+{
+ struct trinity_stat *stat = &drv->stat;
+ struct trinity_stat_app *stat_app;
+ struct hlist_bl_node *hn;
+ unsigned long key;
+
+ key = hash_long(app_id, TRINITY_STAT_HASH_BITS);
+
+ hlist_bl_lock(&stat->hlist[key]);
+ hlist_bl_for_each_entry (stat_app, hn, &stat->hlist[key], hnode) {
+ if (stat_app->app_id == app_id)
+ goto out;
+ }
+ stat_app = NULL;
+out:
+ hlist_bl_unlock(&stat->hlist[key]);
+
+ return stat_app;
+}
+
+/**
+ * trinity_get_stat_app - get a stat structure for the calling app
+ *
+ * @drv: An instance of the trinity driver.
+ *
+ * Returns the stat structure on success. Otherwise, returns NULL.
+ *
+ * If the stat is not allocated yet, try to create and return it.
+ */
+struct trinity_stat_app *trinity_get_stat_app(struct trinity_driver *drv)
+{
+ struct trinity_stat *stat = &drv->stat;
+ struct trinity_stat_app *stat_app;
+ int app_id = trinity_get_app_id();
+
+retry:
+ trinity_stat_lock(stat);
+ stat_app = trinity_get_stat_by_id(drv, app_id);
+ trinity_stat_unlock(stat);
+
+ if (!IS_ERR_OR_NULL(stat_app))
+ return stat_app;
+
+ if (trinity_create_stat_app(drv) != 0)
+ return NULL;
+
+ goto retry;
+}
+
+void trinity_stat_app_set_status(struct trinity_driver *drv,
+ enum trinity_app_status status)
+{
+ struct trinity_stat *stat = &drv->stat;
+ struct trinity_stat_app *stat_app;
+ int app_id = trinity_get_app_id();
+
+ trinity_stat_lock(stat);
+ stat_app = trinity_get_stat_by_id(drv, app_id);
+ trinity_stat_unlock(stat);
+
+ if (IS_ERR_OR_NULL(stat_app))
+ return;
+
+ stat_app->status = status;
+}
+
+int trinity_stat_append_req(struct trinity_driver *drv, struct trinity_req *req)
+{
+ struct trinity_stat *stat = &drv->stat;
+ struct trinity_stat_pool *pool = stat->pdata;
+ struct trinity_stat_app *stat_app;
+ struct trinity_stat_req *stat_req;
+
+ stat_app = trinity_get_stat_app(drv);
+ if (IS_ERR_OR_NULL(stat_app))
+ return -ENOMEM;
+
+ trinity_stat_lock(stat);
+ stat_req = trinity_stat_pool_get_req(drv);
+ if (!stat_req) {
+ trinity_stat_unlock(stat);
+ dev_err(drv_to_dev_ptr(drv),
+ "Unable to allocate stat of request");
+ return -ENOMEM;
+ }
+
+ stat_req->parent = stat_app;
+ stat_req->app_id = stat_app->app_id;
+ stat_req->req_id = req->input.config.req_id;
+ stat_req->model_id = req->input.config.model_id;
+ stat_req->submitted = ktime_get();
+ stat_req->status = TRINITY_REQ_STATUS_PENDING;
+ stat_req->priority =
+ (enum trinity_req_priority)req->input.config.priority;
+ stat_req->is_kernel = req->is_kernel;
+
+ req->stat = stat_req;
+
+ list_add_tail(&stat_req->list, &stat_app->reqs);
+
+ /* don't count kernel requests */
+ if (!req->is_kernel) {
+ if (stat_app->num_kept_reqs == pool->max_stat_reqs_per_app) {
+ struct trinity_stat_req *old_stat = NULL, *iter;
+
+ /* evict the oldest finished (or failed) user request */
+ list_for_each_entry (iter, &stat_app->reqs, list) {
+ if (iter->is_kernel)
+ continue;
+ if (iter->status != TRINITY_REQ_STATUS_FINISHED &&
+ iter->status != TRINITY_REQ_STATUS_ERROR)
+ continue;
+ old_stat = iter;
+ break;
+ }
+
+ if (old_stat) {
+ trinity_destroy_stat_req(old_stat);
+ stat_app->num_total_reqs--;
+ } else {
+ /* nothing evictable yet; keep this one as well */
+ stat_app->num_kept_reqs++;
+ }
+ } else {
+ /* total number of user requests kept */
+ stat_app->num_kept_reqs++;
+ }
+ }
+
+ /* total number of all requests (including finished ones) */
+ stat_app->num_total_reqs++;
+ /* total number of active requests (running or pending) */
+ stat_app->num_active_reqs++;
+
+ trinity_stat_unlock(stat);
+ return 0;
+}
+
+void trinity_stat_remove_req(struct trinity_driver *drv,
+ struct trinity_req *req, bool rollback)
+{
+ struct trinity_stat *stat = &drv->stat;
+ struct trinity_stat_req *stat_req = req->stat;
+ struct trinity_stat_app *stat_app = stat_req->parent;
+
+ trinity_stat_lock(stat);
+
+ trinity_destroy_stat_req(stat_req);
+
+ if (!req->is_kernel) {
+ BUG_ON(stat_app->num_kept_reqs == 0);
+ stat_app->num_kept_reqs--;
+ }
+
+ if (rollback) {
+ BUG_ON(stat_app->num_total_reqs == 0);
+ stat_app->num_total_reqs--;
+ BUG_ON(stat_app->num_active_reqs == 0);
+ stat_app->num_active_reqs--;
+ }
+
+ trinity_stat_unlock(stat);
+}
+
+void trinity_stat_finish_req(struct trinity_driver *drv,
+ struct trinity_req *req)
+{
+ struct trinity_stat *stat = &drv->stat;
+ struct trinity_stat_req *stat_req = req->stat;
+ struct trinity_stat_app *stat_app = stat_req->parent;
+
+ trinity_stat_lock(stat);
+ if (stat_app->num_active_reqs != 0)
+ stat_app->num_active_reqs--;
+ else
+ dev_err(drv_to_dev_ptr(drv),
+ "Failed to keep track of the active reqs");
+ trinity_stat_unlock(stat);
+}
+
+static void copy_stat_app_ioctl(struct trinity_stat_app *stat_app,
+ struct trinity_ioctl_stat_app *ioctl_stat_app)
+{
+ ioctl_stat_app->app_id = stat_app->app_id;
+ ioctl_stat_app->status = stat_app->status;
+ ioctl_stat_app->num_total_reqs = stat_app->num_total_reqs;
+ ioctl_stat_app->num_active_reqs = stat_app->num_active_reqs;
+ ioctl_stat_app->total_alloc_mem = stat_app->total_alloc_mem;
+ ioctl_stat_app->total_freed_mem = stat_app->total_freed_mem;
+
+ strncpy(ioctl_stat_app->name, stat_app->name, TASK_COMM_LEN);
+ ioctl_stat_app->name[TASK_COMM_LEN - 1] = '\x00';
+}
+
+static void copy_stat_req_ioctl(struct trinity_stat_req *stat_req,
+ struct trinity_ioctl_stat_req *ioctl_stat_req)
+{
+ ktime_t cur_time = ktime_get();
+ ktime_t submitted, scheduled, completed;
+
+ submitted = stat_req->submitted;
+ scheduled = stat_req->scheduled ? stat_req->scheduled : cur_time;
+ completed = stat_req->completed ? stat_req->completed : cur_time;
+
+ ioctl_stat_req->req_id = stat_req->req_id;
+ ioctl_stat_req->model_id = stat_req->model_id;
+ ioctl_stat_req->priority = stat_req->priority;
+ ioctl_stat_req->status = stat_req->status;
+
+ if (stat_req->priority == TRINITY_REQ_PRIORITY_HIGH)
+ ioctl_stat_req->sched_time = 0;
+ else
+ ioctl_stat_req->sched_time = TIME_DIFF(scheduled, submitted);
+ ioctl_stat_req->infer_time = TIME_DIFF(completed, scheduled);
+}
+
+void trinity_stat_app_copy_ioctl(struct trinity_driver *drv,
+ struct trinity_ioctl_stat_app *ioctl_stat_app)
+{
+ struct trinity_stat *stat = &drv->stat;
+ struct trinity_stat_app *stat_app;
+ int app_id = trinity_get_app_id();
+
+ trinity_stat_lock(stat);
+
+ stat_app = trinity_get_stat_by_id(drv, app_id);
+ if (IS_ERR_OR_NULL(stat_app)) {
+ ioctl_stat_app->app_id = app_id;
+ ioctl_stat_app->status = TRINITY_APP_STATUS_PENDING;
+ ioctl_stat_app->num_total_reqs = 0;
+ ioctl_stat_app->num_active_reqs = 0;
+ ioctl_stat_app->total_alloc_mem = 0;
+ ioctl_stat_app->total_freed_mem = 0;
+
+ strncpy(ioctl_stat_app->name, current->comm, TASK_COMM_LEN);
+ ioctl_stat_app->name[TASK_COMM_LEN - 1] = '\x00';
+ } else {
+ copy_stat_app_ioctl(stat_app, ioctl_stat_app);
+ }
+
+ trinity_stat_unlock(stat);
+}
+
+void trinity_stat_apps_copy_ioctl(
+ struct trinity_driver *drv,
+ struct trinity_ioctl_stat_apps *ioctl_stat_apps)
+{
+ struct trinity_stat *stat = &drv->stat;
+ struct trinity_ioctl_stat_app *ioctl_stat_app;
+ struct trinity_stat_app *stat_app;
+ uint32_t idx = 0;
+
+ trinity_stat_lock(stat);
+
+ list_for_each_entry (stat_app, &stat->list, lnode) {
+ if (idx >= TRINITY_APP_STAT_MAX)
+ break;
+ ioctl_stat_app = &ioctl_stat_apps->stat[idx++];
+ copy_stat_app_ioctl(stat_app, ioctl_stat_app);
+ }
+ ioctl_stat_apps->num_apps = idx;
+
+ trinity_stat_unlock(stat);
+}
+
+void trinity_stat_reqs_copy_ioctl(
+ struct trinity_driver *drv,
+ struct trinity_ioctl_stat_reqs *ioctl_stat_reqs)
+{
+ struct trinity_stat *stat = &drv->stat;
+ struct trinity_ioctl_stat_req *ioctl_stat_req;
+ struct trinity_stat_app *stat_app;
+ struct trinity_stat_req *stat_req;
+ uint32_t idx = 0;
+
+ trinity_stat_lock(stat);
+ stat_app = trinity_get_stat_by_id(drv, ioctl_stat_reqs->app_id);
+ if (IS_ERR_OR_NULL(stat_app)) {
+ ioctl_stat_reqs->num_reqs = 0;
+ trinity_stat_unlock(stat);
+ return;
+ }
+
+ list_for_each_entry (stat_req, &stat_app->reqs, list) {
+ if (idx >= TRINITY_REQ_STAT_MAX)
+ break;
+ ioctl_stat_req = &ioctl_stat_reqs->stat[idx++];
+ copy_stat_req_ioctl(stat_req, ioctl_stat_req);
+ }
+ ioctl_stat_reqs->num_reqs = idx;
+
+ trinity_stat_unlock(stat);
+}
+
+void trinity_stat_app_total_alloc(struct trinity_driver *drv, size_t size)
+{
+ struct trinity_stat *stat = &drv->stat;
+ struct trinity_stat_app *stat_app;
+
+ stat_app = trinity_get_stat_app(drv);
+ if (IS_ERR_OR_NULL(stat_app))
+ return;
+
+ trinity_stat_lock(stat);
+ stat_app->total_alloc_mem += size;
+ trinity_stat_unlock(stat);
+}
+
+void trinity_stat_app_total_freed(struct trinity_driver *drv, size_t size)
+{
+ struct trinity_stat *stat = &drv->stat;
+ struct trinity_stat_app *stat_app;
+
+ stat_app = trinity_get_stat_app(drv);
+ if (IS_ERR_OR_NULL(stat_app))
+ return;
+
+ trinity_stat_lock(stat);
+ stat_app->total_freed_mem += size;
+ trinity_stat_unlock(stat);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * trinity/trinity_stat.h: Statistics header for trinity devices
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#ifndef __TRINITY_STAT_H__
+#define __TRINITY_STAT_H__
+
+#include "trinity_common.h"
+
+void trinity_stat_init(struct trinity_driver *drv);
+void trinity_stat_fini(struct trinity_driver *drv);
+void trinity_stat_resize(struct trinity_driver *drv, unsigned long num_apps,
+ unsigned long num_reqs,
+ unsigned long num_reqs_per_app);
+
+void trinity_stat_lock(struct trinity_stat *stat);
+void trinity_stat_unlock(struct trinity_stat *stat);
+void trinity_destroy_stats(struct trinity_stat *stat, bool force);
+
+unsigned long trinity_stat_get_max_apps(struct trinity_driver *drv);
+unsigned long trinity_stat_get_max_reqs(struct trinity_driver *drv);
+unsigned long trinity_stat_get_max_reqs_per_app(struct trinity_driver *drv);
+
+struct trinity_stat_app *trinity_get_stat_app(struct trinity_driver *drv);
+
+void trinity_stat_app_total_alloc(struct trinity_driver *drv, size_t size);
+void trinity_stat_app_total_freed(struct trinity_driver *drv, size_t size);
+void trinity_stat_app_set_status(struct trinity_driver *drv,
+ enum trinity_app_status status);
+
+int trinity_stat_append_req(struct trinity_driver *drv,
+ struct trinity_req *req);
+void trinity_stat_remove_req(struct trinity_driver *drv,
+ struct trinity_req *req, bool rollback);
+void trinity_stat_finish_req(struct trinity_driver *drv,
+ struct trinity_req *req);
+
+void trinity_stat_app_copy_ioctl(struct trinity_driver *drv,
+ struct trinity_ioctl_stat_app *ioctl_stat_app);
+
+void trinity_stat_apps_copy_ioctl(
+ struct trinity_driver *drv,
+ struct trinity_ioctl_stat_apps *ioctl_stat_apps);
+
+void trinity_stat_reqs_copy_ioctl(
+ struct trinity_driver *drv,
+ struct trinity_ioctl_stat_reqs *ioctl_stat_reqs);
+
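+/*
+ * Illustrative only -- the expected call sequence around a single request,
+ * where run_request() is a hypothetical stand-in for the device-specific
+ * execution step (it is not part of this patch):
+ *
+ *	err = trinity_stat_append_req(drv, req);	// status: PENDING
+ *	if (err < 0)
+ *		return err;
+ *	if (run_request(drv, req) < 0) {
+ *		// roll back the counters bumped by append
+ *		trinity_stat_remove_req(drv, req, true);
+ *		return -EIO;
+ *	}
+ *	trinity_stat_finish_req(drv, req);	// drop the active count
+ */
+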
+#endif /* __TRINITY_STAT_H__ */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Sysfs interface for Samsung Research Trinity device family
+ *
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Wook Song <wook16.song@samsung.com>
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#include <linux/device.h>
+#include <linux/sysfs.h>
+
+#include "trinity_common.h"
+#include "trinity_sched.h"
+#include "trinity_stat.h"
+
+enum trinity_sysfs_msg {
+ SYSFS_MSG_NORMAL = 0,
+ SYSFS_MSG_PROLOGUE,
+ SYSFS_MSG_EPILOGUE,
+ SYSFS_MSG_EMIT,
+};
+
+static ssize_t verbose_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+ int32_t ret = 0;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ ret = kstrtoul(buf, 10, &drv->verbose);
+ if (ret != 0)
+ return -EINVAL;
+
+ return (ssize_t)count;
+}
+
+static ssize_t verbose_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ if (drv == NULL)
+ return 0;
+
+ return snprintf(buf, PAGE_SIZE, "%lu\n", drv->verbose);
+}
+static DEVICE_ATTR_RW(verbose);
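+
+/*
+ * Illustrative only: every attribute handler in this file repeats the same
+ * device-to-driver lookup. A helper along these lines (not used below, shown
+ * just to document the pattern) would capture it:
+ */
+static inline struct trinity_driver *example_dev_to_drv(struct device *dev)
+{
+ struct platform_device *pdev;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ return platform_get_drvdata(pdev);
+}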
+
+static ssize_t debugfs_max_store(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+ unsigned long msg_max;
+ int32_t ret = 0;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ ret = kstrtoul(buf, 10, &msg_max);
+ if (ret != 0)
+ return -EINVAL;
+
+ trinity_debug_clear(drv, msg_max);
+
+ return (ssize_t)count;
+}
+
+static ssize_t debugfs_max_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ if (drv == NULL)
+ return 0;
+
+ return snprintf(buf, PAGE_SIZE, "%lu\n", trinity_debug_get_max(drv));
+}
+static DEVICE_ATTR_RW(debugfs_max);
+
+static ssize_t show_profile_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+ unsigned long req_id;
+ int32_t ret = 0;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ ret = kstrtoul(buf, 10, &req_id);
+ if (ret != 0)
+ return -EINVAL;
+
+ if (drv->desc->show_profile)
+ drv->desc->show_profile(drv, (int)req_id);
+
+ return (ssize_t)count;
+}
+static DEVICE_ATTR_WO(show_profile);
+
+static ssize_t idu_version_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ if (drv == NULL)
+ return 0;
+
+ if (drv->desc->idu_version) {
+ uint32_t major, minor, extra;
+ if (drv->desc->idu_version(drv, &major, &minor, &extra) == 0)
+ return snprintf(buf, PAGE_SIZE, "v%u.%u.%u\n", major,
+ minor, extra);
+ }
+
+ return snprintf(buf, PAGE_SIZE,
+ "Unknown... v0.30.7 or higher version required.\n");
+}
+static DEVICE_ATTR_RO(idu_version);
+
+static struct attribute *trinity_attrs_debug[] = {
+ &dev_attr_verbose.attr, &dev_attr_debugfs_max.attr,
+ &dev_attr_show_profile.attr, &dev_attr_idu_version.attr, NULL
+};
+
+/* e.g., /sys/devices/platform/304f0000.triv2/debug/ */
+static struct attribute_group trinity_attrs_debug_group = {
+ .name = "debug",
+ .attrs = trinity_attrs_debug
+};
+
+static ssize_t max_stat_apps_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+ unsigned long val;
+ int32_t ret = 0;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ ret = kstrtoul(buf, 10, &val);
+ if (ret != 0)
+ return -EINVAL;
+
+ trinity_stat_resize(drv, val, 0, 0);
+
+ return (ssize_t)count;
+}
+
+static ssize_t max_stat_apps_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ if (drv == NULL)
+ return 0;
+
+ return snprintf(buf, PAGE_SIZE, "%lu\n",
+ trinity_stat_get_max_apps(drv));
+}
+static DEVICE_ATTR_RW(max_stat_apps);
+
+static ssize_t max_stat_reqs_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+ unsigned long val;
+ int32_t ret = 0;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ ret = kstrtoul(buf, 10, &val);
+ if (ret != 0)
+ return -EINVAL;
+
+ trinity_stat_resize(drv, 0, val, 0);
+
+ return (ssize_t)count;
+}
+
+static ssize_t max_stat_reqs_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ if (drv == NULL)
+ return 0;
+
+ return snprintf(buf, PAGE_SIZE, "%lu\n",
+ trinity_stat_get_max_reqs(drv));
+}
+static DEVICE_ATTR_RW(max_stat_reqs);
+
+static ssize_t max_stat_reqs_per_app_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+ unsigned long val;
+ int32_t ret = 0;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ ret = kstrtoul(buf, 10, &val);
+ if (ret != 0)
+ return -EINVAL;
+
+ trinity_stat_resize(drv, 0, 0, val);
+
+ return (ssize_t)count;
+}
+
+static ssize_t max_stat_reqs_per_app_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ if (drv == NULL)
+ return 0;
+
+ return snprintf(buf, PAGE_SIZE, "%lu\n",
+ trinity_stat_get_max_reqs_per_app(drv));
+}
+static DEVICE_ATTR_RW(max_stat_reqs_per_app);
+
+static ssize_t mem_usage_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+ struct trinity_stat_app *stat_app;
+ ssize_t pos = 0;
+ bool first = true;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ if (drv == NULL)
+ return 0;
+
+ trinity_stat_lock(&drv->stat);
+
+ list_for_each_entry (stat_app, &drv->stat.list, lnode) {
+ if (first) {
+ pos += scnprintf(
+ buf + pos, PAGE_SIZE - pos,
+ "Memory usage statistics for all opened devices\n");
+ first = false;
+ }
+
+ pos += scnprintf(
+ buf + pos, PAGE_SIZE - pos,
+ " [%d] total_alloc: %llu bytes, total_freed: %llu bytes\n",
+ stat_app->app_id, stat_app->total_alloc_mem,
+ stat_app->total_freed_mem);
+ }
+
+ if (first)
+ pos += scnprintf(buf + pos, PAGE_SIZE - pos, "No active devices\n");
+
+ trinity_stat_unlock(&drv->stat);
+
+ return pos;
+}
+static DEVICE_ATTR_RO(mem_usage);
+
+#define MODEL_REGISTERED_PROLOGUE \
+ "\n Model statistics registered in all opened devices\n" \
+ "+--------------+--------------+-----------+------------+\n" \
+ "| Model ID | Model Size | Dmabuf FD | Offset |\n" \
+ "+--------------+--------------+-----------+------------+\n"
+#define MODEL_REGISTERED_NORMAL "| %#12llx | %#12llx | %9d | %#10llx |\n"
+#define MODEL_REGISTERED_EPILOGUE \
+ "+--------------+--------------+-----------+------------+\n"
+
+static ssize_t print_registered_models(const struct trinity_model *model,
+ char *buf, size_t size,
+ enum trinity_sysfs_msg msg)
+{
+ ssize_t pos = 0;
+
+ switch (msg) {
+ case SYSFS_MSG_PROLOGUE:
+ pos = scnprintf(buf, size, MODEL_REGISTERED_PROLOGUE);
+ break;
+ case SYSFS_MSG_NORMAL:
+ pos = scnprintf(buf, size, MODEL_REGISTERED_NORMAL,
+ model->config.id, model->config.program_size,
+ model->config.dbuf_fd,
+ model->config.program_offset_addr);
+ break;
+ case SYSFS_MSG_EPILOGUE:
+ pos = scnprintf(buf, size, MODEL_REGISTERED_EPILOGUE);
+ break;
+ default:
+ break;
+ }
+
+ return pos;
+}
+
+static ssize_t registered_models_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+ struct trinity_model_htable ht;
+ struct trinity_model *model;
+ struct hlist_bl_node *hn;
+ ssize_t pos;
+ int i, num_printed = 0;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ if (drv == NULL)
+ return 0;
+
+ trinity_init_model_htable(drv, &ht);
+
+ pos = print_registered_models(NULL, buf, PAGE_SIZE, SYSFS_MSG_PROLOGUE);
+
+ for (i = 0; i < ht.hash_size; i++) {
+ hlist_bl_lock(&(ht.ht_heads[i]));
+ hlist_bl_for_each_entry (model, hn, &(ht.ht_heads[i]), hnode) {
+ pos += print_registered_models(model, buf + pos,
+ PAGE_SIZE - pos,
+ SYSFS_MSG_NORMAL);
+ num_printed++;
+ }
+ hlist_bl_unlock(&(ht.ht_heads[i]));
+ }
+
+ if (num_printed > 0)
+ pos += print_registered_models(NULL, buf + pos,
+ PAGE_SIZE - pos,
+ SYSFS_MSG_EPILOGUE);
+
+ return pos;
+}
+static DEVICE_ATTR_RO(registered_models);
+
+static const char *priority_to_string(enum trinity_req_priority priority)
+{
+ static const char *const priority_strings[] = {
+ [TRINITY_REQ_PRIORITY_LOW] = "Low",
+ [TRINITY_REQ_PRIORITY_MID] = "Mid",
+ [TRINITY_REQ_PRIORITY_HIGH] = "High",
+ };
+ return priority_strings[priority];
+}
+
+static const char *status_to_string(enum trinity_req_status status)
+{
+ static const char *const status_strings[] = {
+ [TRINITY_REQ_STATUS_UNKNOWN] = "Unknown",
+ [TRINITY_REQ_STATUS_ERROR] = "Error",
+ [TRINITY_REQ_STATUS_PENDING] = "Pending",
+ [TRINITY_REQ_STATUS_RUNNING] = "Running",
+ [TRINITY_REQ_STATUS_FINISHED] = "Finished",
+ };
+ return status_strings[status];
+}
+
+#define APP_STATUS_LENGTH (77)
+#define USER_APP_STATUS_PROLOGUE \
+ "\n\tUser-level request statistics running in %s\n" \
+ "+-------+--------+----------+------+----------+--------------+-------------+\n" \
+ "| PID | Req ID | Model ID | Prio | Status | Sched (us) | Infer (us) |\n" \
+ "+-------+--------+----------+------+----------+--------------+-------------+\n"
+#define USER_APP_STATUS_NORMAL \
+ "| %5d | %6d | %#8llx | %4s | %8s | %12lld | %11lld |\n"
+#define USER_APP_STATUS_EMIT \
+ "| ... (emitted) ... |\n"
+#define USER_APP_STATUS_EPILOGUE \
+ "+-------+--------+----------+------+----------+--------------+-------------+\n"
+
+static ssize_t print_user_app_status(struct device *dev,
+ const struct trinity_stat_req *req,
+ char *buf, enum trinity_sysfs_msg msg)
+{
+ ssize_t pos = 0;
+
+ switch (msg) {
+ case SYSFS_MSG_PROLOGUE:
+ pos = snprintf(buf, APP_STATUS_LENGTH * 4 + 1,
+ USER_APP_STATUS_PROLOGUE, dev_name(dev));
+ break;
+ case SYSFS_MSG_NORMAL: {
+ ktime_t cur_time = ktime_get();
+ ktime_t submitted = req->submitted;
+ ktime_t scheduled = req->scheduled ? req->scheduled : cur_time;
+ ktime_t completed = req->completed ? req->completed : cur_time;
+
+ int64_t sched_diff = TIME_DIFF_US(scheduled, submitted);
+ int64_t infer_diff = TIME_DIFF_US(completed, scheduled);
+
+ if (req->status == TRINITY_REQ_STATUS_ERROR) {
+ sched_diff = 0;
+ infer_diff = 0;
+ }
+
+ pos = snprintf(buf, APP_STATUS_LENGTH + 1,
+ USER_APP_STATUS_NORMAL, req->app_id, req->req_id,
+ req->model_id, priority_to_string(req->priority),
+ status_to_string(req->status), sched_diff,
+ infer_diff);
+ } break;
+ case SYSFS_MSG_EMIT:
+ pos = snprintf(buf, APP_STATUS_LENGTH + 1,
+ USER_APP_STATUS_EMIT);
+ break;
+ case SYSFS_MSG_EPILOGUE:
+ pos = snprintf(buf, APP_STATUS_LENGTH + 1,
+ USER_APP_STATUS_EPILOGUE);
+ break;
+ default:
+ break;
+ }
+
+ return pos;
+}
+
+#define KERNEL_APP_STATUS_PROLOGUE \
+ "\n\tKernel-level request statistics running in %s\n" \
+ "+-------+--------+----------+------+----------+------------+---------------+\n" \
+ "| PID | Req ID | Model ID | Prio | Status | # Runs | Avg. Lat (us) |\n" \
+ "+-------+--------+----------+------+----------+------------+---------------+\n"
+#define KERNEL_APP_STATUS_NORMAL \
+ "| %5d | %6d | %#8llx | %4s | %8s | %10u | %13u |\n"
+#define KERNEL_APP_STATUS_EMIT \
+ "| ... (emitted) ... |\n"
+#define KERNEL_APP_STATUS_EPILOGUE \
+ "+-------+--------+----------+------+----------+------------+---------------+\n"
+
+static ssize_t print_kernel_app_status(struct device *dev,
+ const struct trinity_stat_req *req,
+ char *buf, enum trinity_sysfs_msg msg)
+{
+ ssize_t pos = 0;
+
+ switch (msg) {
+ case SYSFS_MSG_PROLOGUE:
+ pos = snprintf(buf, APP_STATUS_LENGTH * 4 + 1,
+ KERNEL_APP_STATUS_PROLOGUE, dev_name(dev));
+ break;
+ case SYSFS_MSG_NORMAL: {
+ uint32_t avg_latency = 0;
+
+ if (req->num_runs > 0)
+ avg_latency = req->total_time / req->num_runs;
+
+ pos = snprintf(buf, APP_STATUS_LENGTH + 1,
+ KERNEL_APP_STATUS_NORMAL, req->app_id,
+ req->req_id, req->model_id,
+ priority_to_string(req->priority),
+ status_to_string(req->status), req->num_runs,
+ avg_latency);
+ } break;
+ case SYSFS_MSG_EMIT:
+ pos = snprintf(buf, APP_STATUS_LENGTH + 1,
+ KERNEL_APP_STATUS_EMIT);
+ break;
+ case SYSFS_MSG_EPILOGUE:
+ pos = snprintf(buf, APP_STATUS_LENGTH + 1,
+ KERNEL_APP_STATUS_EPILOGUE);
+ break;
+ default:
+ break;
+ }
+
+ return pos;
+}
+
+static ssize_t app_status_user_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+ struct trinity_stat_app *stat_app;
+ struct trinity_stat_req *stat_req;
+ int num_printed = 0;
+ ssize_t pos;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ if (drv == NULL)
+ return 0;
+
+ pos = print_user_app_status(dev, NULL, buf, SYSFS_MSG_PROLOGUE);
+
+ trinity_stat_lock(&drv->stat);
+ list_for_each_entry (stat_app, &drv->stat.list, lnode) {
+ list_for_each_entry (stat_req, &stat_app->reqs, list) {
+ if (stat_req->is_kernel)
+ continue;
+
+ pos += print_user_app_status(dev, stat_req, buf + pos,
+ SYSFS_MSG_NORMAL);
+ num_printed++;
+
+ /* buffer size limit: PAGE_SIZE (reserve room for the trailing rows) */
+ if (pos + APP_STATUS_LENGTH >
+ PAGE_SIZE - 2 * APP_STATUS_LENGTH) {
+ pos += print_user_app_status(
+ dev, NULL, buf + pos, SYSFS_MSG_EMIT);
+ /* clear old stats */
+ trinity_destroy_stats(&drv->stat, true);
+ goto out;
+ }
+ }
+ }
+out:
+ trinity_stat_unlock(&drv->stat);
+
+ if (num_printed > 0)
+ pos += print_user_app_status(dev, NULL, buf + pos,
+ SYSFS_MSG_EPILOGUE);
+
+ return pos;
+}
+static DEVICE_ATTR_RO(app_status_user);
+
+static ssize_t app_status_kernel_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+ struct trinity_stat_app *stat_app;
+ struct trinity_stat_req *stat_req;
+ int num_printed = 0;
+ ssize_t pos;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ if (drv == NULL)
+ return 0;
+
+ pos = print_kernel_app_status(dev, NULL, buf, SYSFS_MSG_PROLOGUE);
+
+ trinity_stat_lock(&drv->stat);
+ list_for_each_entry (stat_app, &drv->stat.list, lnode) {
+ list_for_each_entry (stat_req, &stat_app->reqs, list) {
+ if (!stat_req->is_kernel)
+ continue;
+
+ pos += print_kernel_app_status(dev, stat_req, buf + pos,
+ SYSFS_MSG_NORMAL);
+ num_printed++;
+
+ /* buffer size limit: PAGE_SIZE (reserve room for the trailing rows) */
+ if (pos + APP_STATUS_LENGTH >
+ PAGE_SIZE - 2 * APP_STATUS_LENGTH) {
+ pos += print_kernel_app_status(
+ dev, NULL, buf + pos, SYSFS_MSG_EMIT);
+ /* clear old stats */
+ trinity_destroy_stats(&drv->stat, true);
+ goto out;
+ }
+ }
+ }
+out:
+ trinity_stat_unlock(&drv->stat);
+
+ if (num_printed > 0)
+ pos += print_kernel_app_status(dev, NULL, buf + pos,
+ SYSFS_MSG_EPILOGUE);
+
+ return pos;
+}
+static DEVICE_ATTR_RO(app_status_kernel);
+
+static ssize_t num_total_reqs_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+ struct trinity_stat_app *stat_app;
+ uint32_t num_total_reqs = 0;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ if (drv == NULL)
+ return 0;
+
+ trinity_stat_lock(&drv->stat);
+
+ list_for_each_entry (stat_app, &drv->stat.list, lnode) {
+ num_total_reqs += stat_app->num_total_reqs;
+ }
+
+ trinity_stat_unlock(&drv->stat);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", num_total_reqs);
+}
+static DEVICE_ATTR_RO(num_total_reqs);
+
+static ssize_t num_active_reqs_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+ struct trinity_stat_app *stat_app;
+ uint32_t num_active_reqs = 0;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ if (drv == NULL)
+ return 0;
+
+ trinity_stat_lock(&drv->stat);
+
+ list_for_each_entry (stat_app, &drv->stat.list, lnode) {
+ num_active_reqs += stat_app->num_active_reqs;
+ }
+
+ trinity_stat_unlock(&drv->stat);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", num_active_reqs);
+}
+static DEVICE_ATTR_RO(num_active_reqs);
+
+static struct attribute *trinity_attrs_stat[] = {
+ &dev_attr_max_stat_apps.attr, &dev_attr_max_stat_reqs.attr,
+ &dev_attr_max_stat_reqs_per_app.attr, &dev_attr_mem_usage.attr,
+ &dev_attr_registered_models.attr, &dev_attr_app_status_user.attr,
+ &dev_attr_app_status_kernel.attr, &dev_attr_num_total_reqs.attr,
+ &dev_attr_num_active_reqs.attr, NULL
+};
+
+/* e.g., /sys/devices/platform/304f0000.triv2/stat/ */
+static struct attribute_group trinity_attrs_stat_group = {
+ .name = "stat",
+ .attrs = trinity_attrs_stat
+};
+
+static ssize_t stop_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+ unsigned long stop;
+ int32_t ret = 0;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ if (drv == NULL)
+ return 0;
+
+ ret = kstrtoul(buf, 10, &stop);
+ if (ret != 0)
+ return -EINVAL;
+
+ if (stop == 1 && drv->desc->stop_reqs)
+ schedule_work(&drv->work_stop);
+
+ return (ssize_t)count;
+}
+
+static DEVICE_ATTR_WO(stop);
+
+static ssize_t idu_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+ char dirpath[NAME_MAX];
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ if (drv == NULL)
+ return 0;
+
+ strncpy(dirpath, buf, NAME_MAX);
+ dirpath[NAME_MAX - 1] = '\x00';
+ /* remove trailing newline if present */
+ dirpath[strcspn(dirpath, "\n")] = '\x00';
+
+ mutex_lock(&drv->lock);
+ drv->desc->idu_load(drv, dirpath, true);
+ mutex_unlock(&drv->lock);
+
+ return (ssize_t)count;
+}
+
+static DEVICE_ATTR_WO(idu);
+
+static ssize_t suspend_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long suspend;
+
+ if (kstrtoul(buf, 10, &suspend) != 0)
+ return -EINVAL;
+
+ /* Note that this interface is used only for testing purposes */
+ if (suspend == 1) {
+ const struct dev_pm_ops *ops = dev->driver->pm;
+ ops->runtime_suspend(dev);
+ }
+
+ return (ssize_t)count;
+}
+
+static DEVICE_ATTR_WO(suspend);
+
+static ssize_t resume_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long resume;
+
+ if (kstrtoul(buf, 10, &resume) != 0)
+ return -EINVAL;
+
+ /* Note that this interface is used only for testing purposes */
+ if (resume == 1) {
+ const struct dev_pm_ops *ops = dev->driver->pm;
+ ops->runtime_resume(dev);
+ }
+
+ return (ssize_t)count;
+}
+
+static DEVICE_ATTR_WO(resume);
+
+static ssize_t sched_test_store(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+ struct trinity_sched_desc *desc;
+ long req_id;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ if (drv == NULL)
+ return 0;
+
+ if (kstrtol(buf, 10, &req_id) != 0 || req_id > INT_MAX)
+ return -EINVAL;
+
+ /* Note that this interface is used only for testing purposes */
+ desc = trinity_sched_find(SCHED_VD);
+ if (desc && desc->test_run)
+ desc->test_run(drv, (int)req_id);
+
+ return (ssize_t)count;
+}
+
+static DEVICE_ATTR_WO(sched_test);
+
+static ssize_t profile_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+ unsigned long profile;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ if (drv == NULL)
+ return 0;
+
+ if (kstrtoul(buf, 10, &profile) != 0)
+ return -EINVAL;
+
+ /* Note that this interface is used only for testing purposes */
+ if (drv->desc->init_profile)
+ drv->desc->init_profile(drv, profile);
+
+ return (ssize_t)count;
+}
+
+static DEVICE_ATTR_WO(profile);
+
+static ssize_t reset_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct platform_device *pdev;
+ struct trinity_driver *drv;
+ unsigned long reset;
+
+ pdev = container_of(dev, struct platform_device, dev);
+ drv = platform_get_drvdata(pdev);
+
+ if (drv == NULL)
+ return 0;
+
+ if (kstrtoul(buf, 10, &reset) != 0)
+ return -EINVAL;
+
+ if (reset == 1 && drv->desc->reset)
+ drv->desc->reset(drv);
+
+ return (ssize_t)count;
+}
+
+static DEVICE_ATTR_WO(reset);
+
+static struct attribute *trinity_attrs_control[] = {
+ &dev_attr_stop.attr, &dev_attr_idu.attr,
+ &dev_attr_suspend.attr, &dev_attr_resume.attr,
+ &dev_attr_sched_test.attr, &dev_attr_profile.attr,
+ &dev_attr_reset.attr, NULL
+};
+
+/* e.g., /sys/devices/platform/304f0000.triv2/control/ */
+static struct attribute_group trinity_attrs_control_group = {
+ .name = "control",
+ .attrs = trinity_attrs_control
+};
+
+static const struct attribute_group *trinity_attrs_groups[] = {
+ &trinity_attrs_debug_group, &trinity_attrs_stat_group,
+ &trinity_attrs_control_group, NULL
+};
+
+int trinity_sysfs_init(struct trinity_driver *drv)
+{
+ struct device *dev = drv_to_dev_ptr(drv);
+ int err;
+
+ err = sysfs_create_groups(&dev->kobj, trinity_attrs_groups);
+ if (err < 0) {
+ dev_err(dev, "failed to create sysfs groups\n");
+ return err;
+ }
+
+ return 0;
+}
+
+int trinity_sysfs_cleanup(struct trinity_driver *drv)
+{
+ struct device *dev = drv_to_dev_ptr(drv);
+
+ sysfs_remove_groups(&dev->kobj, trinity_attrs_groups);
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * trinity/trinity_trace.c: Trace source for trinity devices
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "trinity_trace.h"
+#endif
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * trinity/trinity_trace.h: Trace header for trinity devices
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#if !defined(__TRINITY_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ)
+#define __TRINITY_TRACE_H__
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM trinity
+#define TRACE_INCLUDE_FILE trinity_trace
+
+// clang-format off
+TRACE_EVENT(triv2_run_trigger,
+ TP_PROTO(u32 device_id, s32 slot),
+ TP_ARGS(device_id, slot),
+ TP_STRUCT__entry(
+ __field(u32, device_id)
+ __field(s32, slot)
+ ),
+ TP_fast_assign(
+ __entry->device_id = device_id;
+ __entry->slot = slot;
+ ),
+ TP_printk("device_id=%u slot=%d",
+ __entry->device_id,
+ __entry->slot)
+);
+TRACE_EVENT(triv2_wakeup_cp,
+ TP_PROTO(u32 device_id),
+ TP_ARGS(device_id),
+ TP_STRUCT__entry(
+ __field(u32, device_id)
+ ),
+ TP_fast_assign(
+ __entry->device_id = device_id;
+ ),
+ TP_printk("device_id=%u",
+ __entry->device_id)
+);
+TRACE_EVENT(triv2_handle_irq,
+ TP_PROTO(u32 device_id, s32 irq),
+ TP_ARGS(device_id, irq),
+ TP_STRUCT__entry(
+ __field(u32, device_id)
+ __field(s32, irq)
+ ),
+ TP_fast_assign(
+ __entry->device_id = device_id;
+ __entry->irq = irq;
+ ),
+ TP_printk("device_id=%u irq=%d",
+ __entry->device_id,
+ __entry->irq)
+);
+TRACE_EVENT(triv2_handle_threaded_irq,
+ TP_PROTO(u32 device_id, s32 irq),
+ TP_ARGS(device_id, irq),
+ TP_STRUCT__entry(
+ __field(u32, device_id)
+ __field(s32, irq)
+ ),
+ TP_fast_assign(
+ __entry->device_id = device_id;
+ __entry->irq = irq;
+ ),
+ TP_printk("device_id=%u irq=%d",
+ __entry->device_id,
+ __entry->irq)
+);
+TRACE_EVENT(triv2_handle_cmd_done,
+ TP_PROTO(u32 device_id, s32 slot, u32 cycles, u32 time),
+ TP_ARGS(device_id, slot, cycles, time),
+ TP_STRUCT__entry(
+ __field(u32, device_id)
+ __field(s32, slot)
+ __field(u32, cycles)
+ __field(u32, time)
+ ),
+ TP_fast_assign(
+ __entry->device_id = device_id;
+ __entry->slot = slot;
+ __entry->cycles = cycles;
+ __entry->time = time;
+ ),
+ TP_printk("device_id=%u slot=%d cycles=%u time(us)=%u",
+ __entry->device_id,
+ __entry->slot,
+ __entry->cycles,
+ __entry->time)
+);
+TRACE_EVENT(triv2_map_sched_data,
+ TP_PROTO(u32 device_id, s32 slot, u32 batch_size, u32 in_cnt, u32 out_cnt),
+ TP_ARGS(device_id, slot, batch_size, in_cnt, out_cnt),
+ TP_STRUCT__entry(
+ __field(u32, device_id)
+ __field(s32, slot)
+ __field(u32, batch_size)
+ __field(u32, in_cnt)
+ __field(u32, out_cnt)
+ ),
+ TP_fast_assign(
+ __entry->device_id = device_id;
+ __entry->slot = slot;
+ __entry->batch_size = batch_size;
+ __entry->in_cnt = in_cnt;
+ __entry->out_cnt = out_cnt;
+ ),
+ TP_printk("device_id=%u slot=%d batch_size=%u in_cnt=%u out_cnt=%u",
+ __entry->device_id,
+ __entry->slot,
+ __entry->batch_size,
+ __entry->in_cnt,
+ __entry->out_cnt)
+);
+TRACE_EVENT(triv2_unmap_sched_data,
+ TP_PROTO(u32 device_id, s32 slot),
+ TP_ARGS(device_id, slot),
+ TP_STRUCT__entry(
+ __field(u32, device_id)
+ __field(s32, slot)
+ ),
+ TP_fast_assign(
+ __entry->device_id = device_id;
+ __entry->slot = slot;
+ ),
+ TP_printk("device_id=%u slot=%d",
+ __entry->device_id,
+ __entry->slot)
+);
+TRACE_EVENT(trinity_ioctl_msg,
+ TP_PROTO(u32 device_id, s32 app_id, const char *msg),
+ TP_ARGS(device_id, app_id, msg),
+ TP_STRUCT__entry(
+ __field(u32, device_id)
+ __field(s32, app_id)
+ /* copy msg into the ring buffer; a raw pointer may be stale at read time */
+ __string(msg, msg)
+ ),
+ TP_fast_assign(
+ __entry->device_id = device_id;
+ __entry->app_id = app_id;
+ __assign_str(msg, msg);
+ ),
+ TP_printk("device_id=%u app_id=%d msg=%s",
+ __entry->device_id,
+ __entry->app_id,
+ __get_str(msg))
+);
+TRACE_EVENT(trinity_ioctl_next_req,
+ TP_PROTO(u32 device_id, s32 app_id, s32 req_id),
+ TP_ARGS(device_id, app_id, req_id),
+ TP_STRUCT__entry(
+ __field(u32, device_id)
+ __field(s32, app_id)
+ __field(s32, req_id)
+ ),
+ TP_fast_assign(
+ __entry->device_id = device_id;
+ __entry->app_id = app_id;
+ __entry->req_id = req_id;
+ ),
+ TP_printk("device_id=%u app_id=%d req_id=%d",
+ __entry->device_id,
+ __entry->app_id,
+ __entry->req_id)
+);
+TRACE_EVENT(trinity_ioctl_stop_req,
+ TP_PROTO(u32 device_id, s32 app_id, s32 req_id),
+ TP_ARGS(device_id, app_id, req_id),
+ TP_STRUCT__entry(
+ __field(u32, device_id)
+ __field(s32, app_id)
+ __field(s32, req_id)
+ ),
+ TP_fast_assign(
+ __entry->device_id = device_id;
+ __entry->app_id = app_id;
+ __entry->req_id = req_id;
+ ),
+ TP_printk("device_id=%u app_id=%d req_id=%d",
+ __entry->device_id,
+ __entry->app_id,
+ __entry->req_id)
+);
+TRACE_EVENT(trinity_ioctl_hwmem_alloc,
+ TP_PROTO(u32 device_id, s32 app_id, s64 size, s32 dbuf_fd),
+ TP_ARGS(device_id, app_id, size, dbuf_fd),
+ TP_STRUCT__entry(
+ __field(u32, device_id)
+ __field(s32, app_id)
+ __field(s64, size)
+ __field(s32, dbuf_fd)
+ ),
+ TP_fast_assign(
+ __entry->device_id = device_id;
+ __entry->app_id = app_id;
+ __entry->size = size;
+ __entry->dbuf_fd = dbuf_fd;
+ ),
+ TP_printk("device_id=%u app_id=%d size=%lld dbuf_fd=%d",
+ __entry->device_id,
+ __entry->app_id,
+ __entry->size,
+ __entry->dbuf_fd)
+);
+TRACE_EVENT(trinity_ioctl_hwmem_dealloc,
+ TP_PROTO(u32 device_id, s32 app_id, s32 dbuf_fd),
+ TP_ARGS(device_id, app_id, dbuf_fd),
+ TP_STRUCT__entry(
+ __field(u32, device_id)
+ __field(s32, app_id)
+ __field(s32, dbuf_fd)
+ ),
+ TP_fast_assign(
+ __entry->device_id = device_id;
+ __entry->app_id = app_id;
+ __entry->dbuf_fd = dbuf_fd;
+ ),
+ TP_printk("device_id=%u app_id=%d dbuf_fd=%d",
+ __entry->device_id,
+ __entry->app_id,
+ __entry->dbuf_fd)
+);
+TRACE_EVENT(trinity_ioctl_get_profile_meta,
+ TP_PROTO(u32 device_id, s32 app_id, s32 req_id, u32 profile_size),
+ TP_ARGS(device_id, app_id, req_id, profile_size),
+ TP_STRUCT__entry(
+ __field(u32, device_id)
+ __field(s32, app_id)
+ __field(s32, req_id)
+ __field(u32, profile_size)
+ ),
+ TP_fast_assign(
+ __entry->device_id = device_id;
+ __entry->app_id = app_id;
+ __entry->req_id = req_id;
+ __entry->profile_size = profile_size;
+ ),
+ TP_printk("device_id=%u app_id=%d req_id=%d profile_size=%u",
+ __entry->device_id,
+ __entry->app_id,
+ __entry->req_id,
+ __entry->profile_size)
+);
+TRACE_EVENT(trinity_ioctl_get_profile_buff,
+ TP_PROTO(u32 device_id, s32 app_id, s32 req_id, u32 profile_pos, u32 profile_size),
+ TP_ARGS(device_id, app_id, req_id, profile_pos, profile_size),
+ TP_STRUCT__entry(
+ __field(u32, device_id)
+ __field(s32, app_id)
+ __field(s32, req_id)
+ __field(u32, profile_pos)
+ __field(u32, profile_size)
+ ),
+ TP_fast_assign(
+ __entry->device_id = device_id;
+ __entry->app_id = app_id;
+ __entry->req_id = req_id;
+ __entry->profile_pos = profile_pos;
+ __entry->profile_size = profile_size;
+ ),
+ TP_printk("device_id=%u app_id=%d req_id=%d profile_pos=%u profile_size=%u",
+ __entry->device_id,
+ __entry->app_id,
+ __entry->req_id,
+ __entry->profile_pos,
+ __entry->profile_size)
+);
+TRACE_EVENT(trinity_ioctl_register_model,
+ TP_PROTO(u32 device_id, s32 app_id, u64 config_id, s32 dbuf_fd, u64 program_offset_addr, u64 program_size),
+ TP_ARGS(device_id, app_id, config_id, dbuf_fd, program_offset_addr, program_size),
+ TP_STRUCT__entry(
+ __field(u32, device_id)
+ __field(s32, app_id)
+ __field(u64, config_id)
+ __field(s32, dbuf_fd)
+ __field(u64, program_offset_addr)
+ __field(u64, program_size)
+ ),
+ TP_fast_assign(
+ __entry->device_id = device_id;
+ __entry->app_id = app_id;
+ __entry->config_id = config_id;
+ __entry->dbuf_fd = dbuf_fd;
+ __entry->program_offset_addr = program_offset_addr;
+ __entry->program_size = program_size;
+ ),
+ TP_printk("device_id=%u app_id=%d config_id=0x%llx dbuf_fd=%d program_offset_addr=0x%llx program_size=0x%llx",
+ __entry->device_id,
+ __entry->app_id,
+ __entry->config_id,
+ __entry->dbuf_fd,
+ __entry->program_offset_addr,
+ __entry->program_size)
+);
+TRACE_EVENT(trinity_ioctl_register_model_drv_ver1,
+ TP_PROTO(u64 weight_offset_addr),
+ TP_ARGS(weight_offset_addr),
+ TP_STRUCT__entry(
+ __field(u64, weight_offset_addr)
+ ),
+ TP_fast_assign(
+ __entry->weight_offset_addr = weight_offset_addr;
+ ),
+ TP_printk("weight_offset_addr=0x%llx",
+ __entry->weight_offset_addr)
+);
+TRACE_EVENT(trinity_ioctl_register_model_drv_ver2,
+ TP_PROTO(s32 metadata_dbuf_fd, s32 metadata_ext_dbuf_fd, u64 metadata_ext_size),
+ TP_ARGS(metadata_dbuf_fd, metadata_ext_dbuf_fd, metadata_ext_size),
+ TP_STRUCT__entry(
+ __field(s32, metadata_dbuf_fd)
+ __field(s32, metadata_ext_dbuf_fd)
+ __field(u64, metadata_ext_size)
+ ),
+ TP_fast_assign(
+ __entry->metadata_dbuf_fd = metadata_dbuf_fd;
+ __entry->metadata_ext_dbuf_fd = metadata_ext_dbuf_fd;
+ __entry->metadata_ext_size = metadata_ext_size;
+ ),
+ TP_printk("metadata_dbuf_fd=%d metadata_ext_dbuf_fd=%d metadata_ext_size=0x%llx",
+ __entry->metadata_dbuf_fd,
+ __entry->metadata_ext_dbuf_fd,
+ __entry->metadata_ext_size)
+);
+TRACE_EVENT(trinity_ioctl_run_input,
+ TP_PROTO(u32 device_id, s32 app_id, s32 dbuf_fd, u64 model_id),
+ TP_ARGS(device_id, app_id, dbuf_fd, model_id),
+ TP_STRUCT__entry(
+ __field(u32, device_id)
+ __field(s32, app_id)
+ __field(s32, dbuf_fd)
+ __field(u64, model_id)
+ ),
+ TP_fast_assign(
+ __entry->device_id = device_id;
+ __entry->app_id = app_id;
+ __entry->dbuf_fd = dbuf_fd;
+ __entry->model_id = model_id;
+ ),
+ TP_printk("device_id=%u app_id=%d dbuf_fd=%d model_id=0x%llx",
+ __entry->device_id,
+ __entry->app_id,
+ __entry->dbuf_fd,
+ __entry->model_id)
+);
+TRACE_EVENT(trinity_ioctl_run_input_drv_ver1,
+ TP_PROTO(u64 activation_offset_addr0, u64 activation_offset_addr1),
+ TP_ARGS(activation_offset_addr0, activation_offset_addr1),
+ TP_STRUCT__entry(
+ __field(u64, activation_offset_addr0)
+ __field(u64, activation_offset_addr1)
+ ),
+ TP_fast_assign(
+ __entry->activation_offset_addr0 = activation_offset_addr0;
+ __entry->activation_offset_addr1 = activation_offset_addr1;
+ ),
+ TP_printk("activation_offset_addr0=0x%llx activation_offset_addr1=0x%llx",
+ __entry->activation_offset_addr0,
+ __entry->activation_offset_addr1)
+);
+TRACE_EVENT(trinity_ioctl_run_input_drv_ver2,
+ TP_PROTO(s64 timeout_ms, u32 priority, u32 num_segments, s32 input_mode, s32 output_mode),
+ TP_ARGS(timeout_ms, priority, num_segments, input_mode, output_mode),
+ TP_STRUCT__entry(
+ __field(s64, timeout_ms)
+ __field(u32, priority)
+ __field(u32, num_segments)
+ __field(s32, input_mode)
+ __field(s32, output_mode)
+ ),
+ TP_fast_assign(
+ __entry->timeout_ms = timeout_ms;
+ __entry->priority = priority;
+ __entry->num_segments = num_segments;
+ __entry->input_mode = input_mode;
+ __entry->output_mode = output_mode;
+ ),
+ TP_printk("timeout_ms=%lld priority=%u num_segments=%u input_mode=%d output_mode=%d",
+ __entry->timeout_ms,
+ __entry->priority,
+ __entry->num_segments,
+ __entry->input_mode,
+ __entry->output_mode)
+);
+
+// clang-format on
+
+#endif /* __TRINITY_TRACE_H__ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ * Copyright (C) 2020 Wook Song <wook16.song@samsung.com>
+ */
+/**
+ * @file trinity_vision2.c
+ * @brief Samsung Research Trinity Vision2 NPU device driver
+ * @date 10 Mar 2020
+ * @author Dongju Chae <dongju.chae@samsung.com>
+ * Wook Song <wook16.song@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+
+#include <linux/bitmap.h>
+#include <linux/dma-buf.h>
+#include <linux/fs.h>
+#include <linux/hashtable.h>
+#include <linux/highmem.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/utsname.h>
+#include <linux/version.h>
+
+#include <linux/delay.h>
+#include <linux/iommu.h>
+#include <linux/iopoll.h>
+#include <linux/timer.h>
+
+#include "trinity_common.h"
+#include "trinity_hwmem.h"
+#include "trinity_resv_mem.h"
+#include "trinity_vision2_profile.h"
+
+#include "trinity_monitor.h"
+#include "trinity_sched_vd.h"
+#include "trinity_trace.h"
+
+#ifdef CONFIG_TRINITY_FPGA
+/* Workaround for the FPGA development board */
+#include "trinity_hwmem_iommu_helper.h"
+#endif
+
+/* Register offsets for NPU CP (Config) */
+#define OFFSET_CP_INFO (0x000) /* Processor Information */
+#define OFFSET_CP_PROC_STAT (0x010) /* Processor Status */
+#define OFFSET_CP_PROC_SET (0x014) /* Processor Control (Set) */
+#define OFFSET_CP_PROC_CLR (0x018) /* Processor Control (Clear) */
+#define OFFSET_CP_IMIF_BASE (0x024) /* Instruction Base Address (DRAM) */
+#define OFFSET_CP_CNT_CFG (0x200) /* CP Performance Counter */
+
+/* Register offsets for NPU CP (IDU Setup) */
+#define OFFSET_NPU_PROG_BASE (0x100) /* GPR00: Instruction Base Address */
+#define OFFSET_NPU_PROG_SIZE (0x104) /* GPR01: Program Size */
+#define OFFSET_NPU_SEGT_ADDR (0x108) /* GPR02: Segment Table Address */
+#define OFFSET_NPU_PROF_ADDR (0x10C) /* GPR03: NPU Profiling Address */
+#define OFFSET_NPU_PROF_SIZE (0x110) /* GPR04: NPU Profiling Size */
+#define OFFSET_NPU_BACK_ADDR (0x114) /* GPR05: NPU Context Backup Address */
+#define OFFSET_NPU_BACK_SIZE (0x118) /* GPR06: NPU Context Backup Size */
+#define OFFSET_NPU_PC (0x11C) /* GPR07: NPU Program Counter */
+
+/* Register offsets for NPU CP (Commands) */
+#define OFFSET_NPU_CMD_READY (0x124) /* GPR09: Command Ready Status */
+#define OFFSET_NPU_CMD_BASE (0x128) /* GPR10: Command Base Address */
+#define OFFSET_NPU_CMD_REQ (0x12C) /* GPR11: Command Request Slots (not used) */
+#define OFFSET_NPU_CMD_FREE (0x130) /* GPR12: Command Free Slots */
+
+/* Register offsets for NPU CP (Cbox Setup) */
+#define OFFSET_NPU_CBOX_BASE (0x134) /* GPR13: NPU CBOX BASE */
+
+/* Register offsets for Debugging */
+#define OFFSET_NPU_IDU_VERSION (0x138) /* GPR14: NPU IDU VERSION */
+#define OFFSET_NPU_IDU_STAGE (0x13C) /* GPR15: NPU IDU STAGE */
+
+#define OFFSET_NPU_CP_DMAI_EADDR (0x300) /* CP DMA Source Address */
+#define OFFSET_NPU_CP_DMAI_IADDR (0x304) /* CP DMA Dest Address */
+#define OFFSET_NPU_CP_DMAI_TSIZE (0x308) /* CP DMA Transfer Size */
+#define OFFSET_NPU_CP_DMAI_CONTR (0x310) /* CP DMA Status */
+#define OFFSET_NPU_CP_DMAI_CMDID (0x314) /* CP DMA Command ID */
+#define OFFSET_NPU_CP_DMAI_LSTID \
+ (0x318) /* CP DMA Command ID of the last transfer */
+
+#define OFFSET_NPU_DLA_DMAI_EADDR (0x1000) /* DLA Input External Address */
+#define OFFSET_NPU_DLA_DMAI_EYMOD \
+ (0x1004) /* DLA Input External Address Y Modifier */
+#define OFFSET_NPU_DLA_DMAI_EZMOD \
+ (0x1008) /* DLA Input External Address Z Modifier */
+#define OFFSET_NPU_DLA_DMAI_IADDR (0x100C) /* DLA Input Internal Address */
+#define OFFSET_NPU_DLA_DMAI_IYMOD \
+ (0x1010) /* DLA Input Internal Address Y Modifier */
+#define OFFSET_NPU_DLA_DMAI_IZMOD \
+ (0x1014) /* DLA Input Internal Address Z Modifier */
+#define OFFSET_NPU_DLA_DMAI_SIZE0 (0x1018) /* DLA Input Data Size 0 */
+#define OFFSET_NPU_DLA_DMAI_SIZE1 (0x101C) /* DLA Input Data Size 1 */
+#define OFFSET_NPU_DLA_DMAI_CTRL (0x1020) /* DLA Input Channel Status */
+
+#define OFFSET_NPU_DLA_DMAO_EADDR (0x1080) /* DLA Output External Address */
+#define OFFSET_NPU_DLA_DMAO_EYMOD \
+ (0x1084) /* DLA Output External Address Y Modifier */
+#define OFFSET_NPU_DLA_DMAO_EZMOD \
+ (0x1088) /* DLA Output External Address Z Modifier */
+#define OFFSET_NPU_DLA_DMAO_IADDR (0x108C) /* DLA Output Internal Address */
+#define OFFSET_NPU_DLA_DMAO_IYMOD \
+ (0x1090) /* DLA Output Internal Address Y Modifier */
+#define OFFSET_NPU_DLA_DMAO_IZMOD \
+ (0x1094) /* DLA Output Internal Address Z Modifier */
+#define OFFSET_NPU_DLA_DMAO_SIZE0 (0x1098) /* DLA Output Data Size 0 */
+#define OFFSET_NPU_DLA_DMAO_SIZE1 (0x109C) /* DLA Output Data Size 1 */
+#define OFFSET_NPU_DLA_DMAO_CTRL (0x10A0) /* DLA Output Channel Status */
+
+#define OFFSET_NPU_DLA_CORE_OPC (0x1100) /* DLA Operation Code */
+#define OFFSET_NPU_DLA_CORE_WIND_CFG (0x1104)
+#define OFFSET_NPU_DLA_CORE_SIZE0 (0x1108)
+#define OFFSET_NPU_DLA_CORE_SIZE1 (0x110C)
+#define OFFSET_NPU_DLA_CORE_ZP (0x1110)
+#define OFFSET_NPU_DLA_CORE_OUT_MULT (0x1114)
+#define OFFSET_NPU_DLA_CORE_IN0_MULT (0x1118)
+#define OFFSET_NPU_DLA_CORE_IN1_MULT (0x111C)
+#define OFFSET_NPU_DLA_CORE_OUT_CFG (0x1120)
+#define OFFSET_NPU_DLA_CORE_OUT_MOD (0x1124)
+#define OFFSET_NPU_DLA_CORE_IN0_CFG (0x1128)
+#define OFFSET_NPU_DLA_CORE_IN0_MOD (0x112C)
+#define OFFSET_NPU_DLA_CORE_IN1_CFG (0x1130)
+#define OFFSET_NPU_DLA_CORE_IN1_MOD (0x1134)
+#define OFFSET_NPU_DLA_CORE_PARAM_ADDR (0x1138)
+#define OFFSET_NPU_DLA_CORE_PSUM_ADDR (0x113C)
+#define OFFSET_NPU_DLA_CORE_CWGT_ADDR (0x1140)
+#define OFFSET_NPU_DLA_CORE_CTRL (0x1144) /* DLA Core Status */
+
+#define OFFSET_NPU_DSP_DMAI_EADDR (0x2000) /* DSP Input External Address */
+#define OFFSET_NPU_DSP_DMAI_EYMOD \
+ (0x2004) /* DSP Input External Address Y Modifier */
+#define OFFSET_NPU_DSP_DMAI_EZMOD \
+ (0x2008) /* DSP Input External Address Z Modifier */
+#define OFFSET_NPU_DSP_DMAI_IADDR (0x200C) /* DSP Input Internal Address */
+#define OFFSET_NPU_DSP_DMAI_IYMOD \
+ (0x2010) /* DSP Input Internal Address Y Modifier */
+#define OFFSET_NPU_DSP_DMAI_IZMOD \
+ (0x2014) /* DSP Input Internal Address Z Modifier */
+#define OFFSET_NPU_DSP_DMAI_SIZE0 (0x2018) /* DSP Input Data Size 0 */
+#define OFFSET_NPU_DSP_DMAI_SIZE1 (0x201C) /* DSP Input Data Size 1 */
+#define OFFSET_NPU_DSP_DMAI_CTRL (0x2020) /* DSP Input Channel Status */
+
+#define OFFSET_NPU_DSP_DMAO_EADDR (0x2080) /* DSP Output External Address */
+#define OFFSET_NPU_DSP_DMAO_EYMOD \
+ (0x2084) /* DSP Output External Address Y Modifier */
+#define OFFSET_NPU_DSP_DMAO_EZMOD \
+ (0x2088) /* DSP Output External Address Z Modifier */
+#define OFFSET_NPU_DSP_DMAO_IADDR (0x208C) /* DSP Output Internal Address */
+#define OFFSET_NPU_DSP_DMAO_IYMOD \
+ (0x2090) /* DSP Output Internal Address Y Modifier */
+#define OFFSET_NPU_DSP_DMAO_IZMOD \
+ (0x2094) /* DSP Output Internal Address Z Modifier */
+#define OFFSET_NPU_DSP_DMAO_SIZE0 (0x2098) /* DSP Output Data Size 0 */
+#define OFFSET_NPU_DSP_DMAO_SIZE1 (0x209C) /* DSP Output Data Size 1 */
+#define OFFSET_NPU_DSP_DMAO_CTRL (0x20A0) /* DSP Output Channel Status */
+#define OFFSET_NPU_DSP_CORE_CTRL (0x2140) /* DSP Core Status */
+
+/* Register offsets for NPU DSP */
+#define OFFSET_DSP_INFO (0x000) /* Processor Information */
+#define OFFSET_DSP_PROC_STAT (0x010) /* Processor Status */
+#define OFFSET_DSP_PROC_SET (0x014) /* Processor Control (Set) */
+#define OFFSET_DSP_PROC_CLR (0x018) /* Processor Control (Clear) */
+#define OFFSET_DSP_IMIF_BASE (0x024) /* Instruction Base Address (DRAM) */
+
+/* Register offsets for NPU ComBox (IRQ) */
+#define OFFSET_CBOX_EXT_IRQ_MSK (0x100) /* External IRQ Output Mask */
+#define OFFSET_CBOX_EXT_IRQ_STA (0x104) /* External IRQ Output Status */
+#define OFFSET_CBOX_CP_SWI_CLR (0x134) /* CP IRQ output Clear */
+#define OFFSET_CBOX_DSP_SWI_CLR (0x154) /* DSP IRQ output Clear */
+
+/* Location of bits inside corresponding registers */
+#define BIT_CLR_IRQ_OUT BIT(24)
+#define BIT_CLR_PAUSE BIT(0)
+#define BIT_SET_SEND_EVT1 BIT(18)
+#define BIT_SET_PAUSE BIT(0)
+#define BIT_STAT_PAUSED BIT(1)
+
+/* Performance counter configurations */
+#define BIT_CNT_DST_EN BIT(6)
+#define BIT_CNT_IST_EN BIT(5)
+#define BIT_CNT_ST_EN BIT(4)
+#define BIT_CNT_FR_EN BIT(0)
+
+/* Bit masks */
+#define MASK_DSP_SWI_STA BIT_MASK(1)
+#define MASK_CP_SWI_STA BIT_MASK(0)
+
+#define MASK_STAT_WFE_PARAM GENMASK(14, 6)
+#define MASK_STAT_WFE_PARAM_EVT1 BIT_MASK(8)
+#define MASK_STAT_WFE BIT_MASK(5)
+#define MASK_STAT_PAUSED BIT_MASK(1)
+#define MASK_STAT_PAUSE BIT_MASK(0)
+
+#define VER_MAJOR (2)
+#define VER_MINOR (0)
+#define VER_EXTRA (0)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)
+#define read_idu_file(file, pos, addr, size) kernel_read(file, pos, addr, size)
+#else
+#define read_idu_file(file, pos, addr, size) kernel_read(file, addr, size, &pos)
+#endif
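+
+/*
+ * Illustrative only: read_idu_file() hides the kernel_read() signature flip
+ * in v4.14 (buffer/size/&pos vs. pos/buffer/size) so IDU section loading can
+ * be written once; example_read_section() is a hypothetical caller.
+ */
+static ssize_t __maybe_unused
+example_read_section(struct file *filp, void *vaddr, size_t size)
+{
+ loff_t pos = 0;
+
+ /* expands to the kernel_read() variant matching this kernel version */
+ return read_idu_file(filp, pos, vaddr, size);
+}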
+
+/** Macros for Instruction Decode Unit (IDU) */
+#define TRIV2_IDU_DIRPATH_FMT "/lib/modules/%s/kernel/soc/idu"
+#define TRIV2_IDU_MAX_SECTORS (3)
+#define TRIV2_IDU_ZEROIDX (0)
+#define TRIV2_IDU_DATAIDX (1)
+#define TRIV2_IDU_CODEIDX (2)
+#define TRIV2_IDU_ADDR(addr) ((uint32_t)(addr))
+#define TRIV2_IDU_MAXSIZE (1 << 20) /* 1 MiB */
+
+#define TRIV2_IDU_CP_DSPM_SIZE (0x10000)
+
+#define TRIV2_IDU_MASK_MAJOR (0xFF000000)
+#define TRIV2_IDU_MASK_MINOR (0x00FFF000)
+#define TRIV2_IDU_MASK_EXTRA (0x00000FFF)
+
+#define TRIV2_IDU_SHIFT_MAJOR (24)
+#define TRIV2_IDU_SHIFT_MINOR (12)
+
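+/*
+ * Illustrative only: how the packed IDU version word splits into the
+ * "v%u.%u.%u" string reported via the idu_version sysfs node.
+ */
+static void __maybe_unused example_decode_idu_version(uint32_t ver,
+ uint32_t *major, uint32_t *minor, uint32_t *extra)
+{
+ *major = (ver & TRIV2_IDU_MASK_MAJOR) >> TRIV2_IDU_SHIFT_MAJOR;
+ *minor = (ver & TRIV2_IDU_MASK_MINOR) >> TRIV2_IDU_SHIFT_MINOR;
+ *extra = ver & TRIV2_IDU_MASK_EXTRA;
+}
+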
+#define TRIV2_MODEL_HASH_BITS (8)
+#define TRIV2_MODEL_HASH_SIZE (1 << TRIV2_MODEL_HASH_BITS)
+#define TRIV2_PROFILE_HASH_BITS (6)
+#define TRIV2_PROFILE_HASH_SIZE (1 << TRIV2_PROFILE_HASH_BITS)
+#define TRIV2_PROFILE_HASH_KEY(id) (hash_long((id), TRIV2_PROFILE_HASH_BITS))
+
+#define TRIV2_MAX_SEGMENTS (256)
+/** Fits in a single 4K page (4096 B / 512 B per command = 8 slots) */
+#define TRIV2_MAX_CMDSLOTS (PAGE_SIZE / sizeof(struct triv2_cmd))
+
+#define TRIV2_DRV_GET_PDATA(drv) ((struct triv2_pdata *)(drv->pdata))
+#define TRIV2_DRV_GET_CMD_INFO(drv) (&(TRIV2_DRV_GET_PDATA(drv)->cmd_info))
+#define TRIV2_DRV_GET_CMD_BUF(drv) (&(TRIV2_DRV_GET_CMD_INFO(drv)->buf))
+#define TRIV2_DRV_GET_PROF_BUF(drv) (&(TRIV2_DRV_GET_PDATA(drv)->prof_buf))
+#define TRIV2_DRV_GET_BACK_BUF(drv) (&(TRIV2_DRV_GET_PDATA(drv)->back_buf))
+
+#define TRIV2_GET_CMD_FROM_SLOT(info, slot) \
+ ((struct triv2_cmd *)(info->buf.vaddr + \
+ slot * sizeof(struct triv2_cmd)))
+
+#define TRIV2_GET_REQ(req) (container_of(req, struct triv2_req, req))
+
+#define TRIV2_MAX_TENSORS (16)
+#define TRIV2_MAX_CMD_SIZE (512)
+#define TRIV2_MAX_BATCH_SIZE (32)
+
+#define TRIV2_DLA_GBUFFER_SIZE (0x80000)
+#define TRIV2_DSP_DSPM_OFFSET (0x10000)
+
+#define HALF_PAGE_SIZE (PAGE_SIZE >> 1)
+
+/* 4MiB (~300ns to flush all caches) */
+#define TRIV2_CACHE_FLUSH_THRESHOLD (0x400000)
+#define TRIV2_KERN_TIMEOUT_RESET (1000)
+
+enum triv2_cmd_status {
+ STATUS_CMD_NONE = 0,
+ STATUS_CMD_READY = 1,
+ STATUS_CMD_DONE = 2,
+};
+
+/** req command for triv2 */
+struct triv2_cmd {
+ union {
+ struct {
+ uint32_t slot;
+ uint32_t prog_addr;
+ uint32_t prog_size;
+ uint32_t segt_addr;
+ uint32_t num_visa;
+
+ uint32_t priority;
+ uint32_t status;
+ uint32_t input_mode;
+ uint32_t output_mode;
+
+ /** for profiling */
+ uint32_t profile_offset;
+
+ /** for preemptive scheduling */
+ uint32_t program_position;
+
+ /** for batch processing */
+ uint32_t batch_size;
+ uint32_t curr_cnt;
+ uint32_t in_addr[TRIV2_MAX_BATCH_SIZE];
+ uint32_t out_addr[TRIV2_MAX_BATCH_SIZE];
+ uint32_t poll_addr;
+ uint32_t poll_magic;
+ /* deprecated, but kept for backward compatibility */
+ uint32_t in_seg_idx;
+ uint32_t out_seg_idx;
+
+ uint32_t total_cycles;
+
+ /* kernel requests */
+ uint32_t in_extern_seg_num;
+ uint32_t out_extern_seg_num;
+ uint32_t in_extern_seg_idx[TRIV2_MAX_TENSORS];
+ uint32_t out_extern_seg_idx[TRIV2_MAX_TENSORS];
+ };
+ uint8_t reserved[TRIV2_MAX_CMD_SIZE];
+ };
+} __attribute__((packed));
+
+struct triv2_cmd_info {
+ DECLARE_BITMAP(bitmap, TRIV2_MAX_CMDSLOTS);
+ spinlock_t lock;
+
+ struct triv2_req *reqs[TRIV2_MAX_CMDSLOTS];
+ struct triv2_cmd cur_cmd;
+ struct trinity_resv_mem buf;
+};
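+
+/*
+ * Illustrative only: a minimal sketch of how a free command slot would be
+ * claimed from the bitmap above (the driver's actual allocation path lives
+ * elsewhere in this patch); returns -EBUSY when all slots are in use.
+ */
+static int __maybe_unused example_get_free_slot(struct triv2_cmd_info *info)
+{
+ unsigned long flags;
+ int slot;
+
+ spin_lock_irqsave(&info->lock, flags);
+ slot = find_first_zero_bit(info->bitmap, TRIV2_MAX_CMDSLOTS);
+ if (slot < TRIV2_MAX_CMDSLOTS)
+ set_bit(slot, info->bitmap);
+ else
+ slot = -EBUSY;
+ spin_unlock_irqrestore(&info->lock, flags);
+
+ return slot;
+}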
+
+struct triv2_hashed_cmd_info {
+ struct trinity_driver *drv;
+ struct hlist_bl_node hnode;
+ struct triv2_req *req;
+ struct triv2_cmd *cmd;
+};
+
+struct triv2_kernel_req {
+ uint32_t in_seg_idx[TRIV2_MAX_TENSORS];
+ uint32_t in_seg_size[TRIV2_MAX_TENSORS];
+ uint32_t out_seg_idx[TRIV2_MAX_TENSORS];
+ uint32_t out_seg_size[TRIV2_MAX_TENSORS];
+};
+
+struct triv2_req {
+ struct trinity_req req;
+
+ struct trinity_hwmem_import *seg_import;
+
+ int cmd_slot;
+
+ /** kernel requests */
+ struct triv2_kernel_req *kernel;
+
+ /** profiling */
+ uint32_t profile_offset;
+ uint32_t total_cycles;
+
+ /** misc */
+ uint32_t total_segment_size;
+#ifdef CONFIG_TRINITY_MONITOR
+ struct trinity_monitor_event *event;
+#endif
+};
+
+struct triv2_idu {
+ phys_addr_t *addrs;
+ size_t addr_num;
+ struct trinity_resv_mem data;
+ struct trinity_resv_mem code;
+ dma_addr_t dspm;
+};
+
+struct triv2_pdata {
+ struct trinity_driver *drv;
+ struct list_head list;
+
+ /* idu */
+ struct triv2_idu idu_cp;
+ struct triv2_idu idu_dsp;
+ uint32_t idu_version;
+
+ /* command info. */
+ struct triv2_cmd_info cmd_info;
+
+ /* context switching */
+ struct trinity_resv_mem back_buf;
+
+ /* profiling */
+ struct trinity_resv_mem prof_buf;
+ struct mutex prof_lock;
+ DECLARE_HASHTABLE(prof_htable, TRIV2_PROFILE_HASH_BITS);
+};
+
+static void triv2_handle_cmd_done(struct trinity_driver *drv,
+ struct triv2_cmd *cmd, bool timeout);
+static void triv2_setup_buffers(struct trinity_driver *drv);
+static int triv2_idu_load(struct trinity_driver *drv, const char *dirpath,
+ bool load_files);
+
+static LIST_HEAD(triv2_driver_list);
+static struct hlist_bl_head triv2_model_node_hlist[TRIV2_MODEL_HASH_SIZE];
+static const char *triv2_op_names[] = { TRIV2_FOREACH_OPNAME(
+ TRIV2_GENERATE_OPNAME) };
+
+static void triv2_map_sched_data(struct trinity_driver *drv,
+ struct trinity_req *req, struct triv2_cmd *cmd,
+ void *sched_data);
+static void triv2_unmap_sched_data(struct trinity_driver *drv,
+ struct triv2_req *req,
+ struct triv2_cmd *cmd);
+
+static struct triv2_profile *
+triv2_find_profile(const struct trinity_driver *drv, int req_id)
+{
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ unsigned long key = TRIV2_PROFILE_HASH_KEY(req_id);
+ struct triv2_profile *profile = NULL;
+
+ hash_for_each_possible (pdata->prof_htable, profile, hlist, key) {
+ if (profile->req_id == req_id)
+ break;
+ }
+
+ return profile;
+}
+
+static void triv2_fini_profile(struct trinity_resv_mem *prof_buf)
+{
+ if (!prof_buf->vaddr)
+ return;
+
+ trinity_free_from_resv_mem(prof_buf, false);
+ memset(prof_buf, '\x00', sizeof(*prof_buf));
+}
+
+static void triv2_init_profile(struct trinity_driver *drv,
+ unsigned long profile_size)
+{
+ struct device *dev = drv_to_dev_ptr(drv);
+ struct trinity_resv_mem *prof_buf = TRIV2_DRV_GET_PROF_BUF(drv);
+
+ if (profile_size > 0) {
+ /* allocate profile buffer and enable it */
+ struct iommu_domain *domain;
+ phys_addr_t paddr;
+ int status;
+
+ triv2_fini_profile(prof_buf);
+
+ profile_size = PAGE_ALIGN(profile_size);
+ status = trinity_alloc_from_resv_mem(profile_size, prof_buf,
+ false);
+ if (status < 0) {
+ dev_err(dev,
+ "Couldn't allocate memory for profiling buffer: %d",
+ status);
+ return;
+ }
+
+ domain = iommu_get_domain_for_dev(drv_to_dev_ptr(drv));
+ paddr = trinity_get_paddr(domain, prof_buf->daddr);
+ iowrite32(TRIV2_IDU_ADDR(paddr),
+ trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_PROF_ADDR));
+ iowrite32(prof_buf->size,
+ trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_PROF_SIZE));
+
+ if (drv->verbose)
+ dev_info(dev, "Profiling enabled (%ld bytes)",
+ profile_size);
+ } else {
+ /* disable profiling */
+ triv2_fini_profile(prof_buf);
+
+ iowrite32(0, trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_PROF_ADDR));
+ iowrite32(0, trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_PROF_SIZE));
+ if (drv->verbose)
+ dev_info(dev, "Profiling disabled");
+ }
+}
+
+static void triv2_assign_opnames(struct triv2_cmd_profile *cmd)
+{
+ struct triv2_op_profile *ops = cmd->profile_ops;
+ uint32_t i;
+
+ for (i = 0; i < cmd->total_ops; i++)
+ snprintf(ops[i].op_name, TRIV2_MAX_OPNAME, "%s",
+ triv2_op_names[ops[i].opcode]);
+}
+
+static int32_t triv2_check_profile(struct trinity_driver *drv,
+ struct trinity_req *req)
+{
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ struct triv2_req *t_req = TRIV2_GET_REQ(req);
+ struct trinity_resv_mem *profile_buf;
+ struct triv2_cmd_profile *profile_cmd;
+ struct triv2_cmd_profile *profile_cmd_new;
+ struct triv2_profile *profile;
+
+ uint32_t offset = t_req->profile_offset;
+ uint32_t total_ops, total_size;
+
+ profile_buf = TRIV2_DRV_GET_PROF_BUF(drv);
+ if (!profile_buf->vaddr)
+ return 0;
+
+ if (profile_buf->size <= offset) {
+ dev_err(drv_to_dev_ptr(drv),
+ "Invalid profile offset detected: 0x%x", offset);
+ return -EINVAL;
+ }
+
+ profile_cmd = (struct triv2_cmd_profile *)((char *)profile_buf->vaddr +
+ offset);
+ profile_cmd->total_cycles = t_req->total_cycles;
+
+ total_ops = profile_cmd->total_ops;
+ total_size = sizeof(struct triv2_cmd_profile) +
+ total_ops * sizeof(struct triv2_op_profile);
+
+ profile_cmd_new = vzalloc(total_size);
+ if (!profile_cmd_new) {
+ dev_err(drv_to_dev_ptr(drv),
+ "Failed to allocate profile cmd data\n");
+ return -ENOMEM;
+ }
+
+ mutex_lock(&pdata->prof_lock);
+
+ profile = req->stat->profile;
+ if (profile) {
+ BUG_ON(!profile->data);
+ vfree(profile->data);
+ profile->data = profile_cmd_new;
+ } else {
+ int req_id = req->input.config.req_id;
+ unsigned long key = TRIV2_PROFILE_HASH_KEY(req_id);
+
+ profile = vzalloc(sizeof(struct triv2_profile));
+ if (!profile) {
+ dev_err(drv_to_dev_ptr(drv),
+ "Failed to allocate profile data\n");
+ vfree(profile_cmd_new);
+ mutex_unlock(&pdata->prof_lock);
+ return -ENOMEM;
+ }
+ profile->req_id = req_id;
+ profile->data = profile_cmd_new;
+
+ hash_add(pdata->prof_htable, &profile->hlist, key);
+
+ req->stat->profile = profile;
+ }
+ memcpy(profile_cmd_new, profile_cmd, total_size);
+ triv2_assign_opnames(profile_cmd_new);
+
+ mutex_unlock(&pdata->prof_lock);
+ return 0;
+}
+
+/**
+ * @brief Get the state (TRINITY_STATE_READY/TRINITY_STATE_PAUSE) of the device.
+ * @returns (enum trinity_state) TRINITY_STATE_READY (i.e., 1) or
+ * TRINITY_STATE_PAUSE (i.e., 0) according to the state of the device
+ */
+int32_t triv2_get_state(const struct trinity_driver *drv)
+{
+ if (ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_CMD_READY) == 1)
+ return TRINITY_STATE_READY;
+
+ return TRINITY_STATE_PAUSE;
+}
+
+/**
+ * @brief Set state of the device to TRINITY_STATE_READY (1) or TRINITY_STATE_PAUSE (0)
+ */
+static void triv2_set_state(const struct trinity_driver *drv,
+ enum trinity_state state)
+{
+ void __iomem *addr;
+
+ switch (state) {
+ case TRINITY_STATE_PAUSE:
+ /* CP */
+ addr = trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_CP_PROC_SET);
+ trinity_set_bit(BIT_SET_PAUSE, addr);
+ iowrite32(0, addr);
+
+ /* DSP */
+ addr = trinity_get_iomem_addr(drv->mmreg_vaddr[1],
+ OFFSET_DSP_PROC_SET);
+ trinity_set_bit(BIT_SET_PAUSE, addr);
+ iowrite32(0, addr);
+
+ break;
+ case TRINITY_STATE_READY:
+ /* CP */
+ addr = trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_CP_PROC_CLR);
+ trinity_set_bit(BIT_CLR_PAUSE, addr);
+ iowrite32(0, addr);
+
+ /* DSP */
+ addr = trinity_get_iomem_addr(drv->mmreg_vaddr[1],
+ OFFSET_DSP_PROC_CLR);
+ trinity_set_bit(BIT_CLR_PAUSE, addr);
+ iowrite32(0, addr);
+
+ /* Performance Counter */
+ addr = trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_CP_CNT_CFG);
+ trinity_set_bit(BIT_CNT_IST_EN | BIT_CNT_FR_EN, addr);
+ break;
+ default:
+ dev_err(drv_to_dev_ptr(drv),
+ "failed to set state of the NPU state: %d", state);
+ }
+}
+
+/**
+ * @brief synchronize the segment table entries
+ */
+static int triv2_sync_segt_entries(const struct trinity_driver *drv,
+ struct triv2_req *req)
+{
+#ifdef CONFIG_ARM
+ struct trinity_input *input = &(req->req.input);
+ int i;
+
+ /* flush all caches for heavy models */
+ if (req->total_segment_size > TRIV2_CACHE_FLUSH_THRESHOLD ||
+ /* cannot handle external segments for kernel requests */
+ req->kernel != NULL) {
+ flush_cache_all();
+ return 0;
+ }
+
+ for (i = 0; i < input->config.num_segments; ++i)
+ __cpuc_flush_dcache_area(req->seg_import[i].addr,
+ req->seg_import[i].buf->size);
+#endif
+ return 0;
+}
+
+static void triv2_wakeup_cp(const struct trinity_driver *drv)
+{
+ void __iomem *addr =
+ trinity_get_iomem_addr(drv->mmreg_vaddr[0], OFFSET_CP_PROC_SET);
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_triv2_wakeup_cp(drv->dev_id);
+#endif
+ trinity_set_bit(BIT_SET_SEND_EVT1, addr);
+}
+
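+/**
+ * @brief Force-complete all in-flight commands (used on stop/reset paths).
+ */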
+static void triv2_cancel_reqs(struct trinity_driver *drv)
+{
+ struct triv2_cmd_info *info;
+ struct triv2_cmd *cmd;
+ unsigned long flags;
+ int slot;
+
+ info = TRIV2_DRV_GET_CMD_INFO(drv);
+ spin_lock_irqsave(&info->lock, flags);
+
+ slot = find_first_bit(info->bitmap, TRIV2_MAX_CMDSLOTS);
+ while (slot < TRIV2_MAX_CMDSLOTS) {
+ cmd = TRIV2_GET_CMD_FROM_SLOT(info, slot);
+ triv2_handle_cmd_done(drv, cmd, true);
+ slot = find_next_bit(info->bitmap, TRIV2_MAX_CMDSLOTS,
+ slot + 1);
+ }
+
+ spin_unlock_irqrestore(&info->lock, flags);
+}
+
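+/**
+ * @brief Wait (up to ~1 second) until every command slot is released.
+ */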
+static void triv2_drain_reqs(struct trinity_driver *drv)
+{
+ struct triv2_cmd_info *info;
+ unsigned long flags;
+ int cur_retries, max_retries = 1000; /* 1-sec */
+ int slot;
+
+ cur_retries = 0;
+ info = TRIV2_DRV_GET_CMD_INFO(drv);
+retry:
+ spin_lock_irqsave(&info->lock, flags);
+
+ /* wait until all bits are unset */
+ slot = find_first_bit(info->bitmap, TRIV2_MAX_CMDSLOTS);
+ if (slot < TRIV2_MAX_CMDSLOTS) {
+ spin_unlock_irqrestore(&info->lock, flags);
+
+ usleep_range(900, 1100);
+ if (cur_retries++ < max_retries)
+ goto retry;
+
+ spin_lock_irqsave(&info->lock, flags);
+ }
+
+ spin_unlock_irqrestore(&info->lock, flags);
+}
+
+static void triv2_reset_devices(struct trinity_driver *drv, bool do_test)
+{
+ trinity_reset_device(drv_to_dev_ptr(drv), do_test);
+
+ triv2_setup_buffers(drv);
+ triv2_idu_load(drv, NULL, false);
+}
+
+static void triv2_reset(struct trinity_driver *drv)
+{
+ struct device *dev = drv_to_dev_ptr(drv);
+ struct triv2_pdata *pdata;
+ bool do_test;
+
+ /* FIXME: The HW reset should handle all the devices simultaneously */
+
+ list_for_each_entry (pdata, &triv2_driver_list, list)
+ mutex_lock(&pdata->drv->lock);
+
+ dev_err(dev, "NPU HW reset started");
+
+ /* block runtime pm suspend */
+ trinity_pm_runtime_forbid(dev);
+
+ /* block new incoming requests first */
+ trinity_sched_suspend();
+
+ /* cancel all requests by force */
+ list_for_each_entry (pdata, &triv2_driver_list, list)
+ triv2_cancel_reqs(pdata->drv);
+
+ /* wait for pending requests in the NPU to settle */
+ msleep(100);
+
+ /* reset all devices */
+ do_test = true;
+ list_for_each_entry (pdata, &triv2_driver_list, list) {
+ triv2_reset_devices(pdata->drv, do_test);
+ if (pdata->drv->opened > 0)
+ triv2_set_state(pdata->drv, TRINITY_STATE_READY);
+ do_test = false;
+ }
+
+ /* resume scheduler */
+ trinity_sched_resume();
+
+ trinity_pm_runtime_allow(dev);
+
+ dev_err(dev, "NPU HW reset completed");
+
+ list_for_each_entry (pdata, &triv2_driver_list, list)
+ mutex_unlock(&pdata->drv->lock);
+}
+
+#ifdef CONFIG_TRINITY_MONITOR
+enum triv2_idu_stage {
+ IDU_STAGE_UNKNOWN = 0,
+ IDU_STAGE_WAITING,
+ IDU_STAGE_GET_CMD,
+ IDU_STAGE_RUN_CMD,
+ IDU_STAGE_SWAP_OUT,
+ IDU_STAGE_SWAP_IN,
+ IDU_STAGE_SEND_IRQ,
+};
+
+static const char *triv2_debug_idu_stage(struct trinity_driver *drv)
+{
+ static const char *debug_stage_msg[] = {
+ [IDU_STAGE_UNKNOWN] = "unknown",
+ [IDU_STAGE_WAITING] = "wait event",
+ [IDU_STAGE_GET_CMD] = "get command",
+ [IDU_STAGE_RUN_CMD] = "run command",
+ [IDU_STAGE_SWAP_OUT] = "swap out",
+ [IDU_STAGE_SWAP_IN] = "swap in",
+ [IDU_STAGE_SEND_IRQ] = "send irq",
+ };
+ u32 stage = IDU_STAGE_UNKNOWN;
+
+ if (drv) {
+ stage = ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_IDU_STAGE);
+ if (stage > IDU_STAGE_SEND_IRQ)
+ stage = IDU_STAGE_UNKNOWN;
+ }
+
+ return debug_stage_msg[stage];
+}
+
+static void triv2_dump_segment_table(struct trinity_driver *drv,
+ struct triv2_req *t_req)
+{
+ struct device *dev = drv_to_dev_ptr(drv);
+ struct trinity_req *req;
+ struct trinity_input *input;
+ struct trinity_hwmem_import *segt_import;
+ u32 i;
+
+ req = &(t_req->req);
+ input = &(req->input);
+ segt_import = &(input->import_info);
+
+ dev_err(dev, "- segment table dump");
+ for (i = 0; i < input->config.num_segments; ++i)
+ dev_err(dev, "\t[%u] = %08x", i,
+ ioread32(segt_import->addr + i * sizeof(u32)));
+}
+
+static void triv2_dump_kernel_request(struct trinity_driver *drv,
+ struct triv2_cmd *cmd)
+{
+ struct device *dev = drv_to_dev_ptr(drv);
+ u32 i;
+
+ if (cmd->batch_size == 0)
+ return;
+
+ dev_err(dev,
+ "- batch_size: %u, curr_cnt: %u, poll_addr: 0x%x, poll_magic: 0x%x",
+ cmd->batch_size, cmd->curr_cnt, cmd->poll_addr,
+ cmd->poll_magic);
+ dev_err(dev, "- extern_input_num: %u, extern_output_num: %u",
+ cmd->in_extern_seg_num, cmd->out_extern_seg_num);
+ for (i = 0; i < cmd->in_extern_seg_num; i++)
+ dev_err(dev, "\tin_seg_idx[%u] = %u", i,
+ cmd->in_extern_seg_idx[i]);
+ for (i = 0; i < cmd->out_extern_seg_num; i++)
+ dev_err(dev, "\tout_seg_idx[%u] = %u", i,
+ cmd->out_extern_seg_idx[i]);
+ for (i = 0; i < cmd->in_extern_seg_num * cmd->batch_size; i++)
+ dev_err(dev, "\tin_addr[%u] = 0x%x", i, cmd->in_addr[i]);
+ for (i = 0; i < cmd->out_extern_seg_num * cmd->batch_size; i++)
+ dev_err(dev, "\tout_addr[%u] = 0x%x", i, cmd->out_addr[i]);
+}
+
+static void triv2_dump_command_slots(struct trinity_driver *drv)
+{
+ struct device *dev;
+ struct triv2_cmd_info *info;
+ struct triv2_cmd *cmd;
+ struct triv2_req *req;
+ u32 slot;
+
+ if (!drv)
+ return;
+
+ dev = drv_to_dev_ptr(drv);
+ info = TRIV2_DRV_GET_CMD_INFO(drv);
+
+#ifdef CONFIG_ARM
+ flush_cache_all();
+#endif
+
+ /* skip lock: just dump all slots */
+ for (slot = 0; slot < TRIV2_MAX_CMDSLOTS; slot++) {
+ cmd = TRIV2_GET_CMD_FROM_SLOT(info, slot);
+
+ /* skip invalid slot */
+ if (cmd->slot != slot)
+ continue;
+
+ req = info->reqs[slot];
+ if (!req)
+ continue;
+
+ dev_err(dev, "slot: %u, num_visa: %u, priority: %u, status: %u",
+ cmd->slot, cmd->num_visa, cmd->priority, cmd->status);
+ dev_err(dev,
+ "- prog_addr: 0x%x prog_size: 0x%x, segt_addr: 0x%x",
+ cmd->prog_addr, cmd->prog_size, cmd->segt_addr);
+
+ triv2_dump_segment_table(drv, req);
+ triv2_dump_kernel_request(drv, cmd);
+ }
+}
+
+static void triv2_dump_npu_mmregs(struct trinity_driver *drv)
+{
+ struct device *dev;
+
+ dev = drv_to_dev_ptr(drv);
+
+ dev_err(dev,
+ "NPU_PROG_BASE: 0x%x, NPU_PC: 0x%x, NPU_PROG_SIZE: 0x%x, NPU_SEGT_ADDR: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_PROG_BASE),
+ ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_PC),
+ ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_PROG_SIZE),
+ ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_SEGT_ADDR));
+
+ dev_err(dev,
+ "CP_DMAI_CONTR: 0x%x, CP_DMAI_CMDID: 0x%x, CP_DMAI_LSTID: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_CP_DMAI_CONTR),
+ ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_CP_DMAI_CMDID),
+ ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_CP_DMAI_LSTID));
+ if (ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_CP_DMAI_CONTR) > 0) {
+ dev_err(dev,
+ "\tCP_DMAI_EADDR: 0x%x, CP_DMAI_IADDR: 0x%x, CP_DMAI_TSIZE: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_CP_DMAI_EADDR),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_CP_DMAI_IADDR),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_CP_DMAI_TSIZE));
+ }
+
+ dev_err(dev,
+ "DLA_CORE_CTRL: 0x%x, DLA_DMAI_CTRL: 0x%x, DLA_DMAO_CTRL: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DLA_CORE_CTRL),
+ ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DLA_DMAI_CTRL),
+ ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DLA_DMAO_CTRL));
+ if (ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DLA_CORE_CTRL) > 0) {
+ dev_err(dev, "\tDLA_CORE_OPC: 0x%x, DLA_CORE_WIND_CFG: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DLA_CORE_OPC),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_CORE_WIND_CFG));
+ dev_err(dev,
+ "\tDLA_CORE_SIZE0: 0x%x, DLA_CORE_SIZE1: 0x%x, DLA_CORE_ZP: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_CORE_SIZE0),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_CORE_SIZE1),
+ ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DLA_CORE_ZP));
+ dev_err(dev,
+ "\tDLA_CORE_OUT_MULT: 0x%x, DLA_CORE_IN0_MULT: 0x%x, DLA_CORE_IN1_MULT: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_CORE_OUT_MULT),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_CORE_IN0_MULT),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_CORE_IN1_MULT));
+ dev_err(dev, "\tDLA_CORE_OUT_CFG: 0x%x, DLA_CORE_OUT_MOD: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_CORE_OUT_CFG),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_CORE_OUT_MOD));
+ dev_err(dev, "\tDLA_CORE_IN0_CFG: 0x%x, DLA_CORE_IN0_MOD: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_CORE_IN0_CFG),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_CORE_IN0_MOD));
+ dev_err(dev, "\tDLA_CORE_IN1_CFG: 0x%x, DLA_CORE_IN1_MOD: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_CORE_IN1_CFG),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_CORE_IN1_MOD));
+ dev_err(dev,
+ "\tDLA_CORE_PARAM_ADDR: 0x%x, DLA_CORE_PSUM_ADDR: 0x%x, DLA_CORE_CWGT_ADDR: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_CORE_PARAM_ADDR),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_CORE_PSUM_ADDR),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_CORE_CWGT_ADDR));
+ }
+ if (ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DLA_DMAI_CTRL) > 0) {
+ dev_err(dev,
+ "\tDLA_DMAI_EADDR: 0x%x, DLA_DMAI_EYMOD: 0x%x, DLA_DMAI_EZMOD: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_DMAI_EADDR),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_DMAI_EYMOD),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_DMAI_EZMOD));
+ dev_err(dev,
+ "\tDLA_DMAI_IADDR: 0x%x, DLA_DMAI_IYMOD: 0x%x, DLA_DMAI_IZMOD: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_DMAI_IADDR),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_DMAI_IYMOD),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_DMAI_IZMOD));
+ dev_err(dev, "\tDLA_DMAI_SIZE0: 0x%x, DLA_DMAI_SIZE1: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_DMAI_SIZE0),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_DMAI_SIZE1));
+ }
+ if (ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DLA_DMAO_CTRL) > 0) {
+ dev_err(dev,
+ "\tDLA_DMAO_EADDR: 0x%x, DLA_DMAO_EYMOD: 0x%x, DLA_DMAO_EZMOD: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_DMAO_EADDR),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_DMAO_EYMOD),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_DMAO_EZMOD));
+ dev_err(dev,
+ "\tDLA_DMAO_IADDR: 0x%x, DLA_DMAO_IYMOD: 0x%x, DLA_DMAO_IZMOD: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_DMAO_IADDR),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_DMAO_IYMOD),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_DMAO_IZMOD));
+ dev_err(dev, "\tDLA_DMAO_SIZE0: 0x%x, DLA_DMAO_SIZE1: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_DMAO_SIZE0),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DLA_DMAO_SIZE1));
+ }
+
+ dev_err(dev,
+ "DSP_CORE_CTRL: 0x%x, DSP_DMAI_CTRL: 0x%x, DSP_DMAO_CTRL: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DSP_CORE_CTRL),
+ ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DSP_DMAI_CTRL),
+ ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DSP_DMAO_CTRL));
+ if (ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DSP_DMAI_CTRL) > 0) {
+ dev_err(dev,
+ "\tDSP_DMAI_EADDR: 0x%x, DSP_DMAI_EYMOD: 0x%x, DSP_DMAI_EZMOD: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DSP_DMAI_EADDR),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DSP_DMAI_EYMOD),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DSP_DMAI_EZMOD));
+ dev_err(dev,
+ "\tDSP_DMAI_IADDR: 0x%x, DSP_DMAI_IYMOD: 0x%x, DSP_DMAI_IZMOD: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DSP_DMAI_IADDR),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DSP_DMAI_IYMOD),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DSP_DMAI_IZMOD));
+ dev_err(dev, "\tDSP_DMAI_SIZE0: 0x%x, DSP_DMAI_SIZE1: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DSP_DMAI_SIZE0),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DSP_DMAI_SIZE1));
+ }
+ if (ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DSP_DMAO_CTRL) > 0) {
+ dev_err(dev,
+ "\tDSP_DMAO_EADDR: 0x%x, DSP_DMAO_EYMOD: 0x%x, DSP_DMAO_EZMOD: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DSP_DMAO_EADDR),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DSP_DMAO_EYMOD),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DSP_DMAO_EZMOD));
+ dev_err(dev,
+ "\tDSP_DMAO_IADDR: 0x%x, DSP_DMAO_IYMOD: 0x%x, DSP_DMAO_IZMOD: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DSP_DMAO_IADDR),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DSP_DMAO_IYMOD),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DSP_DMAO_IZMOD));
+ dev_err(dev, "\tDSP_DMAO_SIZE0: 0x%x, DSP_DMAO_SIZE1: 0x%x",
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DSP_DMAO_SIZE0),
+ ioread32(drv->mmreg_vaddr[0] +
+ OFFSET_NPU_DSP_DMAO_SIZE1));
+ }
+}
+
+static void triv2_monitor_timeout_cb(void *data)
+{
+ struct trinity_driver *drv;
+ struct device *dev;
+
+ if (!data)
+ return;
+
+ drv = data;
+ dev = drv_to_dev_ptr(drv);
+
+ dev_err(dev, "Request timeout detected (device_id: %u, stage: %s)",
+ drv->dev_id, triv2_debug_idu_stage(drv));
+
+ triv2_dump_npu_mmregs(drv);
+ triv2_dump_command_slots(drv);
+
+#ifdef CONFIG_SR_NPU_IOMMU
+ /* reset devices when iommu fault is detected */
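+ /* the iova/flags arguments here are dummies; the srnpu-iommu fault
+ * handler is assumed to return a positive value only when a fault
+ * has actually been latched */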
+ if (report_iommu_fault(iommu_get_domain_for_dev(dev), dev, 0, 1) > 0)
+ triv2_reset(drv);
+#endif
+}
+
+static void triv2_append_monitor_event(struct triv2_req *req)
+{
+ struct trinity_monitor_event *event;
+
+ /* for kernel requests only (i.e., high priority) */
+ if (!req->kernel)
+ return;
+
+ event = trinity_monitor_get_event();
+ if (!event)
+ return;
+
+ event->start_time = req->req.stat->scheduled;
+ event->timeout_ms = TRIV2_KERN_TIMEOUT_RESET;
+ event->cb = triv2_monitor_timeout_cb;
+ event->cb_data = req->req.drv;
+
+ req->event = event;
+ trinity_monitor_add_event(event);
+}
+#endif
+
+/**
+ * @brief trigger memory-mapped register for inference running
+ */
+static void triv2_run_trigger(const struct trinity_driver *drv, int slot)
+{
+ struct triv2_cmd_info *cmd_info = TRIV2_DRV_GET_CMD_INFO(drv);
+ struct triv2_req *t_req = cmd_info->reqs[slot];
+
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_triv2_run_trigger(drv->dev_id, slot);
+#endif
+ if (!t_req) {
+ dev_err(drv_to_dev_ptr(drv),
+ "Unable to find the corresponding req");
+ return;
+ }
+
+ if (triv2_sync_segt_entries(drv, t_req) < 0)
+ dev_err(drv_to_dev_ptr(drv),
+ "Unable to sync the segment table");
+
+ /* sync the current bitmap */
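+ /* only the first word is written; TRIV2_MAX_CMDSLOTS is assumed
+ * to fit in 32 bits */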
+ iowrite32(*cmd_info->bitmap,
+ trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_CMD_REQ));
+
+ t_req->req.stat->scheduled = ktime_get();
+ t_req->req.stat->completed = 0;
+
+#ifdef CONFIG_TRINITY_MONITOR
+ triv2_append_monitor_event(t_req);
+#endif
+
+ /* trigger the event (we do not assume that IDU always accepts this event) */
+ triv2_wakeup_cp(drv);
+}
+
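+/**
+ * @brief Release the cmd slot owned by the req and clear the
+ * device-visible slot memory.
+ */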
+static void triv2_clear_cmd(struct trinity_driver *drv, struct triv2_req *req,
+ struct triv2_cmd *cmd)
+{
+ struct triv2_cmd_info *cmd_info = TRIV2_DRV_GET_CMD_INFO(drv);
+
+ cmd_info->reqs[req->cmd_slot] = NULL;
+ clear_bit(req->cmd_slot, cmd_info->bitmap);
+ req->cmd_slot = -1;
+
+ memset_io(cmd, '\x00', sizeof(struct triv2_cmd));
+}
+
+static void triv2_handle_cmd_done(struct trinity_driver *drv,
+ struct triv2_cmd *cmd, bool timeout)
+{
+ struct device *dev = drv_to_dev_ptr(drv);
+ struct triv2_cmd_info *cmd_info = TRIV2_DRV_GET_CMD_INFO(drv);
+ struct triv2_req *t_req;
+ struct trinity_req *req;
+ struct trinity_sched_desc *sched;
+ uint32_t slot = cmd->slot;
+ int64_t time_diff;
+
+ t_req = cmd_info->reqs[slot];
+ if (!t_req) {
+ dev_err(dev, "Failed to find the req\n");
+ return;
+ }
+
+#ifdef CONFIG_TRINITY_MONITOR
+ if (t_req->event)
+ atomic_set(&t_req->event->marker, 1);
+#endif
+
+ req = &(t_req->req);
+ req->stat->completed = ktime_get();
+ req->stat->status = TRINITY_REQ_STATUS_FINISHED;
+
+ time_diff = TIME_DIFF_US(req->stat->completed, req->stat->scheduled);
+ if (time_diff < 0) {
+ dev_warn(dev, "Detected invalid inference time of request\n");
+ } else {
+ req->stat->prev_time = (uint32_t)time_diff;
+ req->stat->prev_cycles = cmd->total_cycles;
+ req->stat->num_runs++;
+ req->stat->total_time += req->stat->prev_time;
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_triv2_handle_cmd_done(drv->dev_id, cmd->slot,
+ cmd->total_cycles,
+ req->stat->prev_time);
+#endif
+ }
+
+ t_req->total_cycles = cmd->total_cycles;
+ t_req->profile_offset = cmd->profile_offset;
+
+ triv2_unmap_sched_data(drv, t_req, cmd);
+ triv2_clear_cmd(drv, t_req, cmd);
+
+ /* notify to the scheduler */
+ sched = get_trinity_sched(req);
+ if (sched && sched->notify)
+ sched->notify(req, timeout);
+
+ /* notify to the caller */
+ if (!req->is_kernel)
+ complete_all(&req->complete);
+}
+
+/**
+ * @brief Prepare the command info for the target req before invoking it
+ */
+static int32_t triv2_prepare_cmd(struct trinity_driver *drv,
+ struct trinity_req *req, void *sched_data)
+{
+ struct triv2_cmd_info *cmd_info;
+ struct triv2_cmd cmd = { 0 };
+ struct triv2_req *t;
+
+ const struct trinity_model *model = req->model;
+ const struct trinity_input *input = &req->input;
+
+ int32_t slot;
+ struct iommu_domain *domain;
+ phys_addr_t paddr;
+ unsigned long flags;
+
+ /** Note that the program base is not behind the IOMMU */
+ domain = iommu_get_domain_for_dev(drv_to_dev_ptr(drv));
+
+ paddr = trinity_get_paddr(domain, model->import_info.dma_addr);
+ cmd.prog_addr = TRIV2_IDU_ADDR(paddr);
+ cmd.prog_addr += model->config.program_offset_addr;
+ cmd.prog_size = model->config.program_size;
+
+ paddr = trinity_get_paddr(domain, input->import_info.dma_addr);
+ cmd.segt_addr = TRIV2_IDU_ADDR(paddr);
+ cmd.num_visa = model->config.num_visa_insts;
+
+ cmd.priority = input->config.priority;
+ cmd.input_mode = input->config.input_mode;
+ cmd.output_mode = input->config.output_mode;
+
+ /** Find an empty cmd slot in the bitmap (needs the spin lock) */
+ cmd_info = TRIV2_DRV_GET_CMD_INFO(drv);
+ t = TRIV2_GET_REQ(req);
+
+ spin_lock_irqsave(&cmd_info->lock, flags);
+
+ slot = find_first_zero_bit(cmd_info->bitmap, TRIV2_MAX_CMDSLOTS);
+ if (slot < TRIV2_MAX_CMDSLOTS) {
+ set_bit(slot, cmd_info->bitmap);
+ cmd_info->reqs[slot] = t;
+ t->cmd_slot = slot;
+ }
+
+ spin_unlock_irqrestore(&cmd_info->lock, flags);
+
+ /** Will be retried later (relying on the platform device's scheduling) */
+ if (slot >= TRIV2_MAX_CMDSLOTS)
+ return -EBUSY;
+
+ cmd.slot = slot;
+ cmd.status = STATUS_CMD_READY;
+
+ if (req->is_kernel && sched_data)
+ triv2_map_sched_data(drv, req, &cmd, sched_data);
+
+ memcpy_toio(cmd_info->buf.vaddr + slot * sizeof(struct triv2_cmd), &cmd,
+ sizeof(struct triv2_cmd));
+
+ return slot;
+}
+
+static dma_addr_t triv2_map_iommu_extern(struct device *dev,
+ struct trinity_req *req,
+ phys_addr_t paddr, size_t size)
+{
+ struct iommu_domain *domain;
+ enum dma_data_direction dir;
+ unsigned long attrs = 0;
+
+ domain = iommu_get_domain_for_dev(dev);
+ if (!domain)
+ return (dma_addr_t)paddr;
+
+ dir = DMA_BIDIRECTIONAL;
+ attrs |= DMA_ATTR_WRITE_COMBINE;
+ attrs |= DMA_ATTR_SKIP_CPU_SYNC;
+ attrs |= DMA_ATTR_FORCE_CONTIGUOUS;
+
+ if (req->skip_iommu_mapping) {
+ phys_addr_t extern_paddr;
+ dma_addr_t extern_daddr;
+ size_t extern_size;
+ unsigned long offset;
+
+ /* fallback to original iommu mapping on erroneous cases */
+ if (trinity_get_extern_memory(dev, &extern_paddr, &extern_daddr,
+ &extern_size) != 0)
+ goto out;
+ if (unlikely(extern_paddr > paddr))
+ goto out;
+ if (unlikely(extern_paddr + extern_size < paddr + size))
+ goto out;
+
+ offset = (unsigned long)(paddr - extern_paddr);
+ return extern_daddr + offset;
+ }
+
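+ /* Reached when skip_iommu_mapping is unset or when the extern memory
+ * window cannot cover this buffer; fall back to a regular mapping.
+ */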
+out:
+ req->skip_iommu_mapping = false;
+ return dma_map_resource(dev, paddr, size, dir, attrs);
+}
+
+static void triv2_unmap_iommu_extern(struct device *dev,
+ struct trinity_req *req, dma_addr_t daddr,
+ size_t size)
+{
+ struct iommu_domain *domain;
+ enum dma_data_direction dir;
+ unsigned long attrs = 0;
+
+ domain = iommu_get_domain_for_dev(dev);
+ if (!domain)
+ return;
+
+ if (req->skip_iommu_mapping)
+ return;
+
+ dir = DMA_BIDIRECTIONAL;
+ attrs |= DMA_ATTR_WRITE_COMBINE;
+ attrs |= DMA_ATTR_SKIP_CPU_SYNC;
+ attrs |= DMA_ATTR_FORCE_CONTIGUOUS;
+
+ dma_unmap_resource(dev, daddr, size, dir, attrs);
+}
+
+static void triv2_map_sched_data(struct trinity_driver *drv,
+ struct trinity_req *req, struct triv2_cmd *cmd,
+ void *sched_data)
+{
+ struct device *dev = drv_to_dev_ptr(drv);
+ struct inout_addr_info *addr_info;
+ struct triv2_req *t_req;
+ struct triv2_kernel_req *k_req;
+ uint32_t i, j, offset;
+
+ t_req = TRIV2_GET_REQ(req);
+ addr_info = (struct inout_addr_info *)sched_data;
+
+ cmd->batch_size = addr_info->batch_size;
+ cmd->curr_cnt = 0;
+ cmd->poll_addr = addr_info->poll_addr;
+ cmd->poll_magic = req->poll_magic;
+ cmd->in_extern_seg_num = addr_info->in_cnt;
+ cmd->out_extern_seg_num = addr_info->out_cnt;
+
+ k_req = t_req->kernel;
+ for (i = 0; i < addr_info->batch_size; i++) {
+ /* input extern segment */
+ offset = i * addr_info->in_cnt;
+ for (j = 0; j < addr_info->in_cnt; j++)
+ cmd->in_addr[offset + j] = triv2_map_iommu_extern(
+ dev, req, addr_info->in_addr[offset + j],
+ k_req->in_seg_size[j]);
+ /* output extern segment */
+ offset = i * addr_info->out_cnt;
+ for (j = 0; j < addr_info->out_cnt; j++)
+ cmd->out_addr[offset + j] = triv2_map_iommu_extern(
+ dev, req, addr_info->out_addr[offset + j],
+ k_req->out_seg_size[j]);
+ }
+ /* index for extern segments */
+ for (i = 0; i < cmd->in_extern_seg_num; i++)
+ cmd->in_extern_seg_idx[i] = k_req->in_seg_idx[i];
+ for (i = 0; i < cmd->out_extern_seg_num; i++)
+ cmd->out_extern_seg_idx[i] = k_req->out_seg_idx[i];
+
+ trace_triv2_map_sched_data(drv->dev_id, cmd->slot, cmd->batch_size,
+ cmd->in_extern_seg_num,
+ cmd->out_extern_seg_num);
+}
+
+static void triv2_unmap_sched_data(struct trinity_driver *drv,
+ struct triv2_req *t_req,
+ struct triv2_cmd *cmd)
+{
+ struct device *dev = drv_to_dev_ptr(drv);
+ struct trinity_req *req = &(t_req->req);
+ struct triv2_kernel_req *k_req;
+ uint32_t i, j, offset;
+
+ /* only for kernel request */
+ if (!req->is_kernel)
+ return;
+
+ k_req = t_req->kernel;
+ for (i = 0; i < cmd->batch_size; i++) {
+ offset = i * cmd->in_extern_seg_num;
+ for (j = 0; j < cmd->in_extern_seg_num; j++)
+ triv2_unmap_iommu_extern(dev, req,
+ cmd->in_addr[offset + j],
+ k_req->in_seg_size[j]);
+
+ offset = i * cmd->out_extern_seg_num;
+ for (j = 0; j < cmd->out_extern_seg_num; j++)
+ triv2_unmap_iommu_extern(dev, req,
+ cmd->out_addr[offset + j],
+ k_req->out_seg_size[j]);
+ }
+
+ trace_triv2_unmap_sched_data(drv->dev_id, cmd->slot);
+}
+
+/**
+ * @brief Invoke a req on the device. Note that all configuration
+ * required for running must be done before this function is invoked.
+ */
+static int32_t triv2_invoke_req(struct trinity_driver *drv,
+ struct trinity_req *req, void *sched_data)
+{
+ enum trinity_output_mode mode = req->input.config.output_mode;
+ int32_t slot;
+
+ /* validate the output mode before claiming a cmd slot */
+ if (mode != TRINITY_OUTPUT_HW && mode != TRINITY_OUTPUT_CPU_POLL &&
+ mode != TRINITY_OUTPUT_CPU_INTR) {
+ dev_err(drv_to_dev_ptr(drv), "Invalid output mode: %d\n", mode);
+ return -EINVAL;
+ }
+
+ slot = triv2_prepare_cmd(drv, req, sched_data);
+ if (slot < 0)
+ return slot;
+
+ triv2_run_trigger(drv, slot);
+
+ return 0;
+}
+
+static struct trinity_req *triv2_alloc_req(struct trinity_driver *drv)
+{
+ struct triv2_req *t_req;
+
+ t_req = kzalloc(sizeof(struct triv2_req), GFP_KERNEL);
+ if (!t_req)
+ return NULL;
+
+ t_req->cmd_slot = -1;
+
+ return &(t_req->req);
+}
+
+static void triv2_dealloc_req(struct trinity_driver *drv,
+ struct trinity_req *req)
+{
+ struct triv2_req *t_req = TRIV2_GET_REQ(req);
+
+ if (t_req->seg_import) {
+ struct trinity_hwmem_import *import;
+ uint32_t i;
+ for (i = 0; i < req->input.config.num_segments; i++) {
+ import = &(t_req->seg_import[i]);
+ if (import->addr)
+ trinity_hwmem_import_dmabuf_end(import);
+ }
+ kfree(t_req->seg_import);
+ }
+ kfree(t_req->kernel);
+ kfree(t_req);
+}
+
+static void triv2_handle_timeout(struct trinity_driver *drv,
+ struct trinity_req *req)
+{
+ struct triv2_cmd_info *cmd_info = TRIV2_DRV_GET_CMD_INFO(drv);
+ struct triv2_cmd *cmd;
+ struct triv2_req *t;
+ unsigned long flags;
+
+#ifdef CONFIG_TRINITY_FPGA_DEBUG
+ trinity_hwmem_iommu_print_status(drv_to_dev_ptr(drv));
+#endif
+
+ t = TRIV2_GET_REQ(req);
+
+ spin_lock_irqsave(&cmd_info->lock, flags);
+ if (t->cmd_slot >= 0) {
+ /* Timeout! check whether it's not handled in irq handler */
+ cmd = TRIV2_GET_CMD_FROM_SLOT(cmd_info, t->cmd_slot);
+ triv2_handle_cmd_done(drv, cmd, true);
+ }
+ spin_unlock_irqrestore(&cmd_info->lock, flags);
+}
+
+/**
+ * @brief Stop the reqs submitted to the driver.
+ * For an already-executing req, each device needs to determine the policy,
+ * depending on its capability, to terminate the running one.
+ */
+static void triv2_stop_reqs(struct work_struct *work)
+{
+ struct trinity_driver *drv;
+
+ drv = container_of(work, struct trinity_driver, work_stop);
+ if (drv == NULL)
+ return;
+
+ triv2_cancel_reqs(drv);
+}
+
+/**
+ * @brief get profile metadata for the target req
+ */
+static int32_t triv2_get_profile_meta(const struct trinity_driver *drv,
+ struct trinity_ioctl_profile_meta *meta)
+{
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ struct triv2_profile *profile;
+ struct triv2_cmd_profile *profile_data;
+ int ret = 0;
+
+ mutex_lock(&pdata->prof_lock);
+
+ profile = triv2_find_profile(drv, meta->req_id);
+ if (!profile) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ profile_data = profile->data;
+ BUG_ON(!profile_data);
+
+ meta->total_cycles = profile_data->total_cycles;
+ meta->total_ops = profile_data->total_ops;
+ meta->profile_size =
+ profile_data->total_ops * sizeof(struct triv2_op_profile);
+ /* unsupported for now */
+ meta->input_footprint = -1;
+ meta->output_footprint = -1;
+
+out:
+ mutex_unlock(&pdata->prof_lock);
+
+ return ret;
+}
+
+/**
+ * @brief get profile buffer for the target req
+ */
+static int32_t triv2_get_profile_buff(const struct trinity_driver *drv,
+ struct trinity_ioctl_profile_buff *buff)
+{
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ struct triv2_profile *profile;
+ struct triv2_cmd_profile *profile_data;
+ uint32_t total_size;
+ int ret = 0;
+
+ mutex_lock(&pdata->prof_lock);
+
+ profile = triv2_find_profile(drv, buff->req_id);
+ if (!profile) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ profile_data = profile->data;
+ BUG_ON(!profile_data);
+
+ total_size = profile_data->total_ops * sizeof(struct triv2_op_profile);
+
+ if (buff->profile_pos + buff->profile_size > total_size) {
+ dev_err(drv_to_dev_ptr(drv),
+ "Profile data out-of-range! pos(%u) size(%u) > total_size(%u)",
+ buff->profile_pos, buff->profile_size, total_size);
+ ret = -ERANGE;
+ goto out;
+ }
+
+ /* consider partial memory copies */
+ if (copy_to_user((char __user *)buff->profile_buf,
+ (char *)profile_data->profile_ops + buff->profile_pos,
+ buff->profile_size))
+ ret = -EFAULT;
+
+out:
+ mutex_unlock(&pdata->prof_lock);
+
+ return ret;
+}
+
+static void triv2_show_profile(const struct trinity_driver *drv, int req_id)
+{
+ struct device *dev = drv_to_dev_ptr(drv);
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ struct triv2_profile *profile;
+ struct triv2_cmd_profile *profile_data;
+ uint32_t i;
+
+ mutex_lock(&pdata->prof_lock);
+
+ profile = triv2_find_profile(drv, req_id);
+ if (!profile) {
+ dev_warn(dev, "Unable to find the profile data (req_id %d)",
+ req_id);
+ goto out;
+ }
+
+ profile_data = profile->data;
+ BUG_ON(!profile_data);
+
+ dev_info(dev, "Total cycles: %lld", profile_data->total_cycles);
+ dev_info(dev, "Total ops: %u", profile_data->total_ops);
+
+ for (i = 0; i < profile_data->total_ops; i++) {
+ struct triv2_op_profile *op = &profile_data->profile_ops[i];
+
+ dev_info(dev, "[%u] opcode: %u name:%s", i, op->opcode,
+ op->op_name);
+ dev_info(dev, "\tcycles: %lld", op->cycles);
+ dev_info(dev, "\tprog_seq: %lld", op->prog_seq);
+ dev_info(dev, "\texec_seq: %lld", op->exec_seq);
+ if (op->dram_read > 0)
+ dev_info(dev, "\tdram_read: %lld", op->dram_read);
+ if (op->dram_write > 0)
+ dev_info(dev, "\tdram_write: %lld", op->dram_write);
+ if (op->sram_read > 0)
+ dev_info(dev, "\tsram_read: %lld", op->sram_read);
+ if (op->sram_write > 0)
+ dev_info(dev, "\tsram_write: %lld", op->sram_write);
+ }
+out:
+ mutex_unlock(&pdata->prof_lock);
+}
+
+/**
+ * @brief destroy profile data
+ */
+static void triv2_destroy_profile(const struct trinity_driver *drv, void *data)
+{
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ struct triv2_profile *profile = data;
+ struct triv2_cmd_profile *profile_data;
+
+ if (!profile)
+ return;
+
+ mutex_lock(&pdata->prof_lock);
+
+ profile_data = profile->data;
+ BUG_ON(!profile_data);
+ vfree(profile_data);
+
+ hash_del(&profile->hlist);
+ vfree(profile);
+
+ mutex_unlock(&pdata->prof_lock);
+}
+
+static void triv2_handle_irq_cmds(struct trinity_driver *drv)
+{
+ struct triv2_cmd_info *info;
+ struct triv2_cmd *cmd;
+ unsigned long flags;
+ int slot;
+
+ info = TRIV2_DRV_GET_CMD_INFO(drv);
+ spin_lock_irqsave(&info->lock, flags);
+
+ /** Search the bitmap to find the completed CMDs */
+ slot = find_first_bit(info->bitmap, TRIV2_MAX_CMDSLOTS);
+ while (slot < TRIV2_MAX_CMDSLOTS) {
+ cmd = TRIV2_GET_CMD_FROM_SLOT(info, slot);
+ if (cmd->status == STATUS_CMD_DONE)
+ triv2_handle_cmd_done(drv, cmd, false);
+ slot = find_next_bit(info->bitmap, TRIV2_MAX_CMDSLOTS,
+ slot + 1);
+ }
+
+ spin_unlock_irqrestore(&info->lock, flags);
+}
+
+/**
+ * @brief An IRQ handler to be called when a registered IRQ (IRQ_OUT) occurs.
+ */
+static irqreturn_t triv2_handle_irq(int irq_no, void *dev_id)
+{
+ struct miscdevice *_mdev;
+ struct trinity_driver *drv;
+ void __iomem *addr;
+ uint32_t interrupt;
+ uint32_t reg;
+
+ _mdev = (struct miscdevice *)dev_id;
+ drv = container_of(_mdev, struct trinity_driver, mdev);
+
+#ifdef CONFIG_TRINITY_DEBUG
+ trace_triv2_handle_irq(drv->dev_id, irq_no);
+#endif
+
+ /**
+ * Verify that the IRQ actually came from the NPU.
+ * This is required because IRQF_SHARED is used when setting up the IRQ.
+ */
+ addr = trinity_get_iomem_addr(drv->mmreg_vaddr[2],
+ OFFSET_CBOX_EXT_IRQ_STA);
+ reg = ioread32(addr);
+
+ interrupt = reg & MASK_CP_SWI_STA;
+ if (interrupt == 0)
+ return IRQ_NONE;
+
+ /** Clear the interrupt first */
+ addr = trinity_get_iomem_addr(drv->mmreg_vaddr[2],
+ OFFSET_CBOX_CP_SWI_CLR);
+ iowrite32(1, addr);
+
+ triv2_handle_irq_cmds(drv);
+ return IRQ_HANDLED;
+}
+
+/**
+ * @brief resolve and program the device addresses of the segment table entries
+ */
+static int32_t triv2_prepare_req(struct trinity_driver *drv,
+ struct trinity_req *req)
+{
+ struct triv2_req *t = TRIV2_GET_REQ(req);
+ struct trinity_input *input = &(req->input);
+ struct trinity_hwmem_import *segt_import = &(input->import_info);
+ int32_t *segtable_dbuffd_base;
+ uint32_t *segtable_extra_base;
+ int ret, i;
+
+ if (input->config.num_segments == 0)
+ return -EINVAL;
+
+ if (input->config.num_segments > TRIV2_MAX_SEGMENTS)
+ return -ERANGE;
+
+ t->seg_import =
+ kcalloc(input->config.num_segments,
+ sizeof(struct trinity_hwmem_import), GFP_KERNEL);
+ if (!t->seg_import)
+ return -ENOMEM;
+
+ /* dmabuf fd to be resolved */
+ segtable_dbuffd_base = segt_import->addr;
+ /* extra value (e.g., offset or size) */
+ segtable_extra_base = segt_import->addr + HALF_PAGE_SIZE;
+
+#ifdef CONFIG_ARM
+ /* sync segment table */
+ __cpuc_flush_dcache_area(input->import_info.addr,
+ input->import_info.buf->size);
+#endif
+
+ for (i = 0; i < input->config.num_segments; ++i) {
+ struct trinity_hwmem_import *import;
+ int32_t fd = segtable_dbuffd_base[i];
+ dma_addr_t daddr;
+
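+ /* A negative entry encodes an external (kernel I/O) segment as
+ * -(idx + 1): indexes below TRIV2_MAX_TENSORS are inputs, the
+ * next TRIV2_MAX_TENSORS are outputs.
+ */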
+ if (fd < 0) {
+ uint32_t idx = (uint32_t)((fd + 1) * -1);
+ struct triv2_kernel_req *kreq;
+
+ /* it's for kernel input/output */
+ if (!req->is_kernel) {
+ req->is_kernel = true;
+ kreq = kzalloc(sizeof(*kreq), GFP_KERNEL);
+ if (!kreq) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ t->kernel = kreq;
+ }
+
+ kreq = t->kernel;
+ if (idx < TRIV2_MAX_TENSORS) {
+ kreq->in_seg_idx[idx] = i;
+ kreq->in_seg_size[idx] = segtable_extra_base[i];
+ t->total_segment_size += kreq->in_seg_size[idx];
+ } else if (idx < TRIV2_MAX_TENSORS * 2) {
+ idx -= TRIV2_MAX_TENSORS;
+ kreq->out_seg_idx[idx] = i;
+ kreq->out_seg_size[idx] =
+ segtable_extra_base[i];
+ t->total_segment_size +=
+ kreq->out_seg_size[idx];
+ } else {
+ dev_err(drv_to_dev_ptr(drv),
+ "Invalid external segment (idx: %u)",
+ idx);
+ ret = -EINVAL;
+ goto err;
+ }
+ continue;
+ }
+
+ import = &(t->seg_import[i]);
+ ret = trinity_hwmem_import_dmabuf_begin(drv_to_dev_ptr(drv), fd,
+ import);
+ if (ret) {
+ dev_err(drv_to_dev_ptr(drv),
+ "%d-th segment with fd (%d) seems invalid: %d",
+ i, fd, ret);
+ goto err;
+ }
+
+ t->total_segment_size += import->buf->size;
+
+ /** @todo Use a local ptr variable */
+ daddr = import->dma_addr;
+ daddr += segtable_extra_base[i];
+
+ iowrite32(TRIV2_IDU_ADDR(daddr),
+ segt_import->addr + i * sizeof(u32));
+ }
+
+ /* set the dma address of DSPM (reserved index: TRIV2_MAX_SEGMENTS - 1) */
+ if (drv->dspm > 0) {
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+
+ iowrite32(TRIV2_IDU_ADDR(pdata->idu_dsp.dspm),
+ segt_import->addr +
+ (TRIV2_MAX_SEGMENTS - 1) * sizeof(u32));
+ }
+
+ return 0;
+
+err:
+ kfree(t->seg_import);
+ t->seg_import = NULL;
+ return ret;
+}
+
+long triv2_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+ struct trinity_driver *drv = f->private_data;
+ struct device *dev = drv_to_dev_ptr(drv);
+ long ret;
+
+ if (trinity_pm_runtime_forbid(dev) != 0)
+ return -EBUSY;
+
+ ret = trinity_ioctl(f, cmd, arg);
+
+ trinity_pm_runtime_allow(dev);
+
+ return ret;
+}
+
+int triv2_open(struct inode *inode, struct file *f)
+{
+ struct miscdevice *miscdev;
+ struct trinity_driver *drv;
+ struct device *dev;
+ int ret;
+
+ miscdev = (struct miscdevice *)f->private_data;
+ drv = container_of(miscdev, struct trinity_driver, mdev);
+ dev = drv_to_dev_ptr(drv);
+
+ if (trinity_pm_runtime_forbid(dev) != 0)
+ return -EBUSY;
+
+ ret = trinity_open(inode, f);
+
+ trinity_pm_runtime_allow(dev);
+
+ return ret;
+}
+
+static const struct file_operations triv2_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = triv2_ioctl,
+ .open = triv2_open,
+ .release = trinity_release,
+ .llseek = noop_llseek,
+};
+
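+/* The IMIF base registers store the IDU address right-shifted by 4 bits,
+ * i.e., in 16-byte units.
+ */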
+static void triv2_setup_cp(struct trinity_driver *drv, phys_addr_t paddr)
+{
+ iowrite32(TRIV2_IDU_ADDR(paddr) >> 4,
+ drv->mmreg_vaddr[0] + OFFSET_CP_IMIF_BASE);
+ iowrite32(TRIV2_IDU_ADDR(drv->mmreg_paddr[2]),
+ drv->mmreg_vaddr[0] + OFFSET_NPU_CBOX_BASE);
+}
+
+static void triv2_setup_dsp(struct trinity_driver *drv, phys_addr_t paddr)
+{
+ iowrite32(TRIV2_IDU_ADDR(paddr) >> 4,
+ drv->mmreg_vaddr[1] + OFFSET_DSP_IMIF_BASE);
+}
+
+static void triv2_init_common(void)
+{
+ static bool done = false;
+ int i;
+
+ if (done)
+ return;
+
+ /* init hlists */
+ for (i = 0; i < TRIV2_MODEL_HASH_SIZE; ++i)
+ INIT_HLIST_BL_HEAD(&triv2_model_node_hlist[i]);
+ done = true;
+}
+
+static int triv2_idu_alloc(struct device *dev, struct trinity_resv_mem *mem)
+{
+#ifdef CONFIG_TRINITY_FPGA
+ mem->vaddr = dma_alloc_wc(dev, mem->size, &mem->daddr, GFP_KERNEL);
+ if (!mem->vaddr)
+ return -ENOMEM;
+ return 0;
+#else
+ return trinity_alloc_from_resv_mem(mem->size, mem, false);
+#endif
+}
+
+static void triv2_idu_free(struct device *dev, struct trinity_resv_mem *mem)
+{
+ if (!mem->vaddr)
+ return;
+
+#ifdef CONFIG_TRINITY_FPGA
+ dma_free_wc(dev, mem->size, mem->vaddr, mem->daddr);
+#else
+ trinity_free_from_resv_mem(mem, false);
+#endif
+ mem->vaddr = NULL;
+}
+
+static int triv2_idu_version(struct trinity_driver *drv, uint32_t *major,
+ uint32_t *minor, uint32_t *extra)
+{
+ struct triv2_pdata *pdata;
+ uint32_t val;
+
+ if (!drv || !major || !minor || !extra)
+ return -EINVAL;
+
+ pdata = TRIV2_DRV_GET_PDATA(drv);
+ val = pdata->idu_version;
+ if (val != 0) {
+ *major = (val & TRIV2_IDU_MASK_MAJOR) >> TRIV2_IDU_SHIFT_MAJOR;
+ *minor = (val & TRIV2_IDU_MASK_MINOR) >> TRIV2_IDU_SHIFT_MINOR;
+ *extra = val & TRIV2_IDU_MASK_EXTRA;
+ } else {
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static void triv2_idu_check(struct trinity_driver *drv)
+{
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ struct device *dev = drv_to_dev_ptr(drv);
+ uint32_t major, minor, extra;
+
+ if (trinity_wait_ready(drv) != 0) {
+ dev_warn(dev, "Unable to load IDU properly");
+ return;
+ }
+
+ pdata->idu_version =
+ ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_IDU_VERSION);
+ if (triv2_idu_version(drv, &major, &minor, &extra) == 0)
+ dev_info(dev,
+ "Instruction Decoder Unit (IDU) v%u.%u.%u detected",
+ major, minor, extra);
+
+ /* paused until device is opened */
+ triv2_set_state(drv, TRINITY_STATE_PAUSE);
+}
+
+static int triv2_idu_load_file(struct trinity_driver *drv, const char *dirpath,
+ const char *file_name,
+ struct trinity_resv_mem *sector)
+{
+ struct device *dev = drv_to_dev_ptr(drv);
+ struct trinity_resv_mem mem;
+ char filepath[NAME_MAX];
+ struct kstat *stat;
+ struct file *filp;
+ mm_segment_t old_fs;
+ loff_t pos = 0;
+ size_t size;
+ int ret;
+
+ dev = drv_to_dev_ptr(drv);
+ stat = (struct kstat *)vmalloc(sizeof(*stat));
+ if (stat == NULL)
+ return -ENOMEM;
+
+ /* if dirpath is null, use the default path */
+ if (dirpath)
+ snprintf(filepath, NAME_MAX, "%s/%s", dirpath, file_name);
+ else
+ snprintf(filepath, NAME_MAX, TRIV2_IDU_DIRPATH_FMT "/%s",
+ utsname()->release, file_name);
+
+ filp = filp_open(filepath, O_RDONLY, 0400);
+ if (IS_ERR(filp)) {
+ dev_err(dev, "Failed to open the idu binary: %s", filepath);
+ ret = PTR_ERR(filp);
+ goto out_free;
+ }
+
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+
+ /* check file existence first */
+ ret = vfs_getattr(&filp->f_path, stat, STATX_SIZE,
+ AT_STATX_SYNC_AS_STAT);
+
+ set_fs(old_fs);
+
+ if (ret != 0 || stat->size == 0) {
+ dev_warn(dev, "File not found: %s", filepath);
+ ret = -ENOENT;
+ goto out_close;
+ }
+
+ size = stat->size;
+ if (size > TRIV2_IDU_MAXSIZE) {
+ dev_err(dev, "Too large idu binary: %zu MiB", size >> 20);
+ ret = -EINVAL;
+ goto out_close;
+ }
+
+#ifdef CONFIG_TRINITY_FPGA
+ mem.size = TRIV2_IDU_MAXSIZE;
+#else
+ mem.size = PAGE_ALIGN(size);
+#endif
+ ret = triv2_idu_alloc(dev, &mem);
+ if (ret < 0) {
+ dev_err(dev, "Failed to allocate memory for idu");
+ goto out_close;
+ }
+
+ ret = read_idu_file(filp, pos, mem.vaddr, size);
+ if (ret != size) {
+ dev_err(dev, "Failed to read the file %s", filepath);
+ triv2_idu_free(dev, &mem);
+ ret = -ERANGE;
+ goto out_close;
+ }
+
+ /* free previous idu if exists */
+ if (sector->vaddr)
+ triv2_idu_free(dev, sector);
+
+ sector->daddr = mem.daddr;
+ sector->vaddr = mem.vaddr;
+ sector->size = mem.size;
+ sector->orig_size = size;
+
+ ret = 0;
+out_close:
+ filp_close(filp, NULL);
+out_free:
+ vfree(stat);
+
+ return ret;
+}
+
+static int triv2_idu_load_files(struct trinity_driver *drv, const char *dirpath)
+{
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ struct iommu_domain *domain;
+ phys_addr_t paddr;
+ int ret;
+
+ domain = iommu_get_domain_for_dev(drv_to_dev_ptr(drv));
+
+ ret = triv2_idu_load_file(drv, dirpath, "cp/data.bin",
+ &(pdata->idu_cp.data));
+ if (ret < 0)
+ return ret;
+
+ ret = triv2_idu_load_file(drv, dirpath, "cp/code.bin",
+ &(pdata->idu_cp.code));
+ if (ret < 0)
+ return ret;
+
+ paddr = trinity_get_paddr(domain, pdata->idu_cp.code.daddr);
+ pdata->idu_cp.addrs[TRIV2_IDU_CODEIDX] = paddr;
+
+ if (!pdata->idu_dsp.addrs)
+ return 0;
+
+ ret = triv2_idu_load_file(drv, dirpath, "dsp/data.bin",
+ &(pdata->idu_dsp.data));
+ if (ret < 0)
+ return ret;
+
+ ret = triv2_idu_load_file(drv, dirpath, "dsp/code.bin",
+ &(pdata->idu_dsp.code));
+ if (ret < 0)
+ return ret;
+
+ paddr = trinity_get_paddr(domain, pdata->idu_dsp.code.daddr);
+ pdata->idu_dsp.addrs[TRIV2_IDU_CODEIDX] = paddr;
+
+ return 0;
+}
+
+static void triv2_idu_fill_zero(struct trinity_driver *drv, phys_addr_t paddr,
+ size_t size)
+{
+ void __iomem *vaddr;
+
+ vaddr = ioremap(paddr, PAGE_ALIGN(size));
+ if (vaddr == NULL) {
+ dev_err(drv_to_dev_ptr(drv), "Failed to do ioremap() for 0x%lx",
+ (unsigned long)paddr);
+ return;
+ }
+ memset_io(vaddr, 0, size);
+
+ iounmap(vaddr);
+}
+
+static void triv2_idu_fill_data(struct trinity_driver *drv, phys_addr_t paddr,
+ struct trinity_resv_mem *data)
+{
+ void __iomem *vaddr;
+
+ vaddr = ioremap(paddr, data->size);
+ if (vaddr == NULL) {
+ dev_err(drv_to_dev_ptr(drv), "Failed to do ioremap() for 0x%lx",
+ (unsigned long)paddr);
+ return;
+ }
+ memcpy_toio(vaddr, data->vaddr, data->orig_size);
+
+ iounmap(vaddr);
+}
+
+static void triv2_idu_load_code(struct trinity_driver *drv)
+{
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+
+ /* CP is mandatory */
+ triv2_setup_cp(drv, pdata->idu_cp.addrs[TRIV2_IDU_CODEIDX]);
+
+ /* DSP is optional */
+ if (pdata->idu_dsp.addrs)
+ triv2_setup_dsp(drv, pdata->idu_dsp.addrs[TRIV2_IDU_CODEIDX]);
+}
+
+static int triv2_idu_load(struct trinity_driver *drv, const char *dirpath,
+ bool load_files)
+{
+ struct triv2_pdata *pdata;
+ struct triv2_idu *idu_cp;
+ struct triv2_idu *idu_dsp;
+ struct device *dev;
+
+ if (!drv)
+ return -EINVAL;
+
+ dev = drv_to_dev_ptr(drv);
+ if (load_files) {
+ int ret = triv2_idu_load_files(drv, dirpath);
+ if (ret != 0) {
+ dev_warn(dev, "Unable to load IDU files: %d", ret);
+ goto load_code;
+ }
+ }
+
+ pdata = TRIV2_DRV_GET_PDATA(drv);
+ idu_cp = &pdata->idu_cp;
+ idu_dsp = &pdata->idu_dsp;
+
+ triv2_idu_fill_zero(drv, idu_cp->addrs[TRIV2_IDU_ZEROIDX],
+ TRIV2_IDU_CP_DSPM_SIZE);
+ triv2_idu_fill_data(drv, idu_cp->addrs[TRIV2_IDU_DATAIDX],
+ &idu_cp->data);
+
+ if (!pdata->idu_dsp.addrs)
+ goto load_code;
+
+ triv2_idu_fill_zero(drv, idu_dsp->addrs[TRIV2_IDU_ZEROIDX],
+ drv->dspm + TRIV2_DSP_DSPM_OFFSET);
+ triv2_idu_fill_data(drv, idu_dsp->addrs[TRIV2_IDU_DATAIDX],
+ &idu_dsp->data);
+
+load_code:
+ triv2_idu_load_code(drv);
+
+ return 0;
+}
+
+static void triv2_idu_unload(struct trinity_driver *drv)
+{
+ struct device *dev = drv_to_dev_ptr(drv);
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+
+ triv2_idu_free(dev, &pdata->idu_cp.data);
+ triv2_idu_free(dev, &pdata->idu_dsp.data);
+
+ triv2_idu_free(dev, &pdata->idu_cp.code);
+ triv2_idu_free(dev, &pdata->idu_dsp.code);
+}
+
+static void triv2_setup_buffers(struct trinity_driver *drv)
+{
+ struct device *dev = drv_to_dev_ptr(drv);
+ struct iommu_domain *domain;
+ struct trinity_resv_mem *cmd_buf;
+ struct trinity_resv_mem *back_buf;
+ struct trinity_resv_mem *prof_buf;
+ phys_addr_t paddr;
+
+ domain = iommu_get_domain_for_dev(dev);
+ cmd_buf = TRIV2_DRV_GET_CMD_BUF(drv);
+ back_buf = TRIV2_DRV_GET_BACK_BUF(drv);
+ prof_buf = TRIV2_DRV_GET_PROF_BUF(drv);
+
+ /* command */
+ paddr = trinity_get_paddr(domain, cmd_buf->daddr);
+ iowrite32(TRIV2_IDU_ADDR(paddr),
+ trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_CMD_BASE));
+ /* backup */
+ iowrite32(TRIV2_IDU_ADDR(back_buf->daddr),
+ trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_BACK_ADDR));
+ iowrite32(back_buf->size, trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_BACK_SIZE));
+
+ /* profile */
+ if (prof_buf->size > 0) {
+ paddr = trinity_get_paddr(domain, prof_buf->daddr);
+ iowrite32(TRIV2_IDU_ADDR(paddr),
+ trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_PROF_ADDR));
+ iowrite32(prof_buf->size,
+ trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_PROF_SIZE));
+ } else {
+ iowrite32(0, trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_PROF_ADDR));
+ iowrite32(0, trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+ OFFSET_NPU_PROF_SIZE));
+ }
+}
+
+static int32_t triv2_init_pdata(struct trinity_driver *drv)
+{
+ struct device *dev = drv_to_dev_ptr(drv);
+ struct triv2_pdata *pdata;
+ struct triv2_cmd_info *cmd_info;
+ struct trinity_resv_mem *cmd_buf;
+ struct trinity_resv_mem *back_buf;
+ int status;
+
+ trinity_pm_runtime_attach(drv);
+
+ /* alloc triv2 pdata */
+ drv->pdata = (struct triv2_pdata *)kzalloc(sizeof(struct triv2_pdata),
+ GFP_KERNEL);
+ if (!drv->pdata)
+ return -ENOMEM;
+
+ pdata = drv->pdata;
+ pdata->drv = drv;
+
+ cmd_info = TRIV2_DRV_GET_CMD_INFO(drv);
+ cmd_buf = TRIV2_DRV_GET_CMD_BUF(drv);
+ back_buf = TRIV2_DRV_GET_BACK_BUF(drv);
+
+ mutex_init(&pdata->prof_lock);
+
+#ifdef CONFIG_TRINITY_FPGA
+ /* initialize IOMMU */
+ status = trinity_hwmem_iommu_init(dev, drv->mmreg_vaddr[2]);
+ if (status < 0) {
+ dev_err(dev, "Failed to enable the IOMMU device");
+ goto free_pdata;
+ }
+#endif
+
+ spin_lock_init(&cmd_info->lock);
+ /* init cmd bitmap */
+ bitmap_zero(cmd_info->bitmap, TRIV2_MAX_CMDSLOTS);
+
+ /* alloc command buffer */
+ status = trinity_alloc_from_resv_mem(PAGE_SIZE, cmd_buf, false);
+ if (status < 0) {
+ dev_err(dev, "Couldn't allocate memory for cmd slots");
+ goto free_pdata;
+ }
+ /* ensure cmd buffer is null-initialized, which is visible in NPU as well */
+ memset_io(cmd_buf->vaddr, '\x00', PAGE_SIZE);
+
+ /* alloc backup buffer for preemption (GBUF + DSPM) */
+ status = trinity_alloc_from_resv_mem(TRIV2_DLA_GBUFFER_SIZE + drv->dspm,
+ back_buf, false);
+ if (status < 0) {
+ dev_err(dev,
+ "Couldn't allocate memory for context backup buffer");
+ goto free_cmd_info;
+ }
+
+#ifdef CONFIG_TRINITY_FPGA
+ if (trinity_hwmem_iommu_map(dev, back_buf->daddr, back_buf->size) < 0)
+ dev_warn(dev, "Unable to map iommu mapping for 0x%llx",
+ back_buf->daddr);
+#endif
+
+ triv2_setup_buffers(drv);
+ list_add_tail(&pdata->list, &triv2_driver_list);
+
+ return 0;
+
+free_cmd_info:
+ /* cmd_buf was allocated from reserved memory, so free it the same way */
+ trinity_free_from_resv_mem(cmd_buf, false);
+free_pdata:
+ kfree(drv->pdata);
+ drv->pdata = NULL;
+
+ return status;
+}
+
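+/**
+ * @brief Parse a device-tree u64-array property that describes the IDU
+ * sector addresses; exactly TRIV2_IDU_MAX_SECTORS values are expected.
+ */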
+static int32_t parse_idu_property(struct device *dev,
+ const struct device_node *np,
+ const char *prop_name, struct triv2_idu *idu)
+{
+ struct property *prop;
+ u64 values[TRIV2_IDU_MAX_SECTORS];
+ size_t size;
+ int i, err;
+
+ memset(idu, '\x00', sizeof(*idu));
+
+ prop = of_find_property(np, prop_name, NULL);
+ if (!prop)
+ return -EINVAL;
+
+ size = prop->length / sizeof(u64);
+ if (size != TRIV2_IDU_MAX_SECTORS) {
+ dev_err(dev, "idu requires %d values", TRIV2_IDU_MAX_SECTORS);
+ return -EINVAL;
+ }
+
+ idu->addr_num = size;
+ idu->addrs = devm_kcalloc(dev, size, sizeof(*idu->addrs), GFP_KERNEL);
+ if (!idu->addrs) {
+ dev_err(dev, "failed to allocate memory for idu values");
+ return -ENOMEM;
+ }
+
+ err = of_property_read_u64_array(np, prop_name, values, size);
+ if (err < 0) {
+ dev_err(dev, "failed to read property u64 array: %d", err);
+ return err;
+ }
+
+ for (i = 0; i < TRIV2_IDU_MAX_SECTORS; i++)
+ idu->addrs[i] = (unsigned long)values[i];
+
+ return 0;
+}
+
+/**
+ * @brief Setup IDU (e.g., CP, DSP) sections for this device
+ */
+static int triv2_setup_idu(struct trinity_driver *drv)
+{
+ struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+ struct device *dev = drv_to_dev_ptr(drv);
+ struct device_node *np = dev->of_node;
+ int err;
+
+ /* get Instruction Decode Unit (IDU) property */
+ err = parse_idu_property(dev, np, "samsung,idu_cp", &pdata->idu_cp);
+ if (err < 0) {
+ dev_err(dev, "Failed to parse idu property: samsung,idu_cp");
+ return err;
+ }
+
+ err = parse_idu_property(dev, np, "samsung,idu_dsp", &pdata->idu_dsp);
+ if (err < 0) {
+ dev_info(dev, "DSP is not supported");
+ pdata->idu_dsp.addrs = NULL;
+ }
+
+ /* try to find the IDU files (default) */
+ if (triv2_idu_load(drv, NULL, true) < 0) {
+ dev_warn(dev, "Failed to load IDU in the default path\n");
+ dev_warn(dev, "Should load IDU using sysfs later\n");
+ } else {
+ triv2_idu_check(drv);
+ }
+
+ if (pdata->idu_dsp.addrs && drv->dspm > 0) {
+ struct iommu_domain *domain;
+ phys_addr_t paddr;
+ dma_addr_t daddr;
+
+ /* iommu mapping for dspm segment */
+ domain = iommu_get_domain_for_dev(dev);
+ if (!domain)
+ return 0;
+
+ paddr = pdata->idu_dsp.addrs[0] + TRIV2_DSP_DSPM_OFFSET;
+ daddr = dma_map_resource(dev, paddr, drv->dspm,
+ DMA_BIDIRECTIONAL, 0);
+ pdata->idu_dsp.dspm = daddr;
+ }
+
+ return 0;
+}
+
+/**
+ * @brief Initialize necessary variables in TRIV2
+ */
+static int32_t triv2_init(struct trinity_driver *drv)
+{
+ triv2_init_common();
+ return triv2_init_pdata(drv);
+}
+
+/**
+ * @brief Clean up initialized variables in TRIV2
+ */
+static void triv2_cleanup(struct trinity_driver *drv)
+{
+ struct trinity_resv_mem *cmd_buf;
+ struct trinity_resv_mem *back_buf;
+
+ if (!drv->pdata)
+ return;
+
+ triv2_idu_unload(drv);
+
+ cmd_buf = TRIV2_DRV_GET_CMD_BUF(drv);
+ back_buf = TRIV2_DRV_GET_BACK_BUF(drv);
+
+ if (cmd_buf->vaddr)
+ trinity_free_from_resv_mem(cmd_buf, false);
+
+ if (back_buf->vaddr) {
+#ifdef CONFIG_TRINITY_FPGA
+ struct device *dev = drv_to_dev_ptr(drv);
+
+ if (trinity_hwmem_iommu_unmap(dev, back_buf->daddr,
+ back_buf->size) < 0)
+ dev_warn(dev,
+ "Unable to unmap iommu mapping for 0x%llx",
+ back_buf->daddr);
+#endif
+ trinity_free_from_resv_mem(back_buf, false);
+ }
+
+ list_del(&(TRIV2_DRV_GET_PDATA(drv)->list));
+ kfree(drv->pdata);
+ drv->pdata = NULL;
+}
+
+static struct trinity_desc triv2_desc = {
+ .type = "triv2",
+ .ver = GENVER(TRINITY_DEV_VISION2, VER_MAJOR, VER_MINOR, VER_EXTRA),
+ .fops = &triv2_fops,
+ /* device management */
+ .reset = triv2_reset,
+ .idu_load = triv2_idu_load,
+ .idu_version = triv2_idu_version,
+ .get_state = triv2_get_state,
+ .set_state = triv2_set_state,
+ /* req management */
+ .alloc_req = triv2_alloc_req,
+ .dealloc_req = triv2_dealloc_req,
+ .prepare_req = triv2_prepare_req,
+ .invoke_req = triv2_invoke_req,
+ /* profile */
+ .init_profile = triv2_init_profile,
+ .check_profile = triv2_check_profile,
+ .get_profile_meta = triv2_get_profile_meta,
+ .get_profile_buff = triv2_get_profile_buff,
+ .show_profile = triv2_show_profile,
+ .destroy_profile = triv2_destroy_profile,
+ /* etc. */
+ .handle_timeout = triv2_handle_timeout,
+ .stop_reqs = triv2_stop_reqs,
+ .drain_reqs = triv2_drain_reqs,
+ .handle_irq = triv2_handle_irq,
+};
+
+#ifdef CONFIG_PM_SLEEP
+static int triv2_suspend(struct device *dev)
+{
+ return 0;
+}
+
+static int triv2_resume(struct device *dev)
+{
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_PM
+static int triv2_runtime_suspended;
+static int triv2_runtime_resumed;
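+/* Paired counters: trinity_reset_device() performs its test reset only on
+ * the first resume of each balanced suspend/resume cycle.
+ */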
+
+static int triv2_runtime_suspend(struct device *dev)
+{
+ struct trinity_driver *drv;
+
+ drv = (struct trinity_driver *)dev_get_drvdata(dev);
+ if (!drv) {
+ dev_warn(dev, "Cannot find driver data");
+ return 0;
+ }
+
+ if (drv->verbose)
+ dev_info(dev, "%s called", __func__);
+
+ mutex_lock(&drv->lock);
+
+ /* 1) Ensure that the scheduler was suspended */
+ trinity_sched_suspend();
+
+ /* 2) Set pause state if it's in ready state */
+ if (triv2_get_state(drv) == TRINITY_STATE_READY)
+ triv2_set_state(drv, TRINITY_STATE_PAUSE);
+
+ mutex_unlock(&drv->lock);
+
+ triv2_runtime_suspended++;
+
+ return 0;
+}
+
+static int triv2_runtime_resume(struct device *dev)
+{
+ struct trinity_driver *drv;
+
+ drv = (struct trinity_driver *)dev_get_drvdata(dev);
+ if (!drv) {
+ dev_warn(dev, "Cannot find driver data");
+ return 0;
+ }
+
+ if (drv->verbose)
+ dev_info(dev, "%s called", __func__);
+
+ /* 0) Reset NPU devices (only once) */
+ trinity_reset_device(dev, triv2_runtime_resumed == 0);
+
+ mutex_lock(&drv->lock);
+
+ /* 1) Restore IDU setup */
+ triv2_setup_buffers(drv);
+ triv2_idu_load(drv, NULL, false);
+
+ /* 2) Set ready state if it was in ready state before */
+ if (drv->opened > 0)
+ triv2_set_state(drv, TRINITY_STATE_READY);
+
+ /* 3) Resume the req scheduler */
+ trinity_sched_resume();
+
+ mutex_unlock(&drv->lock);
+
+ if (++triv2_runtime_resumed == triv2_runtime_suspended)
+ triv2_runtime_resumed = triv2_runtime_suspended = 0;
+
+ return 0;
+}
+#endif
+
+static const struct dev_pm_ops triv2_dev_pm_ops = {
+ // clang-format off
+ SET_SYSTEM_SLEEP_PM_OPS(triv2_suspend, triv2_resume)
+ SET_RUNTIME_PM_OPS(triv2_runtime_suspend, triv2_runtime_resume, NULL)
+ // clang-format on
+};
+
+static const struct of_device_id trinity_match[] = {
+ {
+ .compatible = "samsung,trinity",
+ },
+ { /** sentinel */ },
+};
+
+/**
+ * @brief Probes for Trinity Vision devices and initializes them if found
+ */
+static int trinity_triv2_probe(struct platform_device *pdev)
+{
+ struct trinity_driver *drv;
+ int err;
+
+ err = trinity_probe(pdev, &triv2_desc);
+ if (err < 0)
+ return err;
+
+ drv = (struct trinity_driver *)platform_get_drvdata(pdev);
+ if (drv->dspm > 0) {
+ /* Part of the DSPM is reserved for DSP kernel operations */
+ if (drv->dspm < TRIV2_DSP_DSPM_OFFSET) {
+ dev_err(drv_to_dev_ptr(drv),
+ "Too small DSPM size... wrong device tree?");
+ err = -EINVAL;
+ goto out_remove;
+ }
+ drv->dspm -= TRIV2_DSP_DSPM_OFFSET;
+ }
+
+ err = triv2_init(drv);
+ if (err < 0)
+ goto out_remove;
+
+ err = triv2_setup_idu(drv);
+ if (err < 0) {
+ triv2_cleanup(drv);
+ goto out_remove;
+ }
+
+ err = trinity_create_node(drv);
+ if (err < 0) {
+ triv2_cleanup(drv);
+ goto out_remove;
+ }
+
+ dev_info(drv_to_dev_ptr(drv), "Trinity Vision2 (TRIV2) probed");
+
+ return 0;
+
+out_remove:
+ trinity_remove(pdev, &triv2_desc);
+ return err;
+}
+
+/**
+ * @brief Removes a particular instance of a Trinity vision device
+ */
+static int trinity_triv2_remove(struct platform_device *pdev)
+{
+ struct trinity_driver *drv;
+
+ drv = (struct trinity_driver *)platform_get_drvdata(pdev);
+
+ trinity_destroy_node(drv);
+ triv2_cleanup(drv);
+ return trinity_remove(pdev, &triv2_desc);
+}
+
+static struct platform_driver trinity_triv2 = {
+ .probe = trinity_triv2_probe,
+ .remove = trinity_triv2_remove,
+ .driver =
+ {
+ .name = "triv2",
+ .owner = THIS_MODULE,
+ .of_match_table = of_match_ptr(trinity_match),
+ .pm = &triv2_dev_pm_ops,
+ },
+};
+
+/* Register as a platform driver */
+module_platform_driver(trinity_triv2);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Dongju Chae <dongju.chae@samsung.com>");
+MODULE_AUTHOR("Wook Song <wook16.song@samsung.com>");
+MODULE_DESCRIPTION("Neural Processing Unit device driver for vision 2");
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * trinity/trinity_vision2_profile.h: Profile header for TRIV2 devices
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#ifndef __TRINITY_VISION2_PROFILE_H__
+#define __TRINITY_VISION2_PROFILE_H__
+
+#include <linux/types.h>
+
+#define TRIV2_MAX_OPNAME (128)
+#define TRIV2_MAX_PROFILE_SIZE (256)
+
+/* profiling data for each command */
+struct triv2_op_profile {
+ union {
+ struct {
+ char op_name[TRIV2_MAX_OPNAME];
+
+ int64_t cycles;
+
+ int64_t dram_read;
+ int64_t dram_write;
+
+ int64_t sram_read;
+ int64_t sram_write;
+
+ int64_t start_cycles;
+ int64_t end_cycles;
+
+ uint32_t opcode;
+ int64_t prog_seq;
+ int64_t exec_seq;
+ } __attribute__((packed));
+ uint8_t reserved[TRIV2_MAX_PROFILE_SIZE];
+ };
+};
+
+struct triv2_cmd_profile {
+ int64_t total_cycles;
+ uint32_t total_ops;
+ /* flexible array member */
+ struct triv2_op_profile profile_ops[];
+} __attribute__((packed));
+
+struct triv2_profile {
+ int req_id;
+ struct hlist_node hlist;
+ struct triv2_cmd_profile *data;
+};
+
+enum {
+ NOP = 0x00,
+ HALT = 0x01,
+ ADMA_IN = 0x02,
+ ADMA_OUT = 0x03,
+ RESCALE_I8 = 0x04,
+ RESCALE_I16 = 0x05,
+ CONVERT_I16_I8 = 0x06,
+ CONVERT_I8_I16 = 0x07,
+ RELUN_I8 = 0x08,
+ RELUN_I16 = 0x09,
+ PRELU_I8 = 0x0A,
+ PRELU_I16 = 0x0B,
+ ADD_I8 = 0x0C,
+ ADD_I16 = 0x0D,
+ REDUCE_MEAN_I8 = 0x0E,
+ REDUCE_MEAN_I16 = 0x0F,
+ MAX_POOL_I8 = 0x10,
+ MAX_POOL_I16 = 0x11,
+ AVG_POOL_I8 = 0x12,
+ AVG_POOL_I16 = 0x13,
+ CONV_I8 = 0x14,
+ CONV_I16 = 0x15,
+ CONVE_I8 = 0x16,
+ CONVE_I16 = 0x17,
+ TCONV_I8 = 0x18,
+ TCONV_I16 = 0x19,
+ MUL_I8 = 0x1A,
+ MUL_I16 = 0x1B,
+ DCONV_I8 = 0x1C,
+ DCONV_I16 = 0x1D,
+ DCONVE_I8 = 0x1E,
+ DCONVE_I16 = 0x1F,
+ CONV_I8_P = 0x20,
+ CONV_I16_P = 0x21,
+ PDMA_IN = 0x40,
+ PDMA_OUT = 0x41,
+ ARGMAX_I8 = 0x42,
+ ARGMAX_I16 = 0x43,
+ RESHAPE_I8 = 0x44,
+ RESHAPE_I16 = 0x45,
+ TRANSPOSE_I8 = 0x46,
+ TRANSPOSE_I16 = 0x47,
+ CONCAT_I8 = 0x48,
+ CONCAT_I16 = 0x49,
+ PAD_I8 = 0x4A,
+ PAD_I16 = 0x4B,
+ STRIDED_SLICE_I8 = 0x4C,
+ STRIDED_SLICE_I16 = 0x4D,
+ CONVERT_FORMAT_I8 = 0x4E,
+ CONVERT_FORMAT_I16 = 0x4F,
+ SIGMOID_I8 = 0x50,
+ SIGMOID_I16 = 0x51,
+ TANH_I8 = 0x52,
+ TANH_I16 = 0x53,
+ ELU_I8 = 0x54,
+ ELU_I16 = 0x55,
+ FLOOR_I8 = 0x56,
+ FLOOR_I16 = 0x57,
+ RSQRT_I8 = 0x58,
+ RSQRT_I16 = 0x59,
+ SQRT_I8 = 0x5A,
+ SQRT_I16 = 0x5B,
+ SOFTMAX_I8 = 0x5C,
+ SOFTMAX_I16 = 0x5D,
+ DIVIDE_I8 = 0x60,
+ DIVIDE_I16 = 0x61,
+ FLOORDIV_I8 = 0x62,
+ FLOORDIV_I16 = 0x63,
+ LOGICAL_OR_I8 = 0x64,
+ LOGICAL_OR_I16 = 0x65,
+ GREATER_I8 = 0x66,
+ GREATER_I16 = 0x67,
+ GREATER_EQUAL_I8 = 0x68,
+ GREATER_EQUAL_I16 = 0x69,
+ POW_I8 = 0x6A,
+ POW_I16 = 0x6B,
+ EXP_I8 = 0x6C,
+ EXP_I16 = 0x6D,
+ NOT_EQUAL_I8 = 0x6E,
+ NOT_EQUAL_I16 = 0x6F,
+ BATCH_TO_SPACE_I8 = 0x70,
+ BATCH_TO_SPACE_I16 = 0x71,
+ SPACE_TO_BATCH_I8 = 0x72,
+ SPACE_TO_BATCH_I16 = 0x73,
+ DEPTH_TO_SPACE_I8 = 0x74,
+ DEPTH_TO_SPACE_I16 = 0x75,
+ SPACE_TO_DEPTH_I8 = 0x76,
+ SPACE_TO_DEPTH_I16 = 0x77,
+ YUV_TO_RGB_I8 = 0x7A,
+ YUV_TO_RGB_I16 = 0x7B,
+ RESIZE_BILINEAR_I8 = 0x7C,
+ RESIZE_BILINEAR_I16 = 0x7D,
+ RESIZE_NEAREST_NEIGHBOR_I8 = 0x7E,
+ RESIZE_NEAREST_NEIGHBOR_I16 = 0x7F,
+ LOCAL_RESPONSE_NORM_I8 = 0x80,
+ LOCAL_RESPONSE_NORM_I16 = 0x81,
+ INSTANCE_NORM_I8 = 0x82,
+ INSTANCE_NORM_I16 = 0x83,
+ REDUCED_SUM_SSUM_I8 = 0x84,
+ REDUCED_SUM_SSUM_I16 = 0x85,
+ REDUCED_SUM_SSUM_ACC_I8 = 0x86,
+ REDUCED_SUM_SSUM_ACC_I16 = 0x87,
+ REDUCED_SUM_2SUM_I8 = 0x88,
+ REDUCED_SUM_2SUM_I16 = 0x89,
+ REDUCED_MEAN_DEV_WSUM_I8 = 0x8A,
+ REDUCED_MEAN_DEV_WSUM_I16 = 0x8B,
+ REDUCED_MEAN_DEV_I8 = 0x8C,
+ REDUCED_MEAN_DEV_I16 = 0x8D,
+ RESCALE_CW_I8 = 0x8E,
+ RESCALE_CW_I16 = 0x8F,
+ REDUCED_MEAN_SCALE_WSUM_I8 = 0x90,
+ REDUCED_MEAN_SCALE_WSUM_I16 = 0x91,
+ RESCALE_CHANNELWISE_I8 = 0x92,
+ RESCALE_CHANNELWISE_I16 = 0x93,
+};
+
+/* macros to generate an opcode-name table */
+#define TRIV2_GENERATE_OPNAME(OPNAME) [OPNAME] = #OPNAME,
+#define TRIV2_FOREACH_OPNAME(OPNAME) \
+ OPNAME(NOP) \
+ OPNAME(HALT) \
+ OPNAME(ADMA_IN) \
+ OPNAME(ADMA_OUT) \
+ OPNAME(RESCALE_I8) \
+ OPNAME(RESCALE_I16) \
+ OPNAME(CONVERT_I16_I8) \
+ OPNAME(CONVERT_I8_I16) \
+ OPNAME(RELUN_I8) \
+ OPNAME(RELUN_I16) \
+ OPNAME(PRELU_I8) \
+ OPNAME(PRELU_I16) \
+ OPNAME(ADD_I8) \
+ OPNAME(ADD_I16) \
+ OPNAME(REDUCE_MEAN_I8) \
+ OPNAME(REDUCE_MEAN_I16) \
+ OPNAME(MAX_POOL_I8) \
+ OPNAME(MAX_POOL_I16) \
+ OPNAME(AVG_POOL_I8) \
+ OPNAME(AVG_POOL_I16) \
+ OPNAME(CONV_I8) \
+ OPNAME(CONV_I16) \
+ OPNAME(CONVE_I8) \
+ OPNAME(CONVE_I16) \
+ OPNAME(TCONV_I8) \
+ OPNAME(TCONV_I16) \
+ OPNAME(MUL_I8) \
+ OPNAME(MUL_I16) \
+ OPNAME(DCONV_I8) \
+ OPNAME(DCONV_I16) \
+ OPNAME(DCONVE_I8) \
+ OPNAME(DCONVE_I16) \
+ OPNAME(CONV_I8_P) \
+ OPNAME(CONV_I16_P) \
+ OPNAME(PDMA_IN) \
+ OPNAME(PDMA_OUT) \
+ OPNAME(ARGMAX_I8) \
+ OPNAME(ARGMAX_I16) \
+ OPNAME(RESHAPE_I8) \
+ OPNAME(RESHAPE_I16) \
+ OPNAME(TRANSPOSE_I8) \
+ OPNAME(TRANSPOSE_I16) \
+ OPNAME(CONCAT_I8) \
+ OPNAME(CONCAT_I16) \
+ OPNAME(PAD_I8) \
+ OPNAME(PAD_I16) \
+ OPNAME(STRIDED_SLICE_I8) \
+ OPNAME(STRIDED_SLICE_I16) \
+ OPNAME(CONVERT_FORMAT_I8) \
+ OPNAME(CONVERT_FORMAT_I16) \
+ OPNAME(SIGMOID_I8) \
+ OPNAME(SIGMOID_I16) \
+ OPNAME(TANH_I8) \
+ OPNAME(TANH_I16) \
+ OPNAME(ELU_I8) \
+ OPNAME(ELU_I16) \
+ OPNAME(FLOOR_I8) \
+ OPNAME(FLOOR_I16) \
+ OPNAME(RSQRT_I8) \
+ OPNAME(RSQRT_I16) \
+ OPNAME(SQRT_I8) \
+ OPNAME(SQRT_I16) \
+ OPNAME(SOFTMAX_I8) \
+ OPNAME(SOFTMAX_I16) \
+ OPNAME(DIVIDE_I8) \
+ OPNAME(DIVIDE_I16) \
+ OPNAME(FLOORDIV_I8) \
+ OPNAME(FLOORDIV_I16) \
+ OPNAME(LOGICAL_OR_I8) \
+ OPNAME(LOGICAL_OR_I16) \
+ OPNAME(GREATER_I8) \
+ OPNAME(GREATER_I16) \
+ OPNAME(GREATER_EQUAL_I8) \
+ OPNAME(GREATER_EQUAL_I16) \
+ OPNAME(POW_I8) \
+ OPNAME(POW_I16) \
+ OPNAME(EXP_I8) \
+ OPNAME(EXP_I16) \
+ OPNAME(NOT_EQUAL_I8) \
+ OPNAME(NOT_EQUAL_I16) \
+ OPNAME(BATCH_TO_SPACE_I8) \
+ OPNAME(BATCH_TO_SPACE_I16) \
+ OPNAME(SPACE_TO_BATCH_I8) \
+ OPNAME(SPACE_TO_BATCH_I16) \
+ OPNAME(DEPTH_TO_SPACE_I8) \
+ OPNAME(DEPTH_TO_SPACE_I16) \
+ OPNAME(SPACE_TO_DEPTH_I8) \
+ OPNAME(SPACE_TO_DEPTH_I16) \
+ OPNAME(YUV_TO_RGB_I8) \
+ OPNAME(YUV_TO_RGB_I16) \
+ OPNAME(RESIZE_BILINEAR_I8) \
+ OPNAME(RESIZE_BILINEAR_I16) \
+ OPNAME(RESIZE_NEAREST_NEIGHBOR_I8) \
+ OPNAME(RESIZE_NEAREST_NEIGHBOR_I16) \
+ OPNAME(LOCAL_RESPONSE_NORM_I8) \
+ OPNAME(LOCAL_RESPONSE_NORM_I16) \
+ OPNAME(INSTANCE_NORM_I8) \
+ OPNAME(INSTANCE_NORM_I16) \
+ OPNAME(REDUCED_SUM_SSUM_I8) \
+ OPNAME(REDUCED_SUM_SSUM_I16) \
+ OPNAME(REDUCED_SUM_SSUM_ACC_I8) \
+ OPNAME(REDUCED_SUM_SSUM_ACC_I16) \
+ OPNAME(REDUCED_SUM_2SUM_I8) \
+ OPNAME(REDUCED_SUM_2SUM_I16) \
+ OPNAME(REDUCED_MEAN_DEV_WSUM_I8) \
+ OPNAME(REDUCED_MEAN_DEV_WSUM_I16) \
+ OPNAME(REDUCED_MEAN_DEV_I8) \
+ OPNAME(REDUCED_MEAN_DEV_I16) \
+ OPNAME(RESCALE_CW_I8) \
+ OPNAME(RESCALE_CW_I16) \
+ OPNAME(REDUCED_MEAN_SCALE_WSUM_I8) \
+ OPNAME(REDUCED_MEAN_SCALE_WSUM_I16) \
+ OPNAME(RESCALE_CHANNELWISE_I8) \
+ OPNAME(RESCALE_CHANNELWISE_I16)
+#endif /* __TRINITY_VISION2_PROFILE_H__ */
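The opcode enum and the TRIV2_FOREACH_OPNAME/TRIV2_GENERATE_OPNAME pair form a classic X-macro: expanding the list once with the generator yields a sparse, opcode-indexed table of printable names. As a minimal sketch of how a consumer could expand it (the triv2_opnames table and triv2_opname() helper are illustrative, not part of this header; ARRAY_SIZE is the usual kernel helper):

    /* Expand the X-macro once; gaps in the opcode space stay NULL. */
    static const char *triv2_opnames[] = {
            TRIV2_FOREACH_OPNAME(TRIV2_GENERATE_OPNAME)
    };

    static const char *triv2_opname(uint32_t opcode)
    {
            if (opcode >= ARRAY_SIZE(triv2_opnames) || !triv2_opnames[opcode])
                    return "UNKNOWN";
            return triv2_opnames[opcode];
    }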
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * include/uapi/misc/trinity.h: User-level header for trinity devices.
+ *
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Parichay Kapoor <pk.kapoor@samsung.com>
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ * Copyright (C) 2020 Wook Song <wook16.song@samsung.com>
+ */
+
+#ifndef __TRINITY_H__
+#define __TRINITY_H__
+
+#include <linux/types.h>
+
+#define TRINITY_API_LEVEL 12
+
+/**
+ * enum trinity_state - Enum that describes a trinity device state
+ * @TRINITY_STATE_UNKNOWN: A device has unknown state
+ * @TRINITY_STATE_PAUSE: A device is paused
+ * @TRINITY_STATE_READY: A device is ready
+ * @TRINITY_STATE_END: End of trinity_state
+ */
+enum trinity_state {
+ TRINITY_STATE_UNKNOWN = -1,
+ TRINITY_STATE_PAUSE = 0,
+ TRINITY_STATE_READY,
+ TRINITY_STATE_END,
+};
+
+/**
+ * enum trinity_input_mode - Enum that describes an input source
+ * @TRINITY_INPUT_UNKNOWN: Unknown input mode
+ * @TRINITY_INPUT_CPU: Input fed by the CPU
+ * @TRINITY_INPUT_HW: Input fed by third-party HW
+ * @TRINITY_INPUT_END: End of trinity_input_mode
+ */
+enum trinity_input_mode {
+ TRINITY_INPUT_UNKNOWN = -1,
+ TRINITY_INPUT_CPU = 0,
+ TRINITY_INPUT_HW,
+ TRINITY_INPUT_END,
+};
+
+/**
+ * enum trinity_output_mode - Enum that describes an output source
+ * @TRINITY_OUTPUT_UNKNOWN: Unknown output mode
+ * @TRINITY_OUTPUT_CPU_INTR: Output completion handling by interrupt
+ * @TRINITY_OUTPUT_CPU_POLL: Output completion handling by polling
+ * @TRINITY_OUTPUT_HW: Output completion handling by third-party HW
+ * @TRINITY_OUTPUT_END: End of trinity_output_mode
+ */
+enum trinity_output_mode {
+ TRINITY_OUTPUT_UNKNOWN = -1,
+ TRINITY_OUTPUT_CPU_INTR = 0,
+ TRINITY_OUTPUT_CPU_POLL,
+ TRINITY_OUTPUT_HW,
+ TRINITY_OUTPUT_END,
+};
+
+/**
+ * enum trinity_app_status - Enum that describes an app status
+ * @TRINITY_APP_STATUS_UNKNOWN: Unknown app status
+ * @TRINITY_APP_STATUS_ERROR: App encountered an error
+ * @TRINITY_APP_STATUS_PENDING: App is currently pending
+ * @TRINITY_APP_STATUS_STARTED: App was started
+ * @TRINITY_APP_STATUS_TERMINATED: App was terminated
+ */
+enum trinity_app_status {
+ TRINITY_APP_STATUS_UNKNOWN = 0,
+ TRINITY_APP_STATUS_ERROR = 1,
+ TRINITY_APP_STATUS_PENDING = 2,
+ TRINITY_APP_STATUS_STARTED = 3,
+ TRINITY_APP_STATUS_TERMINATED = 4
+};
+
+/**
+ * enum trinity_req_status - Enum that describes a request status
+ * @TRINITY_REQ_STATUS_UNKNOWN: Unknown request status
+ * @TRINITY_REQ_STATUS_ERROR: Request encountered an error
+ * @TRINITY_REQ_STATUS_PENDING: Request is currently pending
+ * @TRINITY_REQ_STATUS_RUNNING: Request is currently running
+ * @TRINITY_REQ_STATUS_FINISHED: Request was finished
+ */
+enum trinity_req_status {
+ TRINITY_REQ_STATUS_UNKNOWN = 0,
+ TRINITY_REQ_STATUS_ERROR = 1,
+ TRINITY_REQ_STATUS_PENDING = 2, /* A request is submitted */
+ TRINITY_REQ_STATUS_RUNNING = 3, /* A request is running on NPU */
+ TRINITY_REQ_STATUS_FINISHED = 4 /* A request is just finished */
+};
+
+/**
+ * enum trinity_req_priority - Enum that describes a request priority
+ * @TRINITY_REQ_PRIORITY_LOW: Low priority
+ * @TRINITY_REQ_PRIORITY_MID: Mid priority, scheduled with a higher chance than low priority
+ * @TRINITY_REQ_PRIORITY_HIGH: High priority, preempting lower-priority requests
+ */
+enum trinity_req_priority {
+ TRINITY_REQ_PRIORITY_LOW = 0,
+ TRINITY_REQ_PRIORITY_MID = 1,
+ TRINITY_REQ_PRIORITY_HIGH = 2,
+};
+
+/**
+ * enum trinity_hwmem_type - A type of DMA buffer allocation method.
+ * @TRINITY_HWMEM_DMA_CONT: Use CMA to allocate the backing storage of DMA buffers.
+ * @TRINITY_HWMEM_DMA_IOMMU: Use the IOMMU to allocate the backing storage of DMA buffers.
+ * @TRINITY_HWMEM_END: Sentinel.
+ */
+enum trinity_hwmem_type {
+ TRINITY_HWMEM_DMA_CONT = 0,
+ TRINITY_HWMEM_DMA_IOMMU,
+ TRINITY_HWMEM_END,
+};
+
+#ifndef TASK_COMM_LEN
+#define TASK_COMM_LEN 16
+#endif
+
+#define TRINITY_APP_NAME_MAX TASK_COMM_LEN
+#define TRINITY_APP_STAT_MAX 10
+#define TRINITY_REQ_STAT_MAX 10
+
+/**
+ * struct trinity_ioctl_stat_app - Describes stat of the target app
+ * @app_id: Trinity app id (currently, equal to pid)
+ * @name: Trinity app name
+ * @status: Trinity app status
+ * @num_total_reqs: Number of total requests in app (including finished ones)
+ * @num_active_reqs: Number of active (running or pending) requests in app
+ * @total_alloc_mem: Total size of allocated memory in the device
+ * @total_freed_mem: Total size of freed memory in the device
+ */
+struct trinity_ioctl_stat_app {
+ __s32 app_id;
+
+ char name[TRINITY_APP_NAME_MAX];
+ enum trinity_app_status status;
+
+ __u32 num_total_reqs;
+ __u32 num_active_reqs;
+
+ __u64 total_alloc_mem;
+ __u64 total_freed_mem;
+} __attribute__((packed));
+
+/**
+ * struct trinity_ioctl_stat_apps - Describes stats of the latest apps
+ * @num_apps: Number of apps for the stat list
+ * @stat: Stat of the latest apps
+ */
+struct trinity_ioctl_stat_apps {
+ __u32 num_apps;
+ struct trinity_ioctl_stat_app stat[TRINITY_APP_STAT_MAX];
+} __attribute__((packed));
+
+/**
+ * struct trinity_ioctl_stat_req - Describes stat of the target request
+ * @req_id: Trinity req id
+ * @model_id: Trinity model id
+ * @priority: Request priority (low, mid, or high)
+ * @status: Request status
+ * @sched_time: Scheduling time in ms
+ * @infer_time: Inference time in ms
+ */
+struct trinity_ioctl_stat_req {
+ __s32 req_id;
+ __u64 model_id;
+
+ enum trinity_req_priority priority;
+ enum trinity_req_status status;
+
+ __u32 sched_time;
+ __u32 infer_time;
+} __attribute__((packed));
+
+/**
+ * struct trinity_ioctl_stat_reqs - Describes stats of the latest reqs
+ * @app_id: Trinity app id (0 means 'current')
+ * @num_reqs: Number of reqs for stat list
+ * @stat: Stat of the latest reqs
+ */
+struct trinity_ioctl_stat_reqs {
+ __s32 app_id;
+ __u32 num_reqs;
+ struct trinity_ioctl_stat_req stat[TRINITY_REQ_STAT_MAX];
+} __attribute__((packed));
+
+/**
+ * struct trinity_ioctl_hwmem - Describes hardware memory (hwmem)
+ * @type: The allocation type of the hwmem
+ * @size: The size of hwmem
+ * @dbuf_fd: File descriptor for dmabuf representing hwmem
+ */
+struct trinity_ioctl_hwmem {
+ enum trinity_hwmem_type type;
+ __u64 size;
+ __s32 dbuf_fd;
+} __attribute__((packed));
+
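As a hedged user-space sketch of the allocation path (assuming fd is an open Trinity device node, that <sys/ioctl.h> and this header are included, and that the driver fills dbuf_fd on a successful TRINITY_IOCTL_HWMEM_ALLOC, as the field documentation implies):

    struct trinity_ioctl_hwmem hwmem = {
            .type = TRINITY_HWMEM_DMA_IOMMU,
            .size = 1 << 20, /* 1 MiB of driver-managed memory */
    };

    if (ioctl(fd, TRINITY_IOCTL_HWMEM_ALLOC, &hwmem) < 0) {
            perror("TRINITY_IOCTL_HWMEM_ALLOC");
            return -1;
    }
    /* hwmem.dbuf_fd now refers to the backing dmabuf; pass the same
     * structure to TRINITY_IOCTL_HWMEM_DEALLOC to release it. */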
+/**
+ * struct trinity_ioctl_profile_meta - Describes profiling meta info.
+ * @req_id: The target req id for profiling
+ * @total_cycles: The total number of cycles of the given req
+ * @total_ops: The total number of operations of the given req
+ * @input_footprint: The DRAM footprint of input data
+ * @output_footprint: The DRAM footprint of output data
+ * @profile_size: The size of profiling data
+ */
+struct trinity_ioctl_profile_meta {
+ __s32 req_id;
+ __s64 total_cycles;
+ __u32 total_ops;
+ __s64 input_footprint;
+ __s64 output_footprint;
+ __u32 profile_size;
+} __attribute__((packed));
+
+/**
+ * struct trinity_ioctl_profile_buff - Describes profiling buff info.
+ * @req_id: The target req id for profiling
+ * @profile_pos: The start position to extract profiling data
+ * @profile_size: The size of user-allocated profiling buffer
+ * @profile_buf: The profiling buffer allocated by the user
+ */
+struct trinity_ioctl_profile_buff {
+ __s32 req_id;
+ __u32 profile_pos;
+ __u32 profile_size;
+ void __user *profile_buf;
+} __attribute__((packed));
+
+/**
+ * struct trinity_ioctl_model - Configures a model registered on the NPU
+ * @id: Id for NPU model to extract the base phys addr
+ * @dbuf_fd: File descriptor for dmabuf representing the model
+ * @program_offset_addr: Offset address for the instructions (NPU_PROG_BASE)
+ * @program_size: Size of the program instructions (NPU_PROG_SIZE)
+ * @version: The version of npubinfmt
+ * @endp_trnt_model_common: Indicator for the end of common model parameters
+ * @weight_offset_addr: Offset address for storing weights (NPU_WGT_BASE)
+ * @metadata_dbuf_fd: File descriptor for dmabuf representing the metadata
+ * @metadata_ext_dbuf_fd: File descriptor for dmabuf representing the extra metadata
+ * @metadata_ext_size: Size of the extra metadata
+ * @num_visa_insts: Number of virtual ISA instructions
+ */
+struct trinity_ioctl_model {
+ __u64 id;
+ __s32 dbuf_fd;
+ __u64 program_offset_addr;
+ __u64 program_size;
+ __u32 version;
+ union {
+ __u8 endp_trnt_model_common[0];
+ struct {
+ __u64 weight_offset_addr;
+ } __attribute__((packed));
+ struct {
+ __s32 metadata_dbuf_fd;
+ __s32 metadata_ext_dbuf_fd;
+ __u64 metadata_ext_size;
+ __u32 num_visa_insts;
+ } __attribute__((packed));
+ };
+} __attribute__((packed));
+
+/**
+ * struct trinity_ioctl_input - Configures an input passed to the NPU
+ * @dbuf_fd: File descriptor for dmabuf of I/O buffer (or segment table)
+ * @model_id: Model id received when setting the model in the NPU
+ * @req_id: Request id to distinguish each run_input
+ * @timeout_ms: Timeout in ms, zero is regarded as preemption
+ * @priority: Priority (LOW: 0, MID: 1, HIGH: 2)
+ * @endp_trnt_input_common: Indicator for the end of common input parameters
+ * @activation_offset_addr0: Offset address for storing activations (NPU_ACT_BASE0)
+ * @activation_offset_addr1: Offset address for storing activations (NPU_ACT_BASE1)
+ * @num_segments: Number of segments
+ * @input_mode: Input mode (who is supposed to feed input)
+ * @output_mode: Output mode (who is supposed to retrieve output)
+ * @hw_input_seg: Third-party HW's input segment idx
+ * @hw_output_seg: Third-party HW's output segment idx
+ */
+struct trinity_ioctl_input {
+ /* dmabuf fd of the I/O buffer (or segment table); used to extract the base phys addr */
+ __s32 dbuf_fd;
+ __u64 model_id;
+ __s32 req_id;
+ __s64 timeout_ms;
+ __u32 priority;
+ union {
+ __u8 endp_trnt_input_common[0];
+ struct {
+ /* added for TRIV-1 */
+ __u64 activation_offset_addr0;
+ __u64 activation_offset_addr1;
+ } __attribute__((packed));
+ struct {
+ /* added for TRIV-2 */
+ __u32 num_segments;
+ enum trinity_input_mode input_mode;
+ enum trinity_output_mode output_mode;
+ __s32 hw_input_seg;
+ __s32 hw_output_seg;
+ /* [optional] vd scheduler info */
+ union {
+ struct { /* user request */
+ __u32 task_handle;
+ __u32 subtask_idx;
+ } __attribute__((packed));
+ struct { /* kernel request */
+ __u32 task_id;
+ } __attribute__((packed));
+ };
+ } __attribute__((packed));
+ };
+} __attribute__((packed));
+
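For the TRIV-2 arm of the union, a submission might look like the following minimal user-space sketch; fd, segt_fd, model_id, and num_segs are assumed to come from earlier open/alloc/register steps, and TRINITY_IOCTL_RUN_INPUT is defined later in this header:

    struct trinity_ioctl_input input = {
            .dbuf_fd = segt_fd, /* dmabuf of the segment table */
            .model_id = model_id, /* from TRINITY_IOCTL_REGISTER_MODEL */
            .timeout_ms = 1000,
            .priority = TRINITY_REQ_PRIORITY_MID,
            .num_segments = num_segs,
            .input_mode = TRINITY_INPUT_CPU,
            .output_mode = TRINITY_OUTPUT_CPU_INTR,
            .hw_input_seg = -1, /* no third-party HW involved */
            .hw_output_seg = -1,
    };

    if (ioctl(fd, TRINITY_IOCTL_RUN_INPUT, &input) < 0)
            perror("TRINITY_IOCTL_RUN_INPUT");
    /* on success, input.req_id is expected to identify the request,
     * e.g. for TRINITY_IOCTL_STOP_REQUEST */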
+/**
+ * struct trinity_ioctl_fpga_memcpy - Describes a driver-assisted memory copy
+ * @dbuf_fd: File descriptor for dmabuf of the target buffer
+ * @dbuf_off: Offset from the dmabuf base address
+ * @user_addr: Address of user-level buffer
+ * @user_size: Size of user-level buffer
+ *
+ * It's a workaround structure for the FPGA environment.
+ */
+struct trinity_ioctl_fpga_memcpy {
+ __s32 dbuf_fd;
+ __u32 dbuf_off;
+ void __user *user_addr;
+ __u64 user_size;
+} __attribute__((packed));
+
+/*
+ * Structure to share device status with user space.
+ * This will be moved to debugfs.
+ */
+#if 0
+struct trinity_status {
+ /** Processor Information */
+ __u32 cp_info;
+
+ /** Processor Status */
+ __u32 cp_proc_stat;
+ __u32 npu_stat;
+
+ /** Control Status */
+ __u32 cp_dmai_ctrl;
+
+ /** Monitor Registers */
+ __u32 cp_cnt_cfg;
+ __u32 cp_cnt_frl;
+ __u32 cp_cnt_frh;
+ __u32 cp_cnt_stl;
+ __u32 cp_cnt_sth;
+};
+#endif
+
+#define TRINITY_MASK_DEV (0xFF000000)
+#define TRINITY_MASK_MAJOR_VER (0x00FF0000)
+#define TRINITY_MASK_MINOR_VER (0x0000FF00)
+#define TRINITY_MASK_EXTRA_VER (0x000000FF)
+
+#define TRINITY_SHIFT_DEV (24)
+#define TRINITY_SHIFT_MAJOR_VER (16)
+#define TRINITY_SHIFT_MINOR_VER (8)
+#define TRINITY_SHIFT_EXTRA_VER (0)
+#define TRINITY_SHIFT_MODEL_ID (16)
+
+#define trinity_gen_ver(dev, mj, mn, ex) \
+ (((dev) << TRINITY_SHIFT_DEV) | ((mj) << TRINITY_SHIFT_MAJOR_VER) | \
+ ((mn) << TRINITY_SHIFT_MINOR_VER) | \
+ ((ex) << TRINITY_SHIFT_EXTRA_VER))
+
+/**
+ * enum trinity_dev_type - Enum that describes a trinity device type
+ * @TRINITY_DEV_UNKNOWN: Unknown device type
+ * @TRINITY_DEV_VISION: Trinity Vision (TRIV)
+ * @TRINITY_DEV_AUDIO: Trinity ASR (TRIA)
+ * @TRINITY_DEV_VISION2: Trinity Vision2 (TRIV2)
+ * @TRINITY_DEV_VISION2_CUSE: Trinity Vision2 (TRIV2), CUSE-based impl.
+ * @TRINITY_DEV_END: End of trinity_dev_type
+ */
+enum trinity_dev_type {
+ TRINITY_DEV_UNKNOWN = 0,
+ TRINITY_DEV_VISION,
+ TRINITY_DEV_AUDIO,
+ TRINITY_DEV_VISION2,
+ TRINITY_DEV_VISION2_CUSE, /* CUSE-based impl. for triv2 */
+ TRINITY_DEV_END /* sentinel */
+};
+
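Putting trinity_gen_ver() and the mask/shift macros together, a short sketch of encoding a version word and decoding it again (the 3/1/0 version numbers are arbitrary examples):

    __u32 ver = trinity_gen_ver(TRINITY_DEV_VISION2, 3, 1, 0);
    unsigned int dev_type = (ver & TRINITY_MASK_DEV) >> TRINITY_SHIFT_DEV;
    unsigned int major = (ver & TRINITY_MASK_MAJOR_VER) >> TRINITY_SHIFT_MAJOR_VER;
    unsigned int minor = (ver & TRINITY_MASK_MINOR_VER) >> TRINITY_SHIFT_MINOR_VER;
    /* dev_type == TRINITY_DEV_VISION2, major == 3, minor == 1 */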
+/*
+ * The major number can't be dynamic, as the ioctl definitions need it.
+ */
+#define TRINITY_DRIVER_MAGIC 0x88
+
+#define TRINITY_IO(no) _IO(TRINITY_DRIVER_MAGIC, no)
+#define TRINITY_IOR(no, data_type) _IOR(TRINITY_DRIVER_MAGIC, no, data_type)
+#define TRINITY_IOW(no, data_type) _IOW(TRINITY_DRIVER_MAGIC, no, data_type)
+#define TRINITY_IOWR(no, data_type) _IOWR(TRINITY_DRIVER_MAGIC, no, data_type)
+
+/** Device Information */
+
+/** Get the device version information from the driver */
+#define TRINITY_IOCTL_GET_VERSION TRINITY_IOR(1, __u32)
+/** Get the device API level from the driver */
+#define TRINITY_IOCTL_GET_API_LEVEL TRINITY_IOR(2, __u32)
+/** Get the device state from the driver */
+#define TRINITY_IOCTL_GET_STATE TRINITY_IOR(3, __s32)
+/** Get the device tops information from the driver */
+#define TRINITY_IOCTL_GET_TOPS TRINITY_IOR(4, __u32)
+/** Get the device dspm information from the driver */
+#define TRINITY_IOCTL_GET_DSPM TRINITY_IOR(5, __u32)
+/** Get the next request ID from the driver */
+#define TRINITY_IOCTL_GET_NEXT_REQUEST TRINITY_IOR(6, __s32)
+
+/** Device Control */
+
+/** Allocate driver-managed memory */
+#define TRINITY_IOCTL_HWMEM_ALLOC TRINITY_IOW(21, struct trinity_ioctl_hwmem)
+
+/** De-allocate driver-managed memory */
+#define TRINITY_IOCTL_HWMEM_DEALLOC TRINITY_IOW(22, struct trinity_ioctl_hwmem)
+
+/** Register the given model config in the device and return model id */
+#define TRINITY_IOCTL_REGISTER_MODEL \
+ TRINITY_IOWR(23, struct trinity_ioctl_model)
+
+/** Unregister the model config associated with the given model_id */
+#define TRINITY_IOCTL_DEREGISTER_MODEL TRINITY_IOW(24, __u64)
+
+/** Run the device with the given input */
+#define TRINITY_IOCTL_RUN_INPUT TRINITY_IOWR(25, struct trinity_ioctl_input)
+
+/** Stop all requests submitted to the device */
+#define TRINITY_IOCTL_STOP_REQUESTS TRINITY_IO(26)
+
+/** Stop the target request with id returned by run_input */
+#define TRINITY_IOCTL_STOP_REQUEST TRINITY_IOW(27, __s32)
+
+/** Device Statistics/Profile */
+
+/** Get the current app stat in the opened device */
+#define TRINITY_IOCTL_STAT_CURRENT_APP \
+ TRINITY_IOR(51, struct trinity_ioctl_stat_app)
+
+/** Get latest apps' stat of the opened device */
+#define TRINITY_IOCTL_STAT_APPS TRINITY_IOR(52, struct trinity_ioctl_stat_apps)
+
+/** Get latest reqs' stat in the target app */
+#define TRINITY_IOCTL_STAT_REQS TRINITY_IOR(53, struct trinity_ioctl_stat_reqs)
+
+/** Get profiling metadata of the request */
+#define TRINITY_IOCTL_GET_PROFILE_META \
+ TRINITY_IOWR(54, struct trinity_ioctl_profile_meta)
+
+/** Get profiling per-op data of the request */
+#define TRINITY_IOCTL_GET_PROFILE_BUFF \
+ TRINITY_IOWR(55, struct trinity_ioctl_profile_buff)
+
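The two profiling ioctls are designed to be used in tandem: TRINITY_IOCTL_GET_PROFILE_META reports profile_size for a request, after which TRINITY_IOCTL_GET_PROFILE_BUFF copies the per-op records into a user buffer (optionally in chunks via profile_pos). A minimal sketch, assuming fd and req_id are valid and that __user expands to nothing in user space, as usual for uapi headers:

    struct trinity_ioctl_profile_meta meta = { .req_id = req_id };
    struct trinity_ioctl_profile_buff buff = { .req_id = req_id };
    void *data;

    if (ioctl(fd, TRINITY_IOCTL_GET_PROFILE_META, &meta) < 0)
            return -1;

    data = malloc(meta.profile_size);
    if (!data)
            return -1;

    buff.profile_pos = 0; /* start of the profiling data */
    buff.profile_size = meta.profile_size;
    buff.profile_buf = data;

    if (ioctl(fd, TRINITY_IOCTL_GET_PROFILE_BUFF, &buff) < 0) {
            free(data);
            return -1;
    }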
+/** Device Testing/Workaround */
+
+/** Driver-assisted memory copy for FPGA env. */
+#define TRINITY_IOCTL_FPGA_MEMCPY \
+ TRINITY_IOWR(91, struct trinity_ioctl_fpga_memcpy)
+
+/** A wrapper of trinity_run_internal_req() */
+#define TRINITY_IOCTL_RUN_INTERNAL_REQ TRINITY_IOW(92, dev_t)
+
+#ifdef __KERNEL__
+__s32 trinity_run_internal_req(dev_t);
+#endif /* __KERNEL__ */
+#endif /* __TRINITY_H__ */