misc/trinity: Add trinity npu driver
authorDongju Chae <dongju.chae@samsung.com>
Wed, 29 Dec 2021 04:58:43 +0000 (13:58 +0900)
committerHoegeun Kwon <hoegeun.kwon@samsung.com>
Wed, 23 Nov 2022 02:23:53 +0000 (11:23 +0900)
This patch uploads trinity npu driver source codes
(for now, triv2 only) including the srnpu iommu codes.

Note that the driver may not be available on the rpi4 board,
but let's just sync codes as the reference tizen kernel.

Change-Id: I61353768adb948e6189aeb2c7c79082c3ddbc2ba
Signed-off-by: Dongju Chae <dongju.chae@samsung.com>
[Fix conflicts for kernel v5.15.y]
Signed-off-by: Hoegeun Kwon <hoegeun.kwon@samsung.com>
32 files changed:
drivers/iommu/Kconfig
drivers/iommu/Makefile
drivers/iommu/srnpu-iommu.c [new file with mode: 0644]
drivers/misc/Kconfig
drivers/misc/Makefile
drivers/misc/trinity/Kconfig [new file with mode: 0644]
drivers/misc/trinity/Makefile [new file with mode: 0644]
drivers/misc/trinity/support/arm_fvp.c [new file with mode: 0644]
drivers/misc/trinity/support/xilinx_fpga.c [new file with mode: 0644]
drivers/misc/trinity/trinity.c [new file with mode: 0644]
drivers/misc/trinity/trinity.h [new symlink]
drivers/misc/trinity/trinity_common.h [new file with mode: 0644]
drivers/misc/trinity/trinity_debug.c [new file with mode: 0644]
drivers/misc/trinity/trinity_hwmem.c [new file with mode: 0644]
drivers/misc/trinity/trinity_hwmem.h [new file with mode: 0644]
drivers/misc/trinity/trinity_hwmem_iommu_helper.c [new file with mode: 0644]
drivers/misc/trinity/trinity_hwmem_iommu_helper.h [new file with mode: 0644]
drivers/misc/trinity/trinity_monitor.c [new file with mode: 0644]
drivers/misc/trinity/trinity_monitor.h [new file with mode: 0644]
drivers/misc/trinity/trinity_resv_mem.c [new file with mode: 0644]
drivers/misc/trinity/trinity_resv_mem.h [new file with mode: 0644]
drivers/misc/trinity/trinity_sched.c [new file with mode: 0644]
drivers/misc/trinity/trinity_sched.h [new file with mode: 0644]
drivers/misc/trinity/trinity_sched_sr.c [new file with mode: 0644]
drivers/misc/trinity/trinity_stat.c [new file with mode: 0644]
drivers/misc/trinity/trinity_stat.h [new file with mode: 0644]
drivers/misc/trinity/trinity_sysfs.c [new file with mode: 0644]
drivers/misc/trinity/trinity_trace.c [new file with mode: 0644]
drivers/misc/trinity/trinity_trace.h [new file with mode: 0644]
drivers/misc/trinity/trinity_vision2_drv.c [new file with mode: 0644]
drivers/misc/trinity/trinity_vision2_profile.h [new file with mode: 0644]
include/uapi/misc/trinity.h [new file with mode: 0644]

index 3eb68fa..a311891 100644 (file)
@@ -486,4 +486,17 @@ config SPRD_IOMMU
 
          Say Y here if you want to use the multimedia devices listed above.
 
+config SR_NPU_IOMMU
+       bool "SR Trinity Vision 2 IOMMU Support"
+       depends on ARM || ARM64
+       select ARM_DMA_USE_IOMMU
+       select IOMMU_API
+       select IOMMU_DMA
+       help
+         Support for the IOMMU on Samsung Research (SR) Neural Processing
+         Unit (NPU), Trinity Vision 2, family. This enables the NPUs to see
+         non-linear physical memory chunks as linear memory in their address
+         space.
+
+         If unsure, say N here.
 endif # IOMMU_SUPPORT
index bc7f730..4babe35 100644 (file)
@@ -30,3 +30,4 @@ obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
 obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o io-pgfault.o
 obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o
 obj-$(CONFIG_APPLE_DART) += apple-dart.o
+obj-$(CONFIG_SR_NPU_IOMMU) += srnpu-iommu.o
diff --git a/drivers/iommu/srnpu-iommu.c b/drivers/iommu/srnpu-iommu.c
new file mode 100644 (file)
index 0000000..363c226
--- /dev/null
@@ -0,0 +1,1301 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * IOMMU (ARM/ARM64) driver for Samsung Research NPU device family
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Wook Song <wook16.song@samsung.com>
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#include <linux/bitmap.h>
+#include <linux/dma-iommu.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/iommu.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_iommu.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+
+#include <asm/cacheflush.h>
+#ifdef CONFIG_ARM
+#include <asm/dma-iommu.h>
+#define MASK_HIGH_BIT (31)
+#else /* CONFIG_ARM64 */
+#define MASK_HIGH_BIT (35)
+#endif
+
+#define CHECK_BITS(val, mask) ((val & mask) == mask)
+
+/* Register offsets for CBOX */
+#define MMREG_CBOX_IDX       (2)
+#define OFFSET_CBOX_IOMMU_DLA (0x1000)
+#define OFFSET_CBOX_IOMMU_DSP (0x2000)
+#define LENGTH_CBOX_IOMMU     (0x1000)
+
+/* Register offsets for SRNPU-IOMMU */
+#define OFFSET_IOMMU_CTRL            (0x0)
+#define OFFSET_IOMMU_STATUS          (0x4)
+#define OFFSET_IOMMU_FLPT_BASE       (0x8)
+#define OFFSET_IOMMU_ALL_INVALIDATION (0xC)
+#define OFFSET_IOMMU_VPN_INVALIDATION (0x10)
+#define OFFSET_IOMMU_ICH_FAULT_STATUS (0x14)
+#define OFFSET_IOMMU_ICH_FAULT_VA     (0x18)
+#define OFFSET_IOMMU_OCH_FAULT_STATUS (0x1C)
+#define OFFSET_IOMMU_OCH_FAULT_VA     (0x20)
+#define OFFSET_IOMMU_TLB_READ        (0x24)
+#define OFFSET_IOMMU_TLB_TAG         (0x28)
+#define OFFSET_IOMMU_TLB_PPN         (0x2C)
+#define OFFSET_IOMMU_PTW_BIF_CFG      (0x30)
+#define OFFSET_IOMMU_DBG             (0xFC)
+
+#define BIT_MMU_ENABLE_IOMMU_CTRL (0x0)
+#define BIT_MMU_BLOCK_IOMMU_CTRL  (0x1)
+
+/* Bitmap of the page sizes currently supported */
+#define SRNPU_IOMMU_PGSIZE_BITMAP (SZ_4K | SZ_64K | SZ_1M | SZ_2M | SZ_16M)
+
+#define MASK_PTE_MAPPED                 BIT_MASK(0)
+#define MASK_FLPTE_MAP_1M       BIT_MASK(1)
+#define MASK_FLPTE_PPN_MASK_1M  GENMASK(MASK_HIGH_BIT, 20)
+#define RSHFT_FLPTE_PPN_1M      (20)
+#define LSHFT_FLPTE_PPN_1M      (16)
+#define MASK_FLPTE_MAP_2M       BIT_MASK(2)
+#define MASK_FLPTE_PPN_MASK_2M  GENMASK(MASK_HIGH_BIT, 21)
+#define RSHFT_FLPTE_PPN_2M      (21)
+#define LSHFT_FLPTE_PPN_2M      (17)
+#define MASK_FLPTE_MAP_16M      GENMASK(2, 1)
+#define MASK_FLPTE_PPN_MASK_16M         GENMASK(MASK_HIGH_BIT, 24)
+#define RSHFT_FLPTE_PPN_16M     (24)
+#define LSHFT_FLPTE_PPN_16M     (20)
+#define MASK_SLPT_BASE_TO_FLPTE         GENMASK(MASK_HIGH_BIT, 10)
+#define RSHFT_SLPT_BASE_TO_FLPTE (10)
+#define LSHFT_SLPT_BASE_TO_FLPTE (6)
+#define MASK_FLPTE_TO_SLPT_BASE         GENMASK(31, 6)
+#define RSHFT_FLPTE_TO_SLPT_BASE (6)
+#define LSHFT_FLPTE_TO_SLPT_BASE (10)
+
+#define RSHFT_SIZE_TO_NUM_PTE      (20)
+#define RSHFT_SIZE_TO_NUM_LV2PTE    (12)
+#define MASK_IOVA_TO_VPN           GENMASK(31, 20)
+#define RSHFT_IOVA_TO_VPN          (20)
+#define MASK_IOVA_TO_LV2VPN        GENMASK(19, 12)
+#define RSHFT_IOVA_TO_LV2VPN       (12)
+#define MASK_IOVA_TO_TLBVPN        GENMASK(31, 14)
+#define RSHFT_IOVA_TO_TLBVPN       (14)
+#define LSHFT_TLBVPN_TO_REGVAL     (14)
+#define MASK_IOVA_TO_PAGEOFFSET_4K  GENMASK(11, 0)
+#define MASK_IOVA_TO_PAGEOFFSET_64K GENMASK(15, 0)
+#define MASK_IOVA_TO_PAGEOFFSET_1M  GENMASK(19, 0)
+#define MASK_IOVA_TO_PAGEOFFSET_2M  GENMASK(20, 0)
+#define MASK_IOVA_TO_PAGEOFFSET_16M GENMASK(23, 0)
+
+#define MASK_SLPTE_MAP_4K      0
+#define MASK_SLPTE_MAP_64K     BIT_MASK(1)
+#define MASK_SLPTE_PPN_MASK_4K GENMASK(MASK_HIGH_BIT, 12)
+#define MASK_SLPTE_PPN_MASK_64K GENMASK(MASK_HIGH_BIT, 16)
+#define RSHFT_SLPTE_PPN_4K     (12)
+#define LSHFT_SLPTE_PPN_4K     (8)
+#define RSHFT_SLPTE_PPN_64K    (16)
+#define LSHFT_SLPTE_PPN_64K    (12)
+
+#define FLPT_PHYS_TO_REGVAL(x) (((x & GENMASK(MASK_HIGH_BIT, 14)) >> 14) << 10)
+#define FLPT_NUM_PTES         BIT(12)
+#define FLPT_PTE_SIZE         (SZ_4)
+#define FLPT_SIZE             (FLPT_NUM_PTES * FLPT_PTE_SIZE)
+#define SLPT_NUM_PTES         BIT(8)
+#define SLPT_PTE_SIZE         (SZ_4)
+#define SLPT_SIZE             (SLPT_NUM_PTES * SLPT_PTE_SIZE)
+
+#define FLPT_PAGE_FAULT         BIT(3)
+#define SLPT_PAGE_FAULT         BIT(2)
+#define PTW_ACCESS_FAULT BIT(1)
+#define ATU_ACCESS_FAULT BIT(0)
+
+/* Single shared first-level page table (FLPT); every SRNPU IOMMU instance
+ * points its hardware FLPT base register at this one table (see
+ * srnpu_iommu_enable()). */
+static void *srnpu_iommu_flpt = NULL;
+/* Slab cache from which 1 KiB-aligned second-level page tables (SLPTs)
+ * are allocated (see alloc_slpt_and_get_flpte()). */
+static struct kmem_cache *srnpu_iommu_slpt_cache = NULL;
+
+/* Per-domain wrapper; 'domain' is embedded so container_of() can recover
+ * this struct from a generic struct iommu_domain. */
+struct srnpu_iommu_domain {
+       spinlock_t lock;
+       struct platform_device *pdev;
+       struct iommu_domain domain;
+       uint32_t *flpt;
+};
+
+/* global iommu-shared data */
+struct srnpu_iommu_drvdata {
+       struct device *dev; /* physical device  */
+       struct iommu_device iommu;
+       struct srnpu_iommu_domain *domain; /* domain we belong to */
+#ifdef CONFIG_ARM
+       struct dma_iommu_mapping *mapping;
+#else
+       struct iommu_group *group;
+#endif
+       struct { /* optional reserved memory */
+               phys_addr_t paddr;
+               size_t size;
+       } resv;
+};
+
+/* per-device iommu data */
+struct srnpu_iommu {
+       struct device *dev; /* trinity device */
+       struct srnpu_iommu_drvdata *data;
+       void __iomem *regbase_dla;
+       void __iomem *regbase_dsp;
+};
+
+/* One refcount per FLPT entry, counting the live 4K/64K mappings that
+ * share that entry's second-level table (see srnpu_iommu_map_internal()). */
+static struct kref srnpu_iommu_slpt_refcnts[FLPT_NUM_PTES];
+
+/* Recover the driver's domain wrapper from a generic iommu_domain;
+ * returns NULL if @dm is NULL. */
+static inline struct srnpu_iommu_domain *
+domain_to_srnpu_domain(struct iommu_domain *dm)
+{
+       if (!dm)
+               return NULL;
+
+       return container_of(dm, struct srnpu_iommu_domain, domain);
+}
+
+/* Fetch the per-device srnpu_iommu stored by srnpu_iommu_xlate(), or NULL
+ * if none; handles the pre-v5.9 fwspec->iommu_priv storage layout. */
+static inline struct srnpu_iommu *dev_to_srnpu_iommu(struct device *dev)
+{
+       if (!dev)
+               return NULL;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 9, 0))
+       if (!dev->iommu_fwspec)
+               return NULL;
+
+       return dev->iommu_fwspec->iommu_priv;
+#else
+       return dev_iommu_priv_get(dev);
+#endif
+}
+
+/* Store the per-device iommu private data (version-dependent accessor,
+ * counterpart of dev_to_srnpu_iommu()). */
+static inline void dev_set_srnpu_iommu(struct device *dev, void *priv)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 9, 0))
+       dev->iommu_fwspec->iommu_priv = priv;
+#else
+       dev_iommu_priv_set(dev, priv);
+#endif
+}
+
+/* Version-independent wrapper around dev_iommu_fwspec_get(); returns NULL
+ * for a NULL @dev. */
+static inline struct iommu_fwspec *dev_to_iommu_fwspec(struct device *dev)
+{
+       if (!dev)
+               return NULL;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 7, 0))
+       return dev->iommu_fwspec;
+#else
+       return dev_iommu_fwspec_get(dev);
+#endif
+}
+
+/* Fetch the global driver data through the per-device iommu, or NULL if
+ * the device has not been attached via srnpu_iommu_xlate(). */
+static inline struct srnpu_iommu_drvdata *
+dev_to_srnpu_iommu_drvdata(struct device *dev)
+{
+       struct srnpu_iommu *iommu;
+
+       iommu = dev_to_srnpu_iommu(dev);
+       if (!iommu)
+               return NULL;
+
+       return iommu->data;
+}
+
+/* Extract the first-level virtual page number (IOVA bits 31:20, i.e. the
+ * FLPT index) from @iova. */
+static inline uint32_t iova_to_vpn(unsigned long iova)
+{
+       uint32_t ret = iova;
+
+       ret &= MASK_IOVA_TO_VPN;
+       ret >>= RSHFT_IOVA_TO_VPN;
+
+       return ret;
+}
+
+/* Extract the second-level virtual page number (IOVA bits 19:12, i.e. the
+ * SLPT index) from @iova. */
+static inline uint32_t iova_to_lv2vpn(unsigned long iova)
+{
+       uint32_t ret = iova;
+
+       ret &= MASK_IOVA_TO_LV2VPN;
+       ret >>= RSHFT_IOVA_TO_LV2VPN;
+
+       return ret;
+}
+
+/* Extract the TLB virtual page number (IOVA bits 31:14, used for the VPN
+ * invalidation register) from @iova. */
+static inline uint32_t iova_to_tlbvpn(unsigned long iova)
+{
+       uint32_t ret = iova;
+
+       ret &= MASK_IOVA_TO_TLBVPN;
+       ret >>= RSHFT_IOVA_TO_TLBVPN;
+
+       return ret;
+}
+
+/* 32-bit MMIO register read at @base + @offset. */
+static inline uint32_t srnpu_iommu_read_reg(void __iomem *base, uint32_t offset)
+{
+       return ioread32(base + offset);
+}
+
+/* 32-bit MMIO register write of @val at @base + @offset. */
+static inline void srnpu_iommu_write_reg(void __iomem *base, uint32_t offset,
+                                        uint32_t val)
+{
+       iowrite32(val, base + offset);
+}
+
+/* Program the shared FLPT physical base into both the DLA and DSP IOMMU
+ * register windows, then turn translation on for both. */
+static inline void srnpu_iommu_enable(struct srnpu_iommu *_iommu)
+{
+       phys_addr_t paddr = virt_to_phys(srnpu_iommu_flpt);
+
+       /** Set FLPT base */
+       srnpu_iommu_write_reg(_iommu->regbase_dla, OFFSET_IOMMU_FLPT_BASE,
+                             FLPT_PHYS_TO_REGVAL(paddr));
+       srnpu_iommu_write_reg(_iommu->regbase_dsp, OFFSET_IOMMU_FLPT_BASE,
+                             FLPT_PHYS_TO_REGVAL(paddr));
+
+       /** Enable IOMMU */
+       srnpu_iommu_write_reg(_iommu->regbase_dla, OFFSET_IOMMU_CTRL, 0x1U);
+       srnpu_iommu_write_reg(_iommu->regbase_dsp, OFFSET_IOMMU_CTRL, 0x1U);
+}
+
+/* Turn translation off for both the DLA and DSP IOMMU instances. */
+static inline void srnpu_iommu_disable(struct srnpu_iommu *_iommu)
+{
+       /** Disable IOMMU */
+       srnpu_iommu_write_reg(_iommu->regbase_dla, OFFSET_IOMMU_CTRL, 0x0U);
+       srnpu_iommu_write_reg(_iommu->regbase_dsp, OFFSET_IOMMU_CTRL, 0x0U);
+}
+
+/*
+ * Build a second-level PTE value for @paddr.  @nents is the number of 4K
+ * entries the mapping spans: 1 => 4K page, 16 => 64K page.  Returns 0
+ * (i.e. without MASK_PTE_MAPPED set) for any other count, which callers
+ * treat as an error.
+ */
+static inline uint32_t srnpu_iommu_get_slpte(phys_addr_t paddr, uint32_t nents)
+{
+       phys_addr_t mask_ppn;
+       uint32_t mask_map;
+       uint32_t lshft_ppn;
+       uint32_t rshft_ppn;
+       uint32_t val;
+
+       switch (nents) {
+       case 1:
+               mask_map = MASK_SLPTE_MAP_4K;
+               mask_ppn = MASK_SLPTE_PPN_MASK_4K;
+               rshft_ppn = RSHFT_SLPTE_PPN_4K;
+               lshft_ppn = LSHFT_SLPTE_PPN_4K;
+               break;
+       case 16:
+               mask_map = MASK_SLPTE_MAP_64K;
+               mask_ppn = MASK_SLPTE_PPN_MASK_64K;
+               rshft_ppn = RSHFT_SLPTE_PPN_64K;
+               lshft_ppn = LSHFT_SLPTE_PPN_64K;
+               break;
+       default:
+               return 0;
+       }
+
+       /* Encode: map-size bits, then the PPN repositioned into the PTE,
+        * then the mapped bit. */
+       val = 0;
+       val |= mask_map;
+       paddr &= mask_ppn;
+       val |= ((paddr >> rshft_ppn) << lshft_ppn);
+       val |= MASK_PTE_MAPPED;
+
+       return val;
+}
+
+/*
+ * Build a first-level PTE value for @paddr.  @nents is the number of 1M
+ * entries the mapping spans: 1 => 1M, 2 => 2M, 16 => 16M section.
+ * Returns 0 (no MASK_PTE_MAPPED bit) for unsupported counts.
+ */
+static inline uint32_t srnpu_iommu_get_flpte(phys_addr_t paddr, uint32_t nents)
+{
+       phys_addr_t mask_ppn;
+       uint32_t mask_map;
+       uint32_t lshft_ppn;
+       uint32_t rshft_ppn;
+       uint32_t val;
+
+       switch (nents) {
+       case 1:
+               mask_map = MASK_FLPTE_MAP_1M;
+               mask_ppn = MASK_FLPTE_PPN_MASK_1M;
+               rshft_ppn = RSHFT_FLPTE_PPN_1M;
+               lshft_ppn = LSHFT_FLPTE_PPN_1M;
+               break;
+       case 2:
+               mask_map = MASK_FLPTE_MAP_2M;
+               mask_ppn = MASK_FLPTE_PPN_MASK_2M;
+               rshft_ppn = RSHFT_FLPTE_PPN_2M;
+               lshft_ppn = LSHFT_FLPTE_PPN_2M;
+               break;
+       case 16:
+               mask_map = MASK_FLPTE_MAP_16M;
+               mask_ppn = MASK_FLPTE_PPN_MASK_16M;
+               rshft_ppn = RSHFT_FLPTE_PPN_16M;
+               lshft_ppn = LSHFT_FLPTE_PPN_16M;
+               break;
+       default:
+               return 0;
+       }
+
+       /* Encode: map-size bits, then the PPN repositioned into the PTE,
+        * then the mapped bit. */
+       val = 0;
+       val |= mask_map;
+       paddr &= mask_ppn;
+       val |= ((paddr >> rshft_ppn) << lshft_ppn);
+       val |= MASK_PTE_MAPPED;
+
+       return val;
+}
+/* Decode the SLPT physical base address encoded in a first-level PTE
+ * (inverse of the encoding in alloc_slpt_and_get_flpte()). */
+static inline uint32_t flpte_to_slpt_base(const uint32_t flpte)
+{
+       uint32_t ret = 0;
+
+       ret = flpte & MASK_FLPTE_TO_SLPT_BASE;
+       ret >>= RSHFT_FLPTE_TO_SLPT_BASE;
+       ret <<= LSHFT_FLPTE_TO_SLPT_BASE;
+
+       return ret;
+}
+
+/*
+ * Allocate a zeroed second-level page table from the slab cache and
+ * encode its physical base into @flpte (mapped bit not set).  The SLPT
+ * must be 1 KiB aligned because the hardware FLPT entry only stores the
+ * base's upper bits.  Returns the SLPT virtual address or an ERR_PTR().
+ */
+static inline void *alloc_slpt_and_get_flpte(uint32_t *flpte)
+{
+       phys_addr_t pa;
+       void *slpt;
+
+       slpt = kmem_cache_zalloc(srnpu_iommu_slpt_cache, GFP_ATOMIC);
+       if (!slpt)
+               return ERR_PTR(-ENOMEM);
+
+       pa = virt_to_phys(slpt);
+       if (!IS_ALIGNED(pa, SZ_1K)) {
+               kmem_cache_free(srnpu_iommu_slpt_cache, slpt);
+               return ERR_PTR(-EINVAL);
+       }
+
+       /* Reposition the base into the FLPT entry's SLPT-base field. */
+       pa &= MASK_SLPT_BASE_TO_FLPTE;
+       pa >>= RSHFT_SLPT_BASE_TO_FLPTE;
+       pa <<= LSHFT_SLPT_BASE_TO_FLPTE;
+
+       *flpte = (uint32_t)pa;
+
+       return slpt;
+}
+
+/*
+ * kref release callback: by the time this runs the refcount has reached
+ * zero, so the BUG_ON is a sanity check that never fires in normal
+ * operation.  The actual kmem_cache_free() of the SLPT is done by the
+ * kref_put() caller (see srnpu_iommu_unmap_internal()).
+ */
+static void release_slpt(struct kref *kref)
+{
+       BUG_ON(kref_read(kref));
+}
+
+/* Flush the CPU data cache for a page-table region so the NPU's page
+ * table walker (which reads memory directly) observes the PTE updates;
+ * only the helper name differs between ARM and ARM64. */
+static void flush_dcache_area(void *addr, size_t len)
+{
+#ifdef CONFIG_ARM
+       __cpuc_flush_dcache_area(addr, len);
+#else
+       __flush_dcache_area(addr, len);
+#endif
+}
+
+/*
+ * srnpu_iommu_map_internal() - install PTEs for one @iova -> @paddr
+ * mapping of the given @size (must be one of the supported page sizes).
+ *
+ * 4K/64K mappings go through a second-level table, created on demand and
+ * reference-counted per FLPT slot; 1M/2M/16M mappings write first-level
+ * section entries directly.  @prot is currently unused.  Caller must hold
+ * the domain lock.  Returns 0 or a negative errno.
+ */
+static int srnpu_iommu_map_internal(unsigned long iova, phys_addr_t paddr,
+                                   size_t size, int prot)
+{
+       uint32_t vpn = iova_to_vpn(iova);
+       uint32_t *pte;
+       uint32_t num_pte;
+       uint32_t val;
+       uint32_t i;
+       int err = 0;
+
+       pte = &(((uint32_t *)srnpu_iommu_flpt)[vpn]);
+
+       switch (size) {
+       case SZ_4K:
+       case SZ_64K: {
+               uint32_t flpte;
+               uint32_t lv2_vpn;
+               uint32_t *slpt;
+
+               if (*pte & MASK_PTE_MAPPED) {
+                       /* FLPT slot already points at an SLPT: share it. */
+                       slpt = phys_to_virt(flpte_to_slpt_base(*pte));
+                       kref_get(&srnpu_iommu_slpt_refcnts[vpn]);
+               } else {
+                       /* First mapping in this 1M window: allocate an SLPT
+                        * and publish it through the FLPT entry. */
+                       slpt = alloc_slpt_and_get_flpte(&flpte);
+                       if (IS_ERR(slpt)) {
+                               pr_err("%s: Failed to alloc slpt (iova 0x%lx, paddr 0x%lx)\n",
+                                      __func__, iova, (unsigned long)paddr);
+                               return PTR_ERR(slpt);
+                       }
+                       /*bitlock here */
+                       kref_init(&srnpu_iommu_slpt_refcnts[vpn]);
+                       *pte = (flpte | MASK_PTE_MAPPED);
+                       flush_dcache_area(pte, sizeof(*pte));
+               }
+
+               lv2_vpn = iova_to_lv2vpn(iova);
+               slpt = &slpt[lv2_vpn];
+
+               /* A 64K mapping occupies 16 consecutive 4K SLPT entries. */
+               num_pte = size >> RSHFT_SIZE_TO_NUM_LV2PTE;
+               val = srnpu_iommu_get_slpte(paddr, num_pte);
+               if (!(val & MASK_PTE_MAPPED)) {
+                       pr_err("%s: Failed to check slpt (iova 0x%lx, paddr 0x%lx)\n",
+                              __func__, iova, (unsigned long)paddr);
+                       return -EINVAL;
+               }
+
+               for (i = 0; i < num_pte; ++i) {
+                       slpt[i] = val;
+                       paddr = paddr + SZ_4K;
+               }
+               flush_dcache_area(slpt, sizeof(*slpt) * num_pte);
+
+               break;
+       }
+       case SZ_1M:
+       case SZ_2M:
+       case SZ_16M:
+               if (*pte & MASK_PTE_MAPPED) {
+                       pr_err("%s: iova 0x%lx is already mapped to phys 0x%lx\n",
+                              __func__, iova, (unsigned long)paddr);
+                       return -EADDRINUSE;
+               }
+
+               /* Sections occupy size/1M consecutive FLPT entries. */
+               num_pte = size >> RSHFT_SIZE_TO_NUM_PTE;
+               val = srnpu_iommu_get_flpte(paddr, num_pte);
+               if (!(val & MASK_PTE_MAPPED)) {
+                       pr_err("%s: Failed to check flpt (iova 0x%lx, paddr 0x%lx)\n",
+                              __func__, iova, (unsigned long)paddr);
+                       return -EINVAL;
+               }
+
+               /** @todo: alignment check here */
+               for (i = 0; i < num_pte; ++i) {
+                       pte[i] = val;
+                       paddr = paddr + SZ_1M;
+               }
+               flush_dcache_area(pte, sizeof(*pte) * num_pte);
+
+               break;
+       default:
+               err = -EINVAL;
+               break;
+       }
+
+       return err;
+}
+
+/* iommu_ops .map callback: serializes srnpu_iommu_map_internal() under the
+ * domain spinlock.  The signature gained a gfp_t argument in v5.5, hence
+ * the version split. */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 5, 0))
+static int srnpu_iommu_map(struct iommu_domain *domain, unsigned long iova,
+                          phys_addr_t paddr, size_t size, int prot)
+#else
+static int srnpu_iommu_map(struct iommu_domain *domain, unsigned long iova,
+                          phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+#endif
+{
+       struct srnpu_iommu_domain *_domain = domain_to_srnpu_domain(domain);
+       unsigned long flags;
+       int err = 0;
+
+       if (!_domain)
+               return -ENODEV;
+
+       spin_lock_irqsave(&_domain->lock, flags);
+       err = srnpu_iommu_map_internal(iova, paddr, size, prot);
+       spin_unlock_irqrestore(&_domain->lock, flags);
+
+       return err;
+}
+
+/*
+ * srnpu_iommu_unmap_internal() - tear down the mapping covering @iova.
+ *
+ * For section (1M/2M/16M) mappings the mapped bit is cleared on
+ * size/1M FLPT entries; for 4K/64K mappings the actual page size is
+ * re-derived from the SLPT entry itself and the per-FLPT-slot kref is
+ * dropped, freeing the SLPT when it reaches zero.  Caller must hold the
+ * domain lock.  Returns the number of bytes unmapped.
+ */
+static size_t srnpu_iommu_unmap_internal(unsigned long iova, size_t size)
+{
+       uint32_t vpn = iova_to_vpn(iova);
+       size_t unmapped = 0;
+       uint32_t *pte;
+       uint32_t num_pte;
+       uint32_t i;
+
+       pte = &(((uint32_t *)srnpu_iommu_flpt)[vpn]);
+
+       if (CHECK_BITS(*pte, MASK_FLPTE_MAP_16M) ||
+           CHECK_BITS(*pte, MASK_FLPTE_MAP_2M) ||
+           CHECK_BITS(*pte, MASK_FLPTE_MAP_1M)) {
+               num_pte = size >> RSHFT_SIZE_TO_NUM_PTE;
+               for (i = 0; i < num_pte; ++i) {
+                       if (!((pte[i]) & MASK_PTE_MAPPED)) {
+                               pr_err("%s: Unmapped flpt (iova 0x%lx)\n",
+                                      __func__, iova);
+                       }
+
+                       /* XOR toggles the mapped bit off (entry was mapped;
+                        * the pr_err above flags the inconsistent case). */
+                       pte[i] ^= MASK_PTE_MAPPED;
+                       unmapped += SZ_1M;
+               }
+               flush_dcache_area(pte, sizeof(*pte) * num_pte);
+       } else {
+               void *slpt_base = phys_to_virt(flpte_to_slpt_base(*pte));
+               uint32_t lv2vpn = iova_to_lv2vpn(iova);
+               uint32_t *slpt;
+
+               /* The stored SLPT entry tells us whether this is a 64K or
+                * a 4K mapping, regardless of the @size argument. */
+               slpt = &((uint32_t *)slpt_base)[lv2vpn];
+               if (*slpt & MASK_SLPTE_MAP_64K)
+                       size = SZ_64K;
+               else
+                       size = SZ_4K;
+
+               num_pte = size >> RSHFT_SIZE_TO_NUM_LV2PTE;
+
+               for (i = 0; i < num_pte; ++i) {
+                       if (!((slpt[i]) & MASK_PTE_MAPPED))
+                               pr_err("%s: Unmapped slpt (iova 0x%lx)\n",
+                                      __func__, iova);
+
+                       slpt[i] ^= MASK_PTE_MAPPED;
+                       unmapped += SZ_4K;
+               }
+               flush_dcache_area(slpt, sizeof(*slpt) * num_pte);
+
+               /* Last mapping in this 1M window: free the SLPT and clear
+                * the FLPT entry that pointed at it. */
+               if (kref_put(&srnpu_iommu_slpt_refcnts[vpn], release_slpt)) {
+                       kmem_cache_free(srnpu_iommu_slpt_cache, slpt_base);
+                       *pte ^= MASK_PTE_MAPPED;
+                       flush_dcache_area(pte, sizeof(*pte));
+               }
+       }
+
+       return unmapped;
+}
+
+/* iommu_ops .unmap callback: loops srnpu_iommu_unmap_internal() under the
+ * domain lock until @size bytes are unmapped.  The iotlb_gather argument
+ * appeared in v5.4, hence the version split. */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0))
+static size_t srnpu_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
+                               size_t size)
+#else
+static size_t srnpu_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
+                               size_t size, struct iommu_iotlb_gather *gather)
+#endif
+{
+       struct srnpu_iommu_domain *_domain = domain_to_srnpu_domain(domain);
+       unsigned long flags;
+       size_t unmapped = 0;
+
+       if (!_domain)
+               return 0;
+
+       spin_lock_irqsave(&_domain->lock, flags);
+       while (unmapped < size)
+               unmapped += srnpu_iommu_unmap_internal(iova + unmapped,
+                                                      size - unmapped);
+       spin_unlock_irqrestore(&_domain->lock, flags);
+
+       return unmapped;
+}
+
+/* Reconstruct a device address from a FLPT index (@first) and an SLPT
+ * index (@second); used only by the page-table dump below. */
+static dma_addr_t srnpu_iommu_get_daddr(u32 first, u32 second)
+{
+       return (first << RSHFT_SIZE_TO_NUM_PTE) +
+              (second << RSHFT_SIZE_TO_NUM_LV2PTE);
+}
+
+/*
+ * srnpu_iommu_dump_page_table() - log every mapped entry of the shared
+ * page table (sections at the first level, 4K/64K pages at the second),
+ * decoding each PTE back to its physical address.  Diagnostic aid used
+ * from the fault handler.
+ */
+static void srnpu_iommu_dump_page_table(struct device *dev)
+{
+       u32 i, *flpt_base = srnpu_iommu_flpt;
+
+       dev_warn(dev, "IOMMU PAGE TABLE DUMP");
+       for (i = 0; i < FLPT_NUM_PTES; i++) {
+               u32 ppn, pte = flpt_base[i];
+               if (!(pte & MASK_PTE_MAPPED))
+                       continue;
+
+               /* Check the widest section bits first: the 16M pattern is a
+                * superset of the 2M and 1M patterns. */
+               if (CHECK_BITS(pte, MASK_FLPTE_MAP_16M)) {
+                       ppn = (pte >> LSHFT_FLPTE_PPN_16M);
+                       dev_warn(
+                               dev,
+                               "\t[%u] DADDR (0x%08lx): 16M PTE(0x%08x) PADDR (0x%08x)",
+                               i, (unsigned long)srnpu_iommu_get_daddr(i, 0),
+                               pte, ppn << RSHFT_FLPTE_PPN_16M);
+               } else if (CHECK_BITS(pte, MASK_FLPTE_MAP_2M)) {
+                       ppn = (pte >> LSHFT_FLPTE_PPN_2M);
+                       dev_warn(
+                               dev,
+                               "\t[%u] DADDR (0x%08lx): 2M PTE(0x%08x) PADDR (0x%08x)",
+                               i, (unsigned long)srnpu_iommu_get_daddr(i, 0),
+                               pte, ppn << RSHFT_FLPTE_PPN_2M);
+               } else if (CHECK_BITS(pte, MASK_FLPTE_MAP_1M)) {
+                       ppn = (pte >> LSHFT_FLPTE_PPN_1M);
+                       dev_warn(
+                               dev,
+                               "\t[%u] DADDR (0x%08lx): 1M PTE(0x%08x) PADDR (0x%08x)",
+                               i, (unsigned long)srnpu_iommu_get_daddr(i, 0),
+                               pte, ppn << RSHFT_FLPTE_PPN_1M);
+               } else {
+                       /* No section bits: entry points at an SLPT; walk it. */
+                       u32 slpt_base = flpte_to_slpt_base(pte);
+                       u32 *slpt_base_vaddr = phys_to_virt(slpt_base);
+                       u32 j;
+
+                       dev_warn(
+                               dev,
+                               "\t[%u] DADDR (0x%08lx): 4K/64K PTE(0x%08x) SLPT_BASE (0x%08x)",
+                               i, (unsigned long)srnpu_iommu_get_daddr(i, 0),
+                               pte, slpt_base);
+
+                       for (j = 0; j < SLPT_NUM_PTES; j++) {
+                               pte = slpt_base_vaddr[j];
+                               if (!(pte & MASK_PTE_MAPPED))
+                                       continue;
+
+                               if (CHECK_BITS(pte, MASK_SLPTE_MAP_64K)) {
+                                       ppn = (pte >> LSHFT_SLPTE_PPN_64K);
+                                       dev_warn(
+                                               dev,
+                                               "\t\t[%u] DADDR (0x%08lx): 64K SLPT PTE (0x%08x) PADDR (0x%08x)",
+                                               j,
+                                               (unsigned long)
+                                                       srnpu_iommu_get_daddr(
+                                                               i, j),
+                                               pte,
+                                               ppn << RSHFT_SLPTE_PPN_64K);
+                               } else {
+                                       ppn = (pte >> LSHFT_SLPTE_PPN_4K);
+                                       dev_warn(
+                                               dev,
+                                               "\t\t[%u] DADDR (0x%08lx): 4K SLPT PTE (0x%08x) PADDR (0x%08x)",
+                                               j,
+                                               (unsigned long)
+                                                       srnpu_iommu_get_daddr(
+                                                               i, j),
+                                               pte, ppn << RSHFT_SLPTE_PPN_4K);
+                               }
+                       }
+               }
+       }
+}
+
+/*
+ * srnpu_iommu_fault_mmreg() - decode and log the fault status of one
+ * IOMMU instance.
+ * @dev:   device used for logging
+ * @mmreg: register base of the DLA or DSP IOMMU instance
+ * @dla:   true for the DLA instance, false for the DSP instance
+ *
+ * Reads the input-channel and output-channel fault status registers and
+ * logs every asserted fault bit (page-table faults include the faulting
+ * virtual address).  Returns the number of faults found.
+ *
+ * Fix: the ATU access-fault messages previously printed "(PTU)"; the
+ * acronym for Address Translation Unit is ATU (cf. ATU_ACCESS_FAULT).
+ */
+static int srnpu_iommu_fault_mmreg(struct device *dev, void __iomem *mmreg,
+                                  bool dla)
+{
+       const char *type = dla ? "DLA" : "DSP";
+       unsigned int ICH_FAULT, OCH_FAULT;
+       int ret = 0;
+
+       ICH_FAULT = ioread32(mmreg + OFFSET_IOMMU_ICH_FAULT_STATUS);
+       if (ICH_FAULT & FLPT_PAGE_FAULT) {
+               ret++;
+               dev_err(dev,
+                       "[%s][IN] First-level page table fault detected at 0x%x",
+                       type, ioread32(mmreg + OFFSET_IOMMU_ICH_FAULT_VA));
+       }
+       if (ICH_FAULT & SLPT_PAGE_FAULT) {
+               ret++;
+               dev_err(dev,
+                       "[%s][IN] Second-level page table fault detected at 0x%x",
+                       type, ioread32(mmreg + OFFSET_IOMMU_ICH_FAULT_VA));
+       }
+       if (ICH_FAULT & PTW_ACCESS_FAULT) {
+               ret++;
+               dev_err(dev, "[%s][IN] Page Table Walker (PTW) access fault",
+                       type);
+       }
+       if (ICH_FAULT & ATU_ACCESS_FAULT) {
+               ret++;
+               dev_err(dev,
+                       "[%s][IN] Address Translation Unit (ATU) access fault",
+                       type);
+       }
+
+       OCH_FAULT = ioread32(mmreg + OFFSET_IOMMU_OCH_FAULT_STATUS);
+       if (OCH_FAULT & FLPT_PAGE_FAULT) {
+               ret++;
+               dev_err(dev,
+                       "[%s][OUT] First-level page table fault detected at 0x%x",
+                       type, ioread32(mmreg + OFFSET_IOMMU_OCH_FAULT_VA));
+       }
+       if (OCH_FAULT & SLPT_PAGE_FAULT) {
+               ret++;
+               dev_err(dev,
+                       "[%s][OUT] Second-level page table fault detected at 0x%x",
+                       type, ioread32(mmreg + OFFSET_IOMMU_OCH_FAULT_VA));
+       }
+       if (OCH_FAULT & PTW_ACCESS_FAULT) {
+               ret++;
+               dev_err(dev, "[%s][OUT] Page Table Walker (PTW) access fault",
+                       type);
+       }
+       if (OCH_FAULT & ATU_ACCESS_FAULT) {
+               ret++;
+               dev_err(dev,
+                       "[%s][OUT] Address Translation Unit (ATU) access fault",
+                       type);
+       }
+
+       return ret;
+}
+
+/*
+ * srnpu_iommu_fault_handler() - domain fault handler installed in
+ * srnpu_iommu_domain_alloc().
+ *
+ * Decodes fault registers of both DLA and DSP IOMMU instances; when any
+ * fault was found and @flags is positive (reinterpreted here as a
+ * verbosity level, not the usual IOMMU_FAULT_* flags), the whole page
+ * table is dumped.  Returns the fault count, or -EINVAL on a bad @dev.
+ */
+static int srnpu_iommu_fault_handler(struct iommu_domain *domain,
+                                    struct device *dev, unsigned long iova,
+                                    int flags, void *token)
+{
+       struct srnpu_iommu *_iommu;
+       int verbose = flags;
+       int num_fault = 0;
+
+       if (!dev)
+               return -EINVAL;
+
+       _iommu = dev_to_srnpu_iommu(dev);
+       if (!_iommu)
+               return -EINVAL;
+
+       num_fault += srnpu_iommu_fault_mmreg(dev, _iommu->regbase_dla, true);
+       num_fault += srnpu_iommu_fault_mmreg(dev, _iommu->regbase_dsp, false);
+
+       if (verbose > 0 && num_fault > 0)
+               srnpu_iommu_dump_page_table(dev);
+
+       return num_fault;
+}
+
+/*
+ * srnpu_iommu_domain_alloc() - iommu_ops .domain_alloc callback.
+ *
+ * Supports unmanaged domains (and, on arm64, DMA domains, which also get
+ * an iova cookie).  Every domain shares the single global FLPT and gets
+ * the driver's fault handler installed.  Returns NULL on failure.
+ *
+ * Fix: free the freshly allocated domain when iommu_get_dma_cookie()
+ * fails instead of leaking it.
+ */
+static struct iommu_domain *srnpu_iommu_domain_alloc(unsigned type)
+{
+       struct srnpu_iommu_domain *_domain;
+
+#ifdef CONFIG_ARM64
+       if ((type != IOMMU_DOMAIN_UNMANAGED) && (type != IOMMU_DOMAIN_DMA))
+#else
+       if (type != IOMMU_DOMAIN_UNMANAGED)
+#endif
+               return NULL;
+
+       _domain = kzalloc(sizeof(*_domain), GFP_KERNEL);
+       if (!_domain)
+               return NULL;
+
+       spin_lock_init(&_domain->lock);
+
+#ifdef CONFIG_ARM64
+       if (type == IOMMU_DOMAIN_DMA &&
+           iommu_get_dma_cookie(&_domain->domain)) {
+               kfree(_domain);
+               return NULL;
+       }
+#endif
+
+       _domain->flpt = srnpu_iommu_flpt;
+
+       iommu_set_fault_handler(&_domain->domain, srnpu_iommu_fault_handler,
+                               NULL);
+
+       return &_domain->domain;
+}
+
+static void srnpu_iommu_domain_free(struct iommu_domain *domain)
+{
+       struct srnpu_iommu_domain *_domain = domain_to_srnpu_domain(domain);
+
+       if (!_domain)
+               return;
+
+       kfree(_domain);
+}
+
+/* iommu_ops .attach_dev callback: turns translation on for the device's
+ * IOMMU instances.  No per-domain table switch is needed because all
+ * domains share the single global FLPT. */
+static int srnpu_iommu_attach_dev(struct iommu_domain *domain,
+                                 struct device *dev)
+{
+       struct srnpu_iommu *_iommu;
+
+       _iommu = dev_to_srnpu_iommu(dev);
+       if (!_iommu)
+               return -ENODEV;
+
+       srnpu_iommu_enable(_iommu);
+
+       return 0;
+}
+
+/* iommu_ops .detach_dev callback: turns translation off for the device's
+ * IOMMU instances. */
+static void srnpu_iommu_detach_dev(struct iommu_domain *domain,
+                                  struct device *dev)
+{
+       struct srnpu_iommu *_iommu;
+
+       _iommu = dev_to_srnpu_iommu(dev);
+       if (!_iommu)
+               return;
+
+       srnpu_iommu_disable(_iommu);
+}
+
+#ifdef CONFIG_ARM64
+/* iommu_ops .device_group callback (arm64 only): all devices behind this
+ * IOMMU share one group, allocated lazily and ref-counted thereafter. */
+static struct iommu_group *srnpu_iommu_device_group(struct device *dev)
+{
+       struct srnpu_iommu_drvdata *_data;
+
+       _data = dev_to_srnpu_iommu_drvdata(dev);
+       if (!_data)
+               return ERR_PTR(-ENODEV);
+
+       if (!_data->group) {
+               _data->group = iommu_group_alloc();
+               if (IS_ERR(_data->group)) {
+                       dev_err(dev,
+                               "failed to allocate a srnpu IOMMU group\n");
+                       return _data->group;
+               }
+       } else {
+               iommu_group_ref_get(_data->group);
+       }
+
+       return _data->group;
+}
+#endif
+
+static struct iommu_ops srnpu_iommu_ops;
+static struct platform_driver srnpu_iommu_driver;
+
+/*
+ * srnpu_iommu_xlate() - of_xlate-style hook that binds a client device to
+ * this IOMMU: looks up the IOMMU platform device from the DT phandle,
+ * lazily creates the ARM DMA mapping (ARM32 only), initializes the
+ * device's fwspec, allocates the per-device srnpu_iommu, and records the
+ * DT stream id.  Returns 0 or a negative errno.
+ *
+ * NOTE(review): the kzalloc'd _iommu is not freed if the subsequent
+ * iommu_fwspec_add_ids() fails — confirm whether that path can occur and
+ * whether it matters (one-shot probe-time allocation).
+ */
+static int srnpu_iommu_xlate(struct device *dev, struct of_phandle_args *spec)
+{
+       struct platform_device *pdev;
+       struct srnpu_iommu_drvdata *_data;
+       struct iommu_fwspec *fwspec;
+
+       pdev = of_find_device_by_node(spec->np);
+       if (!pdev)
+               return -ENODEV;
+
+       _data = platform_get_drvdata(pdev);
+       if (!_data)
+               return -ENODEV;
+
+#ifdef CONFIG_ARM
+       if (!_data->mapping) {
+               struct dma_iommu_mapping *mapping;
+
+               /* Support 2GB iova address space */
+               mapping = arm_iommu_create_mapping(&platform_bus_type, SZ_1G,
+                                                  SZ_2G);
+               if (IS_ERR(mapping)) {
+                       return PTR_ERR(mapping);
+               }
+
+               _data->mapping = mapping;
+       }
+#endif
+
+       fwspec = dev_to_iommu_fwspec(dev);
+       if (!fwspec) {
+               int err = iommu_fwspec_init(dev, &spec->np->fwnode,
+                                           &srnpu_iommu_ops);
+
+               if (err)
+                       return err;
+
+               fwspec = dev_to_iommu_fwspec(dev);
+       } else if (fwspec->ops != &srnpu_iommu_ops) {
+               /* Device is already claimed by a different IOMMU driver. */
+               return -EINVAL;
+       }
+
+       if (!dev_to_srnpu_iommu(dev)) {
+               struct srnpu_iommu *_iommu;
+
+               _iommu = kzalloc(sizeof(*_iommu), GFP_KERNEL);
+               if (!_iommu)
+                       return -ENOMEM;
+
+               _iommu->data = _data;
+               _iommu->dev = dev;
+
+               dev_set_srnpu_iommu(dev, _iommu);
+       }
+
+       return iommu_fwspec_add_ids(dev, spec->args, 1);
+}
+
+/*
+ * Map the CBOX IOMMU register windows (DLA and DSP) for @dev. The CBOX
+ * base comes from the device's DT "reg" entry at MMREG_CBOX_IDX; the two
+ * windows are fixed offsets from that base. Failures are logged but not
+ * propagated — the regbase pointers stay as ERR_PTR values.
+ */
+static void srnpu_iommu_setup_mmreg(struct device *dev)
+{
+       struct srnpu_iommu *_iommu;
+       struct device_node *np;
+       struct resource mmreg;
+       phys_addr_t paddr, base;
+
+       _iommu = dev_to_srnpu_iommu(dev);
+       if (!_iommu)
+               return;
+
+       /* NOTE(review): paddr is computed but unused below — confirm. */
+       paddr = virt_to_phys(srnpu_iommu_flpt);
+       np = dev->of_node;
+       if (of_address_to_resource(np, MMREG_CBOX_IDX, &mmreg) < 0) {
+               dev_err(dev, "Unable to find CBOX mmreg\n");
+               return;
+       }
+       base = mmreg.start;
+
+       mmreg.start = base + OFFSET_CBOX_IOMMU_DLA;
+       mmreg.end = mmreg.start + LENGTH_CBOX_IOMMU - 1;
+
+       _iommu->regbase_dla = devm_ioremap_resource(dev, &mmreg);
+       if (IS_ERR(_iommu->regbase_dla))
+               dev_err(dev, "Unable to ioremap DLA mmreg\n");
+
+       mmreg.start = base + OFFSET_CBOX_IOMMU_DSP;
+       mmreg.end = mmreg.start + LENGTH_CBOX_IOMMU - 1;
+
+       _iommu->regbase_dsp = devm_ioremap_resource(dev, &mmreg);
+       if (IS_ERR(_iommu->regbase_dsp))
+               dev_err(dev, "Unable to ioremap DSP mmreg\n");
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0))
+/*
+ * Pre-v5.8 iommu_ops.add_device callback: link @dev to the IOMMU sysfs
+ * entry, map the CBOX MMIO windows, and place the device in an IOMMU
+ * group. On 32-bit ARM each device gets its own group and is explicitly
+ * attached to the shared DMA mapping; elsewhere the core's
+ * iommu_group_get_for_dev() path is used.
+ */
+static int srnpu_iommu_add_device(struct device *dev)
+{
+       struct iommu_fwspec *fwspec = dev_to_iommu_fwspec(dev);
+       struct srnpu_iommu_drvdata *_data;
+       struct iommu_group *group;
+       int err;
+
+       if (!fwspec || fwspec->ops != &srnpu_iommu_ops)
+               return -ENODEV;
+
+       _data = dev_to_srnpu_iommu_drvdata(dev);
+       err = iommu_device_link(&_data->iommu, dev);
+       if (err < 0)
+               return -ENODEV;
+
+       srnpu_iommu_setup_mmreg(dev);
+
+#ifdef CONFIG_ARM
+       /* FIXME: ARM DMA code doesn't understand multi-device groups */
+       group = iommu_group_alloc();
+       if (IS_ERR(group)) {
+               err = PTR_ERR(group);
+               goto err_unlink;
+       }
+
+       err = iommu_group_add_device(group, dev);
+       iommu_group_put(group);
+       if (err)
+               goto err_unlink;
+
+       err = arm_iommu_attach_device(dev, _data->mapping);
+       if (err) {
+               iommu_group_remove_device(dev);
+               goto err_unlink;
+       }
+#else
+       group = iommu_group_get_for_dev(dev);
+       if (IS_ERR(group)) {
+               err = PTR_ERR(group);
+               goto err_unlink;
+       }
+       iommu_group_put(group);
+#endif
+       return 0;
+
+err_unlink:
+       iommu_device_unlink(&_data->iommu, dev);
+       return err;
+}
+
+/*
+ * Pre-v5.8 iommu_ops.remove_device callback: undo add_device() — unlink
+ * the sysfs association, drop the device from its group, and free the
+ * fwspec.
+ */
+static void srnpu_iommu_remove_device(struct device *dev)
+{
+       struct iommu_fwspec *fwspec = dev_to_iommu_fwspec(dev);
+       struct srnpu_iommu_drvdata *_data;
+
+       if (!fwspec || fwspec->ops != &srnpu_iommu_ops)
+               return;
+
+       _data = dev_to_srnpu_iommu_drvdata(dev);
+       iommu_device_unlink(&_data->iommu, dev);
+       iommu_group_remove_device(dev);
+       iommu_fwspec_free(dev);
+}
+#else
+/*
+ * v5.8+ iommu_ops.probe_device callback: claim @dev if its fwspec points
+ * at this driver, map the CBOX MMIO windows, and hand back the
+ * iommu_device embedded in the driver data.
+ */
+static struct iommu_device *srnpu_iommu_probe_device(struct device *dev)
+{
+       struct iommu_fwspec *fwspec = dev_to_iommu_fwspec(dev);
+       struct srnpu_iommu_drvdata *_data;
+
+       if (!fwspec || fwspec->ops != &srnpu_iommu_ops)
+               return ERR_PTR(-ENODEV);
+
+       _data = dev_to_srnpu_iommu_drvdata(dev);
+       if (!_data)
+               return ERR_PTR(-EINVAL);
+
+       srnpu_iommu_setup_mmreg(dev);
+       return &_data->iommu;
+}
+
+#ifdef CONFIG_ARM
+/*
+ * v5.8+ iommu_ops.probe_finalize callback (32-bit ARM only): attach the
+ * DMA IOMMU mapping created in srnpu_iommu_xlate() so the DMA API routes
+ * through the srnpu IOMMU for this device.
+ */
+static void srnpu_iommu_probe_finalize(struct device *dev)
+{
+       struct srnpu_iommu_drvdata *_data;
+       struct dma_iommu_mapping *mapping;
+
+       _data = dev_to_srnpu_iommu_drvdata(dev);
+       if (!_data || !_data->mapping)
+               return;
+
+       mapping = _data->mapping;
+       /*
+        * arm_iommu_attach_device() returns 0 on success. The original
+        * condition was inverted: it logged the error on the success path
+        * and stayed silent on actual failure.
+        */
+       if (arm_iommu_attach_device(dev, mapping))
+               dev_err(dev, "Unable to attach iommu mapping to the device");
+}
+#endif
+
+/* v5.8+ iommu_ops.release_device callback: free the device's fwspec. */
+static void srnpu_iommu_release_device(struct device *dev)
+{
+       struct iommu_fwspec *fwspec = dev_to_iommu_fwspec(dev);
+
+       if (!fwspec || fwspec->ops != &srnpu_iommu_ops)
+               return;
+
+       iommu_fwspec_free(dev);
+}
+
+#ifdef CONFIG_ARM
+/*
+ * 32-bit ARM only: always request an UNMANAGED default domain so the core
+ * does not install its own DMA domain; DMA goes through the ARM
+ * dma_iommu_mapping attached in probe_finalize() instead.
+ */
+static int srnpu_iommu_def_domain_type(struct device *dev)
+{
+       return IOMMU_DOMAIN_UNMANAGED;
+}
+#endif
+#endif
+
+/*
+ * iommu_ops.iova_to_phys callback: walk the two-level srnpu page table.
+ * A first-level entry either maps a 16M/2M/1M section directly or points
+ * at a second-level table of 64K/4K pages. The PPN is recovered by
+ * shifting the flag bits out of the PTE and re-aligning it, then the
+ * page offset bits of @iova are OR-ed back in. Returns 0 for unmapped
+ * iovas.
+ */
+static phys_addr_t srnpu_iommu_iova_to_phys(struct iommu_domain *domain,
+                                           dma_addr_t iova)
+{
+       struct srnpu_iommu_domain *_domain;
+       phys_addr_t paddr = 0;
+       uint32_t vpn;
+       uint32_t *pte;
+
+       _domain = domain_to_srnpu_domain(domain);
+       if (!_domain)
+               return 0;
+
+       vpn = iova_to_vpn(iova);
+       pte = &_domain->flpt[vpn];
+
+       /* pte points into flpt[] so only the MAPPED bit really matters. */
+       if (!pte || !(*pte & MASK_PTE_MAPPED))
+               return 0;
+
+       if (CHECK_BITS(*pte, MASK_FLPTE_MAP_16M) ||
+           CHECK_BITS(*pte, MASK_FLPTE_MAP_2M) ||
+           CHECK_BITS(*pte, MASK_FLPTE_MAP_1M)) {
+               /* Section-mapped at the first level: no second-level walk. */
+               paddr |= *pte;
+               if (CHECK_BITS(*pte, MASK_FLPTE_MAP_16M)) {
+                       paddr >>= LSHFT_FLPTE_PPN_16M;
+                       paddr <<= RSHFT_FLPTE_PPN_16M;
+                       paddr |= (iova & MASK_IOVA_TO_PAGEOFFSET_16M);
+               } else if (CHECK_BITS(*pte, MASK_FLPTE_MAP_2M)) {
+                       paddr >>= LSHFT_FLPTE_PPN_2M;
+                       paddr <<= RSHFT_FLPTE_PPN_2M;
+                       paddr |= (iova & MASK_IOVA_TO_PAGEOFFSET_2M);
+               } else {
+                       paddr >>= LSHFT_FLPTE_PPN_1M;
+                       paddr <<= RSHFT_FLPTE_PPN_1M;
+                       paddr |= (iova & MASK_IOVA_TO_PAGEOFFSET_1M);
+               }
+       } else {
+               /* Second-level walk: the FLPT entry holds the SLPT base. */
+               uint32_t *slpt_base = phys_to_virt(flpte_to_slpt_base(*pte));
+               uint32_t lv2vpn = iova_to_lv2vpn(iova);
+
+               paddr |= slpt_base[lv2vpn];
+               if (CHECK_BITS(*pte, MASK_SLPTE_MAP_64K)) {
+                       paddr >>= LSHFT_SLPTE_PPN_64K;
+                       paddr <<= RSHFT_SLPTE_PPN_64K;
+                       paddr &= MASK_SLPTE_PPN_MASK_64K;
+                       paddr |= (iova & MASK_IOVA_TO_PAGEOFFSET_64K);
+               } else {
+                       paddr >>= LSHFT_SLPTE_PPN_4K;
+                       paddr <<= RSHFT_SLPTE_PPN_4K;
+                       paddr &= MASK_SLPTE_PPN_MASK_4K;
+                       paddr |= (iova & MASK_IOVA_TO_PAGEOFFSET_4K);
+               }
+       }
+
+       return paddr;
+}
+
+/*
+ * iommu_ops.get_resv_regions callback: expose the reserved NPU memory
+ * region (read in srnpu_iommu_set_resv()) as an IOMMU_RESV_DIRECT region
+ * so the core identity-maps it.
+ *
+ * NOTE(review): _data is dereferenced without a NULL check, unlike the
+ * other callbacks here — confirm it cannot be NULL on this path.
+ */
+static void srnpu_iommu_get_resv_regions(struct device *dev,
+                                        struct list_head *head)
+{
+       struct srnpu_iommu_drvdata *_data;
+       struct iommu_resv_region *region;
+
+       _data = dev_to_srnpu_iommu_drvdata(dev);
+       region = iommu_alloc_resv_region(_data->resv.paddr, _data->resv.size,
+                                        IOMMU_WRITE | IOMMU_READ,
+                                        IOMMU_RESV_DIRECT);
+       if (!region)
+               return;
+
+       list_add_tail(&region->list, head);
+}
+
+/*
+ * iommu_ops.put_resv_regions callback: free every region handed out by
+ * get_resv_regions().
+ */
+static void srnpu_iommu_put_resv_regions(struct device *dev,
+                                        struct list_head *head)
+{
+       struct iommu_resv_region *entry, *next;
+
+       list_for_each_entry_safe (entry, next, head, list)
+               kfree(entry);
+}
+
+/*
+ * IOMMU callback table. Members are selected per kernel version
+ * (add/remove_device pre-5.8, probe/release_device from 5.8) and per
+ * architecture (ARM DMA-mapping helpers vs the arm64 group callback).
+ */
+static struct iommu_ops srnpu_iommu_ops = {
+       .domain_alloc = srnpu_iommu_domain_alloc,
+       .domain_free = srnpu_iommu_domain_free,
+       .attach_dev = srnpu_iommu_attach_dev,
+       .detach_dev = srnpu_iommu_detach_dev,
+       .map = srnpu_iommu_map,
+       .unmap = srnpu_iommu_unmap,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0))
+       .map_sg = default_iommu_map_sg,
+#endif
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0))
+       .add_device = srnpu_iommu_add_device,
+       .remove_device = srnpu_iommu_remove_device,
+#else
+       .probe_device = srnpu_iommu_probe_device,
+       .release_device = srnpu_iommu_release_device,
+#ifdef CONFIG_ARM
+       .probe_finalize = srnpu_iommu_probe_finalize,
+       .def_domain_type = srnpu_iommu_def_domain_type,
+#endif
+#endif
+#ifdef CONFIG_ARM64
+       .device_group = srnpu_iommu_device_group,
+#endif
+       .iova_to_phys = srnpu_iommu_iova_to_phys,
+       .of_xlate = srnpu_iommu_xlate,
+       .get_resv_regions = srnpu_iommu_get_resv_regions,
+       .put_resv_regions = srnpu_iommu_put_resv_regions,
+       .pgsize_bitmap = SRNPU_IOMMU_PGSIZE_BITMAP,
+};
+
+/*
+ * Discover the reserved NPU memory region from the device tree. Preferred
+ * source is a "sdp_mmap" node whose "samsung,npu" property is a u32
+ * triplet {addr-hi, addr-lo, size}; the fallback is the first "reg" of a
+ * node named "samsung,npu". On any failure the resv fields stay zeroed.
+ *
+ * NOTE(review): the references returned by of_find_node_by_name() are
+ * never dropped with of_node_put() — verify whether that is intentional.
+ */
+static void srnpu_iommu_set_resv(struct srnpu_iommu_drvdata *_data)
+{
+       struct device_node *np;
+
+       np = of_find_node_by_name(NULL, "sdp_mmap");
+       if (np) {
+               u32 info[3];
+
+               if (of_property_read_u32_array(np, "samsung,npu", info, 3) < 0)
+                       return;
+
+               _data->resv.paddr = info[1];
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+               /* High 32 address bits only exist with 64-bit phys_addr_t. */
+               _data->resv.paddr += ((u64)info[0]) << 32;
+#endif
+               _data->resv.size = info[2];
+       } else {
+               struct resource res;
+
+               np = of_find_node_by_name(NULL, "samsung,npu");
+               if (!np)
+                       return;
+
+               if (of_address_to_resource(np, 0, &res) < 0)
+                       return;
+
+               _data->resv.paddr = res.start;
+               _data->resv.size = resource_size(&res);
+       }
+}
+
+/*
+ * Platform-driver probe: allocate the driver data, register the IOMMU
+ * with sysfs and the core, record the reserved region, and install this
+ * driver's callbacks on the platform bus. A bus_set_iommu() failure is
+ * only warned about, not fatal.
+ */
+static int srnpu_iommu_probe(struct platform_device *pdev)
+{
+       struct srnpu_iommu_drvdata *_data;
+       struct device *dev = &pdev->dev;
+       struct device_node *of = dev->of_node;
+       int err;
+
+       _data = devm_kzalloc(dev, sizeof(*_data), GFP_KERNEL);
+       if (!_data)
+               return -ENOMEM;
+
+       _data->dev = dev;
+
+       err = iommu_device_sysfs_add(&_data->iommu, dev, NULL, dev_name(dev));
+       if (err)
+               return err;
+
+       iommu_device_set_ops(&_data->iommu, &srnpu_iommu_ops);
+       iommu_device_set_fwnode(&_data->iommu, &of->fwnode);
+
+       err = iommu_device_register(&_data->iommu);
+       if (err)
+               goto err_sysfs;
+
+       platform_set_drvdata(pdev, _data);
+       srnpu_iommu_set_resv(_data);
+
+       err = bus_set_iommu(&platform_bus_type, &srnpu_iommu_ops);
+       if (err)
+               dev_warn(dev,
+                        "failed to set iommu-callbacks for the platform_bus");
+
+       dev_info(dev, "probed\n");
+
+       return 0;
+err_sysfs:
+       iommu_device_sysfs_remove(&_data->iommu);
+
+       return err;
+}
+
+/*
+ * Platform-driver remove: tear down the sysfs entry and core
+ * registration, and clear the platform-bus IOMMU callbacks if still set.
+ */
+static int srnpu_iommu_remove(struct platform_device *pdev)
+{
+       struct srnpu_iommu_drvdata *_data = platform_get_drvdata(pdev);
+
+       if (!_data)
+               return 0;
+
+       iommu_device_sysfs_remove(&_data->iommu);
+       iommu_device_unregister(&_data->iommu);
+
+       if (iommu_present(&platform_bus_type))
+               bus_set_iommu(&platform_bus_type, NULL);
+
+       dev_info(&pdev->dev, "removed\n");
+
+       return 0;
+}
+
+static const struct of_device_id srnpu_iommu_of_match[] = {
+       { .compatible = "samsung,srnpu-iommu" },
+       { /* sentinel */ }
+};
+
+static struct platform_driver srnpu_iommu_driver = {
+       .probe = srnpu_iommu_probe,
+       .remove = srnpu_iommu_remove,
+       .driver =
+               {
+                       .name = "srnpu_iommu",
+                       .of_match_table = of_match_ptr(srnpu_iommu_of_match),
+               },
+};
+
+/*
+ * Subsystem init: allocate the shared first-level page table (FLPT, which
+ * the hardware requires to be 16KB-aligned) and a kmem cache for
+ * 1KB-aligned second-level tables, then register the platform driver.
+ * Bails out silently when no matching DT node exists.
+ */
+static int __init srnpu_iommu_init(void)
+{
+       struct device_node *np;
+       int ret;
+
+       np = of_find_matching_node(NULL, srnpu_iommu_of_match);
+       if (!np)
+               return 0;
+       of_node_put(np);
+
+       /* Base address of FLPT must be aligned in 16 KB. */
+       srnpu_iommu_flpt = alloc_pages_exact(FLPT_SIZE, GFP_KERNEL);
+       if (!srnpu_iommu_flpt)
+               return -ENOMEM;
+       if (!IS_ALIGNED(virt_to_phys(srnpu_iommu_flpt), SZ_16K)) {
+               /* The old code leaked the pages on alignment failure. */
+               ret = -ENOMEM;
+               goto err_cleanup_flpt;
+       }
+       memset(srnpu_iommu_flpt, 0x0, FLPT_SIZE);
+
+       /* Base address of SLPT must be aligned in 1 KB. */
+       srnpu_iommu_slpt_cache =
+               kmem_cache_create("srnpu_iommu_slpt_cache", SLPT_SIZE, BIT(10),
+                                 SLAB_HWCACHE_ALIGN, NULL);
+       if (!srnpu_iommu_slpt_cache) {
+               ret = -ENOMEM;
+               goto err_cleanup_flpt;
+       }
+
+       ret = platform_driver_register(&srnpu_iommu_driver);
+       if (ret)
+               goto err_cleanup_slpt;
+
+       return 0;
+
+err_cleanup_slpt:
+       /* The old code also leaked the cache and FLPT on register failure. */
+       kmem_cache_destroy(srnpu_iommu_slpt_cache);
+       srnpu_iommu_slpt_cache = NULL;
+err_cleanup_flpt:
+       free_pages_exact(srnpu_iommu_flpt, FLPT_SIZE);
+       srnpu_iommu_flpt = NULL;
+
+       return ret;
+}
+subsys_initcall(srnpu_iommu_init);
index ffcace8..0ee808b 100644 (file)
@@ -511,4 +511,5 @@ source "drivers/misc/cardreader/Kconfig"
 source "drivers/misc/habanalabs/Kconfig"
 source "drivers/misc/uacce/Kconfig"
 source "drivers/misc/pvpanic/Kconfig"
+source "drivers/misc/trinity/Kconfig"
 endmenu
index 2583008..cd2fc63 100644 (file)
@@ -61,3 +61,4 @@ obj-$(CONFIG_XILINX_SDFEC)    += xilinx_sdfec.o
 obj-$(CONFIG_HISI_HIKEY_USB)   += hisi_hikey_usb.o
 obj-$(CONFIG_HI6421V600_IRQ)   += hi6421v600-irq.o
 obj-$(CONFIG_TIZEN_INFORM_REBOOT)      += tizen-inform-reboot.o
+obj-$(CONFIG_TRINITY)          += trinity/
diff --git a/drivers/misc/trinity/Kconfig b/drivers/misc/trinity/Kconfig
new file mode 100644 (file)
index 0000000..f4a724e
--- /dev/null
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config TRINITY
+       bool "SR Neural Processing Unit Driver"
+       default n
+       help
+               Select this option to enable driver support for Samsung
+               Research (SR) Neural Processing Unit (NPU). This driver works
+               as a base driver of the other drivers for Trinity device family
+               so that this option should be enabled to support Trinity
+               Vision (TRIV), Trinity Vision 2 (TRIV2), and Trinity Audio
+               (TRIA).
+
+config TRINITY_HWMEM
+       bool "Trinity DMA Buffer Manager"
+       depends on TRINITY
+       default n
+       help
+               Choose this option to enable the Trinity DMA buffer manager,
+               used by Trinity device drivers to allocate DMA buffers. This
+               enables userspace programs to allocate DMA buffers via the
+               Trinity device nodes such as /dev/triv-N and /dev/triv2-N.
+
+config TRINITY_SCHED
+       bool "Trinity Task Scheduler"
+       depends on TRINITY
+       default n
+       help
+               Choose this option to enable the Trinity Task Scheduler.
+
+config TRINITY_SCHED_SR
+       bool "Trinity Task Scheduler by Samsung Research"
+       depends on TRINITY_SCHED
+       default n
+       help
+               Choose this option to enable SR Task Scheduler via the
+               Trinity Task Scheduler.
+
+config TRINITY_VISION2
+       tristate "SR Trinity Vision 2 Driver"
+       depends on TRINITY && TRINITY_HWMEM
+       default n
+       help
+               Select this option to enable driver support for a Samsung
+               Research (SR) Neural Processing Unit (NPU), Trinity Vision 2.
+               This driver enables userspace system library to access the
+               device via /dev/triv2-N.
+
+config TRINITY_SYSFS
+       bool "Trinity SYSFS support"
+       depends on TRINITY && SYSFS
+       default n
+       help
+               Choose this option to enable the Trinity SYSFS interface.
+
+config TRINITY_DEBUG
+       bool "Debugging capability for trinity drivers including debugfs"
+       depends on TRINITY && DEBUG_FS
+       default n
+       help
+               Select this option to enable debugging support for a Samsung
+               Research (SR) Neural Processing Unit (NPU).
+
+config TRINITY_MONITOR
+       bool "Device status monitor for trinity drivers"
+       depends on TRINITY
+       default n
+       help
+               Select this option to enable device monitor support for a Samsung
+               Research (SR) Neural Processing Unit (NPU).
+
+config TRINITY_FPGA
+       bool "Enable some workaround codes for FPGA environment"
+       depends on TRINITY_VISION2 && TRINITY_HWMEM
+       default n
+       help
+               Select this option to enable FPGA-workaround codes.
+
+config TRINITY_FPGA_DEBUG
+       bool "Enable debugging mode for FPGA environment"
+       depends on TRINITY_FPGA
+       default n
+       help
+               Select this option to enable FPGA debugging mode.
+
+choice
+       prompt "Select supported platform"
+       depends on TRINITY
+       default TRINITY_SUPPORT_FVP
+
+config TRINITY_SUPPORT_FVP
+       bool "Arm FVP support"
+
+config TRINITY_SUPPORT_FPGA
+       depends on TRINITY_FPGA
+       bool "Xilinx FPGA support"
+
+endchoice
diff --git a/drivers/misc/trinity/Makefile b/drivers/misc/trinity/Makefile
new file mode 100644 (file)
index 0000000..a1b5531
--- /dev/null
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_TRINITY_VISION2) += trinity_vision2.o
+
+CFLAGS_trinity_trace.o += -I$(src)
+trinity-y                      := trinity.o trinity_vision2_drv.o trinity_stat.o
+trinity-$(CONFIG_TRINITY_SCHED)        += trinity_sched.o
+trinity-$(CONFIG_TRINITY_SCHED_SR)     += trinity_sched_sr.o
+trinity-$(CONFIG_TRINITY_HWMEM)        += trinity_hwmem.o trinity_resv_mem.o
+trinity-$(CONFIG_TRINITY_FPGA) += trinity_hwmem_iommu_helper.o
+trinity-$(CONFIG_TRINITY_SYSFS)        += trinity_sysfs.o
+trinity-$(CONFIG_TRINITY_DEBUG)        += trinity_debug.o trinity_trace.o
+trinity-$(CONFIG_TRINITY_MONITOR)      += trinity_monitor.o
+trinity-$(CONFIG_TRINITY_SUPPORT_FVP)  += support/arm_fvp.o
+trinity-$(CONFIG_TRINITY_SUPPORT_FPGA) += support/xilinx_fpga.o
+
+trinity_vision2-objs := $(trinity-y)
diff --git a/drivers/misc/trinity/support/arm_fvp.c b/drivers/misc/trinity/support/arm_fvp.c
new file mode 100644 (file)
index 0000000..78a68b1
--- /dev/null
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "../trinity_common.h"
+#include <linux/of_address.h>
+#include <linux/pm_runtime.h>
+
+/**
+ * trinity_get_dma_memory() - locate the NPU DMA memory region (Arm FVP)
+ *
+ * Reads the first "reg" of the DT node named "samsung,npu". The device
+ * address equals the physical address (direct mapping on FVP).
+ *
+ * Returns 0 on success, -EINVAL on NULL arguments, -ENOENT when the node
+ * is absent, or the of_address_to_resource() error.
+ */
+int trinity_get_dma_memory(struct device *dev, phys_addr_t *paddr,
+                          dma_addr_t *daddr, size_t *size)
+{
+       struct device_node *np;
+       struct resource res;
+       int err;
+
+       if (!dev || !paddr || !daddr || !size)
+               return -EINVAL;
+
+       np = of_find_node_by_name(NULL, "samsung,npu");
+       if (!np)
+               return -ENOENT;
+
+       err = of_address_to_resource(np, 0, &res);
+       /* Fix: drop the reference taken by of_find_node_by_name(). */
+       of_node_put(np);
+       if (err < 0)
+               return err;
+
+       *paddr = res.start;
+       *daddr = *paddr; /* direct mapping */
+       *size = resource_size(&res);
+
+       dev_info(dev, "Detected DMA memory region: %lx-%lx\n",
+                (unsigned long)*paddr, (unsigned long)(*paddr + *size));
+       return 0;
+}
+
+/* Arm FVP has no separate external memory region. */
+int trinity_get_extern_memory(struct device *dev, phys_addr_t *paddr,
+                             dma_addr_t *daddr, size_t *size)
+{
+       return -ENOENT;
+}
+
+/* No platform-specific runtime-PM setup needed on FVP. */
+int trinity_pm_runtime_init(struct device *dev)
+{
+       return 0;
+}
+
+/* Pin the device active: disallow runtime suspend. */
+int trinity_pm_runtime_forbid(struct device *dev)
+{
+       pm_runtime_forbid(dev);
+       return 0;
+}
+
+/* Re-allow runtime suspend after trinity_pm_runtime_forbid(). */
+void trinity_pm_runtime_allow(struct device *dev)
+{
+       pm_runtime_allow(dev);
+}
+
+/* No-op on FVP: nothing to attach for runtime PM. */
+void trinity_pm_runtime_attach(struct trinity_driver *drv)
+{
+}
+
+/* No-op on FVP: the model has no device reset. */
+void trinity_reset_device(struct device *dev, bool do_test)
+{
+}
+
+/* No-op on FVP: IRQ affinity is not tuned. */
+void trinity_set_irq_affinity(int irq)
+{
+}
+
+/* No-op on FVP: no invalid-access monitoring hardware. */
+void trinity_monitor_invalid_access(void)
+{
+}
diff --git a/drivers/misc/trinity/support/xilinx_fpga.c b/drivers/misc/trinity/support/xilinx_fpga.c
new file mode 100644 (file)
index 0000000..66ab84e
--- /dev/null
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "../trinity_common.h"
+#include <linux/pm_runtime.h>
+
+/**
+ * trinity_get_dma_memory() - locate the NPU DMA memory region (Xilinx FPGA)
+ *
+ * Reads the "samsung,dma" u64 triplet {phys, dma, size} from the DT node
+ * named "trinity".
+ *
+ * Returns 0 on success, -EINVAL on NULL arguments, -ENOENT when the node
+ * is absent, or the of_property_read_u64_array() error.
+ */
+int trinity_get_dma_memory(struct device *dev, phys_addr_t *paddr,
+                          dma_addr_t *daddr, size_t *size)
+{
+       struct device_node *np;
+       u64 info[3];
+       int err;
+
+       if (!dev || !paddr || !daddr || !size)
+               return -EINVAL;
+
+       np = of_find_node_by_name(NULL, "trinity");
+       if (!np)
+               return -ENOENT;
+
+       err = of_property_read_u64_array(np, "samsung,dma", info, 3);
+       /* Fix: drop the reference taken by of_find_node_by_name(). */
+       of_node_put(np);
+       if (err < 0)
+               return err;
+
+       *paddr = info[0];
+       *daddr = info[1];
+       *size = info[2];
+
+       dev_info(dev, "Detected DMA memory region: %lx-%lx\n",
+                (unsigned long)*paddr, (unsigned long)(*paddr + *size));
+       return 0;
+}
+
+/* The FPGA platform has no separate external memory region. */
+int trinity_get_extern_memory(struct device *dev, phys_addr_t *paddr,
+                             dma_addr_t *daddr, size_t *size)
+{
+       return -ENOENT;
+}
+
+/*
+ * No platform-specific runtime-PM setup needed on FPGA.
+ *
+ * NOTE(review): this takes struct trinity_driver * while the arm_fvp.c
+ * alternative takes struct device *. These files are config-selected
+ * implementations of the same interface; one signature must not match
+ * the shared prototype — verify against trinity_common.h.
+ */
+int trinity_pm_runtime_init(struct trinity_driver *drv)
+{
+       return 0;
+}
+
+/* Pin the device active: disallow runtime suspend. */
+int trinity_pm_runtime_forbid(struct device *dev)
+{
+       pm_runtime_forbid(dev);
+       return 0;
+}
+
+/* Re-allow runtime suspend after trinity_pm_runtime_forbid(). */
+void trinity_pm_runtime_allow(struct device *dev)
+{
+       pm_runtime_allow(dev);
+}
+
+/* No-op on FPGA: nothing to attach for runtime PM. */
+void trinity_pm_runtime_attach(struct trinity_driver *drv)
+{
+}
+
+/* No-op on FPGA: no device reset implemented. */
+void trinity_reset_device(struct device *dev, bool do_test)
+{
+}
+
+/* No-op on FPGA: IRQ affinity is not tuned. */
+void trinity_set_irq_affinity(int irq)
+{
+}
+
+/* No-op on FPGA: no invalid-access monitoring. */
+void trinity_monitor_invalid_access(void)
+{
+}
diff --git a/drivers/misc/trinity/trinity.c b/drivers/misc/trinity/trinity.c
new file mode 100644 (file)
index 0000000..5abb94e
--- /dev/null
@@ -0,0 +1,1328 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Base device driver for Samsung Research Trinity device family support
+ *
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Wook Song <wook16.song@samsung.com>
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#include <asm/cacheflush.h>
+#include <linux/bitmap.h>
+#include <linux/device.h>
+#include <linux/dma-buf.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/iommu.h>
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_iommu.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/stddef.h>
+#include <linux/uaccess.h>
+
+#include "trinity_common.h"
+#include "trinity_monitor.h"
+#include "trinity_resv_mem.h"
+#include "trinity_stat.h"
+#include "trinity_trace.h"
+
+#define BASE_DEV_NAME "trinity"
+
+#ifndef TRUE
+#define TRUE 1
+#endif /* TRUE */
+
+#ifndef FALSE
+#define FALSE 0
+#endif /* FALSE */
+
+#ifdef CONFIG_TRINITY_FPGA
+#define TRINITY_PADDR_BASE (0x400000000)
+#else
+#define TRINITY_PADDR_BASE (0x0)
+#endif
+
+#define TRINITY_MODEL_HASH_BITS 10
+#define TRINITY_MODEL_HASH_SIZE (1 << TRINITY_MODEL_HASH_BITS)
+
+#define TRINITY_DEVVER(drv) (drv->desc->ver >> TRINITY_SHIFT_DEV)
+
+static struct hlist_bl_head trinity_model_node_hlist[TRINITY_MODEL_HASH_SIZE];
+
+/* A global lock for shared static variables such as dev_bitmap */
+static DEFINE_SPINLOCK(trinity_lock);
+
+/* A bitmap to keep track of active Trinity devices */
+static unsigned long dev_bitmap[TRINITY_DEV_END];
+
+static void trinity_model_get(struct trinity_model *model);
+static void trinity_model_put(struct trinity_model *model);
+
+/*
+ * Translate a device address to a physical address: via the IOMMU when a
+ * domain exists, otherwise by adding the platform's fixed DMA offset
+ * (nonzero only on the FPGA configuration).
+ */
+phys_addr_t trinity_get_paddr(struct iommu_domain *domain, dma_addr_t daddr)
+{
+       if (domain)
+               return iommu_iova_to_phys(domain, daddr);
+
+       return TRINITY_PADDR_BASE + daddr;
+}
+
+/*
+ * Complete a request: collect its profile data (warn on failure), release
+ * the imported input dma-buf, update the device stats, and drop the
+ * request's reference on its model.
+ */
+void trinity_finish_req(struct trinity_driver *drv, struct trinity_req *req)
+{
+       if (drv->desc->check_profile(drv, req) < 0)
+               dev_warn(drv_to_dev_ptr(drv),
+                        "Unable to get profile data from NPU\n");
+       trinity_hwmem_import_dmabuf_end(&req->input.import_info);
+       trinity_stat_finish_req(drv, req);
+       trinity_model_put(req->model);
+}
+
+/*
+ * Generate a model id: a wrapping per-boot counter in the low
+ * TRINITY_SHIFT_MODEL_ID bits, with the model's dma-buf fd in the bits
+ * above (extracted back by trinity_model_id_to_dbuf_fd()). The counter is
+ * protected by trinity_lock.
+ */
+static uint64_t trinity_gen_model_id(int32_t dbuf_fd)
+{
+       static uint32_t id;
+       uint64_t mid = 0;
+
+       spin_lock(&trinity_lock);
+       if (++id >= (1 << TRINITY_SHIFT_MODEL_ID))
+               id = 0;
+       mid = id;
+       spin_unlock(&trinity_lock);
+
+       /*
+        * Widen before shifting: dbuf_fd is a 32-bit int, so shifting it
+        * in int arithmetic can overflow (undefined behavior) and discard
+        * the fd bits that the decoder reads from the upper half of mid.
+        */
+       mid |= ((uint64_t)dbuf_fd << TRINITY_SHIFT_MODEL_ID);
+
+       return mid;
+}
+
+/* Recover the dma-buf fd encoded in the upper bits of a model id. */
+static int32_t trinity_model_id_to_dbuf_fd(uint64_t id)
+{
+       return (id >> TRINITY_SHIFT_MODEL_ID) & UINT_MAX;
+}
+
+/* Initialize every bucket of the global model hash table. */
+static void trinity_model_htable_init(void)
+{
+       int i;
+
+       for (i = 0; i < TRINITY_MODEL_HASH_SIZE; ++i)
+               INIT_HLIST_BL_HEAD(&trinity_model_node_hlist[i]);
+}
+
+/*
+ * Fill @ht with a view of the global model hash table (all devices share
+ * one table; @drv is currently unused).
+ */
+void trinity_init_model_htable(const struct trinity_driver *drv,
+                              struct trinity_model_htable *ht)
+{
+       ht->ht_heads = trinity_model_node_hlist;
+       ht->hash_size = TRINITY_MODEL_HASH_SIZE;
+       ht->hash_bits = TRINITY_MODEL_HASH_BITS;
+}
+
+/*
+ * Look up a registered model by its full id. The bucket is derived from
+ * the dma-buf fd embedded in the id; the bucket's bit-lock is held during
+ * the scan. Returns the model or NULL when not found (no reference is
+ * taken here).
+ */
+static struct trinity_model *
+trinity_get_model_by_id(const struct trinity_driver *drv, const uint64_t id)
+{
+       struct trinity_model_htable ht;
+       struct hlist_bl_node *hn;
+       struct trinity_model *hm;
+       unsigned long key;
+       int32_t dbuf_fd;
+       bool found = false;
+
+       trinity_init_model_htable(drv, &ht);
+
+       dbuf_fd = trinity_model_id_to_dbuf_fd(id);
+       key = hash_long(dbuf_fd, ht.hash_bits);
+       hm = NULL;
+
+       hlist_bl_lock(&(ht.ht_heads[key]));
+       hlist_bl_for_each_entry (hm, hn, &(ht.ht_heads[key]), hnode) {
+               if (hm->config.id == id) {
+                       found = true;
+                       break;
+               }
+       }
+       hlist_bl_unlock(&(ht.ht_heads[key]));
+
+       return found ? hm : NULL;
+}
+
+/**
+ * trinity_register_model - Registers a model to the internal hashtable. Note
+ *             that the device is responsible for the hashtable maintainance.
+ *
+ * @drv: An instance of the trinity driver
+ * @model: Model information to be registered
+ *
+ * Returns 0 and sets model->id with a valid value, which is unique system-wide,
+ * on success. Ohterwise, returns negative error.
+ */
+int32_t trinity_register_model(struct trinity_driver *drv,
+                              struct trinity_model *model)
+{
+       struct trinity_model_htable ht;
+       unsigned long key;
+       int32_t ret;
+
+       ret = trinity_hwmem_import_dmabuf_begin(drv_to_dev_ptr(drv),
+                                               model->config.dbuf_fd,
+                                               &model->import_info);
+       if (ret)
+               return ret;
+
+#ifdef ARM
+       /* sync model program data */
+       __cpuc_flush_dcache_area(model->import_info.addr,
+                                model->import_info.buf->size);
+#endif
+
+       model->config.id = trinity_gen_model_id(model->config.dbuf_fd);
+       model->owner_id = trinity_get_app_id();
+
+       INIT_HLIST_BL_NODE(&model->hnode);
+
+       trinity_init_model_htable(drv, &ht);
+
+       key = hash_long(model->config.dbuf_fd, ht.hash_bits);
+
+       hlist_bl_lock(&(ht.ht_heads[key]));
+       hlist_bl_add_head(&model->hnode, &ht.ht_heads[key]);
+       hlist_bl_unlock(&(ht.ht_heads[key]));
+
+       kref_init(&model->refcnt);
+
+       return 0;
+}
+
+/* kref release callback: release the imported dma-buf and free the model. */
+static void trinity_destroy_model(struct kref *refcnt)
+{
+       struct trinity_model *model =
+               container_of(refcnt, struct trinity_model, refcnt);
+
+       trinity_hwmem_import_dmabuf_end(&model->import_info);
+       kfree(model);
+}
+
+/* Take a reference on @model; NULL-safe. */
+static void trinity_model_get(struct trinity_model *model)
+{
+       if (!model)
+               return;
+
+       kref_get(&model->refcnt);
+}
+
+/* Drop a reference on @model, destroying it on the last put; NULL-safe. */
+static void trinity_model_put(struct trinity_model *model)
+{
+       if (!model)
+               return;
+
+       kref_put(&model->refcnt, trinity_destroy_model);
+}
+
+/**
+ * trinity_deregister_model - Deregisters the model with a given id from the
+ *             table
+ *
+ * @drv: An instance of the trinity driver
+ * @id: An id of the model to be deregistered
+ *
+ * Returns 0 on success. Otherwise, returns negative error.
+ */
+int32_t trinity_deregister_model(const struct trinity_driver *drv,
+                                const uint64_t id)
+{
+       int32_t dbuf_fd = trinity_model_id_to_dbuf_fd(id);
+       struct trinity_model_htable ht;
+       unsigned long key;
+       struct hlist_bl_node *hn;
+       struct trinity_model *hm = NULL;
+
+       trinity_init_model_htable(drv, &ht);
+
+       key = hash_long(dbuf_fd, ht.hash_bits);
+       hlist_bl_lock(&(ht.ht_heads[key]));
+       hlist_bl_for_each_entry (hm, hn, &(ht.ht_heads[key]), hnode) {
+               if (hm->config.id == id) {
+                       hlist_bl_del_init(&hm->hnode);
+                       break;
+               }
+       }
+       hlist_bl_unlock(&(ht.ht_heads[key]));
+
+       if (!hm)
+               return -ENOENT;
+
+       /* Drop the table's reference (from kref_init at registration). */
+       trinity_model_put(hm);
+
+       return 0;
+}
+
+/**
+ * trinity_deregister_models_owned - Deregisters models owned
+ *
+ * @drv: An instance of the trinity driver
+ *
+ * Removes every model registered by the calling application (matched by
+ * trinity_get_app_id()) and drops the table's reference on each.
+ */
+void trinity_deregister_models_owned(struct trinity_driver *drv)
+{
+       struct trinity_model_htable ht;
+       struct trinity_model *hm;
+       struct hlist_bl_node *hn;
+       int i = 0, app_id = trinity_get_app_id();
+
+       trinity_init_model_htable(drv, &ht);
+
+       /*
+        * After deleting an entry the bucket lock must be dropped before
+        * trinity_model_put() may free it, so restart the scan of the
+        * current bucket each time (i is preserved across retries).
+        */
+retry:
+       for (; i < TRINITY_MODEL_HASH_SIZE; i++) {
+               hlist_bl_lock(&(ht.ht_heads[i]));
+               hlist_bl_for_each_entry (hm, hn, &(ht.ht_heads[i]), hnode) {
+                       if (hm->owner_id == app_id) {
+                               hlist_bl_del_init(&hm->hnode);
+                               hlist_bl_unlock(&(ht.ht_heads[i]));
+
+                               trinity_model_put(hm);
+
+                               goto retry;
+                       }
+               }
+               hlist_bl_unlock(&(ht.ht_heads[i]));
+       }
+}
+
+/*
+ * Pick the scheduler for a request: the VD scheduler when the request
+ * carries a valid task handle, otherwise the SR scheduler. May return
+ * NULL when the scheduler is not registered.
+ */
+struct trinity_sched_desc *get_trinity_sched(struct trinity_req *req)
+{
+       struct trinity_sched_desc *sched;
+
+       if (req->input.config.task_handle != UINT_MAX)
+               sched = trinity_sched_find(SCHED_VD);
+       else
+               sched = trinity_sched_find(SCHED_SR);
+
+       return sched;
+}
+
+/*
+ * trinity_submit_req - Submits @req to its scheduler and, unless the output
+ *             mode is TRINITY_OUTPUT_HW, waits for its completion.
+ *
+ * @drv: An instance of the trinity driver
+ * @req: The request to submit (its input config selects the scheduler)
+ *
+ * Returns 0 on success, -EINVAL if no scheduler matches, -ETIMEDOUT if the
+ * scheduler never became ready, -ECANCELED if the request timed out or
+ * completed with an error status, or a negative error from preparation or
+ * submission.
+ */
+static int32_t trinity_submit_req(struct trinity_driver *drv,
+                                 struct trinity_req *req)
+{
+       struct trinity_sched_desc *sched;
+       struct device *dev;
+       wait_queue_head_t wq;
+       unsigned long timeout, timeout_ms;
+       unsigned long retry, max_retry = 10;
+       int ret = 0;
+
+       dev = drv_to_dev_ptr(drv);
+       sched = get_trinity_sched(req);
+       if (!sched) {
+               dev_err(dev, "Unable to find the target scheduler");
+               return -EINVAL;
+       }
+
+       /* optional req setup before submission */
+       if (drv->desc->prepare_req) {
+               ret = drv->desc->prepare_req(drv, req);
+               if (ret < 0) {
+                       dev_err(dev, "Unable to prepare req submission: %d",
+                               ret);
+                       return ret;
+               }
+       }
+
+       req->submit_retry = 0;
+       timeout_ms = req->input.config.timeout_ms;
+       /* use the default timeout if an user didn't set */
+       if (timeout_ms == 0)
+               timeout_ms = TRINITY_RUN_TIMEOUT_MSEC;
+
+       retry = 0;
+       init_waitqueue_head(&wq);
+       init_completion(&req->complete);
+
+       /* Poll the scheduler in timeout/10 slices. @wq is local and never
+        * woken, so each wait simply sleeps until its slice expires.
+        * NOTE(review): a pending signal makes the wait return negative
+        * (-ERESTARTSYS), which also leaves this loop and then proceeds as
+        * if the scheduler were ready -- confirm this is intended.
+        */
+       timeout = msecs_to_jiffies(timeout_ms);
+       while (wait_event_interruptible_timeout(wq, sched->ready(),
+                                               timeout / 10) == 0) {
+               if (retry == max_retry) {
+                       ret = -ETIMEDOUT;
+                       break;
+               }
+               retry++;
+       }
+
+       if (ret == 0) {
+               /* account the request before submission; rolled back below
+                * if the scheduler rejects it */
+               ret = trinity_stat_append_req(drv, req);
+               if (ret < 0) {
+                       dev_err(dev, "Unable to append request stat: %d", ret);
+                       return ret;
+               }
+
+               ret = sched->submit(req);
+               if (ret < 0)
+                       trinity_stat_remove_req(drv, req, true);
+       }
+
+       if (ret < 0) {
+               dev_err(dev, "Unable to submit req to scheduler: %d", ret);
+               return ret;
+       }
+
+       /* HW-output requests complete asynchronously; don't wait on them */
+       if (req->input.config.output_mode != TRINITY_OUTPUT_HW) {
+               timeout = wait_for_completion_timeout(&req->complete, timeout);
+               /* Check and handle the timeout if its handler exists */
+               if (timeout == 0) {
+                       bool canceled = false;
+
+                       dev_warn(dev, "The request timeout reached: %lu ms",
+                                timeout_ms);
+
+                       /* prefer a scheduler-level cancel; fall back to the
+                        * device-specific timeout handler */
+                       if (sched->cancel) {
+                               canceled = sched->cancel(req);
+                               if (!canceled)
+                                       dev_warn(dev, "Unable to cancel req");
+                       }
+
+                       if (!canceled)
+                               drv->desc->handle_timeout(drv, req);
+
+                       req->stat->status = TRINITY_REQ_STATUS_ERROR;
+                       ret = -ECANCELED;
+               } else if (req->stat->status == TRINITY_REQ_STATUS_ERROR) {
+                       ret = -ECANCELED;
+               } else if (drv->verbose) {
+                       dev_info(dev,
+                                "Execution Cycles: %u, Elapsed Time (us): %u",
+                                req->stat->prev_cycles, req->stat->prev_time);
+               }
+               trinity_finish_req(drv, req);
+       }
+
+       return ret;
+}
+
+/*
+ * trinity_run_input - Resolves the target model and submits the request.
+ *
+ * @drv: An instance of the trinity driver
+ * @input: The input configuration copied from the user
+ * @req: The request to be submitted
+ *
+ * Returns 0 on success. Otherwise, returns negative error.
+ */
+static int32_t trinity_run_input(struct trinity_driver *drv,
+                                struct trinity_input *input,
+                                struct trinity_req *req)
+{
+       struct trinity_model *model;
+       int32_t err;
+
+       model = trinity_get_model_by_id(drv, input->config.model_id);
+       if (!model) {
+               dev_info(drv_to_dev_ptr(drv), "Unable to find the model");
+               return -EINVAL;
+       }
+
+       /* skip to submit this req */
+       if (model->config.program_size == 0 &&
+           input->config.output_mode != TRINITY_OUTPUT_HW)
+               return 0;
+
+       trinity_model_get(model);
+
+       err = trinity_hwmem_import_dmabuf_begin(drv_to_dev_ptr(drv),
+                                               input->config.dbuf_fd,
+                                               &input->import_info);
+       if (err < 0) {
+               /* balance trinity_model_get() above; this reference was
+                * previously leaked on the import-failure path */
+               trinity_model_put(model);
+               return err;
+       }
+
+       req->model = model;
+       err = trinity_submit_req(drv, req);
+       if (err == 0)
+               return 0;
+
+       /* a canceled req already released its import via the timeout path */
+       if (err != -ECANCELED) {
+               trinity_hwmem_import_dmabuf_end(&input->import_info);
+               /* NOTE(review): trinity_finish_req() was not called on this
+                * path; confirm the model reference is dropped elsewhere */
+       }
+       return err;
+}
+
+/* Scheduler removal callback: finish, untrack, and free one request. */
+static void trinity_remove_req_cb(void *data_drv, void *data_req)
+{
+       struct trinity_driver *drv;
+       struct trinity_req *req;
+
+       drv = data_drv;
+       req = data_req;
+       if (drv == NULL || req == NULL)
+               return;
+
+       trinity_finish_req(drv, req);
+       trinity_stat_remove_req(drv, req, false);
+       drv->desc->dealloc_req(drv, req);
+}
+
+/* Removes every request this driver has queued on the VD scheduler. */
+void trinity_sched_remove_requests(struct trinity_driver *drv)
+{
+       struct trinity_sched_desc *sched = trinity_sched_find(SCHED_VD);
+
+       if (sched == NULL) {
+               dev_err(drv_to_dev_ptr(drv), "Unable to find VD scheduler");
+               return;
+       }
+
+       sched->remove_reqs(drv, trinity_remove_req_cb);
+}
+
+/**
+ * trinity_ioctl - A common callback for unlocked_ioctl() in file_operations for
+ *             a Trinity device node.
+ *
+ * @f: A file instance of the opened device node
+ * @cmd: The target IOCTL command to be handled
+ * @arg: A user argument
+ *
+ * Returns 0 on success. Ohterwise, returns negative error.
+ */
+long trinity_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+       struct trinity_driver *drv = f->private_data;
+       const struct trinity_desc *desc = drv->desc;
+       ssize_t err = 0L;
+
+       switch (cmd) {
+       case TRINITY_IOCTL_GET_VERSION: {
+               if (copy_to_user((uint32_t __user *)arg, &(desc->ver),
+                                sizeof((desc->ver))))
+                       return -EFAULT;
+
+#ifdef CONFIG_TRINITY_DEBUG
+               trace_trinity_ioctl_msg(drv->dev_id, trinity_get_app_id(),
+                                       "TRINITY_IOCTL_GET_VERSION");
+#endif
+               break;
+       }
+       case TRINITY_IOCTL_GET_API_LEVEL: {
+               uint32_t api_level = TRINITY_API_LEVEL;
+
+               if (copy_to_user((uint32_t __user *)arg, &api_level,
+                                sizeof(api_level)))
+                       return -EFAULT;
+
+#ifdef CONFIG_TRINITY_DEBUG
+               trace_trinity_ioctl_msg(drv->dev_id, trinity_get_app_id(),
+                                       "TRINITY_IOCTL_GET_API_LEVEL");
+#endif
+               break;
+       }
+       case TRINITY_IOCTL_GET_STATE: {
+               enum trinity_state ready;
+
+               ready = drv->desc->get_state(drv);
+               if (copy_to_user((enum trinity_state __user *)arg, &ready,
+                                sizeof(ready)))
+                       return -EFAULT;
+
+#ifdef CONFIG_TRINITY_DEBUG
+               trace_trinity_ioctl_msg(drv->dev_id, trinity_get_app_id(),
+                                       "TRINITY_IOCTL_GET_STATE");
+#endif
+               break;
+       }
+       case TRINITY_IOCTL_GET_TOPS: {
+               if (copy_to_user((uint32_t __user *)arg, &(drv->tops),
+                                sizeof((drv->tops))))
+                       return -EFAULT;
+
+#ifdef CONFIG_TRINITY_DEBUG
+               trace_trinity_ioctl_msg(drv->dev_id, trinity_get_app_id(),
+                                       "TRINITY_IOCTL_GET_TOPS");
+#endif
+               break;
+       }
+       case TRINITY_IOCTL_GET_DSPM: {
+               if (copy_to_user((uint32_t __user *)arg, &(drv->dspm),
+                                sizeof((drv->dspm))))
+                       return -EFAULT;
+
+#ifdef CONFIG_TRINITY_DEBUG
+               trace_trinity_ioctl_msg(drv->dev_id, trinity_get_app_id(),
+                                       "TRINITY_IOCTL_GET_DSPM");
+#endif
+               break;
+       }
+       case TRINITY_IOCTL_GET_NEXT_REQUEST: {
+               int32_t req_id = atomic_inc_return(&drv->global_req_id);
+
+               if (copy_to_user((int32_t __user *)arg, &req_id,
+                                sizeof(req_id)))
+                       return -EFAULT;
+
+#ifdef CONFIG_TRINITY_DEBUG
+               trace_trinity_ioctl_next_req(drv->dev_id, trinity_get_app_id(),
+                                            req_id);
+#endif
+               break;
+       }
+       case TRINITY_IOCTL_HWMEM_ALLOC: {
+               struct trinity_ioctl_hwmem hwmem;
+
+               if (copy_from_user(&hwmem, (size_t __user *)arg, sizeof(hwmem)))
+                       return -EFAULT;
+
+               err = trinity_hwmem_alloc(drv_to_dev_ptr(drv), hwmem.size,
+                                         hwmem.type);
+               if (err >= 0)
+                       trinity_stat_app_total_alloc(drv, hwmem.size);
+
+#ifdef CONFIG_TRINITY_DEBUG
+               trace_trinity_ioctl_hwmem_alloc(
+                       drv->dev_id, trinity_get_app_id(), hwmem.size, err);
+#endif
+               break;
+       }
+       case TRINITY_IOCTL_HWMEM_DEALLOC: {
+               struct trinity_ioctl_hwmem hwmem;
+               struct dma_buf *dbuf;
+
+               if (copy_from_user(&hwmem, (size_t __user *)arg, sizeof(hwmem)))
+                       return -EFAULT;
+
+               dbuf = dma_buf_get(hwmem.dbuf_fd);
+               if (IS_ERR(dbuf))
+                       return PTR_ERR(dbuf);
+
+               err = trinity_hwmem_free(drv_to_dev_ptr(drv), hwmem.dbuf_fd);
+               if (err == 0)
+                       trinity_stat_app_total_freed(drv, dbuf->size);
+
+#ifdef CONFIG_TRINITY_DEBUG
+               trace_trinity_ioctl_hwmem_dealloc(
+                       drv->dev_id, trinity_get_app_id(), hwmem.dbuf_fd);
+#endif
+               break;
+       }
+       case TRINITY_IOCTL_REGISTER_MODEL: {
+               struct trinity_model *model =
+                       kzalloc(sizeof(struct trinity_model), GFP_KERNEL);
+
+               if (IS_ERR_OR_NULL(model))
+                       return -ENOMEM;
+
+               if (copy_from_user(&model->config,
+                                  (struct trinity_model __user *)arg,
+                                  sizeof(model->config))) {
+                       kfree(model);
+                       return -EFAULT;
+               }
+
+               err = trinity_register_model(drv, model);
+               if (err < 0)
+                       break;
+
+               if (copy_to_user((struct trinity_model __user *)arg,
+                                &model->config, sizeof(model->config)))
+                       return -EFAULT;
+
+#ifdef CONFIG_TRINITY_DEBUG
+               trace_trinity_ioctl_register_model(
+                       drv->dev_id, trinity_get_app_id(), model->config.id,
+                       model->config.dbuf_fd,
+                       model->config.program_offset_addr,
+                       model->config.program_size);
+
+               if (TRINITY_DEVVER(drv) == 1)
+                       trace_trinity_ioctl_register_model_drv_ver1(
+                               model->config.weight_offset_addr);
+               else if (TRINITY_DEVVER(drv) == 2)
+                       trace_trinity_ioctl_register_model_drv_ver2(
+                               model->config.metadata_dbuf_fd,
+                               model->config.metadata_ext_dbuf_fd,
+                               model->config.metadata_ext_size);
+#endif
+               break;
+       }
+       case TRINITY_IOCTL_DEREGISTER_MODEL: {
+               uint64_t id;
+
+               if (copy_from_user(&id, (uint64_t __user *)arg, sizeof(id)))
+                       return -EFAULT;
+
+               err = trinity_deregister_model(drv, id);
+
+#ifdef CONFIG_TRINITY_DEBUG
+               trace_trinity_ioctl_msg(drv->dev_id, trinity_get_app_id(),
+                                       "TRINITY_IOCTL_DEREGISTER_MODEL");
+#endif
+               break;
+       }
+       case TRINITY_IOCTL_RUN_INPUT: {
+               struct trinity_req *req;
+               struct trinity_input *input;
+
+               req = drv->desc->alloc_req(drv);
+               if (!req)
+                       return -ENOMEM;
+               req->drv = drv;
+               req->time_started = ktime_get();
+
+               input = &(req->input);
+               /** run input based on config received from the user */
+               if (copy_from_user(&input->config,
+                                  (struct trinity_input __user *)arg,
+                                  sizeof(input->config))) {
+                       drv->desc->dealloc_req(drv, req);
+                       return -EACCES;
+               }
+
+               err = trinity_run_input(drv, input, req);
+               if (err < 0) {
+                       drv->desc->dealloc_req(drv, req);
+                       return err;
+               }
+
+#ifdef CONFIG_TRINITY_DEBUG
+               trace_trinity_ioctl_run_input(drv->dev_id, trinity_get_app_id(),
+                                             input->config.dbuf_fd,
+                                             input->config.model_id);
+
+               if (TRINITY_DEVVER(drv) == 1)
+                       trace_trinity_ioctl_run_input_drv_ver1(
+                               input->config.activation_offset_addr0,
+                               input->config.activation_offset_addr1);
+               else if (TRINITY_DEVVER(drv) == 2)
+                       trace_trinity_ioctl_run_input_drv_ver2(
+                               input->config.timeout_ms,
+                               input->config.priority,
+                               input->config.num_segments,
+                               input->config.input_mode,
+                               input->config.output_mode);
+#endif
+
+               if (copy_to_user((struct trinity_input __user *)arg,
+                                &input->config, sizeof(input->config))) {
+                       drv->desc->dealloc_req(drv, req);
+                       return -EACCES;
+               }
+
+               /* this will be freed when stop request is called */
+               if (!req->is_kernel)
+                       drv->desc->dealloc_req(drv, req);
+
+               break;
+       }
+       case TRINITY_IOCTL_STOP_REQUESTS: {
+               if (drv->desc->stop_reqs) {
+                       schedule_work(&drv->work_stop);
+#ifdef CONFIG_TRINITY_DEBUG
+                       trace_trinity_ioctl_msg(drv->dev_id,
+                                               trinity_get_app_id(),
+                                               "TRINITY_IOCTL_STOP_REQUESTS");
+#endif
+               } else {
+#ifdef CONFIG_TRINITY_DEBUG
+                       trace_trinity_ioctl_msg(
+                               drv->dev_id, trinity_get_app_id(),
+                               "TRINITY_IOCTL_STOP_REQUESTS: not supported");
+#endif
+               }
+               break;
+       }
+       case TRINITY_IOCTL_STOP_REQUEST: {
+               struct trinity_sched_desc *sched;
+               struct trinity_req *req;
+               int32_t req_id;
+
+               if (copy_from_user(&req_id, (uint32_t __user *)arg,
+                                  sizeof(req_id)))
+                       return -EFAULT;
+
+               sched = trinity_sched_find(SCHED_VD);
+               if (!sched) {
+                       dev_err(drv_to_dev_ptr(drv),
+                               "Unable to find VD scheduler");
+                       return -EINVAL;
+               }
+
+               req = sched->find_req(drv->dev_id, req_id);
+               if (!req || !req->is_kernel) {
+                       dev_err(drv_to_dev_ptr(drv),
+                               "Unable to find the kernel request with ID %d",
+                               req_id);
+                       return -ENOENT;
+               }
+
+               sched->cancel(req);
+
+               trinity_remove_req_cb(drv, req);
+
+#ifdef CONFIG_TRINITY_DEBUG
+               trace_trinity_ioctl_stop_req(drv->dev_id, trinity_get_app_id(),
+                                            req_id);
+#endif
+               break;
+       }
+       case TRINITY_IOCTL_STAT_CURRENT_APP: {
+               struct trinity_ioctl_stat_app ioctl_stat_app;
+
+               if (copy_from_user(&ioctl_stat_app,
+                                  (struct trinity_ioctl_stat_app __user *)arg,
+                                  sizeof(ioctl_stat_app)))
+                       return -EACCES;
+
+               trinity_stat_app_copy_ioctl(drv, &ioctl_stat_app);
+
+               if (copy_to_user((struct trinity_ioctl_stat_app __user *)arg,
+                                &ioctl_stat_app, sizeof(ioctl_stat_app)))
+                       return -EACCES;
+
+#ifdef CONFIG_TRINITY_DEBUG
+               trace_trinity_ioctl_msg(drv->dev_id, trinity_get_app_id(),
+                                       "TRINITY_IOCTL_STAT_CURRENT_APP");
+#endif
+               break;
+       }
+       case TRINITY_IOCTL_STAT_APPS: {
+               struct trinity_ioctl_stat_apps ioctl_stat_apps;
+
+               if (copy_from_user(&ioctl_stat_apps,
+                                  (struct trinity_ioctl_stat_apps __user *)arg,
+                                  sizeof(ioctl_stat_apps)))
+                       return -EACCES;
+
+               trinity_stat_apps_copy_ioctl(drv, &ioctl_stat_apps);
+
+               if (copy_to_user((struct trinity_ioctl_stat_apps __user *)arg,
+                                &ioctl_stat_apps, sizeof(ioctl_stat_apps)))
+                       return -EACCES;
+
+#ifdef CONFIG_TRINITY_DEBUG
+               trace_trinity_ioctl_msg(drv->dev_id, trinity_get_app_id(),
+                                       "TRINITY_IOCTL_STAT_APPS");
+#endif
+               break;
+       }
+       case TRINITY_IOCTL_STAT_REQS: {
+               struct trinity_ioctl_stat_reqs ioctl_stat_reqs;
+
+               if (copy_from_user(&ioctl_stat_reqs,
+                                  (struct trinity_ioctl_stat_reqs __user *)arg,
+                                  sizeof(ioctl_stat_reqs)))
+                       return -EACCES;
+
+               if (ioctl_stat_reqs.app_id == 0)
+                       ioctl_stat_reqs.app_id = trinity_get_app_id();
+
+               trinity_stat_reqs_copy_ioctl(drv, &ioctl_stat_reqs);
+
+               if (copy_to_user((struct trinity_ioctl_stat_reqs __user *)arg,
+                                &ioctl_stat_reqs, sizeof(ioctl_stat_reqs)))
+                       return -EACCES;
+
+#ifdef CONFIG_TRINITY_DEBUG
+               trace_trinity_ioctl_msg(drv->dev_id, trinity_get_app_id(),
+                                       "TRINITY_IOCTL_STAT_REQS");
+#endif
+               break;
+       }
+       case TRINITY_IOCTL_GET_PROFILE_META: {
+               struct trinity_ioctl_profile_meta profile;
+
+               if (copy_from_user(
+                           &profile,
+                           (struct trinity_ioctl_profile_meta __user *)arg,
+                           sizeof(profile)))
+                       return -EACCES;
+
+               if (drv->desc->get_profile_meta) {
+                       err = drv->desc->get_profile_meta(drv, &profile);
+               } else {
+                       profile.total_cycles = -1;
+                       profile.total_ops = 0;
+                       profile.profile_size = 0;
+                       profile.input_footprint = -1;
+                       profile.output_footprint = -1;
+               }
+
+               if (copy_to_user((struct trinity_ioctl_profile_meta __user *)arg,
+                                &profile, sizeof(profile)))
+                       return -EACCES;
+
+#ifdef CONFIG_TRINITY_DEBUG
+               trace_trinity_ioctl_get_profile_meta(drv->dev_id,
+                                                    trinity_get_app_id(),
+                                                    profile.req_id,
+                                                    profile.profile_size);
+#endif
+               break;
+       }
+       case TRINITY_IOCTL_GET_PROFILE_BUFF: {
+               struct trinity_ioctl_profile_buff profile;
+
+               if (copy_from_user(
+                           &profile,
+                           (struct trinity_ioctl_profile_buff __user *)arg,
+                           sizeof(profile)))
+                       return -EACCES;
+
+               if (drv->desc->get_profile_buff)
+                       err = drv->desc->get_profile_buff(drv, &profile);
+
+               if (copy_to_user((struct trinity_ioctl_profile_buff __user *)arg,
+                                &profile, sizeof(profile)))
+                       return -EACCES;
+
+#ifdef CONFIG_TRINITY_DEBUG
+               trace_trinity_ioctl_get_profile_buff(
+                       drv->dev_id, trinity_get_app_id(), profile.req_id,
+                       profile.profile_pos, profile.profile_size);
+#endif
+               break;
+       }
+       case TRINITY_IOCTL_FPGA_MEMCPY: {
+               struct trinity_ioctl_fpga_memcpy fpga;
+               struct trinity_hwmem_import import_info;
+               struct iommu_domain *domain;
+               phys_addr_t paddr;
+               void __iomem *vaddr;
+               uint32_t val;
+               uint64_t i;
+
+               if (copy_from_user(
+                           &fpga,
+                           (struct trinity_ioctl_fpga_memcpy __user *)arg,
+                           sizeof(fpga)))
+                       return -EFAULT;
+
+               /* make sure that dbuf_off is PAGE_SIZE aligned */
+               if (!IS_ALIGNED(fpga.dbuf_off, PAGE_SIZE)) {
+                       dev_err(drv->dev, "Unaligned dmabuf offset: 0x%x\n",
+                               fpga.dbuf_off);
+                       return -EINVAL;
+               }
+
+               err = trinity_hwmem_import_dmabuf_begin(
+                       drv_to_dev_ptr(drv), fpga.dbuf_fd, &import_info);
+               if (err)
+                       return err;
+
+               domain = iommu_get_domain_for_dev(drv->dev);
+               paddr = trinity_get_paddr(domain, import_info.dma_addr);
+
+               trinity_hwmem_import_dmabuf_end(&import_info);
+
+               vaddr = ioremap(paddr + fpga.dbuf_off,
+                               PAGE_ALIGN(fpga.user_size));
+               if (vaddr == NULL) {
+                       dev_err(drv->dev, "Unable to ioremap %lx",
+                               (unsigned long)paddr);
+                       return -EINVAL;
+               }
+
+               for (i = 0; i < fpga.user_size; i += sizeof(uint32_t)) {
+                       val = ioread32((char *)vaddr + i);
+                       if (copy_to_user(((char __user *)fpga.user_addr) + i,
+                                        &val, sizeof(uint32_t))) {
+                               err = -EFAULT;
+                               break;
+                       }
+               }
+
+               iounmap(vaddr);
+
+               break;
+       }
+       default:
+               return -ENOTTY;
+       }
+
+       return err;
+}
+
+/**
+ * trinity_release - A common callback for close() in file_operations for a
+ *             Trinity device node. If there are device-specific data to be
+ *             cleaned-up, it is required to clean them up before invoke this
+ *             callback.
+ *
+ * @inode: Inode to be closed
+ * @file: File to be closed
+ *
+ * Returns 0 on success. Otherwise, returns negative error.
+ */
+int trinity_release(struct inode *inode, struct file *file)
+{
+       struct trinity_driver *drv;
+
+       drv = file->private_data;
+
+       if (drv->verbose)
+               dev_info(drv_to_dev_ptr(drv), "%s\n", "Device closed");
+
+       trinity_stat_app_set_status(drv, TRINITY_APP_STATUS_TERMINATED);
+
+       mutex_lock(&drv->lock);
+       drv->opened = drv->opened - 1;
+       /* last handle gone: quiesce the device before pausing it */
+       if (drv->opened == 0) {
+               /* block newly incoming requests */
+               trinity_sched_suspend();
+
+               /* wait already submitted requests */
+               if (drv->desc->drain_reqs)
+                       drv->desc->drain_reqs(drv);
+
+               /* deregister models owned by this device handle */
+               trinity_deregister_models_owned(drv);
+               /* remove all kernel requests submitted before */
+               trinity_sched_remove_requests(drv);
+
+               drv->desc->set_state(drv, TRINITY_STATE_PAUSE);
+
+               trinity_sched_resume();
+       }
+       mutex_unlock(&drv->lock);
+
+       return 0;
+}
+
+/* Returns true when no trinity device of any type is registered. */
+static bool trinity_is_empty(void)
+{
+       enum trinity_dev_type type;
+
+       spin_lock(&trinity_lock);
+       /* skip TRINITY_DEV_UNKNOWN; check each real device type's bitmap */
+       for (type = TRINITY_DEV_UNKNOWN + 1; type < TRINITY_DEV_END; type++) {
+               if (find_first_bit(&dev_bitmap[type], TRINITY_DEV_EACH_MAX) !=
+                   TRINITY_DEV_EACH_MAX) {
+                       spin_unlock(&trinity_lock);
+                       return false;
+               }
+       }
+       spin_unlock(&trinity_lock);
+
+       return true;
+}
+
+/* Requests the READY state and polls until the device reports it.
+ * Returns 0 once ready, -ETIMEDOUT after all attempts are exhausted.
+ */
+int trinity_wait_ready(struct trinity_driver *drv)
+{
+       const unsigned long tick = HZ / 100UL; /* 1/100 seconds*/
+       unsigned int attempt;
+       wait_queue_head_t wq;
+
+       drv->desc->set_state(drv, TRINITY_STATE_READY);
+
+       init_waitqueue_head(&wq);
+       /* try to ensure that NPU is in the ready state */
+       for (attempt = 0; attempt <= 10; attempt++) {
+               if (wait_event_timeout(wq,
+                                      drv->desc->get_state(drv) ==
+                                              TRINITY_STATE_READY,
+                                      tick) != 0)
+                       return 0;
+       }
+
+       /* regarded as failure */
+       return -ETIMEDOUT;
+}
+
+/**
+ * trinity_open - A common callback for open() in file_operations for a Trinity
+ *             device node. If device-specific open() is required, this
+ *             callback should be invoked by that open().
+ *
+ * @inode: Inode to be opened
+ * @f: File to be opened
+ *
+ * Returns 0 on success. Otherwise, returns negative error.
+ */
+int trinity_open(struct inode *inode, struct file *f)
+{
+       struct miscdevice *miscdev;
+       struct trinity_driver *drv;
+       int ret = 0;
+
+       miscdev = (struct miscdevice *)f->private_data;
+       drv = container_of(miscdev, struct trinity_driver, mdev);
+       f->private_data = drv;
+
+       mutex_lock(&drv->lock);
+       /** remove PAUSE set on the CP of the NPU */
+       if (drv->opened == 0) {
+               ret = trinity_wait_ready(drv);
+               if (ret != 0)
+                       goto out;
+       }
+       drv->opened = drv->opened + 1;
+
+       if (drv->verbose)
+               dev_info(drv_to_dev_ptr(drv), "%s\n", "Device opened");
+
+       trinity_stat_app_set_status(drv, TRINITY_APP_STATUS_STARTED);
+
+out:
+       /* propagate trinity_wait_ready() failure; the previous code
+        * returned 0 unconditionally, so a device that never became
+        * ready was still reported as successfully opened */
+       mutex_unlock(&drv->lock);
+
+       return ret;
+}
+
+/* Declares the device's reserved DMA region when the platform provides
+ * one; a missing region is not an error.
+ */
+static int trinity_declare_dma_memory(struct device *dev)
+{
+       phys_addr_t paddr;
+       dma_addr_t daddr;
+       size_t size;
+       int ret;
+
+       ret = trinity_get_dma_memory(dev, &paddr, &daddr, &size);
+       if (ret < 0) {
+               dev_info(dev, "No available dma memory, skipping");
+               return 0;
+       }
+
+       ret = trinity_declare_resv_mem(paddr, daddr, size);
+       if (ret >= 0)
+               return 0;
+
+       dev_err(dev, "Failed to declare reserved memory: %d\n", ret);
+       return ret;
+}
+
+/* Releases the reserved DMA region declared during initialization. */
+static void trinity_release_dma_memory(void)
+{
+       trinity_release_resv_mem();
+}
+
+/* One-time driver-global initialization, performed only when the very
+ * first trinity device is being probed (device bitmap still empty).
+ * Sub-system failures are logged as warnings but do not fail the probe.
+ */
+static void trinity_common_init(struct device *dev)
+{
+       if (!trinity_is_empty())
+               return;
+
+       trinity_reset_device(dev, true);
+       trinity_model_htable_init();
+
+       if (trinity_monitor_init(dev) < 0)
+               dev_warn(dev, "Failed to initialize monitor\n");
+
+       if (trinity_pm_runtime_init(dev) < 0)
+               dev_warn(dev, "Unable to initialize runtime pm\n");
+
+       if (trinity_debug_init() < 0)
+               dev_warn(dev, "Unable to initialize debugfs\n");
+
+       if (trinity_sched_init(dev) < 0)
+               dev_warn(dev, "Unable to initialize scheduler\n");
+
+       if (trinity_declare_dma_memory(dev) < 0)
+               dev_warn(dev, "Failed to declare DMA memory\n");
+}
+
+/* Tears down driver-global state once the last trinity device is gone. */
+static void trinity_common_exit(void)
+{
+       if (trinity_is_empty()) {
+               trinity_release_dma_memory();
+               trinity_debug_exit();
+               trinity_sched_exit();
+       }
+}
+
+/* Allocates a per-type device id from the bitmap and derives the node
+ * name ("<type>-<id>"). Returns -EEXIST when all ids are taken, -ENOMEM
+ * when the name cannot be allocated.
+ */
+static int trinity_set_device_id(struct trinity_driver *drv)
+{
+       struct device *dev = drv_to_dev_ptr(drv);
+       const struct trinity_desc *desc = drv->desc;
+
+       spin_lock(&trinity_lock);
+       drv->dev_id =
+               find_first_zero_bit(&dev_bitmap[dev->id], TRINITY_DEV_EACH_MAX);
+       if (drv->dev_id >= TRINITY_DEV_EACH_MAX) {
+               spin_unlock(&trinity_lock);
+               return -EEXIST;
+       }
+       set_bit(drv->dev_id, &dev_bitmap[dev->id]);
+       spin_unlock(&trinity_lock);
+
+       drv->name = devm_kasprintf(dev, GFP_KERNEL, "%s-%u", desc->type,
+                                  drv->dev_id);
+
+       return IS_ERR_OR_NULL(drv->name) ? -ENOMEM : 0;
+}
+
+/* Registers the trinity driver instance as a misc character device. */
+int trinity_create_node(struct trinity_driver *drv)
+{
+       struct device *dev = drv_to_dev_ptr(drv);
+       int ret;
+
+       drv->mdev.minor = MISC_DYNAMIC_MINOR;
+       drv->mdev.parent = NULL;
+       drv->mdev.name = drv->name;
+
+       ret = misc_register(&drv->mdev);
+       if (ret < 0) {
+               dev_err(dev, "failed to register as a misc device");
+               return ret;
+       }
+
+       dev_info(dev, "misc device created!");
+       return 0;
+}
+
+/* Unregisters the misc device node created by trinity_create_node(). */
+void trinity_destroy_node(struct trinity_driver *drv)
+{
+       misc_deregister(&drv->mdev);
+}
+
+/**
+ * trinity_probe - Probes a new Trinity device. This is a standard interface to
+ * probe a Trinity family device.
+ *
+ * @pdev: Platform device structure to probe
+ * @desc: Device description to probe
+ *
+ * Returns 0 on success. Otherwise, returns negative error.
+ */
+int trinity_probe(struct platform_device *pdev, const struct trinity_desc *desc)
+{
+       struct device_node *np;
+       struct device *dev;
+       struct trinity_driver *drv;
+       int irq_out;
+       int i, err;
+
+       dev = &pdev->dev;
+       /* dev->id indexes dev_bitmap[]; derived from the version code */
+       dev->id = ((desc->ver & TRINITY_MASK_DEV) >> TRINITY_SHIFT_DEV);
+
+       /* set private data */
+       drv = devm_kzalloc(dev, sizeof(*drv), GFP_KERNEL);
+       if (drv == NULL)
+               return -ENOMEM;
+
+       platform_set_drvdata(pdev, drv);
+       dev_set_drvdata(dev, drv);
+
+       drv->dev = dev;
+       drv->desc = desc;
+
+       np = dev->of_node;
+       /* NOTE(review): of_property_match_string() returns the matched
+        * index, so a match at index > 0 is also treated as a mismatch
+        * here -- confirm the DT property always holds a single string */
+       if (of_property_match_string(np, "samsung,trinity-type", desc->type))
+               return -EPROBE_DEFER;
+
+       /* get reg info for MMREG_BASE */
+       for (i = 0; i < TRINITY_MAX_MMREGS; i++) {
+               struct resource mmreg;
+
+               err = of_address_to_resource(np, i, &mmreg);
+               if (err < 0) {
+                       /* only the first mmreg region is mandatory */
+                       if (i == 0) {
+                               dev_err(dev, "failed to get %d-th mmreg info",
+                                       i);
+                               return -EINVAL;
+                       }
+                       break;
+               }
+
+               drv->mmreg_vaddr[i] = devm_ioremap_resource(dev, &mmreg);
+               if (IS_ERR(drv->mmreg_vaddr[i])) {
+                       dev_err(dev,
+                               "failed to remap %d-th mmreg resource info", i);
+                       return PTR_ERR(drv->mmreg_vaddr[i]);
+               }
+               drv->mmreg_paddr[i] = mmreg.start;
+       }
+
+       /** get a TOPS property (mandatory) */
+       err = of_property_read_u32(np, "samsung,tops", &drv->tops);
+       if (err < 0) {
+               dev_err(dev, "failed to read 'tops' property: %d\n", err);
+               return err;
+       }
+
+       /** get a DSPM property (optional; defaults to zero) */
+       err = of_property_read_u32(np, "samsung,dspm", &drv->dspm);
+       if (err < 0) {
+               dev_info(dev, "Setting the size of DPSM to 0\n");
+               drv->dspm = 0;
+       }
+
+       /* Set IRQ handlers */
+       irq_out = platform_get_irq(pdev, 0);
+       if (irq_out < 0) {
+               dev_err(dev, "IRQ is not supported");
+               return irq_out;
+       }
+       trinity_set_irq_affinity(irq_out);
+
+       /* get the IRQ number from DT and set handlers for it */
+       err = devm_request_irq(dev, irq_out, desc->handle_irq,
+                              IRQF_TRIGGER_HIGH, desc->type, &drv->mdev);
+       if (err < 0) {
+               dev_err(dev, "failed to register handlers for IRQ %d", irq_out);
+               return err;
+       }
+
+       /** Initialize device-specific variables */
+       init_completion(&drv->complete);
+       mutex_init(&drv->lock);
+       /* NOTE(review): INIT_WORK stores desc->stop_reqs unconditionally
+        * while trinity_ioctl() checks it for NULL -- confirm every desc
+        * provides stop_reqs */
+       INIT_WORK(&drv->work_stop, desc->stop_reqs);
+       drv->mdev.fops = desc->fops;
+
+       /* global init runs only for the first probed trinity device */
+       trinity_common_init(dev);
+
+       err = trinity_set_device_id(drv);
+       if (err < 0) {
+               dev_err(dev, "Please unload old devices first (max: %d)\n",
+                       TRINITY_DEV_EACH_MAX);
+               goto err_cleanup;
+       }
+
+       err = trinity_sysfs_init(drv);
+       if (err < 0) {
+               dev_err(dev, "failed to initialize sysfs for a trinity device");
+               goto err_cleanup;
+       }
+
+       err = trinity_debug_add(drv);
+       if (err < 0) {
+               dev_err(dev,
+                       "failed to add a debugging feature to the trinity device");
+               goto err_cleanup_sysfs;
+       }
+
+       trinity_stat_init(drv);
+
+       return 0;
+
+err_cleanup_sysfs:
+       trinity_sysfs_cleanup(drv);
+
+err_cleanup:
+       /* NOTE(review): when trinity_set_device_id() failed, dev_id equals
+        * TRINITY_DEV_EACH_MAX and this clears a bit that was never set --
+        * benign within the word, but worth confirming */
+       spin_lock(&trinity_lock);
+       clear_bit(drv->dev_id, &dev_bitmap[dev->id]);
+       spin_unlock(&trinity_lock);
+
+       trinity_common_exit();
+
+       return err;
+}
+
+/**
+ * trinity_remove - Standard teardown path for a Trinity family device,
+ * undoing everything trinity_probe() set up.
+ *
+ * @pdev: Platform device structure to remove
+ * @desc: Device description (kept for interface symmetry with probe)
+ *
+ * Always returns 0.
+ */
+int trinity_remove(struct platform_device *pdev,
+                  const struct trinity_desc *desc)
+{
+       struct trinity_driver *drv = platform_get_drvdata(pdev);
+       struct device *dev = drv_to_dev_ptr(drv);
+
+       trinity_stat_fini(drv);
+       trinity_debug_remove(drv);
+       trinity_sysfs_cleanup(drv);
+
+       /* return the device id to the per-type bitmap */
+       spin_lock(&trinity_lock);
+       clear_bit(drv->dev_id, &dev_bitmap[dev->id]);
+       spin_unlock(&trinity_lock);
+
+       /* global teardown runs only when no trinity device remains */
+       trinity_common_exit();
+
+       return 0;
+}
diff --git a/drivers/misc/trinity/trinity.h b/drivers/misc/trinity/trinity.h
new file mode 120000 (symlink)
index 0000000..e4c3861
--- /dev/null
@@ -0,0 +1 @@
+../../../include/uapi/misc/trinity.h
\ No newline at end of file
diff --git a/drivers/misc/trinity/trinity_common.h b/drivers/misc/trinity/trinity_common.h
new file mode 100644 (file)
index 0000000..e5f7cfd
--- /dev/null
@@ -0,0 +1,458 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * trinity/trinity_common.h: Common header for trinity devices
+ *
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ * Copyright (C) 2020 Parichay Kapoor <pk.kapoor@samsung.com>
+ * Copyright (C) 2020 Wook Song <wook16.song@samsung.com>
+ */
+
+#ifndef __TRINITY_COMMON_H__
+#define __TRINITY_COMMON_H__
+
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iommu.h>
+#include <linux/irqreturn.h>
+#include <linux/kernel.h>
+#include <linux/list_bl.h>
+#include <linux/miscdevice.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "trinity.h"
+
+#include "trinity_hwmem.h"
+#include "trinity_sched.h"
+
+/** Default timeout to wait for opening device in jiffies */
+#define TRINITY_DEV_TIMEOUT_MSEC (3000)
+#define TRINITY_DEV_TIMEOUT     (msecs_to_jiffies(TRINITY_DEV_TIMEOUT_MSEC))
+
+/** Default timeout to wait for running input in jiffies */
+#define TRINITY_RUN_TIMEOUT_MSEC (4000)
+#define TRINITY_RUN_TIMEOUT     (msecs_to_jiffies(TRINITY_RUN_TIMEOUT_MSEC))
+
+#define TRINITY_DEV_TYPE_LEN (16)
+#define TRINITY_DEV_EACH_MAX (2)
+#define TRINITY_MAX_MMREGS   (3)
+
+/** A helper function to generate the version code of the device driver */
+#define GENVER(dev, mj, mn, ex)                                                \
+       ((dev << TRINITY_SHIFT_DEV) | (mj << TRINITY_SHIFT_MAJOR_VER) |        \
+        (mn << TRINITY_SHIFT_MINOR_VER) | (ex << TRINITY_SHIFT_EXTRA_VER))
+
+#define trinity_get_iomem_addr(base, offset) (base + offset)
+#define drv_to_dev_ptr(d)                   (d->dev)
+#define drv_to_priv(drv)                    (drv->desc->pdata)
+
+#define TRINITY_STAT_HASH_BITS (10)
+#define TRINITY_STAT_HASH_SIZE (1 << TRINITY_STAT_HASH_BITS)
+
+#define TIME_DIFF(t1, t2)    ktime_to_ms(ktime_sub(t1, t2))
+#define TIME_DIFF_US(t1, t2) ktime_to_us(ktime_sub(t1, t2))
+
+enum cpu_acc_control {
+       BEGIN,
+       END,
+};
+
+struct trinity_desc;
+struct trinity_driver;
+struct trinity_req;
+struct trinity_stat;
+struct trinity_stat_app;
+struct trinity_stat_req;
+struct trinity_model_htable;
+
+/**
+ * struct trinity_desc - a structure for device description
+ * @type: A string that indicates the type of this device.
+ * @ver: Coded version information generated via GENVER().
+ * @fops: Device-specific file_operations.
+ * @prepare_model: Device-specific model configuration function before invoking
+ *             trinity_submit_req() (if any). This requires a registered model
+ *             to the driver via &trinity_desc->register_model before.
+ * @prepare_input: Device-specific function to configure input information.
+ *             This information is passed to the device by setting the relevant
+ *             registers to use run the device.
+ * @get_state: Device-specific helper function to get the state of the device.
+ * @set_state: Device-specific helper function to set the state of the device.
+ * @run: Device-specific function to run the input with the given model.
+ *             Configuration required to run the device should be done before
+ *             invoking run()
+ * @submit_req: Device-specific function to submit a req for running the
+ *             device. Note that if there are req scheduling policies, instead
+ *             of running the req immediately, the req would be placed in the
+ *             req queue until the policy decides to run this req.
+ * @handle_irq: Device-specifix IRQ handler.
+ */
+struct trinity_desc {
+       char *type;
+       uint32_t ver;
+
+       const struct file_operations *fops;
+
+       /* Optional */
+       void (*reset)(struct trinity_driver *);
+       int32_t (*prepare_req)(struct trinity_driver *, struct trinity_req *);
+       void (*handle_timeout)(struct trinity_driver *,
+                              struct trinity_req *req);
+       void (*stop_reqs)(struct work_struct *);
+       void (*drain_reqs)(struct trinity_driver *);
+       void (*init_profile)(struct trinity_driver *, unsigned long);
+       int32_t (*check_profile)(struct trinity_driver *, struct trinity_req *);
+       int32_t (*get_profile_meta)(const struct trinity_driver *,
+                                   struct trinity_ioctl_profile_meta *);
+       int32_t (*get_profile_buff)(const struct trinity_driver *,
+                                   struct trinity_ioctl_profile_buff *);
+       void (*show_profile)(const struct trinity_driver *, int);
+       void (*destroy_profile)(const struct trinity_driver *, void *);
+
+       /* Mandatory */
+       int32_t (*idu_load)(struct trinity_driver *, const char *, bool);
+       int32_t (*idu_version)(struct trinity_driver *, uint32_t *, uint32_t *,
+                              uint32_t *);
+       int32_t (*get_state)(const struct trinity_driver *);
+       void (*set_state)(const struct trinity_driver *, enum trinity_state);
+       struct trinity_req *(*alloc_req)(struct trinity_driver *drv);
+       void (*dealloc_req)(struct trinity_driver *drv,
+                           struct trinity_req *req);
+       int32_t (*invoke_req)(struct trinity_driver *, struct trinity_req *,
+                             void *);
+       irq_handler_t handle_irq;
+};
+
+/**
+ * struct trinity_stat - A structure for representing a device's statistics.
+ */
+struct trinity_stat {
+       spinlock_t lock;
+
+       struct hlist_bl_head hlist[TRINITY_STAT_HASH_SIZE];
+       struct list_head list;
+
+       void *pdata;
+};
+
+/**
+ * struct trinity_stat_app - A structure for representing statistics for each app
+ * @app_id: Identifier for each app
+ * @hnode: Hash node
+ * @total_alloc_mem: Total allocated memory size
+ * @total_free_mem: Total freed memory size
+ */
+struct trinity_stat_app {
+       int32_t app_id; /* app identifier */
+       char name[TASK_COMM_LEN];
+       enum trinity_app_status status;
+
+       struct trinity_stat *parent;
+
+       uint64_t total_alloc_mem; /* total allocated memory */
+       uint64_t total_freed_mem; /* total freed memory */
+
+       struct list_head reqs;
+       uint32_t num_total_reqs;
+       uint32_t num_kept_reqs;
+       uint32_t num_active_reqs;
+
+       struct hlist_bl_node hnode; /* hash node */
+       struct list_head lnode; /* list node */
+
+       unsigned long slot;
+};
+
+/**
+ * struct trinity_stat_req - A structure for representing statistics for each req
+ * @status: req status
+ * @req_id: req identifier
+ * @model_id: model identifier
+ * @submitted: submitted time (i.e., when req is submitted to global queue)
+ * @scheduled: scheduled time (i.e., when req is scheduled to device)
+ * @completed: completed time (i.e., when output notification arrives)
+ * @list: list node mananged by trinity_stat_app
+ * @profile: profile data
+ *
+ * Even if a req is freed, its stat could be kept for a while.
+ */
+struct trinity_stat_req {
+       enum trinity_req_status status; /* status of submit result */
+       enum trinity_req_priority priority;
+
+       struct trinity_stat_app *parent;
+
+       int32_t app_id;
+       int32_t req_id;
+       uint64_t model_id;
+
+       bool is_kernel;
+
+       ktime_t submitted;
+       ktime_t scheduled;
+       ktime_t completed;
+
+       uint32_t num_runs;
+       uint32_t total_time;
+
+       uint32_t prev_time;
+       uint32_t prev_cycles;
+
+       struct list_head list;
+       void *profile;
+
+       unsigned long slot;
+};
+
+/**
+ * struct trinity_driver - A private data structure for Trinity device driver
+ * @desc: A pointer to the device description.
+ * @name: The id-annotated name of the device.
+ * @mdev: A copy of &struct miscdevice to which the device is registered.
+ * @dev: A pointer to &struct device of the device.
+ * @complete: A &struct completion variable to maintain events from the device.
+ * @lock: A lock for access control to driver-level static variables
+ * @mmreg_vaddr: The iomapped base address of memory-mapped registers.
+ * @mmreg_paddr: The physical base address of memory-mapped registers.
+ * @opened: The number of clients which open the device.
+ * @tops: Tera Operations Per Second (TOPS) of this device.
+ * @dspm: The size of Data Scratch-Pad Memory (DSPM) in the DSP.
+ *
+ * Description of the structure.
+ */
+struct trinity_driver {
+       const struct trinity_desc *desc;
+       const char *name;
+       void *pdata;
+
+       uint32_t dev_id;
+       struct miscdevice mdev;
+       struct device *dev;
+       struct completion complete;
+       struct mutex lock;
+
+       atomic_t global_req_id;
+
+       void __iomem *mmreg_vaddr[TRINITY_MAX_MMREGS];
+       phys_addr_t mmreg_paddr[TRINITY_MAX_MMREGS];
+
+       int32_t opened;
+       unsigned long verbose;
+
+       struct work_struct work_stop;
+
+       uint32_t tops;
+       uint32_t dspm;
+
+       /* statistics */
+       struct trinity_stat stat;
+
+       /* debugfs */
+       void *debugfs_pdata;
+
+       struct list_head rpm_list;
+       void *resv_pool;
+};
+
+/**
+ * struct trinity_model - A structure for representing model data
+ * @config: model configuration
+ * @hnode: hash node for indexing
+ * @import_info: Cached hwmem import info.
+ * @owner_id: Identifier for owner app
+ */
+struct trinity_model {
+       struct trinity_ioctl_model config;
+       struct trinity_hwmem_import import_info;
+       struct hlist_bl_node hnode;
+       int32_t owner_id;
+       struct kref refcnt;
+} __attribute__((packed));
+
+/**
+ * struct trinity_input - A structure for representing input data
+ * @config: input configuration
+ * @import_info: Cached hwmem import info.
+ */
+struct trinity_input {
+       struct trinity_ioctl_input config;
+       struct trinity_hwmem_import import_info;
+} __attribute__((packed));
+
+/**
+ * struct trinity_req - A structure for representing a req
+ * @drv: An instance of the driver.
+ * @input: Information of the input configuration to be run by this req.
+ * @status: Status of the submitted req.
+ * @priv: A handle of private data
+ */
+struct trinity_req {
+       /** context where the req belongs */
+       struct trinity_driver *drv;
+
+       struct trinity_input input; /* the req's input argument */
+       struct trinity_model *model;
+
+       struct trinity_stat_req *stat;
+
+       uint64_t submit_retry;
+       struct completion complete;
+       struct llist_node llist;
+
+#ifdef CONFIG_TRINITY_SCHED_VD
+       struct hlist_node hlist;
+#endif
+
+       ktime_t time_started;
+       bool is_kernel;
+       bool skip_iommu_mapping;
+       uint32_t poll_magic;
+
+       bool scheduled;
+
+       void *priv;
+};
+
+/**
+ * struct trinity_model_htable - A common hashtable to maintain models
+ * @ht_heads: A pointer to heads of this hashtable
+ * @hash_bits: The number of bits to use in hashing.
+ * @hash_size: The number of hash buckets.
+ */
+struct trinity_model_htable {
+       struct hlist_bl_head *ht_heads;
+       int hash_bits;
+       int hash_size;
+};
+
+/*
+ * Write the given bit mask to a memory-mapped register.
+ *
+ * NOTE: despite the name, this performs a plain write of @bit (no
+ * read-modify-write); the register ends up holding exactly @bit,
+ * which is identical to writing (0 | bit).
+ */
+static inline void trinity_set_bit(uint32_t bit, void __iomem *addr)
+{
+       iowrite32(bit, addr);
+}
+
+/**
+ * trinity_get_app_id - get a app_id for the current opened device
+ *
+ * Returns app_id (just returns its tgid for now).
+ */
+static inline int32_t trinity_get_app_id(void)
+{
+       return task_tgid_vnr(current);
+}
+
+/*
+ * Extern support functions
+ */
+extern int trinity_pm_runtime_init(struct device *dev);
+extern int trinity_pm_runtime_forbid(struct device *dev);
+extern void trinity_pm_runtime_allow(struct device *dev);
+extern void trinity_pm_runtime_attach(struct trinity_driver *drv);
+extern int trinity_get_dma_memory(struct device *dev, phys_addr_t *paddr,
+                                 dma_addr_t *daddr, size_t *size);
+extern int trinity_get_extern_memory(struct device *dev, phys_addr_t *paddr,
+                                    dma_addr_t *daddr, size_t *size);
+extern void trinity_reset_device(struct device *dev, bool do_test);
+extern void trinity_set_irq_affinity(int irq);
+extern void trinity_monitor_invalid_access(void);
+/*
+ * Trinity common functions
+ */
+int trinity_create_node(struct trinity_driver *drv);
+void trinity_destroy_node(struct trinity_driver *drv);
+int trinity_idu_load(struct trinity_driver *drv, const char *dirpath);
+void trinity_init_model_htable(const struct trinity_driver *drv,
+                              struct trinity_model_htable *ht);
+int32_t trinity_get_app_id(void);
+void trinity_finish_req(struct trinity_driver *drv, struct trinity_req *req);
+phys_addr_t trinity_get_paddr(struct iommu_domain *domain, dma_addr_t daddr);
+struct trinity_sched_desc *get_trinity_sched(struct trinity_req *req);
+int trinity_wait_ready(struct trinity_driver *drv);
+
+/* File operations */
+int trinity_open(struct inode *inode, struct file *f);
+int trinity_release(struct inode *inode, struct file *f);
+long trinity_ioctl(struct file *f, unsigned int cmd, unsigned long arg);
+
+/* Device probing and removing */
+int trinity_probe(struct platform_device *pdev,
+                 const struct trinity_desc *desc);
+int trinity_remove(struct platform_device *pdev,
+                  const struct trinity_desc *desc);
+
+#ifdef CONFIG_TRINITY_SYSFS
+int trinity_sysfs_init(struct trinity_driver *drv);
+int trinity_sysfs_cleanup(struct trinity_driver *drv);
+#else
+static inline int trinity_sysfs_init(struct trinity_driver *drv)
+{
+       return 0;
+}
+
+static inline int trinity_sysfs_cleanup(struct trinity_driver *drv)
+{
+       return 0;
+}
+#endif
+
+#ifdef CONFIG_TRINITY_DEBUG
+int trinity_debug_init(void);
+void trinity_debug_exit(void);
+
+int trinity_debug_add(struct trinity_driver *drv);
+void trinity_debug_remove(struct trinity_driver *drv);
+void trinity_debug_clear(struct trinity_driver *drv, unsigned long msg_max);
+unsigned long trinity_debug_get_max(struct trinity_driver *drv);
+void trinity_debug_dump_msg(struct trinity_driver *drv, const char *fmt, ...);
+void trinity_debug_dump_model(struct trinity_driver *drv,
+                             const struct trinity_model *model,
+                             const char *fmt, ...);
+void trinity_debug_dump_input(struct trinity_driver *drv,
+                             const struct trinity_input *input,
+                             const char *fmt, ...);
+#else
+static inline int trinity_debug_init(void)
+{
+       return 0;
+}
+static inline void trinity_debug_exit(void)
+{
+}
+
+static inline int trinity_debug_add(struct trinity_driver *drv)
+{
+       return 0;
+}
+static inline void trinity_debug_remove(struct trinity_driver *drv)
+{
+}
+static inline void trinity_debug_clear(struct trinity_driver *drv,
+                                      unsigned long msg_max)
+{
+}
+static inline unsigned long trinity_debug_get_max(struct trinity_driver *drv)
+{
+       return 0;
+}
+static inline void trinity_debug_dump_msg(struct trinity_driver *drv,
+                                         const char *fmt, ...)
+{
+}
+static inline void trinity_debug_dump_model(struct trinity_driver *drv,
+                                           const struct trinity_model *model,
+                                           const char *fmt, ...)
+{
+}
+static inline void trinity_debug_dump_input(struct trinity_driver *drv,
+                                           const struct trinity_input *input,
+                                           const char *fmt, ...)
+{
+}
+#endif
+
+#endif /* __TRINITY_COMMON_H__ */
diff --git a/drivers/misc/trinity/trinity_debug.c b/drivers/misc/trinity/trinity_debug.c
new file mode 100644 (file)
index 0000000..7594b04
--- /dev/null
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ */
+/**
+ * @file       trinity_debug.c
+ * @brief      Implementation of debug functions for trinity drivers
+ * @date       19 Mar 2020
+ * @author     Dongju Chae <dongju.chae@samsung.com>
+ * @bug                No known bugs except for NYI items
+ */
+
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "trinity_common.h"
+#include "trinity_resv_mem.h"
+
+#define TRINITY_DEVVER(drv)    (drv->desc->ver >> TRINITY_SHIFT_DEV)
+#define TRINITY_DEBUGFS_DIR    ("trinity")
+#define TRINITY_DEBUGFS_MAX    (1024UL)
+#define TRINITY_DEBUGFS_LENGTH (255)
+
+struct trinity_debugfs_msg {
+       char msg[TRINITY_DEBUGFS_LENGTH + 1]; /* including NULL */
+};
+
+struct trinity_debugfs_entry {
+       struct dentry *dentry;
+       spinlock_t lock;
+
+       unsigned long msg_max;
+       unsigned long msg_num;
+       unsigned long msg_off;
+
+       struct trinity_resv_mem msg_buf;
+};
+
+static struct dentry *trinity_debugfs;
+
+/* Prefix @msg with "[app_id] " and return the number of characters added.
+ * The prefix is always far shorter than TRINITY_DEBUGFS_LENGTH, so the
+ * would-be length returned by snprintf() equals the written length here.
+ */
+static size_t trinity_debug_append_app_id(struct trinity_driver *drv, char *msg)
+{
+       return snprintf(msg, TRINITY_DEBUGFS_LENGTH, "[%d] ",
+                       trinity_get_app_id());
+}
+
+/*
+ * Reserve the next slot in the per-device message ring buffer.
+ *
+ * Returns a zeroed buffer of TRINITY_DEBUGFS_LENGTH + 1 bytes, or NULL when
+ * logging is disabled (no entry, or msg_max == 0). When the ring is full the
+ * oldest slot (at msg_off) is recycled.
+ *
+ * NOTE(review): the slot is cleared and later filled *after* entry->lock is
+ * dropped, so a concurrent debugfs read may observe a partially written
+ * message; only the slot reservation itself is serialized.
+ */
+static char *trinity_debug_get_msg_buf(struct trinity_driver *drv)
+{
+       struct trinity_debugfs_entry *entry = drv->debugfs_pdata;
+       struct trinity_debugfs_msg *buf;
+
+       if (!entry || entry->msg_max == 0)
+               return NULL;
+
+       spin_lock(&entry->lock);
+       if (entry->msg_num == entry->msg_max) {
+               buf = &((struct trinity_debugfs_msg *)
+                               entry->msg_buf.vaddr)[entry->msg_off];
+               entry->msg_off = (entry->msg_off + 1) % entry->msg_max;
+       } else {
+               buf = &((struct trinity_debugfs_msg *)
+                               entry->msg_buf.vaddr)[entry->msg_num++];
+       }
+       spin_unlock(&entry->lock);
+
+       memset(buf, '\x00', sizeof(*buf));
+       return buf->msg;
+}
+
+/**
+ * trinity_debug_dump_msg - Record a formatted message in the debugfs ring.
+ * @drv: The target trinity driver instance
+ * @fmt: printf-style format string followed by its arguments
+ *
+ * Does nothing when debugfs logging is disabled. When drv->verbose is set,
+ * the message is also echoed to the kernel log.
+ */
+void trinity_debug_dump_msg(struct trinity_driver *drv, const char *fmt, ...)
+{
+       char *msg;
+       size_t len;
+       va_list args;
+
+       msg = trinity_debug_get_msg_buf(drv);
+       if (msg == NULL)
+               return;
+
+       len = trinity_debug_append_app_id(drv, msg);
+
+       va_start(args, fmt);
+       /* vscnprintf() returns the length actually written, so @len stays
+        * bounded by the buffer size even on truncation.
+        */
+       len += vscnprintf(msg + len, TRINITY_DEBUGFS_LENGTH - len, fmt, args);
+       va_end(args);
+
+       if (drv->verbose > 0)
+               /* never pass @msg as a format string: it may contain '%' */
+               dev_info(drv_to_dev_ptr(drv), "%s", msg);
+}
+
+/**
+ * trinity_debug_dump_model - Record a model description in the debugfs ring.
+ * @drv: The target trinity driver instance
+ * @model: The model whose configuration is appended to the message
+ * @fmt: printf-style format string followed by its arguments
+ *
+ * Uses scnprintf()/vscnprintf() rather than snprintf()/vsnprintf(): the
+ * latter return the WOULD-BE length, so a truncated write could push @len
+ * past TRINITY_DEBUGFS_LENGTH, make the size_t expression
+ * "TRINITY_DEBUGFS_LENGTH - len" wrap to a huge value and overflow the
+ * fixed-size message slot on the next write.
+ */
+void trinity_debug_dump_model(struct trinity_driver *drv,
+                             const struct trinity_model *model,
+                             const char *fmt, ...)
+{
+       char *msg;
+       size_t len;
+       va_list args;
+
+       msg = trinity_debug_get_msg_buf(drv);
+       if (msg == NULL)
+               return;
+
+       len = trinity_debug_append_app_id(drv, msg);
+
+       va_start(args, fmt);
+       len += vscnprintf(msg + len, TRINITY_DEBUGFS_LENGTH - len, fmt, args);
+       va_end(args);
+
+       len += scnprintf(
+               msg + len, TRINITY_DEBUGFS_LENGTH - len,
+               "\n\tid(0x%llx) dbuf_fd(%d) program_offset_addr(0x%llx) program_size(0x%llx)\n",
+               model->config.id, model->config.dbuf_fd,
+               model->config.program_offset_addr, model->config.program_size);
+       if (TRINITY_DEVVER(drv) == 1) {
+               len += scnprintf(msg + len, TRINITY_DEBUGFS_LENGTH - len,
+                               "\tweight_offset_addr(0x%llx)",
+                               model->config.weight_offset_addr);
+       } else if (TRINITY_DEVVER(drv) == 2) {
+               len += scnprintf(
+                       msg + len, TRINITY_DEBUGFS_LENGTH - len,
+                       "\tmetadata_dbuf_fd(%d) metadata_ext_dbuf_fd(%d) metadata_ext_size(0x%llx)",
+                       model->config.metadata_dbuf_fd,
+                       model->config.metadata_ext_dbuf_fd,
+                       model->config.metadata_ext_size);
+       }
+
+       if (drv->verbose > 0)
+               /* never pass @msg as a format string: it may contain '%' */
+               dev_info(drv_to_dev_ptr(drv), "%s", msg);
+}
+
+/**
+ * trinity_debug_dump_input - Record an input description in the debugfs ring.
+ * @drv: The target trinity driver instance
+ * @input: The input whose configuration is appended to the message
+ * @fmt: printf-style format string followed by its arguments
+ *
+ * Uses scnprintf()/vscnprintf() instead of snprintf()/vsnprintf() so that
+ * @len can never exceed TRINITY_DEBUGFS_LENGTH on truncation; otherwise
+ * "TRINITY_DEBUGFS_LENGTH - len" (size_t) would wrap and the follow-up
+ * writes could overflow the fixed-size message slot.
+ */
+void trinity_debug_dump_input(struct trinity_driver *drv,
+                             const struct trinity_input *input,
+                             const char *fmt, ...)
+{
+       char *msg;
+       size_t len;
+       va_list args;
+
+       msg = trinity_debug_get_msg_buf(drv);
+       if (msg == NULL)
+               return;
+
+       len = trinity_debug_append_app_id(drv, msg);
+
+       va_start(args, fmt);
+       len += vscnprintf(msg + len, TRINITY_DEBUGFS_LENGTH - len, fmt, args);
+       va_end(args);
+
+       len += scnprintf(msg + len, TRINITY_DEBUGFS_LENGTH - len,
+                       "\n\tdbuf_fd(%d) model_id(0x%llx)\n",
+                       input->config.dbuf_fd, input->config.model_id);
+       if (TRINITY_DEVVER(drv) == 1) {
+               len += scnprintf(
+                       msg + len, TRINITY_DEBUGFS_LENGTH - len,
+                       "\tactivation_offset_addr0(0x%llx) activation_offset_addr1(0x%llx)",
+                       input->config.activation_offset_addr0,
+                       input->config.activation_offset_addr1);
+       } else if (TRINITY_DEVVER(drv) == 2) {
+               len += scnprintf(
+                       msg + len, TRINITY_DEBUGFS_LENGTH - len,
+                       "\ttimeout_ms(%lld) priority(%u) num_segments(%u) input_mode(%d) output_mode(%d)",
+                       input->config.timeout_ms, input->config.priority,
+                       input->config.num_segments, input->config.input_mode,
+                       input->config.output_mode);
+       }
+
+       if (drv->verbose > 0)
+               /* never pass @msg as a format string: it may contain '%' */
+               dev_info(drv_to_dev_ptr(drv), "%s", msg);
+}
+
+/* seq_file show callback: dump the message ring, oldest entry first. */
+static int trinity_debugfs_show(struct seq_file *s, void *unused)
+{
+       struct trinity_driver *drv = s->private;
+       struct trinity_debugfs_entry *entry = drv->debugfs_pdata;
+       struct trinity_debugfs_msg *msg;
+       unsigned long i, offset;
+
+       spin_lock(&entry->lock);
+       for (i = 0; i < entry->msg_num; i++) {
+               /* msg_num > 0 implies msg_max > 0, so the modulo is safe */
+               offset = (entry->msg_off + i) % entry->msg_max;
+               msg = &((struct trinity_debugfs_msg *)
+                               entry->msg_buf.vaddr)[offset];
+
+               seq_puts(s, msg->msg);
+               seq_puts(s, "\n");
+       }
+       spin_unlock(&entry->lock);
+
+       return 0;
+}
+
+/* debugfs open: bind trinity_debugfs_show() through single_open(). */
+static int trinity_debugfs_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, trinity_debugfs_show, inode->i_private);
+}
+
+/* Read-only seq_file operations for the per-device debugfs entry. */
+static const struct file_operations trinity_debugfs_fops = {
+       .open = trinity_debugfs_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+/**
+ * trinity_debug_add - Create the per-device debugfs file.
+ * @drv: The target trinity driver instance
+ *
+ * Creates a read-only (0400) file named after the device under the
+ * top-level trinity debugfs directory. The message ring itself is not
+ * allocated here; it is set up later via trinity_debug_clear().
+ *
+ * Returns 0 on success or a negative error code.
+ *
+ * NOTE(review): debugfs_create_file_unsafe() bypasses the file-removal
+ * protection proxy and is normally paired with DEFINE_DEBUGFS_ATTRIBUTE();
+ * these fops are plain seq_file ops — confirm debugfs_create_file() isn't
+ * the safer choice here.
+ */
+int trinity_debug_add(struct trinity_driver *drv)
+{
+       struct trinity_debugfs_entry *entry;
+       struct dentry *dentry;
+       const char *name = drv->name;
+
+       if (name == NULL)
+               return -EINVAL;
+
+       entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+       if (!entry)
+               return -ENOMEM;
+
+       dentry = debugfs_create_file_unsafe(name, 0400, trinity_debugfs, drv,
+                                           &trinity_debugfs_fops);
+       if (IS_ERR(dentry)) {
+               kfree(entry);
+               return PTR_ERR(dentry);
+       }
+
+       entry->dentry = dentry;
+       spin_lock_init(&entry->lock);
+
+       drv->debugfs_pdata = entry;
+
+       return 0;
+}
+
+/**
+ * trinity_debug_remove - Tear down the per-device debugfs file.
+ * @drv: The target trinity driver instance
+ *
+ * Frees the message ring (trinity_debug_clear() with msg_max == 0),
+ * removes the debugfs file and releases the entry itself.
+ */
+void trinity_debug_remove(struct trinity_driver *drv)
+{
+       struct trinity_debugfs_entry *entry = drv->debugfs_pdata;
+
+       trinity_debug_clear(drv, 0);
+
+       debugfs_remove(entry->dentry);
+       kfree(entry);
+
+       drv->debugfs_pdata = NULL;
+}
+
+/**
+ * trinity_debug_clear - Resize (or disable) the per-device message ring.
+ * @drv: The target trinity driver instance
+ * @msg_max: Requested number of message slots (0 disables logging)
+ *
+ * The old buffer is always released; a fresh one is allocated from
+ * reserved memory when @msg_max is non-zero. The effective slot count may
+ * exceed @msg_max because the allocation is page aligned.
+ */
+void trinity_debug_clear(struct trinity_driver *drv, unsigned long msg_max)
+{
+       struct trinity_debugfs_entry *entry = drv->debugfs_pdata;
+       struct device *dev = drv_to_dev_ptr(drv);
+       size_t size;
+
+       /* maximum size limit: 256KiB (1024 slots x 256 bytes) */
+       if (msg_max > TRINITY_DEBUGFS_MAX) {
+               dev_err(dev, "Too many debugfs entries (limit: %lu)",
+                       TRINITY_DEBUGFS_MAX);
+               return;
+       }
+
+       spin_lock(&entry->lock);
+
+       /* disable debugfs temporarily */
+       trinity_free_from_resv_mem(&entry->msg_buf, false);
+       entry->msg_max = 0;
+       entry->msg_num = 0;
+       entry->msg_off = 0;
+
+       if (msg_max == 0)
+               goto out;
+
+       /* reallocate debugfs buffer
+        * NOTE(review): this runs under a spinlock, so
+        * trinity_alloc_from_resv_mem() must be non-sleeping — confirm its
+        * allocation flags.
+        */
+       size = PAGE_ALIGN(msg_max * sizeof(struct trinity_debugfs_msg));
+       if (trinity_alloc_from_resv_mem(size, &entry->msg_buf, false) < 0) {
+               dev_warn(dev, "No available reserved memory for debugfs");
+               goto out;
+       }
+       /* more available entries due to page size alignment */
+       entry->msg_max = size / sizeof(struct trinity_debugfs_msg);
+
+out:
+       spin_unlock(&entry->lock);
+}
+
+/* Return the current number of message-ring slots (0 when disabled). */
+unsigned long trinity_debug_get_max(struct trinity_driver *drv)
+{
+       struct trinity_debugfs_entry *entry = drv->debugfs_pdata;
+       unsigned long max_slots;
+
+       spin_lock(&entry->lock);
+       max_slots = entry->msg_max;
+       spin_unlock(&entry->lock);
+
+       return max_slots;
+}
+
+/* Create the top-level "trinity" debugfs directory (module init path). */
+int trinity_debug_init(void)
+{
+       struct dentry *entry;
+
+       entry = debugfs_create_dir(TRINITY_DEBUGFS_DIR, NULL);
+       if (IS_ERR(entry))
+               return PTR_ERR(entry);
+
+       trinity_debugfs = entry;
+
+       return 0;
+}
+
+/* Remove the "trinity" debugfs directory and everything beneath it. */
+void trinity_debug_exit(void)
+{
+       debugfs_remove_recursive(trinity_debugfs);
+}
diff --git a/drivers/misc/trinity/trinity_hwmem.c b/drivers/misc/trinity/trinity_hwmem.c
new file mode 100644 (file)
index 0000000..dff0a97
--- /dev/null
@@ -0,0 +1,466 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * An abstraction layer to handle DMA memory buffers for Trinity device driver
+ *
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Wook Song <wook16.song@samsung.com>
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/version.h>
+
+#include "trinity_hwmem.h"
+#include "trinity_hwmem_iommu_helper.h"
+#include "trinity_resv_mem.h"
+
+#define dbuf_to_trnt_hwmem(d) ((struct trinity_hwmem *)d->priv)
+#define vma_to_trnt_hwmem(v)  ((struct trinity_hwmem *)v->vm_private_data)
+
+/**
+ * struct trinity_hwmem - A data structure for Trinity DMA buffer management
+ * @dev: A pointer to device which this hwmem belongs to.
+ * @dbuf: The dma_buf instance.
+ * @refcnt: Reference counts.
+ * @direction: A variable indicating the DMA data direction in allocating this
+ *             dma_buf.
+ * @attrs: Attributes used in allocating this dma_buf.
+ * @req_size: The size of the DMA buffer that the user request to allocate.
+ * @alc_size: The size of the DMA buffer which is actually allocated.
+ * @addr: The DMA (physical) address of this dma_buf.
+ * @cookie: The DMA cookies.
+ */
+struct trinity_hwmem {
+       struct device *dev;
+       struct dma_buf *dbuf;
+       struct kref refcnt;
+
+       enum dma_data_direction direction;
+       enum trinity_hwmem_type type;
+
+       unsigned long attrs;
+       size_t req_size;
+       size_t alc_size;
+
+       bool is_cont;
+       dma_addr_t addr;
+       void *cookie;
+};
+
+/* kref release: drop the dma_buf reference; the actual teardown happens in
+ * trinity_hwmem_dbuf_ops_release() once the dma_buf refcount hits zero.
+ */
+static void __trinity_hwmem_free(struct kref *refcnt)
+{
+       struct trinity_hwmem *mem =
+               container_of(refcnt, struct trinity_hwmem, refcnt);
+       /**
+        * when the dmabuf reference counter becomes zero,
+        * trinity_hwmem_dbuf_ops_release() is triggered.
+        */
+       dma_buf_put(mem->dbuf);
+}
+
+/* Drop one hwmem reference; frees via __trinity_hwmem_free() at zero. */
+static void __trinity_hwmem_put(struct trinity_hwmem *mem)
+{
+       kref_put(&mem->refcnt, __trinity_hwmem_free);
+}
+
+/* Convenience wrapper: drop the hwmem reference held by @dbuf's priv. */
+static void __trinity_hwmem_put_dmabuf(struct dma_buf *dbuf)
+{
+       __trinity_hwmem_put(dbuf_to_trnt_hwmem(dbuf));
+}
+
+/* Take one hwmem reference and return @mem for call chaining. */
+static struct trinity_hwmem *__trinity_hwmem_get(struct trinity_hwmem *mem)
+{
+       kref_get(&mem->refcnt);
+
+       return mem;
+}
+
+/* dma_buf .detach callback: pairs with the reference taken in attach. */
+static void trinity_hwmem_dbuf_ops_detach(struct dma_buf *dbuf,
+                                         struct dma_buf_attachment *attachment)
+{
+       struct trinity_hwmem *mem = dbuf_to_trnt_hwmem(dbuf);
+
+       /* Decrease ref count of the backing storage */
+       __trinity_hwmem_put(mem);
+}
+
+/* dma_buf .attach callback; the extra @dev parameter existed before v4.19. */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0))
+static int trinity_hwmem_dbuf_ops_attach(struct dma_buf *dbuf,
+                                        struct device *dev,
+                                        struct dma_buf_attachment *attachment)
+#else
+static int trinity_hwmem_dbuf_ops_attach(struct dma_buf *dbuf,
+                                        struct dma_buf_attachment *attachment)
+#endif
+{
+       struct trinity_hwmem *mem = dbuf_to_trnt_hwmem(dbuf);
+
+       /* Increase ref count of the backing storage */
+       mem = __trinity_hwmem_get(mem);
+       attachment->priv = mem;
+
+       return 0;
+}
+
+/* dma_buf .map_dma_buf callback.
+ *
+ * NOTE(review): returning NULL (no sg_table) means importers cannot map
+ * this buffer through an attachment; this looks intentional (buffers are
+ * shared via mmap/vmap instead) — confirm no in-kernel importer relies on
+ * attachment mapping.
+ */
+static struct sg_table *
+trinity_hwmem_dbuf_ops_map_dma_buf(struct dma_buf_attachment *attachment,
+                                  enum dma_data_direction dir)
+{
+       return NULL;
+}
+
+/* dma_buf .unmap_dma_buf callback: nothing to undo (map returns NULL). */
+static void
+trinity_hwmem_dbuf_ops_unmap_dma_buf(struct dma_buf_attachment *attachment,
+                                    struct sg_table *sgt,
+                                    enum dma_data_direction dir)
+{
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0))
+/**
+ * trinity_hwmem_dbuf_ops_map_atomic() - Implementation of the .map_atomic
+ *             callback in &struct dma_buf_ops
+ * @dbuf: A pointer to the instance of &struct dma_buf to map.
+ * @pgnum: The number of pages to map.
+ *
+ * This is a mandatory callback to be implemented in the current kernel version
+ * (v4.12.0), but not used Trinity internally and deprecated since v4.19.
+ */
+static inline void *trinity_hwmem_dbuf_ops_map_atomic(struct dma_buf *dbuf,
+                                                     unsigned long pgnum)
+{
+       return NULL;
+}
+
+/**
+ * trinity_hwmem_dbuf_ops_map() - Implementation of the .map callback in &struct
+ *             dma_buf_ops
+ * @dbuf: A pointer to the instance of &struct dma_buf to map.
+ * @pgnum: The number of pages to map.
+ *
+ * This is a mandatory callback to be implemented in the current kernel version
+ * (v4.12.0), but not used Trinity internally and deprecated since v4.19.
+ */
+static inline void *trinity_hwmem_dbuf_ops_map(struct dma_buf *dbuf,
+                                              unsigned long pgnum)
+{
+       return NULL;
+}
+#endif
+
+/* VMA open: each mapping (including forked copies) pins the hwmem. */
+static void trinity_hwmem_vm_ops_open(struct vm_area_struct *vma)
+{
+       struct trinity_hwmem *mem = vma_to_trnt_hwmem(vma);
+
+       __trinity_hwmem_get(mem);
+}
+
+/* VMA close: drop the reference taken by the corresponding open. */
+static void trinity_hwmem_vm_ops_close(struct vm_area_struct *vma)
+{
+       struct trinity_hwmem *mem = vma_to_trnt_hwmem(vma);
+
+       __trinity_hwmem_put(mem);
+}
+
+/* VMA lifetime hooks keeping the backing hwmem alive while mapped. */
+static const struct vm_operations_struct trinity_hwmem_vm_ops = {
+       .open = trinity_hwmem_vm_ops_open,
+       .close = trinity_hwmem_vm_ops_close,
+};
+
+/* dma_buf .mmap callback: map the backing storage into user space.
+ *
+ * Reserved-memory (TRINITY_HWMEM_DMA_CONT) buffers are mapped through the
+ * resv_mem helper; everything else via dma_mmap_attrs().
+ */
+static int32_t trinity_hwmem_dbuf_ops_mmap(struct dma_buf *dbuf,
+                                          struct vm_area_struct *vma)
+{
+       struct trinity_hwmem *mem;
+       int32_t ret;
+
+       if (!dbuf)
+               return -EINVAL;
+
+       mem = dbuf_to_trnt_hwmem(dbuf);
+       if (!mem)
+               return -EINVAL;
+
+       /* always map from the start of the buffer */
+       vma->vm_pgoff = 0;
+       if (mem->type == TRINITY_HWMEM_DMA_CONT)
+               ret = trinity_mmap_from_resv_mem(vma, mem->cookie,
+                                                mem->alc_size, mem->is_cont);
+       else
+               ret = dma_mmap_attrs(mem->dev, vma, mem->cookie, mem->addr,
+                                    mem->alc_size, mem->attrs);
+       if (ret)
+               return ret;
+
+       vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+       vma->vm_private_data = mem;
+       vma->vm_ops = &trinity_hwmem_vm_ops;
+
+       /* take the initial mapping reference explicitly: .open is only
+        * invoked automatically for later VMA duplications, not here
+        */
+       vma->vm_ops->open(vma);
+
+       return 0;
+}
+
+/* dma_buf .release callback: free the backing storage once the last dma_buf
+ * reference is gone. Counterpart of __trinity_hwmem_alloc().
+ */
+static void trinity_hwmem_dbuf_ops_release(struct dma_buf *dbuf)
+{
+       struct trinity_hwmem *mem = dbuf_to_trnt_hwmem(dbuf);
+
+#ifdef CONFIG_TRINITY_FPGA
+       if (trinity_hwmem_iommu_unmap(mem->dev, mem->addr, mem->alc_size) < 0)
+               dev_warn(mem->dev, "Unable to unmap iommu mapping for 0x%llx",
+                        mem->addr);
+#endif
+
+       if (mem->type == TRINITY_HWMEM_DMA_CONT) {
+               struct trinity_resv_mem resv_mem;
+
+               /* Rebuild the descriptor the resv-mem allocator expects. */
+               resv_mem.vaddr = mem->cookie;
+               resv_mem.daddr = mem->addr;
+               resv_mem.size = mem->alc_size;
+
+               trinity_free_from_resv_mem(&resv_mem, mem->is_cont);
+       } else {
+               dma_free_attrs(mem->dev, mem->alc_size, mem->cookie, mem->addr,
+                              mem->attrs);
+       }
+       /* Drop the device reference taken at allocation time. */
+       put_device(mem->dev);
+
+       mem->dbuf->priv = NULL;
+
+       kfree(mem);
+}
+
+/* dma_buf .vmap callback: expose the kernel virtual address of the backing
+ * storage, or NULL if the dma_buf/hwmem descriptor is missing.
+ */
+static void *trinity_hwmem_dbuf_ops_vmap(struct dma_buf *dbuf)
+{
+       struct trinity_hwmem *mem = dbuf ? dbuf_to_trnt_hwmem(dbuf) : NULL;
+
+       return mem ? mem->cookie : NULL;
+}
+
+/* dma_buf exporter callbacks for Trinity hwmem buffers. */
+static struct dma_buf_ops trinity_hwmem_dbuf_ops = {
+       .vmap = trinity_hwmem_dbuf_ops_vmap,
+       .attach = trinity_hwmem_dbuf_ops_attach,
+       .detach = trinity_hwmem_dbuf_ops_detach,
+       .map_dma_buf = trinity_hwmem_dbuf_ops_map_dma_buf,
+       .unmap_dma_buf = trinity_hwmem_dbuf_ops_unmap_dma_buf,
+       .release = trinity_hwmem_dbuf_ops_release,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0))
+       /* Mandatory stubs on pre-v4.19 kernels only. */
+       .map = trinity_hwmem_dbuf_ops_map,
+       .map_atomic = trinity_hwmem_dbuf_ops_map_atomic,
+#endif
+       .mmap = trinity_hwmem_dbuf_ops_mmap,
+};
+
+static void *__trinity_hwmem_alloc(struct device *dev, const size_t size,
+                                  const enum dma_data_direction dir,
+                                  const enum trinity_hwmem_type type)
+{
+       size_t aligned_size = ALIGN(size, PAGE_SIZE);
+       struct trinity_hwmem *mem;
+       struct trinity_resv_mem resv_mem;
+       int ret;
+
+       if (WARN_ON(!dev))
+               return ERR_PTR(-EINVAL);
+
+       mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+       if (!mem)
+               return ERR_PTR(-ENOMEM);
+
+       mem->dev = get_device(dev);
+       mem->req_size = size;
+       mem->alc_size = aligned_size;
+       mem->direction = dir;
+       mem->type = TRINITY_HWMEM_DMA_IOMMU;
+       mem->is_cont = (type == TRINITY_HWMEM_DMA_CONT);
+
+       mem->attrs |= DMA_ATTR_WRITE_COMBINE;
+       mem->attrs |= DMA_ATTR_SKIP_CPU_SYNC;
+
+       /**
+        * Trying to alloc memery from resv mem first regardless of hwmem type.
+        * But, the resv allocator should preserve a minimum space for vISA programs
+        * because they should be physically contiguous.
+        */
+       ret = trinity_alloc_from_resv_mem(aligned_size, &resv_mem,
+                                         mem->is_cont);
+       if (ret == 0) {
+               mem->addr = resv_mem.daddr;
+               mem->cookie = resv_mem.vaddr;
+               mem->type = TRINITY_HWMEM_DMA_CONT;
+       } else if (!mem->is_cont) {
+               mem->cookie = dma_alloc_attrs(dev, aligned_size, &mem->addr,
+                                             GFP_KERNEL, mem->attrs);
+       } else {
+               dev_err(mem->dev,
+                       "Unable alloc contiguous memory for program: %zu\n",
+                       size);
+       }
+
+       if (!mem->cookie) {
+               ret = -ENOMEM;
+               goto free_mem;
+       }
+
+       kref_init(&mem->refcnt);
+
+#ifdef CONFIG_TRINITY_FPGA
+       if (trinity_hwmem_iommu_map(mem->dev, mem->addr, mem->alc_size) < 0)
+               dev_warn(mem->dev, "Unable to map iommu mapping for 0x%llx",
+                        mem->addr);
+#endif
+
+       return mem;
+
+free_mem:
+       kfree(mem);
+
+       return ERR_PTR(ret);
+}
+
+/* Wrap @mem in an exported dma_buf; the dma_buf holds its own hwmem
+ * reference, dropped again in trinity_hwmem_free().
+ */
+static struct dma_buf *__trinity_hwmem_get_dmabuf(struct trinity_hwmem *mem,
+                                                 unsigned long flags)
+{
+       DEFINE_DMA_BUF_EXPORT_INFO(einfo);
+       struct dma_buf *dbuf;
+
+       einfo.ops = &trinity_hwmem_dbuf_ops;
+       einfo.size = mem->alc_size;
+       einfo.flags = flags;
+       einfo.priv = (void *)mem;
+
+       dbuf = dma_buf_export(&einfo);
+       if (IS_ERR(dbuf))
+               return dbuf;
+
+       /* Increase ref count of the backing storage */
+       dbuf->priv = (void *)__trinity_hwmem_get(mem);
+       mem->dbuf = dbuf;
+
+       return dbuf;
+}
+
+/**
+ * trinity_hwmem_alloc() - Allocate a hwmem buffer and export it as a dma-buf.
+ * @dev: Device the buffer belongs to.
+ * @size: Requested buffer size in bytes.
+ * @type: Allocation type (e.g., TRINITY_HWMEM_DMA_CONT for contiguous).
+ *
+ * Return: A dma-buf file descriptor (>= 0) on success, negative errno
+ *         otherwise.
+ */
+int32_t trinity_hwmem_alloc(struct device *dev, const size_t size,
+                           enum trinity_hwmem_type type)
+{
+       struct trinity_hwmem *mem;
+       struct dma_buf *dbuf;
+       int32_t ret;
+
+       mem = __trinity_hwmem_alloc(dev, size, DMA_BIDIRECTIONAL, type);
+       if (IS_ERR(mem))
+               return PTR_ERR(mem);
+
+       dbuf = __trinity_hwmem_get_dmabuf(mem, O_CLOEXEC | O_RDWR);
+       if (IS_ERR(dbuf)) {
+               ret = PTR_ERR(dbuf);
+               goto err_put_mem;
+       }
+
+       ret = dma_buf_fd(dbuf, O_CLOEXEC);
+       if (ret < 0)
+               goto err_put_mem;
+
+       return ret;
+
+err_put_mem:
+       /* NOTE(review): when dma_buf_fd() fails after a successful export,
+        * dbuf still holds a hwmem reference; dropping only @mem here looks
+        * like it may leave the exported dma_buf dangling -- confirm against
+        * dma_buf_put() semantics.
+        */
+       __trinity_hwmem_put(mem);
+
+       return ret;
+}
+
+/**
+ * trinity_hwmem_free() - Release the hwmem buffer behind a dma-buf fd.
+ * @dev: Device used for error logging only.
+ * @fd: The dma-buf file descriptor to release.
+ *
+ * Return: 0 on success, negative errno if @fd is not a valid dma-buf.
+ */
+int32_t trinity_hwmem_free(struct device *dev, const int32_t fd)
+{
+       struct dma_buf *dbuf;
+       struct trinity_hwmem *mem;
+
+       dbuf = dma_buf_get(fd);
+       if (IS_ERR(dbuf)) {
+               /* Typo fix: the message previously read "realted". */
+               dev_err(dev,
+                       "failed to free the dma_buf structure related to fd with %ld\n",
+                       PTR_ERR(dbuf));
+               return PTR_ERR(dbuf);
+       }
+
+       mem = dbuf_to_trnt_hwmem(dbuf);
+
+       /* Counter part of __trinity_hwmem_get() in __trinity_hwmem_get_dmabuf() */
+       __trinity_hwmem_put_dmabuf(dbuf);
+       /* Counter part of __trinity_hwmem_get() in __trinity_hwmem_alloc() */
+       __trinity_hwmem_put(mem);
+
+       /* Drop the reference taken by dma_buf_get() above. */
+       dma_buf_put(dbuf);
+
+       return 0;
+}
+
+/**
+ * trinity_hwmem_import_dmabuf_begin() - Defines the beginning of a section to
+ *             import a given DMA buffer file descriptor.
+ * @dev: A pointer to the instance of the device to be attached the DMA buffer
+ * @dbuf_fd: The file descriptor of the DMA buffer to be imported.
+ * @import_info: If importing is successful, information such as the DMA
+ *             address, the virtual address which is mapped to the DMA address,
+ *             &struct dma_buf_attachment, a scatter-gather table, and &struct
+ *             dma_buf corresponding to the file descriptor will be passed
+ *             using this parameter.
+ *
+ * Return: 0 on success. Otherwise, returns negative error.
+ */
+int32_t
+trinity_hwmem_import_dmabuf_begin(struct device *dev, const int32_t dbuf_fd,
+                                 struct trinity_hwmem_import *import_info)
+{
+       struct dma_buf_attachment *attachment;
+       struct dma_buf *buf;
+       struct trinity_hwmem *mem;
+       int32_t ret;
+
+       if (!import_info)
+               return -EINVAL;
+
+       buf = dma_buf_get(dbuf_fd);
+       if (IS_ERR(buf))
+               return PTR_ERR(buf);
+
+       attachment = dma_buf_attach(buf, dev);
+       if (IS_ERR(attachment)) {
+               ret = PTR_ERR(attachment);
+               goto err_dbuf_put;
+       }
+
+       /* The attach callback stores the hwmem descriptor in ->priv;
+        * NOTE(review): assumes the buffer was exported by this driver --
+        * confirm imports of foreign dma-bufs are rejected earlier.
+        */
+       mem = attachment->priv;
+       import_info->dma_addr = mem->addr;
+       /* NOTE(review): dma_buf_vmap() may return NULL on failure; callers
+        * seem expected to cope with a NULL @addr -- confirm.
+        */
+       import_info->addr = dma_buf_vmap(buf);
+       import_info->attachment = attachment;
+       import_info->buf = buf;
+
+       return 0;
+
+err_dbuf_put:
+       dma_buf_put(buf);
+
+       return ret;
+}
+
+/**
+ * trinity_hwmem_import_dmabuf_end() - Defines the ending of the section related
+ *             to the given pointer to &struct trinity_hwmem_import.
+ * @import_info: Importing information related to the section to be ended.
+ */
+void trinity_hwmem_import_dmabuf_end(struct trinity_hwmem_import *import_info)
+{
+       if (!import_info || !import_info->buf)
+               return;
+       /* Undo the vmap, attach, and get from trinity_hwmem_import_dmabuf_begin(). */
+       dma_buf_vunmap(import_info->buf, import_info->addr);
+       dma_buf_detach(import_info->buf, import_info->attachment);
+       dma_buf_put(import_info->buf);
+}
diff --git a/drivers/misc/trinity/trinity_hwmem.h b/drivers/misc/trinity/trinity_hwmem.h
new file mode 100644 (file)
index 0000000..4c1f25a
--- /dev/null
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Wook Song <wook16.song@samsung.com>
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#ifndef __DRIVERS_MISC_TRINITY_HWMEM_H__
+#define __DRIVERS_MISC_TRINITY_HWMEM_H__
+
+#include <linux/dma-buf.h>
+#include <linux/dma-mapping.h>
+#include <linux/kref.h>
+
+#include "trinity.h"
+
+/**
+ * struct trinity_hwmem_import - A data structure to maintain imported hwmem
+ *             (that is Trinity DMA buffer).
+ * @dma_addr: The physical DMA address of this DMA buffer.
+ * @addr: A virtual address of this DMA buffer.
+ * @attachment: A pointer to &struct dma_buf_attachment.
+ * @buf: &struct dma_buf that this hwmem wrapped.
+ */
+struct trinity_hwmem_import {
+       dma_addr_t dma_addr;
+       void *addr;
+       struct dma_buf_attachment *attachment;
+       struct dma_buf *buf;
+};
+
+int32_t trinity_hwmem_import_dmabuf_begin(struct device *, const int32_t,
+                                         struct trinity_hwmem_import *);
+void trinity_hwmem_import_dmabuf_end(struct trinity_hwmem_import *);
+
+int32_t trinity_hwmem_alloc(struct device *, const size_t,
+                           enum trinity_hwmem_type type);
+int32_t trinity_hwmem_free(struct device *, const int32_t);
+
+#endif /* __DRIVERS_MISC_TRINITY_HWMEM_H__ */
diff --git a/drivers/misc/trinity/trinity_hwmem_iommu_helper.c b/drivers/misc/trinity/trinity_hwmem_iommu_helper.c
new file mode 100644 (file)
index 0000000..5cf0ee5
--- /dev/null
@@ -0,0 +1,857 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * IOMMU device driver for Samsung Research NPU device family
+ *
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Wook Song <wook16.song@samsung.com>
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#include <linux/bitmap.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/memory.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock_types.h>
+
+#include "trinity_hwmem_iommu_helper.h"
+
+#define CHECK_BITS(val, mask) ((val & mask) == mask)
+
+/* Register offsets for SRNPU-IOMMU (only for Triv2) */
+#define BASE_OFFSET_IOMMU_DLA (0x1000)
+#define BASE_OFFSET_IOMMU_DSP (0x2000)
+
+#define OFFSET_IOMMU_CTRL            (0x00)
+#define OFFSET_IOMMU_STATUS          (0x04)
+#define OFFSET_IOMMU_FLPT_BASE       (0x08)
+#define OFFSET_IOMMU_ALL_INVALIDATION (0x0C)
+#define OFFSET_IOMMU_VPN_INVALIDATION (0x10)
+#define OFFSET_IOMMU_IFLT_STAT       (0x14)
+#define OFFSET_IOMMU_IFLT_VA         (0x18)
+#define OFFSET_IOMMU_OFLT_STAT       (0x1C)
+#define OFFSET_IOMMU_OFLT_VA         (0x20)
+#define OFFSET_IOMMU_TLB_READ        (0x24)
+#define OFFSET_IOMMU_TLB_TAG         (0x28)
+#define OFFSET_IOMMU_TLB_PPN         (0x2C)
+
+#define MASK_PTE_MAPPED BIT_MASK(0)
+
+#define MASK_FLPTE_MAP_1M      BIT_MASK(1)
+#define MASK_FLPTE_PPN_MASK_1M GENMASK(35, 20)
+#define RSHFT_FLPTE_PPN_1M     (20)
+#define LSHFT_FLPTE_PPN_1M     (16)
+
+#define MASK_FLPTE_MAP_2M      BIT_MASK(2)
+#define MASK_FLPTE_PPN_MASK_2M GENMASK(35, 21)
+#define RSHFT_FLPTE_PPN_2M     (21)
+#define LSHFT_FLPTE_PPN_2M     (17)
+
+#define MASK_FLPTE_MAP_16M     GENMASK(2, 1)
+#define MASK_FLPTE_PPN_MASK_16M GENMASK(35, 24)
+#define RSHFT_FLPTE_PPN_16M    (24)
+#define LSHFT_FLPTE_PPN_16M    (20)
+
+#define MASK_SLPTE_MAP_4K      0
+#define MASK_SLPTE_PPN_MASK_4K GENMASK(35, 12)
+#define RSHFT_SLPTE_PPN_4K     (12)
+#define LSHFT_SLPTE_PPN_4K     (8)
+
+#define MASK_SLPTE_MAP_64K     BIT_MASK(1)
+#define MASK_SLPTE_PPN_MASK_64K GENMASK(35, 16)
+#define RSHFT_SLPTE_PPN_64K    (16)
+#define LSHFT_SLPTE_PPN_64K    (12)
+
+#define MASK_SLPT_BASE_TO_FLPTE         GENMASK(35, 10)
+#define RSHFT_SLPT_BASE_TO_FLPTE (10)
+#define LSHFT_SLPT_BASE_TO_FLPTE (6)
+#define MASK_FLPTE_TO_SLPT_BASE         GENMASK(31, 6)
+#define RSHFT_FLPTE_TO_SLPT_BASE (6)
+#define LSHFT_FLPTE_TO_SLPT_BASE (10)
+
+#define FLPT_DMAADDR_TO_REGVAL(x)  ((x >> 14) << 10)
+#define FLPT_NUM_PTES             BIT(12)
+#define FLPT_PTE_SIZE             (SZ_4)
+#define FLPT_SIZE                 (FLPT_NUM_PTES * FLPT_PTE_SIZE)
+#define SLPT_NUM_PTES             BIT(8)
+#define SLPT_PTE_SIZE             (SZ_4)
+#define SLPT_SIZE                 (SLPT_NUM_PTES * SLPT_PTE_SIZE)
+#define RSHFT_SIZE_TO_NUM_PTE     (20)
+#define RSHFT_SIZE_TO_NUM_LV2PTE   (12)
+#define MASK_IOVA_TO_VPN          GENMASK(31, 20)
+#define RSHFT_IOVA_TO_VPN         (20)
+#define MASK_IOVA_TO_LV2VPN       GENMASK(19, 12)
+#define RSHFT_IOVA_TO_LV2VPN      (12)
+#define MASK_IOVA_TO_TLBVPN       GENMASK(31, 14)
+#define MASK_IOVA_TO_PAGEOFFSET_4K GENMASK(11, 0)
+#define MASK_IOVA_TO_PAGEOFFSET_1M GENMASK(19, 0)
+#define RSHFT_IOVA_TO_TLBVPN      (14)
+#define LSHFT_TLBVPN_TO_REGVAL    (14)
+
+#define FLPT_PAGE_FAULT         BIT(3)
+#define SLPT_PAGE_FAULT         BIT(2)
+#define PTW_ACCESS_FAULT BIT(1)
+#define ATU_ACCESS_FAULT BIT(0)
+
+#define MASK_TLB_READ_BUF   BIT_MASK(13)
+#define MASK_TLB_READ_CH    BIT_MASK(12)
+#define MASK_TLB_READ_LANE  GENMASK(9, 8)
+#define LSHFT_TLB_READ_LANE (8)
+#define MASK_TLB_READ_LINE  GENMASK(7, 0)
+#define LSHFT_TLB_READ_LINE (0)
+
+#define MASK_TLB_PPN_PPN         GENMASK(31, 8)
+#define RSHFT_TLB_PPN_PPN        (8)
+#define MASK_TLB_TAG_VPN         GENMASK(31, 12)
+#define RSHFT_TLB_TAG_VPN        (12)
+#define MASK_TLB_TAG_PS                  GENMASK(6, 4)
+#define RSHFT_TLB_TAG_PS         (4)
+#define MASK_TLB_TAG_LANE_MAPPED  BIT_MASK(1)
+#define RSHFT_TLB_TAG_LANE_MAPPED (1)
+#define MASK_TLB_TAG_LINE_VALID          BIT_MASK(0)
+#define RSHFT_TLB_TAG_LINE_VALID  (0)
+
+/* Per-device SRNPU-IOMMU bookkeeping; linked into iommu_info_list. */
+struct trinity_hwmem_iommu_info {
+       struct device *dev;             /* owning NPU device */
+       spinlock_t lock;                /* serializes page-table updates */
+       void __iomem *regbase;          /* MMIO base of the IOMMU registers */
+       void *flpt;                     /* first-level page table (kernel va) */
+       dma_addr_t flpt_dma_addr;       /* FLPT base as seen by the device */
+       struct dma_pool *slpt_cache;    /* NOTE(review): unused in this file -- verify */
+       void *dma_region_virt_base;     /* kernel base of the DMA region */
+       phys_addr_t dma_region_phys_base;
+       dma_addr_t dma_region_dma_base; /* device-view base of the DMA region */
+       struct kref *slpt_refcnts;      /* per-FLPT-entry SLPT refcounts */
+       struct list_head node;          /* membership in iommu_info_list */
+};
+
+static LIST_HEAD(iommu_info_list);
+static DEFINE_SPINLOCK(iommu_info_list_lock);
+
+/* Turn on address translation for both the DLA and DSP IOMMU instances. */
+static inline void enable_iommu(struct trinity_hwmem_iommu_info *info)
+{
+       void __iomem *dla_ctrl =
+               info->regbase + BASE_OFFSET_IOMMU_DLA + OFFSET_IOMMU_CTRL;
+       void __iomem *dsp_ctrl =
+               info->regbase + BASE_OFFSET_IOMMU_DSP + OFFSET_IOMMU_CTRL;
+
+       iowrite32(0x1, dla_ctrl);
+       iowrite32(0x1, dsp_ctrl);
+}
+
+/* First-level virtual page number: bits [31:20] of the I/O address. */
+static inline uint32_t iova_to_vpn(size_t iova)
+{
+       return (uint32_t)((iova & MASK_IOVA_TO_VPN) >> RSHFT_IOVA_TO_VPN);
+}
+
+/* Second-level virtual page number: bits [19:12] of the I/O address. */
+static inline uint32_t iova_to_lv2vpn(dma_addr_t iova)
+{
+       return (uint32_t)((iova & MASK_IOVA_TO_LV2VPN) >> RSHFT_IOVA_TO_LV2VPN);
+}
+
+/* TLB-tag virtual page number: bits [31:14] of the I/O address. */
+static inline uint32_t iova_to_tlbvpn(dma_addr_t iova)
+{
+       return (uint32_t)((iova & MASK_IOVA_TO_TLBVPN) >> RSHFT_IOVA_TO_TLBVPN);
+}
+
+/* Rebase a kernel virtual address inside the DMA region onto its DMA view. */
+static inline dma_addr_t virt_to_dma(void *virt_base, dma_addr_t da_base,
+                                    void *va)
+{
+       uintptr_t offset = (uintptr_t)va - (uintptr_t)virt_base;
+
+       return da_base + offset;
+}
+
+/* Inverse of virt_to_dma(): rebase a DMA address into the kernel mapping. */
+static inline void *dma_to_virt(void *virt_base, dma_addr_t da_base,
+                               dma_addr_t da)
+{
+       dma_addr_t offset = da - da_base;
+
+       return (char *)virt_base + offset;
+}
+
+/* Encode a hardware PTE mapping @addr at the given page size. The PPN field
+ * is taken from bits [35:x] of the DMA address, so the hardware addresses a
+ * 36-bit physical space. Returns 0 (no MAPPED bit) for unsupported sizes.
+ */
+static inline uint32_t srnpu_iommu_get_pte(dma_addr_t addr, size_t size)
+{
+       uint32_t val = 0;
+
+       switch (size) {
+       case SZ_16M:
+               val |= MASK_FLPTE_MAP_16M;
+               addr &= MASK_FLPTE_PPN_MASK_16M;
+               val |= ((addr >> RSHFT_FLPTE_PPN_16M) << LSHFT_FLPTE_PPN_16M);
+               break;
+       case SZ_2M:
+               val |= MASK_FLPTE_MAP_2M;
+               addr &= MASK_FLPTE_PPN_MASK_2M;
+               val |= ((addr >> RSHFT_FLPTE_PPN_2M) << LSHFT_FLPTE_PPN_2M);
+               break;
+       case SZ_1M:
+               val |= MASK_FLPTE_MAP_1M;
+               addr &= MASK_FLPTE_PPN_MASK_1M;
+               val |= ((addr >> RSHFT_FLPTE_PPN_1M) << LSHFT_FLPTE_PPN_1M);
+               break;
+       case SZ_64K:
+               val |= MASK_SLPTE_MAP_64K;
+               addr &= MASK_SLPTE_PPN_MASK_64K;
+               val |= ((addr >> RSHFT_SLPTE_PPN_64K) << LSHFT_SLPTE_PPN_64K);
+               break;
+       case SZ_4K:
+               val |= MASK_SLPTE_MAP_4K;
+               addr &= MASK_SLPTE_PPN_MASK_4K;
+               val |= ((addr >> RSHFT_SLPTE_PPN_4K) << LSHFT_SLPTE_PPN_4K);
+               break;
+       default:
+               /* Unsupported page size: caller checks the MAPPED bit. */
+               return 0;
+       }
+
+       val |= MASK_PTE_MAPPED;
+
+       return val;
+}
+
+/* Extract the (1 KiB-aligned) second-level page table base from an FLPT
+ * entry; inverse of the encoding done in alloc_slpt_and_get_flpte().
+ */
+static inline uint32_t flpte_to_slpt_base(const uint32_t flpte)
+{
+       uint32_t base = (flpte & MASK_FLPTE_TO_SLPT_BASE) >>
+                       RSHFT_FLPTE_TO_SLPT_BASE;
+
+       return base << LSHFT_FLPTE_TO_SLPT_BASE;
+}
+
+/* Walk the software page tables to translate @iova to a physical address.
+ * Returns 0 when the first-level entry is not mapped.
+ */
+static inline phys_addr_t iova_to_phys(struct trinity_hwmem_iommu_info *info,
+                                      const dma_addr_t iova)
+{
+       phys_addr_t paddr = 0;
+       uint32_t vpn = iova_to_vpn(iova);
+       uint32_t *pte;
+
+       pte = &((uint32_t *)info->flpt)[vpn];
+
+       /* NOTE(review): the !pte test is always false (address of an array
+        * element); the MAPPED-bit test is the real guard.
+        */
+       if (!pte || !(*pte & MASK_PTE_MAPPED))
+               return 0;
+
+       if (*pte & MASK_FLPTE_MAP_1M) {
+               /* 1M (or larger) block mapped directly in the FLPT. */
+               paddr |= *pte;
+               paddr >>= LSHFT_FLPTE_PPN_1M;
+               paddr <<= RSHFT_FLPTE_PPN_1M;
+               paddr |= (iova & MASK_IOVA_TO_PAGEOFFSET_1M);
+       } else {
+               /* 4K/64K page: follow the FLPT entry to the SLPT. */
+               uint32_t *slpt_base = dma_to_virt(info->dma_region_virt_base,
+                                                 info->dma_region_dma_base,
+                                                 flpte_to_slpt_base(*pte));
+               uint32_t lv2vpn = iova_to_lv2vpn(iova);
+
+               paddr |= ioread32(&slpt_base[lv2vpn]);
+               paddr >>= LSHFT_SLPTE_PPN_4K;
+               paddr <<= RSHFT_SLPTE_PPN_4K;
+               paddr &= MASK_SLPTE_PPN_MASK_4K;
+               paddr |= (iova & MASK_IOVA_TO_PAGEOFFSET_4K);
+       }
+
+       return paddr;
+}
+
+/* Allocate one second-level page table (1 KiB, must be 1 KiB aligned) and
+ * encode its DMA address into FLPT-entry format via @flpte. Returns the
+ * zeroed SLPT kernel address or an ERR_PTR().
+ */
+static inline void *
+alloc_slpt_and_get_flpte(struct trinity_hwmem_iommu_info *info, uint32_t *flpte)
+{
+       dma_addr_t da;
+       void *slpt;
+
+       slpt = dma_alloc_wc(info->dev, SZ_1K, &da, GFP_KERNEL);
+       if (!slpt)
+               return ERR_PTR(-ENOMEM);
+       if (!IS_ALIGNED(da, SZ_1K)) {
+               /* The hardware cannot address an unaligned SLPT base. */
+               dma_free_wc(info->dev, SZ_1K, slpt, da);
+               return ERR_PTR(-EINVAL);
+       }
+
+       da &= MASK_SLPT_BASE_TO_FLPTE;
+       da >>= RSHFT_SLPT_BASE_TO_FLPTE;
+       da <<= LSHFT_SLPT_BASE_TO_FLPTE;
+
+       *flpte = da;
+       memset(slpt, 0, SLPT_SIZE);
+
+       return slpt;
+}
+
+/* Look up the IOMMU bookkeeping entry registered for @dev.
+ *
+ * Fixes two defects in the original: it used llist_for_each_entry() on a
+ * regular list_head list (the list is built with LIST_HEAD/list_add_tail),
+ * and a full traversal without a match left @info pointing at the container
+ * of the list head rather than NULL, so callers' NULL checks never fired.
+ *
+ * Return: the matching entry, or NULL when @dev is not registered.
+ */
+static struct trinity_hwmem_iommu_info *find_iommu_info(struct device *dev)
+{
+       struct trinity_hwmem_iommu_info *info;
+       struct trinity_hwmem_iommu_info *found = NULL;
+
+       spin_lock(&iommu_info_list_lock);
+       list_for_each_entry (info, &iommu_info_list, node) {
+               if (info->dev == dev) {
+                       found = info;
+                       break;
+               }
+       }
+       spin_unlock(&iommu_info_list_lock);
+
+       return found;
+}
+
+/* Install PTEs covering [@daddr, @daddr + @size) in the software page
+ * tables. 1M/2M/16M blocks live in the FLPT; 4K/64K pages live in a
+ * per-FLPT-entry SLPT whose lifetime is tracked by info->slpt_refcnts[vpn].
+ * Caller is expected to hold info->lock.
+ */
+static int iommu_map(struct trinity_hwmem_iommu_info *info, dma_addr_t daddr,
+                    size_t size)
+{
+       uint32_t vpn = iova_to_vpn(daddr);
+       uint32_t lv2_vpn;
+       uint32_t flpte;
+       uint32_t val;
+       uint32_t *pte;
+       uint32_t *slpt;
+       uint32_t i, num_pte;
+
+       /* NOTE(review): &array[vpn] can never be NULL; check is dead code. */
+       pte = &(((uint32_t *)info->flpt)[vpn]);
+       if (!pte) {
+               dev_err(info->dev, "%s: pte is NULL!\n", __func__);
+               return -EINVAL;
+       }
+
+       switch (size) {
+       case SZ_16M:
+       case SZ_2M:
+       case SZ_1M:
+               if (*pte & MASK_PTE_MAPPED) {
+                       dev_err(info->dev,
+                               "%s: iova 0x%llx is already mapped to phys 0x%llx\n",
+                               __func__, daddr, daddr);
+                       return -EADDRINUSE;
+               }
+
+               val = srnpu_iommu_get_pte(daddr, size);
+               if (!(val & MASK_PTE_MAPPED))
+                       return -EINVAL;
+
+               /* Large mappings replicate the PTE over each 1M FLPT slot. */
+               num_pte = size >> RSHFT_SIZE_TO_NUM_PTE;
+               for (i = 0; i < num_pte; ++i)
+                       iowrite32(val, &pte[i]);
+
+               break;
+       case SZ_64K:
+       case SZ_4K:
+               if (*pte & MASK_PTE_MAPPED) {
+                       /* FLPT entry already points at an SLPT: share it. */
+                       slpt = dma_to_virt(info->dma_region_virt_base,
+                                          info->dma_region_dma_base,
+                                          flpte_to_slpt_base(*pte));
+                       kref_get(&info->slpt_refcnts[vpn]);
+                       /* kref_get(&srnpu_iommu_slpt_refcnts[vpn]); */
+               } else {
+                       slpt = alloc_slpt_and_get_flpte(info, &flpte);
+                       if (IS_ERR(slpt)) {
+                               return PTR_ERR(slpt);
+                       }
+                       /*bitlock here */
+                       kref_init(&info->slpt_refcnts[vpn]);
+                       /* kref_init(&srnpu_iommu_slpt_refcnts[vpn]); */
+                       iowrite32((flpte | MASK_PTE_MAPPED), pte);
+               }
+               lv2_vpn = iova_to_lv2vpn(daddr);
+               slpt = &slpt[lv2_vpn];
+
+               val = srnpu_iommu_get_pte(daddr, size);
+               if (!(val & MASK_PTE_MAPPED))
+                       return -EINVAL;
+
+               /* A 64K mapping replicates the PTE over sixteen 4K slots. */
+               num_pte = size >> RSHFT_SIZE_TO_NUM_LV2PTE;
+               for (i = 0; i < num_pte; i++)
+                       iowrite32(val, &slpt[i]);
+
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/* kref release callback for an SLPT refcount; the actual dma_free_wc() is
+ * done by the kref_put() caller in iommu_unmap(). The BUG_ON is a sanity
+ * check that the counter really reached zero.
+ */
+static void release_slpt(struct kref *kref)
+{
+       BUG_ON(kref_read(kref));
+}
+
+/* Clear PTEs covering [@daddr, @daddr + @size); for 4K/64K pages the SLPT
+ * is freed once its per-FLPT-entry refcount drops to zero.
+ * Caller is expected to hold info->lock.
+ */
+static int32_t iommu_unmap(struct trinity_hwmem_iommu_info *info,
+                          dma_addr_t daddr, size_t size)
+{
+       uint32_t vpn = iova_to_vpn(daddr);
+       uint32_t *pte;
+       uint32_t val;
+       uint32_t i, num_pte;
+
+       pte = &(((uint32_t *)info->flpt)[vpn]);
+
+       switch (size) {
+       case SZ_16M:
+       case SZ_2M:
+       case SZ_1M:
+               num_pte = size >> RSHFT_SIZE_TO_NUM_PTE;
+               for (i = 0; i < num_pte; ++i) {
+                       /* NOTE(review): XOR only clears MAPPED when it was
+                        * set; on an already-unmapped PTE it would set the
+                        * bit instead. '& ~MASK_PTE_MAPPED' would be safer
+                        * -- confirm callers only unmap mapped ranges.
+                        */
+                       val = pte[i] ^ MASK_PTE_MAPPED;
+                       iowrite32(val, &pte[i]);
+               }
+               break;
+       case SZ_64K:
+       case SZ_4K: {
+               void *slpt_base = dma_to_virt(info->dma_region_virt_base,
+                                             info->dma_region_dma_base,
+                                             flpte_to_slpt_base(*pte));
+
+               uint32_t lv2vpn = iova_to_lv2vpn(daddr);
+               uint32_t *slpt;
+
+               slpt = &((uint32_t *)slpt_base)[lv2vpn];
+               num_pte = size >> RSHFT_SIZE_TO_NUM_LV2PTE;
+               for (i = 0; i < num_pte; i++) {
+                       val = slpt[i] ^ MASK_PTE_MAPPED;
+                       iowrite32(val, &slpt[i]);
+               }
+
+               /* Last user of this SLPT: free it and clear the FLPT entry. */
+               if (kref_put(&info->slpt_refcnts[vpn], release_slpt)) {
+                       dma_free_wc(info->dev, PAGE_ALIGN(SLPT_SIZE), slpt_base,
+                                   flpte_to_slpt_base(*pte));
+                       val = (*pte) ^ MASK_PTE_MAPPED;
+                       iowrite32(val, pte);
+               }
+               break;
+       }
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/**
+ * trinity_hwmem_iommu_map() - Map a DMA range into the SRNPU-IOMMU.
+ * @dev: Device registered via trinity_hwmem_iommu_init().
+ * @addr: Start DMA address (expected page aligned).
+ * @size: Size of the range in bytes.
+ *
+ * The range is split greedily into the largest supported page sizes
+ * (16M > 2M > 1M > 64K > 4K). Per-chunk failures are logged but do not
+ * abort the walk.
+ *
+ * Return: 0 on success, -ENOENT when @dev has no registered IOMMU info.
+ */
+int32_t trinity_hwmem_iommu_map(struct device *dev, dma_addr_t addr,
+                               size_t size)
+{
+       struct trinity_hwmem_iommu_info *info = NULL;
+       size_t size_remain = size;
+       size_t size_strd;
+
+       info = find_iommu_info(dev);
+       if (info == NULL) {
+               dev_err(dev, "Unable to find the iommu info");
+               return -ENOENT;
+       }
+
+       spin_lock(&info->lock);
+
+       while (size_remain > 0) {
+               /* Pick the largest page size that still fits. */
+               if (size_remain >= SZ_16M)
+                       size_strd = SZ_16M;
+               else if (size_remain >= SZ_2M)
+                       size_strd = SZ_2M;
+               else if (size_remain >= SZ_1M)
+                       size_strd = SZ_1M;
+               else if (size_remain >= SZ_64K)
+                       size_strd = SZ_64K;
+               else
+                       size_strd = SZ_4K;
+
+               if (iommu_map(info, addr, size_strd) < 0)
+                       dev_warn(dev, "Unable to map iommu mapping for 0x%llx",
+                                addr);
+
+               size_remain -= size_strd;
+               addr += size_strd;
+       }
+
+       spin_unlock(&info->lock);
+
+       return 0;
+}
+
+/**
+ * trinity_hwmem_iommu_unmap() - Unmap a DMA range from the SRNPU-IOMMU.
+ * @dev: Device registered via trinity_hwmem_iommu_init().
+ * @addr: Start DMA address (expected page aligned).
+ * @size: Size of the range in bytes.
+ *
+ * Mirrors the greedy page-size split of trinity_hwmem_iommu_map(); the two
+ * must agree so that unmap walks the same chunk boundaries as map did.
+ *
+ * Return: 0 on success, -ENOENT when @dev has no registered IOMMU info.
+ */
+int32_t trinity_hwmem_iommu_unmap(struct device *dev, dma_addr_t addr,
+                                 size_t size)
+{
+       struct trinity_hwmem_iommu_info *info;
+       size_t size_remain = size;
+       size_t size_strd;
+
+       info = find_iommu_info(dev);
+       if (info == NULL) {
+               dev_err(dev, "Unable to find the iommu info");
+               return -ENOENT;
+       }
+
+       spin_lock(&info->lock);
+
+       while (size_remain > 0) {
+               /* Pick the largest page size that still fits. */
+               if (size_remain >= SZ_16M)
+                       size_strd = SZ_16M;
+               else if (size_remain >= SZ_2M)
+                       size_strd = SZ_2M;
+               else if (size_remain >= SZ_1M)
+                       size_strd = SZ_1M;
+               else if (size_remain >= SZ_64K)
+                       size_strd = SZ_64K;
+               else
+                       size_strd = SZ_4K;
+
+               if (iommu_unmap(info, addr, size_strd) < 0)
+                       dev_warn(dev,
+                                "Unable to unmap iommu mapping for 0x%llx",
+                                addr);
+
+               size_remain -= size_strd;
+               addr += size_strd;
+       }
+
+       spin_unlock(&info->lock);
+
+       return 0;
+}
+
+/**
+ * trinity_hwmem_iommu_init() - Set up the SRNPU-IOMMU page tables and enable
+ *             translation for both the DLA and DSP paths.
+ * @dev: NPU platform device carrying a "samsung,dma" DT property.
+ * @regbase: MMIO base of the IOMMU register block.
+ *
+ * Return: 0 on success, negative errno otherwise.
+ */
+int32_t trinity_hwmem_iommu_init(struct device *dev, void __iomem *regbase)
+{
+       struct trinity_hwmem_iommu_info *info;
+       struct device_node *np = dev->of_node;
+       struct property *prop;
+       uint64_t dma_info[3];
+       int32_t err;
+
+       /**
+        *  In the case of the FPGA development board, let's assume triv2 has
+        *  its own DMA memory region.
+        */
+       prop = of_find_property(np, "samsung,dma", NULL);
+       if (!prop)
+               return -ENODEV;
+
+       err = of_property_read_u64_array(np, "samsung,dma", dma_info, 3);
+       if (err < 0)
+               return -EINVAL;
+
+       info = kzalloc(sizeof(*info), GFP_KERNEL);
+       if (!info)
+               return -ENOMEM;
+
+       info->regbase = regbase;
+       /* dma_info[] from DT: { phys base, dma base, (unused, size?) }. */
+       info->dma_region_phys_base = dma_info[0];
+       info->dma_region_dma_base = dma_info[1];
+
+       spin_lock_init(&info->lock);
+
+       /* The hardware requires the FLPT base to be 16 KiB aligned. */
+       info->flpt = dma_alloc_wc(dev, PAGE_ALIGN(FLPT_SIZE),
+                                 &info->flpt_dma_addr, GFP_KERNEL);
+       if (!info->flpt) {
+               err = -ENOMEM;
+               goto err_free;
+       } else if (!IS_ALIGNED(info->flpt_dma_addr, SZ_16K)) {
+               err = -ENOMEM;
+               goto err_free_flptr;
+       }
+
+       info->slpt_refcnts = devm_kzalloc(
+               dev, sizeof(*info->slpt_refcnts) * FLPT_NUM_PTES, GFP_KERNEL);
+       if (!info->slpt_refcnts) {
+               /* Previously unchecked; a failed allocation would have been
+                * dereferenced later in iommu_map()/iommu_unmap().
+                */
+               err = -ENOMEM;
+               goto err_free_flptr;
+       }
+
+       info->dma_region_virt_base =
+               (void *)((size_t)info->flpt -
+                        (info->flpt_dma_addr - info->dma_region_dma_base));
+
+       info->dev = dev;
+
+       spin_lock(&iommu_info_list_lock);
+       list_add_tail(&info->node, &iommu_info_list);
+       spin_unlock(&iommu_info_list_lock);
+
+       /** Set FLPT base */
+       iowrite32(FLPT_DMAADDR_TO_REGVAL(info->flpt_dma_addr),
+                 info->regbase + BASE_OFFSET_IOMMU_DLA +
+                         OFFSET_IOMMU_FLPT_BASE);
+       iowrite32(FLPT_DMAADDR_TO_REGVAL(info->flpt_dma_addr),
+                 info->regbase + BASE_OFFSET_IOMMU_DSP +
+                         OFFSET_IOMMU_FLPT_BASE);
+
+       /** Enable IOMMU */
+       enable_iommu(info);
+
+       return 0;
+
+err_free_flptr:
+       dma_free_wc(dev, PAGE_ALIGN(FLPT_SIZE), info->flpt,
+                   info->flpt_dma_addr);
+err_free:
+       kfree(info);
+
+       return err;
+}
+
+/**
+ * trinity_hwmem_iommu_flush() - Invalidate all TLB entries for @dev's IOMMU.
+ * @dev: Device registered via trinity_hwmem_iommu_init().
+ *
+ * Silently returns when @dev has no registered IOMMU info.
+ */
+void trinity_hwmem_iommu_flush(struct device *dev)
+{
+       struct trinity_hwmem_iommu_info *info;
+       u32 val = 1;
+
+       /* Reuse the shared lookup helper instead of open-coding the list
+        * walk; the original used llist_for_each_entry() on a list_head
+        * list and its not-found check could never trigger.
+        */
+       info = find_iommu_info(dev);
+       if (info == NULL)
+               return;
+
+       spin_lock(&info->lock);
+       iowrite32(val, info->regbase + BASE_OFFSET_IOMMU_DLA +
+                              OFFSET_IOMMU_ALL_INVALIDATION);
+       iowrite32(val, info->regbase + BASE_OFFSET_IOMMU_DSP +
+                              OFFSET_IOMMU_ALL_INVALIDATION);
+       spin_unlock(&info->lock);
+}
+
+#ifdef CONFIG_TRINITY_FPGA_DEBUG
+/* Debug-only: dump every mapped FLPT entry and, for 4K/64K mappings, each
+ * mapped SLPT entry beneath it.
+ */
+static void iommu_dump_mapped(struct trinity_hwmem_iommu_info *info)
+{
+       void *slpt_base_vaddr;
+       u32 pte, ppn, slpt_base;
+       u32 i, j;
+
+       if (!info)
+               return;
+
+       dev_info(info->dev, "[IOMMU] PAGE TABLE DUMP");
+
+       for (i = 0; i < FLPT_NUM_PTES; i++) {
+               pte = ((u32 *)info->flpt)[i];
+
+               if (!(pte & MASK_PTE_MAPPED))
+                       continue;
+
+               /* Check the widest block-size bits first (16M > 2M > 1M). */
+               if (CHECK_BITS(pte, MASK_FLPTE_MAP_16M)) {
+                       ppn = (pte >> LSHFT_FLPTE_PPN_16M);
+                       dev_info(info->dev, "\t[%u] 16M PTE(0x%x) PADDR (0x%x)",
+                                i, pte, ppn << RSHFT_FLPTE_PPN_16M);
+               } else if (CHECK_BITS(pte, MASK_FLPTE_MAP_2M)) {
+                       ppn = (pte >> LSHFT_FLPTE_PPN_2M);
+                       dev_info(info->dev, "\t[%u] 2M PTE(0x%x) PADDR (0x%x)",
+                                i, pte, ppn << RSHFT_FLPTE_PPN_2M);
+               } else if (CHECK_BITS(pte, MASK_FLPTE_MAP_1M)) {
+                       ppn = (pte >> LSHFT_FLPTE_PPN_1M);
+                       dev_info(info->dev, "\t[%u] 1M PTE(0x%x) PADDR (0x%x)",
+                                i, pte, ppn << RSHFT_FLPTE_PPN_1M);
+               } else {
+                       /* FLPT entry points at an SLPT: walk its entries. */
+                       slpt_base = flpte_to_slpt_base(pte);
+                       slpt_base_vaddr =
+                               dma_to_virt(info->dma_region_virt_base,
+                                           info->dma_region_dma_base,
+                                           slpt_base);
+
+                       dev_info(info->dev,
+                                "\t[%u] 4K/64K PTE(0x%x) SLPT_BASE (0x%x)", i,
+                                pte, slpt_base);
+
+                       for (j = 0; j < SLPT_NUM_PTES; j++) {
+                               pte = ((u32 *)slpt_base_vaddr)[j];
+                               if (!(pte & MASK_PTE_MAPPED))
+                                       continue;
+
+                               if (CHECK_BITS(pte, MASK_SLPTE_MAP_64K)) {
+                                       ppn = (pte >> LSHFT_SLPTE_PPN_64K);
+                                       dev_info(
+                                               info->dev,
+                                               "\t\t[%u] 64K slpt pte (0x%x) paddr (0x%x)",
+                                               j, pte,
+                                               ppn << RSHFT_SLPTE_PPN_64K);
+                               } else {
+                                       ppn = (pte >> LSHFT_SLPTE_PPN_4K);
+                                       dev_info(
+                                               info->dev,
+                                               "\t\t[%u] 4K slpt pte (0x%x) paddr (0x%x)",
+                                               j, pte,
+                                               ppn << RSHFT_SLPTE_PPN_4K);
+                               }
+                       }
+               }
+       }
+}
+
+/* Dump the 8 lines x 4 lanes of one TLB set via dev_info().
+ * @name: human-readable set name printed in the banner
+ * @flags: buffer/channel select bits (MASK_TLB_READ_BUF and/or
+ *         MASK_TLB_READ_CH) written into the TLB_READ register
+ */
+static void iommu_dump_tlb_set(struct trinity_hwmem_iommu_info *info,
+                              const char *name, u32 flags)
+{
+       u32 val, tag, ppn;
+       u32 line, lane;
+
+       dev_info(info->dev, "[IOMMU] IOMMU TLB (%s) DUMP", name);
+
+       for (line = 0; line < 8; line++) {
+               for (lane = 0; lane < 4; lane++) {
+                       val = flags;
+                       val |= ((lane << LSHFT_TLB_READ_LANE) &
+                               MASK_TLB_READ_LANE);
+                       val |= ((line << LSHFT_TLB_READ_LINE) &
+                               MASK_TLB_READ_LINE);
+
+                       /* select the entry, then read back its PPN and TAG */
+                       iowrite32(val, info->regbase + BASE_OFFSET_IOMMU_DLA +
+                                              OFFSET_IOMMU_TLB_READ);
+
+                       ppn = ioread32(info->regbase + BASE_OFFSET_IOMMU_DLA +
+                                      OFFSET_IOMMU_TLB_PPN);
+                       tag = ioread32(info->regbase + BASE_OFFSET_IOMMU_DLA +
+                                      OFFSET_IOMMU_TLB_TAG);
+
+                       dev_info(
+                               info->dev,
+                               "\t[%u][%u] PPN (0x%x) VPN(0x%x) PS(%x) LANE_MAPPED(%u) LINE_VALID(%u)",
+                               line, lane,
+                               (u32)((ppn & MASK_TLB_PPN_PPN) >>
+                                     RSHFT_TLB_PPN_PPN),
+                               (u32)((tag & MASK_TLB_TAG_VPN) >>
+                                     RSHFT_TLB_TAG_VPN),
+                               (u32)((tag & MASK_TLB_TAG_PS) >>
+                                     RSHFT_TLB_TAG_PS),
+                               (u32)((tag & MASK_TLB_TAG_LANE_MAPPED) >>
+                                     RSHFT_TLB_TAG_LANE_MAPPED),
+                               (u32)((tag & MASK_TLB_TAG_LINE_VALID) >>
+                                     RSHFT_TLB_TAG_LINE_VALID));
+               }
+       }
+}
+
+/* Dump all three IOMMU TLB sets (pbuf input, pbuf output, vbuf).
+ * The original open-coded the same 8x4 loop three times; the common body
+ * is factored into iommu_dump_tlb_set(), producing identical output.
+ */
+static void iommu_dump_tlb(struct trinity_hwmem_iommu_info *info)
+{
+       if (!info)
+               return;
+
+       iommu_dump_tlb_set(info, "PBUF/INPUT", MASK_TLB_READ_BUF);
+       iommu_dump_tlb_set(info, "PBUF/OUTPUT",
+                          MASK_TLB_READ_BUF | MASK_TLB_READ_CH);
+       iommu_dump_tlb_set(info, "VBUF", 0);
+}
+
+/* Dump the IOMMU fault status registers, page tables, and TLB contents of
+ * the IOMMU instance attached to @dev (FPGA debug builds only).
+ */
+void trinity_hwmem_iommu_print_status(struct device *dev)
+{
+       struct trinity_hwmem_iommu_info *iter, *info = NULL;
+       u32 iflt_stat, oflt_stat;
+
+       spin_lock(&iommu_info_list_lock);
+       llist_for_each_entry (iter, &iommu_info_list, node) {
+               /* NOTE: the loop cursor is an invalid non-NULL pointer when
+                * the list is exhausted, so record the match explicitly
+                * instead of testing the cursor afterwards.
+                */
+               if (iter->dev == dev) {
+                       info = iter;
+                       break;
+               }
+       }
+       spin_unlock(&iommu_info_list_lock);
+
+       if (!info) {
+               dev_err(dev, "Unable to find the iommu info");
+               return;
+       }
+
+       spin_lock(&info->lock);
+
+       /* input-channel fault status */
+       iflt_stat = ioread32(info->regbase + BASE_OFFSET_IOMMU_DLA +
+                            OFFSET_IOMMU_IFLT_STAT);
+       if (iflt_stat & FLPT_PAGE_FAULT)
+               dev_err(dev,
+                       "[IOMMU] [IN] First-level page table fault detected at 0x%x",
+                       ioread32(info->regbase + BASE_OFFSET_IOMMU_DLA +
+                                OFFSET_IOMMU_IFLT_VA));
+       if (iflt_stat & SLPT_PAGE_FAULT)
+               dev_err(dev,
+                       "[IOMMU] [IN] Second-level page table fault detected at 0x%x",
+                       ioread32(info->regbase + BASE_OFFSET_IOMMU_DLA +
+                                OFFSET_IOMMU_IFLT_VA));
+       if (iflt_stat & PTW_ACCESS_FAULT)
+               dev_err(dev,
+                       "[IOMMU] [IN] Page Table Walker (PTW) access fault");
+       if (iflt_stat & ATU_ACCESS_FAULT)
+               dev_err(dev,
+                       "[IOMMU] [IN] Address Translation Unit (ATU) access fault");
+
+       /* output-channel fault status */
+       oflt_stat = ioread32(info->regbase + BASE_OFFSET_IOMMU_DLA +
+                            OFFSET_IOMMU_OFLT_STAT);
+       if (oflt_stat & FLPT_PAGE_FAULT)
+               dev_err(dev,
+                       "[IOMMU] [OUT] First-level page table fault detected at 0x%x",
+                       ioread32(info->regbase + BASE_OFFSET_IOMMU_DLA +
+                                OFFSET_IOMMU_OFLT_VA));
+       if (oflt_stat & SLPT_PAGE_FAULT)
+               dev_err(dev,
+                       "[IOMMU] [OUT] Second-level page table fault detected at 0x%x",
+                       ioread32(info->regbase + BASE_OFFSET_IOMMU_DLA +
+                                OFFSET_IOMMU_OFLT_VA));
+       if (oflt_stat & PTW_ACCESS_FAULT)
+               dev_err(dev,
+                       "[IOMMU] [OUT] Page Table Walker (PTW) access fault");
+       if (oflt_stat & ATU_ACCESS_FAULT)
+               dev_err(dev,
+                       "[IOMMU] [OUT] Address Translation Unit (ATU) access fault");
+
+       iommu_dump_mapped(info);
+       iommu_dump_tlb(info);
+
+       spin_unlock(&info->lock);
+}
+#endif
diff --git a/drivers/misc/trinity/trinity_hwmem_iommu_helper.h b/drivers/misc/trinity/trinity_hwmem_iommu_helper.h
new file mode 100644 (file)
index 0000000..35321c9
--- /dev/null
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Wook Song <wook16.song@samsung.com>
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#ifndef __DRIVERS_MISC_TRINITY_HWMEM_IOMMU_HELPER_H__
+#define __DRIVERS_MISC_TRINITY_HWMEM_IOMMU_HELPER_H__
+
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+/* Set up per-device IOMMU state; @regbase is the mapped register base */
+int32_t trinity_hwmem_iommu_init(struct device *dev, void __iomem *regbase);
+/* Map/unmap a DMA address range in the device IOMMU page table */
+int32_t trinity_hwmem_iommu_map(struct device *dev, dma_addr_t addr,
+                               size_t size);
+int32_t trinity_hwmem_iommu_unmap(struct device *dev, dma_addr_t addr,
+                                 size_t size);
+/* Invalidate the device TLB after page table updates */
+void trinity_hwmem_iommu_flush(struct device *dev);
+
+#ifdef CONFIG_TRINITY_FPGA_DEBUG
+/* Dump fault status, page tables, and TLB contents (debug builds only) */
+void trinity_hwmem_iommu_print_status(struct device *dev);
+#endif
+
+#endif /* __DRIVERS_MISC_TRINITY_HWMEM_IOMMU_HELPER_H__ */
diff --git a/drivers/misc/trinity/trinity_monitor.c b/drivers/misc/trinity/trinity_monitor.c
new file mode 100644 (file)
index 0000000..2bef138
--- /dev/null
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * trinity/trinity_monitor.c: Device status monitor
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#include <linux/bitmap.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <uapi/linux/sched/types.h>
+
+#include "trinity_common.h"
+#include "trinity_monitor.h"
+
+/* Fixed-size pool of monitor events; one bitmap bit per slot */
+#define EVENT_POOL_SIZE (4096)
+#define EVENT_TOTAL_NUM (EVENT_POOL_SIZE / sizeof(struct trinity_monitor_event))
+
+struct trinity_monitor {
+       DECLARE_BITMAP(bitmap, EVENT_TOTAL_NUM); /* slot allocation map */
+       spinlock_t lock;        /* protects bitmap */
+       void *pool;             /* backing storage for the event slots */
+
+       struct task_struct *thread;     /* worker draining event_queue */
+       struct device *dev;             /* device used for alloc/logging */
+       struct llist_head event_queue;  /* lock-less queue of pending events */
+       wait_queue_head_t wait_queue;   /* wakes the worker (stop/timeout) */
+};
+
+/* single global monitor instance */
+static struct trinity_monitor monitor;
+
+/* Return an event's slot to the pool by clearing its bitmap bit */
+static void trinity_monitor_clear_event(struct trinity_monitor_event *event)
+{
+       if (event) {
+               spin_lock(&monitor.lock);
+               clear_bit(event->slot, monitor.bitmap);
+               spin_unlock(&monitor.lock);
+       }
+}
+
+/* Process one queued event: free its slot when it was marked done, fire the
+ * callback and free the slot on timeout, or re-queue it for the next pass.
+ */
+static void trinity_monitor_handle_event(struct trinity_monitor_event *event)
+{
+       ktime_t elapsed;
+
+       if (!event)
+               return;
+
+       if (atomic_read(&event->marker) == 1) {
+               /* marked done: no callback needed, just release the slot */
+               trinity_monitor_clear_event(event);
+               return;
+       }
+
+       elapsed = ktime_to_ms(ktime_sub(ktime_get(), event->start_time));
+       BUG_ON(elapsed < 0);
+
+       if (elapsed > event->timeout_ms) {
+               /* timed out: notify and release the slot */
+               if (event->cb)
+                       event->cb(event->cb_data);
+               trinity_monitor_clear_event(event);
+               return;
+       }
+
+       /* still within its timeout: push back to the queue */
+       llist_add(&event->llist, &monitor.event_queue);
+}
+
+/* Lock-less worker thread: drains the event queue, checks for invalid
+ * device memory accesses, then sleeps up to 100 ms per iteration.
+ */
+static int trinity_monitor_worker(void *data)
+{
+       struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
+
+       sched_setscheduler(current, SCHED_RR, &param);
+
+       while (!kthread_should_stop()) {
+               struct llist_node *node = llist_del_all(&monitor.event_queue);
+
+               /* handle every event snapshotted from the queue */
+               while (node) {
+                       struct llist_node *next = llist_next(node);
+
+                       trinity_monitor_handle_event(llist_entry(
+                               node, struct trinity_monitor_event, llist));
+                       node = next;
+               }
+
+               /* check any invalid memory access from devices */
+               trinity_monitor_invalid_access();
+
+               /* sleep until stop is requested or 100 ms elapse */
+               wait_event_interruptible_timeout(monitor.wait_queue,
+                                                kthread_should_stop(),
+                                                HZ / 10);
+       }
+
+       return 0;
+}
+
+/* Reserve a free event slot from the pool.  Returns a zeroed event with its
+ * slot index set, or NULL when every slot is in use.
+ */
+struct trinity_monitor_event *trinity_monitor_get_event(void)
+{
+       struct trinity_monitor_event *pool = monitor.pool;
+       struct trinity_monitor_event *event = NULL;
+       int idx;
+
+       spin_lock(&monitor.lock);
+       idx = find_first_zero_bit(monitor.bitmap, EVENT_TOTAL_NUM);
+       if (idx < EVENT_TOTAL_NUM) {
+               set_bit(idx, monitor.bitmap);
+               event = &pool[idx];
+       }
+       spin_unlock(&monitor.lock);
+
+       if (!event)
+               return NULL;
+
+       memset(event, '\x00', sizeof(*event));
+       event->slot = idx;
+
+       return event;
+}
+
+/* Queue @event for the monitor worker; returns -EINVAL on NULL input */
+int trinity_monitor_add_event(struct trinity_monitor_event *event)
+{
+       if (event == NULL)
+               return -EINVAL;
+
+       llist_add(&event->llist, &monitor.event_queue);
+
+       return 0;
+}
+
+/* Initialize the global monitor: event pool, slot bitmap, queues, and the
+ * worker thread.  Returns 0 on success or a negative errno.
+ */
+int trinity_monitor_init(struct device *dev)
+{
+       struct task_struct *thread;
+
+       if (!dev)
+               return -EINVAL;
+
+       monitor.dev = dev;
+       /* The slot bitmap covers EVENT_TOTAL_NUM entries, i.e.
+        * EVENT_POOL_SIZE bytes; allocate exactly that much rather than
+        * PAGE_SIZE, which only coincides with it on 4K-page kernels.
+        */
+       monitor.pool = devm_kzalloc(dev, EVENT_POOL_SIZE, GFP_KERNEL);
+       if (!monitor.pool)
+               return -ENOMEM;
+
+       bitmap_zero(monitor.bitmap, EVENT_TOTAL_NUM);
+       spin_lock_init(&monitor.lock);
+
+       init_llist_head(&monitor.event_queue);
+       init_waitqueue_head(&monitor.wait_queue);
+
+       thread = kthread_run(trinity_monitor_worker, NULL, "trinity_monitor");
+       if (IS_ERR(thread)) {
+               dev_err(dev, "Unable to create kthread");
+               return PTR_ERR(thread);
+       }
+       monitor.thread = thread;
+
+       return 0;
+}
diff --git a/drivers/misc/trinity/trinity_monitor.h b/drivers/misc/trinity/trinity_monitor.h
new file mode 100644 (file)
index 0000000..1877992
--- /dev/null
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * trinity/trinity_monitor.h: Device status monitor
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#ifndef __TRINITY_MONITOR_H__
+#define __TRINITY_MONITOR_H__
+
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/ktime.h>
+#include <linux/llist.h>
+#include <linux/types.h>
+
+/* Callback fired by the monitor worker when an event exceeds its timeout */
+typedef void (*trinity_monitor_cb)(void *data);
+
+struct trinity_monitor_event {
+       struct llist_node llist;        /* link in the monitor event queue */
+       unsigned long timeout_ms;       /* timeout after which @cb fires */
+       trinity_monitor_cb cb;          /* timeout callback (may be NULL) */
+       void *cb_data;                  /* opaque argument passed to @cb */
+       ktime_t start_time;             /* when monitoring of this event began */
+       atomic_t marker;                /* when 1, callback is skipped and the slot freed */
+       int slot;                       /* index of this event in the pool */
+};
+
+#ifdef CONFIG_TRINITY_MONITOR
+struct trinity_monitor_event *trinity_monitor_get_event(void);
+int trinity_monitor_add_event(struct trinity_monitor_event *event);
+int trinity_monitor_init(struct device *dev);
+#else
+/* Monitoring disabled: no-op stubs keep callers building without #ifdefs */
+static inline struct trinity_monitor_event *trinity_monitor_get_event(void)
+{
+       return NULL;
+}
+static inline int trinity_monitor_add_event(struct trinity_monitor_event *event)
+{
+       return 0;
+}
+static inline int trinity_monitor_init(struct device *dev)
+{
+       return 0;
+}
+#endif
+
+#endif /* __TRINITY_MONITOR_H__ */
diff --git a/drivers/misc/trinity/trinity_resv_mem.c b/drivers/misc/trinity/trinity_resv_mem.c
new file mode 100644 (file)
index 0000000..5f5b8a5
--- /dev/null
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * Reserved memory allocator for Trinity device drivers
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#include "trinity_resv_mem.h"
+#include <linux/bitmap.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#define TRINITY_DRV_TO_RESV_POOL(drv)                                          \
+       ((struct trinity_resv_mem_pool *)drv->resv_pool)
+#define PROG_POOL_SIZE      (6 * 1024 * 1024) /* FIXME: 6MB */
+#define IS_INITIALIZED(pool) (atomic_read(&((pool)->initialized)) == 1)
+/* Single statements need no do-while wrapper; the original's trailing
+ * semicolon after "while (0)" would also break if/else usage of the macro.
+ */
+#define SET_INITIALIZED(pool) atomic_set(&((pool)->initialized), 1)
+#define UNSET_INITIALIZED(pool) atomic_set(&((pool)->initialized), 0)
+
+struct trinity_resv_mem_pool {
+       phys_addr_t paddr_base; /* physical base of the pool */
+       dma_addr_t daddr_base;  /* device (DMA) base of the pool */
+       void *vaddr_base;       /* kernel mapping (ioremap_wc) of the pool */
+
+       size_t total_size;      /* pool capacity in bytes */
+       size_t total_used;      /* bytes currently allocated */
+
+       unsigned int num_bits;  /* number of page-sized slots */
+       unsigned long *bitmap;  /* one bit per page; set = allocated */
+
+       spinlock_t lock;        /* protects bitmap and total_used */
+       atomic_t initialized;   /* 1 once init_resv_mem() succeeded */
+};
+
+/* Trinity devices share this reserved memory pool */
+static struct trinity_resv_mem_pool resv_pool_cont;
+static struct trinity_resv_mem_pool resv_pool_norm;
+
+/* Map [paddr, paddr + size) write-combined and set up the per-page
+ * allocation bitmap for @pool.  Returns 0 on success, -EINVAL when the
+ * mapping fails, or -ENOMEM when the bitmap cannot be allocated.
+ */
+static int init_resv_mem(phys_addr_t paddr, dma_addr_t daddr, size_t size,
+                        struct trinity_resv_mem_pool *pool)
+{
+       unsigned int nbits = size >> PAGE_SHIFT;
+       void *base;
+
+       base = ioremap_wc(paddr, size);
+       if (!base)
+               return -EINVAL;
+
+       pool->bitmap = kcalloc(BITS_TO_LONGS(nbits), sizeof(long), GFP_KERNEL);
+       if (!pool->bitmap) {
+               iounmap(base);
+               return -ENOMEM;
+       }
+
+       pool->paddr_base = paddr;
+       pool->daddr_base = daddr;
+       pool->vaddr_base = base;
+       pool->total_size = size;
+       pool->total_used = 0;
+       pool->num_bits = nbits;
+
+       spin_lock_init(&pool->lock);
+       SET_INITIALIZED(pool);
+
+       return 0;
+}
+
+/* Undo init_resv_mem(): unmap the pool, free its bitmap, and reset the
+ * descriptor.  Safe to call on an uninitialized or NULL pool.
+ */
+static void fini_resv_mem(struct trinity_resv_mem_pool *pool)
+{
+       if (!pool || !IS_INITIALIZED(pool))
+               return;
+
+       UNSET_INITIALIZED(pool);
+
+       iounmap(pool->vaddr_base);
+       kfree(pool->bitmap);
+       memset(pool, '\x00', sizeof(*pool));
+}
+
+/* Split the reserved region [paddr, paddr + size) into a contiguous
+ * program pool of PROG_POOL_SIZE bytes and a normal pool holding the
+ * remainder.  Returns 0 on success (or when already initialized), or a
+ * negative errno.
+ */
+int trinity_declare_resv_mem(phys_addr_t paddr, dma_addr_t daddr, size_t size)
+{
+       int ret;
+
+       /* skip if initialized before */
+       if (unlikely(IS_INITIALIZED(&resv_pool_cont) ||
+                    IS_INITIALIZED(&resv_pool_norm)))
+               return 0;
+
+       /* guard the unsigned subtraction below: the region must be strictly
+        * larger than the program pool carved out of its head
+        */
+       if (unlikely(size <= PROG_POOL_SIZE))
+               return -EINVAL;
+
+       ret = init_resv_mem(paddr, daddr, PROG_POOL_SIZE, &resv_pool_cont);
+       if (ret != 0)
+               return ret;
+
+       /* FIXME: reserve the first page (not used) */
+       set_bit(0, resv_pool_cont.bitmap);
+       resv_pool_cont.total_used = PAGE_SIZE;
+
+       ret = init_resv_mem(paddr + PROG_POOL_SIZE, daddr + PROG_POOL_SIZE,
+                           size - PROG_POOL_SIZE, &resv_pool_norm);
+       if (ret != 0) {
+               fini_resv_mem(&resv_pool_cont);
+               return ret;
+       }
+
+       return 0;
+}
+
+/* Tear down both reserved pools; use a (void) prototype to match the
+ * declaration in trinity_resv_mem.h (empty parens are an old-style,
+ * unprototyped definition).
+ */
+void trinity_release_resv_mem(void)
+{
+       fini_resv_mem(&resv_pool_cont);
+       fini_resv_mem(&resv_pool_norm);
+}
+
+/* Find @nr consecutive clear bits in @bitmap.  Returns the index of the
+ * first bit of the run, or -ERANGE when no such run exists.  Replaces the
+ * original hand-rolled retry loop with the kernel's
+ * bitmap_find_next_zero_area(), which implements the same scan.
+ */
+static int find_free_region(unsigned long *bitmap, unsigned long num_bits,
+                           unsigned long nr)
+{
+       unsigned long index;
+
+       index = bitmap_find_next_zero_area(bitmap, num_bits, 0, nr, 0);
+       if (index >= num_bits)
+               return -ERANGE;
+
+       return index;
+}
+
+/* Allocate @size bytes from a reserved pool and return the region through
+ * @mem.  @size must be a PAGE_SIZE multiple; @is_cont selects the
+ * contiguous program pool over the normal pool.  The region is zeroed.
+ * Returns 0 on success or a negative errno.
+ */
+int trinity_alloc_from_resv_mem(const size_t size, struct trinity_resv_mem *mem,
+                               bool is_cont)
+{
+       struct trinity_resv_mem_pool *pool;
+       dma_addr_t offset;
+       int pageno, err = 0;
+
+       pool = is_cont ? &resv_pool_cont : &resv_pool_norm;
+
+       if (unlikely(!IS_INITIALIZED(pool)))
+               return -EPERM;
+
+       if (unlikely(!IS_ALIGNED(size, PAGE_SIZE)))
+               return -EINVAL;
+
+       spin_lock(&pool->lock);
+
+       if (unlikely(size > pool->total_size)) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       /* claim a run of free pages; bit index == page index in the pool */
+       pageno = find_free_region(pool->bitmap, pool->num_bits,
+                                 size >> PAGE_SHIFT);
+       if (unlikely(pageno < 0)) {
+               err = pageno;
+               goto out;
+       }
+       bitmap_set(pool->bitmap, pageno, size >> PAGE_SHIFT);
+       offset = (dma_addr_t)pageno << PAGE_SHIFT;
+
+       mem->daddr = pool->daddr_base + offset;
+       mem->vaddr = pool->vaddr_base + offset;
+       mem->size = size;
+
+       /* zero the handed-out region through the write-combining mapping */
+       memset(mem->vaddr, '\x00', size);
+
+       pool->total_used += mem->size;
+out:
+       spin_unlock(&pool->lock);
+
+       return err;
+}
+
+/* Return an allocation made by trinity_alloc_from_resv_mem() to its pool.
+ * A NULL vaddr or an uninitialized pool makes this a no-op.
+ */
+void trinity_free_from_resv_mem(struct trinity_resv_mem *mem, bool is_cont)
+{
+       struct trinity_resv_mem_pool *pool =
+               is_cont ? &resv_pool_cont : &resv_pool_norm;
+       int first, nr_pages;
+
+       if (!IS_INITIALIZED(pool) || mem->vaddr == NULL)
+               return;
+
+       first = (mem->vaddr - pool->vaddr_base) >> PAGE_SHIFT;
+       nr_pages = mem->size >> PAGE_SHIFT;
+
+       spin_lock(&pool->lock);
+       bitmap_clear(pool->bitmap, first, nr_pages);
+       pool->total_used -= mem->size;
+       spin_unlock(&pool->lock);
+}
+
+/* Map a reserved-pool allocation into user space.  @vaddr/@size identify
+ * the kernel-side allocation; vma->vm_pgoff is the page offset inside it.
+ * Returns 0 on success or -ENXIO when the pool is uninitialized or the
+ * request falls outside the allocation bounds.
+ */
+int trinity_mmap_from_resv_mem(struct vm_area_struct *vma, void *vaddr,
+                              size_t size, bool is_cont)
+{
+       struct trinity_resv_mem_pool *pool;
+
+       pool = is_cont ? &resv_pool_cont : &resv_pool_norm;
+
+       if (likely(IS_INITIALIZED(pool))) {
+               unsigned long off = vma->vm_pgoff;
+               unsigned long pfn_base = PFN_DOWN(pool->paddr_base);
+               /* page index of the allocation within the pool */
+               int start = (vaddr - pool->vaddr_base) >> PAGE_SHIFT;
+               int user_count = vma_pages(vma);
+               int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+               /* the requested window must lie inside the allocation */
+               if (off < count && user_count <= count - off) {
+                       unsigned long pfn = pfn_base + start + off;
+                       return remap_pfn_range(vma, vma->vm_start, pfn,
+                                              user_count << PAGE_SHIFT,
+                                              vma->vm_page_prot);
+               }
+       }
+
+       return -ENXIO;
+}
diff --git a/drivers/misc/trinity/trinity_resv_mem.h b/drivers/misc/trinity/trinity_resv_mem.h
new file mode 100644 (file)
index 0000000..333cf8c
--- /dev/null
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Reserved memory allocator for Trinity device drivers
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#ifndef __DRIVERS_MISC_TRINITY_RESV_MEM_H__
+#define __DRIVERS_MISC_TRINITY_RESV_MEM_H__
+
+#include <linux/mm_types.h>
+#include <linux/types.h>
+
+/* A single allocation handed out from the reserved memory pools */
+struct trinity_resv_mem {
+       dma_addr_t daddr;       /* device (DMA) address of the region */
+       void *vaddr;            /* kernel virtual address of the region */
+       size_t size;            /* allocated (page-aligned) size in bytes */
+       size_t orig_size;       /* presumably caller-requested size before alignment -- not set in trinity_resv_mem.c; verify */
+};
+
+/* Register the reserved memory region backing both pools */
+int trinity_declare_resv_mem(phys_addr_t paddr, dma_addr_t daddr, size_t size);
+
+/* Tear down the pools created by trinity_declare_resv_mem() */
+void trinity_release_resv_mem(void);
+
+/* @is_cont selects the contiguous program pool (true) or the normal pool
+ * (false); parameter names renamed from is_prog to match the definitions
+ * in trinity_resv_mem.c (no ABI impact).
+ */
+int trinity_alloc_from_resv_mem(const size_t size, struct trinity_resv_mem *mem,
+                               bool is_cont);
+void trinity_free_from_resv_mem(struct trinity_resv_mem *mem, bool is_cont);
+int trinity_mmap_from_resv_mem(struct vm_area_struct *vma, void *vaddr,
+                              size_t size, bool is_cont);
+
+#endif /* __DRIVERS_MISC_TRINITY_RESV_MEM_H__ */
diff --git a/drivers/misc/trinity/trinity_sched.c b/drivers/misc/trinity/trinity_sched.c
new file mode 100644 (file)
index 0000000..a192057
--- /dev/null
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * NPU req scheduler interface
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#include <linux/spinlock.h>
+
+#include "trinity_common.h"
+#include "trinity_sched.h"
+
+/* Init/exit entry points of the compiled-in scheduler backends */
+#ifdef CONFIG_TRINITY_SCHED_SR
+extern int trinity_sched_init_sr(struct device *);
+extern void trinity_sched_exit_sr(void);
+#endif
+#ifdef CONFIG_TRINITY_SCHED_VD
+extern int trinity_sched_init_vd(struct device *);
+extern void trinity_sched_exit_vd(void);
+#endif
+
+/* one registered scheduler descriptor per type, guarded by sched_lock */
+static struct trinity_sched_desc *sched_table[SCHED_END];
+static DEFINE_SPINLOCK(sched_lock);
+
+/* Register @desc as the scheduler for @type (first registration wins).
+ * The lock is taken irqsave for consistency with trinity_sched_find(),
+ * which assumes sched_lock may be needed with interrupts disabled.
+ */
+void trinity_sched_register(enum trinity_sched_type type,
+                           struct trinity_sched_desc *desc)
+{
+       unsigned long flags;
+
+       if (type >= SCHED_END)
+               return;
+
+       spin_lock_irqsave(&sched_lock, flags);
+       if (!sched_table[type])
+               sched_table[type] = desc;
+       spin_unlock_irqrestore(&sched_lock, flags);
+}
+EXPORT_SYMBOL(trinity_sched_register);
+
+/* Remove @desc as the scheduler for @type (only if currently registered).
+ * Uses irqsave locking for consistency with trinity_sched_find().
+ */
+void trinity_sched_unregister(enum trinity_sched_type type,
+                             struct trinity_sched_desc *desc)
+{
+       unsigned long flags;
+
+       if (type >= SCHED_END)
+               return;
+
+       spin_lock_irqsave(&sched_lock, flags);
+       if (sched_table[type] == desc)
+               sched_table[type] = NULL;
+       spin_unlock_irqrestore(&sched_lock, flags);
+}
+EXPORT_SYMBOL(trinity_sched_unregister);
+
+/* Look up the registered scheduler for @type; NULL if none or @type is
+ * out of range.
+ */
+struct trinity_sched_desc *trinity_sched_find(enum trinity_sched_type type)
+{
+       struct trinity_sched_desc *found = NULL;
+       unsigned long flags;
+
+       if (type < SCHED_END) {
+               spin_lock_irqsave(&sched_lock, flags);
+               found = sched_table[type];
+               spin_unlock_irqrestore(&sched_lock, flags);
+       }
+
+       return found;
+}
+EXPORT_SYMBOL(trinity_sched_find);
+
+/**
+ * trinity_sched_run_req() - Schedules a req to the target from the req queue.
+ * @req_data: The data ptr to hold req information to be submitted.
+ * @sched_data: Scheduler-private data forwarded to invoke_req().
+ *
+ * Return: 0 on success. Otherwise, returns negative error. Additional status of
+ * the submitted req could be passed by req->status.
+ */
+int32_t trinity_sched_run_req(void *req_data, void *sched_data)
+{
+       struct trinity_req *req = (struct trinity_req *)req_data;
+       struct trinity_driver *drv = req->drv;
+       int32_t err;
+
+       /* setup is only allowed in ready state */
+       if (drv->desc->get_state(drv) != TRINITY_STATE_READY) {
+               dev_err(drv_to_dev_ptr(drv),
+                       "Cannot setup NPU when it's in a non-ready state");
+               req->stat->status = TRINITY_REQ_STATUS_ERROR;
+               return -EPERM;
+       }
+
+       /* only pending or finished reqs may be (re)submitted */
+       if (req->stat->status != TRINITY_REQ_STATUS_PENDING &&
+           req->stat->status != TRINITY_REQ_STATUS_FINISHED) {
+               dev_err(drv_to_dev_ptr(drv), "Invalid req status: %d",
+                       req->stat->status);
+               req->stat->status = TRINITY_REQ_STATUS_ERROR;
+               return -EINVAL;
+       }
+
+       req->stat->status = TRINITY_REQ_STATUS_RUNNING;
+       err = drv->desc->invoke_req(drv, req, sched_data);
+       if (err != 0)
+               req->stat->status = TRINITY_REQ_STATUS_ERROR;
+
+       return err;
+}
+EXPORT_SYMBOL(trinity_sched_run_req);
+
+/* Quiesce every registered scheduler.  Defined with (void) to match the
+ * header prototype, and uses the locked trinity_sched_find() lookup
+ * instead of reading sched_table without sched_lock.
+ */
+void trinity_sched_suspend(void)
+{
+       enum trinity_sched_type type;
+       struct trinity_sched_desc *desc;
+
+       for (type = SCHED_SR; type < SCHED_END; type++) {
+               desc = trinity_sched_find(type);
+               if (desc)
+                       desc->suspend();
+       }
+}
+
+/* Resume every registered scheduler.  Defined with (void) to match the
+ * header prototype, and uses the locked trinity_sched_find() lookup
+ * instead of reading sched_table without sched_lock.
+ */
+void trinity_sched_resume(void)
+{
+       enum trinity_sched_type type;
+       struct trinity_sched_desc *desc;
+
+       for (type = SCHED_SR; type < SCHED_END; type++) {
+               desc = trinity_sched_find(type);
+               if (desc)
+                       desc->resume();
+       }
+}
+
+/* Initialize the compiled-in scheduler backends.  Backend failures are
+ * logged but not fatal; always returns 0.
+ */
+int32_t trinity_sched_init(struct device *dev)
+{
+#ifdef CONFIG_TRINITY_SCHED_SR
+       if (trinity_sched_init_sr(dev) < 0)
+               dev_warn(dev, "Unable to initialize SR task scheduler");
+#endif
+#ifdef CONFIG_TRINITY_SCHED_VD
+       if (trinity_sched_init_vd(dev) < 0)
+               dev_warn(dev, "Unable to initialize VD task scheduler");
+#endif
+       return 0;
+}
+
+/* Shut down all compiled-in scheduler backends.  Defined with (void) to
+ * match the header prototype (empty parens are an old-style definition).
+ */
+void trinity_sched_exit(void)
+{
+#ifdef CONFIG_TRINITY_SCHED_SR
+       trinity_sched_exit_sr();
+#endif
+#ifdef CONFIG_TRINITY_SCHED_VD
+       trinity_sched_exit_vd();
+#endif
+}
diff --git a/drivers/misc/trinity/trinity_sched.h b/drivers/misc/trinity/trinity_sched.h
new file mode 100644 (file)
index 0000000..650213f
--- /dev/null
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * trinity/trinity_sched.h: Scheduler I/F header for trinity devices
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#ifndef __TRINITY_SCHED_H__
+#define __TRINITY_SCHED_H__
+
+#include <linux/device.h>
+#include <linux/types.h>
+
+/* Scheduler flavors; SCHED_END doubles as the registration table size */
+enum trinity_sched_type { SCHED_SR = 0, SCHED_VD, SCHED_END };
+/* Callback invoked by remove_reqs() for each removed req */
+typedef void (*remove_req_cb)(void *data, void *req);
+
+/**
+ * struct trinity_sched_desc - a structure for scheduler description
+ *
+ * NOTE(review): member semantics below are inferred from names and
+ * signatures only -- confirm against the scheduler implementations.
+ * @ready/@submit/@cancel appear to gate, enqueue, and withdraw work;
+ * @suspend/@resume quiesce and restart the scheduler; @notify reports
+ * completion (with @error); the remaining hooks look up, remove, or
+ * test-run reqs.
+ */
+struct trinity_sched_desc {
+       bool (*ready)(void);
+       int32_t (*submit)(void *data);
+       bool (*cancel)(void *data);
+       void (*suspend)(void);
+       void (*resume)(void);
+       void (*notify)(void *data, bool error);
+
+       struct trinity_req *(*find_req)(uint32_t dev_id, int req_id);
+       void (*remove_reqs)(void *data, remove_req_cb cb);
+       void (*test_run)(void *data, int req_id);
+};
+
+struct trinity_sched_desc *trinity_sched_find(enum trinity_sched_type type);
+void trinity_sched_register(enum trinity_sched_type type,
+                           struct trinity_sched_desc *desc);
+void trinity_sched_unregister(enum trinity_sched_type type,
+                             struct trinity_sched_desc *desc);
+int32_t trinity_sched_run_req(void *req_data, void *sched_data);
+void trinity_sched_suspend(void);
+void trinity_sched_resume(void);
+int32_t trinity_sched_init(struct device *dev);
+void trinity_sched_exit(void);
+#endif /* __TRINITY_SCHED_H__ */
diff --git a/drivers/misc/trinity/trinity_sched_sr.c b/drivers/misc/trinity/trinity_sched_sr.c
new file mode 100644 (file)
index 0000000..41cdab8
--- /dev/null
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * SR's NPU req scheduler for Trinity device family
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/llist.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/wait.h>
+
+#include "trinity_common.h"
+#include "trinity_sched.h"
+
+#define get_dev_ptr() (g_sched_priv.dev)
+
+struct trinity_sched_priv {
+       struct device *dev;     /* misc device backing this scheduler */
+       struct llist_head req_queue;    /* lock-free queue of submitted reqs */
+       wait_queue_head_t wait_queue;   /* wakes the scheduler thread */
+       struct task_struct *sched_thread;       /* consumer of req_queue */
+       struct mutex lock;      /* serializes run_req() vs. suspend/resume */
+       unsigned long suspended;        /* bit 1 set while suspended */
+};
+
+static struct trinity_sched_priv g_sched_priv;
+
+/**
+ * @brief calculate priority using timeout (a lower value means higher priority)
+ */
+static unsigned long trinity_sched_calc_priority(struct trinity_req *req)
+{
+       ktime_t elapsed_time;
+       int64_t priority;
+
+       if (req->input.config.timeout_ms == 0)
+               return 0; /** @todo need preemption */
+
+       elapsed_time = ktime_to_ms(ktime_sub(ktime_get(), req->time_started));
+       BUG_ON(elapsed_time < 0);
+
+       /**
+        * if the elapsed time exceeds the timeout of req,
+        * its priority value is set to the minimum (highest).
+        */
+       priority = req->input.config.timeout_ms - elapsed_time;
+       if (priority < 0)
+               priority = 0;
+
+       return priority;
+}
+
+/**
+ * @brief pick the top-priority req from req queue
+ */
+static struct trinity_req *trinity_sched_pick_req(struct llist_head *queue)
+{
+       struct trinity_req *req, *req_prev;
+       struct trinity_req *top_req, *top_req_prev;
+       int64_t top_priority = S64_MAX;
+       unsigned long priority;
+
+       if (llist_empty(queue))
+               return NULL;
+
+       req = req_prev = NULL;
+       top_req = top_req_prev = NULL;
+
+       /**
+        * llist is not a double linked list, and sorting is not easy
+        * because llist provides only limited APIs.
+        * it could be better than sorting if there are a few pending reqs.
+        * Note that each user application can submit only one req at once.
+        */
+       llist_for_each_entry (req, queue->first, llist) {
+               priority = trinity_sched_calc_priority(req);
+               if (top_priority > priority) {
+                       top_priority = priority;
+                       top_req = req;
+                       top_req_prev = req_prev;
+               }
+
+               req_prev = req;
+       }
+
+       if (top_req_prev) {
+               BUG_ON(!top_req);
+               top_req_prev->llist.next = top_req->llist.next; /* unlink */
+       } else {
+               /** top req is the first entry; pop it off the head */
+               top_req = llist_entry(llist_del_first(queue), typeof(*(req)),
+                                     llist);
+       }
+
+       return top_req;
+}
+
+/* walk to the final node of an llist (NULL when the list is empty) */
+static struct llist_node *llist_last(struct llist_node *first)
+{
+       struct llist_node *node = first;
+
+       while (node && node->next)
+               node = node->next;
+
+       return node;
+}
+
+static int trinity_sched_thread_func(void *data)
+{
+       const unsigned long MAX_RETRY_COUNT = 100; /** around 100 ms */
+
+       struct llist_head local_queue;
+       struct llist_node *new_first;
+
+       init_llist_head(&local_queue);
+repeat:
+       if (kthread_should_stop())
+               return 0;
+
+       /** extract reqs from global queue without locking */
+       new_first = llist_del_all(&g_sched_priv.req_queue);
+       /** new and pending reqs could be located together */
+       if (new_first) {
+               struct llist_node *new_last = llist_last(new_first);
+               llist_add_batch(new_first, new_last, &local_queue);
+       }
+
+       /** flush reqs in the queue */
+       while (!llist_empty(&local_queue)) {
+               struct trinity_req *req;
+               int32_t ret;
+
+               /**
+                * pick the top-priority req from the queue.
+                * first and last node pointers are updated
+                */
+               req = trinity_sched_pick_req(&local_queue);
+               if (!req)
+                       goto repeat;
+
+               mutex_lock(&g_sched_priv.lock);
+               ret = trinity_sched_run_req(req, NULL);
+               if (ret == 0)
+                       req->scheduled = true;
+               mutex_unlock(&g_sched_priv.lock);
+
+               if (ret == -EBUSY) {
+                       if (req->submit_retry >= MAX_RETRY_COUNT) {
+                               /** give up handling this req */
+                               complete_all(&req->complete);
+                       } else {
+                               req->submit_retry++;
+                               /** push again and retry after a short delay */
+                               llist_add(&req->llist, &local_queue);
+                               usleep_range(1000, 2000);
+                       }
+                       goto repeat;
+               } else if (ret != 0) {
+                       /** let's notify this unknown error */
+                       complete_all(&req->complete);
+               }
+       }
+
+       /** ensure the local queue is empty */
+       BUG_ON(!llist_empty(&local_queue));
+
+       wait_event_interruptible(
+               g_sched_priv.wait_queue,
+               kthread_should_stop() ||
+                       !llist_empty(&(g_sched_priv.req_queue)));
+       goto repeat;
+}
+
+static bool sr_sched_ready(void)
+{
+       return !test_bit(1, &g_sched_priv.suspended); /* ready unless suspended */
+}
+
+static int32_t sr_sched_submit(void *data)
+{
+       struct trinity_req *req = data;
+
+       if (!req)
+               return -EINVAL;
+
+       if (!sr_sched_ready())
+               return -EAGAIN; /* suspended; the caller may retry later */
+
+       llist_add(&req->llist, &g_sched_priv.req_queue); /* lock-free push */
+       wake_up(&g_sched_priv.wait_queue);
+
+       return 0;
+}
+
+static void sr_sched_notify(void *data, bool error)
+{
+       struct trinity_req *req = data;
+
+       req->scheduled = false; /* mark done; 'error' is unused here */
+}
+
+static void sr_sched_suspend(void)
+{
+       if (!test_and_set_bit(1, &g_sched_priv.suspended))
+               mutex_lock(&g_sched_priv.lock); /* blocks the sched thread */
+}
+
+static void sr_sched_resume(void)
+{
+       if (test_and_clear_bit(1, &g_sched_priv.suspended))
+               mutex_unlock(&g_sched_priv.lock); /* let the sched thread run */
+}
+
+static struct trinity_sched_desc trinity_sched_sr = {
+       .ready = sr_sched_ready,
+       .submit = sr_sched_submit,
+       .notify = sr_sched_notify,
+       .suspend = sr_sched_suspend,
+       .resume = sr_sched_resume, /* cancel/find_req/remove_reqs stay NULL */
+};
+
+static int trinity_sched_open(struct inode *inodep, struct file *filp)
+{
+       return 0; /* no per-open state */
+}
+
+static int trinity_sched_release(struct inode *inodep, struct file *filp)
+{
+       return 0; /* nothing to tear down */
+}
+
+static const struct file_operations trinity_sched_fops = {
+       .owner = THIS_MODULE,
+       .open = trinity_sched_open,
+       .release = trinity_sched_release,
+       .llseek = no_llseek,
+};
+
+static struct miscdevice trinity_sched_device = {
+       .minor = MISC_DYNAMIC_MINOR,
+       .name = "trinity_sched",
+       .fops = &trinity_sched_fops,
+};
+
+static int trinity_sched_init_priv(void)
+{
+       g_sched_priv.dev = trinity_sched_device.this_device;
+
+       init_llist_head(&g_sched_priv.req_queue);
+       init_waitqueue_head(&g_sched_priv.wait_queue);
+       /* init lock/state before the thread starts: it takes this mutex */
+       mutex_init(&g_sched_priv.lock);
+       clear_bit(1, &g_sched_priv.suspended);
+
+       g_sched_priv.sched_thread = kthread_run(trinity_sched_thread_func, NULL,
+                                               "trinity_sched_thread");
+       if (IS_ERR(g_sched_priv.sched_thread)) {
+               dev_err(get_dev_ptr(),
+                       "Failed to create a thread for scheduling reqs");
+               misc_deregister(&trinity_sched_device);
+               return PTR_ERR(g_sched_priv.sched_thread);
+       }
+
+       return 0;
+}
+
+int trinity_sched_init_sr(struct device *dev)
+{
+       int err;
+
+       err = misc_register(&trinity_sched_device);
+       if (err) {
+               dev_err(dev,
+                       "Failed to register a misc device for scheduler\n");
+               return err;
+       }
+
+       trinity_sched_register(SCHED_SR, &trinity_sched_sr);
+       err = trinity_sched_init_priv();
+       if (err) /* roll back so a dead scheduler is not left registered */
+               trinity_sched_unregister(SCHED_SR, &trinity_sched_sr);
+
+       return err;
+}
+
+void trinity_sched_exit_sr(void)
+{
+       trinity_sched_unregister(SCHED_SR, &trinity_sched_sr);
+       /* stop the scheduler thread before its queue/device go away */
+       if (!IS_ERR_OR_NULL(g_sched_priv.sched_thread))
+               kthread_stop(g_sched_priv.sched_thread);
+       misc_deregister(&trinity_sched_device);
+}
diff --git a/drivers/misc/trinity/trinity_stat.c b/drivers/misc/trinity/trinity_stat.c
new file mode 100644 (file)
index 0000000..4d1093a
--- /dev/null
@@ -0,0 +1,765 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Providing statistics for Samsung Research Trinity device family support
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#include "trinity_stat.h"
+#include "trinity_common.h"
+#include "trinity_resv_mem.h"
+
+#include <linux/bitmap.h>
+#include <linux/list_bl.h>
+
+/* maximum number of stats configurable from sysfs */
+#define TRINITY_STAT_MAX_APPS        (128UL)
+#define TRINITY_STAT_MAX_REQS        (4096UL)
+#define TRINITY_STAT_MAX_REQS_PER_APP (128UL)
+
+/* default number of stats */
+#define TRINITY_STAT_DEF_APPS        (32UL)
+#define TRINITY_STAT_DEF_REQS        (128UL)
+#define TRINITY_STAT_DEF_REQS_PER_APP (32UL)
+
+/* per-device stat pool (drv->stat.pdata) */
+struct trinity_stat_pool {
+       DECLARE_BITMAP(bitmap_app, TRINITY_STAT_MAX_APPS);      /* used slots */
+       DECLARE_BITMAP(bitmap_req, TRINITY_STAT_MAX_REQS);      /* used slots */
+
+       struct trinity_resv_mem mem_app;        /* backing array of stat_app */
+       struct trinity_resv_mem mem_req;        /* backing array of stat_req */
+
+       unsigned long max_stat_apps;    /* current pool capacities */
+       unsigned long max_stat_reqs;
+       unsigned long max_stat_reqs_per_app;
+
+       unsigned long cur_stat_apps;    /* number of live app stats */
+       unsigned long cur_stat_reqs;
+
+       struct trinity_driver *drv;     /* owning driver instance */
+};
+
+/* allocate and attach the per-device stat pool (drv->stat.pdata) */
+int trinity_stat_pool_init(struct trinity_driver *drv)
+{
+       struct trinity_stat_pool *pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+
+       if (!pool) {
+               dev_err(drv_to_dev_ptr(drv),
+                       "Unable to allocate a stat pool for requests");
+               return -ENOMEM;
+       }
+
+       pool->drv = drv;
+       drv->stat.pdata = pool;
+
+       return 0;
+}
+
+/* detach and release the stat pool; a no-op when it was never created */
+void trinity_stat_pool_fini(struct trinity_driver *drv)
+{
+       struct trinity_stat_pool *pool = drv->stat.pdata;
+
+       if (!pool)
+               return;
+
+       drv->stat.pdata = NULL;
+       trinity_free_from_resv_mem(&pool->mem_app, false);
+       trinity_free_from_resv_mem(&pool->mem_req, false);
+       kfree(pool);
+}
+
+static void trinity_stat_pool_resize_apps(struct trinity_stat_pool *pool,
+                                         unsigned long num_apps)
+{
+       struct device *dev = drv_to_dev_ptr(pool->drv);
+       struct trinity_resv_mem mem;
+       unsigned long size;
+
+       if (num_apps > TRINITY_STAT_MAX_APPS) {
+               dev_err(dev, "The maximum number of stat apps: %lu",
+                       TRINITY_STAT_MAX_APPS);
+               return;
+       }
+
+       size = PAGE_ALIGN(sizeof(struct trinity_stat_app) * num_apps);
+       if (trinity_alloc_from_resv_mem(size, &mem, false) == 0) {
+               trinity_free_from_resv_mem(&pool->mem_app, false);
+
+               bitmap_fill(pool->bitmap_app, TRINITY_STAT_MAX_APPS);
+               bitmap_zero(pool->bitmap_app, num_apps); /* first num_apps free */
+
+               pool->max_stat_apps = num_apps;
+               pool->mem_app = mem;
+       } else {
+               dev_warn(dev, "Unable to allocate stats for apps");
+       }
+}
+
+static void trinity_stat_pool_resize_reqs(struct trinity_stat_pool *pool,
+                                         unsigned long num_reqs)
+{
+       struct device *dev = drv_to_dev_ptr(pool->drv);
+       struct trinity_resv_mem mem;
+       unsigned long size;
+
+       if (num_reqs > TRINITY_STAT_MAX_REQS) {
+               dev_err(dev, "The maximum number of stat reqs: %lu",
+                       TRINITY_STAT_MAX_REQS);
+               return;
+       }
+
+       size = PAGE_ALIGN(sizeof(struct trinity_stat_req) * num_reqs);
+       if (trinity_alloc_from_resv_mem(size, &mem, false) == 0) {
+               trinity_free_from_resv_mem(&pool->mem_req, false);
+
+               bitmap_fill(pool->bitmap_req, TRINITY_STAT_MAX_REQS);
+               bitmap_zero(pool->bitmap_req, num_reqs); /* first num_reqs free */
+
+               pool->max_stat_reqs = num_reqs;
+               pool->mem_req = mem;
+       } else {
+               dev_warn(dev, "Unable to allocate stats for reqs");
+       }
+}
+
+static struct trinity_stat_app *
+trinity_stat_pool_get_app(struct trinity_driver *drv)
+{
+       struct trinity_stat *stat = &drv->stat;
+       struct trinity_stat_pool *pool = stat->pdata;
+       struct trinity_stat_app *app = NULL;
+       unsigned long slot;
+       bool retried = false;
+
+       /* ensured that the lock is acquired */
+retry:
+       slot = find_first_zero_bit(pool->bitmap_app, TRINITY_STAT_MAX_APPS);
+       if (slot < TRINITY_STAT_MAX_APPS) {
+               app = &((struct trinity_stat_app *)pool->mem_app.vaddr)[slot];
+               memset(app, '\x00', sizeof(*app));
+               set_bit(slot, pool->bitmap_app);
+               app->slot = slot;
+       } else if (!retried) {
+               /* retry after destroying terminated stats */
+               retried = true;
+               trinity_destroy_stats(stat, true);
+               goto retry;
+       } else {
+               dev_warn(drv_to_dev_ptr(pool->drv),
+                        "Please increase stat pool limit for apps");
+       }
+
+       return app;
+}
+
+static void trinity_stat_pool_put_app(struct trinity_driver *drv,
+                                     struct trinity_stat_app *stat_app)
+{
+       struct trinity_stat_pool *pool = drv->stat.pdata;
+
+       /* caller holds the stat lock; just release the bitmap slot */
+       clear_bit(stat_app->slot, pool->bitmap_app);
+}
+
+static struct trinity_stat_req *
+trinity_stat_pool_get_req(struct trinity_driver *drv)
+{
+       struct trinity_stat *stat = &drv->stat;
+       struct trinity_stat_pool *pool = stat->pdata;
+       struct trinity_stat_req *req = NULL;
+       unsigned long slot;
+       bool retried = false;
+
+       /* ensured that the lock is acquired */
+retry:
+       slot = find_first_zero_bit(pool->bitmap_req, TRINITY_STAT_MAX_REQS);
+       if (slot < TRINITY_STAT_MAX_REQS) {
+               req = &((struct trinity_stat_req *)pool->mem_req.vaddr)[slot];
+               memset(req, '\x00', sizeof(*req));
+               set_bit(slot, pool->bitmap_req);
+               req->slot = slot;
+       } else if (!retried) {
+               /* retry after destroying terminated stats */
+               retried = true;
+               trinity_destroy_stats(stat, true);
+               goto retry;
+       } else {
+               dev_warn(drv_to_dev_ptr(pool->drv),
+                        "Please increase stat pool limit for reqs");
+       }
+
+       return req;
+}
+
+static void trinity_stat_pool_put_req(struct trinity_driver *drv,
+                                     struct trinity_stat_req *stat_req)
+{
+       struct trinity_stat_pool *pool = drv->stat.pdata;
+
+       /* caller holds the stat lock; just release the bitmap slot */
+       clear_bit(stat_req->slot, pool->bitmap_req);
+}
+
+void trinity_stat_init(struct trinity_driver *drv)
+{
+       unsigned long i;
+
+       spin_lock_init(&drv->stat.lock);
+
+       INIT_LIST_HEAD(&drv->stat.list);
+       for (i = 0; i < TRINITY_STAT_HASH_SIZE; ++i)
+               INIT_HLIST_BL_HEAD(&drv->stat.hlist[i]);
+
+       trinity_stat_pool_init(drv); /* error ignored; resize checks pdata */
+       /* initialize to default values */
+       trinity_stat_resize(drv, TRINITY_STAT_DEF_APPS, TRINITY_STAT_DEF_REQS,
+                           TRINITY_STAT_DEF_REQS_PER_APP);
+}
+
+void trinity_stat_fini(struct trinity_driver *drv)
+{
+       trinity_stat_resize(drv, 0, 0, 0); /* flush stats without reallocation */
+       trinity_stat_pool_fini(drv);
+}
+
+void trinity_stat_resize(struct trinity_driver *drv, unsigned long num_apps,
+                        unsigned long num_reqs, unsigned long num_reqs_per_app)
+{
+       struct trinity_stat *stat = &drv->stat;
+       struct trinity_stat_pool *pool = stat->pdata;
+       unsigned long i;
+
+       if (!pool)
+               return;
+
+       trinity_stat_lock(&drv->stat);
+
+       /* bail out unless every tracked app has terminated */
+       for (i = 0; i < TRINITY_STAT_HASH_SIZE; i++) {
+               struct trinity_stat_app *stat_app;
+               struct hlist_bl_node *hn;
+
+               hlist_bl_lock(&(stat->hlist[i]));
+               hlist_bl_for_each_entry (stat_app, hn, &(stat->hlist[i]),
+                                        hnode) {
+                       if (stat_app->status != TRINITY_APP_STATUS_TERMINATED) {
+                               dev_warn(drv_to_dev_ptr(drv),
+                                        "Still busy apps detected.. waiting");
+                               hlist_bl_unlock(&(stat->hlist[i]));
+                               goto unlock;
+                       }
+               }
+               hlist_bl_unlock(&(stat->hlist[i]));
+       }
+
+       trinity_destroy_stats(stat, true); /* force-drop terminated stats */
+
+       /* re-allocate each stat buffer */
+       if (num_apps > 0)
+               trinity_stat_pool_resize_apps(pool, num_apps);
+
+       if (num_reqs > 0)
+               trinity_stat_pool_resize_reqs(pool, num_reqs);
+
+       if (num_reqs_per_app > 0)
+               pool->max_stat_reqs_per_app = num_reqs_per_app;
+
+unlock:
+       trinity_stat_unlock(&drv->stat);
+}
+
+/* pool capacity getters; all return 0 when the pool is absent */
+unsigned long trinity_stat_get_max_apps(struct trinity_driver *drv)
+{
+       struct trinity_stat_pool *pool = drv->stat.pdata;
+       unsigned long num = 0;
+
+       if (pool) {
+               trinity_stat_lock(&drv->stat);
+               num = pool->max_stat_apps;
+               trinity_stat_unlock(&drv->stat);
+       }
+
+       return num;
+}
+
+unsigned long trinity_stat_get_max_reqs(struct trinity_driver *drv)
+{
+       struct trinity_stat_pool *pool = drv->stat.pdata;
+       unsigned long num = 0;
+
+       if (pool) {
+               trinity_stat_lock(&drv->stat);
+               num = pool->max_stat_reqs;
+               trinity_stat_unlock(&drv->stat);
+       }
+
+       return num;
+}
+
+unsigned long trinity_stat_get_max_reqs_per_app(struct trinity_driver *drv)
+{
+       struct trinity_stat_pool *pool = drv->stat.pdata;
+       unsigned long num = 0;
+
+       if (pool) {
+               trinity_stat_lock(&drv->stat);
+               num = pool->max_stat_reqs_per_app;
+               trinity_stat_unlock(&drv->stat);
+       }
+
+       return num;
+}
+
+void trinity_stat_lock(struct trinity_stat *stat)
+{
+       if (stat) /* NULL-tolerant for callers without a stat manager */
+               spin_lock(&stat->lock);
+}
+
+void trinity_stat_unlock(struct trinity_stat *stat)
+{
+       if (stat)
+               spin_unlock(&stat->lock);
+}
+
+/**
+ * trinity_create_stat_app - create a stat structure for the opened app
+ *
+ * @drv: An instance of the trinity driver.
+ *
+ * Returns 0 on success. Otherwise, returns negative error.
+ */
+static int trinity_create_stat_app(struct trinity_driver *drv)
+{
+       struct trinity_stat *stat = &drv->stat;
+       struct trinity_stat_pool *pool = stat->pdata;
+       struct trinity_stat_app *stat_app;
+       unsigned long key;
+
+       trinity_stat_lock(stat);
+       stat_app = trinity_stat_pool_get_app(drv);
+       if (IS_ERR_OR_NULL(stat_app)) {
+               trinity_stat_unlock(stat);
+               dev_err(drv_to_dev_ptr(drv),
+                       "Unable to allocate stat of request");
+               return -ENOMEM;
+       }
+
+       stat_app->parent = stat;
+       stat_app->app_id = trinity_get_app_id();
+       stat_app->total_alloc_mem = 0;
+       stat_app->total_freed_mem = 0;
+       stat_app->num_total_reqs = 0;
+       stat_app->num_kept_reqs = 0;
+       stat_app->num_active_reqs = 0;
+       stat_app->status = TRINITY_APP_STATUS_STARTED;
+
+       strncpy(stat_app->name, current->comm, TASK_COMM_LEN);
+       stat_app->name[TASK_COMM_LEN - 1] = '\x00'; /* strncpy may not add NUL */
+
+       INIT_HLIST_BL_NODE(&stat_app->hnode);
+       INIT_LIST_HEAD(&stat_app->reqs);
+
+       /* hash table for fast lookup */
+       key = hash_long(stat_app->app_id, TRINITY_STAT_HASH_BITS);
+
+       hlist_bl_lock(&(stat->hlist[key]));
+       hlist_bl_add_head(&stat_app->hnode, &(stat->hlist[key]));
+       hlist_bl_unlock(&(stat->hlist[key]));
+
+       /* list for ordered management */
+       list_add_tail(&stat_app->lnode, &stat->list);
+       pool->cur_stat_apps++;
+
+       /* Remove terminated stats if the number reaches the maximum */
+       trinity_destroy_stats(stat, false);
+
+       trinity_stat_unlock(stat);
+
+       return 0;
+}
+
+/* unlink one req stat and return its slot; caller holds the stat lock */
+static void trinity_destroy_stat_req(struct trinity_stat_req *stat_req)
+{
+       struct trinity_stat_app *stat_app = stat_req->parent;
+       struct trinity_stat *stat = stat_app->parent;
+       struct trinity_driver *drv =
+               container_of(stat, struct trinity_driver, stat);
+
+       if (stat_req->profile)
+               drv->desc->destroy_profile(drv, stat_req->profile);
+       list_del(&stat_req->list);
+       trinity_stat_pool_put_req(drv, stat_req);
+}
+
+/* drop every req stat belonging to an app */
+static void trinity_destroy_stat_reqs(struct trinity_stat_app *stat_app)
+{
+       struct trinity_stat_req *stat_req, *tmp;
+
+       list_for_each_entry_safe (stat_req, tmp, &stat_app->reqs, list)
+               trinity_destroy_stat_req(stat_req);
+}
+
+/**
+ * trinity_destroy_stats - destroy terminated stat structures
+ *
+ * @stat: the stat manager embedded in a trinity driver instance.
+ * @force: prune even when the pool is below its app capacity.
+ */
+void trinity_destroy_stats(struct trinity_stat *stat, bool force)
+{
+       struct trinity_driver *drv =
+               container_of(stat, struct trinity_driver, stat);
+       struct trinity_stat_pool *pool = stat->pdata;
+       struct trinity_stat_app *stat_app;
+       struct hlist_bl_node *hn, *tmp;
+       int i;
+
+       /* lock should be acquired before */
+       if (!force && pool->cur_stat_apps <= pool->max_stat_apps)
+               return;
+
+       for (i = 0; i < TRINITY_STAT_HASH_SIZE; i++) {
+               hlist_bl_lock(&stat->hlist[i]);
+               hlist_bl_for_each_entry_safe (stat_app, hn, tmp,
+                                             &(stat->hlist[i]), hnode) {
+                       enum trinity_app_status status = stat_app->status;
+
+                       if (status == TRINITY_APP_STATUS_TERMINATED) {
+                               hlist_bl_del(&stat_app->hnode);
+                               list_del(&stat_app->lnode);
+
+                               pool->cur_stat_apps--;
+
+                               trinity_destroy_stat_reqs(stat_app);
+                               trinity_stat_pool_put_app(drv, stat_app);
+                       }
+               }
+               hlist_bl_unlock(&stat->hlist[i]);
+       }
+}
+
+/* hash-table lookup of an app stat; returns NULL when none exists for app_id */
+static struct trinity_stat_app *
+trinity_get_stat_by_id(struct trinity_driver *drv, int32_t app_id)
+{
+       struct trinity_stat *stat = &drv->stat;
+       struct trinity_stat_app *stat_app;
+       struct hlist_bl_node *hn;
+       unsigned long key;
+
+       key = hash_long(app_id, TRINITY_STAT_HASH_BITS);
+
+       hlist_bl_lock(&stat->hlist[key]);
+       hlist_bl_for_each_entry (stat_app, hn, &stat->hlist[key], hnode) {
+               if (stat_app->app_id == app_id)
+                       goto out;
+       }
+       stat_app = NULL;
+out:
+       hlist_bl_unlock(&stat->hlist[key]);
+
+       return stat_app;
+}
+
+/**
+ * trinity_get_stat_app - get a stat structure for the calling app
+ *
+ * @drv: An instance of the trinity driver.
+ *
+ * Returns the stat pointer on success. Otherwise, returns NULL.
+ *
+ * If the stat is not allocated yet, try to create and return it.
+ */
+struct trinity_stat_app *trinity_get_stat_app(struct trinity_driver *drv)
+{
+       struct trinity_stat *stat = &drv->stat;
+       struct trinity_stat_app *stat_app;
+       int app_id = trinity_get_app_id();
+
+retry:
+       trinity_stat_lock(stat);
+       stat_app = trinity_get_stat_by_id(drv, app_id);
+       trinity_stat_unlock(stat);
+
+       if (!IS_ERR_OR_NULL(stat_app))
+               return stat_app;
+
+       if (trinity_create_stat_app(drv) != 0)
+               return NULL;
+
+       goto retry;
+}
+
+void trinity_stat_app_set_status(struct trinity_driver *drv,
+                                enum trinity_app_status status)
+{
+       struct trinity_stat *stat = &drv->stat;
+       struct trinity_stat_app *stat_app;
+       int app_id = trinity_get_app_id();
+
+       trinity_stat_lock(stat);
+       stat_app = trinity_get_stat_by_id(drv, app_id);
+       trinity_stat_unlock(stat);
+
+       if (IS_ERR_OR_NULL(stat_app))
+               return;
+
+       stat_app->status = status; /* NOTE(review): written outside stat lock */
+}
+
+int trinity_stat_append_req(struct trinity_driver *drv, struct trinity_req *req)
+{
+       struct trinity_stat *stat = &drv->stat;
+       struct trinity_stat_pool *pool = stat->pdata;
+       struct trinity_stat_app *stat_app;
+       struct trinity_stat_req *stat_req;
+
+       stat_app = trinity_get_stat_app(drv);
+       if (IS_ERR_OR_NULL(stat_app))
+               return -ENOMEM;
+
+       trinity_stat_lock(stat);
+       stat_req = trinity_stat_pool_get_req(drv);
+       if (!stat_req) {
+               trinity_stat_unlock(stat);
+               dev_err(drv_to_dev_ptr(drv),
+                       "Unable to allocate stat of request");
+               return -ENOMEM;
+       }
+
+       stat_req->parent = stat_app;
+       stat_req->app_id = stat_app->app_id;
+       stat_req->req_id = req->input.config.req_id;
+       stat_req->model_id = req->input.config.model_id;
+       stat_req->submitted = ktime_get();
+       stat_req->status = TRINITY_REQ_STATUS_PENDING;
+       stat_req->priority =
+               (enum trinity_req_priority)req->input.config.priority;
+       stat_req->is_kernel = req->is_kernel;
+
+       req->stat = stat_req;
+
+       list_add_tail(&stat_req->list, &stat_app->reqs);
+
+       /* don't count kernel requests */
+       if (!req->is_kernel) {
+               if (stat_app->num_kept_reqs == pool->max_stat_reqs_per_app) {
+                       struct trinity_stat_req *old_stat;
+
+                       old_stat = list_first_entry(
+                               &stat_app->reqs, struct trinity_stat_req, list);
+                       /* skip any kernel or unfinished request */
+                       while (old_stat->is_kernel ||
+                              (old_stat->status !=
+                                       TRINITY_REQ_STATUS_FINISHED &&
+                               old_stat->status != TRINITY_REQ_STATUS_ERROR))
+                               old_stat = list_next_entry(old_stat, list);
+
+                       /* NOTE(review): scan assumes a finished user req exists */
+                       BUG_ON(old_stat == NULL);
+
+                       trinity_destroy_stat_req(old_stat);
+                       stat_app->num_total_reqs--;
+               } else {
+                       /* total number of user requests kept */
+                       stat_app->num_kept_reqs++;
+               }
+       }
+
+       /* total number of all requests (including finished ones) */
+       stat_app->num_total_reqs++;
+       /* total number of active requests (running or pending) */
+       stat_app->num_active_reqs++;
+
+       trinity_stat_unlock(stat);
+       return 0;
+}
+
+/* drop a req's stat; 'rollback' also undoes the append-time counters */
+void trinity_stat_remove_req(struct trinity_driver *drv,
+                            struct trinity_req *req, bool rollback)
+{
+       struct trinity_stat *stat = &drv->stat;
+       struct trinity_stat_req *stat_req = req->stat;
+       struct trinity_stat_app *stat_app = stat_req->parent;
+
+       trinity_stat_lock(stat);
+
+       trinity_destroy_stat_req(stat_req);
+
+       if (!req->is_kernel) {
+               BUG_ON(stat_app->num_kept_reqs == 0);
+               stat_app->num_kept_reqs--;
+       }
+
+       if (rollback) {
+               BUG_ON(stat_app->num_total_reqs == 0);
+               stat_app->num_total_reqs--;
+               BUG_ON(stat_app->num_active_reqs == 0);
+               stat_app->num_active_reqs--;
+       }
+
+       trinity_stat_unlock(stat);
+}
+
+/* account completion: one fewer active req for the owning app */
+void trinity_stat_finish_req(struct trinity_driver *drv,
+                            struct trinity_req *req)
+{
+       struct trinity_stat *stat = &drv->stat;
+       struct trinity_stat_req *stat_req = req->stat;
+       struct trinity_stat_app *stat_app = stat_req->parent;
+
+       trinity_stat_lock(stat);
+       if (stat_app->num_active_reqs != 0)
+               stat_app->num_active_reqs--;
+       else
+               dev_err(drv_to_dev_ptr(drv),
+                       "Fail to keep track of the active reqs");
+       trinity_stat_unlock(stat);
+}
+
+/* translate an app stat into its ioctl representation */
+static void copy_stat_app_ioctl(struct trinity_stat_app *stat_app,
+                               struct trinity_ioctl_stat_app *ioctl_stat_app)
+{
+       ioctl_stat_app->app_id = stat_app->app_id;
+       ioctl_stat_app->status = stat_app->status;
+       ioctl_stat_app->num_total_reqs = stat_app->num_total_reqs;
+       ioctl_stat_app->num_active_reqs = stat_app->num_active_reqs;
+       ioctl_stat_app->total_alloc_mem = stat_app->total_alloc_mem;
+       ioctl_stat_app->total_freed_mem = stat_app->total_freed_mem;
+
+       strncpy(ioctl_stat_app->name, stat_app->name, TASK_COMM_LEN);
+       ioctl_stat_app->name[TASK_COMM_LEN - 1] = '\x00';
+}
+
+/* translate a req stat into its ioctl representation */
+static void copy_stat_req_ioctl(struct trinity_stat_req *stat_req,
+                               struct trinity_ioctl_stat_req *ioctl_stat_req)
+{
+       ktime_t cur_time = ktime_get();
+       ktime_t submitted, scheduled, completed;
+
+       /* unset (zero) timestamps fall back to "now" */
+       submitted = stat_req->submitted;
+       scheduled = stat_req->scheduled ? stat_req->scheduled : cur_time;
+       completed = stat_req->completed ? stat_req->completed : cur_time;
+
+       ioctl_stat_req->req_id = stat_req->req_id;
+       ioctl_stat_req->model_id = stat_req->model_id;
+       ioctl_stat_req->priority = stat_req->priority;
+       ioctl_stat_req->status = stat_req->status;
+
+       if (stat_req->priority == TRINITY_REQ_PRIORITY_HIGH)
+               ioctl_stat_req->sched_time = 0;
+       else
+               ioctl_stat_req->sched_time = TIME_DIFF(scheduled, submitted);
+       ioctl_stat_req->infer_time = TIME_DIFF(completed, scheduled);
+}
+
+/* fill ioctl stat for the calling app; zeroed defaults when no stat exists */
+void trinity_stat_app_copy_ioctl(struct trinity_driver *drv,
+                                struct trinity_ioctl_stat_app *ioctl_stat_app)
+{
+       struct trinity_stat *stat = &drv->stat;
+       struct trinity_stat_app *stat_app;
+       int app_id = trinity_get_app_id();
+
+       trinity_stat_lock(stat);
+
+       stat_app = trinity_get_stat_by_id(drv, app_id);
+       if (IS_ERR_OR_NULL(stat_app)) {
+               ioctl_stat_app->app_id = app_id;
+               ioctl_stat_app->status = TRINITY_APP_STATUS_PENDING;
+               ioctl_stat_app->num_total_reqs = 0;
+               ioctl_stat_app->num_active_reqs = 0;
+               ioctl_stat_app->total_alloc_mem = 0;
+               ioctl_stat_app->total_freed_mem = 0;
+
+               strncpy(ioctl_stat_app->name, current->comm, TASK_COMM_LEN);
+               ioctl_stat_app->name[TASK_COMM_LEN - 1] = '\x00';
+       } else {
+               copy_stat_app_ioctl(stat_app, ioctl_stat_app);
+       }
+
+       trinity_stat_unlock(stat);
+}
+
+/* copy up to TRINITY_APP_STAT_MAX app stats (in creation order) for ioctl */
+void trinity_stat_apps_copy_ioctl(
+       struct trinity_driver *drv,
+       struct trinity_ioctl_stat_apps *ioctl_stat_apps)
+{
+       struct trinity_stat *stat = &drv->stat;
+       struct trinity_ioctl_stat_app *ioctl_stat_app;
+       struct trinity_stat_app *stat_app;
+       uint32_t idx = 0;
+
+       trinity_stat_lock(stat);
+
+       list_for_each_entry (stat_app, &stat->list, lnode) {
+               if (idx >= TRINITY_APP_STAT_MAX)
+                       break;
+               ioctl_stat_app = &ioctl_stat_apps->stat[idx++];
+               copy_stat_app_ioctl(stat_app, ioctl_stat_app);
+       }
+       ioctl_stat_apps->num_apps = idx;
+
+       trinity_stat_unlock(stat);
+}
+
+/* copy up to TRINITY_REQ_STAT_MAX req stats of the given app_id for ioctl */
+void trinity_stat_reqs_copy_ioctl(
+       struct trinity_driver *drv,
+       struct trinity_ioctl_stat_reqs *ioctl_stat_reqs)
+{
+       struct trinity_stat *stat = &drv->stat;
+       struct trinity_ioctl_stat_req *ioctl_stat_req;
+       struct trinity_stat_app *stat_app;
+       struct trinity_stat_req *stat_req;
+       uint32_t idx = 0;
+
+       trinity_stat_lock(stat);
+       stat_app = trinity_get_stat_by_id(drv, ioctl_stat_reqs->app_id);
+       if (IS_ERR_OR_NULL(stat_app)) {
+               ioctl_stat_reqs->num_reqs = 0;
+               trinity_stat_unlock(stat);
+               return;
+       }
+
+       list_for_each_entry (stat_req, &stat_app->reqs, list) {
+               if (idx >= TRINITY_REQ_STAT_MAX)
+                       break;
+               ioctl_stat_req = &ioctl_stat_reqs->stat[idx++];
+               copy_stat_req_ioctl(stat_req, ioctl_stat_req);
+       }
+       ioctl_stat_reqs->num_reqs = idx;
+
+       trinity_stat_unlock(stat);
+}
+
+/* add 'size' bytes to the calling app's allocated-memory counter */
+void trinity_stat_app_total_alloc(struct trinity_driver *drv, size_t size)
+{
+       struct trinity_stat_app *stat_app = trinity_get_stat_app(drv);
+
+       if (IS_ERR_OR_NULL(stat_app))
+               return;
+
+       trinity_stat_lock(&drv->stat);
+       stat_app->total_alloc_mem += size;
+       trinity_stat_unlock(&drv->stat);
+}
+
+/* add 'size' bytes to the calling app's freed-memory counter */
+void trinity_stat_app_total_freed(struct trinity_driver *drv, size_t size)
+{
+       struct trinity_stat_app *stat_app = trinity_get_stat_app(drv);
+
+       if (IS_ERR_OR_NULL(stat_app))
+               return;
+
+       trinity_stat_lock(&drv->stat);
+       stat_app->total_freed_mem += size;
+       trinity_stat_unlock(&drv->stat);
+}
diff --git a/drivers/misc/trinity/trinity_stat.h b/drivers/misc/trinity/trinity_stat.h
new file mode 100644 (file)
index 0000000..bf0fb97
--- /dev/null
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * trinity/trinity_stat.h: Statistics header for trinity devices
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#ifndef __TRINITY_STAT_H__
+#define __TRINITY_STAT_H__
+
+#include "trinity_common.h"
+
+/* Lifecycle and sizing of the per-driver statistics pool. */
+void trinity_stat_init(struct trinity_driver *drv);
+void trinity_stat_fini(struct trinity_driver *drv);
+void trinity_stat_resize(struct trinity_driver *drv, unsigned long num_apps,
+			 unsigned long num_reqs,
+			 unsigned long num_reqs_per_app);
+
+/* Locking helpers guarding drv->stat; hold the lock while walking lists. */
+void trinity_stat_lock(struct trinity_stat *stat);
+void trinity_stat_unlock(struct trinity_stat *stat);
+void trinity_destroy_stats(struct trinity_stat *stat, bool force);
+
+/* Current limits (configurable via the sysfs "stat" group). */
+unsigned long trinity_stat_get_max_apps(struct trinity_driver *drv);
+unsigned long trinity_stat_get_max_reqs(struct trinity_driver *drv);
+unsigned long trinity_stat_get_max_reqs_per_app(struct trinity_driver *drv);
+
+struct trinity_stat_app *trinity_get_stat_app(struct trinity_driver *drv);
+
+/* Per-app accounting of memory allocations and app status. */
+void trinity_stat_app_total_alloc(struct trinity_driver *drv, size_t size);
+void trinity_stat_app_total_freed(struct trinity_driver *drv, size_t size);
+void trinity_stat_app_set_status(struct trinity_driver *drv,
+				 enum trinity_app_status status);
+
+/* Request-level bookkeeping across append/remove/finish. */
+int trinity_stat_append_req(struct trinity_driver *drv,
+			    struct trinity_req *req);
+void trinity_stat_remove_req(struct trinity_driver *drv,
+			     struct trinity_req *req, bool rollback);
+void trinity_stat_finish_req(struct trinity_driver *drv,
+			     struct trinity_req *req);
+
+/* Copy helpers filling the ioctl stat structures for userspace. */
+void trinity_stat_app_copy_ioctl(struct trinity_driver *drv,
+				 struct trinity_ioctl_stat_app *ioctl_stat_app);
+
+void trinity_stat_apps_copy_ioctl(
+	struct trinity_driver *drv,
+	struct trinity_ioctl_stat_apps *ioctl_stat_apps);
+
+void trinity_stat_reqs_copy_ioctl(
+	struct trinity_driver *drv,
+	struct trinity_ioctl_stat_reqs *ioctl_stat_reqs);
+
+#endif /* __TRINITY_STAT_H__ */
diff --git a/drivers/misc/trinity/trinity_sysfs.c b/drivers/misc/trinity/trinity_sysfs.c
new file mode 100644 (file)
index 0000000..4cb77e1
--- /dev/null
@@ -0,0 +1,885 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Sysfs interface for Samsung Research Trinity device family
+ *
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Wook Song <wook16.song@samsung.com>
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#include <linux/device.h>
+#include <linux/sysfs.h>
+
+#include "trinity_common.h"
+#include "trinity_sched.h"
+#include "trinity_stat.h"
+
+/* Fragment type handled by the table-style sysfs printers below. */
+enum trinity_sysfs_msg {
+	SYSFS_MSG_NORMAL = 0,	/* one data row */
+	SYSFS_MSG_PROLOGUE,	/* table header */
+	SYSFS_MSG_EPILOGUE,	/* table footer */
+	SYSFS_MSG_EMIT,		/* truncation marker row */
+};
+
+/*
+ * Set the driver verbosity level from the decimal string in @buf.
+ * Returns @count on success, a negative errno on failure.
+ */
+static ssize_t verbose_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct platform_device *pdev;
+	struct trinity_driver *drv;
+	int32_t ret;
+
+	pdev = container_of(dev, struct platform_device, dev);
+	drv = platform_get_drvdata(pdev);
+
+	/* drvdata may not be bound yet; the _show side guards this too */
+	if (drv == NULL)
+		return -ENODEV;
+
+	ret = kstrtoul(buf, 10, &drv->verbose);
+	if (ret != 0)
+		return -EINVAL;
+
+	return (ssize_t)count;
+}
+
+/* Show the current verbosity level. */
+static ssize_t verbose_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct platform_device *pdev = container_of(dev,
+						    struct platform_device, dev);
+	struct trinity_driver *drv = platform_get_drvdata(pdev);
+
+	if (!drv)
+		return 0;
+
+	return snprintf(buf, PAGE_SIZE, "%lu\n", drv->verbose);
+}
+static DEVICE_ATTR_RW(verbose);
+
+/*
+ * Set the maximum number of retained debugfs messages; also clears the
+ * current backlog via trinity_debug_clear().
+ */
+static ssize_t debugfs_max_store(struct device *dev,
+				 struct device_attribute *attr, const char *buf,
+				 size_t count)
+{
+	struct platform_device *pdev;
+	struct trinity_driver *drv;
+	unsigned long msg_max;
+	int32_t ret;
+
+	pdev = container_of(dev, struct platform_device, dev);
+	drv = platform_get_drvdata(pdev);
+
+	/* drvdata may not be bound yet; the _show side guards this too */
+	if (drv == NULL)
+		return -ENODEV;
+
+	ret = kstrtoul(buf, 10, &msg_max);
+	if (ret != 0)
+		return -EINVAL;
+
+	trinity_debug_clear(drv, msg_max);
+
+	return (ssize_t)count;
+}
+
+/* Show the maximum number of retained debugfs messages. */
+static ssize_t debugfs_max_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = container_of(dev,
+						    struct platform_device, dev);
+	struct trinity_driver *drv = platform_get_drvdata(pdev);
+
+	if (!drv)
+		return 0;
+
+	return snprintf(buf, PAGE_SIZE, "%lu\n", trinity_debug_get_max(drv));
+}
+static DEVICE_ATTR_RW(debugfs_max);
+
+/*
+ * Ask the device backend to dump profiling data for the request id
+ * written to this attribute (no-op if the backend lacks the hook).
+ */
+static ssize_t show_profile_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct platform_device *pdev;
+	struct trinity_driver *drv;
+	unsigned long req_id;
+	int32_t ret;
+
+	pdev = container_of(dev, struct platform_device, dev);
+	drv = platform_get_drvdata(pdev);
+
+	/* drvdata may not be bound yet; other handlers guard this too */
+	if (drv == NULL)
+		return -ENODEV;
+
+	ret = kstrtoul(buf, 10, &req_id);
+	if (ret != 0)
+		return -EINVAL;
+
+	if (drv->desc->show_profile)
+		drv->desc->show_profile(drv, (int)req_id);
+
+	return (ssize_t)count;
+}
+static DEVICE_ATTR_WO(show_profile);
+
+/* Report the IDU firmware version when the backend provides the hook. */
+static ssize_t idu_version_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = container_of(dev,
+						    struct platform_device, dev);
+	struct trinity_driver *drv = platform_get_drvdata(pdev);
+	uint32_t major, minor, extra;
+
+	if (!drv)
+		return 0;
+
+	if (drv->desc->idu_version &&
+	    drv->desc->idu_version(drv, &major, &minor, &extra) == 0)
+		return snprintf(buf, PAGE_SIZE, "v%u.%u.%u\n", major, minor,
+				extra);
+
+	return snprintf(buf, PAGE_SIZE,
+			"Unknown... v0.30.7 or higher version required.\n");
+}
+static DEVICE_ATTR_RO(idu_version);
+
+/* Attributes exposed under the "debug" sysfs group. */
+static struct attribute *trinity_attrs_debug[] = {
+	&dev_attr_verbose.attr, &dev_attr_debugfs_max.attr,
+	&dev_attr_show_profile.attr, &dev_attr_idu_version.attr, NULL
+};
+
+/* e.g., /sys/devices/platform/304f0000.triv2/debug/ */
+static struct attribute_group trinity_attrs_debug_group = {
+	.name = "debug",
+	.attrs = trinity_attrs_debug
+};
+
+/*
+ * Resize the maximum number of apps tracked in statistics.
+ * Error paths return a negative errno: returning 0 from a sysfs store
+ * would make userspace retry the write indefinitely.
+ */
+static ssize_t max_stat_apps_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct platform_device *pdev;
+	struct trinity_driver *drv;
+	unsigned long val;
+	int32_t ret;
+
+	pdev = container_of(dev, struct platform_device, dev);
+	drv = platform_get_drvdata(pdev);
+
+	/* drvdata may not be bound yet; the _show side guards this too */
+	if (drv == NULL)
+		return -ENODEV;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret != 0)
+		return -EINVAL;
+
+	trinity_stat_resize(drv, val, 0, 0);
+
+	return (ssize_t)count;
+}
+
+/* Show the current maximum number of tracked apps. */
+static ssize_t max_stat_apps_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev;
+	struct trinity_driver *drv;
+
+	pdev = container_of(dev, struct platform_device, dev);
+	drv = platform_get_drvdata(pdev);
+
+	if (drv == NULL)
+		return 0;
+
+	return snprintf(buf, PAGE_SIZE, "%lu\n",
+			trinity_stat_get_max_apps(drv));
+}
+static DEVICE_ATTR_RW(max_stat_apps);
+
+/*
+ * Resize the maximum number of requests tracked in statistics.
+ * Error paths return a negative errno instead of 0 (a 0 return from a
+ * sysfs store makes userspace retry the write indefinitely).
+ */
+static ssize_t max_stat_reqs_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct platform_device *pdev;
+	struct trinity_driver *drv;
+	unsigned long val;
+	int32_t ret;
+
+	pdev = container_of(dev, struct platform_device, dev);
+	drv = platform_get_drvdata(pdev);
+
+	/* drvdata may not be bound yet; the _show side guards this too */
+	if (drv == NULL)
+		return -ENODEV;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret != 0)
+		return -EINVAL;
+
+	trinity_stat_resize(drv, 0, val, 0);
+
+	return (ssize_t)count;
+}
+
+/* Show the current maximum number of tracked requests. */
+static ssize_t max_stat_reqs_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev;
+	struct trinity_driver *drv;
+
+	pdev = container_of(dev, struct platform_device, dev);
+	drv = platform_get_drvdata(pdev);
+
+	if (drv == NULL)
+		return 0;
+
+	return snprintf(buf, PAGE_SIZE, "%lu\n",
+			trinity_stat_get_max_reqs(drv));
+}
+static DEVICE_ATTR_RW(max_stat_reqs);
+
+/*
+ * Resize the maximum number of tracked requests per application.
+ * Error paths return a negative errno instead of 0 (a 0 return from a
+ * sysfs store makes userspace retry the write indefinitely).
+ */
+static ssize_t max_stat_reqs_per_app_store(struct device *dev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t count)
+{
+	struct platform_device *pdev;
+	struct trinity_driver *drv;
+	unsigned long val;
+	int32_t ret;
+
+	pdev = container_of(dev, struct platform_device, dev);
+	drv = platform_get_drvdata(pdev);
+
+	/* drvdata may not be bound yet; the _show side guards this too */
+	if (drv == NULL)
+		return -ENODEV;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret != 0)
+		return -EINVAL;
+
+	trinity_stat_resize(drv, 0, 0, val);
+
+	return (ssize_t)count;
+}
+
+/* Show the current per-app request tracking limit. */
+static ssize_t max_stat_reqs_per_app_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct platform_device *pdev;
+	struct trinity_driver *drv;
+
+	pdev = container_of(dev, struct platform_device, dev);
+	drv = platform_get_drvdata(pdev);
+
+	if (drv == NULL)
+		return 0;
+
+	return snprintf(buf, PAGE_SIZE, "%lu\n",
+			trinity_stat_get_max_reqs_per_app(drv));
+}
+static DEVICE_ATTR_RW(max_stat_reqs_per_app);
+
+/*
+ * Dump per-application memory allocation statistics.
+ * Uses scnprintf() with the remaining room so that many applications
+ * cannot overflow the PAGE_SIZE sysfs buffer; the previous code passed
+ * PAGE_SIZE to snprintf() even after @buf had been advanced.
+ */
+static ssize_t mem_usage_show(struct device *dev, struct device_attribute *attr,
+			      char *buf)
+{
+	struct platform_device *pdev;
+	struct trinity_driver *drv;
+	struct trinity_stat_app *stat_app;
+	ssize_t pos = 0;
+	bool first = true;
+
+	pdev = container_of(dev, struct platform_device, dev);
+	drv = platform_get_drvdata(pdev);
+
+	if (drv == NULL)
+		return 0;
+
+	trinity_stat_lock(&drv->stat);
+
+	list_for_each_entry (stat_app, &drv->stat.list, lnode) {
+		if (first) {
+			pos += scnprintf(
+				buf + pos, PAGE_SIZE - pos,
+				"Memory usage statistics for all opened devices\n");
+			first = false;
+		}
+
+		pos += scnprintf(
+			buf + pos, PAGE_SIZE - pos,
+			" [%d] total_alloc: %llu bytes, total_freed: %llu bytes\n",
+			stat_app->app_id, stat_app->total_alloc_mem,
+			stat_app->total_freed_mem);
+	}
+
+	if (first)
+		pos += scnprintf(buf + pos, PAGE_SIZE - pos,
+				 "No active devices\n");
+
+	trinity_stat_unlock(&drv->stat);
+
+	return pos;
+}
+static DEVICE_ATTR_RO(mem_usage);
+
+#define MODEL_REGISTERED_PROLOGUE                                              \
+	"\n   Model statistics registered in all opened devices\n"             \
+	"+--------------+--------------+-----------+------------+\n"           \
+	"|   Model ID   |  Model Size  | Dmabuf FD |   Offset   |\n"           \
+	"+--------------+--------------+-----------+------------+\n"
+#define MODEL_REGISTERED_NORMAL "| %#12llx | %#12llx | %9d | %#10llx |\n"
+#define MODEL_REGISTERED_EPILOGUE                                              \
+	"+--------------+--------------+-----------+------------+\n"
+
+/*
+ * Emit one fragment of the registered-model table into @buf.
+ * @size is the room left in the sysfs buffer; the previous code always
+ * passed PAGE_SIZE even after @buf had been advanced, which could
+ * overrun the page-sized buffer.  scnprintf() keeps @pos accurate.
+ */
+static ssize_t print_registered_models(const struct trinity_model *model,
+				       char *buf, size_t size,
+				       enum trinity_sysfs_msg msg)
+{
+	ssize_t pos = 0;
+
+	switch (msg) {
+	case SYSFS_MSG_PROLOGUE:
+		pos = scnprintf(buf, size, MODEL_REGISTERED_PROLOGUE);
+		break;
+	case SYSFS_MSG_NORMAL:
+		pos = scnprintf(buf, size, MODEL_REGISTERED_NORMAL,
+				model->config.id, model->config.program_size,
+				model->config.dbuf_fd,
+				model->config.program_offset_addr);
+		break;
+	case SYSFS_MSG_EPILOGUE:
+		pos = scnprintf(buf, size, MODEL_REGISTERED_EPILOGUE);
+		break;
+	default:
+		break;
+	}
+
+	return pos;
+}
+
+/* Dump all models currently registered in the per-device hash table. */
+static ssize_t registered_models_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev;
+	struct trinity_driver *drv;
+	struct trinity_model_htable ht;
+	struct trinity_model *model;
+	struct hlist_bl_node *hn;
+	ssize_t pos;
+	int i, num_printed = 0;
+
+	pdev = container_of(dev, struct platform_device, dev);
+	drv = platform_get_drvdata(pdev);
+
+	if (drv == NULL)
+		return 0;
+
+	trinity_init_model_htable(drv, &ht);
+
+	pos = print_registered_models(NULL, buf, PAGE_SIZE, SYSFS_MSG_PROLOGUE);
+
+	for (i = 0; i < ht.hash_size; i++) {
+		hlist_bl_lock(&(ht.ht_heads[i]));
+		hlist_bl_for_each_entry (model, hn, &(ht.ht_heads[i]), hnode) {
+			pos += print_registered_models(model, buf + pos,
+						       PAGE_SIZE - pos,
+						       SYSFS_MSG_NORMAL);
+			num_printed++;
+		}
+		hlist_bl_unlock(&(ht.ht_heads[i]));
+	}
+
+	if (num_printed > 0)
+		pos += print_registered_models(NULL, buf + pos,
+					       PAGE_SIZE - pos,
+					       SYSFS_MSG_EPILOGUE);
+
+	return pos;
+}
+static DEVICE_ATTR_RO(registered_models);
+
+/*
+ * Map a request priority to a short human-readable name.
+ * Out-of-range values are rejected so that a corrupted stat entry
+ * cannot trigger an out-of-bounds table read.
+ */
+static const char *priority_to_string(enum trinity_req_priority priority)
+{
+	static const char *const priority_strings[] = {
+		[TRINITY_REQ_PRIORITY_LOW] = "Low",
+		[TRINITY_REQ_PRIORITY_MID] = "Mid",
+		[TRINITY_REQ_PRIORITY_HIGH] = "High",
+	};
+
+	if ((size_t)priority >= ARRAY_SIZE(priority_strings) ||
+	    priority_strings[priority] == NULL)
+		return "Unknown";
+
+	return priority_strings[priority];
+}
+
+/*
+ * Map a request status to a short human-readable name.
+ * Out-of-range values are rejected so that a corrupted stat entry
+ * cannot trigger an out-of-bounds table read.
+ */
+static const char *status_to_string(enum trinity_req_status status)
+{
+	static const char *const status_strings[] = {
+		[TRINITY_REQ_STATUS_UNKNOWN] = "Unknown",
+		[TRINITY_REQ_STATUS_ERROR] = "Error",
+		[TRINITY_REQ_STATUS_PENDING] = "Pending",
+		[TRINITY_REQ_STATUS_RUNNING] = "Running",
+		[TRINITY_REQ_STATUS_FINISHED] = "Finished",
+	};
+
+	if ((size_t)status >= ARRAY_SIZE(status_strings) ||
+	    status_strings[status] == NULL)
+		return "Unknown";
+
+	return status_strings[status];
+}
+
+/* Byte width of one formatted table row, including the trailing newline. */
+#define APP_STATUS_LENGTH (77)
+#define USER_APP_STATUS_PROLOGUE                                                         \
+	"\n\tUser-level request statistics running in %s\n"                              \
+	"+-------+--------+----------+------+----------+--------------+-------------+\n" \
+	"|  PID  | Req ID | Model ID | Prio |  Status  |  Sched (us)  |  Infer (us) |\n" \
+	"+-------+--------+----------+------+----------+--------------+-------------+\n"
+#define USER_APP_STATUS_NORMAL                                                 \
+	"| %5d | %6d | %#8llx | %4s | %8s | %12lld | %11lld |\n"
+/* NOTE(review): "emitted" in this row likely means "omitted"; it is a
+ * user-visible string, so it is left unchanged here. */
+#define USER_APP_STATUS_EMIT                                                   \
+	"|                             ... (emitted) ...                            |\n"
+#define USER_APP_STATUS_EPILOGUE                                               \
+	"+-------+--------+----------+------+----------+--------------+-------------+\n"
+
+/*
+ * Format one fragment of the user-level request table into @buf.
+ * The prologue spans four rows, hence its larger size bound.
+ */
+static ssize_t print_user_app_status(struct device *dev,
+				     const struct trinity_stat_req *req,
+				     char *buf, enum trinity_sysfs_msg msg)
+{
+	ssize_t pos = 0;
+
+	switch (msg) {
+	case SYSFS_MSG_PROLOGUE:
+		pos = snprintf(buf, APP_STATUS_LENGTH * 4 + 1,
+			       USER_APP_STATUS_PROLOGUE, dev_name(dev));
+		break;
+	case SYSFS_MSG_NORMAL: {
+		ktime_t cur_time = ktime_get();
+		ktime_t submitted = req->submitted;
+		/* requests still in flight use "now" for the open interval */
+		ktime_t scheduled = req->scheduled ? req->scheduled : cur_time;
+		ktime_t completed = req->completed ? req->completed : cur_time;
+
+		int64_t sched_diff = TIME_DIFF_US(scheduled, submitted);
+		int64_t infer_diff = TIME_DIFF_US(completed, scheduled);
+
+		/* timings are meaningless for failed requests */
+		if (req->status == TRINITY_REQ_STATUS_ERROR) {
+			sched_diff = 0;
+			infer_diff = 0;
+		}
+
+		pos = snprintf(buf, APP_STATUS_LENGTH + 1,
+			       USER_APP_STATUS_NORMAL, req->app_id, req->req_id,
+			       req->model_id, priority_to_string(req->priority),
+			       status_to_string(req->status), sched_diff,
+			       infer_diff);
+	} break;
+	case SYSFS_MSG_EMIT:
+		pos = snprintf(buf, APP_STATUS_LENGTH + 1,
+			       USER_APP_STATUS_EMIT);
+		break;
+	case SYSFS_MSG_EPILOGUE:
+		pos = snprintf(buf, APP_STATUS_LENGTH + 1,
+			       USER_APP_STATUS_EPILOGUE);
+		break;
+	default:
+		break;
+	}
+
+	return pos;
+}
+
+#define KERNEL_APP_STATUS_PROLOGUE                                                       \
+	"\n\tKernel-level request statistics running in %s\n"                            \
+	"+-------+--------+----------+------+----------+------------+---------------+\n" \
+	"|  PID  | Req ID | Model ID | Prio |  Status  |   # Runs   | Avg. Lat (us) |\n" \
+	"+-------+--------+----------+------+----------+------------+---------------+\n"
+#define KERNEL_APP_STATUS_NORMAL                                               \
+	"| %5d | %6d | %#8llx | %4s | %8s | %10u | %13u |\n"
+#define KERNEL_APP_STATUS_EMIT                                                 \
+	"|                             ... (emitted) ...                            |\n"
+#define KERNEL_APP_STATUS_EPILOGUE                                             \
+	"+-------+--------+----------+------+----------+------------+---------------+\n"
+
+/*
+ * Format one fragment of the kernel-level request table into @buf.
+ * The prologue spans four rows, hence its larger size bound.
+ */
+static ssize_t print_kernel_app_status(struct device *dev,
+				       const struct trinity_stat_req *req,
+				       char *buf, enum trinity_sysfs_msg msg)
+{
+	ssize_t pos = 0;
+
+	switch (msg) {
+	case SYSFS_MSG_PROLOGUE:
+		pos = snprintf(buf, APP_STATUS_LENGTH * 4 + 1,
+			       KERNEL_APP_STATUS_PROLOGUE, dev_name(dev));
+		break;
+	case SYSFS_MSG_NORMAL: {
+		uint32_t avg_latency = 0;
+
+		/* average latency over completed runs, if any */
+		if (req->num_runs > 0)
+			avg_latency = req->total_time / req->num_runs;
+
+		pos = snprintf(buf, APP_STATUS_LENGTH + 1,
+			       KERNEL_APP_STATUS_NORMAL, req->app_id,
+			       req->req_id, req->model_id,
+			       priority_to_string(req->priority),
+			       status_to_string(req->status), req->num_runs,
+			       avg_latency);
+	} break;
+	case SYSFS_MSG_EMIT:
+		pos = snprintf(buf, APP_STATUS_LENGTH + 1,
+			       KERNEL_APP_STATUS_EMIT);
+		break;
+	case SYSFS_MSG_EPILOGUE:
+		pos = snprintf(buf, APP_STATUS_LENGTH + 1,
+			       KERNEL_APP_STATUS_EPILOGUE);
+		break;
+	default:
+		break;
+	}
+
+	return pos;
+}
+
+/*
+ * Dump user-level (non-kernel) request statistics of every tracked app.
+ * When the output approaches PAGE_SIZE, a truncation marker row is
+ * printed and the accumulated stats are force-destroyed so the next
+ * read starts from a smaller set.
+ */
+static ssize_t app_status_user_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev;
+	struct trinity_driver *drv;
+	struct trinity_stat_app *stat_app;
+	struct trinity_stat_req *stat_req;
+	int num_printed = 0;
+	ssize_t pos;
+
+	pdev = container_of(dev, struct platform_device, dev);
+	drv = platform_get_drvdata(pdev);
+
+	if (drv == NULL)
+		return 0;
+
+	pos = print_user_app_status(dev, NULL, buf, SYSFS_MSG_PROLOGUE);
+
+	/* hold the stat lock across the whole two-level list walk */
+	trinity_stat_lock(&drv->stat);
+	list_for_each_entry (stat_app, &drv->stat.list, lnode) {
+		list_for_each_entry (stat_req, &stat_app->reqs, list) {
+			if (stat_req->is_kernel)
+				continue;
+
+			pos += print_user_app_status(dev, stat_req, buf + pos,
+						     SYSFS_MSG_NORMAL);
+			num_printed++;
+
+			/* buffer size limit: PAGE_SIZE (also need reserved bytes) */
+			if (pos + APP_STATUS_LENGTH >
+			    PAGE_SIZE - 2 * APP_STATUS_LENGTH) {
+				pos += print_user_app_status(
+					dev, NULL, buf + pos, SYSFS_MSG_EMIT);
+				/* clear old stats */
+				trinity_destroy_stats(&drv->stat, true);
+				goto out;
+			}
+		}
+	}
+out:
+	trinity_stat_unlock(&drv->stat);
+
+	if (num_printed > 0)
+		pos += print_user_app_status(dev, NULL, buf + pos,
+					     SYSFS_MSG_EPILOGUE);
+
+	return pos;
+}
+static DEVICE_ATTR_RO(app_status_user);
+
+/*
+ * Dump kernel-level request statistics of every tracked app.
+ * When the output approaches PAGE_SIZE, a truncation marker row is
+ * printed and the accumulated stats are force-destroyed so the next
+ * read starts from a smaller set.
+ */
+static ssize_t app_status_kernel_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev;
+	struct trinity_driver *drv;
+	struct trinity_stat_app *stat_app;
+	struct trinity_stat_req *stat_req;
+	int num_printed = 0;
+	ssize_t pos;
+
+	pdev = container_of(dev, struct platform_device, dev);
+	drv = platform_get_drvdata(pdev);
+
+	if (drv == NULL)
+		return 0;
+
+	pos = print_kernel_app_status(dev, NULL, buf, SYSFS_MSG_PROLOGUE);
+
+	/* hold the stat lock across the whole two-level list walk */
+	trinity_stat_lock(&drv->stat);
+	list_for_each_entry (stat_app, &drv->stat.list, lnode) {
+		list_for_each_entry (stat_req, &stat_app->reqs, list) {
+			if (!stat_req->is_kernel)
+				continue;
+
+			pos += print_kernel_app_status(dev, stat_req, buf + pos,
+						       SYSFS_MSG_NORMAL);
+			num_printed++;
+
+			/* buffer size limit: PAGE_SIZE (also need reserved bytes) */
+			if (pos + APP_STATUS_LENGTH >
+			    PAGE_SIZE - 2 * APP_STATUS_LENGTH) {
+				pos += print_kernel_app_status(
+					dev, NULL, buf + pos, SYSFS_MSG_EMIT);
+				/* clear old stats */
+				trinity_destroy_stats(&drv->stat, true);
+				goto out;
+			}
+		}
+	}
+out:
+	trinity_stat_unlock(&drv->stat);
+
+	if (num_printed > 0)
+		pos += print_kernel_app_status(dev, NULL, buf + pos,
+					       SYSFS_MSG_EPILOGUE);
+
+	return pos;
+}
+static DEVICE_ATTR_RO(app_status_kernel);
+
+/* Sum of num_total_reqs across every app tracked by this device. */
+static ssize_t num_total_reqs_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = container_of(dev,
+						    struct platform_device, dev);
+	struct trinity_driver *drv = platform_get_drvdata(pdev);
+	struct trinity_stat_app *stat_app;
+	uint32_t total = 0;
+
+	if (!drv)
+		return 0;
+
+	trinity_stat_lock(&drv->stat);
+	list_for_each_entry (stat_app, &drv->stat.list, lnode)
+		total += stat_app->num_total_reqs;
+	trinity_stat_unlock(&drv->stat);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", total);
+}
+static DEVICE_ATTR_RO(num_total_reqs);
+
+/* Sum of num_active_reqs across every app tracked by this device. */
+static ssize_t num_active_reqs_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = container_of(dev,
+						    struct platform_device, dev);
+	struct trinity_driver *drv = platform_get_drvdata(pdev);
+	struct trinity_stat_app *stat_app;
+	uint32_t active = 0;
+
+	if (!drv)
+		return 0;
+
+	trinity_stat_lock(&drv->stat);
+	list_for_each_entry (stat_app, &drv->stat.list, lnode)
+		active += stat_app->num_active_reqs;
+	trinity_stat_unlock(&drv->stat);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", active);
+}
+static DEVICE_ATTR_RO(num_active_reqs);
+
+/* Attributes exposed under the "stat" sysfs group. */
+static struct attribute *trinity_attrs_stat[] = {
+	&dev_attr_max_stat_apps.attr,         &dev_attr_max_stat_reqs.attr,
+	&dev_attr_max_stat_reqs_per_app.attr, &dev_attr_mem_usage.attr,
+	&dev_attr_registered_models.attr,     &dev_attr_app_status_user.attr,
+	&dev_attr_app_status_kernel.attr,     &dev_attr_num_total_reqs.attr,
+	&dev_attr_num_active_reqs.attr,       NULL
+};
+
+/* e.g., /sys/devices/platform/304f0000.triv2/stat/ */
+static struct attribute_group trinity_attrs_stat_group = {
+	.name = "stat",
+	.attrs = trinity_attrs_stat
+};
+
+/*
+ * Schedule stopping of outstanding requests; the heavy lifting runs
+ * from drv->work_stop so the sysfs write does not block.
+ * Error paths return a negative errno: returning 0 from a sysfs store
+ * makes userspace retry the write indefinitely.
+ */
+static ssize_t stop_store(struct device *dev, struct device_attribute *attr,
+			  const char *buf, size_t count)
+{
+	struct platform_device *pdev;
+	struct trinity_driver *drv;
+	unsigned long stop;
+	int32_t ret;
+
+	pdev = container_of(dev, struct platform_device, dev);
+	drv = platform_get_drvdata(pdev);
+
+	if (drv == NULL)
+		return -ENODEV;
+
+	ret = kstrtoul(buf, 10, &stop);
+	if (ret != 0)
+		return -EINVAL;
+
+	if (stop == 1 && drv->desc->stop_reqs)
+		schedule_work(&drv->work_stop);
+
+	return (ssize_t)count;
+}
+
+static DEVICE_ATTR_WO(stop);
+
+/*
+ * Load IDU firmware from the directory path written to this attribute.
+ * The previous strncpy() did not guarantee NUL termination when the
+ * input was NAME_MAX bytes or longer, making the following strcspn()
+ * read past the buffer; termination is now forced.
+ */
+static ssize_t idu_store(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t count)
+{
+	struct platform_device *pdev;
+	struct trinity_driver *drv;
+	char dirpath[NAME_MAX];
+
+	pdev = container_of(dev, struct platform_device, dev);
+	drv = platform_get_drvdata(pdev);
+
+	if (drv == NULL)
+		return -ENODEV;
+
+	if (drv->desc->idu_load == NULL)
+		return -EOPNOTSUPP;
+
+	strncpy(dirpath, buf, NAME_MAX - 1);
+	dirpath[NAME_MAX - 1] = '\x00';
+	/* remove newline if exists */
+	dirpath[strcspn(dirpath, "\n")] = '\x00';
+
+	mutex_lock(&drv->lock);
+	drv->desc->idu_load(drv, dirpath, true);
+	mutex_unlock(&drv->lock);
+
+	return (ssize_t)count;
+}
+
+static DEVICE_ATTR_WO(idu);
+
+/*
+ * Force a runtime suspend of the device.
+ * Note that this interface is used only for testing purpose.
+ * Guards against a missing dev_pm_ops / callback, which would have
+ * been a NULL pointer dereference.
+ */
+static ssize_t suspend_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	unsigned long suspend;
+
+	if (kstrtoul(buf, 10, &suspend) != 0)
+		return -EINVAL;
+
+	if (suspend == 1) {
+		const struct dev_pm_ops *ops = dev->driver->pm;
+
+		if (ops == NULL || ops->runtime_suspend == NULL)
+			return -EOPNOTSUPP;
+		ops->runtime_suspend(dev);
+	}
+
+	return (ssize_t)count;
+}
+
+static DEVICE_ATTR_WO(suspend);
+
+/*
+ * Force a runtime resume of the device.
+ * Note that this interface is used only for testing purpose.
+ * Guards against a missing dev_pm_ops / callback, which would have
+ * been a NULL pointer dereference.
+ */
+static ssize_t resume_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	unsigned long resume;
+
+	if (kstrtoul(buf, 10, &resume) != 0)
+		return -EINVAL;
+
+	if (resume == 1) {
+		const struct dev_pm_ops *ops = dev->driver->pm;
+
+		if (ops == NULL || ops->runtime_resume == NULL)
+			return -EOPNOTSUPP;
+		ops->runtime_resume(dev);
+	}
+
+	return (ssize_t)count;
+}
+
+static DEVICE_ATTR_WO(resume);
+
+/*
+ * Trigger a scheduler test run for the given request id.
+ * Note that this interface is used only for testing purpose.
+ */
+static ssize_t sched_test_store(struct device *dev,
+				struct device_attribute *attr, const char *buf,
+				size_t count)
+{
+	struct platform_device *pdev;
+	struct trinity_driver *drv;
+	struct trinity_sched_desc *desc;
+	long req_id;
+
+	pdev = container_of(dev, struct platform_device, dev);
+	drv = platform_get_drvdata(pdev);
+
+	if (drv == NULL)
+		return -ENODEV;
+
+	/* reject unparsable input and values that do not fit in an int */
+	if (kstrtol(buf, 10, &req_id) != 0 || req_id > INT_MAX ||
+	    req_id < INT_MIN)
+		return -EINVAL;
+
+	desc = trinity_sched_find(SCHED_VD);
+	if (desc && desc->test_run)
+		desc->test_run(drv, (int)req_id);
+
+	return (ssize_t)count;
+}
+
+static DEVICE_ATTR_WO(sched_test);
+
+/*
+ * Enable/disable device profiling via the backend hook.
+ * Note that this interface is used only for testing purpose.
+ * Error paths return a negative errno instead of 0 so userspace does
+ * not retry the write indefinitely.
+ */
+static ssize_t profile_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct platform_device *pdev;
+	struct trinity_driver *drv;
+	unsigned long profile;
+
+	pdev = container_of(dev, struct platform_device, dev);
+	drv = platform_get_drvdata(pdev);
+
+	if (drv == NULL)
+		return -ENODEV;
+
+	if (kstrtoul(buf, 10, &profile) != 0)
+		return -EINVAL;
+
+	if (drv->desc->init_profile)
+		drv->desc->init_profile(drv, profile);
+
+	return (ssize_t)count;
+}
+
+static DEVICE_ATTR_WO(profile);
+
+/*
+ * Reset the device via the backend hook when "1" is written.
+ * Error paths return a negative errno instead of 0 so userspace does
+ * not retry the write indefinitely.
+ */
+static ssize_t reset_store(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t count)
+{
+	struct platform_device *pdev;
+	struct trinity_driver *drv;
+	unsigned long reset;
+
+	pdev = container_of(dev, struct platform_device, dev);
+	drv = platform_get_drvdata(pdev);
+
+	if (drv == NULL)
+		return -ENODEV;
+
+	if (kstrtoul(buf, 10, &reset) != 0)
+		return -EINVAL;
+
+	if (reset == 1 && drv->desc->reset)
+		drv->desc->reset(drv);
+
+	return (ssize_t)count;
+}
+
+static DEVICE_ATTR_WO(reset);
+
+/* Attributes exposed under the "control" sysfs group. */
+static struct attribute *trinity_attrs_control[] = {
+	&dev_attr_stop.attr,       &dev_attr_idu.attr,
+	&dev_attr_suspend.attr,    &dev_attr_resume.attr,
+	&dev_attr_sched_test.attr, &dev_attr_profile.attr,
+	&dev_attr_reset.attr,      NULL
+};
+
+/* e.g., /sys/devices/platform/304f0000.triv2/control/ */
+static struct attribute_group trinity_attrs_control_group = {
+	.name = "control",
+	.attrs = trinity_attrs_control
+};
+
+/* All groups registered at once in trinity_sysfs_init(). */
+static const struct attribute_group *trinity_attrs_groups[] = {
+	&trinity_attrs_debug_group, &trinity_attrs_stat_group,
+	&trinity_attrs_control_group, NULL
+};
+
+/* Create the debug/stat/control sysfs groups for this device. */
+int trinity_sysfs_init(struct trinity_driver *drv)
+{
+	struct device *dev = drv_to_dev_ptr(drv);
+	int err = sysfs_create_groups(&dev->kobj, trinity_attrs_groups);
+
+	if (err < 0) {
+		dev_err(dev, "failed to create sysfs groups\n");
+		return err;
+	}
+
+	return 0;
+}
+
+/* Remove the sysfs groups created by trinity_sysfs_init(). */
+int trinity_sysfs_cleanup(struct trinity_driver *drv)
+{
+	sysfs_remove_groups(&drv_to_dev_ptr(drv)->kobj, trinity_attrs_groups);
+
+	return 0;
+}
diff --git a/drivers/misc/trinity/trinity_trace.c b/drivers/misc/trinity/trinity_trace.c
new file mode 100644 (file)
index 0000000..5f07abf
--- /dev/null
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * trinity/trinity_trace.c: Trace source for trinity devices
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+/*
+ * Expand the tracepoint definitions exactly once, in this translation
+ * unit.  Skipped under sparse (__CHECKER__) — the generated tracepoint
+ * code is not sparse-clean.
+ */
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "trinity_trace.h"
+#endif
diff --git a/drivers/misc/trinity/trinity_trace.h b/drivers/misc/trinity/trinity_trace.h
new file mode 100644 (file)
index 0000000..52e2ce9
--- /dev/null
@@ -0,0 +1,399 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * trinity/trinity_trace.h: Trace header for trinity devices
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+/*
+ * Trace headers are deliberately not protected by a plain include guard:
+ * TRACE_HEADER_MULTI_READ allows <trace/define_trace.h> to re-include this
+ * file to generate the tracepoint bodies.
+ */
+#if !defined(__TRINITY_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ)
+#define __TRINITY_TRACE_H__
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM      trinity
+#define TRACE_INCLUDE_FILE trinity_trace
+
+// clang-format off
+/*
+ * TRIV2 hardware-path events: command trigger/completion, CP wakeup,
+ * (threaded) IRQ handling, and scheduler data (un)mapping.  Every event
+ * records the owning device_id; 'slot' identifies the command slot.
+ */
+TRACE_EVENT(triv2_run_trigger,
+       TP_PROTO(u32 device_id, s32 slot),
+       TP_ARGS(device_id, slot),
+       TP_STRUCT__entry(
+               __field(u32, device_id)
+               __field(s32, slot)
+       ),
+       TP_fast_assign(
+               __entry->device_id = device_id;
+               __entry->slot = slot;
+       ),
+       TP_printk("device_id=%u slot=%d",
+               __entry->device_id,
+               __entry->slot)
+);
+TRACE_EVENT(triv2_wakeup_cp,
+       TP_PROTO(u32 device_id),
+       TP_ARGS(device_id),
+       TP_STRUCT__entry(
+               __field(u32, device_id)
+       ),
+       TP_fast_assign(
+               __entry->device_id = device_id;
+       ),
+       TP_printk("device_id=%u",
+               __entry->device_id)
+);
+TRACE_EVENT(triv2_handle_irq,
+       TP_PROTO(u32 device_id, s32 irq),
+       TP_ARGS(device_id, irq),
+       TP_STRUCT__entry(
+               __field(u32, device_id)
+               __field(s32, irq)
+       ),
+       TP_fast_assign(
+               __entry->device_id = device_id;
+               __entry->irq = irq;
+       ),
+       TP_printk("device_id=%u irq=%d",
+               __entry->device_id,
+               __entry->irq)
+);
+TRACE_EVENT(triv2_handle_threaded_irq,
+       TP_PROTO(u32 device_id, s32 irq),
+       TP_ARGS(device_id, irq),
+       TP_STRUCT__entry(
+               __field(u32, device_id)
+               __field(s32, irq)
+       ),
+       TP_fast_assign(
+               __entry->device_id = device_id;
+               __entry->irq = irq;
+       ),
+       TP_printk("device_id=%u irq=%d",
+               __entry->device_id,
+               __entry->irq)
+);
+/* Completion event: 'cycles' and 'time' (us) report per-command cost. */
+TRACE_EVENT(triv2_handle_cmd_done,
+       TP_PROTO(u32 device_id, s32 slot, u32 cycles, u32 time),
+       TP_ARGS(device_id, slot, cycles, time),
+       TP_STRUCT__entry(
+               __field(u32, device_id)
+               __field(s32, slot)
+               __field(u32, cycles)
+               __field(u32, time)
+       ),
+       TP_fast_assign(
+               __entry->device_id = device_id;
+               __entry->slot = slot;
+               __entry->cycles = cycles;
+               __entry->time = time;
+       ),
+       TP_printk("device_id=%u slot=%d cycles=%u time(us)=%u",
+               __entry->device_id,
+               __entry->slot,
+               __entry->cycles,
+               __entry->time)
+);
+TRACE_EVENT(triv2_map_sched_data,
+       TP_PROTO(u32 device_id, s32 slot, u32 batch_size, u32 in_cnt, u32 out_cnt),
+       TP_ARGS(device_id, slot, batch_size, in_cnt, out_cnt),
+       TP_STRUCT__entry(
+               __field(u32, device_id)
+               __field(s32, slot)
+               __field(u32, batch_size)
+               __field(u32, in_cnt)
+               __field(u32, out_cnt)
+       ),
+       TP_fast_assign(
+               __entry->device_id = device_id;
+               __entry->slot = slot;
+               __entry->batch_size = batch_size;
+               __entry->in_cnt = in_cnt;
+               __entry->out_cnt = out_cnt;
+       ),
+       TP_printk("device_id=%u slot=%d batch_size=%u in_cnt=%u out_cnt=%u",
+               __entry->device_id,
+               __entry->slot,
+               __entry->batch_size,
+               __entry->in_cnt,
+               __entry->out_cnt)
+);
+TRACE_EVENT(triv2_unmap_sched_data,
+       TP_PROTO(u32 device_id, s32 slot),
+       TP_ARGS(device_id, slot),
+       TP_STRUCT__entry(
+               __field(u32, device_id)
+               __field(s32, slot)
+       ),
+       TP_fast_assign(
+               __entry->device_id = device_id;
+               __entry->slot = slot;
+       ),
+       TP_printk("device_id=%u slot=%d",
+               __entry->device_id,
+               __entry->slot)
+);
+/*
+ * Generic ioctl message event.
+ *
+ * Fix: the original stored the raw 'char *msg' pointer in the ring-buffer
+ * entry; by the time the trace is read, that string may have been freed or
+ * overwritten (and TP_printk would dereference a stale pointer).  Use the
+ * tracepoint string helpers, which copy the string into the entry.
+ */
+TRACE_EVENT(trinity_ioctl_msg,
+       TP_PROTO(u32 device_id, s32 app_id, char* msg),
+       TP_ARGS(device_id, app_id, msg),
+       TP_STRUCT__entry(
+               __field(u32, device_id)
+               __field(s32, app_id)
+               __string(msg, msg)
+       ),
+       TP_fast_assign(
+               __entry->device_id = device_id;
+               __entry->app_id = app_id;
+               __assign_str(msg, msg);
+       ),
+       TP_printk("device_id=%u app_id=%d msg=%s",
+               __entry->device_id,
+               __entry->app_id,
+               __get_str(msg))
+);
+/* ioctl-path events: request hand-off/stop and hwmem (de)allocation. */
+TRACE_EVENT(trinity_ioctl_next_req,
+       TP_PROTO(u32 device_id, s32 app_id, s32 req_id),
+       TP_ARGS(device_id, app_id, req_id),
+       TP_STRUCT__entry(
+               __field(u32, device_id)
+               __field(s32, app_id)
+               __field(s32, req_id)
+       ),
+       TP_fast_assign(
+               __entry->device_id = device_id;
+               __entry->app_id = app_id;
+               __entry->req_id = req_id;
+       ),
+       TP_printk("device_id=%u app_id=%d req_id=%d",
+               __entry->device_id,
+               __entry->app_id,
+               __entry->req_id)
+);
+TRACE_EVENT(trinity_ioctl_stop_req,
+       TP_PROTO(u32 device_id, s32 app_id, s32 req_id),
+       TP_ARGS(device_id, app_id, req_id),
+       TP_STRUCT__entry(
+               __field(u32, device_id)
+               __field(s32, app_id)
+               __field(s32, req_id)
+       ),
+       TP_fast_assign(
+               __entry->device_id = device_id;
+               __entry->app_id = app_id;
+               __entry->req_id = req_id;
+       ),
+       TP_printk("device_id=%u app_id=%d req_id=%d",
+               __entry->device_id,
+               __entry->app_id,
+               __entry->req_id)
+);
+/* 'dbuf_fd' is the dma-buf file descriptor backing the allocation. */
+TRACE_EVENT(trinity_ioctl_hwmem_alloc,
+       TP_PROTO(u32 device_id, s32 app_id, s64 size, s32 dbuf_fd),
+       TP_ARGS(device_id, app_id, size, dbuf_fd),
+       TP_STRUCT__entry(
+               __field(u32, device_id)
+               __field(s32, app_id)
+               __field(s64, size)
+               __field(s32, dbuf_fd)
+       ),
+       TP_fast_assign(
+               __entry->device_id = device_id;
+               __entry->app_id = app_id;
+               __entry->size = size;
+               __entry->dbuf_fd = dbuf_fd;
+       ),
+       TP_printk("device_id=%u app_id=%d size=%lld dbuf_fd=%d",
+               __entry->device_id,
+               __entry->app_id,
+               __entry->size,
+               __entry->dbuf_fd)
+);
+TRACE_EVENT(trinity_ioctl_hwmem_dealloc,
+       TP_PROTO(u32 device_id, s32 app_id, s32 dbuf_fd),
+       TP_ARGS(device_id, app_id, dbuf_fd),
+       TP_STRUCT__entry(
+               __field(u32, device_id)
+               __field(s32, app_id)
+               __field(s32, dbuf_fd)
+       ),
+       TP_fast_assign(
+               __entry->device_id = device_id;
+               __entry->app_id = app_id;
+               __entry->dbuf_fd = dbuf_fd;
+       ),
+       TP_printk("device_id=%u app_id=%d dbuf_fd=%d",
+               __entry->device_id,
+               __entry->app_id,
+               __entry->dbuf_fd)
+);
+/* Profiling-buffer query events. */
+TRACE_EVENT(trinity_ioctl_get_profile_meta,
+       TP_PROTO(u32 device_id, s32 app_id, s32 req_id, u32 profile_size),
+       TP_ARGS(device_id, app_id, req_id, profile_size),
+       TP_STRUCT__entry(
+               __field(u32, device_id)
+               __field(s32, app_id)
+               __field(s32, req_id)
+               __field(u32, profile_size)
+       ),
+       TP_fast_assign(
+               __entry->device_id = device_id;
+               __entry->app_id = app_id;
+               __entry->req_id = req_id;
+               __entry->profile_size = profile_size;
+       ),
+       TP_printk("device_id=%u app_id=%d req_id=%d profile_size=%u",
+               __entry->device_id,
+               __entry->app_id,
+               __entry->req_id,
+               __entry->profile_size)
+);
+TRACE_EVENT(trinity_ioctl_get_profile_buff,
+       TP_PROTO(u32 device_id, s32 app_id, s32 req_id, u32 profile_pos, u32 profile_size),
+       TP_ARGS(device_id, app_id, req_id, profile_pos, profile_size),
+       TP_STRUCT__entry(
+               __field(u32, device_id)
+               __field(s32, app_id)
+               __field(s32, req_id)
+               __field(u32, profile_pos)
+               __field(u32, profile_size)
+       ),
+       TP_fast_assign(
+               __entry->device_id = device_id;
+               __entry->app_id = app_id;
+               __entry->req_id = req_id;
+               __entry->profile_pos = profile_pos;
+               __entry->profile_size = profile_size;
+       ),
+       TP_printk("device_id=%u app_id=%d req_id=%d profile_pos=%u profile_size=%u",
+               __entry->device_id,
+               __entry->app_id,
+               __entry->req_id,
+               __entry->profile_pos,
+               __entry->profile_size)
+);
+/*
+ * Model registration; the *_drv_ver1/ver2 variants below carry the fields
+ * specific to each driver ABI version and are presumably emitted alongside
+ * this event — confirm against the callers.
+ */
+TRACE_EVENT(trinity_ioctl_register_model,
+       TP_PROTO(u32 device_id, s32 app_id, u64 config_id, s32 dbuf_fd, u64 program_offset_addr, u64 program_size),
+       TP_ARGS(device_id, app_id, config_id, dbuf_fd, program_offset_addr, program_size),
+       TP_STRUCT__entry(
+               __field(u32, device_id)
+               __field(s32, app_id)
+               __field(u64, config_id)
+               __field(s32, dbuf_fd)
+               __field(u64, program_offset_addr)
+               __field(u64, program_size)
+       ),
+       TP_fast_assign(
+               __entry->device_id = device_id;
+               __entry->app_id = app_id;
+               __entry->config_id = config_id;
+               __entry->dbuf_fd = dbuf_fd;
+               __entry->program_offset_addr = program_offset_addr;
+               __entry->program_size = program_size;
+       ),
+       TP_printk("device_id=%u app_id=%d config_id=0x%llx dbuf_fd=%d program_offset_addr=0x%llx program_size=0x%llx",
+               __entry->device_id,
+               __entry->app_id,
+               __entry->config_id,
+               __entry->dbuf_fd,
+               __entry->program_offset_addr,
+               __entry->program_size)
+);
+TRACE_EVENT(trinity_ioctl_register_model_drv_ver1,
+       TP_PROTO(u64 weight_offset_addr),
+       TP_ARGS(weight_offset_addr),
+       TP_STRUCT__entry(
+               __field(u64, weight_offset_addr)
+       ),
+       TP_fast_assign(
+               __entry->weight_offset_addr = weight_offset_addr;
+       ),
+       TP_printk("weight_offset_addr=0x%llx",
+               __entry->weight_offset_addr)
+);
+TRACE_EVENT(trinity_ioctl_register_model_drv_ver2,
+       TP_PROTO(s32 metadata_dbuf_fd, s32 metadata_ext_dbuf_fd, u64 metadata_ext_size),
+       TP_ARGS(metadata_dbuf_fd, metadata_ext_dbuf_fd, metadata_ext_size),
+       TP_STRUCT__entry(
+               __field(s32, metadata_dbuf_fd)
+               __field(s32, metadata_ext_dbuf_fd)
+               __field(u64, metadata_ext_size)
+       ),
+       TP_fast_assign(
+               __entry->metadata_dbuf_fd = metadata_dbuf_fd;
+               __entry->metadata_ext_dbuf_fd = metadata_ext_dbuf_fd;
+               __entry->metadata_ext_size = metadata_ext_size;
+       ),
+       TP_printk("metadata_dbuf_fd=%d metadata_ext_dbuf_fd=%d metadata_ext_size=0x%llx",
+               __entry->metadata_dbuf_fd,
+               __entry->metadata_ext_dbuf_fd,
+               __entry->metadata_ext_size)
+);
+/* Inference submission; *_drv_ver1/ver2 carry ABI-version-specific fields. */
+TRACE_EVENT(trinity_ioctl_run_input,
+       TP_PROTO(u32 device_id, s32 app_id, s32 dbuf_fd, u64 model_id),
+       TP_ARGS(device_id, app_id, dbuf_fd, model_id),
+       TP_STRUCT__entry(
+               __field(u32, device_id)
+               __field(s32, app_id)
+               __field(s32, dbuf_fd)
+               __field(u64, model_id)
+       ),
+       TP_fast_assign(
+               __entry->device_id = device_id;
+               __entry->app_id = app_id;
+               __entry->dbuf_fd = dbuf_fd;
+               __entry->model_id = model_id;
+       ),
+       TP_printk("device_id=%u app_id=%d dbuf_fd=%d model_id=0x%llx",
+               __entry->device_id,
+               __entry->app_id,
+               __entry->dbuf_fd,
+               __entry->model_id)
+);
+TRACE_EVENT(trinity_ioctl_run_input_drv_ver1,
+       TP_PROTO(u64 activation_offset_addr0, u64 activation_offset_addr1),
+       TP_ARGS(activation_offset_addr0, activation_offset_addr1),
+       TP_STRUCT__entry(
+               __field(u64, activation_offset_addr0)
+               __field(u64, activation_offset_addr1)
+       ),
+       TP_fast_assign(
+               __entry->activation_offset_addr0 = activation_offset_addr0;
+               __entry->activation_offset_addr1 = activation_offset_addr1;
+       ),
+       TP_printk("activation_offset_addr0=0x%llx activation_offset_addr1=0x%llx",
+               __entry->activation_offset_addr0,
+               __entry->activation_offset_addr1)
+);
+TRACE_EVENT(trinity_ioctl_run_input_drv_ver2,
+       TP_PROTO(s64 timeout_ms, u32 priority, u32 num_segments, s32 input_mode, s32 output_mode),
+       TP_ARGS(timeout_ms, priority, num_segments, input_mode, output_mode),
+       TP_STRUCT__entry(
+               __field(s64, timeout_ms)
+               __field(u32, priority)
+               __field(u32, num_segments)
+               __field(s32, input_mode)
+               __field(s32, output_mode)
+       ),
+       TP_fast_assign(
+               __entry->timeout_ms = timeout_ms;
+               __entry->priority = priority;
+               __entry->num_segments = num_segments;
+               __entry->input_mode = input_mode;
+               __entry->output_mode = output_mode;
+       ),
+       TP_printk("timeout_ms=%lld priority=%u num_segments=%u input_mode=%d output_mode=%d",
+               __entry->timeout_ms,
+               __entry->priority,
+               __entry->num_segments,
+               __entry->input_mode,
+               __entry->output_mode)
+);
+
+// clang-format on
+
+#endif /* __TRINITY_TRACE_H__ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
diff --git a/drivers/misc/trinity/trinity_vision2_drv.c b/drivers/misc/trinity/trinity_vision2_drv.c
new file mode 100644 (file)
index 0000000..ca8ae61
--- /dev/null
@@ -0,0 +1,2674 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ * Copyright (C) 2020 Wook Song <wook16.song@samsung.com>
+ */
+/**
+ * @file       trinity_vision2_drv.c
+ * @brief      Samsung Research Trinity Vision2 NPU device driver
+ * @date       10 Mar 2020
+ * @author     Dongju Chae <dongju.chae@samsung.com>
+ *             Wook Song <wook16.song@samsung.com>
+ * @bug                No known bugs except for NYI items
+ */
+
+#include <linux/bitmap.h>
+#include <linux/dma-buf.h>
+#include <linux/fs.h>
+#include <linux/hashtable.h>
+#include <linux/highmem.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/utsname.h>
+#include <linux/version.h>
+
+#include <linux/delay.h>
+#include <linux/iommu.h>
+#include <linux/iopoll.h>
+#include <linux/timer.h>
+
+#include "trinity_common.h"
+#include "trinity_hwmem.h"
+#include "trinity_resv_mem.h"
+#include "trinity_vision2_profile.h"
+
+#include "trinity_monitor.h"
+#include "trinity_sched_vd.h"
+#include "trinity_trace.h"
+
+#ifdef CONFIG_TRINITY_FPGA
+/* Workaround for the FPGA development board */
+#include "trinity_hwmem_iommu_helper.h"
+#endif
+
+/* Register offsets for NPU CP (Config) */
+#define OFFSET_CP_INFO     (0x000) /* Processor Information */
+#define OFFSET_CP_PROC_STAT (0x010) /* Processor Status */
+#define OFFSET_CP_PROC_SET  (0x014) /* Processor Control (Set) */
+#define OFFSET_CP_PROC_CLR  (0x018) /* Processor Control (Clear) */
+#define OFFSET_CP_IMIF_BASE (0x024) /* Instruction Base Address (DRAM) */
+#define OFFSET_CP_CNT_CFG   (0x200) /* CP Performance Counter */
+
+/* Register offsets for NPU CP (IDU Setup) */
+#define OFFSET_NPU_PROG_BASE (0x100) /* GPR00: Instruction Base Address */
+#define OFFSET_NPU_PROG_SIZE (0x104) /* GPR01: Program Size */
+#define OFFSET_NPU_SEGT_ADDR (0x108) /* GPR02: Segment Table Address */
+#define OFFSET_NPU_PROF_ADDR (0x10C) /* GPR03: NPU Profiling Address */
+#define OFFSET_NPU_PROF_SIZE (0x110) /* GPR04: NPU Profiling Size */
+#define OFFSET_NPU_BACK_ADDR (0x114) /* GRP05: NPU Context Backup Address */
+#define OFFSET_NPU_BACK_SIZE (0x118) /* GRP06: NPU Context Backup Size */
+#define OFFSET_NPU_PC       (0x11C) /* GRP07: NPU Program Counter */
+
+/* Register offsets for NPU CP (Commands) */
+#define OFFSET_NPU_CMD_READY (0x124) /* GRP09: Command Ready Status */
+#define OFFSET_NPU_CMD_BASE  (0x128) /* GRP10: Command Base Address */
+#define OFFSET_NPU_CMD_REQ   (0x12C) /* GRP11: Command Request Slots (not used) */
+#define OFFSET_NPU_CMD_FREE  (0x130) /* GRP12: Command Free Slots */
+
+/* Register offsets for NPU CP (Cbox Setup) */
+#define OFFSET_NPU_CBOX_BASE (0x134) /* GRP13: NPU CBOX BASE */
+
+/* Register offsets for Debugging */
+#define OFFSET_NPU_IDU_VERSION (0x138) /* GRP14: NPU IDU VERSION */
+#define OFFSET_NPU_IDU_STAGE   (0x13C) /* GRP15: NPU IDU STAGE */
+
+#define OFFSET_NPU_CP_DMAI_EADDR (0x300) /* CP DMA Source Address */
+#define OFFSET_NPU_CP_DMAI_IADDR (0x304) /* CP DMA Dest Address */
+#define OFFSET_NPU_CP_DMAI_TSIZE (0x308) /* CP DMA Transfer Size */
+#define OFFSET_NPU_CP_DMAI_CONTR (0x310) /* CP DMA Status */
+#define OFFSET_NPU_CP_DMAI_CMDID (0x314) /* CP DMA Command ID */
+#define OFFSET_NPU_CP_DMAI_LSTID                                               \
+       (0x318) /* CP DMA Command ID of the last transfer */
+
+#define OFFSET_NPU_DLA_DMAI_EADDR (0x1000) /* DLA Input External Address */
+#define OFFSET_NPU_DLA_DMAI_EYMOD                                              \
+       (0x1004) /* DLA Input External Address Y Modifier */
+#define OFFSET_NPU_DLA_DMAI_EZMOD                                              \
+       (0x1008) /* DLA Input External Address Z Modifier */
+#define OFFSET_NPU_DLA_DMAI_IADDR (0x100C) /* DLA Input Internal Address */
+#define OFFSET_NPU_DLA_DMAI_IYMOD                                              \
+       (0x1010) /* DLA Input Internal Address Y Modifier */
+#define OFFSET_NPU_DLA_DMAI_IZMOD                                              \
+       (0x1014) /* DLA Input Internal Address Z Modifier */
+#define OFFSET_NPU_DLA_DMAI_SIZE0 (0x1018) /* DLA Input Data Size 0 */
+#define OFFSET_NPU_DLA_DMAI_SIZE1 (0x101C) /* DLA Input Data Size 1 */
+#define OFFSET_NPU_DLA_DMAI_CTRL  (0x1020) /* DLA Input Channel Status */
+
+#define OFFSET_NPU_DLA_DMAO_EADDR (0x1080) /* DLA Output External Address */
+#define OFFSET_NPU_DLA_DMAO_EYMOD                                              \
+       (0x1084) /* DLA Output External Address Y Modifier */
+#define OFFSET_NPU_DLA_DMAO_EZMOD                                              \
+       (0x1088) /* DLA Output External Address Z Modifier */
+#define OFFSET_NPU_DLA_DMAO_IADDR (0x108C) /* DLA Output Internal Address */
+#define OFFSET_NPU_DLA_DMAO_IYMOD                                              \
+       (0x1090) /* DLA Output Internal Address Y Modifier */
+#define OFFSET_NPU_DLA_DMAO_IZMOD                                              \
+       (0x1094) /* DLA Output Internal Address Z Modifier */
+#define OFFSET_NPU_DLA_DMAO_SIZE0 (0x1098) /* DLA Output Data Size 0 */
+#define OFFSET_NPU_DLA_DMAO_SIZE1 (0x109C) /* DLA Output Data Size 1 */
+#define OFFSET_NPU_DLA_DMAO_CTRL  (0x10A0) /* DLA Output Channel Status */
+
+#define OFFSET_NPU_DLA_CORE_OPC               (0x1100) /* DLA Operation Code */
+#define OFFSET_NPU_DLA_CORE_WIND_CFG   (0x1104)
+#define OFFSET_NPU_DLA_CORE_SIZE0      (0x1108)
+#define OFFSET_NPU_DLA_CORE_SIZE1      (0x110C)
+#define OFFSET_NPU_DLA_CORE_ZP        (0x1110)
+#define OFFSET_NPU_DLA_CORE_OUT_MULT   (0x1114)
+#define OFFSET_NPU_DLA_CORE_IN0_MULT   (0x1118)
+#define OFFSET_NPU_DLA_CORE_IN1_MULT   (0x111C)
+#define OFFSET_NPU_DLA_CORE_OUT_CFG    (0x1120)
+#define OFFSET_NPU_DLA_CORE_OUT_MOD    (0x1124)
+#define OFFSET_NPU_DLA_CORE_IN0_CFG    (0x1128)
+#define OFFSET_NPU_DLA_CORE_IN0_MOD    (0x112C)
+#define OFFSET_NPU_DLA_CORE_IN1_CFG    (0x1130)
+#define OFFSET_NPU_DLA_CORE_IN1_MOD    (0x1134)
+#define OFFSET_NPU_DLA_CORE_PARAM_ADDR (0x1138)
+#define OFFSET_NPU_DLA_CORE_PSUM_ADDR  (0x113C)
+#define OFFSET_NPU_DLA_CORE_CWGT_ADDR  (0x1140)
+#define OFFSET_NPU_DLA_CORE_CTRL       (0x1144) /* DLA Core Status */
+
+#define OFFSET_NPU_DSP_DMAI_EADDR (0x2000) /* DSP Input External Address */
+#define OFFSET_NPU_DSP_DMAI_EYMOD                                              \
+       (0x2004) /* DSP Input External Address Y Modifier */
+#define OFFSET_NPU_DSP_DMAI_EZMOD                                              \
+       (0x2008) /* DSP Input External Address Z Modifier */
+#define OFFSET_NPU_DSP_DMAI_IADDR (0x200C) /* DSP Input Internal Address */
+#define OFFSET_NPU_DSP_DMAI_IYMOD                                              \
+       (0x2010) /* DSP Input Internal Address Y Modifier */
+#define OFFSET_NPU_DSP_DMAI_IZMOD                                              \
+       (0x2014) /* DSP Input Internal Address Z Modifier */
+#define OFFSET_NPU_DSP_DMAI_SIZE0 (0x2018) /* DSP Input Data Size 0 */
+#define OFFSET_NPU_DSP_DMAI_SIZE1 (0x201C) /* DSP Input Data Size 1 */
+#define OFFSET_NPU_DSP_DMAI_CTRL  (0x2020) /* DSP Input Channel Status */
+
+#define OFFSET_NPU_DSP_DMAO_EADDR (0x2080) /* DSP Output External Address */
+#define OFFSET_NPU_DSP_DMAO_EYMOD                                              \
+       (0x2084) /* DSP Output External Address Y Modifier */
+#define OFFSET_NPU_DSP_DMAO_EZMOD                                              \
+       (0x2088) /* DSP Output External Address Z Modifier */
+#define OFFSET_NPU_DSP_DMAO_IADDR (0x208C) /* DSP Output Internal Address */
+#define OFFSET_NPU_DSP_DMAO_IYMOD                                              \
+       (0x2090) /* DSP Output Internal Address Y Modifier */
+#define OFFSET_NPU_DSP_DMAO_IZMOD                                              \
+       (0x2094) /* DSP Output Internal Address Z Modifier */
+#define OFFSET_NPU_DSP_DMAO_SIZE0 (0x2098) /* DSP Output Data Size 0 */
+#define OFFSET_NPU_DSP_DMAO_SIZE1 (0x209C) /* DSP Output Data Size 1 */
+#define OFFSET_NPU_DSP_DMAO_CTRL  (0x20A0) /* DSP Output Channel Status */
+#define OFFSET_NPU_DSP_CORE_CTRL  (0x2140) /* DSP Core Status */
+
+/* Register offsets for NPU DSP */
+#define OFFSET_DSP_INFO             (0x000) /* Processor Information */
+#define OFFSET_DSP_PROC_STAT (0x010) /* Processor Status */
+#define OFFSET_DSP_PROC_SET  (0x014) /* Processor Control (Set) */
+#define OFFSET_DSP_PROC_CLR  (0x018) /* Processor Control (Clear) */
+#define OFFSET_DSP_IMIF_BASE (0x024) /* Instruction Base Address (DRAM) */
+
+/* Register offsets for NPU ComBox (IRQ) */
+#define OFFSET_CBOX_EXT_IRQ_MSK (0x100) /* External IRQ Output Mask */
+#define OFFSET_CBOX_EXT_IRQ_STA (0x104) /* External IRQ Output Status */
+#define OFFSET_CBOX_CP_SWI_CLR (0x134) /* CP IRQ output Clear */
+#define OFFSET_CBOX_DSP_SWI_CLR (0x154) /* DSP IRQ output Clear */
+
+/* Location of bits inside corresponding registers */
+#define BIT_CLR_IRQ_OUT          BIT(24)
+#define BIT_CLR_PAUSE    BIT(0)
+#define BIT_SET_SEND_EVT1 BIT(18)
+#define BIT_SET_PAUSE    BIT(0)
+#define BIT_STAT_PAUSED          BIT(1)
+
+/* Performance counter configurations */
+#define BIT_CNT_DST_EN BIT(6)
+#define BIT_CNT_IST_EN BIT(5)
+#define BIT_CNT_ST_EN  BIT(4)
+#define BIT_CNT_FR_EN  BIT(0)
+
+/* Bit masks */
+#define MASK_DSP_SWI_STA BIT_MASK(1)
+#define MASK_CP_SWI_STA         BIT_MASK(0)
+
+#define MASK_STAT_WFE_PARAM     GENMASK(14, 6)
+#define MASK_STAT_WFE_PARAM_EVT1 BIT_MASK(8)
+#define MASK_STAT_WFE           BIT_MASK(5)
+#define MASK_STAT_PAUSED        BIT_MASK(1)
+#define MASK_STAT_PAUSE                 BIT_MASK(0)
+
+#define VER_MAJOR (2)
+#define VER_MINOR (0)
+#define VER_EXTRA (0)
+
+/*
+ * read_idu_file() - read an IDU firmware image; kernel_read()'s signature
+ * changed in v4.14, hence the two variants.
+ *
+ * Fix: the expansion previously referenced 'filp' instead of the macro
+ * parameter 'file', so the macro compiled only when the caller's variable
+ * happened to be named 'filp'.
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)
+#define read_idu_file(file, pos, addr, size) kernel_read(file, pos, addr, size)
+#else
+#define read_idu_file(file, pos, addr, size) kernel_read(file, addr, size, &pos)
+#endif
+
+/** Macros for Instruction Decode Unit (IDU) */
+#define TRIV2_IDU_DIRPATH_FMT "/lib/modules/%s/kernel/soc/idu"
+#define TRIV2_IDU_MAX_SECTORS (3)
+#define TRIV2_IDU_ZEROIDX     (0)
+#define TRIV2_IDU_DATAIDX     (1)
+#define TRIV2_IDU_CODEIDX     (2)
+#define TRIV2_IDU_ADDR(addr)  ((uint32_t)(addr))
+#define TRIV2_IDU_MAXSIZE     (1 << 20) /* 1 MiB */
+
+#define TRIV2_IDU_CP_DSPM_SIZE (0x10000)
+
+#define TRIV2_IDU_MASK_MAJOR (0xFF000000)
+#define TRIV2_IDU_MASK_MINOR (0x00FFF000)
+#define TRIV2_IDU_MASK_EXTRA (0x00000FFF)
+
+#define TRIV2_IDU_SHIFT_MAJOR (24)
+#define TRIV2_IDU_SHIFT_MINOR (12)
+
+#define TRIV2_MODEL_HASH_BITS     (8)
+#define TRIV2_MODEL_HASH_SIZE     (1 << TRIV2_MODEL_HASH_BITS)
+#define TRIV2_PROFILE_HASH_BITS           (6)
+/* Fix: expanded TRINITY_PROFILE_HASH_BITS, which is not defined anywhere in
+ * this file; the bit-width macro declared just above is TRIV2_PROFILE_HASH_BITS
+ * (the pattern matches TRIV2_MODEL_HASH_SIZE above). */
+#define TRIV2_PROFILE_HASH_SIZE           (1 << TRIV2_PROFILE_HASH_BITS)
+#define TRIV2_PROFILE_HASH_KEY(id) (hash_long((id), TRIV2_PROFILE_HASH_BITS))
+
+#define TRIV2_MAX_SEGMENTS (256)
+/** Fits in a single 4K Page */
+#define TRIV2_MAX_CMDSLOTS (PAGE_SIZE / sizeof(struct triv2_cmd))
+
+#define TRIV2_DRV_GET_PDATA(drv)    ((struct triv2_pdata *)(drv->pdata))
+#define TRIV2_DRV_GET_CMD_INFO(drv) (&(TRIV2_DRV_GET_PDATA(drv)->cmd_info))
+#define TRIV2_DRV_GET_CMD_BUF(drv)  (&(TRIV2_DRV_GET_CMD_INFO(drv)->buf))
+#define TRIV2_DRV_GET_PROF_BUF(drv) (&(TRIV2_DRV_GET_PDATA(drv)->prof_buf))
+#define TRIV2_DRV_GET_BACK_BUF(drv) (&(TRIV2_DRV_GET_PDATA(drv)->back_buf))
+
+#define TRIV2_GET_CMD_FROM_SLOT(info, slot)                                    \
+       ((struct triv2_cmd *)(info->buf.vaddr +                                \
+                             slot * sizeof(struct triv2_cmd)))
+
+#define TRIV2_GET_REQ(req) (container_of(req, struct triv2_req, req))
+
+#define TRIV2_MAX_TENSORS    (16)
+#define TRIV2_MAX_CMD_SIZE   (512)
+#define TRIV2_MAX_BATCH_SIZE (32)
+
+#define TRIV2_DLA_GBUFFER_SIZE (0x80000)
+#define TRIV2_DSP_DSPM_OFFSET  (0x10000)
+
+#define HALF_PAGE_SIZE (PAGE_SIZE >> 1)
+
+/* 4MiB (~300ns to flush all caches) */
+#define TRIV2_CACHE_FLUSH_THRESHOLD (0x400000)
+#define TRIV2_KERN_TIMEOUT_RESET    (1000)
+
+/* Lifecycle states of a command slot. */
+enum triv2_cmd_status {
+       STATUS_CMD_NONE = 0,
+       STATUS_CMD_READY = 1,
+       STATUS_CMD_DONE = 2,
+};
+
+/** req command for triv2
+ *
+ * Fixed-size (TRIV2_MAX_CMD_SIZE = 512 bytes) packed command slot: the
+ * anonymous union pads the payload with 'reserved' so every slot occupies
+ * exactly 512 bytes.  NOTE(review): this layout appears to be consumed
+ * directly by the device/firmware — field order and sizes must not change;
+ * confirm against the firmware ABI.
+ */
+struct triv2_cmd {
+       union {
+               struct {
+                       uint32_t slot;
+                       uint32_t prog_addr;
+                       uint32_t prog_size;
+                       uint32_t segt_addr;
+                       uint32_t num_visa;
+
+                       uint32_t priority;
+                       uint32_t status;        /* enum triv2_cmd_status */
+                       uint32_t input_mode;
+                       uint32_t output_mode;
+
+                       /** for profiling */
+                       uint32_t profile_offset;
+
+                       /** for preemptive scheduling */
+                       uint32_t program_position;
+
+                       /** for batch processing */
+                       uint32_t batch_size;
+                       uint32_t curr_cnt;
+                       uint32_t in_addr[TRIV2_MAX_BATCH_SIZE];
+                       uint32_t out_addr[TRIV2_MAX_BATCH_SIZE];
+                       uint32_t poll_addr;
+                       uint32_t poll_magic;
+                       /* deprecated but keep for backward compatibiltiy */
+                       uint32_t in_seg_idx;
+                       uint32_t out_seg_idx;
+
+                       uint32_t total_cycles;
+
+                       /* kernel requests */
+                       uint32_t in_extern_seg_num;
+                       uint32_t out_extern_seg_num;
+                       uint32_t in_extern_seg_idx[TRIV2_MAX_TENSORS];
+                       uint32_t out_extern_seg_idx[TRIV2_MAX_TENSORS];
+               };
+               uint8_t reserved[TRIV2_MAX_CMD_SIZE];
+       };
+} __attribute__((packed));
+
+/*
+ * Command-slot bookkeeping.  'bitmap' tracks in-use slots (TRIV2_MAX_CMDSLOTS
+ * fits in one 4K page of triv2_cmd entries); 'reqs' maps slot -> request;
+ * 'buf' backs the slot array indexed by TRIV2_GET_CMD_FROM_SLOT().
+ * NOTE(review): 'lock' presumably guards bitmap/reqs — confirm at use sites.
+ */
+struct triv2_cmd_info {
+       DECLARE_BITMAP(bitmap, TRIV2_MAX_CMDSLOTS);
+       spinlock_t lock;
+
+       struct triv2_req *reqs[TRIV2_MAX_CMDSLOTS];
+       struct triv2_cmd cur_cmd;
+       struct trinity_resv_mem buf;
+};
+
+/* Hash-table node tying a command (and its request) to its driver. */
+struct triv2_hashed_cmd_info {
+       struct trinity_driver *drv;
+       struct hlist_bl_node hnode;
+       struct triv2_req *req;
+       struct triv2_cmd *cmd;
+};
+
+/* In/out segment indices and sizes for kernel-initiated requests. */
+struct triv2_kernel_req {
+       uint32_t in_seg_idx[TRIV2_MAX_TENSORS];
+       uint32_t in_seg_size[TRIV2_MAX_TENSORS];
+       uint32_t out_seg_idx[TRIV2_MAX_TENSORS];
+       uint32_t out_seg_size[TRIV2_MAX_TENSORS];
+};
+
+/* TRIV2-specific request; recovered from a trinity_req via TRIV2_GET_REQ()
+ * (container_of). */
+struct triv2_req {
+       struct trinity_req req;
+
+       struct trinity_hwmem_import *seg_import;
+
+       /* Assigned command slot index, or a sentinel when unassigned —
+        * TODO confirm sentinel value at assignment sites. */
+       int cmd_slot;
+
+       /** kernel requets */
+       struct triv2_kernel_req *kernel;
+
+       /** profiling */
+       uint32_t profile_offset;
+       uint32_t total_cycles;
+
+       /** misc */
+       uint32_t total_segment_size;
+#ifdef CONFIG_TRINITY_MONITOR
+       struct trinity_monitor_event *event;
+#endif
+};
+
+/* Instruction Decode Unit image: resident data/code sections plus the
+ * device DSPM address. */
+struct triv2_idu {
+       phys_addr_t *addrs;
+       size_t addr_num;
+       struct trinity_resv_mem data;
+       struct trinity_resv_mem code;
+       dma_addr_t dspm;
+};
+
+/* Per-device private data, linked into triv2_driver_list via 'list'. */
+struct triv2_pdata {
+       struct trinity_driver *drv;
+       struct list_head list;
+
+       /* idu */
+       struct triv2_idu idu_cp;
+       struct triv2_idu idu_dsp;
+       uint32_t idu_version;
+
+       /* command info. */
+       struct triv2_cmd_info cmd_info;
+
+       /* context switching */
+       struct trinity_resv_mem back_buf;
+
+       /* profiling: prof_htable is keyed by TRIV2_PROFILE_HASH_KEY(req_id);
+        * NOTE(review): prof_lock presumably serializes htable/prof_buf
+        * access — confirm at use sites. */
+       struct trinity_resv_mem prof_buf;
+       struct mutex prof_lock;
+       DECLARE_HASHTABLE(prof_htable, TRIV2_PROFILE_HASH_BITS);
+};
+
+static void triv2_handle_cmd_done(struct trinity_driver *drv,
+                                 struct triv2_cmd *cmd, bool timeout);
+static void triv2_setup_buffers(struct trinity_driver *drv);
+static int triv2_idu_load(struct trinity_driver *drv, const char *dirpath,
+                         bool load_files);
+
+static LIST_HEAD(triv2_driver_list);
+static struct hlist_bl_head triv2_model_node_hlist[TRIV2_MODEL_HASH_SIZE];
+static const char *triv2_op_names[] = { TRIV2_FOREACH_OPNAME(
+       TRIV2_GENERATE_OPNAME) };
+
+static void triv2_map_sched_data(struct trinity_driver *drv,
+                                struct trinity_req *req, struct triv2_cmd *cmd,
+                                void *sched_data);
+static void triv2_unmap_sched_data(struct trinity_driver *drv,
+                                  struct triv2_req *req,
+                                  struct triv2_cmd *cmd);
+
+/*
+ * triv2_find_profile() - look up the profile entry for @req_id in the
+ * per-device profile hash table.
+ *
+ * Returns the matching entry, or NULL if none exists.
+ */
+static struct triv2_profile *
+triv2_find_profile(const struct trinity_driver *drv, int req_id)
+{
+       struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+       unsigned long key = TRIV2_PROFILE_HASH_KEY(req_id);
+       struct triv2_profile *entry;
+
+       hash_for_each_possible (pdata->prof_htable, entry, hlist, key) {
+               if (entry->req_id == req_id)
+                       return entry;
+       }
+
+       return NULL;
+}
+
+/**
+ * @brief Release the profile buffer (if any) and reset its descriptor.
+ *
+ * A zeroed descriptor (vaddr == NULL) marks profiling as disabled.
+ */
+static void triv2_fini_profile(struct trinity_resv_mem *prof_buf)
+{
+       /* nothing to release if no profile buffer was ever allocated */
+       if (prof_buf->vaddr) {
+               trinity_free_from_resv_mem(prof_buf, false);
+               memset(prof_buf, '\x00', sizeof(*prof_buf));
+       }
+}
+
+/**
+ * @brief Enable or disable on-device profiling.
+ * @param profile_size requested profile buffer size in bytes; 0 disables
+ *
+ * When enabled, a page-aligned buffer is allocated from reserved memory and
+ * its device-visible address/size are programmed into the NPU's profiling
+ * registers; writing 0/0 disables profiling on the device side.
+ */
+static void triv2_init_profile(struct trinity_driver *drv,
+                              unsigned long profile_size)
+{
+       struct device *dev = drv_to_dev_ptr(drv);
+       struct trinity_resv_mem *prof_buf = TRIV2_DRV_GET_PROF_BUF(drv);
+
+       if (profile_size > 0) {
+               /* allocate profile buffer and enable it */
+               struct iommu_domain *domain;
+               phys_addr_t paddr;
+               int status;
+
+               /* drop any previously-enabled buffer first */
+               triv2_fini_profile(prof_buf);
+
+               profile_size = PAGE_ALIGN(profile_size);
+               status = trinity_alloc_from_resv_mem(profile_size, prof_buf,
+                                                    false);
+               if (status < 0) {
+                       dev_err(dev,
+                               "Couldn't allocate memory for profiling buffer: %d",
+                               status);
+                       return;
+               }
+
+               /* the device expects a physical (IDU-view) address, not the
+                * IOVA, so translate through the IOMMU domain if present */
+               domain = iommu_get_domain_for_dev(drv_to_dev_ptr(drv));
+               paddr = trinity_get_paddr(domain, prof_buf->daddr);
+               iowrite32(TRIV2_IDU_ADDR(paddr),
+                         trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+                                                OFFSET_NPU_PROF_ADDR));
+               iowrite32(prof_buf->size,
+                         trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+                                                OFFSET_NPU_PROF_SIZE));
+
+               if (drv->verbose)
+                       /* %lu: profile_size is unsigned long */
+                       dev_info(dev, "Profiling enabled (%lu bytes)",
+                                profile_size);
+       } else {
+               /* disable profiling */
+               triv2_fini_profile(prof_buf);
+
+               iowrite32(0, trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+                                                   OFFSET_NPU_PROF_ADDR));
+               iowrite32(0, trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+                                                   OFFSET_NPU_PROF_SIZE));
+               if (drv->verbose)
+                       dev_info(dev, "Profiling disabled");
+       }
+}
+
+/**
+ * @brief Fill in human-readable op names for each profiled op.
+ *
+ * The opcode values are written by the device; clamp out-of-range values
+ * instead of indexing past the end of triv2_op_names[].
+ */
+static void triv2_assign_opnames(struct triv2_cmd_profile *cmd)
+{
+       struct triv2_op_profile *ops = cmd->profile_ops;
+       uint32_t i;
+
+       for (i = 0; i < cmd->total_ops; i++) {
+               if (ops[i].opcode < ARRAY_SIZE(triv2_op_names))
+                       snprintf(ops[i].op_name, TRIV2_MAX_OPNAME, "%s",
+                                triv2_op_names[ops[i].opcode]);
+               else
+                       snprintf(ops[i].op_name, TRIV2_MAX_OPNAME, "%s",
+                                "unknown");
+       }
+}
+
+/**
+ * @brief Snapshot the device-written profile data for a finished request.
+ * @returns 0 on success (or when profiling is disabled), negative errno
+ *          on invalid device-provided data or allocation failure
+ *
+ * Copies the per-command profile record out of the shared profile buffer
+ * into a host-side cache entry (keyed by request id) under prof_lock.
+ * Both the offset and the op count come from the device, so both are
+ * bounds-checked against the profile buffer before they are trusted.
+ */
+static int32_t triv2_check_profile(struct trinity_driver *drv,
+                                  struct trinity_req *req)
+{
+       struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+       struct triv2_req *t_req = TRIV2_GET_REQ(req);
+       struct trinity_resv_mem *profile_buf;
+       struct triv2_cmd_profile *profile_cmd;
+       struct triv2_cmd_profile *profile_cmd_new;
+       struct triv2_profile *profile;
+
+       uint32_t offset = t_req->profile_offset;
+       uint32_t total_ops, total_size;
+
+       profile_buf = TRIV2_DRV_GET_PROF_BUF(drv);
+       if (!profile_buf->vaddr)
+               return 0; /* profiling disabled */
+
+       /* the fixed-size record header must fit inside the buffer before we
+        * dereference anything at 'offset' */
+       if (offset >= profile_buf->size ||
+           profile_buf->size - offset < sizeof(*profile_cmd)) {
+               dev_err(drv_to_dev_ptr(drv),
+                       "Invalid profile offset detected: 0x%x", offset);
+               return -EINVAL;
+       }
+
+       profile_cmd = (struct triv2_cmd_profile *)((char *)profile_buf->vaddr +
+                                                  offset);
+       profile_cmd->total_cycles = t_req->total_cycles;
+
+       total_ops = profile_cmd->total_ops;
+       /* reject op counts whose trailing array would overflow total_size or
+        * run past the end of the profile buffer */
+       if (total_ops > (profile_buf->size - offset - sizeof(*profile_cmd)) /
+                               sizeof(struct triv2_op_profile)) {
+               dev_err(drv_to_dev_ptr(drv),
+                       "Invalid profile op count detected: %u", total_ops);
+               return -EINVAL;
+       }
+       total_size = sizeof(struct triv2_cmd_profile) +
+                    total_ops * sizeof(struct triv2_op_profile);
+
+       profile_cmd_new = vzalloc(total_size);
+       if (!profile_cmd_new) {
+               dev_err(drv_to_dev_ptr(drv),
+                       "Failed to allocate profile cmd data\n");
+               return -ENOMEM;
+       }
+
+       mutex_lock(&pdata->prof_lock);
+
+       profile = req->stat->profile;
+       if (profile) {
+               /* reuse the existing cache entry; swap in the new snapshot */
+               BUG_ON(!profile->data);
+               vfree(profile->data);
+               profile->data = profile_cmd_new;
+       } else {
+               /* first profile for this request id: create a cache entry */
+               int req_id = req->input.config.req_id;
+               unsigned long key = TRIV2_PROFILE_HASH_KEY(req_id);
+
+               profile = vzalloc(sizeof(struct triv2_profile));
+               if (!profile) {
+                       dev_err(drv_to_dev_ptr(drv),
+                               "Failed to allocate profile data\n");
+                       vfree(profile_cmd_new);
+                       mutex_unlock(&pdata->prof_lock);
+                       return -ENOMEM;
+               }
+               profile->req_id = req_id;
+               profile->data = profile_cmd_new;
+
+               hash_add(pdata->prof_htable, &profile->hlist, key);
+
+               req->stat->profile = profile;
+       }
+       memcpy(profile_cmd_new, profile_cmd, total_size);
+       triv2_assign_opnames(profile_cmd_new);
+
+       mutex_unlock(&pdata->prof_lock);
+       return 0;
+}
+
+/**
+ * @brief Get state (TRINITY_STATE_READY/TRINITY_STATE_PAUSE) of the device.
+ * @returns (enum triv2_state) TRINITY_STATE_READY (i.e., 1) or TRINITY_STATE_PAUSE (i.e., 0 )
+ * according to the state of the device
+ */
+int32_t triv2_get_state(const struct trinity_driver *drv)
+{
+       uint32_t ready = ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_CMD_READY);
+
+       return (ready == 1) ? TRINITY_STATE_READY : TRINITY_STATE_PAUSE;
+}
+
+/**
+ * @brief Set state of the device to TRINITY_STATE_READY (1) or TRINITY_STATE_PAUSE (0)
+ *
+ * PAUSE sets the pause bit on both the CP and DSP processors; READY clears
+ * it on both and additionally enables the performance counters.  The iowrite32(0)
+ * after each trinity_set_bit() appears to clear the trigger register —
+ * NOTE(review): confirm against the register manual.
+ */
+static void triv2_set_state(const struct trinity_driver *drv,
+                           enum trinity_state state)
+{
+       void __iomem *addr;
+
+       switch (state) {
+       case TRINITY_STATE_PAUSE:
+               /* CP */
+               addr = trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+                                             OFFSET_CP_PROC_SET);
+               trinity_set_bit(BIT_SET_PAUSE, addr);
+               iowrite32(0, addr);
+
+               /* DSP */
+               addr = trinity_get_iomem_addr(drv->mmreg_vaddr[1],
+                                             OFFSET_DSP_PROC_SET);
+               trinity_set_bit(BIT_SET_PAUSE, addr);
+               iowrite32(0, addr);
+
+               break;
+       case TRINITY_STATE_READY:
+               /* CP */
+               addr = trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+                                             OFFSET_CP_PROC_CLR);
+               trinity_set_bit(BIT_CLR_PAUSE, addr);
+               iowrite32(0, addr);
+
+               /* DSP */
+               addr = trinity_get_iomem_addr(drv->mmreg_vaddr[1],
+                                             OFFSET_DSP_PROC_CLR);
+               trinity_set_bit(BIT_CLR_PAUSE, addr);
+               iowrite32(0, addr);
+
+               /* Performance Counter */
+               addr = trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+                                             OFFSET_CP_CNT_CFG);
+               trinity_set_bit(BIT_CNT_IST_EN | BIT_CNT_FR_EN, addr);
+               break;
+       default:
+               dev_err(drv_to_dev_ptr(drv),
+                       "failed to set state of the NPU state: %d", state);
+       }
+}
+
+/**
+ * @brief synchronize the segment table entries
+ *
+ * On ARM, flushes the CPU data caches so the device observes segment data
+ * written by the CPU.  Large models and kernel requests flush all caches
+ * (cheaper than per-segment maintenance at that size); otherwise each
+ * imported segment is flushed individually.  No-op on other architectures.
+ * @returns always 0
+ */
+static int triv2_sync_segt_entries(const struct trinity_driver *drv,
+                                  struct triv2_req *req)
+{
+#ifdef ARM
+       struct trinity_input *input = &(req->req.input);
+       int i;
+
+       /* flush all caches for heavy models */
+       if (req->total_segment_size > TRIV2_CACHE_FLUSH_THRESHOLD ||
+           /* cannot handle external segments for kernel requests */
+           req->kernel != NULL) {
+               flush_cache_all();
+               return 0;
+       }
+
+       for (i = 0; i < input->config.num_segments; ++i)
+               __cpuc_flush_dcache_area(req->seg_import[i].addr,
+                                        req->seg_import[i].buf->size);
+#endif
+       return 0;
+}
+
+/**
+ * @brief Kick the CP processor by raising its send-event bit.
+ *
+ * The pointer is MMIO, so it carries the __iomem annotation (sparse would
+ * otherwise flag the plain void * used before).
+ */
+static void triv2_wakeup_cp(const struct trinity_driver *drv)
+{
+       void __iomem *addr =
+               trinity_get_iomem_addr(drv->mmreg_vaddr[0], OFFSET_CP_PROC_SET);
+#ifdef CONFIG_TRINITY_DEBUG
+       trace_triv2_wakeup_cp(drv->dev_id);
+#endif
+       trinity_set_bit(BIT_SET_SEND_EVT1, addr);
+}
+
+/**
+ * @brief Forcefully complete every occupied command slot (timeout path).
+ *
+ * Walks the slot bitmap under the cmd_info spinlock and finishes each
+ * in-flight command with timeout=true.  triv2_handle_cmd_done() clears the
+ * slot's bit, so find_next_bit() from slot+1 makes forward progress.
+ */
+static void triv2_cancel_reqs(struct trinity_driver *drv)
+{
+       struct triv2_cmd_info *info;
+       struct triv2_cmd *cmd;
+       unsigned long flags;
+       int slot;
+
+       info = TRIV2_DRV_GET_CMD_INFO(drv);
+       spin_lock_irqsave(&info->lock, flags);
+
+       slot = find_first_bit(info->bitmap, TRIV2_MAX_CMDSLOTS);
+       while (slot < TRIV2_MAX_CMDSLOTS) {
+               cmd = TRIV2_GET_CMD_FROM_SLOT(info, slot);
+               triv2_handle_cmd_done(drv, cmd, true);
+               slot = find_next_bit(info->bitmap, TRIV2_MAX_CMDSLOTS,
+                                    slot + 1);
+       }
+
+       spin_unlock_irqrestore(&info->lock, flags);
+}
+
+/**
+ * @brief Wait (up to ~1 second) for all in-flight command slots to drain.
+ *
+ * Polls the slot bitmap roughly once per millisecond, dropping the spinlock
+ * while sleeping.  If the retry budget is exhausted the function gives up
+ * silently with requests still pending.
+ */
+static void triv2_drain_reqs(struct trinity_driver *drv)
+{
+       struct triv2_cmd_info *info;
+       unsigned long flags;
+       int cur_retries, max_retries = 1000; /* 1-sec */
+       int slot;
+
+       cur_retries = 0;
+       info = TRIV2_DRV_GET_CMD_INFO(drv);
+retry:
+       spin_lock_irqsave(&info->lock, flags);
+
+       /* wait until all bits are unset */
+       slot = find_first_bit(info->bitmap, TRIV2_MAX_CMDSLOTS);
+       if (slot < TRIV2_MAX_CMDSLOTS) {
+               spin_unlock_irqrestore(&info->lock, flags);
+
+               usleep_range(900, 1100);
+               if (cur_retries++ < max_retries)
+                       goto retry;
+
+               /* budget exhausted: re-take the lock so the common unlock
+                * below stays balanced */
+               spin_lock_irqsave(&info->lock, flags);
+       }
+
+       spin_unlock_irqrestore(&info->lock, flags);
+}
+
+/**
+ * @brief Hardware-reset one device, then restore its buffers and IDU
+ *        firmware (without re-reading firmware files from disk).
+ */
+static void triv2_reset_devices(struct trinity_driver *drv, bool do_test)
+{
+       trinity_reset_device(drv_to_dev_ptr(drv), do_test);
+
+       triv2_setup_buffers(drv);
+       triv2_idu_load(drv, NULL, false);
+}
+
+/**
+ * @brief Full recovery path: reset every triv2 device in the system.
+ *
+ * Sequence: lock all drivers -> forbid runtime PM suspend -> suspend the
+ * scheduler -> cancel all in-flight requests -> reset each device (self-test
+ * only on the first) -> resume scheduler/PM -> unlock.  Locks are taken in
+ * list order on entry and released in the same order on exit.
+ */
+static void triv2_reset(struct trinity_driver *drv)
+{
+       struct device *dev = drv_to_dev_ptr(drv);
+       struct triv2_pdata *pdata;
+       bool do_test;
+
+       /* FIXME: The HW reset should handle all the devices simultaneously */
+
+       list_for_each_entry (pdata, &triv2_driver_list, list)
+               mutex_lock(&pdata->drv->lock);
+
+       dev_err(dev, "NPU HW reset started");
+
+       /* block runtime pm suspend */
+       trinity_pm_runtime_forbid(dev);
+
+       /* block new incoming requests first */
+       trinity_sched_suspend();
+
+       /* cancel all requests by force */
+       list_for_each_entry (pdata, &triv2_driver_list, list)
+               triv2_cancel_reqs(pdata->drv);
+
+       /* wait some pending requests in NPU */
+       msleep(100);
+
+       /* reset all devices */
+       do_test = true;
+       list_for_each_entry (pdata, &triv2_driver_list, list) {
+               triv2_reset_devices(pdata->drv, do_test);
+               /* only opened devices go back to the READY state */
+               if (pdata->drv->opened > 0)
+                       triv2_set_state(pdata->drv, TRINITY_STATE_READY);
+               do_test = false;
+       }
+
+       /* resume scheduler */
+       trinity_sched_resume();
+
+       trinity_pm_runtime_allow(dev);
+
+       dev_err(dev, "NPU HW reset completed");
+
+       list_for_each_entry (pdata, &triv2_driver_list, list)
+               mutex_unlock(&pdata->drv->lock);
+}
+
+#ifdef CONFIG_TRINITY_MONITOR
+/* execution stages reported by the IDU firmware through the IDU_STAGE
+ * register; used only for timeout diagnostics below */
+enum triv2_idu_stage {
+       IDU_STAGE_UNKNOWN = 0,
+       IDU_STAGE_WAITING,
+       IDU_STAGE_GET_CMD,
+       IDU_STAGE_RUN_CMD,
+       IDU_STAGE_SWAP_OUT,
+       IDU_STAGE_SWAP_IN,
+       IDU_STAGE_SEND_IRQ,
+};
+
+/**
+ * @brief Translate the device's current IDU stage into a printable string.
+ * @returns a static string; "unknown" when drv is NULL or the register
+ *          holds an out-of-range value
+ */
+static const char *triv2_debug_idu_stage(struct trinity_driver *drv)
+{
+       static const char *debug_stage_msg[] = {
+               [IDU_STAGE_UNKNOWN] = "unknown",
+               [IDU_STAGE_WAITING] = "wait event",
+               [IDU_STAGE_GET_CMD] = "get command",
+               [IDU_STAGE_RUN_CMD] = "run command",
+               [IDU_STAGE_SWAP_OUT] = "swap out",
+               [IDU_STAGE_SWAP_IN] = "swap in",
+               [IDU_STAGE_SEND_IRQ] = "send irq",
+       };
+       u32 stage;
+
+       if (!drv)
+               return debug_stage_msg[IDU_STAGE_UNKNOWN];
+
+       stage = ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_IDU_STAGE);
+       if (stage > IDU_STAGE_SEND_IRQ)
+               stage = IDU_STAGE_UNKNOWN;
+
+       return debug_stage_msg[stage];
+}
+
+/**
+ * @brief Dump the request's segment table entries to the kernel log.
+ *
+ * Reads each 32-bit entry back through the imported segment-table mapping.
+ */
+static void triv2_dump_segment_table(struct trinity_driver *drv,
+                                    struct triv2_req *t_req)
+{
+       struct device *dev = drv_to_dev_ptr(drv);
+       struct trinity_req *req;
+       struct trinity_input *input;
+       struct trinity_hwmem_import *segt_import;
+       u32 i;
+
+       req = &(t_req->req);
+       input = &(req->input);
+       segt_import = &(input->import_info);
+
+       dev_err(dev, "- segment table dump");
+       for (i = 0; i < input->config.num_segments; ++i)
+               dev_err(dev, "\t[%u] = %08x", i,
+                       ioread32(segt_import->addr + i * sizeof(u32)));
+}
+
+/**
+ * @brief Dump batching/external-segment details of a kernel-mode command.
+ *
+ * Only meaningful for batched (kernel) requests; a zero batch_size marks a
+ * regular request and is skipped.
+ */
+static void triv2_dump_kernel_request(struct trinity_driver *drv,
+                                     struct triv2_cmd *cmd)
+{
+       struct device *dev = drv_to_dev_ptr(drv);
+       u32 i;
+
+       if (cmd->batch_size == 0)
+               return;
+
+       dev_err(dev,
+               "- batch_size: %u, curr_cnt: %u, poll_addr: 0x%x, poll_magic: 0x%x",
+               cmd->batch_size, cmd->curr_cnt, cmd->poll_addr,
+               cmd->poll_magic);
+       dev_err(dev, "- extern_input_num: %u, extern_output_num: %u",
+               cmd->in_extern_seg_num, cmd->out_extern_seg_num);
+       for (i = 0; i < cmd->in_extern_seg_num; i++)
+               dev_err(dev, "\tin_seg_idx[%u] = %u", i,
+                       cmd->in_extern_seg_idx[i]);
+       for (i = 0; i < cmd->out_extern_seg_num; i++)
+               dev_err(dev, "\tout_seg_idx[%u] = %u", i,
+                       cmd->out_extern_seg_idx[i]);
+       /* per-batch external buffer addresses */
+       for (i = 0; i < cmd->in_extern_seg_num * cmd->batch_size; i++)
+               dev_err(dev, "\tin_addr[%u] = 0x%x", i, cmd->in_addr[i]);
+       for (i = 0; i < cmd->out_extern_seg_num * cmd->batch_size; i++)
+               dev_err(dev, "\tout_addr[%u] = 0x%x", i, cmd->out_addr[i]);
+}
+
+/**
+ * @brief Dump every valid command slot for timeout diagnostics.
+ *
+ * Runs lock-free by design (crash/timeout context): slots whose stored
+ * index doesn't match, or which have no associated request, are skipped.
+ */
+static void triv2_dump_command_slots(struct trinity_driver *drv)
+{
+       struct device *dev;
+       struct triv2_cmd_info *info;
+       struct triv2_cmd *cmd;
+       struct triv2_req *req;
+       u32 slot;
+
+       if (!drv)
+               return;
+
+       dev = drv_to_dev_ptr(drv);
+       info = TRIV2_DRV_GET_CMD_INFO(drv);
+
+#ifdef ARM
+       /* make sure we read the device's latest view of the slots */
+       flush_cache_all();
+#endif
+
+       /* skip lock: just dump all slots */
+       for (slot = 0; slot < TRIV2_MAX_CMDSLOTS; slot++) {
+               cmd = TRIV2_GET_CMD_FROM_SLOT(info, slot);
+
+               /* skip invalid slot */
+               if (cmd->slot != slot)
+                       continue;
+
+               req = info->reqs[slot];
+               if (!req)
+                       continue;
+
+               dev_err(dev, "slot: %u, num_visa: %u, priority: %u, status: %u",
+                       cmd->slot, cmd->num_visa, cmd->priority, cmd->status);
+               dev_err(dev,
+                       "- prog_addr: 0x%x prog_size: 0x%x, segt_addr: 0x%x",
+                       cmd->prog_addr, cmd->prog_size, cmd->segt_addr);
+
+               triv2_dump_segment_table(drv, req);
+               triv2_dump_kernel_request(drv, cmd);
+       }
+}
+
+/**
+ * @brief Dump the NPU's memory-mapped registers for timeout diagnostics.
+ *
+ * Prints program/DMA state for the CP, then DLA core/DMAI/DMAO, then DSP
+ * DMAI/DMAO.  Detailed sub-registers of each engine are printed only when
+ * its control register reads non-zero (i.e., the engine looks active).
+ */
+static void triv2_dump_npu_mmregs(struct trinity_driver *drv)
+{
+       struct device *dev;
+
+       dev = drv_to_dev_ptr(drv);
+
+       /* program counters and segment table base */
+       dev_err(dev,
+               "NPU_PROG_BASE: 0x%x, NPU_PC: 0x%x, NPU_PROG_SIZE: 0x%x, NPU_SEGT_ADDR: 0x%x",
+               ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_PROG_BASE),
+               ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_PC),
+               ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_PROG_SIZE),
+               ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_SEGT_ADDR));
+
+       /* CP DMA-in engine */
+       dev_err(dev,
+               "CP_DMAI_CONTR: 0x%x, CP_DMAI_CMDID: 0x%x, CP_DMAI_LSTID: 0x%x",
+               ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_CP_DMAI_CONTR),
+               ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_CP_DMAI_CMDID),
+               ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_CP_DMAI_LSTID));
+       if (ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_CP_DMAI_CONTR) > 0) {
+               dev_err(dev,
+                       "\tCP_DMAI_EADDR: 0x%x, CP_DMAI_IADDR: 0x%x, CP_DMAI_TSIZE: 0x%x",
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_CP_DMAI_EADDR),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_CP_DMAI_IADDR),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_CP_DMAI_TSIZE));
+       }
+
+       /* DLA core and its DMA engines */
+       dev_err(dev,
+               "DLA_CORE_CTRL: 0x%x, DLA_DMAI_CTRL: 0x%x, DLA_DMAO_CTRL: 0x%x",
+               ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DLA_CORE_CTRL),
+               ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DLA_DMAI_CTRL),
+               ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DLA_DMAO_CTRL));
+       if (ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DLA_CORE_CTRL) > 0) {
+               dev_err(dev, "\tDLA_CORE_OPC: 0x%x, DLA_CORE_WIND_CFG: 0x%x",
+                       ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DLA_CORE_OPC),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_CORE_WIND_CFG));
+               dev_err(dev,
+                       "\tDLA_CORE_SIZE0: 0x%x, DLA_CORE_SIZE1: 0x%x, DLA_CORE_ZP: 0x%x",
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_CORE_SIZE0),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_CORE_SIZE1),
+                       ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DLA_CORE_ZP));
+               dev_err(dev,
+                       "\tDLA_CORE_OUT_MULT: 0x%x, DLA_CORE_IN0_MULT: 0x%x, DLA_CORE_IN1_MULT: 0x%x",
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_CORE_OUT_MULT),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_CORE_IN0_MULT),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_CORE_IN1_MULT));
+               dev_err(dev, "\tDLA_CORE_OUT_CFG: 0x%x, DLA_CORE_OUT_MOD: 0x%x",
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_CORE_OUT_CFG),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_CORE_OUT_MOD));
+               dev_err(dev, "\tDLA_CORE_IN0_CFG: 0x%x, DLA_CORE_IN0_MOD: 0x%x",
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_CORE_IN0_CFG),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_CORE_IN0_MOD));
+               dev_err(dev, "\tDLA_CORE_IN1_CFG: 0x%x, DLA_CORE_IN1_MOD: 0x%x",
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_CORE_IN1_CFG),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_CORE_IN1_MOD));
+               dev_err(dev,
+                       "\tDLA_CORE_PARAM_ADDR: 0x%x, DLA_CORE_PSUM_ADDR: 0x%x, DLA_CORE_CWGT_ADDR: 0x%x",
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_CORE_PARAM_ADDR),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_CORE_PSUM_ADDR),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_CORE_CWGT_ADDR));
+       }
+       if (ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DLA_DMAI_CTRL) > 0) {
+               dev_err(dev,
+                       "\tDLA_DMAI_EADDR: 0x%x, DLA_DMAI_EYMOD: 0x%x, DLA_DMAI_EZMOD: 0x%x",
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_DMAI_EADDR),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_DMAI_EYMOD),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_DMAI_EZMOD));
+               dev_err(dev,
+                       "\tDLA_DMAI_IADDR: 0x%x, DLA_DMAI_IYMOD: 0x%x, DLA_DMAI_IZMOD: 0x%x",
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_DMAI_IADDR),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_DMAI_IYMOD),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_DMAI_IZMOD));
+               dev_err(dev, "\tDLA_DMAI_SIZE0: 0x%x, DLA_DMAI_SIZE1: 0x%x",
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_DMAI_SIZE0),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_DMAI_SIZE1));
+       }
+       if (ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DLA_DMAO_CTRL) > 0) {
+               dev_err(dev,
+                       "\tDLA_DMAO_EADDR: 0x%x, DLA_DMAO_EYMOD: 0x%x, DLA_DMAO_EZMOD: 0x%x",
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_DMAO_EADDR),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_DMAO_EYMOD),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_DMAO_EZMOD));
+               dev_err(dev,
+                       "\tDLA_DMAO_IADDR: 0x%x, DLA_DMAO_IYMOD: 0x%x, DLA_DMAO_IZMOD: 0x%x",
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_DMAO_IADDR),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_DMAO_IYMOD),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_DMAO_IZMOD));
+               dev_err(dev, "\tDLA_DMAO_SIZE0: 0x%x, DLA_DMAO_SIZE1: 0x%x",
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_DMAO_SIZE0),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DLA_DMAO_SIZE1));
+       }
+
+       /* DSP core and its DMA engines */
+       dev_err(dev,
+               "DSP_CORE_CTRL: 0x%x, DSP_DMAI_CTRL: 0x%x, DSP_DMAO_CTRL: 0x%x",
+               ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DSP_CORE_CTRL),
+               ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DSP_DMAI_CTRL),
+               ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DSP_DMAO_CTRL));
+       if (ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DSP_DMAI_CTRL) > 0) {
+               dev_err(dev,
+                       "\tDSP_DMAI_EADDR: 0x%x, DSP_DMAI_EYMOD: 0x%x, DSP_DMAI_EZMOD: 0x%x",
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DSP_DMAI_EADDR),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DSP_DMAI_EYMOD),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DSP_DMAI_EZMOD));
+               dev_err(dev,
+                       "\tDSP_DMAI_IADDR: 0x%x, DSP_DMAI_IYMOD: 0x%x, DSP_DMAI_IZMOD: 0x%x",
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DSP_DMAI_IADDR),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DSP_DMAI_IYMOD),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DSP_DMAI_IZMOD));
+               dev_err(dev, "\tDSP_DMAI_SIZE0: 0x%x, DSP_DMAI_SIZE1: 0x%x",
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DSP_DMAI_SIZE0),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DSP_DMAI_SIZE1));
+       }
+       if (ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_DSP_DMAO_CTRL) > 0) {
+               dev_err(dev,
+                       "\tDSP_DMAO_EADDR: 0x%x, DSP_DMAO_EYMOD: 0x%x, DSP_DMAO_EZMOD: 0x%x",
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DSP_DMAO_EADDR),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DSP_DMAO_EYMOD),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DSP_DMAO_EZMOD));
+               dev_err(dev,
+                       "\tDSP_DMAO_IADDR: 0x%x, DSP_DMAO_IYMOD: 0x%x, DSP_DMAO_IZMOD: 0x%x",
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DSP_DMAO_IADDR),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DSP_DMAO_IYMOD),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DSP_DMAO_IZMOD));
+               dev_err(dev, "\tDSP_DMAO_SIZE0: 0x%x, DSP_DMAO_SIZE1: 0x%x",
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DSP_DMAO_SIZE0),
+                       ioread32(drv->mmreg_vaddr[0] +
+                                OFFSET_NPU_DSP_DMAO_SIZE1));
+       }
+}
+
+/**
+ * @brief Monitor callback fired when a kernel request exceeds its timeout.
+ * @param data the trinity_driver that owns the timed-out request
+ *
+ * Dumps diagnostics; with SR-NPU IOMMU support, additionally reports a
+ * fault and resets all devices if an IOMMU fault was indeed pending.
+ */
+static void triv2_monitor_timeout_cb(void *data)
+{
+       struct trinity_driver *drv;
+       struct device *dev;
+
+       if (!data)
+               return;
+
+       drv = data;
+       dev = drv_to_dev_ptr(drv);
+
+       dev_err(dev, "Request timeout detected (device_id: %u, stage: %s)",
+               drv->dev_id, triv2_debug_idu_stage(drv));
+
+       triv2_dump_npu_mmregs(drv);
+       triv2_dump_command_slots(drv);
+
+#ifdef CONFIG_SR_NPU_IOMMU
+       /* reset devices when iommu fault is detected */
+       if (report_iommu_fault(iommu_get_domain_for_dev(dev), dev, 0, 1) > 0)
+               triv2_reset(drv)
+#endif
+}
+
+/**
+ * @brief Register a timeout-watchdog event for a kernel (high-priority)
+ *        request; regular requests are not monitored.
+ *
+ * Best-effort: silently skips when no monitor event can be obtained.
+ */
+static void triv2_append_monitor_event(struct triv2_req *req)
+{
+       struct trinity_monitor_event *event;
+
+       /* for kernel requests only (i.e., high priority) */
+       if (!req->kernel)
+               return;
+
+       event = trinity_monitor_get_event();
+       if (!event)
+               return;
+
+       event->start_time = req->req.stat->scheduled;
+       event->timeout_ms = TRIV2_KERN_TIMEOUT_RESET;
+       event->cb = triv2_monitor_timeout_cb;
+       event->cb_data = req->req.drv;
+
+       req->event = event;
+       trinity_monitor_add_event(event);
+}
+#endif
+
+/**
+ * @brief trigger memory-mapped register for inference running
+ *
+ * Flushes the segment table, publishes the current slot bitmap to the
+ * CMD_REQ register, stamps scheduling time, and wakes the CP.  The wakeup
+ * is a hint only; the IDU re-reads the bitmap on its own as well.
+ */
+static void triv2_run_trigger(const struct trinity_driver *drv, int slot)
+{
+       struct triv2_cmd_info *cmd_info = TRIV2_DRV_GET_CMD_INFO(drv);
+       struct triv2_req *t_req = cmd_info->reqs[slot];
+
+#ifdef CONFIG_TRINITY_DEBUG
+       trace_triv2_run_trigger(drv->dev_id, slot);
+#endif
+       if (!t_req) {
+               dev_err(drv_to_dev_ptr(drv),
+                       "Unable to find the corresponding req");
+               return;
+       }
+
+       if (triv2_sync_segt_entries(drv, t_req) < 0)
+               dev_err(drv_to_dev_ptr(drv),
+                       "Unable to sync the segment table");
+
+       /* sync the current bitmap */
+       iowrite32(*cmd_info->bitmap,
+                 trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+                                        OFFSET_NPU_CMD_REQ));
+
+       t_req->req.stat->scheduled = ktime_get();
+       t_req->req.stat->completed = 0;
+
+#ifdef CONFIG_TRINITY_MONITOR
+       triv2_append_monitor_event(t_req);
+#endif
+
+       /* trigger the event (we do not assume that IDU always accepts this event) */
+       triv2_wakeup_cp(drv);
+}
+
+/**
+ * @brief Release the request's command slot and wipe the shared command.
+ *
+ * NOTE(review): callers appear to hold cmd_info->lock when mutating the
+ * bitmap/reqs array — confirm before adding new call sites.
+ */
+static void triv2_clear_cmd(struct trinity_driver *drv, struct triv2_req *req,
+                           struct triv2_cmd *cmd)
+{
+       struct triv2_cmd_info *cmd_info = TRIV2_DRV_GET_CMD_INFO(drv);
+
+       cmd_info->reqs[req->cmd_slot] = NULL;
+       clear_bit(req->cmd_slot, cmd_info->bitmap);
+       req->cmd_slot = -1;
+
+       /* the command lives in device-shared memory, hence memset_io() */
+       memset_io(cmd, '\x00', sizeof(struct triv2_cmd));
+}
+
+/**
+ * @brief Finalize a completed (or timed-out) command.
+ * @param timeout true when invoked from the cancel/timeout path
+ *
+ * Records timing statistics, saves cycle/profile info for later profile
+ * collection, tears down scheduler data and the command slot, then
+ * notifies the scheduler and (for user requests) the waiting caller.
+ */
+static void triv2_handle_cmd_done(struct trinity_driver *drv,
+                                 struct triv2_cmd *cmd, bool timeout)
+{
+       struct device *dev = drv_to_dev_ptr(drv);
+       struct triv2_cmd_info *cmd_info = TRIV2_DRV_GET_CMD_INFO(drv);
+       struct triv2_req *t_req;
+       struct trinity_req *req;
+       struct trinity_sched_desc *sched;
+       uint32_t slot = cmd->slot;
+       int64_t time_diff;
+
+       t_req = cmd_info->reqs[slot];
+       if (!t_req) {
+               dev_err(dev, "Failed to find the req\n");
+               return;
+       }
+
+#ifdef CONFIG_TRINITY_MONITOR
+       /* mark the watchdog event done so its timeout callback is skipped */
+       if (t_req->event)
+               atomic_set(&t_req->event->marker, 1);
+#endif
+
+       req = &(t_req->req);
+       req->stat->completed = ktime_get();
+       req->stat->status = TRINITY_REQ_STATUS_FINISHED;
+
+       time_diff = TIME_DIFF_US(req->stat->completed, req->stat->scheduled);
+       if (time_diff < 0) {
+               dev_warn(dev, "Detected invalid inference time of request\n");
+       } else {
+               req->stat->prev_time = (uint32_t)time_diff;
+               req->stat->prev_cycles = cmd->total_cycles;
+               req->stat->num_runs++;
+               req->stat->total_time += req->stat->prev_time;
+#ifdef CONFIG_TRINITY_DEBUG
+               trace_triv2_handle_cmd_done(drv->dev_id, cmd->slot,
+                                           cmd->total_cycles,
+                                           req->stat->prev_time);
+#endif
+       }
+
+       /* stash device-written results before the command is wiped */
+       t_req->total_cycles = cmd->total_cycles;
+       t_req->profile_offset = cmd->profile_offset;
+
+       triv2_unmap_sched_data(drv, t_req, cmd);
+       triv2_clear_cmd(drv, t_req, cmd);
+
+       /* notify to the scheduler */
+       sched = get_trinity_sched(req);
+       if (sched && sched->notify)
+               sched->notify(req, timeout);
+
+       /* notify to the caller */
+       if (!req->is_kernel)
+               complete_all(&req->complete);
+}
+
+/**
+ * @brief Prepare command info. for the target req before invoking
+ * @returns the allocated slot index on success, -EBUSY when all command
+ *          slots are currently occupied (the caller retries later)
+ *
+ * Builds the triv2_cmd from the model/input configuration, claims a free
+ * slot under the cmd_info spinlock, then copies the command into the
+ * device-shared slot buffer via memcpy_toio().
+ */
+static int32_t triv2_prepare_cmd(struct trinity_driver *drv,
+                                struct trinity_req *req, void *sched_data)
+{
+       struct triv2_cmd_info *cmd_info;
+       struct triv2_cmd cmd = { 0 };
+       struct triv2_req *t;
+
+       const struct trinity_model *model = req->model;
+       const struct trinity_input *input = &req->input;
+
+       int32_t slot;
+       struct iommu_domain *domain;
+       phys_addr_t paddr;
+       unsigned long flags;
+
+       /** Note that the program base is not behind iommu */
+       domain = iommu_get_domain_for_dev(drv_to_dev_ptr(drv));
+
+       paddr = trinity_get_paddr(domain, model->import_info.dma_addr);
+       cmd.prog_addr = TRIV2_IDU_ADDR(paddr);
+       cmd.prog_addr += model->config.program_offset_addr;
+       cmd.prog_size = model->config.program_size;
+
+       paddr = trinity_get_paddr(domain, input->import_info.dma_addr);
+       cmd.segt_addr = TRIV2_IDU_ADDR(paddr);
+       cmd.num_visa = model->config.num_visa_insts;
+
+       cmd.priority = input->config.priority;
+       cmd.input_mode = input->config.input_mode;
+       cmd.output_mode = input->config.output_mode;
+
+       /** Find an empty cmd slot in bitmap (need a spin lock) */
+       cmd_info = TRIV2_DRV_GET_CMD_INFO(drv);
+       t = TRIV2_GET_REQ(req);
+
+       spin_lock_irqsave(&cmd_info->lock, flags);
+
+       slot = find_first_zero_bit(cmd_info->bitmap, TRIV2_MAX_CMDSLOTS);
+       if (slot < TRIV2_MAX_CMDSLOTS) {
+               set_bit(slot, cmd_info->bitmap);
+               cmd_info->reqs[slot] = t;
+               t->cmd_slot = slot;
+       }
+
+       spin_unlock_irqrestore(&cmd_info->lock, flags);
+
+       /** Will be retried (rely on platform device's scheduling) */
+       if (slot >= TRIV2_MAX_CMDSLOTS)
+               return -EBUSY;
+
+       cmd.slot = slot;
+       cmd.status = STATUS_CMD_READY;
+
+       if (req->is_kernel && sched_data)
+               triv2_map_sched_data(drv, req, &cmd, sched_data);
+
+       /* publish the fully-built command into the device-shared slot */
+       memcpy_toio(cmd_info->buf.vaddr + slot * sizeof(struct triv2_cmd), &cmd,
+                   sizeof(struct triv2_cmd));
+
+       return slot;
+}
+
+/**
+ * Map an external physical region so the device can access it.
+ *
+ * Without an iommu domain the physical address is returned as-is.
+ *
+ * When req->skip_iommu_mapping is set and [paddr, paddr + size) lies fully
+ * inside the pre-mapped "extern" memory window, the device address is
+ * computed by offsetting into that window, avoiding a new mapping.  On any
+ * erroneous case the flag is cleared (so the unmap path stays consistent)
+ * and the code falls back to a regular dma_map_resource() mapping.
+ */
+static dma_addr_t triv2_map_iommu_extern(struct device *dev,
+                                        struct trinity_req *req,
+                                        phys_addr_t paddr, size_t size)
+{
+       struct iommu_domain *domain;
+       enum dma_data_direction dir;
+       unsigned attrs = 0;
+
+       domain = iommu_get_domain_for_dev(dev);
+       if (!domain)
+               return (dma_addr_t)paddr;
+
+       dir = DMA_BIDIRECTIONAL;
+       attrs |= DMA_ATTR_WRITE_COMBINE;
+       attrs |= DMA_ATTR_SKIP_CPU_SYNC;
+       attrs |= DMA_ATTR_FORCE_CONTIGUOUS;
+
+       if (req->skip_iommu_mapping) {
+               phys_addr_t extern_paddr;
+               dma_addr_t extern_daddr;
+               size_t extern_size;
+               unsigned long offset;
+
+               /* fallback to original iommu mapping on erroneous cases */
+               if (trinity_get_extern_memory(dev, &extern_paddr, &extern_daddr,
+                                             &extern_size) != 0)
+                       goto out;
+               /* requested range must be contained in the extern window */
+               if (unlikely(extern_paddr > paddr))
+                       goto out;
+               if (unlikely(extern_paddr + extern_size < paddr + size))
+                       goto out;
+
+               offset = (unsigned long)(paddr - extern_paddr);
+               return extern_daddr + offset;
+       }
+
+out:
+       /* ensure triv2_unmap_iommu_extern() performs dma_unmap_resource() */
+       req->skip_iommu_mapping = false;
+       return dma_map_resource(dev, paddr, size, dir, attrs);
+}
+
+/**
+ * Undo a mapping created by triv2_map_iommu_extern().
+ *
+ * Nothing needs to be done when there is no iommu domain (the address was
+ * passed through untranslated) or when the extern-memory fast path was
+ * taken (req->skip_iommu_mapping still set, so no mapping was created).
+ */
+static void triv2_unmap_iommu_extern(struct device *dev,
+                                    struct trinity_req *req, dma_addr_t daddr,
+                                    size_t size)
+{
+       if (!iommu_get_domain_for_dev(dev))
+               return;
+
+       if (req->skip_iommu_mapping)
+               return;
+
+       dma_unmap_resource(dev, daddr, size, DMA_BIDIRECTIONAL,
+                          DMA_ATTR_WRITE_COMBINE | DMA_ATTR_SKIP_CPU_SYNC |
+                                  DMA_ATTR_FORCE_CONTIGUOUS);
+}
+
+/**
+ * Fill the scheduling-related fields of a command for a kernel request.
+ *
+ * @sched_data is expected to be a struct inout_addr_info prepared by the
+ * in-kernel submitter.  Batch size, polling address/magic and, for every
+ * batch, the device addresses of all external input/output segments
+ * (mapped via triv2_map_iommu_extern()) are copied into @cmd, together
+ * with the extern segment index tables from the kernel request.
+ */
+static void triv2_map_sched_data(struct trinity_driver *drv,
+                                struct trinity_req *req, struct triv2_cmd *cmd,
+                                void *sched_data)
+{
+       struct device *dev = drv_to_dev_ptr(drv);
+       struct inout_addr_info *addr_info;
+       struct triv2_req *t_req;
+       struct triv2_kernel_req *k_req;
+       uint32_t i, j, offset;
+
+       t_req = TRIV2_GET_REQ(req);
+       addr_info = (struct inout_addr_info *)sched_data;
+
+       cmd->batch_size = addr_info->batch_size;
+       cmd->curr_cnt = 0;
+       cmd->poll_addr = addr_info->poll_addr;
+       cmd->poll_magic = req->poll_magic;
+       cmd->in_extern_seg_num = addr_info->in_cnt;
+       cmd->out_extern_seg_num = addr_info->out_cnt;
+
+       /* addresses are laid out batch-major: [batch][segment] */
+       k_req = t_req->kernel;
+       for (i = 0; i < addr_info->batch_size; i++) {
+               /* input extern segment */
+               offset = i * addr_info->in_cnt;
+               for (j = 0; j < addr_info->in_cnt; j++)
+                       cmd->in_addr[offset + j] = triv2_map_iommu_extern(
+                               dev, req, addr_info->in_addr[offset + j],
+                               k_req->in_seg_size[j]);
+               /* output extern segment */
+               offset = i * addr_info->out_cnt;
+               for (j = 0; j < addr_info->out_cnt; j++)
+                       cmd->out_addr[offset + j] = triv2_map_iommu_extern(
+                               dev, req, addr_info->out_addr[offset + j],
+                               k_req->out_seg_size[j]);
+       }
+       /* index for extern segments */
+       for (i = 0; i < cmd->in_extern_seg_num; i++)
+               cmd->in_extern_seg_idx[i] = k_req->in_seg_idx[i];
+       for (i = 0; i < cmd->out_extern_seg_num; i++)
+               cmd->out_extern_seg_idx[i] = k_req->out_seg_idx[i];
+
+       trace_triv2_map_sched_data(drv->dev_id, cmd->slot, cmd->batch_size,
+                                  cmd->in_extern_seg_num,
+                                  cmd->out_extern_seg_num);
+}
+
+/**
+ * Undo the per-batch extern segment mappings that were created by
+ * triv2_map_sched_data().  No-op for non-kernel requests.
+ */
+static void triv2_unmap_sched_data(struct trinity_driver *drv,
+                                  struct triv2_req *t_req,
+                                  struct triv2_cmd *cmd)
+{
+       struct device *dev = drv_to_dev_ptr(drv);
+       struct trinity_req *req = &(t_req->req);
+       struct triv2_kernel_req *k_req;
+       uint32_t i, j, offset;
+
+       /* only for kernel request */
+       if (!req->is_kernel)
+               return;
+
+       /* mirror the batch-major layout used in triv2_map_sched_data() */
+       k_req = t_req->kernel;
+       for (i = 0; i < cmd->batch_size; i++) {
+               offset = i * cmd->in_extern_seg_num;
+               for (j = 0; j < cmd->in_extern_seg_num; j++)
+                       triv2_unmap_iommu_extern(dev, req,
+                                                cmd->in_addr[offset + j],
+                                                k_req->in_seg_size[j]);
+
+               offset = i * cmd->out_extern_seg_num;
+               for (j = 0; j < cmd->out_extern_seg_num; j++)
+                       triv2_unmap_iommu_extern(dev, req,
+                                                cmd->out_addr[offset + j],
+                                                k_req->out_seg_size[j]);
+       }
+
+       trace_triv2_unmap_sched_data(drv->dev_id, cmd->slot);
+}
+
+/**
+ * @brief Invoke a req on the device. Note that all configurations
+ * required by running should be done before invocation of this function.
+ *
+ * Returns 0 on success, -EBUSY when no command slot is available
+ * (propagated from triv2_prepare_cmd()), or -EINVAL for an invalid
+ * output mode.
+ */
+static int32_t triv2_invoke_req(struct trinity_driver *drv,
+                               struct trinity_req *req, void *sched_data)
+{
+       enum trinity_output_mode mode = req->input.config.output_mode;
+       int32_t slot;
+
+       /*
+        * Validate the output mode BEFORE claiming a command slot.  The
+        * previous code checked afterwards and returned -EINVAL with the
+        * slot still marked busy in the bitmap, leaking it permanently.
+        */
+       if (mode != TRINITY_OUTPUT_HW && mode != TRINITY_OUTPUT_CPU_POLL &&
+           mode != TRINITY_OUTPUT_CPU_INTR) {
+               dev_err(drv_to_dev_ptr(drv), "Invalid output mode: %d\n", mode);
+               return -EINVAL;
+       }
+
+       slot = triv2_prepare_cmd(drv, req, sched_data);
+       if (slot < 0)
+               return slot;
+
+       triv2_run_trigger(drv, slot);
+
+       return 0;
+}
+
+/**
+ * Allocate a zero-initialized triv2 request and hand back the embedded
+ * generic trinity_req.  Returns NULL on allocation failure.
+ */
+static struct trinity_req *triv2_alloc_req(struct trinity_driver *drv)
+{
+       struct triv2_req *t_req = kzalloc(sizeof(*t_req), GFP_KERNEL);
+
+       if (!t_req)
+               return NULL;
+
+       /* no command slot assigned yet */
+       t_req->cmd_slot = -1;
+
+       return &t_req->req;
+}
+
+/**
+ * Release a request allocated by triv2_alloc_req(): end every dmabuf
+ * import that was actually begun (import->addr set), then free the
+ * import array, the optional kernel-request bookkeeping, and the request
+ * itself.
+ */
+static void triv2_dealloc_req(struct trinity_driver *drv,
+                             struct trinity_req *req)
+{
+       struct triv2_req *t_req = TRIV2_GET_REQ(req);
+
+       if (t_req->seg_import) {
+               struct trinity_hwmem_import *import;
+               uint32_t i;
+
+               for (i = 0; i < req->input.config.num_segments; i++) {
+                       import = &(t_req->seg_import[i]);
+                       if (import->addr)
+                               trinity_hwmem_import_dmabuf_end(import);
+               }
+               kfree(t_req->seg_import);
+       }
+       /* kfree(NULL) is a no-op; the previous NULL guard was redundant */
+       kfree(t_req->kernel);
+       kfree(t_req);
+}
+
+/**
+ * Handle a request watchdog timeout.  Under the cmd_info lock, if the
+ * request still owns a command slot (i.e. the IRQ handler has not already
+ * completed it), force completion with the timeout flag set.
+ */
+static void triv2_handle_timeout(struct trinity_driver *drv,
+                                struct trinity_req *req)
+{
+       struct triv2_cmd_info *cmd_info = TRIV2_DRV_GET_CMD_INFO(drv);
+       struct triv2_cmd *cmd;
+       struct triv2_req *t;
+       unsigned long flags;
+
+#ifdef CONFIG_TRINITY_FPGA_DEBUG
+       /* dump iommu state to aid FPGA-level debugging of the stall */
+       trinity_hwmem_iommu_print_status(drv_to_dev_ptr(drv));
+#endif
+
+       t = TRIV2_GET_REQ(req);
+
+       spin_lock_irqsave(&cmd_info->lock, flags);
+       if (t->cmd_slot >= 0) {
+               /* Timeout! check whether it's not handled in irq handler */
+               cmd = TRIV2_GET_CMD_FROM_SLOT(cmd_info, t->cmd_slot);
+               triv2_handle_cmd_done(drv, cmd, true);
+       }
+       spin_unlock_irqrestore(&cmd_info->lock, flags);
+}
+
+/**
+ * @brief stop the submitted reqs to the driver.
+ * In case of already-executed req, each device needs to determine the policy
+ * depending its capability to terminate the running one.
+ *
+ * Work handler bound to drv->work_stop.
+ */
+static void triv2_stop_reqs(struct work_struct *work)
+{
+       struct trinity_driver *drv =
+               container_of(work, struct trinity_driver, work_stop);
+
+       /*
+        * container_of() on the embedded work item of a queued work can
+        * never yield NULL; the former NULL test was dead code.
+        */
+       triv2_cancel_reqs(drv);
+}
+
+/**
+ * @brief get profile metadata for the target req
+ *
+ * Looks up the profile record for meta->req_id under the profile mutex
+ * and fills in total cycles, op count and the byte size of the per-op
+ * profile buffer.  Input/output footprints are not supported and are
+ * reported as -1.
+ *
+ * Returns 0 on success, or -ENOENT when no profile exists for the req.
+ */
+static int32_t triv2_get_profile_meta(const struct trinity_driver *drv,
+                                     struct trinity_ioctl_profile_meta *meta)
+{
+       struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+       struct triv2_profile *profile;
+       struct triv2_cmd_profile *profile_data;
+       int ret = 0;
+
+       mutex_lock(&pdata->prof_lock);
+
+       profile = triv2_find_profile(drv, meta->req_id);
+       if (!profile) {
+               ret = -ENOENT;
+               goto out;
+       }
+
+       /* a registered profile always carries its data buffer */
+       profile_data = profile->data;
+       BUG_ON(!profile_data);
+
+       meta->total_cycles = profile_data->total_cycles;
+       meta->total_ops = profile_data->total_ops;
+       meta->profile_size =
+               profile_data->total_ops * sizeof(struct triv2_op_profile);
+       /* unsupported for now */
+       meta->input_footprint = -1;
+       meta->output_footprint = -1;
+
+out:
+       mutex_unlock(&pdata->prof_lock);
+
+       return ret;
+}
+
+/**
+ * @brief get profile buffer for the target req
+ *
+ * Copies buff->profile_size bytes of the per-op profile data, starting at
+ * byte offset buff->profile_pos, into the user buffer buff->profile_buf.
+ *
+ * Returns 0 on success, -ENOENT when no profile exists for the req,
+ * -ERANGE for an out-of-range window, or -EACCES when the copy to user
+ * space fails.
+ */
+static int32_t triv2_get_profile_buff(const struct trinity_driver *drv,
+                                     struct trinity_ioctl_profile_buff *buff)
+{
+       struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+       struct triv2_profile *profile;
+       struct triv2_cmd_profile *profile_data;
+       uint32_t total_size;
+       int ret = 0;
+
+       mutex_lock(&pdata->prof_lock);
+
+       profile = triv2_find_profile(drv, buff->req_id);
+       if (!profile) {
+               ret = -ENOENT;
+               goto out;
+       }
+
+       profile_data = profile->data;
+       BUG_ON(!profile_data);
+
+       total_size = profile_data->total_ops * sizeof(struct triv2_op_profile);
+
+       /*
+        * Overflow-safe range check: the former 'pos + size > total_size'
+        * comparison could wrap around in 32 bits for user-supplied values
+        * and bypass the bounds check, allowing an out-of-bounds read.
+        */
+       if (buff->profile_pos > total_size ||
+           buff->profile_size > total_size - buff->profile_pos) {
+               dev_err(drv_to_dev_ptr(drv),
+                       "Profile data out-of-range! pos(%u) size(%u) > total_size(%u)",
+                       buff->profile_pos, buff->profile_size, total_size);
+               ret = -ERANGE;
+               goto out;
+       }
+
+       /* consider partial memory copies */
+       if (copy_to_user((char __user *)buff->profile_buf,
+                        (char *)profile_data->profile_ops + buff->profile_pos,
+                        buff->profile_size))
+               ret = -EACCES;
+
+out:
+       mutex_unlock(&pdata->prof_lock);
+
+       return ret;
+}
+
+/**
+ * Dump the per-op profile data of a request to the kernel log via
+ * dev_info()/dev_warn().  Memory counters are printed only when non-zero.
+ */
+static void triv2_show_profile(const struct trinity_driver *drv, int req_id)
+{
+       struct device *dev = drv_to_dev_ptr(drv);
+       struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+       struct triv2_profile *profile;
+       struct triv2_cmd_profile *profile_data;
+       uint32_t i;
+
+       mutex_lock(&pdata->prof_lock);
+
+       profile = triv2_find_profile(drv, req_id);
+       if (!profile) {
+               dev_warn(dev, "Unable to find the profile data (req_id %d)",
+                        req_id);
+               goto out;
+       }
+
+       /* a registered profile always carries its data buffer */
+       profile_data = profile->data;
+       BUG_ON(!profile_data);
+
+       dev_info(dev, "Total cycles: %lld", profile_data->total_cycles);
+       dev_info(dev, "Total ops: %u", profile_data->total_ops);
+
+       for (i = 0; i < profile_data->total_ops; i++) {
+               struct triv2_op_profile *op = &profile_data->profile_ops[i];
+
+               dev_info(dev, "[%u] opcode: %u name:%s", i, op->opcode,
+                        op->op_name);
+               dev_info(dev, "\tcycles: %lld", op->cycles);
+               dev_info(dev, "\tprog_seq: %lld", op->prog_seq);
+               dev_info(dev, "\texec_seq: %lld", op->exec_seq);
+               if (op->dram_read > 0)
+                       dev_info(dev, "\tdram_read: %lld", op->dram_read);
+               if (op->dram_write > 0)
+                       dev_info(dev, "\tdram_write: %lld", op->dram_write);
+               if (op->sram_read > 0)
+                       dev_info(dev, "\tsram_read: %lld", op->sram_read);
+               if (op->sram_write > 0)
+                       dev_info(dev, "\tsram_write: %lld", op->sram_write);
+       }
+out:
+       mutex_unlock(&pdata->prof_lock);
+}
+
+/**
+ * @brief destroy profile data
+ *
+ * Frees the profile payload, unlinks the profile from the pdata hash
+ * table and frees the record itself, all under the profile mutex.
+ * @data is the struct triv2_profile to destroy; NULL is a no-op.
+ */
+static void triv2_destroy_profile(const struct trinity_driver *drv, void *data)
+{
+       struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+       struct triv2_profile *profile = data;
+       struct triv2_cmd_profile *profile_data;
+
+       if (!profile)
+               return;
+
+       mutex_lock(&pdata->prof_lock);
+
+       profile_data = profile->data;
+       BUG_ON(!profile_data);
+       vfree(profile_data);
+
+       /* remove from the profile hash before freeing the record */
+       hash_del(&profile->hlist);
+       vfree(profile);
+
+       mutex_unlock(&pdata->prof_lock);
+}
+
+/**
+ * Scan all in-flight command slots and complete those the device has
+ * marked STATUS_CMD_DONE.  Runs with the cmd_info spinlock held so it
+ * cannot race with slot allocation or the timeout path.
+ */
+static void triv2_handle_irq_cmds(struct trinity_driver *drv)
+{
+       struct triv2_cmd_info *info;
+       struct triv2_cmd *cmd;
+       unsigned long flags;
+       int slot;
+
+       info = TRIV2_DRV_GET_CMD_INFO(drv);
+       spin_lock_irqsave(&info->lock, flags);
+
+       /** Search the bitmap to find the completed CMDs */
+       slot = find_first_bit(info->bitmap, TRIV2_MAX_CMDSLOTS);
+       while (slot < TRIV2_MAX_CMDSLOTS) {
+               cmd = TRIV2_GET_CMD_FROM_SLOT(info, slot);
+               if (cmd->status == STATUS_CMD_DONE)
+                       triv2_handle_cmd_done(drv, cmd, false);
+               slot = find_next_bit(info->bitmap, TRIV2_MAX_CMDSLOTS,
+                                    slot + 1);
+       }
+
+       spin_unlock_irqrestore(&info->lock, flags);
+}
+
+/**
+ * @brief An IRQ handler to be called when a registered IRQ (IRQ_OUT) occurs.
+ *
+ * Because the line is shared (IRQF_SHARED), the CBOX status register is
+ * read first to confirm the interrupt came from the NPU; if not, IRQ_NONE
+ * is returned.  Otherwise the interrupt is cleared and the completed
+ * commands are processed.
+ */
+static irqreturn_t triv2_handle_irq(int irq_no, void *dev_id)
+{
+       struct miscdevice *_mdev;
+       struct trinity_driver *drv;
+       void __iomem *addr;
+       uint32_t interrupt;
+       uint32_t reg;
+
+       /* dev_id is the miscdevice embedded in the driver instance */
+       _mdev = (struct miscdevice *)dev_id;
+       drv = container_of(_mdev, struct trinity_driver, mdev);
+
+#ifdef CONFIG_TRINITY_DEBUG
+       trace_triv2_handle_irq(drv->dev_id, irq_no);
+#endif
+
+       /**
+        * Verify that the IRQ is actually from the NPU
+        * This is required as IRQ_SHARED is used when setting up IRQ
+        */
+       addr = trinity_get_iomem_addr(drv->mmreg_vaddr[2],
+                                     OFFSET_CBOX_EXT_IRQ_STA);
+       reg = ioread32(addr);
+
+       interrupt = reg & MASK_CP_SWI_STA;
+       if (interrupt == 0)
+               return IRQ_NONE;
+
+       /** Clear the interrupt first */
+       addr = trinity_get_iomem_addr(drv->mmreg_vaddr[2],
+                                     OFFSET_CBOX_CP_SWI_CLR);
+       iowrite32(1, addr);
+
+       triv2_handle_irq_cmds(drv);
+       return IRQ_HANDLED;
+}
+
+/**
+ * @brief evaluate the physical address of entries in the segment table
+ */
+static int32_t triv2_prepare_req(struct trinity_driver *drv,
+                                struct trinity_req *req)
+{
+       struct triv2_req *t = TRIV2_GET_REQ(req);
+       struct trinity_input *input = &(req->input);
+       struct trinity_hwmem_import *segt_import = &(input->import_info);
+       int32_t *segtable_dbuffd_base;
+       uint32_t *segtable_extra_base;
+       int ret, i;
+
+       if (input->config.num_segments == 0)
+               return -EINVAL;
+
+       if (input->config.num_segments > TRIV2_MAX_SEGMENTS)
+               return -ERANGE;
+
+       t->seg_import =
+               kcalloc(input->config.num_segments,
+                       sizeof(struct trinity_hwmem_import), GFP_KERNEL);
+       if (!t->seg_import)
+               return -ENOMEM;
+
+       /* dmabuf fd to be resolved */
+       segtable_dbuffd_base = segt_import->addr;
+       /* extra value (e.g., offset or size) */
+       segtable_extra_base = segt_import->addr + HALF_PAGE_SIZE;
+
+#ifdef ARM
+       /* sync segment table */
+       __cpuc_flush_dcache_area(input->import_info.addr,
+                                input->import_info.buf->size);
+#endif
+
+       for (i = 0; i < input->config.num_segments; ++i) {
+               struct trinity_hwmem_import *import;
+               int32_t fd = segtable_dbuffd_base[i];
+               dma_addr_t daddr;
+
+               if (fd < 0) {
+                       uint32_t idx = (uint32_t)((fd + 1) * -1);
+                       struct triv2_kernel_req *kreq;
+
+                       /* it's for kernel input/output */
+                       if (!req->is_kernel) {
+                               req->is_kernel = true;
+                               kreq = kzalloc(sizeof(*kreq), GFP_KERNEL);
+                               if (!kreq) {
+                                       ret = -ENOMEM;
+                                       goto err;
+                               }
+                               t->kernel = kreq;
+                       }
+
+                       kreq = t->kernel;
+                       if (idx < TRIV2_MAX_TENSORS) {
+                               kreq->in_seg_idx[idx] = i;
+                               kreq->in_seg_size[idx] = segtable_extra_base[i];
+                               t->total_segment_size += kreq->in_seg_size[idx];
+                       } else if (idx < TRIV2_MAX_TENSORS * 2) {
+                               idx -= TRIV2_MAX_TENSORS;
+                               kreq->out_seg_idx[idx] = i;
+                               kreq->out_seg_size[idx] =
+                                       segtable_extra_base[i];
+                               t->total_segment_size +=
+                                       kreq->out_seg_size[idx];
+                       } else {
+                               dev_err(drv_to_dev_ptr(drv),
+                                       "Invalid external segment (idx: %u)",
+                                       idx);
+                               ret = -EINVAL;
+                               goto err;
+                       }
+                       continue;
+               }
+
+               import = &(t->seg_import[i]);
+               ret = trinity_hwmem_import_dmabuf_begin(drv_to_dev_ptr(drv), fd,
+                                                       import);
+               if (ret) {
+                       dev_err(drv_to_dev_ptr(drv),
+                               "%d-th segment with fd (%d) seems invalid: %d",
+                               i, fd, ret);
+                       goto err;
+               }
+
+               t->total_segment_size += import->buf->size;
+
+               /** @todo Use a local ptr variable */
+               daddr = import->dma_addr;
+               daddr += segtable_extra_base[i];
+
+               iowrite32(TRIV2_IDU_ADDR(daddr),
+                         segt_import->addr + i * sizeof(u32));
+       }
+
+       /* set the dma address of DSPM (reserved index: TRIV2_MAX_SEGMENTS - 1) */
+       if (drv->dspm > 0) {
+               struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+
+               iowrite32(TRIV2_IDU_ADDR(pdata->idu_dsp.dspm),
+                         segt_import->addr +
+                                 (TRIV2_MAX_SEGMENTS - 1) * sizeof(u32));
+       }
+
+       return 0;
+
+err:
+       kfree(t->seg_import);
+       t->seg_import = NULL;
+       return ret;
+}
+
+/**
+ * ioctl entry point: keep the device out of runtime suspend for the
+ * duration of the generic trinity_ioctl() handling.
+ */
+long triv2_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+       struct trinity_driver *drv = f->private_data;
+       struct device *dev = drv_to_dev_ptr(drv);
+       long err;
+
+       /* block runtime suspend while the ioctl is serviced */
+       if (trinity_pm_runtime_forbid(dev) != 0)
+               return -EBUSY;
+
+       err = trinity_ioctl(f, cmd, arg);
+       trinity_pm_runtime_allow(dev);
+
+       return err;
+}
+
+/**
+ * open entry point: resolve the driver instance from the miscdevice in
+ * f->private_data and run the generic trinity_open() with runtime
+ * suspend temporarily forbidden.
+ */
+int triv2_open(struct inode *inode, struct file *f)
+{
+       struct miscdevice *miscdev = f->private_data;
+       struct trinity_driver *drv =
+               container_of(miscdev, struct trinity_driver, mdev);
+       struct device *dev = drv_to_dev_ptr(drv);
+       int err;
+
+       /* block runtime suspend while opening */
+       if (trinity_pm_runtime_forbid(dev) != 0)
+               return -EBUSY;
+
+       err = trinity_open(inode, f);
+       trinity_pm_runtime_allow(dev);
+
+       return err;
+}
+
+/* character-device operations for the triv2 misc device */
+static const struct file_operations triv2_fops = {
+       .owner = THIS_MODULE,
+       .unlocked_ioctl = triv2_ioctl,
+       .open = triv2_open,
+       .release = trinity_release,
+       .llseek = noop_llseek,
+};
+
+/*
+ * Program the CP (command processor) base registers: the IMIF base points
+ * at the CP code at @paddr (written >> 4, presumably 16-byte-granular
+ * addressing -- TODO confirm against the register spec) and the CBOX base
+ * points at the third mapped register bank.
+ */
+static void triv2_setup_cp(struct trinity_driver *drv, phys_addr_t paddr)
+{
+       iowrite32(TRIV2_IDU_ADDR(paddr) >> 4,
+                 drv->mmreg_vaddr[0] + OFFSET_CP_IMIF_BASE);
+       iowrite32(TRIV2_IDU_ADDR(drv->mmreg_paddr[2]),
+                 drv->mmreg_vaddr[0] + OFFSET_NPU_CBOX_BASE);
+}
+
+/*
+ * Program the DSP IMIF base register with the DSP code address @paddr
+ * (written >> 4, same addressing granularity as the CP setup).
+ */
+static void triv2_setup_dsp(struct trinity_driver *drv, phys_addr_t paddr)
+{
+       iowrite32(TRIV2_IDU_ADDR(paddr) >> 4,
+                 drv->mmreg_vaddr[1] + OFFSET_DSP_IMIF_BASE);
+}
+
+/*
+ * One-time initialization of state shared by all triv2 devices (the model
+ * node hash lists).  NOTE(review): the 'done' latch is not protected
+ * against concurrent callers -- presumably only invoked from the
+ * single-threaded probe path; confirm before relying on it elsewhere.
+ */
+static void triv2_init_common(void)
+{
+       static bool done = false;
+       int i;
+
+       if (done)
+               return;
+
+       /* init hlists */
+       for (i = 0; i < TRIV2_MODEL_HASH_SIZE; ++i)
+               INIT_HLIST_BL_HEAD(&triv2_model_node_hlist[i]);
+       done = true;
+}
+
+/*
+ * Allocate backing memory for an IDU image: write-combined DMA memory on
+ * FPGA builds, the trinity reserved-memory pool otherwise.  mem->size
+ * must be set by the caller.  Returns 0 or a negative errno.
+ */
+static int triv2_idu_alloc(struct device *dev, struct trinity_resv_mem *mem)
+{
+#ifdef CONFIG_TRINITY_FPGA
+       mem->vaddr = dma_alloc_wc(dev, mem->size, &mem->daddr, GFP_KERNEL);
+       if (!mem->vaddr)
+               return -ENOMEM;
+       return 0;
+#else
+       return trinity_alloc_from_resv_mem(mem->size, mem, false);
+#endif
+}
+
+/*
+ * Free memory obtained from triv2_idu_alloc() and clear mem->vaddr so a
+ * repeated call is a safe no-op.
+ */
+static void triv2_idu_free(struct device *dev, struct trinity_resv_mem *mem)
+{
+       if (!mem->vaddr)
+               return;
+
+#ifdef CONFIG_TRINITY_FPGA
+       dma_free_wc(dev, mem->size, mem->vaddr, mem->daddr);
+#else
+       trinity_free_from_resv_mem(mem, false);
+#endif
+       mem->vaddr = NULL;
+}
+
+/*
+ * Decode the cached IDU version register into major/minor/extra parts.
+ * Returns 0 on success, -EINVAL for NULL arguments, or -ENOENT when no
+ * version value has been latched yet.
+ */
+static int triv2_idu_version(struct trinity_driver *drv, uint32_t *major,
+                            uint32_t *minor, uint32_t *extra)
+{
+       uint32_t version;
+
+       if (!drv || !major || !minor || !extra)
+               return -EINVAL;
+
+       version = TRIV2_DRV_GET_PDATA(drv)->idu_version;
+       if (version == 0)
+               return -ENOENT;
+
+       *major = (version & TRIV2_IDU_MASK_MAJOR) >> TRIV2_IDU_SHIFT_MAJOR;
+       *minor = (version & TRIV2_IDU_MASK_MINOR) >> TRIV2_IDU_SHIFT_MINOR;
+       *extra = version & TRIV2_IDU_MASK_EXTRA;
+
+       return 0;
+}
+
+/*
+ * Wait for the device to become ready, latch the IDU version register
+ * into pdata, log the detected version, and leave the device paused
+ * until it is opened.
+ */
+static void triv2_idu_check(struct trinity_driver *drv)
+{
+       struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+       struct device *dev = drv_to_dev_ptr(drv);
+       uint32_t major, minor, extra;
+
+       if (trinity_wait_ready(drv) != 0) {
+               dev_warn(dev, "Unable to load IDU properly");
+               return;
+       }
+
+       pdata->idu_version =
+               ioread32(drv->mmreg_vaddr[0] + OFFSET_NPU_IDU_VERSION);
+       if (triv2_idu_version(drv, &major, &minor, &extra) == 0)
+               dev_info(dev,
+                        "Instruction Decoder Unit (IDU) v%u.%u.%u detected",
+                        major, minor, extra);
+
+       /* paused until device is opened */
+       triv2_set_state(drv, TRINITY_STATE_PAUSE);
+}
+
+static int triv2_idu_load_file(struct trinity_driver *drv, const char *dirpath,
+                              const char *file_name,
+                              struct trinity_resv_mem *sector)
+{
+       struct device *dev = drv_to_dev_ptr(drv);
+       struct trinity_resv_mem mem;
+       char filepath[NAME_MAX];
+       struct kstat *stat;
+       struct file *filp;
+       mm_segment_t old_fs;
+       loff_t pos = 0;
+       size_t size;
+       int ret;
+
+       dev = drv_to_dev_ptr(drv);
+       stat = (struct kstat *)vmalloc(sizeof(*stat));
+       if (stat == NULL)
+               return -ENOMEM;
+
+       /* if dirpath is null, use the default path */
+       if (dirpath)
+               snprintf(filepath, NAME_MAX, "%s/%s", dirpath, file_name);
+       else
+               snprintf(filepath, NAME_MAX, TRIV2_IDU_DIRPATH_FMT "/%s",
+                        utsname()->release, file_name);
+
+       filp = filp_open(filepath, O_RDONLY, 0400);
+       if (IS_ERR(filp)) {
+               dev_err(dev, "Failed to open the idu binary: %s", filepath);
+               ret = PTR_ERR(filp);
+               goto out_free;
+       }
+
+       old_fs = get_fs();
+       set_fs(KERNEL_DS);
+
+       /* check file existence first */
+       ret = vfs_getattr(&filp->f_path, stat, STATX_SIZE,
+                         AT_STATX_SYNC_AS_STAT);
+
+       set_fs(old_fs);
+
+       if (ret != 0 || stat->size == 0) {
+               dev_warn(dev, "File not found: %s", filepath);
+               ret = -ENOENT;
+               goto out_close;
+       }
+
+       size = stat->size;
+       if (size > TRIV2_IDU_MAXSIZE) {
+               dev_err(dev, "Too large idu binary: %zu MiB", size >> 20);
+               ret = -EINVAL;
+               goto out_close;
+       }
+
+#ifdef CONFIG_TRINITY_FPGA
+       mem.size = TRIV2_IDU_MAXSIZE;
+#else
+       mem.size = PAGE_ALIGN(size);
+#endif
+       ret = triv2_idu_alloc(dev, &mem);
+       if (ret < 0) {
+               dev_err(dev, "Failed to allocate memory for idu");
+               goto out_close;
+       }
+
+       ret = read_idu_file(filp, pos, mem.vaddr, size);
+       if (ret != size) {
+               dev_err(dev, "Failed to read the file %s", filepath);
+               triv2_idu_free(dev, &mem);
+               ret = -ERANGE;
+               goto out_close;
+       }
+
+       /* free previous idu if exists */
+       if (sector->vaddr)
+               triv2_idu_free(dev, sector);
+
+       sector->daddr = mem.daddr;
+       sector->vaddr = mem.vaddr;
+       sector->size = mem.size;
+       sector->orig_size = size;
+
+       ret = 0;
+out_close:
+       filp_close(filp, NULL);
+out_free:
+       vfree(stat);
+
+       return ret;
+}
+
+/*
+ * Load the CP (mandatory) and DSP (optional, only when idu_dsp.addrs is
+ * populated) data/code binaries and record the physical address of each
+ * code image in the corresponding addrs[TRIV2_IDU_CODEIDX] slot.
+ * Returns 0 on success or the first failing errno.
+ */
+static int triv2_idu_load_files(struct trinity_driver *drv, const char *dirpath)
+{
+       struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+       struct iommu_domain *domain;
+       phys_addr_t paddr;
+       int ret;
+
+       domain = iommu_get_domain_for_dev(drv_to_dev_ptr(drv));
+
+       ret = triv2_idu_load_file(drv, dirpath, "cp/data.bin",
+                                 &(pdata->idu_cp.data));
+       if (ret < 0)
+               return ret;
+
+       ret = triv2_idu_load_file(drv, dirpath, "cp/code.bin",
+                                 &(pdata->idu_cp.code));
+       if (ret < 0)
+               return ret;
+
+       paddr = trinity_get_paddr(domain, pdata->idu_cp.code.daddr);
+       pdata->idu_cp.addrs[TRIV2_IDU_CODEIDX] = paddr;
+
+       /* no DSP address table means this device has no DSP */
+       if (!pdata->idu_dsp.addrs)
+               return 0;
+
+       ret = triv2_idu_load_file(drv, dirpath, "dsp/data.bin",
+                                 &(pdata->idu_dsp.data));
+       if (ret < 0)
+               return ret;
+
+       ret = triv2_idu_load_file(drv, dirpath, "dsp/code.bin",
+                                 &(pdata->idu_dsp.code));
+       if (ret < 0)
+               return ret;
+
+       paddr = trinity_get_paddr(domain, pdata->idu_dsp.code.daddr);
+       pdata->idu_dsp.addrs[TRIV2_IDU_CODEIDX] = paddr;
+
+       return 0;
+}
+
+/*
+ * Zero-fill @size bytes of device memory at physical address @paddr via a
+ * temporary ioremap() window.  Errors are logged and otherwise ignored.
+ */
+static void triv2_idu_fill_zero(struct trinity_driver *drv, phys_addr_t paddr,
+                               size_t size)
+{
+       /*
+        * Fix sparse annotation: __iomem qualifies the pointer target, so
+        * the declaration must be 'void __iomem *', not 'void *__iomem'.
+        */
+       void __iomem *vaddr;
+
+       vaddr = ioremap(paddr, PAGE_ALIGN(size));
+       if (vaddr == NULL) {
+               dev_err(drv_to_dev_ptr(drv), "Failed to do ioremap() for 0x%lx",
+                       (unsigned long)paddr);
+               return;
+       }
+       memset_io(vaddr, 0, size);
+
+       iounmap(vaddr);
+}
+
+/*
+ * Copy an IDU data image (@data->orig_size bytes) into device memory at
+ * physical address @paddr via a temporary ioremap() window of
+ * @data->size bytes.  Errors are logged and otherwise ignored.
+ */
+static void triv2_idu_fill_data(struct trinity_driver *drv, phys_addr_t paddr,
+                               struct trinity_resv_mem *data)
+{
+       /*
+        * Fix sparse annotation: __iomem qualifies the pointer target, so
+        * the declaration must be 'void __iomem *', not 'void *__iomem'.
+        */
+       void __iomem *vaddr;
+
+       vaddr = ioremap(paddr, data->size);
+       if (vaddr == NULL) {
+               dev_err(drv_to_dev_ptr(drv), "Failed to do ioremap() for 0x%lx",
+                       (unsigned long)paddr);
+               return;
+       }
+       memcpy_toio(vaddr, data->vaddr, data->orig_size);
+
+       iounmap(vaddr);
+}
+
+/*
+ * Point the CP (always) and DSP (when present) instruction fetch units at
+ * their loaded code images.
+ */
+static void triv2_idu_load_code(struct trinity_driver *drv)
+{
+       struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+
+       /* CP is mandatory */
+       triv2_setup_cp(drv, pdata->idu_cp.addrs[TRIV2_IDU_CODEIDX]);
+
+       /* DSP is optional */
+       if (pdata->idu_dsp.addrs)
+               triv2_setup_dsp(drv, pdata->idu_dsp.addrs[TRIV2_IDU_CODEIDX]);
+}
+
+/*
+ * (Re)load the IDU firmware: optionally refresh the binaries from the
+ * filesystem, zero the DSPM regions, copy the data images into device
+ * memory, and finally point the fetch units at the code images.  A file
+ * load failure is non-fatal -- the previously loaded images are reused.
+ * Returns 0, or -EINVAL when @drv is NULL.
+ */
+static int triv2_idu_load(struct trinity_driver *drv, const char *dirpath,
+                         bool load_files)
+{
+       struct triv2_pdata *pdata;
+       struct triv2_idu *idu_cp;
+       struct triv2_idu *idu_dsp;
+       struct device *dev;
+
+       if (!drv)
+               return -EINVAL;
+
+       dev = drv_to_dev_ptr(drv);
+       if (load_files) {
+               int ret = triv2_idu_load_files(drv, dirpath);
+               if (ret != 0) {
+                       /* fall back to whatever is already loaded */
+                       dev_warn(dev, "Unable to load IDU files: %d", ret);
+                       goto load_code;
+               }
+       }
+
+       pdata = TRIV2_DRV_GET_PDATA(drv);
+       idu_cp = &pdata->idu_cp;
+       idu_dsp = &pdata->idu_dsp;
+
+       triv2_idu_fill_zero(drv, idu_cp->addrs[TRIV2_IDU_ZEROIDX],
+                           TRIV2_IDU_CP_DSPM_SIZE);
+       triv2_idu_fill_data(drv, idu_cp->addrs[TRIV2_IDU_DATAIDX],
+                           &idu_cp->data);
+
+       /* DSP is optional; skip its DSPM/data setup when absent */
+       if (!pdata->idu_dsp.addrs)
+               goto load_code;
+
+       triv2_idu_fill_zero(drv, idu_dsp->addrs[TRIV2_IDU_ZEROIDX],
+                           drv->dspm + TRIV2_DSP_DSPM_OFFSET);
+       triv2_idu_fill_data(drv, idu_dsp->addrs[TRIV2_IDU_DATAIDX],
+                           &idu_dsp->data);
+
+load_code:
+       triv2_idu_load_code(drv);
+
+       return 0;
+}
+
+/* Release the IDU data and code buffers for both the CP and the DSP */
+static void triv2_idu_unload(struct trinity_driver *drv)
+{
+       struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+       struct device *dev = drv_to_dev_ptr(drv);
+
+       /* CP sections */
+       triv2_idu_free(dev, &pdata->idu_cp.data);
+       triv2_idu_free(dev, &pdata->idu_cp.code);
+
+       /* DSP sections */
+       triv2_idu_free(dev, &pdata->idu_dsp.data);
+       triv2_idu_free(dev, &pdata->idu_dsp.code);
+}
+
+/*
+ * Program the command/backup/profile buffer base registers in the first
+ * MMIO region (mmreg_vaddr[0]).
+ *
+ * Command and profile buffers are written as physical addresses resolved
+ * through the device's IOMMU domain; the backup buffer is written with
+ * its DMA address directly.
+ * NOTE(review): the paddr-vs-daddr asymmetry for back_buf looks
+ * intentional but is worth confirming against the NPU register spec.
+ */
+static void triv2_setup_buffers(struct trinity_driver *drv)
+{
+       struct device *dev = drv_to_dev_ptr(drv);
+       struct iommu_domain *domain;
+       struct trinity_resv_mem *cmd_buf;
+       struct trinity_resv_mem *back_buf;
+       struct trinity_resv_mem *prof_buf;
+       phys_addr_t paddr;
+
+       domain = iommu_get_domain_for_dev(dev);
+       cmd_buf = TRIV2_DRV_GET_CMD_BUF(drv);
+       back_buf = TRIV2_DRV_GET_BACK_BUF(drv);
+       prof_buf = TRIV2_DRV_GET_PROF_BUF(drv);
+
+       /* command slot buffer base */
+       paddr = trinity_get_paddr(domain, cmd_buf->daddr);
+       iowrite32(TRIV2_IDU_ADDR(paddr),
+                 trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+                                        OFFSET_NPU_CMD_BASE));
+       /* context-backup buffer base and size */
+       iowrite32(TRIV2_IDU_ADDR(back_buf->daddr),
+                 trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+                                        OFFSET_NPU_BACK_ADDR));
+       iowrite32(back_buf->size, trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+                                                        OFFSET_NPU_BACK_SIZE));
+
+       /* profile buffer: advertise it only when allocated, else write 0/0 */
+       if (prof_buf->size > 0) {
+               paddr = trinity_get_paddr(domain, prof_buf->daddr);
+               iowrite32(TRIV2_IDU_ADDR(paddr),
+                         trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+                                                OFFSET_NPU_PROF_ADDR));
+               iowrite32(prof_buf->size,
+                         trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+                                                OFFSET_NPU_PROF_SIZE));
+       } else {
+               iowrite32(0, trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+                                                   OFFSET_NPU_PROF_ADDR));
+               iowrite32(0, trinity_get_iomem_addr(drv->mmreg_vaddr[0],
+                                                   OFFSET_NPU_PROF_SIZE));
+       }
+}
+
+static int32_t triv2_init_pdata(struct trinity_driver *drv)
+{
+       struct device *dev = drv_to_dev_ptr(drv);
+       struct triv2_pdata *pdata;
+       struct triv2_cmd_info *cmd_info;
+       struct trinity_resv_mem *cmd_buf;
+       struct trinity_resv_mem *back_buf;
+       int status;
+
+       trinity_pm_runtime_attach(drv);
+
+       /* alloc triv2 pdata */
+       drv->pdata = (struct triv2_pdata *)kzalloc(sizeof(struct triv2_pdata),
+                                                  GFP_KERNEL);
+       if (!drv->pdata)
+               return -ENOMEM;
+
+       pdata = drv->pdata;
+       pdata->drv = drv;
+
+       cmd_info = TRIV2_DRV_GET_CMD_INFO(drv);
+       cmd_buf = TRIV2_DRV_GET_CMD_BUF(drv);
+       back_buf = TRIV2_DRV_GET_BACK_BUF(drv);
+
+       mutex_init(&pdata->prof_lock);
+
+#ifdef CONFIG_TRINITY_FPGA
+       /* initialize IOMMU */
+       status = trinity_hwmem_iommu_init(dev, drv->mmreg_vaddr[2]);
+       if (status < 0) {
+               dev_err(dev, "Failed to enable the IOMMU device");
+               goto free_pdata;
+       }
+#endif
+
+       spin_lock_init(&cmd_info->lock);
+       /* init cmd bitmap */
+       bitmap_zero(cmd_info->bitmap, TRIV2_MAX_CMDSLOTS);
+
+       /* alloc command buffer */
+       status = trinity_alloc_from_resv_mem(PAGE_SIZE, cmd_buf, false);
+       if (status < 0) {
+               dev_err(dev, "Couldn't allocate memory for cmd slots");
+               goto free_pdata;
+       }
+       /* ensure cmd buffer is null-initialized, which is visible in NPU as well */
+       memset_io(cmd_buf->vaddr, '\x00', PAGE_SIZE);
+
+       /* alloc backup buffer for preemption (GBUF + DSPM) */
+       status = trinity_alloc_from_resv_mem(TRIV2_DLA_GBUFFER_SIZE + drv->dspm,
+                                            back_buf, false);
+       if (status < 0) {
+               dev_err(dev,
+                       "Couldn't allocate memory for context backup buffer");
+               goto free_cmd_info;
+       }
+
+#ifdef CONFIG_TRINITY_FPGA
+       if (trinity_hwmem_iommu_map(dev, back_buf->daddr, back_buf->size) < 0)
+               dev_warn(dev, "Unable to map iommu mapping for 0x%llx",
+                        back_buf->daddr);
+#endif
+
+       triv2_setup_buffers(drv);
+       list_add_tail(&pdata->list, &triv2_driver_list);
+
+       return 0;
+
+free_cmd_info:
+       dma_free_wc(drv_to_dev_ptr(drv), PAGE_SIZE, cmd_buf->vaddr,
+                   cmd_buf->daddr);
+free_pdata:
+       kfree(drv->pdata);
+       drv->pdata = NULL;
+
+       return status;
+}
+
+/*
+ * Parse one IDU address property (an array of exactly
+ * TRIV2_IDU_MAX_SECTORS u64 values) from the device tree into @idu.
+ * Returns 0, -EINVAL on a missing/ill-sized property, -ENOMEM on
+ * allocation failure, or the of_property_read_u64_array() error.
+ */
+static int32_t parse_idu_property(struct device *dev,
+                                 const struct device_node *np,
+                                 const char *prop_name, struct triv2_idu *idu)
+{
+       u64 raw[TRIV2_IDU_MAX_SECTORS];
+       struct property *prop;
+       size_t num_vals;
+       int i, err;
+
+       memset(idu, '\x00', sizeof(*idu));
+
+       /* the property itself must be present */
+       prop = of_find_property(np, prop_name, NULL);
+       if (!prop)
+               return -EINVAL;
+
+       num_vals = prop->length / sizeof(u64);
+       if (num_vals != TRIV2_IDU_MAX_SECTORS) {
+               dev_err(dev, "idu requires %d values", TRIV2_IDU_MAX_SECTORS);
+               return -EINVAL;
+       }
+
+       idu->addr_num = num_vals;
+       idu->addrs = devm_kcalloc(dev, num_vals, sizeof(*idu->addrs),
+                                 GFP_KERNEL);
+       if (!idu->addrs) {
+               dev_err(dev, "failed to allocate memory for idu values");
+               return -ENOMEM;
+       }
+
+       err = of_property_read_u64_array(np, prop_name, raw, num_vals);
+       if (err < 0) {
+               dev_err(dev, "failed to read property u64 array: %d", err);
+               return err;
+       }
+
+       for (i = 0; i < TRIV2_IDU_MAX_SECTORS; i++)
+               idu->addrs[i] = (unsigned long)raw[i];
+
+       return 0;
+}
+
+/**
+ * @brief Setup IDU (e.g., CP, DSP) sections for this device
+ *
+ * Parses the IDU address properties from the device tree, attempts to
+ * load the IDU binaries from the default path, and, when a DSP with a
+ * DSPM is present, creates a DMA mapping for the DSPM segment.
+ */
+static int triv2_setup_idu(struct trinity_driver *drv)
+{
+       struct triv2_pdata *pdata = TRIV2_DRV_GET_PDATA(drv);
+       struct device *dev = drv_to_dev_ptr(drv);
+       struct device_node *np = dev->of_node;
+       int err;
+
+       /* get Instruction Decode Unit (IDU) property */
+       err = parse_idu_property(dev, np, "samsung,idu_cp", &pdata->idu_cp);
+       if (err < 0) {
+               dev_err(dev, "Failed to parse idu property: samsung,idu_cp");
+               return err;
+       }
+
+       /* DSP is optional; a missing property simply disables DSP support */
+       err = parse_idu_property(dev, np, "samsung,idu_dsp", &pdata->idu_dsp);
+       if (err < 0) {
+               dev_info(dev, "DSP is not supported");
+               pdata->idu_dsp.addrs = NULL;
+       }
+
+       /* try to find the IDU files (default) */
+       if (triv2_idu_load(drv, NULL, true) < 0) {
+               dev_warn(dev, "Failed to load IDU in the default path\n");
+               dev_warn(dev, "Should load IDU using sysfs later\n");
+       } else {
+               triv2_idu_check(drv);
+       }
+
+       if (pdata->idu_dsp.addrs && drv->dspm > 0) {
+               struct iommu_domain *domain;
+               phys_addr_t paddr;
+               dma_addr_t daddr;
+
+               /* iommu mapping for dspm segment */
+               domain = iommu_get_domain_for_dev(dev);
+               if (!domain)
+                       return 0;
+
+               paddr = pdata->idu_dsp.addrs[0] + TRIV2_DSP_DSPM_OFFSET;
+               daddr = dma_map_resource(dev, paddr, drv->dspm,
+                                        DMA_BIDIRECTIONAL, 0);
+               /* dma_map_resource() signals failure via dma_mapping_error(),
+                * not by returning 0; don't cache an invalid address
+                */
+               if (dma_mapping_error(dev, daddr)) {
+                       dev_warn(dev, "Unable to map DSPM segment for DMA");
+                       return 0;
+               }
+               pdata->idu_dsp.dspm = daddr;
+       }
+
+       return 0;
+}
+
+/**
+ * @brief Initialize necessary variables in TRIV2
+ *
+ * One-time common initialization followed by per-device pdata setup.
+ * Returns 0 or a negative errno from triv2_init_pdata().
+ */
+static int32_t triv2_init(struct trinity_driver *drv)
+{
+       triv2_init_common();
+       return triv2_init_pdata(drv);
+}
+
+/**
+ * @brief Clean up initialized variables in TRIV2
+ *
+ * Reverse of triv2_init_pdata(): unloads the IDU images, releases the
+ * command and backup buffers (undoing the FPGA IOMMU mapping first when
+ * enabled), unlinks the device and frees pdata. Safe to call when pdata
+ * was never successfully initialized.
+ */
+static void triv2_cleanup(struct trinity_driver *drv)
+{
+       struct trinity_resv_mem *cmd_buf;
+       struct trinity_resv_mem *back_buf;
+
+       /* nothing to do if triv2_init_pdata() did not complete */
+       if (!drv->pdata)
+               return;
+
+       triv2_idu_unload(drv);
+
+       cmd_buf = TRIV2_DRV_GET_CMD_BUF(drv);
+       back_buf = TRIV2_DRV_GET_BACK_BUF(drv);
+
+       if (cmd_buf->vaddr)
+               trinity_free_from_resv_mem(cmd_buf, false);
+
+       if (back_buf->vaddr) {
+#ifdef CONFIG_TRINITY_FPGA
+               struct device *dev = drv_to_dev_ptr(drv);
+
+               /* drop the mapping created in triv2_init_pdata() */
+               if (trinity_hwmem_iommu_unmap(dev, back_buf->daddr,
+                                             back_buf->size) < 0)
+                       dev_warn(dev,
+                                "Unable to unmap iommu mapping for 0x%llx",
+                                back_buf->daddr);
+#endif
+               trinity_free_from_resv_mem(back_buf, false);
+       }
+
+       list_del(&(TRIV2_DRV_GET_PDATA(drv)->list));
+       kfree(drv->pdata);
+       drv->pdata = NULL;
+}
+
+/* Descriptor binding the TRIV2 callbacks into the common trinity core;
+ * consumed by trinity_probe()/trinity_remove() below.
+ */
+static struct trinity_desc triv2_desc = {
+       .type = "triv2",
+       .ver = GENVER(TRINITY_DEV_VISION2, VER_MAJOR, VER_MINOR, VER_EXTRA),
+       .fops = &triv2_fops,
+       /* device management */
+       .reset = triv2_reset,
+       .idu_load = triv2_idu_load,
+       .idu_version = triv2_idu_version,
+       .get_state = triv2_get_state,
+       .set_state = triv2_set_state,
+       /* req management */
+       .alloc_req = triv2_alloc_req,
+       .dealloc_req = triv2_dealloc_req,
+       .prepare_req = triv2_prepare_req,
+       .invoke_req = triv2_invoke_req,
+       /* profile */
+       .init_profile = triv2_init_profile,
+       .check_profile = triv2_check_profile,
+       .get_profile_meta = triv2_get_profile_meta,
+       .get_profile_buff = triv2_get_profile_buff,
+       .show_profile = triv2_show_profile,
+       .destroy_profile = triv2_destroy_profile,
+       /* etc. */
+       .handle_timeout = triv2_handle_timeout,
+       .stop_reqs = triv2_stop_reqs,
+       .drain_reqs = triv2_drain_reqs,
+       .handle_irq = triv2_handle_irq,
+};
+
+#ifdef CONFIG_PM_SLEEP
+/* System-sleep suspend: intentionally a no-op for this device */
+static int triv2_suspend(struct device *dev)
+{
+       return 0;
+}
+
+/* System-sleep resume: intentionally a no-op for this device */
+static int triv2_resume(struct device *dev)
+{
+       return 0;
+}
+#endif
+
+#ifdef CONFIG_PM
+/* Counters pairing runtime suspends/resumes so the device reset in
+ * triv2_runtime_resume() runs only on the first resume of a cycle.
+ * NOTE(review): plain ints shared across devices — presumably serialized
+ * by the PM core; confirm for multi-instance configurations.
+ */
+static int triv2_runtime_suspended;
+static int triv2_runtime_resumed;
+
+/* Runtime-PM suspend: quiesce the scheduler and pause a ready device */
+static int triv2_runtime_suspend(struct device *dev)
+{
+       struct trinity_driver *drv;
+
+       drv = (struct trinity_driver *)dev_get_drvdata(dev);
+       if (!drv) {
+               dev_warn(dev, "Cannot find driver data");
+               return 0;
+       }
+
+       if (drv->verbose)
+               dev_info(dev, "%s called", __func__);
+
+       mutex_lock(&drv->lock);
+
+       /* 1) Ensure that the scheduler was suspended */
+       trinity_sched_suspend();
+
+       /* 2) Set pause state if it's in ready state */
+       if (triv2_get_state(drv) == TRINITY_STATE_READY)
+               triv2_set_state(drv, TRINITY_STATE_PAUSE);
+
+       mutex_unlock(&drv->lock);
+
+       triv2_runtime_suspended++;
+
+       return 0;
+}
+
+/* Runtime-PM resume: reset the HW (first resume of a cycle only), restore
+ * the buffer registers and IDU code, mark the device ready when it is in
+ * use, then resume the request scheduler.
+ */
+static int triv2_runtime_resume(struct device *dev)
+{
+       struct trinity_driver *drv;
+
+       drv = (struct trinity_driver *)dev_get_drvdata(dev);
+       if (!drv) {
+               dev_warn(dev, "Cannot find driver data");
+               return 0;
+       }
+
+       if (drv->verbose)
+               dev_info(dev, "%s called", __func__);
+
+       /* 0) Reset NPU devices (only once per suspend/resume cycle) */
+       trinity_reset_device(dev, triv2_runtime_resumed == 0);
+
+       mutex_lock(&drv->lock);
+
+       /* 1) Restore IDU setup (no file reload; reuse in-memory images) */
+       triv2_setup_buffers(drv);
+       triv2_idu_load(drv, NULL, false);
+
+       /* 2) Set ready state if the device is currently opened by users */
+       if (drv->opened > 0)
+               triv2_set_state(drv, TRINITY_STATE_READY);
+
+       /* 3) Resume the req scheduler */
+       trinity_sched_resume();
+
+       mutex_unlock(&drv->lock);
+
+       /* once all suspended devices have resumed, rearm for the next cycle */
+       if (++triv2_runtime_resumed == triv2_runtime_suspended)
+               triv2_runtime_resumed = triv2_runtime_suspended = 0;
+
+       return 0;
+}
+#endif
+
+/* System-sleep and runtime PM callbacks for the triv2 platform device */
+static const struct dev_pm_ops triv2_dev_pm_ops = {
+       // clang-format off
+       SET_SYSTEM_SLEEP_PM_OPS(triv2_suspend, triv2_resume)
+       SET_RUNTIME_PM_OPS(triv2_runtime_suspend, triv2_runtime_resume, NULL)
+       // clang-format on
+};
+
+/* Device-tree match table: binds this driver to "samsung,trinity" nodes */
+static const struct of_device_id trinity_match[] = {
+       {
+               .compatible = "samsung,trinity",
+       },
+       { /** sentinel */ },
+};
+
+/**
+ * @brief Probes for Trinity vision devices, inits them if found
+ *
+ * Performs the common trinity probe, validates/adjusts the DSPM size,
+ * initializes the TRIV2 pdata, sets up the IDU and creates the device
+ * node. Any failure after trinity_probe() unwinds through
+ * trinity_remove().
+ */
+static int trinity_triv2_probe(struct platform_device *pdev)
+{
+       struct trinity_driver *drv;
+       int err;
+
+       err = trinity_probe(pdev, &triv2_desc);
+       if (err < 0)
+               return err;
+
+       drv = (struct trinity_driver *)platform_get_drvdata(pdev);
+       if (drv->dspm > 0) {
+               /* DSPM's some region is reserved for DSP kernel operations */
+               if (drv->dspm < TRIV2_DSP_DSPM_OFFSET) {
+                       dev_err(drv_to_dev_ptr(drv),
+                               "Too small DSPM size.. wrong device tree?");
+                       err = -EINVAL;
+                       goto out_remove;
+               }
+               /* expose only the usable (non-reserved) part of DSPM */
+               drv->dspm -= TRIV2_DSP_DSPM_OFFSET;
+       }
+
+       err = triv2_init(drv);
+       if (err < 0)
+               goto out_remove;
+
+       err = triv2_setup_idu(drv);
+       if (err < 0) {
+               triv2_cleanup(drv);
+               goto out_remove;
+       }
+
+       err = trinity_create_node(drv);
+       if (err < 0) {
+               triv2_cleanup(drv);
+               goto out_remove;
+       }
+
+       dev_info(drv_to_dev_ptr(drv), "Trinity Vision2 (TRIV2) probed");
+
+       return 0;
+
+out_remove:
+       trinity_remove(pdev, &triv2_desc);
+       return err;
+}
+
+/**
+ * @brief Removes a particular instance of a Trinity vision device
+ */
+static int trinity_triv2_remove(struct platform_device *pdev)
+{
+       struct trinity_driver *drv = platform_get_drvdata(pdev);
+
+       /* tear down in reverse order of probing */
+       trinity_destroy_node(drv);
+       triv2_cleanup(drv);
+
+       return trinity_remove(pdev, &triv2_desc);
+}
+
+/* Platform driver registration data. Note that .driver.owner is filled
+ * in by module_platform_driver()/platform_driver_register(), so it is
+ * not (and should not be) set here.
+ */
+static struct platform_driver trinity_triv2 = {
+       .probe = trinity_triv2_probe,
+       .remove = trinity_triv2_remove,
+       .driver =
+               {
+                       .name = "triv2",
+                       .of_match_table = of_match_ptr(trinity_match),
+                       .pm = &triv2_dev_pm_ops,
+               },
+};
+
+/* Register as a platform driver (generates module init/exit) */
+module_platform_driver(trinity_triv2);
+
+/* Module metadata */
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Dongju Chae <dongju.chae@samsung.com>");
+MODULE_AUTHOR("Wook Song <wook16.song@samsung.com>");
+MODULE_DESCRIPTION("Neural Processing Unit device driver for vision 2");
diff --git a/drivers/misc/trinity/trinity_vision2_profile.h b/drivers/misc/trinity/trinity_vision2_profile.h
new file mode 100644 (file)
index 0000000..d3d15ae
--- /dev/null
@@ -0,0 +1,288 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/**
+ * trinity/trinity_vision2_profile.h: Profile header for TRIV2 devices
+ *
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+
+#ifndef __TRINITY_VISION2_PROFILE_H__
+#define __TRINITY_VISION2_PROFILE_H__
+
+#include <linux/types.h>
+
+#define TRIV2_MAX_OPNAME       (128)
+#define TRIV2_MAX_PROFILE_SIZE (256)
+
+/** profiling record for a single op within a command */
+struct triv2_op_profile {
+       union {
+               struct {
+                       char op_name[TRIV2_MAX_OPNAME];
+
+                       int64_t cycles;
+
+                       /* memory traffic counters */
+                       int64_t dram_read;
+                       int64_t dram_write;
+
+                       int64_t sram_read;
+                       int64_t sram_write;
+
+                       /* cycle timestamps bracketing the op */
+                       int64_t start_cycles;
+                       int64_t end_cycles;
+
+                       uint32_t opcode;
+                       int64_t prog_seq;
+                       int64_t exec_seq;
+               } __attribute__((packed));
+               /* pads each record to a fixed TRIV2_MAX_PROFILE_SIZE bytes */
+               uint8_t reserved[TRIV2_MAX_PROFILE_SIZE];
+       };
+};
+
+/* Per-command profile blob: a small header followed by op records */
+struct triv2_cmd_profile {
+       int64_t total_cycles;
+       uint32_t total_ops;
+       /* flexible array member (C99); presumably total_ops entries */
+       struct triv2_op_profile profile_ops[];
+} __attribute__((packed));
+
+/* Driver-side bookkeeping for one request's profile data; hlist links it
+ * into a lookup table (presumably keyed by req_id — confirm at use site)
+ */
+struct triv2_profile {
+       int req_id;
+       struct hlist_node hlist;
+       struct triv2_cmd_profile *data;
+};
+
+/* TRIV2 opcode values as reported in op profiles (match struct
+ * triv2_op_profile::opcode). NOTE(review): gaps in the numbering
+ * (e.g. 0x22-0x3F) are presumably reserved — confirm against the NPU
+ * ISA specification.
+ */
+enum { NOP = 0x00,
+       HALT = 0x01,
+       ADMA_IN = 0x02,
+       ADMA_OUT = 0x03,
+       RESCALE_I8 = 0x04,
+       RESCALE_I16 = 0x05,
+       CONVERT_I16_I8 = 0x06,
+       CONVERT_I8_I16 = 0x07,
+       RELUN_I8 = 0x08,
+       RELUN_I16 = 0x09,
+       PRELU_I8 = 0x0A,
+       PRELU_I16 = 0x0B,
+       ADD_I8 = 0x0C,
+       ADD_I16 = 0x0D,
+       REDUCE_MEAN_I8 = 0x0E,
+       REDUCE_MEAN_I16 = 0x0F,
+       MAX_POOL_I8 = 0x10,
+       MAX_POOL_I16 = 0x11,
+       AVG_POOL_I8 = 0x12,
+       AVG_POOL_I16 = 0x13,
+       CONV_I8 = 0x14,
+       CONV_I16 = 0x15,
+       CONVE_I8 = 0x16,
+       CONVE_I16 = 0x17,
+       TCONV_I8 = 0x18,
+       TCONV_I16 = 0x19,
+       MUL_I8 = 0x1A,
+       MUL_I16 = 0x1B,
+       DCONV_I8 = 0x1C,
+       DCONV_I16 = 0x1D,
+       DCONVE_I8 = 0x1E,
+       DCONVE_I16 = 0x1F,
+       CONV_I8_P = 0x20,
+       CONV_I16_P = 0x21,
+       PDMA_IN = 0x40,
+       PDMA_OUT = 0x41,
+       ARGMAX_I8 = 0x42,
+       ARGMAX_I16 = 0x43,
+       RESHAPE_I8 = 0x44,
+       RESHAPE_I16 = 0x45,
+       TRANSPOSE_I8 = 0x46,
+       TRANSPOSE_I16 = 0x47,
+       CONCAT_I8 = 0x48,
+       CONCAT_I16 = 0x49,
+       PAD_I8 = 0x4A,
+       PAD_I16 = 0x4B,
+       STRIDED_SLICE_I8 = 0x4C,
+       STRIDED_SLICE_I16 = 0x4D,
+       CONVERT_FORMAT_I8 = 0x4E,
+       CONVERT_FORMAT_I16 = 0x4F,
+       SIGMOID_I8 = 0x50,
+       SIGMOID_I16 = 0x51,
+       TANH_I8 = 0x52,
+       TANH_I16 = 0x53,
+       ELU_I8 = 0x54,
+       ELU_I16 = 0x55,
+       FLOOR_I8 = 0x56,
+       FLOOR_I16 = 0x57,
+       RSQRT_I8 = 0x58,
+       RSQRT_I16 = 0x59,
+       SQRT_I8 = 0x5A,
+       SQRT_I16 = 0x5B,
+       SOFTMAX_I8 = 0x5C,
+       SOFTMAX_I16 = 0x5D,
+       DIVIDE_I8 = 0x60,
+       DIVIDE_I16 = 0x61,
+       FLOORDIV_I8 = 0x62,
+       FLOORDIV_I16 = 0x63,
+       LOGICAL_OR_I8 = 0x64,
+       LOGICAL_OR_I16 = 0x65,
+       GREATER_I8 = 0x66,
+       GREATER_I16 = 0x67,
+       GREATER_EQUAL_I8 = 0x68,
+       GREATER_EQUAL_I16 = 0x69,
+       POW_I8 = 0x6A,
+       POW_I16 = 0x6B,
+       EXP_I8 = 0x6C,
+       EXP_I16 = 0x6D,
+       NOT_EQUAL_I8 = 0x6E,
+       NOT_EQUAL_I16 = 0x6F,
+       BATCH_TO_SPACE_I8 = 0x70,
+       BATCH_TO_SPACE_I16 = 0x71,
+       SPACE_TO_BATCH_I8 = 0x72,
+       SPACE_TO_BATCH_I16 = 0x73,
+       DEPTH_TO_SPACE_I8 = 0x74,
+       DEPTH_TO_SPACE_I16 = 0x75,
+       SPACE_TO_DEPTH_I8 = 0x76,
+       SPACE_TO_DEPTH_I16 = 0x77,
+       YUV_TO_RGB_I8 = 0x7A,
+       YUV_TO_RGB_I16 = 0x7B,
+       RESIZE_BILINEAR_I8 = 0x7C,
+       RESIZE_BILINEAR_I16 = 0x7D,
+       RESIZE_NEAREST_NEIGHBOR_I8 = 0x7E,
+       RESIZE_NEAREST_NEIGHBOR_I16 = 0x7F,
+       LOCAL_RESPONSE_NORM_I8 = 0x80,
+       LOCAL_RESPONSE_NORM_I16 = 0x81,
+       INSTANCE_NORM_I8 = 0x82,
+       INSTANCE_NORM_I16 = 0x83,
+       REDUCED_SUM_SSUM_I8 = 0x84,
+       REDUCED_SUM_SSUM_I16 = 0x85,
+       REDUCED_SUM_SSUM_ACC_I8 = 0x86,
+       REDUCED_SUM_SSUM_ACC_I16 = 0x87,
+       REDUCED_SUM_2SUM_I8 = 0x88,
+       REDUCED_SUM_2SUM_I16 = 0x89,
+       REDUCED_MEAN_DEV_WSUM_I8 = 0x8A,
+       REDUCED_MEAN_DEV_WSUM_I16 = 0x8B,
+       REDUCED_MEAN_DEV_I8 = 0x8C,
+       REDUCED_MEAN_DEV_I16 = 0x8D,
+       RESCALE_CW_I8 = 0x8E,
+       RESCALE_CW_I16 = 0x8F,
+       REDUCED_MEAN_SCALE_WSUM_I8 = 0x90,
+       REDUCED_MEAN_SCALE_WSUM_I16 = 0x91,
+       RESCALE_CHANNELWISE_I8 = 0x92,
+       RESCALE_CHANNELWISE_I16 = 0x93,
+};
+
+/** X-macro entry generator: expands to an opcode-indexed designated
+ * initializer, e.g. [NOP] = "NOP",
+ */
+#define TRIV2_GENERATE_OPNAME(OPNAME) [OPNAME] = #OPNAME,
+/* X-macro list of all opcodes; expand as
+ * TRIV2_FOREACH_OPNAME(TRIV2_GENERATE_OPNAME) to build an opcode-to-name
+ * lookup table. Must be kept in sync with the opcode enum above.
+ */
+#define TRIV2_FOREACH_OPNAME(OPNAME)                                           \
+       OPNAME(NOP)                                                            \
+       OPNAME(HALT)                                                           \
+       OPNAME(ADMA_IN)                                                        \
+       OPNAME(ADMA_OUT)                                                       \
+       OPNAME(RESCALE_I8)                                                     \
+       OPNAME(RESCALE_I16)                                                    \
+       OPNAME(CONVERT_I16_I8)                                                 \
+       OPNAME(CONVERT_I8_I16)                                                 \
+       OPNAME(RELUN_I8)                                                       \
+       OPNAME(RELUN_I16)                                                      \
+       OPNAME(PRELU_I8)                                                       \
+       OPNAME(PRELU_I16)                                                      \
+       OPNAME(ADD_I8)                                                         \
+       OPNAME(ADD_I16)                                                        \
+       OPNAME(REDUCE_MEAN_I8)                                                 \
+       OPNAME(REDUCE_MEAN_I16)                                                \
+       OPNAME(MAX_POOL_I8)                                                    \
+       OPNAME(MAX_POOL_I16)                                                   \
+       OPNAME(AVG_POOL_I8)                                                    \
+       OPNAME(AVG_POOL_I16)                                                   \
+       OPNAME(CONV_I8)                                                        \
+       OPNAME(CONV_I16)                                                       \
+       OPNAME(CONVE_I8)                                                       \
+       OPNAME(CONVE_I16)                                                      \
+       OPNAME(TCONV_I8)                                                       \
+       OPNAME(TCONV_I16)                                                      \
+       OPNAME(MUL_I8)                                                         \
+       OPNAME(MUL_I16)                                                        \
+       OPNAME(DCONV_I8)                                                       \
+       OPNAME(DCONV_I16)                                                      \
+       OPNAME(DCONVE_I8)                                                      \
+       OPNAME(DCONVE_I16)                                                     \
+       OPNAME(CONV_I8_P)                                                      \
+       OPNAME(CONV_I16_P)                                                     \
+       OPNAME(PDMA_IN)                                                        \
+       OPNAME(PDMA_OUT)                                                       \
+       OPNAME(ARGMAX_I8)                                                      \
+       OPNAME(ARGMAX_I16)                                                     \
+       OPNAME(RESHAPE_I8)                                                     \
+       OPNAME(RESHAPE_I16)                                                    \
+       OPNAME(TRANSPOSE_I8)                                                   \
+       OPNAME(TRANSPOSE_I16)                                                  \
+       OPNAME(CONCAT_I8)                                                      \
+       OPNAME(CONCAT_I16)                                                     \
+       OPNAME(PAD_I8)                                                         \
+       OPNAME(PAD_I16)                                                        \
+       OPNAME(STRIDED_SLICE_I8)                                               \
+       OPNAME(STRIDED_SLICE_I16)                                              \
+       OPNAME(CONVERT_FORMAT_I8)                                              \
+       OPNAME(CONVERT_FORMAT_I16)                                             \
+       OPNAME(SIGMOID_I8)                                                     \
+       OPNAME(SIGMOID_I16)                                                    \
+       OPNAME(TANH_I8)                                                        \
+       OPNAME(TANH_I16)                                                       \
+       OPNAME(ELU_I8)                                                         \
+       OPNAME(ELU_I16)                                                        \
+       OPNAME(FLOOR_I8)                                                       \
+       OPNAME(FLOOR_I16)                                                      \
+       OPNAME(RSQRT_I8)                                                       \
+       OPNAME(RSQRT_I16)                                                      \
+       OPNAME(SQRT_I8)                                                        \
+       OPNAME(SQRT_I16)                                                       \
+       OPNAME(SOFTMAX_I8)                                                     \
+       OPNAME(SOFTMAX_I16)                                                    \
+       OPNAME(DIVIDE_I8)                                                      \
+       OPNAME(DIVIDE_I16)                                                     \
+       OPNAME(FLOORDIV_I8)                                                    \
+       OPNAME(FLOORDIV_I16)                                                   \
+       OPNAME(LOGICAL_OR_I8)                                                  \
+       OPNAME(LOGICAL_OR_I16)                                                 \
+       OPNAME(GREATER_I8)                                                     \
+       OPNAME(GREATER_I16)                                                    \
+       OPNAME(GREATER_EQUAL_I8)                                               \
+       OPNAME(GREATER_EQUAL_I16)                                              \
+       OPNAME(POW_I8)                                                         \
+       OPNAME(POW_I16)                                                        \
+       OPNAME(EXP_I8)                                                         \
+       OPNAME(EXP_I16)                                                        \
+       OPNAME(NOT_EQUAL_I8)                                                   \
+       OPNAME(NOT_EQUAL_I16)                                                  \
+       OPNAME(BATCH_TO_SPACE_I8)                                              \
+       OPNAME(BATCH_TO_SPACE_I16)                                             \
+       OPNAME(SPACE_TO_BATCH_I8)                                              \
+       OPNAME(SPACE_TO_BATCH_I16)                                             \
+       OPNAME(DEPTH_TO_SPACE_I8)                                              \
+       OPNAME(DEPTH_TO_SPACE_I16)                                             \
+       OPNAME(SPACE_TO_DEPTH_I8)                                              \
+       OPNAME(SPACE_TO_DEPTH_I16)                                             \
+       OPNAME(YUV_TO_RGB_I8)                                                  \
+       OPNAME(YUV_TO_RGB_I16)                                                 \
+       OPNAME(RESIZE_BILINEAR_I8)                                             \
+       OPNAME(RESIZE_BILINEAR_I16)                                            \
+       OPNAME(RESIZE_NEAREST_NEIGHBOR_I8)                                     \
+       OPNAME(RESIZE_NEAREST_NEIGHBOR_I16)                                    \
+       OPNAME(LOCAL_RESPONSE_NORM_I8)                                         \
+       OPNAME(LOCAL_RESPONSE_NORM_I16)                                        \
+       OPNAME(INSTANCE_NORM_I8)                                               \
+       OPNAME(INSTANCE_NORM_I16)                                              \
+       OPNAME(REDUCED_SUM_SSUM_I8)                                            \
+       OPNAME(REDUCED_SUM_SSUM_I16)                                           \
+       OPNAME(REDUCED_SUM_SSUM_ACC_I8)                                        \
+       OPNAME(REDUCED_SUM_SSUM_ACC_I16)                                       \
+       OPNAME(REDUCED_SUM_2SUM_I8)                                            \
+       OPNAME(REDUCED_SUM_2SUM_I16)                                           \
+       OPNAME(REDUCED_MEAN_DEV_WSUM_I8)                                       \
+       OPNAME(REDUCED_MEAN_DEV_WSUM_I16)                                      \
+       OPNAME(REDUCED_MEAN_DEV_I8)                                            \
+       OPNAME(REDUCED_MEAN_DEV_I16)                                           \
+       OPNAME(RESCALE_CW_I8)                                                  \
+       OPNAME(RESCALE_CW_I16)                                                 \
+       OPNAME(REDUCED_MEAN_SCALE_WSUM_I8)                                     \
+       OPNAME(REDUCED_MEAN_SCALE_WSUM_I16)                                    \
+       OPNAME(RESCALE_CHANNELWISE_I8)                                         \
+       OPNAME(RESCALE_CHANNELWISE_I16)
+#endif /* __TRINITY_VISION2_PROFILE_H__ */
diff --git a/include/uapi/misc/trinity.h b/include/uapi/misc/trinity.h
new file mode 100644 (file)
index 0000000..1136af4
--- /dev/null
@@ -0,0 +1,482 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/**
+ * include/uapi/misc/trinity.h: User-level header for trinity devices.
+ *
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Parichay Kapoor <pk.kapoor@samsung.com>
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ * Copyright (C) 2020 Wook Song <wook16.song@samsung.com>
+ */
+
+#ifndef __TRINITY_H__
+#define __TRINITY_H__
+
+#include <linux/types.h>
+
+#define TRINITY_API_LEVEL 12
+
+/**
+ * enum trinity_state - Enum that describes a trinity device state
+ * @TRINITY_STATE_UNKNOWN: A device has unknown state
+ * @TRINITY_STATE_PAUSE: A device is paused
+ * @TRINITY_STATE_READY: A device is ready
+ * @TRINITY_STATE_END: End of trinity_state
+ */
+enum trinity_state {
+       TRINITY_STATE_UNKNOWN = -1,
+       TRINITY_STATE_PAUSE = 0,
+       TRINITY_STATE_READY,
+       TRINITY_STATE_END,
+};
+
+/**
+ * enum trinity_input_mode - Enum that describes an input source
+ * @TRINITY_INPUT_UNKNOWN: Unknown input mode
+ * @TRINITY_INPUT_CPU: Input feed by CPU
+ * @TRINITY_INPUT_HW: Input feed by third-party HW
+ * @TRINITY_INPUT_END: End of trinity_input_mode
+ */
+enum trinity_input_mode {
+       TRINITY_INPUT_UNKNOWN = -1,
+       TRINITY_INPUT_CPU = 0,
+       TRINITY_INPUT_HW,
+       TRINITY_INPUT_END,
+};
+
+/**
+ * enum trinity_output_mode - Enum that describes an output source
+ * @TRINITY_OUTPUT_UNKNOWN: Unknown output mode
+ * @TRINITY_OUTPUT_CPU_INTR: Output completion handling by interrupt
+ * @TRINITY_OUTPUT_CPU_POLL: Output completion handling by polling
+ * @TRINITY_OUTPUT_HW: Output completion handling by third-party HW
+ * @TRINITY_OUTPUT_END: End of trinity_output_mode
+ */
+enum trinity_output_mode {
+       TRINITY_OUTPUT_UNKNOWN = -1,
+       TRINITY_OUTPUT_CPU_INTR = 0,
+       TRINITY_OUTPUT_CPU_POLL,
+       TRINITY_OUTPUT_HW,
+       TRINITY_OUTPUT_END,
+};
+
+/**
+ * enum trinity_app_status - Enum that describes an app status
+ * @TRINITY_APP_STATUS_UNKNOWN: Unknown app status
+ * @TRINITY_APP_STATUS_ERROR: App has got some errors
+ * @TRINITY_APP_STATUS_PENDING: App is currently pending
+ * @TRINITY_APP_STATUS_STARTED: App was started
+ * @TRINITY_APP_STATUS_TERMINATED: App was terminated
+ */
+enum trinity_app_status {
+       TRINITY_APP_STATUS_UNKNOWN = 0,
+       TRINITY_APP_STATUS_ERROR = 1,
+       TRINITY_APP_STATUS_PENDING = 2,
+       TRINITY_APP_STATUS_STARTED = 3,
+       TRINITY_APP_STATUS_TERMINATED = 4
+};
+
+/**
+ * enum trinity_req_status - Enum that describes a request status
+ * @TRINITY_REQ_STATUS_UNKNOWN: Unknown request status
+ * @TRINITY_REQ_STATUS_ERROR: Request has got some errors
+ * @TRINITY_REQ_STATUS_PENDING: Request is currently pending
+ * @TRINITY_REQ_STATUS_RUNNING: Request is currently running
+ * @TRINITY_REQ_STATUS_FINISHED: Request was finished
+ */
+enum trinity_req_status {
+       TRINITY_REQ_STATUS_UNKNOWN = 0,
+       TRINITY_REQ_STATUS_ERROR = 1,
+       TRINITY_REQ_STATUS_PENDING = 2, /* A request is submitted */
+       TRINITY_REQ_STATUS_RUNNING = 3, /* A request is running on NPU */
+       TRINITY_REQ_STATUS_FINISHED = 4 /* A request is just finished */
+};
+
+/**
+ * enum trinity_req_priority - Enum that describes a request priority
+ * @TRINITY_REQ_PRIORITY_LOW: Low priority
+ * @TRINITY_REQ_PRIORITY_MID: Mid priority scheduled with a higher chance than low one
+ * @TRINITY_REQ_PRIORITY_HIGH: High priority preempting lower priority requests
+ */
+enum trinity_req_priority {
+       TRINITY_REQ_PRIORITY_LOW = 0,
+       TRINITY_REQ_PRIORITY_MID = 1,
+       TRINITY_REQ_PRIORITY_HIGH = 2,
+};
+
+/**
+ * enum trinity_hwmem_type - A type of DMA buffer allocation method.
+ * @TRINITY_HWMEM_DMA_CONT: Use CMA to allocate backing storage of DMA buffers.
+ * @TRINITY_HWMEM_DMA_IOMMU: Use IOMMU to allocate backing storage of DMA buffers.
+ * @TRINITY_HWMEM_END: Sentinel.
+ */
+enum trinity_hwmem_type {
+       TRINITY_HWMEM_DMA_CONT = 0,
+       TRINITY_HWMEM_DMA_IOMMU,
+       TRINITY_HWMEM_END,
+};
+
+#ifndef TASK_COMM_LEN
+#define TASK_COMM_LEN 16
+#endif
+
+#define TRINITY_APP_NAME_MAX TASK_COMM_LEN
+#define TRINITY_APP_STAT_MAX 10
+#define TRINITY_REQ_STAT_MAX 10
+
+/**
+ * struct trinity_ioctl_stat_app - Describes stat of the target app
+ * @app_id: Trinity app id (currently, equal to pid)
+ * @name: Trinity app name
+ * @status: Trinity app status
+ * @num_total_reqs: Number of total requests in app (including finished ones)
+ * @num_active_reqs: Number of active (running or pending) requests in app
+ * @total_alloc_mem: Total size of allocated memory in the device
+ * @total_freed_mem: Total size of freed memory in the device
+ */
+struct trinity_ioctl_stat_app {
+       __s32 app_id;
+
+       char name[TRINITY_APP_NAME_MAX];
+       enum trinity_app_status status;
+
+       __u32 num_total_reqs;
+       __u32 num_active_reqs;
+
+       __u64 total_alloc_mem;
+       __u64 total_freed_mem;
+} __attribute__((packed));
+
+/**
+ * struct trinity_ioctl_stat_apps - Describes stats of the latest apps
+ * @num_apps: Number of apps for the stat list
+ * @stat: Stat of the latest apps
+ */
+struct trinity_ioctl_stat_apps {
+       __u32 num_apps;
+       struct trinity_ioctl_stat_app stat[TRINITY_APP_STAT_MAX];
+} __attribute__((packed));
+
+/**
+ * struct trinity_ioctl_stat_req - Describes stat of the target request
+ * @req_id: Trinity req id
+ * @model_id: Trinity model id
+ * @priority: Request priority (low, mid, or high)
+ * @status: Request status
+ * @sched_time: scheduling time in ms
+ * @infer_time: inference time in ms
+ */
+struct trinity_ioctl_stat_req {
+       __s32 req_id;
+       __u64 model_id;
+
+       enum trinity_req_priority priority;
+       enum trinity_req_status status;
+
+       __u32 sched_time;
+       __u32 infer_time;
+} __attribute__((packed));
+
+/**
+ * struct trinity_ioctl_stat_reqs - Describes stats of the latest reqs
+ * @app_id: Trinity app id (0 means 'current')
+ * @num_reqs: Number of reqs for stat list
+ * @stat: Stat of the latest reqs
+ */
+struct trinity_ioctl_stat_reqs {
+       __s32 app_id;
+       __u32 num_reqs;
+       struct trinity_ioctl_stat_req stat[TRINITY_REQ_STAT_MAX];
+} __attribute__((packed));
+
+/**
+ * struct trinity_ioctl_hwmem - A structure that describes hardware memory (hwmem)
+ * @type: The type of hwmem allocation method
+ * @size: The size of hwmem
+ * @dbuf_fd: File descriptor for dmabuf representing hwmem
+ */
+struct trinity_ioctl_hwmem {
+       enum trinity_hwmem_type type;
+       __u64 size;
+       __s32 dbuf_fd;
+} __attribute__((packed));
+
+/**
+ * struct trinity_ioctl_profile_meta - Describes profiling meta info.
+ * @req_id: The target req id for profiling
+ * @total_cycles: The total number of cycles of the given req
+ * @total_ops: The total number of operations of the given req
+ * @input_footprint: The DRAM footprint of input data
+ * @output_footprint: The DRAM footprint of output data
+ * @profile_size: The size of profiling data
+ */
+struct trinity_ioctl_profile_meta {
+       __s32 req_id;
+       __s64 total_cycles;
+       __u32 total_ops;
+       __s64 input_footprint;
+       __s64 output_footprint;
+       __u32 profile_size;
+} __attribute__((packed));
+
+/**
+ * struct trinity_ioctl_profile_buff - Describes profiling buff info.
+ * @req_id: The target req id for profiling
+ * @profile_pos: The start position to extract profiling data
+ * @profile_size: The size of user-allocated profiling buffer
+ * @profile_buf: The profiling buffer which user allocated
+ */
+struct trinity_ioctl_profile_buff {
+       __s32 req_id;
+       __u32 profile_pos;
+       __u32 profile_size;
+       void __user *profile_buf;
+} __attribute__((packed));
+
+/**
+ * struct trinity_ioctl_model - A structure that configures a model registered on NPU
+ * @id: Id for NPU model to extract the base phys addr
+ * @dbuf_fd: File descriptor for dmabuf representing the model
+ * @program_offset_addr: Offset address for the instructions (NPU_PROG_BASE)
+ * @program_size: Size of the program instructions (NPU_PROG_SIZE)
+ * @version: The version of npubinfmt
+ * @endp_trnt_model_common: Indicator for the end of common model parameters
+ * @weight_offset_addr: Offset address for storing weights (NPU_WGT_BASE)
+ * @metadata_dbuf_fd: File descriptor for dmabuf representing the metadata
+ * @metadata_ext_dbuf_fd: File descriptor for dmabuf of the metadata extra
+ * @metadata_ext_size: Size of the metadata extra
+ * @num_visa_insts: Number of virtual ISA instructions
+ */
+struct trinity_ioctl_model {
+       __u64 id;
+       __s32 dbuf_fd;
+       __u64 program_offset_addr;
+       __u64 program_size;
+       __u32 version;
+       union {
+               __u8 endp_trnt_model_common[0];
+               struct {
+                       __u64 weight_offset_addr;
+               } __attribute__((packed));
+               struct {
+                       __s32 metadata_dbuf_fd;
+                       __s32 metadata_ext_dbuf_fd;
+                       __u64 metadata_ext_size;
+                       __u32 num_visa_insts;
+               } __attribute__((packed));
+       };
+} __attribute__((packed));
+
+/**
+ * struct trinity_ioctl_input - A structure that configures an input passed to NPU
+ * @dbuf_fd: File descriptor for dmabuf of I/O buffer (or segment table)
+ * @model_id: Model id received when setting the model in the NPU
+ * @req_id: Request id to distinguish each run_input
+ * @timeout_ms: Timeout in ms, zero is regarded as preemption
+ * @priority: Priority (LOW: 0, MID: 1, HIGH: 2)
+ * @endp_trnt_input_common: Indicator for the end of common input parameters
+ * @activation_offset_addr0: Offset address for storing activations (NPU_ACT_BASE0)
+ * @activation_offset_addr1: Offset address for storing activations (NPU_ACT_BASE1)
+ * @num_segments: Number of segments
+ * @input_mode: Input mode (who is supposed to feed input)
+ * @output_mode: Output mode (who is supposed to retrieve output)
+ * @hw_input_seg: Third-party HW's input segment idx
+ * @hw_output_seg: Third-party HW's output segment idx
+ */
+
+/** model configuration settings to pass input information to NPU */
+struct trinity_ioctl_input {
+       /* id for I/O buffer (or segment table) to extract the base phys addr */
+       __s32 dbuf_fd;
+       /** model id received when setting the model in the NPU */
+       __u64 model_id;
+       /** req id to distinguish each run_input */
+       __s32 req_id;
+       /** timeout in ms */
+       __s64 timeout_ms;
+       /** priority */
+       __u32 priority;
+       union {
+               __u8 endp_trnt_input_common[0];
+               struct {
+                       /* added for TRIV-1 */
+                       __u64 activation_offset_addr0;
+                       __u64 activation_offset_addr1;
+               } __attribute__((packed));
+               struct {
+                       /* added for TRIV-2 */
+                       __u32 num_segments;
+                       enum trinity_input_mode input_mode;
+                       enum trinity_output_mode output_mode;
+                       __s32 hw_input_seg;
+                       __s32 hw_output_seg;
+                       /* [optional] vd scheduler info */
+                       union {
+                               struct { /* user request */
+                                       __u32 task_handle;
+                                       __u32 subtask_idx;
+                               } __attribute__((packed));
+                               struct { /* kernel request */
+                                       __u32 task_id;
+                               } __attribute__((packed));
+                       };
+               } __attribute__((packed));
+       };
+} __attribute__((packed));
+
+/**
+ * struct trinity_ioctl_fpga_memcpy - A structure that contains driver-assisted memcpy
+ * @dbuf_fd: File descriptor for dmabuf of the target buffer
+ * @dbuf_off: Offset from the dmabuf base address
+ * @user_addr: Address of user-level buffer
+ * @user_size: Size of user-level buffer
+ *
+ * It's a workaround structure for the FPGA environment
+ */
+struct trinity_ioctl_fpga_memcpy {
+       __s32 dbuf_fd;
+       __u32 dbuf_off;
+       void __user *user_addr;
+       __u64 user_size;
+} __attribute__((packed));
+
+/*
+ * struct to share device status to user space
+ * This will be moved to debugfs
+ */
+#if 0
+struct trinity_status {
+       /** Processor Information */
+       __u32 cp_info;
+
+       /** Processor Status */
+       __u32 cp_proc_stat;
+       __u32 npu_stat;
+
+       /** Control Status */
+       __u32 cp_dmai_ctrl;
+
+       /** Monitor Registers */
+       __u32 cp_cnt_cfg;
+       __u32 cp_cnt_frl;
+       __u32 cp_cnt_frh;
+       __u32 cp_cnt_stl;
+       __u32 cp_cnt_sth;
+};
+#endif
+
+#define TRINITY_MASK_DEV       (0xFF000000)
+#define TRINITY_MASK_MAJOR_VER (0x00FF0000)
+#define TRINITY_MASK_MINOR_VER (0x0000FF00)
+#define TRINITY_MASK_EXTRA_VER (0x000000FF)
+
+#define TRINITY_SHIFT_DEV      (24)
+#define TRINITY_SHIFT_MAJOR_VER (16)
+#define TRINITY_SHIFT_MINOR_VER (8)
+#define TRINITY_SHIFT_EXTRA_VER (0)
+#define TRINITY_SHIFT_MODEL_ID (16)
+
+#define trinity_gen_ver(dev, mj, mn, ex)                                       \
+       (dev << TRINITY_SHIFT_DEV) | (mj << TRINITY_SHIFT_MAJOR_VER) |         \
+               (mn << TRINITY_SHIFT_MINOR_VER) |                              \
+               (ex << TRINITY_SHIFT_EXTRA_VER)
+
+/**
+ * enum trinity_dev_type - Enum that describes a trinity device type
+ * @TRINITY_DEV_UNKNOWN: Unknown device type
+ * @TRINITY_DEV_VISION: Trinity Vision (TRIV)
+ * @TRINITY_DEV_AUDIO: Trinity Asr (TRIA)
+ * @TRINITY_DEV_VISION2: Trinity Vision2 (TRIV2)
+ * @TRINITY_DEV_VISION2_CUSE: Trinity Vision2 (TRIV2), CUSE-based impl.
+ * @TRINITY_DEV_END: End of trinity_dev_type
+ */
+enum trinity_dev_type {
+       TRINITY_DEV_UNKNOWN = 0,
+       TRINITY_DEV_VISION,
+       TRINITY_DEV_AUDIO,
+       TRINITY_DEV_VISION2,
+       TRINITY_DEV_VISION2_CUSE, /* CUSE-based impl. for triv2 */
+       TRINITY_DEV_END /* sentinel */
+};
+
+/**
+ * Major number can't be dynamic as ioctls need it.
+ */
+#define TRINITY_DRIVER_MAGIC 0x88
+
+#define TRINITY_IO(no)             _IO(TRINITY_DRIVER_MAGIC, no)
+#define TRINITY_IOR(no, data_type)  _IOR(TRINITY_DRIVER_MAGIC, no, data_type)
+#define TRINITY_IOW(no, data_type)  _IOW(TRINITY_DRIVER_MAGIC, no, data_type)
+#define TRINITY_IOWR(no, data_type) _IOWR(TRINITY_DRIVER_MAGIC, no, data_type)
+
+/** Device Information */
+
+/** Get the device version information from the driver */
+#define TRINITY_IOCTL_GET_VERSION TRINITY_IOR(1, __u32)
+/** Get the device API level from the driver */
+#define TRINITY_IOCTL_GET_API_LEVEL TRINITY_IOR(2, __u32)
+/** Get the device state from the driver */
+#define TRINITY_IOCTL_GET_STATE TRINITY_IOR(3, __s32)
+/** Get the device tops information from the driver */
+#define TRINITY_IOCTL_GET_TOPS TRINITY_IOR(4, __u32)
+/** Get the device dspm information from the driver */
+#define TRINITY_IOCTL_GET_DSPM TRINITY_IOR(5, __u32)
+/** Get the next request ID from the driver */
+#define TRINITY_IOCTL_GET_NEXT_REQUEST TRINITY_IOR(6, __s32)
+
+/** Device Control */
+
+/** Allocate driver-managed memory */
+#define TRINITY_IOCTL_HWMEM_ALLOC TRINITY_IOW(21, struct trinity_ioctl_hwmem)
+
+/** De-allocate driver-managed memory */
+#define TRINITY_IOCTL_HWMEM_DEALLOC TRINITY_IOW(22, struct trinity_ioctl_hwmem)
+
+/** Register the given model config in the device and return model id */
+#define TRINITY_IOCTL_REGISTER_MODEL                                           \
+       TRINITY_IOWR(23, struct trinity_ioctl_model)
+
+/** Unregister the model config associated with the given model_id */
+#define TRINITY_IOCTL_DEREGISTER_MODEL TRINITY_IOW(24, __u64)
+
+/** Run the device with the given input */
+#define TRINITY_IOCTL_RUN_INPUT TRINITY_IOWR(25, struct trinity_ioctl_input)
+
+/** Stop all requests submitted to the device */
+#define TRINITY_IOCTL_STOP_REQUESTS TRINITY_IO(26)
+
+/** Stop the target request with id returned by run_input */
+#define TRINITY_IOCTL_STOP_REQUEST TRINITY_IOW(27, __s32)
+
+/** Device Statistics/Profile */
+
+/** Get the current app stat in the opened device */
+#define TRINITY_IOCTL_STAT_CURRENT_APP                                         \
+       TRINITY_IOR(51, struct trinity_ioctl_stat_app)
+
+/** Get latest apps' stat of the opened device */
+#define TRINITY_IOCTL_STAT_APPS TRINITY_IOR(52, struct trinity_ioctl_stat_apps)
+
+/** Get latest reqs' stat in the target app */
+#define TRINITY_IOCTL_STAT_REQS TRINITY_IOR(53, struct trinity_ioctl_stat_reqs)
+
+/** Get profiling metadata of the request */
+#define TRINITY_IOCTL_GET_PROFILE_META                                         \
+       TRINITY_IOWR(54, struct trinity_ioctl_profile_meta)
+
+/** Get profiling per-op data of the request */
+#define TRINITY_IOCTL_GET_PROFILE_BUFF                                         \
+       TRINITY_IOWR(55, struct trinity_ioctl_profile_buff)
+
+/** Device Testing/Workaround */
+
+/** Driver-assisted memory copy for FPGA env. */
+#define TRINITY_IOCTL_FPGA_MEMCPY                                              \
+       TRINITY_IOWR(91, struct trinity_ioctl_fpga_memcpy)
+
+/** A wrapper of trinity_run_internal_req() */
+#define TRINITY_IOCTL_RUN_INTERNAL_REQ TRINITY_IOW(92, dev_t)
+
+#ifdef __KERNEL__
+__s32 trinity_run_internal_req(dev_t);
+#endif
+#endif /* __TRINITY_H__ */