habanalabs: PCIe iATU refactoring
author Ofir Bitton <obitton@habana.ai>
Mon, 15 Jun 2020 14:45:12 +0000 (17:45 +0300)
committer Oded Gabbay <oded.gabbay@gmail.com>
Fri, 24 Jul 2020 17:31:36 +0000 (20:31 +0300)
Split the iATU initialization into separate inbound and outbound methods.
The split is required so that each PCIe region can be configured with its
own match mode.
In addition, add support for PCI address match mode.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/misc/habanalabs/gaudi/gaudi.c
drivers/misc/habanalabs/goya/goya.c
drivers/misc/habanalabs/habanalabs.h
drivers/misc/habanalabs/pci.c

index 888f42a..a6e40de 100644 (file)
@@ -465,6 +465,7 @@ static int gaudi_pci_bars_map(struct hl_device *hdev)
 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
 {
        struct gaudi_device *gaudi = hdev->asic_specific;
+       struct hl_inbound_pci_region pci_region;
        u64 old_addr = addr;
        int rc;
 
@@ -472,7 +473,10 @@ static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
                return old_addr;
 
        /* Inbound Region 2 - Bar 4 - Point to HBM */
-       rc = hl_pci_set_dram_bar_base(hdev, 2, 4, addr);
+       pci_region.mode = PCI_BAR_MATCH_MODE;
+       pci_region.bar = HBM_BAR_ID;
+       pci_region.addr = addr;
+       rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
        if (rc)
                return U64_MAX;
 
@@ -486,22 +490,43 @@ static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
 
 static int gaudi_init_iatu(struct hl_device *hdev)
 {
-       int rc = 0;
+       struct hl_inbound_pci_region inbound_region;
+       struct hl_outbound_pci_region outbound_region;
+       int rc;
+
+       /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
+       inbound_region.mode = PCI_BAR_MATCH_MODE;
+       inbound_region.bar = SRAM_BAR_ID;
+       inbound_region.addr = SRAM_BASE_ADDR;
+       rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
+       if (rc)
+               goto done;
 
        /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
-       rc  = hl_pci_iatu_write(hdev, 0x314,
-                               lower_32_bits(SPI_FLASH_BASE_ADDR));
-       rc |= hl_pci_iatu_write(hdev, 0x318,
-                               upper_32_bits(SPI_FLASH_BASE_ADDR));
-       rc |= hl_pci_iatu_write(hdev, 0x300, 0);
-       /* Enable + Bar match + match enable */
-       rc |= hl_pci_iatu_write(hdev, 0x304, 0xC0080200);
+       inbound_region.mode = PCI_BAR_MATCH_MODE;
+       inbound_region.bar = CFG_BAR_ID;
+       inbound_region.addr = SPI_FLASH_BASE_ADDR;
+       rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
+       if (rc)
+               goto done;
 
+       /* Inbound Region 2 - Bar 4 - Point to HBM */
+       inbound_region.mode = PCI_BAR_MATCH_MODE;
+       inbound_region.bar = HBM_BAR_ID;
+       inbound_region.addr = DRAM_PHYS_BASE;
+       rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
        if (rc)
-               return -EIO;
+               goto done;
+
+       hdev->asic_funcs->set_dma_mask_from_fw(hdev);
 
-       return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
-                               HOST_PHYS_BASE, HOST_PHYS_SIZE);
+       /* Outbound Region 0 - Point to Host */
+       outbound_region.addr = HOST_PHYS_BASE;
+       outbound_region.size = HOST_PHYS_SIZE;
+       rc = hl_pci_set_outbound_region(hdev, &outbound_region);
+
+done:
+       return rc;
 }
 
 static int gaudi_early_init(struct hl_device *hdev)
@@ -2884,16 +2909,6 @@ static int gaudi_hw_init(struct hl_device *hdev)
 
        gaudi_init_hbm_dma_qmans(hdev);
 
-       /*
-        * Before pushing u-boot/linux to device, need to set the hbm bar to
-        * base address of dram
-        */
-       if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
-               dev_err(hdev->dev,
-                       "failed to map HBM bar to DRAM base address\n");
-               return -EIO;
-       }
-
        rc = gaudi_init_cpu(hdev);
        if (rc) {
                dev_err(hdev->dev, "failed to initialize CPU\n");
index ff32a8f..5839b5b 100644 (file)
@@ -458,6 +458,7 @@ static int goya_pci_bars_map(struct hl_device *hdev)
 static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
 {
        struct goya_device *goya = hdev->asic_specific;
+       struct hl_inbound_pci_region pci_region;
        u64 old_addr = addr;
        int rc;
 
@@ -465,7 +466,10 @@ static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
                return old_addr;
 
        /* Inbound Region 1 - Bar 4 - Point to DDR */
-       rc = hl_pci_set_dram_bar_base(hdev, 1, 4, addr);
+       pci_region.mode = PCI_BAR_MATCH_MODE;
+       pci_region.bar = DDR_BAR_ID;
+       pci_region.addr = addr;
+       rc = hl_pci_set_inbound_region(hdev, 1, &pci_region);
        if (rc)
                return U64_MAX;
 
@@ -487,8 +491,35 @@ static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
  */
 static int goya_init_iatu(struct hl_device *hdev)
 {
-       return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
-                               HOST_PHYS_BASE, HOST_PHYS_SIZE);
+       struct hl_inbound_pci_region inbound_region;
+       struct hl_outbound_pci_region outbound_region;
+       int rc;
+
+       /* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
+       inbound_region.mode = PCI_BAR_MATCH_MODE;
+       inbound_region.bar = SRAM_CFG_BAR_ID;
+       inbound_region.addr = SRAM_BASE_ADDR;
+       rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
+       if (rc)
+               goto done;
+
+       /* Inbound Region 1 - Bar 4 - Point to DDR */
+       inbound_region.mode = PCI_BAR_MATCH_MODE;
+       inbound_region.bar = DDR_BAR_ID;
+       inbound_region.addr = DRAM_PHYS_BASE;
+       rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
+       if (rc)
+               goto done;
+
+       hdev->asic_funcs->set_dma_mask_from_fw(hdev);
+
+       /* Outbound Region 0 - Point to Host  */
+       outbound_region.addr = HOST_PHYS_BASE;
+       outbound_region.size = HOST_PHYS_SIZE;
+       rc = hl_pci_set_outbound_region(hdev, &outbound_region);
+
+done:
+       return rc;
 }
 
 /*
index ae78145..3652365 100644 (file)
@@ -66,6 +66,8 @@
 #define IS_POWER_OF_2(n)               (n != 0 && ((n & (n - 1)) == 0))
 #define IS_MAX_PENDING_CS_VALID(n)     (IS_POWER_OF_2(n) && (n > 1))
 
+#define HL_PCI_NUM_BARS                        6
+
 /**
  * struct pgt_info - MMU hop page info.
  * @node: hash linked-list node for the pgts shadow hash of pgts.
@@ -91,6 +93,16 @@ struct hl_device;
 struct hl_fpriv;
 
 /**
+ * enum hl_pci_match_mode - pci match mode per region
+ * @PCI_ADDRESS_MATCH_MODE: address match mode
+ * @PCI_BAR_MATCH_MODE: bar match mode
+ */
+enum hl_pci_match_mode {
+       PCI_ADDRESS_MATCH_MODE,
+       PCI_BAR_MATCH_MODE
+};
+
+/**
  * enum hl_fw_component - F/W components to read version through registers.
  * @FW_COMP_UBOOT: u-boot.
  * @FW_COMP_PREBOOT: preboot.
@@ -126,6 +138,32 @@ enum hl_cs_type {
 };
 
 /*
+ * struct hl_inbound_pci_region - inbound region descriptor
+ * @mode: pci match mode for this region
+ * @addr: region target address
+ * @size: region size in bytes
+ * @offset_in_bar: offset within bar (address match mode)
+ * @bar: bar id
+ */
+struct hl_inbound_pci_region {
+       enum hl_pci_match_mode  mode;
+       u64                     addr;
+       u64                     size;
+       u64                     offset_in_bar;
+       u8                      bar;
+};
+
+/*
+ * struct hl_outbound_pci_region - outbound region descriptor
+ * @addr: region target address
+ * @size: region size in bytes
+ */
+struct hl_outbound_pci_region {
+       u64     addr;
+       u64     size;
+};
+
+/*
  * struct hl_hw_sob - H/W SOB info.
  * @hdev: habanalabs device structure.
  * @kref: refcount of this SOB. The SOB will reset once the refcount is zero.
@@ -1347,7 +1385,9 @@ struct hl_device_idle_busy_ts {
 /**
  * struct hl_device - habanalabs device structure.
  * @pdev: pointer to PCI device, can be NULL in case of simulator device.
- * @pcie_bar: array of available PCIe bars.
+ * @pcie_bar_phys: array of available PCIe bars physical addresses.
+ *                (required only for PCI address match mode)
+ * @pcie_bar: array of available PCIe bars virtual addresses.
  * @rmmio: configuration area address on SRAM.
  * @cdev: related char device.
  * @cdev_ctrl: char device for control operations only (INFO IOCTL)
@@ -1442,7 +1482,8 @@ struct hl_device_idle_busy_ts {
  */
 struct hl_device {
        struct pci_dev                  *pdev;
-       void __iomem                    *pcie_bar[6];
+       u64                             pcie_bar_phys[HL_PCI_NUM_BARS];
+       void __iomem                    *pcie_bar[HL_PCI_NUM_BARS];
        void __iomem                    *rmmio;
        struct cdev                     cdev;
        struct cdev                     cdev_ctrl;
@@ -1767,9 +1808,10 @@ int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
 int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data);
 int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
                                u64 addr);
-int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
-                       u64 dram_base_address, u64 host_phys_base_address,
-                       u64 host_phys_size);
+int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
+               struct hl_inbound_pci_region *pci_region);
+int hl_pci_set_outbound_region(struct hl_device *hdev,
+               struct hl_outbound_pci_region *pci_region);
 int hl_pci_init(struct hl_device *hdev);
 void hl_pci_fini(struct hl_device *hdev);
 
index 61a8bb0..1791f66 100644 (file)
@@ -9,9 +9,15 @@
 #include "include/hw_ip/pci/pci_general.h"
 
 #include <linux/pci.h>
+#include <linux/bitfield.h>
 
 #define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC  (HL_PCI_ELBI_TIMEOUT_MSEC * 10)
 
+#define IATU_REGION_CTRL_REGION_EN_MASK                BIT(31)
+#define IATU_REGION_CTRL_MATCH_MODE_MASK       BIT(30)
+#define IATU_REGION_CTRL_NUM_MATCH_EN_MASK     BIT(19)
+#define IATU_REGION_CTRL_BAR_NUM_MASK          GENMASK(10, 8)
+
 /**
  * hl_pci_bars_map() - Map PCI BARs.
  * @hdev: Pointer to hl_device structure.
@@ -187,110 +193,94 @@ static void hl_pci_reset_link_through_bridge(struct hl_device *hdev)
 }
 
 /**
- * hl_pci_set_dram_bar_base() - Set DDR BAR to map specific device address.
+ * hl_pci_set_inbound_region() - Configure inbound region
  * @hdev: Pointer to hl_device structure.
- * @inbound_region: Inbound region number.
- * @bar: PCI BAR number.
- * @addr: Address in DRAM. Must be aligned to DRAM bar size.
+ * @region: Inbound region number.
+ * @pci_region: Inbound region parameters.
  *
- * Configure the iATU so that the DRAM bar will start at the specified address.
+ * Configure the iATU inbound region.
  *
  * Return: 0 on success, negative value for failure.
  */
-int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
-                               u64 addr)
+int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
+               struct hl_inbound_pci_region *pci_region)
 {
        struct asic_fixed_properties *prop = &hdev->asic_prop;
-       u32 offset;
-       int rc;
+       u64 bar_phys_base, region_base, region_end_address;
+       u32 offset, ctrl_reg_val;
+       int rc = 0;
 
-       switch (inbound_region) {
-       case 0:
-               offset = 0x100;
-               break;
-       case 1:
-               offset = 0x300;
-               break;
-       case 2:
-               offset = 0x500;
-               break;
-       default:
-               dev_err(hdev->dev, "Invalid inbound region %d\n",
-                       inbound_region);
-               return -EINVAL;
-       }
+       /* region offset */
+       offset = (0x200 * region) + 0x100;
+
+       if (pci_region->mode == PCI_ADDRESS_MATCH_MODE) {
+               bar_phys_base = hdev->pcie_bar_phys[pci_region->bar];
+               region_base = bar_phys_base + pci_region->offset_in_bar;
+               region_end_address = region_base + pci_region->size - 1;
 
-       if (bar != 0 && bar != 2 && bar != 4) {
-               dev_err(hdev->dev, "Invalid PCI BAR %d\n", bar);
-               return -EINVAL;
+               rc |= hl_pci_iatu_write(hdev, offset + 0x8,
+                               lower_32_bits(region_base));
+               rc |= hl_pci_iatu_write(hdev, offset + 0xC,
+                               upper_32_bits(region_base));
+               rc |= hl_pci_iatu_write(hdev, offset + 0x10,
+                               lower_32_bits(region_end_address));
        }
 
        /* Point to the specified address */
-       rc = hl_pci_iatu_write(hdev, offset + 0x14, lower_32_bits(addr));
-       rc |= hl_pci_iatu_write(hdev, offset + 0x18, upper_32_bits(addr));
+       rc |= hl_pci_iatu_write(hdev, offset + 0x14,
+                       lower_32_bits(pci_region->addr));
+       rc |= hl_pci_iatu_write(hdev, offset + 0x18,
+                       upper_32_bits(pci_region->addr));
        rc |= hl_pci_iatu_write(hdev, offset + 0x0, 0);
-       /* Enable + BAR match + match enable + BAR number */
-       rc |= hl_pci_iatu_write(hdev, offset + 0x4, 0xC0080000 | (bar << 8));
+
+       /* Enable + bar/address match + match enable + bar number */
+       ctrl_reg_val = FIELD_PREP(IATU_REGION_CTRL_REGION_EN_MASK, 1);
+       ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_MATCH_MODE_MASK,
+                       pci_region->mode);
+       ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_NUM_MATCH_EN_MASK, 1);
+
+       if (pci_region->mode == PCI_BAR_MATCH_MODE)
+               ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_BAR_NUM_MASK,
+                               pci_region->bar);
+
+       rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val);
 
        /* Return the DBI window to the default location */
        rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
        rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
 
        if (rc)
-               dev_err(hdev->dev, "failed to map DRAM bar to 0x%08llx\n",
-                       addr);
+               dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n",
+                               pci_region->bar, pci_region->addr);
 
        return rc;
 }
 
 /**
- * hl_pci_init_iatu() - Initialize the iATU unit inside the PCI controller.
+ * hl_pci_set_outbound_region() - Configure outbound region 0
  * @hdev: Pointer to hl_device structure.
- * @sram_base_address: SRAM base address.
- * @dram_base_address: DRAM base address.
- * @host_phys_base_address: Base physical address of host memory for device
- *                          transactions.
- * @host_phys_size: Size of host memory for device transactions.
+ * @pci_region: Outbound region parameters.
  *
- * This is needed in case the firmware doesn't initialize the iATU.
+ * Configure the iATU outbound region 0.
  *
  * Return: 0 on success, negative value for failure.
  */
-int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
-                       u64 dram_base_address, u64 host_phys_base_address,
-                       u64 host_phys_size)
+int hl_pci_set_outbound_region(struct hl_device *hdev,
+               struct hl_outbound_pci_region *pci_region)
 {
        struct asic_fixed_properties *prop = &hdev->asic_prop;
-       u64 host_phys_end_addr;
+       u64 outbound_region_end_address;
        int rc = 0;
 
-       /* Inbound Region 0 - Bar 0 - Point to SRAM base address */
-       rc  = hl_pci_iatu_write(hdev, 0x114, lower_32_bits(sram_base_address));
-       rc |= hl_pci_iatu_write(hdev, 0x118, upper_32_bits(sram_base_address));
-       rc |= hl_pci_iatu_write(hdev, 0x100, 0);
-       /* Enable + Bar match + match enable */
-       rc |= hl_pci_iatu_write(hdev, 0x104, 0xC0080000);
-
-       /* Return the DBI window to the default location */
-       rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
-       rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
-
-       hdev->asic_funcs->set_dma_mask_from_fw(hdev);
-
-       /* Point to DRAM */
-       if (!hdev->asic_funcs->set_dram_bar_base)
-               return -EINVAL;
-       if (hdev->asic_funcs->set_dram_bar_base(hdev, dram_base_address) ==
-                                                               U64_MAX)
-               return -EIO;
-
-       /* Outbound Region 0 - Point to Host */
-       host_phys_end_addr = host_phys_base_address + host_phys_size - 1;
+       /* Outbound Region 0 */
+       outbound_region_end_address =
+                       pci_region->addr + pci_region->size - 1;
        rc |= hl_pci_iatu_write(hdev, 0x008,
-                               lower_32_bits(host_phys_base_address));
+                               lower_32_bits(pci_region->addr));
        rc |= hl_pci_iatu_write(hdev, 0x00C,
-                               upper_32_bits(host_phys_base_address));
-       rc |= hl_pci_iatu_write(hdev, 0x010, lower_32_bits(host_phys_end_addr));
+                               upper_32_bits(pci_region->addr));
+       rc |= hl_pci_iatu_write(hdev, 0x010,
+                               lower_32_bits(outbound_region_end_address));
        rc |= hl_pci_iatu_write(hdev, 0x014, 0);
 
        if ((hdev->power9_64bit_dma_enable) && (hdev->dma_mask == 64))
@@ -298,7 +288,8 @@ int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
        else
                rc |= hl_pci_iatu_write(hdev, 0x018, 0);
 
-       rc |= hl_pci_iatu_write(hdev, 0x020, upper_32_bits(host_phys_end_addr));
+       rc |= hl_pci_iatu_write(hdev, 0x020,
+                               upper_32_bits(outbound_region_end_address));
        /* Increase region size */
        rc |= hl_pci_iatu_write(hdev, 0x000, 0x00002000);
        /* Enable */
@@ -308,10 +299,7 @@ int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
        rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
        rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
 
-       if (rc)
-               return -EIO;
-
-       return 0;
+       return rc;
 }
 
 /**