iwlwifi: pcie: use partial pages if applicable
author Johannes Berg <johannes.berg@intel.com>
Wed, 2 Oct 2019 10:11:54 +0000 (12:11 +0200)
committer Luca Coelho <luciano.coelho@intel.com>
Mon, 23 Dec 2019 09:54:31 +0000 (11:54 +0200)
If we have only 2k RBs (receive buffers), as on the latest (AX210)
hardware, then even on x86, where PAGE_SIZE is 4k, we currently
waste half of the memory.

If this is the case, return partial pages from the allocator and
track the offset in each RBD, so we can find the data in the page
and remap it later.

This might also benefit other platforms with a larger PAGE_SIZE by
packing more RBs into a single large page.
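
For the gist of the mechanism, below is a minimal user-space sketch of
the carry-over logic only, not the driver code: names such as
partial_pool and pool_get_rb are made up for illustration, and the
driver's locking (alloc_page_lock), GFP flags and page refcounting
(get_page()) are left out or only mentioned in comments.

  /* Minimal user-space model of the partial-page idea: a PAGE_SZ "page"
   * is sliced into RB_SZ chunks; each request returns the page pointer
   * plus the offset of its slice.  The real driver additionally takes a
   * page reference (get_page()) per extra slice so the page is only
   * freed once every RB living in it has been released. */
  #include <stdio.h>
  #include <stdlib.h>

  #define PAGE_SZ 4096u  /* modelled PAGE_SIZE */
  #define RB_SZ   2048u  /* modelled 2k receive buffer */

  struct partial_pool {
  	unsigned char *page;  /* page we are still slicing, NULL if none */
  	unsigned int used;    /* bytes of that page already handed out */
  };

  static unsigned char *pool_get_rb(struct partial_pool *pool,
  				  unsigned int *offset)
  {
  	unsigned char *page;

  	if (pool->page) {
  		/* reuse the partially used page */
  		*offset = pool->used;
  		page = pool->page;
  		pool->used += RB_SZ;
  		if (pool->used >= PAGE_SZ)
  			pool->page = NULL;  /* fully consumed, stop sharing */
  		return page;
  	}

  	/* no partial page available: allocate a fresh one */
  	page = malloc(PAGE_SZ);
  	if (!page)
  		return NULL;

  	/* keep the remainder around only if at least one more RB fits */
  	if (2 * RB_SZ <= PAGE_SZ) {
  		pool->page = page;
  		pool->used = RB_SZ;
  	}
  	*offset = 0;
  	return page;
  }

  int main(void)
  {
  	struct partial_pool pool = { NULL, 0 };
  	unsigned int off1, off2;
  	unsigned char *rb1 = pool_get_rb(&pool, &off1);
  	unsigned char *rb2 = pool_get_rb(&pool, &off2);

  	/* with 2k RBs in a 4k page, both slices come from the same page */
  	printf("rb1=%p+%u rb2=%p+%u same_page=%d\n",
  	       (void *)rb1, off1, (void *)rb2, off2, rb1 == rb2);

  	free(rb1);
  	if (rb2 != rb1)
  		free(rb2);
  	return 0;
  }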

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
drivers/net/wireless/intel/iwlwifi/pcie/internal.h
drivers/net/wireless/intel/iwlwifi/pcie/rx.c
drivers/net/wireless/intel/iwlwifi/pcie/trans.c

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index c7d094c..2ac7308 100644
@@ -106,6 +106,8 @@ struct iwl_host_cmd;
  * @page: driver's pointer to the rxb page
  * @invalid: rxb is in driver ownership - not owned by HW
  * @vid: index of this rxb in the global table
+ * @offset: indicates which offset of the page (in bytes)
+ *     this buffer uses (if multiple RBs fit into one page)
  */
 struct iwl_rx_mem_buffer {
        dma_addr_t page_dma;
@@ -113,6 +115,7 @@ struct iwl_rx_mem_buffer {
        u16 vid;
        bool invalid;
        struct list_head list;
+       u32 offset;
 };
 
 /**
@@ -511,6 +514,11 @@ struct cont_rec {
  * @in_rescan: true if we have triggered a device rescan
  * @base_rb_stts: base virtual address of receive buffer status for all queues
  * @base_rb_stts_dma: base physical address of receive buffer status
+ * @supported_dma_mask: DMA mask to validate the actual address against,
+ *     will be DMA_BIT_MASK(11) or DMA_BIT_MASK(12) depending on the device
+ * @alloc_page_lock: spinlock for the page allocator
+ * @alloc_page: allocated page to still use parts of
+ * @alloc_page_used: how much of the allocated page was already used (bytes)
  */
 struct iwl_trans_pcie {
        struct iwl_rxq *rxq;
@@ -583,6 +591,12 @@ struct iwl_trans_pcie {
        bool pcie_dbg_dumped_once;
        u32 rx_page_order;
        u32 rx_buf_bytes;
+       u32 supported_dma_mask;
+
+       /* allocator lock for the two values below */
+       spinlock_t alloc_page_lock;
+       struct page *alloc_page;
+       u32 alloc_page_used;
 
        /*protect hw register */
        spinlock_t reg_lock;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
index 20f9338..498a14e 100644
@@ -298,6 +298,7 @@ static void iwl_pcie_restock_bd(struct iwl_trans *trans,
 static void iwl_pcie_rxmq_restock(struct iwl_trans *trans,
                                  struct iwl_rxq *rxq)
 {
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
        struct iwl_rx_mem_buffer *rxb;
 
        /*
@@ -318,8 +319,8 @@ static void iwl_pcie_rxmq_restock(struct iwl_trans *trans,
                                       list);
                list_del(&rxb->list);
                rxb->invalid = false;
-               /* 12 first bits are expected to be empty */
-               WARN_ON(rxb->page_dma & DMA_BIT_MASK(12));
+               /* some low bits are expected to be unset (depending on hw) */
+               WARN_ON(rxb->page_dma & trans_pcie->supported_dma_mask);
                /* Point to Rx buffer via next RBD in circular buffer */
                iwl_pcie_restock_bd(trans, rxq, rxb);
                rxq->write = (rxq->write + 1) & (rxq->queue_size - 1);
@@ -412,15 +413,34 @@ void iwl_pcie_rxq_restock(struct iwl_trans *trans, struct iwl_rxq *rxq)
  *
  */
 static struct page *iwl_pcie_rx_alloc_page(struct iwl_trans *trans,
-                                          gfp_t priority)
+                                          u32 *offset, gfp_t priority)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       unsigned int rbsize = iwl_trans_get_rb_size(trans_pcie->rx_buf_size);
+       unsigned int allocsize = PAGE_SIZE << trans_pcie->rx_page_order;
        struct page *page;
        gfp_t gfp_mask = priority;
 
        if (trans_pcie->rx_page_order > 0)
                gfp_mask |= __GFP_COMP;
 
+       if (trans_pcie->alloc_page) {
+               spin_lock_bh(&trans_pcie->alloc_page_lock);
+               /* recheck */
+               if (trans_pcie->alloc_page) {
+                       *offset = trans_pcie->alloc_page_used;
+                       page = trans_pcie->alloc_page;
+                       trans_pcie->alloc_page_used += rbsize;
+                       if (trans_pcie->alloc_page_used >= allocsize)
+                               trans_pcie->alloc_page = NULL;
+                       else
+                               get_page(page);
+                       spin_unlock_bh(&trans_pcie->alloc_page_lock);
+                       return page;
+               }
+               spin_unlock_bh(&trans_pcie->alloc_page_lock);
+       }
+
        /* Alloc a new receive buffer */
        page = alloc_pages(gfp_mask, trans_pcie->rx_page_order);
        if (!page) {
@@ -436,6 +456,18 @@ static struct page *iwl_pcie_rx_alloc_page(struct iwl_trans *trans,
                                 "Failed to alloc_pages\n");
                return NULL;
        }
+
+       if (2 * rbsize <= allocsize) {
+               spin_lock_bh(&trans_pcie->alloc_page_lock);
+               if (!trans_pcie->alloc_page) {
+                       get_page(page);
+                       trans_pcie->alloc_page = page;
+                       trans_pcie->alloc_page_used = rbsize;
+               }
+               spin_unlock_bh(&trans_pcie->alloc_page_lock);
+       }
+
+       *offset = 0;
        return page;
 }
 
@@ -456,6 +488,8 @@ void iwl_pcie_rxq_alloc_rbs(struct iwl_trans *trans, gfp_t priority,
        struct page *page;
 
        while (1) {
+               unsigned int offset;
+
                spin_lock(&rxq->lock);
                if (list_empty(&rxq->rx_used)) {
                        spin_unlock(&rxq->lock);
@@ -463,8 +497,7 @@ void iwl_pcie_rxq_alloc_rbs(struct iwl_trans *trans, gfp_t priority,
                }
                spin_unlock(&rxq->lock);
 
-               /* Alloc a new receive buffer */
-               page = iwl_pcie_rx_alloc_page(trans, priority);
+               page = iwl_pcie_rx_alloc_page(trans, &offset, priority);
                if (!page)
                        return;
 
@@ -482,9 +515,10 @@ void iwl_pcie_rxq_alloc_rbs(struct iwl_trans *trans, gfp_t priority,
 
                BUG_ON(rxb->page);
                rxb->page = page;
+               rxb->offset = offset;
                /* Get physical address of the RB */
                rxb->page_dma =
-                       dma_map_page(trans->dev, page, 0,
+                       dma_map_page(trans->dev, page, rxb->offset,
                                     trans_pcie->rx_buf_bytes,
                                     DMA_FROM_DEVICE);
                if (dma_mapping_error(trans->dev, rxb->page_dma)) {
@@ -567,13 +601,15 @@ static void iwl_pcie_rx_allocator(struct iwl_trans *trans)
                        BUG_ON(rxb->page);
 
                        /* Alloc a new receive buffer */
-                       page = iwl_pcie_rx_alloc_page(trans, gfp_mask);
+                       page = iwl_pcie_rx_alloc_page(trans, &rxb->offset,
+                                                     gfp_mask);
                        if (!page)
                                continue;
                        rxb->page = page;
 
                        /* Get physical address of the RB */
-                       rxb->page_dma = dma_map_page(trans->dev, page, 0,
+                       rxb->page_dma = dma_map_page(trans->dev, page,
+                                                    rxb->offset,
                                                     trans_pcie->rx_buf_bytes,
                                                     DMA_FROM_DEVICE);
                        if (dma_mapping_error(trans->dev, rxb->page_dma)) {
@@ -1190,6 +1226,9 @@ void iwl_pcie_rx_free(struct iwl_trans *trans)
        kfree(trans_pcie->rx_pool);
        kfree(trans_pcie->global_table);
        kfree(trans_pcie->rxq);
+
+       if (trans_pcie->alloc_page)
+               __free_pages(trans_pcie->alloc_page, trans_pcie->rx_page_order);
 }
 
 static void iwl_pcie_rx_move_to_allocator(struct iwl_rxq *rxq,
@@ -1261,7 +1300,7 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans,
                bool reclaim;
                int index, cmd_index, len;
                struct iwl_rx_cmd_buffer rxcb = {
-                       ._offset = offset,
+                       ._offset = rxb->offset + offset,
                        ._rx_page_order = trans_pcie->rx_page_order,
                        ._page = rxb->page,
                        ._page_stolen = false,
@@ -1367,7 +1406,7 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans,
         * rx_free list for reuse later. */
        if (rxb->page != NULL) {
                rxb->page_dma =
-                       dma_map_page(trans->dev, rxb->page, 0,
+                       dma_map_page(trans->dev, rxb->page, rxb->offset,
                                     trans_pcie->rx_buf_bytes,
                                     DMA_FROM_DEVICE);
                if (dma_mapping_error(trans->dev, rxb->page_dma)) {
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index d7617eb..cb49154 100644
@@ -1917,6 +1917,9 @@ static void iwl_trans_pcie_configure(struct iwl_trans *trans,
                iwl_trans_get_rb_size_order(trans_pcie->rx_buf_size);
        trans_pcie->rx_buf_bytes =
                iwl_trans_get_rb_size(trans_pcie->rx_buf_size);
+       trans_pcie->supported_dma_mask = DMA_BIT_MASK(12);
+       if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210)
+               trans_pcie->supported_dma_mask = DMA_BIT_MASK(11);
 
        trans_pcie->bc_table_dword = trans_cfg->bc_table_dword;
        trans_pcie->scd_set_active = trans_cfg->scd_set_active;
@@ -2961,9 +2964,9 @@ static u32 iwl_trans_pcie_dump_rbs(struct iwl_trans *trans,
                rb->index = cpu_to_le32(i);
                memcpy(rb->data, page_address(rxb->page), max_len);
                /* remap the page for the free benefit */
-               rxb->page_dma = dma_map_page(trans->dev, rxb->page, 0,
-                                                    max_len,
-                                                    DMA_FROM_DEVICE);
+               rxb->page_dma = dma_map_page(trans->dev, rxb->page,
+                                            rxb->offset, max_len,
+                                            DMA_FROM_DEVICE);
 
                *data = iwl_fw_error_next_data(*data);
        }
@@ -3454,6 +3457,7 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev,
        trans_pcie->opmode_down = true;
        spin_lock_init(&trans_pcie->irq_lock);
        spin_lock_init(&trans_pcie->reg_lock);
+       spin_lock_init(&trans_pcie->alloc_page_lock);
        mutex_init(&trans_pcie->mutex);
        init_waitqueue_head(&trans_pcie->ucode_write_waitq);