cxgb4/cxgb4vf: For T5 use Packing and Padding Boundaries for SGE DMA transfers
authorHariprasad Shenai <hariprasad@chelsio.com>
Fri, 7 Nov 2014 11:36:30 +0000 (17:06 +0530)
committerDavid S. Miller <davem@davemloft.net>
Mon, 10 Nov 2014 19:15:03 +0000 (14:15 -0500)
T5 introduces the ability to have separate Packing and Padding Boundaries
for SGE DMA transfers from the chip to Host Memory. This change set takes
advantage of that to set up a smaller Padding Boundary to conserve PCI Link
and Memory Bandwidth with T5.

Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/chelsio/cxgb4/sge.c
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
drivers/net/ethernet/chelsio/cxgb4vf/sge.c
drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c

index 5e1b314..39f2b13 100644 (file)
@@ -2914,7 +2914,8 @@ static int t4_sge_init_hard(struct adapter *adap)
 int t4_sge_init(struct adapter *adap)
 {
        struct sge *s = &adap->sge;
-       u32 sge_control, sge_conm_ctrl;
+       u32 sge_control, sge_control2, sge_conm_ctrl;
+       unsigned int ingpadboundary, ingpackboundary;
        int ret, egress_threshold;
 
        /*
@@ -2924,8 +2925,31 @@ int t4_sge_init(struct adapter *adap)
        sge_control = t4_read_reg(adap, SGE_CONTROL);
        s->pktshift = PKTSHIFT_GET(sge_control);
        s->stat_len = (sge_control & EGRSTATUSPAGESIZE_MASK) ? 128 : 64;
-       s->fl_align = 1 << (INGPADBOUNDARY_GET(sge_control) +
-                           X_INGPADBOUNDARY_SHIFT);
+
+       /* T4 uses a single control field to specify both the PCIe Padding and
+        * Packing Boundary.  T5 introduced the ability to specify these
+        * separately.  The actual Ingress Packet Data alignment boundary
+        * within Packed Buffer Mode is the maximum of these two
+        * specifications.
+        */
+       ingpadboundary = 1 << (INGPADBOUNDARY_GET(sge_control) +
+                              X_INGPADBOUNDARY_SHIFT);
+       if (is_t4(adap->params.chip)) {
+               s->fl_align = ingpadboundary;
+       } else {
+               /* T5 has a different interpretation of one of the PCIe Packing
+                * Boundary values.
+                */
+               sge_control2 = t4_read_reg(adap, SGE_CONTROL2_A);
+               ingpackboundary = INGPACKBOUNDARY_G(sge_control2);
+               if (ingpackboundary == INGPACKBOUNDARY_16B_X)
+                       ingpackboundary = 16;
+               else
+                       ingpackboundary = 1 << (ingpackboundary +
+                                               INGPACKBOUNDARY_SHIFT_X);
+
+               s->fl_align = max(ingpadboundary, ingpackboundary);
+       }
 
        if (adap->flags & USING_SOFT_PARAMS)
                ret = t4_sge_init_soft(adap);
index a9d9d74..163a2a1 100644 (file)
@@ -3129,12 +3129,51 @@ int t4_fixup_host_params(struct adapter *adap, unsigned int page_size,
                     HOSTPAGESIZEPF6(sge_hps) |
                     HOSTPAGESIZEPF7(sge_hps));
 
-       t4_set_reg_field(adap, SGE_CONTROL,
-                        INGPADBOUNDARY_MASK |
-                        EGRSTATUSPAGESIZE_MASK,
-                        INGPADBOUNDARY(fl_align_log - 5) |
-                        EGRSTATUSPAGESIZE(stat_len != 64));
-
+       if (is_t4(adap->params.chip)) {
+               t4_set_reg_field(adap, SGE_CONTROL,
+                                INGPADBOUNDARY_MASK |
+                                EGRSTATUSPAGESIZE_MASK,
+                                INGPADBOUNDARY(fl_align_log - 5) |
+                                EGRSTATUSPAGESIZE(stat_len != 64));
+       } else {
+               /* T5 introduced the separation of the Free List Padding and
+                * Packing Boundaries.  Thus, we can select a smaller Padding
+                * Boundary to avoid uselessly chewing up PCIe Link and Memory
+                * Bandwidth, and use a Packing Boundary which is large enough
+                * to avoid false sharing between CPUs, etc.
+                *
+                * For the PCI Link, the smaller the Padding Boundary the
+                * better.  For the Memory Controller, a smaller Padding
+                * Boundary is better until we cross under the Memory Line
+                * Size (the minimum unit of transfer to/from Memory).  If we
+                * have a Padding Boundary which is smaller than the Memory
+                * Line Size, that'll involve a Read-Modify-Write cycle on the
+                * Memory Controller which is never good.  For T5 the smallest
+                * Padding Boundary which we can select is 32 bytes which is
+                * larger than any known Memory Controller Line Size so we'll
+                * use that.
+                *
+                * T5 has a different interpretation of the "0" value for the
+                * Packing Boundary.  This corresponds to 16 bytes instead of
+                * the expected 32 bytes.  We never have a Packing Boundary
+                * less than 32 bytes so we can't use that special value but
+                * on the other hand, if we wanted 32 bytes, the best we can
+                * really do is 64 bytes.
+               */
+               if (fl_align <= 32) {
+                       fl_align = 64;
+                       fl_align_log = 6;
+               }
+               t4_set_reg_field(adap, SGE_CONTROL,
+                                INGPADBOUNDARY_MASK |
+                                EGRSTATUSPAGESIZE_MASK,
+                                INGPADBOUNDARY(INGPCIEBOUNDARY_32B_X) |
+                                EGRSTATUSPAGESIZE(stat_len != 64));
+               t4_set_reg_field(adap, SGE_CONTROL2_A,
+                                INGPACKBOUNDARY_V(INGPACKBOUNDARY_M),
+                                INGPACKBOUNDARY_V(fl_align_log -
+                                                INGPACKBOUNDARY_SHIFT_X));
+       }
        /*
         * Adjust various SGE Free List Host Buffer Sizes.
         *
index a1024db..8d2de10 100644 (file)
@@ -95,6 +95,7 @@
 #define X_INGPADBOUNDARY_SHIFT 5
 
 #define SGE_CONTROL 0x1008
+#define SGE_CONTROL2_A         0x1124
 #define  DCASYSTYPE             0x00080000U
 #define  RXPKTCPLMODE_MASK      0x00040000U
 #define  RXPKTCPLMODE_SHIFT     18
 #define  PKTSHIFT_SHIFT         10
 #define  PKTSHIFT(x)            ((x) << PKTSHIFT_SHIFT)
 #define  PKTSHIFT_GET(x)       (((x) & PKTSHIFT_MASK) >> PKTSHIFT_SHIFT)
+#define  INGPCIEBOUNDARY_32B_X 0
 #define  INGPCIEBOUNDARY_MASK   0x00000380U
 #define  INGPCIEBOUNDARY_SHIFT  7
 #define  INGPCIEBOUNDARY(x)     ((x) << INGPCIEBOUNDARY_SHIFT)
 #define  INGPADBOUNDARY(x)      ((x) << INGPADBOUNDARY_SHIFT)
 #define  INGPADBOUNDARY_GET(x) (((x) & INGPADBOUNDARY_MASK) \
                                 >> INGPADBOUNDARY_SHIFT)
+#define  INGPACKBOUNDARY_16B_X 0
+#define  INGPACKBOUNDARY_SHIFT_X 5
+
+#define  INGPACKBOUNDARY_S     16
+#define  INGPACKBOUNDARY_M     0x7U
+#define  INGPACKBOUNDARY_V(x)  ((x) << INGPACKBOUNDARY_S)
+#define  INGPACKBOUNDARY_G(x)  (((x) >> INGPACKBOUNDARY_S) \
+                                & INGPACKBOUNDARY_M)
 #define  EGRPCIEBOUNDARY_MASK   0x0000000eU
 #define  EGRPCIEBOUNDARY_SHIFT  1
 #define  EGRPCIEBOUNDARY(x)     ((x) << EGRPCIEBOUNDARY_SHIFT)
index a18830d..cd5b789 100644 (file)
@@ -2436,6 +2436,7 @@ int t4vf_sge_init(struct adapter *adapter)
        u32 fl0 = sge_params->sge_fl_buffer_size[0];
        u32 fl1 = sge_params->sge_fl_buffer_size[1];
        struct sge *s = &adapter->sge;
+       unsigned int ingpadboundary, ingpackboundary;
 
        /*
         * Start by vetting the basic SGE parameters which have been set up by
@@ -2460,8 +2461,34 @@ int t4vf_sge_init(struct adapter *adapter)
        s->stat_len = ((sge_params->sge_control & EGRSTATUSPAGESIZE_MASK)
                        ? 128 : 64);
        s->pktshift = PKTSHIFT_GET(sge_params->sge_control);
-       s->fl_align = 1 << (INGPADBOUNDARY_GET(sge_params->sge_control) +
-                           SGE_INGPADBOUNDARY_SHIFT);
+
+       /* T4 uses a single control field to specify both the PCIe Padding and
+        * Packing Boundary.  T5 introduced the ability to specify these
+        * separately.  The actual Ingress Packet Data alignment boundary
+        * within Packed Buffer Mode is the maximum of these two
+        * specifications.  (Note that it makes no real practical sense to
+        * have the Pading Boudary be larger than the Packing Boundary but you
+        * could set the chip up that way and, in fact, legacy T4 code would
+        * end doing this because it would initialize the Padding Boundary and
+        * leave the Packing Boundary initialized to 0 (16 bytes).)
+        */
+       ingpadboundary = 1 << (INGPADBOUNDARY_GET(sge_params->sge_control) +
+                              X_INGPADBOUNDARY_SHIFT);
+       if (is_t4(adapter->params.chip)) {
+               s->fl_align = ingpadboundary;
+       } else {
+               /* T5 has a different interpretation of one of the PCIe Packing
+                * Boundary values.
+                */
+               ingpackboundary = INGPACKBOUNDARY_G(sge_params->sge_control2);
+               if (ingpackboundary == INGPACKBOUNDARY_16B_X)
+                       ingpackboundary = 16;
+               else
+                       ingpackboundary = 1 << (ingpackboundary +
+                                               INGPACKBOUNDARY_SHIFT_X);
+
+               s->fl_align = max(ingpadboundary, ingpackboundary);
+       }
 
        /*
         * Set up tasklet timers.
index 95df61d..b5c301d 100644 (file)
@@ -134,6 +134,7 @@ struct dev_params {
  */
 struct sge_params {
        u32 sge_control;                /* padding, boundaries, lengths, etc. */
+       u32 sge_control2;               /* T5: more of the same */
        u32 sge_host_page_size;         /* RDMA page sizes */
        u32 sge_queues_per_page;        /* RDMA queues/page */
        u32 sge_user_mode_limits;       /* limits for BAR2 user mode accesses */
index e984fdc..dc30d28 100644 (file)
@@ -468,6 +468,29 @@ int t4vf_get_sge_params(struct adapter *adapter)
        sge_params->sge_timer_value_2_and_3 = vals[5];
        sge_params->sge_timer_value_4_and_5 = vals[6];
 
+       /* T4 uses a single control field to specify both the PCIe Padding and
+        * Packing Boundary.  T5 introduced the ability to specify these
+        * separately with the Padding Boundary in SGE_CONTROL and and Packing
+        * Boundary in SGE_CONTROL2.  So for T5 and later we need to grab
+        * SGE_CONTROL in order to determine how ingress packet data will be
+        * laid out in Packed Buffer Mode.  Unfortunately, older versions of
+        * the firmware won't let us retrieve SGE_CONTROL2 so if we get a
+        * failure grabbing it we throw an error since we can't figure out the
+        * right value.
+        */
+       if (!is_t4(adapter->params.chip)) {
+               params[0] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) |
+                            FW_PARAMS_PARAM_XYZ(SGE_CONTROL2_A));
+               v = t4vf_query_params(adapter, 1, params, vals);
+               if (v != FW_SUCCESS) {
+                       dev_err(adapter->pdev_dev,
+                               "Unable to get SGE Control2; "
+                               "probably old firmware.\n");
+                       return v;
+               }
+               sge_params->sge_control2 = vals[0];
+       }
+
        params[0] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) |
                     FW_PARAMS_PARAM_XYZ(SGE_INGRESS_RX_THRESHOLD));
        v = t4vf_query_params(adapter, 1, params, vals);