IB/hfi1: Get rid of divide in pio buffer allocator
author Sebastian Sanchez <sebastian.sanchez@intel.com>
Tue, 25 Oct 2016 20:12:28 +0000 (13:12 -0700)
committer Doug Ledford <dledford@redhat.com>
Tue, 15 Nov 2016 21:37:27 +0000 (16:37 -0500)
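The div instruction shows up as costly in profiles.

Avoid the modulo in sc_buffer_alloc() by keeping a second counter,
fill_wrap, that tracks the fill position within the ring: it is advanced
by the allocated block count and wrapped by subtracting sc->credits once
it reaches that value, so the buffer start offset can be computed with a
multiply alone.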

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/hfi1/pio.c
drivers/infiniband/hw/hfi1/pio.h

index 385e4dcf2cd3c2e088db42c0f9e4db8917954639..516fac38d31ee33b47806cefd1203a6342edbe4d 100644 (file)
@@ -1249,6 +1249,7 @@ int sc_enable(struct send_context *sc)
        sc->free = 0;
        sc->alloc_free = 0;
        sc->fill = 0;
+       sc->fill_wrap = 0;
        sc->sr_head = 0;
        sc->sr_tail = 0;
        sc->flags = 0;
@@ -1392,7 +1393,7 @@ struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
        unsigned long flags;
        unsigned long avail;
        unsigned long blocks = dwords_to_blocks(dw_len);
-       unsigned long start_fill;
+       u32 fill_wrap;
        int trycount = 0;
        u32 head, next;
 
@@ -1435,8 +1436,11 @@ retry:
        head = sc->sr_head;
 
        /* "allocate" the buffer */
-       start_fill = sc->fill;
        sc->fill += blocks;
+       fill_wrap = sc->fill_wrap;
+       sc->fill_wrap += blocks;
+       if (sc->fill_wrap >= sc->credits)
+               sc->fill_wrap = sc->fill_wrap - sc->credits;
 
        /*
         * Fill the parts that the releaser looks at before moving the head.
@@ -1465,8 +1469,7 @@ retry:
        spin_unlock_irqrestore(&sc->alloc_lock, flags);
 
        /* finish filling in the buffer outside the lock */
-       pbuf->start = sc->base_addr + ((start_fill % sc->credits)
-                                                       * PIO_BLOCK_SIZE);
+       pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE;
        pbuf->size = sc->credits * PIO_BLOCK_SIZE;
        pbuf->end = sc->base_addr + pbuf->size;
        pbuf->block_count = blocks;
index bd19507b6bb0e1dba70e68a48286ea43f8913c1f..498b548055e00804f6dc7ea7c72476b36d4428ca 100644 (file)
@@ -119,6 +119,7 @@ struct send_context {
        unsigned long fill;             /* official alloc count */
        unsigned long alloc_free;       /* copy of free (less cache thrash) */
        u32 __percpu *buffers_allocated;/* count of buffers allocated */
+       u32 fill_wrap;                  /* tracks fill within ring */
        /* releaser fields */
        spinlock_t release_lock ____cacheline_aligned_in_smp;
        u32 sr_tail;                    /* shadow ring tail */