USB: EHCI: use a bandwidth-allocation table
authorAlan Stern <stern@rowland.harvard.edu>
Fri, 11 Oct 2013 15:29:13 +0000 (11:29 -0400)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 11 Oct 2013 23:45:43 +0000 (16:45 -0700)
This patch significantly changes the scheduling code in ehci-hcd.
Instead of calculating the current bandwidth utilization by trudging
through the schedule and adding up the times used by the existing
transfers, we will now maintain a table holding the time used for each
of 64 microframes.  This will drastically speed up the bandwidth
computations.

In addition, it eliminates a theoretical bug.  An isochronous endpoint
may have bandwidth reserved even at times when it has no transfers
listed in the schedule.  The table will keep track of the reserved
bandwidth, whereas adding up entries in the schedule would miss it.

As a corollary, we can keep bandwidth reserved for endpoints even
when they aren't in active use.  Eventually the bandwidth will be
reserved when a new alternate setting is installed; for now the
endpoint's reservation takes place when its first URB is submitted.

A drawback of this approach is that transfers with an interval larger
than 64 microframes will have to be charged for bandwidth as though
the interval was 64.  In practice this shouldn't matter much;
transfers with longer intervals tend to be rather short anyway (things
like hubs or HID devices).

Another minor drawback is that we will keep track of two different
period and phase values: the actual ones and the ones used for
bandwidth allocation (which are limited to 64).  This adds only a
small amount of overhead: 3 bytes for each endpoint.

The patch also adds a new debugfs file named "bandwidth" to display
the information stored in the new table.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/usb/host/ehci-dbg.c
drivers/usb/host/ehci-hcd.c
drivers/usb/host/ehci-q.c
drivers/usb/host/ehci-sched.c
drivers/usb/host/ehci-sysfs.c
drivers/usb/host/ehci.h

index 09e5bc8..5bbfb1f 100644 (file)
@@ -334,6 +334,7 @@ static inline void remove_debug_files (struct ehci_hcd *bus) { }
 /* troubleshooting help: expose state in debugfs */
 
 static int debug_async_open(struct inode *, struct file *);
+static int debug_bandwidth_open(struct inode *, struct file *);
 static int debug_periodic_open(struct inode *, struct file *);
 static int debug_registers_open(struct inode *, struct file *);
 
@@ -347,6 +348,13 @@ static const struct file_operations debug_async_fops = {
        .release        = debug_close,
        .llseek         = default_llseek,
 };
+static const struct file_operations debug_bandwidth_fops = {
+       .owner          = THIS_MODULE,
+       .open           = debug_bandwidth_open,
+       .read           = debug_output,
+       .release        = debug_close,
+       .llseek         = default_llseek,
+};
 static const struct file_operations debug_periodic_fops = {
        .owner          = THIS_MODULE,
        .open           = debug_periodic_open,
@@ -525,6 +533,41 @@ static ssize_t fill_async_buffer(struct debug_buffer *buf)
        return strlen(buf->output_buf);
 }
 
+static ssize_t fill_bandwidth_buffer(struct debug_buffer *buf)
+{
+       struct ehci_hcd         *ehci;
+       unsigned                temp, size;
+       char                    *next;
+       unsigned                i;
+       u8                      *bw;
+
+       ehci = hcd_to_ehci(bus_to_hcd(buf->bus));
+       next = buf->output_buf;
+       size = buf->alloc_size;
+
+       *next = 0;
+
+       spin_lock_irq(&ehci->lock);
+
+       /* Dump the HS bandwidth table */
+       temp = scnprintf(next, size,
+                       "HS bandwidth allocation (us per microframe)\n");
+       size -= temp;
+       next += temp;
+       for (i = 0; i < EHCI_BANDWIDTH_SIZE; i += 8) {
+               bw = &ehci->bandwidth[i];
+               temp = scnprintf(next, size,
+                               "%2u: %4u%4u%4u%4u%4u%4u%4u%4u\n",
+                               i, bw[0], bw[1], bw[2], bw[3],
+                                       bw[4], bw[5], bw[6], bw[7]);
+               size -= temp;
+               next += temp;
+       }
+       spin_unlock_irq(&ehci->lock);
+
+       return next - buf->output_buf;
+}
+
 #define DBG_SCHED_LIMIT 64
 static ssize_t fill_periodic_buffer(struct debug_buffer *buf)
 {
@@ -919,6 +962,7 @@ static int debug_close(struct inode *inode, struct file *file)
 
        return 0;
 }
+
 static int debug_async_open(struct inode *inode, struct file *file)
 {
        file->private_data = alloc_buffer(inode->i_private, fill_async_buffer);
@@ -926,6 +970,14 @@ static int debug_async_open(struct inode *inode, struct file *file)
        return file->private_data ? 0 : -ENOMEM;
 }
 
+static int debug_bandwidth_open(struct inode *inode, struct file *file)
+{
+       file->private_data = alloc_buffer(inode->i_private,
+                       fill_bandwidth_buffer);
+
+       return file->private_data ? 0 : -ENOMEM;
+}
+
 static int debug_periodic_open(struct inode *inode, struct file *file)
 {
        struct debug_buffer *buf;
@@ -958,6 +1010,10 @@ static inline void create_debug_files (struct ehci_hcd *ehci)
                                                &debug_async_fops))
                goto file_error;
 
+       if (!debugfs_create_file("bandwidth", S_IRUGO, ehci->debug_dir, bus,
+                                               &debug_bandwidth_fops))
+               goto file_error;
+
        if (!debugfs_create_file("periodic", S_IRUGO, ehci->debug_dir, bus,
                                                &debug_periodic_fops))
                goto file_error;
index b2e4e4b..398e8fa 100644 (file)
@@ -956,6 +956,7 @@ rescan:
                        goto idle_timeout;
 
                /* BUG_ON(!list_empty(&stream->free_list)); */
+               reserve_release_iso_bandwidth(ehci, stream, -1);
                kfree(stream);
                goto done;
        }
@@ -982,6 +983,8 @@ idle_timeout:
                if (qh->clearing_tt)
                        goto idle_timeout;
                if (list_empty (&qh->qtd_list)) {
+                       if (qh->ps.bw_uperiod)
+                               reserve_release_intr_bandwidth(ehci, qh, -1);
                        qh_destroy(ehci, qh);
                        break;
                }
index 9bfaa21..db05bd8 100644 (file)
@@ -797,6 +797,8 @@ qh_make (
         * For control/bulk requests, the HC or TT handles these.
         */
        if (type == PIPE_INTERRUPT) {
+               unsigned        tmp;
+
                qh->ps.usecs = NS_TO_US(usb_calc_bus_time(USB_SPEED_HIGH,
                                is_input, 0,
                                hb_mult(maxp) * max_packet(maxp)));
@@ -816,6 +818,14 @@ qh_make (
                                urb->interval = ehci->periodic_size << 3;
                        }
                        qh->ps.period = urb->interval >> 3;
+
+                       /* period for bandwidth allocation */
+                       tmp = min_t(unsigned, EHCI_BANDWIDTH_SIZE,
+                                       1 << (urb->ep->desc.bInterval - 1));
+
+                       /* Allow urb->interval to override */
+                       qh->ps.bw_uperiod = min_t(unsigned, tmp, urb->interval);
+                       qh->ps.bw_period = qh->ps.bw_uperiod >> 3;
                } else {
                        int             think_time;
 
@@ -839,6 +849,15 @@ qh_make (
                        if (urb->interval > ehci->periodic_size)
                                urb->interval = ehci->periodic_size;
                        qh->ps.period = urb->interval;
+
+                       /* period for bandwidth allocation */
+                       tmp = min_t(unsigned, EHCI_BANDWIDTH_FRAMES,
+                                       urb->ep->desc.bInterval);
+                       tmp = rounddown_pow_of_two(tmp);
+
+                       /* Allow urb->interval to override */
+                       qh->ps.bw_period = min_t(unsigned, tmp, urb->interval);
+                       qh->ps.bw_uperiod = qh->ps.bw_period << 3;
                }
        }
 
index 1fafcda..790a64c 100644 (file)
@@ -106,75 +106,53 @@ static void periodic_unlink (struct ehci_hcd *ehci, unsigned frame, void *ptr)
                *hw_p = ehci->dummy->qh_dma;
 }
 
-/* how many of the uframe's 125 usecs are allocated? */
-static unsigned short
-periodic_usecs (struct ehci_hcd *ehci, unsigned frame, unsigned uframe)
+static void bandwidth_dbg(struct ehci_hcd *ehci, int sign, char *type,
+               struct ehci_per_sched *ps)
 {
-       __hc32                  *hw_p = &ehci->periodic [frame];
-       union ehci_shadow       *q = &ehci->pshadow [frame];
-       unsigned                usecs = 0;
-       struct ehci_qh_hw       *hw;
+       dev_dbg(&ps->udev->dev,
+                       "ep %02x: %s %s @ %u+%u (%u.%u+%u) [%u/%u us] mask %04x\n",
+                       ps->ep->desc.bEndpointAddress,
+                       (sign >= 0 ? "reserve" : "release"), type,
+                       (ps->bw_phase << 3) + ps->phase_uf, ps->bw_uperiod,
+                       ps->phase, ps->phase_uf, ps->period,
+                       ps->usecs, ps->c_usecs, ps->cs_mask);
+}
 
-       while (q->ptr) {
-               switch (hc32_to_cpu(ehci, Q_NEXT_TYPE(ehci, *hw_p))) {
-               case Q_TYPE_QH:
-                       hw = q->qh->hw;
-                       /* is it in the S-mask? */
-                       if (hw->hw_info2 & cpu_to_hc32(ehci, 1 << uframe))
-                               usecs += q->qh->ps.usecs;
-                       /* ... or C-mask? */
-                       if (hw->hw_info2 & cpu_to_hc32(ehci,
-                                       1 << (8 + uframe)))
-                               usecs += q->qh->ps.c_usecs;
-                       hw_p = &hw->hw_next;
-                       q = &q->qh->qh_next;
-                       break;
-               // case Q_TYPE_FSTN:
-               default:
-                       /* for "save place" FSTNs, count the relevant INTR
-                        * bandwidth from the previous frame
-                        */
-                       if (q->fstn->hw_prev != EHCI_LIST_END(ehci)) {
-                               ehci_dbg (ehci, "ignoring FSTN cost ...\n");
-                       }
-                       hw_p = &q->fstn->hw_next;
-                       q = &q->fstn->fstn_next;
-                       break;
-               case Q_TYPE_ITD:
-                       if (q->itd->hw_transaction[uframe])
-                               usecs += q->itd->stream->ps.usecs;
-                       hw_p = &q->itd->hw_next;
-                       q = &q->itd->itd_next;
-                       break;
-               case Q_TYPE_SITD:
-                       /* is it in the S-mask?  (count SPLIT, DATA) */
-                       if (q->sitd->hw_uframe & cpu_to_hc32(ehci,
-                                       1 << uframe)) {
-                               if (q->sitd->hw_fullspeed_ep &
-                                               cpu_to_hc32(ehci, 1<<31))
-                                       usecs += q->sitd->stream->ps.usecs;
-                               else    /* worst case for OUT start-split */
-                                       usecs += HS_USECS_ISO (188);
-                       }
+static void reserve_release_intr_bandwidth(struct ehci_hcd *ehci,
+               struct ehci_qh *qh, int sign)
+{
+       unsigned                start_uf;
+       unsigned                i, j, m;
+       int                     usecs = qh->ps.usecs;
+       int                     c_usecs = qh->ps.c_usecs;
 
-                       /* ... C-mask?  (count CSPLIT, DATA) */
-                       if (q->sitd->hw_uframe &
-                                       cpu_to_hc32(ehci, 1 << (8 + uframe))) {
-                               /* worst case for IN complete-split */
-                               usecs += q->sitd->stream->ps.c_usecs;
-                       }
+       if (qh->ps.phase == NO_FRAME)   /* Bandwidth wasn't reserved */
+               return;
+       start_uf = qh->ps.bw_phase << 3;
 
-                       hw_p = &q->sitd->hw_next;
-                       q = &q->sitd->sitd_next;
-                       break;
+       bandwidth_dbg(ehci, sign, "intr", &qh->ps);
+
+       if (sign < 0) {         /* Release bandwidth */
+               usecs = -usecs;
+               c_usecs = -c_usecs;
+       }
+
+       /* Entire transaction (high speed) or start-split (full/low speed) */
+       for (i = start_uf + qh->ps.phase_uf; i < EHCI_BANDWIDTH_SIZE;
+                       i += qh->ps.bw_uperiod)
+               ehci->bandwidth[i] += usecs;
+
+       /* Complete-split (full/low speed) */
+       if (qh->ps.c_usecs) {
+               /* NOTE: adjustments needed for FSTN */
+               for (i = start_uf; i < EHCI_BANDWIDTH_SIZE;
+                               i += qh->ps.bw_uperiod) {
+                       for ((j = 2, m = 1 << (j+8)); j < 8; (++j, m <<= 1)) {
+                               if (qh->ps.cs_mask & m)
+                                       ehci->bandwidth[i+j] += c_usecs;
+                       }
                }
        }
-#if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
-       if (usecs > ehci->uframe_periodic_max)
-               ehci_err (ehci, "uframe %d sched overrun: %d usecs\n",
-                       frame * 8 + uframe, usecs);
-#endif
-       return usecs;
 }
 
 /*-------------------------------------------------------------------------*/
@@ -524,8 +502,8 @@ static void qh_link_periodic(struct ehci_hcd *ehci, struct ehci_qh *qh)
        qh->exception = 0;
 
        /* update per-qh bandwidth for debugfs */
-       ehci_to_hcd(ehci)->self.bandwidth_allocated += qh->ps.period
-               ? ((qh->ps.usecs + qh->ps.c_usecs) / qh->ps.period)
+       ehci_to_hcd(ehci)->self.bandwidth_allocated += qh->ps.bw_period
+               ? ((qh->ps.usecs + qh->ps.c_usecs) / qh->ps.bw_period)
                : (qh->ps.usecs * 8);
 
        list_add(&qh->intr_node, &ehci->intr_qh_list);
@@ -562,8 +540,8 @@ static void qh_unlink_periodic(struct ehci_hcd *ehci, struct ehci_qh *qh)
                periodic_unlink (ehci, i, qh);
 
        /* update per-qh bandwidth for debugfs */
-       ehci_to_hcd(ehci)->self.bandwidth_allocated -= qh->ps.period
-               ? ((qh->ps.usecs + qh->ps.c_usecs) / qh->ps.period)
+       ehci_to_hcd(ehci)->self.bandwidth_allocated -= qh->ps.bw_period
+               ? ((qh->ps.usecs + qh->ps.c_usecs) / qh->ps.bw_period)
                : (qh->ps.usecs * 8);
 
        dev_dbg(&qh->ps.udev->dev,
@@ -693,11 +671,9 @@ static int check_period (
        struct ehci_hcd *ehci,
        unsigned        frame,
        unsigned        uframe,
-       unsigned        period,
+       unsigned        uperiod,
        unsigned        usecs
 ) {
-       int             claimed;
-
        /* complete split running into next frame?
         * given FSTN support, we could sometimes check...
         */
@@ -707,25 +683,10 @@ static int check_period (
        /* convert "usecs we need" to "max already claimed" */
        usecs = ehci->uframe_periodic_max - usecs;
 
-       /* we "know" 2 and 4 uframe intervals were rejected; so
-        * for period 0, check _every_ microframe in the schedule.
-        */
-       if (unlikely (period == 0)) {
-               do {
-                       for (uframe = 0; uframe < 7; uframe++) {
-                               claimed = periodic_usecs (ehci, frame, uframe);
-                               if (claimed > usecs)
-                                       return 0;
-                       }
-               } while ((frame += 1) < ehci->periodic_size);
-
-       /* just check the specified uframe, at that period */
-       } else {
-               do {
-                       claimed = periodic_usecs (ehci, frame, uframe);
-                       if (claimed > usecs)
-                               return 0;
-               } while ((frame += period) < ehci->periodic_size);
+       for (uframe += frame << 3; uframe < EHCI_BANDWIDTH_SIZE;
+                       uframe += uperiod) {
+               if (ehci->bandwidth[uframe] > usecs)
+                       return 0;
        }
 
        // success!
@@ -746,7 +707,7 @@ static int check_intr_schedule (
        if (qh->ps.c_usecs && uframe >= 6)      /* FSTN territory? */
                goto done;
 
-       if (!check_period(ehci, frame, uframe, qh->ps.period, qh->ps.usecs))
+       if (!check_period(ehci, frame, uframe, qh->ps.bw_uperiod, qh->ps.usecs))
                goto done;
        if (!qh->ps.c_usecs) {
                retval = 0;
@@ -755,21 +716,21 @@ static int check_intr_schedule (
        }
 
 #ifdef CONFIG_USB_EHCI_TT_NEWSCHED
-       if (tt_available(ehci, qh->ps.period, qh->ps.udev, frame, uframe,
+       if (tt_available(ehci, qh->ps.bw_period, qh->ps.udev, frame, uframe,
                                qh->ps.tt_usecs)) {
                unsigned i;
 
                /* TODO : this may need FSTN for SSPLIT in uframe 5. */
                for (i = uframe+2; i < 8 && i <= uframe+4; i++)
                        if (!check_period(ehci, frame, i,
-                                       qh->ps.period, qh->ps.c_usecs))
+                                       qh->ps.bw_uperiod, qh->ps.c_usecs))
                                goto done;
                        else
                                mask |= 1 << i;
 
                retval = 0;
 
-               *c_maskp = cpu_to_hc32(ehci, mask << 8);
+               *c_maskp = mask;
        }
 #else
        /* Make sure this tt's buffer is also available for CSPLITs.
@@ -780,15 +741,15 @@ static int check_intr_schedule (
         * one smart pass...
         */
        mask = 0x03 << (uframe + qh->gap_uf);
-       *c_maskp = cpu_to_hc32(ehci, mask << 8);
+       *c_maskp = mask;
 
        mask |= 1 << uframe;
-       if (tt_no_collision(ehci, qh->ps.period, qh->ps.udev, frame, mask)) {
+       if (tt_no_collision(ehci, qh->ps.bw_period, qh->ps.udev, frame, mask)) {
                if (!check_period(ehci, frame, uframe + qh->gap_uf + 1,
-                               qh->ps.period, qh->ps.c_usecs))
+                               qh->ps.bw_uperiod, qh->ps.c_usecs))
                        goto done;
                if (!check_period(ehci, frame, uframe + qh->gap_uf,
-                               qh->ps.period, qh->ps.c_usecs))
+                               qh->ps.bw_uperiod, qh->ps.c_usecs))
                        goto done;
                retval = 0;
        }
@@ -804,60 +765,57 @@ static int qh_schedule(struct ehci_hcd *ehci, struct ehci_qh *qh)
 {
        int             status;
        unsigned        uframe;
-       __hc32          c_mask;
-       unsigned        frame;          /* 0..(qh->period - 1), or NO_FRAME */
+       unsigned        c_mask;
        struct ehci_qh_hw       *hw = qh->hw;
 
        hw->hw_next = EHCI_LIST_END(ehci);
-       frame = qh->ps.phase;
 
        /* reuse the previous schedule slots, if we can */
-       if (frame != NO_FRAME) {
-               uframe = ffs(hc32_to_cpup(ehci, &hw->hw_info2) & QH_SMASK);
-               status = check_intr_schedule (ehci, frame, --uframe,
-                               qh, &c_mask);
-       } else {
-               uframe = 0;
-               c_mask = 0;
-               status = -ENOSPC;
+       if (qh->ps.phase != NO_FRAME) {
+               ehci_dbg(ehci, "reused qh %p schedule\n", qh);
+               return 0;
        }
 
+       uframe = 0;
+       c_mask = 0;
+       status = -ENOSPC;
+
        /* else scan the schedule to find a group of slots such that all
         * uframes have enough periodic bandwidth available.
         */
-       if (status) {
-               /* "normal" case, uframing flexible except with splits */
-               if (qh->ps.period) {
-                       int             i;
-
-                       for (i = qh->ps.period; status && i > 0; --i) {
-                               frame = ++ehci->random_frame % qh->ps.period;
-                               for (uframe = 0; uframe < 8; uframe++) {
-                                       status = check_intr_schedule (ehci,
-                                                       frame, uframe, qh,
-                                                       &c_mask);
-                                       if (status == 0)
-                                               break;
-                               }
+       /* "normal" case, uframing flexible except with splits */
+       if (qh->ps.bw_period) {
+               int             i;
+               unsigned        frame;
+
+               for (i = qh->ps.bw_period; status && i > 0; --i) {
+                       frame = ++ehci->random_frame & (qh->ps.bw_period - 1);
+                       for (uframe = 0; uframe < 8; uframe++) {
+                               status = check_intr_schedule(ehci,
+                                               frame, uframe, qh, &c_mask);
+                               if (status == 0)
+                                       break;
                        }
-
-               /* qh->ps.period == 0 means every uframe */
-               } else {
-                       frame = 0;
-                       status = check_intr_schedule (ehci, 0, 0, qh, &c_mask);
                }
-               if (status)
-                       goto done;
-               qh->ps.phase = frame;
 
-               /* reset S-frame and (maybe) C-frame masks */
-               hw->hw_info2 &= cpu_to_hc32(ehci, ~(QH_CMASK | QH_SMASK));
-               hw->hw_info2 |= qh->ps.period
-                       ? cpu_to_hc32(ehci, 1 << uframe)
-                       : cpu_to_hc32(ehci, QH_SMASK);
-               hw->hw_info2 |= c_mask;
-       } else
-               ehci_dbg (ehci, "reused qh %p schedule\n", qh);
+       /* qh->ps.bw_period == 0 means every uframe */
+       } else {
+               status = check_intr_schedule(ehci, 0, 0, qh, &c_mask);
+       }
+       if (status)
+               goto done;
+       qh->ps.phase = (qh->ps.period ? ehci->random_frame &
+                       (qh->ps.period - 1) : 0);
+       qh->ps.bw_phase = qh->ps.phase & (qh->ps.bw_period - 1);
+       qh->ps.phase_uf = uframe;
+       qh->ps.cs_mask = qh->ps.period ?
+                       (c_mask << 8) | (1 << uframe) :
+                       QH_SMASK;
+
+       /* reset S-frame and (maybe) C-frame masks */
+       hw->hw_info2 &= cpu_to_hc32(ehci, ~(QH_CMASK | QH_SMASK));
+       hw->hw_info2 |= cpu_to_hc32(ehci, qh->ps.cs_mask);
+       reserve_release_intr_bandwidth(ehci, qh, 1);
 
 done:
        return status;
@@ -969,6 +927,7 @@ iso_stream_alloc (gfp_t mem_flags)
                INIT_LIST_HEAD(&stream->td_list);
                INIT_LIST_HEAD(&stream->free_list);
                stream->next_uframe = NO_FRAME;
+               stream->ps.phase = NO_FRAME;
        }
        return stream;
 }
@@ -983,10 +942,10 @@ iso_stream_init (
        static const u8 smask_out [] = { 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f };
 
        struct usb_device       *dev = urb->dev;
-       unsigned                interval = urb->interval;
        u32                     buf1;
        unsigned                epnum, maxp;
        int                     is_input;
+       unsigned                tmp;
 
        /*
         * this might be a "high bandwidth" highspeed endpoint,
@@ -1020,9 +979,17 @@ iso_stream_init (
                 */
                stream->ps.usecs = HS_USECS_ISO(maxp);
 
-               stream->bandwidth = stream->ps.usecs * 8 / interval;
-               stream->uperiod = interval;
-               stream->ps.period = interval >> 3;
+               /* period for bandwidth allocation */
+               tmp = min_t(unsigned, EHCI_BANDWIDTH_SIZE,
+                               1 << (urb->ep->desc.bInterval - 1));
+
+               /* Allow urb->interval to override */
+               stream->ps.bw_uperiod = min_t(unsigned, tmp, urb->interval);
+
+               stream->uperiod = urb->interval;
+               stream->ps.period = urb->interval >> 3;
+               stream->bandwidth = stream->ps.usecs * 8 /
+                               stream->ps.bw_uperiod;
 
        } else {
                u32             addr;
@@ -1047,20 +1014,28 @@ iso_stream_init (
                        addr |= 1 << 31;
                        stream->ps.c_usecs = stream->ps.usecs;
                        stream->ps.usecs = HS_USECS_ISO(1);
-                       stream->raw_mask = 1;
+                       stream->ps.cs_mask = 1;
 
                        /* c-mask as specified in USB 2.0 11.18.4 3.c */
                        tmp = (1 << (hs_transfers + 2)) - 1;
-                       stream->raw_mask |= tmp << (8 + 2);
+                       stream->ps.cs_mask |= tmp << (8 + 2);
                } else
-                       stream->raw_mask = smask_out [hs_transfers - 1];
+                       stream->ps.cs_mask = smask_out[hs_transfers - 1];
+
+               /* period for bandwidth allocation */
+               tmp = min_t(unsigned, EHCI_BANDWIDTH_FRAMES,
+                               1 << (urb->ep->desc.bInterval - 1));
 
+               /* Allow urb->interval to override */
+               stream->ps.bw_period = min_t(unsigned, tmp, urb->interval);
+               stream->ps.bw_uperiod = stream->ps.bw_period << 3;
+
+               stream->ps.period = urb->interval;
+               stream->uperiod = urb->interval << 3;
                stream->bandwidth = (stream->ps.usecs + stream->ps.c_usecs) /
-                               interval;
-               stream->uperiod = interval << 3;
-               stream->ps.period = interval;
+                               stream->ps.bw_period;
 
-               /* stream->splits gets created from raw_mask later */
+               /* stream->splits gets created from cs_mask later */
                stream->address = cpu_to_hc32(ehci, addr);
        }
 
@@ -1249,45 +1224,84 @@ itd_urb_transaction (
 
 /*-------------------------------------------------------------------------*/
 
+static void reserve_release_iso_bandwidth(struct ehci_hcd *ehci,
+               struct ehci_iso_stream *stream, int sign)
+{
+       unsigned                uframe;
+       unsigned                i, j;
+       unsigned                s_mask, c_mask, m;
+       int                     usecs = stream->ps.usecs;
+       int                     c_usecs = stream->ps.c_usecs;
+
+       if (stream->ps.phase == NO_FRAME)       /* Bandwidth wasn't reserved */
+               return;
+       uframe = stream->ps.bw_phase << 3;
+
+       bandwidth_dbg(ehci, sign, "iso", &stream->ps);
+
+       if (sign < 0) {         /* Release bandwidth */
+               usecs = -usecs;
+               c_usecs = -c_usecs;
+       }
+
+       if (!stream->splits) {          /* High speed */
+               for (i = uframe + stream->ps.phase_uf; i < EHCI_BANDWIDTH_SIZE;
+                               i += stream->ps.bw_uperiod)
+                       ehci->bandwidth[i] += usecs;
+
+       } else {                        /* Full speed */
+               s_mask = stream->ps.cs_mask;
+               c_mask = s_mask >> 8;
+
+               /* NOTE: adjustment needed for frame overflow */
+               for (i = uframe; i < EHCI_BANDWIDTH_SIZE;
+                               i += stream->ps.bw_uperiod) {
+                       for ((j = stream->ps.phase_uf, m = 1 << j); j < 8;
+                                       (++j, m <<= 1)) {
+                               if (s_mask & m)
+                                       ehci->bandwidth[i+j] += usecs;
+                               else if (c_mask & m)
+                                       ehci->bandwidth[i+j] += c_usecs;
+                       }
+               }
+       }
+}
+
 static inline int
 itd_slot_ok (
        struct ehci_hcd         *ehci,
-       u32                     mod,
-       u32                     uframe,
-       u8                      usecs,
-       u32                     period
+       struct ehci_iso_stream  *stream,
+       unsigned                uframe
 )
 {
-       uframe %= period;
-       do {
-               /* can't commit more than uframe_periodic_max usec */
-               if (periodic_usecs (ehci, uframe >> 3, uframe & 0x7)
-                               > (ehci->uframe_periodic_max - usecs))
-                       return 0;
+       unsigned                usecs;
+
+       /* convert "usecs we need" to "max already claimed" */
+       usecs = ehci->uframe_periodic_max - stream->ps.usecs;
 
-               /* we know urb->interval is 2^N uframes */
-               uframe += period;
-       } while (uframe < mod);
+       for (uframe &= stream->ps.bw_uperiod - 1; uframe < EHCI_BANDWIDTH_SIZE;
+                       uframe += stream->ps.bw_uperiod) {
+               if (ehci->bandwidth[uframe] > usecs)
+                       return 0;
+       }
        return 1;
 }
 
 static inline int
 sitd_slot_ok (
        struct ehci_hcd         *ehci,
-       u32                     mod,
        struct ehci_iso_stream  *stream,
-       u32                     uframe,
-       struct ehci_iso_sched   *sched,
-       u32                     period_uframes
+       unsigned                uframe,
+       struct ehci_iso_sched   *sched
 )
 {
-       u32                     mask, tmp;
-       u32                     frame, uf;
+       unsigned                mask, tmp;
+       unsigned                frame, uf;
 
-       mask = stream->raw_mask << (uframe & 7);
+       mask = stream->ps.cs_mask << (uframe & 7);
 
        /* for OUT, don't wrap SSPLIT into H-microframe 7 */
-       if (((stream->raw_mask & 0xff) << (uframe & 7)) >= (1 << 7))
+       if (((stream->ps.cs_mask & 0xff) << (uframe & 7)) >= (1 << 7))
                return 0;
 
        /* for IN, don't wrap CSPLIT into the next frame */
@@ -1295,7 +1309,7 @@ sitd_slot_ok (
                return 0;
 
        /* check bandwidth */
-       uframe %= period_uframes;
+       uframe &= stream->ps.bw_uperiod - 1;
        frame = uframe >> 3;
 
 #ifdef CONFIG_USB_EHCI_TT_NEWSCHED
@@ -1303,55 +1317,49 @@ sitd_slot_ok (
         * tt_available scheduling guarantees 10+% for control/bulk.
         */
        uf = uframe & 7;
-       if (!tt_available(ehci, period_uframes >> 3,
+       if (!tt_available(ehci, stream->ps.bw_period,
                        stream->ps.udev, frame, uf, stream->ps.tt_usecs))
                return 0;
 #else
        /* tt must be idle for start(s), any gap, and csplit.
         * assume scheduling slop leaves 10+% for control/bulk.
         */
-       if (!tt_no_collision(ehci, period_uframes >> 3,
+       if (!tt_no_collision(ehci, stream->ps.bw_period,
                        stream->ps.udev, frame, mask))
                return 0;
 #endif
 
-       /* this multi-pass logic is simple, but performance may
-        * suffer when the schedule data isn't cached.
-        */
        do {
-               u32             max_used;
-
-               frame = uframe >> 3;
-               uf = uframe & 7;
+               unsigned        max_used;
+               unsigned        i;
 
                /* check starts (OUT uses more than one) */
+               uf = uframe;
                max_used = ehci->uframe_periodic_max - stream->ps.usecs;
-               for (tmp = stream->raw_mask & 0xff; tmp; tmp >>= 1, uf++) {
-                       if (periodic_usecs (ehci, frame, uf) > max_used)
+               for (tmp = stream->ps.cs_mask & 0xff; tmp; tmp >>= 1, uf++) {
+                       if (ehci->bandwidth[uf] > max_used)
                                return 0;
                }
 
                /* for IN, check CSPLIT */
                if (stream->ps.c_usecs) {
-                       uf = uframe & 7;
                        max_used = ehci->uframe_periodic_max -
                                        stream->ps.c_usecs;
-                       do {
-                               tmp = 1 << uf;
-                               tmp <<= 8;
-                               if ((stream->raw_mask & tmp) == 0)
+                       uf = uframe & ~7;
+                       tmp = 1 << (2+8);
+                       for (i = (uframe & 7) + 2; i < 8; (++i, tmp <<= 1)) {
+                               if ((stream->ps.cs_mask & tmp) == 0)
                                        continue;
-                               if (periodic_usecs (ehci, frame, uf)
-                                               > max_used)
+                               if (ehci->bandwidth[uf+i] > max_used)
                                        return 0;
-                       } while (++uf < 8);
+                       }
                }
 
-               /* we know urb->interval is 2^N uframes */
-               uframe += period_uframes;
-       } while (uframe < mod);
+               uframe += stream->ps.bw_uperiod;
+       } while (uframe < EHCI_BANDWIDTH_SIZE);
 
-       stream->splits = cpu_to_hc32(ehci, stream->raw_mask << (uframe & 7));
+       stream->ps.cs_mask <<= uframe & 7;
+       stream->splits = cpu_to_hc32(ehci, stream->ps.cs_mask);
        return 1;
 }
 
@@ -1382,12 +1390,10 @@ iso_stream_schedule (
        struct ehci_iso_sched   *sched = urb->hcpriv;
        bool                    empty = list_empty(&stream->td_list);
 
-       period = urb->interval;
+       period = stream->uperiod;
        span = sched->span;
-       if (!stream->highspeed) {
-               period <<= 3;
+       if (!stream->highspeed)
                span <<= 3;
-       }
 
        now = ehci_read_frame_index(ehci) & (mod - 1);
 
@@ -1404,47 +1410,55 @@ iso_stream_schedule (
        base = ehci->last_iso_frame << 3;
        next = (next - base) & (mod - 1);
 
-       /*
-        * Need to schedule; when's the next (u)frame we could start?
-        * This is bigger than ehci->i_thresh allows; scheduling itself
-        * isn't free, the delay should handle reasonably slow cpus.  It
-        * can also help high bandwidth if the dma and irq loads don't
-        * jump until after the queue is primed.
-        */
+       /* Start a new isochronous stream? */
        if (unlikely(empty && !hcd_periodic_completion_in_progress(
                        ehci_to_hcd(ehci), urb->ep))) {
-               int done = 0;
 
-               start = (now & ~0x07) + SCHEDULING_DELAY;
+               /* Schedule the endpoint */
+               if (stream->ps.phase == NO_FRAME) {
+                       int             done = 0;
 
-               /* find a uframe slot with enough bandwidth.
-                * Early uframes are more precious because full-speed
-                * iso IN transfers can't use late uframes,
-                * and therefore they should be allocated last.
-                */
-               next = start;
-               start += period;
-               do {
-                       start--;
-                       /* check schedule: enough space? */
-                       if (stream->highspeed) {
-                               if (itd_slot_ok(ehci, mod, start,
-                                               stream->ps.usecs, period))
-                                       done = 1;
-                       } else {
-                               if ((start % 8) >= 6)
-                                       continue;
-                               if (sitd_slot_ok(ehci, mod, stream,
-                                               start, sched, period))
-                                       done = 1;
+                       start = (now & ~0x07) + SCHEDULING_DELAY;
+
+                       /* find a uframe slot with enough bandwidth.
+                        * Early uframes are more precious because full-speed
+                        * iso IN transfers can't use late uframes,
+                        * and therefore they should be allocated last.
+                        */
+                       next = start;
+                       start += period;
+                       do {
+                               start--;
+                               /* check schedule: enough space? */
+                               if (stream->highspeed) {
+                                       if (itd_slot_ok(ehci, stream, start))
+                                               done = 1;
+                               } else {
+                                       if ((start % 8) >= 6)
+                                               continue;
+                                       if (sitd_slot_ok(ehci, stream, start,
+                                                       sched))
+                                               done = 1;
+                               }
+                       } while (start > next && !done);
+
+                       /* no room in the schedule */
+                       if (!done) {
+                               ehci_dbg(ehci, "iso sched full %p", urb);
+                               status = -ENOSPC;
+                               goto fail;
                        }
-               } while (start > next && !done);
+                       stream->ps.phase = (start >> 3) &
+                                       (stream->ps.period - 1);
+                       stream->ps.bw_phase = stream->ps.phase &
+                                       (stream->ps.bw_period - 1);
+                       stream->ps.phase_uf = start & 7;
+                       reserve_release_iso_bandwidth(ehci, stream, 1);
+               }
 
-               /* no room in the schedule */
-               if (!done) {
-                       ehci_dbg(ehci, "iso sched full %p", urb);
-                       status = -ENOSPC;
-                       goto fail;
+               /* New stream is already scheduled; use the upcoming slot */
+               else {
+                       start = (stream->ps.phase << 3) + stream->ps.phase_uf;
                }
 
                start = (start - base) & (mod - 1);
@@ -1452,7 +1466,7 @@ iso_stream_schedule (
        }
 
        /*
-        * Typical case: reuse current schedule, stream is still active.
+        * Typical case: reuse current schedule, stream may still be active.
         * Hopefully there are no gaps from the host falling behind
         * (irq delays etc).  If there are, the behavior depends on
         * whether URB_ISO_ASAP is set.
index 14ced00..f6459df 100644 (file)
@@ -97,8 +97,7 @@ static ssize_t store_uframe_periodic_max(struct device *dev,
 {
        struct ehci_hcd         *ehci;
        unsigned                uframe_periodic_max;
-       unsigned                frame, uframe;
-       unsigned short          allocated_max;
+       unsigned                uframe;
        unsigned long           flags;
        ssize_t                 ret;
 
@@ -122,16 +121,14 @@ static ssize_t store_uframe_periodic_max(struct device *dev,
 
        /*
         * for request to decrease max periodic bandwidth, we have to check
-        * every microframe in the schedule to see whether the decrease is
-        * possible.
+        * to see whether the decrease is possible.
         */
        if (uframe_periodic_max < ehci->uframe_periodic_max) {
-               allocated_max = 0;
+               u8              allocated_max = 0;
 
-               for (frame = 0; frame < ehci->periodic_size; ++frame)
-                       for (uframe = 0; uframe < 7; ++uframe)
-                               allocated_max = max(allocated_max,
-                                                   periodic_usecs (ehci, frame, uframe));
+               for (uframe = 0; uframe < EHCI_BANDWIDTH_SIZE; ++uframe)
+                       allocated_max = max(allocated_max,
+                                       ehci->bandwidth[uframe]);
 
                if (allocated_max > uframe_periodic_max) {
                        ehci_info(ehci,
index e2b64c4..12504fb 100644 (file)
@@ -62,10 +62,16 @@ struct ehci_per_sched {
        struct usb_device       *udev;          /* access to the TT */
        struct usb_host_endpoint *ep;
        u16                     tt_usecs;       /* time on the FS/LS bus */
+       u16                     cs_mask;        /* C-mask and S-mask bytes */
        u16                     period;         /* actual period in frames */
        u16                     phase;          /* actual phase, frame part */
+       u8                      bw_phase;       /* same, for bandwidth
+                                                  reservation */
        u8                      phase_uf;       /* uframe part of the phase */
        u8                      usecs, c_usecs; /* times on the HS bus */
+       u8                      bw_uperiod;     /* period in microframes, for
+                                                  bandwidth reservation */
+       u8                      bw_period;      /* same, in frames */
 };
 #define NO_FRAME       29999                   /* frame not assigned yet */
 
@@ -245,6 +251,12 @@ struct ehci_hcd {                  /* one per controller */
        struct dentry           *debug_dir;
 #endif
 
+       /* bandwidth usage */
+#define EHCI_BANDWIDTH_SIZE    64
+#define EHCI_BANDWIDTH_FRAMES  (EHCI_BANDWIDTH_SIZE >> 3)
+       u8                      bandwidth[EHCI_BANDWIDTH_SIZE];
+                                               /* us allocated per uframe */
+
        /* platform-specific data -- must come last */
        unsigned long           priv[0] __aligned(sizeof(s64));
 };
@@ -469,7 +481,6 @@ struct ehci_iso_stream {
         */
        u16                     uperiod;        /* period in uframes */
        u16                     maxp;
-       u16                     raw_mask;
        unsigned                bandwidth;
 
        /* This is used to initialize iTD's hw_bufp fields */