From xiphmont@gmail.com Thu Sep 28 00:26:48 2006 Message-ID: <806dafc20609280026mb22944dvb14be785c44461@mail.gmail.com> Date: Thu, 28 Sep 2006 03:26:47 -0400 From: "Christopher \"Monty\" Montgomery" To: linux-usb-devel@lists.sourceforge.net Subject: [PATCH 12/15] USB: ehci-hcd: activate use of shadow budget for scheduling decisions Cc: greg@kroah.com, david-b@pacbell.net, xiphmont@gmail.com Content-Disposition: inline patch 12: Switches the existing scheduler mechanisms over to using the shadow budget for all scheduling decisions. Removes all unused bandwidth allocation logic from previous scheduler versions. Signed-off-by: Christopher "Monty" Montgomery Cc: David Brownell Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/Kconfig | 17 drivers/usb/host/ehci-q.c | 27 - drivers/usb/host/ehci-sched.c | 1033 +++++++++--------------------------------- drivers/usb/host/ehci.h | 14 4 files changed, 248 insertions(+), 843 deletions(-) --- gregkh-2.6.orig/drivers/usb/host/Kconfig +++ gregkh-2.6/drivers/usb/host/Kconfig @@ -41,23 +41,6 @@ config USB_EHCI_ROOT_HUB_TT This supports the EHCI implementation that's originally from ARC, and has since changed hands a few times. -config USB_EHCI_TT_NEWSCHED - bool "Improved Transaction Translator scheduling (EXPERIMENTAL)" - depends on USB_EHCI_HCD && EXPERIMENTAL - ---help--- - This changes the periodic scheduling code to fill more of the low - and full speed bandwidth available from the Transaction Translator - (TT) in USB 2.0 hubs. Without this, only one transfer will be - issued in each microframe, significantly reducing the number of - periodic low/fullspeed transfers possible. - - If you have multiple periodic low/fullspeed devices connected to a - highspeed USB hub which is connected to a highspeed USB Host - Controller, and some of those devices will not work correctly - (possibly due to "ENOSPC" or "-28" errors), say Y. - - If unsure, say N. 
- config USB_ISP116X_HCD tristate "ISP116X HCD support" depends on USB --- gregkh-2.6.orig/drivers/usb/host/ehci-q.c +++ gregkh-2.6/drivers/usb/host/ehci-q.c @@ -633,9 +633,9 @@ qh_make ( struct urb *urb, gfp_t flags ) { + int is_input, type = usb_pipetype (urb->pipe); struct ehci_qh *qh = ehci_qh_alloc (ehci, flags); u32 info1 = 0, info2 = 0; - int is_input, type; int maxp = 0; if (!qh) @@ -648,7 +648,6 @@ qh_make ( info1 |= usb_pipedevice (urb->pipe) << 0; is_input = usb_pipein (urb->pipe); - type = usb_pipetype (urb->pipe); maxp = usb_maxpacket (urb->dev, urb->pipe, !is_input); /* Compute interrupt scheduling parameters just once, and save. @@ -662,24 +661,12 @@ qh_make ( if (type == PIPE_INTERRUPT) { qh->usecs = NS_TO_US (usb_calc_bus_time (USB_SPEED_HIGH, is_input, 0, hb_mult (maxp) * max_packet (maxp))); - qh->start = NO_FRAME; if (urb->dev->speed == USB_SPEED_HIGH) { qh->c_usecs = 0; - qh->gap_uf = 0; + qh->tt_bytes = 0; qh->period = urb->interval; /* uFrame */ - /* XXX: remove once new budgeting code */ - if (urb->interval<8 && urb->interval != 1) { - /* NOTE interval 2 or 4 uframes could work. - * But interval 1 scheduling is simpler, and - * includes high bandwidth. 
- */ - dbg ("intr period %d uframes, NYET!", - urb->interval); - goto done; - } - } else { struct usb_tt *tt = urb->dev->tt; int think_time, think_bytes; @@ -699,11 +686,9 @@ qh_make ( /* expressed in full speed bytes */ qh->tt_bytes = think_bytes + maxp*8 + 98; - /* gap is f(FS/LS transfer times) */ - qh->gap_uf = 1 + usb_calc_bus_time (urb->dev->speed, - is_input, 0, maxp) / (125 * 1000); - /* FIXME this just approximates SPLIT/CSPLIT times */ + /* can't fix without going to ns granularity; + this is at least safe */ if (is_input) { // SPLIT, gap, CSPLIT+DATA qh->c_usecs = qh->usecs + HS_USECS (0); qh->usecs = HS_USECS (1); @@ -712,9 +697,6 @@ qh_make ( qh->c_usecs = HS_USECS (0); } - qh->tt_usecs = NS_TO_US (think_time + - usb_calc_bus_time (urb->dev->speed, - is_input, 0, max_packet (maxp))); qh->period = urb->interval << 3; /* uFrame */ } } @@ -778,7 +760,6 @@ qh_make ( break; default: dbg ("bogus dev %p speed %d", urb->dev, urb->dev->speed); -done: qh_put (qh); return NULL; } --- gregkh-2.6.orig/drivers/usb/host/ehci-sched.c +++ gregkh-2.6/drivers/usb/host/ehci-sched.c @@ -229,6 +229,74 @@ static void print_budget (struct ehci_hc print_budget_frame(ehci,i,insert,owner); } +static void print_schedule_frame (char *pre,struct ehci_hcd *ehci, int frame, + void *insert) +{ + union ehci_shadow *here = + &ehci->pshadow [frame & (ehci->periodic_size-1)]; + __le32 *hw_p = &ehci->periodic [frame & (ehci->periodic_size-1)]; + __le32 type = Q_NEXT_TYPE (*hw_p); + printk(KERN_INFO "%s @ %d: ",pre,frame & (ehci->periodic_size-1)); + while(here && here->ptr){ + if(here->ptr == insert) + printk(">>"); + + switch(type){ + case Q_TYPE_ITD: + { + struct ehci_itd *itd = here->itd; + printk("[ITD 0x%p : 0x%x]", + itd, + (unsigned)itd->hw_next); + } + break; + case Q_TYPE_SITD: + { + struct ehci_sitd *sitd = here->sitd; + if(sitd->hw_backpointer != EHCI_LIST_END) + printk("[DUMMY 0x%p]", + sitd); + else + printk("[SITD 0x%p]", + sitd); + } + break; + case Q_TYPE_FSTN: + { + struct 
ehci_fstn *fstn = here->fstn; + if(fstn->hw_prev == EHCI_LIST_END) + printk("[FSTN restore 0x%p]",fstn); + else + printk("[FSTN save 0x%p]", + fstn); + } + break; + case Q_TYPE_QH: + { + struct ehci_qh *qh = here->qh; + printk("[QH %d 0x%p]", + qh->period,qh); + } + break; + } + + if(here->ptr == insert) + printk("<<"); + + if(here == periodic_next_shadow (here, type)){ + printk("\nERROR: periodic schedule entry " + "linked to itself!\n"); + return; + } + + hw_p = here->hw_next; + here = periodic_next_shadow (here, type); + type = Q_NEXT_TYPE (*hw_p); + + } + printk("\n"); +} + /* find position of a specific entry in the periodic schedule (ie, * returns pointers such that we can update the predecessor's * linkage); here->ptr == NULL indicates the find failed. @@ -1211,348 +1279,6 @@ static int budget_schedule_next_frame (s /* end of shadow budget implementation */ /*-------------------------------------------------------------------------*/ - -/* how many of the uframe's 125 usecs are allocated? */ -static unsigned short -periodic_usecs (struct ehci_hcd *ehci, unsigned frame, unsigned uframe) -{ - __le32 *hw_p = &ehci->periodic [frame]; - union ehci_shadow *q = &ehci->pshadow [frame]; - unsigned usecs = 0; - - while (q->ptr) { - switch (Q_NEXT_TYPE (*hw_p)) { - case Q_TYPE_QH: - /* is it in the S-mask? */ - if (q->qh->hw_info2 & cpu_to_le32 (1 << uframe)) - usecs += q->qh->usecs; - /* ... or C-mask? 
*/ - if (q->qh->hw_info2 & cpu_to_le32 (1 << (8 + uframe))) - usecs += q->qh->c_usecs; - hw_p = &q->qh->hw_next; - q = &q->qh->qh_next; - break; - // case Q_TYPE_FSTN: - default: - /* for "save place" FSTNs, count the relevant INTR - * bandwidth from the previous frame - */ - if (q->fstn->hw_prev != EHCI_LIST_END) { - ehci_dbg (ehci, "ignoring FSTN cost ...\n"); - } - hw_p = &q->fstn->hw_next; - q = &q->fstn->fstn_next; - break; - case Q_TYPE_ITD: - usecs += q->itd->usecs [uframe]; - hw_p = &q->itd->hw_next; - q = &q->itd->itd_next; - break; - case Q_TYPE_SITD: - /* is it in the S-mask? (count SPLIT, DATA) */ - if (q->sitd->hw_uframe & cpu_to_le32 (1 << uframe)) { - if (q->sitd->hw_fullspeed_ep & - __constant_cpu_to_le32 (1<<31)) - usecs += q->sitd->stream->usecs; - else /* worst case for OUT start-split */ - usecs += HS_USECS_ISO (188); - } - - /* ... C-mask? (count CSPLIT, DATA) */ - if (q->sitd->hw_uframe & - cpu_to_le32 (1 << (8 + uframe))) { - /* worst case for IN complete-split */ - usecs += q->sitd->stream->c_usecs; - } - - hw_p = &q->sitd->hw_next; - q = &q->sitd->sitd_next; - break; - } - } -#ifdef DEBUG - if (usecs > 100) - ehci_err (ehci, "uframe %d sched overrun: %d usecs\n", - frame * 8 + uframe, usecs); -#endif - return usecs; -} - -/*-------------------------------------------------------------------------*/ - -static int same_tt (struct usb_device *dev1, struct usb_device *dev2) -{ - if (!dev1->tt || !dev2->tt) - return 0; - if (dev1->tt != dev2->tt) - return 0; - if (dev1->tt->multi) - return dev1->ttport == dev2->ttport; - else - return 1; -} - -#ifdef CONFIG_USB_EHCI_TT_NEWSCHED - -/* Which uframe does the low/fullspeed transfer start in? - * - * The parameter is the mask of ssplits in "H-frame" terms - * and this returns the transfer start uframe in "B-frame" terms, - * which allows both to match, e.g. a ssplit in "H-frame" uframe 0 - * will cause a transfer in "B-frame" uframe 0. "B-frames" lag - * "H-frames" by 1 uframe. 
See the EHCI spec sec 4.5 and figure 4.7. - */ -static inline unsigned char tt_start_uframe(struct ehci_hcd *ehci, __le32 mask) -{ - unsigned char smask = QH_SMASK & le32_to_cpu(mask); - if (!smask) { - ehci_err(ehci, "invalid empty smask!\n"); - /* uframe 7 can't have bw so this will indicate failure */ - return 7; - } - return ffs(smask) - 1; -} - -static const unsigned char -max_tt_usecs[] = { 125, 125, 125, 125, 125, 125, 30, 0 }; - -/* carryover low/fullspeed bandwidth that crosses uframe boundries */ -static inline void carryover_tt_bandwidth(unsigned short tt_usecs[8]) -{ - int i; - for (i=0; i<7; i++) { - if (max_tt_usecs[i] < tt_usecs[i]) { - tt_usecs[i+1] += tt_usecs[i] - max_tt_usecs[i]; - tt_usecs[i] = max_tt_usecs[i]; - } - } -} - -/* How many of the tt's periodic downstream 1000 usecs are allocated? - * - * While this measures the bandwidth in terms of usecs/uframe, - * the low/fullspeed bus has no notion of uframes, so any particular - * low/fullspeed transfer can "carry over" from one uframe to the next, - * since the TT just performs downstream transfers in sequence. - * - * For example two seperate 100 usec transfers can start in the same uframe, - * and the second one would "carry over" 75 usecs into the next uframe. 
- */ -static void -periodic_tt_usecs ( - struct ehci_hcd *ehci, - struct usb_device *dev, - unsigned frame, - unsigned short tt_usecs[8] -) -{ - __le32 *hw_p = &ehci->periodic [frame]; - union ehci_shadow *q = &ehci->pshadow [frame]; - unsigned char uf; - - memset(tt_usecs, 0, 16); - - while (q->ptr) { - switch (Q_NEXT_TYPE(*hw_p)) { - case Q_TYPE_ITD: - hw_p = &q->itd->hw_next; - q = &q->itd->itd_next; - continue; - case Q_TYPE_QH: - if (same_tt(dev, q->qh->dev)) { - uf = tt_start_uframe(ehci, q->qh->hw_info2); - tt_usecs[uf] += q->qh->tt_usecs; - } - hw_p = &q->qh->hw_next; - q = &q->qh->qh_next; - continue; - case Q_TYPE_SITD: - if (same_tt(dev, q->sitd->urb->dev)) { - uf = tt_start_uframe(ehci, q->sitd->hw_uframe); - tt_usecs[uf] += q->sitd->stream->tt_usecs; - } - hw_p = &q->sitd->hw_next; - q = &q->sitd->sitd_next; - continue; - // case Q_TYPE_FSTN: - default: - ehci_dbg(ehci, - "ignoring periodic frame %d FSTN\n", frame); - hw_p = &q->fstn->hw_next; - q = &q->fstn->fstn_next; - } - } - - carryover_tt_bandwidth(tt_usecs); - - if (max_tt_usecs[7] < tt_usecs[7]) - ehci_err(ehci, "frame %d tt sched overrun: %d usecs\n", - frame, tt_usecs[7] - max_tt_usecs[7]); -} - -/* - * Return true if the device's tt's downstream bus is available for a - * periodic transfer of the specified length (usecs), starting at the - * specified frame/uframe. Note that (as summarized in section 11.19 - * of the usb 2.0 spec) TTs can buffer multiple transactions for each - * uframe. - * - * The uframe parameter is when the fullspeed/lowspeed transfer - * should be executed in "B-frame" terms, which is the same as the - * highspeed ssplit's uframe (which is in "H-frame" terms). For example - * a ssplit in "H-frame" 0 causes a transfer in "B-frame" 0. - * See the EHCI spec sec 4.5 and fig 4.7. - * - * This checks if the full/lowspeed bus, at the specified starting uframe, - * has the specified bandwidth available, according to rules listed - * in USB 2.0 spec section 11.18.1 fig 11-60. 
- * - * This does not check if the transfer would exceed the max ssplit - * limit of 16, specified in USB 2.0 spec section 11.18.4 requirement #4, - * since proper scheduling limits ssplits to less than 16 per uframe. - */ -static int tt_available ( - struct ehci_hcd *ehci, - unsigned period, - struct usb_device *dev, - unsigned frame, - unsigned uframe, - u16 usecs -) -{ - if ((period == 0) || (uframe >= 7)) /* error */ - return 0; - - for (; frame < ehci->periodic_size; frame += period) { - unsigned short tt_usecs[8]; - - periodic_tt_usecs (ehci, dev, frame, tt_usecs); - - ehci_vdbg(ehci, "tt frame %d check %d usecs start uframe %d in" - " schedule %d/%d/%d/%d/%d/%d/%d/%d\n", - frame, usecs, uframe, - tt_usecs[0], tt_usecs[1], tt_usecs[2], tt_usecs[3], - tt_usecs[4], tt_usecs[5], tt_usecs[6], tt_usecs[7]); - - if (max_tt_usecs[uframe] <= tt_usecs[uframe]) { - ehci_vdbg(ehci, "frame %d uframe %d fully scheduled\n", - frame, uframe); - return 0; - } - - /* special case for isoc transfers larger than 125us: - * the first and each subsequent fully used uframe - * must be empty, so as to not illegally delay - * already scheduled transactions - */ - if (125 < usecs) { - int ufs = (usecs / 125) - 1; - int i; - for (i = uframe; i < (uframe + ufs) && i < 8; i++) - if (0 < tt_usecs[i]) { - ehci_vdbg(ehci, - "multi-uframe xfer can't fit " - "in frame %d uframe %d\n", - frame, i); - return 0; - } - } - - tt_usecs[uframe] += usecs; - - carryover_tt_bandwidth(tt_usecs); - - /* fail if the carryover pushed bw past the last uframe's limit */ - if (max_tt_usecs[7] < tt_usecs[7]) { - ehci_vdbg(ehci, - "tt unavailable usecs %d frame %d uframe %d\n", - usecs, frame, uframe); - return 0; - } - } - - return 1; -} - -#else - -/* return true iff the device's transaction translator is available - * for a periodic transfer starting at the specified frame, using - * all the uframes in the mask. 
- */ -static int tt_no_collision ( - struct ehci_hcd *ehci, - unsigned period, - struct usb_device *dev, - unsigned frame, - u32 uf_mask -) -{ - if (period == 0) /* error */ - return 0; - - /* note bandwidth wastage: split never follows csplit - * (different dev or endpoint) until the next uframe. - * calling convention doesn't make that distinction. - */ - for (; frame < ehci->periodic_size; frame += period) { - union ehci_shadow here; - __le32 type; - - here = ehci->pshadow [frame]; - type = Q_NEXT_TYPE (ehci->periodic [frame]); - while (here.ptr) { - switch (type) { - case Q_TYPE_ITD: - type = Q_NEXT_TYPE (here.itd->hw_next); - here = here.itd->itd_next; - continue; - case Q_TYPE_QH: - if (same_tt (dev, here.qh->dev)) { - u32 mask; - - mask = le32_to_cpu (here.qh->hw_info2); - /* "knows" no gap is needed */ - mask |= mask >> 8; - if (mask & uf_mask) - break; - } - type = Q_NEXT_TYPE (here.qh->hw_next); - here = here.qh->qh_next; - continue; - case Q_TYPE_SITD: - if (same_tt (dev, here.sitd->urb->dev)) { - u16 mask; - - mask = le32_to_cpu (here.sitd - ->hw_uframe); - /* FIXME assumes no gap for IN! */ - mask |= mask >> 8; - if (mask & uf_mask) - break; - } - type = Q_NEXT_TYPE (here.sitd->hw_next); - here = here.sitd->sitd_next; - continue; - // case Q_TYPE_FSTN: - default: - ehci_dbg (ehci, - "periodic frame %d bogus type %d\n", - frame, type); - } - - /* collision or error */ - return 0; - } - } - - /* no collision */ - return 1; -} - -#endif /* CONFIG_USB_EHCI_TT_NEWSCHED */ - -/*-------------------------------------------------------------------------*/ /* enable_periodic - Activate the periodic schedule * * @ehci: pointer to ehci host controller device structure. 
@@ -1584,7 +1310,7 @@ static int enable_periodic (struct ehci_ /* make sure ehci_work scans these */ ehci->next_uframe = readl (&ehci->regs->frame_index) - % (ehci->periodic_size << 3); + & ((ehci->periodic_size << 3)-1); } if(ehci->periodic_sched<2) @@ -1712,15 +1438,11 @@ static void periodic_qh_deschedule(struc periodic_qh_unlink_frame (ehci, i, qh); /* update per-qh bandwidth for usbfs */ + /* XXX Hm... now that we have the budget, this should be moved + to where the budget is released */ ehci_to_hcd(ehci)->self.bandwidth_allocated -= (qh->usecs + qh->c_usecs) / qh->period; - dev_dbg (&qh->dev->dev, - "unlink qh%d-%04x/%p start %d [%d/%d us]\n", - qh->period, - le32_to_cpup (&qh->hw_info2) & (QH_CMASK | QH_SMASK), - qh, qh->start, qh->usecs, qh->c_usecs); - /* qh->qh_next still "live" to HC */ qh->qh_state = QH_STATE_UNLINK; qh->qh_next.ptr = NULL; @@ -1756,50 +1478,50 @@ static void periodic_qh_deschedule(struc */ static int periodic_qh_link (struct ehci_hcd *ehci, struct ehci_qh *qh) { - unsigned i; - unsigned period = _period_to_level(qh->period); + unsigned i=0; + unsigned level = _period_to_level(qh->period); - dev_dbg (&qh->dev->dev, - "link qh%d-%04x/%p start %d [%d/%d us]\n", - period, le32_to_cpup (&qh->hw_info2) & (QH_CMASK | QH_SMASK), - qh, qh->start, qh->usecs, qh->c_usecs); - - for (i = qh->start; i < ehci->periodic_size; i += period) { - union ehci_shadow *prev = &ehci->pshadow [i]; - __le32 *hw_p = &ehci->periodic [i]; - union ehci_shadow here = *prev; - __le32 type = 0; - - /* skip the iso nodes at list head */ - while (here.ptr) { - type = Q_NEXT_TYPE (*hw_p); - if (type == Q_TYPE_QH) - break; - prev = periodic_next_shadow (prev, type); - hw_p = &here.qh->hw_next; - here = *prev; + /* set masks */ + qh->hw_info2 &= + __constant_cpu_to_le32(~(QH_CMASK | QH_SMASK)); + qh->hw_info2 |= + qh->budget->smask | + ((qh->budget->cmask&0xff)<<8) | + (((qh->budget->cmask>>8)&0xff)<<8); + + while(1){ + __le32 *hw_p; + union ehci_shadow *here; + struct 
ehci_shadow_budget *budget; + + i = budget_schedule_next (ehci, i, qh, &budget, &here, &hw_p); + + if(i >= ehci->periodic_size)break; + + if(i<0){ + ehci_err(ehci, + "QH slot not in budget where expected"); + i += level; + continue; } - /* sorting each branch by period (slow-->fast) - * enables sharing interior tree nodes - */ - while (here.ptr && qh != here.qh) { - if (period > _period_to_level(here.qh->period)) - break; - prev = &here.qh->qh_next; - hw_p = &here.qh->hw_next; - here = *prev; - } - /* link in this qh, unless some earlier pass did that */ - if (qh != here.qh) { - qh->qh_next = here; - if (here.qh) + /* already linked in? */ + if(here->ptr != qh){ + + /* link right */ + qh->qh_next = *here; qh->hw_next = *hw_p; wmb (); - prev->qh = qh; + + /* link left; fully live */ + here->qh = qh; *hw_p = QH_NEXT (qh->qh_dma); + } + + i += level; } + qh->qh_state = QH_STATE_LINKED; qh_get (qh); @@ -1811,131 +1533,18 @@ static int periodic_qh_link (struct ehci return enable_periodic (ehci); } -/*-------------------------------------------------------------------------*/ - -static int check_period ( - struct ehci_hcd *ehci, - unsigned frame, - unsigned uframe, - unsigned period, - unsigned usecs -) { - int claimed; - int level = _period_to_level(period); - - /* complete split running into next frame? - * given FSTN support, we could sometimes check... - */ - if (uframe >= 8) - return 0; - - /* - * 80% periodic == 100 usec/uframe available - * convert "usecs we need" to "max already claimed" - */ - usecs = 100 - usecs; - - /* we "know" 2 and 4 uframe intervals were rejected; so - * for period 1, check _every_ microframe in the schedule. 
- */ - if (unlikely (period == 1)) { - do { - for (uframe = 0; uframe < 7; uframe++) { - claimed = periodic_usecs (ehci, frame, uframe); - if (claimed > usecs) - return 0; - } - } while ((frame += 1) < ehci->periodic_size); - - /* just check the specified uframe, at that period */ - } else { - do { - claimed = periodic_usecs (ehci, frame, uframe); - if (claimed > usecs) - return 0; - } while ((frame += level) < ehci->periodic_size); - } - - // success! - return 1; -} - -static int check_intr_schedule ( - struct ehci_hcd *ehci, - unsigned frame, - unsigned uframe, - const struct ehci_qh *qh, - __le32 *c_maskp -) -{ - int retval = -ENOSPC; - u8 mask = 0; - - if (qh->c_usecs && uframe >= 6) /* FSTN territory? */ - goto done; - - if (!check_period (ehci, frame, uframe, qh->period, qh->usecs)) - goto done; - if (!qh->c_usecs) { - retval = 0; - *c_maskp = 0; - goto done; - } - -#ifdef CONFIG_USB_EHCI_TT_NEWSCHED - if (tt_available (ehci, _period_to_level(qh->period), - qh->dev, frame, uframe, qh->tt_usecs)) { - unsigned i; - - /* TODO : this may need FSTN for SSPLIT in uframe 5. */ - for (i=uframe+1; i<8 && iperiod, qh->c_usecs)) - goto done; - else - mask |= 1 << i; - - retval = 0; - - *c_maskp = cpu_to_le32 (mask << 8); - } -#else - /* Make sure this tt's buffer is also available for CSPLITs. - * We pessimize a bit; probably the typical full speed case - * doesn't need the second CSPLIT. +/* periodic_qh_schedule - budget the passed in QH if it has not + * already been budgeted, then add QH to the hardware schedule * - * NOTE: both SPLIT and CSPLIT could be checked in just - * one smart pass... 
+ * @ehci: pointer to ehci host controller device structure + * @qh: QH to link */ - mask = 0x03 << (uframe + qh->gap_uf); - *c_maskp = cpu_to_le32 (mask << 8); - - mask |= 1 << uframe; - if (tt_no_collision (ehci, qh->period, qh->dev, frame, mask)) { - if (!check_period (ehci, frame, uframe + qh->gap_uf + 1, - qh->period, qh->c_usecs)) - goto done; - if (!check_period (ehci, frame, uframe + qh->gap_uf, - qh->period, qh->c_usecs)) - goto done; - retval = 0; - } -#endif -done: - return retval; -} - static int periodic_qh_schedule (struct ehci_hcd *ehci, struct ehci_qh *qh) { int status; - unsigned uframe; - __le32 c_mask; - unsigned frame; /* 0..(qh->period - 1), or NO_FRAME */ - int period = _period_to_level(qh->period); qh_refresh(ehci, qh); qh->hw_next = EHCI_LIST_END; - frame = qh->start; /* budget the qh if not already budgeted */ if(!qh->budget){ @@ -1946,55 +1555,15 @@ static int periodic_qh_schedule (struct return status; } - /* reuse the previous schedule slots, if we can */ - if (frame < period) { - uframe = ffs (le32_to_cpup (&qh->hw_info2) & QH_SMASK); - status = check_intr_schedule (ehci, frame, --uframe, - qh, &c_mask); - } else { - uframe = 0; - c_mask = 0; - status = -ENOSPC; - } - - /* else scan the schedule to find a group of slots such that all - * uframes have enough periodic bandwidth available. 
- */ - if (status) { - /* "normal" case, uframing flexible except with splits */ - if (qh->period > 1) { - frame = period - 1; - do { - for (uframe = 0; uframe < 8; uframe++) { - status = check_intr_schedule (ehci, - frame, uframe, qh, - &c_mask); - if (status == 0) - break; + /* schedule the qh */ + if(sched_verbose){ + ehci_info(ehci, "Adding new QH %p to schedule:\n", + qh); } - } while (status && frame--); - /* qh->period == 1 means every uframe */ - } else { - frame = 0; - status = check_intr_schedule (ehci, 0, 0, qh, &c_mask); - } - if (status) - goto done; - qh->start = frame; + status = periodic_qh_link (ehci, qh); - /* reset S-frame and (maybe) C-frame masks */ - qh->hw_info2 &= __constant_cpu_to_le32(~(QH_CMASK | QH_SMASK)); - qh->hw_info2 |= (qh->period>1) - ? cpu_to_le32 (1 << uframe) - : __constant_cpu_to_le32 (QH_SMASK); - qh->hw_info2 |= c_mask; - } else - ehci_dbg (ehci, "reused qh %p schedule\n", qh); - /* stuff into the periodic schedule */ - status = periodic_qh_link (ehci, qh); -done: if(status){ budget_unlink_entries_by_owner(ehci,qh); qh->budget = NULL; @@ -2017,8 +1586,8 @@ static int intr_submit ( struct usb_host_endpoint *ep, struct urb *urb, struct list_head *qtd_list, - gfp_t mem_flags -) { + gfp_t mem_flags) +{ unsigned epnum; unsigned long flags; struct ehci_qh *qh; @@ -2133,10 +1702,8 @@ iso_stream_init ( struct usb_device *dev, int pipe, unsigned interval -) + ) { - static const u8 smask_out [] = { 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f }; - u32 buf1; unsigned epnum, maxp; int is_input; @@ -2180,7 +1747,6 @@ iso_stream_init ( } else { u32 addr; int think_time, think_bytes; - int hs_transfers; addr = dev->ttport << 24; if (!ehci_is_TDI(ehci) @@ -2194,13 +1760,7 @@ iso_stream_init ( think_time = dev->tt ? 
dev->tt->think_time : 0; think_bytes = (think_time+665)/666; - stream->tt_usecs = NS_TO_US (think_time + usb_calc_bus_time ( - dev->speed, is_input, 1, maxp)); - hs_transfers = max (1u, (maxp + 187) / 188); - if (is_input) { - u32 tmp; - addr |= 1 << 31; stream->c_usecs = stream->usecs + HS_USECS_ISO (1); @@ -2218,12 +1778,6 @@ iso_stream_init ( /* expressed in full speed bytes */ stream->tt_bytes = think_bytes + maxp*8 + 98; } - - stream->raw_mask = 1; - - /* c-mask as specified in USB 2.0 11.18.4 3.c */ - tmp = (1 << (hs_transfers + 2)) - 1; - stream->raw_mask |= tmp << (8 + 2); } else { /* out */ @@ -2241,13 +1795,9 @@ iso_stream_init ( /* expressed in full speed bytes */ stream->tt_bytes = think_bytes + maxp*8 + 98; } - - stream->raw_mask = smask_out [hs_transfers - 1]; } bandwidth = stream->usecs + stream->c_usecs; bandwidth /= 1 << (interval + 2); - - /* stream->splits gets created from raw_mask later */ stream->address = cpu_to_le32 (addr); stream->interval = interval<<3; } @@ -2276,8 +1826,6 @@ iso_stream_put(struct ehci_hcd *ehci, st these drivers when it's not necessary to do so. This saves substantial overhead in that case.*/ if (stream->refcount == 0) { - int is_in; - // BUG_ON (!list_empty(&stream->td_list)); while (!list_empty (&stream->free_list)) { @@ -2303,20 +1851,9 @@ iso_stream_put(struct ehci_hcd *ehci, st sitd->sitd_dma); } } - - is_in = (stream->bEndpointAddress & USB_DIR_IN) ? 0x10 : 0; stream->bEndpointAddress &= 0x0f; stream->ep->hcpriv = NULL; - if (stream->rescheduled) { - ehci_info (ehci, "ep%d%s-iso rescheduled " - "%lu times in %lu seconds\n", - stream->bEndpointAddress, is_in ? 
"in" : "out", - stream->rescheduled, - ((jiffies - stream->start)/HZ) - ); - } - /* eliminate this stream from the shadow budget */ if(sched_verbose) ehci_info(ehci, "Releasing bandwidth for ISO %p\n", @@ -2395,119 +1932,8 @@ iso_stream_find (struct ehci_hcd *ehci, return stream; } -/*-------------------------------------------------------------------------*/ - -static inline int -itd_slot_ok ( - struct ehci_hcd *ehci, - u32 mod, - u32 uframe, - u8 usecs, - u32 period -) -{ - uframe %= period; - do { - /* can't commit more than 80% periodic == 100 usec */ - if (periodic_usecs (ehci, uframe >> 3, uframe & 0x7) - > (100 - usecs)) - return 0; - - /* we know urb->interval is 2^N uframes */ - uframe += period; - } while (uframe < mod); - return 1; -} - -static inline int -sitd_slot_ok ( - struct ehci_hcd *ehci, - u32 mod, - struct ehci_iso_stream *stream, - u32 uframe, - struct ehci_iso_sched *sched, - u32 period_uframes -) -{ - u32 mask, tmp; - u32 frame, uf; - - mask = stream->raw_mask << (uframe & 7); - - /* for IN, don't wrap CSPLIT into the next frame */ - if (mask & ~0xffff) - return 0; - - /* this multi-pass logic is simple, but performance may - * suffer when the schedule data isn't cached. - */ - - /* check bandwidth */ - uframe %= period_uframes; - do { - u32 max_used; - - frame = uframe >> 3; - uf = uframe & 7; - -#ifdef CONFIG_USB_EHCI_TT_NEWSCHED - /* The tt's fullspeed bus bandwidth must be available. - * tt_available scheduling guarantees 10+% for control/bulk. - */ - /* period_uframes << 3 had always been wrong */ - if (!tt_available (ehci, period_uframes >> 3, - stream->udev, frame, uf, stream->tt_usecs)) - return 0; -#else - /* tt must be idle for start(s), any gap, and csplit. - * assume scheduling slop leaves 10+% for control/bulk. 
- */ - if (!tt_no_collision (ehci, period_uframes >> 3, - stream->udev, frame, mask)) - return 0; -#endif - - /* check starts (OUT uses more than one) */ - max_used = 100 - stream->usecs; - for (tmp = stream->raw_mask & 0xff; tmp; tmp >>= 1, uf++) { - if (periodic_usecs (ehci, frame, uf) > max_used) - return 0; - } - - /* for IN, check CSPLIT */ - if (stream->c_usecs) { - uf = uframe & 7; - max_used = 100 - stream->c_usecs; - do { - tmp = 1 << uf; - tmp <<= 8; - if ((stream->raw_mask & tmp) == 0) - continue; - if (periodic_usecs (ehci, frame, uf) - > max_used) - return 0; - } while (++uf < 8); - } - - /* we know urb->interval is 2^N uframes */ - uframe += period_uframes; - } while (uframe < mod); - - stream->splits = cpu_to_le32(stream->raw_mask << (uframe & 7)); - return 1; -} - -/* - * This scheduler plans almost as far into the future as it has actual - * periodic schedule slots. (Affected by TUNE_FLS, which defaults to - * "as small as possible" to be cache-friendlier.) That limits the size - * transfers you can stream reliably; avoid more than 64 msec per urb. - * Also avoid queue depths of less than ehci's worst irq latency (affected - * by the per-urb URB_NO_INTERRUPT hint, the log2_irq_thresh module parameter, - * and other factors); or more than about 230 msec total (for portability, - * given EHCI_TUNE_FLS and the slop). Or, write a smarter scheduler! - */ - +/* Too large now? 
Budgeting is already done by the time we compute + using the slop */ #define SCHEDULE_SLOP 10 /* frames */ /* iso_stream_schedule - schedules an iso request transaction into the @@ -2524,7 +1950,6 @@ static int iso_stream_schedule ( struct ehci_iso_stream *stream ) { - u32 now, start, max, period; int status; unsigned mod = ehci->periodic_size << 3; struct ehci_iso_sched *sched = urb->hcpriv; @@ -2556,80 +1981,74 @@ static int iso_stream_schedule ( status=enable_periodic (ehci); if(status)goto fail; + stream->next_uframe = -1; stream->budget_state = 1; } - now = readl (&ehci->regs->frame_index) % mod; + if(stream->next_uframe == -1){ + int now, start; - /* when's the last uframe this urb could start? */ - max = now + mod; + /* initial schedule; when's the next (u)frame we could + * start? this is bigger than ehci->i_thresh allows; + * scheduling itself isn't free, the slop should handle + * reasonably slow cpus. it can also help high bandwidth if + * the dma and irq loads don't jump until after the queue is + * primed. + */ - /* typical case: reuse current schedule. stream is still active, - * and no gaps from host falling behind (irq delays etc) - */ - if (likely (!list_empty (&stream->td_list))) { - start = stream->next_uframe; - if (start < now) - start += mod; - if (likely ((start + sched->span) < max)) - goto ready; - /* else fell behind; someday, try to reschedule */ - status = -EL2NSYNC; + now = readl (&ehci->regs->frame_index) & (mod-1); + start = ((now + SCHEDULE_SLOP * 8) & (mod-1)) >> 3; + + stream->next_uframe = + (budget_schedule_next_frame (ehci, start, stream) & + (ehci->periodic_size-1))* 8; + + if(stream->next_uframe == -1){ + /* should be impossible */ + ehci_err(ehci,"Couldn't find budgeted slot " + "after scheduling\n"); + status = -ENOSPC; goto fail; } - /* need to schedule; when's the next (u)frame we could start? - * this is bigger than ehci->i_thresh allows; scheduling itself - * isn't free, the slop should handle reasonably slow cpus. 
it - * can also help high bandwidth if the dma and irq loads don't - * jump until after the queue is primed. - */ - start = SCHEDULE_SLOP * 8 + (now & ~0x07); - start %= mod; - stream->next_uframe = start; - - /* NOTE: assumes URB_ISO_ASAP, to limit complexity/bugs */ - period = stream->interval; - - /* find a uframe slot with enough bandwidth */ - for (; start < (stream->next_uframe + period); start++) { - int enough_space; - - /* check schedule: enough space? */ - if (stream->highspeed) - enough_space = itd_slot_ok (ehci, mod, start, - stream->usecs, period); - else { - if ((start % 8) >= 6) - continue; - enough_space = sitd_slot_ok (ehci, mod, stream, - start, sched, period); - } - - /* schedule it here if there's enough bandwidth */ - if (enough_space) { - stream->next_uframe = start % mod; - goto ready; + /* report high speed start in uframes; full speed, in frames */ + urb->start_frame = stream->next_uframe; + if (!stream->highspeed) + urb->start_frame >>= 3; + + }else{ + + /* use the next uframe field as already marked */ + /* did we fall behind? Throw a sync error if so. */ + /* in sync == tail < now < head */ + + int now = (readl (&ehci->regs->frame_index) & (mod-1)); + int tail = stream->next_uframe - stream->depth - + stream->interval - SCHEDULE_SLOP * 8; + int head = stream->next_uframe; + + if(tail < 0){ + tail += mod; + head += mod; + if(now < tail) now += mod; + } + + if (now < tail || now >= head){ + if(sched_verbose) + ehci_err(ehci,"Loss of sync!\n"); + stream->next_uframe = -1; + status = -EL2NSYNC; + goto fail; } } - /* no room in the schedule */ - ehci_dbg (ehci, "iso %ssched full %p (now %d max %d)\n", - list_empty (&stream->td_list) ? 
"" : "re", - urb, now, max); - status = -ENOSPC; + return 0; fail: iso_sched_free (stream, sched); urb->hcpriv = NULL; return status; -ready: - /* report high speed start in uframes; full speed, in frames */ - urb->start_frame = stream->next_uframe; - if (!stream->highspeed) - urb->start_frame >>= 3; - return 0; } /*-------------------------------------------------------------------------*/ @@ -2690,7 +2109,7 @@ itd_sched_init ( * @stream: iso stream * @ehci: pointer to ehci host controller device structure * @urb: current request - * mem_flags: flags to use for in-kernel memory allocation + * @mem_flags: flags to use for in-kernel memory allocation */ static int itd_urb_transaction ( @@ -2759,8 +2178,6 @@ itd_urb_transaction ( return 0; } -/*-------------------------------------------------------------------------*/ - /* itd_init - performs master initilization of the ITD fields that are * drawn from the stream structure. * @@ -2825,18 +2242,32 @@ itd_patch ( * * @ehci: pointer to ehci host controller device structure * @frame: shadow/hardware schedule frame + * @stream: stream to which ITD belongs * @itd: ITD to link into schedule */ -static inline void -itd_link (struct ehci_hcd *ehci, unsigned frame, struct ehci_itd *itd) -{ - /* always prepend ITD/SITD ... 
only QH tree is order-sensitive */ - itd->itd_next = ehci->pshadow [frame]; - itd->hw_next = ehci->periodic [frame]; - ehci->pshadow [frame].itd = itd; +static inline void itd_link (struct ehci_hcd *ehci, + unsigned frame, + struct ehci_iso_stream *stream, + struct ehci_itd *itd) +{ + __le32 *hw_p; + struct ehci_shadow_budget *b; + union ehci_shadow *here; + + if( budget_schedule_next(ehci,frame,stream,&b,&here,&hw_p) != frame){ + /* should be impossible unless we've hit a bug or + * hardware failure */ + ehci_err(ehci,"itd not where it should be " + "in the periodic budget"); + return; + } + itd->frame = frame; + itd->itd_next = *here; + itd->hw_next = *hw_p; + here->itd = itd; wmb (); - ehci->periodic [frame] = cpu_to_le32 (itd->itd_dma) | Q_TYPE_ITD; + *hw_p = cpu_to_le32 (itd->itd_dma) | Q_TYPE_ITD; } /* itd_link_urb - link urb's ITDs into the hardware schedule acording @@ -2860,18 +2291,11 @@ itd_link_urb ( struct ehci_iso_sched *iso_sched = urb->hcpriv; struct ehci_itd *itd; - next_uframe = stream->next_uframe % mod; + next_uframe = stream->next_uframe & (mod-1); if (unlikely (list_empty(&stream->td_list))) { ehci_to_hcd(ehci)->self.bandwidth_allocated += stream->bandwidth; - ehci_vdbg (ehci, - "schedule devp %s ep%d%s-iso period %d start %d.%d\n", - urb->dev->devpath, stream->bEndpointAddress & 0x0f, - (stream->bEndpointAddress & USB_DIR_IN) ? 
"in" : "out", - urb->interval, - next_uframe >> 3, next_uframe & 0x7); - stream->start = jiffies; } ehci_to_hcd(ehci)->self.bandwidth_isoc_reqs++; @@ -2905,10 +2329,12 @@ itd_link_urb ( /* link completed itds into the schedule */ if (((next_uframe >> 3) != frame) || packet == urb->number_of_packets) { - itd_link (ehci, frame % ehci->periodic_size, itd); + itd_link (ehci, frame & (ehci->periodic_size-1), + stream, itd); itd = NULL; } } + stream->next_uframe = next_uframe; /* don't need that schedule data any more */ @@ -2930,11 +2356,8 @@ itd_link_urb ( * @itd: completed ITD * @regs: ptrace registers */ -static unsigned -itd_complete ( - struct ehci_hcd *ehci, - struct ehci_itd *itd -) { +static unsigned itd_complete(struct ehci_hcd *ehci, struct ehci_itd *itd) +{ struct urb *urb = itd->urb; struct usb_iso_packet_descriptor *desc; u32 t; @@ -2997,13 +2420,16 @@ itd_complete ( ehci_to_hcd(ehci)->self.bandwidth_isoc_reqs--; + /* XXX Hm... now that we have the budget, this should be moved + to where the budget is released */ if (unlikely (list_empty (&stream->td_list))) { ehci_to_hcd(ehci)->self.bandwidth_allocated -= stream->bandwidth; ehci_vdbg (ehci, "deschedule devp %s ep%d%s-iso\n", dev->devpath, stream->bEndpointAddress & 0x0f, - (stream->bEndpointAddress & USB_DIR_IN) ? "in" : "out"); + (stream->bEndpointAddress & USB_DIR_IN) ? + "in" : "out"); } iso_stream_put (ehci, stream); @@ -3077,7 +2503,7 @@ sitd_sched_init ( struct ehci_iso_sched *iso_sched, struct ehci_iso_stream *stream, struct urb *urb -) + ) { unsigned i; dma_addr_t dma = urb->transfer_dma; @@ -3135,7 +2561,7 @@ sitd_urb_transaction ( struct ehci_hcd *ehci, struct urb *urb, gfp_t mem_flags -) + ) { struct ehci_sitd *sitd; dma_addr_t sitd_dma; @@ -3201,22 +2627,27 @@ sitd_urb_transaction ( * schedule. 
* * @stream: stream into which this request will be queued + * @b: shadow budget entry holding this sITD's bandwidth allocation + * information * @sitd: New sITD that is to be added to the shadow/hardware schedule * @uf: iso stream packet */ static inline void sitd_patch ( struct ehci_iso_stream *stream, + struct ehci_shadow_budget *b, struct ehci_sitd *sitd, - struct ehci_iso_packet *uf + struct ehci_iso_packet *uf, + dma_addr_t back ) { u64 bufp = uf->bufp; sitd->hw_next = EHCI_LIST_END; sitd->hw_fullspeed_ep = stream->address; - sitd->hw_uframe = stream->splits; sitd->hw_results = uf->transaction; + + sitd->hw_uframe = cpu_to_le32(b->smask | ((b->cmask&0xff)<<8)); sitd->hw_backpointer = EHCI_LIST_END; sitd->hw_buf [0] = cpu_to_le32 (bufp); @@ -3246,6 +2677,9 @@ static inline void sitd_link (struct ehc int index) { struct ehci_sitd *sitd; + __le32 *hw_p; + struct ehci_shadow_budget *b; + union ehci_shadow *here; /* get SITD from already allocated list */ BUG_ON (list_empty (&sched->td_list)); @@ -3255,18 +2689,30 @@ static inline void sitd_link (struct ehc sitd->stream = iso_stream_get (stream); sitd->urb = usb_get_urb (urb); + /* find the proper insertion point */ + if( budget_schedule_next(ehci,frame,stream,&b,&here,&hw_p) + != frame){ + /* should be impossible unless we've hit a bug or + * hardware failure */ + ehci_err(ehci,"sitd not where it should be in " + "the periodic budget"); + return; + } + /* set the sitd fields */ - sitd_patch (stream, sitd, &sched->packet [index]); + sitd_patch (stream, b, sitd, &sched->packet [index], 0); - /* note: sitd ordering could matter (CSPLIT then SSPLIT) */ sitd->frame = frame; sitd->index = index; - sitd->sitd_next = ehci->pshadow [frame]; - sitd->hw_next = ehci->periodic [frame]; - ehci->pshadow [frame].sitd = sitd; + sitd->sitd_next = *here; + sitd->hw_next = *hw_p; + sitd->budget = b; + + here->sitd = sitd; wmb (); - ehci->periodic [frame] = cpu_to_le32 (sitd->sitd_dma) | Q_TYPE_SITD; + /* make sitd reachable from
periodic schedule */ + *hw_p = cpu_to_le32 (sitd->sitd_dma) | Q_TYPE_SITD; } /* sitd_link_urb - link urb's sITDs into the hardware schedule @@ -3306,7 +2752,7 @@ sitd_link_urb ( stream->depth += stream->interval; } - stream->next_uframe = next_uframe % mod; + stream->next_uframe = next_uframe & (mod-1); /* don't need that schedule data any more */ /* also releases unused sITDs that weren't needed for spanning */ @@ -3337,7 +2783,7 @@ sitd_complete ( struct urb *urb = sitd->urb; struct usb_iso_packet_descriptor *desc; u32 t; - int urb_index = -1; + int urb_index; struct ehci_iso_stream *stream = sitd->stream; struct usb_device *dev; @@ -3384,13 +2830,16 @@ sitd_complete ( ehci_to_hcd(ehci)->self.bandwidth_isoc_reqs--; + /* XXX Hm... now that we have the budget, this should be moved + to where the budget is released */ if (list_empty (&stream->td_list)) { ehci_to_hcd(ehci)->self.bandwidth_allocated -= stream->bandwidth; ehci_vdbg (ehci, "deschedule devp %s ep%d%s-iso\n", dev->devpath, stream->bEndpointAddress & 0x0f, - (stream->bEndpointAddress & USB_DIR_IN) ? "in" : "out"); + (stream->bEndpointAddress & USB_DIR_IN) ? + "in" : "out"); } iso_stream_put (ehci, stream); --- gregkh-2.6.orig/drivers/usb/host/ehci.h +++ gregkh-2.6/drivers/usb/host/ehci.h @@ -485,13 +485,9 @@ struct ehci_qh { /* periodic schedule info */ u8 usecs; /* intr bandwidth */ - u8 gap_uf; /* uframes split/csplit gap */ u8 c_usecs; /* ... 
split completion bw */ - u16 tt_usecs; /* tt downstream bandwidth */ u16 tt_bytes; /* tt downstream bandwidth */ - unsigned short period; /* polling interval */ - unsigned short start; /* where polling starts */ -#define NO_FRAME ((unsigned short)~0) /* pick new start */ + unsigned short period; /* polling interval; uFrame */ struct usb_device *dev; /* access to TT */ struct ehci_shadow_budget *budget; /* pointer to budget placeholder */ } __attribute__ ((aligned (32))); @@ -537,10 +533,7 @@ struct ehci_iso_stream { struct usb_host_endpoint *ep; /* output of (re)scheduling */ - unsigned long start; /* jiffies */ - unsigned long rescheduled; int next_uframe; - __le32 splits; int budget_state; /* the rest is derived from the endpoint descriptor, @@ -549,7 +542,6 @@ struct ehci_iso_stream { */ u16 interval; u8 usecs, c_usecs; - u16 tt_usecs; u16 tt_bytes; u16 raw_mask; unsigned bandwidth; @@ -598,7 +590,7 @@ struct ehci_itd { /* any/all hw_transactions here may be used by that urb */ unsigned frame; /* where scheduled */ unsigned pg; - unsigned index[8]; /* in urb->iso_frame_desc */ + int index[8]; /* in urb->iso_frame_desc */ u8 usecs[8]; struct ehci_shadow_budget *budget; /* pointer to budget placeholder */ } __attribute__ ((aligned (32))); @@ -643,7 +635,7 @@ struct ehci_sitd { struct ehci_iso_stream *stream; /* endpoint's queue */ struct list_head sitd_list; /* list of stream's sitds */ unsigned frame; - unsigned index; + int index; struct ehci_shadow_budget *budget; /* pointer to budget placeholder */ } __attribute__ ((aligned (32)));