From xiphmont@gmail.com Thu Sep 28 00:29:44 2006 Message-ID: <806dafc20609280029x534cc3dvf89fe286a38758c7@mail.gmail.com> Date: Thu, 28 Sep 2006 03:29:41 -0400 From: "Christopher \"Monty\" Montgomery" To: linux-usb-devel@lists.sourceforge.net Subject: [PATCH 14/15] USB: ehci-hcd: add sITD frame spanning support Cc: greg@kroah.com, david-b@pacbell.net, xiphmont@gmail.com Content-Disposition: inline patch 14: Adds low-level scheduler mechanisms for linking, unlinking, manipulating and maintaining spanning ('dummy') sITDs such that split-iso transactions may span H-frames. Turns on shadow budget logic to allow/use sITD spanning in budgeting. Signed-off-by: Christopher "Monty" Montgomery Cc: David Brownell Signed-off-by: Greg Kroah-Hartman --- --- drivers/usb/host/ehci-sched.c | 201 ++++++++++++++++++++++++++++++++++++++---- drivers/usb/host/ehci.h | 1 2 files changed, 183 insertions(+), 19 deletions(-) --- gregkh-2.6.orig/drivers/usb/host/ehci-sched.c +++ gregkh-2.6/drivers/usb/host/ehci-sched.c @@ -61,10 +61,10 @@ * For isochronous, an "iso_stream" head serves the same role as a QH. * It keeps track of every ITD (or SITD) that's scheduled. * - * This scheduler supports and aggressively uses both FSTNs in order - * to allow interrupt QH transfers to span H-frame boundaries. This - * support is necessary for efficient FS bus bandwidth usage through a - * 2.0 hub's TT mechanism. + * This scheduler supports and aggressively uses both FSTNs and sITD + * backpointers in order to allow interrupt QH and sITD transfers to + * span H-frame boundaries. This support is necessary for efficient + * FS bus bandwidth usage through a 2.0 hub's TT mechanism. * * The transfer ordering in the shadow schedule looks like: * @@ -105,7 +105,21 @@ * schedule in the order specified by the budget. 
FSTN positioning is * static; it follows the recommendations of the EHCI 1.0 spec and * places one save-state FSTN at the lowest level in each branch of - * the INTR tree and one restore FSTN at the head of level one. + * the INTR tree and one restore FSTN at the head of level one. Dummy + * sITDs are positioned in the order of the original sITDs of the + * preceding frame. + * + * The scheduler does not currently implement any inter- or + * intra-frame rebalancing; that is, the budget is not adjusted to + * fill in 'holes' or shuffle transfer slots to optimize remaining + * bandwidth when endpoints are removed. QHs will reuse holes at the + * moment, isoch transfers will not. This can also lead to starving + * QHs depending on the order in which the QHs are budgeted because + * the tree arrangement of the QHs places ordering restrictions upon + * QHs that do not exist for isoch transfers. Starvation is unlikely + * (and far less likely than for the old scheduler), but it's still + * possible to be unable to schedule a new QH before actually running + * out of FS bandwidth. 
* */ @@ -124,6 +138,14 @@ MODULE_PARM_DESC (use_fstn, "use FSTNs: allow use of FSTN to schedule full speed INTR " "transfers across frame boundaries; default is 1 (on)"); +/* enable/disable use of dummy sITDs */ +static unsigned use_dummy_sitds = 1; +module_param (use_dummy_sitds, uint, S_IRUGO); +MODULE_PARM_DESC (use_dummy_sitds, + "use dummy sITDs: allow use of 'dummy' sITDs to " + "schedule schedule full speed ISOCH transfers across frame " + "boundaries; default is 1 (on)"); + /* set limit on periodic load per TT uframe */ static unsigned fs_bytes_per_uframe = 188; module_param (fs_bytes_per_uframe, uint, S_IRUGO); @@ -480,6 +502,68 @@ static void *periodic_shadow_owner(struc } } +static union ehci_shadow * +periodic_translate_budget_dummy (struct ehci_hcd *ehci, + unsigned frame, + struct ehci_shadow_budget *translate, + __le32 **hw_p) +{ + struct ehci_shadow_budget *budget_prev = + ehci->budget [((frame-1) & BUDGET_SLOT_MASK)]; + union ehci_shadow *shadow = + &ehci->pshadow [frame & (ehci->periodic_size-1)]; + __le32 type; + + *hw_p = &ehci->periodic [frame & (ehci->periodic_size-1)]; + type = Q_NEXT_TYPE (**hw_p); + + /* Everything in the shadow schedule will be there in the budget + [except for FSTNs] */ + while (shadow->ptr){ + struct ehci_sitd *sitd; + if(type != Q_TYPE_SITD) return shadow; + + sitd = shadow->sitd; + if(sitd->hw_backpointer != EHCI_LIST_END) return shadow; + + /* advance budget until the current budget + placeholder is the one belonging to current + shadow entry or until we hit the stopping + point. */ + + while(budget_prev && + budget_prev != translate && + budget_prev->owner.ptr != + periodic_shadow_owner(ehci,shadow->ptr,type)){ + budget_prev = budget_prev->next; + } + if(budget_prev == translate){ + return shadow; /* a match! 
*/ + } + + /* falling off the end of the budget indicates + * an internal logic error */ + if(budget_prev == NULL){ + ehci_err(ehci, + "schedule contains " + "unbudgeted transaction\n"); + return NULL; + } + + /* advance shadow and hardware schedule by one, then + loop to test this position */ + *hw_p = shadow->hw_next; + shadow = periodic_next_shadow (shadow, type); + type = Q_NEXT_TYPE (**hw_p); + + } + + /* nothing queued or fell off the end looking for stopping point. + Append to end. */ + return shadow; + +} + /* periodic_translate_budget - Finds the position in the * hardware/shadow schedule to insert a new hardware schedule entry * based on the position in the shadow budget of the passed in budget @@ -969,10 +1053,11 @@ static int budget_calc_fs_frame(struct e cmask = 0; } - /* don't allow spanning yet */ - if(cmask&(~0xff)) + /* have we disallowed frame spanning? */ + if(cmask&(~0xff)){ + if(use_dummy_sitds==0) return -1; - + } } /* FS/LS ssplit in uFrame 6 or 7 is illegal */ @@ -2782,7 +2867,7 @@ sitd_sched_init ( unsigned i; dma_addr_t dma = urb->transfer_dma; - /* how many frames are needed for these transfers */ + /* how many uframes are needed for these transfers */ iso_sched->span = urb->number_of_packets * stream->interval; /* figure out per-frame sitd fields that we'll need later @@ -2851,12 +2936,10 @@ sitd_urb_transaction ( /* allocate/init sITDs */ spin_lock_irqsave (&ehci->lock, flags); - for (i = 0; i < urb->number_of_packets; i++) { - /* NOTE: for now, we don't try to handle wraparound cases - * for IN (using sitd->hw_backpointer, like a FSTN), which - * means we never need two sitds for full speed packets. - */ + /* note: frame spanning may require two SITDs per transaction; + allocate all we may need, and free the ones we don't later. */ + for (i = 0; i < urb->number_of_packets * 2; i++) { /* free_list.next might be cache-hot ... but maybe * the HC caches it too. avoid that issue for now. 
@@ -2905,6 +2988,8 @@ sitd_urb_transaction ( * information * @sitd: New sITD that is to be added to the shadow/hardware schedule * @uf: iso stream packet + * @back: hw pointer (cpu order) pointing back to original sITD + * if this is a spanning dummy sITD */ static inline void sitd_patch ( @@ -2921,8 +3006,17 @@ sitd_patch ( sitd->hw_fullspeed_ep = stream->address; sitd->hw_results = uf->transaction; + if(back){ + /* spanning completion only */ + sitd->hw_uframe = cpu_to_le32( ((b->cmask>>8)&0xff)<<8 ); + /* set dummy's completion state to DoComplete (ehci + 4.12.3.3.x) */ + sitd->hw_results |= SITD_DOCOMPLETE; + sitd->hw_backpointer = cpu_to_le32 (back); + }else{ sitd->hw_uframe = cpu_to_le32(b->smask | ((b->cmask&0xff)<<8)); sitd->hw_backpointer = EHCI_LIST_END; + } sitd->hw_buf [0] = cpu_to_le32 (bufp); sitd->hw_buf_hi [0] = cpu_to_le32 (bufp >> 32); @@ -2934,7 +3028,8 @@ sitd_patch ( } /* sitd_link - fill in and link one sITD into the shadow/hardware - * schedule + * schedule; if this sITD requires a spanning dummy in the next frame, + * a spanning dummy is implicitly filled in and linked as well. * * @ehci: pointer to ehci host controller device structure * @urb: new USB request @@ -2950,7 +3045,7 @@ static inline void sitd_link (struct ehc struct ehci_iso_sched *sched, int index) { - struct ehci_sitd *sitd; + struct ehci_sitd *sitd, *dummy; __le32 *hw_p; struct ehci_shadow_budget *b; union ehci_shadow *here; @@ -2985,6 +3080,40 @@ static inline void sitd_link (struct ehc here->sitd = sitd; wmb (); + /* is this a wrap case? 
if so, have the dummy SITD fully in place + before the real SITD is reachable from the hardware schedule */ + + if(BUDGET_WRAP_P(b)){ + __le32 *d_hw_p; + union ehci_shadow *d_here; + + /* get SITD from already allocated list */ + BUG_ON (list_empty (&sched->td_list)); + dummy = list_entry (sched->td_list.next, + struct ehci_sitd, sitd_list); + list_move_tail (&dummy->sitd_list, &stream->td_list); + dummy->stream = iso_stream_get (stream); + dummy->urb = usb_get_urb (urb); + dummy->index = index; + dummy->frame = frame+1; + + /* find the proper insertion point */ + d_here = periodic_translate_budget_dummy (ehci, frame+1, b, &d_hw_p); + + /* set the sitd fields */ + sitd_patch (stream, b, dummy, &sched->packet [index], + sitd->sitd_dma); + dummy->budget = b; + + dummy->sitd_next = *d_here; + dummy->hw_next = *d_hw_p; + + d_here->sitd = dummy; + wmb (); + *d_hw_p = cpu_to_le32 (dummy->sitd_dma) | Q_TYPE_SITD; + + } + /* make sitd reacahable from periodic schedule */ *hw_p = cpu_to_le32 (sitd->sitd_dma) | Q_TYPE_SITD; } @@ -3173,6 +3302,40 @@ done: /*-------------------------------------------------------------------------*/ +static void unlink_dummy(struct ehci_hcd *ehci, struct ehci_sitd *sitd, + int frame) +{ + /* there is a dummy; search for it */ + + __le32 *hw_p = &ehci->periodic[frame & (ehci->periodic_size-1)]; + union ehci_shadow *here = + &ehci->pshadow[frame & (ehci->periodic_size-1)]; + __le32 type = Q_NEXT_TYPE(*hw_p); + + while (here->ptr) { + if (type == Q_TYPE_SITD) { + if (here->sitd->budget == sitd->budget) { + struct ehci_sitd *temp = here->sitd; + *hw_p = here->sitd->hw_next; + *here = here->sitd->sitd_next; + wmb(); + + iso_stream_put (ehci, temp->stream); + + break; /* only one dummy will match */ + } + if (here->sitd->hw_backpointer == EHCI_LIST_END) + break; /* all dummies come first */ + + } else + break; /* all dummies come first */ + + hw_p = here->hw_next; + here = periodic_next_shadow(here, type); + type = Q_NEXT_TYPE(*hw_p); + } +} + 
static int scan_frame(struct ehci_hcd *ehci, int frame, int uframes, int rescan) { @@ -3302,9 +3465,9 @@ static int scan_frame(struct ehci_hcd *e type = Q_NEXT_TYPE (*hw_p); wmb(); - /* when spanning sITD support is - * added, the spanning dummy will be - * unlinked here as well. */ + /* unlink the dummy (if any) */ + if (BUDGET_WRAP_P(q.sitd->budget)) + unlink_dummy(ehci, q.sitd, frame+1); modified = sitd_complete (ehci, q.sitd); --- gregkh-2.6.orig/drivers/usb/host/ehci.h +++ gregkh-2.6/drivers/usb/host/ehci.h @@ -628,6 +628,7 @@ struct ehci_sitd { #define SITD_STS_STS (1 << 1) /* split transaction state */ #define SITD_ACTIVE __constant_cpu_to_le32(SITD_STS_ACTIVE) +#define SITD_DOCOMPLETE __constant_cpu_to_le32(SITD_STS_STS) __le32 hw_buf [2]; /* EHCI table 3-12 */ __le32 hw_backpointer; /* EHCI table 3-13 */