From: "Bryan O'Sullivan" This patch fixes some problems uncovered during IB compliance testing so that the Performance Get Counters packet returns the right values for the error counters. Signed-off-by: Ralph Campbell Signed-off-by: Bryan O'Sullivan Cc: "Michael S. Tsirkin" Cc: Roland Dreier Signed-off-by: Andrew Morton --- drivers/infiniband/hw/ipath/ipath_driver.c | 17 ++++++ drivers/infiniband/hw/ipath/ipath_intr.c | 1 drivers/infiniband/hw/ipath/ipath_kernel.h | 5 + drivers/infiniband/hw/ipath/ipath_layer.c | 9 ++- drivers/infiniband/hw/ipath/ipath_layer.h | 2 drivers/infiniband/hw/ipath/ipath_mad.c | 52 ++++++++++--------- drivers/infiniband/hw/ipath/ipath_ud.c | 11 +++- drivers/infiniband/hw/ipath/ipath_verbs.c | 20 +++++++ drivers/infiniband/hw/ipath/ipath_verbs.h | 3 + 9 files changed, 93 insertions(+), 27 deletions(-) diff -puN drivers/infiniband/hw/ipath/ipath_driver.c~ib-ipath-fixes-to-performance-get-counters-for-ib drivers/infiniband/hw/ipath/ipath_driver.c --- a/drivers/infiniband/hw/ipath/ipath_driver.c~ib-ipath-fixes-to-performance-get-counters-for-ib +++ a/drivers/infiniband/hw/ipath/ipath_driver.c @@ -460,6 +460,8 @@ static int __devinit ipath_init_one(stru * by ipath_setup_htconfig. */ dd->ipath_flags = 0; + dd->ipath_lli_counter = 0; + dd->ipath_lli_errors = 0; if (dd->ipath_f_bus(dd, pdev)) ipath_dev_err(dd, "Failed to setup config space; " @@ -942,6 +944,18 @@ reloop: "tlen=%x opcode=%x egridx=%x: %s\n", eflags, l, etype, tlen, bthbytes[0], ips_get_index((__le32 *) rc), emsg); + /* Count local link integrity errors. 
*/ + if (eflags & (INFINIPATH_RHF_H_ICRCERR | + INFINIPATH_RHF_H_VCRCERR)) { + u8 n = (dd->ipath_ibcctrl >> + INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & + INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; + + if (++dd->ipath_lli_counter > n) { + dd->ipath_lli_counter = 0; + dd->ipath_lli_errors++; + } + } } else if (etype == RCVHQ_RCV_TYPE_NON_KD) { int ret = __ipath_verbs_rcv(dd, rc + 1, ebuf, tlen); @@ -949,6 +963,9 @@ reloop: ipath_cdbg(VERBOSE, "received IB packet, " "not SMA (QP=%x)\n", qp); + if (dd->ipath_lli_counter) + dd->ipath_lli_counter--; + } else if (etype == RCVHQ_RCV_TYPE_EAGER) { if (qp == IPATH_KD_QP && bthbytes[0] == ipath_layer_rcv_opcode && diff -puN drivers/infiniband/hw/ipath/ipath_intr.c~ib-ipath-fixes-to-performance-get-counters-for-ib drivers/infiniband/hw/ipath/ipath_intr.c --- a/drivers/infiniband/hw/ipath/ipath_intr.c~ib-ipath-fixes-to-performance-get-counters-for-ib +++ a/drivers/infiniband/hw/ipath/ipath_intr.c @@ -262,6 +262,7 @@ static void handle_e_ibstatuschanged(str | IPATH_LINKACTIVE | IPATH_LINKARMED); *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; + dd->ipath_lli_counter = 0; if (!noprint) { if (((dd->ipath_lastibcstat >> INFINIPATH_IBCS_LINKSTATE_SHIFT) & diff -puN drivers/infiniband/hw/ipath/ipath_kernel.h~ib-ipath-fixes-to-performance-get-counters-for-ib drivers/infiniband/hw/ipath/ipath_kernel.h --- a/drivers/infiniband/hw/ipath/ipath_kernel.h~ib-ipath-fixes-to-performance-get-counters-for-ib +++ a/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -507,6 +507,11 @@ struct ipath_devdata { u8 ipath_pci_cacheline; /* LID mask control */ u8 ipath_lmc; + + /* local link integrity counter */ + u32 ipath_lli_counter; + /* local link integrity errors */ + u32 ipath_lli_errors; }; extern struct list_head ipath_dev_list; diff -puN drivers/infiniband/hw/ipath/ipath_layer.c~ib-ipath-fixes-to-performance-get-counters-for-ib drivers/infiniband/hw/ipath/ipath_layer.c --- 
a/drivers/infiniband/hw/ipath/ipath_layer.c~ib-ipath-fixes-to-performance-get-counters-for-ib +++ a/drivers/infiniband/hw/ipath/ipath_layer.c @@ -1032,19 +1032,22 @@ int ipath_layer_get_counters(struct ipat ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt); cntrs->link_error_recovery_counter = ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt); + /* + * The link downed counter counts when the other side downs the + * connection. We add in the number of times we downed the link + * due to local link integrity errors to compensate. + */ cntrs->link_downed_counter = ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt); cntrs->port_rcv_errors = ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) + ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) + ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_errrcvflowctrlcnt) + ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) + ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) + ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) + ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) + ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlinkcnt) + ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt); cntrs->port_rcv_remphys_errors = ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt); @@ -1058,6 +1061,8 @@ int ipath_layer_get_counters(struct ipat ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); cntrs->port_rcv_packets = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); + cntrs->local_link_integrity_errors = dd->ipath_lli_errors; + cntrs->excessive_buffer_overrun_errors = 0; /* XXX */ ret = 0; diff -puN drivers/infiniband/hw/ipath/ipath_layer.h~ib-ipath-fixes-to-performance-get-counters-for-ib drivers/infiniband/hw/ipath/ipath_layer.h --- a/drivers/infiniband/hw/ipath/ipath_layer.h~ib-ipath-fixes-to-performance-get-counters-for-ib +++ a/drivers/infiniband/hw/ipath/ipath_layer.h @@ -55,6 +55,8 @@ struct 
ipath_layer_counters { u64 port_rcv_data; u64 port_xmit_packets; u64 port_rcv_packets; + u32 local_link_integrity_errors; + u32 excessive_buffer_overrun_errors; }; /* diff -puN drivers/infiniband/hw/ipath/ipath_mad.c~ib-ipath-fixes-to-performance-get-counters-for-ib drivers/infiniband/hw/ipath/ipath_mad.c --- a/drivers/infiniband/hw/ipath/ipath_mad.c~ib-ipath-fixes-to-performance-get-counters-for-ib +++ a/drivers/infiniband/hw/ipath/ipath_mad.c @@ -613,6 +613,9 @@ struct ib_pma_portcounters { #define IB_PMA_SEL_PORT_RCV_ERRORS __constant_htons(0x0008) #define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS __constant_htons(0x0010) #define IB_PMA_SEL_PORT_XMIT_DISCARDS __constant_htons(0x0040) +#define IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS __constant_htons(0x0200) +#define IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS __constant_htons(0x0400) +#define IB_PMA_SEL_PORT_VL15_DROPPED __constant_htons(0x0800) #define IB_PMA_SEL_PORT_XMIT_DATA __constant_htons(0x1000) #define IB_PMA_SEL_PORT_RCV_DATA __constant_htons(0x2000) #define IB_PMA_SEL_PORT_XMIT_PACKETS __constant_htons(0x4000) @@ -859,6 +862,10 @@ static int recv_pma_get_portcounters(str cntrs.port_rcv_data -= dev->z_port_rcv_data; cntrs.port_xmit_packets -= dev->z_port_xmit_packets; cntrs.port_rcv_packets -= dev->z_port_rcv_packets; + cntrs.local_link_integrity_errors -= + dev->z_local_link_integrity_errors; + cntrs.excessive_buffer_overrun_errors -= + dev->z_excessive_buffer_overrun_errors; memset(pmp->data, 0, sizeof(pmp->data)); @@ -896,6 +903,16 @@ static int recv_pma_get_portcounters(str else p->port_xmit_discards = cpu_to_be16((u16)cntrs.port_xmit_discards); + if (cntrs.local_link_integrity_errors > 0xFUL) + cntrs.local_link_integrity_errors = 0xFUL; + if (cntrs.excessive_buffer_overrun_errors > 0xFUL) + cntrs.excessive_buffer_overrun_errors = 0xFUL; + p->lli_ebor_errors = (cntrs.local_link_integrity_errors << 4) | + cntrs.excessive_buffer_overrun_errors; + if (dev->n_vl15_dropped > 0xFFFFUL) + p->vl15_dropped = 
__constant_cpu_to_be16(0xFFFF); + else + p->vl15_dropped = cpu_to_be16((u16)dev->n_vl15_dropped); if (cntrs.port_xmit_data > 0xFFFFFFFFUL) p->port_xmit_data = __constant_cpu_to_be32(0xFFFFFFFF); else @@ -990,6 +1007,17 @@ static int recv_pma_set_portcounters(str if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS) dev->z_port_xmit_discards = cntrs.port_xmit_discards; + if (p->counter_select & IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS) + dev->z_local_link_integrity_errors = + cntrs.local_link_integrity_errors; + + if (p->counter_select & IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS) + dev->z_excessive_buffer_overrun_errors = + cntrs.excessive_buffer_overrun_errors; + + if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) + dev->n_vl15_dropped = 0; + if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA) dev->z_port_xmit_data = cntrs.port_xmit_data; @@ -1275,32 +1303,8 @@ int ipath_process_mad(struct ib_device * struct ib_wc *in_wc, struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad) { - struct ipath_ibdev *dev = to_idev(ibdev); int ret; - /* - * Snapshot current HW counters to "clear" them. - * This should be done when the driver is loaded except that for - * some reason we get a zillion errors when brining up the link. 
- */ - if (dev->rcv_errors == 0) { - struct ipath_layer_counters cntrs; - - ipath_layer_get_counters(to_idev(ibdev)->dd, &cntrs); - dev->rcv_errors++; - dev->z_symbol_error_counter = cntrs.symbol_error_counter; - dev->z_link_error_recovery_counter = - cntrs.link_error_recovery_counter; - dev->z_link_downed_counter = cntrs.link_downed_counter; - dev->z_port_rcv_errors = cntrs.port_rcv_errors + 1; - dev->z_port_rcv_remphys_errors = - cntrs.port_rcv_remphys_errors; - dev->z_port_xmit_discards = cntrs.port_xmit_discards; - dev->z_port_xmit_data = cntrs.port_xmit_data; - dev->z_port_rcv_data = cntrs.port_rcv_data; - dev->z_port_xmit_packets = cntrs.port_xmit_packets; - dev->z_port_rcv_packets = cntrs.port_rcv_packets; - } switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: case IB_MGMT_CLASS_SUBN_LID_ROUTED: diff -puN drivers/infiniband/hw/ipath/ipath_ud.c~ib-ipath-fixes-to-performance-get-counters-for-ib drivers/infiniband/hw/ipath/ipath_ud.c --- a/drivers/infiniband/hw/ipath/ipath_ud.c~ib-ipath-fixes-to-performance-get-counters-for-ib +++ a/drivers/infiniband/hw/ipath/ipath_ud.c @@ -560,7 +560,16 @@ void ipath_ud_rcv(struct ipath_ibdev *de spin_lock_irqsave(&rq->lock, flags); if (rq->tail == rq->head) { spin_unlock_irqrestore(&rq->lock, flags); - dev->n_pkt_drops++; + /* + * Count VL15 packets dropped due to no receive buffer. + * Otherwise, count them as buffer overruns since usually, + * the HW will be able to receive packets even if there are + * no QPs with posted receive buffers. + */ + if (qp->ibqp.qp_num == 0) + dev->n_vl15_dropped++; + else + dev->rcv_errors++; goto bail; } /* Silently drop packets which are too big. 
*/ diff -puN drivers/infiniband/hw/ipath/ipath_verbs.c~ib-ipath-fixes-to-performance-get-counters-for-ib drivers/infiniband/hw/ipath/ipath_verbs.c --- a/drivers/infiniband/hw/ipath/ipath_verbs.c~ib-ipath-fixes-to-performance-get-counters-for-ib +++ a/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -981,6 +981,7 @@ static int ipath_verbs_register_sysfs(st */ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) { + struct ipath_layer_counters cntrs; struct ipath_ibdev *idev; struct ib_device *dev; int ret; @@ -1031,6 +1032,25 @@ static void *ipath_register_ib_device(in idev->pma_counter_select[5] = IB_PMA_PORT_XMIT_WAIT; idev->link_width_enabled = 3; /* 1x or 4x */ + /* Snapshot current HW counters to "clear" them. */ + ipath_layer_get_counters(dd, &cntrs); + idev->z_symbol_error_counter = cntrs.symbol_error_counter; + idev->z_link_error_recovery_counter = + cntrs.link_error_recovery_counter; + idev->z_link_downed_counter = cntrs.link_downed_counter; + idev->z_port_rcv_errors = cntrs.port_rcv_errors; + idev->z_port_rcv_remphys_errors = + cntrs.port_rcv_remphys_errors; + idev->z_port_xmit_discards = cntrs.port_xmit_discards; + idev->z_port_xmit_data = cntrs.port_xmit_data; + idev->z_port_rcv_data = cntrs.port_rcv_data; + idev->z_port_xmit_packets = cntrs.port_xmit_packets; + idev->z_port_rcv_packets = cntrs.port_rcv_packets; + idev->z_local_link_integrity_errors = + cntrs.local_link_integrity_errors; + idev->z_excessive_buffer_overrun_errors = + cntrs.excessive_buffer_overrun_errors; + /* * The system image GUID is supposed to be the same for all * IB HCAs in a single system but since there can be other diff -puN drivers/infiniband/hw/ipath/ipath_verbs.h~ib-ipath-fixes-to-performance-get-counters-for-ib drivers/infiniband/hw/ipath/ipath_verbs.h --- a/drivers/infiniband/hw/ipath/ipath_verbs.h~ib-ipath-fixes-to-performance-get-counters-for-ib +++ a/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -460,6 +460,8 @@ struct ipath_ibdev { u64 
z_port_xmit_packets; /* starting count for PMA */ u64 z_port_rcv_packets; /* starting count for PMA */ u32 z_pkey_violations; /* starting count for PMA */ + u32 z_local_link_integrity_errors; /* starting count for PMA */ + u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */ u32 n_rc_resends; u32 n_rc_acks; u32 n_rc_qacks; @@ -469,6 +471,7 @@ struct ipath_ibdev { u32 n_other_naks; u32 n_timeouts; u32 n_pkt_drops; + u32 n_vl15_dropped; u32 n_wqe_errs; u32 n_rdma_dup_busy; u32 n_piowait; _