From 7ec01ff950c455aa1f1ccfaf347eb1aa9ec160d5 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 15 Aug 2008 11:23:47 -0700 Subject: IB/ipath: Fix lost UD send work request If a UD QP has some work requests queued to be sent by the DMA engine followed by a local loopback work request, we have to wait for the previous work requests to finish or the completion for the local loopback work request would be generated out of order. The problem was that the work request queue pointer was already updated so that the request would not be processed when the DMA queue drained. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_ud.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 36aa242c487c..729446f56aab 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -267,6 +267,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) u16 lrh0; u16 lid; int ret = 0; + int next_cur; spin_lock_irqsave(&qp->s_lock, flags); @@ -290,8 +291,9 @@ int ipath_make_ud_req(struct ipath_qp *qp) goto bail; wqe = get_swqe_ptr(qp, qp->s_cur); - if (++qp->s_cur >= qp->s_size) - qp->s_cur = 0; + next_cur = qp->s_cur + 1; + if (next_cur >= qp->s_size) + next_cur = 0; /* Construct the header. 
*/ ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; @@ -315,6 +317,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) qp->s_flags |= IPATH_S_WAIT_DMA; goto bail; } + qp->s_cur = next_cur; spin_unlock_irqrestore(&qp->s_lock, flags); ipath_ud_loopback(qp, wqe); spin_lock_irqsave(&qp->s_lock, flags); @@ -323,6 +326,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) } } + qp->s_cur = next_cur; extra_bytes = -wqe->length & 3; nwords = (wqe->length + extra_bytes) >> 2; -- cgit v1.2.3-58-ga151 From 24babadec0209e5f84c067cb89aca6515486d35c Mon Sep 17 00:00:00 2001 From: Dave Olson Date: Fri, 15 Aug 2008 11:25:20 -0700 Subject: IB/ipath: Fix incorrect check for max physical address in TID The check for max physical address was incorrect, thus limiting the range of allowed physical addresses. Signed-off-by: Dave Olson Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_iba7220.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c index d90f5e9a54fa..9839e20119bc 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba7220.c +++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c @@ -1720,7 +1720,7 @@ static void ipath_7220_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr, "not 2KB aligned!\n", pa); return; } - if (pa >= (1UL << IBA7220_TID_SZ_SHIFT)) { + if (chippa >= (1UL << IBA7220_TID_SZ_SHIFT)) { ipath_dev_err(dd, "BUG: Physical page address 0x%lx " "larger than supported\n", pa); -- cgit v1.2.3-58-ga151 From a77a57a1a22afc31891d95879fe3cf2ab03838b0 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 19 Aug 2008 15:01:32 -0700 Subject: IPoIB: Fix deadlock on RTNL in ipoib_stop() Commit c8c2afe3 ("IPoIB: Use rtnl lock/unlock when changing device flags") added a call to rtnl_lock() in ipoib_mcast_join_task(), which is run from the ipoib_workqueue. 
However, ipoib_stop() (which is run inside rtnl_lock()) flushes this workqueue, which leads to a deadlock if the join task is pending. Fix this by simply not flushing the workqueue from ipoib_stop(). It turns out that we really don't care about workqueue tasks running during or after ipoib_stop(), as long as we make sure to flush the workqueue before unregistering a netdev. This fixes the reported deadlock (the bug-tracker reference is missing from this copy of the message). Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 19 +++++++++---------- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 10 +++++++++- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index f51201b17bfd..7e9e218738fa 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -156,14 +156,8 @@ static int ipoib_stop(struct net_device *dev) netif_stop_queue(dev); - /* - * Now flush workqueue to make sure a scheduled task doesn't - * bring our internal state back up. 
- */ - flush_workqueue(ipoib_workqueue); - - ipoib_ib_dev_down(dev, 1); - ipoib_ib_dev_stop(dev, 1); + ipoib_ib_dev_down(dev, 0); + ipoib_ib_dev_stop(dev, 0); if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { struct ipoib_dev_priv *cpriv; @@ -1314,7 +1308,7 @@ sysfs_failed: register_failed: ib_unregister_event_handler(&priv->event_handler); - flush_scheduled_work(); + flush_workqueue(ipoib_workqueue); event_failed: ipoib_dev_cleanup(priv->dev); @@ -1373,7 +1367,12 @@ static void ipoib_remove_one(struct ib_device *device) list_for_each_entry_safe(priv, tmp, dev_list, list) { ib_unregister_event_handler(&priv->event_handler); - flush_scheduled_work(); + + rtnl_lock(); + dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); + rtnl_unlock(); + + flush_workqueue(ipoib_workqueue); unregister_netdev(priv->dev); ipoib_dev_cleanup(priv->dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 8950e9546f4e..ac33c8f3ea85 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -392,8 +392,16 @@ static int ipoib_mcast_join_complete(int status, &priv->mcast_task, 0); mutex_unlock(&mcast_mutex); - if (mcast == priv->broadcast) + if (mcast == priv->broadcast) { + /* + * Take RTNL lock here to avoid racing with + * ipoib_stop() and turning the carrier back + * on while a device is being removed. + */ + rtnl_lock(); netif_carrier_on(dev); + rtnl_unlock(); + } return 0; } -- cgit v1.2.3-58-ga151