diff options
author | David S. Miller <davem@davemloft.net> | 2016-08-18 01:17:32 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-08-18 01:17:32 -0400 |
commit | 60747ef4d173c2747bf7f0377fb22846cb422195 (patch) | |
tree | ea0faf33b952495c47909be1400c475a3f3821b0 /drivers | |
parent | 484334198f8ce9552e20930fff9408ebf6bcf94d (diff) | |
parent | 184ca823481c99dadd7d946e5afd4bb921eab30d (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Minor overlapping changes for both merge conflicts.
Resolution work done by Stephen Rothwell was used
as a reference.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
509 files changed, 33828 insertions, 7242 deletions
diff --git a/drivers/Makefile b/drivers/Makefile index 954824438fd1..53abb4a5f736 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -138,6 +138,7 @@ obj-$(CONFIG_OF) += of/ obj-$(CONFIG_SSB) += ssb/ obj-$(CONFIG_BCMA) += bcma/ obj-$(CONFIG_VHOST_RING) += vhost/ +obj-$(CONFIG_VHOST) += vhost/ obj-$(CONFIG_VLYNQ) += vlynq/ obj-$(CONFIG_STAGING) += staging/ obj-y += platform/ diff --git a/drivers/acpi/acpi_cmos_rtc.c b/drivers/acpi/acpi_cmos_rtc.c index 81dc75033f15..0980a133916f 100644 --- a/drivers/acpi/acpi_cmos_rtc.c +++ b/drivers/acpi/acpi_cmos_rtc.c @@ -14,7 +14,7 @@ #include <linux/err.h> #include <linux/kernel.h> #include <linux/module.h> -#include <asm-generic/rtc.h> +#include <linux/mc146818rtc.h> #include "internal.h" diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 148f4e5ca104..31abb0bdd4f2 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -232,8 +232,10 @@ remove_dev_dir: acpi_device_dir(device) = NULL; remove_lid_dir: remove_proc_entry(ACPI_BUTTON_SUBCLASS_LID, acpi_button_dir); + acpi_lid_dir = NULL; remove_button_dir: remove_proc_entry(ACPI_BUTTON_CLASS, acpi_root_dir); + acpi_button_dir = NULL; goto done; } @@ -250,7 +252,9 @@ static int acpi_button_remove_fs(struct acpi_device *device) acpi_lid_dir); acpi_device_dir(device) = NULL; remove_proc_entry(ACPI_BUTTON_SUBCLASS_LID, acpi_button_dir); + acpi_lid_dir = NULL; remove_proc_entry(ACPI_BUTTON_CLASS, acpi_root_dir); + acpi_button_dir = NULL; return 0; } diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 999a10914678..e7bd57cc550a 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -101,6 +101,7 @@ enum ec_command { #define ACPI_EC_UDELAY_POLL 550 /* Wait 1ms for EC transaction polling */ #define ACPI_EC_CLEAR_MAX 100 /* Maximum number of events to query * when trying to clear the EC */ +#define ACPI_EC_MAX_QUERIES 16 /* Maximum number of parallel queries */ enum { EC_FLAGS_QUERY_PENDING, /* Query is pending */ @@ -121,6 +122,10 @@ static unsigned int ec_delay __read_mostly = ACPI_EC_DELAY; module_param(ec_delay, uint, 0644); MODULE_PARM_DESC(ec_delay, "Timeout(ms) waited until an EC command completes"); +static unsigned int ec_max_queries __read_mostly = ACPI_EC_MAX_QUERIES; +module_param(ec_max_queries, uint, 0644); +MODULE_PARM_DESC(ec_max_queries, "Maximum parallel _Qxx evaluations"); + static bool ec_busy_polling __read_mostly; module_param(ec_busy_polling, bool, 0644); MODULE_PARM_DESC(ec_busy_polling, "Use busy polling to advance EC transaction"); @@ -174,6 +179,7 @@ static void acpi_ec_event_processor(struct work_struct *work); struct acpi_ec *boot_ec, *first_ec; EXPORT_SYMBOL(first_ec); +static struct workqueue_struct *ec_query_wq; static int EC_FLAGS_CLEAR_ON_RESUME; /* Needs acpi_ec_clear() on boot/resume */ static int EC_FLAGS_QUERY_HANDSHAKE; /* Needs QR_EC issued when SCI_EVT set */ @@ -1098,7 +1104,7 @@ static int acpi_ec_query(struct acpi_ec *ec, u8 *data) * work queue execution. */ ec_dbg_evt("Query(0x%02x) scheduled", value); - if (!schedule_work(&q->work)) { + if (!queue_work(ec_query_wq, &q->work)) { ec_dbg_evt("Query(0x%02x) overlapped", value); result = -EBUSY; } @@ -1660,15 +1666,41 @@ static struct acpi_driver acpi_ec_driver = { }, }; +static inline int acpi_ec_query_init(void) +{ + if (!ec_query_wq) { + ec_query_wq = alloc_workqueue("kec_query", 0, + ec_max_queries); + if (!ec_query_wq) + return -ENODEV; + } + return 0; +} + +static inline void acpi_ec_query_exit(void) +{ + if (ec_query_wq) { + destroy_workqueue(ec_query_wq); + ec_query_wq = NULL; + } +} + int __init acpi_ec_init(void) { - int result = 0; + int result; + /* register workqueue for _Qxx evaluations */ + result = acpi_ec_query_init(); + if (result) + goto err_exit; /* Now register the driver for the EC */ result = acpi_bus_register_driver(&acpi_ec_driver); - if (result < 0) - return -ENODEV; + if (result) + goto err_exit; +err_exit: + if (result) + acpi_ec_query_exit(); return result; } @@ -1678,5 +1710,6 @@ static void __exit acpi_ec_exit(void) { acpi_bus_unregister_driver(&acpi_ec_driver); + acpi_ec_query_exit(); } #endif /* 0 */ diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 8c234dd9b8bc..80cc7c089a15 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1527,11 +1527,12 @@ static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw) { struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR]; u64 offset = nfit_blk->stat_offset + mmio->size * bw; + const u32 STATUS_MASK = 0x80000037; if (mmio->num_lines) offset = to_interleave_offset(offset, mmio); - return readl(mmio->addr.base + offset); + return readl(mmio->addr.base + offset) & STATUS_MASK; } static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 7c04c87738a6..df0c70963d9e 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -402,6 +402,22 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact); +static noinline struct dev_pm_opp *_find_freq_ceil(struct opp_table *opp_table, + unsigned long *freq) +{ + struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); + + list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) { + if (temp_opp->available && temp_opp->rate >= *freq) { + opp = temp_opp; + *freq = opp->rate; + break; + } + } + + return opp; +} + /** * dev_pm_opp_find_freq_ceil() - Search for an rounded ceil freq * @dev: device for which we do this operation @@ -427,7 +443,6 @@ struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq) { struct opp_table *opp_table; - struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); opp_rcu_lockdep_assert(); @@ -440,15 +455,7 @@ struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, if (IS_ERR(opp_table)) return ERR_CAST(opp_table); - list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) { - if (temp_opp->available && temp_opp->rate >= *freq) { - opp = temp_opp; - *freq = opp->rate; - break; - } - } - - return opp; + return _find_freq_ceil(opp_table, freq); } EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil); @@ -612,7 +619,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) return PTR_ERR(opp_table); } - old_opp = dev_pm_opp_find_freq_ceil(dev, &old_freq); + old_opp = _find_freq_ceil(opp_table, &old_freq); if (!IS_ERR(old_opp)) { ou_volt = old_opp->u_volt; ou_volt_min = old_opp->u_volt_min; @@ -622,7 +629,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) __func__, old_freq, PTR_ERR(old_opp)); } - opp = dev_pm_opp_find_freq_ceil(dev, &freq); + opp = _find_freq_ceil(opp_table, &freq); if (IS_ERR(opp)) { ret = PTR_ERR(opp); dev_err(dev, "%s: failed to find OPP for freq %lu (%d)\n", diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index a6975795e7f3..efec10b49d59 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -11,7 +11,7 @@ #include <linux/export.h> #include <linux/rtc.h> -#include <asm/rtc.h> +#include <linux/mc146818rtc.h> #include "power.h" @@ -103,7 +103,7 @@ static int set_magic_time(unsigned int user, unsigned int file, unsigned int dev n /= 24; time.tm_min = (n % 20) * 3; n /= 20; - set_rtc_time(&time); + mc146818_set_time(&time); return n ? -1 : 0; } @@ -112,7 +112,7 @@ static unsigned int read_magic_time(void) struct rtc_time time; unsigned int val; - get_rtc_time(&time); + mc146818_get_time(&time); pr_info("RTC time: %2d:%02d:%02d, date: %02d/%02d/%02d\n", time.tm_hour, time.tm_min, time.tm_sec, time.tm_mon + 1, time.tm_mday, time.tm_year % 100); diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 5fb7718f256c..62e4de2aa8d1 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -334,10 +334,9 @@ void device_wakeup_arm_wake_irqs(void) struct wakeup_source *ws; rcu_read_lock(); - list_for_each_entry_rcu(ws, &wakeup_sources, entry) { - if (ws->wakeirq) - dev_pm_arm_wake_irq(ws->wakeirq); - } + list_for_each_entry_rcu(ws, &wakeup_sources, entry) + dev_pm_arm_wake_irq(ws->wakeirq); + rcu_read_unlock(); } @@ -351,10 +350,9 @@ void device_wakeup_disarm_wake_irqs(void) struct wakeup_source *ws; rcu_read_lock(); - list_for_each_entry_rcu(ws, &wakeup_sources, entry) { - if (ws->wakeirq) - dev_pm_disarm_wake_irq(ws->wakeirq); - } + list_for_each_entry_rcu(ws, &wakeup_sources, entry) + dev_pm_disarm_wake_irq(ws->wakeirq); + rcu_read_unlock(); } @@ -390,9 +388,7 @@ int device_wakeup_disable(struct device *dev) return -EINVAL; ws = device_wakeup_detach(dev); - if (ws) - wakeup_source_unregister(ws); - + wakeup_source_unregister(ws); return 0; } EXPORT_SYMBOL_GPL(device_wakeup_disable); diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 3022dad24071..0c76d4016eeb 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -300,20 +300,20 @@ static void copy_from_brd(void *dst, struct brd_device *brd, * Process a single bvec of a bio. */ static int brd_do_bvec(struct brd_device *brd, struct page *page, - unsigned int len, unsigned int off, int rw, + unsigned int len, unsigned int off, bool is_write, sector_t sector) { void *mem; int err = 0; - if (rw != READ) { + if (is_write) { err = copy_to_brd_setup(brd, sector, len); if (err) goto out; } mem = kmap_atomic(page); - if (rw == READ) { + if (!is_write) { copy_from_brd(mem + off, brd, sector, len); flush_dcache_page(page); } else { @@ -330,7 +330,6 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) { struct block_device *bdev = bio->bi_bdev; struct brd_device *brd = bdev->bd_disk->private_data; - int rw; struct bio_vec bvec; sector_t sector; struct bvec_iter iter; @@ -347,14 +346,12 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) goto out; } - rw = bio_data_dir(bio); - bio_for_each_segment(bvec, bio, iter) { unsigned int len = bvec.bv_len; int err; - err = brd_do_bvec(brd, bvec.bv_page, len, - bvec.bv_offset, rw, sector); + err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, + op_is_write(bio_op(bio)), sector); if (err) goto io_error; sector += len >> SECTOR_SHIFT; @@ -369,11 +366,11 @@ io_error: } static int brd_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, int rw) + struct page *page, bool is_write) { struct brd_device *brd = bdev->bd_disk->private_data; - int err = brd_do_bvec(brd, page, PAGE_SIZE, 0, rw, sector); - page_endio(page, rw & WRITE, err); + int err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector); + page_endio(page, is_write, err); return err; } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 0501ae0c517b..100be556e613 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1663,13 +1663,13 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection, struct bio *bio) { if (connection->agreed_pro_version >= 95) - return (bio->bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) | - (bio->bi_rw & REQ_FUA ? DP_FUA : 0) | - (bio->bi_rw & REQ_PREFLUSH ? DP_FLUSH : 0) | + return (bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0) | + (bio->bi_opf & REQ_FUA ? DP_FUA : 0) | + (bio->bi_opf & REQ_PREFLUSH ? DP_FLUSH : 0) | (bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) | (bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0); else - return bio->bi_rw & REQ_SYNC ? DP_RW_SYNC : 0; + return bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0; } /* Used to send write or TRIM aka REQ_DISCARD requests diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index df45713dfbe8..942384f34e22 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1564,7 +1564,7 @@ static void drbd_issue_peer_wsame(struct drbd_device *device, * drbd_submit_peer_request() * @device: DRBD device. * @peer_req: peer request - * @rw: flag field, see bio->bi_rw + * @rw: flag field, see bio->bi_opf * * May spread the pages to multiple bios, * depending on bio_add_page restrictions. diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 66b8e4bb74d8..de279fe4e4fd 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -288,7 +288,7 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) */ if (!ok && bio_op(req->master_bio) == REQ_OP_READ && - !(req->master_bio->bi_rw & REQ_RAHEAD) && + !(req->master_bio->bi_opf & REQ_RAHEAD) && !list_empty(&req->tl_requests)) req->rq_state |= RQ_POSTPONED; @@ -1137,7 +1137,7 @@ static int drbd_process_write_request(struct drbd_request *req) * replicating, in which case there is no point. */ if (unlikely(req->i.size == 0)) { /* The only size==0 bios we expect are empty flushes. */ - D_ASSERT(device, req->master_bio->bi_rw & REQ_PREFLUSH); + D_ASSERT(device, req->master_bio->bi_opf & REQ_PREFLUSH); if (remote) _req_mod(req, QUEUE_AS_DRBD_BARRIER); return remote; @@ -1176,7 +1176,7 @@ drbd_submit_req_private_bio(struct drbd_request *req) if (bio_op(bio) != REQ_OP_READ) type = DRBD_FAULT_DT_WR; - else if (bio->bi_rw & REQ_RAHEAD) + else if (bio->bi_opf & REQ_RAHEAD) type = DRBD_FAULT_DT_RA; else type = DRBD_FAULT_DT_RD; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 35dbb3dca47e..c6755c9a0aea 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -256,7 +256,7 @@ void drbd_request_endio(struct bio *bio) what = DISCARD_COMPLETED_WITH_ERROR; break; case REQ_OP_READ: - if (bio->bi_rw & REQ_RAHEAD) + if (bio->bi_opf & REQ_RAHEAD) what = READ_AHEAD_COMPLETED_WITH_ERROR; else what = READ_COMPLETED_WITH_ERROR; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index c557057fe8ae..b71a9c767009 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3663,11 +3663,6 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) opened_bdev[drive] = bdev; - if (!(mode & (FMODE_READ|FMODE_WRITE))) { - res = -EINVAL; - goto out; - } - res = -ENXIO; if (!floppy_track_buffer) { @@ -3711,13 +3706,15 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) if (UFDCS->rawcmd == 1) UFDCS->rawcmd = 2; - UDRS->last_checked = 0; - clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); - check_disk_change(bdev); - if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) - goto out; - if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags)) - goto out; + if (mode & (FMODE_READ|FMODE_WRITE)) { + UDRS->last_checked = 0; + clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); + check_disk_change(bdev); + if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) + goto out; + if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags)) + goto out; + } res = -EROFS; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 075377eee0c0..c9f2107f7095 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -510,14 +510,10 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, return 0; } - -static inline int lo_rw_simple(struct loop_device *lo, - struct request *rq, loff_t pos, bool rw) +static int do_req_filebacked(struct loop_device *lo, struct request *rq) { struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); - - if (cmd->use_aio) - return lo_rw_aio(lo, cmd, pos, rw); + loff_t pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset; /* * lo_write_simple and lo_read_simple should have been covered @@ -528,37 +524,30 @@ static inline int lo_rw_simple(struct loop_device *lo, * of the req at one time. And direct read IO doesn't need to * run flush_dcache_page(). */ - if (rw == WRITE) - return lo_write_simple(lo, rq, pos); - else - return lo_read_simple(lo, rq, pos); -} - -static int do_req_filebacked(struct loop_device *lo, struct request *rq) -{ - loff_t pos; - int ret; - - pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset; - - if (op_is_write(req_op(rq))) { - if (req_op(rq) == REQ_OP_FLUSH) - ret = lo_req_flush(lo, rq); - else if (req_op(rq) == REQ_OP_DISCARD) - ret = lo_discard(lo, rq, pos); - else if (lo->transfer) - ret = lo_write_transfer(lo, rq, pos); + switch (req_op(rq)) { + case REQ_OP_FLUSH: + return lo_req_flush(lo, rq); + case REQ_OP_DISCARD: + return lo_discard(lo, rq, pos); + case REQ_OP_WRITE: + if (lo->transfer) + return lo_write_transfer(lo, rq, pos); + else if (cmd->use_aio) + return lo_rw_aio(lo, cmd, pos, WRITE); else - ret = lo_rw_simple(lo, rq, pos, WRITE); - - } else { + return lo_write_simple(lo, rq, pos); + case REQ_OP_READ: if (lo->transfer) - ret = lo_read_transfer(lo, rq, pos); + return lo_read_transfer(lo, rq, pos); + else if (cmd->use_aio) + return lo_rw_aio(lo, cmd, pos, READ); else - ret = lo_rw_simple(lo, rq, pos, READ); + return lo_read_simple(lo, rq, pos); + default: + WARN_ON_ONCE(1); + return -EIO; + break; } - - return ret; } struct switch_request { @@ -1659,11 +1648,15 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, if (lo->lo_state != Lo_bound) return -EIO; - if (lo->use_dio && (req_op(cmd->rq) != REQ_OP_FLUSH || - req_op(cmd->rq) == REQ_OP_DISCARD)) - cmd->use_aio = true; - else + switch (req_op(cmd->rq)) { + case REQ_OP_FLUSH: + case REQ_OP_DISCARD: cmd->use_aio = false; + break; + default: + cmd->use_aio = lo->use_dio; + break; + } queue_kthread_work(&lo->worker, &cmd->work); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 6f55b262b5ce..a9e398019f38 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -451,14 +451,9 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev) sk_set_memalloc(nbd->sock->sk); - nbd->task_recv = current; - ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr); if (ret) { dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); - - nbd->task_recv = NULL; - return ret; } @@ -477,9 +472,6 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev) nbd_size_clear(nbd, bdev); device_remove_file(disk_to_dev(nbd->disk), &pid_attr); - - nbd->task_recv = NULL; - return ret; } @@ -788,6 +780,8 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, if (!nbd->sock) return -EINVAL; + /* We have to claim the device under the lock */ + nbd->task_recv = current; mutex_unlock(&nbd->tx_lock); nbd_parse_flags(nbd, bdev); @@ -796,6 +790,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, nbd_name(nbd)); if (IS_ERR(thread)) { mutex_lock(&nbd->tx_lock); + nbd->task_recv = NULL; return PTR_ERR(thread); } @@ -805,6 +800,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, kthread_stop(thread); mutex_lock(&nbd->tx_lock); + nbd->task_recv = NULL; sock_shutdown(nbd); nbd_clear_que(nbd); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 9393bc730acf..90fa4ac149db 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -1157,7 +1157,7 @@ static int pkt_start_recovery(struct packet_data *pkt) bio_reset(pkt->bio); pkt->bio->bi_bdev = pd->bdev; - pkt->bio->bi_rw = REQ_WRITE; + bio_set_op_attrs(pkt->bio, REQ_OP_WRITE, 0); pkt->bio->bi_iter.bi_sector = new_sector; pkt->bio->bi_iter.bi_size = pkt->frames * CD_FRAMESIZE; pkt->bio->bi_vcnt = pkt->frames; diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 1a04af6d2421..6c6519f6492a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3950,6 +3950,7 @@ static void rbd_dev_release(struct device *dev) bool need_put = !!rbd_dev->opts; ceph_oid_destroy(&rbd_dev->header_oid); + ceph_oloc_destroy(&rbd_dev->header_oloc); rbd_put_client(rbd_dev->rbd_client); rbd_spec_put(rbd_dev->spec); @@ -5336,15 +5337,6 @@ static ssize_t do_rbd_add(struct bus_type *bus, } spec->pool_id = (u64)rc; - /* The ceph file layout needs to fit pool id in 32 bits */ - - if (spec->pool_id > (u64)U32_MAX) { - rbd_warn(NULL, "pool id too large (%llu > %u)", - (unsigned long long)spec->pool_id, U32_MAX); - rc = -EIO; - goto err_out_client; - } - rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts); if (!rbd_dev) { rc = -ENOMEM; diff --git a/drivers/block/umem.c b/drivers/block/umem.c index d0a3e6d4515f..be90e15854ed 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -535,7 +535,7 @@ static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio) *card->biotail = bio; bio->bi_next = NULL; card->biotail = &bio->bi_next; - if (bio->bi_rw & REQ_SYNC || !mm_check_plugged(card)) + if (bio->bi_opf & REQ_SYNC || !mm_check_plugged(card)) activate(card); spin_unlock_irq(&card->lock); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 1523e05c46fc..93b1aaa5ba3b 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -391,22 +391,16 @@ static int init_vq(struct virtio_blk *vblk) num_vqs = 1; vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL); - if (!vblk->vqs) { - err = -ENOMEM; - goto out; - } + if (!vblk->vqs) + return -ENOMEM; names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL); - if (!names) - goto err_names; - callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL); - if (!callbacks) - goto err_callbacks; - vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL); - if (!vqs) - goto err_vqs; + if (!names || !callbacks || !vqs) { + err = -ENOMEM; + goto out; + } for (i = 0; i < num_vqs; i++) { callbacks[i] = virtblk_done; @@ -417,7 +411,7 @@ static int init_vq(struct virtio_blk *vblk) /* Discover virtqueues and write information to configuration. */ err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names); if (err) - goto err_find_vqs; + goto out; for (i = 0; i < num_vqs; i++) { spin_lock_init(&vblk->vqs[i].lock); @@ -425,16 +419,12 @@ static int init_vq(struct virtio_blk *vblk) } vblk->num_vqs = num_vqs; - err_find_vqs: +out: kfree(vqs); - err_vqs: kfree(callbacks); - err_callbacks: kfree(names); - err_names: if (err) kfree(vblk->vqs); - out: return err; } diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 7454cf188c8e..04365b17ee67 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -843,15 +843,16 @@ static void zram_bio_discard(struct zram *zram, u32 index, } static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, - int offset, int rw) + int offset, bool is_write) { unsigned long start_time = jiffies; + int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ; int ret; - generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT, + generic_start_io_acct(rw_acct, bvec->bv_len >> SECTOR_SHIFT, &zram->disk->part0); - if (rw == READ) { + if (!is_write) { atomic64_inc(&zram->stats.num_reads); ret = zram_bvec_read(zram, bvec, index, offset); } else { @@ -859,10 +860,10 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, ret = zram_bvec_write(zram, bvec, index, offset); } - generic_end_io_acct(rw, &zram->disk->part0, start_time); + generic_end_io_acct(rw_acct, &zram->disk->part0, start_time); if (unlikely(ret)) { - if (rw == READ) + if (!is_write) atomic64_inc(&zram->stats.failed_reads); else atomic64_inc(&zram->stats.failed_writes); @@ -873,7 +874,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, static void __zram_make_request(struct zram *zram, struct bio *bio) { - int offset, rw; + int offset; u32 index; struct bio_vec bvec; struct bvec_iter iter; @@ -888,7 +889,6 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) return; } - rw = bio_data_dir(bio); bio_for_each_segment(bvec, bio, iter) { int max_transfer_size = PAGE_SIZE - offset; @@ -903,15 +903,18 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) bv.bv_len = max_transfer_size; bv.bv_offset = bvec.bv_offset; - if (zram_bvec_rw(zram, &bv, index, offset, rw) < 0) + if (zram_bvec_rw(zram, &bv, index, offset, + op_is_write(bio_op(bio))) < 0) goto out; bv.bv_len = bvec.bv_len - max_transfer_size; bv.bv_offset += max_transfer_size; - if (zram_bvec_rw(zram, &bv, index + 1, 0, rw) < 0) + if (zram_bvec_rw(zram, &bv, index + 1, 0, + op_is_write(bio_op(bio))) < 0) goto out; } else - if (zram_bvec_rw(zram, &bvec, index, offset, rw) < 0) + if (zram_bvec_rw(zram, &bvec, index, offset, + op_is_write(bio_op(bio))) < 0) goto out; update_position(&index, &offset, &bvec); @@ -968,7 +971,7 @@ static void zram_slot_free_notify(struct block_device *bdev, } static int zram_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, int rw) + struct page *page, bool is_write) { int offset, err = -EIO; u32 index; @@ -992,7 +995,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, bv.bv_len = PAGE_SIZE; bv.bv_offset = 0; - err = zram_bvec_rw(zram, &bv, index, offset, rw); + err = zram_bvec_rw(zram, &bv, index, offset, is_write); put_zram: zram_meta_put(zram); out: @@ -1005,7 +1008,7 @@ out: * (e.g., SetPageError, set_page_dirty and extra works). */ if (err == 0) - page_endio(page, rw, 0); + page_endio(page, is_write, 0); return err; } diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index fdb8f3e10b6f..dcc09739a54e 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -293,7 +293,7 @@ if RTC_LIB=n config RTC tristate "Enhanced Real Time Clock Support (legacy PC RTC driver)" - depends on ALPHA || (MIPS && MACH_LOONGSON64) || MN10300 + depends on ALPHA || (MIPS && MACH_LOONGSON64) ---help--- If you say Y here and create a character special file /dev/rtc with major number 10 and minor number 135 using mknod ("man mknod"), you @@ -339,32 +339,6 @@ config JS_RTC To compile this driver as a module, choose M here: the module will be called js-rtc. -config GEN_RTC - tristate "Generic /dev/rtc emulation" - depends on RTC!=y - depends on ALPHA || M68K || MN10300 || PARISC || PPC || X86 - ---help--- - If you say Y here and create a character special file /dev/rtc with - major number 10 and minor number 135 using mknod ("man mknod"), you - will get access to the real time clock (or hardware clock) built - into your computer. - - It reports status information via the file /proc/driver/rtc and its - behaviour is set by various ioctls on /dev/rtc. If you enable the - "extended RTC operation" below it will also provide an emulation - for RTC_UIE which is required by some programs and may improve - precision in some cases. - - To compile this driver as a module, choose M here: the - module will be called genrtc. - -config GEN_RTC_X - bool "Extended RTC operation" - depends on GEN_RTC - help - Provides an emulation for RTC_UIE which is required by some programs - and may improve precision of the generic RTC support in some cases. - config EFI_RTC bool "EFI Real Time Clock Services" depends on IA64 diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 55d16bf3ccc5..6e6c244a66a0 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -25,7 +25,6 @@ obj-$(CONFIG_APPLICOM) += applicom.o obj-$(CONFIG_SONYPI) += sonypi.o obj-$(CONFIG_RTC) += rtc.o obj-$(CONFIG_HPET) += hpet.o -obj-$(CONFIG_GEN_RTC) += genrtc.o obj-$(CONFIG_EFI_RTC) += efirtc.o obj-$(CONFIG_DS1302) += ds1302.o obj-$(CONFIG_XILINX_HWICAP) += xilinx_hwicap/ diff --git a/drivers/char/genrtc.c b/drivers/char/genrtc.c deleted file mode 100644 index 4f943759d376..000000000000 --- a/drivers/char/genrtc.c +++ /dev/null @@ -1,539 +0,0 @@ -/* - * Real Time Clock interface for - * - q40 and other m68k machines, - * - HP PARISC machines - * - PowerPC machines - * emulate some RTC irq capabilities in software - * - * Copyright (C) 1999 Richard Zidlicky - * - * based on Paul Gortmaker's rtc.c device and - * Sam Creasey Generic rtc driver - * - * This driver allows use of the real time clock (built into - * nearly all computers) from user space. It exports the /dev/rtc - * interface supporting various ioctl() and also the /proc/driver/rtc - * pseudo-file for status information. - * - * The ioctls can be used to set the interrupt behaviour where - * supported. - * - * The /dev/rtc interface will block on reads until an interrupt - * has been received. If a RTC interrupt has already happened, - * it will output an unsigned long and then block. The output value - * contains the interrupt status in the low byte and the number of - * interrupts since the last read in the remaining high bytes. The - * /dev/rtc interface can also be used with the select(2) call. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - - * 1.01 fix for 2.3.X rz@linux-m68k.org - * 1.02 merged with code from genrtc.c rz@linux-m68k.org - * 1.03 make it more portable zippel@linux-m68k.org - * 1.04 removed useless timer code rz@linux-m68k.org - * 1.05 portable RTC_UIE emulation rz@linux-m68k.org - * 1.06 set_rtc_time can return an error trini@kernel.crashing.org - * 1.07 ported to HP PARISC (hppa) Helge Deller <deller@gmx.de> - */ - -#define RTC_VERSION "1.07" - -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/errno.h> -#include <linux/miscdevice.h> -#include <linux/fcntl.h> - -#include <linux/rtc.h> -#include <linux/init.h> -#include <linux/poll.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/mutex.h> -#include <linux/workqueue.h> - -#include <asm/uaccess.h> -#include <asm/rtc.h> - -/* - * We sponge a minor off of the misc major. No need slurping - * up another valuable major dev number for this. If you add - * an ioctl, make sure you don't conflict with SPARC's RTC - * ioctls. - */ - -static DEFINE_MUTEX(gen_rtc_mutex); -static DECLARE_WAIT_QUEUE_HEAD(gen_rtc_wait); - -/* - * Bits in gen_rtc_status. - */ - -#define RTC_IS_OPEN 0x01 /* means /dev/rtc is in use */ - -static unsigned char gen_rtc_status; /* bitmapped status byte. */ -static unsigned long gen_rtc_irq_data; /* our output to the world */ - -/* months start at 0 now */ -static unsigned char days_in_mo[] = -{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; - -static int irq_active; - -#ifdef CONFIG_GEN_RTC_X -static struct work_struct genrtc_task; -static struct timer_list timer_task; - -static unsigned int oldsecs; -static int lostint; -static unsigned long tt_exp; - -static void gen_rtc_timer(unsigned long data); - -static volatile int stask_active; /* schedule_work */ -static volatile int ttask_active; /* timer_task */ -static int stop_rtc_timers; /* don't requeue tasks */ -static DEFINE_SPINLOCK(gen_rtc_lock); - -static void gen_rtc_interrupt(unsigned long arg); - -/* - * Routine to poll RTC seconds field for change as often as possible, - * after first RTC_UIE use timer to reduce polling - */ -static void genrtc_troutine(struct work_struct *work) -{ - unsigned int tmp = get_rtc_ss(); - - if (stop_rtc_timers) { - stask_active = 0; - return; - } - - if (oldsecs != tmp){ - oldsecs = tmp; - - timer_task.function = gen_rtc_timer; - timer_task.expires = jiffies + HZ - (HZ/10); - tt_exp=timer_task.expires; - ttask_active=1; - stask_active=0; - add_timer(&timer_task); - - gen_rtc_interrupt(0); - } else if (schedule_work(&genrtc_task) == 0) - stask_active = 0; -} - -static void gen_rtc_timer(unsigned long data) -{ - lostint = get_rtc_ss() - oldsecs ; - if (lostint<0) - lostint = 60 - lostint; - if (time_after(jiffies, tt_exp)) - printk(KERN_INFO "genrtc: timer task delayed by %ld jiffies\n", - jiffies-tt_exp); - ttask_active=0; - stask_active=1; - if ((schedule_work(&genrtc_task) == 0)) - stask_active = 0; -} - -/* - * call gen_rtc_interrupt function to signal an RTC_UIE, - * arg is unused. - * Could be invoked either from a real interrupt handler or - * from some routine that periodically (eg 100HZ) monitors - * whether RTC_SECS changed - */ -static void gen_rtc_interrupt(unsigned long arg) -{ - /* We store the status in the low byte and the number of - * interrupts received since the last read in the remainder - * of rtc_irq_data. */ - - gen_rtc_irq_data += 0x100; - gen_rtc_irq_data &= ~0xff; - gen_rtc_irq_data |= RTC_UIE; - - if (lostint){ - printk("genrtc: system delaying clock ticks?\n"); - /* increment count so that userspace knows something is wrong */ - gen_rtc_irq_data += ((lostint-1)<<8); - lostint = 0; - } - - wake_up_interruptible(&gen_rtc_wait); -} - -/* - * Now all the various file operations that we export. - */ -static ssize_t gen_rtc_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - unsigned long data; - ssize_t retval; - - if (count != sizeof (unsigned int) && count != sizeof (unsigned long)) - return -EINVAL; - - if (file->f_flags & O_NONBLOCK && !gen_rtc_irq_data) - return -EAGAIN; - - retval = wait_event_interruptible(gen_rtc_wait, - (data = xchg(&gen_rtc_irq_data, 0))); - if (retval) - goto out; - - /* first test allows optimizer to nuke this case for 32-bit machines */ - if (sizeof (int) != sizeof (long) && count == sizeof (unsigned int)) { - unsigned int uidata = data; - retval = put_user(uidata, (unsigned int __user *)buf) ?: - sizeof(unsigned int); - } - else { - retval = put_user(data, (unsigned long __user *)buf) ?: - sizeof(unsigned long); - } -out: - return retval; -} - -static unsigned int gen_rtc_poll(struct file *file, - struct poll_table_struct *wait) -{ - poll_wait(file, &gen_rtc_wait, wait); - if (gen_rtc_irq_data != 0) - return POLLIN | POLLRDNORM; - return 0; -} - -#endif - -/* - * Used to disable/enable interrupts, only RTC_UIE supported - * We also clear out any old irq data after an ioctl() that - * meddles with the interrupt enable/disable bits. - */ - -static inline void gen_clear_rtc_irq_bit(unsigned char bit) -{ -#ifdef CONFIG_GEN_RTC_X - stop_rtc_timers = 1; - if (ttask_active){ - del_timer_sync(&timer_task); - ttask_active = 0; - } - while (stask_active) - schedule(); - - spin_lock(&gen_rtc_lock); - irq_active = 0; - spin_unlock(&gen_rtc_lock); -#endif -} - -static inline int gen_set_rtc_irq_bit(unsigned char bit) -{ -#ifdef CONFIG_GEN_RTC_X - spin_lock(&gen_rtc_lock); - if ( !irq_active ) { - irq_active = 1; - stop_rtc_timers = 0; - lostint = 0; - INIT_WORK(&genrtc_task, genrtc_troutine); - oldsecs = get_rtc_ss(); - init_timer(&timer_task); - - stask_active = 1; - if (schedule_work(&genrtc_task) == 0){ - stask_active = 0; - } - } - spin_unlock(&gen_rtc_lock); - gen_rtc_irq_data = 0; - return 0; -#else - return -EINVAL; -#endif -} - -static int gen_rtc_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) -{ - struct rtc_time wtime; - struct rtc_pll_info pll; - void __user *argp = (void __user *)arg; - - switch (cmd) { - - case RTC_PLL_GET: - if (get_rtc_pll(&pll)) - return -EINVAL; - else - return copy_to_user(argp, &pll, sizeof pll) ? -EFAULT : 0; - - case RTC_PLL_SET: - if (!capable(CAP_SYS_TIME)) - return -EACCES; - if (copy_from_user(&pll, argp, sizeof(pll))) - return -EFAULT; - return set_rtc_pll(&pll); - - case RTC_UIE_OFF: /* disable ints from RTC updates. */ - gen_clear_rtc_irq_bit(RTC_UIE); - return 0; - - case RTC_UIE_ON: /* enable ints for RTC updates. */ - return gen_set_rtc_irq_bit(RTC_UIE); - - case RTC_RD_TIME: /* Read the time/date from RTC */ - /* this doesn't get week-day, who cares */ - memset(&wtime, 0, sizeof(wtime)); - get_rtc_time(&wtime); - - return copy_to_user(argp, &wtime, sizeof(wtime)) ? -EFAULT : 0; - - case RTC_SET_TIME: /* Set the RTC */ - { - int year; - unsigned char leap_yr; - - if (!capable(CAP_SYS_TIME)) - return -EACCES; - - if (copy_from_user(&wtime, argp, sizeof(wtime))) - return -EFAULT; - - year = wtime.tm_year + 1900; - leap_yr = ((!(year % 4) && (year % 100)) || - !(year % 400)); - - if ((wtime.tm_mon < 0 || wtime.tm_mon > 11) || (wtime.tm_mday < 1)) - return -EINVAL; - - if (wtime.tm_mday < 0 || wtime.tm_mday > - (days_in_mo[wtime.tm_mon] + ((wtime.tm_mon == 1) && leap_yr))) - return -EINVAL; - - if (wtime.tm_hour < 0 || wtime.tm_hour >= 24 || - wtime.tm_min < 0 || wtime.tm_min >= 60 || - wtime.tm_sec < 0 || wtime.tm_sec >= 60) - return -EINVAL; - - return set_rtc_time(&wtime); - } - } - - return -EINVAL; -} - -static long gen_rtc_unlocked_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - int ret; - - mutex_lock(&gen_rtc_mutex); - ret = gen_rtc_ioctl(file, cmd, arg); - mutex_unlock(&gen_rtc_mutex); - - return ret; -} - -/* - * We enforce only one user at a time here with the open/close. - * Also clear the previous interrupt data on an open, and clean - * up things on a close. - */ - -static int gen_rtc_open(struct inode *inode, struct file *file) -{ - mutex_lock(&gen_rtc_mutex); - if (gen_rtc_status & RTC_IS_OPEN) { - mutex_unlock(&gen_rtc_mutex); - return -EBUSY; - } - - gen_rtc_status |= RTC_IS_OPEN; - gen_rtc_irq_data = 0; - irq_active = 0; - mutex_unlock(&gen_rtc_mutex); - - return 0; -} - -static int gen_rtc_release(struct inode *inode, struct file *file) -{ - /* - * Turn off all interrupts once the device is no longer - * in use and clear the data. - */ - - gen_clear_rtc_irq_bit(RTC_PIE|RTC_AIE|RTC_UIE); - - gen_rtc_status &= ~RTC_IS_OPEN; - return 0; -} - - -#ifdef CONFIG_PROC_FS - -/* - * Info exported via "/proc/driver/rtc". - */ - -static int gen_rtc_proc_show(struct seq_file *m, void *v) -{ - struct rtc_time tm; - unsigned int flags; - struct rtc_pll_info pll; - - flags = get_rtc_time(&tm); - - seq_printf(m, - "rtc_time\t: %02d:%02d:%02d\n" - "rtc_date\t: %04d-%02d-%02d\n" - "rtc_epoch\t: %04u\n", - tm.tm_hour, tm.tm_min, tm.tm_sec, - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, 1900); - - tm.tm_hour = tm.tm_min = tm.tm_sec = 0; - - seq_puts(m, "alarm\t\t: "); - if (tm.tm_hour <= 24) - seq_printf(m, "%02d:", tm.tm_hour); - else - seq_puts(m, "**:"); - - if (tm.tm_min <= 59) - seq_printf(m, "%02d:", tm.tm_min); - else - seq_puts(m, "**:"); - - if (tm.tm_sec <= 59) - seq_printf(m, "%02d\n", tm.tm_sec); - else - seq_puts(m, "**\n"); - - seq_printf(m, - "DST_enable\t: %s\n" - "BCD\t\t: %s\n" - "24hr\t\t: %s\n" - "square_wave\t: %s\n" - "alarm_IRQ\t: %s\n" - "update_IRQ\t: %s\n" - "periodic_IRQ\t: %s\n" - "periodic_freq\t: %ld\n" - "batt_status\t: %s\n", - (flags & RTC_DST_EN) ? "yes" : "no", - (flags & RTC_DM_BINARY) ? "no" : "yes", - (flags & RTC_24H) ? "yes" : "no", - (flags & RTC_SQWE) ? "yes" : "no", - (flags & RTC_AIE) ? "yes" : "no", - irq_active ? "yes" : "no", - (flags & RTC_PIE) ? "yes" : "no", - 0L /* freq */, - (flags & RTC_BATT_BAD) ? "bad" : "okay"); - if (!get_rtc_pll(&pll)) - seq_printf(m, - "PLL adjustment\t: %d\n" - "PLL max +ve adjustment\t: %d\n" - "PLL max -ve adjustment\t: %d\n" - "PLL +ve adjustment factor\t: %d\n" - "PLL -ve adjustment factor\t: %d\n" - "PLL frequency\t: %ld\n", - pll.pll_value, - pll.pll_max, - pll.pll_min, - pll.pll_posmult, - pll.pll_negmult, - pll.pll_clock); - return 0; -} - -static int gen_rtc_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, gen_rtc_proc_show, NULL); -} - -static const struct file_operations gen_rtc_proc_fops = { - .open = gen_rtc_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int __init gen_rtc_proc_init(void) -{ - struct proc_dir_entry *r; - - r = proc_create("driver/rtc", 0, NULL, &gen_rtc_proc_fops); - if (!r) - return -ENOMEM; - return 0; -} -#else -static inline int gen_rtc_proc_init(void) { return 0; } -#endif /* CONFIG_PROC_FS */ - - -/* - * The various file operations we support. - */ - -static const struct file_operations gen_rtc_fops = { - .owner = THIS_MODULE, -#ifdef CONFIG_GEN_RTC_X - .read = gen_rtc_read, - .poll = gen_rtc_poll, -#endif - .unlocked_ioctl = gen_rtc_unlocked_ioctl, - .open = gen_rtc_open, - .release = gen_rtc_release, - .llseek = noop_llseek, -}; - -static struct miscdevice rtc_gen_dev = -{ - .minor = RTC_MINOR, - .name = "rtc", - .fops = &gen_rtc_fops, -}; - -static int __init rtc_generic_init(void) -{ - int retval; - - printk(KERN_INFO "Generic RTC Driver v%s\n", RTC_VERSION); - - retval = misc_register(&rtc_gen_dev); - if (retval < 0) - return retval; - - retval = gen_rtc_proc_init(); - if (retval) { - misc_deregister(&rtc_gen_dev); - return retval; - } - - return 0; -} - -static void __exit rtc_generic_exit(void) -{ - remove_proc_entry ("driver/rtc", NULL); - misc_deregister(&rtc_gen_dev); -} - - -module_init(rtc_generic_init); -module_exit(rtc_generic_exit); - -MODULE_AUTHOR("Richard Zidlicky"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_MISCDEV(RTC_MINOR); diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 28bce3f4f81d..57700541f951 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -8,6 +8,9 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + +#define pr_fmt(fmt) "arm_arch_timer: " fmt + #include <linux/init.h> #include <linux/kernel.h> #include <linux/device.h> @@ -370,16 +373,33 @@ static bool arch_timer_has_nonsecure_ppi(void) arch_timer_ppi[PHYS_NONSECURE_PPI]); } +static u32 check_ppi_trigger(int irq) +{ + u32 flags = irq_get_trigger_type(irq); + + if (flags != IRQF_TRIGGER_HIGH && flags != IRQF_TRIGGER_LOW) { + pr_warn("WARNING: Invalid trigger for IRQ%d, assuming level low\n", irq); + pr_warn("WARNING: Please fix your firmware\n"); + flags = IRQF_TRIGGER_LOW; + } + + return flags; +} + static int arch_timer_starting_cpu(unsigned int cpu) { struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt); + u32 flags; __arch_timer_setup(ARCH_CP15_TIMER, clk); - enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], 0); + flags = check_ppi_trigger(arch_timer_ppi[arch_timer_uses_ppi]); + enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], flags); - if (arch_timer_has_nonsecure_ppi()) - enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], 0); + if (arch_timer_has_nonsecure_ppi()) { + flags = check_ppi_trigger(arch_timer_ppi[PHYS_NONSECURE_PPI]); + enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], flags); + } arch_counter_set_user_access(); if (evtstrm_enable) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index c822d72629d5..74919aa81dcb 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -32,7 +32,6 @@ config CPU_FREQ_BOOST_SW config CPU_FREQ_STAT bool "CPU frequency transition statistics" - default y help Export CPU frequency statistics information through sysfs. diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 9ec033b4f2d9..be9eade147f2 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1374,6 +1374,8 @@ MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params), + ICPU(INTEL_FAM6_BROADWELL_X, core_params), + ICPU(INTEL_FAM6_SKYLAKE_X, core_params), {} }; diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 87796e0864e9..d3ffde806629 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -145,11 +145,30 @@ static struct powernv_pstate_info { /* Use following macros for conversions between pstate_id and index */ static inline int idx_to_pstate(unsigned int i) { + if (unlikely(i >= powernv_pstate_info.nr_pstates)) { + pr_warn_once("index %u is out of bound\n", i); + return powernv_freqs[powernv_pstate_info.nominal].driver_data; + } + return powernv_freqs[i].driver_data; } static inline unsigned int pstate_to_idx(int pstate) { + int min = powernv_freqs[powernv_pstate_info.min].driver_data; + int max = powernv_freqs[powernv_pstate_info.max].driver_data; + + if (min > 0) { + if (unlikely((pstate < max) || (pstate > min))) { + pr_warn_once("pstate %d is out of bound\n", pstate); + return powernv_pstate_info.nominal; + } + } else { + if (unlikely((pstate > max) || (pstate < min))) { + pr_warn_once("pstate %d is out of bound\n", pstate); + return powernv_pstate_info.nominal; + } + } /* * abs() is deliberately used so that is works with * both monotonically increasing and decreasing @@ -593,7 +612,7 @@ void gpstate_timer_handler(unsigned long data) } else { gpstate_idx = calc_global_pstate(gpstates->elapsed_time, gpstates->highest_lpstate_idx, - freq_data.pstate_id); + gpstates->last_lpstate_idx); } /* diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index ea8189f4b021..6dc597126b79 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -441,6 +441,9 @@ static int aead_set_sh_desc(struct crypto_aead *aead) OP_ALG_AAI_CTR_MOD128); const bool is_rfc3686 = alg->caam.rfc3686; + if (!ctx->authsize) + return 0; + /* NULL encryption / decryption */ if (!ctx->enckeylen) return aead_null_set_sh_desc(aead); @@ -614,7 +617,7 @@ skip_enc: keys_fit_inline = true; /* aead_givencrypt shared descriptor */ - desc = ctx->sh_desc_givenc; + desc = ctx->sh_desc_enc; /* Note: Context registers are saved. */ init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686); @@ -645,13 +648,13 @@ copy_iv: append_operation(desc, ctx->class2_alg_type | OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - /* ivsize + cryptlen = seqoutlen - authsize */ - append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); - /* Read and write assoclen bytes */ append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + /* ivsize + cryptlen = seqoutlen - authsize */ + append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); + /* Skip assoc data */ append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); @@ -697,7 +700,7 @@ copy_iv: ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_givenc_dma)) { + if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { dev_err(jrdev, "unable to map shared descriptor\n"); return -ENOMEM; } diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index f1ecc8df8d41..36365b3efdfd 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -1898,6 +1898,7 @@ caam_hash_alloc(struct caam_hash_template *template, template->name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->driver_name); + t_alg->ahash_alg.setkey = NULL; } alg->cra_module = THIS_MODULE; alg->cra_init = caam_hash_cra_init; diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 4fb2eb7c800d..ce0067b7a2f6 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -552,9 +552,9 @@ static const struct pci_id_table pci_dev_descr_haswell_table[] = { /* Knight's Landing Support */ /* * KNL's memory channels are swizzled between memory controllers. - * MC0 is mapped to CH3,5,6 and MC1 is mapped to CH0,1,2 + * MC0 is mapped to CH3,4,5 and MC1 is mapped to CH0,1,2 */ -#define knl_channel_remap(channel) ((channel + 3) % 6) +#define knl_channel_remap(mc, chan) ((mc) ? (chan) : (chan) + 3) /* Memory controller, TAD tables, error injection - 2-8-0, 2-9-0 (2 of these) */ #define PCI_DEVICE_ID_INTEL_KNL_IMC_MC 0x7840 @@ -1286,7 +1286,7 @@ static u32 knl_get_mc_route(int entry, u32 reg) mc = GET_BITFIELD(reg, entry*3, (entry*3)+2); chan = GET_BITFIELD(reg, (entry*2) + 18, (entry*2) + 18 + 1); - return knl_channel_remap(mc*3 + chan); + return knl_channel_remap(mc, chan); } /* @@ -2997,8 +2997,15 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, } else { char A = *("A"); - channel = knl_channel_remap(channel); + /* + * Reported channel is in range 0-2, so we can't map it + * back to mc. To figure out mc we check machine check + * bank register that reported this error. + * bank15 means mc0 and bank16 means mc1. + */ + channel = knl_channel_remap(m->bank == 16, channel); channel_mask = 1 << channel; + snprintf(msg, sizeof(msg), "%s%s err_code:%04x:%04x channel:%d (DIMM_%c)", overflow ? " OVERFLOW" : "", diff --git a/drivers/firmware/broadcom/bcm47xx_sprom.c b/drivers/firmware/broadcom/bcm47xx_sprom.c index b6eb875d4af3..62aa3cf09b4d 100644 --- a/drivers/firmware/broadcom/bcm47xx_sprom.c +++ b/drivers/firmware/broadcom/bcm47xx_sprom.c @@ -669,7 +669,7 @@ static int bcm47xx_get_sprom_bcma(struct bcma_bus *bus, struct ssb_sprom *out) case BCMA_HOSTTYPE_PCI: memset(out, 0, sizeof(struct ssb_sprom)); /* On BCM47XX all PCI buses share the same domain */ - if (config_enabled(CONFIG_BCM47XX)) + if (IS_ENABLED(CONFIG_BCM47XX)) snprintf(buf, sizeof(buf), "pci/%u/%u/", bus->host_pci->bus->number + 1, PCI_SLOT(bus->host_pci->devfn)); diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c index c99c24bc79b0..9ae6c116c474 100644 --- a/drivers/firmware/efi/capsule-loader.c +++ b/drivers/firmware/efi/capsule-loader.c @@ -16,6 +16,7 @@ #include <linux/slab.h> #include <linux/mutex.h> #include <linux/efi.h> +#include <linux/vmalloc.h> #define NO_FURTHER_WRITE_ACTION -1 @@ -108,14 +109,15 @@ static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info) int ret; void *cap_hdr_temp; - cap_hdr_temp = kmap(cap_info->pages[0]); + cap_hdr_temp = vmap(cap_info->pages, cap_info->index, + VM_MAP, PAGE_KERNEL); if (!cap_hdr_temp) { - pr_debug("%s: kmap() failed\n", __func__); + pr_debug("%s: vmap() failed\n", __func__); return -EFAULT; } ret = efi_capsule_update(cap_hdr_temp, cap_info->pages); - kunmap(cap_info->pages[0]); + vunmap(cap_hdr_temp); if (ret) { pr_err("%s: efi_capsule_update() failed\n", __func__); return ret; diff --git a/drivers/firmware/efi/capsule.c b/drivers/firmware/efi/capsule.c index 53b9fd2293ee..6eedff45e6d7 100644 --- a/drivers/firmware/efi/capsule.c +++ b/drivers/firmware/efi/capsule.c @@ -190,9 +190,9 @@ efi_capsule_update_locked(efi_capsule_header_t *capsule, * map the capsule described by @capsule with its data in @pages and * send it to the firmware via the UpdateCapsule() runtime service. * - * @capsule must be a virtual mapping of the first page in @pages - * (@pages[0]) in the kernel address space. That is, a - * capsule_header_t that describes the entire contents of the capsule + * @capsule must be a virtual mapping of the complete capsule update in the + * kernel address space, as the capsule can be consumed immediately. + * A capsule_header_t that describes the entire contents of the capsule * must be at the start of the first data page. * * Even though this function will validate that the firmware supports diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig index c9b9fdf6cfbb..d61410299ec0 100644 --- a/drivers/fpga/Kconfig +++ b/drivers/fpga/Kconfig @@ -21,6 +21,7 @@ config FPGA_MGR_SOCFPGA config FPGA_MGR_ZYNQ_FPGA tristate "Xilinx Zynq FPGA" + depends on HAS_DMA help FPGA manager driver support for Xilinx Zynq FPGAs. diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index e3dba6f44a79..0238bf8bc8c3 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -24,7 +24,7 @@ drm-$(CONFIG_AGP) += drm_agpsupport.o drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_probe_helper.o \ drm_plane_helper.o drm_dp_mst_topology.o drm_atomic_helper.o \ drm_kms_helper_common.o drm_dp_dual_mode_helper.o \ - drm_simple_kms_helper.o + drm_simple_kms_helper.o drm_blend.o drm_kms_helper-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index ff63b88b0ffa..5cc7052e391d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -305,7 +305,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; char *table = NULL; - int size, i; + int size; if (adev->pp_enabled) size = amdgpu_dpm_get_pp_table(adev, &table); @@ -315,10 +315,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev, if (size >= PAGE_SIZE) size = PAGE_SIZE - 1; - for (i = 0; i < size; i++) { - sprintf(buf + i, "%02x", table[i]); - } - sprintf(buf + i, "\n"); + memcpy(buf, table, size); return size; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b7742e62972a..9b61c8ba7aaf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -335,7 +335,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, if (unlikely(r)) { goto out_cleanup; } - r = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem); + r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem); out_cleanup: ttm_bo_mem_put(bo, &tmp_mem); return r; @@ -368,7 +368,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, if (unlikely(r)) { return r; } - r = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem); + r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem); if (unlikely(r)) { goto out_cleanup; } diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index e2f0e5d58d5c..a5c94b482459 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -5779,6 +5779,7 @@ static int ci_dpm_init_microcode(struct amdgpu_device *adev) break; case CHIP_KAVERI: case CHIP_KABINI: + case CHIP_MULLINS: default: BUG(); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index bff8668e9e6d..b8184617ca25 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -270,7 +270,8 @@ static const u32 tonga_mgcg_cgcg_init[] = static const u32 golden_settings_polaris11_a11[] = { - mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208, + mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208, + mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000, mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, mmDB_DEBUG2, 0xf00fffff, 0x00000400, mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, @@ -279,7 +280,7 @@ static const u32 golden_settings_polaris11_a11[] = mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, - mmSQ_CONFIG, 0x07f80000, 0x07180000, + mmSQ_CONFIG, 0x07f80000, 0x01180000, mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, mmTCC_CTRL, 0x00100000, 0xf31fff7f, mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3, @@ -301,8 +302,8 @@ static const u32 polaris11_golden_common_all[] = static const u32 golden_settings_polaris10_a11[] = { mmATC_MISC_CG, 0x000c0fc0, 0x000c0200, - mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208, - mmCB_HW_CONTROL_2, 0, 0x0f000000, + mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208, + mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000, mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, mmDB_DEBUG2, 0xf00fffff, 0x00000400, mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, @@ -409,6 +410,7 @@ static const u32 golden_settings_iceland_a11[] = mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002, mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, + mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, mmTCC_CTRL, 0x00100000, 0xf31fff7f, @@ -505,8 +507,10 @@ static const u32 cz_golden_settings_a11[] = mmGB_GPU_ID, 0x0000000f, 0x00000000, mmPA_SC_ENHANCE, 0xffffffff, 0x00000001, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, + mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, mmTA_CNTL_AUX, 0x000f000f, 0x00010000, + mmTCC_CTRL, 0x00100000, 0xf31fff7f, mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index d24a82bd0c7a..0b0f08641eed 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -144,6 +144,7 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev) break; case CHIP_KAVERI: case CHIP_KABINI: + case CHIP_MULLINS: return 0; default: BUG(); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 717359d3ba8c..2aee2c6f3cd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -103,6 +103,11 @@ static const u32 stoney_mgcg_cgcg_init[] = mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104 }; +static const u32 golden_settings_stoney_common[] = +{ + mmMC_HUB_RDREQ_UVD, MC_HUB_RDREQ_UVD__PRESCALE_MASK, 0x00000004, + mmMC_RD_GRP_OTH, MC_RD_GRP_OTH__UVD_MASK, 0x00600000 +}; static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev) { @@ -142,6 +147,9 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev) amdgpu_program_register_sequence(adev, stoney_mgcg_cgcg_init, (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init)); + amdgpu_program_register_sequence(adev, + golden_settings_stoney_common, + (const u32)ARRAY_SIZE(golden_settings_stoney_common)); break; default: break; diff --git a/drivers/gpu/drm/cirrus/cirrus_main.c b/drivers/gpu/drm/cirrus/cirrus_main.c index 80446e2d3ab6..76bcb43e7c06 100644 --- a/drivers/gpu/drm/cirrus/cirrus_main.c +++ b/drivers/gpu/drm/cirrus/cirrus_main.c @@ -185,14 +185,23 @@ int cirrus_driver_load(struct drm_device *dev, unsigned long flags) goto out; } + /* + * cirrus_modeset_init() is initializing/registering the emulated fbdev + * and DRM internals can access/test some of the fields in + * mode_config->funcs as part of the fbdev registration process. + * Make sure dev->mode_config.funcs is properly set to avoid + * dereferencing a NULL pointer. + * FIXME: mode_config.funcs assignment should probably be done in + * cirrus_modeset_init() (that's a common pattern seen in other DRM + * drivers). + */ + dev->mode_config.funcs = &cirrus_mode_funcs; r = cirrus_modeset_init(cdev); if (r) { dev_err(&dev->pdev->dev, "Fatal error during modeset init: %d\n", r); goto out; } - dev->mode_config.funcs = (void *)&cirrus_mode_funcs; - return 0; out: cirrus_driver_unload(dev); diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 8d2f111fa113..fa3930757972 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -711,6 +711,8 @@ int drm_atomic_plane_set_property(struct drm_plane *plane, state->src_h = val; } else if (property == config->rotation_property) { state->rotation = val; + } else if (property == plane->zpos_property) { + state->zpos = val; } else if (plane->funcs->atomic_set_property) { return plane->funcs->atomic_set_property(plane, state, property, val); @@ -767,6 +769,8 @@ drm_atomic_plane_get_property(struct drm_plane *plane, *val = state->src_h; } else if (property == config->rotation_property) { *val = state->rotation; + } else if (property == plane->zpos_property) { + *val = state->zpos; } else if (plane->funcs->atomic_get_property) { return plane->funcs->atomic_get_property(plane, state, property, val); } else { diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index de7fddce3cef..20be86d89a20 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -32,6 +32,8 @@ #include <drm/drm_atomic_helper.h> #include <linux/fence.h> +#include "drm_crtc_internal.h" + /** * DOC: overview * @@ -592,6 +594,10 @@ drm_atomic_helper_check_planes(struct drm_device *dev, struct drm_plane_state *plane_state; int i, ret = 0; + ret = drm_atomic_helper_normalize_zpos(dev, state); + if (ret) + return ret; + for_each_plane_in_state(state, plane, plane_state, i) { const struct drm_plane_helper_funcs *funcs; @@ -2955,6 +2961,7 @@ void __drm_atomic_helper_crtc_duplicate_state(struct drm_crtc *crtc, state->planes_changed = false; state->connectors_changed = false; state->color_mgmt_changed = false; + state->zpos_changed = false; state->event = NULL; } EXPORT_SYMBOL(__drm_atomic_helper_crtc_duplicate_state); diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c new file mode 100644 index 000000000000..f3c0942bd756 --- /dev/null +++ b/drivers/gpu/drm/drm_blend.c @@ -0,0 +1,238 @@ +/* + * Copyright (C) 2016 Samsung Electronics Co.Ltd + * Authors: + * Marek Szyprowski <m.szyprowski@samsung.com> + * + * DRM core plane blending related functions + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ +#include <drm/drmP.h> +#include <drm/drm_atomic.h> +#include <drm/drm_crtc.h> +#include <linux/export.h> +#include <linux/slab.h> +#include <linux/sort.h> + +#include "drm_internal.h" + +/** + * drm_plane_create_zpos_property - create mutable zpos property + * @plane: drm plane + * @zpos: initial value of zpos property + * @min: minimal possible value of zpos property + * @max: maximal possible value of zpos property + * + * This function initializes generic mutable zpos property and enables support + * for it in drm core. Drivers can then attach this property to planes to enable + * support for configurable planes arrangement during blending operation. + * Once mutable zpos property has been enabled, the DRM core will automatically + * calculate drm_plane_state->normalized_zpos values. Usually min should be set + * to 0 and max to maximal number of planes for given crtc - 1. + * + * If zpos of some planes cannot be changed (like fixed background or + * cursor/topmost planes), driver should adjust min/max values and assign those + * planes immutable zpos property with lower or higher values (for more + * information, see drm_mode_create_zpos_immutable_property() function). In such + * case driver should also assign proper initial zpos values for all planes in + * its plane_reset() callback, so the planes will be always sorted properly. + * + * Returns: + * Zero on success, negative errno on failure. + */ +int drm_plane_create_zpos_property(struct drm_plane *plane, + unsigned int zpos, + unsigned int min, unsigned int max) +{ + struct drm_property *prop; + + prop = drm_property_create_range(plane->dev, 0, "zpos", min, max); + if (!prop) + return -ENOMEM; + + drm_object_attach_property(&plane->base, prop, zpos); + + plane->zpos_property = prop; + + if (plane->state) { + plane->state->zpos = zpos; + plane->state->normalized_zpos = zpos; + } + + return 0; +} +EXPORT_SYMBOL(drm_plane_create_zpos_property); + +/** + * drm_plane_create_zpos_immutable_property - create immuttable zpos property + * @plane: drm plane + * @zpos: value of zpos property + * + * This function initializes generic immutable zpos property and enables + * support for it in drm core. Using this property driver lets userspace + * to get the arrangement of the planes for blending operation and notifies + * it that the hardware (or driver) doesn't support changing of the planes' + * order. + * + * Returns: + * Zero on success, negative errno on failure. + */ +int drm_plane_create_zpos_immutable_property(struct drm_plane *plane, + unsigned int zpos) +{ + struct drm_property *prop; + + prop = drm_property_create_range(plane->dev, DRM_MODE_PROP_IMMUTABLE, + "zpos", zpos, zpos); + if (!prop) + return -ENOMEM; + + drm_object_attach_property(&plane->base, prop, zpos); + + plane->zpos_property = prop; + + if (plane->state) { + plane->state->zpos = zpos; + plane->state->normalized_zpos = zpos; + } + + return 0; +} +EXPORT_SYMBOL(drm_plane_create_zpos_immutable_property); + +static int drm_atomic_state_zpos_cmp(const void *a, const void *b) +{ + const struct drm_plane_state *sa = *(struct drm_plane_state **)a; + const struct drm_plane_state *sb = *(struct drm_plane_state **)b; + + if (sa->zpos != sb->zpos) + return sa->zpos - sb->zpos; + else + return sa->plane->base.id - sb->plane->base.id; +} + +/** + * drm_atomic_helper_crtc_normalize_zpos - calculate normalized zpos values + * @crtc: crtc with planes, which have to be considered for normalization + * @crtc_state: new atomic state to apply + * + * This function checks new states of all planes assigned to given crtc and + * calculates normalized zpos value for them. Planes are compared first by their + * zpos values, then by plane id (if zpos equals). Plane with lowest zpos value + * is at the bottom. The plane_state->normalized_zpos is then filled with unique + * values from 0 to number of active planes in crtc minus one. + * + * RETURNS + * Zero for success or -errno + */ +static int drm_atomic_helper_crtc_normalize_zpos(struct drm_crtc *crtc, + struct drm_crtc_state *crtc_state) +{ + struct drm_atomic_state *state = crtc_state->state; + struct drm_device *dev = crtc->dev; + int total_planes = dev->mode_config.num_total_plane; + struct drm_plane_state **states; + struct drm_plane *plane; + int i, n = 0; + int ret = 0; + + DRM_DEBUG_ATOMIC("[CRTC:%d:%s] calculating normalized zpos values\n", + crtc->base.id, crtc->name); + + states = kmalloc_array(total_planes, sizeof(*states), GFP_TEMPORARY); + if (!states) + return -ENOMEM; + + /* + * Normalization process might create new states for planes which + * normalized_zpos has to be recalculated. + */ + drm_for_each_plane_mask(plane, dev, crtc_state->plane_mask) { + struct drm_plane_state *plane_state = + drm_atomic_get_plane_state(state, plane); + if (IS_ERR(plane_state)) { + ret = PTR_ERR(plane_state); + goto done; + } + states[n++] = plane_state; + DRM_DEBUG_ATOMIC("[PLANE:%d:%s] processing zpos value %d\n", + plane->base.id, plane->name, + plane_state->zpos); + } + + sort(states, n, sizeof(*states), drm_atomic_state_zpos_cmp, NULL); + + for (i = 0; i < n; i++) { + plane = states[i]->plane; + + states[i]->normalized_zpos = i; + DRM_DEBUG_ATOMIC("[PLANE:%d:%s] normalized zpos value %d\n", + plane->base.id, plane->name, i); + } + crtc_state->zpos_changed = true; + +done: + kfree(states); + return ret; +} + +/** + * drm_atomic_helper_normalize_zpos - calculate normalized zpos values for all + * crtcs + * @dev: DRM device + * @state: atomic state of DRM device + * + * This function calculates normalized zpos value for all modified planes in + * the provided atomic state of DRM device. For more information, see + * drm_atomic_helper_crtc_normalize_zpos() function. + * + * RETURNS + * Zero for success or -errno + */ +int drm_atomic_helper_normalize_zpos(struct drm_device *dev, + struct drm_atomic_state *state) +{ + struct drm_crtc *crtc; + struct drm_crtc_state *crtc_state; + struct drm_plane *plane; + struct drm_plane_state *plane_state; + int i, ret = 0; + + for_each_plane_in_state(state, plane, plane_state, i) { + crtc = plane_state->crtc; + if (!crtc) + continue; + if (plane->state->zpos != plane_state->zpos) { + crtc_state = + drm_atomic_get_existing_crtc_state(state, crtc); + crtc_state->zpos_changed = true; + } + } + + for_each_crtc_in_state(state, crtc, crtc_state, i) { + if (crtc_state->plane_mask != crtc->state->plane_mask || + crtc_state->zpos_changed) { + ret = drm_atomic_helper_crtc_normalize_zpos(crtc, + crtc_state); + if (ret) + return ret; + } + } + return 0; +} diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index f1d9f0569d7f..b1dbb60af99f 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -1121,16 +1121,14 @@ static int drm_connector_register_all(struct drm_device *dev) struct drm_connector *connector; int ret; - mutex_lock(&dev->mode_config.mutex); - - drm_for_each_connector(connector, dev) { + /* FIXME: taking the mode config mutex ends up in a clash with + * fbcon/backlight registration */ + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { ret = drm_connector_register(connector); if (ret) goto err; } - mutex_unlock(&dev->mode_config.mutex); - return 0; err: diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h index 47a500b90fd7..0c34e6d906d1 100644 --- a/drivers/gpu/drm/drm_crtc_internal.h +++ b/drivers/gpu/drm/drm_crtc_internal.h @@ -128,3 +128,7 @@ int drm_mode_atomic_ioctl(struct drm_device *dev, int drm_modeset_register_all(struct drm_device *dev); void drm_modeset_unregister_all(struct drm_device *dev); + +/* drm_blend.c */ +int drm_atomic_helper_normalize_zpos(struct drm_device *dev, + struct drm_atomic_state *state); diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 7df26d4b7ad8..637a0aa4d3a0 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -74,6 +74,8 @@ #define EDID_QUIRK_FORCE_8BPC (1 << 8) /* Force 12bpc */ #define EDID_QUIRK_FORCE_12BPC (1 << 9) +/* Force 6bpc */ +#define EDID_QUIRK_FORCE_6BPC (1 << 10) struct detailed_mode_closure { struct drm_connector *connector; @@ -100,6 +102,9 @@ static struct edid_quirk { /* Unknown Acer */ { "ACR", 2423, EDID_QUIRK_FIRST_DETAILED_PREFERRED }, + /* AEO model 0 reports 8 bpc, but is a 6 bpc panel */ + { "AEO", 0, EDID_QUIRK_FORCE_6BPC }, + /* Belinea 10 15 55 */ { "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 }, { "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 }, @@ -3862,6 +3867,20 @@ static void drm_add_display_info(struct edid *edid, /* HDMI deep color modes supported? Assign to info, if so */ drm_assign_hdmi_deep_color_info(edid, info, connector); + /* + * Digital sink with "DFP 1.x compliant TMDS" according to EDID 1.3? + * + * For such displays, the DFP spec 1.0, section 3.10 "EDID support" + * tells us to assume 8 bpc color depth if the EDID doesn't have + * extensions which tell otherwise. + */ + if ((info->bpc == 0) && (edid->revision < 4) && + (edid->input & DRM_EDID_DIGITAL_TYPE_DVI)) { + info->bpc = 8; + DRM_DEBUG("%s: Assigning DFP sink color depth as %d bpc.\n", + connector->name, info->bpc); + } + /* Only defined for 1.4 with digital displays */ if (edid->revision < 4) return; @@ -4082,6 +4101,9 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid) drm_add_display_info(edid, &connector->display_info, connector); + if (quirks & EDID_QUIRK_FORCE_6BPC) + connector->display_info.bpc = 6; + if (quirks & EDID_QUIRK_FORCE_8BPC) connector->display_info.bpc = 8; diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index b39d521f093d..7f1a49d5bdbe 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -64,7 +64,6 @@ struct exynos_drm_plane_state { struct exynos_drm_rect src; unsigned int h_ratio; unsigned int v_ratio; - unsigned int zpos; }; static inline struct exynos_drm_plane_state * @@ -221,7 +220,6 @@ struct exynos_drm_private { * this array is used to be aware of which crtc did it request vblank. */ struct drm_crtc *crtc[MAX_CRTC]; - struct drm_property *plane_zpos_property; struct device *dma_dev; void *mapping; diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c index fb49443bfd32..4cfb39d543b4 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c @@ -52,7 +52,7 @@ static int exynos_drm_fb_mmap(struct fb_info *info, ret = dma_mmap_attrs(to_dma_dev(helper->dev), vma, exynos_gem->cookie, exynos_gem->dma_addr, exynos_gem->size, - &exynos_gem->dma_attrs); + exynos_gem->dma_attrs); if (ret < 0) { DRM_ERROR("failed to mmap.\n"); return ret; diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 8564c3da0d22..4bf00f57ffe8 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -17,7 +17,6 @@ #include <linux/slab.h> #include <linux/workqueue.h> #include <linux/dma-mapping.h> -#include <linux/dma-attrs.h> #include <linux/of.h> #include <drm/drmP.h> @@ -235,7 +234,7 @@ struct g2d_data { struct mutex cmdlist_mutex; dma_addr_t cmdlist_pool; void *cmdlist_pool_virt; - struct dma_attrs cmdlist_dma_attrs; + unsigned long cmdlist_dma_attrs; /* runqueue*/ struct g2d_runqueue_node *runqueue_node; @@ -256,13 +255,12 @@ static int g2d_init_cmdlist(struct g2d_data *g2d) int ret; struct g2d_buf_info *buf_info; - init_dma_attrs(&g2d->cmdlist_dma_attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &g2d->cmdlist_dma_attrs); + g2d->cmdlist_dma_attrs = DMA_ATTR_WRITE_COMBINE; g2d->cmdlist_pool_virt = dma_alloc_attrs(to_dma_dev(subdrv->drm_dev), G2D_CMDLIST_POOL_SIZE, &g2d->cmdlist_pool, GFP_KERNEL, - &g2d->cmdlist_dma_attrs); + g2d->cmdlist_dma_attrs); if (!g2d->cmdlist_pool_virt) { dev_err(dev, "failed to allocate dma memory\n"); return -ENOMEM; @@ -295,7 +293,7 @@ static int g2d_init_cmdlist(struct g2d_data *g2d) err: dma_free_attrs(to_dma_dev(subdrv->drm_dev), G2D_CMDLIST_POOL_SIZE, g2d->cmdlist_pool_virt, - g2d->cmdlist_pool, &g2d->cmdlist_dma_attrs); + g2d->cmdlist_pool, g2d->cmdlist_dma_attrs); return ret; } @@ -309,7 +307,7 @@ static void g2d_fini_cmdlist(struct g2d_data *g2d) dma_free_attrs(to_dma_dev(subdrv->drm_dev), G2D_CMDLIST_POOL_SIZE, g2d->cmdlist_pool_virt, - g2d->cmdlist_pool, &g2d->cmdlist_dma_attrs); + g2d->cmdlist_pool, g2d->cmdlist_dma_attrs); } } diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index cdf9f1af4347..f2ae72ba7d5a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -24,7 +24,7 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem) { struct drm_device *dev = exynos_gem->base.dev; - enum dma_attr attr; + unsigned long attr; unsigned int nr_pages; struct sg_table sgt; int ret = -ENOMEM; @@ -34,7 +34,7 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem) return 0; } - init_dma_attrs(&exynos_gem->dma_attrs); + exynos_gem->dma_attrs = 0; /* * if EXYNOS_BO_CONTIG, fully physically contiguous memory @@ -42,7 +42,7 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem) * as possible. */ if (!(exynos_gem->flags & EXYNOS_BO_NONCONTIG)) - dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, &exynos_gem->dma_attrs); + exynos_gem->dma_attrs |= DMA_ATTR_FORCE_CONTIGUOUS; /* * if EXYNOS_BO_WC or EXYNOS_BO_NONCACHABLE, writecombine mapping @@ -54,8 +54,8 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem) else attr = DMA_ATTR_NON_CONSISTENT; - dma_set_attr(attr, &exynos_gem->dma_attrs); - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &exynos_gem->dma_attrs); + exynos_gem->dma_attrs |= attr; + exynos_gem->dma_attrs |= DMA_ATTR_NO_KERNEL_MAPPING; nr_pages = exynos_gem->size >> PAGE_SHIFT; @@ -67,7 +67,7 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem) exynos_gem->cookie = dma_alloc_attrs(to_dma_dev(dev), exynos_gem->size, &exynos_gem->dma_addr, GFP_KERNEL, - &exynos_gem->dma_attrs); + exynos_gem->dma_attrs); if (!exynos_gem->cookie) { DRM_ERROR("failed to allocate buffer.\n"); goto err_free; @@ -75,7 +75,7 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem) ret = dma_get_sgtable_attrs(to_dma_dev(dev), &sgt, exynos_gem->cookie, exynos_gem->dma_addr, exynos_gem->size, - &exynos_gem->dma_attrs); + exynos_gem->dma_attrs); if (ret < 0) { DRM_ERROR("failed to get sgtable.\n"); goto err_dma_free; @@ -99,7 +99,7 @@ err_sgt_free: sg_free_table(&sgt); err_dma_free: dma_free_attrs(to_dma_dev(dev), exynos_gem->size, exynos_gem->cookie, - exynos_gem->dma_addr, &exynos_gem->dma_attrs); + exynos_gem->dma_addr, exynos_gem->dma_attrs); err_free: drm_free_large(exynos_gem->pages); @@ -120,7 +120,7 @@ static void exynos_drm_free_buf(struct exynos_drm_gem *exynos_gem) dma_free_attrs(to_dma_dev(dev), exynos_gem->size, exynos_gem->cookie, (dma_addr_t)exynos_gem->dma_addr, - &exynos_gem->dma_attrs); + exynos_gem->dma_attrs); drm_free_large(exynos_gem->pages); } @@ -346,7 +346,7 @@ static int exynos_drm_gem_mmap_buffer(struct exynos_drm_gem *exynos_gem, ret = dma_mmap_attrs(to_dma_dev(drm_dev), vma, exynos_gem->cookie, exynos_gem->dma_addr, exynos_gem->size, - &exynos_gem->dma_attrs); + exynos_gem->dma_attrs); if (ret < 0) { DRM_ERROR("failed to mmap.\n"); return ret; diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h index 78100742281d..df7c543d6558 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.h +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h @@ -50,7 +50,7 @@ struct exynos_drm_gem { void *cookie; void __iomem *kvaddr; dma_addr_t dma_addr; - struct dma_attrs dma_attrs; + unsigned long dma_attrs; struct page **pages; struct sg_table *sgt; }; diff --git a/drivers/gpu/drm/exynos/exynos_drm_plane.c b/drivers/gpu/drm/exynos/exynos_drm_plane.c index 77f12c00abf9..7f32419b25ea 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_plane.c +++ b/drivers/gpu/drm/exynos/exynos_drm_plane.c @@ -139,9 +139,9 @@ static void exynos_drm_plane_reset(struct drm_plane *plane) exynos_state = kzalloc(sizeof(*exynos_state), GFP_KERNEL); if (exynos_state) { - exynos_state->zpos = exynos_plane->config->zpos; plane->state = &exynos_state->base; plane->state->plane = plane; + plane->state->zpos = exynos_plane->config->zpos; } } @@ -157,7 +157,6 @@ exynos_drm_plane_duplicate_state(struct drm_plane *plane) return NULL; __drm_atomic_helper_plane_duplicate_state(plane, ©->base); - copy->zpos = exynos_state->zpos; return ©->base; } @@ -170,43 +169,6 @@ static void exynos_drm_plane_destroy_state(struct drm_plane *plane, kfree(old_exynos_state); } -static int exynos_drm_plane_atomic_set_property(struct drm_plane *plane, - struct drm_plane_state *state, - struct drm_property *property, - uint64_t val) -{ - struct exynos_drm_plane *exynos_plane = to_exynos_plane(plane); - struct exynos_drm_plane_state *exynos_state = - to_exynos_plane_state(state); - struct exynos_drm_private *dev_priv = plane->dev->dev_private; - const struct exynos_drm_plane_config *config = exynos_plane->config; - - if (property == dev_priv->plane_zpos_property && - (config->capabilities & EXYNOS_DRM_PLANE_CAP_ZPOS)) - exynos_state->zpos = val; - else - return -EINVAL; - - return 0; -} - -static int exynos_drm_plane_atomic_get_property(struct drm_plane *plane, - const struct drm_plane_state *state, - struct drm_property *property, - uint64_t *val) -{ - const struct exynos_drm_plane_state *exynos_state = - container_of(state, const struct exynos_drm_plane_state, base); - struct exynos_drm_private *dev_priv = plane->dev->dev_private; - - if (property == dev_priv->plane_zpos_property) - *val = exynos_state->zpos; - else - return -EINVAL; - - return 0; -} - static struct drm_plane_funcs exynos_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, @@ -215,8 +177,6 @@ static struct drm_plane_funcs exynos_plane_funcs = { .reset = exynos_drm_plane_reset, .atomic_duplicate_state = exynos_drm_plane_duplicate_state, .atomic_destroy_state = exynos_drm_plane_destroy_state, - .atomic_set_property = exynos_drm_plane_atomic_set_property, - .atomic_get_property = exynos_drm_plane_atomic_get_property, }; static int @@ -304,23 +264,13 @@ static const struct drm_plane_helper_funcs plane_helper_funcs = { }; static void exynos_plane_attach_zpos_property(struct drm_plane *plane, - unsigned int zpos) + bool immutable) { - struct drm_device *dev = plane->dev; - struct exynos_drm_private *dev_priv = dev->dev_private; - struct drm_property *prop; - - prop = dev_priv->plane_zpos_property; - if (!prop) { - prop = drm_property_create_range(dev, 0, "zpos", - 0, MAX_PLANE - 1); - if (!prop) - return; - - dev_priv->plane_zpos_property = prop; - } - - drm_object_attach_property(&plane->base, prop, zpos); + /* FIXME */ + if (immutable) + drm_plane_create_zpos_immutable_property(plane, 0); + else + drm_plane_create_zpos_property(plane, 0, 0, MAX_PLANE - 1); } int exynos_plane_init(struct drm_device *dev, @@ -346,7 +296,8 @@ int exynos_plane_init(struct drm_device *dev, exynos_plane->index = index; exynos_plane->config = config; - exynos_plane_attach_zpos_property(&exynos_plane->base, config->zpos); + exynos_plane_attach_zpos_property(&exynos_plane->base, + !(config->capabilities & EXYNOS_DRM_PLANE_CAP_ZPOS)); return 0; } diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 74a4269cc1b0..e1d47f9435fc 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -477,6 +477,7 @@ static void vp_video_buffer(struct mixer_context *ctx, struct drm_display_mode *mode = &state->base.crtc->state->adjusted_mode; struct mixer_resources *res = &ctx->mixer_res; struct drm_framebuffer *fb = state->base.fb; + unsigned int priority = state->base.normalized_zpos + 1; unsigned long flags; dma_addr_t luma_addr[2], chroma_addr[2]; bool tiled_mode = false; @@ -561,7 +562,7 @@ static void vp_video_buffer(struct mixer_context *ctx, mixer_cfg_scan(ctx, mode->vdisplay); mixer_cfg_rgb_fmt(ctx, mode->vdisplay); - mixer_cfg_layer(ctx, plane->index, state->zpos + 1, true); + mixer_cfg_layer(ctx, plane->index, priority, true); mixer_cfg_vp_blend(ctx); mixer_run(ctx); @@ -586,6 +587,7 @@ static void mixer_graph_buffer(struct mixer_context *ctx, struct drm_display_mode *mode = &state->base.crtc->state->adjusted_mode; struct mixer_resources *res = &ctx->mixer_res; struct drm_framebuffer *fb = state->base.fb; + unsigned int priority = state->base.normalized_zpos + 1; unsigned long flags; unsigned int win = plane->index; unsigned int x_ratio = 0, y_ratio = 0; @@ -677,7 +679,7 @@ static void mixer_graph_buffer(struct mixer_context *ctx, mixer_cfg_scan(ctx, mode->vdisplay); mixer_cfg_rgb_fmt(ctx, mode->vdisplay); - mixer_cfg_layer(ctx, win, state->zpos + 1, true); + mixer_cfg_layer(ctx, win, priority, true); mixer_cfg_gfx_blend(ctx, win, is_alpha_format(fb->pixel_format)); /* layer update mandatory for mixer 16.0.33.0 */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 915a3d0acff3..21f939074abc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3651,8 +3651,8 @@ void i915_debugfs_unregister(struct drm_i915_private *dev_priv); int i915_debugfs_connector_add(struct drm_connector *connector); void intel_display_crc_init(struct drm_device *dev); #else -static inline int i915_debugfs_register(struct drm_i915_private *) {return 0;} -static inline void i915_debugfs_unregister(struct drm_i915_private *) {} +static inline int i915_debugfs_register(struct drm_i915_private *dev_priv) {return 0;} +static inline void i915_debugfs_unregister(struct drm_i915_private *dev_priv) {} static inline int i915_debugfs_connector_add(struct drm_connector *connector) { return 0; } static inline void intel_display_crc_init(struct drm_device *dev) {} diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c457eed76f1f..dcf93b3d4fb6 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5691,15 +5691,7 @@ static bool skl_cdclk_pcu_ready(struct drm_i915_private *dev_priv) static bool skl_cdclk_wait_for_pcu_ready(struct drm_i915_private *dev_priv) { - unsigned int i; - - for (i = 0; i < 15; i++) { - if (skl_cdclk_pcu_ready(dev_priv)) - return true; - udelay(10); - } - - return false; + return _wait_for(skl_cdclk_pcu_ready(dev_priv), 3000, 10) == 0; } static void skl_set_cdclk(struct drm_i915_private *dev_priv, int cdclk, int vco) @@ -12114,21 +12106,11 @@ connected_sink_compute_bpp(struct intel_connector *connector, pipe_config->pipe_bpp = connector->base.display_info.bpc*3; } - /* Clamp bpp to default limit on screens without EDID 1.4 */ - if (connector->base.display_info.bpc == 0) { - int type = connector->base.connector_type; - int clamp_bpp = 24; - - /* Fall back to 18 bpp when DP sink capability is unknown. */ - if (type == DRM_MODE_CONNECTOR_DisplayPort || - type == DRM_MODE_CONNECTOR_eDP) - clamp_bpp = 18; - - if (bpp > clamp_bpp) { - DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of %d\n", - bpp, clamp_bpp); - pipe_config->pipe_bpp = clamp_bpp; - } + /* Clamp bpp to 8 on screens without EDID 1.4 */ + if (connector->base.display_info.bpc == 0 && bpp > 24) { + DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of 24\n", + bpp); + pipe_config->pipe_bpp = 24; } } diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 86b00c6db1a6..3e3632c18733 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -782,7 +782,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous struct intel_fbdev *ifbdev = dev_priv->fbdev; struct fb_info *info; - if (!ifbdev) + if (!ifbdev || !ifbdev->fb) return; info = ifbdev->helper.fbdev; @@ -827,31 +827,28 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous void intel_fbdev_output_poll_changed(struct drm_device *dev) { - struct drm_i915_private *dev_priv = to_i915(dev); - if (dev_priv->fbdev) - drm_fb_helper_hotplug_event(&dev_priv->fbdev->helper); + struct intel_fbdev *ifbdev = to_i915(dev)->fbdev; + + if (ifbdev && ifbdev->fb) + drm_fb_helper_hotplug_event(&ifbdev->helper); } void intel_fbdev_restore_mode(struct drm_device *dev) { - int ret; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_fbdev *ifbdev = dev_priv->fbdev; - struct drm_fb_helper *fb_helper; + struct intel_fbdev *ifbdev = to_i915(dev)->fbdev; if (!ifbdev) return; intel_fbdev_sync(ifbdev); + if (!ifbdev->fb) + return; - fb_helper = &ifbdev->helper; - - ret = drm_fb_helper_restore_fbdev_mode_unlocked(fb_helper); - if (ret) { + if (drm_fb_helper_restore_fbdev_mode_unlocked(&ifbdev->helper)) { DRM_DEBUG("failed to restore crtc mode\n"); } else { - mutex_lock(&fb_helper->dev->struct_mutex); + mutex_lock(&dev->struct_mutex); intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&fb_helper->dev->struct_mutex); + mutex_unlock(&dev->struct_mutex); } } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f4f3fcc8b3be..97ba6c8cf907 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4892,7 +4892,8 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) else gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); dev_priv->rps.last_adj = 0; - I915_WRITE(GEN6_PMINTRMSK, 0xffffffff); + I915_WRITE(GEN6_PMINTRMSK, + gen6_sanitize_rps_pm_mask(dev_priv, ~0)); } mutex_unlock(&dev_priv->rps.hw_lock); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c index fa2ec0cd00e8..7abc550ebc00 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c @@ -54,15 +54,14 @@ struct mtk_drm_gem_obj *mtk_drm_gem_create(struct drm_device *dev, obj = &mtk_gem->base; - init_dma_attrs(&mtk_gem->dma_attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &mtk_gem->dma_attrs); + mtk_gem->dma_attrs = DMA_ATTR_WRITE_COMBINE; if (!alloc_kmap) - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &mtk_gem->dma_attrs); + mtk_gem->dma_attrs |= DMA_ATTR_NO_KERNEL_MAPPING; mtk_gem->cookie = dma_alloc_attrs(priv->dma_dev, obj->size, &mtk_gem->dma_addr, GFP_KERNEL, - &mtk_gem->dma_attrs); + mtk_gem->dma_attrs); if (!mtk_gem->cookie) { DRM_ERROR("failed to allocate %zx byte dma buffer", obj->size); ret = -ENOMEM; @@ -93,7 +92,7 @@ void mtk_drm_gem_free_object(struct drm_gem_object *obj) drm_prime_gem_destroy(obj, mtk_gem->sg); else dma_free_attrs(priv->dma_dev, obj->size, mtk_gem->cookie, - mtk_gem->dma_addr, &mtk_gem->dma_attrs); + mtk_gem->dma_addr, mtk_gem->dma_attrs); /* release file pointer to gem object. */ drm_gem_object_release(obj); @@ -173,7 +172,7 @@ static int mtk_drm_gem_object_mmap(struct drm_gem_object *obj, vma->vm_pgoff = 0; ret = dma_mmap_attrs(priv->dma_dev, vma, mtk_gem->cookie, - mtk_gem->dma_addr, obj->size, &mtk_gem->dma_attrs); + mtk_gem->dma_addr, obj->size, mtk_gem->dma_attrs); if (ret) drm_gem_vm_close(vma); @@ -224,7 +223,7 @@ struct sg_table *mtk_gem_prime_get_sg_table(struct drm_gem_object *obj) ret = dma_get_sgtable_attrs(priv->dma_dev, sgt, mtk_gem->cookie, mtk_gem->dma_addr, obj->size, - &mtk_gem->dma_attrs); + mtk_gem->dma_attrs); if (ret) { DRM_ERROR("failed to allocate sgt, %d\n", ret); kfree(sgt); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.h b/drivers/gpu/drm/mediatek/mtk_drm_gem.h index 3a2a5624a1cb..2752718fa5b2 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.h @@ -35,7 +35,7 @@ struct mtk_drm_gem_obj { void *cookie; void *kvaddr; dma_addr_t dma_addr; - struct dma_attrs dma_attrs; + unsigned long dma_attrs; struct sg_table *sg; }; diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 26f859ec24b3..8a0237008f74 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -238,11 +238,10 @@ static int msm_drm_uninit(struct device *dev) } if (priv->vram.paddr) { - DEFINE_DMA_ATTRS(attrs); - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); + unsigned long attrs = DMA_ATTR_NO_KERNEL_MAPPING; drm_mm_takedown(&priv->vram.mm); dma_free_attrs(dev, priv->vram.size, NULL, - priv->vram.paddr, &attrs); + priv->vram.paddr, attrs); } component_unbind_all(dev, ddev); @@ -310,21 +309,21 @@ static int msm_init_vram(struct drm_device *dev) } if (size) { - DEFINE_DMA_ATTRS(attrs); + unsigned long attrs = 0; void *p; priv->vram.size = size; drm_mm_init(&priv->vram.mm, 0, (size >> PAGE_SHIFT) - 1); - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + attrs |= DMA_ATTR_NO_KERNEL_MAPPING; + attrs |= DMA_ATTR_WRITE_COMBINE; /* note that for no-kernel-mapping, the vaddr returned * is bogus, but non-null if allocation succeeded: */ p = dma_alloc_attrs(dev->dev, size, - &priv->vram.paddr, GFP_KERNEL, &attrs); + &priv->vram.paddr, GFP_KERNEL, attrs); if (!p) { dev_err(dev->dev, "failed to allocate VRAM\n"); priv->vram.paddr = 0; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 528bdeffb339..6190035edfea 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1151,7 +1151,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr, if (ret) goto out; - ret = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem); + ret = ttm_bo_move_ttm(bo, true, intr, no_wait_gpu, new_mem); out: ttm_bo_mem_put(bo, &tmp_mem); return ret; @@ -1179,7 +1179,7 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr, if (ret) return ret; - ret = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem); + ret = ttm_bo_move_ttm(bo, true, intr, no_wait_gpu, &tmp_mem); if (ret) goto out; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c index 6b8f2a19b2d9..a6a7fa0d7679 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -109,7 +109,7 @@ struct gk20a_instmem { u16 iommu_bit; /* Only used by DMA API */ - struct dma_attrs attrs; + unsigned long attrs; }; #define gk20a_instmem(p) container_of((p), struct gk20a_instmem, base) @@ -293,7 +293,7 @@ gk20a_instobj_dtor_dma(struct nvkm_memory *memory) goto out; dma_free_attrs(dev, node->base.mem.size << PAGE_SHIFT, node->base.vaddr, - node->handle, &imem->attrs); + node->handle, imem->attrs); out: return node; @@ -386,7 +386,7 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align, node->base.vaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT, &node->handle, GFP_KERNEL, - &imem->attrs); + imem->attrs); if (!node->base.vaddr) { nvkm_error(subdev, "cannot allocate DMA memory\n"); return -ENOMEM; @@ -597,10 +597,9 @@ gk20a_instmem_new(struct nvkm_device *device, int index, nvkm_info(&imem->base.subdev, "using IOMMU\n"); } else { - init_dma_attrs(&imem->attrs); - dma_set_attr(DMA_ATTR_NON_CONSISTENT, &imem->attrs); - dma_set_attr(DMA_ATTR_WEAK_ORDERING, &imem->attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &imem->attrs); + imem->attrs = DMA_ATTR_NON_CONSISTENT | + DMA_ATTR_WEAK_ORDERING | + DMA_ATTR_WRITE_COMBINE; nvkm_info(&imem->base.subdev, "using DMA API\n"); } diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index ffdad81ef964..0c00e192c845 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -346,7 +346,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo, if (unlikely(r)) { goto out_cleanup; } - r = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem); + r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem); out_cleanup: ttm_bo_mem_put(bo, &tmp_mem); return r; @@ -379,7 +379,7 @@ static int radeon_move_ram_vram(struct ttm_buffer_object *bo, if (unlikely(r)) { return r; } - r = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem); + r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem); if (unlikely(r)) { goto out_cleanup; } diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c index e39fcef2e033..7316fc7fa0bd 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c @@ -196,7 +196,7 @@ void rcar_du_crtc_route_output(struct drm_crtc *crtc, static unsigned int plane_zpos(struct rcar_du_plane *plane) { - return to_rcar_plane_state(plane->plane.state)->zpos; + return plane->plane.state->normalized_zpos; } static const struct rcar_du_format_info * diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.h b/drivers/gpu/drm/rcar-du/rcar_du_drv.h index ed35467d96cf..c843c3134498 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_drv.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.h @@ -92,7 +92,6 @@ struct rcar_du_device { struct { struct drm_property *alpha; struct drm_property *colorkey; - struct drm_property *zpos; } props; unsigned int dpad0_source; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c b/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c index 4de3ff0dbebd..e03004f4588d 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c @@ -125,6 +125,7 @@ int rcar_du_hdmienc_init(struct rcar_du_device *rcdu, /* Link drm_bridge to encoder */ bridge->encoder = encoder; + encoder->bridge = bridge; ret = drm_bridge_attach(rcdu->ddev, bridge); if (ret) { diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c index 6bb032d8ac6b..f03eb55318c1 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c @@ -527,11 +527,6 @@ static int rcar_du_properties_init(struct rcar_du_device *rcdu) if (rcdu->props.colorkey == NULL) return -ENOMEM; - rcdu->props.zpos = - drm_property_create_range(rcdu->ddev, 0, "zpos", 1, 7); - if (rcdu->props.zpos == NULL) - return -ENOMEM; - return 0; } diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.c b/drivers/gpu/drm/rcar-du/rcar_du_plane.c index bfe31ca870cc..a74f8ed8ca2e 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_plane.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.c @@ -652,7 +652,7 @@ static void rcar_du_plane_reset(struct drm_plane *plane) state->source = RCAR_DU_PLANE_MEMORY; state->alpha = 255; state->colorkey = RCAR_DU_COLORKEY_NONE; - state->zpos = plane->type == DRM_PLANE_TYPE_PRIMARY ? 0 : 1; + state->state.zpos = plane->type == DRM_PLANE_TYPE_PRIMARY ? 0 : 1; plane->state = &state->state; plane->state->plane = plane; @@ -670,8 +670,6 @@ static int rcar_du_plane_atomic_set_property(struct drm_plane *plane, rstate->alpha = val; else if (property == rcdu->props.colorkey) rstate->colorkey = val; - else if (property == rcdu->props.zpos) - rstate->zpos = val; else return -EINVAL; @@ -690,8 +688,6 @@ static int rcar_du_plane_atomic_get_property(struct drm_plane *plane, *val = rstate->alpha; else if (property == rcdu->props.colorkey) *val = rstate->colorkey; - else if (property == rcdu->props.zpos) - *val = rstate->zpos; else return -EINVAL; @@ -763,8 +759,7 @@ int rcar_du_planes_init(struct rcar_du_group *rgrp) drm_object_attach_property(&plane->plane.base, rcdu->props.colorkey, RCAR_DU_COLORKEY_NONE); - drm_object_attach_property(&plane->plane.base, - rcdu->props.zpos, 1); + drm_plane_create_zpos_property(&plane->plane, 1, 1, 7); } return 0; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.h b/drivers/gpu/drm/rcar-du/rcar_du_plane.h index b18b7b25dbfa..8b91dd3a46e4 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_plane.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.h @@ -51,7 +51,6 @@ static inline struct rcar_du_plane *to_rcar_plane(struct drm_plane *plane) * @hwindex: 0-based hardware plane index, -1 means unused * @alpha: value of the plane alpha property * @colorkey: value of the plane colorkey property - * @zpos: value of the plane zpos property */ struct rcar_du_plane_state { struct drm_plane_state state; @@ -62,7 +61,6 @@ struct rcar_du_plane_state { unsigned int alpha; unsigned int colorkey; - unsigned int zpos; }; static inline struct rcar_du_plane_state * diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c index 6ac717f2056f..83ebd162f3ef 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c @@ -43,12 +43,12 @@ void rcar_du_vsp_enable(struct rcar_du_crtc *crtc) .src_y = 0, .src_w = mode->hdisplay << 16, .src_h = mode->vdisplay << 16, + .zpos = 0, }, .format = rcar_du_format_info(DRM_FORMAT_ARGB8888), .source = RCAR_DU_PLANE_VSPD1, .alpha = 255, .colorkey = 0, - .zpos = 0, }; if (rcdu->info->gen >= 3) @@ -152,7 +152,7 @@ static void rcar_du_vsp_plane_setup(struct rcar_du_vsp_plane *plane) .pixelformat = 0, .pitch = fb->pitches[0], .alpha = state->alpha, - .zpos = state->zpos, + .zpos = state->state.zpos, }; unsigned int i; @@ -267,7 +267,7 @@ static void rcar_du_vsp_plane_reset(struct drm_plane *plane) return; state->alpha = 255; - state->zpos = plane->type == DRM_PLANE_TYPE_PRIMARY ? 0 : 1; + state->state.zpos = plane->type == DRM_PLANE_TYPE_PRIMARY ? 0 : 1; plane->state = &state->state; plane->state->plane = plane; @@ -282,8 +282,6 @@ static int rcar_du_vsp_plane_atomic_set_property(struct drm_plane *plane, if (property == rcdu->props.alpha) rstate->alpha = val; - else if (property == rcdu->props.zpos) - rstate->zpos = val; else return -EINVAL; @@ -300,8 +298,6 @@ static int rcar_du_vsp_plane_atomic_get_property(struct drm_plane *plane, if (property == rcdu->props.alpha) *val = rstate->alpha; - else if (property == rcdu->props.zpos) - *val = rstate->zpos; else return -EINVAL; @@ -381,8 +377,8 @@ int rcar_du_vsp_init(struct rcar_du_vsp *vsp) drm_object_attach_property(&plane->plane.base, rcdu->props.alpha, 255); - drm_object_attach_property(&plane->plane.base, - rcdu->props.zpos, 1); + drm_plane_create_zpos_property(&plane->plane, 1, 1, + vsp->num_planes - 1); } return 0; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c index 059e902f872d..b70f9423379c 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c @@ -17,8 +17,6 @@ #include <drm/drm_gem.h> #include <drm/drm_vma_manager.h> -#include <linux/dma-attrs.h> - #include "rockchip_drm_drv.h" #include "rockchip_drm_gem.h" @@ -28,15 +26,14 @@ static int rockchip_gem_alloc_buf(struct rockchip_gem_object *rk_obj, struct drm_gem_object *obj = &rk_obj->base; struct drm_device *drm = obj->dev; - init_dma_attrs(&rk_obj->dma_attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &rk_obj->dma_attrs); + rk_obj->dma_attrs = DMA_ATTR_WRITE_COMBINE; if (!alloc_kmap) - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &rk_obj->dma_attrs); + rk_obj->dma_attrs |= DMA_ATTR_NO_KERNEL_MAPPING; rk_obj->kvaddr = dma_alloc_attrs(drm->dev, obj->size, &rk_obj->dma_addr, GFP_KERNEL, - &rk_obj->dma_attrs); + rk_obj->dma_attrs); if (!rk_obj->kvaddr) { DRM_ERROR("failed to allocate %zu byte dma buffer", obj->size); return -ENOMEM; @@ -51,7 +48,7 @@ static void rockchip_gem_free_buf(struct rockchip_gem_object *rk_obj) struct drm_device *drm = obj->dev; dma_free_attrs(drm->dev, obj->size, rk_obj->kvaddr, rk_obj->dma_addr, - &rk_obj->dma_attrs); + rk_obj->dma_attrs); } static int rockchip_drm_gem_object_mmap(struct drm_gem_object *obj, @@ -70,7 +67,7 @@ static int rockchip_drm_gem_object_mmap(struct drm_gem_object *obj, vma->vm_pgoff = 0; ret = dma_mmap_attrs(drm->dev, vma, rk_obj->kvaddr, rk_obj->dma_addr, - obj->size, &rk_obj->dma_attrs); + obj->size, rk_obj->dma_attrs); if (ret) drm_gem_vm_close(vma); @@ -262,7 +259,7 @@ struct sg_table *rockchip_gem_prime_get_sg_table(struct drm_gem_object *obj) ret = dma_get_sgtable_attrs(drm->dev, sgt, rk_obj->kvaddr, rk_obj->dma_addr, obj->size, - &rk_obj->dma_attrs); + rk_obj->dma_attrs); if (ret) { DRM_ERROR("failed to allocate sgt, %d\n", ret); kfree(sgt); @@ -276,7 +273,7 @@ void *rockchip_gem_prime_vmap(struct drm_gem_object *obj) { struct rockchip_gem_object *rk_obj = to_rockchip_obj(obj); - if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, &rk_obj->dma_attrs)) + if (rk_obj->dma_attrs & DMA_ATTR_NO_KERNEL_MAPPING) return NULL; return rk_obj->kvaddr; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.h b/drivers/gpu/drm/rockchip/rockchip_drm_gem.h index ad22618473a4..18b3488db4ec 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.h @@ -23,7 +23,7 @@ struct rockchip_gem_object { void *kvaddr; dma_addr_t dma_addr; - struct dma_attrs dma_attrs; + unsigned long dma_attrs; }; struct sg_table *rockchip_gem_prime_get_sg_table(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/sti/sti_cursor.c b/drivers/gpu/drm/sti/sti_cursor.c index a263bbba4119..3b53f7f2e3fc 100644 --- a/drivers/gpu/drm/sti/sti_cursor.c +++ b/drivers/gpu/drm/sti/sti_cursor.c @@ -349,8 +349,8 @@ struct drm_plane_funcs sti_cursor_plane_helpers_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, .destroy = sti_cursor_destroy, - .set_property = sti_plane_set_property, - .reset = drm_atomic_helper_plane_reset, + .set_property = drm_atomic_helper_plane_set_property, + .reset = sti_plane_reset, .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state, .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, .late_register = sti_cursor_late_register, diff --git a/drivers/gpu/drm/sti/sti_gdp.c b/drivers/gpu/drm/sti/sti_gdp.c index bf63086a3dc8..b8d942ca45e8 100644 --- a/drivers/gpu/drm/sti/sti_gdp.c +++ b/drivers/gpu/drm/sti/sti_gdp.c @@ -886,8 +886,8 @@ struct drm_plane_funcs sti_gdp_plane_helpers_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, .destroy = sti_gdp_destroy, - .set_property = sti_plane_set_property, - .reset = drm_atomic_helper_plane_reset, + .set_property = drm_atomic_helper_plane_set_property, + .reset = sti_plane_reset, .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state, .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, .late_register = sti_gdp_late_register, diff --git a/drivers/gpu/drm/sti/sti_hqvdp.c b/drivers/gpu/drm/sti/sti_hqvdp.c index b03232247966..b5ee783e3e7c 100644 --- a/drivers/gpu/drm/sti/sti_hqvdp.c +++ b/drivers/gpu/drm/sti/sti_hqvdp.c @@ -1254,8 +1254,8 @@ struct drm_plane_funcs sti_hqvdp_plane_helpers_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, .destroy = sti_hqvdp_destroy, - .set_property = sti_plane_set_property, - .reset = drm_atomic_helper_plane_reset, + .set_property = drm_atomic_helper_plane_set_property, + .reset = sti_plane_reset, .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state, .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, .late_register = sti_hqvdp_late_register, diff --git a/drivers/gpu/drm/sti/sti_mixer.c b/drivers/gpu/drm/sti/sti_mixer.c index 1885c7ab5a8b..7d9aea805eab 100644 --- a/drivers/gpu/drm/sti/sti_mixer.c +++ b/drivers/gpu/drm/sti/sti_mixer.c @@ -239,13 +239,10 @@ static void sti_mixer_set_background_area(struct sti_mixer *mixer, int sti_mixer_set_plane_depth(struct sti_mixer *mixer, struct sti_plane *plane) { - int plane_id, depth = plane->zorder; + int plane_id, depth = plane->drm_plane.state->normalized_zpos; unsigned int i; u32 mask, val; - if ((depth < 1) || (depth > GAM_MIXER_NB_DEPTH_LEVEL)) - return 1; - switch (plane->desc) { case STI_GDP_0: plane_id = GAM_DEPTH_GDP0_ID; @@ -278,8 +275,8 @@ int sti_mixer_set_plane_depth(struct sti_mixer *mixer, struct sti_plane *plane) break; } - mask |= GAM_DEPTH_MASK_ID << (3 * (depth - 1)); - plane_id = plane_id << (3 * (depth - 1)); + mask |= GAM_DEPTH_MASK_ID << (3 * depth); + plane_id = plane_id << (3 * depth); DRM_DEBUG_DRIVER("%s %s depth=%d\n", sti_mixer_to_str(mixer), sti_plane_to_str(plane), depth); diff --git a/drivers/gpu/drm/sti/sti_plane.c b/drivers/gpu/drm/sti/sti_plane.c index 0cf3335ef37c..ca4b3719a64a 100644 --- a/drivers/gpu/drm/sti/sti_plane.c +++ b/drivers/gpu/drm/sti/sti_plane.c @@ -14,15 +14,6 @@ #include "sti_drv.h" #include "sti_plane.h" -/* (Background) < GDP0 < GDP1 < HQVDP0 < GDP2 < GDP3 < (ForeGround) */ -enum sti_plane_desc sti_plane_default_zorder[] = { - STI_GDP_0, - STI_GDP_1, - STI_HQVDP_0, - STI_GDP_2, - STI_GDP_3, -}; - const char *sti_plane_to_str(struct sti_plane *plane) { switch (plane->desc) { @@ -96,59 +87,46 @@ void sti_plane_update_fps(struct sti_plane *plane, plane->fps_info.fips_str); } -int sti_plane_set_property(struct drm_plane *drm_plane, - struct drm_property *property, - uint64_t val) +static int sti_plane_get_default_zpos(enum drm_plane_type type) { - struct drm_device *dev = drm_plane->dev; - struct sti_private *private = dev->dev_private; - struct sti_plane *plane = to_sti_plane(drm_plane); - - DRM_DEBUG_DRIVER("\n"); - - if (property == private->plane_zorder_property) { - plane->zorder = val; + switch (type) { + case DRM_PLANE_TYPE_PRIMARY: return 0; + case DRM_PLANE_TYPE_OVERLAY: + return 1; + case DRM_PLANE_TYPE_CURSOR: + return 7; } + return 0; +} - return -EINVAL; +void sti_plane_reset(struct drm_plane *plane) +{ + drm_atomic_helper_plane_reset(plane); + plane->state->zpos = sti_plane_get_default_zpos(plane->type); } -static void sti_plane_attach_zorder_property(struct drm_plane *drm_plane) +static void sti_plane_attach_zorder_property(struct drm_plane *drm_plane, + enum drm_plane_type type) { - struct drm_device *dev = drm_plane->dev; - struct sti_private *private = dev->dev_private; - struct sti_plane *plane = to_sti_plane(drm_plane); - struct drm_property *prop; - - prop = private->plane_zorder_property; - if (!prop) { - prop = drm_property_create_range(dev, 0, "zpos", 1, - GAM_MIXER_NB_DEPTH_LEVEL); - if (!prop) - return; - - private->plane_zorder_property = prop; + int zpos = sti_plane_get_default_zpos(type); + + switch (type) { + case DRM_PLANE_TYPE_PRIMARY: + case DRM_PLANE_TYPE_OVERLAY: + drm_plane_create_zpos_property(drm_plane, zpos, 0, 6); + break; + case DRM_PLANE_TYPE_CURSOR: + drm_plane_create_zpos_immutable_property(drm_plane, zpos); + break; } - - drm_object_attach_property(&drm_plane->base, prop, plane->zorder); } void sti_plane_init_property(struct sti_plane *plane, enum drm_plane_type type) { - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(sti_plane_default_zorder); i++) - if (sti_plane_default_zorder[i] == plane->desc) - break; - - plane->zorder = i + 1; - - if (type == DRM_PLANE_TYPE_OVERLAY) - sti_plane_attach_zorder_property(&plane->drm_plane); + sti_plane_attach_zorder_property(&plane->drm_plane, type); - DRM_DEBUG_DRIVER("drm plane:%d mapped to %s with zorder:%d\n", - plane->drm_plane.base.id, - sti_plane_to_str(plane), plane->zorder); + DRM_DEBUG_DRIVER("drm plane:%d mapped to %s\n", + plane->drm_plane.base.id, sti_plane_to_str(plane)); } diff --git a/drivers/gpu/drm/sti/sti_plane.h b/drivers/gpu/drm/sti/sti_plane.h index e0ea1dd3bb88..ce3e8d6c88bb 100644 --- a/drivers/gpu/drm/sti/sti_plane.h +++ b/drivers/gpu/drm/sti/sti_plane.h @@ -66,14 +66,12 @@ struct sti_fps_info { * @plane: drm plane it is bound to (if any) * @desc: plane type & id * @status: to know the status of the plane - * @zorder: plane z-order * @fps_info: frame per second info */ struct sti_plane { struct drm_plane drm_plane; enum sti_plane_desc desc; enum sti_plane_status status; - int zorder; struct sti_fps_info fps_info; }; @@ -82,10 +80,7 @@ void sti_plane_update_fps(struct sti_plane *plane, bool new_frame, bool new_field); -int sti_plane_set_property(struct drm_plane *drm_plane, - struct drm_property *property, - uint64_t val); - void sti_plane_init_property(struct sti_plane *plane, enum drm_plane_type type); +void sti_plane_reset(struct drm_plane *plane); #endif diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 4054d804fe06..42c074a9c955 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -354,7 +354,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) && !(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) - ret = ttm_bo_move_ttm(bo, evict, no_wait_gpu, mem); + ret = ttm_bo_move_ttm(bo, evict, interruptible, no_wait_gpu, + mem); else if (bdev->driver->move) ret = bdev->driver->move(bo, evict, interruptible, no_wait_gpu, mem); diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 2df602a35f92..f157a9efd220 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -45,7 +45,7 @@ void ttm_bo_free_old_node(struct ttm_buffer_object *bo) } int ttm_bo_move_ttm(struct ttm_buffer_object *bo, - bool evict, + bool evict, bool interruptible, bool no_wait_gpu, struct ttm_mem_reg *new_mem) { struct ttm_tt *ttm = bo->ttm; @@ -53,6 +53,14 @@ int ttm_bo_move_ttm(struct ttm_buffer_object *bo, int ret; if (old_mem->mem_type != TTM_PL_SYSTEM) { + ret = ttm_bo_wait(bo, interruptible, no_wait_gpu); + + if (unlikely(ret != 0)) { + if (ret != -ERESTARTSYS) + pr_err("Failed to expire sync object before unbinding TTM\n"); + return ret; + } + ttm_tt_unbind(ttm); ttm_bo_free_old_node(bo); ttm_flag_masked(&old_mem->placement, TTM_PL_FLAG_SYSTEM, diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 2137adfbd8c3..e9b7dc037ff8 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -84,6 +84,7 @@ source "drivers/infiniband/ulp/iser/Kconfig" source "drivers/infiniband/ulp/isert/Kconfig" source "drivers/infiniband/sw/rdmavt/Kconfig" +source "drivers/infiniband/sw/rxe/Kconfig" source "drivers/infiniband/hw/hfi1/Kconfig" diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index ad1b1adcf6f0..e6dfa1bd3def 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -68,6 +68,7 @@ MODULE_DESCRIPTION("Generic RDMA CM Agent"); MODULE_LICENSE("Dual BSD/GPL"); #define CMA_CM_RESPONSE_TIMEOUT 20 +#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000 #define CMA_MAX_CM_RETRIES 15 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) #define CMA_IBOE_PACKET_LIFETIME 18 @@ -162,6 +163,14 @@ struct rdma_bind_list { unsigned short port; }; +struct class_port_info_context { + struct ib_class_port_info *class_port_info; + struct ib_device *device; + struct completion done; + struct ib_sa_query *sa_query; + u8 port_num; +}; + static int cma_ps_alloc(struct net *net, enum rdma_port_space ps, struct rdma_bind_list *bind_list, int snum) { @@ -306,6 +315,7 @@ struct cma_multicast { struct sockaddr_storage addr; struct kref mcref; bool igmp_joined; + u8 join_state; }; struct cma_work { @@ -3752,10 +3762,63 @@ static void cma_set_mgid(struct rdma_id_private *id_priv, } } +static void cma_query_sa_classport_info_cb(int status, + struct ib_class_port_info *rec, + void *context) +{ + struct class_port_info_context *cb_ctx = context; + + WARN_ON(!context); + + if (status || !rec) { + pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n", + cb_ctx->device->name, cb_ctx->port_num, status); + goto out; + } + + memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info)); + +out: + complete(&cb_ctx->done); +} + +static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num, + struct ib_class_port_info *class_port_info) +{ + struct class_port_info_context *cb_ctx; + int ret; + + cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL); + if (!cb_ctx) + return -ENOMEM; + + cb_ctx->device = device; + cb_ctx->class_port_info = class_port_info; + cb_ctx->port_num = port_num; + init_completion(&cb_ctx->done); + + ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num, + CMA_QUERY_CLASSPORT_INFO_TIMEOUT, + GFP_KERNEL, cma_query_sa_classport_info_cb, + cb_ctx, &cb_ctx->sa_query); + if (ret < 0) { + pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n", + device->name, port_num, ret); + goto out; + } + + wait_for_completion(&cb_ctx->done); + +out: + kfree(cb_ctx); + return ret; +} + static int cma_join_ib_multicast(struct rdma_id_private *id_priv, struct cma_multicast *mc) { struct ib_sa_mcmember_rec rec; + struct ib_class_port_info class_port_info; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; ib_sa_comp_mask comp_mask; int ret; @@ -3774,7 +3837,24 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, rec.qkey = cpu_to_be32(id_priv->qkey); rdma_addr_get_sgid(dev_addr, &rec.port_gid); rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); - rec.join_state = 1; + rec.join_state = mc->join_state; + + if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) { + ret = cma_query_sa_classport_info(id_priv->id.device, + id_priv->id.port_num, + &class_port_info); + + if (ret) + return ret; + + if (!(ib_get_cpi_capmask2(&class_port_info) & + IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) { + pr_warn("RDMA CM: %s port %u Unable to multicast join\n" + "RDMA CM: SM doesn't support Send Only Full Member option\n", + id_priv->id.device->name, id_priv->id.port_num); + return -EOPNOTSUPP; + } + } comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | @@ -3843,6 +3923,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, struct sockaddr *addr = (struct sockaddr *)&mc->addr; struct net_device *ndev = NULL; enum ib_gid_type gid_type; + bool send_only; + + send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN); if (cma_zero_addr((struct sockaddr *)&mc->addr)) return -EINVAL; @@ -3878,10 +3961,12 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, if (addr->sa_family == AF_INET) { if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; - err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, - true); - if (!err) - mc->igmp_joined = true; + if (!send_only) { + err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, + true); + if (!err) + mc->igmp_joined = true; + } } } else { if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) @@ -3911,7 +3996,7 @@ out1: } int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, - void *context) + u8 join_state, void *context) { struct rdma_id_private *id_priv; struct cma_multicast *mc; @@ -3930,6 +4015,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, mc->context = context; mc->id_priv = id_priv; mc->igmp_joined = false; + mc->join_state = join_state; spin_lock(&id_priv->lock); list_add(&mc->list, &id_priv->mc_list); spin_unlock(&id_priv->lock); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 5c155fa91eec..760ef603a468 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -311,6 +311,15 @@ static int read_port_immutable(struct ib_device *device) return 0; } +void ib_get_device_fw_str(struct ib_device *dev, char *str, size_t str_len) +{ + if (dev->get_dev_fw_str) + dev->get_dev_fw_str(dev, str, str_len); + else + str[0] = '\0'; +} +EXPORT_SYMBOL(ib_get_device_fw_str); + /** * ib_register_device - Register an IB device with IB core * @device:Device to register diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index f0572049d291..357624f8b9d3 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -183,15 +183,14 @@ static void free_cm_id(struct iwcm_id_private *cm_id_priv) /* * Release a reference on cm_id. If the last reference is being - * released, enable the waiting thread (in iw_destroy_cm_id) to - * get woken up, and return 1 if a thread is already waiting. + * released, free the cm_id and return 1. */ static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) { BUG_ON(atomic_read(&cm_id_priv->refcount)==0); if (atomic_dec_and_test(&cm_id_priv->refcount)) { BUG_ON(!list_empty(&cm_id_priv->work_list)); - complete(&cm_id_priv->destroy_comp); + free_cm_id(cm_id_priv); return 1; } @@ -208,19 +207,10 @@ static void add_ref(struct iw_cm_id *cm_id) static void rem_ref(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; - int cb_destroy; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); - /* - * Test bit before deref in case the cm_id gets freed on another - * thread. - */ - cb_destroy = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); - if (iwcm_deref_id(cm_id_priv) && cb_destroy) { - BUG_ON(!list_empty(&cm_id_priv->work_list)); - free_cm_id(cm_id_priv); - } + (void)iwcm_deref_id(cm_id_priv); } static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); @@ -370,6 +360,12 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) wait_event(cm_id_priv->connect_wait, !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); + /* + * Since we're deleting the cm_id, drop any events that + * might arrive before the last dereference. + */ + set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags); + spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_LISTEN: @@ -433,13 +429,7 @@ void iw_destroy_cm_id(struct iw_cm_id *cm_id) struct iwcm_id_private *cm_id_priv; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); - BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)); - destroy_cm_id(cm_id); - - wait_for_completion(&cm_id_priv->destroy_comp); - - free_cm_id(cm_id_priv); } EXPORT_SYMBOL(iw_destroy_cm_id); @@ -809,10 +799,7 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, ret = cm_id->cm_handler(cm_id, iw_event); if (ret) { iw_cm_reject(cm_id, NULL, 0); - set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); - destroy_cm_id(cm_id); - if (atomic_read(&cm_id_priv->refcount)==0) - free_cm_id(cm_id_priv); + iw_destroy_cm_id(cm_id); } out: @@ -1000,7 +987,6 @@ static void cm_work_handler(struct work_struct *_work) unsigned long flags; int empty; int ret = 0; - int destroy_id; spin_lock_irqsave(&cm_id_priv->lock, flags); empty = list_empty(&cm_id_priv->work_list); @@ -1013,20 +999,14 @@ static void cm_work_handler(struct work_struct *_work) put_work(work); spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = process_event(cm_id_priv, &levent); - if (ret) { - set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); - destroy_cm_id(&cm_id_priv->id); - } - BUG_ON(atomic_read(&cm_id_priv->refcount)==0); - destroy_id = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); - if (iwcm_deref_id(cm_id_priv)) { - if (destroy_id) { - BUG_ON(!list_empty(&cm_id_priv->work_list)); - free_cm_id(cm_id_priv); - } + if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) { + ret = process_event(cm_id_priv, &levent); + if (ret) + destroy_cm_id(&cm_id_priv->id); + } else + pr_debug("dropping event %d\n", levent.event); + if (iwcm_deref_id(cm_id_priv)) return; - } if (empty) return; spin_lock_irqsave(&cm_id_priv->lock, flags); diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h index 3f6cc82564c8..82c2cd1b0a80 100644 --- a/drivers/infiniband/core/iwcm.h +++ b/drivers/infiniband/core/iwcm.h @@ -56,7 +56,7 @@ struct iwcm_id_private { struct list_head work_free_list; }; -#define IWCM_F_CALLBACK_DESTROY 1 +#define IWCM_F_DROP_EVENTS 1 #define IWCM_F_CONNECT_WAIT 2 #endif /* IWCM_H */ diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index b65e06c560d7..ade71e7f0131 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -37,6 +37,7 @@ #define IWPM_MAPINFO_HASH_MASK (IWPM_MAPINFO_HASH_SIZE - 1) #define IWPM_REMINFO_HASH_SIZE 64 #define IWPM_REMINFO_HASH_MASK (IWPM_REMINFO_HASH_SIZE - 1) +#define IWPM_MSG_SIZE 512 static LIST_HEAD(iwpm_nlmsg_req_list); static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock); @@ -452,7 +453,7 @@ struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh, { struct sk_buff *skb = NULL; - skb = dev_alloc_skb(NLMSG_GOODSIZE); + skb = dev_alloc_skb(IWPM_MSG_SIZE); if (!skb) { pr_err("%s Unable to allocate skb\n", __func__); goto create_nlmsg_exit; diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index a83ec28a147b..3a3c5d73bbfc 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -93,18 +93,6 @@ enum { struct mcast_member; -/* -* There are 4 types of join states: -* FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember. -*/ -enum { - FULLMEMBER_JOIN, - NONMEMBER_JOIN, - SENDONLY_NONMEBER_JOIN, - SENDONLY_FULLMEMBER_JOIN, - NUM_JOIN_MEMBERSHIP_TYPES, -}; - struct mcast_group { struct ib_sa_mcmember_rec rec; struct rb_node node; diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 9b8c20c8209b..10469b0088b5 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -229,7 +229,10 @@ static void ibnl_rcv(struct sk_buff *skb) int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, __u32 pid) { - return nlmsg_unicast(nls, skb, pid); + int err; + + err = netlink_unicast(nls, skb, pid, 0); + return (err < 0) ? err : 0; } EXPORT_SYMBOL(ibnl_unicast); @@ -252,6 +255,7 @@ int __init ibnl_init(void) return -ENOMEM; } + nls->sk_sndtimeo = 10 * HZ; return 0; } diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 1eb9b1294a63..dbfd854c32c9 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -58,19 +58,13 @@ static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num, return false; } -static inline u32 rdma_rw_max_sge(struct ib_device *dev, - enum dma_data_direction dir) -{ - return dir == DMA_TO_DEVICE ? - dev->attrs.max_sge : dev->attrs.max_sge_rd; -} - static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev) { /* arbitrary limit to avoid allocating gigantic resources */ return min_t(u32, dev->attrs.max_fast_reg_page_list_len, 256); } +/* Caller must have zero-initialized *reg. */ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num, struct rdma_rw_reg_ctx *reg, struct scatterlist *sg, u32 sg_cnt, u32 offset) @@ -114,6 +108,7 @@ static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset, u64 remote_addr, u32 rkey, enum dma_data_direction dir) { + struct rdma_rw_reg_ctx *prev = NULL; u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device); int i, j, ret = 0, count = 0; @@ -125,7 +120,6 @@ static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, } for (i = 0; i < ctx->nr_ops; i++) { - struct rdma_rw_reg_ctx *prev = i ? &ctx->reg[i - 1] : NULL; struct rdma_rw_reg_ctx *reg = &ctx->reg[i]; u32 nents = min(sg_cnt, pages_per_mr); @@ -162,9 +156,13 @@ static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, sg_cnt -= nents; for (j = 0; j < nents; j++) sg = sg_next(sg); + prev = reg; offset = 0; } + if (prev) + prev->wr.wr.next = NULL; + ctx->type = RDMA_RW_MR; return count; @@ -181,7 +179,8 @@ static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u64 remote_addr, u32 rkey, enum dma_data_direction dir) { struct ib_device *dev = qp->pd->device; - u32 max_sge = rdma_rw_max_sge(dev, dir); + u32 max_sge = dir == DMA_TO_DEVICE ? qp->max_write_sge : + qp->max_read_sge; struct ib_sge *sge; u32 total_len = 0, i, j; @@ -205,11 +204,10 @@ static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, rdma_wr->wr.opcode = IB_WR_RDMA_READ; rdma_wr->remote_addr = remote_addr + total_len; rdma_wr->rkey = rkey; + rdma_wr->wr.num_sge = nr_sge; rdma_wr->wr.sg_list = sge; for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) { - rdma_wr->wr.num_sge++; - sge->addr = ib_sg_dma_address(dev, sg) + offset; sge->length = ib_sg_dma_len(dev, sg) - offset; sge->lkey = qp->pd->local_dma_lkey; @@ -220,8 +218,8 @@ static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, offset = 0; } - if (i + 1 < ctx->nr_ops) - rdma_wr->wr.next = &ctx->map.wrs[i + 1].wr; + rdma_wr->wr.next = i + 1 < ctx->nr_ops ? + &ctx->map.wrs[i + 1].wr : NULL; } ctx->type = RDMA_RW_MULTI_WR; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index e95538650dc6..b9bf7aa055e7 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -65,10 +65,17 @@ struct ib_sa_sm_ah { u8 src_path_mask; }; +struct ib_sa_classport_cache { + bool valid; + struct ib_class_port_info data; +}; + struct ib_sa_port { struct ib_mad_agent *agent; struct ib_sa_sm_ah *sm_ah; struct work_struct update_task; + struct ib_sa_classport_cache classport_info; + spinlock_t classport_lock; /* protects class port info set */ spinlock_t ah_lock; u8 port_num; }; @@ -998,6 +1005,13 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event port->sm_ah = NULL; spin_unlock_irqrestore(&port->ah_lock, flags); + if (event->event == IB_EVENT_SM_CHANGE || + event->event == IB_EVENT_CLIENT_REREGISTER || + event->event == IB_EVENT_LID_CHANGE) { + spin_lock_irqsave(&port->classport_lock, flags); + port->classport_info.valid = false; + spin_unlock_irqrestore(&port->classport_lock, flags); + } queue_work(ib_wq, &sa_dev->port[event->element.port_num - sa_dev->start_port].update_task); } @@ -1719,6 +1733,7 @@ static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { + unsigned long flags; struct ib_sa_classport_info_query *query = container_of(sa_query, struct ib_sa_classport_info_query, sa_query); @@ -1728,6 +1743,16 @@ static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query, ib_unpack(classport_info_rec_table, ARRAY_SIZE(classport_info_rec_table), mad->data, &rec); + + spin_lock_irqsave(&sa_query->port->classport_lock, flags); + if (!status && !sa_query->port->classport_info.valid) { + memcpy(&sa_query->port->classport_info.data, &rec, + sizeof(sa_query->port->classport_info.data)); + + sa_query->port->classport_info.valid = true; + } + spin_unlock_irqrestore(&sa_query->port->classport_lock, flags); + query->callback(status, &rec, query->context); } else { query->callback(status, NULL, query->context); @@ -1754,7 +1779,9 @@ int ib_sa_classport_info_rec_query(struct ib_sa_client *client, struct ib_sa_port *port; struct ib_mad_agent *agent; struct ib_sa_mad *mad; + struct ib_class_port_info cached_class_port_info; int ret; + unsigned long flags; if (!sa_dev) return -ENODEV; @@ -1762,6 +1789,17 @@ int ib_sa_classport_info_rec_query(struct ib_sa_client *client, port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; + /* Use cached ClassPortInfo attribute if valid instead of sending mad */ + spin_lock_irqsave(&port->classport_lock, flags); + if (port->classport_info.valid && callback) { + memcpy(&cached_class_port_info, &port->classport_info.data, + sizeof(cached_class_port_info)); + spin_unlock_irqrestore(&port->classport_lock, flags); + callback(0, &cached_class_port_info, context); + return 0; + } + spin_unlock_irqrestore(&port->classport_lock, flags); + query = kzalloc(sizeof(*query), gfp_mask); if (!query) return -ENOMEM; @@ -1885,6 +1923,9 @@ static void ib_sa_add_one(struct ib_device *device) sa_dev->port[i].sm_ah = NULL; sa_dev->port[i].port_num = i + s; + spin_lock_init(&sa_dev->port[i].classport_lock); + sa_dev->port[i].classport_info.valid = false; + sa_dev->port[i].agent = ib_register_mad_agent(device, i + s, IB_QPT_GSI, NULL, 0, send_handler, diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 60df4f8e81be..15defefecb4f 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -38,6 +38,7 @@ #include <linux/stat.h> #include <linux/string.h> #include <linux/netdevice.h> +#include <linux/ethtool.h> #include <rdma/ib_mad.h> #include <rdma/ib_pma.h> @@ -1200,16 +1201,28 @@ static ssize_t set_node_desc(struct device *device, return count; } +static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct ib_device *dev = container_of(device, struct ib_device, dev); + + ib_get_device_fw_str(dev, buf, PAGE_SIZE); + strlcat(buf, "\n", PAGE_SIZE); + return strlen(buf); +} + static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL); static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL); static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL); static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc); +static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static struct device_attribute *ib_class_attributes[] = { &dev_attr_node_type, &dev_attr_sys_image_guid, &dev_attr_node_guid, - &dev_attr_node_desc + &dev_attr_node_desc, + &dev_attr_fw_ver, }; static void free_port_list_attributes(struct ib_device *device) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index c0f3826abb30..2825ece91d3c 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -106,6 +106,7 @@ struct ucma_multicast { int events_reported; u64 uid; + u8 join_state; struct list_head list; struct sockaddr_storage addr; }; @@ -1317,12 +1318,20 @@ static ssize_t ucma_process_join(struct ucma_file *file, struct ucma_multicast *mc; struct sockaddr *addr; int ret; + u8 join_state; if (out_len < sizeof(resp)) return -ENOSPC; addr = (struct sockaddr *) &cmd->addr; - if (cmd->reserved || !cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr))) + if (!cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr))) + return -EINVAL; + + if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER) + join_state = BIT(FULLMEMBER_JOIN); + else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER) + join_state = BIT(SENDONLY_FULLMEMBER_JOIN); + else return -EINVAL; ctx = ucma_get_ctx(file, cmd->id); @@ -1335,10 +1344,11 @@ static ssize_t ucma_process_join(struct ucma_file *file, ret = -ENOMEM; goto err1; } - + mc->join_state = join_state; mc->uid = cmd->uid; memcpy(&mc->addr, addr, cmd->addr_size); - ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc); + ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr, + join_state, mc); if (ret) goto err2; @@ -1382,7 +1392,7 @@ static ssize_t ucma_join_ip_multicast(struct ucma_file *file, join_cmd.uid = cmd.uid; join_cmd.id = cmd.id; join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr); - join_cmd.reserved = 0; + join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER; memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size); return ucma_process_join(file, &join_cmd, out_len); diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index fe4d2e1a8b58..c68746ce6624 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -37,7 +37,6 @@ #include <linux/sched.h> #include <linux/export.h> #include <linux/hugetlb.h> -#include <linux/dma-attrs.h> #include <linux/slab.h> #include <rdma/ib_umem_odp.h> @@ -92,12 +91,12 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, unsigned long npages; int ret; int i; - DEFINE_DMA_ATTRS(attrs); + unsigned long dma_attrs = 0; struct scatterlist *sg, *sg_list_start; int need_release = 0; if (dmasync) - dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); + dma_attrs |= DMA_ATTR_WRITE_BARRIER; if (!size) return ERR_PTR(-EINVAL); @@ -215,7 +214,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, umem->sg_head.sgl, umem->npages, DMA_BIDIRECTIONAL, - &attrs); + dma_attrs); if (umem->nmap <= 0) { ret = -ENOMEM; diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 612ccfd39bf9..df26a741cda6 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -116,6 +116,7 @@ struct ib_uverbs_event_file { struct ib_uverbs_file { struct kref ref; struct mutex mutex; + struct mutex cleanup_mutex; /* protect cleanup */ struct ib_uverbs_device *device; struct ib_ucontext *ucontext; struct ib_event_handler event_handler; @@ -162,6 +163,10 @@ struct ib_uqp_object { struct ib_uxrcd_object *uxrcd; }; +struct ib_uwq_object { + struct ib_uevent_object uevent; +}; + struct ib_ucq_object { struct ib_uobject uobject; struct ib_uverbs_file *uverbs_file; @@ -181,6 +186,8 @@ extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; extern struct idr ib_uverbs_xrcd_idr; extern struct idr ib_uverbs_rule_idr; +extern struct idr ib_uverbs_wq_idr; +extern struct idr ib_uverbs_rwq_ind_tbl_idr; void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); @@ -199,6 +206,7 @@ void ib_uverbs_release_uevent(struct ib_uverbs_file *file, void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); +void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); @@ -219,6 +227,7 @@ struct ib_uverbs_flow_spec { struct ib_uverbs_flow_spec_eth eth; struct ib_uverbs_flow_spec_ipv4 ipv4; struct ib_uverbs_flow_spec_tcp_udp tcp_udp; + struct ib_uverbs_flow_spec_ipv6 ipv6; }; }; @@ -275,5 +284,10 @@ IB_UVERBS_DECLARE_EX_CMD(destroy_flow); IB_UVERBS_DECLARE_EX_CMD(query_device); IB_UVERBS_DECLARE_EX_CMD(create_cq); IB_UVERBS_DECLARE_EX_CMD(create_qp); +IB_UVERBS_DECLARE_EX_CMD(create_wq); +IB_UVERBS_DECLARE_EX_CMD(modify_wq); +IB_UVERBS_DECLARE_EX_CMD(destroy_wq); +IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table); +IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 825021d1008b..f6647318138d 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -57,6 +57,8 @@ static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" }; static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; +static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" }; +static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" }; /* * The ib_uobject locking scheme is as follows: @@ -243,6 +245,27 @@ static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0); } +static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0); +} + +static void put_wq_read(struct ib_wq *wq) +{ + put_uobj_read(wq->uobject); +} + +static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle, + struct ib_ucontext *context) +{ + return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0); +} + +static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table) +{ + put_uobj_read(ind_table->uobject); +} + static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context) { struct ib_uobject *uobj; @@ -326,6 +349,8 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->qp_list); INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); + INIT_LIST_HEAD(&ucontext->wq_list); + INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list); INIT_LIST_HEAD(&ucontext->xrcd_list); INIT_LIST_HEAD(&ucontext->rule_list); rcu_read_lock(); @@ -1750,6 +1775,8 @@ static int create_qp(struct ib_uverbs_file *file, struct ib_qp_init_attr attr = {}; struct ib_uverbs_ex_create_qp_resp resp; int ret; + struct ib_rwq_ind_table *ind_tbl = NULL; + bool has_sq = true; if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) return -EPERM; @@ -1761,6 +1788,32 @@ static int create_qp(struct ib_uverbs_file *file, init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &qp_lock_class); down_write(&obj->uevent.uobject.mutex); + if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) + + sizeof(cmd->rwq_ind_tbl_handle) && + (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) { + ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle, + file->ucontext); + if (!ind_tbl) { + ret = -EINVAL; + goto err_put; + } + + attr.rwq_ind_tbl = ind_tbl; + } + + if ((cmd_sz >= offsetof(typeof(*cmd), reserved1) + + sizeof(cmd->reserved1)) && cmd->reserved1) { + ret = -EOPNOTSUPP; + goto err_put; + } + + if (ind_tbl && (cmd->max_recv_wr || cmd->max_recv_sge || cmd->is_srq)) { + ret = -EINVAL; + goto err_put; + } + + if (ind_tbl && !cmd->max_send_wr) + has_sq = false; if (cmd->qp_type == IB_QPT_XRC_TGT) { xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext, @@ -1784,20 +1837,24 @@ static int create_qp(struct ib_uverbs_file *file, } } - if (cmd->recv_cq_handle != cmd->send_cq_handle) { - rcq = idr_read_cq(cmd->recv_cq_handle, - file->ucontext, 0); - if (!rcq) { - ret = -EINVAL; - goto err_put; + if (!ind_tbl) { + if (cmd->recv_cq_handle != cmd->send_cq_handle) { + rcq = idr_read_cq(cmd->recv_cq_handle, + file->ucontext, 0); + if (!rcq) { + ret = -EINVAL; + goto err_put; + } } } } - scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq); - rcq = rcq ?: scq; + if (has_sq) + scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq); + if (!ind_tbl) + rcq = rcq ?: scq; pd = idr_read_pd(cmd->pd_handle, file->ucontext); - if (!pd || !scq) { + if (!pd || (!scq && has_sq)) { ret = -EINVAL; goto err_put; } @@ -1864,16 +1921,20 @@ static int create_qp(struct ib_uverbs_file *file, qp->send_cq = attr.send_cq; qp->recv_cq = attr.recv_cq; qp->srq = attr.srq; + qp->rwq_ind_tbl = ind_tbl; qp->event_handler = attr.event_handler; qp->qp_context = attr.qp_context; qp->qp_type = attr.qp_type; atomic_set(&qp->usecnt, 0); atomic_inc(&pd->usecnt); - atomic_inc(&attr.send_cq->usecnt); + if (attr.send_cq) + atomic_inc(&attr.send_cq->usecnt); if (attr.recv_cq) atomic_inc(&attr.recv_cq->usecnt); if (attr.srq) atomic_inc(&attr.srq->usecnt); + if (ind_tbl) + atomic_inc(&ind_tbl->usecnt); } qp->uobject = &obj->uevent.uobject; @@ -1913,6 +1974,8 @@ static int create_qp(struct ib_uverbs_file *file, put_cq_read(rcq); if (srq) put_srq_read(srq); + if (ind_tbl) + put_rwq_indirection_table_read(ind_tbl); mutex_lock(&file->mutex); list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); @@ -1940,6 +2003,8 @@ err_put: put_cq_read(rcq); if (srq) put_srq_read(srq); + if (ind_tbl) + put_rwq_indirection_table_read(ind_tbl); put_uobj_write(&obj->uevent.uobject); return ret; @@ -2033,7 +2098,7 @@ int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, if (err) return err; - if (cmd.comp_mask) + if (cmd.comp_mask & ~IB_UVERBS_CREATE_QP_SUP_COMP_MASK) return -EINVAL; if (cmd.reserved) @@ -3040,6 +3105,15 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask, sizeof(struct ib_flow_ipv4_filter)); break; + case IB_FLOW_SPEC_IPV6: + ib_spec->ipv6.size = sizeof(struct ib_flow_spec_ipv6); + if (ib_spec->ipv6.size != kern_spec->ipv6.size) + return -EINVAL; + memcpy(&ib_spec->ipv6.val, &kern_spec->ipv6.val, + sizeof(struct ib_flow_ipv6_filter)); + memcpy(&ib_spec->ipv6.mask, &kern_spec->ipv6.mask, + sizeof(struct ib_flow_ipv6_filter)); + break; case IB_FLOW_SPEC_TCP: case IB_FLOW_SPEC_UDP: ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp); @@ -3056,6 +3130,445 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, return 0; } +int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_create_wq cmd = {}; + struct ib_uverbs_ex_create_wq_resp resp = {}; + struct ib_uwq_object *obj; + int err = 0; + struct ib_cq *cq; + struct ib_pd *pd; + struct ib_wq *wq; + struct ib_wq_init_attr wq_init_attr = {}; + size_t required_cmd_sz; + size_t required_resp_len; + + required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge); + required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn); + + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + if (ucore->outlen < required_resp_len) + return -ENOSPC; + + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (err) + return err; + + if (cmd.comp_mask) + return -EOPNOTSUPP; + + obj = kmalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, + &wq_lock_class); + down_write(&obj->uevent.uobject.mutex); + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) { + err = -EINVAL; + goto err_uobj; + } + + cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + if (!cq) { + err = -EINVAL; + goto err_put_pd; + } + + wq_init_attr.cq = cq; + wq_init_attr.max_sge = cmd.max_sge; + wq_init_attr.max_wr = cmd.max_wr; + wq_init_attr.wq_context = file; + wq_init_attr.wq_type = cmd.wq_type; + wq_init_attr.event_handler = ib_uverbs_wq_event_handler; + obj->uevent.events_reported = 0; + INIT_LIST_HEAD(&obj->uevent.event_list); + wq = pd->device->create_wq(pd, &wq_init_attr, uhw); + if (IS_ERR(wq)) { + err = PTR_ERR(wq); + goto err_put_cq; + } + + wq->uobject = &obj->uevent.uobject; + obj->uevent.uobject.object = wq; + wq->wq_type = wq_init_attr.wq_type; + wq->cq = cq; + wq->pd = pd; + wq->device = pd->device; + wq->wq_context = wq_init_attr.wq_context; + atomic_set(&wq->usecnt, 0); + atomic_inc(&pd->usecnt); + atomic_inc(&cq->usecnt); + wq->uobject = &obj->uevent.uobject; + obj->uevent.uobject.object = wq; + err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject); + if (err) + goto destroy_wq; + + memset(&resp, 0, sizeof(resp)); + resp.wq_handle = obj->uevent.uobject.id; + resp.max_sge = wq_init_attr.max_sge; + resp.max_wr = wq_init_attr.max_wr; + resp.wqn = wq->wq_num; + resp.response_length = required_resp_len; + err = ib_copy_to_udata(ucore, + &resp, resp.response_length); + if (err) + goto err_copy; + + put_pd_read(pd); + put_cq_read(cq); + + mutex_lock(&file->mutex); + list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list); + mutex_unlock(&file->mutex); + + obj->uevent.uobject.live = 1; + up_write(&obj->uevent.uobject.mutex); + return 0; + +err_copy: + idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject); +destroy_wq: + ib_destroy_wq(wq); +err_put_cq: + put_cq_read(cq); +err_put_pd: + put_pd_read(pd); +err_uobj: + put_uobj_write(&obj->uevent.uobject); + + return err; +} + +int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_destroy_wq cmd = {}; + struct ib_uverbs_ex_destroy_wq_resp resp = {}; + struct ib_wq *wq; + struct ib_uobject *uobj; + struct ib_uwq_object *obj; + size_t required_cmd_sz; + size_t required_resp_len; + int ret; + + required_cmd_sz = offsetof(typeof(cmd), wq_handle) + sizeof(cmd.wq_handle); + required_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); + + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + if (ucore->outlen < required_resp_len) + return -ENOSPC; + + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (cmd.comp_mask) + return -EOPNOTSUPP; + + resp.response_length = required_resp_len; + uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle, + file->ucontext); + if (!uobj) + return -EINVAL; + + wq = uobj->object; + obj = container_of(uobj, struct ib_uwq_object, uevent.uobject); + ret = ib_destroy_wq(wq); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + if (ret) + return ret; + + idr_remove_uobj(&ib_uverbs_wq_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + ib_uverbs_release_uevent(file, &obj->uevent); + resp.events_reported = obj->uevent.events_reported; + put_uobj(uobj); + + ret = ib_copy_to_udata(ucore, &resp, resp.response_length); + if (ret) + return ret; + + return 0; +} + +int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_modify_wq cmd = {}; + struct ib_wq *wq; + struct ib_wq_attr wq_attr = {}; + size_t required_cmd_sz; + int ret; + + required_cmd_sz = offsetof(typeof(cmd), curr_wq_state) + sizeof(cmd.curr_wq_state); + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (!cmd.attr_mask) + return -EINVAL; + + if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE)) + return -EINVAL; + + wq = idr_read_wq(cmd.wq_handle, file->ucontext); + if (!wq) + return -EINVAL; + + wq_attr.curr_wq_state = cmd.curr_wq_state; + wq_attr.wq_state = cmd.wq_state; + ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw); + put_wq_read(wq); + return ret; +} + +int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_create_rwq_ind_table cmd = {}; + struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {}; + struct ib_uobject *uobj; + int err = 0; + struct ib_rwq_ind_table_init_attr init_attr = {}; + struct ib_rwq_ind_table *rwq_ind_tbl; + struct ib_wq **wqs = NULL; + u32 *wqs_handles = NULL; + struct ib_wq *wq = NULL; + int i, j, num_read_wqs; + u32 num_wq_handles; + u32 expected_in_size; + size_t required_cmd_sz_header; + size_t required_resp_len; + + required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size); + required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num); + + if (ucore->inlen < required_cmd_sz_header) + return -EINVAL; + + if (ucore->outlen < required_resp_len) + return -ENOSPC; + + err = ib_copy_from_udata(&cmd, ucore, required_cmd_sz_header); + if (err) + return err; + + ucore->inbuf += required_cmd_sz_header; + ucore->inlen -= required_cmd_sz_header; + + if (cmd.comp_mask) + return -EOPNOTSUPP; + + if (cmd.log_ind_tbl_size > IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE) + return -EINVAL; + + num_wq_handles = 1 << cmd.log_ind_tbl_size; + expected_in_size = num_wq_handles * sizeof(__u32); + if (num_wq_handles == 1) + /* input size for wq handles is u64 aligned */ + expected_in_size += sizeof(__u32); + + if (ucore->inlen < expected_in_size) + return -EINVAL; + + if (ucore->inlen > expected_in_size && + !ib_is_udata_cleared(ucore, expected_in_size, + ucore->inlen - expected_in_size)) + return -EOPNOTSUPP; + + wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles), + GFP_KERNEL); + if (!wqs_handles) + return -ENOMEM; + + err = ib_copy_from_udata(wqs_handles, ucore, + num_wq_handles * sizeof(__u32)); + if (err) + goto err_free; + + wqs = kcalloc(num_wq_handles, sizeof(*wqs), GFP_KERNEL); + if (!wqs) { + err = -ENOMEM; + goto err_free; + } + + for (num_read_wqs = 0; num_read_wqs < num_wq_handles; + num_read_wqs++) { + wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext); + if (!wq) { + err = -EINVAL; + goto put_wqs; + } + + wqs[num_read_wqs] = wq; + } + + uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); + if (!uobj) { + err = -ENOMEM; + goto put_wqs; + } + + init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class); + down_write(&uobj->mutex); + init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size; + init_attr.ind_tbl = wqs; + rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw); + + if (IS_ERR(rwq_ind_tbl)) { + err = PTR_ERR(rwq_ind_tbl); + goto err_uobj; + } + + rwq_ind_tbl->ind_tbl = wqs; + rwq_ind_tbl->log_ind_tbl_size = init_attr.log_ind_tbl_size; + rwq_ind_tbl->uobject = uobj; + uobj->object = rwq_ind_tbl; + rwq_ind_tbl->device = ib_dev; + atomic_set(&rwq_ind_tbl->usecnt, 0); + + for (i = 0; i < num_wq_handles; i++) + atomic_inc(&wqs[i]->usecnt); + + err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); + if (err) + goto destroy_ind_tbl; + + resp.ind_tbl_handle = uobj->id; + resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num; + resp.response_length = required_resp_len; + + err = ib_copy_to_udata(ucore, + &resp, resp.response_length); + if (err) + goto err_copy; + + kfree(wqs_handles); + + for (j = 0; j < num_read_wqs; j++) + put_wq_read(wqs[j]); + + mutex_lock(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list); + mutex_unlock(&file->mutex); + + uobj->live = 1; + + up_write(&uobj->mutex); + return 0; + +err_copy: + idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); +destroy_ind_tbl: + ib_destroy_rwq_ind_table(rwq_ind_tbl); +err_uobj: + put_uobj_write(uobj); +put_wqs: + for (j = 0; j < num_read_wqs; j++) + put_wq_read(wqs[j]); +err_free: + kfree(wqs_handles); + kfree(wqs); + return err; +} + +int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {}; + struct ib_rwq_ind_table *rwq_ind_tbl; + struct ib_uobject *uobj; + int ret; + struct ib_wq **ind_tbl; + size_t required_cmd_sz; + + required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle); + + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (cmd.comp_mask) + return -EOPNOTSUPP; + + uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle, + file->ucontext); + if (!uobj) + return -EINVAL; + rwq_ind_tbl = uobj->object; + ind_tbl = rwq_ind_tbl->ind_tbl; + + ret = ib_destroy_rwq_ind_table(rwq_ind_tbl); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + + if (ret) + return ret; + + idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + kfree(ind_tbl); + return ret; +} + int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_udata *ucore, diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 31f422a70623..0012fa58c105 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -76,6 +76,8 @@ DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); DEFINE_IDR(ib_uverbs_xrcd_idr); DEFINE_IDR(ib_uverbs_rule_idr); +DEFINE_IDR(ib_uverbs_wq_idr); +DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr); static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); @@ -130,6 +132,11 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq, [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp, + [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq, + [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq, + [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq, + [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table, + [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table, }; static void ib_uverbs_add_one(struct ib_device *device); @@ -265,6 +272,27 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, kfree(uqp); } + list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) { + struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object; + struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl; + + idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); + ib_destroy_rwq_ind_table(rwq_ind_tbl); + kfree(ind_tbl); + kfree(uobj); + } + + list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) { + struct ib_wq *wq = uobj->object; + struct ib_uwq_object *uwq = + container_of(uobj, struct ib_uwq_object, uevent.uobject); + + idr_remove_uobj(&ib_uverbs_wq_idr, uobj); + ib_destroy_wq(wq); + ib_uverbs_release_uevent(file, &uwq->uevent); + kfree(uwq); + } + list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { struct ib_srq *srq = uobj->object; struct ib_uevent_object *uevent = @@ -568,6 +596,16 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) &uobj->events_reported); } +void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr) +{ + struct ib_uevent_object *uobj = container_of(event->element.wq->uobject, + struct ib_uevent_object, uobject); + + ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, + event->event, &uobj->event_list, + &uobj->events_reported); +} + void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) { struct ib_uevent_object *uobj; @@ -931,6 +969,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) file->async_file = NULL; kref_init(&file->ref); mutex_init(&file->mutex); + mutex_init(&file->cleanup_mutex); filp->private_data = file; kobject_get(&dev->kobj); @@ -956,18 +995,20 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; struct ib_uverbs_device *dev = file->device; - struct ib_ucontext *ucontext = NULL; + + mutex_lock(&file->cleanup_mutex); + if (file->ucontext) { + ib_uverbs_cleanup_ucontext(file, file->ucontext); + file->ucontext = NULL; + } + mutex_unlock(&file->cleanup_mutex); mutex_lock(&file->device->lists_mutex); - ucontext = file->ucontext; - file->ucontext = NULL; if (!file->is_closed) { list_del(&file->list); file->is_closed = 1; } mutex_unlock(&file->device->lists_mutex); - if (ucontext) - ib_uverbs_cleanup_ucontext(file, ucontext); if (file->async_file) kref_put(&file->async_file->ref, ib_uverbs_release_event_file); @@ -1181,22 +1222,30 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, mutex_lock(&uverbs_dev->lists_mutex); while (!list_empty(&uverbs_dev->uverbs_file_list)) { struct ib_ucontext *ucontext; - file = list_first_entry(&uverbs_dev->uverbs_file_list, struct ib_uverbs_file, list); file->is_closed = 1; - ucontext = file->ucontext; list_del(&file->list); - file->ucontext = NULL; kref_get(&file->ref); mutex_unlock(&uverbs_dev->lists_mutex); - /* We must release the mutex before going ahead and calling - * disassociate_ucontext. disassociate_ucontext might end up - * indirectly calling uverbs_close, for example due to freeing - * the resources (e.g mmput). - */ + ib_uverbs_event_handler(&file->event_handler, &event); + + mutex_lock(&file->cleanup_mutex); + ucontext = file->ucontext; + file->ucontext = NULL; + mutex_unlock(&file->cleanup_mutex); + + /* At this point ib_uverbs_close cannot be running + * ib_uverbs_cleanup_ucontext + */ if (ucontext) { + /* We must release the mutex before going ahead and + * calling disassociate_ucontext. disassociate_ucontext + * might end up indirectly calling uverbs_close, + * for example due to freeing the resources + * (e.g mmput). + */ ib_dev->disassociate_ucontext(ucontext); ib_uverbs_cleanup_ucontext(file, ucontext); } diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 6298f54b4137..f2b776efab3a 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -758,6 +758,12 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, struct ib_qp *qp; int ret; + if (qp_init_attr->rwq_ind_tbl && + (qp_init_attr->recv_cq || + qp_init_attr->srq || qp_init_attr->cap.max_recv_wr || + qp_init_attr->cap.max_recv_sge)) + return ERR_PTR(-EINVAL); + /* * If the callers is using the RDMA API calculate the resources * needed for the RDMA READ/WRITE operations. @@ -775,6 +781,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->real_qp = qp; qp->uobject = NULL; qp->qp_type = qp_init_attr->qp_type; + qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl; atomic_set(&qp->usecnt, 0); qp->mrs_used = 0; @@ -792,7 +799,8 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->srq = NULL; } else { qp->recv_cq = qp_init_attr->recv_cq; - atomic_inc(&qp_init_attr->recv_cq->usecnt); + if (qp_init_attr->recv_cq) + atomic_inc(&qp_init_attr->recv_cq->usecnt); qp->srq = qp_init_attr->srq; if (qp->srq) atomic_inc(&qp_init_attr->srq->usecnt); @@ -803,7 +811,10 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->xrcd = NULL; atomic_inc(&pd->usecnt); - atomic_inc(&qp_init_attr->send_cq->usecnt); + if (qp_init_attr->send_cq) + atomic_inc(&qp_init_attr->send_cq->usecnt); + if (qp_init_attr->rwq_ind_tbl) + atomic_inc(&qp->rwq_ind_tbl->usecnt); if (qp_init_attr->cap.max_rdma_ctxs) { ret = rdma_rw_init_mrs(qp, qp_init_attr); @@ -814,6 +825,15 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, } } + /* + * Note: all hw drivers guarantee that max_send_sge is lower than + * the device RDMA WRITE SGE limit but not all hw drivers ensure that + * max_send_sge <= max_sge_rd. + */ + qp->max_write_sge = qp_init_attr->cap.max_send_sge; + qp->max_read_sge = min_t(u32, qp_init_attr->cap.max_send_sge, + device->attrs.max_sge_rd); + return qp; } EXPORT_SYMBOL(ib_create_qp); @@ -1283,6 +1303,7 @@ int ib_destroy_qp(struct ib_qp *qp) struct ib_pd *pd; struct ib_cq *scq, *rcq; struct ib_srq *srq; + struct ib_rwq_ind_table *ind_tbl; int ret; WARN_ON_ONCE(qp->mrs_used > 0); @@ -1297,6 +1318,7 @@ int ib_destroy_qp(struct ib_qp *qp) scq = qp->send_cq; rcq = qp->recv_cq; srq = qp->srq; + ind_tbl = qp->rwq_ind_tbl; if (!qp->uobject) rdma_rw_cleanup_mrs(qp); @@ -1311,6 +1333,8 @@ int ib_destroy_qp(struct ib_qp *qp) atomic_dec(&rcq->usecnt); if (srq) atomic_dec(&srq->usecnt); + if (ind_tbl) + atomic_dec(&ind_tbl->usecnt); } return ret; @@ -1558,6 +1582,150 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd) } EXPORT_SYMBOL(ib_dealloc_xrcd); +/** + * ib_create_wq - Creates a WQ associated with the specified protection + * domain. + * @pd: The protection domain associated with the WQ. + * @wq_init_attr: A list of initial attributes required to create the + * WQ. If WQ creation succeeds, then the attributes are updated to + * the actual capabilities of the created WQ. + * + * wq_init_attr->max_wr and wq_init_attr->max_sge determine + * the requested size of the WQ, and set to the actual values allocated + * on return. + * If ib_create_wq() succeeds, then max_wr and max_sge will always be + * at least as large as the requested values. + */ +struct ib_wq *ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *wq_attr) +{ + struct ib_wq *wq; + + if (!pd->device->create_wq) + return ERR_PTR(-ENOSYS); + + wq = pd->device->create_wq(pd, wq_attr, NULL); + if (!IS_ERR(wq)) { + wq->event_handler = wq_attr->event_handler; + wq->wq_context = wq_attr->wq_context; + wq->wq_type = wq_attr->wq_type; + wq->cq = wq_attr->cq; + wq->device = pd->device; + wq->pd = pd; + wq->uobject = NULL; + atomic_inc(&pd->usecnt); + atomic_inc(&wq_attr->cq->usecnt); + atomic_set(&wq->usecnt, 0); + } + return wq; +} +EXPORT_SYMBOL(ib_create_wq); + +/** + * ib_destroy_wq - Destroys the specified WQ. + * @wq: The WQ to destroy. + */ +int ib_destroy_wq(struct ib_wq *wq) +{ + int err; + struct ib_cq *cq = wq->cq; + struct ib_pd *pd = wq->pd; + + if (atomic_read(&wq->usecnt)) + return -EBUSY; + + err = wq->device->destroy_wq(wq); + if (!err) { + atomic_dec(&pd->usecnt); + atomic_dec(&cq->usecnt); + } + return err; +} +EXPORT_SYMBOL(ib_destroy_wq); + +/** + * ib_modify_wq - Modifies the specified WQ. + * @wq: The WQ to modify. + * @wq_attr: On input, specifies the WQ attributes to modify. + * @wq_attr_mask: A bit-mask used to specify which attributes of the WQ + * are being modified. + * On output, the current values of selected WQ attributes are returned. + */ +int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, + u32 wq_attr_mask) +{ + int err; + + if (!wq->device->modify_wq) + return -ENOSYS; + + err = wq->device->modify_wq(wq, wq_attr, wq_attr_mask, NULL); + return err; +} +EXPORT_SYMBOL(ib_modify_wq); + +/* + * ib_create_rwq_ind_table - Creates a RQ Indirection Table. + * @device: The device on which to create the rwq indirection table. + * @ib_rwq_ind_table_init_attr: A list of initial attributes required to + * create the Indirection Table. + * + * Note: The life time of ib_rwq_ind_table_init_attr->ind_tbl is not less + * than the created ib_rwq_ind_table object and the caller is responsible + * for its memory allocation/free. + */ +struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr) +{ + struct ib_rwq_ind_table *rwq_ind_table; + int i; + u32 table_size; + + if (!device->create_rwq_ind_table) + return ERR_PTR(-ENOSYS); + + table_size = (1 << init_attr->log_ind_tbl_size); + rwq_ind_table = device->create_rwq_ind_table(device, + init_attr, NULL); + if (IS_ERR(rwq_ind_table)) + return rwq_ind_table; + + rwq_ind_table->ind_tbl = init_attr->ind_tbl; + rwq_ind_table->log_ind_tbl_size = init_attr->log_ind_tbl_size; + rwq_ind_table->device = device; + rwq_ind_table->uobject = NULL; + atomic_set(&rwq_ind_table->usecnt, 0); + + for (i = 0; i < table_size; i++) + atomic_inc(&rwq_ind_table->ind_tbl[i]->usecnt); + + return rwq_ind_table; +} +EXPORT_SYMBOL(ib_create_rwq_ind_table); + +/* + * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table. + * @wq_ind_table: The Indirection Table to destroy. +*/ +int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table) +{ + int err, i; + u32 table_size = (1 << rwq_ind_table->log_ind_tbl_size); + struct ib_wq **ind_tbl = rwq_ind_table->ind_tbl; + + if (atomic_read(&rwq_ind_table->usecnt)) + return -EBUSY; + + err = rwq_ind_table->device->destroy_rwq_ind_table(rwq_ind_table); + if (!err) { + for (i = 0; i < table_size; i++) + atomic_dec(&ind_tbl[i]->usecnt); + } + + return err; +} +EXPORT_SYMBOL(ib_destroy_rwq_ind_table); + struct ib_flow *ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 3e8431b5cad7..04bbf172abde 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1396,10 +1396,10 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) state_set(&child_ep->com, CONNECTING); child_ep->com.tdev = tdev; child_ep->com.cm_id = NULL; - child_ep->com.local_addr.sin_family = PF_INET; + child_ep->com.local_addr.sin_family = AF_INET; child_ep->com.local_addr.sin_port = req->local_port; child_ep->com.local_addr.sin_addr.s_addr = req->local_ip; - child_ep->com.remote_addr.sin_family = PF_INET; + child_ep->com.remote_addr.sin_family = AF_INET; child_ep->com.remote_addr.sin_port = req->peer_port; child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip; get_ep(&parent_ep->com); diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index bb1a839d4d6d..3edb80644b53 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1183,18 +1183,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); } -static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, - ibdev.dev); - struct ethtool_drvinfo info; - struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - - PDBG("%s dev 0x%p\n", __func__, dev); - lldev->ethtool_ops->get_drvinfo(lldev, &info); - return sprintf(buf, "%s\n", info.fw_version); -} - static ssize_t show_hca(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1334,13 +1322,11 @@ static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats, } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct device_attribute *iwch_class_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id, }; @@ -1362,6 +1348,18 @@ static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str, + size_t str_len) +{ + struct iwch_dev *iwch_dev = to_iwch_dev(ibdev); + struct ethtool_drvinfo info; + struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; + + PDBG("%s dev 0x%p\n", __func__, iwch_dev); + lldev->ethtool_ops->get_drvinfo(lldev, &info); + snprintf(str, str_len, "%s", info.fw_version); +} + int iwch_register_device(struct iwch_dev *dev) { int ret; @@ -1437,6 +1435,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.get_hw_stats = iwch_get_mib; dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; dev->ibdev.get_port_immutable = iwch_port_immutable; + dev->ibdev.get_dev_fw_str = get_dev_fw_ver_str; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index a3a67216bce6..3aca7f6171b4 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -294,6 +294,25 @@ static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) return; } +static int alloc_ep_skb_list(struct sk_buff_head *ep_skb_list, int size) +{ + struct sk_buff *skb; + unsigned int i; + size_t len; + + len = roundup(sizeof(union cpl_wr_size), 16); + for (i = 0; i < size; i++) { + skb = alloc_skb(len, GFP_KERNEL); + if (!skb) + goto fail; + skb_queue_tail(ep_skb_list, skb); + } + return 0; +fail: + skb_queue_purge(ep_skb_list); + return -ENOMEM; +} + static void *alloc_ep(int size, gfp_t gfp) { struct c4iw_ep_common *epc; @@ -384,6 +403,8 @@ void _c4iw_free_ep(struct kref *kref) if (ep->mpa_skb) kfree_skb(ep->mpa_skb); } + if (!skb_queue_empty(&ep->com.ep_skb_list)) + skb_queue_purge(&ep->com.ep_skb_list); kfree(ep); } @@ -620,25 +641,27 @@ static void abort_arp_failure(void *handle, struct sk_buff *skb) } } -static int send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) +static int send_flowc(struct c4iw_ep *ep) { - unsigned int flowclen = 80; struct fw_flowc_wr *flowc; + struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); int i; u16 vlan = ep->l2t->vlan; int nparams; + if (WARN_ON(!skb)) + return -ENOMEM; + if (vlan == CPL_L2T_VLAN_NONE) nparams = 8; else nparams = 9; - skb = get_skb(skb, flowclen, GFP_KERNEL); - flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen); + flowc = (struct fw_flowc_wr *)__skb_put(skb, FLOWC_LEN); flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | FW_FLOWC_WR_NPARAMS_V(nparams)); - flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(flowclen, + flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(FLOWC_LEN, 16)) | FW_WR_FLOWID_V(ep->hwtid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; @@ -679,18 +702,16 @@ static int send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) return c4iw_ofld_send(&ep->com.dev->rdev, skb); } -static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp) +static int send_halfclose(struct c4iw_ep *ep) { struct cpl_close_con_req *req; - struct sk_buff *skb; + struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); int wrlen = roundup(sizeof *req, 16); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - skb = get_skb(NULL, wrlen, gfp); - if (!skb) { - printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__); + if (WARN_ON(!skb)) return -ENOMEM; - } + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); t4_set_arp_err_handler(skb, NULL, arp_failure_discard); req = (struct cpl_close_con_req *) skb_put(skb, wrlen); @@ -701,26 +722,24 @@ static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp) return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } -static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) +static int send_abort(struct c4iw_ep *ep) { struct cpl_abort_req *req; int wrlen = roundup(sizeof *req, 16); + struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - skb = get_skb(skb, wrlen, gfp); - if (!skb) { - printk(KERN_ERR MOD "%s - failed to alloc skb.\n", - __func__); + if (WARN_ON(!req_skb)) return -ENOMEM; - } - set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); - t4_set_arp_err_handler(skb, ep, abort_arp_failure); - req = (struct cpl_abort_req *) skb_put(skb, wrlen); + + set_wr_txq(req_skb, CPL_PRIORITY_DATA, ep->txq_idx); + t4_set_arp_err_handler(req_skb, ep, abort_arp_failure); + req = (struct cpl_abort_req *)skb_put(req_skb, wrlen); memset(req, 0, wrlen); INIT_TP_WR(req, ep->hwtid); OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); req->cmd = CPL_ABORT_SEND_RST; - return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); + return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t); } static void best_mtu(const unsigned short *mtus, unsigned short mtu, @@ -992,9 +1011,19 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, mpa = (struct mpa_message *)(req + 1); memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); - mpa->flags = (crc_enabled ? MPA_CRC : 0) | - (markers_enabled ? MPA_MARKERS : 0) | - (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0); + + mpa->flags = 0; + if (crc_enabled) + mpa->flags |= MPA_CRC; + if (markers_enabled) { + mpa->flags |= MPA_MARKERS; + ep->mpa_attr.recv_marker_enabled = 1; + } else { + ep->mpa_attr.recv_marker_enabled = 0; + } + if (mpa_rev_to_use == 2) + mpa->flags |= MPA_ENHANCED_RDMA_CONN; + mpa->private_data_size = htons(ep->plen); mpa->revision = mpa_rev_to_use; if (mpa_rev_to_use == 1) { @@ -1169,8 +1198,11 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) mpa = (struct mpa_message *)(req + 1); memset(mpa, 0, sizeof(*mpa)); memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); - mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | - (markers_enabled ? MPA_MARKERS : 0); + mpa->flags = 0; + if (ep->mpa_attr.crc_enabled) + mpa->flags |= MPA_CRC; + if (ep->mpa_attr.recv_marker_enabled) + mpa->flags |= MPA_MARKERS; mpa->revision = ep->mpa_attr.version; mpa->private_data_size = htons(plen); @@ -1248,7 +1280,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) set_bit(ACT_ESTAB, &ep->com.history); /* start MPA negotiation */ - ret = send_flowc(ep, NULL); + ret = send_flowc(ep); if (ret) goto err; if (ep->retry_with_mpa_v1) @@ -1555,7 +1587,6 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) */ __state_set(&ep->com, FPDU_MODE); ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; - ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; ep->mpa_attr.version = mpa->revision; ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; @@ -2004,12 +2035,17 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) } /* - * Return whether a failed active open has allocated a TID + * Some of the error codes above implicitly indicate that there is no TID + * allocated with the result of an ACT_OPEN. We use this predicate to make + * that explicit. */ static inline int act_open_has_tid(int status) { - return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && - status != CPL_ERR_ARP_MISS; + return (status != CPL_ERR_TCAM_PARITY && + status != CPL_ERR_TCAM_MISS && + status != CPL_ERR_TCAM_FULL && + status != CPL_ERR_CONN_EXIST_SYNRECV && + status != CPL_ERR_CONN_EXIST); } /* Returns whether a CPL status conveys negative advice. @@ -2130,6 +2166,7 @@ out: static int c4iw_reconnect(struct c4iw_ep *ep) { int err = 0; + int size = 0; struct sockaddr_in *laddr = (struct sockaddr_in *) &ep->com.cm_id->m_local_addr; struct sockaddr_in *raddr = (struct sockaddr_in *) @@ -2145,6 +2182,21 @@ static int c4iw_reconnect(struct c4iw_ep *ep) init_timer(&ep->timer); c4iw_init_wr_wait(&ep->com.wr_wait); + /* When MPA revision is different on nodes, the node with MPA_rev=2 + * tries to reconnect with MPA_rev 1 for the same EP through + * c4iw_reconnect(), where the same EP is assigned with new tid for + * further connection establishment. As we are using the same EP pointer + * for reconnect, few skbs are used during the previous c4iw_connect(), + * which leaves the EP with inadequate skbs for further + * c4iw_reconnect(), Further causing an assert BUG_ON() due to empty + * skb_list() during peer_abort(). Allocate skbs which is already used. + */ + size = (CN_MAX_CON_BUF - skb_queue_len(&ep->com.ep_skb_list)); + if (alloc_ep_skb_list(&ep->com.ep_skb_list, size)) { + err = -ENOMEM; + goto fail1; + } + /* * Allocate an active TID to initiate a TCP connection. */ @@ -2210,6 +2262,7 @@ fail2: * response of 1st connect request. */ connect_reply_upcall(ep, -ECONNRESET); +fail1: c4iw_put_ep(&ep->com); out: return err; @@ -2576,6 +2629,10 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) if (peer_mss && child_ep->mtu > (peer_mss + hdrs)) child_ep->mtu = peer_mss + hdrs; + skb_queue_head_init(&child_ep->com.ep_skb_list); + if (alloc_ep_skb_list(&child_ep->com.ep_skb_list, CN_MAX_CON_BUF)) + goto fail; + state_set(&child_ep->com, CONNECTING); child_ep->com.dev = dev; child_ep->com.cm_id = NULL; @@ -2640,6 +2697,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) (const u32 *)&sin6->sin6_addr.s6_addr, 1); } goto out; +fail: + c4iw_put_ep(&child_ep->com); reject: reject_cr(dev, hwtid, skb); if (parent_ep) @@ -2670,7 +2729,7 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) ep->com.state = MPA_REQ_WAIT; start_ep_timer(ep); set_bit(PASS_ESTAB, &ep->com.history); - ret = send_flowc(ep, skb); + ret = send_flowc(ep); mutex_unlock(&ep->com.mutex); if (ret) c4iw_ep_disconnect(ep, 1, GFP_KERNEL); @@ -2871,10 +2930,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) } mutex_unlock(&ep->com.mutex); - rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); - if (!rpl_skb) { - printk(KERN_ERR MOD "%s - cannot allocate skb!\n", - __func__); + rpl_skb = skb_dequeue(&ep->com.ep_skb_list); + if (WARN_ON(!rpl_skb)) { release = 1; goto out; } @@ -3011,9 +3068,9 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s last streaming msg ack ep %p tid %u state %u " "initiator %u freeing skb\n", __func__, ep, ep->hwtid, state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0); + mutex_lock(&ep->com.mutex); kfree_skb(ep->mpa_skb); ep->mpa_skb = NULL; - mutex_lock(&ep->com.mutex); if (test_bit(STOP_MPA_TIMER, &ep->com.flags)) stop_ep_timer(ep); mutex_unlock(&ep->com.mutex); @@ -3025,9 +3082,9 @@ out: int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) { - int err = 0; - int disconnect = 0; + int abort; struct c4iw_ep *ep = to_ep(cm_id); + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); mutex_lock(&ep->com.mutex); @@ -3038,16 +3095,13 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) } set_bit(ULP_REJECT, &ep->com.history); if (mpa_rev == 0) - disconnect = 2; - else { - err = send_mpa_reject(ep, pdata, pdata_len); - disconnect = 1; - } + abort = 1; + else + abort = send_mpa_reject(ep, pdata, pdata_len); mutex_unlock(&ep->com.mutex); - if (disconnect) { - stop_ep_timer(ep); - err = c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL); - } + + stop_ep_timer(ep); + c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL); c4iw_put_ep(&ep->com); return 0; } @@ -3248,6 +3302,13 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) err = -ENOMEM; goto out; } + + skb_queue_head_init(&ep->com.ep_skb_list); + if (alloc_ep_skb_list(&ep->com.ep_skb_list, CN_MAX_CON_BUF)) { + err = -ENOMEM; + goto fail1; + } + init_timer(&ep->timer); ep->plen = conn_param->private_data_len; if (ep->plen) @@ -3266,7 +3327,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (!ep->com.qp) { PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn); err = -EINVAL; - goto fail1; + goto fail2; } ref_qp(ep); PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn, @@ -3279,7 +3340,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (ep->atid == -1) { printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__); err = -ENOMEM; - goto fail1; + goto fail2; } insert_handle(dev, &dev->atid_idr, ep, ep->atid); @@ -3303,7 +3364,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) { err = pick_local_ipaddrs(dev, cm_id); if (err) - goto fail1; + goto fail2; } /* find a route */ @@ -3323,7 +3384,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) { err = pick_local_ip6addrs(dev, cm_id); if (err) - goto fail1; + goto fail2; } /* find a route */ @@ -3339,14 +3400,14 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (!ep->dst) { printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); err = -EHOSTUNREACH; - goto fail2; + goto fail3; } err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true, ep->com.dev->rdev.lldi.adapter_type, cm_id->tos); if (err) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); - goto fail3; + goto fail4; } PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", @@ -3362,13 +3423,15 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) goto out; cxgb4_l2t_release(ep->l2t); -fail3: +fail4: dst_release(ep->dst); -fail2: +fail3: remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); -fail1: +fail2: + skb_queue_purge(&ep->com.ep_skb_list); deref_cm_id(&ep->com); +fail1: c4iw_put_ep(&ep->com); out: return err; @@ -3461,6 +3524,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) err = -ENOMEM; goto fail1; } + skb_queue_head_init(&ep->com.ep_skb_list); PDBG("%s ep %p\n", __func__, ep); ep->com.cm_id = cm_id; ref_cm_id(&ep->com); @@ -3577,11 +3641,22 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) case MPA_REQ_RCVD: case MPA_REP_SENT: case FPDU_MODE: + case CONNECTING: close = 1; if (abrupt) ep->com.state = ABORTING; else { ep->com.state = CLOSING; + + /* + * if we close before we see the fw4_ack() then we fix + * up the timer state since we're reusing it. + */ + if (ep->mpa_skb && + test_bit(STOP_MPA_TIMER, &ep->com.flags)) { + clear_bit(STOP_MPA_TIMER, &ep->com.flags); + stop_ep_timer(ep); + } start_ep_timer(ep); } set_bit(CLOSE_SENT, &ep->com.flags); @@ -3611,10 +3686,10 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) if (abrupt) { set_bit(EP_DISC_ABORT, &ep->com.history); close_complete_upcall(ep, -ECONNRESET); - ret = send_abort(ep, NULL, gfp); + ret = send_abort(ep); } else { set_bit(EP_DISC_CLOSE, &ep->com.history); - ret = send_halfclose(ep, gfp); + ret = send_halfclose(ep); } if (ret) { set_bit(EP_DISC_FAIL, &ep->com.history); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index b0b955724458..812ab7278b8e 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -33,19 +33,15 @@ #include "iw_cxgb4.h" static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, - struct c4iw_dev_ucontext *uctx) + struct c4iw_dev_ucontext *uctx, struct sk_buff *skb) { struct fw_ri_res_wr *res_wr; struct fw_ri_res *res; int wr_len; struct c4iw_wr_wait wr_wait; - struct sk_buff *skb; int ret; wr_len = sizeof *res_wr + sizeof *res; - skb = alloc_skb(wr_len, GFP_KERNEL); - if (!skb) - return -ENOMEM; set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); @@ -863,7 +859,9 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq) ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context) : NULL; destroy_cq(&chp->rhp->rdev, &chp->cq, - ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx); + ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx, + chp->destroy_skb); + chp->destroy_skb = NULL; kfree(chp); return 0; } @@ -879,7 +877,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, struct c4iw_cq *chp; struct c4iw_create_cq_resp uresp; struct c4iw_ucontext *ucontext = NULL; - int ret; + int ret, wr_len; size_t memsize, hwentries; struct c4iw_mm_entry *mm, *mm2; @@ -896,6 +894,13 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (!chp) return ERR_PTR(-ENOMEM); + wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res); + chp->destroy_skb = alloc_skb(wr_len, GFP_KERNEL); + if (!chp->destroy_skb) { + ret = -ENOMEM; + goto err1; + } + if (ib_context) ucontext = to_c4iw_ucontext(ib_context); @@ -936,7 +941,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, ret = create_cq(&rhp->rdev, &chp->cq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); if (ret) - goto err1; + goto err2; chp->rhp = rhp; chp->cq.size--; /* status page */ @@ -947,15 +952,15 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, init_waitqueue_head(&chp->wait); ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid); if (ret) - goto err2; + goto err3; if (ucontext) { mm = kmalloc(sizeof *mm, GFP_KERNEL); if (!mm) - goto err3; + goto err4; mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); if (!mm2) - goto err4; + goto err5; uresp.qid_mask = rhp->rdev.cqmask; uresp.cqid = chp->cq.cqid; @@ -970,7 +975,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp) - sizeof(uresp.reserved)); if (ret) - goto err5; + goto err6; mm->key = uresp.key; mm->addr = virt_to_phys(chp->cq.queue); @@ -986,15 +991,18 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, __func__, chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize, (unsigned long long) chp->cq.dma_addr); return &chp->ibcq; -err5: +err6: kfree(mm2); -err4: +err5: kfree(mm); -err3: +err4: remove_handle(rhp, &rhp->cqidr, chp->cq.cqid); -err2: +err3: destroy_cq(&chp->rhp->rdev, &chp->cq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + ucontext ? &ucontext->uctx : &rhp->rdev.uctx, + chp->destroy_skb); +err2: + kfree_skb(chp->destroy_skb); err1: kfree(chp); return ERR_PTR(ret); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index ae2e8b23d2dd..071d7332ec06 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -317,7 +317,7 @@ static int qp_open(struct inode *inode, struct file *file) idr_for_each(&qpd->devp->qpidr, count_idrs, &count); spin_unlock_irq(&qpd->devp->lock); - qpd->bufsize = count * 128; + qpd->bufsize = count * 180; qpd->buf = vmalloc(qpd->bufsize); if (!qpd->buf) { kfree(qpd); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index f6f34a75af27..aa47e0ae80bc 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -384,6 +384,7 @@ struct c4iw_mr { struct ib_mr ibmr; struct ib_umem *umem; struct c4iw_dev *rhp; + struct sk_buff *dereg_skb; u64 kva; struct tpt_attributes attr; u64 *mpl; @@ -400,6 +401,7 @@ static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr) struct c4iw_mw { struct ib_mw ibmw; struct c4iw_dev *rhp; + struct sk_buff *dereg_skb; u64 kva; struct tpt_attributes attr; }; @@ -412,6 +414,7 @@ static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw) struct c4iw_cq { struct ib_cq ibcq; struct c4iw_dev *rhp; + struct sk_buff *destroy_skb; struct t4_cq cq; spinlock_t lock; spinlock_t comp_handler_lock; @@ -472,7 +475,7 @@ struct c4iw_qp { struct t4_wq wq; spinlock_t lock; struct mutex mutex; - atomic_t refcnt; + struct kref kref; wait_queue_head_t wait; struct timer_list timer; int sq_sig_all; @@ -789,10 +792,29 @@ enum c4iw_ep_history { CM_ID_DEREFED = 28, }; +enum conn_pre_alloc_buffers { + CN_ABORT_REQ_BUF, + CN_ABORT_RPL_BUF, + CN_CLOSE_CON_REQ_BUF, + CN_DESTROY_BUF, + CN_FLOWC_BUF, + CN_MAX_CON_BUF +}; + +#define FLOWC_LEN 80 +union cpl_wr_size { + struct cpl_abort_req abrt_req; + struct cpl_abort_rpl abrt_rpl; + struct fw_ri_wr ri_req; + struct cpl_close_con_req close_req; + char flowc_buf[FLOWC_LEN]; +}; + struct c4iw_ep_common { struct iw_cm_id *cm_id; struct c4iw_qp *qp; struct c4iw_dev *dev; + struct sk_buff_head ep_skb_list; enum c4iw_ep_state state; struct kref kref; struct mutex mutex; diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 55d0651ee4de..0b91b0f4df71 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -59,9 +59,9 @@ static int mr_exceeds_hw_limits(struct c4iw_dev *dev, u64 length) } static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, - u32 len, dma_addr_t data, int wait) + u32 len, dma_addr_t data, + int wait, struct sk_buff *skb) { - struct sk_buff *skb; struct ulp_mem_io *req; struct ulptx_sgl *sgl; u8 wr_len; @@ -74,9 +74,11 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, c4iw_init_wr_wait(&wr_wait); wr_len = roundup(sizeof(*req) + sizeof(*sgl), 16); - skb = alloc_skb(wr_len, GFP_KERNEL); - if (!skb) - return -ENOMEM; + if (!skb) { + skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + if (!skb) + return -ENOMEM; + } set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); req = (struct ulp_mem_io *)__skb_put(skb, wr_len); @@ -108,9 +110,8 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, } static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, - void *data) + void *data, struct sk_buff *skb) { - struct sk_buff *skb; struct ulp_mem_io *req; struct ulptx_idata *sc; u8 wr_len, *to_dp, *from_dp; @@ -134,9 +135,11 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, wr_len = roundup(sizeof *req + sizeof *sc + roundup(copy_len, T4_ULPTX_MIN_IO), 16); - skb = alloc_skb(wr_len, GFP_KERNEL); - if (!skb) - return -ENOMEM; + if (!skb) { + skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + if (!skb) + return -ENOMEM; + } set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); req = (struct ulp_mem_io *)__skb_put(skb, wr_len); @@ -173,6 +176,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, memset(to_dp + copy_len, 0, T4_ULPTX_MIN_IO - (copy_len % T4_ULPTX_MIN_IO)); ret = c4iw_ofld_send(rdev, skb); + skb = NULL; if (ret) return ret; len -= C4IW_MAX_INLINE_SIZE; @@ -182,7 +186,8 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, return ret; } -static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data) +static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, + void *data, struct sk_buff *skb) { u32 remain = len; u32 dmalen; @@ -205,7 +210,7 @@ static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void * dmalen = T4_ULPTX_MAX_DMA; remain -= dmalen; ret = _c4iw_write_mem_dma_aligned(rdev, addr, dmalen, daddr, - !remain); + !remain, skb); if (ret) goto out; addr += dmalen >> 5; @@ -213,7 +218,7 @@ static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void * daddr += dmalen; } if (remain) - ret = _c4iw_write_mem_inline(rdev, addr, remain, data); + ret = _c4iw_write_mem_inline(rdev, addr, remain, data, skb); out: dma_unmap_single(&rdev->lldi.pdev->dev, save, len, DMA_TO_DEVICE); return ret; @@ -224,23 +229,25 @@ out: * If data is NULL, clear len byte of memory to zero. */ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, - void *data) + void *data, struct sk_buff *skb) { if (is_t5(rdev->lldi.adapter_type) && use_dsgl) { if (len > inline_threshold) { - if (_c4iw_write_mem_dma(rdev, addr, len, data)) { + if (_c4iw_write_mem_dma(rdev, addr, len, data, skb)) { printk_ratelimited(KERN_WARNING "%s: dma map" " failure (non fatal)\n", pci_name(rdev->lldi.pdev)); return _c4iw_write_mem_inline(rdev, addr, len, - data); - } else + data, skb); + } else { return 0; + } } else - return _c4iw_write_mem_inline(rdev, addr, len, data); + return _c4iw_write_mem_inline(rdev, addr, + len, data, skb); } else - return _c4iw_write_mem_inline(rdev, addr, len, data); + return _c4iw_write_mem_inline(rdev, addr, len, data, skb); } /* @@ -253,7 +260,8 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, u32 *stag, u8 stag_state, u32 pdid, enum fw_ri_stag_type type, enum fw_ri_mem_perms perm, int bind_enabled, u32 zbva, u64 to, - u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr) + u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr, + struct sk_buff *skb) { int err; struct fw_ri_tpte tpt; @@ -307,7 +315,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, } err = write_adapter_mem(rdev, stag_idx + (rdev->lldi.vr->stag.start >> 5), - sizeof(tpt), &tpt); + sizeof(tpt), &tpt, skb); if (reset_tpt_entry) { c4iw_put_resource(&rdev->resource.tpt_table, stag_idx); @@ -327,28 +335,29 @@ static int write_pbl(struct c4iw_rdev *rdev, __be64 *pbl, __func__, pbl_addr, rdev->lldi.vr->pbl.start, pbl_size); - err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl); + err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl, NULL); return err; } static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size, - u32 pbl_addr) + u32 pbl_addr, struct sk_buff *skb) { return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, - pbl_size, pbl_addr); + pbl_size, pbl_addr, skb); } static int allocate_window(struct c4iw_rdev *rdev, u32 * stag, u32 pdid) { *stag = T4_STAG_UNSET; return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0, - 0UL, 0, 0, 0, 0); + 0UL, 0, 0, 0, 0, NULL); } -static int deallocate_window(struct c4iw_rdev *rdev, u32 stag) +static int deallocate_window(struct c4iw_rdev *rdev, u32 stag, + struct sk_buff *skb) { return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0, - 0); + 0, skb); } static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, @@ -356,7 +365,7 @@ static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, { *stag = T4_STAG_UNSET; return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_NSMR, 0, 0, 0, - 0UL, 0, 0, pbl_size, pbl_addr); + 0UL, 0, 0, pbl_size, pbl_addr, NULL); } static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag) @@ -383,14 +392,16 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php, mhp->attr.mw_bind_enable, mhp->attr.zbva, mhp->attr.va_fbo, mhp->attr.len ? mhp->attr.len : -1, shift - 12, - mhp->attr.pbl_size, mhp->attr.pbl_addr); + mhp->attr.pbl_size, mhp->attr.pbl_addr, NULL); if (ret) return ret; ret = finish_mem_reg(mhp, stag); - if (ret) + if (ret) { dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); + mhp->attr.pbl_addr, mhp->dereg_skb); + mhp->dereg_skb = NULL; + } return ret; } @@ -423,6 +434,12 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) if (!mhp) return ERR_PTR(-ENOMEM); + mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); + if (!mhp->dereg_skb) { + ret = -ENOMEM; + goto err0; + } + mhp->rhp = rhp; mhp->attr.pdid = php->pdid; mhp->attr.perms = c4iw_ib_to_tpt_access(acc); @@ -435,7 +452,8 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid, FW_RI_STAG_NSMR, mhp->attr.perms, - mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0); + mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0, + NULL); if (ret) goto err1; @@ -445,8 +463,10 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) return &mhp->ibmr; err2: dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); + mhp->attr.pbl_addr, mhp->dereg_skb); err1: + kfree_skb(mhp->dereg_skb); +err0: kfree(mhp); return ERR_PTR(ret); } @@ -481,11 +501,18 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mhp) return ERR_PTR(-ENOMEM); + mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); + if (!mhp->dereg_skb) { + kfree(mhp); + return ERR_PTR(-ENOMEM); + } + mhp->rhp = rhp; mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); if (IS_ERR(mhp->umem)) { err = PTR_ERR(mhp->umem); + kfree_skb(mhp->dereg_skb); kfree(mhp); return ERR_PTR(err); } @@ -550,6 +577,7 @@ err_pbl: err: ib_umem_release(mhp->umem); + kfree_skb(mhp->dereg_skb); kfree(mhp); return ERR_PTR(err); } @@ -572,11 +600,16 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); if (!mhp) return ERR_PTR(-ENOMEM); - ret = allocate_window(&rhp->rdev, &stag, php->pdid); - if (ret) { - kfree(mhp); - return ERR_PTR(ret); + + mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); + if (!mhp->dereg_skb) { + ret = -ENOMEM; + goto free_mhp; } + + ret = allocate_window(&rhp->rdev, &stag, php->pdid); + if (ret) + goto free_skb; mhp->rhp = rhp; mhp->attr.pdid = php->pdid; mhp->attr.type = FW_RI_STAG_MW; @@ -584,12 +617,19 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, mmid = (stag) >> 8; mhp->ibmw.rkey = stag; if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { - deallocate_window(&rhp->rdev, mhp->attr.stag); - kfree(mhp); - return ERR_PTR(-ENOMEM); + ret = -ENOMEM; + goto dealloc_win; } PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); return &(mhp->ibmw); + +dealloc_win: + deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb); +free_skb: + kfree_skb(mhp->dereg_skb); +free_mhp: + kfree(mhp); + return ERR_PTR(ret); } int c4iw_dealloc_mw(struct ib_mw *mw) @@ -602,7 +642,8 @@ int c4iw_dealloc_mw(struct ib_mw *mw) rhp = mhp->rhp; mmid = (mw->rkey) >> 8; remove_handle(rhp, &rhp->mmidr, mmid); - deallocate_window(&rhp->rdev, mhp->attr.stag); + deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb); + kfree_skb(mhp->dereg_skb); kfree(mhp); PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp); return 0; @@ -666,7 +707,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, return &(mhp->ibmr); err3: dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); + mhp->attr.pbl_addr, mhp->dereg_skb); err2: c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, mhp->attr.pbl_size << 3); @@ -717,7 +758,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr) dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev, mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr); dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); + mhp->attr.pbl_addr, mhp->dereg_skb); if (mhp->attr.pbl_size) c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, mhp->attr.pbl_size << 3); diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index dd8a86b726d2..df127ce6b6ec 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -409,20 +409,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr, CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); } -static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, - ibdev.dev); - PDBG("%s dev 0x%p\n", __func__, dev); - - return sprintf(buf, "%u.%u.%u.%u\n", - FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers), - FW_HDR_FW_VER_MINOR_G(c4iw_dev->rdev.lldi.fw_vers), - FW_HDR_FW_VER_MICRO_G(c4iw_dev->rdev.lldi.fw_vers), - FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers)); -} - static ssize_t show_hca(struct device *dev, struct device_attribute *attr, char *buf) { @@ -502,13 +488,11 @@ static int c4iw_get_mib(struct ib_device *ibdev, } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct device_attribute *c4iw_class_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id, }; @@ -530,6 +514,20 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_dev_fw_str(struct ib_device *dev, char *str, + size_t str_len) +{ + struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, + ibdev); + PDBG("%s dev 0x%p\n", __func__, dev); + + snprintf(str, str_len, "%u.%u.%u.%u", + FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers), + FW_HDR_FW_VER_MINOR_G(c4iw_dev->rdev.lldi.fw_vers), + FW_HDR_FW_VER_MICRO_G(c4iw_dev->rdev.lldi.fw_vers), + FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers)); +} + int c4iw_register_device(struct c4iw_dev *dev) { int ret; @@ -605,6 +603,7 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.get_hw_stats = c4iw_get_mib; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; dev->ibdev.get_port_immutable = c4iw_port_immutable; + dev->ibdev.get_dev_fw_str = get_dev_fw_str; dev->ibdev.drain_sq = c4iw_drain_sq; dev->ibdev.drain_rq = c4iw_drain_rq; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index e8993e49b8b3..edb1172b6f54 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -683,17 +683,25 @@ static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, return 0; } +void _free_qp(struct kref *kref) +{ + struct c4iw_qp *qhp; + + qhp = container_of(kref, struct c4iw_qp, kref); + PDBG("%s qhp %p\n", __func__, qhp); + kfree(qhp); +} + void c4iw_qp_add_ref(struct ib_qp *qp) { PDBG("%s ib_qp %p\n", __func__, qp); - atomic_inc(&(to_c4iw_qp(qp)->refcnt)); + kref_get(&to_c4iw_qp(qp)->kref); } void c4iw_qp_rem_ref(struct ib_qp *qp) { PDBG("%s ib_qp %p\n", __func__, qp); - if (atomic_dec_and_test(&(to_c4iw_qp(qp)->refcnt))) - wake_up(&(to_c4iw_qp(qp)->wait)); + kref_put(&to_c4iw_qp(qp)->kref, _free_qp); } static void add_to_fc_list(struct list_head *head, struct list_head *entry) @@ -1081,9 +1089,10 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe, PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, qhp->ep->hwtid); - skb = alloc_skb(sizeof *wqe, gfp); - if (!skb) + skb = skb_dequeue(&qhp->ep->com.ep_skb_list); + if (WARN_ON(!skb)) return; + set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx); wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); @@ -1202,9 +1211,10 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, ep->hwtid); - skb = alloc_skb(sizeof *wqe, GFP_KERNEL); - if (!skb) + skb = skb_dequeue(&ep->com.ep_skb_list); + if (WARN_ON(!skb)) return -ENOMEM; + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); @@ -1592,8 +1602,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) wait_event(qhp->wait, !qhp->ep); remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); - atomic_dec(&qhp->refcnt); - wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); spin_lock_irq(&rhp->lock); if (!list_empty(&qhp->db_fc_entry)) @@ -1606,8 +1614,9 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) destroy_qp(&rhp->rdev, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + c4iw_qp_rem_ref(ib_qp); + PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid); - kfree(qhp); return 0; } @@ -1704,7 +1713,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, init_completion(&qhp->rq_drained); mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); - atomic_set(&qhp->refcnt, 1); + kref_init(&qhp->kref); ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); if (ret) @@ -1896,12 +1905,20 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, return 0; } +static void move_qp_to_err(struct c4iw_qp *qp) +{ + struct c4iw_qp_attributes attrs = { .next_state = C4IW_QP_STATE_ERROR }; + + (void)c4iw_modify_qp(qp->rhp, qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); +} + void c4iw_drain_sq(struct ib_qp *ibqp) { struct c4iw_qp *qp = to_c4iw_qp(ibqp); unsigned long flag; bool need_to_wait; + move_qp_to_err(qp); spin_lock_irqsave(&qp->lock, flag); need_to_wait = !t4_sq_empty(&qp->wq); spin_unlock_irqrestore(&qp->lock, flag); @@ -1916,6 +1933,7 @@ void c4iw_drain_rq(struct ib_qp *ibqp) unsigned long flag; bool need_to_wait; + move_qp_to_err(qp); spin_lock_irqsave(&qp->lock, flag); need_to_wait = !t4_rq_empty(&qp->wq); spin_unlock_irqrestore(&qp->lock, flag); diff --git a/drivers/infiniband/hw/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig index a925fb0db706..f6ea0881765a 100644 --- a/drivers/infiniband/hw/hfi1/Kconfig +++ b/drivers/infiniband/hw/hfi1/Kconfig @@ -1,9 +1,9 @@ config INFINIBAND_HFI1 tristate "Intel OPA Gen1 support" - depends on X86_64 && INFINIBAND_RDMAVT + depends on X86_64 && INFINIBAND_RDMAVT && I2C select MMU_NOTIFIER select CRC32 - default m + select I2C_ALGOBIT ---help--- This is a low-level driver for Intel OPA Gen1 adapter. config HFI1_DEBUG_SDMA_ORDER diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 9b5382c94b0c..0cf97a09b64b 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ eprom.o file_ops.o firmware.o \ init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ - qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \ + qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \ verbs_txreq.o hfi1-$(CONFIG_DEBUG_FS) += debugfs.o diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 14d7eeb09be6..79575ee873f2 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -47,12 +47,18 @@ #include <linux/topology.h> #include <linux/cpumask.h> #include <linux/module.h> +#include <linux/cpumask.h> #include "hfi.h" #include "affinity.h" #include "sdma.h" #include "trace.h" +struct hfi1_affinity_node_list node_affinity = { + .list = LIST_HEAD_INIT(node_affinity.list), + .lock = __SPIN_LOCK_UNLOCKED(&node_affinity.lock), +}; + /* Name of IRQ types, indexed by enum irq_type */ static const char * const irq_type_names[] = { "SDMA", @@ -61,6 +67,9 @@ static const char * const irq_type_names[] = { "OTHER", }; +/* Per NUMA node count of HFI devices */ +static unsigned int *hfi1_per_node_cntr; + static inline void init_cpu_mask_set(struct cpu_mask_set *set) { cpumask_clear(&set->mask); @@ -69,47 +78,136 @@ static inline void init_cpu_mask_set(struct cpu_mask_set *set) } /* Initialize non-HT cpu cores mask */ -int init_real_cpu_mask(struct hfi1_devdata *dd) +void init_real_cpu_mask(void) { - struct hfi1_affinity *info; int possible, curr_cpu, i, ht; - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) - return -ENOMEM; - - cpumask_clear(&info->real_cpu_mask); + cpumask_clear(&node_affinity.real_cpu_mask); /* Start with cpu online mask as the real cpu mask */ - cpumask_copy(&info->real_cpu_mask, cpu_online_mask); + cpumask_copy(&node_affinity.real_cpu_mask, cpu_online_mask); /* * Remove HT cores from the real cpu mask. Do this in two steps below. */ - possible = cpumask_weight(&info->real_cpu_mask); + possible = cpumask_weight(&node_affinity.real_cpu_mask); ht = cpumask_weight(topology_sibling_cpumask( - cpumask_first(&info->real_cpu_mask))); + cpumask_first(&node_affinity.real_cpu_mask))); /* * Step 1. Skip over the first N HT siblings and use them as the * "real" cores. Assumes that HT cores are not enumerated in * succession (except in the single core case). */ - curr_cpu = cpumask_first(&info->real_cpu_mask); + curr_cpu = cpumask_first(&node_affinity.real_cpu_mask); for (i = 0; i < possible / ht; i++) - curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask); + curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask); /* * Step 2. Remove the remaining HT siblings. Use cpumask_next() to * skip any gaps. */ for (; i < possible; i++) { - cpumask_clear_cpu(curr_cpu, &info->real_cpu_mask); - curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask); + cpumask_clear_cpu(curr_cpu, &node_affinity.real_cpu_mask); + curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask); + } +} + +int node_affinity_init(void) +{ + int node; + struct pci_dev *dev = NULL; + const struct pci_device_id *ids = hfi1_pci_tbl; + + cpumask_clear(&node_affinity.proc.used); + cpumask_copy(&node_affinity.proc.mask, cpu_online_mask); + + node_affinity.proc.gen = 0; + node_affinity.num_core_siblings = + cpumask_weight(topology_sibling_cpumask( + cpumask_first(&node_affinity.proc.mask) + )); + node_affinity.num_online_nodes = num_online_nodes(); + node_affinity.num_online_cpus = num_online_cpus(); + + /* + * The real cpu mask is part of the affinity struct but it has to be + * initialized early. It is needed to calculate the number of user + * contexts in set_up_context_variables(). + */ + init_real_cpu_mask(); + + hfi1_per_node_cntr = kcalloc(num_possible_nodes(), + sizeof(*hfi1_per_node_cntr), GFP_KERNEL); + if (!hfi1_per_node_cntr) + return -ENOMEM; + + while (ids->vendor) { + dev = NULL; + while ((dev = pci_get_device(ids->vendor, ids->device, dev))) { + node = pcibus_to_node(dev->bus); + if (node < 0) + node = numa_node_id(); + + hfi1_per_node_cntr[node]++; + } + ids++; } - dd->affinity = info; return 0; } +void node_affinity_destroy(void) +{ + struct list_head *pos, *q; + struct hfi1_affinity_node *entry; + + spin_lock(&node_affinity.lock); + list_for_each_safe(pos, q, &node_affinity.list) { + entry = list_entry(pos, struct hfi1_affinity_node, + list); + list_del(pos); + kfree(entry); + } + spin_unlock(&node_affinity.lock); + kfree(hfi1_per_node_cntr); +} + +static struct hfi1_affinity_node *node_affinity_allocate(int node) +{ + struct hfi1_affinity_node *entry; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return NULL; + entry->node = node; + INIT_LIST_HEAD(&entry->list); + + return entry; +} + +/* + * It appends an entry to the list. + * It *must* be called with node_affinity.lock held. + */ +static void node_affinity_add_tail(struct hfi1_affinity_node *entry) +{ + list_add_tail(&entry->list, &node_affinity.list); +} + +/* It must be called with node_affinity.lock held */ +static struct hfi1_affinity_node *node_affinity_lookup(int node) +{ + struct list_head *pos; + struct hfi1_affinity_node *entry; + + list_for_each(pos, &node_affinity.list) { + entry = list_entry(pos, struct hfi1_affinity_node, list); + if (entry->node == node) + return entry; + } + + return NULL; +} + /* * Interrupt affinity. * @@ -121,10 +219,10 @@ int init_real_cpu_mask(struct hfi1_devdata *dd) * to the node relative 1 as necessary. * */ -void hfi1_dev_affinity_init(struct hfi1_devdata *dd) +int hfi1_dev_affinity_init(struct hfi1_devdata *dd) { int node = pcibus_to_node(dd->pcidev->bus); - struct hfi1_affinity *info = dd->affinity; + struct hfi1_affinity_node *entry; const struct cpumask *local_mask; int curr_cpu, possible, i; @@ -132,56 +230,93 @@ void hfi1_dev_affinity_init(struct hfi1_devdata *dd) node = numa_node_id(); dd->node = node; - spin_lock_init(&info->lock); - - init_cpu_mask_set(&info->def_intr); - init_cpu_mask_set(&info->rcv_intr); - init_cpu_mask_set(&info->proc); - local_mask = cpumask_of_node(dd->node); if (cpumask_first(local_mask) >= nr_cpu_ids) local_mask = topology_core_cpumask(0); - /* Use the "real" cpu mask of this node as the default */ - cpumask_and(&info->def_intr.mask, &info->real_cpu_mask, local_mask); - - /* fill in the receive list */ - possible = cpumask_weight(&info->def_intr.mask); - curr_cpu = cpumask_first(&info->def_intr.mask); - if (possible == 1) { - /* only one CPU, everyone will use it */ - cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask); - } else { - /* - * Retain the first CPU in the default list for the control - * context. - */ - curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask); - /* - * Remove the remaining kernel receive queues from - * the default list and add them to the receive list. - */ - for (i = 0; i < dd->n_krcv_queues - 1; i++) { - cpumask_clear_cpu(curr_cpu, &info->def_intr.mask); - cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask); - curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask); - if (curr_cpu >= nr_cpu_ids) - break; + + spin_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + spin_unlock(&node_affinity.lock); + + /* + * If this is the first time this NUMA node's affinity is used, + * create an entry in the global affinity structure and initialize it. + */ + if (!entry) { + entry = node_affinity_allocate(node); + if (!entry) { + dd_dev_err(dd, + "Unable to allocate global affinity node\n"); + return -ENOMEM; } - } + init_cpu_mask_set(&entry->def_intr); + init_cpu_mask_set(&entry->rcv_intr); + cpumask_clear(&entry->general_intr_mask); + /* Use the "real" cpu mask of this node as the default */ + cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask, + local_mask); + + /* fill in the receive list */ + possible = cpumask_weight(&entry->def_intr.mask); + curr_cpu = cpumask_first(&entry->def_intr.mask); + + if (possible == 1) { + /* only one CPU, everyone will use it */ + cpumask_set_cpu(curr_cpu, &entry->rcv_intr.mask); + cpumask_set_cpu(curr_cpu, &entry->general_intr_mask); + } else { + /* + * The general/control context will be the first CPU in + * the default list, so it is removed from the default + * list and added to the general interrupt list. + */ + cpumask_clear_cpu(curr_cpu, &entry->def_intr.mask); + cpumask_set_cpu(curr_cpu, &entry->general_intr_mask); + curr_cpu = cpumask_next(curr_cpu, + &entry->def_intr.mask); - cpumask_copy(&info->proc.mask, cpu_online_mask); -} + /* + * Remove the remaining kernel receive queues from + * the default list and add them to the receive list. + */ + for (i = 0; + i < (dd->n_krcv_queues - 1) * + hfi1_per_node_cntr[dd->node]; + i++) { + cpumask_clear_cpu(curr_cpu, + &entry->def_intr.mask); + cpumask_set_cpu(curr_cpu, + &entry->rcv_intr.mask); + curr_cpu = cpumask_next(curr_cpu, + &entry->def_intr.mask); + if (curr_cpu >= nr_cpu_ids) + break; + } -void hfi1_dev_affinity_free(struct hfi1_devdata *dd) -{ - kfree(dd->affinity); + /* + * If there ends up being 0 CPU cores leftover for SDMA + * engines, use the same CPU cores as general/control + * context. + */ + if (cpumask_weight(&entry->def_intr.mask) == 0) + cpumask_copy(&entry->def_intr.mask, + &entry->general_intr_mask); + } + + spin_lock(&node_affinity.lock); + node_affinity_add_tail(entry); + spin_unlock(&node_affinity.lock); + } + + return 0; } int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) { int ret; cpumask_var_t diff; - struct cpu_mask_set *set; + struct hfi1_affinity_node *entry; + struct cpu_mask_set *set = NULL; struct sdma_engine *sde = NULL; struct hfi1_ctxtdata *rcd = NULL; char extra[64]; @@ -194,22 +329,25 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) if (!ret) return -ENOMEM; + spin_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + spin_unlock(&node_affinity.lock); + switch (msix->type) { case IRQ_SDMA: sde = (struct sdma_engine *)msix->arg; scnprintf(extra, 64, "engine %u", sde->this_idx); - /* fall through */ + set = &entry->def_intr; + break; case IRQ_GENERAL: - set = &dd->affinity->def_intr; + cpu = cpumask_first(&entry->general_intr_mask); break; case IRQ_RCVCTXT: rcd = (struct hfi1_ctxtdata *)msix->arg; - if (rcd->ctxt == HFI1_CTRL_CTXT) { - set = &dd->affinity->def_intr; - cpu = cpumask_first(&set->mask); - } else { - set = &dd->affinity->rcv_intr; - } + if (rcd->ctxt == HFI1_CTRL_CTXT) + cpu = cpumask_first(&entry->general_intr_mask); + else + set = &entry->rcv_intr; scnprintf(extra, 64, "ctxt %u", rcd->ctxt); break; default: @@ -218,12 +356,12 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) } /* - * The control receive context is placed on a particular CPU, which - * is set above. Skip accounting for it. Everything else finds its - * CPU here. + * The general and control contexts are placed on a particular + * CPU, which is set above. Skip accounting for it. Everything else + * finds its CPU here. */ - if (cpu == -1) { - spin_lock(&dd->affinity->lock); + if (cpu == -1 && set) { + spin_lock(&node_affinity.lock); if (cpumask_equal(&set->mask, &set->used)) { /* * We've used up all the CPUs, bump up the generation @@ -235,7 +373,7 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) cpumask_andnot(diff, &set->mask, &set->used); cpu = cpumask_first(diff); cpumask_set_cpu(cpu, &set->used); - spin_unlock(&dd->affinity->lock); + spin_unlock(&node_affinity.lock); } switch (msix->type) { @@ -263,43 +401,84 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd, { struct cpu_mask_set *set = NULL; struct hfi1_ctxtdata *rcd; + struct hfi1_affinity_node *entry; + + spin_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + spin_unlock(&node_affinity.lock); switch (msix->type) { case IRQ_SDMA: + set = &entry->def_intr; + break; case IRQ_GENERAL: - set = &dd->affinity->def_intr; + /* Don't do accounting for general contexts */ break; case IRQ_RCVCTXT: rcd = (struct hfi1_ctxtdata *)msix->arg; - /* only do accounting for non control contexts */ + /* Don't do accounting for control contexts */ if (rcd->ctxt != HFI1_CTRL_CTXT) - set = &dd->affinity->rcv_intr; + set = &entry->rcv_intr; break; default: return; } if (set) { - spin_lock(&dd->affinity->lock); + spin_lock(&node_affinity.lock); cpumask_andnot(&set->used, &set->used, &msix->mask); if (cpumask_empty(&set->used) && set->gen) { set->gen--; cpumask_copy(&set->used, &set->mask); } - spin_unlock(&dd->affinity->lock); + spin_unlock(&node_affinity.lock); } irq_set_affinity_hint(msix->msix.vector, NULL); cpumask_clear(&msix->mask); } -int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) +/* This should be called with node_affinity.lock held */ +static void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask, + struct hfi1_affinity_node_list *affinity) { - int cpu = -1, ret; - cpumask_var_t diff, mask, intrs; + int possible, curr_cpu, i; + uint num_cores_per_socket = node_affinity.num_online_cpus / + affinity->num_core_siblings / + node_affinity.num_online_nodes; + + cpumask_copy(hw_thread_mask, &affinity->proc.mask); + if (affinity->num_core_siblings > 0) { + /* Removing other siblings not needed for now */ + possible = cpumask_weight(hw_thread_mask); + curr_cpu = cpumask_first(hw_thread_mask); + for (i = 0; + i < num_cores_per_socket * node_affinity.num_online_nodes; + i++) + curr_cpu = cpumask_next(curr_cpu, hw_thread_mask); + + for (; i < possible; i++) { + cpumask_clear_cpu(curr_cpu, hw_thread_mask); + curr_cpu = cpumask_next(curr_cpu, hw_thread_mask); + } + + /* Identifying correct HW threads within physical cores */ + cpumask_shift_left(hw_thread_mask, hw_thread_mask, + num_cores_per_socket * + node_affinity.num_online_nodes * + hw_thread_no); + } +} + +int hfi1_get_proc_affinity(int node) +{ + int cpu = -1, ret, i; + struct hfi1_affinity_node *entry; + cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask; const struct cpumask *node_mask, *proc_mask = tsk_cpus_allowed(current); - struct cpu_mask_set *set = &dd->affinity->proc; + struct hfi1_affinity_node_list *affinity = &node_affinity; + struct cpu_mask_set *set = &affinity->proc; /* * check whether process/context affinity has already @@ -325,22 +504,41 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) /* * The process does not have a preset CPU affinity so find one to - * recommend. We prefer CPUs on the same NUMA as the device. + * recommend using the following algorithm: + * + * For each user process that is opening a context on HFI Y: + * a) If all cores are filled, reinitialize the bitmask + * b) Fill real cores first, then HT cores (First set of HT + * cores on all physical cores, then second set of HT core, + * and, so on) in the following order: + * + * 1. Same NUMA node as HFI Y and not running an IRQ + * handler + * 2. Same NUMA node as HFI Y and running an IRQ handler + * 3. Different NUMA node to HFI Y and not running an IRQ + * handler + * 4. Different NUMA node to HFI Y and running an IRQ + * handler + * c) Mark core as filled in the bitmask. As user processes are + * done, clear cores from the bitmask. */ ret = zalloc_cpumask_var(&diff, GFP_KERNEL); if (!ret) goto done; - ret = zalloc_cpumask_var(&mask, GFP_KERNEL); + ret = zalloc_cpumask_var(&hw_thread_mask, GFP_KERNEL); if (!ret) goto free_diff; - ret = zalloc_cpumask_var(&intrs, GFP_KERNEL); + ret = zalloc_cpumask_var(&available_mask, GFP_KERNEL); + if (!ret) + goto free_hw_thread_mask; + ret = zalloc_cpumask_var(&intrs_mask, GFP_KERNEL); if (!ret) - goto free_mask; + goto free_available_mask; - spin_lock(&dd->affinity->lock); + spin_lock(&affinity->lock); /* - * If we've used all available CPUs, clear the mask and start + * If we've used all available HW threads, clear the mask and start * overloading. */ if (cpumask_equal(&set->mask, &set->used)) { @@ -348,81 +546,198 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) cpumask_clear(&set->used); } - /* CPUs used by interrupt handlers */ - cpumask_copy(intrs, (dd->affinity->def_intr.gen ? - &dd->affinity->def_intr.mask : - &dd->affinity->def_intr.used)); - cpumask_or(intrs, intrs, (dd->affinity->rcv_intr.gen ? - &dd->affinity->rcv_intr.mask : - &dd->affinity->rcv_intr.used)); + /* + * If NUMA node has CPUs used by interrupt handlers, include them in the + * interrupt handler mask. + */ + entry = node_affinity_lookup(node); + if (entry) { + cpumask_copy(intrs_mask, (entry->def_intr.gen ? + &entry->def_intr.mask : + &entry->def_intr.used)); + cpumask_or(intrs_mask, intrs_mask, (entry->rcv_intr.gen ? + &entry->rcv_intr.mask : + &entry->rcv_intr.used)); + cpumask_or(intrs_mask, intrs_mask, &entry->general_intr_mask); + } hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl", - cpumask_pr_args(intrs)); + cpumask_pr_args(intrs_mask)); + + cpumask_copy(hw_thread_mask, &set->mask); /* - * If we don't have a NUMA node requested, preference is towards - * device NUMA node + * If HT cores are enabled, identify which HW threads within the + * physical cores should be used. */ - if (node == -1) - node = dd->node; + if (affinity->num_core_siblings > 0) { + for (i = 0; i < affinity->num_core_siblings; i++) { + find_hw_thread_mask(i, hw_thread_mask, affinity); + + /* + * If there's at least one available core for this HW + * thread number, stop looking for a core. + * + * diff will always be not empty at least once in this + * loop as the used mask gets reset when + * (set->mask == set->used) before this loop. + */ + cpumask_andnot(diff, hw_thread_mask, &set->used); + if (!cpumask_empty(diff)) + break; + } + } + hfi1_cdbg(PROC, "Same available HW thread on all physical CPUs: %*pbl", + cpumask_pr_args(hw_thread_mask)); + node_mask = cpumask_of_node(node); - hfi1_cdbg(PROC, "device on NUMA %u, CPUs %*pbl", node, + hfi1_cdbg(PROC, "Device on NUMA %u, CPUs %*pbl", node, cpumask_pr_args(node_mask)); - /* diff will hold all unused cpus */ - cpumask_andnot(diff, &set->mask, &set->used); - hfi1_cdbg(PROC, "unused CPUs (all) %*pbl", cpumask_pr_args(diff)); - - /* get cpumask of available CPUs on preferred NUMA */ - cpumask_and(mask, diff, node_mask); - hfi1_cdbg(PROC, "available cpus on NUMA %*pbl", cpumask_pr_args(mask)); + /* Get cpumask of available CPUs on preferred NUMA */ + cpumask_and(available_mask, hw_thread_mask, node_mask); + cpumask_andnot(available_mask, available_mask, &set->used); + hfi1_cdbg(PROC, "Available CPUs on NUMA %u: %*pbl", node, + cpumask_pr_args(available_mask)); /* * At first, we don't want to place processes on the same - * CPUs as interrupt handlers. + * CPUs as interrupt handlers. Then, CPUs running interrupt + * handlers are used. + * + * 1) If diff is not empty, then there are CPUs not running + * non-interrupt handlers available, so diff gets copied + * over to available_mask. + * 2) If diff is empty, then all CPUs not running interrupt + * handlers are taken, so available_mask contains all + * available CPUs running interrupt handlers. + * 3) If available_mask is empty, then all CPUs on the + * preferred NUMA node are taken, so other NUMA nodes are + * used for process assignments using the same method as + * the preferred NUMA node. */ - cpumask_andnot(diff, mask, intrs); + cpumask_andnot(diff, available_mask, intrs_mask); if (!cpumask_empty(diff)) - cpumask_copy(mask, diff); + cpumask_copy(available_mask, diff); - /* - * if we don't have a cpu on the preferred NUMA, get - * the list of the remaining available CPUs - */ - if (cpumask_empty(mask)) { - cpumask_andnot(diff, &set->mask, &set->used); - cpumask_andnot(mask, diff, node_mask); + /* If we don't have CPUs on the preferred node, use other NUMA nodes */ + if (cpumask_empty(available_mask)) { + cpumask_andnot(available_mask, hw_thread_mask, &set->used); + /* Excluding preferred NUMA cores */ + cpumask_andnot(available_mask, available_mask, node_mask); + hfi1_cdbg(PROC, + "Preferred NUMA node cores are taken, cores available in other NUMA nodes: %*pbl", + cpumask_pr_args(available_mask)); + + /* + * At first, we don't want to place processes on the same + * CPUs as interrupt handlers. + */ + cpumask_andnot(diff, available_mask, intrs_mask); + if (!cpumask_empty(diff)) + cpumask_copy(available_mask, diff); } - hfi1_cdbg(PROC, "possible CPUs for process %*pbl", - cpumask_pr_args(mask)); + hfi1_cdbg(PROC, "Possible CPUs for process: %*pbl", + cpumask_pr_args(available_mask)); - cpu = cpumask_first(mask); + cpu = cpumask_first(available_mask); if (cpu >= nr_cpu_ids) /* empty */ cpu = -1; else cpumask_set_cpu(cpu, &set->used); - spin_unlock(&dd->affinity->lock); - - free_cpumask_var(intrs); -free_mask: - free_cpumask_var(mask); + spin_unlock(&affinity->lock); + hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu); + + free_cpumask_var(intrs_mask); +free_available_mask: + free_cpumask_var(available_mask); +free_hw_thread_mask: + free_cpumask_var(hw_thread_mask); free_diff: free_cpumask_var(diff); done: return cpu; } -void hfi1_put_proc_affinity(struct hfi1_devdata *dd, int cpu) +void hfi1_put_proc_affinity(int cpu) { - struct cpu_mask_set *set = &dd->affinity->proc; + struct hfi1_affinity_node_list *affinity = &node_affinity; + struct cpu_mask_set *set = &affinity->proc; if (cpu < 0) return; - spin_lock(&dd->affinity->lock); + spin_lock(&affinity->lock); cpumask_clear_cpu(cpu, &set->used); + hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu); if (cpumask_empty(&set->used) && set->gen) { set->gen--; cpumask_copy(&set->used, &set->mask); } - spin_unlock(&dd->affinity->lock); + spin_unlock(&affinity->lock); } +/* Prevents concurrent reads and writes of the sdma_affinity attrib */ +static DEFINE_MUTEX(sdma_affinity_mutex); + +int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, + size_t count) +{ + struct hfi1_affinity_node *entry; + struct cpumask mask; + int ret, i; + + spin_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + spin_unlock(&node_affinity.lock); + + if (!entry) + return -EINVAL; + + ret = cpulist_parse(buf, &mask); + if (ret) + return ret; + + if (!cpumask_subset(&mask, cpu_online_mask) || cpumask_empty(&mask)) { + dd_dev_warn(dd, "Invalid CPU mask\n"); + return -EINVAL; + } + + mutex_lock(&sdma_affinity_mutex); + /* reset the SDMA interrupt affinity details */ + init_cpu_mask_set(&entry->def_intr); + cpumask_copy(&entry->def_intr.mask, &mask); + /* + * Reassign the affinity for each SDMA interrupt. + */ + for (i = 0; i < dd->num_msix_entries; i++) { + struct hfi1_msix_entry *msix; + + msix = &dd->msix_entries[i]; + if (msix->type != IRQ_SDMA) + continue; + + ret = hfi1_get_irq_affinity(dd, msix); + + if (ret) + break; + } + + mutex_unlock(&sdma_affinity_mutex); + return ret ? ret : strnlen(buf, PAGE_SIZE); +} + +int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf) +{ + struct hfi1_affinity_node *entry; + + spin_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + spin_unlock(&node_affinity.lock); + + if (!entry) + return -EINVAL; + + mutex_lock(&sdma_affinity_mutex); + cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask); + mutex_unlock(&sdma_affinity_mutex); + return strnlen(buf, PAGE_SIZE); +} diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index 20f52fe74091..8879cf7a8cac 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -73,7 +73,6 @@ struct cpu_mask_set { struct hfi1_affinity { struct cpu_mask_set def_intr; struct cpu_mask_set rcv_intr; - struct cpu_mask_set proc; struct cpumask real_cpu_mask; /* spin lock to protect affinity struct */ spinlock_t lock; @@ -82,11 +81,9 @@ struct hfi1_affinity { struct hfi1_msix_entry; /* Initialize non-HT cpu cores mask */ -int init_real_cpu_mask(struct hfi1_devdata *); +void init_real_cpu_mask(void); /* Initialize driver affinity data */ -void hfi1_dev_affinity_init(struct hfi1_devdata *); -/* Free driver affinity data */ -void hfi1_dev_affinity_free(struct hfi1_devdata *); +int hfi1_dev_affinity_init(struct hfi1_devdata *); /* * Set IRQ affinity to a CPU. The function will determine the * CPU and set the affinity to it. @@ -101,8 +98,35 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *); * Determine a CPU affinity for a user process, if the process does not * have an affinity set yet. */ -int hfi1_get_proc_affinity(struct hfi1_devdata *, int); +int hfi1_get_proc_affinity(int); /* Release a CPU used by a user process. */ -void hfi1_put_proc_affinity(struct hfi1_devdata *, int); +void hfi1_put_proc_affinity(int); + +int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf); +int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, + size_t count); + +struct hfi1_affinity_node { + int node; + struct cpu_mask_set def_intr; + struct cpu_mask_set rcv_intr; + struct cpumask general_intr_mask; + struct list_head list; +}; + +struct hfi1_affinity_node_list { + struct list_head list; + struct cpumask real_cpu_mask; + struct cpu_mask_set proc; + int num_core_siblings; + int num_online_nodes; + int num_online_cpus; + /* protect affinity node list */ + spinlock_t lock; +}; + +int node_affinity_init(void); +void node_affinity_destroy(void); +extern struct hfi1_affinity_node_list node_affinity; #endif /* _HFI1_AFFINITY_H */ diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index dad4d0ebbdff..b32638d58ae8 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -63,6 +63,7 @@ #include "efivar.h" #include "platform.h" #include "aspm.h" +#include "affinity.h" #define NUM_IB_PORTS 1 @@ -121,6 +122,7 @@ struct flag_table { #define SEC_SC_HALTED 0x4 /* per-context only */ #define SEC_SPC_FREEZE 0x8 /* per-HFI only */ +#define DEFAULT_KRCVQS 2 #define MIN_KERNEL_KCTXTS 2 #define FIRST_KERNEL_KCTXT 1 /* sizes for both the QP and RSM map tables */ @@ -238,6 +240,9 @@ struct flag_table { /* all CceStatus sub-block RXE pause bits */ #define ALL_RXE_PAUSE CCE_STATUS_RXE_PAUSED_SMASK +#define CNTR_MAX 0xFFFFFFFFFFFFFFFFULL +#define CNTR_32BIT_MAX 0x00000000FFFFFFFF + /* * CCE Error flags. */ @@ -3947,6 +3952,28 @@ static u64 access_sdma_wrong_dw_err_cnt(const struct cntr_entry *entry, return dd->sw_send_dma_eng_err_status_cnt[0]; } +static u64 access_dc_rcv_err_cnt(const struct cntr_entry *entry, + void *context, int vl, int mode, + u64 data) +{ + struct hfi1_devdata *dd = (struct hfi1_devdata *)context; + + u64 val = 0; + u64 csr = entry->csr; + + val = read_write_csr(dd, csr, mode, data); + if (mode == CNTR_MODE_R) { + val = val > CNTR_MAX - dd->sw_rcv_bypass_packet_errors ? + CNTR_MAX : val + dd->sw_rcv_bypass_packet_errors; + } else if (mode == CNTR_MODE_W) { + dd->sw_rcv_bypass_packet_errors = 0; + } else { + dd_dev_err(dd, "Invalid cntr register access mode"); + return 0; + } + return val; +} + #define def_access_sw_cpu(cntr) \ static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry, \ void *context, int vl, int mode, u64 data) \ @@ -4020,7 +4047,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { CCE_SEND_CREDIT_INT_CNT, CNTR_NORMAL), [C_DC_UNC_ERR] = DC_PERF_CNTR(DcUnctblErr, DCC_ERR_UNCORRECTABLE_CNT, CNTR_SYNTH), -[C_DC_RCV_ERR] = DC_PERF_CNTR(DcRecvErr, DCC_ERR_PORTRCV_ERR_CNT, CNTR_SYNTH), +[C_DC_RCV_ERR] = CNTR_ELEM("DcRecvErr", DCC_ERR_PORTRCV_ERR_CNT, 0, CNTR_SYNTH, + access_dc_rcv_err_cnt), [C_DC_FM_CFG_ERR] = DC_PERF_CNTR(DcFmCfgErr, DCC_ERR_FMCONFIG_ERR_CNT, CNTR_SYNTH), [C_DC_RMT_PHY_ERR] = DC_PERF_CNTR(DcRmtPhyErr, DCC_ERR_RCVREMOTE_PHY_ERR_CNT, @@ -8798,30 +8826,6 @@ static int write_tx_settings(struct hfi1_devdata *dd, return load_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, frame); } -static void check_fabric_firmware_versions(struct hfi1_devdata *dd) -{ - u32 frame, version, prod_id; - int ret, lane; - - /* 4 lanes */ - for (lane = 0; lane < 4; lane++) { - ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame); - if (ret) { - dd_dev_err(dd, - "Unable to read lane %d firmware details\n", - lane); - continue; - } - version = (frame >> SPICO_ROM_VERSION_SHIFT) - & SPICO_ROM_VERSION_MASK; - prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT) - & SPICO_ROM_PROD_ID_MASK; - dd_dev_info(dd, - "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n", - lane, version, prod_id); - } -} - /* * Read an idle LCB message. * @@ -9187,17 +9191,24 @@ static void wait_for_qsfp_init(struct hfi1_pportdata *ppd) unsigned long timeout; /* - * Check for QSFP interrupt for t_init (SFF 8679) + * Some QSFP cables have a quirk that asserts the IntN line as a side + * effect of power up on plug-in. We ignore this false positive + * interrupt until the module has finished powering up by waiting for + * a minimum timeout of the module inrush initialization time of + * 500 ms (SFF 8679 Table 5-6) to ensure the voltage rails in the + * module have stabilized. + */ + msleep(500); + + /* + * Check for QSFP interrupt for t_init (SFF 8679 Table 8-1) */ timeout = jiffies + msecs_to_jiffies(2000); while (1) { mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_IN : ASIC_QSFP1_IN); - if (!(mask & QSFP_HFI0_INT_N)) { - write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR : - ASIC_QSFP1_CLEAR, QSFP_HFI0_INT_N); + if (!(mask & QSFP_HFI0_INT_N)) break; - } if (time_after(jiffies, timeout)) { dd_dev_info(dd, "%s: No IntN detected, reset complete\n", __func__); @@ -9213,10 +9224,17 @@ static void set_qsfp_int_n(struct hfi1_pportdata *ppd, u8 enable) u64 mask; mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK); - if (enable) + if (enable) { + /* + * Clear the status register to avoid an immediate interrupt + * when we re-enable the IntN pin + */ + write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR : ASIC_QSFP1_CLEAR, + QSFP_HFI0_INT_N); mask |= (u64)QSFP_HFI0_INT_N; - else + } else { mask &= ~(u64)QSFP_HFI0_INT_N; + } write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask); } @@ -9630,14 +9648,6 @@ void hfi1_clear_tids(struct hfi1_ctxtdata *rcd) hfi1_put_tid(dd, i, PT_INVALID, 0, 0); } -int hfi1_get_base_kinfo(struct hfi1_ctxtdata *rcd, - struct hfi1_ctxt_info *kinfo) -{ - kinfo->runtime_flags = (HFI1_MISC_GET() << HFI1_CAP_USER_SHIFT) | - HFI1_CAP_UGET(MASK) | HFI1_CAP_KGET(K2U); - return 0; -} - struct hfi1_message_header *hfi1_get_msgheader( struct hfi1_devdata *dd, __le32 *rhf_addr) { @@ -9890,6 +9900,131 @@ static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs) return 0; } +static const char *state_completed_string(u32 completed) +{ + static const char * const state_completed[] = { + "EstablishComm", + "OptimizeEQ", + "VerifyCap" + }; + + if (completed < ARRAY_SIZE(state_completed)) + return state_completed[completed]; + + return "unknown"; +} + +static const char all_lanes_dead_timeout_expired[] = + "All lanes were inactive – was the interconnect media removed?"; +static const char tx_out_of_policy[] = + "Passing lanes on local port do not meet the local link width policy"; +static const char no_state_complete[] = + "State timeout occurred before link partner completed the state"; +static const char * const state_complete_reasons[] = { + [0x00] = "Reason unknown", + [0x01] = "Link was halted by driver, refer to LinkDownReason", + [0x02] = "Link partner reported failure", + [0x10] = "Unable to achieve frame sync on any lane", + [0x11] = + "Unable to find a common bit rate with the link partner", + [0x12] = + "Unable to achieve frame sync on sufficient lanes to meet the local link width policy", + [0x13] = + "Unable to identify preset equalization on sufficient lanes to meet the local link width policy", + [0x14] = no_state_complete, + [0x15] = + "State timeout occurred before link partner identified equalization presets", + [0x16] = + "Link partner completed the EstablishComm state, but the passing lanes do not meet the local link width policy", + [0x17] = tx_out_of_policy, + [0x20] = all_lanes_dead_timeout_expired, + [0x21] = + "Unable to achieve acceptable BER on sufficient lanes to meet the local link width policy", + [0x22] = no_state_complete, + [0x23] = + "Link partner completed the OptimizeEq state, but the passing lanes do not meet the local link width policy", + [0x24] = tx_out_of_policy, + [0x30] = all_lanes_dead_timeout_expired, + [0x31] = + "State timeout occurred waiting for host to process received frames", + [0x32] = no_state_complete, + [0x33] = + "Link partner completed the VerifyCap state, but the passing lanes do not meet the local link width policy", + [0x34] = tx_out_of_policy, +}; + +static const char *state_complete_reason_code_string(struct hfi1_pportdata *ppd, + u32 code) +{ + const char *str = NULL; + + if (code < ARRAY_SIZE(state_complete_reasons)) + str = state_complete_reasons[code]; + + if (str) + return str; + return "Reserved"; +} + +/* describe the given last state complete frame */ +static void decode_state_complete(struct hfi1_pportdata *ppd, u32 frame, + const char *prefix) +{ + struct hfi1_devdata *dd = ppd->dd; + u32 success; + u32 state; + u32 reason; + u32 lanes; + + /* + * Decode frame: + * [ 0: 0] - success + * [ 3: 1] - state + * [ 7: 4] - next state timeout + * [15: 8] - reason code + * [31:16] - lanes + */ + success = frame & 0x1; + state = (frame >> 1) & 0x7; + reason = (frame >> 8) & 0xff; + lanes = (frame >> 16) & 0xffff; + + dd_dev_err(dd, "Last %s LNI state complete frame 0x%08x:\n", + prefix, frame); + dd_dev_err(dd, " last reported state state: %s (0x%x)\n", + state_completed_string(state), state); + dd_dev_err(dd, " state successfully completed: %s\n", + success ? "yes" : "no"); + dd_dev_err(dd, " fail reason 0x%x: %s\n", + reason, state_complete_reason_code_string(ppd, reason)); + dd_dev_err(dd, " passing lane mask: 0x%x", lanes); +} + +/* + * Read the last state complete frames and explain them. This routine + * expects to be called if the link went down during link negotiation + * and initialization (LNI). That is, anywhere between polling and link up. + */ +static void check_lni_states(struct hfi1_pportdata *ppd) +{ + u32 last_local_state; + u32 last_remote_state; + + read_last_local_state(ppd->dd, &last_local_state); + read_last_remote_state(ppd->dd, &last_remote_state); + + /* + * Don't report anything if there is nothing to report. A value of + * 0 means the link was taken down while polling and there was no + * training in-process. + */ + if (last_local_state == 0 && last_remote_state == 0) + return; + + decode_state_complete(ppd, last_local_state, "transmitted"); + decode_state_complete(ppd, last_remote_state, "received"); +} + /* * Helper for set_link_state(). Do not call except from that routine. * Expects ppd->hls_mutex to be held. @@ -9902,8 +10037,6 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) { struct hfi1_devdata *dd = ppd->dd; u32 pstate, previous_state; - u32 last_local_state; - u32 last_remote_state; int ret; int do_transition; int do_wait; @@ -10003,12 +10136,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) } else if (previous_state & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) { /* went down while attempting link up */ - /* byte 1 of last_*_state is the failure reason */ - read_last_local_state(dd, &last_local_state); - read_last_remote_state(dd, &last_remote_state); - dd_dev_err(dd, - "LNI failure last states: local 0x%08x, remote 0x%08x\n", - last_local_state, last_remote_state); + check_lni_states(ppd); } /* the active link width (downgrade) is 0 on link down */ @@ -11668,9 +11796,6 @@ static void free_cntrs(struct hfi1_devdata *dd) dd->cntrnames = NULL; } -#define CNTR_MAX 0xFFFFFFFFFFFFFFFFULL -#define CNTR_32BIT_MAX 0x00000000FFFFFFFF - static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry, u64 *psval, void *context, int vl) { @@ -12325,37 +12450,6 @@ u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd) return ib_pstate; } -/* - * Read/modify/write ASIC_QSFP register bits as selected by mask - * data: 0 or 1 in the positions depending on what needs to be written - * dir: 0 for read, 1 for write - * mask: select by setting - * I2CCLK (bit 0) - * I2CDATA (bit 1) - */ -u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir, - u32 mask) -{ - u64 qsfp_oe, target_oe; - - target_oe = target ? ASIC_QSFP2_OE : ASIC_QSFP1_OE; - if (mask) { - /* We are writing register bits, so lock access */ - dir &= mask; - data &= mask; - - qsfp_oe = read_csr(dd, target_oe); - qsfp_oe = (qsfp_oe & ~(u64)mask) | (u64)dir; - write_csr(dd, target_oe, qsfp_oe); - } - /* We are exclusively reading bits here, but it is unlikely - * we'll get valid data when we set the direction of the pin - * in the same call, so read should call this function again - * to get valid data - */ - return read_csr(dd, target ? ASIC_QSFP2_IN : ASIC_QSFP1_IN); -} - #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \ (r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) @@ -12780,7 +12874,6 @@ static int set_up_context_variables(struct hfi1_devdata *dd) /* * Kernel receive contexts: - * - min of 2 or 1 context/numa (excluding control context) * - Context 0 - control context (VL15/multicast/error) * - Context 1 - first kernel context * - Context 2 - second kernel context @@ -12794,9 +12887,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) */ num_kernel_contexts = n_krcvqs + 1; else - num_kernel_contexts = num_online_nodes() + 1; - num_kernel_contexts = - max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts); + num_kernel_contexts = DEFAULT_KRCVQS + 1; /* * Every kernel receive context needs an ACK send context. * one send context is allocated for each VL{0-7} and VL15 @@ -12815,7 +12906,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) */ if (num_user_contexts < 0) num_user_contexts = - cpumask_weight(&dd->affinity->real_cpu_mask); + cpumask_weight(&node_affinity.real_cpu_mask); total_contexts = num_kernel_contexts + num_user_contexts; @@ -14141,6 +14232,11 @@ static int init_asic_data(struct hfi1_devdata *dd) } dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */ spin_unlock_irqrestore(&hfi1_devs_lock, flags); + + /* first one through - set up i2c devices */ + if (!peer) + ret = set_up_i2c(dd, dd->asic_data); + return ret; } @@ -14445,19 +14541,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, (dd->revision >> CCE_REVISION_SW_SHIFT) & CCE_REVISION_SW_MASK); - /* - * The real cpu mask is part of the affinity struct but has to be - * initialized earlier than the rest of the affinity struct because it - * is needed to calculate the number of user contexts in - * set_up_context_variables(). However, hfi1_dev_affinity_init(), - * which initializes the rest of the affinity struct members, - * depends on set_up_context_variables() for the number of kernel - * contexts, so it cannot be called before set_up_context_variables(). - */ - ret = init_real_cpu_mask(dd); - if (ret) - goto bail_cleanup; - ret = set_up_context_variables(dd); if (ret) goto bail_cleanup; @@ -14471,7 +14554,9 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, /* set up KDETH QP prefix in both RX and TX CSRs */ init_kdeth_qp(dd); - hfi1_dev_affinity_init(dd); + ret = hfi1_dev_affinity_init(dd); + if (ret) + goto bail_cleanup; /* send contexts must be set up before receive contexts */ ret = init_send_contexts(dd); @@ -14508,8 +14593,14 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, /* set up LCB access - must be after set_up_interrupts() */ init_lcb_access(dd); + /* + * Serial number is created from the base guid: + * [27:24] = base guid [38:35] + * [23: 0] = base guid [23: 0] + */ snprintf(dd->serial, SERIAL_MAX, "0x%08llx\n", - dd->base_guid & 0xFFFFFF); + (dd->base_guid & 0xFFFFFF) | + ((dd->base_guid >> 11) & 0xF000000)); dd->oui1 = dd->base_guid >> 56 & 0xFF; dd->oui2 = dd->base_guid >> 48 & 0xFF; @@ -14518,7 +14609,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, ret = load_firmware(dd); /* asymmetric with dispose_firmware() */ if (ret) goto bail_clear_intr; - check_fabric_firmware_versions(dd); thermal_init(dd); diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 66a327978739..ed11107c50fe 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -640,6 +640,7 @@ extern uint platform_config_load; /* SBus commands */ #define RESET_SBUS_RECEIVER 0x20 #define WRITE_SBUS_RECEIVER 0x21 +#define READ_SBUS_RECEIVER 0x22 void sbus_request(struct hfi1_devdata *dd, u8 receiver_addr, u8 data_addr, u8 command, u32 data_in); int sbus_request_slow(struct hfi1_devdata *dd, @@ -1336,10 +1337,6 @@ void hfi1_start_cleanup(struct hfi1_devdata *dd); void hfi1_clear_tids(struct hfi1_ctxtdata *rcd); struct hfi1_message_header *hfi1_get_msgheader( struct hfi1_devdata *dd, __le32 *rhf_addr); -int hfi1_get_base_kinfo(struct hfi1_ctxtdata *rcd, - struct hfi1_ctxt_info *kinfo); -u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir, - u32 mask); int hfi1_init_ctxt(struct send_context *sc); void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, u32 type, unsigned long pa, u16 order); diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index 8744de6667c2..5b9993899789 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -471,6 +471,10 @@ #define ASIC_STS_SBUS_RESULT (ASIC + 0x000000000010) #define ASIC_STS_SBUS_RESULT_DONE_SMASK 0x1ull #define ASIC_STS_SBUS_RESULT_RCV_DATA_VALID_SMASK 0x2ull +#define ASIC_STS_SBUS_RESULT_RESULT_CODE_SHIFT 2 +#define ASIC_STS_SBUS_RESULT_RESULT_CODE_MASK 0x7ull +#define ASIC_STS_SBUS_RESULT_DATA_OUT_SHIFT 32 +#define ASIC_STS_SBUS_RESULT_DATA_OUT_MASK 0xFFFFFFFFull #define ASIC_STS_THERM (ASIC + 0x000000000058) #define ASIC_STS_THERM_CRIT_TEMP_MASK 0x7FFull #define ASIC_STS_THERM_CRIT_TEMP_SHIFT 18 diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index c75b0ae688f8..8246dc7d0573 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -392,9 +392,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, u16 rlid; u8 svc_type, sl, sc5; - sc5 = (be16_to_cpu(rhdr->lrh[0]) >> 12) & 0xf; - if (rhf_dc_info(packet->rhf)) - sc5 |= 0x10; + sc5 = hdr2sc(rhdr, packet->rhf); sl = ibp->sc_to_sl[sc5]; lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK; @@ -450,14 +448,20 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd, packet->rcv_flags = 0; } -static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr, - struct hfi1_other_headers *ohdr, - u64 rhf, u32 bth1, struct ib_grh *grh) +void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, + bool do_cnp) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - u32 rqpn = 0; - u16 rlid; - u8 sc5, svc_type; + struct hfi1_ib_header *hdr = pkt->hdr; + struct hfi1_other_headers *ohdr = pkt->ohdr; + struct ib_grh *grh = NULL; + u32 rqpn = 0, bth1; + u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]); + u8 sc, svc_type; + bool is_mcast = false; + + if (pkt->rcv_flags & HFI1_HAS_GRH) + grh = &hdr->u.l.grh; switch (qp->ibqp.qp_type) { case IB_QPT_SMI: @@ -466,6 +470,8 @@ static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr, rlid = be16_to_cpu(hdr->lrh[3]); rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; svc_type = IB_CC_SVCTYPE_UD; + is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); break; case IB_QPT_UC: rlid = qp->remote_ah_attr.dlid; @@ -481,24 +487,23 @@ static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr, return; } - sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf; - if (rhf_dc_info(rhf)) - sc5 |= 0x10; + sc = hdr2sc((struct hfi1_message_header *)hdr, pkt->rhf); - if (bth1 & HFI1_FECN_SMASK) { + bth1 = be32_to_cpu(ohdr->bth[1]); + if (do_cnp && (bth1 & HFI1_FECN_SMASK)) { u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]); - u16 dlid = be16_to_cpu(hdr->lrh[1]); - return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc5, grh); + return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh); } - if (bth1 & HFI1_BECN_SMASK) { + if (!is_mcast && (bth1 & HFI1_BECN_SMASK)) { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); u32 lqpn = bth1 & RVT_QPN_MASK; - u8 sl = ibp->sc_to_sl[sc5]; + u8 sl = ibp->sc_to_sl[sc]; process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type); } + } struct ps_mdata { @@ -596,7 +601,6 @@ static void __prescan_rxq(struct hfi1_packet *packet) struct rvt_qp *qp; struct hfi1_ib_header *hdr; struct hfi1_other_headers *ohdr; - struct ib_grh *grh = NULL; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn, bth1; @@ -616,14 +620,13 @@ static void __prescan_rxq(struct hfi1_packet *packet) hfi1_get_msgheader(dd, rhf_addr); lnh = be16_to_cpu(hdr->lrh[0]) & 3; - if (lnh == HFI1_LRH_BTH) { + if (lnh == HFI1_LRH_BTH) ohdr = &hdr->u.oth; - } else if (lnh == HFI1_LRH_GRH) { + else if (lnh == HFI1_LRH_GRH) ohdr = &hdr->u.l.oth; - grh = &hdr->u.l.grh; - } else { + else goto next; /* just in case */ - } + bth1 = be32_to_cpu(ohdr->bth[1]); is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK)); @@ -639,7 +642,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) goto next; } - process_ecn(qp, hdr, ohdr, rhf, bth1, grh); + process_ecn(qp, packet, true); rcu_read_unlock(); /* turn off BECN, FECN */ @@ -1362,6 +1365,7 @@ int process_receive_bypass(struct hfi1_packet *packet) dd_dev_err(packet->rcd->dd, "Bypass packets are not supported in normal operation. Dropping\n"); + incr_cntr64(&packet->rcd->dd->sw_rcv_bypass_packet_errors); return RHF_RCV_CONTINUE; } diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index c702a009608f..1ecbec192358 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -168,6 +168,7 @@ static inline int is_valid_mmap(u64 token) static int hfi1_file_open(struct inode *inode, struct file *fp) { + struct hfi1_filedata *fd; struct hfi1_devdata *dd = container_of(inode->i_cdev, struct hfi1_devdata, user_cdev); @@ -176,10 +177,17 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) kobject_get(&dd->kobj); /* The real work is performed later in assign_ctxt() */ - fp->private_data = kzalloc(sizeof(struct hfi1_filedata), GFP_KERNEL); - if (fp->private_data) /* no cpu affinity by default */ - ((struct hfi1_filedata *)fp->private_data)->rec_cpu_num = -1; - return fp->private_data ? 0 : -ENOMEM; + + fd = kzalloc(sizeof(*fd), GFP_KERNEL); + + if (fd) { + fd->rec_cpu_num = -1; /* no cpu affinity by default */ + fd->mm = current->mm; + } + + fp->private_data = fd; + + return fd ? 0 : -ENOMEM; } static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, @@ -228,7 +236,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, sizeof(struct hfi1_base_info)); break; case HFI1_IOCTL_CREDIT_UPD: - if (uctxt && uctxt->sc) + if (uctxt) sc_return_credits(uctxt->sc); break; @@ -392,41 +400,38 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) struct hfi1_filedata *fd = kiocb->ki_filp->private_data; struct hfi1_user_sdma_pkt_q *pq = fd->pq; struct hfi1_user_sdma_comp_q *cq = fd->cq; - int ret = 0, done = 0, reqs = 0; + int done = 0, reqs = 0; unsigned long dim = from->nr_segs; - if (!cq || !pq) { - ret = -EIO; - goto done; - } + if (!cq || !pq) + return -EIO; - if (!iter_is_iovec(from) || !dim) { - ret = -EINVAL; - goto done; - } + if (!iter_is_iovec(from) || !dim) + return -EINVAL; hfi1_cdbg(SDMA, "SDMA request from %u:%u (%lu)", fd->uctxt->ctxt, fd->subctxt, dim); - if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) { - ret = -ENOSPC; - goto done; - } + if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) + return -ENOSPC; while (dim) { + int ret; unsigned long count = 0; ret = hfi1_user_sdma_process_request( kiocb->ki_filp, (struct iovec *)(from->iov + done), dim, &count); - if (ret) - goto done; + if (ret) { + reqs = ret; + break; + } dim -= count; done += count; reqs++; } -done: - return ret ? ret : reqs; + + return reqs; } static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) @@ -718,7 +723,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) hfi1_user_sdma_free_queues(fdata); /* release the cpu */ - hfi1_put_proc_affinity(dd, fdata->rec_cpu_num); + hfi1_put_proc_affinity(fdata->rec_cpu_num); /* * Clear any left over, unhandled events so the next process that @@ -730,7 +735,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) if (--uctxt->cnt) { uctxt->active_slaves &= ~(1 << fdata->subctxt); - uctxt->subpid[fdata->subctxt] = 0; mutex_unlock(&hfi1_mutex); goto done; } @@ -756,7 +760,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE, hfi1_pkt_default_send_ctxt_mask(dd, uctxt->sc->type)); sc_disable(uctxt->sc); - uctxt->pid = 0; spin_unlock_irqrestore(&dd->uctxt_lock, flags); dd->rcd[uctxt->ctxt] = NULL; @@ -818,9 +821,10 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) ret = find_shared_ctxt(fp, uinfo); if (ret < 0) goto done_unlock; - if (ret) - fd->rec_cpu_num = hfi1_get_proc_affinity( - fd->uctxt->dd, fd->uctxt->numa_id); + if (ret) { + fd->rec_cpu_num = + hfi1_get_proc_affinity(fd->uctxt->numa_id); + } } /* @@ -895,7 +899,6 @@ static int find_shared_ctxt(struct file *fp, } fd->uctxt = uctxt; fd->subctxt = uctxt->cnt++; - uctxt->subpid[fd->subctxt] = current->pid; uctxt->active_slaves |= 1 << fd->subctxt; ret = 1; goto done; @@ -932,7 +935,11 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, if (ctxt == dd->num_rcv_contexts) return -EBUSY; - fd->rec_cpu_num = hfi1_get_proc_affinity(dd, -1); + /* + * If we don't have a NUMA node requested, preference is towards + * device NUMA node. + */ + fd->rec_cpu_num = hfi1_get_proc_affinity(dd->node); if (fd->rec_cpu_num != -1) numa = cpu_to_node(fd->rec_cpu_num); else @@ -976,8 +983,7 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, return ret; } uctxt->userversion = uinfo->userversion; - uctxt->pid = current->pid; - uctxt->flags = HFI1_CAP_UGET(MASK); + uctxt->flags = hfi1_cap_mask; /* save current flag state */ init_waitqueue_head(&uctxt->wait); strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm)); memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)); @@ -1080,18 +1086,18 @@ static int user_init(struct file *fp) hfi1_set_ctxt_jkey(uctxt->dd, uctxt->ctxt, uctxt->jkey); rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; - if (HFI1_CAP_KGET_MASK(uctxt->flags, HDRSUPP)) + if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP)) rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB; /* * Ignore the bit in the flags for now until proper * support for multiple packet per rcv array entry is * added. */ - if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR)) + if (!HFI1_CAP_UGET_MASK(uctxt->flags, MULTI_PKT_EGR)) rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB; - if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL)) + if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_EGR_FULL)) rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; - if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) + if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; /* * The RcvCtxtCtrl.TailUpd bit has to be explicitly written. @@ -1099,7 +1105,7 @@ static int user_init(struct file *fp) * uses of the chip or ctxt. Therefore, add the rcvctrl op * for both cases. */ - if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL)) + if (HFI1_CAP_UGET_MASK(uctxt->flags, DMA_RTAIL)) rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; else rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS; @@ -1122,9 +1128,14 @@ static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len) int ret = 0; memset(&cinfo, 0, sizeof(cinfo)); - ret = hfi1_get_base_kinfo(uctxt, &cinfo); - if (ret < 0) - goto done; + cinfo.runtime_flags = (((uctxt->flags >> HFI1_CAP_MISC_SHIFT) & + HFI1_CAP_MISC_MASK) << HFI1_CAP_USER_SHIFT) | + HFI1_CAP_UGET_MASK(uctxt->flags, MASK) | + HFI1_CAP_KGET_MASK(uctxt->flags, K2U); + /* adjust flag if this fd is not able to cache */ + if (!fd->handler) + cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */ + cinfo.num_active = hfi1_count_active_units(); cinfo.unit = uctxt->dd->unit; cinfo.ctxt = uctxt->ctxt; @@ -1146,7 +1157,7 @@ static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len) trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo); if (copy_to_user(ubase, &cinfo, sizeof(cinfo))) ret = -EFAULT; -done: + return ret; } diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index ed680fda611d..13db8eb4f4ec 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -206,6 +206,9 @@ static const struct firmware *platform_config; /* the number of fabric SerDes on the SBus */ #define NUM_FABRIC_SERDES 4 +/* ASIC_STS_SBUS_RESULT.RESULT_CODE value */ +#define SBUS_READ_COMPLETE 0x4 + /* SBus fabric SerDes addresses, one set per HFI */ static const u8 fabric_serdes_addrs[2][NUM_FABRIC_SERDES] = { { 0x01, 0x02, 0x03, 0x04 }, @@ -240,6 +243,7 @@ static const u8 all_pcie_serdes_broadcast = 0xe0; static void dispose_one_firmware(struct firmware_details *fdet); static int load_fabric_serdes_firmware(struct hfi1_devdata *dd, struct firmware_details *fdet); +static void dump_fw_version(struct hfi1_devdata *dd); /* * Read a single 64-bit value from 8051 data memory. @@ -1079,6 +1083,44 @@ void sbus_request(struct hfi1_devdata *dd, } /* + * Read a value from the SBus. + * + * Requires the caller to be in fast mode + */ +static u32 sbus_read(struct hfi1_devdata *dd, u8 receiver_addr, u8 data_addr, + u32 data_in) +{ + u64 reg; + int retries; + int success = 0; + u32 result = 0; + u32 result_code = 0; + + sbus_request(dd, receiver_addr, data_addr, READ_SBUS_RECEIVER, data_in); + + for (retries = 0; retries < 100; retries++) { + usleep_range(1000, 1200); /* arbitrary */ + reg = read_csr(dd, ASIC_STS_SBUS_RESULT); + result_code = (reg >> ASIC_STS_SBUS_RESULT_RESULT_CODE_SHIFT) + & ASIC_STS_SBUS_RESULT_RESULT_CODE_MASK; + if (result_code != SBUS_READ_COMPLETE) + continue; + + success = 1; + result = (reg >> ASIC_STS_SBUS_RESULT_DATA_OUT_SHIFT) + & ASIC_STS_SBUS_RESULT_DATA_OUT_MASK; + break; + } + + if (!success) { + dd_dev_err(dd, "%s: read failed, result code 0x%x\n", __func__, + result_code); + } + + return result; +} + +/* * Turn off the SBus and fabric serdes spicos. * * + Must be called with Sbus fast mode turned on. @@ -1636,6 +1678,7 @@ int load_firmware(struct hfi1_devdata *dd) return ret; } + dump_fw_version(dd); return 0; } @@ -2054,3 +2097,85 @@ void read_guid(struct hfi1_devdata *dd) dd_dev_info(dd, "GUID %llx", (unsigned long long)dd->base_guid); } + +/* read and display firmware version info */ +static void dump_fw_version(struct hfi1_devdata *dd) +{ + u32 pcie_vers[NUM_PCIE_SERDES]; + u32 fabric_vers[NUM_FABRIC_SERDES]; + u32 sbus_vers; + int i; + int all_same; + int ret; + u8 rcv_addr; + + ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT); + if (ret) { + dd_dev_err(dd, "Unable to acquire SBus to read firmware versions\n"); + return; + } + + /* set fast mode */ + set_sbus_fast_mode(dd); + + /* read version for SBus Master */ + sbus_request(dd, SBUS_MASTER_BROADCAST, 0x02, WRITE_SBUS_RECEIVER, 0); + sbus_request(dd, SBUS_MASTER_BROADCAST, 0x07, WRITE_SBUS_RECEIVER, 0x1); + /* wait for interrupt to be processed */ + usleep_range(10000, 11000); + sbus_vers = sbus_read(dd, SBUS_MASTER_BROADCAST, 0x08, 0x1); + dd_dev_info(dd, "SBus Master firmware version 0x%08x\n", sbus_vers); + + /* read version for PCIe SerDes */ + all_same = 1; + pcie_vers[0] = 0; + for (i = 0; i < NUM_PCIE_SERDES; i++) { + rcv_addr = pcie_serdes_addrs[dd->hfi1_id][i]; + sbus_request(dd, rcv_addr, 0x03, WRITE_SBUS_RECEIVER, 0); + /* wait for interrupt to be processed */ + usleep_range(10000, 11000); + pcie_vers[i] = sbus_read(dd, rcv_addr, 0x04, 0x0); + if (i > 0 && pcie_vers[0] != pcie_vers[i]) + all_same = 0; + } + + if (all_same) { + dd_dev_info(dd, "PCIe SerDes firmware version 0x%x\n", + pcie_vers[0]); + } else { + dd_dev_warn(dd, "PCIe SerDes do not have the same firmware version\n"); + for (i = 0; i < NUM_PCIE_SERDES; i++) { + dd_dev_info(dd, + "PCIe SerDes lane %d firmware version 0x%x\n", + i, pcie_vers[i]); + } + } + + /* read version for fabric SerDes */ + all_same = 1; + fabric_vers[0] = 0; + for (i = 0; i < NUM_FABRIC_SERDES; i++) { + rcv_addr = fabric_serdes_addrs[dd->hfi1_id][i]; + sbus_request(dd, rcv_addr, 0x03, WRITE_SBUS_RECEIVER, 0); + /* wait for interrupt to be processed */ + usleep_range(10000, 11000); + fabric_vers[i] = sbus_read(dd, rcv_addr, 0x04, 0x0); + if (i > 0 && fabric_vers[0] != fabric_vers[i]) + all_same = 0; + } + + if (all_same) { + dd_dev_info(dd, "Fabric SerDes firmware version 0x%x\n", + fabric_vers[0]); + } else { + dd_dev_warn(dd, "Fabric SerDes do not have the same firmware version\n"); + for (i = 0; i < NUM_FABRIC_SERDES; i++) { + dd_dev_info(dd, + "Fabric SerDes lane %d firmware version 0x%x\n", + i, fabric_vers[i]); + } + } + + clear_sbus_fast_mode(dd); + release_chip_resource(dd, CR_SBUS); +} diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 4417a0fd3ef9..1000e0fd96d9 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -62,6 +62,8 @@ #include <linux/cdev.h> #include <linux/delay.h> #include <linux/kthread.h> +#include <linux/i2c.h> +#include <linux/i2c-algo-bit.h> #include <rdma/rdma_vt.h> #include "chip_registers.h" @@ -253,7 +255,7 @@ struct hfi1_ctxtdata { /* chip offset of PIO buffers for this ctxt */ u32 piobufs; /* per-context configuration flags */ - u32 flags; + unsigned long flags; /* per-context event flags for fileops/intr communication */ unsigned long event_flags; /* WAIT_RCV that timed out, no interrupt */ @@ -268,9 +270,6 @@ struct hfi1_ctxtdata { u32 urgent; /* saved total number of polled urgent packets for poll edge trigger */ u32 urgent_poll; - /* pid of process using this ctxt */ - pid_t pid; - pid_t subpid[HFI1_MAX_SHARED_CTXTS]; /* same size as task_struct .comm[], command that opened context */ char comm[TASK_COMM_LEN]; /* so file ops can get at unit */ @@ -366,11 +365,6 @@ struct hfi1_packet { u8 etype; }; -static inline bool has_sc4_bit(struct hfi1_packet *p) -{ - return !!rhf_dc_info(p->rhf); -} - /* * Private data for snoop/capture support. */ @@ -805,10 +799,19 @@ struct hfi1_temp { u8 triggers; /* temperature triggers */ }; +struct hfi1_i2c_bus { + struct hfi1_devdata *controlling_dd; /* current controlling device */ + struct i2c_adapter adapter; /* bus details */ + struct i2c_algo_bit_data algo; /* bus algorithm details */ + int num; /* bus number, 0 or 1 */ +}; + /* common data between shared ASIC HFIs */ struct hfi1_asic_data { struct hfi1_devdata *dds[2]; /* back pointers */ struct mutex asic_resource_mutex; + struct hfi1_i2c_bus *i2c_bus0; + struct hfi1_i2c_bus *i2c_bus1; }; /* device data struct now contains only "general per-device" info. @@ -1128,7 +1131,8 @@ struct hfi1_devdata { NUM_SEND_DMA_ENG_ERR_STATUS_COUNTERS]; /* Software counter that aggregates all cce_err_status errors */ u64 sw_cce_err_status_aggregate; - + /* Software counter that aggregates all bypass packet rcv errors */ + u64 sw_rcv_bypass_packet_errors; /* receive interrupt functions */ rhf_rcv_function_ptr *rhf_rcv_function_map; rhf_rcv_function_ptr normal_rhf_rcv_functions[8]; @@ -1174,6 +1178,8 @@ struct hfi1_devdata { /* 8051 firmware version helper */ #define dc8051_ver(a, b) ((a) << 8 | (b)) +#define dc8051_ver_maj(a) ((a & 0xff00) >> 8) +#define dc8051_ver_min(a) (a & 0x00ff) /* f_put_tid types */ #define PT_EXPECTED 0 @@ -1182,6 +1188,7 @@ struct hfi1_devdata { struct tid_rb_node; struct mmu_rb_node; +struct mmu_rb_handler; /* Private data for file operations */ struct hfi1_filedata { @@ -1192,7 +1199,7 @@ struct hfi1_filedata { /* for cpu affinity; -1 if none */ int rec_cpu_num; u32 tid_n_pinned; - struct rb_root tid_rb_root; + struct mmu_rb_handler *handler; struct tid_rb_node **entry_to_rb; spinlock_t tid_lock; /* protect tid_[limit,used] counters */ u32 tid_limit; @@ -1201,6 +1208,7 @@ struct hfi1_filedata { u32 invalid_tid_idx; /* protect invalid_tids array and invalid_tid_idx */ spinlock_t invalid_lock; + struct mm_struct *mm; }; extern struct list_head hfi1_dev_list; @@ -1234,6 +1242,8 @@ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int); int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int); void set_all_slowpath(struct hfi1_devdata *dd); +extern const struct pci_device_id hfi1_pci_tbl[]; + /* receive packet handler dispositions */ #define RCV_PKT_OK 0x0 /* keep going */ #define RCV_PKT_LIMIT 0x1 /* stop, hit limit, start thread */ @@ -1259,7 +1269,7 @@ void receive_interrupt_work(struct work_struct *work); static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf) { return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) | - ((!!(rhf & RHF_DC_INFO_SMASK)) << 4); + ((!!(rhf_dc_info(rhf))) << 4); } static inline u16 generate_jkey(kuid_t uid) @@ -1569,6 +1579,22 @@ static inline struct hfi1_ibport *to_iport(struct ib_device *ibdev, u8 port) return &dd->pport[pidx].ibport_data; } +void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, + bool do_cnp); +static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, + bool do_cnp) +{ + struct hfi1_other_headers *ohdr = pkt->ohdr; + u32 bth1; + + bth1 = be32_to_cpu(ohdr->bth[1]); + if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) { + hfi1_process_ecn_slowpath(qp, pkt, do_cnp); + return bth1 & HFI1_FECN_SMASK; + } + return false; +} + /* * Return the indexed PKEY from the port PKEY table. */ @@ -1586,8 +1612,7 @@ static inline u16 hfi1_get_pkey(struct hfi1_ibport *ibp, unsigned index) } /* - * Readers of cc_state must call get_cc_state() under rcu_read_lock(). - * Writers of cc_state must call get_cc_state() under cc_state_lock. + * Called by readers of cc_state only, must call under rcu_read_lock(). */ static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd) { @@ -1595,6 +1620,16 @@ static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd) } /* + * Called by writers of cc_state only, must call under cc_state_lock. + */ +static inline +struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd) +{ + return rcu_dereference_protected(ppd->cc_state, + lockdep_is_held(&ppd->cc_state_lock)); +} + +/* * values for dd->flags (_device_ related flags) */ #define HFI1_INITTED 0x1 /* chip and driver up and initted */ @@ -1669,9 +1704,12 @@ void shutdown_led_override(struct hfi1_pportdata *ppd); */ #define DEFAULT_RCVHDR_ENTSIZE 32 -bool hfi1_can_pin_pages(struct hfi1_devdata *, u32, u32); -int hfi1_acquire_user_pages(unsigned long, size_t, bool, struct page **); -void hfi1_release_user_pages(struct mm_struct *, struct page **, size_t, bool); +bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm, + u32 nlocked, u32 npages); +int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, + size_t npages, bool writable, struct page **pages); +void hfi1_release_user_pages(struct mm_struct *mm, struct page **p, + size_t npages, bool dirty); static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd) { @@ -1947,4 +1985,55 @@ static inline u32 qsfp_resource(struct hfi1_devdata *dd) int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); +#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) +#define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev)) + +#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype } +#define show_packettype(etype) \ +__print_symbolic(etype, \ + packettype_name(EXPECTED), \ + packettype_name(EAGER), \ + packettype_name(IB), \ + packettype_name(ERROR), \ + packettype_name(BYPASS)) + +#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode } +#define show_ib_opcode(opcode) \ +__print_symbolic(opcode, \ + ib_opcode_name(RC_SEND_FIRST), \ + ib_opcode_name(RC_SEND_MIDDLE), \ + ib_opcode_name(RC_SEND_LAST), \ + ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(RC_SEND_ONLY), \ + ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(RC_RDMA_WRITE_FIRST), \ + ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \ + ib_opcode_name(RC_RDMA_WRITE_LAST), \ + ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(RC_RDMA_WRITE_ONLY), \ + ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(RC_RDMA_READ_REQUEST), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \ + ib_opcode_name(RC_ACKNOWLEDGE), \ + ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ + ib_opcode_name(RC_COMPARE_SWAP), \ + ib_opcode_name(RC_FETCH_ADD), \ + ib_opcode_name(UC_SEND_FIRST), \ + ib_opcode_name(UC_SEND_MIDDLE), \ + ib_opcode_name(UC_SEND_LAST), \ + ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(UC_SEND_ONLY), \ + ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(UC_RDMA_WRITE_FIRST), \ + ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \ + ib_opcode_name(UC_RDMA_WRITE_LAST), \ + ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(UC_RDMA_WRITE_ONLY), \ + ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(UD_SEND_ONLY), \ + ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(CNP)) #endif /* _HFI1_KERNEL_H */ diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index eed971ccd2a1..a358d23ecd54 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -64,6 +64,7 @@ #include "debugfs.h" #include "verbs.h" #include "aspm.h" +#include "affinity.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -474,8 +475,9 @@ static enum hrtimer_restart cca_timer_fn(struct hrtimer *t) void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, struct hfi1_devdata *dd, u8 hw_pidx, u8 port) { - int i, size; + int i; uint default_pkey_idx; + struct cc_state *cc_state; ppd->dd = dd; ppd->hw_pidx = hw_pidx; @@ -526,9 +528,9 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, spin_lock_init(&ppd->cc_state_lock); spin_lock_init(&ppd->cc_log_lock); - size = sizeof(struct cc_state); - RCU_INIT_POINTER(ppd->cc_state, kzalloc(size, GFP_KERNEL)); - if (!rcu_dereference(ppd->cc_state)) + cc_state = kzalloc(sizeof(*cc_state), GFP_KERNEL); + RCU_INIT_POINTER(ppd->cc_state, cc_state); + if (!cc_state) goto bail; return; @@ -972,39 +974,49 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) /* * Release our hold on the shared asic data. If we are the last one, - * free the structure. Must be holding hfi1_devs_lock. + * return the structure to be finalized outside the lock. Must be + * holding hfi1_devs_lock. */ -static void release_asic_data(struct hfi1_devdata *dd) +static struct hfi1_asic_data *release_asic_data(struct hfi1_devdata *dd) { + struct hfi1_asic_data *ad; int other; if (!dd->asic_data) - return; + return NULL; dd->asic_data->dds[dd->hfi1_id] = NULL; other = dd->hfi1_id ? 0 : 1; - if (!dd->asic_data->dds[other]) { - /* we are the last holder, free it */ - kfree(dd->asic_data); - } + ad = dd->asic_data; dd->asic_data = NULL; + /* return NULL if the other dd still has a link */ + return ad->dds[other] ? NULL : ad; +} + +static void finalize_asic_data(struct hfi1_devdata *dd, + struct hfi1_asic_data *ad) +{ + clean_up_i2c(dd, ad); + kfree(ad); } static void __hfi1_free_devdata(struct kobject *kobj) { struct hfi1_devdata *dd = container_of(kobj, struct hfi1_devdata, kobj); + struct hfi1_asic_data *ad; unsigned long flags; spin_lock_irqsave(&hfi1_devs_lock, flags); idr_remove(&hfi1_unit_table, dd->unit); list_del(&dd->list); - release_asic_data(dd); + ad = release_asic_data(dd); spin_unlock_irqrestore(&hfi1_devs_lock, flags); + if (ad) + finalize_asic_data(dd, ad); free_platform_config(dd); rcu_barrier(); /* wait for rcu callbacks to complete */ free_percpu(dd->int_counter); free_percpu(dd->rcv_limit); - hfi1_dev_affinity_free(dd); free_percpu(dd->send_schedule); rvt_dealloc_device(&dd->verbs_dev.rdi); } @@ -1162,7 +1174,7 @@ static int init_one(struct pci_dev *, const struct pci_device_id *); #define DRIVER_LOAD_MSG "Intel " DRIVER_NAME " loaded: " #define PFX DRIVER_NAME ": " -static const struct pci_device_id hfi1_pci_tbl[] = { +const struct pci_device_id hfi1_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL0) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL1) }, { 0, } @@ -1198,6 +1210,10 @@ static int __init hfi1_mod_init(void) if (ret) goto bail; + ret = node_affinity_init(); + if (ret) + goto bail; + /* validate max MTU before any devices start */ if (!valid_opa_max_mtu(hfi1_max_mtu)) { pr_err("Invalid max_mtu 0x%x, using 0x%x instead\n", @@ -1278,6 +1294,7 @@ module_init(hfi1_mod_init); static void __exit hfi1_mod_cleanup(void) { pci_unregister_driver(&hfi1_pci_driver); + node_affinity_destroy(); hfi1_wss_exit(); hfi1_dbg_exit(); hfi1_cpulist_count = 0; @@ -1311,7 +1328,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) hrtimer_cancel(&ppd->cca_timer[i].hrtimer); spin_lock(&ppd->cc_state_lock); - cc_state = get_cc_state(ppd); + cc_state = get_cc_state_protected(ppd); RCU_INIT_POINTER(ppd->cc_state, NULL); spin_unlock(&ppd->cc_state_lock); @@ -1760,8 +1777,8 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) hfi1_cdbg(PROC, "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %zuKB\n", - rcd->ctxt, rcd->egrbufs.alloced, rcd->egrbufs.rcvtid_size, - rcd->egrbufs.size); + rcd->ctxt, rcd->egrbufs.alloced, + rcd->egrbufs.rcvtid_size / 1024, rcd->egrbufs.size / 1024); /* * Set the contexts rcv array head update threshold to the closest diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index fca07a1d6c28..1263abe01999 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -588,7 +588,6 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, pi->port_phys_conf = (ppd->port_type & 0xf); -#if PI_LED_ENABLE_SUP pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4; pi->port_states.ledenable_offlinereason |= ppd->is_sm_config_started << 5; @@ -602,11 +601,6 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6; pi->port_states.ledenable_offlinereason |= ppd->offline_disabled_reason; -#else - pi->port_states.offline_reason = ppd->neighbor_normal << 4; - pi->port_states.offline_reason |= ppd->is_sm_config_started << 5; - pi->port_states.offline_reason |= ppd->offline_disabled_reason; -#endif /* PI_LED_ENABLE_SUP */ pi->port_states.portphysstate_portstate = (hfi1_ibphys_portstate(ppd) << 4) | state; @@ -1752,17 +1746,11 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, if (start_of_sm_config && (lstate == IB_PORT_INIT)) ppd->is_sm_config_started = 1; -#if PI_LED_ENABLE_SUP psi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4; psi->port_states.ledenable_offlinereason |= ppd->is_sm_config_started << 5; psi->port_states.ledenable_offlinereason |= ppd->offline_disabled_reason; -#else - psi->port_states.offline_reason = ppd->neighbor_normal << 4; - psi->port_states.offline_reason |= ppd->is_sm_config_started << 5; - psi->port_states.offline_reason |= ppd->offline_disabled_reason; -#endif /* PI_LED_ENABLE_SUP */ psi->port_states.portphysstate_portstate = (hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf); @@ -2430,14 +2418,9 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, rsp->port_rcv_remote_physical_errors = cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL)); - tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL); - tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL); - if (tmp2 < tmp) { - /* overflow/wrapped */ - rsp->local_link_integrity_errors = cpu_to_be64(~0); - } else { - rsp->local_link_integrity_errors = cpu_to_be64(tmp2); - } + rsp->local_link_integrity_errors = + cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY, + CNTR_INVALID_VL)); tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL); tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL); @@ -2499,6 +2482,9 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL, idx_from_vl(vl))); + rsp->vls[vfi].port_vl_xmit_discards = + cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL, + idx_from_vl(vl))); vlinfo++; vfi++; } @@ -2529,9 +2515,8 @@ static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port, error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL); /* local link integrity must be right-shifted by the lli resolution */ - tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL); - tmp += read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL); - error_counter_summary += (tmp >> res_lli); + error_counter_summary += (read_dev_cntr(dd, C_DC_RX_REPLAY, + CNTR_INVALID_VL) >> res_lli); /* link error recovery must b right-shifted by the ler resolution */ tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL); tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL); @@ -2800,14 +2785,9 @@ static void pma_get_opa_port_ectrs(struct ib_device *ibdev, rsp->port_rcv_constraint_errors = cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL)); - tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL); - tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL); - if (tmp2 < tmp) { - /* overflow/wrapped */ - rsp->local_link_integrity_errors = cpu_to_be64(~0); - } else { - rsp->local_link_integrity_errors = cpu_to_be64(tmp2); - } + rsp->local_link_integrity_errors = + cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY, + CNTR_INVALID_VL)); rsp->excessive_buffer_overruns = cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL)); } @@ -2883,14 +2863,17 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL); rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff; - + rsp->port_rcv_errors = + cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL)); vlinfo = &rsp->vls[0]; vfi = 0; vl_select_mask = be32_to_cpu(req->vl_select_mask); for_each_set_bit(vl, (unsigned long *)&(vl_select_mask), 8 * sizeof(req->vl_select_mask)) { memset(vlinfo, 0, sizeof(*vlinfo)); - /* vlinfo->vls[vfi].port_vl_xmit_discards ??? */ + rsp->vls[vfi].port_vl_xmit_discards = + cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL, + idx_from_vl(vl))); vlinfo += 1; vfi++; } @@ -3162,10 +3145,8 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp, if (counter_select & CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS) write_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL, 0); - if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS) { - write_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL, 0); + if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS) write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0); - } if (counter_select & CS_LINK_ERROR_RECOVERY) { write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0); @@ -3223,7 +3204,9 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp, /* if (counter_select & CS_PORT_MARK_FECN) * write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0); */ - /* port_vl_xmit_discards ??? */ + if (counter_select & C_SW_XMIT_DSCD_VL) + write_port_cntr(ppd, C_SW_XMIT_DSCD_VL, + idx_from_vl(vl), 0); } if (resp_len) @@ -3392,7 +3375,7 @@ static void apply_cc_state(struct hfi1_pportdata *ppd) */ spin_lock(&ppd->cc_state_lock); - old_cc_state = get_cc_state(ppd); + old_cc_state = get_cc_state_protected(ppd); if (!old_cc_state) { /* never active, or shutting down */ spin_unlock(&ppd->cc_state_lock); @@ -3960,7 +3943,6 @@ void clear_linkup_counters(struct hfi1_devdata *dd) write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0); write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL, 0); /* LocalLinkIntegrityErrors */ - write_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL, 0); write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0); /* ExcessiveBufferOverruns */ write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0); diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h index 8b734aaae88a..5aa3fd1be653 100644 --- a/drivers/infiniband/hw/hfi1/mad.h +++ b/drivers/infiniband/hw/hfi1/mad.h @@ -48,15 +48,8 @@ #define _HFI1_MAD_H #include <rdma/ib_pma.h> -#define USE_PI_LED_ENABLE 1 /* - * use led enabled bit in struct - * opa_port_states, if available - */ #include <rdma/opa_smi.h> #include <rdma/opa_port_info.h> -#ifndef PI_LED_ENABLE_SUP -#define PI_LED_ENABLE_SUP 0 -#endif #include "opa_compat.h" /* diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index b7a80aa1ae30..7ad30898fc19 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -53,19 +53,20 @@ #include "trace.h" struct mmu_rb_handler { - struct list_head list; struct mmu_notifier mn; - struct rb_root *root; + struct rb_root root; + void *ops_arg; spinlock_t lock; /* protect the RB tree */ struct mmu_rb_ops *ops; + struct mm_struct *mm; + struct list_head lru_list; + struct work_struct del_work; + struct list_head del_list; + struct workqueue_struct *wq; }; -static LIST_HEAD(mmu_rb_handlers); -static DEFINE_SPINLOCK(mmu_rb_lock); /* protect mmu_rb_handlers list */ - static unsigned long mmu_node_start(struct mmu_rb_node *); static unsigned long mmu_node_last(struct mmu_rb_node *); -static struct mmu_rb_handler *find_mmu_handler(struct rb_root *); static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *, unsigned long); static inline void mmu_notifier_range_start(struct mmu_notifier *, @@ -76,6 +77,9 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *, unsigned long, unsigned long); static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *, unsigned long, unsigned long); +static void do_remove(struct mmu_rb_handler *handler, + struct list_head *del_list); +static void handle_remove(struct work_struct *work); static struct mmu_notifier_ops mn_opts = { .invalidate_page = mmu_notifier_page, @@ -95,73 +99,79 @@ static unsigned long mmu_node_last(struct mmu_rb_node *node) return PAGE_ALIGN(node->addr + node->len) - 1; } -int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) +int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm, + struct mmu_rb_ops *ops, + struct workqueue_struct *wq, + struct mmu_rb_handler **handler) { struct mmu_rb_handler *handlr; - - if (!ops->invalidate) - return -EINVAL; + int ret; handlr = kmalloc(sizeof(*handlr), GFP_KERNEL); if (!handlr) return -ENOMEM; - handlr->root = root; + handlr->root = RB_ROOT; handlr->ops = ops; + handlr->ops_arg = ops_arg; INIT_HLIST_NODE(&handlr->mn.hlist); spin_lock_init(&handlr->lock); handlr->mn.ops = &mn_opts; - spin_lock(&mmu_rb_lock); - list_add_tail_rcu(&handlr->list, &mmu_rb_handlers); - spin_unlock(&mmu_rb_lock); + handlr->mm = mm; + INIT_WORK(&handlr->del_work, handle_remove); + INIT_LIST_HEAD(&handlr->del_list); + INIT_LIST_HEAD(&handlr->lru_list); + handlr->wq = wq; + + ret = mmu_notifier_register(&handlr->mn, handlr->mm); + if (ret) { + kfree(handlr); + return ret; + } - return mmu_notifier_register(&handlr->mn, current->mm); + *handler = handlr; + return 0; } -void hfi1_mmu_rb_unregister(struct rb_root *root) +void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler) { - struct mmu_rb_handler *handler = find_mmu_handler(root); + struct mmu_rb_node *rbnode; + struct rb_node *node; unsigned long flags; - - if (!handler) - return; + struct list_head del_list; /* Unregister first so we don't get any more notifications. */ - if (current->mm) - mmu_notifier_unregister(&handler->mn, current->mm); + mmu_notifier_unregister(&handler->mn, handler->mm); - spin_lock(&mmu_rb_lock); - list_del_rcu(&handler->list); - spin_unlock(&mmu_rb_lock); - synchronize_rcu(); + /* + * Make sure the wq delete handler is finished running. It will not + * be triggered once the mmu notifiers are unregistered above. + */ + flush_work(&handler->del_work); + + INIT_LIST_HEAD(&del_list); spin_lock_irqsave(&handler->lock, flags); - if (!RB_EMPTY_ROOT(root)) { - struct rb_node *node; - struct mmu_rb_node *rbnode; - - while ((node = rb_first(root))) { - rbnode = rb_entry(node, struct mmu_rb_node, node); - rb_erase(node, root); - if (handler->ops->remove) - handler->ops->remove(root, rbnode, NULL); - } + while ((node = rb_first(&handler->root))) { + rbnode = rb_entry(node, struct mmu_rb_node, node); + rb_erase(node, &handler->root); + /* move from LRU list to delete list */ + list_move(&rbnode->list, &del_list); } spin_unlock_irqrestore(&handler->lock, flags); + do_remove(handler, &del_list); + kfree(handler); } -int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode) +int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler, + struct mmu_rb_node *mnode) { - struct mmu_rb_handler *handler = find_mmu_handler(root); struct mmu_rb_node *node; unsigned long flags; int ret = 0; - if (!handler) - return -EINVAL; - spin_lock_irqsave(&handler->lock, flags); hfi1_cdbg(MMU, "Inserting node addr 0x%llx, len %u", mnode->addr, mnode->len); @@ -170,12 +180,13 @@ int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode) ret = -EINVAL; goto unlock; } - __mmu_int_rb_insert(mnode, root); + __mmu_int_rb_insert(mnode, &handler->root); + list_add(&mnode->list, &handler->lru_list); - if (handler->ops->insert) { - ret = handler->ops->insert(root, mnode); - if (ret) - __mmu_int_rb_remove(mnode, root); + ret = handler->ops->insert(handler->ops_arg, mnode); + if (ret) { + __mmu_int_rb_remove(mnode, &handler->root); + list_del(&mnode->list); /* remove from LRU list */ } unlock: spin_unlock_irqrestore(&handler->lock, flags); @@ -191,10 +202,10 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, hfi1_cdbg(MMU, "Searching for addr 0x%llx, len %u", addr, len); if (!handler->ops->filter) { - node = __mmu_int_rb_iter_first(handler->root, addr, + node = __mmu_int_rb_iter_first(&handler->root, addr, (addr + len) - 1); } else { - for (node = __mmu_int_rb_iter_first(handler->root, addr, + for (node = __mmu_int_rb_iter_first(&handler->root, addr, (addr + len) - 1); node; node = __mmu_int_rb_iter_next(node, addr, @@ -206,82 +217,72 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, return node; } -/* Caller must *not* hold handler lock. */ -static void __mmu_rb_remove(struct mmu_rb_handler *handler, - struct mmu_rb_node *node, struct mm_struct *mm) -{ - unsigned long flags; - - /* Validity of handler and node pointers has been checked by caller. */ - hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr, - node->len); - spin_lock_irqsave(&handler->lock, flags); - __mmu_int_rb_remove(node, handler->root); - spin_unlock_irqrestore(&handler->lock, flags); - - if (handler->ops->remove) - handler->ops->remove(handler->root, node, mm); -} - -struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr, - unsigned long len) +struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler, + unsigned long addr, unsigned long len) { - struct mmu_rb_handler *handler = find_mmu_handler(root); struct mmu_rb_node *node; unsigned long flags; - if (!handler) - return ERR_PTR(-EINVAL); - spin_lock_irqsave(&handler->lock, flags); node = __mmu_rb_search(handler, addr, len); + if (node) { + __mmu_int_rb_remove(node, &handler->root); + list_del(&node->list); /* remove from LRU list */ + } spin_unlock_irqrestore(&handler->lock, flags); return node; } -struct mmu_rb_node *hfi1_mmu_rb_extract(struct rb_root *root, - unsigned long addr, unsigned long len) +void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg) { - struct mmu_rb_handler *handler = find_mmu_handler(root); - struct mmu_rb_node *node; + struct mmu_rb_node *rbnode, *ptr; + struct list_head del_list; unsigned long flags; + bool stop = false; - if (!handler) - return ERR_PTR(-EINVAL); + INIT_LIST_HEAD(&del_list); spin_lock_irqsave(&handler->lock, flags); - node = __mmu_rb_search(handler, addr, len); - if (node) - __mmu_int_rb_remove(node, handler->root); + list_for_each_entry_safe_reverse(rbnode, ptr, &handler->lru_list, + list) { + if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg, + &stop)) { + __mmu_int_rb_remove(rbnode, &handler->root); + /* move from LRU list to delete list */ + list_move(&rbnode->list, &del_list); + } + if (stop) + break; + } spin_unlock_irqrestore(&handler->lock, flags); - return node; + while (!list_empty(&del_list)) { + rbnode = list_first_entry(&del_list, struct mmu_rb_node, list); + list_del(&rbnode->list); + handler->ops->remove(handler->ops_arg, rbnode); + } } -void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node) +/* + * It is up to the caller to ensure that this function does not race with the + * mmu invalidate notifier which may be calling the users remove callback on + * 'node'. + */ +void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler, + struct mmu_rb_node *node) { - struct mmu_rb_handler *handler = find_mmu_handler(root); - - if (!handler || !node) - return; - - __mmu_rb_remove(handler, node, NULL); -} + unsigned long flags; -static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root) -{ - struct mmu_rb_handler *handler; + /* Validity of handler and node pointers has been checked by caller. */ + hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr, + node->len); + spin_lock_irqsave(&handler->lock, flags); + __mmu_int_rb_remove(node, &handler->root); + list_del(&node->list); /* remove from LRU list */ + spin_unlock_irqrestore(&handler->lock, flags); - rcu_read_lock(); - list_for_each_entry_rcu(handler, &mmu_rb_handlers, list) { - if (handler->root == root) - goto unlock; - } - handler = NULL; -unlock: - rcu_read_unlock(); - return handler; + handler->ops->remove(handler->ops_arg, node); } static inline void mmu_notifier_page(struct mmu_notifier *mn, @@ -304,9 +305,10 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, { struct mmu_rb_handler *handler = container_of(mn, struct mmu_rb_handler, mn); - struct rb_root *root = handler->root; + struct rb_root *root = &handler->root; struct mmu_rb_node *node, *ptr = NULL; unsigned long flags; + bool added = false; spin_lock_irqsave(&handler->lock, flags); for (node = __mmu_int_rb_iter_first(root, start, end - 1); @@ -315,11 +317,53 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, ptr = __mmu_int_rb_iter_next(node, start, end - 1); hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u", node->addr, node->len); - if (handler->ops->invalidate(root, node)) { + if (handler->ops->invalidate(handler->ops_arg, node)) { __mmu_int_rb_remove(node, root); - if (handler->ops->remove) - handler->ops->remove(root, node, mm); + /* move from LRU list to delete list */ + list_move(&node->list, &handler->del_list); + added = true; } } spin_unlock_irqrestore(&handler->lock, flags); + + if (added) + queue_work(handler->wq, &handler->del_work); +} + +/* + * Call the remove function for the given handler and the list. This + * is expected to be called with a delete list extracted from handler. + * The caller should not be holding the handler lock. + */ +static void do_remove(struct mmu_rb_handler *handler, + struct list_head *del_list) +{ + struct mmu_rb_node *node; + + while (!list_empty(del_list)) { + node = list_first_entry(del_list, struct mmu_rb_node, list); + list_del(&node->list); + handler->ops->remove(handler->ops_arg, node); + } +} + +/* + * Work queue function to remove all nodes that have been queued up to + * be removed. The key feature is that mm->mmap_sem is not being held + * and the remove callback can sleep while taking it, if needed. + */ +static void handle_remove(struct work_struct *work) +{ + struct mmu_rb_handler *handler = container_of(work, + struct mmu_rb_handler, + del_work); + struct list_head del_list; + unsigned long flags; + + /* remove anything that is queued to get removed */ + spin_lock_irqsave(&handler->lock, flags); + list_replace_init(&handler->del_list, &del_list); + spin_unlock_irqrestore(&handler->lock, flags); + + do_remove(handler, &del_list); } diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h index 7a57b9c49d27..754f6ebf13fb 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.h +++ b/drivers/infiniband/hw/hfi1/mmu_rb.h @@ -54,23 +54,34 @@ struct mmu_rb_node { unsigned long len; unsigned long __last; struct rb_node node; + struct list_head list; }; +/* + * NOTE: filter, insert, invalidate, and evict must not sleep. Only remove is + * allowed to sleep. + */ struct mmu_rb_ops { - bool (*filter)(struct mmu_rb_node *, unsigned long, unsigned long); - int (*insert)(struct rb_root *, struct mmu_rb_node *); - void (*remove)(struct rb_root *, struct mmu_rb_node *, - struct mm_struct *); - int (*invalidate)(struct rb_root *, struct mmu_rb_node *); + bool (*filter)(struct mmu_rb_node *node, unsigned long addr, + unsigned long len); + int (*insert)(void *ops_arg, struct mmu_rb_node *mnode); + void (*remove)(void *ops_arg, struct mmu_rb_node *mnode); + int (*invalidate)(void *ops_arg, struct mmu_rb_node *node); + int (*evict)(void *ops_arg, struct mmu_rb_node *mnode, + void *evict_arg, bool *stop); }; -int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops); -void hfi1_mmu_rb_unregister(struct rb_root *); -int hfi1_mmu_rb_insert(struct rb_root *, struct mmu_rb_node *); -void hfi1_mmu_rb_remove(struct rb_root *, struct mmu_rb_node *); -struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *, unsigned long, - unsigned long); -struct mmu_rb_node *hfi1_mmu_rb_extract(struct rb_root *, unsigned long, - unsigned long); +int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm, + struct mmu_rb_ops *ops, + struct workqueue_struct *wq, + struct mmu_rb_handler **handler); +void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler); +int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler, + struct mmu_rb_node *mnode); +void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg); +void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler, + struct mmu_rb_node *mnode); +struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler, + unsigned long addr, unsigned long len); #endif /* _HFI1_MMU_RB_H */ diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 0bac21e6a658..89c68da1c273 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -679,6 +679,10 @@ static uint pcie_pset = UNSET_PSET; module_param(pcie_pset, uint, S_IRUGO); MODULE_PARM_DESC(pcie_pset, "PCIe Eq Pset value to use, range is 0-10"); +static uint pcie_ctle = 1; /* discrete on, integrated off */ +module_param(pcie_ctle, uint, S_IRUGO); +MODULE_PARM_DESC(pcie_ctle, "PCIe static CTLE mode, bit 0 - discrete on/off, bit 1 - integrated on/off"); + /* equalization columns */ #define PREC 0 #define ATTN 1 @@ -716,6 +720,36 @@ static const u8 integrated_preliminary_eq[11][3] = { { 0x00, 0x1e, 0x0a }, /* p10 */ }; +static const u8 discrete_ctle_tunings[11][4] = { + /* DC LF HF BW */ + { 0x48, 0x0b, 0x04, 0x04 }, /* p0 */ + { 0x60, 0x05, 0x0f, 0x0a }, /* p1 */ + { 0x50, 0x09, 0x06, 0x06 }, /* p2 */ + { 0x68, 0x05, 0x0f, 0x0a }, /* p3 */ + { 0x80, 0x05, 0x0f, 0x0a }, /* p4 */ + { 0x70, 0x05, 0x0f, 0x0a }, /* p5 */ + { 0x68, 0x05, 0x0f, 0x0a }, /* p6 */ + { 0x38, 0x0f, 0x00, 0x00 }, /* p7 */ + { 0x48, 0x09, 0x06, 0x06 }, /* p8 */ + { 0x60, 0x05, 0x0f, 0x0a }, /* p9 */ + { 0x38, 0x0f, 0x00, 0x00 }, /* p10 */ +}; + +static const u8 integrated_ctle_tunings[11][4] = { + /* DC LF HF BW */ + { 0x38, 0x0f, 0x00, 0x00 }, /* p0 */ + { 0x38, 0x0f, 0x00, 0x00 }, /* p1 */ + { 0x38, 0x0f, 0x00, 0x00 }, /* p2 */ + { 0x38, 0x0f, 0x00, 0x00 }, /* p3 */ + { 0x58, 0x0a, 0x05, 0x05 }, /* p4 */ + { 0x48, 0x0a, 0x05, 0x05 }, /* p5 */ + { 0x40, 0x0a, 0x05, 0x05 }, /* p6 */ + { 0x38, 0x0f, 0x00, 0x00 }, /* p7 */ + { 0x38, 0x0f, 0x00, 0x00 }, /* p8 */ + { 0x38, 0x09, 0x06, 0x06 }, /* p9 */ + { 0x38, 0x0e, 0x01, 0x01 }, /* p10 */ +}; + /* helper to format the value to write to hardware */ #define eq_value(pre, curr, post) \ ((((u32)(pre)) << \ @@ -951,11 +985,14 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd) u32 status, err; int ret; int do_retry, retry_count = 0; + int intnum = 0; uint default_pset; u16 target_vector, target_speed; u16 lnkctl2, vendor; u8 div; const u8 (*eq)[3]; + const u8 (*ctle_tunings)[4]; + uint static_ctle_mode; int return_error = 0; /* PCIe Gen3 is for the ASIC only */ @@ -1089,6 +1126,9 @@ retry: div = 3; eq = discrete_preliminary_eq; default_pset = DEFAULT_DISCRETE_PSET; + ctle_tunings = discrete_ctle_tunings; + /* bit 0 - discrete on/off */ + static_ctle_mode = pcie_ctle & 0x1; } else { /* 400mV, FS=29, LF = 9 */ fs = 29; @@ -1096,6 +1136,9 @@ retry: div = 1; eq = integrated_preliminary_eq; default_pset = DEFAULT_MCP_PSET; + ctle_tunings = integrated_ctle_tunings; + /* bit 1 - integrated on/off */ + static_ctle_mode = (pcie_ctle >> 1) & 0x1; } pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL101, (fs << @@ -1135,16 +1178,33 @@ retry: * step 5c: Program gasket interrupts */ /* set the Rx Bit Rate to REFCLK ratio */ - write_gasket_interrupt(dd, 0, 0x0006, 0x0050); + write_gasket_interrupt(dd, intnum++, 0x0006, 0x0050); /* disable pCal for PCIe Gen3 RX equalization */ - write_gasket_interrupt(dd, 1, 0x0026, 0x5b01); + /* select adaptive or static CTLE */ + write_gasket_interrupt(dd, intnum++, 0x0026, + 0x5b01 | (static_ctle_mode << 3)); /* * Enable iCal for PCIe Gen3 RX equalization, and set which * evaluation of RX_EQ_EVAL will launch the iCal procedure. */ - write_gasket_interrupt(dd, 2, 0x0026, 0x5202); + write_gasket_interrupt(dd, intnum++, 0x0026, 0x5202); + + if (static_ctle_mode) { + /* apply static CTLE tunings */ + u8 pcie_dc, pcie_lf, pcie_hf, pcie_bw; + + pcie_dc = ctle_tunings[pcie_pset][0]; + pcie_lf = ctle_tunings[pcie_pset][1]; + pcie_hf = ctle_tunings[pcie_pset][2]; + pcie_bw = ctle_tunings[pcie_pset][3]; + write_gasket_interrupt(dd, intnum++, 0x0026, 0x0200 | pcie_dc); + write_gasket_interrupt(dd, intnum++, 0x0026, 0x0100 | pcie_lf); + write_gasket_interrupt(dd, intnum++, 0x0026, 0x0000 | pcie_hf); + write_gasket_interrupt(dd, intnum++, 0x0026, 0x5500 | pcie_bw); + } + /* terminate list */ - write_gasket_interrupt(dd, 3, 0x0000, 0x0000); + write_gasket_interrupt(dd, intnum++, 0x0000, 0x0000); /* * step 5d: program XMT margin diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index d4022450b73f..ac1bf4a73571 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1952,13 +1952,17 @@ int init_pervl_scs(struct hfi1_devdata *dd) dd->vld[15].sc = sc_alloc(dd, SC_VL15, dd->rcd[0]->rcvhdrqentsize, dd->node); if (!dd->vld[15].sc) - goto nomem; + return -ENOMEM; + hfi1_init_ctxt(dd->vld[15].sc); dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048); - dd->kernel_send_context = kmalloc_node(dd->num_send_contexts * + dd->kernel_send_context = kzalloc_node(dd->num_send_contexts * sizeof(struct send_context *), GFP_KERNEL, dd->node); + if (!dd->kernel_send_context) + goto freesc15; + dd->kernel_send_context[0] = dd->vld[15].sc; for (i = 0; i < num_vls; i++) { @@ -2010,12 +2014,21 @@ int init_pervl_scs(struct hfi1_devdata *dd) if (pio_map_init(dd, ppd->port - 1, num_vls, NULL)) goto nomem; return 0; + nomem: - sc_free(dd->vld[15].sc); - for (i = 0; i < num_vls; i++) + for (i = 0; i < num_vls; i++) { sc_free(dd->vld[i].sc); + dd->vld[i].sc = NULL; + } + for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) sc_free(dd->kernel_send_context[i + 1]); + + kfree(dd->kernel_send_context); + dd->kernel_send_context = NULL; + +freesc15: + sc_free(dd->vld[15].sc); return -ENOMEM; } diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 03df9322f862..965c8aef0c60 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -537,20 +537,6 @@ static void apply_tunings( u8 precur = 0, attn = 0, postcur = 0, external_device_config = 0; u8 *cache = ppd->qsfp_info.cache; - /* Enable external device config if channel is limiting active */ - read_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS, - GENERAL_CONFIG, &config_data); - config_data &= ~(0xff << ENABLE_EXT_DEV_CONFIG_SHIFT); - config_data |= ((u32)limiting_active << ENABLE_EXT_DEV_CONFIG_SHIFT); - ret = load_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS, - GENERAL_CONFIG, config_data); - if (ret != HCMD_SUCCESS) - dd_dev_err( - ppd->dd, - "%s: Failed to set enable external device config\n", - __func__); - - config_data = 0; /* re-init */ /* Pass tuning method to 8051 */ read_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG, &config_data); @@ -638,9 +624,13 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset, if (ret) return ret; + /* + * We'll change the QSFP memory contents from here on out, thus we set a + * flag here to remind ourselves to reset the QSFP module. This prevents + * reuse of stale settings established in our previous pass through. + */ if (ppd->qsfp_info.reset_needed) { reset_qsfp(ppd); - ppd->qsfp_info.reset_needed = 0; refresh_qsfp_cache(ppd, &ppd->qsfp_info); } else { ppd->qsfp_info.reset_needed = 1; diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 1a942ffba4cb..a5aa3517e7d5 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -52,6 +52,7 @@ #include <linux/seq_file.h> #include <rdma/rdma_vt.h> #include <rdma/rdmavt_qp.h> +#include <rdma/ib_verbs.h> #include "hfi.h" #include "qp.h" @@ -115,6 +116,66 @@ static const u16 credit_table[31] = { 32768 /* 1E */ }; +const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = { +[IB_WR_RDMA_WRITE] = { + .length = sizeof(struct ib_rdma_wr), + .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), +}, + +[IB_WR_RDMA_READ] = { + .length = sizeof(struct ib_rdma_wr), + .qpt_support = BIT(IB_QPT_RC), + .flags = RVT_OPERATION_ATOMIC, +}, + +[IB_WR_ATOMIC_CMP_AND_SWP] = { + .length = sizeof(struct ib_atomic_wr), + .qpt_support = BIT(IB_QPT_RC), + .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE, +}, + +[IB_WR_ATOMIC_FETCH_AND_ADD] = { + .length = sizeof(struct ib_atomic_wr), + .qpt_support = BIT(IB_QPT_RC), + .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE, +}, + +[IB_WR_RDMA_WRITE_WITH_IMM] = { + .length = sizeof(struct ib_rdma_wr), + .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), +}, + +[IB_WR_SEND] = { + .length = sizeof(struct ib_send_wr), + .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) | + BIT(IB_QPT_UC) | BIT(IB_QPT_RC), +}, + +[IB_WR_SEND_WITH_IMM] = { + .length = sizeof(struct ib_send_wr), + .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) | + BIT(IB_QPT_UC) | BIT(IB_QPT_RC), +}, + +[IB_WR_REG_MR] = { + .length = sizeof(struct ib_reg_wr), + .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), + .flags = RVT_OPERATION_LOCAL, +}, + +[IB_WR_LOCAL_INV] = { + .length = sizeof(struct ib_send_wr), + .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), + .flags = RVT_OPERATION_LOCAL, +}, + +[IB_WR_SEND_WITH_INV] = { + .length = sizeof(struct ib_send_wr), + .qpt_support = BIT(IB_QPT_RC), +}, + +}; + static void flush_tx_list(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; @@ -745,8 +806,9 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, priv->owner = qp; - priv->s_hdr = kzalloc_node(sizeof(*priv->s_hdr), gfp, rdi->dparms.node); - if (!priv->s_hdr) { + priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), gfp, + rdi->dparms.node); + if (!priv->s_ahg) { kfree(priv); return ERR_PTR(-ENOMEM); } @@ -759,7 +821,7 @@ void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; - kfree(priv->s_hdr); + kfree(priv->s_ahg); kfree(priv); } diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h index e7bc8d6cf681..587d84d65bb8 100644 --- a/drivers/infiniband/hw/hfi1/qp.h +++ b/drivers/infiniband/hw/hfi1/qp.h @@ -54,6 +54,8 @@ extern unsigned int hfi1_qp_table_size; +extern const struct rvt_operation_params hfi1_post_parms[]; + /* * free_ahg - clear ahg from QP */ @@ -61,7 +63,7 @@ static inline void clear_ahg(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; - priv->s_hdr->ahgcount = 0; + priv->s_ahg->ahgcount = 0; qp->s_flags &= ~(RVT_S_AHG_VALID | RVT_S_AHG_CLEAR); if (priv->s_sde && qp->s_ahgidx >= 0) sdma_ahg_free(priv->s_sde, qp->s_ahgidx); diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index 9fb561682c66..a207717ade2a 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -50,46 +50,285 @@ #include <linux/vmalloc.h> #include "hfi.h" -#include "twsi.h" + +/* for the given bus number, return the CSR for reading an i2c line */ +static inline u32 i2c_in_csr(u32 bus_num) +{ + return bus_num ? ASIC_QSFP2_IN : ASIC_QSFP1_IN; +} + +/* for the given bus number, return the CSR for writing an i2c line */ +static inline u32 i2c_oe_csr(u32 bus_num) +{ + return bus_num ? ASIC_QSFP2_OE : ASIC_QSFP1_OE; +} + +static void hfi1_setsda(void *data, int state) +{ + struct hfi1_i2c_bus *bus = (struct hfi1_i2c_bus *)data; + struct hfi1_devdata *dd = bus->controlling_dd; + u64 reg; + u32 target_oe; + + target_oe = i2c_oe_csr(bus->num); + reg = read_csr(dd, target_oe); + /* + * The OE bit value is inverted and connected to the pin. When + * OE is 0 the pin is left to be pulled up, when the OE is 1 + * the pin is driven low. This matches the "open drain" or "open + * collector" convention. + */ + if (state) + reg &= ~QSFP_HFI0_I2CDAT; + else + reg |= QSFP_HFI0_I2CDAT; + write_csr(dd, target_oe, reg); + /* do a read to force the write into the chip */ + (void)read_csr(dd, target_oe); +} + +static void hfi1_setscl(void *data, int state) +{ + struct hfi1_i2c_bus *bus = (struct hfi1_i2c_bus *)data; + struct hfi1_devdata *dd = bus->controlling_dd; + u64 reg; + u32 target_oe; + + target_oe = i2c_oe_csr(bus->num); + reg = read_csr(dd, target_oe); + /* + * The OE bit value is inverted and connected to the pin. When + * OE is 0 the pin is left to be pulled up, when the OE is 1 + * the pin is driven low. This matches the "open drain" or "open + * collector" convention. + */ + if (state) + reg &= ~QSFP_HFI0_I2CCLK; + else + reg |= QSFP_HFI0_I2CCLK; + write_csr(dd, target_oe, reg); + /* do a read to force the write into the chip */ + (void)read_csr(dd, target_oe); +} + +static int hfi1_getsda(void *data) +{ + struct hfi1_i2c_bus *bus = (struct hfi1_i2c_bus *)data; + u64 reg; + u32 target_in; + + hfi1_setsda(data, 1); /* clear OE so we do not pull line down */ + udelay(2); /* 1us pull up + 250ns hold */ + + target_in = i2c_in_csr(bus->num); + reg = read_csr(bus->controlling_dd, target_in); + return !!(reg & QSFP_HFI0_I2CDAT); +} + +static int hfi1_getscl(void *data) +{ + struct hfi1_i2c_bus *bus = (struct hfi1_i2c_bus *)data; + u64 reg; + u32 target_in; + + hfi1_setscl(data, 1); /* clear OE so we do not pull line down */ + udelay(2); /* 1us pull up + 250ns hold */ + + target_in = i2c_in_csr(bus->num); + reg = read_csr(bus->controlling_dd, target_in); + return !!(reg & QSFP_HFI0_I2CCLK); +} /* - * QSFP support for hfi driver, using "Two Wire Serial Interface" driver - * in twsi.c + * Allocate and initialize the given i2c bus number. + * Returns NULL on failure. */ -#define I2C_MAX_RETRY 4 +static struct hfi1_i2c_bus *init_i2c_bus(struct hfi1_devdata *dd, + struct hfi1_asic_data *ad, int num) +{ + struct hfi1_i2c_bus *bus; + int ret; + + bus = kzalloc(sizeof(*bus), GFP_KERNEL); + if (!bus) + return NULL; + + bus->controlling_dd = dd; + bus->num = num; /* our bus number */ + + bus->algo.setsda = hfi1_setsda; + bus->algo.setscl = hfi1_setscl; + bus->algo.getsda = hfi1_getsda; + bus->algo.getscl = hfi1_getscl; + bus->algo.udelay = 5; + bus->algo.timeout = usecs_to_jiffies(50); + bus->algo.data = bus; + + bus->adapter.owner = THIS_MODULE; + bus->adapter.algo_data = &bus->algo; + bus->adapter.dev.parent = &dd->pcidev->dev; + snprintf(bus->adapter.name, sizeof(bus->adapter.name), + "hfi1_i2c%d", num); + + ret = i2c_bit_add_bus(&bus->adapter); + if (ret) { + dd_dev_info(dd, "%s: unable to add i2c bus %d, err %d\n", + __func__, num, ret); + kfree(bus); + return NULL; + } + + return bus; +} /* - * Raw i2c write. No set-up or lock checking. + * Initialize i2c buses. + * Return 0 on success, -errno on error. */ -static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, - int offset, void *bp, int len) +int set_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad) { - struct hfi1_devdata *dd = ppd->dd; - int ret, cnt; - u8 *buff = bp; + ad->i2c_bus0 = init_i2c_bus(dd, ad, 0); + ad->i2c_bus1 = init_i2c_bus(dd, ad, 1); + if (!ad->i2c_bus0 || !ad->i2c_bus1) + return -ENOMEM; + return 0; +}; - cnt = 0; - while (cnt < len) { - int wlen = len - cnt; +static void clean_i2c_bus(struct hfi1_i2c_bus *bus) +{ + if (bus) { + i2c_del_adapter(&bus->adapter); + kfree(bus); + } +} - ret = hfi1_twsi_blk_wr(dd, target, i2c_addr, offset, - buff + cnt, wlen); - if (ret) { - /* hfi1_twsi_blk_wr() 1 for error, else 0 */ - return -EIO; - } - offset += wlen; - cnt += wlen; +void clean_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad) +{ + clean_i2c_bus(ad->i2c_bus0); + ad->i2c_bus0 = NULL; + clean_i2c_bus(ad->i2c_bus1); + ad->i2c_bus1 = NULL; +} + +static int i2c_bus_write(struct hfi1_devdata *dd, struct hfi1_i2c_bus *i2c, + u8 slave_addr, int offset, int offset_size, + u8 *data, u16 len) +{ + int ret; + int num_msgs; + u8 offset_bytes[2]; + struct i2c_msg msgs[2]; + + switch (offset_size) { + case 0: + num_msgs = 1; + msgs[0].addr = slave_addr; + msgs[0].flags = 0; + msgs[0].len = len; + msgs[0].buf = data; + break; + case 2: + offset_bytes[1] = (offset >> 8) & 0xff; + /* fall through */ + case 1: + num_msgs = 2; + offset_bytes[0] = offset & 0xff; + + msgs[0].addr = slave_addr; + msgs[0].flags = 0; + msgs[0].len = offset_size; + msgs[0].buf = offset_bytes; + + msgs[1].addr = slave_addr; + msgs[1].flags = I2C_M_NOSTART, + msgs[1].len = len; + msgs[1].buf = data; + break; + default: + return -EINVAL; } - /* Must wait min 20us between qsfp i2c transactions */ - udelay(20); + i2c->controlling_dd = dd; + ret = i2c_transfer(&i2c->adapter, msgs, num_msgs); + if (ret != num_msgs) { + dd_dev_err(dd, "%s: bus %d, i2c slave 0x%x, offset 0x%x, len 0x%x; write failed, ret %d\n", + __func__, i2c->num, slave_addr, offset, len, ret); + return ret < 0 ? ret : -EIO; + } + return 0; +} + +static int i2c_bus_read(struct hfi1_devdata *dd, struct hfi1_i2c_bus *bus, + u8 slave_addr, int offset, int offset_size, + u8 *data, u16 len) +{ + int ret; + int num_msgs; + u8 offset_bytes[2]; + struct i2c_msg msgs[2]; + + switch (offset_size) { + case 0: + num_msgs = 1; + msgs[0].addr = slave_addr; + msgs[0].flags = I2C_M_RD; + msgs[0].len = len; + msgs[0].buf = data; + break; + case 2: + offset_bytes[1] = (offset >> 8) & 0xff; + /* fall through */ + case 1: + num_msgs = 2; + offset_bytes[0] = offset & 0xff; + + msgs[0].addr = slave_addr; + msgs[0].flags = 0; + msgs[0].len = offset_size; + msgs[0].buf = offset_bytes; + + msgs[1].addr = slave_addr; + msgs[1].flags = I2C_M_RD, + msgs[1].len = len; + msgs[1].buf = data; + break; + default: + return -EINVAL; + } - return cnt; + bus->controlling_dd = dd; + ret = i2c_transfer(&bus->adapter, msgs, num_msgs); + if (ret != num_msgs) { + dd_dev_err(dd, "%s: bus %d, i2c slave 0x%x, offset 0x%x, len 0x%x; read failed, ret %d\n", + __func__, bus->num, slave_addr, offset, len, ret); + return ret < 0 ? ret : -EIO; + } + return 0; +} + +/* + * Raw i2c write. No set-up or lock checking. + * + * Return 0 on success, -errno on error. + */ +static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, + int offset, void *bp, int len) +{ + struct hfi1_devdata *dd = ppd->dd; + struct hfi1_i2c_bus *bus; + u8 slave_addr; + int offset_size; + + bus = target ? dd->asic_data->i2c_bus1 : dd->asic_data->i2c_bus0; + slave_addr = (i2c_addr & 0xff) >> 1; /* convert to 7-bit addr */ + offset_size = (i2c_addr >> 8) & 0x3; + return i2c_bus_write(dd, bus, slave_addr, offset, offset_size, bp, len); } /* * Caller must hold the i2c chain resource. + * + * Return number of bytes written, or -errno. */ int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, void *bp, int len) @@ -99,63 +338,36 @@ int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, if (!check_chip_resource(ppd->dd, i2c_target(target), __func__)) return -EACCES; - /* make sure the TWSI bus is in a sane state */ - ret = hfi1_twsi_reset(ppd->dd, target); - if (ret) { - hfi1_dev_porterr(ppd->dd, ppd->port, - "I2C chain %d write interface reset failed\n", - target); + ret = __i2c_write(ppd, target, i2c_addr, offset, bp, len); + if (ret) return ret; - } - return __i2c_write(ppd, target, i2c_addr, offset, bp, len); + return len; } /* * Raw i2c read. No set-up or lock checking. + * + * Return 0 on success, -errno on error. */ static int __i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, void *bp, int len) { struct hfi1_devdata *dd = ppd->dd; - int ret, cnt, pass = 0; - int orig_offset = offset; - - cnt = 0; - while (cnt < len) { - int rlen = len - cnt; - - ret = hfi1_twsi_blk_rd(dd, target, i2c_addr, offset, - bp + cnt, rlen); - /* Some QSFP's fail first try. Retry as experiment */ - if (ret && cnt == 0 && ++pass < I2C_MAX_RETRY) - continue; - if (ret) { - /* hfi1_twsi_blk_rd() 1 for error, else 0 */ - ret = -EIO; - goto exit; - } - offset += rlen; - cnt += rlen; - } - - ret = cnt; - -exit: - if (ret < 0) { - hfi1_dev_porterr(dd, ppd->port, - "I2C chain %d read failed, addr 0x%x, offset 0x%x, len %d\n", - target, i2c_addr, orig_offset, len); - } - - /* Must wait min 20us between qsfp i2c transactions */ - udelay(20); - - return ret; + struct hfi1_i2c_bus *bus; + u8 slave_addr; + int offset_size; + + bus = target ? dd->asic_data->i2c_bus1 : dd->asic_data->i2c_bus0; + slave_addr = (i2c_addr & 0xff) >> 1; /* convert to 7-bit addr */ + offset_size = (i2c_addr >> 8) & 0x3; + return i2c_bus_read(dd, bus, slave_addr, offset, offset_size, bp, len); } /* * Caller must hold the i2c chain resource. + * + * Return number of bytes read, or -errno. */ int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, void *bp, int len) @@ -165,16 +377,11 @@ int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, if (!check_chip_resource(ppd->dd, i2c_target(target), __func__)) return -EACCES; - /* make sure the TWSI bus is in a sane state */ - ret = hfi1_twsi_reset(ppd->dd, target); - if (ret) { - hfi1_dev_porterr(ppd->dd, ppd->port, - "I2C chain %d read interface reset failed\n", - target); + ret = __i2c_read(ppd, target, i2c_addr, offset, bp, len); + if (ret) return ret; - } - return __i2c_read(ppd, target, i2c_addr, offset, bp, len); + return len; } /* @@ -182,6 +389,8 @@ int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, * by writing @addr = ((256 * n) + m) * * Caller must hold the i2c chain resource. + * + * Return number of bytes written or -errno. */ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len) @@ -189,21 +398,12 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int count = 0; int offset; int nwrite; - int ret; + int ret = 0; u8 page; if (!check_chip_resource(ppd->dd, i2c_target(target), __func__)) return -EACCES; - /* make sure the TWSI bus is in a sane state */ - ret = hfi1_twsi_reset(ppd->dd, target); - if (ret) { - hfi1_dev_porterr(ppd->dd, ppd->port, - "QSFP chain %d write interface reset failed\n", - target); - return ret; - } - while (count < len) { /* * Set the qsfp page based on a zero-based address @@ -213,11 +413,12 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE, QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1); - if (ret != 1) { + /* QSFPs require a 5-10msec delay after write operations */ + mdelay(5); + if (ret) { hfi1_dev_porterr(ppd->dd, ppd->port, "QSFP chain %d can't write QSFP_PAGE_SELECT_BYTE: %d\n", target, ret); - ret = -EIO; break; } @@ -229,11 +430,13 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE, offset, bp + count, nwrite); - if (ret <= 0) /* stop on error or nothing written */ + /* QSFPs require a 5-10msec delay after write operations */ + mdelay(5); + if (ret) /* stop on error */ break; - count += ret; - addr += ret; + count += nwrite; + addr += nwrite; } if (ret < 0) @@ -243,7 +446,7 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, /* * Perform a stand-alone single QSFP write. Acquire the resource, do the - * read, then release the resource. + * write, then release the resource. */ int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len) @@ -266,6 +469,8 @@ int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, * by reading @addr = ((256 * n) + m) * * Caller must hold the i2c chain resource. + * + * Return the number of bytes read or -errno. */ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len) @@ -273,21 +478,12 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int count = 0; int offset; int nread; - int ret; + int ret = 0; u8 page; if (!check_chip_resource(ppd->dd, i2c_target(target), __func__)) return -EACCES; - /* make sure the TWSI bus is in a sane state */ - ret = hfi1_twsi_reset(ppd->dd, target); - if (ret) { - hfi1_dev_porterr(ppd->dd, ppd->port, - "QSFP chain %d read interface reset failed\n", - target); - return ret; - } - while (count < len) { /* * Set the qsfp page based on a zero-based address @@ -296,11 +492,12 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, page = (u8)(addr / QSFP_PAGESIZE); ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE, QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1); - if (ret != 1) { + /* QSFPs require a 5-10msec delay after write operations */ + mdelay(5); + if (ret) { hfi1_dev_porterr(ppd->dd, ppd->port, "QSFP chain %d can't write QSFP_PAGE_SELECT_BYTE: %d\n", target, ret); - ret = -EIO; break; } @@ -310,15 +507,13 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, if (((addr % QSFP_RW_BOUNDARY) + nread) > QSFP_RW_BOUNDARY) nread = QSFP_RW_BOUNDARY - (addr % QSFP_RW_BOUNDARY); - /* QSFPs require a 5-10msec delay after write operations */ - mdelay(5); ret = __i2c_read(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE, offset, bp + count, nread); - if (ret <= 0) /* stop on error or nothing read */ + if (ret) /* stop on error */ break; - count += ret; - addr += ret; + count += nread; + addr += nread; } if (ret < 0) diff --git a/drivers/infiniband/hw/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h index dadc66c442b9..69275ebd9597 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.h +++ b/drivers/infiniband/hw/hfi1/qsfp.h @@ -238,3 +238,6 @@ int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len); int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len); +struct hfi1_asic_data; +int set_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad); +void clean_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad); diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 792f15eb8efe..5da190e6011b 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -477,6 +477,37 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_flags |= RVT_S_WAIT_FENCE; goto bail; } + /* + * Local operations are processed immediately + * after all prior requests have completed + */ + if (wqe->wr.opcode == IB_WR_REG_MR || + wqe->wr.opcode == IB_WR_LOCAL_INV) { + int local_ops = 0; + int err = 0; + + if (qp->s_last != qp->s_cur) + goto bail; + if (++qp->s_cur == qp->s_size) + qp->s_cur = 0; + if (++qp->s_tail == qp->s_size) + qp->s_tail = 0; + if (!(wqe->wr.send_flags & + RVT_SEND_COMPLETION_ONLY)) { + err = rvt_invalidate_rkey( + qp, + wqe->wr.ex.invalidate_rkey); + local_ops = 1; + } + hfi1_send_complete(qp, wqe, + err ? IB_WC_LOC_PROT_ERR + : IB_WC_SUCCESS); + if (local_ops) + atomic_dec(&qp->local_ops_pending); + qp->s_hdrwords = 0; + goto done_free_tx; + } + newreq = 1; qp->s_psn = wqe->psn; } @@ -491,6 +522,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) switch (wqe->wr.opcode) { case IB_WR_SEND: case IB_WR_SEND_WITH_IMM: + case IB_WR_SEND_WITH_INV: /* If no credit, return. */ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) { @@ -504,11 +536,17 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } if (wqe->wr.opcode == IB_WR_SEND) { qp->s_state = OP(SEND_ONLY); - } else { + } else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE); /* Immediate data comes after the BTH */ ohdr->u.imm_data = wqe->wr.ex.imm_data; hwords += 1; + } else { + qp->s_state = OP(SEND_ONLY_WITH_INVALIDATE); + /* Invalidate rkey comes after the BTH */ + ohdr->u.ieth = cpu_to_be32( + wqe->wr.ex.invalidate_rkey); + hwords += 1; } if (wqe->wr.send_flags & IB_SEND_SOLICITED) bth0 |= IB_BTH_SOLICITED; @@ -671,11 +709,16 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } if (wqe->wr.opcode == IB_WR_SEND) { qp->s_state = OP(SEND_LAST); - } else { + } else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); /* Immediate data comes after the BTH */ ohdr->u.imm_data = wqe->wr.ex.imm_data; hwords += 1; + } else { + qp->s_state = OP(SEND_LAST_WITH_INVALIDATE); + /* invalidate data comes after the BTH */ + ohdr->u.ieth = cpu_to_be32(wqe->wr.ex.invalidate_rkey); + hwords += 1; } if (wqe->wr.send_flags & IB_SEND_SOLICITED) bth0 |= IB_BTH_SOLICITED; @@ -1047,7 +1090,7 @@ void hfi1_rc_timeout(unsigned long arg) ibp->rvp.n_rc_timeouts++; qp->s_flags &= ~RVT_S_TIMER; del_timer(&qp->s_timer); - trace_hfi1_rc_timeout(qp, qp->s_last_psn + 1); + trace_hfi1_timeout(qp, qp->s_last_psn + 1); restart_rc(qp, qp->s_last_psn + 1, 1); hfi1_schedule_send(qp); } @@ -1171,7 +1214,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr) * If we were waiting for sends to complete before re-sending, * and they are now complete, restart sending. */ - trace_hfi1_rc_sendcomplete(qp, psn); + trace_hfi1_sendcomplete(qp, psn); if (qp->s_flags & RVT_S_WAIT_PSN && cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { qp->s_flags &= ~RVT_S_WAIT_PSN; @@ -1567,7 +1610,7 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, spin_lock_irqsave(&qp->s_lock, flags); - trace_hfi1_rc_ack(qp, psn); + trace_hfi1_ack(qp, psn); /* Ignore invalid responses. */ smp_read_barrier_depends(); /* see post_one_send */ @@ -1782,7 +1825,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, u8 i, prev; int old_req; - trace_hfi1_rc_rcv_error(qp, psn); + trace_hfi1_rcv_error(qp, psn); if (diff > 0) { /* * Packet sequence error. @@ -2086,7 +2129,6 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct hfi1_other_headers *ohdr = packet->ohdr; u32 bth0, opcode; u32 hdrsize = packet->hlen; @@ -2097,30 +2139,15 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) int diff; struct ib_reth *reth; unsigned long flags; - u32 bth1; int ret, is_fecn = 0; int copy_last = 0; + u32 rkey; bth0 = be32_to_cpu(ohdr->bth[0]); if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0)) return; - bth1 = be32_to_cpu(ohdr->bth[1]); - if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) { - if (bth1 & HFI1_BECN_SMASK) { - u16 rlid = qp->remote_ah_attr.dlid; - u32 lqpn, rqpn; - - lqpn = qp->ibqp.qp_num; - rqpn = qp->remote_qpn; - process_becn( - ppd, - qp->remote_ah_attr.sl, - rlid, lqpn, rqpn, - IB_CC_SVCTYPE_RC); - } - is_fecn = bth1 & HFI1_FECN_SMASK; - } + is_fecn = process_ecn(qp, packet, false); psn = be32_to_cpu(ohdr->bth[2]); opcode = (bth0 >> 24) & 0xff; @@ -2154,7 +2181,8 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) case OP(SEND_MIDDLE): if (opcode == OP(SEND_MIDDLE) || opcode == OP(SEND_LAST) || - opcode == OP(SEND_LAST_WITH_IMMEDIATE)) + opcode == OP(SEND_LAST_WITH_IMMEDIATE) || + opcode == OP(SEND_LAST_WITH_INVALIDATE)) break; goto nack_inv; @@ -2170,6 +2198,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) if (opcode == OP(SEND_MIDDLE) || opcode == OP(SEND_LAST) || opcode == OP(SEND_LAST_WITH_IMMEDIATE) || + opcode == OP(SEND_LAST_WITH_INVALIDATE) || opcode == OP(RDMA_WRITE_MIDDLE) || opcode == OP(RDMA_WRITE_LAST) || opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) @@ -2218,6 +2247,7 @@ send_middle: case OP(SEND_ONLY): case OP(SEND_ONLY_WITH_IMMEDIATE): + case OP(SEND_ONLY_WITH_INVALIDATE): ret = hfi1_rvt_get_rwqe(qp, 0); if (ret < 0) goto nack_op_err; @@ -2226,12 +2256,22 @@ send_middle: qp->r_rcv_len = 0; if (opcode == OP(SEND_ONLY)) goto no_immediate_data; + if (opcode == OP(SEND_ONLY_WITH_INVALIDATE)) + goto send_last_inv; /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */ case OP(SEND_LAST_WITH_IMMEDIATE): send_last_imm: wc.ex.imm_data = ohdr->u.imm_data; wc.wc_flags = IB_WC_WITH_IMM; goto send_last; + case OP(SEND_LAST_WITH_INVALIDATE): +send_last_inv: + rkey = be32_to_cpu(ohdr->u.ieth); + if (rvt_invalidate_rkey(qp, rkey)) + goto no_immediate_data; + wc.ex.invalidate_rkey = rkey; + wc.wc_flags = IB_WC_WITH_INVALIDATE; + goto send_last; case OP(RDMA_WRITE_LAST): copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user; /* fall through */ diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index a659aec3c3c6..48d5094f98e2 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -372,6 +372,7 @@ static void ruc_loopback(struct rvt_qp *sqp) int ret; int copy_last = 0; u32 to; + int local_ops = 0; rcu_read_lock(); @@ -440,11 +441,31 @@ again: sqp->s_sge.num_sge = wqe->wr.num_sge; sqp->s_len = wqe->length; switch (wqe->wr.opcode) { + case IB_WR_REG_MR: + goto send_comp; + + case IB_WR_LOCAL_INV: + if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) { + if (rvt_invalidate_rkey(sqp, + wqe->wr.ex.invalidate_rkey)) + send_status = IB_WC_LOC_PROT_ERR; + local_ops = 1; + } + goto send_comp; + + case IB_WR_SEND_WITH_INV: + if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) { + wc.wc_flags = IB_WC_WITH_INVALIDATE; + wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey; + } + goto send; + case IB_WR_SEND_WITH_IMM: wc.wc_flags = IB_WC_WITH_IMM; wc.ex.imm_data = wqe->wr.ex.imm_data; /* FALLTHROUGH */ case IB_WR_SEND: +send: ret = hfi1_rvt_get_rwqe(qp, 0); if (ret < 0) goto op_err; @@ -583,6 +604,10 @@ send_comp: flush_send: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; hfi1_send_complete(sqp, wqe, send_status); + if (local_ops) { + atomic_dec(&sqp->local_ops_pending); + local_ops = 0; + } goto again; rnr_nak: @@ -683,10 +708,10 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, return sizeof(struct ib_grh) / sizeof(u32); } -#define BTH2_OFFSET (offsetof(struct hfi1_pio_header, hdr.u.oth.bth[2]) / 4) +#define BTH2_OFFSET (offsetof(struct hfi1_sdma_header, hdr.u.oth.bth[2]) / 4) /** - * build_ahg - create ahg in s_hdr + * build_ahg - create ahg in s_ahg * @qp: a pointer to QP * @npsn: the next PSN for the request/response * @@ -708,19 +733,18 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn) qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde); if (qp->s_ahgidx >= 0) { qp->s_ahgpsn = npsn; - priv->s_hdr->tx_flags |= SDMA_TXREQ_F_AHG_COPY; + priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY; /* save to protect a change in another thread */ - priv->s_hdr->sde = priv->s_sde; - priv->s_hdr->ahgidx = qp->s_ahgidx; + priv->s_ahg->ahgidx = qp->s_ahgidx; qp->s_flags |= RVT_S_AHG_VALID; } } else { /* subsequent middle after valid */ if (qp->s_ahgidx >= 0) { - priv->s_hdr->tx_flags |= SDMA_TXREQ_F_USE_AHG; - priv->s_hdr->ahgidx = qp->s_ahgidx; - priv->s_hdr->ahgcount++; - priv->s_hdr->ahgdesc[0] = + priv->s_ahg->tx_flags |= SDMA_TXREQ_F_USE_AHG; + priv->s_ahg->ahgidx = qp->s_ahgidx; + priv->s_ahg->ahgcount++; + priv->s_ahg->ahgdesc[0] = sdma_build_ahg_descriptor( (__force u16)cpu_to_be16((u16)npsn), BTH2_OFFSET, @@ -728,8 +752,8 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn) 16); if ((npsn & 0xffff0000) != (qp->s_ahgpsn & 0xffff0000)) { - priv->s_hdr->ahgcount++; - priv->s_hdr->ahgdesc[1] = + priv->s_ahg->ahgcount++; + priv->s_ahg->ahgdesc[1] = sdma_build_ahg_descriptor( (__force u16)cpu_to_be16( (u16)(npsn >> 16)), @@ -766,7 +790,7 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, } lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4; /* - * reset s_hdr/AHG fields + * reset s_ahg/AHG fields * * This insures that the ahgentry/ahgcount * are at a non-AHG default to protect @@ -776,10 +800,9 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, * build_ahg() will modify as appropriate * to use the AHG feature. */ - priv->s_hdr->tx_flags = 0; - priv->s_hdr->ahgcount = 0; - priv->s_hdr->ahgidx = 0; - priv->s_hdr->sde = NULL; + priv->s_ahg->tx_flags = 0; + priv->s_ahg->ahgcount = 0; + priv->s_ahg->ahgidx = 0; if (qp->s_mig_state == IB_MIG_MIGRATED) bth0 |= IB_BTH_MIG_REQ; else @@ -890,7 +913,7 @@ void hfi1_do_send(struct rvt_qp *qp) */ if (hfi1_verbs_send(qp, &ps)) return; - /* Record that s_hdr is empty. */ + /* Record that s_ahg is empty. */ qp->s_hdrwords = 0; /* allow other tasks to run */ if (unlikely(time_after(jiffies, timeout))) { diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 91fc2aed6aed..74c84c655f7e 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -49,6 +49,7 @@ #include "hfi.h" #include "mad.h" #include "trace.h" +#include "affinity.h" /* * Start of per-port congestion control structures and support code @@ -622,6 +623,27 @@ static ssize_t show_tempsense(struct device *device, return ret; } +static ssize_t show_sdma_affinity(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct hfi1_ibdev *dev = + container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); + struct hfi1_devdata *dd = dd_from_dev(dev); + + return hfi1_get_sdma_affinity(dd, buf); +} + +static ssize_t store_sdma_affinity(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct hfi1_ibdev *dev = + container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); + struct hfi1_devdata *dd = dd_from_dev(dev); + + return hfi1_set_sdma_affinity(dd, buf, count); +} + /* * end of per-unit (or driver, in some cases, but replicated * per unit) functions @@ -636,6 +658,8 @@ static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL); static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset); +static DEVICE_ATTR(sdma_affinity, S_IWUSR | S_IRUGO, show_sdma_affinity, + store_sdma_affinity); static struct device_attribute *hfi1_attributes[] = { &dev_attr_hw_rev, @@ -646,6 +670,7 @@ static struct device_attribute *hfi1_attributes[] = { &dev_attr_boardversion, &dev_attr_tempsense, &dev_attr_chip_reset, + &dev_attr_sdma_affinity, }; int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, diff --git a/drivers/infiniband/hw/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h index 28c1d0832886..92dc88f013c9 100644 --- a/drivers/infiniband/hw/hfi1/trace.h +++ b/drivers/infiniband/hw/hfi1/trace.h @@ -44,1329 +44,10 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ -#undef TRACE_SYSTEM_VAR -#define TRACE_SYSTEM_VAR hfi1 - -#if !defined(__HFI1_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) -#define __HFI1_TRACE_H - -#include <linux/tracepoint.h> -#include <linux/trace_seq.h> - -#include "hfi.h" -#include "mad.h" -#include "sdma.h" - -#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) -#define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev)) - -#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype } -#define show_packettype(etype) \ -__print_symbolic(etype, \ - packettype_name(EXPECTED), \ - packettype_name(EAGER), \ - packettype_name(IB), \ - packettype_name(ERROR), \ - packettype_name(BYPASS)) - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_rx - -TRACE_EVENT(hfi1_rcvhdr, - TP_PROTO(struct hfi1_devdata *dd, - u32 ctxt, - u64 eflags, - u32 etype, - u32 hlen, - u32 tlen, - u32 updegr, - u32 etail - ), - TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail), - TP_STRUCT__entry(DD_DEV_ENTRY(dd) - __field(u64, eflags) - __field(u32, ctxt) - __field(u32, etype) - __field(u32, hlen) - __field(u32, tlen) - __field(u32, updegr) - __field(u32, etail) - ), - TP_fast_assign(DD_DEV_ASSIGN(dd); - __entry->eflags = eflags; - __entry->ctxt = ctxt; - __entry->etype = etype; - __entry->hlen = hlen; - __entry->tlen = tlen; - __entry->updegr = updegr; - __entry->etail = etail; - ), - TP_printk( - "[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d", - __get_str(dev), - __entry->ctxt, - __entry->eflags, - __entry->etype, show_packettype(__entry->etype), - __entry->hlen, - __entry->tlen, - __entry->updegr, - __entry->etail - ) -); - -TRACE_EVENT(hfi1_receive_interrupt, - TP_PROTO(struct hfi1_devdata *dd, u32 ctxt), - TP_ARGS(dd, ctxt), - TP_STRUCT__entry(DD_DEV_ENTRY(dd) - __field(u32, ctxt) - __field(u8, slow_path) - __field(u8, dma_rtail) - ), - TP_fast_assign(DD_DEV_ASSIGN(dd); - __entry->ctxt = ctxt; - if (dd->rcd[ctxt]->do_interrupt == - &handle_receive_interrupt) { - __entry->slow_path = 1; - __entry->dma_rtail = 0xFF; - } else if (dd->rcd[ctxt]->do_interrupt == - &handle_receive_interrupt_dma_rtail){ - __entry->dma_rtail = 1; - __entry->slow_path = 0; - } else if (dd->rcd[ctxt]->do_interrupt == - &handle_receive_interrupt_nodma_rtail) { - __entry->dma_rtail = 0; - __entry->slow_path = 0; - } - ), - TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d", - __get_str(dev), - __entry->ctxt, - __entry->slow_path, - __entry->dma_rtail - ) -); - -TRACE_EVENT(hfi1_exp_tid_reg, - TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr, - u32 npages, unsigned long va, unsigned long pa, - dma_addr_t dma), - TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma), - TP_STRUCT__entry( - __field(unsigned, ctxt) - __field(u16, subctxt) - __field(u32, rarr) - __field(u32, npages) - __field(unsigned long, va) - __field(unsigned long, pa) - __field(dma_addr_t, dma) - ), - TP_fast_assign( - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->rarr = rarr; - __entry->npages = npages; - __entry->va = va; - __entry->pa = pa; - __entry->dma = dma; - ), - TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx", - __entry->ctxt, - __entry->subctxt, - __entry->rarr, - __entry->npages, - __entry->pa, - __entry->va, - __entry->dma - ) - ); - -TRACE_EVENT(hfi1_exp_tid_unreg, - TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr, u32 npages, - unsigned long va, unsigned long pa, dma_addr_t dma), - TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma), - TP_STRUCT__entry( - __field(unsigned, ctxt) - __field(u16, subctxt) - __field(u32, rarr) - __field(u32, npages) - __field(unsigned long, va) - __field(unsigned long, pa) - __field(dma_addr_t, dma) - ), - TP_fast_assign( - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->rarr = rarr; - __entry->npages = npages; - __entry->va = va; - __entry->pa = pa; - __entry->dma = dma; - ), - TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx", - __entry->ctxt, - __entry->subctxt, - __entry->rarr, - __entry->npages, - __entry->pa, - __entry->va, - __entry->dma - ) - ); - -TRACE_EVENT(hfi1_exp_tid_inval, - TP_PROTO(unsigned ctxt, u16 subctxt, unsigned long va, u32 rarr, - u32 npages, dma_addr_t dma), - TP_ARGS(ctxt, subctxt, va, rarr, npages, dma), - TP_STRUCT__entry( - __field(unsigned, ctxt) - __field(u16, subctxt) - __field(unsigned long, va) - __field(u32, rarr) - __field(u32, npages) - __field(dma_addr_t, dma) - ), - TP_fast_assign( - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->va = va; - __entry->rarr = rarr; - __entry->npages = npages; - __entry->dma = dma; - ), - TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx", - __entry->ctxt, - __entry->subctxt, - __entry->rarr, - __entry->npages, - __entry->va, - __entry->dma - ) - ); - -TRACE_EVENT(hfi1_mmu_invalidate, - TP_PROTO(unsigned ctxt, u16 subctxt, const char *type, - unsigned long start, unsigned long end), - TP_ARGS(ctxt, subctxt, type, start, end), - TP_STRUCT__entry( - __field(unsigned, ctxt) - __field(u16, subctxt) - __string(type, type) - __field(unsigned long, start) - __field(unsigned long, end) - ), - TP_fast_assign( - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __assign_str(type, type); - __entry->start = start; - __entry->end = end; - ), - TP_printk("[%3u:%02u] MMU Invalidate (%s) 0x%lx - 0x%lx", - __entry->ctxt, - __entry->subctxt, - __get_str(type), - __entry->start, - __entry->end - ) - ); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_tx - -TRACE_EVENT(hfi1_piofree, - TP_PROTO(struct send_context *sc, int extra), - TP_ARGS(sc, extra), - TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd) - __field(u32, sw_index) - __field(u32, hw_context) - __field(int, extra) - ), - TP_fast_assign(DD_DEV_ASSIGN(sc->dd); - __entry->sw_index = sc->sw_index; - __entry->hw_context = sc->hw_context; - __entry->extra = extra; - ), - TP_printk("[%s] ctxt %u(%u) extra %d", - __get_str(dev), - __entry->sw_index, - __entry->hw_context, - __entry->extra - ) -); - -TRACE_EVENT(hfi1_wantpiointr, - TP_PROTO(struct send_context *sc, u32 needint, u64 credit_ctrl), - TP_ARGS(sc, needint, credit_ctrl), - TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd) - __field(u32, sw_index) - __field(u32, hw_context) - __field(u32, needint) - __field(u64, credit_ctrl) - ), - TP_fast_assign(DD_DEV_ASSIGN(sc->dd); - __entry->sw_index = sc->sw_index; - __entry->hw_context = sc->hw_context; - __entry->needint = needint; - __entry->credit_ctrl = credit_ctrl; - ), - TP_printk("[%s] ctxt %u(%u) on %d credit_ctrl 0x%llx", - __get_str(dev), - __entry->sw_index, - __entry->hw_context, - __entry->needint, - (unsigned long long)__entry->credit_ctrl - ) -); - -DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template, - TP_PROTO(struct rvt_qp *qp, u32 flags), - TP_ARGS(qp, flags), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) - __field(u32, qpn) - __field(u32, flags) - __field(u32, s_flags) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) - __entry->flags = flags; - __entry->qpn = qp->ibqp.qp_num; - __entry->s_flags = qp->s_flags; - ), - TP_printk( - "[%s] qpn 0x%x flags 0x%x s_flags 0x%x", - __get_str(dev), - __entry->qpn, - __entry->flags, - __entry->s_flags - ) -); - -DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpwakeup, - TP_PROTO(struct rvt_qp *qp, u32 flags), - TP_ARGS(qp, flags)); - -DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpsleep, - TP_PROTO(struct rvt_qp *qp, u32 flags), - TP_ARGS(qp, flags)); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_ibhdrs - -u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr); -const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); - -#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) - -const char *parse_sdma_flags(struct trace_seq *p, u64 desc0, u64 desc1); - -#define __parse_sdma_flags(desc0, desc1) parse_sdma_flags(p, desc0, desc1) - -#define lrh_name(lrh) { HFI1_##lrh, #lrh } -#define show_lnh(lrh) \ -__print_symbolic(lrh, \ - lrh_name(LRH_BTH), \ - lrh_name(LRH_GRH)) - -#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode } -#define show_ib_opcode(opcode) \ -__print_symbolic(opcode, \ - ib_opcode_name(RC_SEND_FIRST), \ - ib_opcode_name(RC_SEND_MIDDLE), \ - ib_opcode_name(RC_SEND_LAST), \ - ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(RC_SEND_ONLY), \ - ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(RC_RDMA_WRITE_FIRST), \ - ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \ - ib_opcode_name(RC_RDMA_WRITE_LAST), \ - ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(RC_RDMA_WRITE_ONLY), \ - ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(RC_RDMA_READ_REQUEST), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \ - ib_opcode_name(RC_ACKNOWLEDGE), \ - ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ - ib_opcode_name(RC_COMPARE_SWAP), \ - ib_opcode_name(RC_FETCH_ADD), \ - ib_opcode_name(RC_SEND_LAST_WITH_INVALIDATE), \ - ib_opcode_name(RC_SEND_ONLY_WITH_INVALIDATE), \ - ib_opcode_name(UC_SEND_FIRST), \ - ib_opcode_name(UC_SEND_MIDDLE), \ - ib_opcode_name(UC_SEND_LAST), \ - ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(UC_SEND_ONLY), \ - ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(UC_RDMA_WRITE_FIRST), \ - ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \ - ib_opcode_name(UC_RDMA_WRITE_LAST), \ - ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(UC_RDMA_WRITE_ONLY), \ - ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(UD_SEND_ONLY), \ - ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(CNP)) - -#define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x" -#define BTH_PRN \ - "op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \ - "f %d b %d qpn 0x%.6x a %d psn 0x%.8x" -#define EHDR_PRN "%s" - -DECLARE_EVENT_CLASS(hfi1_ibhdr_template, - TP_PROTO(struct hfi1_devdata *dd, - struct hfi1_ib_header *hdr), - TP_ARGS(dd, hdr), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - /* LRH */ - __field(u8, vl) - __field(u8, lver) - __field(u8, sl) - __field(u8, lnh) - __field(u16, dlid) - __field(u16, len) - __field(u16, slid) - /* BTH */ - __field(u8, opcode) - __field(u8, se) - __field(u8, m) - __field(u8, pad) - __field(u8, tver) - __field(u16, pkey) - __field(u8, f) - __field(u8, b) - __field(u32, qpn) - __field(u8, a) - __field(u32, psn) - /* extended headers */ - __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr)) - ), - TP_fast_assign( - struct hfi1_other_headers *ohdr; - - DD_DEV_ASSIGN(dd); - /* LRH */ - __entry->vl = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 12); - __entry->lver = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf; - __entry->sl = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; - __entry->lnh = - (u8)(be16_to_cpu(hdr->lrh[0]) & 3); - __entry->dlid = - be16_to_cpu(hdr->lrh[1]); - /* allow for larger len */ - __entry->len = - be16_to_cpu(hdr->lrh[2]); - __entry->slid = - be16_to_cpu(hdr->lrh[3]); - /* BTH */ - if (__entry->lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else - ohdr = &hdr->u.l.oth; - __entry->opcode = - (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; - __entry->se = - (be32_to_cpu(ohdr->bth[0]) >> 23) & 1; - __entry->m = - (be32_to_cpu(ohdr->bth[0]) >> 22) & 1; - __entry->pad = - (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - __entry->tver = - (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf; - __entry->pkey = - be32_to_cpu(ohdr->bth[0]) & 0xffff; - __entry->f = - (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) & - HFI1_FECN_MASK; - __entry->b = - (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) & - HFI1_BECN_MASK; - __entry->qpn = - be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; - __entry->a = - (be32_to_cpu(ohdr->bth[2]) >> 31) & 1; - /* allow for larger PSN */ - __entry->psn = - be32_to_cpu(ohdr->bth[2]) & 0x7fffffff; - /* extended headers */ - memcpy(__get_dynamic_array(ehdrs), &ohdr->u, - ibhdr_exhdr_len(hdr)); - ), - TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN, - __get_str(dev), - /* LRH */ - __entry->vl, - __entry->lver, - __entry->sl, - __entry->lnh, show_lnh(__entry->lnh), - __entry->dlid, - __entry->len, - __entry->slid, - /* BTH */ - __entry->opcode, show_ib_opcode(__entry->opcode), - __entry->se, - __entry->m, - __entry->pad, - __entry->tver, - __entry->pkey, - __entry->f, - __entry->b, - __entry->qpn, - __entry->a, - __entry->psn, - /* extended headers */ - __parse_ib_ehdrs( - __entry->opcode, - (void *)__get_dynamic_array(ehdrs)) - ) -); - -DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), - TP_ARGS(dd, hdr)); - -DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), - TP_ARGS(dd, hdr)); - -DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), - TP_ARGS(dd, hdr)); - -DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), - TP_ARGS(dd, hdr)); - -#define SNOOP_PRN \ - "slid %.4x dlid %.4x qpn 0x%.6x opcode 0x%.2x,%s " \ - "svc lvl %d pkey 0x%.4x [header = %d bytes] [data = %d bytes]" - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_snoop - -TRACE_EVENT(snoop_capture, - TP_PROTO(struct hfi1_devdata *dd, - int hdr_len, - struct hfi1_ib_header *hdr, - int data_len, - void *data), - TP_ARGS(dd, hdr_len, hdr, data_len, data), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - __field(u16, slid) - __field(u16, dlid) - __field(u32, qpn) - __field(u8, opcode) - __field(u8, sl) - __field(u16, pkey) - __field(u32, hdr_len) - __field(u32, data_len) - __field(u8, lnh) - __dynamic_array(u8, raw_hdr, hdr_len) - __dynamic_array(u8, raw_pkt, data_len) - ), - TP_fast_assign( - struct hfi1_other_headers *ohdr; - - __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); - if (__entry->lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else - ohdr = &hdr->u.l.oth; - DD_DEV_ASSIGN(dd); - __entry->slid = be16_to_cpu(hdr->lrh[3]); - __entry->dlid = be16_to_cpu(hdr->lrh[1]); - __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; - __entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; - __entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; - __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff; - __entry->hdr_len = hdr_len; - __entry->data_len = data_len; - memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len); - memcpy(__get_dynamic_array(raw_pkt), data, data_len); - ), - TP_printk( - "[%s] " SNOOP_PRN, - __get_str(dev), - __entry->slid, - __entry->dlid, - __entry->qpn, - __entry->opcode, - show_ib_opcode(__entry->opcode), - __entry->sl, - __entry->pkey, - __entry->hdr_len, - __entry->data_len - ) -); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_ctxts - -#define UCTXT_FMT \ - "cred:%u, credaddr:0x%llx, piobase:0x%llx, rcvhdr_cnt:%u, " \ - "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx" -TRACE_EVENT(hfi1_uctxtdata, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt), - TP_ARGS(dd, uctxt), - TP_STRUCT__entry(DD_DEV_ENTRY(dd) - __field(unsigned, ctxt) - __field(u32, credits) - __field(u64, hw_free) - __field(u64, piobase) - __field(u16, rcvhdrq_cnt) - __field(u64, rcvhdrq_phys) - __field(u32, eager_cnt) - __field(u64, rcvegr_phys) - ), - TP_fast_assign(DD_DEV_ASSIGN(dd); - __entry->ctxt = uctxt->ctxt; - __entry->credits = uctxt->sc->credits; - __entry->hw_free = (u64)uctxt->sc->hw_free; - __entry->piobase = (u64)uctxt->sc->base_addr; - __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt; - __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys; - __entry->eager_cnt = uctxt->egrbufs.alloced; - __entry->rcvegr_phys = - uctxt->egrbufs.rcvtids[0].phys; - ), - TP_printk("[%s] ctxt %u " UCTXT_FMT, - __get_str(dev), - __entry->ctxt, - __entry->credits, - __entry->hw_free, - __entry->piobase, - __entry->rcvhdrq_cnt, - __entry->rcvhdrq_phys, - __entry->eager_cnt, - __entry->rcvegr_phys - ) -); - -#define CINFO_FMT \ - "egrtids:%u, egr_size:%u, hdrq_cnt:%u, hdrq_size:%u, sdma_ring_size:%u" -TRACE_EVENT(hfi1_ctxt_info, - TP_PROTO(struct hfi1_devdata *dd, unsigned ctxt, unsigned subctxt, - struct hfi1_ctxt_info cinfo), - TP_ARGS(dd, ctxt, subctxt, cinfo), - TP_STRUCT__entry(DD_DEV_ENTRY(dd) - __field(unsigned, ctxt) - __field(unsigned, subctxt) - __field(u16, egrtids) - __field(u16, rcvhdrq_cnt) - __field(u16, rcvhdrq_size) - __field(u16, sdma_ring_size) - __field(u32, rcvegr_size) - ), - TP_fast_assign(DD_DEV_ASSIGN(dd); - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->egrtids = cinfo.egrtids; - __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt; - __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize; - __entry->sdma_ring_size = cinfo.sdma_ring_size; - __entry->rcvegr_size = cinfo.rcvegr_size; - ), - TP_printk("[%s] ctxt %u:%u " CINFO_FMT, - __get_str(dev), - __entry->ctxt, - __entry->subctxt, - __entry->egrtids, - __entry->rcvegr_size, - __entry->rcvhdrq_cnt, - __entry->rcvhdrq_size, - __entry->sdma_ring_size - ) -); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_sma - -#define BCT_FORMAT \ - "shared_limit %x vls 0-7 [%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x] 15 [%x,%x]" - -#define BCT(field) \ - be16_to_cpu( \ - ((struct buffer_control *)__get_dynamic_array(bct))->field \ - ) - -DECLARE_EVENT_CLASS(hfi1_bct_template, - TP_PROTO(struct hfi1_devdata *dd, - struct buffer_control *bc), - TP_ARGS(dd, bc), - TP_STRUCT__entry(DD_DEV_ENTRY(dd) - __dynamic_array(u8, bct, sizeof(*bc)) - ), - TP_fast_assign(DD_DEV_ASSIGN(dd); - memcpy(__get_dynamic_array(bct), bc, - sizeof(*bc)); - ), - TP_printk(BCT_FORMAT, - BCT(overall_shared_limit), - - BCT(vl[0].dedicated), - BCT(vl[0].shared), - - BCT(vl[1].dedicated), - BCT(vl[1].shared), - - BCT(vl[2].dedicated), - BCT(vl[2].shared), - - BCT(vl[3].dedicated), - BCT(vl[3].shared), - - BCT(vl[4].dedicated), - BCT(vl[4].shared), - - BCT(vl[5].dedicated), - BCT(vl[5].shared), - - BCT(vl[6].dedicated), - BCT(vl[6].shared), - - BCT(vl[7].dedicated), - BCT(vl[7].shared), - - BCT(vl[15].dedicated), - BCT(vl[15].shared) - ) -); - -DEFINE_EVENT(hfi1_bct_template, bct_set, - TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc), - TP_ARGS(dd, bc)); - -DEFINE_EVENT(hfi1_bct_template, bct_get, - TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc), - TP_ARGS(dd, bc)); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_sdma - -TRACE_EVENT(hfi1_sdma_descriptor, - TP_PROTO(struct sdma_engine *sde, - u64 desc0, - u64 desc1, - u16 e, - void *descp), - TP_ARGS(sde, desc0, desc1, e, descp), - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) - __field(void *, descp) - __field(u64, desc0) - __field(u64, desc1) - __field(u16, e) - __field(u8, idx) - ), - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); - __entry->desc0 = desc0; - __entry->desc1 = desc1; - __entry->idx = sde->this_idx; - __entry->descp = descp; - __entry->e = e; - ), - TP_printk( - "[%s] SDE(%u) flags:%s addr:0x%016llx gen:%u len:%u d0:%016llx d1:%016llx to %p,%u", - __get_str(dev), - __entry->idx, - __parse_sdma_flags(__entry->desc0, __entry->desc1), - (__entry->desc0 >> SDMA_DESC0_PHY_ADDR_SHIFT) & - SDMA_DESC0_PHY_ADDR_MASK, - (u8)((__entry->desc1 >> SDMA_DESC1_GENERATION_SHIFT) & - SDMA_DESC1_GENERATION_MASK), - (u16)((__entry->desc0 >> SDMA_DESC0_BYTE_COUNT_SHIFT) & - SDMA_DESC0_BYTE_COUNT_MASK), - __entry->desc0, - __entry->desc1, - __entry->descp, - __entry->e - ) -); - -TRACE_EVENT(hfi1_sdma_engine_select, - TP_PROTO(struct hfi1_devdata *dd, u32 sel, u8 vl, u8 idx), - TP_ARGS(dd, sel, vl, idx), - TP_STRUCT__entry(DD_DEV_ENTRY(dd) - __field(u32, sel) - __field(u8, vl) - __field(u8, idx) - ), - TP_fast_assign(DD_DEV_ASSIGN(dd); - __entry->sel = sel; - __entry->vl = vl; - __entry->idx = idx; - ), - TP_printk("[%s] selecting SDE %u sel 0x%x vl %u", - __get_str(dev), - __entry->idx, - __entry->sel, - __entry->vl - ) -); - -DECLARE_EVENT_CLASS(hfi1_sdma_engine_class, - TP_PROTO(struct sdma_engine *sde, u64 status), - TP_ARGS(sde, status), - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) - __field(u64, status) - __field(u8, idx) - ), - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); - __entry->status = status; - __entry->idx = sde->this_idx; - ), - TP_printk("[%s] SDE(%u) status %llx", - __get_str(dev), - __entry->idx, - (unsigned long long)__entry->status - ) -); - -DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_interrupt, - TP_PROTO(struct sdma_engine *sde, u64 status), - TP_ARGS(sde, status) -); - -DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_progress, - TP_PROTO(struct sdma_engine *sde, u64 status), - TP_ARGS(sde, status) -); - -DECLARE_EVENT_CLASS(hfi1_sdma_ahg_ad, - TP_PROTO(struct sdma_engine *sde, int aidx), - TP_ARGS(sde, aidx), - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) - __field(int, aidx) - __field(u8, idx) - ), - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); - __entry->idx = sde->this_idx; - __entry->aidx = aidx; - ), - TP_printk("[%s] SDE(%u) aidx %d", - __get_str(dev), - __entry->idx, - __entry->aidx - ) -); - -DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_allocate, - TP_PROTO(struct sdma_engine *sde, int aidx), - TP_ARGS(sde, aidx)); - -DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_deallocate, - TP_PROTO(struct sdma_engine *sde, int aidx), - TP_ARGS(sde, aidx)); - -#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER -TRACE_EVENT(hfi1_sdma_progress, - TP_PROTO(struct sdma_engine *sde, - u16 hwhead, - u16 swhead, - struct sdma_txreq *txp - ), - TP_ARGS(sde, hwhead, swhead, txp), - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) - __field(u64, sn) - __field(u16, hwhead) - __field(u16, swhead) - __field(u16, txnext) - __field(u16, tx_tail) - __field(u16, tx_head) - __field(u8, idx) - ), - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); - __entry->hwhead = hwhead; - __entry->swhead = swhead; - __entry->tx_tail = sde->tx_tail; - __entry->tx_head = sde->tx_head; - __entry->txnext = txp ? txp->next_descq_idx : ~0; - __entry->idx = sde->this_idx; - __entry->sn = txp ? txp->sn : ~0; - ), - TP_printk( - "[%s] SDE(%u) sn %llu hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u", - __get_str(dev), - __entry->idx, - __entry->sn, - __entry->hwhead, - __entry->swhead, - __entry->txnext, - __entry->tx_head, - __entry->tx_tail - ) -); -#else -TRACE_EVENT(hfi1_sdma_progress, - TP_PROTO(struct sdma_engine *sde, - u16 hwhead, u16 swhead, - struct sdma_txreq *txp - ), - TP_ARGS(sde, hwhead, swhead, txp), - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) - __field(u16, hwhead) - __field(u16, swhead) - __field(u16, txnext) - __field(u16, tx_tail) - __field(u16, tx_head) - __field(u8, idx) - ), - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); - __entry->hwhead = hwhead; - __entry->swhead = swhead; - __entry->tx_tail = sde->tx_tail; - __entry->tx_head = sde->tx_head; - __entry->txnext = txp ? txp->next_descq_idx : ~0; - __entry->idx = sde->this_idx; - ), - TP_printk( - "[%s] SDE(%u) hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u", - __get_str(dev), - __entry->idx, - __entry->hwhead, - __entry->swhead, - __entry->txnext, - __entry->tx_head, - __entry->tx_tail - ) -); -#endif - -DECLARE_EVENT_CLASS(hfi1_sdma_sn, - TP_PROTO(struct sdma_engine *sde, u64 sn), - TP_ARGS(sde, sn), - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) - __field(u64, sn) - __field(u8, idx) - ), - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); - __entry->sn = sn; - __entry->idx = sde->this_idx; - ), - TP_printk("[%s] SDE(%u) sn %llu", - __get_str(dev), - __entry->idx, - __entry->sn - ) -); - -DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_out_sn, - TP_PROTO( - struct sdma_engine *sde, - u64 sn - ), - TP_ARGS(sde, sn) -); - -DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_in_sn, - TP_PROTO(struct sdma_engine *sde, u64 sn), - TP_ARGS(sde, sn) -); - -#define USDMA_HDR_FORMAT \ - "[%s:%u:%u:%u] PBC=(0x%x 0x%x) LRH=(0x%x 0x%x) BTH=(0x%x 0x%x 0x%x) KDETH=(0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x) TIDVal=0x%x" - -TRACE_EVENT(hfi1_sdma_user_header, - TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 req, - struct hfi1_pkt_header *hdr, u32 tidval), - TP_ARGS(dd, ctxt, subctxt, req, hdr, tidval), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - __field(u16, ctxt) - __field(u8, subctxt) - __field(u16, req) - __field(__le32, pbc0) - __field(__le32, pbc1) - __field(__be32, lrh0) - __field(__be32, lrh1) - __field(__be32, bth0) - __field(__be32, bth1) - __field(__be32, bth2) - __field(__le32, kdeth0) - __field(__le32, kdeth1) - __field(__le32, kdeth2) - __field(__le32, kdeth3) - __field(__le32, kdeth4) - __field(__le32, kdeth5) - __field(__le32, kdeth6) - __field(__le32, kdeth7) - __field(__le32, kdeth8) - __field(u32, tidval) - ), - TP_fast_assign( - __le32 *pbc = (__le32 *)hdr->pbc; - __be32 *lrh = (__be32 *)hdr->lrh; - __be32 *bth = (__be32 *)hdr->bth; - __le32 *kdeth = (__le32 *)&hdr->kdeth; - - DD_DEV_ASSIGN(dd); - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->req = req; - __entry->pbc0 = pbc[0]; - __entry->pbc1 = pbc[1]; - __entry->lrh0 = be32_to_cpu(lrh[0]); - __entry->lrh1 = be32_to_cpu(lrh[1]); - __entry->bth0 = be32_to_cpu(bth[0]); - __entry->bth1 = be32_to_cpu(bth[1]); - __entry->bth2 = be32_to_cpu(bth[2]); - __entry->kdeth0 = kdeth[0]; - __entry->kdeth1 = kdeth[1]; - __entry->kdeth2 = kdeth[2]; - __entry->kdeth3 = kdeth[3]; - __entry->kdeth4 = kdeth[4]; - __entry->kdeth5 = kdeth[5]; - __entry->kdeth6 = kdeth[6]; - __entry->kdeth7 = kdeth[7]; - __entry->kdeth8 = kdeth[8]; - __entry->tidval = tidval; - ), - TP_printk(USDMA_HDR_FORMAT, - __get_str(dev), - __entry->ctxt, - __entry->subctxt, - __entry->req, - __entry->pbc1, - __entry->pbc0, - __entry->lrh0, - __entry->lrh1, - __entry->bth0, - __entry->bth1, - __entry->bth2, - __entry->kdeth0, - __entry->kdeth1, - __entry->kdeth2, - __entry->kdeth3, - __entry->kdeth4, - __entry->kdeth5, - __entry->kdeth6, - __entry->kdeth7, - __entry->kdeth8, - __entry->tidval - ) - ); - -#define SDMA_UREQ_FMT \ - "[%s:%u:%u] ver/op=0x%x, iovcnt=%u, npkts=%u, frag=%u, idx=%u" -TRACE_EVENT(hfi1_sdma_user_reqinfo, - TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 *i), - TP_ARGS(dd, ctxt, subctxt, i), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd); - __field(u16, ctxt) - __field(u8, subctxt) - __field(u8, ver_opcode) - __field(u8, iovcnt) - __field(u16, npkts) - __field(u16, fragsize) - __field(u16, comp_idx) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd); - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->ver_opcode = i[0] & 0xff; - __entry->iovcnt = (i[0] >> 8) & 0xff; - __entry->npkts = i[1]; - __entry->fragsize = i[2]; - __entry->comp_idx = i[3]; - ), - TP_printk(SDMA_UREQ_FMT, - __get_str(dev), - __entry->ctxt, - __entry->subctxt, - __entry->ver_opcode, - __entry->iovcnt, - __entry->npkts, - __entry->fragsize, - __entry->comp_idx - ) - ); - -#define usdma_complete_name(st) { st, #st } -#define show_usdma_complete_state(st) \ - __print_symbolic(st, \ - usdma_complete_name(FREE), \ - usdma_complete_name(QUEUED), \ - usdma_complete_name(COMPLETE), \ - usdma_complete_name(ERROR)) - -TRACE_EVENT(hfi1_sdma_user_completion, - TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 idx, - u8 state, int code), - TP_ARGS(dd, ctxt, subctxt, idx, state, code), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - __field(u16, ctxt) - __field(u8, subctxt) - __field(u16, idx) - __field(u8, state) - __field(int, code) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd); - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->idx = idx; - __entry->state = state; - __entry->code = code; - ), - TP_printk("[%s:%u:%u:%u] SDMA completion state %s (%d)", - __get_str(dev), __entry->ctxt, __entry->subctxt, - __entry->idx, show_usdma_complete_state(__entry->state), - __entry->code) - ); - -const char *print_u32_array(struct trace_seq *, u32 *, int); -#define __print_u32_hex(arr, len) print_u32_array(p, arr, len) - -TRACE_EVENT(hfi1_sdma_user_header_ahg, - TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 req, - u8 sde, u8 ahgidx, u32 *ahg, int len, u32 tidval), - TP_ARGS(dd, ctxt, subctxt, req, sde, ahgidx, ahg, len, tidval), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - __field(u16, ctxt) - __field(u8, subctxt) - __field(u16, req) - __field(u8, sde) - __field(u8, idx) - __field(int, len) - __field(u32, tidval) - __array(u32, ahg, 10) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd); - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->req = req; - __entry->sde = sde; - __entry->idx = ahgidx; - __entry->len = len; - __entry->tidval = tidval; - memcpy(__entry->ahg, ahg, len * sizeof(u32)); - ), - TP_printk("[%s:%u:%u:%u] (SDE%u/AHG%u) ahg[0-%d]=(%s) TIDVal=0x%x", - __get_str(dev), - __entry->ctxt, - __entry->subctxt, - __entry->req, - __entry->sde, - __entry->idx, - __entry->len - 1, - __print_u32_hex(__entry->ahg, __entry->len), - __entry->tidval - ) - ); - -TRACE_EVENT(hfi1_sdma_state, - TP_PROTO(struct sdma_engine *sde, - const char *cstate, - const char *nstate - ), - TP_ARGS(sde, cstate, nstate), - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) - __string(curstate, cstate) - __string(newstate, nstate) - ), - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); - __assign_str(curstate, cstate); - __assign_str(newstate, nstate); - ), - TP_printk("[%s] current state %s new state %s", - __get_str(dev), - __get_str(curstate), - __get_str(newstate) - ) -); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_rc - -DECLARE_EVENT_CLASS(hfi1_rc_template, - TP_PROTO(struct rvt_qp *qp, u32 psn), - TP_ARGS(qp, psn), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) - __field(u32, qpn) - __field(u32, s_flags) - __field(u32, psn) - __field(u32, s_psn) - __field(u32, s_next_psn) - __field(u32, s_sending_psn) - __field(u32, s_sending_hpsn) - __field(u32, r_psn) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) - __entry->qpn = qp->ibqp.qp_num; - __entry->s_flags = qp->s_flags; - __entry->psn = psn; - __entry->s_psn = qp->s_psn; - __entry->s_next_psn = qp->s_next_psn; - __entry->s_sending_psn = qp->s_sending_psn; - __entry->s_sending_hpsn = qp->s_sending_hpsn; - __entry->r_psn = qp->r_psn; - ), - TP_printk( - "[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x", - __get_str(dev), - __entry->qpn, - __entry->s_flags, - __entry->psn, - __entry->s_psn, - __entry->s_next_psn, - __entry->s_sending_psn, - __entry->s_sending_hpsn, - __entry->r_psn - ) -); - -DEFINE_EVENT(hfi1_rc_template, hfi1_rc_sendcomplete, - TP_PROTO(struct rvt_qp *qp, u32 psn), - TP_ARGS(qp, psn) -); - -DEFINE_EVENT(hfi1_rc_template, hfi1_rc_ack, - TP_PROTO(struct rvt_qp *qp, u32 psn), - TP_ARGS(qp, psn) -); - -DEFINE_EVENT(hfi1_rc_template, hfi1_rc_timeout, - TP_PROTO(struct rvt_qp *qp, u32 psn), - TP_ARGS(qp, psn) -); - -DEFINE_EVENT(hfi1_rc_template, hfi1_rc_rcv_error, - TP_PROTO(struct rvt_qp *qp, u32 psn), - TP_ARGS(qp, psn) -); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_misc - -TRACE_EVENT(hfi1_interrupt, - TP_PROTO(struct hfi1_devdata *dd, const struct is_table *is_entry, - int src), - TP_ARGS(dd, is_entry, src), - TP_STRUCT__entry(DD_DEV_ENTRY(dd) - __array(char, buf, 64) - __field(int, src) - ), - TP_fast_assign(DD_DEV_ASSIGN(dd) - is_entry->is_name(__entry->buf, 64, - src - is_entry->start); - __entry->src = src; - ), - TP_printk("[%s] source: %s [%d]", __get_str(dev), __entry->buf, - __entry->src) -); - -/* - * Note: - * This produces a REALLY ugly trace in the console output when the string is - * too long. - */ - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_trace - -#define MAX_MSG_LEN 512 - -DECLARE_EVENT_CLASS(hfi1_trace_template, - TP_PROTO(const char *function, struct va_format *vaf), - TP_ARGS(function, vaf), - TP_STRUCT__entry(__string(function, function) - __dynamic_array(char, msg, MAX_MSG_LEN) - ), - TP_fast_assign(__assign_str(function, function); - WARN_ON_ONCE(vsnprintf - (__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= - MAX_MSG_LEN); - ), - TP_printk("(%s) %s", - __get_str(function), - __get_str(msg)) -); - -/* - * It may be nice to macroize the __hfi1_trace but the va_* stuff requires an - * actual function to work and can not be in a macro. - */ -#define __hfi1_trace_def(lvl) \ -void __hfi1_trace_##lvl(const char *funct, char *fmt, ...); \ - \ -DEFINE_EVENT(hfi1_trace_template, hfi1_ ##lvl, \ - TP_PROTO(const char *function, struct va_format *vaf), \ - TP_ARGS(function, vaf)) - -#define __hfi1_trace_fn(lvl) \ -void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \ -{ \ - struct va_format vaf = { \ - .fmt = fmt, \ - }; \ - va_list args; \ - \ - va_start(args, fmt); \ - vaf.va = &args; \ - trace_hfi1_ ##lvl(func, &vaf); \ - va_end(args); \ - return; \ -} - -/* - * To create a new trace level simply define it below and as a __hfi1_trace_fn - * in trace.c. This will create all the hooks for calling - * hfi1_cdbg(LVL, fmt, ...); as well as take care of all - * the debugfs stuff. - */ -__hfi1_trace_def(PKT); -__hfi1_trace_def(PROC); -__hfi1_trace_def(SDMA); -__hfi1_trace_def(LINKVERB); -__hfi1_trace_def(DEBUG); -__hfi1_trace_def(SNOOP); -__hfi1_trace_def(CNTR); -__hfi1_trace_def(PIO); -__hfi1_trace_def(DC8051); -__hfi1_trace_def(FIRMWARE); -__hfi1_trace_def(RCVCTRL); -__hfi1_trace_def(TID); -__hfi1_trace_def(MMU); -__hfi1_trace_def(IOCTL); - -#define hfi1_cdbg(which, fmt, ...) \ - __hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__) - -#define hfi1_dbg(fmt, ...) \ - hfi1_cdbg(DEBUG, fmt, ##__VA_ARGS__) - -/* - * Define HFI1_EARLY_DBG at compile time or here to enable early trace - * messages. Do not check in an enablement for this. - */ - -#ifdef HFI1_EARLY_DBG -#define hfi1_dbg_early(fmt, ...) \ - trace_printk(fmt, ##__VA_ARGS__) -#else -#define hfi1_dbg_early(fmt, ...) -#endif - -#endif /* __HFI1_TRACE_H */ - -#undef TRACE_INCLUDE_PATH -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace -#include <trace/define_trace.h> +#include "trace_dbg.h" +#include "trace_misc.h" +#include "trace_ctxts.h" +#include "trace_ibhdrs.h" +#include "trace_rc.h" +#include "trace_rx.h" +#include "trace_tx.h" diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h new file mode 100644 index 000000000000..31654bbac1cf --- /dev/null +++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h @@ -0,0 +1,141 @@ +/* +* Copyright(c) 2015, 2016 Intel Corporation. +* +* This file is provided under a dual BSD/GPLv2 license. When using or +* redistributing this file, you may do so under either license. +* +* GPL LICENSE SUMMARY +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of version 2 of the GNU General Public License as +* published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* BSD LICENSE +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* +* - Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* - Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in +* the documentation and/or other materials provided with the +* distribution. +* - Neither the name of Intel Corporation nor the names of its +* contributors may be used to endorse or promote products derived +* from this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +*/ +#if !defined(__HFI1_TRACE_CTXTS_H) || defined(TRACE_HEADER_MULTI_READ) +#define __HFI1_TRACE_CTXTS_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include "hfi.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hfi1_ctxts + +#define UCTXT_FMT \ + "cred:%u, credaddr:0x%llx, piobase:0x%p, rcvhdr_cnt:%u, " \ + "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx" +TRACE_EVENT(hfi1_uctxtdata, + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt), + TP_ARGS(dd, uctxt), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(unsigned int, ctxt) + __field(u32, credits) + __field(u64, hw_free) + __field(void __iomem *, piobase) + __field(u16, rcvhdrq_cnt) + __field(u64, rcvhdrq_phys) + __field(u32, eager_cnt) + __field(u64, rcvegr_phys) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->ctxt = uctxt->ctxt; + __entry->credits = uctxt->sc->credits; + __entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free); + __entry->piobase = uctxt->sc->base_addr; + __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt; + __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys; + __entry->eager_cnt = uctxt->egrbufs.alloced; + __entry->rcvegr_phys = + uctxt->egrbufs.rcvtids[0].phys; + ), + TP_printk("[%s] ctxt %u " UCTXT_FMT, + __get_str(dev), + __entry->ctxt, + __entry->credits, + __entry->hw_free, + __entry->piobase, + __entry->rcvhdrq_cnt, + __entry->rcvhdrq_phys, + __entry->eager_cnt, + __entry->rcvegr_phys + ) +); + +#define CINFO_FMT \ + "egrtids:%u, egr_size:%u, hdrq_cnt:%u, hdrq_size:%u, sdma_ring_size:%u" +TRACE_EVENT(hfi1_ctxt_info, + TP_PROTO(struct hfi1_devdata *dd, unsigned int ctxt, + unsigned int subctxt, + struct hfi1_ctxt_info cinfo), + TP_ARGS(dd, ctxt, subctxt, cinfo), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(unsigned int, ctxt) + __field(unsigned int, subctxt) + __field(u16, egrtids) + __field(u16, rcvhdrq_cnt) + __field(u16, rcvhdrq_size) + __field(u16, sdma_ring_size) + __field(u32, rcvegr_size) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->egrtids = cinfo.egrtids; + __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt; + __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize; + __entry->sdma_ring_size = cinfo.sdma_ring_size; + __entry->rcvegr_size = cinfo.rcvegr_size; + ), + TP_printk("[%s] ctxt %u:%u " CINFO_FMT, + __get_str(dev), + __entry->ctxt, + __entry->subctxt, + __entry->egrtids, + __entry->rcvegr_size, + __entry->rcvhdrq_cnt, + __entry->rcvhdrq_size, + __entry->sdma_ring_size + ) +); + +#endif /* __HFI1_TRACE_CTXTS_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_ctxts +#include <trace/define_trace.h> diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h new file mode 100644 index 000000000000..0e7d929530c5 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/trace_dbg.h @@ -0,0 +1,155 @@ +/* +* Copyright(c) 2015, 2016 Intel Corporation. +* +* This file is provided under a dual BSD/GPLv2 license. When using or +* redistributing this file, you may do so under either license. +* +* GPL LICENSE SUMMARY +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of version 2 of the GNU General Public License as +* published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* BSD LICENSE +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* +* - Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* - Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in +* the documentation and/or other materials provided with the +* distribution. +* - Neither the name of Intel Corporation nor the names of its +* contributors may be used to endorse or promote products derived +* from this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +*/ +#if !defined(__HFI1_TRACE_EXTRA_H) || defined(TRACE_HEADER_MULTI_READ) +#define __HFI1_TRACE_EXTRA_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include "hfi.h" + +/* + * Note: + * This produces a REALLY ugly trace in the console output when the string is + * too long. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hfi1_dbg + +#define MAX_MSG_LEN 512 + +DECLARE_EVENT_CLASS(hfi1_trace_template, + TP_PROTO(const char *function, struct va_format *vaf), + TP_ARGS(function, vaf), + TP_STRUCT__entry(__string(function, function) + __dynamic_array(char, msg, MAX_MSG_LEN) + ), + TP_fast_assign(__assign_str(function, function); + WARN_ON_ONCE(vsnprintf + (__get_dynamic_array(msg), + MAX_MSG_LEN, vaf->fmt, + *vaf->va) >= + MAX_MSG_LEN); + ), + TP_printk("(%s) %s", + __get_str(function), + __get_str(msg)) +); + +/* + * It may be nice to macroize the __hfi1_trace but the va_* stuff requires an + * actual function to work and can not be in a macro. + */ +#define __hfi1_trace_def(lvl) \ +void __hfi1_trace_##lvl(const char *funct, char *fmt, ...); \ + \ +DEFINE_EVENT(hfi1_trace_template, hfi1_ ##lvl, \ + TP_PROTO(const char *function, struct va_format *vaf), \ + TP_ARGS(function, vaf)) + +#define __hfi1_trace_fn(lvl) \ +void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \ +{ \ + struct va_format vaf = { \ + .fmt = fmt, \ + }; \ + va_list args; \ + \ + va_start(args, fmt); \ + vaf.va = &args; \ + trace_hfi1_ ##lvl(func, &vaf); \ + va_end(args); \ + return; \ +} + +/* + * To create a new trace level simply define it below and as a __hfi1_trace_fn + * in trace.c. This will create all the hooks for calling + * hfi1_cdbg(LVL, fmt, ...); as well as take care of all + * the debugfs stuff. + */ +__hfi1_trace_def(PKT); +__hfi1_trace_def(PROC); +__hfi1_trace_def(SDMA); +__hfi1_trace_def(LINKVERB); +__hfi1_trace_def(DEBUG); +__hfi1_trace_def(SNOOP); +__hfi1_trace_def(CNTR); +__hfi1_trace_def(PIO); +__hfi1_trace_def(DC8051); +__hfi1_trace_def(FIRMWARE); +__hfi1_trace_def(RCVCTRL); +__hfi1_trace_def(TID); +__hfi1_trace_def(MMU); +__hfi1_trace_def(IOCTL); + +#define hfi1_cdbg(which, fmt, ...) \ + __hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__) + +#define hfi1_dbg(fmt, ...) \ + hfi1_cdbg(DEBUG, fmt, ##__VA_ARGS__) + +/* + * Define HFI1_EARLY_DBG at compile time or here to enable early trace + * messages. Do not check in an enablement for this. + */ + +#ifdef HFI1_EARLY_DBG +#define hfi1_dbg_early(fmt, ...) \ + trace_printk(fmt, ##__VA_ARGS__) +#else +#define hfi1_dbg_early(fmt, ...) +#endif + +#endif /* __HFI1_TRACE_EXTRA_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_dbg +#include <trace/define_trace.h> diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h new file mode 100644 index 000000000000..c3e41aed0034 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -0,0 +1,209 @@ +/* + * Copyright(c) 2015, 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__HFI1_TRACE_IBHDRS_H) || defined(TRACE_HEADER_MULTI_READ) +#define __HFI1_TRACE_IBHDRS_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include "hfi.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hfi1_ibhdrs + +u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr); +const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); + +#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) + +#define lrh_name(lrh) { HFI1_##lrh, #lrh } +#define show_lnh(lrh) \ +__print_symbolic(lrh, \ + lrh_name(LRH_BTH), \ + lrh_name(LRH_GRH)) + +#define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x" +#define BTH_PRN \ + "op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \ + "f %d b %d qpn 0x%.6x a %d psn 0x%.8x" +#define EHDR_PRN "%s" + +DECLARE_EVENT_CLASS(hfi1_ibhdr_template, + TP_PROTO(struct hfi1_devdata *dd, + struct hfi1_ib_header *hdr), + TP_ARGS(dd, hdr), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd) + /* LRH */ + __field(u8, vl) + __field(u8, lver) + __field(u8, sl) + __field(u8, lnh) + __field(u16, dlid) + __field(u16, len) + __field(u16, slid) + /* BTH */ + __field(u8, opcode) + __field(u8, se) + __field(u8, m) + __field(u8, pad) + __field(u8, tver) + __field(u16, pkey) + __field(u8, f) + __field(u8, b) + __field(u32, qpn) + __field(u8, a) + __field(u32, psn) + /* extended headers */ + __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr)) + ), + TP_fast_assign( + struct hfi1_other_headers *ohdr; + + DD_DEV_ASSIGN(dd); + /* LRH */ + __entry->vl = + (u8)(be16_to_cpu(hdr->lrh[0]) >> 12); + __entry->lver = + (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf; + __entry->sl = + (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; + __entry->lnh = + (u8)(be16_to_cpu(hdr->lrh[0]) & 3); + __entry->dlid = + be16_to_cpu(hdr->lrh[1]); + /* allow for larger len */ + __entry->len = + be16_to_cpu(hdr->lrh[2]); + __entry->slid = + be16_to_cpu(hdr->lrh[3]); + /* BTH */ + if (__entry->lnh == HFI1_LRH_BTH) + ohdr = &hdr->u.oth; + else + ohdr = &hdr->u.l.oth; + __entry->opcode = + (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; + __entry->se = + (be32_to_cpu(ohdr->bth[0]) >> 23) & 1; + __entry->m = + (be32_to_cpu(ohdr->bth[0]) >> 22) & 1; + __entry->pad = + (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + __entry->tver = + (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf; + __entry->pkey = + be32_to_cpu(ohdr->bth[0]) & 0xffff; + __entry->f = + (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) & + HFI1_FECN_MASK; + __entry->b = + (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) & + HFI1_BECN_MASK; + __entry->qpn = + be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; + __entry->a = + (be32_to_cpu(ohdr->bth[2]) >> 31) & 1; + /* allow for larger PSN */ + __entry->psn = + be32_to_cpu(ohdr->bth[2]) & 0x7fffffff; + /* extended headers */ + memcpy(__get_dynamic_array(ehdrs), &ohdr->u, + ibhdr_exhdr_len(hdr)); + ), + TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN, + __get_str(dev), + /* LRH */ + __entry->vl, + __entry->lver, + __entry->sl, + __entry->lnh, show_lnh(__entry->lnh), + __entry->dlid, + __entry->len, + __entry->slid, + /* BTH */ + __entry->opcode, show_ib_opcode(__entry->opcode), + __entry->se, + __entry->m, + __entry->pad, + __entry->tver, + __entry->pkey, + __entry->f, + __entry->b, + __entry->qpn, + __entry->a, + __entry->psn, + /* extended headers */ + __parse_ib_ehdrs( + __entry->opcode, + (void *)__get_dynamic_array(ehdrs)) + ) +); + +DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_ARGS(dd, hdr)); + +DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_ARGS(dd, hdr)); + +DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_ARGS(dd, hdr)); + +DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_ARGS(dd, hdr)); + +#endif /* __HFI1_TRACE_IBHDRS_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_ibhdrs +#include <trace/define_trace.h> diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h new file mode 100644 index 000000000000..d308454af7fd --- /dev/null +++ b/drivers/infiniband/hw/hfi1/trace_misc.h @@ -0,0 +1,81 @@ +/* +* Copyright(c) 2015, 2016 Intel Corporation. +* +* This file is provided under a dual BSD/GPLv2 license. When using or +* redistributing this file, you may do so under either license. +* +* GPL LICENSE SUMMARY +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of version 2 of the GNU General Public License as +* published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* BSD LICENSE +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* +* - Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* - Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in +* the documentation and/or other materials provided with the +* distribution. +* - Neither the name of Intel Corporation nor the names of its +* contributors may be used to endorse or promote products derived +* from this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +*/ +#if !defined(__HFI1_TRACE_MISC_H) || defined(TRACE_HEADER_MULTI_READ) +#define __HFI1_TRACE_MISC_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include "hfi.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hfi1_misc + +TRACE_EVENT(hfi1_interrupt, + TP_PROTO(struct hfi1_devdata *dd, const struct is_table *is_entry, + int src), + TP_ARGS(dd, is_entry, src), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __array(char, buf, 64) + __field(int, src) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd) + is_entry->is_name(__entry->buf, 64, + src - is_entry->start); + __entry->src = src; + ), + TP_printk("[%s] source: %s [%d]", __get_str(dev), __entry->buf, + __entry->src) +); + +#endif /* __HFI1_TRACE_MISC_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_misc +#include <trace/define_trace.h> diff --git a/drivers/infiniband/hw/hfi1/trace_rc.h b/drivers/infiniband/hw/hfi1/trace_rc.h new file mode 100644 index 000000000000..5ea5005f9f41 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/trace_rc.h @@ -0,0 +1,123 @@ +/* +* Copyright(c) 2015, 2016 Intel Corporation. +* +* This file is provided under a dual BSD/GPLv2 license. When using or +* redistributing this file, you may do so under either license. +* +* GPL LICENSE SUMMARY +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of version 2 of the GNU General Public License as +* published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* BSD LICENSE +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* +* - Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* - Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in +* the documentation and/or other materials provided with the +* distribution. +* - Neither the name of Intel Corporation nor the names of its +* contributors may be used to endorse or promote products derived +* from this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +*/ +#if !defined(__HFI1_TRACE_RC_H) || defined(TRACE_HEADER_MULTI_READ) +#define __HFI1_TRACE_RC_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include "hfi.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hfi1_rc + +DECLARE_EVENT_CLASS(hfi1_rc_template, + TP_PROTO(struct rvt_qp *qp, u32 psn), + TP_ARGS(qp, psn), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) + __field(u32, qpn) + __field(u32, s_flags) + __field(u32, psn) + __field(u32, s_psn) + __field(u32, s_next_psn) + __field(u32, s_sending_psn) + __field(u32, s_sending_hpsn) + __field(u32, r_psn) + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) + __entry->qpn = qp->ibqp.qp_num; + __entry->s_flags = qp->s_flags; + __entry->psn = psn; + __entry->s_psn = qp->s_psn; + __entry->s_next_psn = qp->s_next_psn; + __entry->s_sending_psn = qp->s_sending_psn; + __entry->s_sending_hpsn = qp->s_sending_hpsn; + __entry->r_psn = qp->r_psn; + ), + TP_printk( + "[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x", + __get_str(dev), + __entry->qpn, + __entry->s_flags, + __entry->psn, + __entry->s_psn, + __entry->s_next_psn, + __entry->s_sending_psn, + __entry->s_sending_hpsn, + __entry->r_psn + ) +); + +DEFINE_EVENT(hfi1_rc_template, hfi1_sendcomplete, + TP_PROTO(struct rvt_qp *qp, u32 psn), + TP_ARGS(qp, psn) +); + +DEFINE_EVENT(hfi1_rc_template, hfi1_ack, + TP_PROTO(struct rvt_qp *qp, u32 psn), + TP_ARGS(qp, psn) +); + +DEFINE_EVENT(hfi1_rc_template, hfi1_timeout, + TP_PROTO(struct rvt_qp *qp, u32 psn), + TP_ARGS(qp, psn) +); + +DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error, + TP_PROTO(struct rvt_qp *qp, u32 psn), + TP_ARGS(qp, psn) +); + +#endif /* __HFI1_TRACE_RC_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_rc +#include <trace/define_trace.h> diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h new file mode 100644 index 000000000000..9ba1f615ec95 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -0,0 +1,322 @@ +/* + * Copyright(c) 2015, 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__HFI1_TRACE_RX_H) || defined(TRACE_HEADER_MULTI_READ) +#define __HFI1_TRACE_RX_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include "hfi.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hfi1_rx + +TRACE_EVENT(hfi1_rcvhdr, + TP_PROTO(struct hfi1_devdata *dd, + u32 ctxt, + u64 eflags, + u32 etype, + u32 hlen, + u32 tlen, + u32 updegr, + u32 etail + ), + TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(u64, eflags) + __field(u32, ctxt) + __field(u32, etype) + __field(u32, hlen) + __field(u32, tlen) + __field(u32, updegr) + __field(u32, etail) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->eflags = eflags; + __entry->ctxt = ctxt; + __entry->etype = etype; + __entry->hlen = hlen; + __entry->tlen = tlen; + __entry->updegr = updegr; + __entry->etail = etail; + ), + TP_printk( + "[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d", + __get_str(dev), + __entry->ctxt, + __entry->eflags, + __entry->etype, show_packettype(__entry->etype), + __entry->hlen, + __entry->tlen, + __entry->updegr, + __entry->etail + ) +); + +TRACE_EVENT(hfi1_receive_interrupt, + TP_PROTO(struct hfi1_devdata *dd, u32 ctxt), + TP_ARGS(dd, ctxt), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(u32, ctxt) + __field(u8, slow_path) + __field(u8, dma_rtail) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + if (dd->rcd[ctxt]->do_interrupt == + &handle_receive_interrupt) { + __entry->slow_path = 1; + __entry->dma_rtail = 0xFF; + } else if (dd->rcd[ctxt]->do_interrupt == + &handle_receive_interrupt_dma_rtail){ + __entry->dma_rtail = 1; + __entry->slow_path = 0; + } else if (dd->rcd[ctxt]->do_interrupt == + &handle_receive_interrupt_nodma_rtail) { + __entry->dma_rtail = 0; + __entry->slow_path = 0; + } + ), + TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d", + __get_str(dev), + __entry->ctxt, + __entry->slow_path, + __entry->dma_rtail + ) +); + +TRACE_EVENT(hfi1_exp_tid_reg, + TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, + u32 npages, unsigned long va, unsigned long pa, + dma_addr_t dma), + TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma), + TP_STRUCT__entry( + __field(unsigned int, ctxt) + __field(u16, subctxt) + __field(u32, rarr) + __field(u32, npages) + __field(unsigned long, va) + __field(unsigned long, pa) + __field(dma_addr_t, dma) + ), + TP_fast_assign( + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->rarr = rarr; + __entry->npages = npages; + __entry->va = va; + __entry->pa = pa; + __entry->dma = dma; + ), + TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx", + __entry->ctxt, + __entry->subctxt, + __entry->rarr, + __entry->npages, + __entry->pa, + __entry->va, + __entry->dma + ) + ); + +TRACE_EVENT(hfi1_exp_tid_unreg, + TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, + unsigned long va, unsigned long pa, dma_addr_t dma), + TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma), + TP_STRUCT__entry( + __field(unsigned int, ctxt) + __field(u16, subctxt) + __field(u32, rarr) + __field(u32, npages) + __field(unsigned long, va) + __field(unsigned long, pa) + __field(dma_addr_t, dma) + ), + TP_fast_assign( + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->rarr = rarr; + __entry->npages = npages; + __entry->va = va; + __entry->pa = pa; + __entry->dma = dma; + ), + TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx", + __entry->ctxt, + __entry->subctxt, + __entry->rarr, + __entry->npages, + __entry->pa, + __entry->va, + __entry->dma + ) + ); + +TRACE_EVENT(hfi1_exp_tid_inval, + TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr, + u32 npages, dma_addr_t dma), + TP_ARGS(ctxt, subctxt, va, rarr, npages, dma), + TP_STRUCT__entry( + __field(unsigned int, ctxt) + __field(u16, subctxt) + __field(unsigned long, va) + __field(u32, rarr) + __field(u32, npages) + __field(dma_addr_t, dma) + ), + TP_fast_assign( + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->va = va; + __entry->rarr = rarr; + __entry->npages = npages; + __entry->dma = dma; + ), + TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx", + __entry->ctxt, + __entry->subctxt, + __entry->rarr, + __entry->npages, + __entry->va, + __entry->dma + ) + ); + +TRACE_EVENT(hfi1_mmu_invalidate, + TP_PROTO(unsigned int ctxt, u16 subctxt, const char *type, + unsigned long start, unsigned long end), + TP_ARGS(ctxt, subctxt, type, start, end), + TP_STRUCT__entry( + __field(unsigned int, ctxt) + __field(u16, subctxt) + __string(type, type) + __field(unsigned long, start) + __field(unsigned long, end) + ), + TP_fast_assign( + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __assign_str(type, type); + __entry->start = start; + __entry->end = end; + ), + TP_printk("[%3u:%02u] MMU Invalidate (%s) 0x%lx - 0x%lx", + __entry->ctxt, + __entry->subctxt, + __get_str(type), + __entry->start, + __entry->end + ) + ); + +#define SNOOP_PRN \ + "slid %.4x dlid %.4x qpn 0x%.6x opcode 0x%.2x,%s " \ + "svc lvl %d pkey 0x%.4x [header = %d bytes] [data = %d bytes]" + +TRACE_EVENT(snoop_capture, + TP_PROTO(struct hfi1_devdata *dd, + int hdr_len, + struct hfi1_ib_header *hdr, + int data_len, + void *data), + TP_ARGS(dd, hdr_len, hdr, data_len, data), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd) + __field(u16, slid) + __field(u16, dlid) + __field(u32, qpn) + __field(u8, opcode) + __field(u8, sl) + __field(u16, pkey) + __field(u32, hdr_len) + __field(u32, data_len) + __field(u8, lnh) + __dynamic_array(u8, raw_hdr, hdr_len) + __dynamic_array(u8, raw_pkt, data_len) + ), + TP_fast_assign( + struct hfi1_other_headers *ohdr; + + __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); + if (__entry->lnh == HFI1_LRH_BTH) + ohdr = &hdr->u.oth; + else + ohdr = &hdr->u.l.oth; + DD_DEV_ASSIGN(dd); + __entry->slid = be16_to_cpu(hdr->lrh[3]); + __entry->dlid = be16_to_cpu(hdr->lrh[1]); + __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; + __entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; + __entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; + __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff; + __entry->hdr_len = hdr_len; + __entry->data_len = data_len; + memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len); + memcpy(__get_dynamic_array(raw_pkt), data, data_len); + ), + TP_printk( + "[%s] " SNOOP_PRN, + __get_str(dev), + __entry->slid, + __entry->dlid, + __entry->qpn, + __entry->opcode, + show_ib_opcode(__entry->opcode), + __entry->sl, + __entry->pkey, + __entry->hdr_len, + __entry->data_len + ) +); + +#endif /* __HFI1_TRACE_RX_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_rx +#include <trace/define_trace.h> diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h new file mode 100644 index 000000000000..415d6be42c5d --- /dev/null +++ b/drivers/infiniband/hw/hfi1/trace_tx.h @@ -0,0 +1,642 @@ +/* + * Copyright(c) 2015, 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__HFI1_TRACE_TX_H) || defined(TRACE_HEADER_MULTI_READ) +#define __HFI1_TRACE_TX_H + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +#include "hfi.h" +#include "mad.h" +#include "sdma.h" + +const char *parse_sdma_flags(struct trace_seq *p, u64 desc0, u64 desc1); + +#define __parse_sdma_flags(desc0, desc1) parse_sdma_flags(p, desc0, desc1) + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hfi1_tx + +TRACE_EVENT(hfi1_piofree, + TP_PROTO(struct send_context *sc, int extra), + TP_ARGS(sc, extra), + TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd) + __field(u32, sw_index) + __field(u32, hw_context) + __field(int, extra) + ), + TP_fast_assign(DD_DEV_ASSIGN(sc->dd); + __entry->sw_index = sc->sw_index; + __entry->hw_context = sc->hw_context; + __entry->extra = extra; + ), + TP_printk("[%s] ctxt %u(%u) extra %d", + __get_str(dev), + __entry->sw_index, + __entry->hw_context, + __entry->extra + ) +); + +TRACE_EVENT(hfi1_wantpiointr, + TP_PROTO(struct send_context *sc, u32 needint, u64 credit_ctrl), + TP_ARGS(sc, needint, credit_ctrl), + TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd) + __field(u32, sw_index) + __field(u32, hw_context) + __field(u32, needint) + __field(u64, credit_ctrl) + ), + TP_fast_assign(DD_DEV_ASSIGN(sc->dd); + __entry->sw_index = sc->sw_index; + __entry->hw_context = sc->hw_context; + __entry->needint = needint; + __entry->credit_ctrl = credit_ctrl; + ), + TP_printk("[%s] ctxt %u(%u) on %d credit_ctrl 0x%llx", + __get_str(dev), + __entry->sw_index, + __entry->hw_context, + __entry->needint, + (unsigned long long)__entry->credit_ctrl + ) +); + +DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template, + TP_PROTO(struct rvt_qp *qp, u32 flags), + TP_ARGS(qp, flags), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) + __field(u32, qpn) + __field(u32, flags) + __field(u32, s_flags) + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) + __entry->flags = flags; + __entry->qpn = qp->ibqp.qp_num; + __entry->s_flags = qp->s_flags; + ), + TP_printk( + "[%s] qpn 0x%x flags 0x%x s_flags 0x%x", + __get_str(dev), + __entry->qpn, + __entry->flags, + __entry->s_flags + ) +); + +DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpwakeup, + TP_PROTO(struct rvt_qp *qp, u32 flags), + TP_ARGS(qp, flags)); + +DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpsleep, + TP_PROTO(struct rvt_qp *qp, u32 flags), + TP_ARGS(qp, flags)); + +TRACE_EVENT(hfi1_sdma_descriptor, + TP_PROTO(struct sdma_engine *sde, + u64 desc0, + u64 desc1, + u16 e, + void *descp), + TP_ARGS(sde, desc0, desc1, e, descp), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __field(void *, descp) + __field(u64, desc0) + __field(u64, desc1) + __field(u16, e) + __field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + __entry->desc0 = desc0; + __entry->desc1 = desc1; + __entry->idx = sde->this_idx; + __entry->descp = descp; + __entry->e = e; + ), + TP_printk( + "[%s] SDE(%u) flags:%s addr:0x%016llx gen:%u len:%u d0:%016llx d1:%016llx to %p,%u", + __get_str(dev), + __entry->idx, + __parse_sdma_flags(__entry->desc0, __entry->desc1), + (__entry->desc0 >> SDMA_DESC0_PHY_ADDR_SHIFT) & + SDMA_DESC0_PHY_ADDR_MASK, + (u8)((__entry->desc1 >> SDMA_DESC1_GENERATION_SHIFT) & + SDMA_DESC1_GENERATION_MASK), + (u16)((__entry->desc0 >> SDMA_DESC0_BYTE_COUNT_SHIFT) & + SDMA_DESC0_BYTE_COUNT_MASK), + __entry->desc0, + __entry->desc1, + __entry->descp, + __entry->e + ) +); + +TRACE_EVENT(hfi1_sdma_engine_select, + TP_PROTO(struct hfi1_devdata *dd, u32 sel, u8 vl, u8 idx), + TP_ARGS(dd, sel, vl, idx), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(u32, sel) + __field(u8, vl) + __field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->sel = sel; + __entry->vl = vl; + __entry->idx = idx; + ), + TP_printk("[%s] selecting SDE %u sel 0x%x vl %u", + __get_str(dev), + __entry->idx, + __entry->sel, + __entry->vl + ) +); + +DECLARE_EVENT_CLASS(hfi1_sdma_engine_class, + TP_PROTO(struct sdma_engine *sde, u64 status), + TP_ARGS(sde, status), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __field(u64, status) + __field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + __entry->status = status; + __entry->idx = sde->this_idx; + ), + TP_printk("[%s] SDE(%u) status %llx", + __get_str(dev), + __entry->idx, + (unsigned long long)__entry->status + ) +); + +DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_interrupt, + TP_PROTO(struct sdma_engine *sde, u64 status), + TP_ARGS(sde, status) +); + +DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_progress, + TP_PROTO(struct sdma_engine *sde, u64 status), + TP_ARGS(sde, status) +); + +DECLARE_EVENT_CLASS(hfi1_sdma_ahg_ad, + TP_PROTO(struct sdma_engine *sde, int aidx), + TP_ARGS(sde, aidx), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __field(int, aidx) + __field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + __entry->idx = sde->this_idx; + __entry->aidx = aidx; + ), + TP_printk("[%s] SDE(%u) aidx %d", + __get_str(dev), + __entry->idx, + __entry->aidx + ) +); + +DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_allocate, + TP_PROTO(struct sdma_engine *sde, int aidx), + TP_ARGS(sde, aidx)); + +DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_deallocate, + TP_PROTO(struct sdma_engine *sde, int aidx), + TP_ARGS(sde, aidx)); + +#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER +TRACE_EVENT(hfi1_sdma_progress, + TP_PROTO(struct sdma_engine *sde, + u16 hwhead, + u16 swhead, + struct sdma_txreq *txp + ), + TP_ARGS(sde, hwhead, swhead, txp), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __field(u64, sn) + __field(u16, hwhead) + __field(u16, swhead) + __field(u16, txnext) + __field(u16, tx_tail) + __field(u16, tx_head) + __field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + __entry->hwhead = hwhead; + __entry->swhead = swhead; + __entry->tx_tail = sde->tx_tail; + __entry->tx_head = sde->tx_head; + __entry->txnext = txp ? txp->next_descq_idx : ~0; + __entry->idx = sde->this_idx; + __entry->sn = txp ? txp->sn : ~0; + ), + TP_printk( + "[%s] SDE(%u) sn %llu hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u", + __get_str(dev), + __entry->idx, + __entry->sn, + __entry->hwhead, + __entry->swhead, + __entry->txnext, + __entry->tx_head, + __entry->tx_tail + ) +); +#else +TRACE_EVENT(hfi1_sdma_progress, + TP_PROTO(struct sdma_engine *sde, + u16 hwhead, u16 swhead, + struct sdma_txreq *txp + ), + TP_ARGS(sde, hwhead, swhead, txp), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __field(u16, hwhead) + __field(u16, swhead) + __field(u16, txnext) + __field(u16, tx_tail) + __field(u16, tx_head) + __field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + __entry->hwhead = hwhead; + __entry->swhead = swhead; + __entry->tx_tail = sde->tx_tail; + __entry->tx_head = sde->tx_head; + __entry->txnext = txp ? txp->next_descq_idx : ~0; + __entry->idx = sde->this_idx; + ), + TP_printk( + "[%s] SDE(%u) hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u", + __get_str(dev), + __entry->idx, + __entry->hwhead, + __entry->swhead, + __entry->txnext, + __entry->tx_head, + __entry->tx_tail + ) +); +#endif + +DECLARE_EVENT_CLASS(hfi1_sdma_sn, + TP_PROTO(struct sdma_engine *sde, u64 sn), + TP_ARGS(sde, sn), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __field(u64, sn) + __field(u8, idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + __entry->sn = sn; + __entry->idx = sde->this_idx; + ), + TP_printk("[%s] SDE(%u) sn %llu", + __get_str(dev), + __entry->idx, + __entry->sn + ) +); + +DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_out_sn, + TP_PROTO( + struct sdma_engine *sde, + u64 sn + ), + TP_ARGS(sde, sn) +); + +DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_in_sn, + TP_PROTO(struct sdma_engine *sde, u64 sn), + TP_ARGS(sde, sn) +); + +#define USDMA_HDR_FORMAT \ + "[%s:%u:%u:%u] PBC=(0x%x 0x%x) LRH=(0x%x 0x%x) BTH=(0x%x 0x%x 0x%x) KDETH=(0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x) TIDVal=0x%x" + +TRACE_EVENT(hfi1_sdma_user_header, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 req, + struct hfi1_pkt_header *hdr, u32 tidval), + TP_ARGS(dd, ctxt, subctxt, req, hdr, tidval), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd) + __field(u16, ctxt) + __field(u8, subctxt) + __field(u16, req) + __field(u32, pbc0) + __field(u32, pbc1) + __field(u32, lrh0) + __field(u32, lrh1) + __field(u32, bth0) + __field(u32, bth1) + __field(u32, bth2) + __field(u32, kdeth0) + __field(u32, kdeth1) + __field(u32, kdeth2) + __field(u32, kdeth3) + __field(u32, kdeth4) + __field(u32, kdeth5) + __field(u32, kdeth6) + __field(u32, kdeth7) + __field(u32, kdeth8) + __field(u32, tidval) + ), + TP_fast_assign( + __le32 *pbc = (__le32 *)hdr->pbc; + __be32 *lrh = (__be32 *)hdr->lrh; + __be32 *bth = (__be32 *)hdr->bth; + __le32 *kdeth = (__le32 *)&hdr->kdeth; + + DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->req = req; + __entry->pbc0 = le32_to_cpu(pbc[0]); + __entry->pbc1 = le32_to_cpu(pbc[1]); + __entry->lrh0 = be32_to_cpu(lrh[0]); + __entry->lrh1 = be32_to_cpu(lrh[1]); + __entry->bth0 = be32_to_cpu(bth[0]); + __entry->bth1 = be32_to_cpu(bth[1]); + __entry->bth2 = be32_to_cpu(bth[2]); + __entry->kdeth0 = le32_to_cpu(kdeth[0]); + __entry->kdeth1 = le32_to_cpu(kdeth[1]); + __entry->kdeth2 = le32_to_cpu(kdeth[2]); + __entry->kdeth3 = le32_to_cpu(kdeth[3]); + __entry->kdeth4 = le32_to_cpu(kdeth[4]); + __entry->kdeth5 = le32_to_cpu(kdeth[5]); + __entry->kdeth6 = le32_to_cpu(kdeth[6]); + __entry->kdeth7 = le32_to_cpu(kdeth[7]); + __entry->kdeth8 = le32_to_cpu(kdeth[8]); + __entry->tidval = tidval; + ), + TP_printk(USDMA_HDR_FORMAT, + __get_str(dev), + __entry->ctxt, + __entry->subctxt, + __entry->req, + __entry->pbc1, + __entry->pbc0, + __entry->lrh0, + __entry->lrh1, + __entry->bth0, + __entry->bth1, + __entry->bth2, + __entry->kdeth0, + __entry->kdeth1, + __entry->kdeth2, + __entry->kdeth3, + __entry->kdeth4, + __entry->kdeth5, + __entry->kdeth6, + __entry->kdeth7, + __entry->kdeth8, + __entry->tidval + ) +); + +#define SDMA_UREQ_FMT \ + "[%s:%u:%u] ver/op=0x%x, iovcnt=%u, npkts=%u, frag=%u, idx=%u" +TRACE_EVENT(hfi1_sdma_user_reqinfo, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 *i), + TP_ARGS(dd, ctxt, subctxt, i), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd); + __field(u16, ctxt) + __field(u8, subctxt) + __field(u8, ver_opcode) + __field(u8, iovcnt) + __field(u16, npkts) + __field(u16, fragsize) + __field(u16, comp_idx) + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->ver_opcode = i[0] & 0xff; + __entry->iovcnt = (i[0] >> 8) & 0xff; + __entry->npkts = i[1]; + __entry->fragsize = i[2]; + __entry->comp_idx = i[3]; + ), + TP_printk(SDMA_UREQ_FMT, + __get_str(dev), + __entry->ctxt, + __entry->subctxt, + __entry->ver_opcode, + __entry->iovcnt, + __entry->npkts, + __entry->fragsize, + __entry->comp_idx + ) +); + +#define usdma_complete_name(st) { st, #st } +#define show_usdma_complete_state(st) \ + __print_symbolic(st, \ + usdma_complete_name(FREE), \ + usdma_complete_name(QUEUED), \ + usdma_complete_name(COMPLETE), \ + usdma_complete_name(ERROR)) + +TRACE_EVENT(hfi1_sdma_user_completion, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 idx, + u8 state, int code), + TP_ARGS(dd, ctxt, subctxt, idx, state, code), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd) + __field(u16, ctxt) + __field(u8, subctxt) + __field(u16, idx) + __field(u8, state) + __field(int, code) + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->idx = idx; + __entry->state = state; + __entry->code = code; + ), + TP_printk("[%s:%u:%u:%u] SDMA completion state %s (%d)", + __get_str(dev), __entry->ctxt, __entry->subctxt, + __entry->idx, show_usdma_complete_state(__entry->state), + __entry->code) +); + +const char *print_u32_array(struct trace_seq *, u32 *, int); +#define __print_u32_hex(arr, len) print_u32_array(p, arr, len) + +TRACE_EVENT(hfi1_sdma_user_header_ahg, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 req, + u8 sde, u8 ahgidx, u32 *ahg, int len, u32 tidval), + TP_ARGS(dd, ctxt, subctxt, req, sde, ahgidx, ahg, len, tidval), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd) + __field(u16, ctxt) + __field(u8, subctxt) + __field(u16, req) + __field(u8, sde) + __field(u8, idx) + __field(int, len) + __field(u32, tidval) + __array(u32, ahg, 10) + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->req = req; + __entry->sde = sde; + __entry->idx = ahgidx; + __entry->len = len; + __entry->tidval = tidval; + memcpy(__entry->ahg, ahg, len * sizeof(u32)); + ), + TP_printk("[%s:%u:%u:%u] (SDE%u/AHG%u) ahg[0-%d]=(%s) TIDVal=0x%x", + __get_str(dev), + __entry->ctxt, + __entry->subctxt, + __entry->req, + __entry->sde, + __entry->idx, + __entry->len - 1, + __print_u32_hex(__entry->ahg, __entry->len), + __entry->tidval + ) +); + +TRACE_EVENT(hfi1_sdma_state, + TP_PROTO(struct sdma_engine *sde, + const char *cstate, + const char *nstate + ), + TP_ARGS(sde, cstate, nstate), + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) + __string(curstate, cstate) + __string(newstate, nstate) + ), + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); + __assign_str(curstate, cstate); + __assign_str(newstate, nstate); + ), + TP_printk("[%s] current state %s new state %s", + __get_str(dev), + __get_str(curstate), + __get_str(newstate) + ) +); + +#define BCT_FORMAT \ + "shared_limit %x vls 0-7 [%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x] 15 [%x,%x]" + +#define BCT(field) \ + be16_to_cpu( \ + ((struct buffer_control *)__get_dynamic_array(bct))->field \ + ) + +DECLARE_EVENT_CLASS(hfi1_bct_template, + TP_PROTO(struct hfi1_devdata *dd, + struct buffer_control *bc), + TP_ARGS(dd, bc), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __dynamic_array(u8, bct, sizeof(*bc)) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + memcpy(__get_dynamic_array(bct), bc, + sizeof(*bc)); + ), + TP_printk(BCT_FORMAT, + BCT(overall_shared_limit), + + BCT(vl[0].dedicated), + BCT(vl[0].shared), + + BCT(vl[1].dedicated), + BCT(vl[1].shared), + + BCT(vl[2].dedicated), + BCT(vl[2].shared), + + BCT(vl[3].dedicated), + BCT(vl[3].shared), + + BCT(vl[4].dedicated), + BCT(vl[4].shared), + + BCT(vl[5].dedicated), + BCT(vl[5].shared), + + BCT(vl[6].dedicated), + BCT(vl[6].shared), + + BCT(vl[7].dedicated), + BCT(vl[7].shared), + + BCT(vl[15].dedicated), + BCT(vl[15].shared) + ) +); + +DEFINE_EVENT(hfi1_bct_template, bct_set, + TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc), + TP_ARGS(dd, bc)); + +DEFINE_EVENT(hfi1_bct_template, bct_get, + TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc), + TP_ARGS(dd, bc)); + +#endif /* __HFI1_TRACE_TX_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_tx +#include <trace/define_trace.h> diff --git a/drivers/infiniband/hw/hfi1/twsi.c b/drivers/infiniband/hw/hfi1/twsi.c deleted file mode 100644 index e82e52a63d35..000000000000 --- a/drivers/infiniband/hw/hfi1/twsi.c +++ /dev/null @@ -1,489 +0,0 @@ -/* - * Copyright(c) 2015, 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include <linux/delay.h> -#include <linux/pci.h> -#include <linux/vmalloc.h> - -#include "hfi.h" -#include "twsi.h" - -/* - * "Two Wire Serial Interface" support. - * - * Originally written for a not-quite-i2c serial eeprom, which is - * still used on some supported boards. Later boards have added a - * variety of other uses, most board-specific, so the bit-boffing - * part has been split off to this file, while the other parts - * have been moved to chip-specific files. - * - * We have also dropped all pretense of fully generic (e.g. pretend - * we don't know whether '1' is the higher voltage) interface, as - * the restrictions of the generic i2c interface (e.g. no access from - * driver itself) make it unsuitable for this use. - */ - -#define READ_CMD 1 -#define WRITE_CMD 0 - -/** - * i2c_wait_for_writes - wait for a write - * @dd: the hfi1_ib device - * - * We use this instead of udelay directly, so we can make sure - * that previous register writes have been flushed all the way - * to the chip. Since we are delaying anyway, the cost doesn't - * hurt, and makes the bit twiddling more regular - */ -static void i2c_wait_for_writes(struct hfi1_devdata *dd, u32 target) -{ - /* - * implicit read of EXTStatus is as good as explicit - * read of scratch, if all we want to do is flush - * writes. - */ - hfi1_gpio_mod(dd, target, 0, 0, 0); - rmb(); /* inlined, so prevent compiler reordering */ -} - -/* - * QSFP modules are allowed to hold SCL low for 500uSec. Allow twice that - * for "almost compliant" modules - */ -#define SCL_WAIT_USEC 1000 - -/* BUF_WAIT is time bus must be free between STOP or ACK and to next START. - * Should be 20, but some chips need more. - */ -#define TWSI_BUF_WAIT_USEC 60 - -static void scl_out(struct hfi1_devdata *dd, u32 target, u8 bit) -{ - u32 mask; - - udelay(1); - - mask = QSFP_HFI0_I2CCLK; - - /* SCL is meant to be bare-drain, so never set "OUT", just DIR */ - hfi1_gpio_mod(dd, target, 0, bit ? 0 : mask, mask); - - /* - * Allow for slow slaves by simple - * delay for falling edge, sampling on rise. - */ - if (!bit) { - udelay(2); - } else { - int rise_usec; - - for (rise_usec = SCL_WAIT_USEC; rise_usec > 0; rise_usec -= 2) { - if (mask & hfi1_gpio_mod(dd, target, 0, 0, 0)) - break; - udelay(2); - } - if (rise_usec <= 0) - dd_dev_err(dd, "SCL interface stuck low > %d uSec\n", - SCL_WAIT_USEC); - } - i2c_wait_for_writes(dd, target); -} - -static u8 scl_in(struct hfi1_devdata *dd, u32 target, int wait) -{ - u32 read_val, mask; - - mask = QSFP_HFI0_I2CCLK; - /* SCL is meant to be bare-drain, so never set "OUT", just DIR */ - hfi1_gpio_mod(dd, target, 0, 0, mask); - read_val = hfi1_gpio_mod(dd, target, 0, 0, 0); - if (wait) - i2c_wait_for_writes(dd, target); - return (read_val & mask) >> GPIO_SCL_NUM; -} - -static void sda_out(struct hfi1_devdata *dd, u32 target, u8 bit) -{ - u32 mask; - - mask = QSFP_HFI0_I2CDAT; - - /* SDA is meant to be bare-drain, so never set "OUT", just DIR */ - hfi1_gpio_mod(dd, target, 0, bit ? 0 : mask, mask); - - i2c_wait_for_writes(dd, target); - udelay(2); -} - -static u8 sda_in(struct hfi1_devdata *dd, u32 target, int wait) -{ - u32 read_val, mask; - - mask = QSFP_HFI0_I2CDAT; - /* SDA is meant to be bare-drain, so never set "OUT", just DIR */ - hfi1_gpio_mod(dd, target, 0, 0, mask); - read_val = hfi1_gpio_mod(dd, target, 0, 0, 0); - if (wait) - i2c_wait_for_writes(dd, target); - return (read_val & mask) >> GPIO_SDA_NUM; -} - -/** - * i2c_ackrcv - see if ack following write is true - * @dd: the hfi1_ib device - */ -static int i2c_ackrcv(struct hfi1_devdata *dd, u32 target) -{ - u8 ack_received; - - /* AT ENTRY SCL = LOW */ - /* change direction, ignore data */ - ack_received = sda_in(dd, target, 1); - scl_out(dd, target, 1); - ack_received = sda_in(dd, target, 1) == 0; - scl_out(dd, target, 0); - return ack_received; -} - -static void stop_cmd(struct hfi1_devdata *dd, u32 target); - -/** - * rd_byte - read a byte, sending STOP on last, else ACK - * @dd: the hfi1_ib device - * - * Returns byte shifted out of device - */ -static int rd_byte(struct hfi1_devdata *dd, u32 target, int last) -{ - int bit_cntr, data; - - data = 0; - - for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) { - data <<= 1; - scl_out(dd, target, 1); - data |= sda_in(dd, target, 0); - scl_out(dd, target, 0); - } - if (last) { - scl_out(dd, target, 1); - stop_cmd(dd, target); - } else { - sda_out(dd, target, 0); - scl_out(dd, target, 1); - scl_out(dd, target, 0); - sda_out(dd, target, 1); - } - return data; -} - -/** - * wr_byte - write a byte, one bit at a time - * @dd: the hfi1_ib device - * @data: the byte to write - * - * Returns 0 if we got the following ack, otherwise 1 - */ -static int wr_byte(struct hfi1_devdata *dd, u32 target, u8 data) -{ - int bit_cntr; - u8 bit; - - for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) { - bit = (data >> bit_cntr) & 1; - sda_out(dd, target, bit); - scl_out(dd, target, 1); - scl_out(dd, target, 0); - } - return (!i2c_ackrcv(dd, target)) ? 1 : 0; -} - -/* - * issue TWSI start sequence: - * (both clock/data high, clock high, data low while clock is high) - */ -static void start_seq(struct hfi1_devdata *dd, u32 target) -{ - sda_out(dd, target, 1); - scl_out(dd, target, 1); - sda_out(dd, target, 0); - udelay(1); - scl_out(dd, target, 0); -} - -/** - * stop_seq - transmit the stop sequence - * @dd: the hfi1_ib device - * - * (both clock/data low, clock high, data high while clock is high) - */ -static void stop_seq(struct hfi1_devdata *dd, u32 target) -{ - scl_out(dd, target, 0); - sda_out(dd, target, 0); - scl_out(dd, target, 1); - sda_out(dd, target, 1); -} - -/** - * stop_cmd - transmit the stop condition - * @dd: the hfi1_ib device - * - * (both clock/data low, clock high, data high while clock is high) - */ -static void stop_cmd(struct hfi1_devdata *dd, u32 target) -{ - stop_seq(dd, target); - udelay(TWSI_BUF_WAIT_USEC); -} - -/** - * hfi1_twsi_reset - reset I2C communication - * @dd: the hfi1_ib device - * returns 0 if ok, -EIO on error - */ -int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target) -{ - int clock_cycles_left = 9; - u32 mask; - - /* Both SCL and SDA should be high. If not, there - * is something wrong. - */ - mask = QSFP_HFI0_I2CCLK | QSFP_HFI0_I2CDAT; - - /* - * Force pins to desired innocuous state. - * This is the default power-on state with out=0 and dir=0, - * So tri-stated and should be floating high (barring HW problems) - */ - hfi1_gpio_mod(dd, target, 0, 0, mask); - - /* Check if SCL is low, if it is low then we have a slave device - * misbehaving and there is not much we can do. - */ - if (!scl_in(dd, target, 0)) - return -EIO; - - /* Check if SDA is low, if it is low then we have to clock SDA - * up to 9 times for the device to release the bus - */ - while (clock_cycles_left--) { - if (sda_in(dd, target, 0)) - return 0; - scl_out(dd, target, 0); - scl_out(dd, target, 1); - } - - return -EIO; -} - -#define HFI1_TWSI_START 0x100 -#define HFI1_TWSI_STOP 0x200 - -/* Write byte to TWSI, optionally prefixed with START or suffixed with - * STOP. - * returns 0 if OK (ACK received), else != 0 - */ -static int twsi_wr(struct hfi1_devdata *dd, u32 target, int data, int flags) -{ - int ret = 1; - - if (flags & HFI1_TWSI_START) - start_seq(dd, target); - - /* Leaves SCL low (from i2c_ackrcv()) */ - ret = wr_byte(dd, target, data); - - if (flags & HFI1_TWSI_STOP) - stop_cmd(dd, target); - return ret; -} - -/* Added functionality for IBA7220-based cards */ -#define HFI1_TEMP_DEV 0x98 - -/* - * hfi1_twsi_blk_rd - * General interface for data transfer from twsi devices. - * One vestige of its former role is that it recognizes a device - * HFI1_TWSI_NO_DEV and does the correct operation for the legacy part, - * which responded to all TWSI device codes, interpreting them as - * address within device. On all other devices found on board handled by - * this driver, the device is followed by a N-byte "address" which selects - * the "register" or "offset" within the device from which data should - * be read. - */ -int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr, - void *buffer, int len) -{ - u8 *bp = buffer; - int ret = 1; - int i; - int offset_size; - - /* obtain the offset size, strip it from the device address */ - offset_size = (dev >> 8) & 0xff; - dev &= 0xff; - - /* allow at most a 2 byte offset */ - if (offset_size > 2) - goto bail; - - if (dev == HFI1_TWSI_NO_DEV) { - /* legacy not-really-I2C */ - addr = (addr << 1) | READ_CMD; - ret = twsi_wr(dd, target, addr, HFI1_TWSI_START); - } else { - /* Actual I2C */ - if (offset_size) { - ret = twsi_wr(dd, target, - dev | WRITE_CMD, HFI1_TWSI_START); - if (ret) { - stop_cmd(dd, target); - goto bail; - } - - for (i = 0; i < offset_size; i++) { - ret = twsi_wr(dd, target, - (addr >> (i * 8)) & 0xff, 0); - udelay(TWSI_BUF_WAIT_USEC); - if (ret) { - dd_dev_err(dd, "Failed to write byte %d of offset 0x%04X\n", - i, addr); - goto bail; - } - } - } - ret = twsi_wr(dd, target, dev | READ_CMD, HFI1_TWSI_START); - } - if (ret) { - stop_cmd(dd, target); - goto bail; - } - - /* - * block devices keeps clocking data out as long as we ack, - * automatically incrementing the address. Some have "pages" - * whose boundaries will not be crossed, but the handling - * of these is left to the caller, who is in a better - * position to know. - */ - while (len-- > 0) { - /* - * Get and store data, sending ACK if length remaining, - * else STOP - */ - *bp++ = rd_byte(dd, target, !len); - } - - ret = 0; - -bail: - return ret; -} - -/* - * hfi1_twsi_blk_wr - * General interface for data transfer to twsi devices. - * One vestige of its former role is that it recognizes a device - * HFI1_TWSI_NO_DEV and does the correct operation for the legacy part, - * which responded to all TWSI device codes, interpreting them as - * address within device. On all other devices found on board handled by - * this driver, the device is followed by a N-byte "address" which selects - * the "register" or "offset" within the device to which data should - * be written. - */ -int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr, - const void *buffer, int len) -{ - const u8 *bp = buffer; - int ret = 1; - int i; - int offset_size; - - /* obtain the offset size, strip it from the device address */ - offset_size = (dev >> 8) & 0xff; - dev &= 0xff; - - /* allow at most a 2 byte offset */ - if (offset_size > 2) - goto bail; - - if (dev == HFI1_TWSI_NO_DEV) { - if (twsi_wr(dd, target, (addr << 1) | WRITE_CMD, - HFI1_TWSI_START)) { - goto failed_write; - } - } else { - /* Real I2C */ - if (twsi_wr(dd, target, dev | WRITE_CMD, HFI1_TWSI_START)) - goto failed_write; - } - - for (i = 0; i < offset_size; i++) { - ret = twsi_wr(dd, target, (addr >> (i * 8)) & 0xff, 0); - udelay(TWSI_BUF_WAIT_USEC); - if (ret) { - dd_dev_err(dd, "Failed to write byte %d of offset 0x%04X\n", - i, addr); - goto bail; - } - } - - for (i = 0; i < len; i++) - if (twsi_wr(dd, target, *bp++, 0)) - goto failed_write; - - ret = 0; - -failed_write: - stop_cmd(dd, target); - -bail: - return ret; -} diff --git a/drivers/infiniband/hw/hfi1/twsi.h b/drivers/infiniband/hw/hfi1/twsi.h deleted file mode 100644 index 5b8a5b5e7eae..000000000000 --- a/drivers/infiniband/hw/hfi1/twsi.h +++ /dev/null @@ -1,65 +0,0 @@ -#ifndef _TWSI_H -#define _TWSI_H -/* - * Copyright(c) 2015, 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#define HFI1_TWSI_NO_DEV 0xFF - -struct hfi1_devdata; - -/* Bit position of SDA/SCL pins in ASIC_QSFP* registers */ -#define GPIO_SDA_NUM 1 -#define GPIO_SCL_NUM 0 - -/* these functions must be called with qsfp_lock held */ -int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target); -int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr, - void *buffer, int len); -int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr, - const void *buffer, int len); - -#endif /* _TWSI_H */ diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index df773d433297..a726d96d185f 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -119,6 +119,31 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) goto bail; } /* + * Local operations are processed immediately + * after all prior requests have completed. + */ + if (wqe->wr.opcode == IB_WR_REG_MR || + wqe->wr.opcode == IB_WR_LOCAL_INV) { + int local_ops = 0; + int err = 0; + + if (qp->s_last != qp->s_cur) + goto bail; + if (++qp->s_cur == qp->s_size) + qp->s_cur = 0; + if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) { + err = rvt_invalidate_rkey( + qp, wqe->wr.ex.invalidate_rkey); + local_ops = 1; + } + hfi1_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR + : IB_WC_SUCCESS); + if (local_ops) + atomic_dec(&qp->local_ops_pending); + qp->s_hdrwords = 0; + goto done_free_tx; + } + /* * Start a new request. */ qp->s_psn = wqe->psn; @@ -294,46 +319,12 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) struct ib_reth *reth; int has_grh = rcv_flags & HFI1_HAS_GRH; int ret; - u32 bth1; bth0 = be32_to_cpu(ohdr->bth[0]); if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0)) return; - bth1 = be32_to_cpu(ohdr->bth[1]); - if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) { - if (bth1 & HFI1_BECN_SMASK) { - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - u32 rqpn, lqpn; - u16 rlid = be16_to_cpu(hdr->lrh[3]); - u8 sl, sc5; - - lqpn = bth1 & RVT_QPN_MASK; - rqpn = qp->remote_qpn; - - sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl]; - sl = ibp->sc_to_sl[sc5]; - - process_becn(ppd, sl, rlid, lqpn, rqpn, - IB_CC_SVCTYPE_UC); - } - - if (bth1 & HFI1_FECN_SMASK) { - struct ib_grh *grh = NULL; - u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]); - u16 slid = be16_to_cpu(hdr->lrh[3]); - u16 dlid = be16_to_cpu(hdr->lrh[1]); - u32 src_qp = qp->remote_qpn; - u8 sc5; - - sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl]; - if (has_grh) - grh = &hdr->u.l.grh; - - return_cnp(ibp, qp, src_qp, pkey, dlid, slid, sc5, - grh); - } - } + process_ecn(qp, packet, true); psn = be32_to_cpu(ohdr->bth[2]); opcode = (bth0 >> 24) & 0xff; diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index be91f6fa1c87..f01e8e1d62d3 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -184,8 +184,12 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) } if (ah_attr->ah_flags & IB_AH_GRH) { - hfi1_copy_sge(&qp->r_sge, &ah_attr->grh, - sizeof(struct ib_grh), 1, 0); + struct ib_grh grh; + struct ib_global_route grd = ah_attr->grh; + + hfi1_make_grh(ibp, &grh, &grd, 0, 0); + hfi1_copy_sge(&qp->r_sge, &grh, + sizeof(grh), 1, 0); wc.wc_flags |= IB_WC_GRH; } else { hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); @@ -430,10 +434,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->qkey : wqe->ud_wr.remote_qkey); ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); /* disarm any ahg */ - priv->s_hdr->ahgcount = 0; - priv->s_hdr->ahgidx = 0; - priv->s_hdr->tx_flags = 0; - priv->s_hdr->sde = NULL; + priv->s_ahg->ahgcount = 0; + priv->s_ahg->ahgidx = 0; + priv->s_ahg->tx_flags = 0; /* pbc */ ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; @@ -665,13 +668,13 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) struct hfi1_other_headers *ohdr = packet->ohdr; int opcode; u32 hdrsize = packet->hlen; - u32 pad; struct ib_wc wc; u32 qkey; u32 src_qp; u16 dlid, pkey; int mgmt_pkey_idx = -1; struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data; + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct hfi1_ib_header *hdr = packet->hdr; u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; @@ -680,52 +683,33 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) bool has_grh = rcv_flags & HFI1_HAS_GRH; u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf); u32 bth1; - int is_mcast; - struct ib_grh *grh = NULL; + u8 sl_from_sc, sl; + u16 slid; + u8 extra_bytes; qkey = be32_to_cpu(ohdr->u.ud.deth[0]); src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; dlid = be16_to_cpu(hdr->lrh[1]); - is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && - (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); bth1 = be32_to_cpu(ohdr->bth[1]); - if (unlikely(bth1 & HFI1_BECN_SMASK)) { - /* - * In pre-B0 h/w the CNP_OPCODE is handled via an - * error path. - */ - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; - u8 sl; - - sl = ibp->sc_to_sl[sc5]; - - process_becn(ppd, sl, 0, lqpn, 0, IB_CC_SVCTYPE_UD); - } + slid = be16_to_cpu(hdr->lrh[3]); + pkey = (u16)be32_to_cpu(ohdr->bth[0]); + sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; + extra_bytes = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + extra_bytes += (SIZE_OF_CRC << 2); + sl_from_sc = ibp->sc_to_sl[sc5]; - /* - * The opcode is in the low byte when its in network order - * (top byte when in host order). - */ opcode = be32_to_cpu(ohdr->bth[0]) >> 24; opcode &= 0xff; - pkey = (u16)be32_to_cpu(ohdr->bth[0]); - - if (!is_mcast && (opcode != IB_OPCODE_CNP) && bth1 & HFI1_FECN_SMASK) { - u16 slid = be16_to_cpu(hdr->lrh[3]); - - return_cnp(ibp, qp, src_qp, pkey, dlid, slid, sc5, grh); - } + process_ecn(qp, packet, (opcode != IB_OPCODE_CNP)); /* * Get the number of bytes the message was padded by * and drop incomplete packets. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - if (unlikely(tlen < (hdrsize + pad + 4))) + if (unlikely(tlen < (hdrsize + extra_bytes))) goto drop; - tlen -= hdrsize + pad + 4; + tlen -= hdrsize + extra_bytes; /* * Check that the permissive LID is only used on QP0 @@ -736,10 +720,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) hdr->lrh[3] == IB_LID_PERMISSIVE)) goto drop; if (qp->ibqp.qp_num > 1) { - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - u16 slid; - - slid = be16_to_cpu(hdr->lrh[3]); if (unlikely(rcv_pkey_check(ppd, pkey, sc5, slid))) { /* * Traps will not be sent for packets dropped @@ -748,12 +728,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) * IB spec (release 1.3, section 10.9.4) */ hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, - pkey, - (be16_to_cpu(hdr->lrh[0]) >> 4) & - 0xF, + pkey, sl, src_qp, qp->ibqp.qp_num, - be16_to_cpu(hdr->lrh[3]), - be16_to_cpu(hdr->lrh[1])); + slid, dlid); return; } } else { @@ -763,22 +740,18 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) goto drop; } if (unlikely(qkey != qp->qkey)) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, - (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, + hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, sl, src_qp, qp->ibqp.qp_num, - be16_to_cpu(hdr->lrh[3]), - be16_to_cpu(hdr->lrh[1])); + slid, dlid); return; } /* Drop invalid MAD packets (see 13.5.3.1). */ if (unlikely(qp->ibqp.qp_num == 1 && - (tlen > 2048 || - (be16_to_cpu(hdr->lrh[0]) >> 12) == 15))) + (tlen > 2048 || (sc5 == 0xF)))) goto drop; } else { /* Received on QP0, and so by definition, this is an SMP */ struct opa_smp *smp = (struct opa_smp *)data; - u16 slid = be16_to_cpu(hdr->lrh[3]); if (opa_smp_check(ibp, pkey, sc5, qp, slid, smp)) goto drop; @@ -861,7 +834,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) qp->ibqp.qp_type == IB_QPT_SMI) { if (mgmt_pkey_idx < 0) { if (net_ratelimit()) { - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct hfi1_devdata *dd = ppd->dd; dd_dev_err(dd, "QP type %d mgmt_pkey_idx < 0 and packet not dropped???\n", @@ -874,8 +846,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) wc.pkey_index = 0; } - wc.slid = be16_to_cpu(hdr->lrh[3]); - wc.sl = ibp->sc_to_sl[sc5]; + wc.slid = slid; + wc.sl = sl_from_sc; /* * Save the LMC lower bits if the destination LID is a unicast LID. diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 1b640a35b3fe..64d26525435a 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -82,24 +82,25 @@ struct tid_pageset { ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT)) static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *, - struct rb_root *); + struct hfi1_filedata *); static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *); static int set_rcvarray_entry(struct file *, unsigned long, u32, struct tid_group *, struct page **, unsigned); -static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *); -static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *, - struct mm_struct *); -static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *); +static int tid_rb_insert(void *, struct mmu_rb_node *); +static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, + struct tid_rb_node *tnode); +static void tid_rb_remove(void *, struct mmu_rb_node *); +static int tid_rb_invalidate(void *, struct mmu_rb_node *); static int program_rcvarray(struct file *, unsigned long, struct tid_group *, struct tid_pageset *, unsigned, u16, struct page **, u32 *, unsigned *, unsigned *); static int unprogram_rcvarray(struct file *, u32, struct tid_group **); -static void clear_tid_node(struct hfi1_filedata *, u16, struct tid_rb_node *); +static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); static struct mmu_rb_ops tid_rb_ops = { - .insert = mmu_rb_insert, - .remove = mmu_rb_remove, - .invalidate = mmu_rb_invalidate + .insert = tid_rb_insert, + .remove = tid_rb_remove, + .invalidate = tid_rb_invalidate }; static inline u32 rcventry2tidinfo(u32 rcventry) @@ -162,7 +163,6 @@ int hfi1_user_exp_rcv_init(struct file *fp) spin_lock_init(&fd->tid_lock); spin_lock_init(&fd->invalid_lock); - fd->tid_rb_root = RB_ROOT; if (!uctxt->subctxt_cnt || !fd->subctxt) { exp_tid_group_init(&uctxt->tid_group_list); @@ -197,7 +197,7 @@ int hfi1_user_exp_rcv_init(struct file *fp) if (!fd->entry_to_rb) return -ENOMEM; - if (!HFI1_CAP_IS_USET(TID_UNMAP)) { + if (!HFI1_CAP_UGET_MASK(uctxt->flags, TID_UNMAP)) { fd->invalid_tid_idx = 0; fd->invalid_tids = kzalloc(uctxt->expected_count * sizeof(u32), GFP_KERNEL); @@ -208,15 +208,15 @@ int hfi1_user_exp_rcv_init(struct file *fp) /* * Register MMU notifier callbacks. If the registration - * fails, continue but turn off the TID caching for - * all user contexts. + * fails, continue without TID caching for this context. */ - ret = hfi1_mmu_rb_register(&fd->tid_rb_root, &tid_rb_ops); + ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops, + dd->pport->hfi1_wq, + &fd->handler); if (ret) { dd_dev_info(dd, "Failed MMU notifier registration %d\n", ret); - HFI1_CAP_USET(TID_UNMAP); ret = 0; } } @@ -235,7 +235,7 @@ int hfi1_user_exp_rcv_init(struct file *fp) * init. */ spin_lock(&fd->tid_lock); - if (uctxt->subctxt_cnt && !HFI1_CAP_IS_USET(TID_UNMAP)) { + if (uctxt->subctxt_cnt && fd->handler) { u16 remainder; fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt; @@ -261,18 +261,16 @@ int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) * The notifier would have been removed when the process'es mm * was freed. */ - if (!HFI1_CAP_IS_USET(TID_UNMAP)) - hfi1_mmu_rb_unregister(&fd->tid_rb_root); + if (fd->handler) + hfi1_mmu_rb_unregister(fd->handler); kfree(fd->invalid_tids); if (!uctxt->cnt) { if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list)) - unlock_exp_tids(uctxt, &uctxt->tid_full_list, - &fd->tid_rb_root); + unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd); if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list)) - unlock_exp_tids(uctxt, &uctxt->tid_used_list, - &fd->tid_rb_root); + unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd); list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, list) { list_del_init(&grp->list); @@ -399,12 +397,12 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo) * pages, accept the amount pinned so far and program only that. * User space knows how to deal with partially programmed buffers. */ - if (!hfi1_can_pin_pages(dd, fd->tid_n_pinned, npages)) { + if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) { ret = -ENOMEM; goto bail; } - pinned = hfi1_acquire_user_pages(vaddr, npages, true, pages); + pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages); if (pinned <= 0) { ret = pinned; goto bail; @@ -559,7 +557,7 @@ nomem: * for example), unpin all unmapped pages so we can pin them nex time. */ if (mapped_pages != pinned) { - hfi1_release_user_pages(current->mm, &pages[mapped_pages], + hfi1_release_user_pages(fd->mm, &pages[mapped_pages], pinned - mapped_pages, false); fd->tid_n_pinned -= pinned - mapped_pages; @@ -829,7 +827,6 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr, struct hfi1_ctxtdata *uctxt = fd->uctxt; struct tid_rb_node *node; struct hfi1_devdata *dd = uctxt->dd; - struct rb_root *root = &fd->tid_rb_root; dma_addr_t phys; /* @@ -861,10 +858,10 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr, node->freed = false; memcpy(node->pages, pages, sizeof(struct page *) * npages); - if (HFI1_CAP_IS_USET(TID_UNMAP)) - ret = mmu_rb_insert(root, &node->mmu); + if (!fd->handler) + ret = tid_rb_insert(fd, &node->mmu); else - ret = hfi1_mmu_rb_insert(root, &node->mmu); + ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu); if (ret) { hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d", @@ -904,19 +901,19 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo, node = fd->entry_to_rb[rcventry]; if (!node || node->rcventry != (uctxt->expected_base + rcventry)) return -EBADF; - if (HFI1_CAP_IS_USET(TID_UNMAP)) - mmu_rb_remove(&fd->tid_rb_root, &node->mmu, NULL); - else - hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu); if (grp) *grp = node->grp; - clear_tid_node(fd, fd->subctxt, node); + + if (!fd->handler) + cacheless_tid_rb_remove(fd, node); + else + hfi1_mmu_rb_remove(fd->handler, &node->mmu); + return 0; } -static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt, - struct tid_rb_node *node) +static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) { struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; @@ -934,7 +931,7 @@ static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt, pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len, PCI_DMA_FROMDEVICE); - hfi1_release_user_pages(current->mm, node->pages, node->npages, true); + hfi1_release_user_pages(fd->mm, node->pages, node->npages, true); fd->tid_n_pinned -= node->npages; node->grp->used--; @@ -949,12 +946,15 @@ static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt, kfree(node); } +/* + * As a simple helper for hfi1_user_exp_rcv_free, this function deals with + * clearing nodes in the non-cached case. + */ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, - struct exp_tid_set *set, struct rb_root *root) + struct exp_tid_set *set, + struct hfi1_filedata *fd) { struct tid_group *grp, *ptr; - struct hfi1_filedata *fd = container_of(root, struct hfi1_filedata, - tid_rb_root); int i; list_for_each_entry_safe(grp, ptr, &set->list, list) { @@ -969,22 +969,23 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, uctxt->expected_base]; if (!node || node->rcventry != rcventry) continue; - if (HFI1_CAP_IS_USET(TID_UNMAP)) - mmu_rb_remove(&fd->tid_rb_root, - &node->mmu, NULL); - else - hfi1_mmu_rb_remove(&fd->tid_rb_root, - &node->mmu); - clear_tid_node(fd, -1, node); + + cacheless_tid_rb_remove(fd, node); } } } } -static int mmu_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode) +/* + * Always return 0 from this function. A non-zero return indicates that the + * remove operation will be called and that memory should be unpinned. + * However, the driver cannot unpin out from under PSM. Instead, retain the + * memory (by returning 0) and inform PSM that the memory is going away. PSM + * will call back later when it has removed the memory from its list. + */ +static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode) { - struct hfi1_filedata *fdata = - container_of(root, struct hfi1_filedata, tid_rb_root); + struct hfi1_filedata *fdata = arg; struct hfi1_ctxtdata *uctxt = fdata->uctxt; struct tid_rb_node *node = container_of(mnode, struct tid_rb_node, mmu); @@ -1025,10 +1026,9 @@ static int mmu_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode) return 0; } -static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node) +static int tid_rb_insert(void *arg, struct mmu_rb_node *node) { - struct hfi1_filedata *fdata = - container_of(root, struct hfi1_filedata, tid_rb_root); + struct hfi1_filedata *fdata = arg; struct tid_rb_node *tnode = container_of(node, struct tid_rb_node, mmu); u32 base = fdata->uctxt->expected_base; @@ -1037,14 +1037,20 @@ static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node) return 0; } -static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node, - struct mm_struct *mm) +static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, + struct tid_rb_node *tnode) { - struct hfi1_filedata *fdata = - container_of(root, struct hfi1_filedata, tid_rb_root); - struct tid_rb_node *tnode = - container_of(node, struct tid_rb_node, mmu); u32 base = fdata->uctxt->expected_base; fdata->entry_to_rb[tnode->rcventry - base] = NULL; + clear_tid_node(fdata, tnode); +} + +static void tid_rb_remove(void *arg, struct mmu_rb_node *node) +{ + struct hfi1_filedata *fdata = arg; + struct tid_rb_node *tnode = + container_of(node, struct tid_rb_node, mmu); + + cacheless_tid_rb_remove(fdata, tnode); } diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c index 88e10b5f55f1..20f4ddcac3b0 100644 --- a/drivers/infiniband/hw/hfi1/user_pages.c +++ b/drivers/infiniband/hw/hfi1/user_pages.c @@ -68,7 +68,8 @@ MODULE_PARM_DESC(cache_size, "Send and receive side cache size limit (in MB)"); * could keeping caching buffers. * */ -bool hfi1_can_pin_pages(struct hfi1_devdata *dd, u32 nlocked, u32 npages) +bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm, + u32 nlocked, u32 npages) { unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit, size = (cache_size * (1UL << 20)); /* convert to bytes */ @@ -89,9 +90,9 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, u32 nlocked, u32 npages) /* Convert to number of pages */ size = DIV_ROUND_UP(size, PAGE_SIZE); - down_read(¤t->mm->mmap_sem); - pinned = current->mm->pinned_vm; - up_read(¤t->mm->mmap_sem); + down_read(&mm->mmap_sem); + pinned = mm->pinned_vm; + up_read(&mm->mmap_sem); /* First, check the absolute limit against all pinned pages. */ if (pinned + npages >= ulimit && !can_lock) @@ -100,8 +101,8 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, u32 nlocked, u32 npages) return ((nlocked + npages) <= size) || can_lock; } -int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable, - struct page **pages) +int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t npages, + bool writable, struct page **pages) { int ret; @@ -109,9 +110,9 @@ int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable, if (ret < 0) return ret; - down_write(¤t->mm->mmap_sem); - current->mm->pinned_vm += ret; - up_write(¤t->mm->mmap_sem); + down_write(&mm->mmap_sem); + mm->pinned_vm += ret; + up_write(&mm->mmap_sem); return ret; } diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 47ffd273ecbd..0ecf27903dc2 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -145,7 +145,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 /* Last packet in the request */ #define TXREQ_FLAGS_REQ_LAST_PKT BIT(0) -#define SDMA_REQ_IN_USE 0 +/* SDMA request flag bits */ #define SDMA_REQ_FOR_THREAD 1 #define SDMA_REQ_SEND_DONE 2 #define SDMA_REQ_HAVE_AHG 3 @@ -183,16 +183,18 @@ struct user_sdma_iovec { struct sdma_mmu_node *node; }; -#define SDMA_CACHE_NODE_EVICT 0 - struct sdma_mmu_node { struct mmu_rb_node rb; - struct list_head list; struct hfi1_user_sdma_pkt_q *pq; atomic_t refcount; struct page **pages; unsigned npages; - unsigned long flags; +}; + +/* evict operation argument */ +struct evict_data { + u32 cleared; /* count evicted so far */ + u32 target; /* target count to evict */ }; struct user_sdma_request { @@ -305,14 +307,16 @@ static int defer_packet_queue( unsigned seq); static void activate_packet_queue(struct iowait *, int); static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long); -static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *); -static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *, - struct mm_struct *); -static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *); +static int sdma_rb_insert(void *, struct mmu_rb_node *); +static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, + void *arg2, bool *stop); +static void sdma_rb_remove(void *, struct mmu_rb_node *); +static int sdma_rb_invalidate(void *, struct mmu_rb_node *); static struct mmu_rb_ops sdma_rb_ops = { .filter = sdma_rb_filter, .insert = sdma_rb_insert, + .evict = sdma_rb_evict, .remove = sdma_rb_remove, .invalidate = sdma_rb_invalidate }; @@ -397,6 +401,11 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) if (!pq->reqs) goto pq_reqs_nomem; + memsize = BITS_TO_LONGS(hfi1_sdma_comp_ring_size) * sizeof(long); + pq->req_in_use = kzalloc(memsize, GFP_KERNEL); + if (!pq->req_in_use) + goto pq_reqs_no_in_use; + INIT_LIST_HEAD(&pq->list); pq->dd = dd; pq->ctxt = uctxt->ctxt; @@ -405,9 +414,8 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) pq->state = SDMA_PKT_Q_INACTIVE; atomic_set(&pq->n_reqs, 0); init_waitqueue_head(&pq->wait); - pq->sdma_rb_root = RB_ROOT; - INIT_LIST_HEAD(&pq->evict); - spin_lock_init(&pq->evict_lock); + atomic_set(&pq->n_locked, 0); + pq->mm = fd->mm; iowait_init(&pq->busy, 0, NULL, defer_packet_queue, activate_packet_queue, NULL); @@ -437,7 +445,8 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) cq->nentries = hfi1_sdma_comp_ring_size; fd->cq = cq; - ret = hfi1_mmu_rb_register(&pq->sdma_rb_root, &sdma_rb_ops); + ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq, + &pq->handler); if (ret) { dd_dev_err(dd, "Failed to register with MMU %d", ret); goto done; @@ -453,6 +462,8 @@ cq_comps_nomem: cq_nomem: kmem_cache_destroy(pq->txreq_cache); pq_txreq_nomem: + kfree(pq->req_in_use); +pq_reqs_no_in_use: kfree(pq->reqs); pq_reqs_nomem: kfree(pq); @@ -472,8 +483,9 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit, uctxt->ctxt, fd->subctxt); pq = fd->pq; - hfi1_mmu_rb_unregister(&pq->sdma_rb_root); if (pq) { + if (pq->handler) + hfi1_mmu_rb_unregister(pq->handler); spin_lock_irqsave(&uctxt->sdma_qlock, flags); if (!list_empty(&pq->list)) list_del_init(&pq->list); @@ -484,6 +496,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) pq->wait, (ACCESS_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE)); kfree(pq->reqs); + kfree(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); kfree(pq); fd->pq = NULL; @@ -496,10 +509,31 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) return 0; } +static u8 dlid_to_selector(u16 dlid) +{ + static u8 mapping[256]; + static int initialized; + static u8 next; + int hash; + + if (!initialized) { + memset(mapping, 0xFF, 256); + initialized = 1; + } + + hash = ((dlid >> 8) ^ dlid) & 0xFF; + if (mapping[hash] == 0xFF) { + mapping[hash] = next; + next = (next + 1) & 0x7F; + } + + return mapping[hash]; +} + int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, unsigned long dim, unsigned long *count) { - int ret = 0, i = 0; + int ret = 0, i; struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_user_sdma_pkt_q *pq = fd->pq; @@ -511,6 +545,8 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, struct user_sdma_request *req; u8 opcode, sc, vl; int req_queued = 0; + u16 dlid; + u8 selector; if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) { hfi1_cdbg( @@ -529,30 +565,48 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt, (u16 *)&info); - if (cq->comps[info.comp_idx].status == QUEUED || - test_bit(SDMA_REQ_IN_USE, &pq->reqs[info.comp_idx].flags)) { - hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in QUEUED state", - dd->unit, uctxt->ctxt, fd->subctxt, - info.comp_idx); - return -EBADSLT; + + if (info.comp_idx >= hfi1_sdma_comp_ring_size) { + hfi1_cdbg(SDMA, + "[%u:%u:%u:%u] Invalid comp index", + dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx); + return -EINVAL; } + + /* + * Sanity check the header io vector count. Need at least 1 vector + * (header) and cannot be larger than the actual io vector count. + */ + if (req_iovcnt(info.ctrl) < 1 || req_iovcnt(info.ctrl) > dim) { + hfi1_cdbg(SDMA, + "[%u:%u:%u:%u] Invalid iov count %d, dim %ld", + dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx, + req_iovcnt(info.ctrl), dim); + return -EINVAL; + } + if (!info.fragsize) { hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Request does not specify fragsize", dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx); return -EINVAL; } + + /* Try to claim the request. */ + if (test_and_set_bit(info.comp_idx, pq->req_in_use)) { + hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in use", + dd->unit, uctxt->ctxt, fd->subctxt, + info.comp_idx); + return -EBADSLT; + } /* - * We've done all the safety checks that we can up to this point, - * "allocate" the request entry. + * All safety checks have been done and this request has been claimed. */ hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx); req = pq->reqs + info.comp_idx; memset(req, 0, sizeof(*req)); - /* Mark the request as IN_USE before we start filling it in. */ - set_bit(SDMA_REQ_IN_USE, &req->flags); - req->data_iovs = req_iovcnt(info.ctrl) - 1; + req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */ req->pq = pq; req->cq = cq; req->status = -1; @@ -560,13 +614,22 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, memcpy(&req->info, &info, sizeof(info)); - if (req_opcode(info.ctrl) == EXPECTED) + if (req_opcode(info.ctrl) == EXPECTED) { + /* expected must have a TID info and at least one data vector */ + if (req->data_iovs < 2) { + SDMA_DBG(req, + "Not enough vectors for expected request"); + ret = -EINVAL; + goto free_req; + } req->data_iovs--; + } if (!info.npkts || req->data_iovs > MAX_VECTORS_PER_REQ) { SDMA_DBG(req, "Too many vectors (%u/%u)", req->data_iovs, MAX_VECTORS_PER_REQ); - return -EINVAL; + ret = -EINVAL; + goto free_req; } /* Copy the header from the user buffer */ ret = copy_from_user(&req->hdr, iovec[idx].iov_base + sizeof(info), @@ -634,7 +697,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, idx++; /* Save all the IO vector structures */ - while (i < req->data_iovs) { + for (i = 0; i < req->data_iovs; i++) { INIT_LIST_HEAD(&req->iovs[i].list); memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec)); ret = pin_vector_pages(req, &req->iovs[i]); @@ -642,7 +705,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, req->status = ret; goto free_req; } - req->data_len += req->iovs[i++].iov.iov_len; + req->data_len += req->iovs[i].iov.iov_len; } SDMA_DBG(req, "total data length %u", req->data_len); @@ -686,9 +749,13 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, idx++; } + dlid = be16_to_cpu(req->hdr.lrh[1]); + selector = dlid_to_selector(dlid); + /* Have to select the engine */ req->sde = sdma_select_engine_vl(dd, - (u32)(uctxt->ctxt + fd->subctxt), + (u32)(uctxt->ctxt + fd->subctxt + + selector), vl); if (!req->sde || !sdma_running(req->sde)) { ret = -ECOMM; @@ -766,14 +833,21 @@ static inline u32 compute_data_length(struct user_sdma_request *req, * The size of the data of the first packet is in the header * template. However, it includes the header and ICRC, which need * to be subtracted. + * The minimum representable packet data length in a header is 4 bytes, + * therefore, when the data length request is less than 4 bytes, there's + * only one packet, and the packet data length is equal to that of the + * request data length. * The size of the remaining packets is the minimum of the frag * size (MTU) or remaining data in the request. */ u32 len; if (!req->seqnum) { - len = ((be16_to_cpu(req->hdr.lrh[2]) << 2) - - (sizeof(tx->hdr) - 4)); + if (req->data_len < sizeof(u32)) + len = req->data_len; + else + len = ((be16_to_cpu(req->hdr.lrh[2]) << 2) - + (sizeof(tx->hdr) - 4)); } else if (req_opcode(req->info.ctrl) == EXPECTED) { u32 tidlen = EXP_TID_GET(req->tids[req->tididx], LEN) * PAGE_SIZE; @@ -803,6 +877,13 @@ static inline u32 compute_data_length(struct user_sdma_request *req, return len; } +static inline u32 pad_len(u32 len) +{ + if (len & (sizeof(u32) - 1)) + len += sizeof(u32) - (len & (sizeof(u32) - 1)); + return len; +} + static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len) { /* (Size of complete header - size of PBC) + 4B ICRC + data length */ @@ -894,7 +975,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) { if (!req->seqnum) { u16 pbclen = le16_to_cpu(req->hdr.pbc[0]); - u32 lrhlen = get_lrh_len(req->hdr, datalen); + u32 lrhlen = get_lrh_len(req->hdr, + pad_len(datalen)); /* * Copy the request header into the tx header * because the HW needs a cacheline-aligned @@ -1048,39 +1130,24 @@ static inline int num_user_pages(const struct iovec *iov) static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages) { - u32 cleared = 0; - struct sdma_mmu_node *node, *ptr; - struct list_head to_evict = LIST_HEAD_INIT(to_evict); - - spin_lock(&pq->evict_lock); - list_for_each_entry_safe_reverse(node, ptr, &pq->evict, list) { - /* Make sure that no one is still using the node. */ - if (!atomic_read(&node->refcount)) { - set_bit(SDMA_CACHE_NODE_EVICT, &node->flags); - list_del_init(&node->list); - list_add(&node->list, &to_evict); - cleared += node->npages; - if (cleared >= npages) - break; - } - } - spin_unlock(&pq->evict_lock); - - list_for_each_entry_safe(node, ptr, &to_evict, list) - hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb); + struct evict_data evict_data; - return cleared; + evict_data.cleared = 0; + evict_data.target = npages; + hfi1_mmu_rb_evict(pq->handler, &evict_data); + return evict_data.cleared; } static int pin_vector_pages(struct user_sdma_request *req, - struct user_sdma_iovec *iovec) { + struct user_sdma_iovec *iovec) +{ int ret = 0, pinned, npages, cleared; struct page **pages; struct hfi1_user_sdma_pkt_q *pq = req->pq; struct sdma_mmu_node *node = NULL; struct mmu_rb_node *rb_node; - rb_node = hfi1_mmu_rb_extract(&pq->sdma_rb_root, + rb_node = hfi1_mmu_rb_extract(pq->handler, (unsigned long)iovec->iov.iov_base, iovec->iov.iov_len); if (rb_node && !IS_ERR(rb_node)) @@ -1096,7 +1163,6 @@ static int pin_vector_pages(struct user_sdma_request *req, node->rb.addr = (unsigned long)iovec->iov.iov_base; node->pq = pq; atomic_set(&node->refcount, 0); - INIT_LIST_HEAD(&node->list); } npages = num_user_pages(&iovec->iov); @@ -1111,28 +1177,14 @@ static int pin_vector_pages(struct user_sdma_request *req, npages -= node->npages; - /* - * If rb_node is NULL, it means that this is brand new node - * and, therefore not on the eviction list. - * If, however, the rb_node is non-NULL, it means that the - * node is already in RB tree and, therefore on the eviction - * list (nodes are unconditionally inserted in the eviction - * list). In that case, we have to remove the node prior to - * calling the eviction function in order to prevent it from - * freeing this node. - */ - if (rb_node) { - spin_lock(&pq->evict_lock); - list_del_init(&node->list); - spin_unlock(&pq->evict_lock); - } retry: - if (!hfi1_can_pin_pages(pq->dd, pq->n_locked, npages)) { + if (!hfi1_can_pin_pages(pq->dd, pq->mm, + atomic_read(&pq->n_locked), npages)) { cleared = sdma_cache_evict(pq, npages); if (cleared >= npages) goto retry; } - pinned = hfi1_acquire_user_pages( + pinned = hfi1_acquire_user_pages(pq->mm, ((unsigned long)iovec->iov.iov_base + (node->npages * PAGE_SIZE)), npages, 0, pages + node->npages); @@ -1142,7 +1194,7 @@ retry: goto bail; } if (pinned != npages) { - unpin_vector_pages(current->mm, pages, node->npages, + unpin_vector_pages(pq->mm, pages, node->npages, pinned); ret = -EFAULT; goto bail; @@ -1152,28 +1204,22 @@ retry: node->pages = pages; node->npages += pinned; npages = node->npages; - spin_lock(&pq->evict_lock); - list_add(&node->list, &pq->evict); - pq->n_locked += pinned; - spin_unlock(&pq->evict_lock); + atomic_add(pinned, &pq->n_locked); } iovec->pages = node->pages; iovec->npages = npages; iovec->node = node; - ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb); + ret = hfi1_mmu_rb_insert(req->pq->handler, &node->rb); if (ret) { - spin_lock(&pq->evict_lock); - if (!list_empty(&node->list)) - list_del(&node->list); - pq->n_locked -= node->npages; - spin_unlock(&pq->evict_lock); + atomic_sub(node->npages, &pq->n_locked); + iovec->node = NULL; goto bail; } return 0; bail: if (rb_node) - unpin_vector_pages(current->mm, node->pages, 0, node->npages); + unpin_vector_pages(pq->mm, node->pages, 0, node->npages); kfree(node); return ret; } @@ -1181,7 +1227,7 @@ bail: static void unpin_vector_pages(struct mm_struct *mm, struct page **pages, unsigned start, unsigned npages) { - hfi1_release_user_pages(mm, pages + start, npages, 0); + hfi1_release_user_pages(mm, pages + start, npages, false); kfree(pages); } @@ -1192,16 +1238,14 @@ static int check_header_template(struct user_sdma_request *req, /* * Perform safety checks for any type of packet: * - transfer size is multiple of 64bytes - * - packet length is multiple of 4bytes - * - entire request length is multiple of 4bytes + * - packet length is multiple of 4 bytes * - packet length is not larger than MTU size * * These checks are only done for the first packet of the * transfer since the header is "given" to us by user space. * For the remainder of the packets we compute the values. */ - if (req->info.fragsize % PIO_BLOCK_SIZE || - lrhlen & 0x3 || req->data_len & 0x3 || + if (req->info.fragsize % PIO_BLOCK_SIZE || lrhlen & 0x3 || lrhlen > get_lrh_len(*hdr, req->info.fragsize)) return -EINVAL; @@ -1263,7 +1307,7 @@ static int set_txreq_header(struct user_sdma_request *req, struct hfi1_pkt_header *hdr = &tx->hdr; u16 pbclen; int ret; - u32 tidval = 0, lrhlen = get_lrh_len(*hdr, datalen); + u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen)); /* Copy the header template to the request before modification */ memcpy(hdr, &req->hdr, sizeof(*hdr)); @@ -1374,7 +1418,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, struct hfi1_user_sdma_pkt_q *pq = req->pq; struct hfi1_pkt_header *hdr = &req->hdr; u16 pbclen = le16_to_cpu(hdr->pbc[0]); - u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, len); + u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(len)); if (PBC2LRH(pbclen) != lrhlen) { /* PBC.PbcLengthDWs */ @@ -1534,14 +1578,14 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) continue; if (unpin) - hfi1_mmu_rb_remove(&req->pq->sdma_rb_root, + hfi1_mmu_rb_remove(req->pq->handler, &node->rb); else atomic_dec(&node->refcount); } } kfree(req->tids); - clear_bit(SDMA_REQ_IN_USE, &req->flags); + clear_bit(req->info.comp_idx, req->pq->req_in_use); } static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq, @@ -1564,7 +1608,7 @@ static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, return (bool)(node->addr == addr); } -static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode) +static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode) { struct sdma_mmu_node *node = container_of(mnode, struct sdma_mmu_node, rb); @@ -1573,48 +1617,45 @@ static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode) return 0; } -static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode, - struct mm_struct *mm) +/* + * Return 1 to remove the node from the rb tree and call the remove op. + * + * Called with the rb tree lock held. + */ +static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, + void *evict_arg, bool *stop) +{ + struct sdma_mmu_node *node = + container_of(mnode, struct sdma_mmu_node, rb); + struct evict_data *evict_data = evict_arg; + + /* is this node still being used? */ + if (atomic_read(&node->refcount)) + return 0; /* keep this node */ + + /* this node will be evicted, add its pages to our count */ + evict_data->cleared += node->npages; + + /* have enough pages been cleared? */ + if (evict_data->cleared >= evict_data->target) + *stop = true; + + return 1; /* remove this node */ +} + +static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode) { struct sdma_mmu_node *node = container_of(mnode, struct sdma_mmu_node, rb); - spin_lock(&node->pq->evict_lock); - /* - * We've been called by the MMU notifier but this node has been - * scheduled for eviction. The eviction function will take care - * of freeing this node. - * We have to take the above lock first because we are racing - * against the setting of the bit in the eviction function. - */ - if (mm && test_bit(SDMA_CACHE_NODE_EVICT, &node->flags)) { - spin_unlock(&node->pq->evict_lock); - return; - } + atomic_sub(node->npages, &node->pq->n_locked); - if (!list_empty(&node->list)) - list_del(&node->list); - node->pq->n_locked -= node->npages; - spin_unlock(&node->pq->evict_lock); + unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages); - /* - * If mm is set, we are being called by the MMU notifier and we - * should not pass a mm_struct to unpin_vector_page(). This is to - * prevent a deadlock when hfi1_release_user_pages() attempts to - * take the mmap_sem, which the MMU notifier has already taken. - */ - unpin_vector_pages(mm ? NULL : current->mm, node->pages, 0, - node->npages); - /* - * If called by the MMU notifier, we have to adjust the pinned - * page count ourselves. - */ - if (mm) - mm->pinned_vm -= node->npages; kfree(node); } -static int sdma_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode) +static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode) { struct sdma_mmu_node *node = container_of(mnode, struct sdma_mmu_node, rb); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index b9240e351161..39001714f551 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -63,14 +63,14 @@ struct hfi1_user_sdma_pkt_q { struct hfi1_devdata *dd; struct kmem_cache *txreq_cache; struct user_sdma_request *reqs; + unsigned long *req_in_use; struct iowait busy; unsigned state; wait_queue_head_t wait; unsigned long unpinned; - struct rb_root sdma_rb_root; - u32 n_locked; - struct list_head evict; - spinlock_t evict_lock; /* protect evict and n_locked */ + struct mmu_rb_handler *handler; + atomic_t n_locked; + struct mm_struct *mm; }; struct hfi1_user_sdma_comp_q { diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 849c4b9399d4..2b359540901d 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -306,7 +306,10 @@ const enum ib_wc_opcode ib_hfi1_wc_opcode[] = { [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, - [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD + [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD, + [IB_WR_SEND_WITH_INV] = IB_WC_SEND, + [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV, + [IB_WR_REG_MR] = IB_WC_REG_MR }; /* @@ -378,6 +381,8 @@ static const opcode_handler opcode_handler_tbl[256] = { [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = &hfi1_rc_rcv, [IB_OPCODE_RC_COMPARE_SWAP] = &hfi1_rc_rcv, [IB_OPCODE_RC_FETCH_ADD] = &hfi1_rc_rcv, + [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = &hfi1_rc_rcv, + [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv, /* UC */ [IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv, [IB_OPCODE_UC_SEND_MIDDLE] = &hfi1_uc_rcv, @@ -540,19 +545,15 @@ void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release) /* * Make sure the QP is ready and able to accept the given opcode. */ -static inline int qp_ok(int opcode, struct hfi1_packet *packet) +static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet) { - struct hfi1_ibport *ibp; - if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK)) - goto dropit; + return NULL; if (((opcode & RVT_OPCODE_QP_MASK) == packet->qp->allowed_ops) || (opcode == IB_OPCODE_CNP)) - return 1; -dropit: - ibp = &packet->rcd->ppd->ibport_data; - ibp->rvp.n_pkt_drops++; - return 0; + return opcode_handler_tbl[opcode]; + + return NULL; } /** @@ -571,6 +572,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) struct hfi1_pportdata *ppd = rcd->ppd; struct hfi1_ibport *ibp = &ppd->ibport_data; struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi; + opcode_handler packet_handler; unsigned long flags; u32 qp_num; int lnh; @@ -616,8 +618,11 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) list_for_each_entry_rcu(p, &mcast->qp_list, list) { packet->qp = p->qp; spin_lock_irqsave(&packet->qp->r_lock, flags); - if (likely((qp_ok(opcode, packet)))) - opcode_handler_tbl[opcode](packet); + packet_handler = qp_ok(opcode, packet); + if (likely(packet_handler)) + packet_handler(packet); + else + ibp->rvp.n_pkt_drops++; spin_unlock_irqrestore(&packet->qp->r_lock, flags); } /* @@ -634,8 +639,11 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) goto drop; } spin_lock_irqsave(&packet->qp->r_lock, flags); - if (likely((qp_ok(opcode, packet)))) - opcode_handler_tbl[opcode](packet); + packet_handler = qp_ok(opcode, packet); + if (likely(packet_handler)) + packet_handler(packet); + else + ibp->rvp.n_pkt_drops++; spin_unlock_irqrestore(&packet->qp->r_lock, flags); rcu_read_unlock(); } @@ -808,19 +816,19 @@ static int build_verbs_tx_desc( struct rvt_sge_state *ss, u32 length, struct verbs_txreq *tx, - struct ahg_ib_header *ahdr, + struct hfi1_ahg_info *ahg_info, u64 pbc) { int ret = 0; - struct hfi1_pio_header *phdr = &tx->phdr; + struct hfi1_sdma_header *phdr = &tx->phdr; u16 hdrbytes = tx->hdr_dwords << 2; - if (!ahdr->ahgcount) { + if (!ahg_info->ahgcount) { ret = sdma_txinit_ahg( &tx->txreq, - ahdr->tx_flags, + ahg_info->tx_flags, hdrbytes + length, - ahdr->ahgidx, + ahg_info->ahgidx, 0, NULL, 0, @@ -838,11 +846,11 @@ static int build_verbs_tx_desc( } else { ret = sdma_txinit_ahg( &tx->txreq, - ahdr->tx_flags, + ahg_info->tx_flags, length, - ahdr->ahgidx, - ahdr->ahgcount, - ahdr->ahgdesc, + ahg_info->ahgidx, + ahg_info->ahgcount, + ahg_info->ahgdesc, hdrbytes, verbs_sdma_complete); if (ret) @@ -860,7 +868,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc) { struct hfi1_qp_priv *priv = qp->priv; - struct ahg_ib_header *ahdr = priv->s_hdr; + struct hfi1_ahg_info *ahg_info = priv->s_ahg; u32 hdrwords = qp->s_hdrwords; struct rvt_sge_state *ss = qp->s_cur_sge; u32 len = qp->s_cur_size; @@ -888,7 +896,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, plen); } tx->wqe = qp->s_wqe; - ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahdr, pbc); + ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahg_info, pbc); if (unlikely(ret)) goto bail_build; } @@ -1291,19 +1299,24 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) static void hfi1_fill_device_attr(struct hfi1_devdata *dd) { struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; + u16 ver = dd->dc8051_ver; memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props)); + rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 16) | + (u64)dc8051_ver_min(ver); rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | - IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE; + IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE | + IB_DEVICE_MEM_MGT_EXTENSIONS; rdi->dparms.props.page_size_cap = PAGE_SIZE; rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3; rdi->dparms.props.vendor_part_id = dd->pcidev->device; rdi->dparms.props.hw_ver = dd->minrev; rdi->dparms.props.sys_image_guid = ib_hfi1_sys_image_guid; - rdi->dparms.props.max_mr_size = ~0ULL; + rdi->dparms.props.max_mr_size = U64_MAX; + rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX; rdi->dparms.props.max_qp = hfi1_max_qps; rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs; rdi->dparms.props.max_sge = hfi1_max_sges; @@ -1567,6 +1580,17 @@ static void init_ibport(struct hfi1_pportdata *ppd) RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); } +static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str, + size_t str_len) +{ + struct rvt_dev_info *rdi = ib_to_rvt(ibdev); + struct hfi1_ibdev *dev = dev_from_rdi(rdi); + u16 ver = dd_from_dev(dev)->dc8051_ver; + + snprintf(str, str_len, "%u.%u", dc8051_ver_maj(ver), + dc8051_ver_min(ver)); +} + /** * hfi1_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -1613,6 +1637,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) /* keep process mad in the driver */ ibdev->process_mad = hfi1_process_mad; + ibdev->get_dev_fw_str = hfi1_get_dev_fw_str; strncpy(ibdev->node_desc, init_utsname()->nodename, sizeof(ibdev->node_desc)); @@ -1680,6 +1705,9 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.dparms.nports = dd->num_pports; dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd); + /* post send table */ + dd->verbs_dev.rdi.post_parms = hfi1_post_parms; + ppd = dd->pport; for (i = 0; i < dd->num_pports; i++, ppd++) rvt_init_port(&dd->verbs_dev.rdi, @@ -1730,8 +1758,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet) struct rvt_qp *qp = packet->qp; u32 lqpn, rqpn = 0; u16 rlid = 0; - u8 sl, sc5, sc4_bit, svc_type; - bool sc4_set = has_sc4_bit(packet); + u8 sl, sc5, svc_type; switch (packet->qp->ibqp.qp_type) { case IB_QPT_UC: @@ -1754,9 +1781,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet) return; } - sc4_bit = sc4_set << 4; - sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf; - sc5 |= sc4_bit; + sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf); sl = ibp->sc_to_sl[sc5]; lqpn = qp->ibqp.qp_num; diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 488356775627..d1b101c54828 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -178,16 +178,14 @@ struct hfi1_ib_header { } u; } __packed; -struct ahg_ib_header { - struct sdma_engine *sde; +struct hfi1_ahg_info { u32 ahgdesc[2]; u16 tx_flags; u8 ahgcount; u8 ahgidx; - struct hfi1_ib_header ibh; }; -struct hfi1_pio_header { +struct hfi1_sdma_header { __le64 pbc; struct hfi1_ib_header hdr; } __packed; @@ -197,7 +195,7 @@ struct hfi1_pio_header { * pair is made common */ struct hfi1_qp_priv { - struct ahg_ib_header *s_hdr; /* next header to send */ + struct hfi1_ahg_info *s_ahg; /* ahg info for next header */ struct sdma_engine *s_sde; /* current sde */ struct send_context *s_sendcontext; /* current sendcontext */ u8 s_sc; /* SC[0..4] for next packet */ diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index a1d6e0807f97..5660897593ba 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h @@ -56,7 +56,7 @@ #include "iowait.h" struct verbs_txreq { - struct hfi1_pio_header phdr; + struct hfi1_sdma_header phdr; struct sdma_txreq txreq; struct rvt_qp *qp; struct rvt_swqe *wqe; diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index d2fa72516960..5026dc79978a 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -1567,12 +1567,12 @@ static enum i40iw_status_code i40iw_del_multiple_qhash( ret = i40iw_manage_qhash(iwdev, cm_info, I40IW_QHASH_TYPE_TCP_SYN, I40IW_QHASH_MANAGE_TYPE_DELETE, NULL, false); - kfree(child_listen_node); - cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++; i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "freed pointer = %p\n", child_listen_node); + kfree(child_listen_node); + cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++; } spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags); diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index bd942da91a27..2fac1db0e0a0 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -1557,6 +1557,9 @@ enum i40iw_alignment { #define I40IW_RING_MOVE_TAIL(_ring) \ (_ring).tail = ((_ring).tail + 1) % (_ring).size +#define I40IW_RING_MOVE_HEAD_NOCHECK(_ring) \ + (_ring).head = ((_ring).head + 1) % (_ring).size + #define I40IW_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \ (_ring).tail = ((_ring).tail + (_count)) % (_ring).size diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index e9c6e82af9c7..c62d354f7810 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -1025,6 +1025,8 @@ static void i40iw_ieq_compl_pfpdu(struct i40iw_puda_rsrc *ieq, u16 txoffset, bufoffset; buf = i40iw_puda_get_listbuf(pbufl); + if (!buf) + return; nextseqnum = buf->seqnum + fpdu_len; txbuf->totallen = buf->hdrlen + fpdu_len; txbuf->data = (u8 *)txbuf->mem.va + buf->hdrlen; @@ -1048,6 +1050,8 @@ static void i40iw_ieq_compl_pfpdu(struct i40iw_puda_rsrc *ieq, fpdu_len -= buf->datalen; i40iw_puda_ret_bufpool(ieq, buf); buf = i40iw_puda_get_listbuf(pbufl); + if (!buf) + return; bufoffset = (u16)(buf->data - (u8 *)buf->mem.va); } while (1); diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index 16cc61720b53..2b1a04e9ca3c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -667,7 +667,7 @@ struct i40iw_tcp_offload_info { bool time_stamp; u8 cwnd_inc_limit; bool drop_ooo_seg; - bool dup_ack_thresh; + u8 dup_ack_thresh; u8 ttl; u8 src_mac_addr_idx; bool avoid_stretch_ack; diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index e35faea88c13..4d28c3cb03cc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -291,9 +291,9 @@ static enum i40iw_status_code i40iw_rdma_write(struct i40iw_qp_uk *qp, i40iw_set_fragment(wqe, 0, op_info->lo_sg_list); - for (i = 1; i < op_info->num_lo_sges; i++) { - byte_off = 32 + (i - 1) * 16; + for (i = 1, byte_off = 32; i < op_info->num_lo_sges; i++) { i40iw_set_fragment(wqe, byte_off, &op_info->lo_sg_list[i]); + byte_off += 16; } wmb(); /* make sure WQE is populated before valid bit is set */ @@ -401,9 +401,9 @@ static enum i40iw_status_code i40iw_send(struct i40iw_qp_uk *qp, i40iw_set_fragment(wqe, 0, op_info->sg_list); - for (i = 1; i < op_info->num_sges; i++) { - byte_off = 32 + (i - 1) * 16; + for (i = 1, byte_off = 32; i < op_info->num_sges; i++) { i40iw_set_fragment(wqe, byte_off, &op_info->sg_list[i]); + byte_off += 16; } wmb(); /* make sure WQE is populated before valid bit is set */ @@ -685,9 +685,9 @@ static enum i40iw_status_code i40iw_post_receive(struct i40iw_qp_uk *qp, i40iw_set_fragment(wqe, 0, info->sg_list); - for (i = 1; i < info->num_sges; i++) { - byte_off = 32 + (i - 1) * 16; + for (i = 1, byte_off = 32; i < info->num_sges; i++) { i40iw_set_fragment(wqe, byte_off, &info->sg_list[i]); + byte_off += 16; } wmb(); /* make sure WQE is populated before valid bit is set */ @@ -753,8 +753,7 @@ static enum i40iw_status_code i40iw_cq_post_entries(struct i40iw_cq_uk *cq, * @post_cq: update cq tail */ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, - struct i40iw_cq_poll_info *info, - bool post_cq) + struct i40iw_cq_poll_info *info) { u64 comp_ctx, qword0, qword2, qword3, wqe_qword; u64 *cqe, *sw_wqe; @@ -762,7 +761,6 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, struct i40iw_ring *pring = NULL; u32 wqe_idx, q_type, array_idx = 0; enum i40iw_status_code ret_code = 0; - enum i40iw_status_code ret_code2 = 0; bool move_cq_head = true; u8 polarity; u8 addl_wqes = 0; @@ -870,19 +868,14 @@ exit: move_cq_head = false; if (move_cq_head) { - I40IW_RING_MOVE_HEAD(cq->cq_ring, ret_code2); - - if (ret_code2 && !ret_code) - ret_code = ret_code2; + I40IW_RING_MOVE_HEAD_NOCHECK(cq->cq_ring); if (I40IW_RING_GETCURRENT_HEAD(cq->cq_ring) == 0) cq->polarity ^= 1; - if (post_cq) { - I40IW_RING_MOVE_TAIL(cq->cq_ring); - set_64bit_val(cq->shadow_area, 0, - I40IW_RING_GETCURRENT_HEAD(cq->cq_ring)); - } + I40IW_RING_MOVE_TAIL(cq->cq_ring); + set_64bit_val(cq->shadow_area, 0, + I40IW_RING_GETCURRENT_HEAD(cq->cq_ring)); } else { if (info->is_srq) return ret_code; diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h index 4627646fe8cd..276bcefffd7e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_user.h +++ b/drivers/infiniband/hw/i40iw/i40iw_user.h @@ -327,7 +327,7 @@ struct i40iw_cq_ops { void (*iw_cq_request_notification)(struct i40iw_cq_uk *, enum i40iw_completion_notify); enum i40iw_status_code (*iw_cq_poll_completion)(struct i40iw_cq_uk *, - struct i40iw_cq_poll_info *, bool); + struct i40iw_cq_poll_info *); enum i40iw_status_code (*iw_cq_post_entries)(struct i40iw_cq_uk *, u8 count); void (*iw_cq_clean)(void *, struct i40iw_cq_uk *); }; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 283b64c942ee..2360338877bf 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -529,7 +529,7 @@ static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev, status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, 0, &rqshift); if (status) - return -ENOSYS; + return -ENOMEM; sqdepth = sq_size << sqshift; rqdepth = rq_size << rqshift; @@ -671,7 +671,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp; if (init_attr->qp_type != IB_QPT_RC) { - err_code = -ENOSYS; + err_code = -EINVAL; goto error; } if (iwdev->push_mode) @@ -1840,6 +1840,7 @@ struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *pd, iwmr->ibmr.lkey = stag; iwmr->page_cnt = 1; iwmr->pgaddrmem[0] = addr; + iwmr->length = size; status = i40iw_hwreg_mr(iwdev, iwmr, access); if (status) { i40iw_free_stag(iwdev, stag); @@ -1863,7 +1864,7 @@ static struct ib_mr *i40iw_get_dma_mr(struct ib_pd *pd, int acc) { u64 kva = 0; - return i40iw_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva); + return i40iw_reg_phys_mr(pd, 0, 0, acc, &kva); } /** @@ -1975,18 +1976,6 @@ static ssize_t i40iw_show_rev(struct device *dev, } /** - * i40iw_show_fw_ver - */ -static ssize_t i40iw_show_fw_ver(struct device *dev, - struct device_attribute *attr, char *buf) -{ - u32 firmware_version = I40IW_FW_VERSION; - - return sprintf(buf, "%u.%u\n", firmware_version, - (firmware_version & 0x000000ff)); -} - -/** * i40iw_show_hca */ static ssize_t i40iw_show_hca(struct device *dev, @@ -2006,13 +1995,11 @@ static ssize_t i40iw_show_board(struct device *dev, } static DEVICE_ATTR(hw_rev, S_IRUGO, i40iw_show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, i40iw_show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, i40iw_show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, i40iw_show_board, NULL); static struct device_attribute *i40iw_dev_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id }; @@ -2091,8 +2078,12 @@ static int i40iw_post_send(struct ib_qp *ibqp, ret = ukqp->ops.iw_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false); } - if (ret) - err = -EIO; + if (ret) { + if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED) + err = -ENOMEM; + else + err = -EINVAL; + } break; case IB_WR_RDMA_WRITE: info.op_type = I40IW_OP_TYPE_RDMA_WRITE; @@ -2113,8 +2104,12 @@ static int i40iw_post_send(struct ib_qp *ibqp, ret = ukqp->ops.iw_rdma_write(ukqp, &info, false); } - if (ret) - err = -EIO; + if (ret) { + if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED) + err = -ENOMEM; + else + err = -EINVAL; + } break; case IB_WR_RDMA_READ_WITH_INV: inv_stag = true; @@ -2132,15 +2127,19 @@ static int i40iw_post_send(struct ib_qp *ibqp, info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey; info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length; ret = ukqp->ops.iw_rdma_read(ukqp, &info, inv_stag, false); - if (ret) - err = -EIO; + if (ret) { + if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED) + err = -ENOMEM; + else + err = -EINVAL; + } break; case IB_WR_LOCAL_INV: info.op_type = I40IW_OP_TYPE_INV_STAG; info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey; ret = ukqp->ops.iw_stag_local_invalidate(ukqp, &info, true); if (ret) - err = -EIO; + err = -ENOMEM; break; case IB_WR_REG_MR: { @@ -2174,7 +2173,7 @@ static int i40iw_post_send(struct ib_qp *ibqp, ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp, &info, true); if (ret) - err = -EIO; + err = -ENOMEM; break; } default: @@ -2214,6 +2213,7 @@ static int i40iw_post_recv(struct ib_qp *ibqp, struct i40iw_sge sg_list[I40IW_MAX_WQ_FRAGMENT_COUNT]; enum i40iw_status_code ret = 0; unsigned long flags; + int err = 0; iwqp = (struct i40iw_qp *)ibqp; ukqp = &iwqp->sc_qp.qp_uk; @@ -2228,6 +2228,10 @@ static int i40iw_post_recv(struct ib_qp *ibqp, ret = ukqp->ops.iw_post_receive(ukqp, &post_recv); if (ret) { i40iw_pr_err(" post_recv err %d\n", ret); + if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED) + err = -ENOMEM; + else + err = -EINVAL; *bad_wr = ib_wr; goto out; } @@ -2235,9 +2239,7 @@ static int i40iw_post_recv(struct ib_qp *ibqp, } out: spin_unlock_irqrestore(&iwqp->lock, flags); - if (ret) - return -ENOSYS; - return 0; + return err; } /** @@ -2264,7 +2266,7 @@ static int i40iw_poll_cq(struct ib_cq *ibcq, spin_lock_irqsave(&iwcq->lock, flags); while (cqe_count < num_entries) { - ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info, true); + ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info); if (ret == I40IW_ERR_QUEUE_EMPTY) { break; } else if (ret == I40IW_ERR_QUEUE_DESTROYED) { @@ -2437,6 +2439,15 @@ static const char * const i40iw_hw_stat_names[] = { "iwRdmaInv" }; +static void i40iw_get_dev_fw_str(struct ib_device *dev, char *str, + size_t str_len) +{ + u32 firmware_version = I40IW_FW_VERSION; + + snprintf(str, str_len, "%u.%u", firmware_version, + (firmware_version & 0x000000ff)); +} + /** * i40iw_alloc_hw_stats - Allocate a hw stats structure * @ibdev: device pointer from stack @@ -2528,7 +2539,7 @@ static int i40iw_modify_port(struct ib_device *ibdev, int port_modify_mask, struct ib_port_modify *props) { - return 0; + return -ENOSYS; } /** @@ -2660,6 +2671,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev memcpy(iwibdev->ibdev.iwcm->ifname, netdev->name, sizeof(iwibdev->ibdev.iwcm->ifname)); iwibdev->ibdev.get_port_immutable = i40iw_port_immutable; + iwibdev->ibdev.get_dev_fw_str = i40iw_get_dev_fw_str; iwibdev->ibdev.poll_cq = i40iw_poll_cq; iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq; iwibdev->ibdev.post_send = i40iw_post_send; @@ -2723,7 +2735,7 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev) iwdev->iwibdev = i40iw_init_rdma_device(iwdev); if (!iwdev->iwibdev) - return -ENOSYS; + return -ENOMEM; iwibdev = iwdev->iwibdev; ret = ib_register_device(&iwibdev->ibdev, NULL); @@ -2748,5 +2760,5 @@ error: kfree(iwdev->iwibdev->ibdev.iwcm); iwdev->iwibdev->ibdev.iwcm = NULL; ib_dealloc_device(&iwdev->iwibdev->ibdev); - return -ENOSYS; + return ret; } diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 9f8b516eb2b0..d6fc8a6e8c33 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -288,7 +288,7 @@ static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq, if (cq->resize_buf) return -EBUSY; - cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC); + cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_KERNEL); if (!cq->resize_buf) return -ENOMEM; @@ -316,7 +316,7 @@ static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) return -EFAULT; - cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC); + cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_KERNEL); if (!cq->resize_buf) return -ENOMEM; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 42a46078d7d5..2af44c2de262 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2025,16 +2025,6 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr, return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device); } -static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, - char *buf) -{ - struct mlx4_ib_dev *dev = - container_of(device, struct mlx4_ib_dev, ib_dev.dev); - return sprintf(buf, "%d.%d.%d\n", (int) (dev->dev->caps.fw_ver >> 32), - (int) (dev->dev->caps.fw_ver >> 16) & 0xffff, - (int) dev->dev->caps.fw_ver & 0xffff); -} - static ssize_t show_rev(struct device *device, struct device_attribute *attr, char *buf) { @@ -2053,17 +2043,204 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr, } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct device_attribute *mlx4_class_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id }; +struct diag_counter { + const char *name; + u32 offset; +}; + +#define DIAG_COUNTER(_name, _offset) \ + { .name = #_name, .offset = _offset } + +static const struct diag_counter diag_basic[] = { + DIAG_COUNTER(rq_num_lle, 0x00), + DIAG_COUNTER(sq_num_lle, 0x04), + DIAG_COUNTER(rq_num_lqpoe, 0x08), + DIAG_COUNTER(sq_num_lqpoe, 0x0C), + DIAG_COUNTER(rq_num_lpe, 0x18), + DIAG_COUNTER(sq_num_lpe, 0x1C), + DIAG_COUNTER(rq_num_wrfe, 0x20), + DIAG_COUNTER(sq_num_wrfe, 0x24), + DIAG_COUNTER(sq_num_mwbe, 0x2C), + DIAG_COUNTER(sq_num_bre, 0x34), + DIAG_COUNTER(sq_num_rire, 0x44), + DIAG_COUNTER(rq_num_rire, 0x48), + DIAG_COUNTER(sq_num_rae, 0x4C), + DIAG_COUNTER(rq_num_rae, 0x50), + DIAG_COUNTER(sq_num_roe, 0x54), + DIAG_COUNTER(sq_num_tree, 0x5C), + DIAG_COUNTER(sq_num_rree, 0x64), + DIAG_COUNTER(rq_num_rnr, 0x68), + DIAG_COUNTER(sq_num_rnr, 0x6C), + DIAG_COUNTER(rq_num_oos, 0x100), + DIAG_COUNTER(sq_num_oos, 0x104), +}; + +static const struct diag_counter diag_ext[] = { + DIAG_COUNTER(rq_num_dup, 0x130), + DIAG_COUNTER(sq_num_to, 0x134), +}; + +static const struct diag_counter diag_device_only[] = { + DIAG_COUNTER(num_cqovf, 0x1A0), + DIAG_COUNTER(rq_num_udsdprd, 0x118), +}; + +static struct rdma_hw_stats *mlx4_ib_alloc_hw_stats(struct ib_device *ibdev, + u8 port_num) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + struct mlx4_ib_diag_counters *diag = dev->diag_counters; + + if (!diag[!!port_num].name) + return NULL; + + return rdma_alloc_hw_stats_struct(diag[!!port_num].name, + diag[!!port_num].num_counters, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int mlx4_ib_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u8 port, int index) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + struct mlx4_ib_diag_counters *diag = dev->diag_counters; + u32 hw_value[ARRAY_SIZE(diag_device_only) + + ARRAY_SIZE(diag_ext) + ARRAY_SIZE(diag_basic)] = {}; + int ret; + int i; + + ret = mlx4_query_diag_counters(dev->dev, + MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS, + diag[!!port].offset, hw_value, + diag[!!port].num_counters, port); + + if (ret) + return ret; + + for (i = 0; i < diag[!!port].num_counters; i++) + stats->value[i] = hw_value[i]; + + return diag[!!port].num_counters; +} + +static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev, + const char ***name, + u32 **offset, + u32 *num, + bool port) +{ + u32 num_counters; + + num_counters = ARRAY_SIZE(diag_basic); + + if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) + num_counters += ARRAY_SIZE(diag_ext); + + if (!port) + num_counters += ARRAY_SIZE(diag_device_only); + + *name = kcalloc(num_counters, sizeof(**name), GFP_KERNEL); + if (!*name) + return -ENOMEM; + + *offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL); + if (!*offset) + goto err_name; + + *num = num_counters; + + return 0; + +err_name: + kfree(*name); + return -ENOMEM; +} + +static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev, + const char **name, + u32 *offset, + bool port) +{ + int i; + int j; + + for (i = 0, j = 0; i < ARRAY_SIZE(diag_basic); i++, j++) { + name[i] = diag_basic[i].name; + offset[i] = diag_basic[i].offset; + } + + if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) { + for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) { + name[j] = diag_ext[i].name; + offset[j] = diag_ext[i].offset; + } + } + + if (!port) { + for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) { + name[j] = diag_device_only[i].name; + offset[j] = diag_device_only[i].offset; + } + } +} + +static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) +{ + struct mlx4_ib_diag_counters *diag = ibdev->diag_counters; + int i; + int ret; + bool per_port = !!(ibdev->dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT); + + for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) { + /* i == 1 means we are building port counters */ + if (i && !per_port) + continue; + + ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].name, + &diag[i].offset, + &diag[i].num_counters, i); + if (ret) + goto err_alloc; + + mlx4_ib_fill_diag_counters(ibdev, diag[i].name, + diag[i].offset, i); + } + + ibdev->ib_dev.get_hw_stats = mlx4_ib_get_hw_stats; + ibdev->ib_dev.alloc_hw_stats = mlx4_ib_alloc_hw_stats; + + return 0; + +err_alloc: + if (i) { + kfree(diag[i - 1].name); + kfree(diag[i - 1].offset); + } + + return ret; +} + +static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev) +{ + int i; + + for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) { + kfree(ibdev->diag_counters[i].offset); + kfree(ibdev->diag_counters[i].name); + } +} + #define MLX4_IB_INVALID_MAC ((u64)-1) static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev, struct net_device *dev, @@ -2280,6 +2457,17 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_fw_ver_str(struct ib_device *device, char *str, + size_t str_len) +{ + struct mlx4_ib_dev *dev = + container_of(device, struct mlx4_ib_dev, ib_dev); + snprintf(str, str_len, "%d.%d.%d", + (int) (dev->dev->caps.fw_ver >> 32), + (int) (dev->dev->caps.fw_ver >> 16) & 0xffff, + (int) dev->dev->caps.fw_ver & 0xffff); +} + static void *mlx4_ib_add(struct mlx4_dev *dev) { struct mlx4_ib_dev *ibdev; @@ -2413,6 +2601,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; ibdev->ib_dev.process_mad = mlx4_ib_process_mad; ibdev->ib_dev.get_port_immutable = mlx4_port_immutable; + ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str; ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext; if (!mlx4_is_slave(ibdev->dev)) { @@ -2555,9 +2744,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) for (j = 1; j <= ibdev->dev->caps.num_ports; j++) atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]); - if (ib_register_device(&ibdev->ib_dev, NULL)) + if (mlx4_ib_alloc_diag_counters(ibdev)) goto err_steer_free_bitmap; + if (ib_register_device(&ibdev->ib_dev, NULL)) + goto err_diag_counters; + if (mlx4_ib_mad_init(ibdev)) goto err_reg; @@ -2623,6 +2815,9 @@ err_mad: err_reg: ib_unregister_device(&ibdev->ib_dev); +err_diag_counters: + mlx4_ib_diag_cleanup(ibdev); + err_steer_free_bitmap: kfree(ibdev->ib_uc_qpns_bitmap); @@ -2726,6 +2921,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) mlx4_ib_close_sriov(ibdev); mlx4_ib_mad_cleanup(ibdev); ib_unregister_device(&ibdev->ib_dev); + mlx4_ib_diag_cleanup(ibdev); if (ibdev->iboe.nb.notifier_call) { if (unregister_netdevice_notifier(&ibdev->iboe.nb)) pr_warn("failure unregistering notifier\n"); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 29acda249612..7c5832ede4bd 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -549,6 +549,14 @@ struct mlx4_ib_counters { u32 default_counter; }; +#define MLX4_DIAG_COUNTERS_TYPES 2 + +struct mlx4_ib_diag_counters { + const char **name; + u32 *offset; + u32 num_counters; +}; + struct mlx4_ib_dev { struct ib_device ib_dev; struct mlx4_dev *dev; @@ -585,6 +593,7 @@ struct mlx4_ib_dev { /* protect resources needed as part of reset flow */ spinlock_t reset_flow_resource_lock; struct list_head qp_list; + struct mlx4_ib_diag_counters diag_counters[MLX4_DIAG_COUNTERS_TYPES]; }; struct ib_event_work { diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 9c0e67bd2ba7..308a358e5b46 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -424,6 +424,83 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, item->key = be32_to_cpu(cqe->mkey); } +static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries, + struct ib_wc *wc, int *npolled) +{ + struct mlx5_ib_wq *wq; + unsigned int cur; + unsigned int idx; + int np; + int i; + + wq = &qp->sq; + cur = wq->head - wq->tail; + np = *npolled; + + if (cur == 0) + return; + + for (i = 0; i < cur && np < num_entries; i++) { + idx = wq->last_poll & (wq->wqe_cnt - 1); + wc->wr_id = wq->wrid[idx]; + wc->status = IB_WC_WR_FLUSH_ERR; + wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; + wq->tail++; + np++; + wc->qp = &qp->ibqp; + wc++; + wq->last_poll = wq->w_list[idx].next; + } + *npolled = np; +} + +static void sw_recv_comp(struct mlx5_ib_qp *qp, int num_entries, + struct ib_wc *wc, int *npolled) +{ + struct mlx5_ib_wq *wq; + unsigned int cur; + int np; + int i; + + wq = &qp->rq; + cur = wq->head - wq->tail; + np = *npolled; + + if (cur == 0) + return; + + for (i = 0; i < cur && np < num_entries; i++) { + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + wc->status = IB_WC_WR_FLUSH_ERR; + wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; + wq->tail++; + np++; + wc->qp = &qp->ibqp; + wc++; + } + *npolled = np; +} + +static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries, + struct ib_wc *wc, int *npolled) +{ + struct mlx5_ib_qp *qp; + + *npolled = 0; + /* Find uncompleted WQEs belonging to that cq and retrun mmics ones */ + list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) { + sw_send_comp(qp, num_entries, wc + *npolled, npolled); + if (*npolled >= num_entries) + return; + } + + list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) { + sw_recv_comp(qp, num_entries, wc + *npolled, npolled); + if (*npolled >= num_entries) + return; + } +} + static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_ib_qp **cur_qp, struct ib_wc *wc) @@ -594,12 +671,18 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) { struct mlx5_ib_cq *cq = to_mcq(ibcq); struct mlx5_ib_qp *cur_qp = NULL; + struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); + struct mlx5_core_dev *mdev = dev->mdev; unsigned long flags; int soft_polled = 0; int npolled; int err = 0; spin_lock_irqsave(&cq->lock, flags); + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + mlx5_ib_poll_sw_comp(cq, num_entries, wc, &npolled); + goto out; + } if (unlikely(!list_empty(&cq->wc_list))) soft_polled = poll_soft_wc(cq, num_entries, wc); @@ -612,7 +695,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) if (npolled) mlx5_cq_set_ci(&cq->mcq); - +out: spin_unlock_irqrestore(&cq->lock, flags); if (err == 0 || err == -EAGAIN) @@ -843,6 +926,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, cq->resize_buf = NULL; cq->resize_umem = NULL; cq->create_flags = attr->flags; + INIT_LIST_HEAD(&cq->list_send_qp); + INIT_LIST_HEAD(&cq->list_recv_qp); if (context) { err = create_cq_user(dev, udata, context, cq, entries, diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 53e03c8ede79..79e6309460dc 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -69,15 +69,6 @@ static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev) return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn); } -static u32 next_outstanding(struct mlx5_ib_gsi_qp *gsi, u32 index) -{ - return ++index % gsi->cap.max_send_wr; -} - -#define for_each_outstanding_wr(gsi, index) \ - for (index = gsi->outstanding_ci; index != gsi->outstanding_pi; \ - index = next_outstanding(gsi, index)) - /* Call with gsi->lock locked */ static void generate_completions(struct mlx5_ib_gsi_qp *gsi) { @@ -85,8 +76,9 @@ static void generate_completions(struct mlx5_ib_gsi_qp *gsi) struct mlx5_ib_gsi_wr *wr; u32 index; - for_each_outstanding_wr(gsi, index) { - wr = &gsi->outstanding_wrs[index]; + for (index = gsi->outstanding_ci; index != gsi->outstanding_pi; + index++) { + wr = &gsi->outstanding_wrs[index % gsi->cap.max_send_wr]; if (!wr->completed) break; @@ -430,8 +422,9 @@ static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi, return -ENOMEM; } - gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi]; - gsi->outstanding_pi = next_outstanding(gsi, gsi->outstanding_pi); + gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi % + gsi->cap.max_send_wr]; + gsi->outstanding_pi++; if (!wc) { memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc)); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index dad63f038bb8..a84bb766fc62 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -42,11 +42,13 @@ #include <asm/pat.h> #endif #include <linux/sched.h> +#include <linux/delay.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_addr.h> #include <rdma/ib_cache.h> #include <linux/mlx5/port.h> #include <linux/mlx5/vport.h> +#include <linux/list.h> #include <rdma/ib_smi.h> #include <rdma/ib_umem.h> #include <linux/in.h> @@ -457,8 +459,17 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, int max_rq_sg; int max_sq_sg; u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz); + struct mlx5_ib_query_device_resp resp = {}; + size_t resp_len; + u64 max_tso; - if (uhw->inlen || uhw->outlen) + resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length); + if (uhw->outlen && uhw->outlen < resp_len) + return -EINVAL; + else + resp.response_length = resp_len; + + if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen)) return -EINVAL; memset(props, 0, sizeof(*props)); @@ -511,10 +522,21 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_GEN(mdev, block_lb_mc)) props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; - if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && - (MLX5_CAP_ETH(dev->mdev, csum_cap))) + if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) { + if (MLX5_CAP_ETH(mdev, csum_cap)) props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; + if (field_avail(typeof(resp), tso_caps, uhw->outlen)) { + max_tso = MLX5_CAP_ETH(mdev, max_lso_cap); + if (max_tso) { + resp.tso_caps.max_tso = 1 << max_tso; + resp.tso_caps.supported_qpts |= + 1 << IB_QPT_RAW_PACKET; + resp.response_length += sizeof(resp.tso_caps); + } + } + } + if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; props->device_cap_flags |= IB_DEVICE_UD_TSO; @@ -576,6 +598,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (!mlx5_core_is_pf(mdev)) props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION; + if (uhw->outlen) { + err = ib_copy_to_udata(uhw, &resp, resp.response_length); + + if (err) + return err; + } + return 0; } @@ -983,6 +1012,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, goto out_uars; } + INIT_LIST_HEAD(&context->vma_private_list); INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); @@ -992,6 +1022,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (field_avail(typeof(resp), cqe_version, udata->outlen)) resp.response_length += sizeof(resp.cqe_version); + if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) { + resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE; + resp.response_length += sizeof(resp.cmds_supp_uhw); + } + /* * We don't want to expose information from the PCI bar that is located * after 4096 bytes, so if the arch only supports larger pages, let's @@ -1006,8 +1041,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE; resp.response_length += sizeof(resp.hca_core_clock_offset) + - sizeof(resp.reserved2) + - sizeof(resp.reserved3); + sizeof(resp.reserved2); } err = ib_copy_to_udata(udata, &resp, resp.response_length); @@ -1086,6 +1120,125 @@ static int get_index(unsigned long offset) return get_arg(offset); } +static void mlx5_ib_vma_open(struct vm_area_struct *area) +{ + /* vma_open is called when a new VMA is created on top of our VMA. This + * is done through either mremap flow or split_vma (usually due to + * mlock, madvise, munmap, etc.) We do not support a clone of the VMA, + * as this VMA is strongly hardware related. Therefore we set the + * vm_ops of the newly created/cloned VMA to NULL, to prevent it from + * calling us again and trying to do incorrect actions. We assume that + * the original VMA size is exactly a single page, and therefore all + * "splitting" operation will not happen to it. + */ + area->vm_ops = NULL; +} + +static void mlx5_ib_vma_close(struct vm_area_struct *area) +{ + struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data; + + /* It's guaranteed that all VMAs opened on a FD are closed before the + * file itself is closed, therefore no sync is needed with the regular + * closing flow. (e.g. mlx5 ib_dealloc_ucontext) + * However need a sync with accessing the vma as part of + * mlx5_ib_disassociate_ucontext. + * The close operation is usually called under mm->mmap_sem except when + * process is exiting. + * The exiting case is handled explicitly as part of + * mlx5_ib_disassociate_ucontext. + */ + mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data; + + /* setting the vma context pointer to null in the mlx5_ib driver's + * private data, to protect a race condition in + * mlx5_ib_disassociate_ucontext(). + */ + mlx5_ib_vma_priv_data->vma = NULL; + list_del(&mlx5_ib_vma_priv_data->list); + kfree(mlx5_ib_vma_priv_data); +} + +static const struct vm_operations_struct mlx5_ib_vm_ops = { + .open = mlx5_ib_vma_open, + .close = mlx5_ib_vma_close +}; + +static int mlx5_ib_set_vma_data(struct vm_area_struct *vma, + struct mlx5_ib_ucontext *ctx) +{ + struct mlx5_ib_vma_private_data *vma_prv; + struct list_head *vma_head = &ctx->vma_private_list; + + vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL); + if (!vma_prv) + return -ENOMEM; + + vma_prv->vma = vma; + vma->vm_private_data = vma_prv; + vma->vm_ops = &mlx5_ib_vm_ops; + + list_add(&vma_prv->list, vma_head); + + return 0; +} + +static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) +{ + int ret; + struct vm_area_struct *vma; + struct mlx5_ib_vma_private_data *vma_private, *n; + struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); + struct task_struct *owning_process = NULL; + struct mm_struct *owning_mm = NULL; + + owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); + if (!owning_process) + return; + + owning_mm = get_task_mm(owning_process); + if (!owning_mm) { + pr_info("no mm, disassociate ucontext is pending task termination\n"); + while (1) { + put_task_struct(owning_process); + usleep_range(1000, 2000); + owning_process = get_pid_task(ibcontext->tgid, + PIDTYPE_PID); + if (!owning_process || + owning_process->state == TASK_DEAD) { + pr_info("disassociate ucontext done, task was terminated\n"); + /* in case task was dead need to release the + * task struct. + */ + if (owning_process) + put_task_struct(owning_process); + return; + } + } + } + + /* need to protect from a race on closing the vma as part of + * mlx5_ib_vma_close. + */ + down_read(&owning_mm->mmap_sem); + list_for_each_entry_safe(vma_private, n, &context->vma_private_list, + list) { + vma = vma_private->vma; + ret = zap_vma_ptes(vma, vma->vm_start, + PAGE_SIZE); + WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__); + /* context going to be destroyed, should + * not access ops any more. + */ + vma->vm_ops = NULL; + list_del(&vma_private->list); + kfree(vma_private); + } + up_read(&owning_mm->mmap_sem); + mmput(owning_mm); + put_task_struct(owning_process); +} + static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) { switch (cmd) { @@ -1101,8 +1254,10 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) } static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, - struct vm_area_struct *vma, struct mlx5_uuar_info *uuari) + struct vm_area_struct *vma, + struct mlx5_ib_ucontext *context) { + struct mlx5_uuar_info *uuari = &context->uuari; int err; unsigned long idx; phys_addr_t pfn, pa; @@ -1152,14 +1307,13 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd), vma->vm_start, &pa); - return 0; + return mlx5_ib_set_vma_data(vma, context); } static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) { struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); - struct mlx5_uuar_info *uuari = &context->uuari; unsigned long command; phys_addr_t pfn; @@ -1168,7 +1322,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm case MLX5_IB_MMAP_WC_PAGE: case MLX5_IB_MMAP_NC_PAGE: case MLX5_IB_MMAP_REGULAR_PAGE: - return uar_mmap(dev, command, vma, uuari); + return uar_mmap(dev, command, vma, context); case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: return -ENOSYS; @@ -1331,6 +1485,32 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, &ib_spec->ipv4.val.dst_ip, sizeof(ib_spec->ipv4.val.dst_ip)); break; + case IB_FLOW_SPEC_IPV6: + if (ib_spec->size != sizeof(ib_spec->ipv6)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + ethertype, 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + ethertype, ETH_P_IPV6); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &ib_spec->ipv6.mask.src_ip, + sizeof(ib_spec->ipv6.mask.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &ib_spec->ipv6.val.src_ip, + sizeof(ib_spec->ipv6.val.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &ib_spec->ipv6.mask.dst_ip, + sizeof(ib_spec->ipv6.mask.dst_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &ib_spec->ipv6.val.dst_ip, + sizeof(ib_spec->ipv6.val.dst_ip)); + break; case IB_FLOW_SPEC_TCP: if (ib_spec->size != sizeof(ib_spec->tcp_udp)) return -EINVAL; @@ -1801,15 +1981,6 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr, return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); } -static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, - char *buf) -{ - struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); - return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev), - fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); -} - static ssize_t show_rev(struct device *device, struct device_attribute *attr, char *buf) { @@ -1828,7 +1999,6 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr, } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL); @@ -1836,7 +2006,6 @@ static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL); static struct device_attribute *mlx5_class_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id, &dev_attr_fw_pages, @@ -1854,6 +2023,65 @@ static void pkey_change_handler(struct work_struct *work) mutex_unlock(&ports->devr->mutex); } +static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev) +{ + struct mlx5_ib_qp *mqp; + struct mlx5_ib_cq *send_mcq, *recv_mcq; + struct mlx5_core_cq *mcq; + struct list_head cq_armed_list; + unsigned long flags_qp; + unsigned long flags_cq; + unsigned long flags; + + INIT_LIST_HEAD(&cq_armed_list); + + /* Go over qp list reside on that ibdev, sync with create/destroy qp.*/ + spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags); + list_for_each_entry(mqp, &ibdev->qp_list, qps_list) { + spin_lock_irqsave(&mqp->sq.lock, flags_qp); + if (mqp->sq.tail != mqp->sq.head) { + send_mcq = to_mcq(mqp->ibqp.send_cq); + spin_lock_irqsave(&send_mcq->lock, flags_cq); + if (send_mcq->mcq.comp && + mqp->ibqp.send_cq->comp_handler) { + if (!send_mcq->mcq.reset_notify_added) { + send_mcq->mcq.reset_notify_added = 1; + list_add_tail(&send_mcq->mcq.reset_notify, + &cq_armed_list); + } + } + spin_unlock_irqrestore(&send_mcq->lock, flags_cq); + } + spin_unlock_irqrestore(&mqp->sq.lock, flags_qp); + spin_lock_irqsave(&mqp->rq.lock, flags_qp); + /* no handling is needed for SRQ */ + if (!mqp->ibqp.srq) { + if (mqp->rq.tail != mqp->rq.head) { + recv_mcq = to_mcq(mqp->ibqp.recv_cq); + spin_lock_irqsave(&recv_mcq->lock, flags_cq); + if (recv_mcq->mcq.comp && + mqp->ibqp.recv_cq->comp_handler) { + if (!recv_mcq->mcq.reset_notify_added) { + recv_mcq->mcq.reset_notify_added = 1; + list_add_tail(&recv_mcq->mcq.reset_notify, + &cq_armed_list); + } + } + spin_unlock_irqrestore(&recv_mcq->lock, + flags_cq); + } + } + spin_unlock_irqrestore(&mqp->rq.lock, flags_qp); + } + /*At that point all inflight post send were put to be executed as of we + * lock/unlock above locks Now need to arm all involved CQs. + */ + list_for_each_entry(mcq, &cq_armed_list, reset_notify) { + mcq->comp(mcq); + } + spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags); +} + static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param) { @@ -1866,6 +2094,7 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, case MLX5_DEV_EVENT_SYS_ERROR: ibdev->ib_active = false; ibev.event = IB_EVENT_DEVICE_FATAL; + mlx5_ib_handle_internal_error(ibdev); break; case MLX5_DEV_EVENT_PORT_UP: @@ -2272,6 +2501,15 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_dev_fw_str(struct ib_device *ibdev, char *str, + size_t str_len) +{ + struct mlx5_ib_dev *dev = + container_of(ibdev, struct mlx5_ib_dev, ib_dev); + snprintf(str, str_len, "%d.%d.%04d", fw_rev_maj(dev->mdev), + fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); +} + static int mlx5_enable_roce(struct mlx5_ib_dev *dev) { int err; @@ -2298,6 +2536,113 @@ static void mlx5_disable_roce(struct mlx5_ib_dev *dev) unregister_netdevice_notifier(&dev->roce.nb); } +static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev) +{ + unsigned int i; + + for (i = 0; i < dev->num_ports; i++) + mlx5_core_dealloc_q_counter(dev->mdev, + dev->port[i].q_cnt_id); +} + +static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev) +{ + int i; + int ret; + + for (i = 0; i < dev->num_ports; i++) { + ret = mlx5_core_alloc_q_counter(dev->mdev, + &dev->port[i].q_cnt_id); + if (ret) { + mlx5_ib_warn(dev, + "couldn't allocate queue counter for port %d, err %d\n", + i + 1, ret); + goto dealloc_counters; + } + } + + return 0; + +dealloc_counters: + while (--i >= 0) + mlx5_core_dealloc_q_counter(dev->mdev, + dev->port[i].q_cnt_id); + + return ret; +} + +static const char * const names[] = { + "rx_write_requests", + "rx_read_requests", + "rx_atomic_requests", + "out_of_buffer", + "out_of_sequence", + "duplicate_request", + "rnr_nak_retry_err", + "packet_seq_err", + "implied_nak_seq_err", + "local_ack_timeout_err", +}; + +static const size_t stats_offsets[] = { + MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests), + MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests), + MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests), + MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer), + MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence), + MLX5_BYTE_OFF(query_q_counter_out, duplicate_request), + MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err), + MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err), + MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err), + MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err), +}; + +static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, + u8 port_num) +{ + BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets)); + + /* We support only per port stats */ + if (port_num == 0) + return NULL; + + return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names), + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u8 port, int index) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out); + void *out; + __be32 val; + int ret; + int i; + + if (!port || !stats) + return -ENOSYS; + + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + ret = mlx5_core_query_q_counter(dev->mdev, + dev->port[port - 1].q_cnt_id, 0, + out, outlen); + if (ret) + goto free; + + for (i = 0; i < ARRAY_SIZE(names); i++) { + val = *(__be32 *)(out + stats_offsets[i]); + stats->value[i] = (u64)be32_to_cpu(val); + } +free: + kvfree(out); + return ARRAY_SIZE(names); +} + static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { struct mlx5_ib_dev *dev; @@ -2320,10 +2665,15 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->mdev = mdev; + dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port), + GFP_KERNEL); + if (!dev->port) + goto err_dealloc; + rwlock_init(&dev->roce.netdev_lock); err = get_port_caps(dev); if (err) - goto err_dealloc; + goto err_free_port; if (mlx5_use_mad_ifc(dev)) get_ext_port_caps(dev); @@ -2418,6 +2768,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg; dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; dev->ib_dev.get_port_immutable = mlx5_port_immutable; + dev->ib_dev.get_dev_fw_str = get_dev_fw_str; if (mlx5_core_is_pf(mdev)) { dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config; dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state; @@ -2425,6 +2776,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid; } + dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext; + mlx5_ib_internal_fill_odp_caps(dev); if (MLX5_CAP_GEN(mdev, imaicl)) { @@ -2435,6 +2788,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); } + if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) && + MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { + dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats; + dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats; + } + if (MLX5_CAP_GEN(mdev, xrc)) { dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; @@ -2447,9 +2806,19 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) IB_LINK_LAYER_ETHERNET) { dev->ib_dev.create_flow = mlx5_ib_create_flow; dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; + dev->ib_dev.create_wq = mlx5_ib_create_wq; + dev->ib_dev.modify_wq = mlx5_ib_modify_wq; + dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq; + dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table; + dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table; dev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); } err = init_node_data(dev); if (err) @@ -2457,6 +2826,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) mutex_init(&dev->flow_db.lock); mutex_init(&dev->cap_mask_mutex); + INIT_LIST_HEAD(&dev->qp_list); + spin_lock_init(&dev->reset_flow_resource_lock); if (ll == IB_LINK_LAYER_ETHERNET) { err = mlx5_enable_roce(dev); @@ -2472,10 +2843,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (err) goto err_rsrc; - err = ib_register_device(&dev->ib_dev, NULL); + err = mlx5_ib_alloc_q_counters(dev); if (err) goto err_odp; + err = ib_register_device(&dev->ib_dev, NULL); + if (err) + goto err_q_cnt; + err = create_umr_res(dev); if (err) goto err_dev; @@ -2497,6 +2872,9 @@ err_umrc: err_dev: ib_unregister_device(&dev->ib_dev); +err_q_cnt: + mlx5_ib_dealloc_q_counters(dev); + err_odp: mlx5_ib_odp_remove_one(dev); @@ -2507,6 +2885,9 @@ err_disable_roce: if (ll == IB_LINK_LAYER_ETHERNET) mlx5_disable_roce(dev); +err_free_port: + kfree(dev->port); + err_dealloc: ib_dealloc_device((struct ib_device *)dev); @@ -2519,11 +2900,13 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1); ib_unregister_device(&dev->ib_dev); + mlx5_ib_dealloc_q_counters(dev); destroy_umrc_res(dev); mlx5_ib_odp_remove_one(dev); destroy_dev_resources(&dev->devr); if (ll == IB_LINK_LAYER_ETHERNET) mlx5_disable_roce(dev); + kfree(dev->port); ib_dealloc_device(&dev->ib_dev); } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index c4a9825828bc..372385d0f993 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -105,6 +105,11 @@ enum { MLX5_CQE_VERSION_V1, }; +struct mlx5_ib_vma_private_data { + struct list_head list; + struct vm_area_struct *vma; +}; + struct mlx5_ib_ucontext { struct ib_ucontext ibucontext; struct list_head db_page_list; @@ -116,6 +121,7 @@ struct mlx5_ib_ucontext { u8 cqe_version; /* Transport Domain number */ u32 tdn; + struct list_head vma_private_list; }; static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) @@ -217,12 +223,41 @@ struct mlx5_ib_wq { void *qend; }; +struct mlx5_ib_rwq { + struct ib_wq ibwq; + u32 rqn; + u32 rq_num_pas; + u32 log_rq_stride; + u32 log_rq_size; + u32 rq_page_offset; + u32 log_page_size; + struct ib_umem *umem; + size_t buf_size; + unsigned int page_shift; + int create_type; + struct mlx5_db db; + u32 user_index; + u32 wqe_count; + u32 wqe_shift; + int wq_sig; +}; + enum { MLX5_QP_USER, MLX5_QP_KERNEL, MLX5_QP_EMPTY }; +enum { + MLX5_WQ_USER, + MLX5_WQ_KERNEL +}; + +struct mlx5_ib_rwq_ind_table { + struct ib_rwq_ind_table ib_rwq_ind_tbl; + u32 rqtn; +}; + /* * Connect-IB can trigger up to four concurrent pagefaults * per-QP. @@ -266,6 +301,10 @@ struct mlx5_ib_qp_trans { u8 resp_depth; }; +struct mlx5_ib_rss_qp { + u32 tirn; +}; + struct mlx5_ib_rq { struct mlx5_ib_qp_base base; struct mlx5_ib_wq *rq; @@ -294,6 +333,7 @@ struct mlx5_ib_qp { union { struct mlx5_ib_qp_trans trans_qp; struct mlx5_ib_raw_packet_qp raw_packet_qp; + struct mlx5_ib_rss_qp rss_qp; }; struct mlx5_buf buf; @@ -340,6 +380,9 @@ struct mlx5_ib_qp { spinlock_t disable_page_faults_lock; struct mlx5_ib_pfault pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS]; #endif + struct list_head qps_list; + struct list_head cq_recv_list; + struct list_head cq_send_list; }; struct mlx5_ib_cq_buf { @@ -401,6 +444,8 @@ struct mlx5_ib_cq { struct mlx5_ib_cq_buf *resize_buf; struct ib_umem *resize_umem; int cqe_size; + struct list_head list_send_qp; + struct list_head list_recv_qp; u32 create_flags; struct list_head wc_list; enum ib_cq_notify_flags notify_flags; @@ -546,6 +591,10 @@ struct mlx5_ib_resources { struct mutex mutex; }; +struct mlx5_ib_port { + u16 q_cnt_id; +}; + struct mlx5_roce { /* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL * netdev pointer @@ -581,6 +630,11 @@ struct mlx5_ib_dev { struct srcu_struct mr_srcu; #endif struct mlx5_ib_flow_db flow_db; + /* protect resources needed as part of reset flow */ + spinlock_t reset_flow_resource_lock; + struct list_head qp_list; + /* Array with num_ports elements */ + struct mlx5_ib_port *port; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -628,6 +682,16 @@ static inline struct mlx5_ib_qp *to_mqp(struct ib_qp *ibqp) return container_of(ibqp, struct mlx5_ib_qp, ibqp); } +static inline struct mlx5_ib_rwq *to_mrwq(struct ib_wq *ibwq) +{ + return container_of(ibwq, struct mlx5_ib_rwq, ibwq); +} + +static inline struct mlx5_ib_rwq_ind_table *to_mrwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) +{ + return container_of(ib_rwq_ind_tbl, struct mlx5_ib_rwq_ind_table, ib_rwq_ind_tbl); +} + static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq) { return container_of(msrq, struct mlx5_ib_srq, msrq); @@ -762,6 +826,16 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); +struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata); +int mlx5_ib_destroy_wq(struct ib_wq *wq); +int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, + u32 wq_attr_mask, struct ib_udata *udata); +struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata); +int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING extern struct workqueue_struct *mlx5_ib_page_fault_wq; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 8cf2ce50511f..4b021305c321 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1193,12 +1193,16 @@ error: static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { + struct mlx5_core_dev *mdev = dev->mdev; struct umr_common *umrc = &dev->umrc; struct mlx5_ib_umr_context umr_context; struct mlx5_umr_wr umrwr = {}; struct ib_send_wr *bad; int err; + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + return 0; + mlx5_ib_init_umr_context(&umr_context); umrwr.wr.wr_cqe = &umr_context.cqe; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index ce0a7ab35a22..0dd7d93cac95 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -77,6 +77,10 @@ struct mlx5_wqe_eth_pad { u8 rsvd0[16]; }; +static void get_cqs(enum ib_qp_type qp_type, + struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq, + struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq); + static int is_qp0(enum ib_qp_type qp_type) { return qp_type == IB_QPT_SMI; @@ -609,6 +613,11 @@ static int to_mlx5_st(enum ib_qp_type type) } } +static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, + struct mlx5_ib_cq *recv_cq); +static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, + struct mlx5_ib_cq *recv_cq); + static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn) { return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index; @@ -649,6 +658,71 @@ err_umem: return err; } +static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq) +{ + struct mlx5_ib_ucontext *context; + + context = to_mucontext(pd->uobject->context); + mlx5_ib_db_unmap_user(context, &rwq->db); + if (rwq->umem) + ib_umem_release(rwq->umem); +} + +static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct mlx5_ib_rwq *rwq, + struct mlx5_ib_create_wq *ucmd) +{ + struct mlx5_ib_ucontext *context; + int page_shift = 0; + int npages; + u32 offset = 0; + int ncont = 0; + int err; + + if (!ucmd->buf_addr) + return -EINVAL; + + context = to_mucontext(pd->uobject->context); + rwq->umem = ib_umem_get(pd->uobject->context, ucmd->buf_addr, + rwq->buf_size, 0, 0); + if (IS_ERR(rwq->umem)) { + mlx5_ib_dbg(dev, "umem_get failed\n"); + err = PTR_ERR(rwq->umem); + return err; + } + + mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, &npages, &page_shift, + &ncont, NULL); + err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, + &rwq->rq_page_offset); + if (err) { + mlx5_ib_warn(dev, "bad offset\n"); + goto err_umem; + } + + rwq->rq_num_pas = ncont; + rwq->page_shift = page_shift; + rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE); + + mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n", + (unsigned long long)ucmd->buf_addr, rwq->buf_size, + npages, page_shift, ncont, offset); + + err = mlx5_ib_db_map_user(context, ucmd->db_addr, &rwq->db); + if (err) { + mlx5_ib_dbg(dev, "map failed\n"); + goto err_umem; + } + + rwq->create_type = MLX5_WQ_USER; + return 0; + +err_umem: + ib_umem_release(rwq->umem); + return err; +} + static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_qp *qp, struct ib_udata *udata, struct ib_qp_init_attr *attr, @@ -1201,6 +1275,187 @@ static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp, rq->doorbell = &qp->db; } +static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) +{ + mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn); +} + +static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct ib_uobject *uobj = pd->uobject; + struct ib_ucontext *ucontext = uobj->context; + struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext); + struct mlx5_ib_create_qp_resp resp = {}; + int inlen; + int err; + u32 *in; + void *tirc; + void *hfso; + u32 selected_fields = 0; + size_t min_resp_len; + u32 tdn = mucontext->tdn; + struct mlx5_ib_create_qp_rss ucmd = {}; + size_t required_cmd_sz; + + if (init_attr->qp_type != IB_QPT_RAW_PACKET) + return -EOPNOTSUPP; + + if (init_attr->create_flags || init_attr->send_cq) + return -EINVAL; + + min_resp_len = offsetof(typeof(resp), uuar_index) + sizeof(resp.uuar_index); + if (udata->outlen < min_resp_len) + return -EINVAL; + + required_cmd_sz = offsetof(typeof(ucmd), reserved1) + sizeof(ucmd.reserved1); + if (udata->inlen < required_cmd_sz) { + mlx5_ib_dbg(dev, "invalid inlen\n"); + return -EINVAL; + } + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) { + mlx5_ib_dbg(dev, "inlen is not supported\n"); + return -EOPNOTSUPP; + } + + if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) { + mlx5_ib_dbg(dev, "copy failed\n"); + return -EFAULT; + } + + if (ucmd.comp_mask) { + mlx5_ib_dbg(dev, "invalid comp mask\n"); + return -EOPNOTSUPP; + } + + if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved)) || ucmd.reserved1) { + mlx5_ib_dbg(dev, "invalid reserved\n"); + return -EOPNOTSUPP; + } + + err = ib_copy_to_udata(udata, &resp, min_resp_len); + if (err) { + mlx5_ib_dbg(dev, "copy failed\n"); + return -EINVAL; + } + + inlen = MLX5_ST_SZ_BYTES(create_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + MLX5_SET(tirc, tirc, disp_type, + MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, + init_attr->rwq_ind_tbl->ind_tbl_num); + MLX5_SET(tirc, tirc, transport_domain, tdn); + + hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + switch (ucmd.rx_hash_function) { + case MLX5_RX_HASH_FUNC_TOEPLITZ: + { + void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); + size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key); + + if (len != ucmd.rx_key_len) { + err = -EINVAL; + goto err; + } + + MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ); + MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); + memcpy(rss_key, ucmd.rx_hash_key, len); + break; + } + default: + err = -EOPNOTSUPP; + goto err; + } + + if (!ucmd.rx_hash_fields_mask) { + /* special case when this TIR serves as steering entry without hashing */ + if (!init_attr->rwq_ind_tbl->log_ind_tbl_size) + goto create_tir; + err = -EINVAL; + goto err; + } + + if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) && + ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) { + err = -EINVAL; + goto err; + } + + /* If none of IPV4 & IPV6 SRC/DST was set - this bit field is ignored */ + if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6)) + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + + if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) && + ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))) { + err = -EINVAL; + goto err; + } + + /* If none of TCP & UDP SRC/DST was set - this bit field is ignored */ + if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_TCP); + else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_UDP); + + if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6)) + selected_fields |= MLX5_HASH_FIELD_SEL_SRC_IP; + + if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6)) + selected_fields |= MLX5_HASH_FIELD_SEL_DST_IP; + + if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP)) + selected_fields |= MLX5_HASH_FIELD_SEL_L4_SPORT; + + if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) + selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT; + + MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields); + +create_tir: + err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn); + + if (err) + goto err; + + kvfree(in); + /* qpn is reserved for that QP */ + qp->trans_qp.base.mqp.qpn = 0; + return 0; + +err: + kvfree(in); + return err; +} + static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, struct mlx5_ib_qp *qp) @@ -1211,6 +1466,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_create_qp_resp resp; struct mlx5_create_qp_mbox_in *in; struct mlx5_ib_create_qp ucmd; + struct mlx5_ib_cq *send_cq; + struct mlx5_ib_cq *recv_cq; + unsigned long flags; int inlen = sizeof(*in); int err; u32 uidx = MLX5_IB_DEFAULT_UIDX; @@ -1227,6 +1485,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); + if (init_attr->rwq_ind_tbl) { + if (!udata) + return -ENOSYS; + + err = create_rss_raw_qp_tir(dev, qp, pd, init_attr, udata); + return err; + } + if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { if (!MLX5_CAP_GEN(mdev, block_lb_mc)) { mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n"); @@ -1460,6 +1726,23 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, base->container_mibqp = qp; base->mqp.event = mlx5_ib_qp_event; + get_cqs(init_attr->qp_type, init_attr->send_cq, init_attr->recv_cq, + &send_cq, &recv_cq); + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + mlx5_ib_lock_cqs(send_cq, recv_cq); + /* Maintain device to QPs access, needed for further handling via reset + * flow + */ + list_add_tail(&qp->qps_list, &dev->qp_list); + /* Maintain CQ to QPs access, needed for further handling via reset flow + */ + if (send_cq) + list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp); + if (recv_cq) + list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp); + mlx5_ib_unlock_cqs(send_cq, recv_cq); + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); + return 0; err_create: @@ -1478,23 +1761,23 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv if (send_cq) { if (recv_cq) { if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); __acquire(&recv_cq->lock); } else { - spin_lock_irq(&recv_cq->lock); + spin_lock(&recv_cq->lock); spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); } } else { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); __acquire(&recv_cq->lock); } } else if (recv_cq) { - spin_lock_irq(&recv_cq->lock); + spin_lock(&recv_cq->lock); __acquire(&send_cq->lock); } else { __acquire(&send_cq->lock); @@ -1509,21 +1792,21 @@ static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *re if (recv_cq) { if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { spin_unlock(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&send_cq->lock); } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) { __release(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&send_cq->lock); } else { spin_unlock(&send_cq->lock); - spin_unlock_irq(&recv_cq->lock); + spin_unlock(&recv_cq->lock); } } else { __release(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&send_cq->lock); } } else if (recv_cq) { __release(&send_cq->lock); - spin_unlock_irq(&recv_cq->lock); + spin_unlock(&recv_cq->lock); } else { __release(&recv_cq->lock); __release(&send_cq->lock); @@ -1535,17 +1818,18 @@ static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp) return to_mpd(qp->ibqp.pd); } -static void get_cqs(struct mlx5_ib_qp *qp, +static void get_cqs(enum ib_qp_type qp_type, + struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq, struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq) { - switch (qp->ibqp.qp_type) { + switch (qp_type) { case IB_QPT_XRC_TGT: *send_cq = NULL; *recv_cq = NULL; break; case MLX5_IB_QPT_REG_UMR: case IB_QPT_XRC_INI: - *send_cq = to_mcq(qp->ibqp.send_cq); + *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL; *recv_cq = NULL; break; @@ -1557,8 +1841,8 @@ static void get_cqs(struct mlx5_ib_qp *qp, case IB_QPT_RAW_IPV6: case IB_QPT_RAW_ETHERTYPE: case IB_QPT_RAW_PACKET: - *send_cq = to_mcq(qp->ibqp.send_cq); - *recv_cq = to_mcq(qp->ibqp.recv_cq); + *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL; + *recv_cq = ib_recv_cq ? to_mcq(ib_recv_cq) : NULL; break; case IB_QPT_MAX: @@ -1577,8 +1861,14 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_ib_qp_base *base = &qp->trans_qp.base; struct mlx5_modify_qp_mbox_in *in; + unsigned long flags; int err; + if (qp->ibqp.rwq_ind_tbl) { + destroy_rss_raw_qp_tir(dev, qp); + return; + } + base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ? &qp->raw_packet_qp.rq.base : &qp->trans_qp.base; @@ -1602,17 +1892,28 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) base->mqp.qpn); } - get_cqs(qp, &send_cq, &recv_cq); + get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq, + &send_cq, &recv_cq); + + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + mlx5_ib_lock_cqs(send_cq, recv_cq); + /* del from lists under both locks above to protect reset flow paths */ + list_del(&qp->qps_list); + if (send_cq) + list_del(&qp->cq_send_list); + + if (recv_cq) + list_del(&qp->cq_recv_list); if (qp->create_type == MLX5_QP_KERNEL) { - mlx5_ib_lock_cqs(send_cq, recv_cq); __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); if (send_cq != recv_cq) __mlx5_ib_cq_clean(send_cq, base->mqp.qpn, NULL); - mlx5_ib_unlock_cqs(send_cq, recv_cq); } + mlx5_ib_unlock_cqs(send_cq, recv_cq); + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { destroy_raw_packet_qp(dev, qp); @@ -2300,7 +2601,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } pd = get_pd(qp); - get_cqs(qp, &send_cq, &recv_cq); + get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq, + &send_cq, &recv_cq); context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn); context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0; @@ -2349,6 +2651,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, else sqd_event = 0; + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num : + qp->port) - 1; + struct mlx5_ib_port *mibport = &dev->port[port_num]; + + context->qp_counter_set_usr_page |= + cpu_to_be32((u32)(mibport->q_cnt_id) << 24); + } + if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->sq_crq_size |= cpu_to_be16(1 << 4); @@ -2439,6 +2750,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int port; enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; + if (ibqp->rwq_ind_tbl) + return -ENOSYS; + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask); @@ -3397,6 +3711,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, { struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_qp *qp; struct mlx5_ib_mr *mr; struct mlx5_wqe_data_seg *dpseg; @@ -3424,6 +3739,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qp->sq.lock, flags); + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + err = -EIO; + *bad_wr = wr; + nreq = 0; + goto out; + } + for (nreq = 0; wr; nreq++, wr = wr->next) { if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) { mlx5_ib_warn(dev, "\n"); @@ -3725,6 +4047,8 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_wqe_data_seg *scat; struct mlx5_rwqe_sig *sig; + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_core_dev *mdev = dev->mdev; unsigned long flags; int err = 0; int nreq; @@ -3736,6 +4060,13 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, spin_lock_irqsave(&qp->rq.lock, flags); + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + err = -EIO; + *bad_wr = wr; + nreq = 0; + goto out; + } + ind = qp->rq.head & (qp->rq.wqe_cnt - 1); for (nreq = 0; wr; nreq++, wr = wr->next) { @@ -4055,6 +4386,9 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int err = 0; u8 raw_packet_qp_state; + if (ibqp->rwq_ind_tbl) + return -ENOSYS; + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr); @@ -4164,3 +4498,322 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd) return 0; } + +static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, + struct ib_wq_init_attr *init_attr) +{ + struct mlx5_ib_dev *dev; + __be64 *rq_pas0; + void *in; + void *rqc; + void *wq; + int inlen; + int err; + + dev = to_mdev(pd->device); + + inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rwq->rq_num_pas; + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); + MLX5_SET(rqc, rqc, mem_rq_type, + MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE); + MLX5_SET(rqc, rqc, user_index, rwq->user_index); + MLX5_SET(rqc, rqc, cqn, to_mcq(init_attr->cq)->mcq.cqn); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); + MLX5_SET(rqc, rqc, flush_in_error_en, 1); + wq = MLX5_ADDR_OF(rqc, rqc, wq); + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); + MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride); + MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size); + MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn); + MLX5_SET(wq, wq, page_offset, rwq->rq_page_offset); + MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size); + MLX5_SET(wq, wq, wq_signature, rwq->wq_sig); + MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma); + rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); + mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0); + err = mlx5_core_create_rq(dev->mdev, in, inlen, &rwq->rqn); + kvfree(in); + return err; +} + +static int set_user_rq_size(struct mlx5_ib_dev *dev, + struct ib_wq_init_attr *wq_init_attr, + struct mlx5_ib_create_wq *ucmd, + struct mlx5_ib_rwq *rwq) +{ + /* Sanity check RQ size before proceeding */ + if (wq_init_attr->max_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_wq_sz))) + return -EINVAL; + + if (!ucmd->rq_wqe_count) + return -EINVAL; + + rwq->wqe_count = ucmd->rq_wqe_count; + rwq->wqe_shift = ucmd->rq_wqe_shift; + rwq->buf_size = (rwq->wqe_count << rwq->wqe_shift); + rwq->log_rq_stride = rwq->wqe_shift; + rwq->log_rq_size = ilog2(rwq->wqe_count); + return 0; +} + +static int prepare_user_rq(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata, + struct mlx5_ib_rwq *rwq) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_create_wq ucmd = {}; + int err; + size_t required_cmd_sz; + + required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved); + if (udata->inlen < required_cmd_sz) { + mlx5_ib_dbg(dev, "invalid inlen\n"); + return -EINVAL; + } + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) { + mlx5_ib_dbg(dev, "inlen is not supported\n"); + return -EOPNOTSUPP; + } + + if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) { + mlx5_ib_dbg(dev, "copy failed\n"); + return -EFAULT; + } + + if (ucmd.comp_mask) { + mlx5_ib_dbg(dev, "invalid comp mask\n"); + return -EOPNOTSUPP; + } + + if (ucmd.reserved) { + mlx5_ib_dbg(dev, "invalid reserved\n"); + return -EOPNOTSUPP; + } + + err = set_user_rq_size(dev, init_attr, &ucmd, rwq); + if (err) { + mlx5_ib_dbg(dev, "err %d\n", err); + return err; + } + + err = create_user_rq(dev, pd, rwq, &ucmd); + if (err) { + mlx5_ib_dbg(dev, "err %d\n", err); + if (err) + return err; + } + + rwq->user_index = ucmd.user_index; + return 0; +} + +struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev; + struct mlx5_ib_rwq *rwq; + struct mlx5_ib_create_wq_resp resp = {}; + size_t min_resp_len; + int err; + + if (!udata) + return ERR_PTR(-ENOSYS); + + min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); + if (udata->outlen && udata->outlen < min_resp_len) + return ERR_PTR(-EINVAL); + + dev = to_mdev(pd->device); + switch (init_attr->wq_type) { + case IB_WQT_RQ: + rwq = kzalloc(sizeof(*rwq), GFP_KERNEL); + if (!rwq) + return ERR_PTR(-ENOMEM); + err = prepare_user_rq(pd, init_attr, udata, rwq); + if (err) + goto err; + err = create_rq(rwq, pd, init_attr); + if (err) + goto err_user_rq; + break; + default: + mlx5_ib_dbg(dev, "unsupported wq type %d\n", + init_attr->wq_type); + return ERR_PTR(-EINVAL); + } + + rwq->ibwq.wq_num = rwq->rqn; + rwq->ibwq.state = IB_WQS_RESET; + if (udata->outlen) { + resp.response_length = offsetof(typeof(resp), response_length) + + sizeof(resp.response_length); + err = ib_copy_to_udata(udata, &resp, resp.response_length); + if (err) + goto err_copy; + } + + return &rwq->ibwq; + +err_copy: + mlx5_core_destroy_rq(dev->mdev, rwq->rqn); +err_user_rq: + destroy_user_rq(pd, rwq); +err: + kfree(rwq); + return ERR_PTR(err); +} + +int mlx5_ib_destroy_wq(struct ib_wq *wq) +{ + struct mlx5_ib_dev *dev = to_mdev(wq->device); + struct mlx5_ib_rwq *rwq = to_mrwq(wq); + + mlx5_core_destroy_rq(dev->mdev, rwq->rqn); + destroy_user_rq(wq->pd, rwq); + kfree(rwq); + + return 0; +} + +struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_ib_rwq_ind_table *rwq_ind_tbl; + int sz = 1 << init_attr->log_ind_tbl_size; + struct mlx5_ib_create_rwq_ind_tbl_resp resp = {}; + size_t min_resp_len; + int inlen; + int err; + int i; + u32 *in; + void *rqtc; + + if (udata->inlen > 0 && + !ib_is_udata_cleared(udata, 0, + udata->inlen)) + return ERR_PTR(-EOPNOTSUPP); + + min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); + if (udata->outlen && udata->outlen < min_resp_len) + return ERR_PTR(-EINVAL); + + rwq_ind_tbl = kzalloc(sizeof(*rwq_ind_tbl), GFP_KERNEL); + if (!rwq_ind_tbl) + return ERR_PTR(-ENOMEM); + + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; + in = mlx5_vzalloc(inlen); + if (!in) { + err = -ENOMEM; + goto err; + } + + rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); + + MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); + MLX5_SET(rqtc, rqtc, rqt_max_size, sz); + + for (i = 0; i < sz; i++) + MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num); + + err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn); + kvfree(in); + + if (err) + goto err; + + rwq_ind_tbl->ib_rwq_ind_tbl.ind_tbl_num = rwq_ind_tbl->rqtn; + if (udata->outlen) { + resp.response_length = offsetof(typeof(resp), response_length) + + sizeof(resp.response_length); + err = ib_copy_to_udata(udata, &resp, resp.response_length); + if (err) + goto err_copy; + } + + return &rwq_ind_tbl->ib_rwq_ind_tbl; + +err_copy: + mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn); +err: + kfree(rwq_ind_tbl); + return ERR_PTR(err); +} + +int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) +{ + struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl); + struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device); + + mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn); + + kfree(rwq_ind_tbl); + return 0; +} + +int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, + u32 wq_attr_mask, struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(wq->device); + struct mlx5_ib_rwq *rwq = to_mrwq(wq); + struct mlx5_ib_modify_wq ucmd = {}; + size_t required_cmd_sz; + int curr_wq_state; + int wq_state; + int inlen; + int err; + void *rqc; + void *in; + + required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved); + if (udata->inlen < required_cmd_sz) + return -EINVAL; + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) + return -EOPNOTSUPP; + + if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) + return -EFAULT; + + if (ucmd.comp_mask || ucmd.reserved) + return -EOPNOTSUPP; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + curr_wq_state = (wq_attr_mask & IB_WQ_CUR_STATE) ? + wq_attr->curr_wq_state : wq->state; + wq_state = (wq_attr_mask & IB_WQ_STATE) ? + wq_attr->wq_state : curr_wq_state; + if (curr_wq_state == IB_WQS_ERR) + curr_wq_state = MLX5_RQC_STATE_ERR; + if (wq_state == IB_WQS_ERR) + wq_state = MLX5_RQC_STATE_ERR; + MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state); + MLX5_SET(rqc, rqc, state, wq_state); + + err = mlx5_core_modify_rq(dev->mdev, rwq->rqn, in, inlen); + kvfree(in); + if (!err) + rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state; + + return err; +} diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 3b2ddd64a371..ed6ac52355f1 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -74,14 +74,12 @@ static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type) } static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, - struct mlx5_create_srq_mbox_in **in, - struct ib_udata *udata, int buf_size, int *inlen, - int is_xrc) + struct mlx5_srq_attr *in, + struct ib_udata *udata, int buf_size) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_create_srq ucmd = {}; size_t ucmdlen; - void *xsrqc; int err; int npages; int page_shift; @@ -104,7 +102,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, udata->inlen - sizeof(ucmd))) return -EINVAL; - if (is_xrc) { + if (in->type == IB_SRQT_XRC) { err = get_srq_user_index(to_mucontext(pd->uobject->context), &ucmd, udata->inlen, &uidx); if (err) @@ -130,14 +128,13 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, goto err_umem; } - *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont; - *in = mlx5_vzalloc(*inlen); - if (!(*in)) { + in->pas = mlx5_vzalloc(sizeof(*in->pas) * ncont); + if (!in->pas) { err = -ENOMEM; goto err_umem; } - mlx5_ib_populate_pas(dev, srq->umem, page_shift, (*in)->pas, 0); + mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0); err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context), ucmd.db_addr, &srq->db); @@ -146,20 +143,16 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, goto err_in; } - (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; - (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26); - - if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) && - is_xrc){ - xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in, - xrc_srq_context_entry); - MLX5_SET(xrc_srqc, xsrqc, user_index, uidx); - } + in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + in->page_offset = offset; + if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && + in->type == IB_SRQT_XRC) + in->user_index = uidx; return 0; err_in: - kvfree(*in); + kvfree(in->pas); err_umem: ib_umem_release(srq->umem); @@ -168,15 +161,13 @@ err_umem: } static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, - struct mlx5_create_srq_mbox_in **in, int buf_size, - int *inlen, int is_xrc) + struct mlx5_srq_attr *in, int buf_size) { int err; int i; struct mlx5_wqe_srq_next_seg *next; int page_shift; int npages; - void *xsrqc; err = mlx5_db_alloc(dev->mdev, &srq->db); if (err) { @@ -204,13 +195,12 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT)); mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n", buf_size, page_shift, srq->buf.npages, npages); - *inlen = sizeof(**in) + sizeof(*(*in)->pas) * npages; - *in = mlx5_vzalloc(*inlen); - if (!*in) { + in->pas = mlx5_vzalloc(sizeof(*in->pas) * npages); + if (!in->pas) { err = -ENOMEM; goto err_buf; } - mlx5_fill_page_array(&srq->buf, (*in)->pas); + mlx5_fill_page_array(&srq->buf, in->pas); srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL); if (!srq->wrid) { @@ -221,20 +211,15 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, } srq->wq_sig = !!srq_signature; - (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; - - if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) && - is_xrc){ - xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in, - xrc_srq_context_entry); - /* 0xffffff means we ask to work with cqe version 0 */ - MLX5_SET(xrc_srqc, xsrqc, user_index, MLX5_IB_DEFAULT_UIDX); - } + in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && + in->type == IB_SRQT_XRC) + in->user_index = MLX5_IB_DEFAULT_UIDX; return 0; err_in: - kvfree(*in); + kvfree(in->pas); err_buf: mlx5_buf_free(dev->mdev, &srq->buf); @@ -267,10 +252,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, int desc_size; int buf_size; int err; - struct mlx5_create_srq_mbox_in *uninitialized_var(in); - int uninitialized_var(inlen); - int is_xrc; - u32 flgs, xrcdn; + struct mlx5_srq_attr in = {0}; __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); /* Sanity check SRQ size before proceeding */ @@ -302,14 +284,10 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs, srq->msrq.max_avail_gather); - is_xrc = (init_attr->srq_type == IB_SRQT_XRC); - if (pd->uobject) - err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen, - is_xrc); + err = create_srq_user(pd, srq, &in, udata, buf_size); else - err = create_srq_kernel(dev, srq, &in, buf_size, &inlen, - is_xrc); + err = create_srq_kernel(dev, srq, &in, buf_size); if (err) { mlx5_ib_warn(dev, "create srq %s failed, err %d\n", @@ -317,23 +295,23 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, goto err_srq; } - in->ctx.state_log_sz = ilog2(srq->msrq.max); - flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24; - xrcdn = 0; - if (is_xrc) { - xrcdn = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn; - in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(init_attr->ext.xrc.cq)->mcq.cqn); + in.type = init_attr->srq_type; + in.log_size = ilog2(srq->msrq.max); + in.wqe_shift = srq->msrq.wqe_shift - 4; + if (srq->wq_sig) + in.flags |= MLX5_SRQ_FLAG_WQ_SIG; + if (init_attr->srq_type == IB_SRQT_XRC) { + in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn; + in.cqn = to_mcq(init_attr->ext.xrc.cq)->mcq.cqn; } else if (init_attr->srq_type == IB_SRQT_BASIC) { - xrcdn = to_mxrcd(dev->devr.x0)->xrcdn; - in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(dev->devr.c0)->mcq.cqn); + in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn; + in.cqn = to_mcq(dev->devr.c0)->mcq.cqn; } - in->ctx.flags_xrcd = cpu_to_be32((flgs & 0xFF000000) | (xrcdn & 0xFFFFFF)); - - in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn); - in->ctx.db_record = cpu_to_be64(srq->db.dma); - err = mlx5_core_create_srq(dev->mdev, &srq->msrq, in, inlen, is_xrc); - kvfree(in); + in.pd = to_mpd(pd)->pdn; + in.db_record = srq->db.dma; + err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in); + kvfree(in.pas); if (err) { mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err); goto err_usr_kern_srq; @@ -401,7 +379,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) struct mlx5_ib_dev *dev = to_mdev(ibsrq->device); struct mlx5_ib_srq *srq = to_msrq(ibsrq); int ret; - struct mlx5_query_srq_mbox_out *out; + struct mlx5_srq_attr *out; out = kzalloc(sizeof(*out), GFP_KERNEL); if (!out) @@ -411,7 +389,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) if (ret) goto out_box; - srq_attr->srq_limit = be16_to_cpu(out->ctx.lwm); + srq_attr->srq_limit = out->lwm; srq_attr->max_wr = srq->msrq.max - 1; srq_attr->max_sge = srq->msrq.max_gs; @@ -458,6 +436,8 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct mlx5_ib_srq *srq = to_msrq(ibsrq); struct mlx5_wqe_srq_next_seg *next; struct mlx5_wqe_data_seg *scat; + struct mlx5_ib_dev *dev = to_mdev(ibsrq->device); + struct mlx5_core_dev *mdev = dev->mdev; unsigned long flags; int err = 0; int nreq; @@ -465,6 +445,12 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, spin_lock_irqsave(&srq->lock, flags); + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + err = -EIO; + *bad_wr = wr; + goto out; + } + for (nreq = 0; wr; nreq++, wr = wr->next) { if (unlikely(wr->num_sge > srq->msrq.max_gs)) { err = -EINVAL; @@ -507,7 +493,7 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, *srq->db.db = cpu_to_be32(srq->wqe_ctr); } - +out: spin_unlock_irqrestore(&srq->lock, flags); return err; diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index 61bc308bb802..188dac4301b5 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -46,6 +46,10 @@ enum { MLX5_SRQ_FLAG_SIGNATURE = 1 << 0, }; +enum { + MLX5_WQ_FLAG_SIGNATURE = 1 << 0, +}; + /* Increment this value if any changes that break userspace ABI * compatibility are made. @@ -79,6 +83,10 @@ enum mlx5_ib_alloc_ucontext_resp_mask { MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0, }; +enum mlx5_user_cmds_supp_uhw { + MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0, +}; + struct mlx5_ib_alloc_ucontext_resp { __u32 qp_tab_size; __u32 bf_reg_size; @@ -94,8 +102,8 @@ struct mlx5_ib_alloc_ucontext_resp { __u32 comp_mask; __u32 response_length; __u8 cqe_version; - __u8 reserved2; - __u16 reserved3; + __u8 cmds_supp_uhw; + __u16 reserved2; __u64 hca_core_clock_offset; }; @@ -103,6 +111,22 @@ struct mlx5_ib_alloc_pd_resp { __u32 pdn; }; +struct mlx5_ib_tso_caps { + __u32 max_tso; /* Maximum tso payload size in bytes */ + + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. + * supported_qpts |= 1 << IB_QPT_UD + */ + __u32 supported_qpts; +}; + +struct mlx5_ib_query_device_resp { + __u32 comp_mask; + __u32 response_length; + struct mlx5_ib_tso_caps tso_caps; +}; + struct mlx5_ib_create_cq { __u64 buf_addr; __u64 db_addr; @@ -148,6 +172,40 @@ struct mlx5_ib_create_qp { __u64 sq_buf_addr; }; +/* RX Hash function flags */ +enum mlx5_rx_hash_function_flags { + MLX5_RX_HASH_FUNC_TOEPLITZ = 1 << 0, +}; + +/* + * RX Hash flags, these flags allows to set which incoming packet's field should + * participates in RX Hash. Each flag represent certain packet's field, + * when the flag is set the field that is represented by the flag will + * participate in RX Hash calculation. + * Note: *IPV4 and *IPV6 flags can't be enabled together on the same QP + * and *TCP and *UDP flags can't be enabled together on the same QP. +*/ +enum mlx5_rx_hash_fields { + MLX5_RX_HASH_SRC_IPV4 = 1 << 0, + MLX5_RX_HASH_DST_IPV4 = 1 << 1, + MLX5_RX_HASH_SRC_IPV6 = 1 << 2, + MLX5_RX_HASH_DST_IPV6 = 1 << 3, + MLX5_RX_HASH_SRC_PORT_TCP = 1 << 4, + MLX5_RX_HASH_DST_PORT_TCP = 1 << 5, + MLX5_RX_HASH_SRC_PORT_UDP = 1 << 6, + MLX5_RX_HASH_DST_PORT_UDP = 1 << 7 +}; + +struct mlx5_ib_create_qp_rss { + __u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */ + __u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */ + __u8 rx_key_len; /* valid only for Toeplitz */ + __u8 reserved[6]; + __u8 rx_hash_key[128]; /* valid only for Toeplitz */ + __u32 comp_mask; + __u32 reserved1; +}; + struct mlx5_ib_create_qp_resp { __u32 uuar_index; }; @@ -159,6 +217,32 @@ struct mlx5_ib_alloc_mw { __u16 reserved2; }; +struct mlx5_ib_create_wq { + __u64 buf_addr; + __u64 db_addr; + __u32 rq_wqe_count; + __u32 rq_wqe_shift; + __u32 user_index; + __u32 flags; + __u32 comp_mask; + __u32 reserved; +}; + +struct mlx5_ib_create_wq_resp { + __u32 response_length; + __u32 reserved; +}; + +struct mlx5_ib_create_rwq_ind_tbl_resp { + __u32 response_length; + __u32 reserved; +}; + +struct mlx5_ib_modify_wq { + __u32 comp_mask; + __u32 reserved; +}; + static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext, struct mlx5_ib_create_qp *ucmd, int inlen, diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 9866c35cc977..da2335f7f7c3 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1081,16 +1081,6 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr, return sprintf(buf, "%x\n", dev->rev_id); } -static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, - char *buf) -{ - struct mthca_dev *dev = - container_of(device, struct mthca_dev, ib_dev.dev); - return sprintf(buf, "%d.%d.%d\n", (int) (dev->fw_ver >> 32), - (int) (dev->fw_ver >> 16) & 0xffff, - (int) dev->fw_ver & 0xffff); -} - static ssize_t show_hca(struct device *device, struct device_attribute *attr, char *buf) { @@ -1120,13 +1110,11 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr, } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct device_attribute *mthca_dev_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id }; @@ -1187,6 +1175,17 @@ static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_dev_fw_str(struct ib_device *device, char *str, + size_t str_len) +{ + struct mthca_dev *dev = + container_of(device, struct mthca_dev, ib_dev); + snprintf(str, str_len, "%d.%d.%d", + (int) (dev->fw_ver >> 32), + (int) (dev->fw_ver >> 16) & 0xffff, + (int) dev->fw_ver & 0xffff); +} + int mthca_register_device(struct mthca_dev *dev) { int ret; @@ -1266,6 +1265,7 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.reg_user_mr = mthca_reg_user_mr; dev->ib_dev.dereg_mr = mthca_dereg_mr; dev->ib_dev.get_port_immutable = mthca_port_immutable; + dev->ib_dev.get_dev_fw_str = get_dev_fw_str; if (dev->mthca_flags & MTHCA_FLAG_FMR) { dev->ib_dev.alloc_fmr = mthca_alloc_fmr; diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c index 74c6a9426047..6727af27c017 100644 --- a/drivers/infiniband/hw/mthca/mthca_reset.c +++ b/drivers/infiniband/hw/mthca/mthca_reset.c @@ -98,7 +98,7 @@ int mthca_reset(struct mthca_dev *mdev) err = -ENOMEM; mthca_err(mdev, "Couldn't allocate memory to save HCA " "PCI header, aborting.\n"); - goto out; + goto put_dev; } for (i = 0; i < 64; ++i) { @@ -108,7 +108,7 @@ int mthca_reset(struct mthca_dev *mdev) err = -ENODEV; mthca_err(mdev, "Couldn't save HCA " "PCI header, aborting.\n"); - goto out; + goto free_hca; } } @@ -121,7 +121,7 @@ int mthca_reset(struct mthca_dev *mdev) err = -ENOMEM; mthca_err(mdev, "Couldn't allocate memory to save HCA " "bridge PCI header, aborting.\n"); - goto out; + goto free_hca; } for (i = 0; i < 64; ++i) { @@ -131,7 +131,7 @@ int mthca_reset(struct mthca_dev *mdev) err = -ENODEV; mthca_err(mdev, "Couldn't save HCA bridge " "PCI header, aborting.\n"); - goto out; + goto free_bh; } } bridge_pcix_cap = pci_find_capability(bridge, PCI_CAP_ID_PCIX); @@ -139,7 +139,7 @@ int mthca_reset(struct mthca_dev *mdev) err = -ENODEV; mthca_err(mdev, "Couldn't locate HCA bridge " "PCI-X capability, aborting.\n"); - goto out; + goto free_bh; } } @@ -152,7 +152,7 @@ int mthca_reset(struct mthca_dev *mdev) err = -ENOMEM; mthca_err(mdev, "Couldn't map HCA reset register, " "aborting.\n"); - goto out; + goto free_bh; } writel(MTHCA_RESET_VALUE, reset); @@ -172,7 +172,7 @@ int mthca_reset(struct mthca_dev *mdev) err = -ENODEV; mthca_err(mdev, "Couldn't access HCA after reset, " "aborting.\n"); - goto out; + goto free_bh; } if (v != 0xffffffff) @@ -184,7 +184,7 @@ int mthca_reset(struct mthca_dev *mdev) err = -ENODEV; mthca_err(mdev, "PCI device did not come back after reset, " "aborting.\n"); - goto out; + goto free_bh; } good: @@ -195,14 +195,14 @@ good: err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA bridge Upstream " "split transaction control, aborting.\n"); - goto out; + goto free_bh; } if (pci_write_config_dword(bridge, bridge_pcix_cap + 0xc, bridge_header[(bridge_pcix_cap + 0xc) / 4])) { err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA bridge Downstream " "split transaction control, aborting.\n"); - goto out; + goto free_bh; } /* * Bridge control register is at 0x3e, so we'll @@ -216,7 +216,7 @@ good: err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA bridge reg %x, " "aborting.\n", i); - goto out; + goto free_bh; } } @@ -225,7 +225,7 @@ good: err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA bridge COMMAND, " "aborting.\n"); - goto out; + goto free_bh; } } @@ -235,7 +235,7 @@ good: err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA PCI-X " "command register, aborting.\n"); - goto out; + goto free_bh; } } @@ -246,7 +246,7 @@ good: err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA PCI Express " "Device Control register, aborting.\n"); - goto out; + goto free_bh; } linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4]; if (pcie_capability_write_word(mdev->pdev, PCI_EXP_LNKCTL, @@ -254,7 +254,7 @@ good: err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA PCI Express " "Link control register, aborting.\n"); - goto out; + goto free_bh; } } @@ -266,7 +266,7 @@ good: err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA reg %x, " "aborting.\n", i); - goto out; + goto free_bh; } } @@ -275,14 +275,12 @@ good: err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA COMMAND, " "aborting.\n"); - goto out; } - -out: - if (bridge) - pci_dev_put(bridge); +free_bh: kfree(bridge_header); +free_hca: kfree(hca_header); - +put_dev: + pci_dev_put(bridge); return err; } diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 464d6da5fe91..bd69125731c1 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -2606,23 +2606,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr, /** - * show_fw_ver - */ -static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct nes_ib_device *nesibdev = - container_of(dev, struct nes_ib_device, ibdev.dev); - struct nes_vnic *nesvnic = nesibdev->nesvnic; - - nes_debug(NES_DBG_INIT, "\n"); - return sprintf(buf, "%u.%u\n", - (nesvnic->nesdev->nesadapter->firmware_version >> 16), - (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff)); -} - - -/** * show_hca */ static ssize_t show_hca(struct device *dev, struct device_attribute *attr, @@ -2645,13 +2628,11 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct device_attribute *nes_dev_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id }; @@ -3703,6 +3684,19 @@ static int nes_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_dev_fw_str(struct ib_device *dev, char *str, + size_t str_len) +{ + struct nes_ib_device *nesibdev = + container_of(dev, struct nes_ib_device, ibdev); + struct nes_vnic *nesvnic = nesibdev->nesvnic; + + nes_debug(NES_DBG_INIT, "\n"); + snprintf(str, str_len, "%u.%u", + (nesvnic->nesdev->nesadapter->firmware_version >> 16), + (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff)); +} + /** * nes_init_ofa_device */ @@ -3802,6 +3796,7 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.iwcm->create_listen = nes_create_listen; nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen; nesibdev->ibdev.get_port_immutable = nes_port_immutable; + nesibdev->ibdev.get_dev_fw_str = get_dev_fw_str; memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name, sizeof(nesibdev->ibdev.iwcm->ifname)); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 3d75f65ce87e..07d0c6c5b046 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -107,6 +107,14 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void get_dev_fw_str(struct ib_device *device, char *str, + size_t str_len) +{ + struct ocrdma_dev *dev = get_ocrdma_dev(device); + + snprintf(str, str_len, "%s", &dev->attr.fw_ver[0]); +} + static int ocrdma_register_device(struct ocrdma_dev *dev) { strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX); @@ -193,6 +201,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.process_mad = ocrdma_process_mad; dev->ibdev.get_port_immutable = ocrdma_port_immutable; + dev->ibdev.get_dev_fw_str = get_dev_fw_str; if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { dev->ibdev.uverbs_cmd_mask |= @@ -262,14 +271,6 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr, return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor); } -static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, - char *buf) -{ - struct ocrdma_dev *dev = dev_get_drvdata(device); - - return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->attr.fw_ver[0]); -} - static ssize_t show_hca_type(struct device *device, struct device_attribute *attr, char *buf) { @@ -279,12 +280,10 @@ static ssize_t show_hca_type(struct device *device, } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL); static struct device_attribute *ocrdma_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver, &dev_attr_hca_type }; diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 575b737d9ef3..9cc0aae1d781 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -106,6 +106,49 @@ static u32 credit_table[31] = { 32768 /* 1E */ }; +const struct rvt_operation_params qib_post_parms[RVT_OPERATION_MAX] = { +[IB_WR_RDMA_WRITE] = { + .length = sizeof(struct ib_rdma_wr), + .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), +}, + +[IB_WR_RDMA_READ] = { + .length = sizeof(struct ib_rdma_wr), + .qpt_support = BIT(IB_QPT_RC), + .flags = RVT_OPERATION_ATOMIC, +}, + +[IB_WR_ATOMIC_CMP_AND_SWP] = { + .length = sizeof(struct ib_atomic_wr), + .qpt_support = BIT(IB_QPT_RC), + .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE, +}, + +[IB_WR_ATOMIC_FETCH_AND_ADD] = { + .length = sizeof(struct ib_atomic_wr), + .qpt_support = BIT(IB_QPT_RC), + .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE, +}, + +[IB_WR_RDMA_WRITE_WITH_IMM] = { + .length = sizeof(struct ib_rdma_wr), + .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), +}, + +[IB_WR_SEND] = { + .length = sizeof(struct ib_send_wr), + .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) | + BIT(IB_QPT_UC) | BIT(IB_QPT_RC), +}, + +[IB_WR_SEND_WITH_IMM] = { + .length = sizeof(struct ib_send_wr), + .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) | + BIT(IB_QPT_UC) | BIT(IB_QPT_RC), +}, + +}; + static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, gfp_t gfp) { diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 846e6c726df7..10d062561bd9 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -169,8 +169,12 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) } if (ah_attr->ah_flags & IB_AH_GRH) { - qib_copy_sge(&qp->r_sge, &ah_attr->grh, - sizeof(struct ib_grh), 1); + struct ib_grh grh; + struct ib_global_route grd = ah_attr->grh; + + qib_make_grh(ibp, &grh, &grd, 0, 0); + qib_copy_sge(&qp->r_sge, &grh, + sizeof(grh), 1); wc.wc_flags |= IB_WC_GRH; } else qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index cbf6200e6afc..fd1dfbce5539 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1582,6 +1582,8 @@ static void qib_fill_device_attr(struct qib_devdata *dd) rdi->dparms.props.max_total_mcast_qp_attach = rdi->dparms.props.max_mcast_qp_attach * rdi->dparms.props.max_mcast_grp; + /* post send table */ + dd->verbs_dev.rdi.post_parms = qib_post_parms; } /** diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 4f878151f81f..736ced684842 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -497,4 +497,6 @@ extern unsigned int ib_qib_max_srq_wrs; extern const u32 ib_qib_rnr_table[]; +extern const struct rvt_operation_params qib_post_parms[]; + #endif /* QIB_VERBS_H */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 565c881a44ba..c229b9f4a52d 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -331,6 +331,21 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static void usnic_get_dev_fw_str(struct ib_device *device, + char *str, + size_t str_len) +{ + struct usnic_ib_dev *us_ibdev = + container_of(device, struct usnic_ib_dev, ib_dev); + struct ethtool_drvinfo info; + + mutex_lock(&us_ibdev->usdev_lock); + us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info); + mutex_unlock(&us_ibdev->usdev_lock); + + snprintf(str, str_len, "%s", info.fw_version); +} + /* Start of PF discovery section */ static void *usnic_ib_device_add(struct pci_dev *dev) { @@ -414,6 +429,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq; us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr; us_ibdev->ib_dev.get_port_immutable = usnic_port_immutable; + us_ibdev->ib_dev.get_dev_fw_str = usnic_get_dev_fw_str; if (ib_register_device(&us_ibdev->ib_dev, NULL)) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index 3412ea06116e..80ef3f8998c8 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -45,21 +45,6 @@ #include "usnic_ib_verbs.h" #include "usnic_log.h" -static ssize_t usnic_ib_show_fw_ver(struct device *device, - struct device_attribute *attr, - char *buf) -{ - struct usnic_ib_dev *us_ibdev = - container_of(device, struct usnic_ib_dev, ib_dev.dev); - struct ethtool_drvinfo info; - - mutex_lock(&us_ibdev->usdev_lock); - us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info); - mutex_unlock(&us_ibdev->usdev_lock); - - return scnprintf(buf, PAGE_SIZE, "%s\n", info.fw_version); -} - static ssize_t usnic_ib_show_board(struct device *device, struct device_attribute *attr, char *buf) @@ -192,7 +177,6 @@ usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr, us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); } -static DEVICE_ATTR(fw_ver, S_IRUGO, usnic_ib_show_fw_ver, NULL); static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL); static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL); static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL); @@ -201,7 +185,6 @@ static DEVICE_ATTR(qp_per_vf, S_IRUGO, usnic_ib_show_qp_per_vf, NULL); static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL); static struct device_attribute *usnic_class_attributes[] = { - &dev_attr_fw_ver, &dev_attr_board_id, &dev_attr_config, &dev_attr_iface, diff --git a/drivers/infiniband/sw/Makefile b/drivers/infiniband/sw/Makefile index 988b6a0101a4..8b095b27db87 100644 --- a/drivers/infiniband/sw/Makefile +++ b/drivers/infiniband/sw/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt/ +obj-$(CONFIG_RDMA_RXE) += rxe/ diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig index 11aa6a34bd71..1da8d01a6855 100644 --- a/drivers/infiniband/sw/rdmavt/Kconfig +++ b/drivers/infiniband/sw/rdmavt/Kconfig @@ -1,6 +1,5 @@ config INFINIBAND_RDMAVT tristate "RDMA verbs transport library" depends on 64BIT - default m ---help--- This is a common software verbs provider for RDMA networks. diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 6ca6fa80dd6e..f2f229efbe64 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -510,6 +510,7 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi) if (rdi->worker) return 0; + spin_lock_init(&rdi->n_cqs_lock); rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL); if (!rdi->worker) return -ENOMEM; diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 0f4d4500f45e..80c4b6b401b8 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -140,6 +140,7 @@ static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd, init_completion(&mr->comp); /* count returning the ptr to user */ atomic_set(&mr->refcount, 1); + atomic_set(&mr->lkey_invalid, 0); mr->pd = pd; mr->max_segs = count; return 0; @@ -480,6 +481,123 @@ struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, } /** + * rvt_set_page - page assignment function called by ib_sg_to_pages + * @ibmr: memory region + * @addr: dma address of mapped page + * + * Return: 0 on success + */ +static int rvt_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct rvt_mr *mr = to_imr(ibmr); + u32 ps = 1 << mr->mr.page_shift; + u32 mapped_segs = mr->mr.length >> mr->mr.page_shift; + int m, n; + + if (unlikely(mapped_segs == mr->mr.max_segs)) + return -ENOMEM; + + if (mr->mr.length == 0) { + mr->mr.user_base = addr; + mr->mr.iova = addr; + } + + m = mapped_segs / RVT_SEGSZ; + n = mapped_segs % RVT_SEGSZ; + mr->mr.map[m]->segs[n].vaddr = (void *)addr; + mr->mr.map[m]->segs[n].length = ps; + mr->mr.length += ps; + + return 0; +} + +/** + * rvt_map_mr_sg - map sg list and set it the memory region + * @ibmr: memory region + * @sg: dma mapped scatterlist + * @sg_nents: number of entries in sg + * @sg_offset: offset in bytes into sg + * + * Return: number of sg elements mapped to the memory region + */ +int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset) +{ + struct rvt_mr *mr = to_imr(ibmr); + + mr->mr.length = 0; + mr->mr.page_shift = PAGE_SHIFT; + return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, + rvt_set_page); +} + +/** + * rvt_fast_reg_mr - fast register physical MR + * @qp: the queue pair where the work request comes from + * @ibmr: the memory region to be registered + * @key: updated key for this memory region + * @access: access flags for this memory region + * + * Returns 0 on success. + */ +int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key, + int access) +{ + struct rvt_mr *mr = to_imr(ibmr); + + if (qp->ibqp.pd != mr->mr.pd) + return -EACCES; + + /* not applicable to dma MR or user MR */ + if (!mr->mr.lkey || mr->umem) + return -EINVAL; + + if ((key & 0xFFFFFF00) != (mr->mr.lkey & 0xFFFFFF00)) + return -EINVAL; + + ibmr->lkey = key; + ibmr->rkey = key; + mr->mr.lkey = key; + mr->mr.access_flags = access; + atomic_set(&mr->mr.lkey_invalid, 0); + + return 0; +} +EXPORT_SYMBOL(rvt_fast_reg_mr); + +/** + * rvt_invalidate_rkey - invalidate an MR rkey + * @qp: queue pair associated with the invalidate op + * @rkey: rkey to invalidate + * + * Returns 0 on success. + */ +int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey) +{ + struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device); + struct rvt_lkey_table *rkt = &dev->lkey_table; + struct rvt_mregion *mr; + + if (rkey == 0) + return -EINVAL; + + rcu_read_lock(); + mr = rcu_dereference( + rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]); + if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) + goto bail; + + atomic_set(&mr->lkey_invalid, 1); + rcu_read_unlock(); + return 0; + +bail: + rcu_read_unlock(); + return -EINVAL; +} +EXPORT_SYMBOL(rvt_invalidate_rkey); + +/** * rvt_alloc_fmr - allocate a fast memory region * @pd: the protection domain for this memory region * @mr_access_flags: access flags for this memory region @@ -682,7 +800,8 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, } mr = rcu_dereference( rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]); - if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) + if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || + mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) goto bail; off = sge->addr - mr->user_base; @@ -782,7 +901,8 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, mr = rcu_dereference( rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]); - if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) + if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || + mr->lkey != rkey || qp->ibqp.pd != mr->pd)) goto bail; off = vaddr - mr->iova; diff --git a/drivers/infiniband/sw/rdmavt/mr.h b/drivers/infiniband/sw/rdmavt/mr.h index 69380512c6d1..132800ee0205 100644 --- a/drivers/infiniband/sw/rdmavt/mr.h +++ b/drivers/infiniband/sw/rdmavt/mr.h @@ -82,6 +82,8 @@ int rvt_dereg_mr(struct ib_mr *ibmr); struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); +int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset); struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr); int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 41ba7e9cadaa..bdb540f25a88 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -435,8 +435,7 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) for (n = 0; n < rvt_max_atomic(rdi); n++) { struct rvt_ack_entry *e = &qp->s_ack_queue[n]; - if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && - e->rdma_sge.mr) { + if (e->rdma_sge.mr) { rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } @@ -584,6 +583,7 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, qp->r_rq.wq->tail = 0; } qp->r_sge.num_sge = 0; + atomic_set(&qp->s_reserved_used, 0); } /** @@ -613,6 +613,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); void *priv = NULL; gfp_t gfp; + size_t sqsize; if (!rdi) return ERR_PTR(-EINVAL); @@ -643,7 +644,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, init_attr->cap.max_recv_wr == 0) return ERR_PTR(-EINVAL); } - + sqsize = + init_attr->cap.max_send_wr + 1 + + rdi->dparms.reserved_operations; switch (init_attr->qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: @@ -658,11 +661,11 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, sizeof(struct rvt_swqe); if (gfp == GFP_NOIO) swq = __vmalloc( - (init_attr->cap.max_send_wr + 1) * sz, + sqsize * sz, gfp | __GFP_ZERO, PAGE_KERNEL); else swq = vzalloc_node( - (init_attr->cap.max_send_wr + 1) * sz, + sqsize * sz, rdi->dparms.node); if (!swq) return ERR_PTR(-ENOMEM); @@ -741,13 +744,14 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, spin_lock_init(&qp->s_lock); spin_lock_init(&qp->r_rq.lock); atomic_set(&qp->refcount, 0); + atomic_set(&qp->local_ops_pending, 0); init_waitqueue_head(&qp->wait); init_timer(&qp->s_timer); qp->s_timer.data = (unsigned long)qp; INIT_LIST_HEAD(&qp->rspwait); qp->state = IB_QPS_RESET; qp->s_wq = swq; - qp->s_size = init_attr->cap.max_send_wr + 1; + qp->s_size = sqsize; qp->s_avail = init_attr->cap.max_send_wr; qp->s_max_sge = init_attr->cap.max_send_sge; if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) @@ -1332,7 +1336,8 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->sq_psn = qp->s_next_psn & rdi->dparms.psn_mask; attr->dest_qp_num = qp->remote_qpn; attr->qp_access_flags = qp->qp_access_flags; - attr->cap.max_send_wr = qp->s_size - 1; + attr->cap.max_send_wr = qp->s_size - 1 - + rdi->dparms.reserved_operations; attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1; attr->cap.max_send_sge = qp->s_max_sge; attr->cap.max_recv_sge = qp->r_rq.max_sge; @@ -1440,25 +1445,116 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, } /** - * qp_get_savail - return number of avail send entries + * rvt_qp_valid_operation - validate post send wr request + * @qp - the qp + * @post-parms - the post send table for the driver + * @wr - the work request + * + * The routine validates the operation based on the + * validation table an returns the length of the operation + * which can extend beyond the ib_send_bw. Operation + * dependent flags key atomic operation validation. * + * There is an exception for UD qps that validates the pd and + * overrides the length to include the additional UD specific + * length. + * + * Returns a negative error or the length of the work request + * for building the swqe. + */ +static inline int rvt_qp_valid_operation( + struct rvt_qp *qp, + const struct rvt_operation_params *post_parms, + struct ib_send_wr *wr) +{ + int len; + + if (wr->opcode >= RVT_OPERATION_MAX || !post_parms[wr->opcode].length) + return -EINVAL; + if (!(post_parms[wr->opcode].qpt_support & BIT(qp->ibqp.qp_type))) + return -EINVAL; + if ((post_parms[wr->opcode].flags & RVT_OPERATION_PRIV) && + ibpd_to_rvtpd(qp->ibqp.pd)->user) + return -EINVAL; + if (post_parms[wr->opcode].flags & RVT_OPERATION_ATOMIC_SGE && + (wr->num_sge == 0 || + wr->sg_list[0].length < sizeof(u64) || + wr->sg_list[0].addr & (sizeof(u64) - 1))) + return -EINVAL; + if (post_parms[wr->opcode].flags & RVT_OPERATION_ATOMIC && + !qp->s_max_rd_atomic) + return -EINVAL; + len = post_parms[wr->opcode].length; + /* UD specific */ + if (qp->ibqp.qp_type != IB_QPT_UC && + qp->ibqp.qp_type != IB_QPT_RC) { + if (qp->ibqp.pd != ud_wr(wr)->ah->pd) + return -EINVAL; + len = sizeof(struct ib_ud_wr); + } + return len; +} + +/** + * rvt_qp_is_avail - determine queue capacity * @qp - the qp + * @rdi - the rdmavt device + * @reserved_op - is reserved operation * * This assumes the s_hlock is held but the s_last * qp variable is uncontrolled. + * + * For non reserved operations, the qp->s_avail + * may be changed. + * + * The return value is zero or a -ENOMEM. */ -static inline u32 qp_get_savail(struct rvt_qp *qp) +static inline int rvt_qp_is_avail( + struct rvt_qp *qp, + struct rvt_dev_info *rdi, + bool reserved_op) { u32 slast; - u32 ret; - + u32 avail; + u32 reserved_used; + + /* see rvt_qp_wqe_unreserve() */ + smp_mb__before_atomic(); + reserved_used = atomic_read(&qp->s_reserved_used); + if (unlikely(reserved_op)) { + /* see rvt_qp_wqe_unreserve() */ + smp_mb__before_atomic(); + if (reserved_used >= rdi->dparms.reserved_operations) + return -ENOMEM; + return 0; + } + /* non-reserved operations */ + if (likely(qp->s_avail)) + return 0; smp_read_barrier_depends(); /* see rc.c */ slast = ACCESS_ONCE(qp->s_last); if (qp->s_head >= slast) - ret = qp->s_size - (qp->s_head - slast); + avail = qp->s_size - (qp->s_head - slast); else - ret = slast - qp->s_head; - return ret - 1; + avail = slast - qp->s_head; + + /* see rvt_qp_wqe_unreserve() */ + smp_mb__before_atomic(); + reserved_used = atomic_read(&qp->s_reserved_used); + avail = avail - 1 - + (rdi->dparms.reserved_operations - reserved_used); + /* insure we don't assign a negative s_avail */ + if ((s32)avail <= 0) + return -ENOMEM; + qp->s_avail = avail; + if (WARN_ON(qp->s_avail > + (qp->s_size - 1 - rdi->dparms.reserved_operations))) + rvt_pr_err(rdi, + "More avail entries than QP RB size.\nQP: %u, size: %u, avail: %u\nhead: %u, tail: %u, cur: %u, acked: %u, last: %u", + qp->ibqp.qp_num, qp->s_size, qp->s_avail, + qp->s_head, qp->s_tail, qp->s_cur, + qp->s_acked, qp->s_last); + return 0; } /** @@ -1480,49 +1576,64 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); u8 log_pmtu; int ret; + size_t cplen; + bool reserved_op; + int local_ops_delayed = 0; + + BUILD_BUG_ON(IB_QPT_MAX >= (sizeof(u32) * BITS_PER_BYTE)); /* IB spec says that num_sge == 0 is OK. */ if (unlikely(wr->num_sge > qp->s_max_sge)) return -EINVAL; + ret = rvt_qp_valid_operation(qp, rdi->post_parms, wr); + if (ret < 0) + return ret; + cplen = ret; + /* - * Don't allow RDMA reads or atomic operations on UC or - * undefined operations. - * Make sure buffer is large enough to hold the result for atomics. + * Local operations include fast register and local invalidate. + * Fast register needs to be processed immediately because the + * registered lkey may be used by following work requests and the + * lkey needs to be valid at the time those requests are posted. + * Local invalidate can be processed immediately if fencing is + * not required and no previous local invalidate ops are pending. + * Signaled local operations that have been processed immediately + * need to have requests with "completion only" flags set posted + * to the send queue in order to generate completions. */ - if (qp->ibqp.qp_type == IB_QPT_UC) { - if ((unsigned)wr->opcode >= IB_WR_RDMA_READ) - return -EINVAL; - } else if (qp->ibqp.qp_type != IB_QPT_RC) { - /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */ - if (wr->opcode != IB_WR_SEND && - wr->opcode != IB_WR_SEND_WITH_IMM) - return -EINVAL; - /* Check UD destination address PD */ - if (qp->ibqp.pd != ud_wr(wr)->ah->pd) + if ((rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL)) { + switch (wr->opcode) { + case IB_WR_REG_MR: + ret = rvt_fast_reg_mr(qp, + reg_wr(wr)->mr, + reg_wr(wr)->key, + reg_wr(wr)->access); + if (ret || !(wr->send_flags & IB_SEND_SIGNALED)) + return ret; + break; + case IB_WR_LOCAL_INV: + if ((wr->send_flags & IB_SEND_FENCE) || + atomic_read(&qp->local_ops_pending)) { + local_ops_delayed = 1; + } else { + ret = rvt_invalidate_rkey( + qp, wr->ex.invalidate_rkey); + if (ret || !(wr->send_flags & IB_SEND_SIGNALED)) + return ret; + } + break; + default: return -EINVAL; - } else if ((unsigned)wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) { - return -EINVAL; - } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP && - (wr->num_sge == 0 || - wr->sg_list[0].length < sizeof(u64) || - wr->sg_list[0].addr & (sizeof(u64) - 1))) { - return -EINVAL; - } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) { - return -EINVAL; + } } + + reserved_op = rdi->post_parms[wr->opcode].flags & + RVT_OPERATION_USE_RESERVE; /* check for avail */ - if (unlikely(!qp->s_avail)) { - qp->s_avail = qp_get_savail(qp); - if (WARN_ON(qp->s_avail > (qp->s_size - 1))) - rvt_pr_err(rdi, - "More avail entries than QP RB size.\nQP: %u, size: %u, avail: %u\nhead: %u, tail: %u, cur: %u, acked: %u, last: %u", - qp->ibqp.qp_num, qp->s_size, qp->s_avail, - qp->s_head, qp->s_tail, qp->s_cur, - qp->s_acked, qp->s_last); - if (!qp->s_avail) - return -ENOMEM; - } + ret = rvt_qp_is_avail(qp, rdi, reserved_op); + if (ret) + return ret; next = qp->s_head + 1; if (next >= qp->s_size) next = 0; @@ -1531,18 +1642,8 @@ static int rvt_post_one_wr(struct rvt_qp *qp, pd = ibpd_to_rvtpd(qp->ibqp.pd); wqe = rvt_get_swqe_ptr(qp, qp->s_head); - if (qp->ibqp.qp_type != IB_QPT_UC && - qp->ibqp.qp_type != IB_QPT_RC) - memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr)); - else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || - wr->opcode == IB_WR_RDMA_WRITE || - wr->opcode == IB_WR_RDMA_READ) - memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr)); - else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || - wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) - memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr)); - else - memcpy(&wqe->wr, wr, sizeof(wqe->wr)); + /* cplen has length from above */ + memcpy(&wqe->wr, wr, cplen); wqe->length = 0; j = 0; @@ -1585,14 +1686,29 @@ static int rvt_post_one_wr(struct rvt_qp *qp, atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); } - wqe->ssn = qp->s_ssn++; - wqe->psn = qp->s_next_psn; - wqe->lpsn = wqe->psn + - (wqe->length ? ((wqe->length - 1) >> log_pmtu) : 0); - qp->s_next_psn = wqe->lpsn + 1; + if (rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL) { + if (local_ops_delayed) + atomic_inc(&qp->local_ops_pending); + else + wqe->wr.send_flags |= RVT_SEND_COMPLETION_ONLY; + wqe->ssn = 0; + wqe->psn = 0; + wqe->lpsn = 0; + } else { + wqe->ssn = qp->s_ssn++; + wqe->psn = qp->s_next_psn; + wqe->lpsn = wqe->psn + + (wqe->length ? + ((wqe->length - 1) >> log_pmtu) : + 0); + qp->s_next_psn = wqe->lpsn + 1; + } trace_rvt_post_one_wr(qp, wqe); + if (unlikely(reserved_op)) + rvt_qp_wqe_reserve(qp, wqe); + else + qp->s_avail--; smp_wmb(); /* see request builders */ - qp->s_avail--; qp->s_head = next; return 0; diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 30c4fda7a05a..d430c2f7cec4 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -370,6 +370,7 @@ enum { REG_USER_MR, DEREG_MR, ALLOC_MR, + MAP_MR_SG, ALLOC_FMR, MAP_PHYS_FMR, UNMAP_FMR, @@ -528,7 +529,8 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) post_send), rvt_post_send)) if (!rdi->driver_f.schedule_send || - !rdi->driver_f.do_send) + !rdi->driver_f.do_send || + !rdi->post_parms) return -EINVAL; break; @@ -633,6 +635,12 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) rvt_alloc_mr); break; + case MAP_MR_SG: + check_driver_override(rdi, offsetof(struct ib_device, + map_mr_sg), + rvt_map_mr_sg); + break; + case MAP_PHYS_FMR: check_driver_override(rdi, offsetof(struct ib_device, map_phys_fmr), diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig new file mode 100644 index 000000000000..1e4e628fe7b0 --- /dev/null +++ b/drivers/infiniband/sw/rxe/Kconfig @@ -0,0 +1,24 @@ +config RDMA_RXE + tristate "Software RDMA over Ethernet (RoCE) driver" + depends on INET && PCI && INFINIBAND + depends on NET_UDP_TUNNEL + ---help--- + This driver implements the InfiniBand RDMA transport over + the Linux network stack. It enables a system with a + standard Ethernet adapter to interoperate with a RoCE + adapter or with another system running the RXE driver. + Documentation on InfiniBand and RoCE can be downloaded at + www.infinibandta.org and www.openfabrics.org. (See also + siw which is a similar software driver for iWARP.) + + The driver is split into two layers, one interfaces with the + Linux RDMA stack and implements a kernel or user space + verbs API. The user space verbs API requires a support + library named librxe which is loaded by the generic user + space verbs API, libibverbs. The other layer interfaces + with the Linux network stack at layer 3. + + To configure and work with soft-RoCE driver please use the + following wiki page under "configure Soft-RoCE (RXE)" section: + + https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile new file mode 100644 index 000000000000..3b3fb9d1c470 --- /dev/null +++ b/drivers/infiniband/sw/rxe/Makefile @@ -0,0 +1,24 @@ +obj-$(CONFIG_RDMA_RXE) += rdma_rxe.o + +rdma_rxe-y := \ + rxe.o \ + rxe_comp.o \ + rxe_req.o \ + rxe_resp.o \ + rxe_recv.o \ + rxe_pool.o \ + rxe_queue.o \ + rxe_verbs.o \ + rxe_av.o \ + rxe_srq.o \ + rxe_qp.o \ + rxe_cq.o \ + rxe_mr.o \ + rxe_dma.o \ + rxe_opcode.o \ + rxe_mmap.o \ + rxe_icrc.o \ + rxe_mcast.o \ + rxe_task.o \ + rxe_net.o \ + rxe_sysfs.o diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c new file mode 100644 index 000000000000..55f0e8f0ca79 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -0,0 +1,386 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" + +MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib"); +MODULE_DESCRIPTION("Soft RDMA transport"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION("0.2"); + +/* free resources for all ports on a device */ +static void rxe_cleanup_ports(struct rxe_dev *rxe) +{ + kfree(rxe->port.pkey_tbl); + rxe->port.pkey_tbl = NULL; + +} + +/* free resources for a rxe device all objects created for this device must + * have been destroyed + */ +static void rxe_cleanup(struct rxe_dev *rxe) +{ + rxe_pool_cleanup(&rxe->uc_pool); + rxe_pool_cleanup(&rxe->pd_pool); + rxe_pool_cleanup(&rxe->ah_pool); + rxe_pool_cleanup(&rxe->srq_pool); + rxe_pool_cleanup(&rxe->qp_pool); + rxe_pool_cleanup(&rxe->cq_pool); + rxe_pool_cleanup(&rxe->mr_pool); + rxe_pool_cleanup(&rxe->mw_pool); + rxe_pool_cleanup(&rxe->mc_grp_pool); + rxe_pool_cleanup(&rxe->mc_elem_pool); + + rxe_cleanup_ports(rxe); +} + +/* called when all references have been dropped */ +void rxe_release(struct kref *kref) +{ + struct rxe_dev *rxe = container_of(kref, struct rxe_dev, ref_cnt); + + rxe_cleanup(rxe); + ib_dealloc_device(&rxe->ib_dev); +} + +void rxe_dev_put(struct rxe_dev *rxe) +{ + kref_put(&rxe->ref_cnt, rxe_release); +} +EXPORT_SYMBOL_GPL(rxe_dev_put); + +/* initialize rxe device parameters */ +static int rxe_init_device_param(struct rxe_dev *rxe) +{ + rxe->max_inline_data = RXE_MAX_INLINE_DATA; + + rxe->attr.fw_ver = RXE_FW_VER; + rxe->attr.max_mr_size = RXE_MAX_MR_SIZE; + rxe->attr.page_size_cap = RXE_PAGE_SIZE_CAP; + rxe->attr.vendor_id = RXE_VENDOR_ID; + rxe->attr.vendor_part_id = RXE_VENDOR_PART_ID; + rxe->attr.hw_ver = RXE_HW_VER; + rxe->attr.max_qp = RXE_MAX_QP; + rxe->attr.max_qp_wr = RXE_MAX_QP_WR; + rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS; + rxe->attr.max_sge = RXE_MAX_SGE; + rxe->attr.max_sge_rd = RXE_MAX_SGE_RD; + rxe->attr.max_cq = RXE_MAX_CQ; + rxe->attr.max_cqe = (1 << RXE_MAX_LOG_CQE) - 1; + rxe->attr.max_mr = RXE_MAX_MR; + rxe->attr.max_pd = RXE_MAX_PD; + rxe->attr.max_qp_rd_atom = RXE_MAX_QP_RD_ATOM; + rxe->attr.max_ee_rd_atom = RXE_MAX_EE_RD_ATOM; + rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM; + rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM; + rxe->attr.max_ee_init_rd_atom = RXE_MAX_EE_INIT_RD_ATOM; + rxe->attr.atomic_cap = RXE_ATOMIC_CAP; + rxe->attr.max_ee = RXE_MAX_EE; + rxe->attr.max_rdd = RXE_MAX_RDD; + rxe->attr.max_mw = RXE_MAX_MW; + rxe->attr.max_raw_ipv6_qp = RXE_MAX_RAW_IPV6_QP; + rxe->attr.max_raw_ethy_qp = RXE_MAX_RAW_ETHY_QP; + rxe->attr.max_mcast_grp = RXE_MAX_MCAST_GRP; + rxe->attr.max_mcast_qp_attach = RXE_MAX_MCAST_QP_ATTACH; + rxe->attr.max_total_mcast_qp_attach = RXE_MAX_TOT_MCAST_QP_ATTACH; + rxe->attr.max_ah = RXE_MAX_AH; + rxe->attr.max_fmr = RXE_MAX_FMR; + rxe->attr.max_map_per_fmr = RXE_MAX_MAP_PER_FMR; + rxe->attr.max_srq = RXE_MAX_SRQ; + rxe->attr.max_srq_wr = RXE_MAX_SRQ_WR; + rxe->attr.max_srq_sge = RXE_MAX_SRQ_SGE; + rxe->attr.max_fast_reg_page_list_len = RXE_MAX_FMR_PAGE_LIST_LEN; + rxe->attr.max_pkeys = RXE_MAX_PKEYS; + rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY; + + rxe->max_ucontext = RXE_MAX_UCONTEXT; + + return 0; +} + +/* initialize port attributes */ +static int rxe_init_port_param(struct rxe_port *port) +{ + port->attr.state = RXE_PORT_STATE; + port->attr.max_mtu = RXE_PORT_MAX_MTU; + port->attr.active_mtu = RXE_PORT_ACTIVE_MTU; + port->attr.gid_tbl_len = RXE_PORT_GID_TBL_LEN; + port->attr.port_cap_flags = RXE_PORT_PORT_CAP_FLAGS; + port->attr.max_msg_sz = RXE_PORT_MAX_MSG_SZ; + port->attr.bad_pkey_cntr = RXE_PORT_BAD_PKEY_CNTR; + port->attr.qkey_viol_cntr = RXE_PORT_QKEY_VIOL_CNTR; + port->attr.pkey_tbl_len = RXE_PORT_PKEY_TBL_LEN; + port->attr.lid = RXE_PORT_LID; + port->attr.sm_lid = RXE_PORT_SM_LID; + port->attr.lmc = RXE_PORT_LMC; + port->attr.max_vl_num = RXE_PORT_MAX_VL_NUM; + port->attr.sm_sl = RXE_PORT_SM_SL; + port->attr.subnet_timeout = RXE_PORT_SUBNET_TIMEOUT; + port->attr.init_type_reply = RXE_PORT_INIT_TYPE_REPLY; + port->attr.active_width = RXE_PORT_ACTIVE_WIDTH; + port->attr.active_speed = RXE_PORT_ACTIVE_SPEED; + port->attr.phys_state = RXE_PORT_PHYS_STATE; + port->mtu_cap = + ib_mtu_enum_to_int(RXE_PORT_ACTIVE_MTU); + port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX); + + return 0; +} + +/* initialize port state, note IB convention that HCA ports are always + * numbered from 1 + */ +static int rxe_init_ports(struct rxe_dev *rxe) +{ + struct rxe_port *port = &rxe->port; + + rxe_init_port_param(port); + + if (!port->attr.pkey_tbl_len || !port->attr.gid_tbl_len) + return -EINVAL; + + port->pkey_tbl = kcalloc(port->attr.pkey_tbl_len, + sizeof(*port->pkey_tbl), GFP_KERNEL); + + if (!port->pkey_tbl) + return -ENOMEM; + + port->pkey_tbl[0] = 0xffff; + port->port_guid = rxe->ifc_ops->port_guid(rxe); + + spin_lock_init(&port->port_lock); + + return 0; +} + +/* init pools of managed objects */ +static int rxe_init_pools(struct rxe_dev *rxe) +{ + int err; + + err = rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC, + rxe->max_ucontext); + if (err) + goto err1; + + err = rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD, + rxe->attr.max_pd); + if (err) + goto err2; + + err = rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH, + rxe->attr.max_ah); + if (err) + goto err3; + + err = rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ, + rxe->attr.max_srq); + if (err) + goto err4; + + err = rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP, + rxe->attr.max_qp); + if (err) + goto err5; + + err = rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ, + rxe->attr.max_cq); + if (err) + goto err6; + + err = rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR, + rxe->attr.max_mr); + if (err) + goto err7; + + err = rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW, + rxe->attr.max_mw); + if (err) + goto err8; + + err = rxe_pool_init(rxe, &rxe->mc_grp_pool, RXE_TYPE_MC_GRP, + rxe->attr.max_mcast_grp); + if (err) + goto err9; + + err = rxe_pool_init(rxe, &rxe->mc_elem_pool, RXE_TYPE_MC_ELEM, + rxe->attr.max_total_mcast_qp_attach); + if (err) + goto err10; + + return 0; + +err10: + rxe_pool_cleanup(&rxe->mc_grp_pool); +err9: + rxe_pool_cleanup(&rxe->mw_pool); +err8: + rxe_pool_cleanup(&rxe->mr_pool); +err7: + rxe_pool_cleanup(&rxe->cq_pool); +err6: + rxe_pool_cleanup(&rxe->qp_pool); +err5: + rxe_pool_cleanup(&rxe->srq_pool); +err4: + rxe_pool_cleanup(&rxe->ah_pool); +err3: + rxe_pool_cleanup(&rxe->pd_pool); +err2: + rxe_pool_cleanup(&rxe->uc_pool); +err1: + return err; +} + +/* initialize rxe device state */ +static int rxe_init(struct rxe_dev *rxe) +{ + int err; + + /* init default device parameters */ + rxe_init_device_param(rxe); + + err = rxe_init_ports(rxe); + if (err) + goto err1; + + err = rxe_init_pools(rxe); + if (err) + goto err2; + + /* init pending mmap list */ + spin_lock_init(&rxe->mmap_offset_lock); + spin_lock_init(&rxe->pending_lock); + INIT_LIST_HEAD(&rxe->pending_mmaps); + INIT_LIST_HEAD(&rxe->list); + + mutex_init(&rxe->usdev_lock); + + return 0; + +err2: + rxe_cleanup_ports(rxe); +err1: + return err; +} + +int rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) +{ + struct rxe_port *port = &rxe->port; + enum ib_mtu mtu; + + mtu = eth_mtu_int_to_enum(ndev_mtu); + + /* Make sure that new MTU in range */ + mtu = mtu ? min_t(enum ib_mtu, mtu, RXE_PORT_MAX_MTU) : IB_MTU_256; + + port->attr.active_mtu = mtu; + port->mtu_cap = ib_mtu_enum_to_int(mtu); + + return 0; +} +EXPORT_SYMBOL(rxe_set_mtu); + +/* called by ifc layer to create new rxe device. + * The caller should allocate memory for rxe by calling ib_alloc_device. + */ +int rxe_add(struct rxe_dev *rxe, unsigned int mtu) +{ + int err; + + kref_init(&rxe->ref_cnt); + + err = rxe_init(rxe); + if (err) + goto err1; + + err = rxe_set_mtu(rxe, mtu); + if (err) + goto err1; + + err = rxe_register_device(rxe); + if (err) + goto err1; + + return 0; + +err1: + rxe_dev_put(rxe); + return err; +} +EXPORT_SYMBOL(rxe_add); + +/* called by the ifc layer to remove a device */ +void rxe_remove(struct rxe_dev *rxe) +{ + rxe_unregister_device(rxe); + + rxe_dev_put(rxe); +} +EXPORT_SYMBOL(rxe_remove); + +static int __init rxe_module_init(void) +{ + int err; + + /* initialize slab caches for managed objects */ + err = rxe_cache_init(); + if (err) { + pr_err("rxe: unable to init object pools\n"); + return err; + } + + err = rxe_net_init(); + if (err) { + pr_err("rxe: unable to init\n"); + rxe_cache_exit(); + return err; + } + pr_info("rxe: loaded\n"); + + return 0; +} + +static void __exit rxe_module_exit(void) +{ + rxe_remove_all(); + rxe_net_exit(); + rxe_cache_exit(); + + pr_info("rxe: unloaded\n"); +} + +module_init(rxe_module_init); +module_exit(rxe_module_exit); diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h new file mode 100644 index 000000000000..12c71c549f97 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_H +#define RXE_H + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/crc32.h> + +#include <rdma/ib_verbs.h> +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_pack.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_umem.h> +#include <rdma/ib_cache.h> +#include <rdma/ib_addr.h> + +#include "rxe_net.h" +#include "rxe_opcode.h" +#include "rxe_hdr.h" +#include "rxe_param.h" +#include "rxe_verbs.h" + +#define RXE_UVERBS_ABI_VERSION (1) + +#define IB_PHYS_STATE_LINK_UP (5) +#define IB_PHYS_STATE_LINK_DOWN (3) + +#define RXE_ROCE_V2_SPORT (0xc000) + +int rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu); + +int rxe_add(struct rxe_dev *rxe, unsigned int mtu); +void rxe_remove(struct rxe_dev *rxe); +void rxe_remove_all(void); + +int rxe_rcv(struct sk_buff *skb); + +void rxe_dev_put(struct rxe_dev *rxe); +struct rxe_dev *net_to_rxe(struct net_device *ndev); +struct rxe_dev *get_rxe_by_name(const char* name); + +void rxe_port_up(struct rxe_dev *rxe); +void rxe_port_down(struct rxe_dev *rxe); + +#endif /* RXE_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c new file mode 100644 index 000000000000..5c9474212d4e --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" + +int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr) +{ + struct rxe_port *port; + + if (attr->port_num != 1) { + pr_info("rxe: invalid port_num = %d\n", attr->port_num); + return -EINVAL; + } + + port = &rxe->port; + + if (attr->ah_flags & IB_AH_GRH) { + if (attr->grh.sgid_index > port->attr.gid_tbl_len) { + pr_info("rxe: invalid sgid index = %d\n", + attr->grh.sgid_index); + return -EINVAL; + } + } + + return 0; +} + +int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num, + struct rxe_av *av, struct ib_ah_attr *attr) +{ + memset(av, 0, sizeof(*av)); + memcpy(&av->grh, &attr->grh, sizeof(attr->grh)); + av->port_num = port_num; + return 0; +} + +int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av, + struct ib_ah_attr *attr) +{ + memcpy(&attr->grh, &av->grh, sizeof(av->grh)); + attr->port_num = av->port_num; + return 0; +} + +int rxe_av_fill_ip_info(struct rxe_dev *rxe, + struct rxe_av *av, + struct ib_ah_attr *attr, + struct ib_gid_attr *sgid_attr, + union ib_gid *sgid) +{ + rdma_gid2ip(&av->sgid_addr._sockaddr, sgid); + rdma_gid2ip(&av->dgid_addr._sockaddr, &attr->grh.dgid); + av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid); + + return 0; +} + +struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) +{ + if (!pkt || !pkt->qp) + return NULL; + + if (qp_type(pkt->qp) == IB_QPT_RC || qp_type(pkt->qp) == IB_QPT_UC) + return &pkt->qp->pri_av; + + return (pkt->wqe) ? &pkt->wqe->av : NULL; +} diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c new file mode 100644 index 000000000000..36f67de44095 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -0,0 +1,734 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/skbuff.h> + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" +#include "rxe_task.h" + +enum comp_state { + COMPST_GET_ACK, + COMPST_GET_WQE, + COMPST_COMP_WQE, + COMPST_COMP_ACK, + COMPST_CHECK_PSN, + COMPST_CHECK_ACK, + COMPST_READ, + COMPST_ATOMIC, + COMPST_WRITE_SEND, + COMPST_UPDATE_COMP, + COMPST_ERROR_RETRY, + COMPST_RNR_RETRY, + COMPST_ERROR, + COMPST_EXIT, /* We have an issue, and we want to rerun the completer */ + COMPST_DONE, /* The completer finished successflly */ +}; + +static char *comp_state_name[] = { + [COMPST_GET_ACK] = "GET ACK", + [COMPST_GET_WQE] = "GET WQE", + [COMPST_COMP_WQE] = "COMP WQE", + [COMPST_COMP_ACK] = "COMP ACK", + [COMPST_CHECK_PSN] = "CHECK PSN", + [COMPST_CHECK_ACK] = "CHECK ACK", + [COMPST_READ] = "READ", + [COMPST_ATOMIC] = "ATOMIC", + [COMPST_WRITE_SEND] = "WRITE/SEND", + [COMPST_UPDATE_COMP] = "UPDATE COMP", + [COMPST_ERROR_RETRY] = "ERROR RETRY", + [COMPST_RNR_RETRY] = "RNR RETRY", + [COMPST_ERROR] = "ERROR", + [COMPST_EXIT] = "EXIT", + [COMPST_DONE] = "DONE", +}; + +static unsigned long rnrnak_usec[32] = { + [IB_RNR_TIMER_655_36] = 655360, + [IB_RNR_TIMER_000_01] = 10, + [IB_RNR_TIMER_000_02] = 20, + [IB_RNR_TIMER_000_03] = 30, + [IB_RNR_TIMER_000_04] = 40, + [IB_RNR_TIMER_000_06] = 60, + [IB_RNR_TIMER_000_08] = 80, + [IB_RNR_TIMER_000_12] = 120, + [IB_RNR_TIMER_000_16] = 160, + [IB_RNR_TIMER_000_24] = 240, + [IB_RNR_TIMER_000_32] = 320, + [IB_RNR_TIMER_000_48] = 480, + [IB_RNR_TIMER_000_64] = 640, + [IB_RNR_TIMER_000_96] = 960, + [IB_RNR_TIMER_001_28] = 1280, + [IB_RNR_TIMER_001_92] = 1920, + [IB_RNR_TIMER_002_56] = 2560, + [IB_RNR_TIMER_003_84] = 3840, + [IB_RNR_TIMER_005_12] = 5120, + [IB_RNR_TIMER_007_68] = 7680, + [IB_RNR_TIMER_010_24] = 10240, + [IB_RNR_TIMER_015_36] = 15360, + [IB_RNR_TIMER_020_48] = 20480, + [IB_RNR_TIMER_030_72] = 30720, + [IB_RNR_TIMER_040_96] = 40960, + [IB_RNR_TIMER_061_44] = 61410, + [IB_RNR_TIMER_081_92] = 81920, + [IB_RNR_TIMER_122_88] = 122880, + [IB_RNR_TIMER_163_84] = 163840, + [IB_RNR_TIMER_245_76] = 245760, + [IB_RNR_TIMER_327_68] = 327680, + [IB_RNR_TIMER_491_52] = 491520, +}; + +static inline unsigned long rnrnak_jiffies(u8 timeout) +{ + return max_t(unsigned long, + usecs_to_jiffies(rnrnak_usec[timeout]), 1); +} + +static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode) +{ + switch (opcode) { + case IB_WR_RDMA_WRITE: return IB_WC_RDMA_WRITE; + case IB_WR_RDMA_WRITE_WITH_IMM: return IB_WC_RDMA_WRITE; + case IB_WR_SEND: return IB_WC_SEND; + case IB_WR_SEND_WITH_IMM: return IB_WC_SEND; + case IB_WR_RDMA_READ: return IB_WC_RDMA_READ; + case IB_WR_ATOMIC_CMP_AND_SWP: return IB_WC_COMP_SWAP; + case IB_WR_ATOMIC_FETCH_AND_ADD: return IB_WC_FETCH_ADD; + case IB_WR_LSO: return IB_WC_LSO; + case IB_WR_SEND_WITH_INV: return IB_WC_SEND; + case IB_WR_RDMA_READ_WITH_INV: return IB_WC_RDMA_READ; + case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV; + case IB_WR_REG_MR: return IB_WC_REG_MR; + + default: + return 0xff; + } +} + +void retransmit_timer(unsigned long data) +{ + struct rxe_qp *qp = (struct rxe_qp *)data; + + if (qp->valid) { + qp->comp.timeout = 1; + rxe_run_task(&qp->comp.task, 1); + } +} + +void rxe_comp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp, + struct sk_buff *skb) +{ + int must_sched; + + skb_queue_tail(&qp->resp_pkts, skb); + + must_sched = skb_queue_len(&qp->resp_pkts) > 1; + rxe_run_task(&qp->comp.task, must_sched); +} + +static inline enum comp_state get_wqe(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe **wqe_p) +{ + struct rxe_send_wqe *wqe; + + /* we come here whether or not we found a response packet to see if + * there are any posted WQEs + */ + wqe = queue_head(qp->sq.queue); + *wqe_p = wqe; + + /* no WQE or requester has not started it yet */ + if (!wqe || wqe->state == wqe_state_posted) + return pkt ? COMPST_DONE : COMPST_EXIT; + + /* WQE does not require an ack */ + if (wqe->state == wqe_state_done) + return COMPST_COMP_WQE; + + /* WQE caused an error */ + if (wqe->state == wqe_state_error) + return COMPST_ERROR; + + /* we have a WQE, if we also have an ack check its PSN */ + return pkt ? COMPST_CHECK_PSN : COMPST_EXIT; +} + +static inline void reset_retry_counters(struct rxe_qp *qp) +{ + qp->comp.retry_cnt = qp->attr.retry_cnt; + qp->comp.rnr_retry = qp->attr.rnr_retry; +} + +static inline enum comp_state check_psn(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + s32 diff; + + /* check to see if response is past the oldest WQE. if it is, complete + * send/write or error read/atomic + */ + diff = psn_compare(pkt->psn, wqe->last_psn); + if (diff > 0) { + if (wqe->state == wqe_state_pending) { + if (wqe->mask & WR_ATOMIC_OR_READ_MASK) + return COMPST_ERROR_RETRY; + + reset_retry_counters(qp); + return COMPST_COMP_WQE; + } else { + return COMPST_DONE; + } + } + + /* compare response packet to expected response */ + diff = psn_compare(pkt->psn, qp->comp.psn); + if (diff < 0) { + /* response is most likely a retried packet if it matches an + * uncompleted WQE go complete it else ignore it + */ + if (pkt->psn == wqe->last_psn) + return COMPST_COMP_ACK; + else + return COMPST_DONE; + } else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) { + return COMPST_ERROR_RETRY; + } else { + return COMPST_CHECK_ACK; + } +} + +static inline enum comp_state check_ack(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + unsigned int mask = pkt->mask; + u8 syn; + + /* Check the sequence only */ + switch (qp->comp.opcode) { + case -1: + /* Will catch all *_ONLY cases. */ + if (!(mask & RXE_START_MASK)) + return COMPST_ERROR; + + break; + + case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST: + case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: + if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE && + pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) { + return COMPST_ERROR; + } + break; + default: + WARN_ON(1); + } + + /* Check operation validity. */ + switch (pkt->opcode) { + case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST: + case IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST: + case IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY: + syn = aeth_syn(pkt); + + if ((syn & AETH_TYPE_MASK) != AETH_ACK) + return COMPST_ERROR; + + /* Fall through (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE + * doesn't have an AETH) + */ + case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: + if (wqe->wr.opcode != IB_WR_RDMA_READ && + wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) { + return COMPST_ERROR; + } + reset_retry_counters(qp); + return COMPST_READ; + + case IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE: + syn = aeth_syn(pkt); + + if ((syn & AETH_TYPE_MASK) != AETH_ACK) + return COMPST_ERROR; + + if (wqe->wr.opcode != IB_WR_ATOMIC_CMP_AND_SWP && + wqe->wr.opcode != IB_WR_ATOMIC_FETCH_AND_ADD) + return COMPST_ERROR; + reset_retry_counters(qp); + return COMPST_ATOMIC; + + case IB_OPCODE_RC_ACKNOWLEDGE: + syn = aeth_syn(pkt); + switch (syn & AETH_TYPE_MASK) { + case AETH_ACK: + reset_retry_counters(qp); + return COMPST_WRITE_SEND; + + case AETH_RNR_NAK: + return COMPST_RNR_RETRY; + + case AETH_NAK: + switch (syn) { + case AETH_NAK_PSN_SEQ_ERROR: + /* a nak implicitly acks all packets with psns + * before + */ + if (psn_compare(pkt->psn, qp->comp.psn) > 0) { + qp->comp.psn = pkt->psn; + if (qp->req.wait_psn) { + qp->req.wait_psn = 0; + rxe_run_task(&qp->req.task, 1); + } + } + return COMPST_ERROR_RETRY; + + case AETH_NAK_INVALID_REQ: + wqe->status = IB_WC_REM_INV_REQ_ERR; + return COMPST_ERROR; + + case AETH_NAK_REM_ACC_ERR: + wqe->status = IB_WC_REM_ACCESS_ERR; + return COMPST_ERROR; + + case AETH_NAK_REM_OP_ERR: + wqe->status = IB_WC_REM_OP_ERR; + return COMPST_ERROR; + + default: + pr_warn("unexpected nak %x\n", syn); + wqe->status = IB_WC_REM_OP_ERR; + return COMPST_ERROR; + } + + default: + return COMPST_ERROR; + } + break; + + default: + pr_warn("unexpected opcode\n"); + } + + return COMPST_ERROR; +} + +static inline enum comp_state do_read(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + int ret; + + ret = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE, + &wqe->dma, payload_addr(pkt), + payload_size(pkt), to_mem_obj, NULL); + if (ret) + return COMPST_ERROR; + + if (wqe->dma.resid == 0 && (pkt->mask & RXE_END_MASK)) + return COMPST_COMP_ACK; + else + return COMPST_UPDATE_COMP; +} + +static inline enum comp_state do_atomic(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + int ret; + + u64 atomic_orig = atmack_orig(pkt); + + ret = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE, + &wqe->dma, &atomic_orig, + sizeof(u64), to_mem_obj, NULL); + if (ret) + return COMPST_ERROR; + else + return COMPST_COMP_ACK; +} + +static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + struct rxe_cqe *cqe) +{ + memset(cqe, 0, sizeof(*cqe)); + + if (!qp->is_user) { + struct ib_wc *wc = &cqe->ibwc; + + wc->wr_id = wqe->wr.wr_id; + wc->status = wqe->status; + wc->opcode = wr_to_wc_opcode(wqe->wr.opcode); + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + wc->wc_flags = IB_WC_WITH_IMM; + wc->byte_len = wqe->dma.length; + wc->qp = &qp->ibqp; + } else { + struct ib_uverbs_wc *uwc = &cqe->uibwc; + + uwc->wr_id = wqe->wr.wr_id; + uwc->status = wqe->status; + uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode); + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + uwc->wc_flags = IB_WC_WITH_IMM; + uwc->byte_len = wqe->dma.length; + uwc->qp_num = qp->ibqp.qp_num; + } +} + +static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +{ + struct rxe_cqe cqe; + + if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) || + (wqe->wr.send_flags & IB_SEND_SIGNALED) || + (qp->req.state == QP_STATE_ERROR)) { + make_send_cqe(qp, wqe, &cqe); + rxe_cq_post(qp->scq, &cqe, 0); + } + + advance_consumer(qp->sq.queue); + + /* + * we completed something so let req run again + * if it is trying to fence + */ + if (qp->req.wait_fence) { + qp->req.wait_fence = 0; + rxe_run_task(&qp->req.task, 1); + } +} + +static inline enum comp_state complete_ack(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + unsigned long flags; + + if (wqe->has_rd_atomic) { + wqe->has_rd_atomic = 0; + atomic_inc(&qp->req.rd_atomic); + if (qp->req.need_rd_atomic) { + qp->comp.timeout_retry = 0; + qp->req.need_rd_atomic = 0; + rxe_run_task(&qp->req.task, 1); + } + } + + if (unlikely(qp->req.state == QP_STATE_DRAIN)) { + /* state_lock used by requester & completer */ + spin_lock_irqsave(&qp->state_lock, flags); + if ((qp->req.state == QP_STATE_DRAIN) && + (qp->comp.psn == qp->req.psn)) { + qp->req.state = QP_STATE_DRAINED; + spin_unlock_irqrestore(&qp->state_lock, flags); + + if (qp->ibqp.event_handler) { + struct ib_event ev; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_SQ_DRAINED; + qp->ibqp.event_handler(&ev, + qp->ibqp.qp_context); + } + } else { + spin_unlock_irqrestore(&qp->state_lock, flags); + } + } + + do_complete(qp, wqe); + + if (psn_compare(pkt->psn, qp->comp.psn) >= 0) + return COMPST_UPDATE_COMP; + else + return COMPST_DONE; +} + +static inline enum comp_state complete_wqe(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_send_wqe *wqe) +{ + qp->comp.opcode = -1; + + if (pkt) { + if (psn_compare(pkt->psn, qp->comp.psn) >= 0) + qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + + if (qp->req.wait_psn) { + qp->req.wait_psn = 0; + rxe_run_task(&qp->req.task, 1); + } + } + + do_complete(qp, wqe); + + return COMPST_GET_WQE; +} + +int rxe_completer(void *arg) +{ + struct rxe_qp *qp = (struct rxe_qp *)arg; + struct rxe_send_wqe *wqe = wqe; + struct sk_buff *skb = NULL; + struct rxe_pkt_info *pkt = NULL; + enum comp_state state; + + if (!qp->valid) { + while ((skb = skb_dequeue(&qp->resp_pkts))) { + rxe_drop_ref(qp); + kfree_skb(skb); + } + skb = NULL; + pkt = NULL; + + while (queue_head(qp->sq.queue)) + advance_consumer(qp->sq.queue); + + goto exit; + } + + if (qp->req.state == QP_STATE_ERROR) { + while ((skb = skb_dequeue(&qp->resp_pkts))) { + rxe_drop_ref(qp); + kfree_skb(skb); + } + skb = NULL; + pkt = NULL; + + while ((wqe = queue_head(qp->sq.queue))) { + wqe->status = IB_WC_WR_FLUSH_ERR; + do_complete(qp, wqe); + } + + goto exit; + } + + if (qp->req.state == QP_STATE_RESET) { + while ((skb = skb_dequeue(&qp->resp_pkts))) { + rxe_drop_ref(qp); + kfree_skb(skb); + } + skb = NULL; + pkt = NULL; + + while (queue_head(qp->sq.queue)) + advance_consumer(qp->sq.queue); + + goto exit; + } + + if (qp->comp.timeout) { + qp->comp.timeout_retry = 1; + qp->comp.timeout = 0; + } else { + qp->comp.timeout_retry = 0; + } + + if (qp->req.need_retry) + goto exit; + + state = COMPST_GET_ACK; + + while (1) { + pr_debug("state = %s\n", comp_state_name[state]); + switch (state) { + case COMPST_GET_ACK: + skb = skb_dequeue(&qp->resp_pkts); + if (skb) { + pkt = SKB_TO_PKT(skb); + qp->comp.timeout_retry = 0; + } + state = COMPST_GET_WQE; + break; + + case COMPST_GET_WQE: + state = get_wqe(qp, pkt, &wqe); + break; + + case COMPST_CHECK_PSN: + state = check_psn(qp, pkt, wqe); + break; + + case COMPST_CHECK_ACK: + state = check_ack(qp, pkt, wqe); + break; + + case COMPST_READ: + state = do_read(qp, pkt, wqe); + break; + + case COMPST_ATOMIC: + state = do_atomic(qp, pkt, wqe); + break; + + case COMPST_WRITE_SEND: + if (wqe->state == wqe_state_pending && + wqe->last_psn == pkt->psn) + state = COMPST_COMP_ACK; + else + state = COMPST_UPDATE_COMP; + break; + + case COMPST_COMP_ACK: + state = complete_ack(qp, pkt, wqe); + break; + + case COMPST_COMP_WQE: + state = complete_wqe(qp, pkt, wqe); + break; + + case COMPST_UPDATE_COMP: + if (pkt->mask & RXE_END_MASK) + qp->comp.opcode = -1; + else + qp->comp.opcode = pkt->opcode; + + if (psn_compare(pkt->psn, qp->comp.psn) >= 0) + qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + + if (qp->req.wait_psn) { + qp->req.wait_psn = 0; + rxe_run_task(&qp->req.task, 1); + } + + state = COMPST_DONE; + break; + + case COMPST_DONE: + if (pkt) { + rxe_drop_ref(pkt->qp); + kfree_skb(skb); + } + goto done; + + case COMPST_EXIT: + if (qp->comp.timeout_retry && wqe) { + state = COMPST_ERROR_RETRY; + break; + } + + /* re reset the timeout counter if + * (1) QP is type RC + * (2) the QP is alive + * (3) there is a packet sent by the requester that + * might be acked (we still might get spurious + * timeouts but try to keep them as few as possible) + * (4) the timeout parameter is set + */ + if ((qp_type(qp) == IB_QPT_RC) && + (qp->req.state == QP_STATE_READY) && + (psn_compare(qp->req.psn, qp->comp.psn) > 0) && + qp->qp_timeout_jiffies) + mod_timer(&qp->retrans_timer, + jiffies + qp->qp_timeout_jiffies); + goto exit; + + case COMPST_ERROR_RETRY: + /* we come here if the retry timer fired and we did + * not receive a response packet. try to retry the send + * queue if that makes sense and the limits have not + * been exceeded. remember that some timeouts are + * spurious since we do not reset the timer but kick + * it down the road or let it expire + */ + + /* there is nothing to retry in this case */ + if (!wqe || (wqe->state == wqe_state_posted)) + goto exit; + + if (qp->comp.retry_cnt > 0) { + if (qp->comp.retry_cnt != 7) + qp->comp.retry_cnt--; + + /* no point in retrying if we have already + * seen the last ack that the requester could + * have caused + */ + if (psn_compare(qp->req.psn, + qp->comp.psn) > 0) { + /* tell the requester to retry the + * send send queue next time around + */ + qp->req.need_retry = 1; + rxe_run_task(&qp->req.task, 1); + } + goto exit; + } else { + wqe->status = IB_WC_RETRY_EXC_ERR; + state = COMPST_ERROR; + } + break; + + case COMPST_RNR_RETRY: + if (qp->comp.rnr_retry > 0) { + if (qp->comp.rnr_retry != 7) + qp->comp.rnr_retry--; + + qp->req.need_retry = 1; + pr_debug("set rnr nak timer\n"); + mod_timer(&qp->rnr_nak_timer, + jiffies + rnrnak_jiffies(aeth_syn(pkt) + & ~AETH_TYPE_MASK)); + goto exit; + } else { + wqe->status = IB_WC_RNR_RETRY_EXC_ERR; + state = COMPST_ERROR; + } + break; + + case COMPST_ERROR: + do_complete(qp, wqe); + rxe_qp_error(qp); + goto exit; + } + } + +exit: + /* we come here if we are done with processing and want the task to + * exit from the loop calling us + */ + return -EAGAIN; + +done: + /* we come here if we have processed a packet we want the task to call + * us again to see if there is anything else to do + */ + return 0; +} diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c new file mode 100644 index 000000000000..e5e6a5e7dee9 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" + +int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, + int cqe, int comp_vector, struct ib_udata *udata) +{ + int count; + + if (cqe <= 0) { + pr_warn("cqe(%d) <= 0\n", cqe); + goto err1; + } + + if (cqe > rxe->attr.max_cqe) { + pr_warn("cqe(%d) > max_cqe(%d)\n", + cqe, rxe->attr.max_cqe); + goto err1; + } + + if (cq) { + count = queue_count(cq->queue); + if (cqe < count) { + pr_warn("cqe(%d) < current # elements in queue (%d)", + cqe, count); + goto err1; + } + } + + return 0; + +err1: + return -EINVAL; +} + +static void rxe_send_complete(unsigned long data) +{ + struct rxe_cq *cq = (struct rxe_cq *)data; + + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); +} + +int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, + int comp_vector, struct ib_ucontext *context, + struct ib_udata *udata) +{ + int err; + + cq->queue = rxe_queue_init(rxe, &cqe, + sizeof(struct rxe_cqe)); + if (!cq->queue) { + pr_warn("unable to create cq\n"); + return -ENOMEM; + } + + err = do_mmap_info(rxe, udata, false, context, cq->queue->buf, + cq->queue->buf_size, &cq->queue->ip); + if (err) { + kvfree(cq->queue->buf); + kfree(cq->queue); + return err; + } + + if (udata) + cq->is_user = 1; + + tasklet_init(&cq->comp_task, rxe_send_complete, (unsigned long)cq); + + spin_lock_init(&cq->cq_lock); + cq->ibcq.cqe = cqe; + return 0; +} + +int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe, struct ib_udata *udata) +{ + int err; + + err = rxe_queue_resize(cq->queue, (unsigned int *)&cqe, + sizeof(struct rxe_cqe), + cq->queue->ip ? cq->queue->ip->context : NULL, + udata, NULL, &cq->cq_lock); + if (!err) + cq->ibcq.cqe = cqe; + + return err; +} + +int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) +{ + struct ib_event ev; + unsigned long flags; + + spin_lock_irqsave(&cq->cq_lock, flags); + + if (unlikely(queue_full(cq->queue))) { + spin_unlock_irqrestore(&cq->cq_lock, flags); + if (cq->ibcq.event_handler) { + ev.device = cq->ibcq.device; + ev.element.cq = &cq->ibcq; + ev.event = IB_EVENT_CQ_ERR; + cq->ibcq.event_handler(&ev, cq->ibcq.cq_context); + } + + return -EBUSY; + } + + memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe)); + + /* make sure all changes to the CQ are written before we update the + * producer pointer + */ + smp_wmb(); + + advance_producer(cq->queue); + spin_unlock_irqrestore(&cq->cq_lock, flags); + + if ((cq->notify == IB_CQ_NEXT_COMP) || + (cq->notify == IB_CQ_SOLICITED && solicited)) { + cq->notify = 0; + tasklet_schedule(&cq->comp_task); + } + + return 0; +} + +void rxe_cq_cleanup(void *arg) +{ + struct rxe_cq *cq = arg; + + if (cq->queue) + rxe_queue_cleanup(cq->queue); +} diff --git a/drivers/infiniband/sw/rxe/rxe_dma.c b/drivers/infiniband/sw/rxe/rxe_dma.c new file mode 100644 index 000000000000..7634c1a81b2b --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_dma.c @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" + +#define DMA_BAD_ADDER ((u64)0) + +static int rxe_mapping_error(struct ib_device *dev, u64 dma_addr) +{ + return dma_addr == DMA_BAD_ADDER; +} + +static u64 rxe_dma_map_single(struct ib_device *dev, + void *cpu_addr, size_t size, + enum dma_data_direction direction) +{ + WARN_ON(!valid_dma_direction(direction)); + return (uintptr_t)cpu_addr; +} + +static void rxe_dma_unmap_single(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction) +{ + WARN_ON(!valid_dma_direction(direction)); +} + +static u64 rxe_dma_map_page(struct ib_device *dev, + struct page *page, + unsigned long offset, + size_t size, enum dma_data_direction direction) +{ + u64 addr; + + WARN_ON(!valid_dma_direction(direction)); + + if (offset + size > PAGE_SIZE) { + addr = DMA_BAD_ADDER; + goto done; + } + + addr = (uintptr_t)page_address(page); + if (addr) + addr += offset; + +done: + return addr; +} + +static void rxe_dma_unmap_page(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction) +{ + WARN_ON(!valid_dma_direction(direction)); +} + +static int rxe_map_sg(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction) +{ + struct scatterlist *sg; + u64 addr; + int i; + int ret = nents; + + WARN_ON(!valid_dma_direction(direction)); + + for_each_sg(sgl, sg, nents, i) { + addr = (uintptr_t)page_address(sg_page(sg)); + if (!addr) { + ret = 0; + break; + } + sg->dma_address = addr + sg->offset; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length = sg->length; +#endif + } + + return ret; +} + +static void rxe_unmap_sg(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + WARN_ON(!valid_dma_direction(direction)); +} + +static void rxe_sync_single_for_cpu(struct ib_device *dev, + u64 addr, + size_t size, enum dma_data_direction dir) +{ +} + +static void rxe_sync_single_for_device(struct ib_device *dev, + u64 addr, + size_t size, enum dma_data_direction dir) +{ +} + +static void *rxe_dma_alloc_coherent(struct ib_device *dev, size_t size, + u64 *dma_handle, gfp_t flag) +{ + struct page *p; + void *addr = NULL; + + p = alloc_pages(flag, get_order(size)); + if (p) + addr = page_address(p); + + if (dma_handle) + *dma_handle = (uintptr_t)addr; + + return addr; +} + +static void rxe_dma_free_coherent(struct ib_device *dev, size_t size, + void *cpu_addr, u64 dma_handle) +{ + free_pages((unsigned long)cpu_addr, get_order(size)); +} + +struct ib_dma_mapping_ops rxe_dma_mapping_ops = { + .mapping_error = rxe_mapping_error, + .map_single = rxe_dma_map_single, + .unmap_single = rxe_dma_unmap_single, + .map_page = rxe_dma_map_page, + .unmap_page = rxe_dma_unmap_page, + .map_sg = rxe_map_sg, + .unmap_sg = rxe_unmap_sg, + .sync_single_for_cpu = rxe_sync_single_for_cpu, + .sync_single_for_device = rxe_sync_single_for_device, + .alloc_coherent = rxe_dma_alloc_coherent, + .free_coherent = rxe_dma_free_coherent +}; diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h new file mode 100644 index 000000000000..d57b5e956ceb --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_hdr.h @@ -0,0 +1,952 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_HDR_H +#define RXE_HDR_H + +/* extracted information about a packet carried in an sk_buff struct fits in + * the skbuff cb array. Must be at most 48 bytes. stored in control block of + * sk_buff for received packets. + */ +struct rxe_pkt_info { + struct rxe_dev *rxe; /* device that owns packet */ + struct rxe_qp *qp; /* qp that owns packet */ + struct rxe_send_wqe *wqe; /* send wqe */ + u8 *hdr; /* points to bth */ + u32 mask; /* useful info about pkt */ + u32 psn; /* bth psn of packet */ + u16 pkey_index; /* partition of pkt */ + u16 paylen; /* length of bth - icrc */ + u8 port_num; /* port pkt received on */ + u8 opcode; /* bth opcode of packet */ + u8 offset; /* bth offset from pkt->hdr */ +}; + +/* Macros should be used only for received skb */ +#define SKB_TO_PKT(skb) ((struct rxe_pkt_info *)(skb)->cb) +#define PKT_TO_SKB(pkt) container_of((void *)(pkt), struct sk_buff, cb) + +/* + * IBA header types and methods + * + * Some of these are for reference and completeness only since + * rxe does not currently support RD transport + * most of this could be moved into IB core. ib_pack.h has + * part of this but is incomplete + * + * Header specific routines to insert/extract values to/from headers + * the routines that are named __hhh_(set_)fff() take a pointer to a + * hhh header and get(set) the fff field. The routines named + * hhh_(set_)fff take a packet info struct and find the + * header and field based on the opcode in the packet. + * Conversion to/from network byte order from cpu order is also done. + */ + +#define RXE_ICRC_SIZE (4) +#define RXE_MAX_HDR_LENGTH (80) + +/****************************************************************************** + * Base Transport Header + ******************************************************************************/ +struct rxe_bth { + u8 opcode; + u8 flags; + __be16 pkey; + __be32 qpn; + __be32 apsn; +}; + +#define BTH_TVER (0) +#define BTH_DEF_PKEY (0xffff) + +#define BTH_SE_MASK (0x80) +#define BTH_MIG_MASK (0x40) +#define BTH_PAD_MASK (0x30) +#define BTH_TVER_MASK (0x0f) +#define BTH_FECN_MASK (0x80000000) +#define BTH_BECN_MASK (0x40000000) +#define BTH_RESV6A_MASK (0x3f000000) +#define BTH_QPN_MASK (0x00ffffff) +#define BTH_ACK_MASK (0x80000000) +#define BTH_RESV7_MASK (0x7f000000) +#define BTH_PSN_MASK (0x00ffffff) + +static inline u8 __bth_opcode(void *arg) +{ + struct rxe_bth *bth = arg; + + return bth->opcode; +} + +static inline void __bth_set_opcode(void *arg, u8 opcode) +{ + struct rxe_bth *bth = arg; + + bth->opcode = opcode; +} + +static inline u8 __bth_se(void *arg) +{ + struct rxe_bth *bth = arg; + + return 0 != (BTH_SE_MASK & bth->flags); +} + +static inline void __bth_set_se(void *arg, int se) +{ + struct rxe_bth *bth = arg; + + if (se) + bth->flags |= BTH_SE_MASK; + else + bth->flags &= ~BTH_SE_MASK; +} + +static inline u8 __bth_mig(void *arg) +{ + struct rxe_bth *bth = arg; + + return 0 != (BTH_MIG_MASK & bth->flags); +} + +static inline void __bth_set_mig(void *arg, u8 mig) +{ + struct rxe_bth *bth = arg; + + if (mig) + bth->flags |= BTH_MIG_MASK; + else + bth->flags &= ~BTH_MIG_MASK; +} + +static inline u8 __bth_pad(void *arg) +{ + struct rxe_bth *bth = arg; + + return (BTH_PAD_MASK & bth->flags) >> 4; +} + +static inline void __bth_set_pad(void *arg, u8 pad) +{ + struct rxe_bth *bth = arg; + + bth->flags = (BTH_PAD_MASK & (pad << 4)) | + (~BTH_PAD_MASK & bth->flags); +} + +static inline u8 __bth_tver(void *arg) +{ + struct rxe_bth *bth = arg; + + return BTH_TVER_MASK & bth->flags; +} + +static inline void __bth_set_tver(void *arg, u8 tver) +{ + struct rxe_bth *bth = arg; + + bth->flags = (BTH_TVER_MASK & tver) | + (~BTH_TVER_MASK & bth->flags); +} + +static inline u16 __bth_pkey(void *arg) +{ + struct rxe_bth *bth = arg; + + return be16_to_cpu(bth->pkey); +} + +static inline void __bth_set_pkey(void *arg, u16 pkey) +{ + struct rxe_bth *bth = arg; + + bth->pkey = cpu_to_be16(pkey); +} + +static inline u32 __bth_qpn(void *arg) +{ + struct rxe_bth *bth = arg; + + return BTH_QPN_MASK & be32_to_cpu(bth->qpn); +} + +static inline void __bth_set_qpn(void *arg, u32 qpn) +{ + struct rxe_bth *bth = arg; + u32 resvqpn = be32_to_cpu(bth->qpn); + + bth->qpn = cpu_to_be32((BTH_QPN_MASK & qpn) | + (~BTH_QPN_MASK & resvqpn)); +} + +static inline int __bth_fecn(void *arg) +{ + struct rxe_bth *bth = arg; + + return 0 != (cpu_to_be32(BTH_FECN_MASK) & bth->qpn); +} + +static inline void __bth_set_fecn(void *arg, int fecn) +{ + struct rxe_bth *bth = arg; + + if (fecn) + bth->qpn |= cpu_to_be32(BTH_FECN_MASK); + else + bth->qpn &= ~cpu_to_be32(BTH_FECN_MASK); +} + +static inline int __bth_becn(void *arg) +{ + struct rxe_bth *bth = arg; + + return 0 != (cpu_to_be32(BTH_BECN_MASK) & bth->qpn); +} + +static inline void __bth_set_becn(void *arg, int becn) +{ + struct rxe_bth *bth = arg; + + if (becn) + bth->qpn |= cpu_to_be32(BTH_BECN_MASK); + else + bth->qpn &= ~cpu_to_be32(BTH_BECN_MASK); +} + +static inline u8 __bth_resv6a(void *arg) +{ + struct rxe_bth *bth = arg; + + return (BTH_RESV6A_MASK & be32_to_cpu(bth->qpn)) >> 24; +} + +static inline void __bth_set_resv6a(void *arg) +{ + struct rxe_bth *bth = arg; + + bth->qpn = cpu_to_be32(~BTH_RESV6A_MASK); +} + +static inline int __bth_ack(void *arg) +{ + struct rxe_bth *bth = arg; + + return 0 != (cpu_to_be32(BTH_ACK_MASK) & bth->apsn); +} + +static inline void __bth_set_ack(void *arg, int ack) +{ + struct rxe_bth *bth = arg; + + if (ack) + bth->apsn |= cpu_to_be32(BTH_ACK_MASK); + else + bth->apsn &= ~cpu_to_be32(BTH_ACK_MASK); +} + +static inline void __bth_set_resv7(void *arg) +{ + struct rxe_bth *bth = arg; + + bth->apsn &= ~cpu_to_be32(BTH_RESV7_MASK); +} + +static inline u32 __bth_psn(void *arg) +{ + struct rxe_bth *bth = arg; + + return BTH_PSN_MASK & be32_to_cpu(bth->apsn); +} + +static inline void __bth_set_psn(void *arg, u32 psn) +{ + struct rxe_bth *bth = arg; + u32 apsn = be32_to_cpu(bth->apsn); + + bth->apsn = cpu_to_be32((BTH_PSN_MASK & psn) | + (~BTH_PSN_MASK & apsn)); +} + +static inline u8 bth_opcode(struct rxe_pkt_info *pkt) +{ + return __bth_opcode(pkt->hdr + pkt->offset); +} + +static inline void bth_set_opcode(struct rxe_pkt_info *pkt, u8 opcode) +{ + __bth_set_opcode(pkt->hdr + pkt->offset, opcode); +} + +static inline u8 bth_se(struct rxe_pkt_info *pkt) +{ + return __bth_se(pkt->hdr + pkt->offset); +} + +static inline void bth_set_se(struct rxe_pkt_info *pkt, int se) +{ + __bth_set_se(pkt->hdr + pkt->offset, se); +} + +static inline u8 bth_mig(struct rxe_pkt_info *pkt) +{ + return __bth_mig(pkt->hdr + pkt->offset); +} + +static inline void bth_set_mig(struct rxe_pkt_info *pkt, u8 mig) +{ + __bth_set_mig(pkt->hdr + pkt->offset, mig); +} + +static inline u8 bth_pad(struct rxe_pkt_info *pkt) +{ + return __bth_pad(pkt->hdr + pkt->offset); +} + +static inline void bth_set_pad(struct rxe_pkt_info *pkt, u8 pad) +{ + __bth_set_pad(pkt->hdr + pkt->offset, pad); +} + +static inline u8 bth_tver(struct rxe_pkt_info *pkt) +{ + return __bth_tver(pkt->hdr + pkt->offset); +} + +static inline void bth_set_tver(struct rxe_pkt_info *pkt, u8 tver) +{ + __bth_set_tver(pkt->hdr + pkt->offset, tver); +} + +static inline u16 bth_pkey(struct rxe_pkt_info *pkt) +{ + return __bth_pkey(pkt->hdr + pkt->offset); +} + +static inline void bth_set_pkey(struct rxe_pkt_info *pkt, u16 pkey) +{ + __bth_set_pkey(pkt->hdr + pkt->offset, pkey); +} + +static inline u32 bth_qpn(struct rxe_pkt_info *pkt) +{ + return __bth_qpn(pkt->hdr + pkt->offset); +} + +static inline void bth_set_qpn(struct rxe_pkt_info *pkt, u32 qpn) +{ + __bth_set_qpn(pkt->hdr + pkt->offset, qpn); +} + +static inline int bth_fecn(struct rxe_pkt_info *pkt) +{ + return __bth_fecn(pkt->hdr + pkt->offset); +} + +static inline void bth_set_fecn(struct rxe_pkt_info *pkt, int fecn) +{ + __bth_set_fecn(pkt->hdr + pkt->offset, fecn); +} + +static inline int bth_becn(struct rxe_pkt_info *pkt) +{ + return __bth_becn(pkt->hdr + pkt->offset); +} + +static inline void bth_set_becn(struct rxe_pkt_info *pkt, int becn) +{ + __bth_set_becn(pkt->hdr + pkt->offset, becn); +} + +static inline u8 bth_resv6a(struct rxe_pkt_info *pkt) +{ + return __bth_resv6a(pkt->hdr + pkt->offset); +} + +static inline void bth_set_resv6a(struct rxe_pkt_info *pkt) +{ + __bth_set_resv6a(pkt->hdr + pkt->offset); +} + +static inline int bth_ack(struct rxe_pkt_info *pkt) +{ + return __bth_ack(pkt->hdr + pkt->offset); +} + +static inline void bth_set_ack(struct rxe_pkt_info *pkt, int ack) +{ + __bth_set_ack(pkt->hdr + pkt->offset, ack); +} + +static inline void bth_set_resv7(struct rxe_pkt_info *pkt) +{ + __bth_set_resv7(pkt->hdr + pkt->offset); +} + +static inline u32 bth_psn(struct rxe_pkt_info *pkt) +{ + return __bth_psn(pkt->hdr + pkt->offset); +} + +static inline void bth_set_psn(struct rxe_pkt_info *pkt, u32 psn) +{ + __bth_set_psn(pkt->hdr + pkt->offset, psn); +} + +static inline void bth_init(struct rxe_pkt_info *pkt, u8 opcode, int se, + int mig, int pad, u16 pkey, u32 qpn, int ack_req, + u32 psn) +{ + struct rxe_bth *bth = (struct rxe_bth *)(pkt->hdr + pkt->offset); + + bth->opcode = opcode; + bth->flags = (pad << 4) & BTH_PAD_MASK; + if (se) + bth->flags |= BTH_SE_MASK; + if (mig) + bth->flags |= BTH_MIG_MASK; + bth->pkey = cpu_to_be16(pkey); + bth->qpn = cpu_to_be32(qpn & BTH_QPN_MASK); + psn &= BTH_PSN_MASK; + if (ack_req) + psn |= BTH_ACK_MASK; + bth->apsn = cpu_to_be32(psn); +} + +/****************************************************************************** + * Reliable Datagram Extended Transport Header + ******************************************************************************/ +struct rxe_rdeth { + __be32 een; +}; + +#define RDETH_EEN_MASK (0x00ffffff) + +static inline u8 __rdeth_een(void *arg) +{ + struct rxe_rdeth *rdeth = arg; + + return RDETH_EEN_MASK & be32_to_cpu(rdeth->een); +} + +static inline void __rdeth_set_een(void *arg, u32 een) +{ + struct rxe_rdeth *rdeth = arg; + + rdeth->een = cpu_to_be32(RDETH_EEN_MASK & een); +} + +static inline u8 rdeth_een(struct rxe_pkt_info *pkt) +{ + return __rdeth_een(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_RDETH]); +} + +static inline void rdeth_set_een(struct rxe_pkt_info *pkt, u32 een) +{ + __rdeth_set_een(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_RDETH], een); +} + +/****************************************************************************** + * Datagram Extended Transport Header + ******************************************************************************/ +struct rxe_deth { + __be32 qkey; + __be32 sqp; +}; + +#define GSI_QKEY (0x80010000) +#define DETH_SQP_MASK (0x00ffffff) + +static inline u32 __deth_qkey(void *arg) +{ + struct rxe_deth *deth = arg; + + return be32_to_cpu(deth->qkey); +} + +static inline void __deth_set_qkey(void *arg, u32 qkey) +{ + struct rxe_deth *deth = arg; + + deth->qkey = cpu_to_be32(qkey); +} + +static inline u32 __deth_sqp(void *arg) +{ + struct rxe_deth *deth = arg; + + return DETH_SQP_MASK & be32_to_cpu(deth->sqp); +} + +static inline void __deth_set_sqp(void *arg, u32 sqp) +{ + struct rxe_deth *deth = arg; + + deth->sqp = cpu_to_be32(DETH_SQP_MASK & sqp); +} + +static inline u32 deth_qkey(struct rxe_pkt_info *pkt) +{ + return __deth_qkey(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_DETH]); +} + +static inline void deth_set_qkey(struct rxe_pkt_info *pkt, u32 qkey) +{ + __deth_set_qkey(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_DETH], qkey); +} + +static inline u32 deth_sqp(struct rxe_pkt_info *pkt) +{ + return __deth_sqp(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_DETH]); +} + +static inline void deth_set_sqp(struct rxe_pkt_info *pkt, u32 sqp) +{ + __deth_set_sqp(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_DETH], sqp); +} + +/****************************************************************************** + * RDMA Extended Transport Header + ******************************************************************************/ +struct rxe_reth { + __be64 va; + __be32 rkey; + __be32 len; +}; + +static inline u64 __reth_va(void *arg) +{ + struct rxe_reth *reth = arg; + + return be64_to_cpu(reth->va); +} + +static inline void __reth_set_va(void *arg, u64 va) +{ + struct rxe_reth *reth = arg; + + reth->va = cpu_to_be64(va); +} + +static inline u32 __reth_rkey(void *arg) +{ + struct rxe_reth *reth = arg; + + return be32_to_cpu(reth->rkey); +} + +static inline void __reth_set_rkey(void *arg, u32 rkey) +{ + struct rxe_reth *reth = arg; + + reth->rkey = cpu_to_be32(rkey); +} + +static inline u32 __reth_len(void *arg) +{ + struct rxe_reth *reth = arg; + + return be32_to_cpu(reth->len); +} + +static inline void __reth_set_len(void *arg, u32 len) +{ + struct rxe_reth *reth = arg; + + reth->len = cpu_to_be32(len); +} + +static inline u64 reth_va(struct rxe_pkt_info *pkt) +{ + return __reth_va(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); +} + +static inline void reth_set_va(struct rxe_pkt_info *pkt, u64 va) +{ + __reth_set_va(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_RETH], va); +} + +static inline u32 reth_rkey(struct rxe_pkt_info *pkt) +{ + return __reth_rkey(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); +} + +static inline void reth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey) +{ + __reth_set_rkey(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_RETH], rkey); +} + +static inline u32 reth_len(struct rxe_pkt_info *pkt) +{ + return __reth_len(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); +} + +static inline void reth_set_len(struct rxe_pkt_info *pkt, u32 len) +{ + __reth_set_len(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_RETH], len); +} + +/****************************************************************************** + * Atomic Extended Transport Header + ******************************************************************************/ +struct rxe_atmeth { + __be64 va; + __be32 rkey; + __be64 swap_add; + __be64 comp; +} __attribute__((__packed__)); + +static inline u64 __atmeth_va(void *arg) +{ + struct rxe_atmeth *atmeth = arg; + + return be64_to_cpu(atmeth->va); +} + +static inline void __atmeth_set_va(void *arg, u64 va) +{ + struct rxe_atmeth *atmeth = arg; + + atmeth->va = cpu_to_be64(va); +} + +static inline u32 __atmeth_rkey(void *arg) +{ + struct rxe_atmeth *atmeth = arg; + + return be32_to_cpu(atmeth->rkey); +} + +static inline void __atmeth_set_rkey(void *arg, u32 rkey) +{ + struct rxe_atmeth *atmeth = arg; + + atmeth->rkey = cpu_to_be32(rkey); +} + +static inline u64 __atmeth_swap_add(void *arg) +{ + struct rxe_atmeth *atmeth = arg; + + return be64_to_cpu(atmeth->swap_add); +} + +static inline void __atmeth_set_swap_add(void *arg, u64 swap_add) +{ + struct rxe_atmeth *atmeth = arg; + + atmeth->swap_add = cpu_to_be64(swap_add); +} + +static inline u64 __atmeth_comp(void *arg) +{ + struct rxe_atmeth *atmeth = arg; + + return be64_to_cpu(atmeth->comp); +} + +static inline void __atmeth_set_comp(void *arg, u64 comp) +{ + struct rxe_atmeth *atmeth = arg; + + atmeth->comp = cpu_to_be64(comp); +} + +static inline u64 atmeth_va(struct rxe_pkt_info *pkt) +{ + return __atmeth_va(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); +} + +static inline void atmeth_set_va(struct rxe_pkt_info *pkt, u64 va) +{ + __atmeth_set_va(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], va); +} + +static inline u32 atmeth_rkey(struct rxe_pkt_info *pkt) +{ + return __atmeth_rkey(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); +} + +static inline void atmeth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey) +{ + __atmeth_set_rkey(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], rkey); +} + +static inline u64 atmeth_swap_add(struct rxe_pkt_info *pkt) +{ + return __atmeth_swap_add(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); +} + +static inline void atmeth_set_swap_add(struct rxe_pkt_info *pkt, u64 swap_add) +{ + __atmeth_set_swap_add(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], swap_add); +} + +static inline u64 atmeth_comp(struct rxe_pkt_info *pkt) +{ + return __atmeth_comp(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); +} + +static inline void atmeth_set_comp(struct rxe_pkt_info *pkt, u64 comp) +{ + __atmeth_set_comp(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], comp); +} + +/****************************************************************************** + * Ack Extended Transport Header + ******************************************************************************/ +struct rxe_aeth { + __be32 smsn; +}; + +#define AETH_SYN_MASK (0xff000000) +#define AETH_MSN_MASK (0x00ffffff) + +enum aeth_syndrome { + AETH_TYPE_MASK = 0xe0, + AETH_ACK = 0x00, + AETH_RNR_NAK = 0x20, + AETH_RSVD = 0x40, + AETH_NAK = 0x60, + AETH_ACK_UNLIMITED = 0x1f, + AETH_NAK_PSN_SEQ_ERROR = 0x60, + AETH_NAK_INVALID_REQ = 0x61, + AETH_NAK_REM_ACC_ERR = 0x62, + AETH_NAK_REM_OP_ERR = 0x63, + AETH_NAK_INV_RD_REQ = 0x64, +}; + +static inline u8 __aeth_syn(void *arg) +{ + struct rxe_aeth *aeth = arg; + + return (AETH_SYN_MASK & be32_to_cpu(aeth->smsn)) >> 24; +} + +static inline void __aeth_set_syn(void *arg, u8 syn) +{ + struct rxe_aeth *aeth = arg; + u32 smsn = be32_to_cpu(aeth->smsn); + + aeth->smsn = cpu_to_be32((AETH_SYN_MASK & (syn << 24)) | + (~AETH_SYN_MASK & smsn)); +} + +static inline u32 __aeth_msn(void *arg) +{ + struct rxe_aeth *aeth = arg; + + return AETH_MSN_MASK & be32_to_cpu(aeth->smsn); +} + +static inline void __aeth_set_msn(void *arg, u32 msn) +{ + struct rxe_aeth *aeth = arg; + u32 smsn = be32_to_cpu(aeth->smsn); + + aeth->smsn = cpu_to_be32((AETH_MSN_MASK & msn) | + (~AETH_MSN_MASK & smsn)); +} + +static inline u8 aeth_syn(struct rxe_pkt_info *pkt) +{ + return __aeth_syn(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_AETH]); +} + +static inline void aeth_set_syn(struct rxe_pkt_info *pkt, u8 syn) +{ + __aeth_set_syn(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_AETH], syn); +} + +static inline u32 aeth_msn(struct rxe_pkt_info *pkt) +{ + return __aeth_msn(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_AETH]); +} + +static inline void aeth_set_msn(struct rxe_pkt_info *pkt, u32 msn) +{ + __aeth_set_msn(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_AETH], msn); +} + +/****************************************************************************** + * Atomic Ack Extended Transport Header + ******************************************************************************/ +struct rxe_atmack { + __be64 orig; +}; + +static inline u64 __atmack_orig(void *arg) +{ + struct rxe_atmack *atmack = arg; + + return be64_to_cpu(atmack->orig); +} + +static inline void __atmack_set_orig(void *arg, u64 orig) +{ + struct rxe_atmack *atmack = arg; + + atmack->orig = cpu_to_be64(orig); +} + +static inline u64 atmack_orig(struct rxe_pkt_info *pkt) +{ + return __atmack_orig(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_ATMACK]); +} + +static inline void atmack_set_orig(struct rxe_pkt_info *pkt, u64 orig) +{ + __atmack_set_orig(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_ATMACK], orig); +} + +/****************************************************************************** + * Immediate Extended Transport Header + ******************************************************************************/ +struct rxe_immdt { + __be32 imm; +}; + +static inline __be32 __immdt_imm(void *arg) +{ + struct rxe_immdt *immdt = arg; + + return immdt->imm; +} + +static inline void __immdt_set_imm(void *arg, __be32 imm) +{ + struct rxe_immdt *immdt = arg; + + immdt->imm = imm; +} + +static inline __be32 immdt_imm(struct rxe_pkt_info *pkt) +{ + return __immdt_imm(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_IMMDT]); +} + +static inline void immdt_set_imm(struct rxe_pkt_info *pkt, __be32 imm) +{ + __immdt_set_imm(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_IMMDT], imm); +} + +/****************************************************************************** + * Invalidate Extended Transport Header + ******************************************************************************/ +struct rxe_ieth { + __be32 rkey; +}; + +static inline u32 __ieth_rkey(void *arg) +{ + struct rxe_ieth *ieth = arg; + + return be32_to_cpu(ieth->rkey); +} + +static inline void __ieth_set_rkey(void *arg, u32 rkey) +{ + struct rxe_ieth *ieth = arg; + + ieth->rkey = cpu_to_be32(rkey); +} + +static inline u32 ieth_rkey(struct rxe_pkt_info *pkt) +{ + return __ieth_rkey(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_IETH]); +} + +static inline void ieth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey) +{ + __ieth_set_rkey(pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_IETH], rkey); +} + +enum rxe_hdr_length { + RXE_BTH_BYTES = sizeof(struct rxe_bth), + RXE_DETH_BYTES = sizeof(struct rxe_deth), + RXE_IMMDT_BYTES = sizeof(struct rxe_immdt), + RXE_RETH_BYTES = sizeof(struct rxe_reth), + RXE_AETH_BYTES = sizeof(struct rxe_aeth), + RXE_ATMACK_BYTES = sizeof(struct rxe_atmack), + RXE_ATMETH_BYTES = sizeof(struct rxe_atmeth), + RXE_IETH_BYTES = sizeof(struct rxe_ieth), + RXE_RDETH_BYTES = sizeof(struct rxe_rdeth), +}; + +static inline size_t header_size(struct rxe_pkt_info *pkt) +{ + return pkt->offset + rxe_opcode[pkt->opcode].length; +} + +static inline void *payload_addr(struct rxe_pkt_info *pkt) +{ + return pkt->hdr + pkt->offset + + rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD]; +} + +static inline size_t payload_size(struct rxe_pkt_info *pkt) +{ + return pkt->paylen - rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD] + - bth_pad(pkt) - RXE_ICRC_SIZE; +} + +#endif /* RXE_HDR_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c new file mode 100644 index 000000000000..413b56b23a06 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_icrc.c @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" + +/* Compute a partial ICRC for all the IB transport headers. */ +u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb) +{ + unsigned int bth_offset = 0; + struct iphdr *ip4h = NULL; + struct ipv6hdr *ip6h = NULL; + struct udphdr *udph; + struct rxe_bth *bth; + int crc; + int length; + int hdr_size = sizeof(struct udphdr) + + (skb->protocol == htons(ETH_P_IP) ? + sizeof(struct iphdr) : sizeof(struct ipv6hdr)); + /* pseudo header buffer size is calculate using ipv6 header size since + * it is bigger than ipv4 + */ + u8 pshdr[sizeof(struct udphdr) + + sizeof(struct ipv6hdr) + + RXE_BTH_BYTES]; + + /* This seed is the result of computing a CRC with a seed of + * 0xfffffff and 8 bytes of 0xff representing a masked LRH. + */ + crc = 0xdebb20e3; + + if (skb->protocol == htons(ETH_P_IP)) { /* IPv4 */ + memcpy(pshdr, ip_hdr(skb), hdr_size); + ip4h = (struct iphdr *)pshdr; + udph = (struct udphdr *)(ip4h + 1); + + ip4h->ttl = 0xff; + ip4h->check = CSUM_MANGLED_0; + ip4h->tos = 0xff; + } else { /* IPv6 */ + memcpy(pshdr, ipv6_hdr(skb), hdr_size); + ip6h = (struct ipv6hdr *)pshdr; + udph = (struct udphdr *)(ip6h + 1); + + memset(ip6h->flow_lbl, 0xff, sizeof(ip6h->flow_lbl)); + ip6h->priority = 0xf; + ip6h->hop_limit = 0xff; + } + udph->check = CSUM_MANGLED_0; + + bth_offset += hdr_size; + + memcpy(&pshdr[bth_offset], pkt->hdr, RXE_BTH_BYTES); + bth = (struct rxe_bth *)&pshdr[bth_offset]; + + /* exclude bth.resv8a */ + bth->qpn |= cpu_to_be32(~BTH_QPN_MASK); + + length = hdr_size + RXE_BTH_BYTES; + crc = crc32_le(crc, pshdr, length); + + /* And finish to compute the CRC on the remainder of the headers. */ + crc = crc32_le(crc, pkt->hdr + RXE_BTH_BYTES, + rxe_opcode[pkt->opcode].length - RXE_BTH_BYTES); + return crc; +} diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h new file mode 100644 index 000000000000..4a5484ef604f --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_LOC_H +#define RXE_LOC_H + +/* rxe_av.c */ + +int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr); + +int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num, + struct rxe_av *av, struct ib_ah_attr *attr); + +int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av, + struct ib_ah_attr *attr); + +int rxe_av_fill_ip_info(struct rxe_dev *rxe, + struct rxe_av *av, + struct ib_ah_attr *attr, + struct ib_gid_attr *sgid_attr, + union ib_gid *sgid); + +struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt); + +/* rxe_cq.c */ +int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, + int cqe, int comp_vector, struct ib_udata *udata); + +int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, + int comp_vector, struct ib_ucontext *context, + struct ib_udata *udata); + +int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe, struct ib_udata *udata); + +int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited); + +void rxe_cq_cleanup(void *arg); + +/* rxe_mcast.c */ +int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid, + struct rxe_mc_grp **grp_p); + +int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, + struct rxe_mc_grp *grp); + +int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, + union ib_gid *mgid); + +void rxe_drop_all_mcast_groups(struct rxe_qp *qp); + +void rxe_mc_cleanup(void *arg); + +/* rxe_mmap.c */ +struct rxe_mmap_info { + struct list_head pending_mmaps; + struct ib_ucontext *context; + struct kref ref; + void *obj; + + struct mminfo info; +}; + +void rxe_mmap_release(struct kref *ref); + +struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *dev, + u32 size, + struct ib_ucontext *context, + void *obj); + +int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); + +/* rxe_mr.c */ +enum copy_direction { + to_mem_obj, + from_mem_obj, +}; + +int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd, + int access, struct rxe_mem *mem); + +int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start, + u64 length, u64 iova, int access, struct ib_udata *udata, + struct rxe_mem *mr); + +int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd, + int max_pages, struct rxe_mem *mem); + +int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, + int length, enum copy_direction dir, u32 *crcp); + +int copy_data(struct rxe_dev *rxe, struct rxe_pd *pd, int access, + struct rxe_dma_info *dma, void *addr, int length, + enum copy_direction dir, u32 *crcp); + +void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length); + +enum lookup_type { + lookup_local, + lookup_remote, +}; + +struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key, + enum lookup_type type); + +int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length); + +int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem, + u64 *page, int num_pages, u64 iova); + +void rxe_mem_cleanup(void *arg); + +int advance_dma_data(struct rxe_dma_info *dma, unsigned int length); + +/* rxe_qp.c */ +int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init); + +int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, + struct ib_qp_init_attr *init, struct ib_udata *udata, + struct ib_pd *ibpd); + +int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init); + +int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, + struct ib_qp_attr *attr, int mask); + +int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, + int mask, struct ib_udata *udata); + +int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask); + +void rxe_qp_error(struct rxe_qp *qp); + +void rxe_qp_destroy(struct rxe_qp *qp); + +void rxe_qp_cleanup(void *arg); + +static inline int qp_num(struct rxe_qp *qp) +{ + return qp->ibqp.qp_num; +} + +static inline enum ib_qp_type qp_type(struct rxe_qp *qp) +{ + return qp->ibqp.qp_type; +} + +static inline enum ib_qp_state qp_state(struct rxe_qp *qp) +{ + return qp->attr.qp_state; +} + +static inline int qp_mtu(struct rxe_qp *qp) +{ + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) + return qp->attr.path_mtu; + else + return RXE_PORT_MAX_MTU; +} + +static inline int rcv_wqe_size(int max_sge) +{ + return sizeof(struct rxe_recv_wqe) + + max_sge * sizeof(struct ib_sge); +} + +void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res); + +static inline void rxe_advance_resp_resource(struct rxe_qp *qp) +{ + qp->resp.res_head++; + if (unlikely(qp->resp.res_head == qp->attr.max_rd_atomic)) + qp->resp.res_head = 0; +} + +void retransmit_timer(unsigned long data); +void rnr_nak_timer(unsigned long data); + +void dump_qp(struct rxe_qp *qp); + +/* rxe_srq.c */ +#define IB_SRQ_INIT_MASK (~IB_SRQ_LIMIT) + +int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, + struct ib_srq_attr *attr, enum ib_srq_attr_mask mask); + +int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, + struct ib_srq_init_attr *init, + struct ib_ucontext *context, struct ib_udata *udata); + +int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, + struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, + struct ib_udata *udata); + +extern struct ib_dma_mapping_ops rxe_dma_mapping_ops; + +void rxe_release(struct kref *kref); + +int rxe_completer(void *arg); +int rxe_requester(void *arg); +int rxe_responder(void *arg); + +u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb); + +void rxe_resp_queue_pkt(struct rxe_dev *rxe, + struct rxe_qp *qp, struct sk_buff *skb); + +void rxe_comp_queue_pkt(struct rxe_dev *rxe, + struct rxe_qp *qp, struct sk_buff *skb); + +static inline unsigned wr_opcode_mask(int opcode, struct rxe_qp *qp) +{ + return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type]; +} + +static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp, + struct rxe_pkt_info *pkt, struct sk_buff *skb) +{ + int err; + int is_request = pkt->mask & RXE_REQ_MASK; + + if ((is_request && (qp->req.state != QP_STATE_READY)) || + (!is_request && (qp->resp.state != QP_STATE_READY))) { + pr_info("Packet dropped. QP is not in ready state\n"); + goto drop; + } + + if (pkt->mask & RXE_LOOPBACK_MASK) { + memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt)); + err = rxe->ifc_ops->loopback(skb); + } else { + err = rxe->ifc_ops->send(rxe, pkt, skb); + } + + if (err) { + rxe->xmit_errors++; + return err; + } + + atomic_inc(&qp->skb_out); + + if ((qp_type(qp) != IB_QPT_RC) && + (pkt->mask & RXE_END_MASK)) { + pkt->wqe->state = wqe_state_done; + rxe_run_task(&qp->comp.task, 1); + } + + goto done; + +drop: + kfree_skb(skb); + err = 0; +done: + return err; +} + +#endif /* RXE_LOC_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c new file mode 100644 index 000000000000..fa95544ca7e0 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" + +int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid, + struct rxe_mc_grp **grp_p) +{ + int err; + struct rxe_mc_grp *grp; + + if (rxe->attr.max_mcast_qp_attach == 0) { + err = -EINVAL; + goto err1; + } + + grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid); + if (grp) + goto done; + + grp = rxe_alloc(&rxe->mc_grp_pool); + if (!grp) { + err = -ENOMEM; + goto err1; + } + + INIT_LIST_HEAD(&grp->qp_list); + spin_lock_init(&grp->mcg_lock); + grp->rxe = rxe; + + rxe_add_key(grp, mgid); + + err = rxe->ifc_ops->mcast_add(rxe, mgid); + if (err) + goto err2; + +done: + *grp_p = grp; + return 0; + +err2: + rxe_drop_ref(grp); +err1: + return err; +} + +int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, + struct rxe_mc_grp *grp) +{ + int err; + struct rxe_mc_elem *elem; + + /* check to see of the qp is already a member of the group */ + spin_lock_bh(&qp->grp_lock); + spin_lock_bh(&grp->mcg_lock); + list_for_each_entry(elem, &grp->qp_list, qp_list) { + if (elem->qp == qp) { + err = 0; + goto out; + } + } + + if (grp->num_qp >= rxe->attr.max_mcast_qp_attach) { + err = -ENOMEM; + goto out; + } + + elem = rxe_alloc(&rxe->mc_elem_pool); + if (!elem) { + err = -ENOMEM; + goto out; + } + + /* each qp holds a ref on the grp */ + rxe_add_ref(grp); + + grp->num_qp++; + elem->qp = qp; + elem->grp = grp; + + list_add(&elem->qp_list, &grp->qp_list); + list_add(&elem->grp_list, &qp->grp_list); + + err = 0; +out: + spin_unlock_bh(&grp->mcg_lock); + spin_unlock_bh(&qp->grp_lock); + return err; +} + +int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, + union ib_gid *mgid) +{ + struct rxe_mc_grp *grp; + struct rxe_mc_elem *elem, *tmp; + + grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid); + if (!grp) + goto err1; + + spin_lock_bh(&qp->grp_lock); + spin_lock_bh(&grp->mcg_lock); + + list_for_each_entry_safe(elem, tmp, &grp->qp_list, qp_list) { + if (elem->qp == qp) { + list_del(&elem->qp_list); + list_del(&elem->grp_list); + grp->num_qp--; + + spin_unlock_bh(&grp->mcg_lock); + spin_unlock_bh(&qp->grp_lock); + rxe_drop_ref(elem); + rxe_drop_ref(grp); /* ref held by QP */ + rxe_drop_ref(grp); /* ref from get_key */ + return 0; + } + } + + spin_unlock_bh(&grp->mcg_lock); + spin_unlock_bh(&qp->grp_lock); + rxe_drop_ref(grp); /* ref from get_key */ +err1: + return -EINVAL; +} + +void rxe_drop_all_mcast_groups(struct rxe_qp *qp) +{ + struct rxe_mc_grp *grp; + struct rxe_mc_elem *elem; + + while (1) { + spin_lock_bh(&qp->grp_lock); + if (list_empty(&qp->grp_list)) { + spin_unlock_bh(&qp->grp_lock); + break; + } + elem = list_first_entry(&qp->grp_list, struct rxe_mc_elem, + grp_list); + list_del(&elem->grp_list); + spin_unlock_bh(&qp->grp_lock); + + grp = elem->grp; + spin_lock_bh(&grp->mcg_lock); + list_del(&elem->qp_list); + grp->num_qp--; + spin_unlock_bh(&grp->mcg_lock); + rxe_drop_ref(grp); + rxe_drop_ref(elem); + } +} + +void rxe_mc_cleanup(void *arg) +{ + struct rxe_mc_grp *grp = arg; + struct rxe_dev *rxe = grp->rxe; + + rxe_drop_key(grp); + rxe->ifc_ops->mcast_delete(rxe, &grp->mgid); +} diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c new file mode 100644 index 000000000000..54b3c7c99eff --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <linux/errno.h> +#include <asm/pgtable.h> + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" + +void rxe_mmap_release(struct kref *ref) +{ + struct rxe_mmap_info *ip = container_of(ref, + struct rxe_mmap_info, ref); + struct rxe_dev *rxe = to_rdev(ip->context->device); + + spin_lock_bh(&rxe->pending_lock); + + if (!list_empty(&ip->pending_mmaps)) + list_del(&ip->pending_mmaps); + + spin_unlock_bh(&rxe->pending_lock); + + vfree(ip->obj); /* buf */ + kfree(ip); +} + +/* + * open and close keep track of how many times the memory region is mapped, + * to avoid releasing it. + */ +static void rxe_vma_open(struct vm_area_struct *vma) +{ + struct rxe_mmap_info *ip = vma->vm_private_data; + + kref_get(&ip->ref); +} + +static void rxe_vma_close(struct vm_area_struct *vma) +{ + struct rxe_mmap_info *ip = vma->vm_private_data; + + kref_put(&ip->ref, rxe_mmap_release); +} + +static struct vm_operations_struct rxe_vm_ops = { + .open = rxe_vma_open, + .close = rxe_vma_close, +}; + +/** + * rxe_mmap - create a new mmap region + * @context: the IB user context of the process making the mmap() call + * @vma: the VMA to be initialized + * Return zero if the mmap is OK. Otherwise, return an errno. + */ +int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + struct rxe_dev *rxe = to_rdev(context->device); + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long size = vma->vm_end - vma->vm_start; + struct rxe_mmap_info *ip, *pp; + int ret; + + /* + * Search the device's list of objects waiting for a mmap call. + * Normally, this list is very short since a call to create a + * CQ, QP, or SRQ is soon followed by a call to mmap(). + */ + spin_lock_bh(&rxe->pending_lock); + list_for_each_entry_safe(ip, pp, &rxe->pending_mmaps, pending_mmaps) { + if (context != ip->context || (__u64)offset != ip->info.offset) + continue; + + /* Don't allow a mmap larger than the object. */ + if (size > ip->info.size) { + pr_err("mmap region is larger than the object!\n"); + spin_unlock_bh(&rxe->pending_lock); + ret = -EINVAL; + goto done; + } + + goto found_it; + } + pr_warn("unable to find pending mmap info\n"); + spin_unlock_bh(&rxe->pending_lock); + ret = -EINVAL; + goto done; + +found_it: + list_del_init(&ip->pending_mmaps); + spin_unlock_bh(&rxe->pending_lock); + + ret = remap_vmalloc_range(vma, ip->obj, 0); + if (ret) { + pr_err("rxe: err %d from remap_vmalloc_range\n", ret); + goto done; + } + + vma->vm_ops = &rxe_vm_ops; + vma->vm_private_data = ip; + rxe_vma_open(vma); +done: + return ret; +} + +/* + * Allocate information for rxe_mmap + */ +struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe, + u32 size, + struct ib_ucontext *context, + void *obj) +{ + struct rxe_mmap_info *ip; + + ip = kmalloc(sizeof(*ip), GFP_KERNEL); + if (!ip) + return NULL; + + size = PAGE_ALIGN(size); + + spin_lock_bh(&rxe->mmap_offset_lock); + + if (rxe->mmap_offset == 0) + rxe->mmap_offset = PAGE_SIZE; + + ip->info.offset = rxe->mmap_offset; + rxe->mmap_offset += size; + + spin_unlock_bh(&rxe->mmap_offset_lock); + + INIT_LIST_HEAD(&ip->pending_mmaps); + ip->info.size = size; + ip->context = context; + ip->obj = obj; + kref_init(&ip->ref); + + return ip; +} diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c new file mode 100644 index 000000000000..f3dab6574504 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -0,0 +1,643 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" + +/* + * lfsr (linear feedback shift register) with period 255 + */ +static u8 rxe_get_key(void) +{ + static unsigned key = 1; + + key = key << 1; + + key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10)) + ^ (0 != (key & 0x80)) ^ (0 != (key & 0x40)); + + key &= 0xff; + + return key; +} + +int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length) +{ + switch (mem->type) { + case RXE_MEM_TYPE_DMA: + return 0; + + case RXE_MEM_TYPE_MR: + case RXE_MEM_TYPE_FMR: + return ((iova < mem->iova) || + ((iova + length) > (mem->iova + mem->length))) ? + -EFAULT : 0; + + default: + return -EFAULT; + } +} + +#define IB_ACCESS_REMOTE (IB_ACCESS_REMOTE_READ \ + | IB_ACCESS_REMOTE_WRITE \ + | IB_ACCESS_REMOTE_ATOMIC) + +static void rxe_mem_init(int access, struct rxe_mem *mem) +{ + u32 lkey = mem->pelem.index << 8 | rxe_get_key(); + u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0; + + if (mem->pelem.pool->type == RXE_TYPE_MR) { + mem->ibmr.lkey = lkey; + mem->ibmr.rkey = rkey; + } + + mem->lkey = lkey; + mem->rkey = rkey; + mem->state = RXE_MEM_STATE_INVALID; + mem->type = RXE_MEM_TYPE_NONE; + mem->map_shift = ilog2(RXE_BUF_PER_MAP); +} + +void rxe_mem_cleanup(void *arg) +{ + struct rxe_mem *mem = arg; + int i; + + if (mem->umem) + ib_umem_release(mem->umem); + + if (mem->map) { + for (i = 0; i < mem->num_map; i++) + kfree(mem->map[i]); + + kfree(mem->map); + } +} + +static int rxe_mem_alloc(struct rxe_dev *rxe, struct rxe_mem *mem, int num_buf) +{ + int i; + int num_map; + struct rxe_map **map = mem->map; + + num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP; + + mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL); + if (!mem->map) + goto err1; + + for (i = 0; i < num_map; i++) { + mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL); + if (!mem->map[i]) + goto err2; + } + + WARN_ON(!is_power_of_2(RXE_BUF_PER_MAP)); + + mem->map_shift = ilog2(RXE_BUF_PER_MAP); + mem->map_mask = RXE_BUF_PER_MAP - 1; + + mem->num_buf = num_buf; + mem->num_map = num_map; + mem->max_buf = num_map * RXE_BUF_PER_MAP; + + return 0; + +err2: + for (i--; i >= 0; i--) + kfree(mem->map[i]); + + kfree(mem->map); +err1: + return -ENOMEM; +} + +int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd, + int access, struct rxe_mem *mem) +{ + rxe_mem_init(access, mem); + + mem->pd = pd; + mem->access = access; + mem->state = RXE_MEM_STATE_VALID; + mem->type = RXE_MEM_TYPE_DMA; + + return 0; +} + +int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start, + u64 length, u64 iova, int access, struct ib_udata *udata, + struct rxe_mem *mem) +{ + int entry; + struct rxe_map **map; + struct rxe_phys_buf *buf = NULL; + struct ib_umem *umem; + struct scatterlist *sg; + int num_buf; + void *vaddr; + int err; + + umem = ib_umem_get(pd->ibpd.uobject->context, start, length, access, 0); + if (IS_ERR(umem)) { + pr_warn("err %d from rxe_umem_get\n", + (int)PTR_ERR(umem)); + err = -EINVAL; + goto err1; + } + + mem->umem = umem; + num_buf = umem->nmap; + + rxe_mem_init(access, mem); + + err = rxe_mem_alloc(rxe, mem, num_buf); + if (err) { + pr_warn("err %d from rxe_mem_alloc\n", err); + ib_umem_release(umem); + goto err1; + } + + WARN_ON(!is_power_of_2(umem->page_size)); + + mem->page_shift = ilog2(umem->page_size); + mem->page_mask = umem->page_size - 1; + + num_buf = 0; + map = mem->map; + if (length > 0) { + buf = map[0]->buf; + + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + vaddr = page_address(sg_page(sg)); + if (!vaddr) { + pr_warn("null vaddr\n"); + err = -ENOMEM; + goto err1; + } + + buf->addr = (uintptr_t)vaddr; + buf->size = umem->page_size; + num_buf++; + buf++; + + if (num_buf >= RXE_BUF_PER_MAP) { + map++; + buf = map[0]->buf; + num_buf = 0; + } + } + } + + mem->pd = pd; + mem->umem = umem; + mem->access = access; + mem->length = length; + mem->iova = iova; + mem->va = start; + mem->offset = ib_umem_offset(umem); + mem->state = RXE_MEM_STATE_VALID; + mem->type = RXE_MEM_TYPE_MR; + + return 0; + +err1: + return err; +} + +int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd, + int max_pages, struct rxe_mem *mem) +{ + int err; + + rxe_mem_init(0, mem); + + /* In fastreg, we also set the rkey */ + mem->ibmr.rkey = mem->ibmr.lkey; + + err = rxe_mem_alloc(rxe, mem, max_pages); + if (err) + goto err1; + + mem->pd = pd; + mem->max_buf = max_pages; + mem->state = RXE_MEM_STATE_FREE; + mem->type = RXE_MEM_TYPE_MR; + + return 0; + +err1: + return err; +} + +static void lookup_iova( + struct rxe_mem *mem, + u64 iova, + int *m_out, + int *n_out, + size_t *offset_out) +{ + size_t offset = iova - mem->iova + mem->offset; + int map_index; + int buf_index; + u64 length; + + if (likely(mem->page_shift)) { + *offset_out = offset & mem->page_mask; + offset >>= mem->page_shift; + *n_out = offset & mem->map_mask; + *m_out = offset >> mem->map_shift; + } else { + map_index = 0; + buf_index = 0; + + length = mem->map[map_index]->buf[buf_index].size; + + while (offset >= length) { + offset -= length; + buf_index++; + + if (buf_index == RXE_BUF_PER_MAP) { + map_index++; + buf_index = 0; + } + length = mem->map[map_index]->buf[buf_index].size; + } + + *m_out = map_index; + *n_out = buf_index; + *offset_out = offset; + } +} + +void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length) +{ + size_t offset; + int m, n; + void *addr; + + if (mem->state != RXE_MEM_STATE_VALID) { + pr_warn("mem not in valid state\n"); + addr = NULL; + goto out; + } + + if (!mem->map) { + addr = (void *)(uintptr_t)iova; + goto out; + } + + if (mem_check_range(mem, iova, length)) { + pr_warn("range violation\n"); + addr = NULL; + goto out; + } + + lookup_iova(mem, iova, &m, &n, &offset); + + if (offset + length > mem->map[m]->buf[n].size) { + pr_warn("crosses page boundary\n"); + addr = NULL; + goto out; + } + + addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset; + +out: + return addr; +} + +/* copy data from a range (vaddr, vaddr+length-1) to or from + * a mem object starting at iova. Compute incremental value of + * crc32 if crcp is not zero. caller must hold a reference to mem + */ +int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length, + enum copy_direction dir, u32 *crcp) +{ + int err; + int bytes; + u8 *va; + struct rxe_map **map; + struct rxe_phys_buf *buf; + int m; + int i; + size_t offset; + u32 crc = crcp ? (*crcp) : 0; + + if (mem->type == RXE_MEM_TYPE_DMA) { + u8 *src, *dest; + + src = (dir == to_mem_obj) ? + addr : ((void *)(uintptr_t)iova); + + dest = (dir == to_mem_obj) ? + ((void *)(uintptr_t)iova) : addr; + + if (crcp) + *crcp = crc32_le(*crcp, src, length); + + memcpy(dest, src, length); + + return 0; + } + + WARN_ON(!mem->map); + + err = mem_check_range(mem, iova, length); + if (err) { + err = -EFAULT; + goto err1; + } + + lookup_iova(mem, iova, &m, &i, &offset); + + map = mem->map + m; + buf = map[0]->buf + i; + + while (length > 0) { + u8 *src, *dest; + + va = (u8 *)(uintptr_t)buf->addr + offset; + src = (dir == to_mem_obj) ? addr : va; + dest = (dir == to_mem_obj) ? va : addr; + + bytes = buf->size - offset; + + if (bytes > length) + bytes = length; + + if (crcp) + crc = crc32_le(crc, src, bytes); + + memcpy(dest, src, bytes); + + length -= bytes; + addr += bytes; + + offset = 0; + buf++; + i++; + + if (i == RXE_BUF_PER_MAP) { + i = 0; + map++; + buf = map[0]->buf; + } + } + + if (crcp) + *crcp = crc; + + return 0; + +err1: + return err; +} + +/* copy data in or out of a wqe, i.e. sg list + * under the control of a dma descriptor + */ +int copy_data( + struct rxe_dev *rxe, + struct rxe_pd *pd, + int access, + struct rxe_dma_info *dma, + void *addr, + int length, + enum copy_direction dir, + u32 *crcp) +{ + int bytes; + struct rxe_sge *sge = &dma->sge[dma->cur_sge]; + int offset = dma->sge_offset; + int resid = dma->resid; + struct rxe_mem *mem = NULL; + u64 iova; + int err; + + if (length == 0) + return 0; + + if (length > resid) { + err = -EINVAL; + goto err2; + } + + if (sge->length && (offset < sge->length)) { + mem = lookup_mem(pd, access, sge->lkey, lookup_local); + if (!mem) { + err = -EINVAL; + goto err1; + } + } + + while (length > 0) { + bytes = length; + + if (offset >= sge->length) { + if (mem) { + rxe_drop_ref(mem); + mem = NULL; + } + sge++; + dma->cur_sge++; + offset = 0; + + if (dma->cur_sge >= dma->num_sge) { + err = -ENOSPC; + goto err2; + } + + if (sge->length) { + mem = lookup_mem(pd, access, sge->lkey, + lookup_local); + if (!mem) { + err = -EINVAL; + goto err1; + } + } else { + continue; + } + } + + if (bytes > sge->length - offset) + bytes = sge->length - offset; + + if (bytes > 0) { + iova = sge->addr + offset; + + err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp); + if (err) + goto err2; + + offset += bytes; + resid -= bytes; + length -= bytes; + addr += bytes; + } + } + + dma->sge_offset = offset; + dma->resid = resid; + + if (mem) + rxe_drop_ref(mem); + + return 0; + +err2: + if (mem) + rxe_drop_ref(mem); +err1: + return err; +} + +int advance_dma_data(struct rxe_dma_info *dma, unsigned int length) +{ + struct rxe_sge *sge = &dma->sge[dma->cur_sge]; + int offset = dma->sge_offset; + int resid = dma->resid; + + while (length) { + unsigned int bytes; + + if (offset >= sge->length) { + sge++; + dma->cur_sge++; + offset = 0; + if (dma->cur_sge >= dma->num_sge) + return -ENOSPC; + } + + bytes = length; + + if (bytes > sge->length - offset) + bytes = sge->length - offset; + + offset += bytes; + resid -= bytes; + length -= bytes; + } + + dma->sge_offset = offset; + dma->resid = resid; + + return 0; +} + +/* (1) find the mem (mr or mw) corresponding to lkey/rkey + * depending on lookup_type + * (2) verify that the (qp) pd matches the mem pd + * (3) verify that the mem can support the requested access + * (4) verify that mem state is valid + */ +struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key, + enum lookup_type type) +{ + struct rxe_mem *mem; + struct rxe_dev *rxe = to_rdev(pd->ibpd.device); + int index = key >> 8; + + if (index >= RXE_MIN_MR_INDEX && index <= RXE_MAX_MR_INDEX) { + mem = rxe_pool_get_index(&rxe->mr_pool, index); + if (!mem) + goto err1; + } else { + goto err1; + } + + if ((type == lookup_local && mem->lkey != key) || + (type == lookup_remote && mem->rkey != key)) + goto err2; + + if (mem->pd != pd) + goto err2; + + if (access && !(access & mem->access)) + goto err2; + + if (mem->state != RXE_MEM_STATE_VALID) + goto err2; + + return mem; + +err2: + rxe_drop_ref(mem); +err1: + return NULL; +} + +int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem, + u64 *page, int num_pages, u64 iova) +{ + int i; + int num_buf; + int err; + struct rxe_map **map; + struct rxe_phys_buf *buf; + int page_size; + + if (num_pages > mem->max_buf) { + err = -EINVAL; + goto err1; + } + + num_buf = 0; + page_size = 1 << mem->page_shift; + map = mem->map; + buf = map[0]->buf; + + for (i = 0; i < num_pages; i++) { + buf->addr = *page++; + buf->size = page_size; + buf++; + num_buf++; + + if (num_buf == RXE_BUF_PER_MAP) { + map++; + buf = map[0]->buf; + num_buf = 0; + } + } + + mem->iova = iova; + mem->va = iova; + mem->length = num_pages << mem->page_shift; + mem->state = RXE_MEM_STATE_VALID; + + return 0; + +err1: + return err; +} diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c new file mode 100644 index 000000000000..0b8d2ea8b41d --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -0,0 +1,708 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/skbuff.h> +#include <linux/if_arp.h> +#include <linux/netdevice.h> +#include <linux/if.h> +#include <linux/if_vlan.h> +#include <net/udp_tunnel.h> +#include <net/sch_generic.h> +#include <linux/netfilter.h> +#include <rdma/ib_addr.h> + +#include "rxe.h" +#include "rxe_net.h" +#include "rxe_loc.h" + +static LIST_HEAD(rxe_dev_list); +static spinlock_t dev_list_lock; /* spinlock for device list */ + +struct rxe_dev *net_to_rxe(struct net_device *ndev) +{ + struct rxe_dev *rxe; + struct rxe_dev *found = NULL; + + spin_lock_bh(&dev_list_lock); + list_for_each_entry(rxe, &rxe_dev_list, list) { + if (rxe->ndev == ndev) { + found = rxe; + break; + } + } + spin_unlock_bh(&dev_list_lock); + + return found; +} + +struct rxe_dev *get_rxe_by_name(const char* name) +{ + struct rxe_dev *rxe; + struct rxe_dev *found = NULL; + + spin_lock_bh(&dev_list_lock); + list_for_each_entry(rxe, &rxe_dev_list, list) { + if (!strcmp(name, rxe->ib_dev.name)) { + found = rxe; + break; + } + } + spin_unlock_bh(&dev_list_lock); + return found; +} + + +struct rxe_recv_sockets recv_sockets; + +static __be64 rxe_mac_to_eui64(struct net_device *ndev) +{ + unsigned char *mac_addr = ndev->dev_addr; + __be64 eui64; + unsigned char *dst = (unsigned char *)&eui64; + + dst[0] = mac_addr[0] ^ 2; + dst[1] = mac_addr[1]; + dst[2] = mac_addr[2]; + dst[3] = 0xff; + dst[4] = 0xfe; + dst[5] = mac_addr[3]; + dst[6] = mac_addr[4]; + dst[7] = mac_addr[5]; + + return eui64; +} + +static __be64 node_guid(struct rxe_dev *rxe) +{ + return rxe_mac_to_eui64(rxe->ndev); +} + +static __be64 port_guid(struct rxe_dev *rxe) +{ + return rxe_mac_to_eui64(rxe->ndev); +} + +static struct device *dma_device(struct rxe_dev *rxe) +{ + struct net_device *ndev; + + ndev = rxe->ndev; + + if (ndev->priv_flags & IFF_802_1Q_VLAN) + ndev = vlan_dev_real_dev(ndev); + + return ndev->dev.parent; +} + +static int mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) +{ + int err; + unsigned char ll_addr[ETH_ALEN]; + + ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); + err = dev_mc_add(rxe->ndev, ll_addr); + + return err; +} + +static int mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid) +{ + int err; + unsigned char ll_addr[ETH_ALEN]; + + ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); + err = dev_mc_del(rxe->ndev, ll_addr); + + return err; +} + +static struct dst_entry *rxe_find_route4(struct net_device *ndev, + struct in_addr *saddr, + struct in_addr *daddr) +{ + struct rtable *rt; + struct flowi4 fl = { { 0 } }; + + memset(&fl, 0, sizeof(fl)); + fl.flowi4_oif = ndev->ifindex; + memcpy(&fl.saddr, saddr, sizeof(*saddr)); + memcpy(&fl.daddr, daddr, sizeof(*daddr)); + fl.flowi4_proto = IPPROTO_UDP; + + rt = ip_route_output_key(&init_net, &fl); + if (IS_ERR(rt)) { + pr_err_ratelimited("no route to %pI4\n", &daddr->s_addr); + return NULL; + } + + return &rt->dst; +} + +#if IS_ENABLED(CONFIG_IPV6) +static struct dst_entry *rxe_find_route6(struct net_device *ndev, + struct in6_addr *saddr, + struct in6_addr *daddr) +{ + struct dst_entry *ndst; + struct flowi6 fl6 = { { 0 } }; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = ndev->ifindex; + memcpy(&fl6.saddr, saddr, sizeof(*saddr)); + memcpy(&fl6.daddr, daddr, sizeof(*daddr)); + fl6.flowi6_proto = IPPROTO_UDP; + + if (unlikely(ipv6_stub->ipv6_dst_lookup(sock_net(recv_sockets.sk6->sk), + recv_sockets.sk6->sk, &ndst, &fl6))) { + pr_err_ratelimited("no route to %pI6\n", daddr); + goto put; + } + + if (unlikely(ndst->error)) { + pr_err("no route to %pI6\n", daddr); + goto put; + } + + return ndst; +put: + dst_release(ndst); + return NULL; +} + +#else + +static struct dst_entry *rxe_find_route6(struct net_device *ndev, + struct in6_addr *saddr, + struct in6_addr *daddr) +{ + return NULL; +} + +#endif + +static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) +{ + struct udphdr *udph; + struct net_device *ndev = skb->dev; + struct rxe_dev *rxe = net_to_rxe(ndev); + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); + + if (!rxe) + goto drop; + + if (skb_linearize(skb)) { + pr_err("skb_linearize failed\n"); + goto drop; + } + + udph = udp_hdr(skb); + pkt->rxe = rxe; + pkt->port_num = 1; + pkt->hdr = (u8 *)(udph + 1); + pkt->mask = RXE_GRH_MASK; + pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph); + + return rxe_rcv(skb); +drop: + kfree_skb(skb); + return 0; +} + +static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port, + bool ipv6) +{ + int err; + struct socket *sock; + struct udp_port_cfg udp_cfg; + struct udp_tunnel_sock_cfg tnl_cfg; + + memset(&udp_cfg, 0, sizeof(udp_cfg)); + + if (ipv6) { + udp_cfg.family = AF_INET6; + udp_cfg.ipv6_v6only = 1; + } else { + udp_cfg.family = AF_INET; + } + + udp_cfg.local_udp_port = port; + + /* Create UDP socket */ + err = udp_sock_create(net, &udp_cfg, &sock); + if (err < 0) { + pr_err("failed to create udp socket. err = %d\n", err); + return ERR_PTR(err); + } + + tnl_cfg.sk_user_data = NULL; + tnl_cfg.encap_type = 1; + tnl_cfg.encap_rcv = rxe_udp_encap_recv; + tnl_cfg.encap_destroy = NULL; + + /* Setup UDP tunnel */ + setup_udp_tunnel_sock(net, sock, &tnl_cfg); + + return sock; +} + +static void rxe_release_udp_tunnel(struct socket *sk) +{ + udp_tunnel_sock_release(sk); +} + +static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port, + __be16 dst_port) +{ + struct udphdr *udph; + + __skb_push(skb, sizeof(*udph)); + skb_reset_transport_header(skb); + udph = udp_hdr(skb); + + udph->dest = dst_port; + udph->source = src_port; + udph->len = htons(skb->len); + udph->check = 0; +} + +static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb, + __be32 saddr, __be32 daddr, __u8 proto, + __u8 tos, __u8 ttl, __be16 df, bool xnet) +{ + struct iphdr *iph; + + skb_scrub_packet(skb, xnet); + + skb_clear_hash(skb); + skb_dst_set(skb, dst); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + + skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); + + iph = ip_hdr(skb); + + iph->version = IPVERSION; + iph->ihl = sizeof(struct iphdr) >> 2; + iph->frag_off = df; + iph->protocol = proto; + iph->tos = tos; + iph->daddr = daddr; + iph->saddr = saddr; + iph->ttl = ttl; + __ip_select_ident(dev_net(dst->dev), iph, + skb_shinfo(skb)->gso_segs ?: 1); + iph->tot_len = htons(skb->len); + ip_send_check(iph); +} + +static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, + struct in6_addr *saddr, struct in6_addr *daddr, + __u8 proto, __u8 prio, __u8 ttl) +{ + struct ipv6hdr *ip6h; + + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED + | IPSKB_REROUTED); + skb_dst_set(skb, dst); + + __skb_push(skb, sizeof(*ip6h)); + skb_reset_network_header(skb); + ip6h = ipv6_hdr(skb); + ip6_flow_hdr(ip6h, prio, htonl(0)); + ip6h->payload_len = htons(skb->len); + ip6h->nexthdr = proto; + ip6h->hop_limit = ttl; + ip6h->daddr = *daddr; + ip6h->saddr = *saddr; + ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); +} + +static int prepare4(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av) +{ + struct dst_entry *dst; + bool xnet = false; + __be16 df = htons(IP_DF); + struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr; + struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr; + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); + + dst = rxe_find_route4(rxe->ndev, saddr, daddr); + if (!dst) { + pr_err("Host not reachable\n"); + return -EHOSTUNREACH; + } + + if (!memcmp(saddr, daddr, sizeof(*daddr))) + pkt->mask |= RXE_LOOPBACK_MASK; + + prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT), + htons(ROCE_V2_UDP_DPORT)); + + prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP, + av->grh.traffic_class, av->grh.hop_limit, df, xnet); + return 0; +} + +static int prepare6(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av) +{ + struct dst_entry *dst; + struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; + struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); + + dst = rxe_find_route6(rxe->ndev, saddr, daddr); + if (!dst) { + pr_err("Host not reachable\n"); + return -EHOSTUNREACH; + } + + if (!memcmp(saddr, daddr, sizeof(*daddr))) + pkt->mask |= RXE_LOOPBACK_MASK; + + prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT), + htons(ROCE_V2_UDP_DPORT)); + + prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP, + av->grh.traffic_class, + av->grh.hop_limit); + return 0; +} + +static int prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct sk_buff *skb, u32 *crc) +{ + int err = 0; + struct rxe_av *av = rxe_get_av(pkt); + + if (av->network_type == RDMA_NETWORK_IPV4) + err = prepare4(rxe, skb, av); + else if (av->network_type == RDMA_NETWORK_IPV6) + err = prepare6(rxe, skb, av); + + *crc = rxe_icrc_hdr(pkt, skb); + + return err; +} + +static void rxe_skb_tx_dtor(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + struct rxe_qp *qp = sk->sk_user_data; + int skb_out = atomic_dec_return(&qp->skb_out); + + if (unlikely(qp->need_req_skb && + skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)) + rxe_run_task(&qp->req.task, 1); +} + +static int send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct sk_buff *skb) +{ + struct sk_buff *nskb; + struct rxe_av *av; + int err; + + av = rxe_get_av(pkt); + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return -ENOMEM; + + nskb->destructor = rxe_skb_tx_dtor; + nskb->sk = pkt->qp->sk->sk; + + if (av->network_type == RDMA_NETWORK_IPV4) { + err = ip_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb); + } else if (av->network_type == RDMA_NETWORK_IPV6) { + err = ip6_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb); + } else { + pr_err("Unknown layer 3 protocol: %d\n", av->network_type); + kfree_skb(nskb); + return -EINVAL; + } + + if (unlikely(net_xmit_eval(err))) { + pr_debug("error sending packet: %d\n", err); + return -EAGAIN; + } + + kfree_skb(skb); + + return 0; +} + +static int loopback(struct sk_buff *skb) +{ + return rxe_rcv(skb); +} + +static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av) +{ + return rxe->port.port_guid == av->grh.dgid.global.interface_id; +} + +static struct sk_buff *init_packet(struct rxe_dev *rxe, struct rxe_av *av, + int paylen, struct rxe_pkt_info *pkt) +{ + unsigned int hdr_len; + struct sk_buff *skb; + + if (av->network_type == RDMA_NETWORK_IPV4) + hdr_len = ETH_HLEN + sizeof(struct udphdr) + + sizeof(struct iphdr); + else + hdr_len = ETH_HLEN + sizeof(struct udphdr) + + sizeof(struct ipv6hdr); + + skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(rxe->ndev), + GFP_ATOMIC); + if (unlikely(!skb)) + return NULL; + + skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev)); + + skb->dev = rxe->ndev; + if (av->network_type == RDMA_NETWORK_IPV4) + skb->protocol = htons(ETH_P_IP); + else + skb->protocol = htons(ETH_P_IPV6); + + pkt->rxe = rxe; + pkt->port_num = 1; + pkt->hdr = skb_put(skb, paylen); + pkt->mask |= RXE_GRH_MASK; + + memset(pkt->hdr, 0, paylen); + + return skb; +} + +/* + * this is required by rxe_cfg to match rxe devices in + * /sys/class/infiniband up with their underlying ethernet devices + */ +static char *parent_name(struct rxe_dev *rxe, unsigned int port_num) +{ + return rxe->ndev->name; +} + +static enum rdma_link_layer link_layer(struct rxe_dev *rxe, + unsigned int port_num) +{ + return IB_LINK_LAYER_ETHERNET; +} + +static struct rxe_ifc_ops ifc_ops = { + .node_guid = node_guid, + .port_guid = port_guid, + .dma_device = dma_device, + .mcast_add = mcast_add, + .mcast_delete = mcast_delete, + .prepare = prepare, + .send = send, + .loopback = loopback, + .init_packet = init_packet, + .parent_name = parent_name, + .link_layer = link_layer, +}; + +struct rxe_dev *rxe_net_add(struct net_device *ndev) +{ + int err; + struct rxe_dev *rxe = NULL; + + rxe = (struct rxe_dev *)ib_alloc_device(sizeof(*rxe)); + if (!rxe) + return NULL; + + rxe->ifc_ops = &ifc_ops; + rxe->ndev = ndev; + + err = rxe_add(rxe, ndev->mtu); + if (err) { + ib_dealloc_device(&rxe->ib_dev); + return NULL; + } + + spin_lock_bh(&dev_list_lock); + list_add_tail(&rxe_dev_list, &rxe->list); + spin_unlock_bh(&dev_list_lock); + return rxe; +} + +void rxe_remove_all(void) +{ + spin_lock_bh(&dev_list_lock); + while (!list_empty(&rxe_dev_list)) { + struct rxe_dev *rxe = + list_first_entry(&rxe_dev_list, struct rxe_dev, list); + + list_del(&rxe->list); + spin_unlock_bh(&dev_list_lock); + rxe_remove(rxe); + spin_lock_bh(&dev_list_lock); + } + spin_unlock_bh(&dev_list_lock); +} +EXPORT_SYMBOL(rxe_remove_all); + +static void rxe_port_event(struct rxe_dev *rxe, + enum ib_event_type event) +{ + struct ib_event ev; + + ev.device = &rxe->ib_dev; + ev.element.port_num = 1; + ev.event = event; + + ib_dispatch_event(&ev); +} + +/* Caller must hold net_info_lock */ +void rxe_port_up(struct rxe_dev *rxe) +{ + struct rxe_port *port; + + port = &rxe->port; + port->attr.state = IB_PORT_ACTIVE; + port->attr.phys_state = IB_PHYS_STATE_LINK_UP; + + rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE); + pr_info("rxe: set %s active\n", rxe->ib_dev.name); + return; +} + +/* Caller must hold net_info_lock */ +void rxe_port_down(struct rxe_dev *rxe) +{ + struct rxe_port *port; + + port = &rxe->port; + port->attr.state = IB_PORT_DOWN; + port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN; + + rxe_port_event(rxe, IB_EVENT_PORT_ERR); + pr_info("rxe: set %s down\n", rxe->ib_dev.name); + return; +} + +static int rxe_notify(struct notifier_block *not_blk, + unsigned long event, + void *arg) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(arg); + struct rxe_dev *rxe = net_to_rxe(ndev); + + if (!rxe) + goto out; + + switch (event) { + case NETDEV_UNREGISTER: + list_del(&rxe->list); + rxe_remove(rxe); + break; + case NETDEV_UP: + rxe_port_up(rxe); + break; + case NETDEV_DOWN: + rxe_port_down(rxe); + break; + case NETDEV_CHANGEMTU: + pr_info("rxe: %s changed mtu to %d\n", ndev->name, ndev->mtu); + rxe_set_mtu(rxe, ndev->mtu); + break; + case NETDEV_REBOOT: + case NETDEV_CHANGE: + case NETDEV_GOING_DOWN: + case NETDEV_CHANGEADDR: + case NETDEV_CHANGENAME: + case NETDEV_FEAT_CHANGE: + default: + pr_info("rxe: ignoring netdev event = %ld for %s\n", + event, ndev->name); + break; + } +out: + return NOTIFY_OK; +} + +static struct notifier_block rxe_net_notifier = { + .notifier_call = rxe_notify, +}; + +int rxe_net_init(void) +{ + int err; + + spin_lock_init(&dev_list_lock); + + recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, + htons(ROCE_V2_UDP_DPORT), true); + if (IS_ERR(recv_sockets.sk6)) { + recv_sockets.sk6 = NULL; + pr_err("rxe: Failed to create IPv6 UDP tunnel\n"); + return -1; + } + + recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, + htons(ROCE_V2_UDP_DPORT), false); + if (IS_ERR(recv_sockets.sk4)) { + rxe_release_udp_tunnel(recv_sockets.sk6); + recv_sockets.sk4 = NULL; + recv_sockets.sk6 = NULL; + pr_err("rxe: Failed to create IPv4 UDP tunnel\n"); + return -1; + } + + err = register_netdevice_notifier(&rxe_net_notifier); + if (err) { + rxe_release_udp_tunnel(recv_sockets.sk6); + rxe_release_udp_tunnel(recv_sockets.sk4); + pr_err("rxe: Failed to rigister netdev notifier\n"); + } + + return err; +} + +void rxe_net_exit(void) +{ + if (recv_sockets.sk6) + rxe_release_udp_tunnel(recv_sockets.sk6); + + if (recv_sockets.sk4) + rxe_release_udp_tunnel(recv_sockets.sk4); + + unregister_netdevice_notifier(&rxe_net_notifier); +} diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h new file mode 100644 index 000000000000..7b06f76d16cc --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_net.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_NET_H +#define RXE_NET_H + +#include <net/sock.h> +#include <net/if_inet6.h> +#include <linux/module.h> + +struct rxe_recv_sockets { + struct socket *sk4; + struct socket *sk6; +}; + +extern struct rxe_recv_sockets recv_sockets; + +struct rxe_dev *rxe_net_add(struct net_device *ndev); + +int rxe_net_init(void); +void rxe_net_exit(void); + +#endif /* RXE_NET_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c new file mode 100644 index 000000000000..61927c165b59 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_opcode.c @@ -0,0 +1,961 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <rdma/ib_pack.h> +#include "rxe_opcode.h" +#include "rxe_hdr.h" + +/* useful information about work request opcodes and pkt opcodes in + * table form + */ +struct rxe_wr_opcode_info rxe_wr_opcode_info[] = { + [IB_WR_RDMA_WRITE] = { + .name = "IB_WR_RDMA_WRITE", + .mask = { + [IB_QPT_RC] = WR_INLINE_MASK | WR_WRITE_MASK, + [IB_QPT_UC] = WR_INLINE_MASK | WR_WRITE_MASK, + }, + }, + [IB_WR_RDMA_WRITE_WITH_IMM] = { + .name = "IB_WR_RDMA_WRITE_WITH_IMM", + .mask = { + [IB_QPT_RC] = WR_INLINE_MASK | WR_WRITE_MASK, + [IB_QPT_UC] = WR_INLINE_MASK | WR_WRITE_MASK, + }, + }, + [IB_WR_SEND] = { + .name = "IB_WR_SEND", + .mask = { + [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK, + }, + }, + [IB_WR_SEND_WITH_IMM] = { + .name = "IB_WR_SEND_WITH_IMM", + .mask = { + [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK, + }, + }, + [IB_WR_RDMA_READ] = { + .name = "IB_WR_RDMA_READ", + .mask = { + [IB_QPT_RC] = WR_READ_MASK, + }, + }, + [IB_WR_ATOMIC_CMP_AND_SWP] = { + .name = "IB_WR_ATOMIC_CMP_AND_SWP", + .mask = { + [IB_QPT_RC] = WR_ATOMIC_MASK, + }, + }, + [IB_WR_ATOMIC_FETCH_AND_ADD] = { + .name = "IB_WR_ATOMIC_FETCH_AND_ADD", + .mask = { + [IB_QPT_RC] = WR_ATOMIC_MASK, + }, + }, + [IB_WR_LSO] = { + .name = "IB_WR_LSO", + .mask = { + /* not supported */ + }, + }, + [IB_WR_SEND_WITH_INV] = { + .name = "IB_WR_SEND_WITH_INV", + .mask = { + [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK, + [IB_QPT_UD] = WR_INLINE_MASK | WR_SEND_MASK, + }, + }, + [IB_WR_RDMA_READ_WITH_INV] = { + .name = "IB_WR_RDMA_READ_WITH_INV", + .mask = { + [IB_QPT_RC] = WR_READ_MASK, + }, + }, + [IB_WR_LOCAL_INV] = { + .name = "IB_WR_LOCAL_INV", + .mask = { + [IB_QPT_RC] = WR_REG_MASK, + }, + }, + [IB_WR_REG_MR] = { + .name = "IB_WR_REG_MR", + .mask = { + [IB_QPT_RC] = WR_REG_MASK, + }, + }, +}; + +struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { + [IB_OPCODE_RC_SEND_FIRST] = { + .name = "IB_OPCODE_RC_SEND_FIRST", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK + | RXE_SEND_MASK | RXE_START_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_RC_SEND_MIDDLE] = { + .name = "IB_OPCODE_RC_SEND_MIDDLE]", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK + | RXE_MIDDLE_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_RC_SEND_LAST] = { + .name = "IB_OPCODE_RC_SEND_LAST", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK + | RXE_SEND_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE", + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_IMMDT] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_RC_SEND_ONLY] = { + .name = "IB_OPCODE_RC_SEND_ONLY", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK + | RXE_RWR_MASK | RXE_SEND_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE", + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_IMMDT] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_WRITE_FIRST] = { + .name = "IB_OPCODE_RC_RDMA_WRITE_FIRST", + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_WRITE_MASK | RXE_START_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = { + .name = "IB_OPCODE_RC_RDMA_WRITE_MIDDLE", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK + | RXE_MIDDLE_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_WRITE_LAST] = { + .name = "IB_OPCODE_RC_RDMA_WRITE_LAST", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE", + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_IMMDT] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_WRITE_ONLY] = { + .name = "IB_OPCODE_RC_RDMA_WRITE_ONLY", + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_WRITE_MASK | RXE_START_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE", + .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK + | RXE_REQ_MASK | RXE_WRITE_MASK + | RXE_COMP_MASK | RXE_RWR_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RETH] = RXE_BTH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_READ_REQUEST] = { + .name = "IB_OPCODE_RC_RDMA_READ_REQUEST", + .mask = RXE_RETH_MASK | RXE_REQ_MASK | RXE_READ_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = { + .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST", + .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK + | RXE_START_MASK, + .length = RXE_BTH_BYTES + RXE_AETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_AETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_AETH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = { + .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE", + .mask = RXE_PAYLOAD_MASK | RXE_ACK_MASK | RXE_MIDDLE_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = { + .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST", + .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_AETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_AETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_AETH_BYTES, + } + }, + [IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = { + .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY", + .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_AETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_AETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_AETH_BYTES, + } + }, + [IB_OPCODE_RC_ACKNOWLEDGE] = { + .name = "IB_OPCODE_RC_ACKNOWLEDGE", + .mask = RXE_AETH_MASK | RXE_ACK_MASK | RXE_START_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_AETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_AETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_AETH_BYTES, + } + }, + [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = { + .name = "IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE", + .mask = RXE_AETH_MASK | RXE_ATMACK_MASK | RXE_ACK_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_AETH] = RXE_BTH_BYTES, + [RXE_ATMACK] = RXE_BTH_BYTES + + RXE_AETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_ATMACK_BYTES + RXE_AETH_BYTES, + } + }, + [IB_OPCODE_RC_COMPARE_SWAP] = { + .name = "IB_OPCODE_RC_COMPARE_SWAP", + .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_ATMETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_ATMETH_BYTES, + } + }, + [IB_OPCODE_RC_FETCH_ADD] = { + .name = "IB_OPCODE_RC_FETCH_ADD", + .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_ATMETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_ATMETH_BYTES, + } + }, + [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = { + .name = "IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE", + .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_IETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IETH_BYTES, + } + }, + [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = { + .name = "IB_OPCODE_RC_SEND_ONLY_INV", + .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_IETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IETH_BYTES, + } + }, + + /* UC */ + [IB_OPCODE_UC_SEND_FIRST] = { + .name = "IB_OPCODE_UC_SEND_FIRST", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK + | RXE_SEND_MASK | RXE_START_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_UC_SEND_MIDDLE] = { + .name = "IB_OPCODE_UC_SEND_MIDDLE", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK + | RXE_MIDDLE_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_UC_SEND_LAST] = { + .name = "IB_OPCODE_UC_SEND_LAST", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK + | RXE_SEND_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE", + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_IMMDT] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_UC_SEND_ONLY] = { + .name = "IB_OPCODE_UC_SEND_ONLY", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK + | RXE_RWR_MASK | RXE_SEND_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE", + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_IMMDT] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_UC_RDMA_WRITE_FIRST] = { + .name = "IB_OPCODE_UC_RDMA_WRITE_FIRST", + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_WRITE_MASK | RXE_START_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES, + } + }, + [IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = { + .name = "IB_OPCODE_UC_RDMA_WRITE_MIDDLE", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK + | RXE_MIDDLE_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_UC_RDMA_WRITE_LAST] = { + .name = "IB_OPCODE_UC_RDMA_WRITE_LAST", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_PAYLOAD] = RXE_BTH_BYTES, + } + }, + [IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE", + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_IMMDT] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_UC_RDMA_WRITE_ONLY] = { + .name = "IB_OPCODE_UC_RDMA_WRITE_ONLY", + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_WRITE_MASK | RXE_START_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES, + } + }, + [IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE", + .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK + | RXE_REQ_MASK | RXE_WRITE_MASK + | RXE_COMP_MASK | RXE_RWR_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RETH] = RXE_BTH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES + + RXE_IMMDT_BYTES, + } + }, + + /* RD */ + [IB_OPCODE_RD_SEND_FIRST] = { + .name = "IB_OPCODE_RD_SEND_FIRST", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK + | RXE_REQ_MASK | RXE_RWR_MASK | RXE_SEND_MASK + | RXE_START_MASK, + .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + } + }, + [IB_OPCODE_RD_SEND_MIDDLE] = { + .name = "IB_OPCODE_RD_SEND_MIDDLE", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK + | RXE_REQ_MASK | RXE_SEND_MASK + | RXE_MIDDLE_MASK, + .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + } + }, + [IB_OPCODE_RD_SEND_LAST] = { + .name = "IB_OPCODE_RD_SEND_LAST", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK + | RXE_REQ_MASK | RXE_COMP_MASK | RXE_SEND_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + } + }, + [IB_OPCODE_RD_SEND_LAST_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RD_SEND_LAST_WITH_IMMEDIATE", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK + | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_COMP_MASK | RXE_SEND_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_RD_SEND_ONLY] = { + .name = "IB_OPCODE_RD_SEND_ONLY", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK + | RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK + | RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + } + }, + [IB_OPCODE_RD_SEND_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RD_SEND_ONLY_WITH_IMMEDIATE", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK + | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_WRITE_FIRST] = { + .name = "IB_OPCODE_RD_RDMA_WRITE_FIRST", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK + | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_WRITE_MASK | RXE_START_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_RETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_RETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_WRITE_MIDDLE] = { + .name = "IB_OPCODE_RD_RDMA_WRITE_MIDDLE", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK + | RXE_REQ_MASK | RXE_WRITE_MASK + | RXE_MIDDLE_MASK, + .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_WRITE_LAST] = { + .name = "IB_OPCODE_RD_RDMA_WRITE_LAST", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK + | RXE_REQ_MASK | RXE_WRITE_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_WRITE_LAST_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RD_RDMA_WRITE_LAST_WITH_IMMEDIATE", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK + | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_WRITE_ONLY] = { + .name = "IB_OPCODE_RD_RDMA_WRITE_ONLY", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK + | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_WRITE_MASK | RXE_START_MASK + | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_RETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_RETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_RD_RDMA_WRITE_ONLY_WITH_IMMEDIATE", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK + | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK + | RXE_REQ_MASK | RXE_WRITE_MASK + | RXE_COMP_MASK | RXE_RWR_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES + + RXE_DETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_RETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_RETH_BYTES + + RXE_IMMDT_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_READ_REQUEST] = { + .name = "IB_OPCODE_RD_RDMA_READ_REQUEST", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK + | RXE_REQ_MASK | RXE_READ_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_RETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES + + RXE_DETH_BYTES + + RXE_RDETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_READ_RESPONSE_FIRST] = { + .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_FIRST", + .mask = RXE_RDETH_MASK | RXE_AETH_MASK + | RXE_PAYLOAD_MASK | RXE_ACK_MASK + | RXE_START_MASK, + .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_AETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_AETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_READ_RESPONSE_MIDDLE] = { + .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_MIDDLE", + .mask = RXE_RDETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK + | RXE_MIDDLE_MASK, + .length = RXE_BTH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_READ_RESPONSE_LAST] = { + .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_LAST", + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK + | RXE_ACK_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_AETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_AETH_BYTES, + } + }, + [IB_OPCODE_RD_RDMA_READ_RESPONSE_ONLY] = { + .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_ONLY", + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK + | RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_AETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_AETH_BYTES, + } + }, + [IB_OPCODE_RD_ACKNOWLEDGE] = { + .name = "IB_OPCODE_RD_ACKNOWLEDGE", + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ACK_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_AETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + } + }, + [IB_OPCODE_RD_ATOMIC_ACKNOWLEDGE] = { + .name = "IB_OPCODE_RD_ATOMIC_ACKNOWLEDGE", + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ATMACK_MASK + | RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES + + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_AETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_ATMACK] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_AETH_BYTES, + } + }, + [IB_OPCODE_RD_COMPARE_SWAP] = { + .name = "RD_COMPARE_SWAP", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK + | RXE_REQ_MASK | RXE_ATOMIC_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_ATMETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + + RXE_ATMETH_BYTES + + RXE_DETH_BYTES + + + RXE_RDETH_BYTES, + } + }, + [IB_OPCODE_RD_FETCH_ADD] = { + .name = "IB_OPCODE_RD_FETCH_ADD", + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK + | RXE_REQ_MASK | RXE_ATOMIC_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RDETH] = RXE_BTH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_ATMETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + + RXE_ATMETH_BYTES + + RXE_DETH_BYTES + + + RXE_RDETH_BYTES, + } + }, + + /* UD */ + [IB_OPCODE_UD_SEND_ONLY] = { + .name = "IB_OPCODE_UD_SEND_ONLY", + .mask = RXE_DETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK + | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK + | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_DETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_DETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_DETH_BYTES, + } + }, + [IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = { + .name = "IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE", + .mask = RXE_DETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK + | RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK + | RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_DETH] = RXE_BTH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_DETH_BYTES + + RXE_IMMDT_BYTES, + } + }, + +}; diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h new file mode 100644 index 000000000000..307604e9c78d --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_opcode.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_OPCODE_H +#define RXE_OPCODE_H + +/* + * contains header bit mask definitions and header lengths + * declaration of the rxe_opcode_info struct and + * rxe_wr_opcode_info struct + */ + +enum rxe_wr_mask { + WR_INLINE_MASK = BIT(0), + WR_ATOMIC_MASK = BIT(1), + WR_SEND_MASK = BIT(2), + WR_READ_MASK = BIT(3), + WR_WRITE_MASK = BIT(4), + WR_LOCAL_MASK = BIT(5), + WR_REG_MASK = BIT(6), + + WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK, + WR_READ_WRITE_OR_SEND_MASK = WR_READ_OR_WRITE_MASK | WR_SEND_MASK, + WR_WRITE_OR_SEND_MASK = WR_WRITE_MASK | WR_SEND_MASK, + WR_ATOMIC_OR_READ_MASK = WR_ATOMIC_MASK | WR_READ_MASK, +}; + +#define WR_MAX_QPT (8) + +struct rxe_wr_opcode_info { + char *name; + enum rxe_wr_mask mask[WR_MAX_QPT]; +}; + +extern struct rxe_wr_opcode_info rxe_wr_opcode_info[]; + +enum rxe_hdr_type { + RXE_LRH, + RXE_GRH, + RXE_BTH, + RXE_RETH, + RXE_AETH, + RXE_ATMETH, + RXE_ATMACK, + RXE_IETH, + RXE_RDETH, + RXE_DETH, + RXE_IMMDT, + RXE_PAYLOAD, + NUM_HDR_TYPES +}; + +enum rxe_hdr_mask { + RXE_LRH_MASK = BIT(RXE_LRH), + RXE_GRH_MASK = BIT(RXE_GRH), + RXE_BTH_MASK = BIT(RXE_BTH), + RXE_IMMDT_MASK = BIT(RXE_IMMDT), + RXE_RETH_MASK = BIT(RXE_RETH), + RXE_AETH_MASK = BIT(RXE_AETH), + RXE_ATMETH_MASK = BIT(RXE_ATMETH), + RXE_ATMACK_MASK = BIT(RXE_ATMACK), + RXE_IETH_MASK = BIT(RXE_IETH), + RXE_RDETH_MASK = BIT(RXE_RDETH), + RXE_DETH_MASK = BIT(RXE_DETH), + RXE_PAYLOAD_MASK = BIT(RXE_PAYLOAD), + + RXE_REQ_MASK = BIT(NUM_HDR_TYPES + 0), + RXE_ACK_MASK = BIT(NUM_HDR_TYPES + 1), + RXE_SEND_MASK = BIT(NUM_HDR_TYPES + 2), + RXE_WRITE_MASK = BIT(NUM_HDR_TYPES + 3), + RXE_READ_MASK = BIT(NUM_HDR_TYPES + 4), + RXE_ATOMIC_MASK = BIT(NUM_HDR_TYPES + 5), + + RXE_RWR_MASK = BIT(NUM_HDR_TYPES + 6), + RXE_COMP_MASK = BIT(NUM_HDR_TYPES + 7), + + RXE_START_MASK = BIT(NUM_HDR_TYPES + 8), + RXE_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 9), + RXE_END_MASK = BIT(NUM_HDR_TYPES + 10), + + RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12), + + RXE_READ_OR_ATOMIC = (RXE_READ_MASK | RXE_ATOMIC_MASK), + RXE_WRITE_OR_SEND = (RXE_WRITE_MASK | RXE_SEND_MASK), +}; + +#define OPCODE_NONE (-1) +#define RXE_NUM_OPCODE 256 + +struct rxe_opcode_info { + char *name; + enum rxe_hdr_mask mask; + int length; + int offset[NUM_HDR_TYPES]; +}; + +extern struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE]; + +#endif /* RXE_OPCODE_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h new file mode 100644 index 000000000000..f459c43a77c8 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_PARAM_H +#define RXE_PARAM_H + +static inline enum ib_mtu rxe_mtu_int_to_enum(int mtu) +{ + if (mtu < 256) + return 0; + else if (mtu < 512) + return IB_MTU_256; + else if (mtu < 1024) + return IB_MTU_512; + else if (mtu < 2048) + return IB_MTU_1024; + else if (mtu < 4096) + return IB_MTU_2048; + else + return IB_MTU_4096; +} + +/* Find the IB mtu for a given network MTU. */ +static inline enum ib_mtu eth_mtu_int_to_enum(int mtu) +{ + mtu -= RXE_MAX_HDR_LENGTH; + + return rxe_mtu_int_to_enum(mtu); +} + +/* default/initial rxe device parameter settings */ +enum rxe_device_param { + RXE_FW_VER = 0, + RXE_MAX_MR_SIZE = -1ull, + RXE_PAGE_SIZE_CAP = 0xfffff000, + RXE_VENDOR_ID = 0, + RXE_VENDOR_PART_ID = 0, + RXE_HW_VER = 0, + RXE_MAX_QP = 0x10000, + RXE_MAX_QP_WR = 0x4000, + RXE_MAX_INLINE_DATA = 400, + RXE_DEVICE_CAP_FLAGS = IB_DEVICE_BAD_PKEY_CNTR + | IB_DEVICE_BAD_QKEY_CNTR + | IB_DEVICE_AUTO_PATH_MIG + | IB_DEVICE_CHANGE_PHY_PORT + | IB_DEVICE_UD_AV_PORT_ENFORCE + | IB_DEVICE_PORT_ACTIVE_EVENT + | IB_DEVICE_SYS_IMAGE_GUID + | IB_DEVICE_RC_RNR_NAK_GEN + | IB_DEVICE_SRQ_RESIZE + | IB_DEVICE_MEM_MGT_EXTENSIONS, + RXE_MAX_SGE = 32, + RXE_MAX_SGE_RD = 32, + RXE_MAX_CQ = 16384, + RXE_MAX_LOG_CQE = 13, + RXE_MAX_MR = 2 * 1024, + RXE_MAX_PD = 0x7ffc, + RXE_MAX_QP_RD_ATOM = 128, + RXE_MAX_EE_RD_ATOM = 0, + RXE_MAX_RES_RD_ATOM = 0x3f000, + RXE_MAX_QP_INIT_RD_ATOM = 128, + RXE_MAX_EE_INIT_RD_ATOM = 0, + RXE_ATOMIC_CAP = 1, + RXE_MAX_EE = 0, + RXE_MAX_RDD = 0, + RXE_MAX_MW = 0, + RXE_MAX_RAW_IPV6_QP = 0, + RXE_MAX_RAW_ETHY_QP = 0, + RXE_MAX_MCAST_GRP = 8192, + RXE_MAX_MCAST_QP_ATTACH = 56, + RXE_MAX_TOT_MCAST_QP_ATTACH = 0x70000, + RXE_MAX_AH = 100, + RXE_MAX_FMR = 0, + RXE_MAX_MAP_PER_FMR = 0, + RXE_MAX_SRQ = 960, + RXE_MAX_SRQ_WR = 0x4000, + RXE_MIN_SRQ_WR = 1, + RXE_MAX_SRQ_SGE = 27, + RXE_MIN_SRQ_SGE = 1, + RXE_MAX_FMR_PAGE_LIST_LEN = 512, + RXE_MAX_PKEYS = 64, + RXE_LOCAL_CA_ACK_DELAY = 15, + + RXE_MAX_UCONTEXT = 512, + + RXE_NUM_PORT = 1, + RXE_NUM_COMP_VECTORS = 1, + + RXE_MIN_QP_INDEX = 16, + RXE_MAX_QP_INDEX = 0x00020000, + + RXE_MIN_SRQ_INDEX = 0x00020001, + RXE_MAX_SRQ_INDEX = 0x00040000, + + RXE_MIN_MR_INDEX = 0x00000001, + RXE_MAX_MR_INDEX = 0x00040000, + RXE_MIN_MW_INDEX = 0x00040001, + RXE_MAX_MW_INDEX = 0x00060000, + RXE_MAX_PKT_PER_ACK = 64, + + RXE_MAX_UNACKED_PSNS = 128, + + /* Max inflight SKBs per queue pair */ + RXE_INFLIGHT_SKBS_PER_QP_HIGH = 64, + RXE_INFLIGHT_SKBS_PER_QP_LOW = 16, + + /* Delay before calling arbiter timer */ + RXE_NSEC_ARB_TIMER_DELAY = 200, +}; + +/* default/initial rxe port parameters */ +enum rxe_port_param { + RXE_PORT_STATE = IB_PORT_DOWN, + RXE_PORT_MAX_MTU = IB_MTU_4096, + RXE_PORT_ACTIVE_MTU = IB_MTU_256, + RXE_PORT_GID_TBL_LEN = 1024, + RXE_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP, + RXE_PORT_MAX_MSG_SZ = 0x800000, + RXE_PORT_BAD_PKEY_CNTR = 0, + RXE_PORT_QKEY_VIOL_CNTR = 0, + RXE_PORT_LID = 0, + RXE_PORT_SM_LID = 0, + RXE_PORT_SM_SL = 0, + RXE_PORT_LMC = 0, + RXE_PORT_MAX_VL_NUM = 1, + RXE_PORT_SUBNET_TIMEOUT = 0, + RXE_PORT_INIT_TYPE_REPLY = 0, + RXE_PORT_ACTIVE_WIDTH = IB_WIDTH_1X, + RXE_PORT_ACTIVE_SPEED = 1, + RXE_PORT_PKEY_TBL_LEN = 64, + RXE_PORT_PHYS_STATE = 2, + RXE_PORT_SUBNET_PREFIX = 0xfe80000000000000ULL, +}; + +/* default/initial port info parameters */ +enum rxe_port_info_param { + RXE_PORT_INFO_VL_CAP = 4, /* 1-8 */ + RXE_PORT_INFO_MTU_CAP = 5, /* 4096 */ + RXE_PORT_INFO_OPER_VL = 1, /* 1 */ +}; + +#endif /* RXE_PARAM_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c new file mode 100644 index 000000000000..6bac0717c540 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -0,0 +1,502 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" + +/* info about object pools + * note that mr and mw share a single index space + * so that one can map an lkey to the correct type of object + */ +struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { + [RXE_TYPE_UC] = { + .name = "rxe-uc", + .size = sizeof(struct rxe_ucontext), + }, + [RXE_TYPE_PD] = { + .name = "rxe-pd", + .size = sizeof(struct rxe_pd), + }, + [RXE_TYPE_AH] = { + .name = "rxe-ah", + .size = sizeof(struct rxe_ah), + .flags = RXE_POOL_ATOMIC, + }, + [RXE_TYPE_SRQ] = { + .name = "rxe-srq", + .size = sizeof(struct rxe_srq), + .flags = RXE_POOL_INDEX, + .min_index = RXE_MIN_SRQ_INDEX, + .max_index = RXE_MAX_SRQ_INDEX, + }, + [RXE_TYPE_QP] = { + .name = "rxe-qp", + .size = sizeof(struct rxe_qp), + .cleanup = rxe_qp_cleanup, + .flags = RXE_POOL_INDEX, + .min_index = RXE_MIN_QP_INDEX, + .max_index = RXE_MAX_QP_INDEX, + }, + [RXE_TYPE_CQ] = { + .name = "rxe-cq", + .size = sizeof(struct rxe_cq), + .cleanup = rxe_cq_cleanup, + }, + [RXE_TYPE_MR] = { + .name = "rxe-mr", + .size = sizeof(struct rxe_mem), + .cleanup = rxe_mem_cleanup, + .flags = RXE_POOL_INDEX, + .max_index = RXE_MAX_MR_INDEX, + .min_index = RXE_MIN_MR_INDEX, + }, + [RXE_TYPE_MW] = { + .name = "rxe-mw", + .size = sizeof(struct rxe_mem), + .flags = RXE_POOL_INDEX, + .max_index = RXE_MAX_MW_INDEX, + .min_index = RXE_MIN_MW_INDEX, + }, + [RXE_TYPE_MC_GRP] = { + .name = "rxe-mc_grp", + .size = sizeof(struct rxe_mc_grp), + .cleanup = rxe_mc_cleanup, + .flags = RXE_POOL_KEY, + .key_offset = offsetof(struct rxe_mc_grp, mgid), + .key_size = sizeof(union ib_gid), + }, + [RXE_TYPE_MC_ELEM] = { + .name = "rxe-mc_elem", + .size = sizeof(struct rxe_mc_elem), + .flags = RXE_POOL_ATOMIC, + }, +}; + +static inline char *pool_name(struct rxe_pool *pool) +{ + return rxe_type_info[pool->type].name; +} + +static inline struct kmem_cache *pool_cache(struct rxe_pool *pool) +{ + return rxe_type_info[pool->type].cache; +} + +static inline enum rxe_elem_type rxe_type(void *arg) +{ + struct rxe_pool_entry *elem = arg; + + return elem->pool->type; +} + +int rxe_cache_init(void) +{ + int err; + int i; + size_t size; + struct rxe_type_info *type; + + for (i = 0; i < RXE_NUM_TYPES; i++) { + type = &rxe_type_info[i]; + size = ALIGN(type->size, RXE_POOL_ALIGN); + type->cache = kmem_cache_create(type->name, size, + RXE_POOL_ALIGN, + RXE_POOL_CACHE_FLAGS, NULL); + if (!type->cache) { + pr_err("Unable to init kmem cache for %s\n", + type->name); + err = -ENOMEM; + goto err1; + } + } + + return 0; + +err1: + while (--i >= 0) { + kmem_cache_destroy(type->cache); + type->cache = NULL; + } + + return err; +} + +void rxe_cache_exit(void) +{ + int i; + struct rxe_type_info *type; + + for (i = 0; i < RXE_NUM_TYPES; i++) { + type = &rxe_type_info[i]; + kmem_cache_destroy(type->cache); + type->cache = NULL; + } +} + +static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) +{ + int err = 0; + size_t size; + + if ((max - min + 1) < pool->max_elem) { + pr_warn("not enough indices for max_elem\n"); + err = -EINVAL; + goto out; + } + + pool->max_index = max; + pool->min_index = min; + + size = BITS_TO_LONGS(max - min + 1) * sizeof(long); + pool->table = kmalloc(size, GFP_KERNEL); + if (!pool->table) { + pr_warn("no memory for bit table\n"); + err = -ENOMEM; + goto out; + } + + pool->table_size = size; + bitmap_zero(pool->table, max - min + 1); + +out: + return err; +} + +int rxe_pool_init( + struct rxe_dev *rxe, + struct rxe_pool *pool, + enum rxe_elem_type type, + unsigned max_elem) +{ + int err = 0; + size_t size = rxe_type_info[type].size; + + memset(pool, 0, sizeof(*pool)); + + pool->rxe = rxe; + pool->type = type; + pool->max_elem = max_elem; + pool->elem_size = ALIGN(size, RXE_POOL_ALIGN); + pool->flags = rxe_type_info[type].flags; + pool->tree = RB_ROOT; + pool->cleanup = rxe_type_info[type].cleanup; + + atomic_set(&pool->num_elem, 0); + + kref_init(&pool->ref_cnt); + + spin_lock_init(&pool->pool_lock); + + if (rxe_type_info[type].flags & RXE_POOL_INDEX) { + err = rxe_pool_init_index(pool, + rxe_type_info[type].max_index, + rxe_type_info[type].min_index); + if (err) + goto out; + } + + if (rxe_type_info[type].flags & RXE_POOL_KEY) { + pool->key_offset = rxe_type_info[type].key_offset; + pool->key_size = rxe_type_info[type].key_size; + } + + pool->state = rxe_pool_valid; + +out: + return err; +} + +static void rxe_pool_release(struct kref *kref) +{ + struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt); + + pool->state = rxe_pool_invalid; + kfree(pool->table); +} + +static void rxe_pool_put(struct rxe_pool *pool) +{ + kref_put(&pool->ref_cnt, rxe_pool_release); +} + +int rxe_pool_cleanup(struct rxe_pool *pool) +{ + unsigned long flags; + + spin_lock_irqsave(&pool->pool_lock, flags); + pool->state = rxe_pool_invalid; + if (atomic_read(&pool->num_elem) > 0) + pr_warn("%s pool destroyed with unfree'd elem\n", + pool_name(pool)); + spin_unlock_irqrestore(&pool->pool_lock, flags); + + rxe_pool_put(pool); + + return 0; +} + +static u32 alloc_index(struct rxe_pool *pool) +{ + u32 index; + u32 range = pool->max_index - pool->min_index + 1; + + index = find_next_zero_bit(pool->table, range, pool->last); + if (index >= range) + index = find_first_zero_bit(pool->table, range); + + set_bit(index, pool->table); + pool->last = index; + return index + pool->min_index; +} + +static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new) +{ + struct rb_node **link = &pool->tree.rb_node; + struct rb_node *parent = NULL; + struct rxe_pool_entry *elem; + + while (*link) { + parent = *link; + elem = rb_entry(parent, struct rxe_pool_entry, node); + + if (elem->index == new->index) { + pr_warn("element already exists!\n"); + goto out; + } + + if (elem->index > new->index) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + + rb_link_node(&new->node, parent, link); + rb_insert_color(&new->node, &pool->tree); +out: + return; +} + +static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new) +{ + struct rb_node **link = &pool->tree.rb_node; + struct rb_node *parent = NULL; + struct rxe_pool_entry *elem; + int cmp; + + while (*link) { + parent = *link; + elem = rb_entry(parent, struct rxe_pool_entry, node); + + cmp = memcmp((u8 *)elem + pool->key_offset, + (u8 *)new + pool->key_offset, pool->key_size); + + if (cmp == 0) { + pr_warn("key already exists!\n"); + goto out; + } + + if (cmp > 0) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + + rb_link_node(&new->node, parent, link); + rb_insert_color(&new->node, &pool->tree); +out: + return; +} + +void rxe_add_key(void *arg, void *key) +{ + struct rxe_pool_entry *elem = arg; + struct rxe_pool *pool = elem->pool; + unsigned long flags; + + spin_lock_irqsave(&pool->pool_lock, flags); + memcpy((u8 *)elem + pool->key_offset, key, pool->key_size); + insert_key(pool, elem); + spin_unlock_irqrestore(&pool->pool_lock, flags); +} + +void rxe_drop_key(void *arg) +{ + struct rxe_pool_entry *elem = arg; + struct rxe_pool *pool = elem->pool; + unsigned long flags; + + spin_lock_irqsave(&pool->pool_lock, flags); + rb_erase(&elem->node, &pool->tree); + spin_unlock_irqrestore(&pool->pool_lock, flags); +} + +void rxe_add_index(void *arg) +{ + struct rxe_pool_entry *elem = arg; + struct rxe_pool *pool = elem->pool; + unsigned long flags; + + spin_lock_irqsave(&pool->pool_lock, flags); + elem->index = alloc_index(pool); + insert_index(pool, elem); + spin_unlock_irqrestore(&pool->pool_lock, flags); +} + +void rxe_drop_index(void *arg) +{ + struct rxe_pool_entry *elem = arg; + struct rxe_pool *pool = elem->pool; + unsigned long flags; + + spin_lock_irqsave(&pool->pool_lock, flags); + clear_bit(elem->index - pool->min_index, pool->table); + rb_erase(&elem->node, &pool->tree); + spin_unlock_irqrestore(&pool->pool_lock, flags); +} + +void *rxe_alloc(struct rxe_pool *pool) +{ + struct rxe_pool_entry *elem; + unsigned long flags; + + might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); + + spin_lock_irqsave(&pool->pool_lock, flags); + if (pool->state != rxe_pool_valid) { + spin_unlock_irqrestore(&pool->pool_lock, flags); + return NULL; + } + kref_get(&pool->ref_cnt); + spin_unlock_irqrestore(&pool->pool_lock, flags); + + kref_get(&pool->rxe->ref_cnt); + + if (atomic_inc_return(&pool->num_elem) > pool->max_elem) { + atomic_dec(&pool->num_elem); + rxe_dev_put(pool->rxe); + rxe_pool_put(pool); + return NULL; + } + + elem = kmem_cache_zalloc(pool_cache(pool), + (pool->flags & RXE_POOL_ATOMIC) ? + GFP_ATOMIC : GFP_KERNEL); + + elem->pool = pool; + kref_init(&elem->ref_cnt); + + return elem; +} + +void rxe_elem_release(struct kref *kref) +{ + struct rxe_pool_entry *elem = + container_of(kref, struct rxe_pool_entry, ref_cnt); + struct rxe_pool *pool = elem->pool; + + if (pool->cleanup) + pool->cleanup(elem); + + kmem_cache_free(pool_cache(pool), elem); + atomic_dec(&pool->num_elem); + rxe_dev_put(pool->rxe); + rxe_pool_put(pool); +} + +void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) +{ + struct rb_node *node = NULL; + struct rxe_pool_entry *elem = NULL; + unsigned long flags; + + spin_lock_irqsave(&pool->pool_lock, flags); + + if (pool->state != rxe_pool_valid) + goto out; + + node = pool->tree.rb_node; + + while (node) { + elem = rb_entry(node, struct rxe_pool_entry, node); + + if (elem->index > index) + node = node->rb_left; + else if (elem->index < index) + node = node->rb_right; + else + break; + } + + if (node) + kref_get(&elem->ref_cnt); + +out: + spin_unlock_irqrestore(&pool->pool_lock, flags); + return node ? (void *)elem : NULL; +} + +void *rxe_pool_get_key(struct rxe_pool *pool, void *key) +{ + struct rb_node *node = NULL; + struct rxe_pool_entry *elem = NULL; + int cmp; + unsigned long flags; + + spin_lock_irqsave(&pool->pool_lock, flags); + + if (pool->state != rxe_pool_valid) + goto out; + + node = pool->tree.rb_node; + + while (node) { + elem = rb_entry(node, struct rxe_pool_entry, node); + + cmp = memcmp((u8 *)elem + pool->key_offset, + key, pool->key_size); + + if (cmp > 0) + node = node->rb_left; + else if (cmp < 0) + node = node->rb_right; + else + break; + } + + if (node) + kref_get(&elem->ref_cnt); + +out: + spin_unlock_irqrestore(&pool->pool_lock, flags); + return node ? ((void *)elem) : NULL; +} diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h new file mode 100644 index 000000000000..4d04830adcae --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_POOL_H +#define RXE_POOL_H + +#define RXE_POOL_ALIGN (16) +#define RXE_POOL_CACHE_FLAGS (0) + +enum rxe_pool_flags { + RXE_POOL_ATOMIC = BIT(0), + RXE_POOL_INDEX = BIT(1), + RXE_POOL_KEY = BIT(2), +}; + +enum rxe_elem_type { + RXE_TYPE_UC, + RXE_TYPE_PD, + RXE_TYPE_AH, + RXE_TYPE_SRQ, + RXE_TYPE_QP, + RXE_TYPE_CQ, + RXE_TYPE_MR, + RXE_TYPE_MW, + RXE_TYPE_MC_GRP, + RXE_TYPE_MC_ELEM, + RXE_NUM_TYPES, /* keep me last */ +}; + +struct rxe_type_info { + char *name; + size_t size; + void (*cleanup)(void *obj); + enum rxe_pool_flags flags; + u32 max_index; + u32 min_index; + size_t key_offset; + size_t key_size; + struct kmem_cache *cache; +}; + +extern struct rxe_type_info rxe_type_info[]; + +enum rxe_pool_state { + rxe_pool_invalid, + rxe_pool_valid, +}; + +struct rxe_pool_entry { + struct rxe_pool *pool; + struct kref ref_cnt; + struct list_head list; + + /* only used if indexed or keyed */ + struct rb_node node; + u32 index; +}; + +struct rxe_pool { + struct rxe_dev *rxe; + spinlock_t pool_lock; /* pool spinlock */ + size_t elem_size; + struct kref ref_cnt; + void (*cleanup)(void *obj); + enum rxe_pool_state state; + enum rxe_pool_flags flags; + enum rxe_elem_type type; + + unsigned int max_elem; + atomic_t num_elem; + + /* only used if indexed or keyed */ + struct rb_root tree; + unsigned long *table; + size_t table_size; + u32 max_index; + u32 min_index; + u32 last; + size_t key_offset; + size_t key_size; +}; + +/* initialize slab caches for managed objects */ +int rxe_cache_init(void); + +/* cleanup slab caches for managed objects */ +void rxe_cache_exit(void); + +/* initialize a pool of objects with given limit on + * number of elements. gets parameters from rxe_type_info + * pool elements will be allocated out of a slab cache + */ +int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, + enum rxe_elem_type type, u32 max_elem); + +/* free resources from object pool */ +int rxe_pool_cleanup(struct rxe_pool *pool); + +/* allocate an object from pool */ +void *rxe_alloc(struct rxe_pool *pool); + +/* assign an index to an indexed object and insert object into + * pool's rb tree + */ +void rxe_add_index(void *elem); + +/* drop an index and remove object from rb tree */ +void rxe_drop_index(void *elem); + +/* assign a key to a keyed object and insert object into + * pool's rb tree + */ +void rxe_add_key(void *elem, void *key); + +/* remove elem from rb tree */ +void rxe_drop_key(void *elem); + +/* lookup an indexed object from index. takes a reference on object */ +void *rxe_pool_get_index(struct rxe_pool *pool, u32 index); + +/* lookup keyed object from key. takes a reference on the object */ +void *rxe_pool_get_key(struct rxe_pool *pool, void *key); + +/* cleanup an object when all references are dropped */ +void rxe_elem_release(struct kref *kref); + +/* take a reference on an object */ +#define rxe_add_ref(elem) kref_get(&(elem)->pelem.ref_cnt) + +/* drop a reference on an object */ +#define rxe_drop_ref(elem) kref_put(&(elem)->pelem.ref_cnt, rxe_elem_release) + +#endif /* RXE_POOL_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c new file mode 100644 index 000000000000..22ba24f2a2c1 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -0,0 +1,851 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/skbuff.h> +#include <linux/delay.h> +#include <linux/sched.h> + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" +#include "rxe_task.h" + +char *rxe_qp_state_name[] = { + [QP_STATE_RESET] = "RESET", + [QP_STATE_INIT] = "INIT", + [QP_STATE_READY] = "READY", + [QP_STATE_DRAIN] = "DRAIN", + [QP_STATE_DRAINED] = "DRAINED", + [QP_STATE_ERROR] = "ERROR", +}; + +static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap, + int has_srq) +{ + if (cap->max_send_wr > rxe->attr.max_qp_wr) { + pr_warn("invalid send wr = %d > %d\n", + cap->max_send_wr, rxe->attr.max_qp_wr); + goto err1; + } + + if (cap->max_send_sge > rxe->attr.max_sge) { + pr_warn("invalid send sge = %d > %d\n", + cap->max_send_sge, rxe->attr.max_sge); + goto err1; + } + + if (!has_srq) { + if (cap->max_recv_wr > rxe->attr.max_qp_wr) { + pr_warn("invalid recv wr = %d > %d\n", + cap->max_recv_wr, rxe->attr.max_qp_wr); + goto err1; + } + + if (cap->max_recv_sge > rxe->attr.max_sge) { + pr_warn("invalid recv sge = %d > %d\n", + cap->max_recv_sge, rxe->attr.max_sge); + goto err1; + } + } + + if (cap->max_inline_data > rxe->max_inline_data) { + pr_warn("invalid max inline data = %d > %d\n", + cap->max_inline_data, rxe->max_inline_data); + goto err1; + } + + return 0; + +err1: + return -EINVAL; +} + +int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) +{ + struct ib_qp_cap *cap = &init->cap; + struct rxe_port *port; + int port_num = init->port_num; + + if (!init->recv_cq || !init->send_cq) { + pr_warn("missing cq\n"); + goto err1; + } + + if (rxe_qp_chk_cap(rxe, cap, !!init->srq)) + goto err1; + + if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) { + if (port_num != 1) { + pr_warn("invalid port = %d\n", port_num); + goto err1; + } + + port = &rxe->port; + + if (init->qp_type == IB_QPT_SMI && port->qp_smi_index) { + pr_warn("SMI QP exists for port %d\n", port_num); + goto err1; + } + + if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) { + pr_warn("GSI QP exists for port %d\n", port_num); + goto err1; + } + } + + return 0; + +err1: + return -EINVAL; +} + +static int alloc_rd_atomic_resources(struct rxe_qp *qp, unsigned int n) +{ + qp->resp.res_head = 0; + qp->resp.res_tail = 0; + qp->resp.resources = kcalloc(n, sizeof(struct resp_res), GFP_KERNEL); + + if (!qp->resp.resources) + return -ENOMEM; + + return 0; +} + +static void free_rd_atomic_resources(struct rxe_qp *qp) +{ + if (qp->resp.resources) { + int i; + + for (i = 0; i < qp->attr.max_rd_atomic; i++) { + struct resp_res *res = &qp->resp.resources[i]; + + free_rd_atomic_resource(qp, res); + } + kfree(qp->resp.resources); + qp->resp.resources = NULL; + } +} + +void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res) +{ + if (res->type == RXE_ATOMIC_MASK) { + rxe_drop_ref(qp); + kfree_skb(res->atomic.skb); + } else if (res->type == RXE_READ_MASK) { + if (res->read.mr) + rxe_drop_ref(res->read.mr); + } + res->type = 0; +} + +static void cleanup_rd_atomic_resources(struct rxe_qp *qp) +{ + int i; + struct resp_res *res; + + if (qp->resp.resources) { + for (i = 0; i < qp->attr.max_rd_atomic; i++) { + res = &qp->resp.resources[i]; + free_rd_atomic_resource(qp, res); + } + } +} + +static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, + struct ib_qp_init_attr *init) +{ + struct rxe_port *port; + u32 qpn; + + qp->sq_sig_type = init->sq_sig_type; + qp->attr.path_mtu = 1; + qp->mtu = ib_mtu_enum_to_int(qp->attr.path_mtu); + + qpn = qp->pelem.index; + port = &rxe->port; + + switch (init->qp_type) { + case IB_QPT_SMI: + qp->ibqp.qp_num = 0; + port->qp_smi_index = qpn; + qp->attr.port_num = init->port_num; + break; + + case IB_QPT_GSI: + qp->ibqp.qp_num = 1; + port->qp_gsi_index = qpn; + qp->attr.port_num = init->port_num; + break; + + default: + qp->ibqp.qp_num = qpn; + break; + } + + INIT_LIST_HEAD(&qp->grp_list); + + skb_queue_head_init(&qp->send_pkts); + + spin_lock_init(&qp->grp_lock); + spin_lock_init(&qp->state_lock); + + atomic_set(&qp->ssn, 0); + atomic_set(&qp->skb_out, 0); +} + +static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, + struct ib_qp_init_attr *init, + struct ib_ucontext *context, struct ib_udata *udata) +{ + int err; + int wqe_size; + + err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk); + if (err < 0) + return err; + qp->sk->sk->sk_user_data = qp; + + qp->sq.max_wr = init->cap.max_send_wr; + qp->sq.max_sge = init->cap.max_send_sge; + qp->sq.max_inline = init->cap.max_inline_data; + + wqe_size = max_t(int, sizeof(struct rxe_send_wqe) + + qp->sq.max_sge * sizeof(struct ib_sge), + sizeof(struct rxe_send_wqe) + + qp->sq.max_inline); + + qp->sq.queue = rxe_queue_init(rxe, + &qp->sq.max_wr, + wqe_size); + if (!qp->sq.queue) + return -ENOMEM; + + err = do_mmap_info(rxe, udata, true, + context, qp->sq.queue->buf, + qp->sq.queue->buf_size, &qp->sq.queue->ip); + + if (err) { + kvfree(qp->sq.queue->buf); + kfree(qp->sq.queue); + return err; + } + + qp->req.wqe_index = producer_index(qp->sq.queue); + qp->req.state = QP_STATE_RESET; + qp->req.opcode = -1; + qp->comp.opcode = -1; + + spin_lock_init(&qp->sq.sq_lock); + skb_queue_head_init(&qp->req_pkts); + + rxe_init_task(rxe, &qp->req.task, qp, + rxe_requester, "req"); + rxe_init_task(rxe, &qp->comp.task, qp, + rxe_completer, "comp"); + + init_timer(&qp->rnr_nak_timer); + qp->rnr_nak_timer.function = rnr_nak_timer; + qp->rnr_nak_timer.data = (unsigned long)qp; + + init_timer(&qp->retrans_timer); + qp->retrans_timer.function = retransmit_timer; + qp->retrans_timer.data = (unsigned long)qp; + qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */ + + return 0; +} + +static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, + struct ib_qp_init_attr *init, + struct ib_ucontext *context, struct ib_udata *udata) +{ + int err; + int wqe_size; + + if (!qp->srq) { + qp->rq.max_wr = init->cap.max_recv_wr; + qp->rq.max_sge = init->cap.max_recv_sge; + + wqe_size = rcv_wqe_size(qp->rq.max_sge); + + pr_debug("max_wr = %d, max_sge = %d, wqe_size = %d\n", + qp->rq.max_wr, qp->rq.max_sge, wqe_size); + + qp->rq.queue = rxe_queue_init(rxe, + &qp->rq.max_wr, + wqe_size); + if (!qp->rq.queue) + return -ENOMEM; + + err = do_mmap_info(rxe, udata, false, context, + qp->rq.queue->buf, + qp->rq.queue->buf_size, + &qp->rq.queue->ip); + if (err) { + kvfree(qp->rq.queue->buf); + kfree(qp->rq.queue); + return err; + } + } + + spin_lock_init(&qp->rq.producer_lock); + spin_lock_init(&qp->rq.consumer_lock); + + skb_queue_head_init(&qp->resp_pkts); + + rxe_init_task(rxe, &qp->resp.task, qp, + rxe_responder, "resp"); + + qp->resp.opcode = OPCODE_NONE; + qp->resp.msn = 0; + qp->resp.state = QP_STATE_RESET; + + return 0; +} + +/* called by the create qp verb */ +int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, + struct ib_qp_init_attr *init, struct ib_udata *udata, + struct ib_pd *ibpd) +{ + int err; + struct rxe_cq *rcq = to_rcq(init->recv_cq); + struct rxe_cq *scq = to_rcq(init->send_cq); + struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL; + struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL; + + rxe_add_ref(pd); + rxe_add_ref(rcq); + rxe_add_ref(scq); + if (srq) + rxe_add_ref(srq); + + qp->pd = pd; + qp->rcq = rcq; + qp->scq = scq; + qp->srq = srq; + + rxe_qp_init_misc(rxe, qp, init); + + err = rxe_qp_init_req(rxe, qp, init, context, udata); + if (err) + goto err1; + + err = rxe_qp_init_resp(rxe, qp, init, context, udata); + if (err) + goto err2; + + qp->attr.qp_state = IB_QPS_RESET; + qp->valid = 1; + + return 0; + +err2: + rxe_queue_cleanup(qp->sq.queue); +err1: + if (srq) + rxe_drop_ref(srq); + rxe_drop_ref(scq); + rxe_drop_ref(rcq); + rxe_drop_ref(pd); + + return err; +} + +/* called by the query qp verb */ +int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init) +{ + init->event_handler = qp->ibqp.event_handler; + init->qp_context = qp->ibqp.qp_context; + init->send_cq = qp->ibqp.send_cq; + init->recv_cq = qp->ibqp.recv_cq; + init->srq = qp->ibqp.srq; + + init->cap.max_send_wr = qp->sq.max_wr; + init->cap.max_send_sge = qp->sq.max_sge; + init->cap.max_inline_data = qp->sq.max_inline; + + if (!qp->srq) { + init->cap.max_recv_wr = qp->rq.max_wr; + init->cap.max_recv_sge = qp->rq.max_sge; + } + + init->sq_sig_type = qp->sq_sig_type; + + init->qp_type = qp->ibqp.qp_type; + init->port_num = 1; + + return 0; +} + +/* called by the modify qp verb, this routine checks all the parameters before + * making any changes + */ +int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, + struct ib_qp_attr *attr, int mask) +{ + enum ib_qp_state cur_state = (mask & IB_QP_CUR_STATE) ? + attr->cur_qp_state : qp->attr.qp_state; + enum ib_qp_state new_state = (mask & IB_QP_STATE) ? + attr->qp_state : cur_state; + + if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask, + IB_LINK_LAYER_ETHERNET)) { + pr_warn("invalid mask or state for qp\n"); + goto err1; + } + + if (mask & IB_QP_STATE) { + if (cur_state == IB_QPS_SQD) { + if (qp->req.state == QP_STATE_DRAIN && + new_state != IB_QPS_ERR) + goto err1; + } + } + + if (mask & IB_QP_PORT) { + if (attr->port_num != 1) { + pr_warn("invalid port %d\n", attr->port_num); + goto err1; + } + } + + if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq)) + goto err1; + + if (mask & IB_QP_AV && rxe_av_chk_attr(rxe, &attr->ah_attr)) + goto err1; + + if (mask & IB_QP_ALT_PATH) { + if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr)) + goto err1; + if (attr->alt_port_num != 1) { + pr_warn("invalid alt port %d\n", attr->alt_port_num); + goto err1; + } + if (attr->alt_timeout > 31) { + pr_warn("invalid QP alt timeout %d > 31\n", + attr->alt_timeout); + goto err1; + } + } + + if (mask & IB_QP_PATH_MTU) { + struct rxe_port *port = &rxe->port; + + enum ib_mtu max_mtu = port->attr.max_mtu; + enum ib_mtu mtu = attr->path_mtu; + + if (mtu > max_mtu) { + pr_debug("invalid mtu (%d) > (%d)\n", + ib_mtu_enum_to_int(mtu), + ib_mtu_enum_to_int(max_mtu)); + goto err1; + } + } + + if (mask & IB_QP_MAX_QP_RD_ATOMIC) { + if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) { + pr_warn("invalid max_rd_atomic %d > %d\n", + attr->max_rd_atomic, + rxe->attr.max_qp_rd_atom); + goto err1; + } + } + + if (mask & IB_QP_TIMEOUT) { + if (attr->timeout > 31) { + pr_warn("invalid QP timeout %d > 31\n", + attr->timeout); + goto err1; + } + } + + return 0; + +err1: + return -EINVAL; +} + +/* move the qp to the reset state */ +static void rxe_qp_reset(struct rxe_qp *qp) +{ + /* stop tasks from running */ + rxe_disable_task(&qp->resp.task); + + /* stop request/comp */ + if (qp->sq.queue) { + if (qp_type(qp) == IB_QPT_RC) + rxe_disable_task(&qp->comp.task); + rxe_disable_task(&qp->req.task); + } + + /* move qp to the reset state */ + qp->req.state = QP_STATE_RESET; + qp->resp.state = QP_STATE_RESET; + + /* let state machines reset themselves drain work and packet queues + * etc. + */ + __rxe_do_task(&qp->resp.task); + + if (qp->sq.queue) { + __rxe_do_task(&qp->comp.task); + __rxe_do_task(&qp->req.task); + } + + /* cleanup attributes */ + atomic_set(&qp->ssn, 0); + qp->req.opcode = -1; + qp->req.need_retry = 0; + qp->req.noack_pkts = 0; + qp->resp.msn = 0; + qp->resp.opcode = -1; + qp->resp.drop_msg = 0; + qp->resp.goto_error = 0; + qp->resp.sent_psn_nak = 0; + + if (qp->resp.mr) { + rxe_drop_ref(qp->resp.mr); + qp->resp.mr = NULL; + } + + cleanup_rd_atomic_resources(qp); + + /* reenable tasks */ + rxe_enable_task(&qp->resp.task); + + if (qp->sq.queue) { + if (qp_type(qp) == IB_QPT_RC) + rxe_enable_task(&qp->comp.task); + + rxe_enable_task(&qp->req.task); + } +} + +/* drain the send queue */ +static void rxe_qp_drain(struct rxe_qp *qp) +{ + if (qp->sq.queue) { + if (qp->req.state != QP_STATE_DRAINED) { + qp->req.state = QP_STATE_DRAIN; + if (qp_type(qp) == IB_QPT_RC) + rxe_run_task(&qp->comp.task, 1); + else + __rxe_do_task(&qp->comp.task); + rxe_run_task(&qp->req.task, 1); + } + } +} + +/* move the qp to the error state */ +void rxe_qp_error(struct rxe_qp *qp) +{ + qp->req.state = QP_STATE_ERROR; + qp->resp.state = QP_STATE_ERROR; + + /* drain work and packet queues */ + rxe_run_task(&qp->resp.task, 1); + + if (qp_type(qp) == IB_QPT_RC) + rxe_run_task(&qp->comp.task, 1); + else + __rxe_do_task(&qp->comp.task); + rxe_run_task(&qp->req.task, 1); +} + +/* called by the modify qp verb */ +int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, + struct ib_udata *udata) +{ + int err; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + union ib_gid sgid; + struct ib_gid_attr sgid_attr; + + if (mask & IB_QP_MAX_QP_RD_ATOMIC) { + int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic); + + free_rd_atomic_resources(qp); + + err = alloc_rd_atomic_resources(qp, max_rd_atomic); + if (err) + return err; + + qp->attr.max_rd_atomic = max_rd_atomic; + atomic_set(&qp->req.rd_atomic, max_rd_atomic); + } + + if (mask & IB_QP_CUR_STATE) + qp->attr.cur_qp_state = attr->qp_state; + + if (mask & IB_QP_EN_SQD_ASYNC_NOTIFY) + qp->attr.en_sqd_async_notify = attr->en_sqd_async_notify; + + if (mask & IB_QP_ACCESS_FLAGS) + qp->attr.qp_access_flags = attr->qp_access_flags; + + if (mask & IB_QP_PKEY_INDEX) + qp->attr.pkey_index = attr->pkey_index; + + if (mask & IB_QP_PORT) + qp->attr.port_num = attr->port_num; + + if (mask & IB_QP_QKEY) + qp->attr.qkey = attr->qkey; + + if (mask & IB_QP_AV) { + ib_get_cached_gid(&rxe->ib_dev, 1, + attr->ah_attr.grh.sgid_index, &sgid, + &sgid_attr); + rxe_av_from_attr(rxe, attr->port_num, &qp->pri_av, + &attr->ah_attr); + rxe_av_fill_ip_info(rxe, &qp->pri_av, &attr->ah_attr, + &sgid_attr, &sgid); + if (sgid_attr.ndev) + dev_put(sgid_attr.ndev); + } + + if (mask & IB_QP_ALT_PATH) { + ib_get_cached_gid(&rxe->ib_dev, 1, + attr->alt_ah_attr.grh.sgid_index, &sgid, + &sgid_attr); + + rxe_av_from_attr(rxe, attr->alt_port_num, &qp->alt_av, + &attr->alt_ah_attr); + rxe_av_fill_ip_info(rxe, &qp->alt_av, &attr->alt_ah_attr, + &sgid_attr, &sgid); + if (sgid_attr.ndev) + dev_put(sgid_attr.ndev); + + qp->attr.alt_port_num = attr->alt_port_num; + qp->attr.alt_pkey_index = attr->alt_pkey_index; + qp->attr.alt_timeout = attr->alt_timeout; + } + + if (mask & IB_QP_PATH_MTU) { + qp->attr.path_mtu = attr->path_mtu; + qp->mtu = ib_mtu_enum_to_int(attr->path_mtu); + } + + if (mask & IB_QP_TIMEOUT) { + qp->attr.timeout = attr->timeout; + if (attr->timeout == 0) { + qp->qp_timeout_jiffies = 0; + } else { + /* According to the spec, timeout = 4.096 * 2 ^ attr->timeout [us] */ + int j = nsecs_to_jiffies(4096ULL << attr->timeout); + + qp->qp_timeout_jiffies = j ? j : 1; + } + } + + if (mask & IB_QP_RETRY_CNT) { + qp->attr.retry_cnt = attr->retry_cnt; + qp->comp.retry_cnt = attr->retry_cnt; + pr_debug("set retry count = %d\n", attr->retry_cnt); + } + + if (mask & IB_QP_RNR_RETRY) { + qp->attr.rnr_retry = attr->rnr_retry; + qp->comp.rnr_retry = attr->rnr_retry; + pr_debug("set rnr retry count = %d\n", attr->rnr_retry); + } + + if (mask & IB_QP_RQ_PSN) { + qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK); + qp->resp.psn = qp->attr.rq_psn; + pr_debug("set resp psn = 0x%x\n", qp->resp.psn); + } + + if (mask & IB_QP_MIN_RNR_TIMER) { + qp->attr.min_rnr_timer = attr->min_rnr_timer; + pr_debug("set min rnr timer = 0x%x\n", + attr->min_rnr_timer); + } + + if (mask & IB_QP_SQ_PSN) { + qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK); + qp->req.psn = qp->attr.sq_psn; + qp->comp.psn = qp->attr.sq_psn; + pr_debug("set req psn = 0x%x\n", qp->req.psn); + } + + if (mask & IB_QP_MAX_DEST_RD_ATOMIC) { + qp->attr.max_dest_rd_atomic = + __roundup_pow_of_two(attr->max_dest_rd_atomic); + } + + if (mask & IB_QP_PATH_MIG_STATE) + qp->attr.path_mig_state = attr->path_mig_state; + + if (mask & IB_QP_DEST_QPN) + qp->attr.dest_qp_num = attr->dest_qp_num; + + if (mask & IB_QP_STATE) { + qp->attr.qp_state = attr->qp_state; + + switch (attr->qp_state) { + case IB_QPS_RESET: + pr_debug("qp state -> RESET\n"); + rxe_qp_reset(qp); + break; + + case IB_QPS_INIT: + pr_debug("qp state -> INIT\n"); + qp->req.state = QP_STATE_INIT; + qp->resp.state = QP_STATE_INIT; + break; + + case IB_QPS_RTR: + pr_debug("qp state -> RTR\n"); + qp->resp.state = QP_STATE_READY; + break; + + case IB_QPS_RTS: + pr_debug("qp state -> RTS\n"); + qp->req.state = QP_STATE_READY; + break; + + case IB_QPS_SQD: + pr_debug("qp state -> SQD\n"); + rxe_qp_drain(qp); + break; + + case IB_QPS_SQE: + pr_warn("qp state -> SQE !!?\n"); + /* Not possible from modify_qp. */ + break; + + case IB_QPS_ERR: + pr_debug("qp state -> ERR\n"); + rxe_qp_error(qp); + break; + } + } + + return 0; +} + +/* called by the query qp verb */ +int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + + *attr = qp->attr; + + attr->rq_psn = qp->resp.psn; + attr->sq_psn = qp->req.psn; + + attr->cap.max_send_wr = qp->sq.max_wr; + attr->cap.max_send_sge = qp->sq.max_sge; + attr->cap.max_inline_data = qp->sq.max_inline; + + if (!qp->srq) { + attr->cap.max_recv_wr = qp->rq.max_wr; + attr->cap.max_recv_sge = qp->rq.max_sge; + } + + rxe_av_to_attr(rxe, &qp->pri_av, &attr->ah_attr); + rxe_av_to_attr(rxe, &qp->alt_av, &attr->alt_ah_attr); + + if (qp->req.state == QP_STATE_DRAIN) { + attr->sq_draining = 1; + /* applications that get this state + * typically spin on it. yield the + * processor + */ + cond_resched(); + } else { + attr->sq_draining = 0; + } + + pr_debug("attr->sq_draining = %d\n", attr->sq_draining); + + return 0; +} + +/* called by the destroy qp verb */ +void rxe_qp_destroy(struct rxe_qp *qp) +{ + qp->valid = 0; + qp->qp_timeout_jiffies = 0; + rxe_cleanup_task(&qp->resp.task); + + del_timer_sync(&qp->retrans_timer); + del_timer_sync(&qp->rnr_nak_timer); + + rxe_cleanup_task(&qp->req.task); + if (qp_type(qp) == IB_QPT_RC) + rxe_cleanup_task(&qp->comp.task); + + /* flush out any receive wr's or pending requests */ + __rxe_do_task(&qp->req.task); + if (qp->sq.queue) { + __rxe_do_task(&qp->comp.task); + __rxe_do_task(&qp->req.task); + } +} + +/* called when the last reference to the qp is dropped */ +void rxe_qp_cleanup(void *arg) +{ + struct rxe_qp *qp = arg; + + rxe_drop_all_mcast_groups(qp); + + if (qp->sq.queue) + rxe_queue_cleanup(qp->sq.queue); + + if (qp->srq) + rxe_drop_ref(qp->srq); + + if (qp->rq.queue) + rxe_queue_cleanup(qp->rq.queue); + + if (qp->scq) + rxe_drop_ref(qp->scq); + if (qp->rcq) + rxe_drop_ref(qp->rcq); + if (qp->pd) + rxe_drop_ref(qp->pd); + + if (qp->resp.mr) { + rxe_drop_ref(qp->resp.mr); + qp->resp.mr = NULL; + } + + free_rd_atomic_resources(qp); + + kernel_sock_shutdown(qp->sk, SHUT_RDWR); +} diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c new file mode 100644 index 000000000000..08274254eb88 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_queue.c @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must retailuce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/vmalloc.h> +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" + +int do_mmap_info(struct rxe_dev *rxe, + struct ib_udata *udata, + bool is_req, + struct ib_ucontext *context, + struct rxe_queue_buf *buf, + size_t buf_size, + struct rxe_mmap_info **ip_p) +{ + int err; + u32 len, offset; + struct rxe_mmap_info *ip = NULL; + + if (udata) { + if (is_req) { + len = udata->outlen - sizeof(struct mminfo); + offset = sizeof(struct mminfo); + } else { + len = udata->outlen; + offset = 0; + } + + if (len < sizeof(ip->info)) + goto err1; + + ip = rxe_create_mmap_info(rxe, buf_size, context, buf); + if (!ip) + goto err1; + + err = copy_to_user(udata->outbuf + offset, &ip->info, + sizeof(ip->info)); + if (err) + goto err2; + + spin_lock_bh(&rxe->pending_lock); + list_add(&ip->pending_mmaps, &rxe->pending_mmaps); + spin_unlock_bh(&rxe->pending_lock); + } + + *ip_p = ip; + + return 0; + +err2: + kfree(ip); +err1: + return -EINVAL; +} + +struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, + int *num_elem, + unsigned int elem_size) +{ + struct rxe_queue *q; + size_t buf_size; + unsigned int num_slots; + + /* num_elem == 0 is allowed, but uninteresting */ + if (*num_elem < 0) + goto err1; + + q = kmalloc(sizeof(*q), GFP_KERNEL); + if (!q) + goto err1; + + q->rxe = rxe; + + /* used in resize, only need to copy used part of queue */ + q->elem_size = elem_size; + + /* pad element up to at least a cacheline and always a power of 2 */ + if (elem_size < cache_line_size()) + elem_size = cache_line_size(); + elem_size = roundup_pow_of_two(elem_size); + + q->log2_elem_size = order_base_2(elem_size); + + num_slots = *num_elem + 1; + num_slots = roundup_pow_of_two(num_slots); + q->index_mask = num_slots - 1; + + buf_size = sizeof(struct rxe_queue_buf) + num_slots * elem_size; + + q->buf = vmalloc_user(buf_size); + if (!q->buf) + goto err2; + + q->buf->log2_elem_size = q->log2_elem_size; + q->buf->index_mask = q->index_mask; + + q->buf_size = buf_size; + + *num_elem = num_slots - 1; + return q; + +err2: + kfree(q); +err1: + return NULL; +} + +/* copies elements from original q to new q and then swaps the contents of the + * two q headers. This is so that if anyone is holding a pointer to q it will + * still work + */ +static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q, + unsigned int num_elem) +{ + if (!queue_empty(q) && (num_elem < queue_count(q))) + return -EINVAL; + + while (!queue_empty(q)) { + memcpy(producer_addr(new_q), consumer_addr(q), + new_q->elem_size); + advance_producer(new_q); + advance_consumer(q); + } + + swap(*q, *new_q); + + return 0; +} + +int rxe_queue_resize(struct rxe_queue *q, + unsigned int *num_elem_p, + unsigned int elem_size, + struct ib_ucontext *context, + struct ib_udata *udata, + spinlock_t *producer_lock, + spinlock_t *consumer_lock) +{ + struct rxe_queue *new_q; + unsigned int num_elem = *num_elem_p; + int err; + unsigned long flags = 0, flags1; + + new_q = rxe_queue_init(q->rxe, &num_elem, elem_size); + if (!new_q) + return -ENOMEM; + + err = do_mmap_info(new_q->rxe, udata, false, context, new_q->buf, + new_q->buf_size, &new_q->ip); + if (err) { + vfree(new_q->buf); + kfree(new_q); + goto err1; + } + + spin_lock_irqsave(consumer_lock, flags1); + + if (producer_lock) { + spin_lock_irqsave(producer_lock, flags); + err = resize_finish(q, new_q, num_elem); + spin_unlock_irqrestore(producer_lock, flags); + } else { + err = resize_finish(q, new_q, num_elem); + } + + spin_unlock_irqrestore(consumer_lock, flags1); + + rxe_queue_cleanup(new_q); /* new/old dep on err */ + if (err) + goto err1; + + *num_elem_p = num_elem; + return 0; + +err1: + return err; +} + +void rxe_queue_cleanup(struct rxe_queue *q) +{ + if (q->ip) + kref_put(&q->ip->ref, rxe_mmap_release); + else + vfree(q->buf); + + kfree(q); +} diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h new file mode 100644 index 000000000000..239fd609c31e --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_queue.h @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_QUEUE_H +#define RXE_QUEUE_H + +/* implements a simple circular buffer that can optionally be + * shared between user space and the kernel and can be resized + + * the requested element size is rounded up to a power of 2 + * and the number of elements in the buffer is also rounded + * up to a power of 2. Since the queue is empty when the + * producer and consumer indices match the maximum capacity + * of the queue is one less than the number of element slots + */ + +/* this data structure is shared between user space and kernel + * space for those cases where the queue is shared. It contains + * the producer and consumer indices. Is also contains a copy + * of the queue size parameters for user space to use but the + * kernel must use the parameters in the rxe_queue struct + * this MUST MATCH the corresponding librxe struct + * for performance reasons arrange to have producer and consumer + * pointers in separate cache lines + * the kernel should always mask the indices to avoid accessing + * memory outside of the data area + */ +struct rxe_queue_buf { + __u32 log2_elem_size; + __u32 index_mask; + __u32 pad_1[30]; + __u32 producer_index; + __u32 pad_2[31]; + __u32 consumer_index; + __u32 pad_3[31]; + __u8 data[0]; +}; + +struct rxe_queue { + struct rxe_dev *rxe; + struct rxe_queue_buf *buf; + struct rxe_mmap_info *ip; + size_t buf_size; + size_t elem_size; + unsigned int log2_elem_size; + unsigned int index_mask; +}; + +int do_mmap_info(struct rxe_dev *rxe, + struct ib_udata *udata, + bool is_req, + struct ib_ucontext *context, + struct rxe_queue_buf *buf, + size_t buf_size, + struct rxe_mmap_info **ip_p); + +struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, + int *num_elem, + unsigned int elem_size); + +int rxe_queue_resize(struct rxe_queue *q, + unsigned int *num_elem_p, + unsigned int elem_size, + struct ib_ucontext *context, + struct ib_udata *udata, + /* Protect producers while resizing queue */ + spinlock_t *producer_lock, + /* Protect consumers while resizing queue */ + spinlock_t *consumer_lock); + +void rxe_queue_cleanup(struct rxe_queue *queue); + +static inline int next_index(struct rxe_queue *q, int index) +{ + return (index + 1) & q->buf->index_mask; +} + +static inline int queue_empty(struct rxe_queue *q) +{ + return ((q->buf->producer_index - q->buf->consumer_index) + & q->index_mask) == 0; +} + +static inline int queue_full(struct rxe_queue *q) +{ + return ((q->buf->producer_index + 1 - q->buf->consumer_index) + & q->index_mask) == 0; +} + +static inline void advance_producer(struct rxe_queue *q) +{ + q->buf->producer_index = (q->buf->producer_index + 1) + & q->index_mask; +} + +static inline void advance_consumer(struct rxe_queue *q) +{ + q->buf->consumer_index = (q->buf->consumer_index + 1) + & q->index_mask; +} + +static inline void *producer_addr(struct rxe_queue *q) +{ + return q->buf->data + ((q->buf->producer_index & q->index_mask) + << q->log2_elem_size); +} + +static inline void *consumer_addr(struct rxe_queue *q) +{ + return q->buf->data + ((q->buf->consumer_index & q->index_mask) + << q->log2_elem_size); +} + +static inline unsigned int producer_index(struct rxe_queue *q) +{ + return q->buf->producer_index; +} + +static inline unsigned int consumer_index(struct rxe_queue *q) +{ + return q->buf->consumer_index; +} + +static inline void *addr_from_index(struct rxe_queue *q, unsigned int index) +{ + return q->buf->data + ((index & q->index_mask) + << q->buf->log2_elem_size); +} + +static inline unsigned int index_from_addr(const struct rxe_queue *q, + const void *addr) +{ + return (((u8 *)addr - q->buf->data) >> q->log2_elem_size) + & q->index_mask; +} + +static inline unsigned int queue_count(const struct rxe_queue *q) +{ + return (q->buf->producer_index - q->buf->consumer_index) + & q->index_mask; +} + +static inline void *queue_head(struct rxe_queue *q) +{ + return queue_empty(q) ? NULL : consumer_addr(q); +} + +#endif /* RXE_QUEUE_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c new file mode 100644 index 000000000000..3d464c23e08b --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -0,0 +1,420 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/skbuff.h> + +#include "rxe.h" +#include "rxe_loc.h" + +static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct rxe_qp *qp) +{ + if (unlikely(!qp->valid)) + goto err1; + + switch (qp_type(qp)) { + case IB_QPT_RC: + if (unlikely((pkt->opcode & IB_OPCODE_RC) != 0)) { + pr_warn_ratelimited("bad qp type\n"); + goto err1; + } + break; + case IB_QPT_UC: + if (unlikely(!(pkt->opcode & IB_OPCODE_UC))) { + pr_warn_ratelimited("bad qp type\n"); + goto err1; + } + break; + case IB_QPT_UD: + case IB_QPT_SMI: + case IB_QPT_GSI: + if (unlikely(!(pkt->opcode & IB_OPCODE_UD))) { + pr_warn_ratelimited("bad qp type\n"); + goto err1; + } + break; + default: + pr_warn_ratelimited("unsupported qp type\n"); + goto err1; + } + + if (pkt->mask & RXE_REQ_MASK) { + if (unlikely(qp->resp.state != QP_STATE_READY)) + goto err1; + } else if (unlikely(qp->req.state < QP_STATE_READY || + qp->req.state > QP_STATE_DRAINED)) { + goto err1; + } + + return 0; + +err1: + return -EINVAL; +} + +static void set_bad_pkey_cntr(struct rxe_port *port) +{ + spin_lock_bh(&port->port_lock); + port->attr.bad_pkey_cntr = min((u32)0xffff, + port->attr.bad_pkey_cntr + 1); + spin_unlock_bh(&port->port_lock); +} + +static void set_qkey_viol_cntr(struct rxe_port *port) +{ + spin_lock_bh(&port->port_lock); + port->attr.qkey_viol_cntr = min((u32)0xffff, + port->attr.qkey_viol_cntr + 1); + spin_unlock_bh(&port->port_lock); +} + +static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + u32 qpn, struct rxe_qp *qp) +{ + int i; + int found_pkey = 0; + struct rxe_port *port = &rxe->port; + u16 pkey = bth_pkey(pkt); + + pkt->pkey_index = 0; + + if (qpn == 1) { + for (i = 0; i < port->attr.pkey_tbl_len; i++) { + if (pkey_match(pkey, port->pkey_tbl[i])) { + pkt->pkey_index = i; + found_pkey = 1; + break; + } + } + + if (!found_pkey) { + pr_warn_ratelimited("bad pkey = 0x%x\n", pkey); + set_bad_pkey_cntr(port); + goto err1; + } + } else if (qpn != 0) { + if (unlikely(!pkey_match(pkey, + port->pkey_tbl[qp->attr.pkey_index] + ))) { + pr_warn_ratelimited("bad pkey = 0x%0x\n", pkey); + set_bad_pkey_cntr(port); + goto err1; + } + pkt->pkey_index = qp->attr.pkey_index; + } + + if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) && + qpn != 0 && pkt->mask) { + u32 qkey = (qpn == 1) ? GSI_QKEY : qp->attr.qkey; + + if (unlikely(deth_qkey(pkt) != qkey)) { + pr_warn_ratelimited("bad qkey, got 0x%x expected 0x%x for qpn 0x%x\n", + deth_qkey(pkt), qkey, qpn); + set_qkey_viol_cntr(port); + goto err1; + } + } + + return 0; + +err1: + return -EINVAL; +} + +static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct rxe_qp *qp) +{ + struct sk_buff *skb = PKT_TO_SKB(pkt); + + if (qp_type(qp) != IB_QPT_RC && qp_type(qp) != IB_QPT_UC) + goto done; + + if (unlikely(pkt->port_num != qp->attr.port_num)) { + pr_warn_ratelimited("port %d != qp port %d\n", + pkt->port_num, qp->attr.port_num); + goto err1; + } + + if (skb->protocol == htons(ETH_P_IP)) { + struct in_addr *saddr = + &qp->pri_av.sgid_addr._sockaddr_in.sin_addr; + struct in_addr *daddr = + &qp->pri_av.dgid_addr._sockaddr_in.sin_addr; + + if (ip_hdr(skb)->daddr != saddr->s_addr) { + pr_warn_ratelimited("dst addr %pI4 != qp source addr %pI4\n", + &ip_hdr(skb)->daddr, + &saddr->s_addr); + goto err1; + } + + if (ip_hdr(skb)->saddr != daddr->s_addr) { + pr_warn_ratelimited("source addr %pI4 != qp dst addr %pI4\n", + &ip_hdr(skb)->saddr, + &daddr->s_addr); + goto err1; + } + + } else if (skb->protocol == htons(ETH_P_IPV6)) { + struct in6_addr *saddr = + &qp->pri_av.sgid_addr._sockaddr_in6.sin6_addr; + struct in6_addr *daddr = + &qp->pri_av.dgid_addr._sockaddr_in6.sin6_addr; + + if (memcmp(&ipv6_hdr(skb)->daddr, saddr, sizeof(*saddr))) { + pr_warn_ratelimited("dst addr %pI6 != qp source addr %pI6\n", + &ipv6_hdr(skb)->daddr, saddr); + goto err1; + } + + if (memcmp(&ipv6_hdr(skb)->saddr, daddr, sizeof(*daddr))) { + pr_warn_ratelimited("source addr %pI6 != qp dst addr %pI6\n", + &ipv6_hdr(skb)->saddr, daddr); + goto err1; + } + } + +done: + return 0; + +err1: + return -EINVAL; +} + +static int hdr_check(struct rxe_pkt_info *pkt) +{ + struct rxe_dev *rxe = pkt->rxe; + struct rxe_port *port = &rxe->port; + struct rxe_qp *qp = NULL; + u32 qpn = bth_qpn(pkt); + int index; + int err; + + if (unlikely(bth_tver(pkt) != BTH_TVER)) { + pr_warn_ratelimited("bad tver\n"); + goto err1; + } + + if (qpn != IB_MULTICAST_QPN) { + index = (qpn == 0) ? port->qp_smi_index : + ((qpn == 1) ? port->qp_gsi_index : qpn); + qp = rxe_pool_get_index(&rxe->qp_pool, index); + if (unlikely(!qp)) { + pr_warn_ratelimited("no qp matches qpn 0x%x\n", qpn); + goto err1; + } + + err = check_type_state(rxe, pkt, qp); + if (unlikely(err)) + goto err2; + + err = check_addr(rxe, pkt, qp); + if (unlikely(err)) + goto err2; + + err = check_keys(rxe, pkt, qpn, qp); + if (unlikely(err)) + goto err2; + } else { + if (unlikely((pkt->mask & RXE_GRH_MASK) == 0)) { + pr_warn_ratelimited("no grh for mcast qpn\n"); + goto err1; + } + } + + pkt->qp = qp; + return 0; + +err2: + if (qp) + rxe_drop_ref(qp); +err1: + return -EINVAL; +} + +static inline void rxe_rcv_pkt(struct rxe_dev *rxe, + struct rxe_pkt_info *pkt, + struct sk_buff *skb) +{ + if (pkt->mask & RXE_REQ_MASK) + rxe_resp_queue_pkt(rxe, pkt->qp, skb); + else + rxe_comp_queue_pkt(rxe, pkt->qp, skb); +} + +static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) +{ + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); + struct rxe_mc_grp *mcg; + struct sk_buff *skb_copy; + struct rxe_mc_elem *mce; + struct rxe_qp *qp; + union ib_gid dgid; + int err; + + if (skb->protocol == htons(ETH_P_IP)) + ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, + (struct in6_addr *)&dgid); + else if (skb->protocol == htons(ETH_P_IPV6)) + memcpy(&dgid, &ipv6_hdr(skb)->daddr, sizeof(dgid)); + + /* lookup mcast group corresponding to mgid, takes a ref */ + mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid); + if (!mcg) + goto err1; /* mcast group not registered */ + + spin_lock_bh(&mcg->mcg_lock); + + list_for_each_entry(mce, &mcg->qp_list, qp_list) { + qp = mce->qp; + pkt = SKB_TO_PKT(skb); + + /* validate qp for incoming packet */ + err = check_type_state(rxe, pkt, qp); + if (err) + continue; + + err = check_keys(rxe, pkt, bth_qpn(pkt), qp); + if (err) + continue; + + /* if *not* the last qp in the list + * make a copy of the skb to post to the next qp + */ + skb_copy = (mce->qp_list.next != &mcg->qp_list) ? + skb_clone(skb, GFP_KERNEL) : NULL; + + pkt->qp = qp; + rxe_add_ref(qp); + rxe_rcv_pkt(rxe, pkt, skb); + + skb = skb_copy; + if (!skb) + break; + } + + spin_unlock_bh(&mcg->mcg_lock); + + rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */ + +err1: + if (skb) + kfree_skb(skb); +} + +static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) +{ + union ib_gid dgid; + union ib_gid *pdgid; + u16 index; + + if (skb->protocol == htons(ETH_P_IP)) { + ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, + (struct in6_addr *)&dgid); + pdgid = &dgid; + } else { + pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr; + } + + return ib_find_cached_gid_by_port(&rxe->ib_dev, pdgid, + IB_GID_TYPE_ROCE_UDP_ENCAP, + 1, rxe->ndev, &index); +} + +/* rxe_rcv is called from the interface driver */ +int rxe_rcv(struct sk_buff *skb) +{ + int err; + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); + struct rxe_dev *rxe = pkt->rxe; + __be32 *icrcp; + u32 calc_icrc, pack_icrc; + + pkt->offset = 0; + + if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES)) + goto drop; + + if (unlikely(rxe_match_dgid(rxe, skb) < 0)) { + pr_warn_ratelimited("failed matching dgid\n"); + goto drop; + } + + pkt->opcode = bth_opcode(pkt); + pkt->psn = bth_psn(pkt); + pkt->qp = NULL; + pkt->mask |= rxe_opcode[pkt->opcode].mask; + + if (unlikely(skb->len < header_size(pkt))) + goto drop; + + err = hdr_check(pkt); + if (unlikely(err)) + goto drop; + + /* Verify ICRC */ + icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE); + pack_icrc = be32_to_cpu(*icrcp); + + calc_icrc = rxe_icrc_hdr(pkt, skb); + calc_icrc = crc32_le(calc_icrc, (u8 *)payload_addr(pkt), payload_size(pkt)); + calc_icrc = cpu_to_be32(~calc_icrc); + if (unlikely(calc_icrc != pack_icrc)) { + char saddr[sizeof(struct in6_addr)]; + + if (skb->protocol == htons(ETH_P_IPV6)) + sprintf(saddr, "%pI6", &ipv6_hdr(skb)->saddr); + else if (skb->protocol == htons(ETH_P_IP)) + sprintf(saddr, "%pI4", &ip_hdr(skb)->saddr); + else + sprintf(saddr, "unknown"); + + pr_warn_ratelimited("bad ICRC from %s\n", saddr); + goto drop; + } + + if (unlikely(bth_qpn(pkt) == IB_MULTICAST_QPN)) + rxe_rcv_mcast_pkt(rxe, skb); + else + rxe_rcv_pkt(rxe, pkt, skb); + + return 0; + +drop: + if (pkt->qp) + rxe_drop_ref(pkt->qp); + + kfree_skb(skb); + return 0; +} +EXPORT_SYMBOL(rxe_rcv); diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c new file mode 100644 index 000000000000..33b2d9d77021 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -0,0 +1,726 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/skbuff.h> + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" + +static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + unsigned opcode); + +static inline void retry_first_write_send(struct rxe_qp *qp, + struct rxe_send_wqe *wqe, + unsigned mask, int npsn) +{ + int i; + + for (i = 0; i < npsn; i++) { + int to_send = (wqe->dma.resid > qp->mtu) ? + qp->mtu : wqe->dma.resid; + + qp->req.opcode = next_opcode(qp, wqe, + wqe->wr.opcode); + + if (wqe->wr.send_flags & IB_SEND_INLINE) { + wqe->dma.resid -= to_send; + wqe->dma.sge_offset += to_send; + } else { + advance_dma_data(&wqe->dma, to_send); + } + if (mask & WR_WRITE_MASK) + wqe->iova += qp->mtu; + } +} + +static void req_retry(struct rxe_qp *qp) +{ + struct rxe_send_wqe *wqe; + unsigned int wqe_index; + unsigned int mask; + int npsn; + int first = 1; + + wqe = queue_head(qp->sq.queue); + npsn = (qp->comp.psn - wqe->first_psn) & BTH_PSN_MASK; + + qp->req.wqe_index = consumer_index(qp->sq.queue); + qp->req.psn = qp->comp.psn; + qp->req.opcode = -1; + + for (wqe_index = consumer_index(qp->sq.queue); + wqe_index != producer_index(qp->sq.queue); + wqe_index = next_index(qp->sq.queue, wqe_index)) { + wqe = addr_from_index(qp->sq.queue, wqe_index); + mask = wr_opcode_mask(wqe->wr.opcode, qp); + + if (wqe->state == wqe_state_posted) + break; + + if (wqe->state == wqe_state_done) + continue; + + wqe->iova = (mask & WR_ATOMIC_MASK) ? + wqe->wr.wr.atomic.remote_addr : + (mask & WR_READ_OR_WRITE_MASK) ? + wqe->wr.wr.rdma.remote_addr : + 0; + + if (!first || (mask & WR_READ_MASK) == 0) { + wqe->dma.resid = wqe->dma.length; + wqe->dma.cur_sge = 0; + wqe->dma.sge_offset = 0; + } + + if (first) { + first = 0; + + if (mask & WR_WRITE_OR_SEND_MASK) + retry_first_write_send(qp, wqe, mask, npsn); + + if (mask & WR_READ_MASK) + wqe->iova += npsn * qp->mtu; + } + + wqe->state = wqe_state_posted; + } +} + +void rnr_nak_timer(unsigned long data) +{ + struct rxe_qp *qp = (struct rxe_qp *)data; + + pr_debug("rnr nak timer fired\n"); + rxe_run_task(&qp->req.task, 1); +} + +static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) +{ + struct rxe_send_wqe *wqe = queue_head(qp->sq.queue); + unsigned long flags; + + if (unlikely(qp->req.state == QP_STATE_DRAIN)) { + /* check to see if we are drained; + * state_lock used by requester and completer + */ + spin_lock_irqsave(&qp->state_lock, flags); + do { + if (qp->req.state != QP_STATE_DRAIN) { + /* comp just finished */ + spin_unlock_irqrestore(&qp->state_lock, + flags); + break; + } + + if (wqe && ((qp->req.wqe_index != + consumer_index(qp->sq.queue)) || + (wqe->state != wqe_state_posted))) { + /* comp not done yet */ + spin_unlock_irqrestore(&qp->state_lock, + flags); + break; + } + + qp->req.state = QP_STATE_DRAINED; + spin_unlock_irqrestore(&qp->state_lock, flags); + + if (qp->ibqp.event_handler) { + struct ib_event ev; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_SQ_DRAINED; + qp->ibqp.event_handler(&ev, + qp->ibqp.qp_context); + } + } while (0); + } + + if (qp->req.wqe_index == producer_index(qp->sq.queue)) + return NULL; + + wqe = addr_from_index(qp->sq.queue, qp->req.wqe_index); + + if (unlikely((qp->req.state == QP_STATE_DRAIN || + qp->req.state == QP_STATE_DRAINED) && + (wqe->state != wqe_state_processing))) + return NULL; + + if (unlikely((wqe->wr.send_flags & IB_SEND_FENCE) && + (qp->req.wqe_index != consumer_index(qp->sq.queue)))) { + qp->req.wait_fence = 1; + return NULL; + } + + wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp); + return wqe; +} + +static int next_opcode_rc(struct rxe_qp *qp, unsigned opcode, int fits) +{ + switch (opcode) { + case IB_WR_RDMA_WRITE: + if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST || + qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE) + return fits ? + IB_OPCODE_RC_RDMA_WRITE_LAST : + IB_OPCODE_RC_RDMA_WRITE_MIDDLE; + else + return fits ? + IB_OPCODE_RC_RDMA_WRITE_ONLY : + IB_OPCODE_RC_RDMA_WRITE_FIRST; + + case IB_WR_RDMA_WRITE_WITH_IMM: + if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST || + qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE) + return fits ? + IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE : + IB_OPCODE_RC_RDMA_WRITE_MIDDLE; + else + return fits ? + IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE : + IB_OPCODE_RC_RDMA_WRITE_FIRST; + + case IB_WR_SEND: + if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST || + qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE) + return fits ? + IB_OPCODE_RC_SEND_LAST : + IB_OPCODE_RC_SEND_MIDDLE; + else + return fits ? + IB_OPCODE_RC_SEND_ONLY : + IB_OPCODE_RC_SEND_FIRST; + + case IB_WR_SEND_WITH_IMM: + if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST || + qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE) + return fits ? + IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE : + IB_OPCODE_RC_SEND_MIDDLE; + else + return fits ? + IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE : + IB_OPCODE_RC_SEND_FIRST; + + case IB_WR_RDMA_READ: + return IB_OPCODE_RC_RDMA_READ_REQUEST; + + case IB_WR_ATOMIC_CMP_AND_SWP: + return IB_OPCODE_RC_COMPARE_SWAP; + + case IB_WR_ATOMIC_FETCH_AND_ADD: + return IB_OPCODE_RC_FETCH_ADD; + + case IB_WR_SEND_WITH_INV: + if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST || + qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE) + return fits ? IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE : + IB_OPCODE_RC_SEND_MIDDLE; + else + return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE : + IB_OPCODE_RC_SEND_FIRST; + case IB_WR_REG_MR: + case IB_WR_LOCAL_INV: + return opcode; + } + + return -EINVAL; +} + +static int next_opcode_uc(struct rxe_qp *qp, unsigned opcode, int fits) +{ + switch (opcode) { + case IB_WR_RDMA_WRITE: + if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST || + qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE) + return fits ? + IB_OPCODE_UC_RDMA_WRITE_LAST : + IB_OPCODE_UC_RDMA_WRITE_MIDDLE; + else + return fits ? + IB_OPCODE_UC_RDMA_WRITE_ONLY : + IB_OPCODE_UC_RDMA_WRITE_FIRST; + + case IB_WR_RDMA_WRITE_WITH_IMM: + if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST || + qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE) + return fits ? + IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE : + IB_OPCODE_UC_RDMA_WRITE_MIDDLE; + else + return fits ? + IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE : + IB_OPCODE_UC_RDMA_WRITE_FIRST; + + case IB_WR_SEND: + if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST || + qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE) + return fits ? + IB_OPCODE_UC_SEND_LAST : + IB_OPCODE_UC_SEND_MIDDLE; + else + return fits ? + IB_OPCODE_UC_SEND_ONLY : + IB_OPCODE_UC_SEND_FIRST; + + case IB_WR_SEND_WITH_IMM: + if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST || + qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE) + return fits ? + IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE : + IB_OPCODE_UC_SEND_MIDDLE; + else + return fits ? + IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE : + IB_OPCODE_UC_SEND_FIRST; + } + + return -EINVAL; +} + +static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + unsigned opcode) +{ + int fits = (wqe->dma.resid <= qp->mtu); + + switch (qp_type(qp)) { + case IB_QPT_RC: + return next_opcode_rc(qp, opcode, fits); + + case IB_QPT_UC: + return next_opcode_uc(qp, opcode, fits); + + case IB_QPT_SMI: + case IB_QPT_UD: + case IB_QPT_GSI: + switch (opcode) { + case IB_WR_SEND: + return IB_OPCODE_UD_SEND_ONLY; + + case IB_WR_SEND_WITH_IMM: + return IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; + } + break; + + default: + break; + } + + return -EINVAL; +} + +static inline int check_init_depth(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +{ + int depth; + + if (wqe->has_rd_atomic) + return 0; + + qp->req.need_rd_atomic = 1; + depth = atomic_dec_return(&qp->req.rd_atomic); + + if (depth >= 0) { + qp->req.need_rd_atomic = 0; + wqe->has_rd_atomic = 1; + return 0; + } + + atomic_inc(&qp->req.rd_atomic); + return -EAGAIN; +} + +static inline int get_mtu(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct rxe_port *port; + struct rxe_av *av; + + if ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC)) + return qp->mtu; + + av = &wqe->av; + port = &rxe->port; + + return port->mtu_cap; +} + +static struct sk_buff *init_req_packet(struct rxe_qp *qp, + struct rxe_send_wqe *wqe, + int opcode, int payload, + struct rxe_pkt_info *pkt) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct rxe_port *port = &rxe->port; + struct sk_buff *skb; + struct rxe_send_wr *ibwr = &wqe->wr; + struct rxe_av *av; + int pad = (-payload) & 0x3; + int paylen; + int solicited; + u16 pkey; + u32 qp_num; + int ack_req; + + /* length from start of bth to end of icrc */ + paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE; + + /* pkt->hdr, rxe, port_num and mask are initialized in ifc + * layer + */ + pkt->opcode = opcode; + pkt->qp = qp; + pkt->psn = qp->req.psn; + pkt->mask = rxe_opcode[opcode].mask; + pkt->paylen = paylen; + pkt->offset = 0; + pkt->wqe = wqe; + + /* init skb */ + av = rxe_get_av(pkt); + skb = rxe->ifc_ops->init_packet(rxe, av, paylen, pkt); + if (unlikely(!skb)) + return NULL; + + /* init bth */ + solicited = (ibwr->send_flags & IB_SEND_SOLICITED) && + (pkt->mask & RXE_END_MASK) && + ((pkt->mask & (RXE_SEND_MASK)) || + (pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) == + (RXE_WRITE_MASK | RXE_IMMDT_MASK)); + + pkey = (qp_type(qp) == IB_QPT_GSI) ? + port->pkey_tbl[ibwr->wr.ud.pkey_index] : + port->pkey_tbl[qp->attr.pkey_index]; + + qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn : + qp->attr.dest_qp_num; + + ack_req = ((pkt->mask & RXE_END_MASK) || + (qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK)); + if (ack_req) + qp->req.noack_pkts = 0; + + bth_init(pkt, pkt->opcode, solicited, 0, pad, pkey, qp_num, + ack_req, pkt->psn); + + /* init optional headers */ + if (pkt->mask & RXE_RETH_MASK) { + reth_set_rkey(pkt, ibwr->wr.rdma.rkey); + reth_set_va(pkt, wqe->iova); + reth_set_len(pkt, wqe->dma.length); + } + + if (pkt->mask & RXE_IMMDT_MASK) + immdt_set_imm(pkt, ibwr->ex.imm_data); + + if (pkt->mask & RXE_IETH_MASK) + ieth_set_rkey(pkt, ibwr->ex.invalidate_rkey); + + if (pkt->mask & RXE_ATMETH_MASK) { + atmeth_set_va(pkt, wqe->iova); + if (opcode == IB_OPCODE_RC_COMPARE_SWAP || + opcode == IB_OPCODE_RD_COMPARE_SWAP) { + atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap); + atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add); + } else { + atmeth_set_swap_add(pkt, ibwr->wr.atomic.compare_add); + } + atmeth_set_rkey(pkt, ibwr->wr.atomic.rkey); + } + + if (pkt->mask & RXE_DETH_MASK) { + if (qp->ibqp.qp_num == 1) + deth_set_qkey(pkt, GSI_QKEY); + else + deth_set_qkey(pkt, ibwr->wr.ud.remote_qkey); + deth_set_sqp(pkt, qp->ibqp.qp_num); + } + + return skb; +} + +static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt, struct sk_buff *skb, + int paylen) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + u32 crc = 0; + u32 *p; + int err; + + err = rxe->ifc_ops->prepare(rxe, pkt, skb, &crc); + if (err) + return err; + + if (pkt->mask & RXE_WRITE_OR_SEND) { + if (wqe->wr.send_flags & IB_SEND_INLINE) { + u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset]; + + crc = crc32_le(crc, tmp, paylen); + + memcpy(payload_addr(pkt), tmp, paylen); + + wqe->dma.resid -= paylen; + wqe->dma.sge_offset += paylen; + } else { + err = copy_data(rxe, qp->pd, 0, &wqe->dma, + payload_addr(pkt), paylen, + from_mem_obj, + &crc); + if (err) + return err; + } + } + p = payload_addr(pkt) + paylen + bth_pad(pkt); + + *p = ~crc; + + return 0; +} + +static void update_wqe_state(struct rxe_qp *qp, + struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt, + enum wqe_state *prev_state) +{ + enum wqe_state prev_state_ = wqe->state; + + if (pkt->mask & RXE_END_MASK) { + if (qp_type(qp) == IB_QPT_RC) + wqe->state = wqe_state_pending; + } else { + wqe->state = wqe_state_processing; + } + + *prev_state = prev_state_; +} + +static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt, int payload) +{ + /* number of packets left to send including current one */ + int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu; + + /* handle zero length packet case */ + if (num_pkt == 0) + num_pkt = 1; + + if (pkt->mask & RXE_START_MASK) { + wqe->first_psn = qp->req.psn; + wqe->last_psn = (qp->req.psn + num_pkt - 1) & BTH_PSN_MASK; + } + + if (pkt->mask & RXE_READ_MASK) + qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK; + else + qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; + + qp->req.opcode = pkt->opcode; + + + if (pkt->mask & RXE_END_MASK) + qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index); + + qp->need_req_skb = 0; + + if (qp->qp_timeout_jiffies && !timer_pending(&qp->retrans_timer)) + mod_timer(&qp->retrans_timer, + jiffies + qp->qp_timeout_jiffies); +} + +int rxe_requester(void *arg) +{ + struct rxe_qp *qp = (struct rxe_qp *)arg; + struct rxe_pkt_info pkt; + struct sk_buff *skb; + struct rxe_send_wqe *wqe; + unsigned mask; + int payload; + int mtu; + int opcode; + int ret; + enum wqe_state prev_state; + +next_wqe: + if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) + goto exit; + + if (unlikely(qp->req.state == QP_STATE_RESET)) { + qp->req.wqe_index = consumer_index(qp->sq.queue); + qp->req.opcode = -1; + qp->req.need_rd_atomic = 0; + qp->req.wait_psn = 0; + qp->req.need_retry = 0; + goto exit; + } + + if (unlikely(qp->req.need_retry)) { + req_retry(qp); + qp->req.need_retry = 0; + } + + wqe = req_next_wqe(qp); + if (unlikely(!wqe)) + goto exit; + + if (wqe->mask & WR_REG_MASK) { + if (wqe->wr.opcode == IB_WR_LOCAL_INV) { + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct rxe_mem *rmr; + + rmr = rxe_pool_get_index(&rxe->mr_pool, + wqe->wr.ex.invalidate_rkey >> 8); + if (!rmr) { + pr_err("No mr for key %#x\n", wqe->wr.ex.invalidate_rkey); + wqe->state = wqe_state_error; + wqe->status = IB_WC_MW_BIND_ERR; + goto exit; + } + rmr->state = RXE_MEM_STATE_FREE; + wqe->state = wqe_state_done; + wqe->status = IB_WC_SUCCESS; + } else if (wqe->wr.opcode == IB_WR_REG_MR) { + struct rxe_mem *rmr = to_rmr(wqe->wr.wr.reg.mr); + + rmr->state = RXE_MEM_STATE_VALID; + rmr->access = wqe->wr.wr.reg.access; + rmr->lkey = wqe->wr.wr.reg.key; + rmr->rkey = wqe->wr.wr.reg.key; + wqe->state = wqe_state_done; + wqe->status = IB_WC_SUCCESS; + } else { + goto exit; + } + qp->req.wqe_index = next_index(qp->sq.queue, + qp->req.wqe_index); + goto next_wqe; + } + + if (unlikely(qp_type(qp) == IB_QPT_RC && + qp->req.psn > (qp->comp.psn + RXE_MAX_UNACKED_PSNS))) { + qp->req.wait_psn = 1; + goto exit; + } + + /* Limit the number of inflight SKBs per QP */ + if (unlikely(atomic_read(&qp->skb_out) > + RXE_INFLIGHT_SKBS_PER_QP_HIGH)) { + qp->need_req_skb = 1; + goto exit; + } + + opcode = next_opcode(qp, wqe, wqe->wr.opcode); + if (unlikely(opcode < 0)) { + wqe->status = IB_WC_LOC_QP_OP_ERR; + goto exit; + } + + mask = rxe_opcode[opcode].mask; + if (unlikely(mask & RXE_READ_OR_ATOMIC)) { + if (check_init_depth(qp, wqe)) + goto exit; + } + + mtu = get_mtu(qp, wqe); + payload = (mask & RXE_WRITE_OR_SEND) ? wqe->dma.resid : 0; + if (payload > mtu) { + if (qp_type(qp) == IB_QPT_UD) { + /* C10-93.1.1: If the total sum of all the buffer lengths specified for a + * UD message exceeds the MTU of the port as returned by QueryHCA, the CI + * shall not emit any packets for this message. Further, the CI shall not + * generate an error due to this condition. + */ + + /* fake a successful UD send */ + wqe->first_psn = qp->req.psn; + wqe->last_psn = qp->req.psn; + qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; + qp->req.opcode = IB_OPCODE_UD_SEND_ONLY; + qp->req.wqe_index = next_index(qp->sq.queue, + qp->req.wqe_index); + wqe->state = wqe_state_done; + wqe->status = IB_WC_SUCCESS; + goto complete; + } + payload = mtu; + } + + skb = init_req_packet(qp, wqe, opcode, payload, &pkt); + if (unlikely(!skb)) { + pr_err("Failed allocating skb\n"); + goto err; + } + + if (fill_packet(qp, wqe, &pkt, skb, payload)) { + pr_debug("Error during fill packet\n"); + goto err; + } + + update_wqe_state(qp, wqe, &pkt, &prev_state); + ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); + if (ret) { + qp->need_req_skb = 1; + kfree_skb(skb); + + wqe->state = prev_state; + + if (ret == -EAGAIN) { + rxe_run_task(&qp->req.task, 1); + goto exit; + } + + goto err; + } + + update_state(qp, wqe, &pkt, payload); + + goto next_wqe; + +err: + kfree_skb(skb); + wqe->status = IB_WC_LOC_PROT_ERR; + wqe->state = wqe_state_error; + +complete: + if (qp_type(qp) != IB_QPT_RC) { + while (rxe_completer(qp) == 0) + ; + } + + return 0; + +exit: + return -EAGAIN; +} diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c new file mode 100644 index 000000000000..ebb03b46e2ad --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -0,0 +1,1380 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/skbuff.h> + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" + +enum resp_states { + RESPST_NONE, + RESPST_GET_REQ, + RESPST_CHK_PSN, + RESPST_CHK_OP_SEQ, + RESPST_CHK_OP_VALID, + RESPST_CHK_RESOURCE, + RESPST_CHK_LENGTH, + RESPST_CHK_RKEY, + RESPST_EXECUTE, + RESPST_READ_REPLY, + RESPST_COMPLETE, + RESPST_ACKNOWLEDGE, + RESPST_CLEANUP, + RESPST_DUPLICATE_REQUEST, + RESPST_ERR_MALFORMED_WQE, + RESPST_ERR_UNSUPPORTED_OPCODE, + RESPST_ERR_MISALIGNED_ATOMIC, + RESPST_ERR_PSN_OUT_OF_SEQ, + RESPST_ERR_MISSING_OPCODE_FIRST, + RESPST_ERR_MISSING_OPCODE_LAST_C, + RESPST_ERR_MISSING_OPCODE_LAST_D1E, + RESPST_ERR_TOO_MANY_RDMA_ATM_REQ, + RESPST_ERR_RNR, + RESPST_ERR_RKEY_VIOLATION, + RESPST_ERR_LENGTH, + RESPST_ERR_CQ_OVERFLOW, + RESPST_ERROR, + RESPST_RESET, + RESPST_DONE, + RESPST_EXIT, +}; + +static char *resp_state_name[] = { + [RESPST_NONE] = "NONE", + [RESPST_GET_REQ] = "GET_REQ", + [RESPST_CHK_PSN] = "CHK_PSN", + [RESPST_CHK_OP_SEQ] = "CHK_OP_SEQ", + [RESPST_CHK_OP_VALID] = "CHK_OP_VALID", + [RESPST_CHK_RESOURCE] = "CHK_RESOURCE", + [RESPST_CHK_LENGTH] = "CHK_LENGTH", + [RESPST_CHK_RKEY] = "CHK_RKEY", + [RESPST_EXECUTE] = "EXECUTE", + [RESPST_READ_REPLY] = "READ_REPLY", + [RESPST_COMPLETE] = "COMPLETE", + [RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE", + [RESPST_CLEANUP] = "CLEANUP", + [RESPST_DUPLICATE_REQUEST] = "DUPLICATE_REQUEST", + [RESPST_ERR_MALFORMED_WQE] = "ERR_MALFORMED_WQE", + [RESPST_ERR_UNSUPPORTED_OPCODE] = "ERR_UNSUPPORTED_OPCODE", + [RESPST_ERR_MISALIGNED_ATOMIC] = "ERR_MISALIGNED_ATOMIC", + [RESPST_ERR_PSN_OUT_OF_SEQ] = "ERR_PSN_OUT_OF_SEQ", + [RESPST_ERR_MISSING_OPCODE_FIRST] = "ERR_MISSING_OPCODE_FIRST", + [RESPST_ERR_MISSING_OPCODE_LAST_C] = "ERR_MISSING_OPCODE_LAST_C", + [RESPST_ERR_MISSING_OPCODE_LAST_D1E] = "ERR_MISSING_OPCODE_LAST_D1E", + [RESPST_ERR_TOO_MANY_RDMA_ATM_REQ] = "ERR_TOO_MANY_RDMA_ATM_REQ", + [RESPST_ERR_RNR] = "ERR_RNR", + [RESPST_ERR_RKEY_VIOLATION] = "ERR_RKEY_VIOLATION", + [RESPST_ERR_LENGTH] = "ERR_LENGTH", + [RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW", + [RESPST_ERROR] = "ERROR", + [RESPST_RESET] = "RESET", + [RESPST_DONE] = "DONE", + [RESPST_EXIT] = "EXIT", +}; + +/* rxe_recv calls here to add a request packet to the input queue */ +void rxe_resp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp, + struct sk_buff *skb) +{ + int must_sched; + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); + + skb_queue_tail(&qp->req_pkts, skb); + + must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) || + (skb_queue_len(&qp->req_pkts) > 1); + + rxe_run_task(&qp->resp.task, must_sched); +} + +static inline enum resp_states get_req(struct rxe_qp *qp, + struct rxe_pkt_info **pkt_p) +{ + struct sk_buff *skb; + + if (qp->resp.state == QP_STATE_ERROR) { + skb = skb_dequeue(&qp->req_pkts); + if (skb) { + /* drain request packet queue */ + rxe_drop_ref(qp); + kfree_skb(skb); + return RESPST_GET_REQ; + } + + /* go drain recv wr queue */ + return RESPST_CHK_RESOURCE; + } + + skb = skb_peek(&qp->req_pkts); + if (!skb) + return RESPST_EXIT; + + *pkt_p = SKB_TO_PKT(skb); + + return (qp->resp.res) ? RESPST_READ_REPLY : RESPST_CHK_PSN; +} + +static enum resp_states check_psn(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + int diff = psn_compare(pkt->psn, qp->resp.psn); + + switch (qp_type(qp)) { + case IB_QPT_RC: + if (diff > 0) { + if (qp->resp.sent_psn_nak) + return RESPST_CLEANUP; + + qp->resp.sent_psn_nak = 1; + return RESPST_ERR_PSN_OUT_OF_SEQ; + + } else if (diff < 0) { + return RESPST_DUPLICATE_REQUEST; + } + + if (qp->resp.sent_psn_nak) + qp->resp.sent_psn_nak = 0; + + break; + + case IB_QPT_UC: + if (qp->resp.drop_msg || diff != 0) { + if (pkt->mask & RXE_START_MASK) { + qp->resp.drop_msg = 0; + return RESPST_CHK_OP_SEQ; + } + + qp->resp.drop_msg = 1; + return RESPST_CLEANUP; + } + break; + default: + break; + } + + return RESPST_CHK_OP_SEQ; +} + +static enum resp_states check_op_seq(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + switch (qp_type(qp)) { + case IB_QPT_RC: + switch (qp->resp.opcode) { + case IB_OPCODE_RC_SEND_FIRST: + case IB_OPCODE_RC_SEND_MIDDLE: + switch (pkt->opcode) { + case IB_OPCODE_RC_SEND_MIDDLE: + case IB_OPCODE_RC_SEND_LAST: + case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE: + case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE: + return RESPST_CHK_OP_VALID; + default: + return RESPST_ERR_MISSING_OPCODE_LAST_C; + } + + case IB_OPCODE_RC_RDMA_WRITE_FIRST: + case IB_OPCODE_RC_RDMA_WRITE_MIDDLE: + switch (pkt->opcode) { + case IB_OPCODE_RC_RDMA_WRITE_MIDDLE: + case IB_OPCODE_RC_RDMA_WRITE_LAST: + case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE: + return RESPST_CHK_OP_VALID; + default: + return RESPST_ERR_MISSING_OPCODE_LAST_C; + } + + default: + switch (pkt->opcode) { + case IB_OPCODE_RC_SEND_MIDDLE: + case IB_OPCODE_RC_SEND_LAST: + case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE: + case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE: + case IB_OPCODE_RC_RDMA_WRITE_MIDDLE: + case IB_OPCODE_RC_RDMA_WRITE_LAST: + case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE: + return RESPST_ERR_MISSING_OPCODE_FIRST; + default: + return RESPST_CHK_OP_VALID; + } + } + break; + + case IB_QPT_UC: + switch (qp->resp.opcode) { + case IB_OPCODE_UC_SEND_FIRST: + case IB_OPCODE_UC_SEND_MIDDLE: + switch (pkt->opcode) { + case IB_OPCODE_UC_SEND_MIDDLE: + case IB_OPCODE_UC_SEND_LAST: + case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE: + return RESPST_CHK_OP_VALID; + default: + return RESPST_ERR_MISSING_OPCODE_LAST_D1E; + } + + case IB_OPCODE_UC_RDMA_WRITE_FIRST: + case IB_OPCODE_UC_RDMA_WRITE_MIDDLE: + switch (pkt->opcode) { + case IB_OPCODE_UC_RDMA_WRITE_MIDDLE: + case IB_OPCODE_UC_RDMA_WRITE_LAST: + case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE: + return RESPST_CHK_OP_VALID; + default: + return RESPST_ERR_MISSING_OPCODE_LAST_D1E; + } + + default: + switch (pkt->opcode) { + case IB_OPCODE_UC_SEND_MIDDLE: + case IB_OPCODE_UC_SEND_LAST: + case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE: + case IB_OPCODE_UC_RDMA_WRITE_MIDDLE: + case IB_OPCODE_UC_RDMA_WRITE_LAST: + case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE: + qp->resp.drop_msg = 1; + return RESPST_CLEANUP; + default: + return RESPST_CHK_OP_VALID; + } + } + break; + + default: + return RESPST_CHK_OP_VALID; + } +} + +static enum resp_states check_op_valid(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + switch (qp_type(qp)) { + case IB_QPT_RC: + if (((pkt->mask & RXE_READ_MASK) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) || + ((pkt->mask & RXE_WRITE_MASK) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) || + ((pkt->mask & RXE_ATOMIC_MASK) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) { + return RESPST_ERR_UNSUPPORTED_OPCODE; + } + + break; + + case IB_QPT_UC: + if ((pkt->mask & RXE_WRITE_MASK) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) { + qp->resp.drop_msg = 1; + return RESPST_CLEANUP; + } + + break; + + case IB_QPT_UD: + case IB_QPT_SMI: + case IB_QPT_GSI: + break; + + default: + WARN_ON(1); + break; + } + + return RESPST_CHK_RESOURCE; +} + +static enum resp_states get_srq_wqe(struct rxe_qp *qp) +{ + struct rxe_srq *srq = qp->srq; + struct rxe_queue *q = srq->rq.queue; + struct rxe_recv_wqe *wqe; + struct ib_event ev; + + if (srq->error) + return RESPST_ERR_RNR; + + spin_lock_bh(&srq->rq.consumer_lock); + + wqe = queue_head(q); + if (!wqe) { + spin_unlock_bh(&srq->rq.consumer_lock); + return RESPST_ERR_RNR; + } + + /* note kernel and user space recv wqes have same size */ + memcpy(&qp->resp.srq_wqe, wqe, sizeof(qp->resp.srq_wqe)); + + qp->resp.wqe = &qp->resp.srq_wqe.wqe; + advance_consumer(q); + + if (srq->limit && srq->ibsrq.event_handler && + (queue_count(q) < srq->limit)) { + srq->limit = 0; + goto event; + } + + spin_unlock_bh(&srq->rq.consumer_lock); + return RESPST_CHK_LENGTH; + +event: + spin_unlock_bh(&srq->rq.consumer_lock); + ev.device = qp->ibqp.device; + ev.element.srq = qp->ibqp.srq; + ev.event = IB_EVENT_SRQ_LIMIT_REACHED; + srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context); + return RESPST_CHK_LENGTH; +} + +static enum resp_states check_resource(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + struct rxe_srq *srq = qp->srq; + + if (qp->resp.state == QP_STATE_ERROR) { + if (qp->resp.wqe) { + qp->resp.status = IB_WC_WR_FLUSH_ERR; + return RESPST_COMPLETE; + } else if (!srq) { + qp->resp.wqe = queue_head(qp->rq.queue); + if (qp->resp.wqe) { + qp->resp.status = IB_WC_WR_FLUSH_ERR; + return RESPST_COMPLETE; + } else { + return RESPST_EXIT; + } + } else { + return RESPST_EXIT; + } + } + + if (pkt->mask & RXE_READ_OR_ATOMIC) { + /* it is the requesters job to not send + * too many read/atomic ops, we just + * recycle the responder resource queue + */ + if (likely(qp->attr.max_rd_atomic > 0)) + return RESPST_CHK_LENGTH; + else + return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ; + } + + if (pkt->mask & RXE_RWR_MASK) { + if (srq) + return get_srq_wqe(qp); + + qp->resp.wqe = queue_head(qp->rq.queue); + return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR; + } + + return RESPST_CHK_LENGTH; +} + +static enum resp_states check_length(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + switch (qp_type(qp)) { + case IB_QPT_RC: + return RESPST_CHK_RKEY; + + case IB_QPT_UC: + return RESPST_CHK_RKEY; + + default: + return RESPST_CHK_RKEY; + } +} + +static enum resp_states check_rkey(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + struct rxe_mem *mem; + u64 va; + u32 rkey; + u32 resid; + u32 pktlen; + int mtu = qp->mtu; + enum resp_states state; + int access; + + if (pkt->mask & (RXE_READ_MASK | RXE_WRITE_MASK)) { + if (pkt->mask & RXE_RETH_MASK) { + qp->resp.va = reth_va(pkt); + qp->resp.rkey = reth_rkey(pkt); + qp->resp.resid = reth_len(pkt); + } + access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ + : IB_ACCESS_REMOTE_WRITE; + } else if (pkt->mask & RXE_ATOMIC_MASK) { + qp->resp.va = atmeth_va(pkt); + qp->resp.rkey = atmeth_rkey(pkt); + qp->resp.resid = sizeof(u64); + access = IB_ACCESS_REMOTE_ATOMIC; + } else { + return RESPST_EXECUTE; + } + + va = qp->resp.va; + rkey = qp->resp.rkey; + resid = qp->resp.resid; + pktlen = payload_size(pkt); + + mem = lookup_mem(qp->pd, access, rkey, lookup_remote); + if (!mem) { + state = RESPST_ERR_RKEY_VIOLATION; + goto err1; + } + + if (unlikely(mem->state == RXE_MEM_STATE_FREE)) { + state = RESPST_ERR_RKEY_VIOLATION; + goto err1; + } + + if (mem_check_range(mem, va, resid)) { + state = RESPST_ERR_RKEY_VIOLATION; + goto err2; + } + + if (pkt->mask & RXE_WRITE_MASK) { + if (resid > mtu) { + if (pktlen != mtu || bth_pad(pkt)) { + state = RESPST_ERR_LENGTH; + goto err2; + } + + resid = mtu; + } else { + if (pktlen != resid) { + state = RESPST_ERR_LENGTH; + goto err2; + } + if ((bth_pad(pkt) != (0x3 & (-resid)))) { + /* This case may not be exactly that + * but nothing else fits. + */ + state = RESPST_ERR_LENGTH; + goto err2; + } + } + } + + WARN_ON(qp->resp.mr); + + qp->resp.mr = mem; + return RESPST_EXECUTE; + +err2: + rxe_drop_ref(mem); +err1: + return state; +} + +static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr, + int data_len) +{ + int err; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + + err = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma, + data_addr, data_len, to_mem_obj, NULL); + if (unlikely(err)) + return (err == -ENOSPC) ? RESPST_ERR_LENGTH + : RESPST_ERR_MALFORMED_WQE; + + return RESPST_NONE; +} + +static enum resp_states write_data_in(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + enum resp_states rc = RESPST_NONE; + int err; + int data_len = payload_size(pkt); + + err = rxe_mem_copy(qp->resp.mr, qp->resp.va, payload_addr(pkt), + data_len, to_mem_obj, NULL); + if (err) { + rc = RESPST_ERR_RKEY_VIOLATION; + goto out; + } + + qp->resp.va += data_len; + qp->resp.resid -= data_len; + +out: + return rc; +} + +/* Guarantee atomicity of atomic operations at the machine level. */ +static DEFINE_SPINLOCK(atomic_ops_lock); + +static enum resp_states process_atomic(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + u64 iova = atmeth_va(pkt); + u64 *vaddr; + enum resp_states ret; + struct rxe_mem *mr = qp->resp.mr; + + if (mr->state != RXE_MEM_STATE_VALID) { + ret = RESPST_ERR_RKEY_VIOLATION; + goto out; + } + + vaddr = iova_to_vaddr(mr, iova, sizeof(u64)); + + /* check vaddr is 8 bytes aligned. */ + if (!vaddr || (uintptr_t)vaddr & 7) { + ret = RESPST_ERR_MISALIGNED_ATOMIC; + goto out; + } + + spin_lock_bh(&atomic_ops_lock); + + qp->resp.atomic_orig = *vaddr; + + if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP || + pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) { + if (*vaddr == atmeth_comp(pkt)) + *vaddr = atmeth_swap_add(pkt); + } else { + *vaddr += atmeth_swap_add(pkt); + } + + spin_unlock_bh(&atomic_ops_lock); + + ret = RESPST_NONE; +out: + return ret; +} + +static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + struct rxe_pkt_info *ack, + int opcode, + int payload, + u32 psn, + u8 syndrome, + u32 *crcp) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct sk_buff *skb; + u32 crc = 0; + u32 *p; + int paylen; + int pad; + int err; + + /* + * allocate packet + */ + pad = (-payload) & 0x3; + paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE; + + skb = rxe->ifc_ops->init_packet(rxe, &qp->pri_av, paylen, ack); + if (!skb) + return NULL; + + ack->qp = qp; + ack->opcode = opcode; + ack->mask = rxe_opcode[opcode].mask; + ack->offset = pkt->offset; + ack->paylen = paylen; + + /* fill in bth using the request packet headers */ + memcpy(ack->hdr, pkt->hdr, pkt->offset + RXE_BTH_BYTES); + + bth_set_opcode(ack, opcode); + bth_set_qpn(ack, qp->attr.dest_qp_num); + bth_set_pad(ack, pad); + bth_set_se(ack, 0); + bth_set_psn(ack, psn); + bth_set_ack(ack, 0); + ack->psn = psn; + + if (ack->mask & RXE_AETH_MASK) { + aeth_set_syn(ack, syndrome); + aeth_set_msn(ack, qp->resp.msn); + } + + if (ack->mask & RXE_ATMACK_MASK) + atmack_set_orig(ack, qp->resp.atomic_orig); + + err = rxe->ifc_ops->prepare(rxe, ack, skb, &crc); + if (err) { + kfree_skb(skb); + return NULL; + } + + if (crcp) { + /* CRC computation will be continued by the caller */ + *crcp = crc; + } else { + p = payload_addr(ack) + payload + bth_pad(ack); + *p = ~crc; + } + + return skb; +} + +/* RDMA read response. If res is not NULL, then we have a current RDMA request + * being processed or replayed. + */ +static enum resp_states read_reply(struct rxe_qp *qp, + struct rxe_pkt_info *req_pkt) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct rxe_pkt_info ack_pkt; + struct sk_buff *skb; + int mtu = qp->mtu; + enum resp_states state; + int payload; + int opcode; + int err; + struct resp_res *res = qp->resp.res; + u32 icrc; + u32 *p; + + if (!res) { + /* This is the first time we process that request. Get a + * resource + */ + res = &qp->resp.resources[qp->resp.res_head]; + + free_rd_atomic_resource(qp, res); + rxe_advance_resp_resource(qp); + + res->type = RXE_READ_MASK; + + res->read.va = qp->resp.va; + res->read.va_org = qp->resp.va; + + res->first_psn = req_pkt->psn; + res->last_psn = req_pkt->psn + + (reth_len(req_pkt) + mtu - 1) / + mtu - 1; + res->cur_psn = req_pkt->psn; + + res->read.resid = qp->resp.resid; + res->read.length = qp->resp.resid; + res->read.rkey = qp->resp.rkey; + + /* note res inherits the reference to mr from qp */ + res->read.mr = qp->resp.mr; + qp->resp.mr = NULL; + + qp->resp.res = res; + res->state = rdatm_res_state_new; + } + + if (res->state == rdatm_res_state_new) { + if (res->read.resid <= mtu) + opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY; + else + opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST; + } else { + if (res->read.resid > mtu) + opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE; + else + opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST; + } + + res->state = rdatm_res_state_next; + + payload = min_t(int, res->read.resid, mtu); + + skb = prepare_ack_packet(qp, req_pkt, &ack_pkt, opcode, payload, + res->cur_psn, AETH_ACK_UNLIMITED, &icrc); + if (!skb) + return RESPST_ERR_RNR; + + err = rxe_mem_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt), + payload, from_mem_obj, &icrc); + if (err) + pr_err("Failed copying memory\n"); + + p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt); + *p = ~icrc; + + err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb); + if (err) { + pr_err("Failed sending RDMA reply.\n"); + kfree_skb(skb); + return RESPST_ERR_RNR; + } + + res->read.va += payload; + res->read.resid -= payload; + res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK; + + if (res->read.resid > 0) { + state = RESPST_DONE; + } else { + qp->resp.res = NULL; + qp->resp.opcode = -1; + qp->resp.psn = res->cur_psn; + state = RESPST_CLEANUP; + } + + return state; +} + +/* Executes a new request. A retried request never reach that function (send + * and writes are discarded, and reads and atomics are retried elsewhere. + */ +static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) +{ + enum resp_states err; + + if (pkt->mask & RXE_SEND_MASK) { + if (qp_type(qp) == IB_QPT_UD || + qp_type(qp) == IB_QPT_SMI || + qp_type(qp) == IB_QPT_GSI) { + union rdma_network_hdr hdr; + struct sk_buff *skb = PKT_TO_SKB(pkt); + + memset(&hdr, 0, sizeof(hdr)); + if (skb->protocol == htons(ETH_P_IP)) + memcpy(&hdr.roce4grh, ip_hdr(skb), sizeof(hdr.roce4grh)); + else if (skb->protocol == htons(ETH_P_IPV6)) + memcpy(&hdr.ibgrh, ipv6_hdr(skb), sizeof(hdr.ibgrh)); + + err = send_data_in(qp, &hdr, sizeof(hdr)); + if (err) + return err; + } + err = send_data_in(qp, payload_addr(pkt), payload_size(pkt)); + if (err) + return err; + } else if (pkt->mask & RXE_WRITE_MASK) { + err = write_data_in(qp, pkt); + if (err) + return err; + } else if (pkt->mask & RXE_READ_MASK) { + /* For RDMA Read we can increment the msn now. See C9-148. */ + qp->resp.msn++; + return RESPST_READ_REPLY; + } else if (pkt->mask & RXE_ATOMIC_MASK) { + err = process_atomic(qp, pkt); + if (err) + return err; + } else + /* Unreachable */ + WARN_ON(1); + + /* We successfully processed this new request. */ + qp->resp.msn++; + + /* next expected psn, read handles this separately */ + qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + + qp->resp.opcode = pkt->opcode; + qp->resp.status = IB_WC_SUCCESS; + + if (pkt->mask & RXE_COMP_MASK) + return RESPST_COMPLETE; + else if (qp_type(qp) == IB_QPT_RC) + return RESPST_ACKNOWLEDGE; + else + return RESPST_CLEANUP; +} + +static enum resp_states do_complete(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + struct rxe_cqe cqe; + struct ib_wc *wc = &cqe.ibwc; + struct ib_uverbs_wc *uwc = &cqe.uibwc; + struct rxe_recv_wqe *wqe = qp->resp.wqe; + + if (unlikely(!wqe)) + return RESPST_CLEANUP; + + memset(&cqe, 0, sizeof(cqe)); + + wc->wr_id = wqe->wr_id; + wc->status = qp->resp.status; + wc->qp = &qp->ibqp; + + /* fields after status are not required for errors */ + if (wc->status == IB_WC_SUCCESS) { + wc->opcode = (pkt->mask & RXE_IMMDT_MASK && + pkt->mask & RXE_WRITE_MASK) ? + IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV; + wc->vendor_err = 0; + wc->byte_len = wqe->dma.length - wqe->dma.resid; + + /* fields after byte_len are different between kernel and user + * space + */ + if (qp->rcq->is_user) { + uwc->wc_flags = IB_WC_GRH; + + if (pkt->mask & RXE_IMMDT_MASK) { + uwc->wc_flags |= IB_WC_WITH_IMM; + uwc->ex.imm_data = + (__u32 __force)immdt_imm(pkt); + } + + if (pkt->mask & RXE_IETH_MASK) { + uwc->wc_flags |= IB_WC_WITH_INVALIDATE; + uwc->ex.invalidate_rkey = ieth_rkey(pkt); + } + + uwc->qp_num = qp->ibqp.qp_num; + + if (pkt->mask & RXE_DETH_MASK) + uwc->src_qp = deth_sqp(pkt); + + uwc->port_num = qp->attr.port_num; + } else { + struct sk_buff *skb = PKT_TO_SKB(pkt); + + wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE; + if (skb->protocol == htons(ETH_P_IP)) + wc->network_hdr_type = RDMA_NETWORK_IPV4; + else + wc->network_hdr_type = RDMA_NETWORK_IPV6; + + if (pkt->mask & RXE_IMMDT_MASK) { + wc->wc_flags |= IB_WC_WITH_IMM; + wc->ex.imm_data = immdt_imm(pkt); + } + + if (pkt->mask & RXE_IETH_MASK) { + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct rxe_mem *rmr; + + wc->wc_flags |= IB_WC_WITH_INVALIDATE; + wc->ex.invalidate_rkey = ieth_rkey(pkt); + + rmr = rxe_pool_get_index(&rxe->mr_pool, + wc->ex.invalidate_rkey >> 8); + if (unlikely(!rmr)) { + pr_err("Bad rkey %#x invalidation\n", wc->ex.invalidate_rkey); + return RESPST_ERROR; + } + rmr->state = RXE_MEM_STATE_FREE; + } + + wc->qp = &qp->ibqp; + + if (pkt->mask & RXE_DETH_MASK) + wc->src_qp = deth_sqp(pkt); + + wc->port_num = qp->attr.port_num; + } + } + + /* have copy for srq and reference for !srq */ + if (!qp->srq) + advance_consumer(qp->rq.queue); + + qp->resp.wqe = NULL; + + if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1)) + return RESPST_ERR_CQ_OVERFLOW; + + if (qp->resp.state == QP_STATE_ERROR) + return RESPST_CHK_RESOURCE; + + if (!pkt) + return RESPST_DONE; + else if (qp_type(qp) == IB_QPT_RC) + return RESPST_ACKNOWLEDGE; + else + return RESPST_CLEANUP; +} + +static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, + u8 syndrome, u32 psn) +{ + int err = 0; + struct rxe_pkt_info ack_pkt; + struct sk_buff *skb; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + + skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE, + 0, psn, syndrome, NULL); + if (!skb) { + err = -ENOMEM; + goto err1; + } + + err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb); + if (err) { + pr_err_ratelimited("Failed sending ack\n"); + kfree_skb(skb); + } + +err1: + return err; +} + +static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, + u8 syndrome) +{ + int rc = 0; + struct rxe_pkt_info ack_pkt; + struct sk_buff *skb; + struct sk_buff *skb_copy; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct resp_res *res; + + skb = prepare_ack_packet(qp, pkt, &ack_pkt, + IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, 0, pkt->psn, + syndrome, NULL); + if (!skb) { + rc = -ENOMEM; + goto out; + } + + skb_copy = skb_clone(skb, GFP_ATOMIC); + if (skb_copy) + rxe_add_ref(qp); /* for the new SKB */ + else { + pr_warn("Could not clone atomic response\n"); + rc = -ENOMEM; + goto out; + } + + res = &qp->resp.resources[qp->resp.res_head]; + free_rd_atomic_resource(qp, res); + rxe_advance_resp_resource(qp); + + res->type = RXE_ATOMIC_MASK; + res->atomic.skb = skb; + res->first_psn = qp->resp.psn; + res->last_psn = qp->resp.psn; + res->cur_psn = qp->resp.psn; + + rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy); + if (rc) { + pr_err_ratelimited("Failed sending ack\n"); + rxe_drop_ref(qp); + kfree_skb(skb_copy); + } + +out: + return rc; +} + +static enum resp_states acknowledge(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + if (qp_type(qp) != IB_QPT_RC) + return RESPST_CLEANUP; + + if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED) + send_ack(qp, pkt, qp->resp.aeth_syndrome, pkt->psn); + else if (pkt->mask & RXE_ATOMIC_MASK) + send_atomic_ack(qp, pkt, AETH_ACK_UNLIMITED); + else if (bth_ack(pkt)) + send_ack(qp, pkt, AETH_ACK_UNLIMITED, pkt->psn); + + return RESPST_CLEANUP; +} + +static enum resp_states cleanup(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + struct sk_buff *skb; + + if (pkt) { + skb = skb_dequeue(&qp->req_pkts); + rxe_drop_ref(qp); + kfree_skb(skb); + } + + if (qp->resp.mr) { + rxe_drop_ref(qp->resp.mr); + qp->resp.mr = NULL; + } + + return RESPST_DONE; +} + +static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn) +{ + int i; + + for (i = 0; i < qp->attr.max_rd_atomic; i++) { + struct resp_res *res = &qp->resp.resources[i]; + + if (res->type == 0) + continue; + + if (psn_compare(psn, res->first_psn) >= 0 && + psn_compare(psn, res->last_psn) <= 0) { + return res; + } + } + + return NULL; +} + +static enum resp_states duplicate_request(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + enum resp_states rc; + + if (pkt->mask & RXE_SEND_MASK || + pkt->mask & RXE_WRITE_MASK) { + /* SEND. Ack again and cleanup. C9-105. */ + if (bth_ack(pkt)) + send_ack(qp, pkt, AETH_ACK_UNLIMITED, qp->resp.psn - 1); + rc = RESPST_CLEANUP; + goto out; + } else if (pkt->mask & RXE_READ_MASK) { + struct resp_res *res; + + res = find_resource(qp, pkt->psn); + if (!res) { + /* Resource not found. Class D error. Drop the + * request. + */ + rc = RESPST_CLEANUP; + goto out; + } else { + /* Ensure this new request is the same as the previous + * one or a subset of it. + */ + u64 iova = reth_va(pkt); + u32 resid = reth_len(pkt); + + if (iova < res->read.va_org || + resid > res->read.length || + (iova + resid) > (res->read.va_org + + res->read.length)) { + rc = RESPST_CLEANUP; + goto out; + } + + if (reth_rkey(pkt) != res->read.rkey) { + rc = RESPST_CLEANUP; + goto out; + } + + res->cur_psn = pkt->psn; + res->state = (pkt->psn == res->first_psn) ? + rdatm_res_state_new : + rdatm_res_state_replay; + + /* Reset the resource, except length. */ + res->read.va_org = iova; + res->read.va = iova; + res->read.resid = resid; + + /* Replay the RDMA read reply. */ + qp->resp.res = res; + rc = RESPST_READ_REPLY; + goto out; + } + } else { + struct resp_res *res; + + /* Find the operation in our list of responder resources. */ + res = find_resource(qp, pkt->psn); + if (res) { + struct sk_buff *skb_copy; + + skb_copy = skb_clone(res->atomic.skb, GFP_ATOMIC); + if (skb_copy) { + rxe_add_ref(qp); /* for the new SKB */ + } else { + pr_warn("Couldn't clone atomic resp\n"); + rc = RESPST_CLEANUP; + goto out; + } + bth_set_psn(SKB_TO_PKT(skb_copy), + qp->resp.psn - 1); + /* Resend the result. */ + rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, + pkt, skb_copy); + if (rc) { + pr_err("Failed resending result. This flow is not handled - skb ignored\n"); + kfree_skb(skb_copy); + rc = RESPST_CLEANUP; + goto out; + } + } + + /* Resource not found. Class D error. Drop the request. */ + rc = RESPST_CLEANUP; + goto out; + } +out: + return rc; +} + +/* Process a class A or C. Both are treated the same in this implementation. */ +static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome, + enum ib_wc_status status) +{ + qp->resp.aeth_syndrome = syndrome; + qp->resp.status = status; + + /* indicate that we should go through the ERROR state */ + qp->resp.goto_error = 1; +} + +static enum resp_states do_class_d1e_error(struct rxe_qp *qp) +{ + /* UC */ + if (qp->srq) { + /* Class E */ + qp->resp.drop_msg = 1; + if (qp->resp.wqe) { + qp->resp.status = IB_WC_REM_INV_REQ_ERR; + return RESPST_COMPLETE; + } else { + return RESPST_CLEANUP; + } + } else { + /* Class D1. This packet may be the start of a + * new message and could be valid. The previous + * message is invalid and ignored. reset the + * recv wr to its original state + */ + if (qp->resp.wqe) { + qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length; + qp->resp.wqe->dma.cur_sge = 0; + qp->resp.wqe->dma.sge_offset = 0; + qp->resp.opcode = -1; + } + + if (qp->resp.mr) { + rxe_drop_ref(qp->resp.mr); + qp->resp.mr = NULL; + } + + return RESPST_CLEANUP; + } +} + +int rxe_responder(void *arg) +{ + struct rxe_qp *qp = (struct rxe_qp *)arg; + enum resp_states state; + struct rxe_pkt_info *pkt = NULL; + int ret = 0; + + qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED; + + if (!qp->valid) { + ret = -EINVAL; + goto done; + } + + switch (qp->resp.state) { + case QP_STATE_RESET: + state = RESPST_RESET; + break; + + default: + state = RESPST_GET_REQ; + break; + } + + while (1) { + pr_debug("state = %s\n", resp_state_name[state]); + switch (state) { + case RESPST_GET_REQ: + state = get_req(qp, &pkt); + break; + case RESPST_CHK_PSN: + state = check_psn(qp, pkt); + break; + case RESPST_CHK_OP_SEQ: + state = check_op_seq(qp, pkt); + break; + case RESPST_CHK_OP_VALID: + state = check_op_valid(qp, pkt); + break; + case RESPST_CHK_RESOURCE: + state = check_resource(qp, pkt); + break; + case RESPST_CHK_LENGTH: + state = check_length(qp, pkt); + break; + case RESPST_CHK_RKEY: + state = check_rkey(qp, pkt); + break; + case RESPST_EXECUTE: + state = execute(qp, pkt); + break; + case RESPST_COMPLETE: + state = do_complete(qp, pkt); + break; + case RESPST_READ_REPLY: + state = read_reply(qp, pkt); + break; + case RESPST_ACKNOWLEDGE: + state = acknowledge(qp, pkt); + break; + case RESPST_CLEANUP: + state = cleanup(qp, pkt); + break; + case RESPST_DUPLICATE_REQUEST: + state = duplicate_request(qp, pkt); + break; + case RESPST_ERR_PSN_OUT_OF_SEQ: + /* RC only - Class B. Drop packet. */ + send_ack(qp, pkt, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn); + state = RESPST_CLEANUP; + break; + + case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ: + case RESPST_ERR_MISSING_OPCODE_FIRST: + case RESPST_ERR_MISSING_OPCODE_LAST_C: + case RESPST_ERR_UNSUPPORTED_OPCODE: + case RESPST_ERR_MISALIGNED_ATOMIC: + /* RC Only - Class C. */ + do_class_ac_error(qp, AETH_NAK_INVALID_REQ, + IB_WC_REM_INV_REQ_ERR); + state = RESPST_COMPLETE; + break; + + case RESPST_ERR_MISSING_OPCODE_LAST_D1E: + state = do_class_d1e_error(qp); + break; + case RESPST_ERR_RNR: + if (qp_type(qp) == IB_QPT_RC) { + /* RC - class B */ + send_ack(qp, pkt, AETH_RNR_NAK | + (~AETH_TYPE_MASK & + qp->attr.min_rnr_timer), + pkt->psn); + } else { + /* UD/UC - class D */ + qp->resp.drop_msg = 1; + } + state = RESPST_CLEANUP; + break; + + case RESPST_ERR_RKEY_VIOLATION: + if (qp_type(qp) == IB_QPT_RC) { + /* Class C */ + do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR, + IB_WC_REM_ACCESS_ERR); + state = RESPST_COMPLETE; + } else { + qp->resp.drop_msg = 1; + if (qp->srq) { + /* UC/SRQ Class D */ + qp->resp.status = IB_WC_REM_ACCESS_ERR; + state = RESPST_COMPLETE; + } else { + /* UC/non-SRQ Class E. */ + state = RESPST_CLEANUP; + } + } + break; + + case RESPST_ERR_LENGTH: + if (qp_type(qp) == IB_QPT_RC) { + /* Class C */ + do_class_ac_error(qp, AETH_NAK_INVALID_REQ, + IB_WC_REM_INV_REQ_ERR); + state = RESPST_COMPLETE; + } else if (qp->srq) { + /* UC/UD - class E */ + qp->resp.status = IB_WC_REM_INV_REQ_ERR; + state = RESPST_COMPLETE; + } else { + /* UC/UD - class D */ + qp->resp.drop_msg = 1; + state = RESPST_CLEANUP; + } + break; + + case RESPST_ERR_MALFORMED_WQE: + /* All, Class A. */ + do_class_ac_error(qp, AETH_NAK_REM_OP_ERR, + IB_WC_LOC_QP_OP_ERR); + state = RESPST_COMPLETE; + break; + + case RESPST_ERR_CQ_OVERFLOW: + /* All - Class G */ + state = RESPST_ERROR; + break; + + case RESPST_DONE: + if (qp->resp.goto_error) { + state = RESPST_ERROR; + break; + } + + goto done; + + case RESPST_EXIT: + if (qp->resp.goto_error) { + state = RESPST_ERROR; + break; + } + + goto exit; + + case RESPST_RESET: { + struct sk_buff *skb; + + while ((skb = skb_dequeue(&qp->req_pkts))) { + rxe_drop_ref(qp); + kfree_skb(skb); + } + + while (!qp->srq && qp->rq.queue && + queue_head(qp->rq.queue)) + advance_consumer(qp->rq.queue); + + qp->resp.wqe = NULL; + goto exit; + } + + case RESPST_ERROR: + qp->resp.goto_error = 0; + pr_warn("qp#%d moved to error state\n", qp_num(qp)); + rxe_qp_error(qp); + goto exit; + + default: + WARN_ON(1); + } + } + +exit: + ret = -EAGAIN; +done: + return ret; +} diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c new file mode 100644 index 000000000000..2a6e3cd2d4e8 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" + +int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, + struct ib_srq_attr *attr, enum ib_srq_attr_mask mask) +{ + if (srq && srq->error) { + pr_warn("srq in error state\n"); + goto err1; + } + + if (mask & IB_SRQ_MAX_WR) { + if (attr->max_wr > rxe->attr.max_srq_wr) { + pr_warn("max_wr(%d) > max_srq_wr(%d)\n", + attr->max_wr, rxe->attr.max_srq_wr); + goto err1; + } + + if (attr->max_wr <= 0) { + pr_warn("max_wr(%d) <= 0\n", attr->max_wr); + goto err1; + } + + if (srq && srq->limit && (attr->max_wr < srq->limit)) { + pr_warn("max_wr (%d) < srq->limit (%d)\n", + attr->max_wr, srq->limit); + goto err1; + } + + if (attr->max_wr < RXE_MIN_SRQ_WR) + attr->max_wr = RXE_MIN_SRQ_WR; + } + + if (mask & IB_SRQ_LIMIT) { + if (attr->srq_limit > rxe->attr.max_srq_wr) { + pr_warn("srq_limit(%d) > max_srq_wr(%d)\n", + attr->srq_limit, rxe->attr.max_srq_wr); + goto err1; + } + + if (srq && (attr->srq_limit > srq->rq.queue->buf->index_mask)) { + pr_warn("srq_limit (%d) > cur limit(%d)\n", + attr->srq_limit, + srq->rq.queue->buf->index_mask); + goto err1; + } + } + + if (mask == IB_SRQ_INIT_MASK) { + if (attr->max_sge > rxe->attr.max_srq_sge) { + pr_warn("max_sge(%d) > max_srq_sge(%d)\n", + attr->max_sge, rxe->attr.max_srq_sge); + goto err1; + } + + if (attr->max_sge < RXE_MIN_SRQ_SGE) + attr->max_sge = RXE_MIN_SRQ_SGE; + } + + return 0; + +err1: + return -EINVAL; +} + +int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, + struct ib_srq_init_attr *init, + struct ib_ucontext *context, struct ib_udata *udata) +{ + int err; + int srq_wqe_size; + struct rxe_queue *q; + + srq->ibsrq.event_handler = init->event_handler; + srq->ibsrq.srq_context = init->srq_context; + srq->limit = init->attr.srq_limit; + srq->srq_num = srq->pelem.index; + srq->rq.max_wr = init->attr.max_wr; + srq->rq.max_sge = init->attr.max_sge; + + srq_wqe_size = rcv_wqe_size(srq->rq.max_sge); + + spin_lock_init(&srq->rq.producer_lock); + spin_lock_init(&srq->rq.consumer_lock); + + q = rxe_queue_init(rxe, &srq->rq.max_wr, + srq_wqe_size); + if (!q) { + pr_warn("unable to allocate queue for srq\n"); + return -ENOMEM; + } + + srq->rq.queue = q; + + err = do_mmap_info(rxe, udata, false, context, q->buf, + q->buf_size, &q->ip); + if (err) + return err; + + if (udata && udata->outlen >= sizeof(struct mminfo) + sizeof(u32)) { + if (copy_to_user(udata->outbuf + sizeof(struct mminfo), + &srq->srq_num, sizeof(u32))) + return -EFAULT; + } + return 0; +} + +int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, + struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, + struct ib_udata *udata) +{ + int err; + struct rxe_queue *q = srq->rq.queue; + struct mminfo mi = { .offset = 1, .size = 0}; + + if (mask & IB_SRQ_MAX_WR) { + /* Check that we can write the mminfo struct to user space */ + if (udata && udata->inlen >= sizeof(__u64)) { + __u64 mi_addr; + + /* Get address of user space mminfo struct */ + err = ib_copy_from_udata(&mi_addr, udata, + sizeof(mi_addr)); + if (err) + goto err1; + + udata->outbuf = (void __user *)(unsigned long)mi_addr; + udata->outlen = sizeof(mi); + + if (!access_ok(VERIFY_WRITE, + (void __user *)udata->outbuf, + udata->outlen)) { + err = -EFAULT; + goto err1; + } + } + + err = rxe_queue_resize(q, (unsigned int *)&attr->max_wr, + rcv_wqe_size(srq->rq.max_sge), + srq->rq.queue->ip ? + srq->rq.queue->ip->context : + NULL, + udata, &srq->rq.producer_lock, + &srq->rq.consumer_lock); + if (err) + goto err2; + } + + if (mask & IB_SRQ_LIMIT) + srq->limit = attr->srq_limit; + + return 0; + +err2: + rxe_queue_cleanup(q); + srq->rq.queue = NULL; +err1: + return err; +} diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c new file mode 100644 index 000000000000..cf8e77800046 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_net.h" + +/* Copy argument and remove trailing CR. Return the new length. */ +static int sanitize_arg(const char *val, char *intf, int intf_len) +{ + int len; + + if (!val) + return 0; + + /* Remove newline. */ + for (len = 0; len < intf_len - 1 && val[len] && val[len] != '\n'; len++) + intf[len] = val[len]; + intf[len] = 0; + + if (len == 0 || (val[len] != 0 && val[len] != '\n')) + return 0; + + return len; +} + +static void rxe_set_port_state(struct net_device *ndev) +{ + struct rxe_dev *rxe = net_to_rxe(ndev); + bool is_up = netif_running(ndev) && netif_carrier_ok(ndev); + + if (!rxe) + goto out; + + if (is_up) + rxe_port_up(rxe); + else + rxe_port_down(rxe); /* down for unknown state */ +out: + return; +} + +static int rxe_param_set_add(const char *val, const struct kernel_param *kp) +{ + int len; + int err = 0; + char intf[32]; + struct net_device *ndev = NULL; + struct rxe_dev *rxe; + + len = sanitize_arg(val, intf, sizeof(intf)); + if (!len) { + pr_err("rxe: add: invalid interface name\n"); + err = -EINVAL; + goto err; + } + + ndev = dev_get_by_name(&init_net, intf); + if (!ndev) { + pr_err("interface %s not found\n", intf); + err = -EINVAL; + goto err; + } + + if (net_to_rxe(ndev)) { + pr_err("rxe: already configured on %s\n", intf); + err = -EINVAL; + goto err; + } + + rxe = rxe_net_add(ndev); + if (!rxe) { + pr_err("rxe: failed to add %s\n", intf); + err = -EINVAL; + goto err; + } + + rxe_set_port_state(ndev); + pr_info("rxe: added %s to %s\n", rxe->ib_dev.name, intf); +err: + if (ndev) + dev_put(ndev); + return err; +} + +static int rxe_param_set_remove(const char *val, const struct kernel_param *kp) +{ + int len; + char intf[32]; + struct rxe_dev *rxe; + + len = sanitize_arg(val, intf, sizeof(intf)); + if (!len) { + pr_err("rxe: add: invalid interface name\n"); + return -EINVAL; + } + + if (strncmp("all", intf, len) == 0) { + pr_info("rxe_sys: remove all"); + rxe_remove_all(); + return 0; + } + + rxe = get_rxe_by_name(intf); + + if (!rxe) { + pr_err("rxe: not configured on %s\n", intf); + return -EINVAL; + } + + list_del(&rxe->list); + rxe_remove(rxe); + + return 0; +} + +static const struct kernel_param_ops rxe_add_ops = { + .set = rxe_param_set_add, +}; + +static const struct kernel_param_ops rxe_remove_ops = { + .set = rxe_param_set_remove, +}; + +module_param_cb(add, &rxe_add_ops, NULL, 0200); +MODULE_PARM_DESC(add, "Create RXE device over network interface"); +module_param_cb(remove, &rxe_remove_ops, NULL, 0200); +MODULE_PARM_DESC(remove, "Remove RXE device over network interface"); diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c new file mode 100644 index 000000000000..1e19bf828a6e --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/hardirq.h> + +#include "rxe_task.h" + +int __rxe_do_task(struct rxe_task *task) + +{ + int ret; + + while ((ret = task->func(task->arg)) == 0) + ; + + task->ret = ret; + + return ret; +} + +/* + * this locking is due to a potential race where + * a second caller finds the task already running + * but looks just after the last call to func + */ +void rxe_do_task(unsigned long data) +{ + int cont; + int ret; + unsigned long flags; + struct rxe_task *task = (struct rxe_task *)data; + + spin_lock_irqsave(&task->state_lock, flags); + switch (task->state) { + case TASK_STATE_START: + task->state = TASK_STATE_BUSY; + spin_unlock_irqrestore(&task->state_lock, flags); + break; + + case TASK_STATE_BUSY: + task->state = TASK_STATE_ARMED; + /* fall through to */ + case TASK_STATE_ARMED: + spin_unlock_irqrestore(&task->state_lock, flags); + return; + + default: + spin_unlock_irqrestore(&task->state_lock, flags); + pr_warn("bad state = %d in rxe_do_task\n", task->state); + return; + } + + do { + cont = 0; + ret = task->func(task->arg); + + spin_lock_irqsave(&task->state_lock, flags); + switch (task->state) { + case TASK_STATE_BUSY: + if (ret) + task->state = TASK_STATE_START; + else + cont = 1; + break; + + /* soneone tried to run the task since the last time we called + * func, so we will call one more time regardless of the + * return value + */ + case TASK_STATE_ARMED: + task->state = TASK_STATE_BUSY; + cont = 1; + break; + + default: + pr_warn("bad state = %d in rxe_do_task\n", + task->state); + } + spin_unlock_irqrestore(&task->state_lock, flags); + } while (cont); + + task->ret = ret; +} + +int rxe_init_task(void *obj, struct rxe_task *task, + void *arg, int (*func)(void *), char *name) +{ + task->obj = obj; + task->arg = arg; + task->func = func; + snprintf(task->name, sizeof(task->name), "%s", name); + + tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task); + + task->state = TASK_STATE_START; + spin_lock_init(&task->state_lock); + + return 0; +} + +void rxe_cleanup_task(struct rxe_task *task) +{ + tasklet_kill(&task->tasklet); +} + +void rxe_run_task(struct rxe_task *task, int sched) +{ + if (sched) + tasklet_schedule(&task->tasklet); + else + rxe_do_task((unsigned long)task); +} + +void rxe_disable_task(struct rxe_task *task) +{ + tasklet_disable(&task->tasklet); +} + +void rxe_enable_task(struct rxe_task *task) +{ + tasklet_enable(&task->tasklet); +} diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h new file mode 100644 index 000000000000..d14aa6daed05 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_TASK_H +#define RXE_TASK_H + +enum { + TASK_STATE_START = 0, + TASK_STATE_BUSY = 1, + TASK_STATE_ARMED = 2, +}; + +/* + * data structure to describe a 'task' which is a short + * function that returns 0 as long as it needs to be + * called again. + */ +struct rxe_task { + void *obj; + struct tasklet_struct tasklet; + int state; + spinlock_t state_lock; /* spinlock for task state */ + void *arg; + int (*func)(void *arg); + int ret; + char name[16]; +}; + +/* + * init rxe_task structure + * arg => parameter to pass to fcn + * fcn => function to call until it returns != 0 + */ +int rxe_init_task(void *obj, struct rxe_task *task, + void *arg, int (*func)(void *), char *name); + +/* cleanup task */ +void rxe_cleanup_task(struct rxe_task *task); + +/* + * raw call to func in loop without any checking + * can call when tasklets are disabled + */ +int __rxe_do_task(struct rxe_task *task); + +/* + * common function called by any of the main tasklets + * If there is any chance that there is additional + * work to do someone must reschedule the task before + * leaving + */ +void rxe_do_task(unsigned long data); + +/* run a task, else schedule it to run as a tasklet, The decision + * to run or schedule tasklet is based on the parameter sched. + */ +void rxe_run_task(struct rxe_task *task, int sched); + +/* keep a task from scheduling */ +void rxe_disable_task(struct rxe_task *task); + +/* allow task to run */ +void rxe_enable_task(struct rxe_task *task); + +#endif /* RXE_TASK_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c new file mode 100644 index 000000000000..4552be960c6a --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -0,0 +1,1330 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "rxe.h" +#include "rxe_loc.h" +#include "rxe_queue.h" + +static int rxe_query_device(struct ib_device *dev, + struct ib_device_attr *attr, + struct ib_udata *uhw) +{ + struct rxe_dev *rxe = to_rdev(dev); + + if (uhw->inlen || uhw->outlen) + return -EINVAL; + + *attr = rxe->attr; + return 0; +} + +static void rxe_eth_speed_to_ib_speed(int speed, u8 *active_speed, + u8 *active_width) +{ + if (speed <= 1000) { + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_SDR; + } else if (speed <= 10000) { + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_FDR10; + } else if (speed <= 20000) { + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_DDR; + } else if (speed <= 30000) { + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_QDR; + } else if (speed <= 40000) { + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_FDR10; + } else { + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_EDR; + } +} + +static int rxe_query_port(struct ib_device *dev, + u8 port_num, struct ib_port_attr *attr) +{ + struct rxe_dev *rxe = to_rdev(dev); + struct rxe_port *port; + u32 speed; + + if (unlikely(port_num != 1)) { + pr_warn("invalid port_number %d\n", port_num); + goto err1; + } + + port = &rxe->port; + + *attr = port->attr; + + mutex_lock(&rxe->usdev_lock); + if (rxe->ndev->ethtool_ops->get_link_ksettings) { + struct ethtool_link_ksettings ks; + + rxe->ndev->ethtool_ops->get_link_ksettings(rxe->ndev, &ks); + speed = ks.base.speed; + } else if (rxe->ndev->ethtool_ops->get_settings) { + struct ethtool_cmd cmd; + + rxe->ndev->ethtool_ops->get_settings(rxe->ndev, &cmd); + speed = cmd.speed; + } else { + pr_warn("%s speed is unknown, defaulting to 1000\n", rxe->ndev->name); + speed = 1000; + } + rxe_eth_speed_to_ib_speed(speed, &attr->active_speed, &attr->active_width); + mutex_unlock(&rxe->usdev_lock); + + return 0; + +err1: + return -EINVAL; +} + +static int rxe_query_gid(struct ib_device *device, + u8 port_num, int index, union ib_gid *gid) +{ + int ret; + + if (index > RXE_PORT_GID_TBL_LEN) + return -EINVAL; + + ret = ib_get_cached_gid(device, port_num, index, gid, NULL); + if (ret == -EAGAIN) { + memcpy(gid, &zgid, sizeof(*gid)); + return 0; + } + + return ret; +} + +static int rxe_add_gid(struct ib_device *device, u8 port_num, unsigned int + index, const union ib_gid *gid, + const struct ib_gid_attr *attr, void **context) +{ + if (index >= RXE_PORT_GID_TBL_LEN) + return -EINVAL; + return 0; +} + +static int rxe_del_gid(struct ib_device *device, u8 port_num, unsigned int + index, void **context) +{ + if (index >= RXE_PORT_GID_TBL_LEN) + return -EINVAL; + return 0; +} + +static struct net_device *rxe_get_netdev(struct ib_device *device, + u8 port_num) +{ + struct rxe_dev *rxe = to_rdev(device); + + if (rxe->ndev) { + dev_hold(rxe->ndev); + return rxe->ndev; + } + + return NULL; +} + +static int rxe_query_pkey(struct ib_device *device, + u8 port_num, u16 index, u16 *pkey) +{ + struct rxe_dev *rxe = to_rdev(device); + struct rxe_port *port; + + if (unlikely(port_num != 1)) { + dev_warn(device->dma_device, "invalid port_num = %d\n", + port_num); + goto err1; + } + + port = &rxe->port; + + if (unlikely(index >= port->attr.pkey_tbl_len)) { + dev_warn(device->dma_device, "invalid index = %d\n", + index); + goto err1; + } + + *pkey = port->pkey_tbl[index]; + return 0; + +err1: + return -EINVAL; +} + +static int rxe_modify_device(struct ib_device *dev, + int mask, struct ib_device_modify *attr) +{ + struct rxe_dev *rxe = to_rdev(dev); + + if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) + rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid); + + if (mask & IB_DEVICE_MODIFY_NODE_DESC) { + memcpy(rxe->ib_dev.node_desc, + attr->node_desc, sizeof(rxe->ib_dev.node_desc)); + } + + return 0; +} + +static int rxe_modify_port(struct ib_device *dev, + u8 port_num, int mask, struct ib_port_modify *attr) +{ + struct rxe_dev *rxe = to_rdev(dev); + struct rxe_port *port; + + if (unlikely(port_num != 1)) { + pr_warn("invalid port_num = %d\n", port_num); + goto err1; + } + + port = &rxe->port; + + port->attr.port_cap_flags |= attr->set_port_cap_mask; + port->attr.port_cap_flags &= ~attr->clr_port_cap_mask; + + if (mask & IB_PORT_RESET_QKEY_CNTR) + port->attr.qkey_viol_cntr = 0; + + return 0; + +err1: + return -EINVAL; +} + +static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev, + u8 port_num) +{ + struct rxe_dev *rxe = to_rdev(dev); + + return rxe->ifc_ops->link_layer(rxe, port_num); +} + +static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev, + struct ib_udata *udata) +{ + struct rxe_dev *rxe = to_rdev(dev); + struct rxe_ucontext *uc; + + uc = rxe_alloc(&rxe->uc_pool); + return uc ? &uc->ibuc : ERR_PTR(-ENOMEM); +} + +static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc) +{ + struct rxe_ucontext *uc = to_ruc(ibuc); + + rxe_drop_ref(uc); + return 0; +} + +static int rxe_port_immutable(struct ib_device *dev, u8 port_num, + struct ib_port_immutable *immutable) +{ + int err; + struct ib_port_attr attr; + + err = rxe_query_port(dev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + +static struct ib_pd *rxe_alloc_pd(struct ib_device *dev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct rxe_dev *rxe = to_rdev(dev); + struct rxe_pd *pd; + + pd = rxe_alloc(&rxe->pd_pool); + return pd ? &pd->ibpd : ERR_PTR(-ENOMEM); +} + +static int rxe_dealloc_pd(struct ib_pd *ibpd) +{ + struct rxe_pd *pd = to_rpd(ibpd); + + rxe_drop_ref(pd); + return 0; +} + +static int rxe_init_av(struct rxe_dev *rxe, struct ib_ah_attr *attr, + struct rxe_av *av) +{ + int err; + union ib_gid sgid; + struct ib_gid_attr sgid_attr; + + err = ib_get_cached_gid(&rxe->ib_dev, attr->port_num, + attr->grh.sgid_index, &sgid, + &sgid_attr); + if (err) { + pr_err("Failed to query sgid. err = %d\n", err); + return err; + } + + err = rxe_av_from_attr(rxe, attr->port_num, av, attr); + if (!err) + err = rxe_av_fill_ip_info(rxe, av, attr, &sgid_attr, &sgid); + + if (sgid_attr.ndev) + dev_put(sgid_attr.ndev); + return err; +} + +static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) +{ + int err; + struct rxe_dev *rxe = to_rdev(ibpd->device); + struct rxe_pd *pd = to_rpd(ibpd); + struct rxe_ah *ah; + + err = rxe_av_chk_attr(rxe, attr); + if (err) + goto err1; + + ah = rxe_alloc(&rxe->ah_pool); + if (!ah) { + err = -ENOMEM; + goto err1; + } + + rxe_add_ref(pd); + ah->pd = pd; + + err = rxe_init_av(rxe, attr, &ah->av); + if (err) + goto err2; + + return &ah->ibah; + +err2: + rxe_drop_ref(pd); + rxe_drop_ref(ah); +err1: + return ERR_PTR(err); +} + +static int rxe_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr) +{ + int err; + struct rxe_dev *rxe = to_rdev(ibah->device); + struct rxe_ah *ah = to_rah(ibah); + + err = rxe_av_chk_attr(rxe, attr); + if (err) + return err; + + err = rxe_init_av(rxe, attr, &ah->av); + if (err) + return err; + + return 0; +} + +static int rxe_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr) +{ + struct rxe_dev *rxe = to_rdev(ibah->device); + struct rxe_ah *ah = to_rah(ibah); + + rxe_av_to_attr(rxe, &ah->av, attr); + return 0; +} + +static int rxe_destroy_ah(struct ib_ah *ibah) +{ + struct rxe_ah *ah = to_rah(ibah); + + rxe_drop_ref(ah->pd); + rxe_drop_ref(ah); + return 0; +} + +static int post_one_recv(struct rxe_rq *rq, struct ib_recv_wr *ibwr) +{ + int err; + int i; + u32 length; + struct rxe_recv_wqe *recv_wqe; + int num_sge = ibwr->num_sge; + + if (unlikely(queue_full(rq->queue))) { + err = -ENOMEM; + goto err1; + } + + if (unlikely(num_sge > rq->max_sge)) { + err = -EINVAL; + goto err1; + } + + length = 0; + for (i = 0; i < num_sge; i++) + length += ibwr->sg_list[i].length; + + recv_wqe = producer_addr(rq->queue); + recv_wqe->wr_id = ibwr->wr_id; + recv_wqe->num_sge = num_sge; + + memcpy(recv_wqe->dma.sge, ibwr->sg_list, + num_sge * sizeof(struct ib_sge)); + + recv_wqe->dma.length = length; + recv_wqe->dma.resid = length; + recv_wqe->dma.num_sge = num_sge; + recv_wqe->dma.cur_sge = 0; + recv_wqe->dma.sge_offset = 0; + + /* make sure all changes to the work queue are written before we + * update the producer pointer + */ + smp_wmb(); + + advance_producer(rq->queue); + return 0; + +err1: + return err; +} + +static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd, + struct ib_srq_init_attr *init, + struct ib_udata *udata) +{ + int err; + struct rxe_dev *rxe = to_rdev(ibpd->device); + struct rxe_pd *pd = to_rpd(ibpd); + struct rxe_srq *srq; + struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL; + + err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK); + if (err) + goto err1; + + srq = rxe_alloc(&rxe->srq_pool); + if (!srq) { + err = -ENOMEM; + goto err1; + } + + rxe_add_index(srq); + rxe_add_ref(pd); + srq->pd = pd; + + err = rxe_srq_from_init(rxe, srq, init, context, udata); + if (err) + goto err2; + + return &srq->ibsrq; + +err2: + rxe_drop_ref(pd); + rxe_drop_index(srq); + rxe_drop_ref(srq); +err1: + return ERR_PTR(err); +} + +static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask mask, + struct ib_udata *udata) +{ + int err; + struct rxe_srq *srq = to_rsrq(ibsrq); + struct rxe_dev *rxe = to_rdev(ibsrq->device); + + err = rxe_srq_chk_attr(rxe, srq, attr, mask); + if (err) + goto err1; + + err = rxe_srq_from_attr(rxe, srq, attr, mask, udata); + if (err) + goto err1; + + return 0; + +err1: + return err; +} + +static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) +{ + struct rxe_srq *srq = to_rsrq(ibsrq); + + if (srq->error) + return -EINVAL; + + attr->max_wr = srq->rq.queue->buf->index_mask; + attr->max_sge = srq->rq.max_sge; + attr->srq_limit = srq->limit; + return 0; +} + +static int rxe_destroy_srq(struct ib_srq *ibsrq) +{ + struct rxe_srq *srq = to_rsrq(ibsrq); + + if (srq->rq.queue) + rxe_queue_cleanup(srq->rq.queue); + + rxe_drop_ref(srq->pd); + rxe_drop_index(srq); + rxe_drop_ref(srq); + + return 0; +} + +static int rxe_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + int err = 0; + unsigned long flags; + struct rxe_srq *srq = to_rsrq(ibsrq); + + spin_lock_irqsave(&srq->rq.producer_lock, flags); + + while (wr) { + err = post_one_recv(&srq->rq, wr); + if (unlikely(err)) + break; + wr = wr->next; + } + + spin_unlock_irqrestore(&srq->rq.producer_lock, flags); + + if (err) + *bad_wr = wr; + + return err; +} + +static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *init, + struct ib_udata *udata) +{ + int err; + struct rxe_dev *rxe = to_rdev(ibpd->device); + struct rxe_pd *pd = to_rpd(ibpd); + struct rxe_qp *qp; + + err = rxe_qp_chk_init(rxe, init); + if (err) + goto err1; + + qp = rxe_alloc(&rxe->qp_pool); + if (!qp) { + err = -ENOMEM; + goto err1; + } + + if (udata) { + if (udata->inlen) { + err = -EINVAL; + goto err1; + } + qp->is_user = 1; + } + + rxe_add_index(qp); + + err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd); + if (err) + goto err2; + + return &qp->ibqp; + +err2: + rxe_drop_index(qp); + rxe_drop_ref(qp); +err1: + return ERR_PTR(err); +} + +static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int mask, struct ib_udata *udata) +{ + int err; + struct rxe_dev *rxe = to_rdev(ibqp->device); + struct rxe_qp *qp = to_rqp(ibqp); + + err = rxe_qp_chk_attr(rxe, qp, attr, mask); + if (err) + goto err1; + + err = rxe_qp_from_attr(qp, attr, mask, udata); + if (err) + goto err1; + + return 0; + +err1: + return err; +} + +static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int mask, struct ib_qp_init_attr *init) +{ + struct rxe_qp *qp = to_rqp(ibqp); + + rxe_qp_to_init(qp, init); + rxe_qp_to_attr(qp, attr, mask); + + return 0; +} + +static int rxe_destroy_qp(struct ib_qp *ibqp) +{ + struct rxe_qp *qp = to_rqp(ibqp); + + rxe_qp_destroy(qp); + rxe_drop_index(qp); + rxe_drop_ref(qp); + return 0; +} + +static int validate_send_wr(struct rxe_qp *qp, struct ib_send_wr *ibwr, + unsigned int mask, unsigned int length) +{ + int num_sge = ibwr->num_sge; + struct rxe_sq *sq = &qp->sq; + + if (unlikely(num_sge > sq->max_sge)) + goto err1; + + if (unlikely(mask & WR_ATOMIC_MASK)) { + if (length < 8) + goto err1; + + if (atomic_wr(ibwr)->remote_addr & 0x7) + goto err1; + } + + if (unlikely((ibwr->send_flags & IB_SEND_INLINE) && + (length > sq->max_inline))) + goto err1; + + return 0; + +err1: + return -EINVAL; +} + +static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, + struct ib_send_wr *ibwr) +{ + wr->wr_id = ibwr->wr_id; + wr->num_sge = ibwr->num_sge; + wr->opcode = ibwr->opcode; + wr->send_flags = ibwr->send_flags; + + if (qp_type(qp) == IB_QPT_UD || + qp_type(qp) == IB_QPT_SMI || + qp_type(qp) == IB_QPT_GSI) { + wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn; + wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey; + if (qp_type(qp) == IB_QPT_GSI) + wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index; + if (wr->opcode == IB_WR_SEND_WITH_IMM) + wr->ex.imm_data = ibwr->ex.imm_data; + } else { + switch (wr->opcode) { + case IB_WR_RDMA_WRITE_WITH_IMM: + wr->ex.imm_data = ibwr->ex.imm_data; + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr; + wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey; + break; + case IB_WR_SEND_WITH_IMM: + wr->ex.imm_data = ibwr->ex.imm_data; + break; + case IB_WR_SEND_WITH_INV: + wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey; + break; + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + wr->wr.atomic.remote_addr = + atomic_wr(ibwr)->remote_addr; + wr->wr.atomic.compare_add = + atomic_wr(ibwr)->compare_add; + wr->wr.atomic.swap = atomic_wr(ibwr)->swap; + wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey; + break; + case IB_WR_LOCAL_INV: + wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey; + break; + case IB_WR_REG_MR: + wr->wr.reg.mr = reg_wr(ibwr)->mr; + wr->wr.reg.key = reg_wr(ibwr)->key; + wr->wr.reg.access = reg_wr(ibwr)->access; + break; + default: + break; + } + } +} + +static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr, + unsigned int mask, unsigned int length, + struct rxe_send_wqe *wqe) +{ + int num_sge = ibwr->num_sge; + struct ib_sge *sge; + int i; + u8 *p; + + init_send_wr(qp, &wqe->wr, ibwr); + + if (qp_type(qp) == IB_QPT_UD || + qp_type(qp) == IB_QPT_SMI || + qp_type(qp) == IB_QPT_GSI) + memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av)); + + if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) { + p = wqe->dma.inline_data; + + sge = ibwr->sg_list; + for (i = 0; i < num_sge; i++, sge++) { + if (qp->is_user && copy_from_user(p, (__user void *) + (uintptr_t)sge->addr, sge->length)) + return -EFAULT; + + else if (!qp->is_user) + memcpy(p, (void *)(uintptr_t)sge->addr, + sge->length); + + p += sge->length; + } + } else if (mask & WR_REG_MASK) { + wqe->mask = mask; + wqe->state = wqe_state_posted; + return 0; + } else + memcpy(wqe->dma.sge, ibwr->sg_list, + num_sge * sizeof(struct ib_sge)); + + wqe->iova = (mask & WR_ATOMIC_MASK) ? + atomic_wr(ibwr)->remote_addr : + rdma_wr(ibwr)->remote_addr; + wqe->mask = mask; + wqe->dma.length = length; + wqe->dma.resid = length; + wqe->dma.num_sge = num_sge; + wqe->dma.cur_sge = 0; + wqe->dma.sge_offset = 0; + wqe->state = wqe_state_posted; + wqe->ssn = atomic_add_return(1, &qp->ssn); + + return 0; +} + +static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr, + unsigned mask, u32 length) +{ + int err; + struct rxe_sq *sq = &qp->sq; + struct rxe_send_wqe *send_wqe; + unsigned long flags; + + err = validate_send_wr(qp, ibwr, mask, length); + if (err) + return err; + + spin_lock_irqsave(&qp->sq.sq_lock, flags); + + if (unlikely(queue_full(sq->queue))) { + err = -ENOMEM; + goto err1; + } + + send_wqe = producer_addr(sq->queue); + + err = init_send_wqe(qp, ibwr, mask, length, send_wqe); + if (unlikely(err)) + goto err1; + + /* + * make sure all changes to the work queue are + * written before we update the producer pointer + */ + smp_wmb(); + + advance_producer(sq->queue); + spin_unlock_irqrestore(&qp->sq.sq_lock, flags); + + return 0; + +err1: + spin_unlock_irqrestore(&qp->sq.sq_lock, flags); + return err; +} + +static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + int err = 0; + struct rxe_qp *qp = to_rqp(ibqp); + unsigned int mask; + unsigned int length = 0; + int i; + int must_sched; + + if (unlikely(!qp->valid)) { + *bad_wr = wr; + return -EINVAL; + } + + if (unlikely(qp->req.state < QP_STATE_READY)) { + *bad_wr = wr; + return -EINVAL; + } + + while (wr) { + mask = wr_opcode_mask(wr->opcode, qp); + if (unlikely(!mask)) { + err = -EINVAL; + *bad_wr = wr; + break; + } + + if (unlikely((wr->send_flags & IB_SEND_INLINE) && + !(mask & WR_INLINE_MASK))) { + err = -EINVAL; + *bad_wr = wr; + break; + } + + length = 0; + for (i = 0; i < wr->num_sge; i++) + length += wr->sg_list[i].length; + + err = post_one_send(qp, wr, mask, length); + + if (err) { + *bad_wr = wr; + break; + } + wr = wr->next; + } + + /* + * Must sched in case of GSI QP because ib_send_mad() hold irq lock, + * and the requester call ip_local_out_sk() that takes spin_lock_bh. + */ + must_sched = (qp_type(qp) == IB_QPT_GSI) || + (queue_count(qp->sq.queue) > 1); + + rxe_run_task(&qp->req.task, must_sched); + + return err; +} + +static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + int err = 0; + struct rxe_qp *qp = to_rqp(ibqp); + struct rxe_rq *rq = &qp->rq; + unsigned long flags; + + if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) { + *bad_wr = wr; + err = -EINVAL; + goto err1; + } + + if (unlikely(qp->srq)) { + *bad_wr = wr; + err = -EINVAL; + goto err1; + } + + spin_lock_irqsave(&rq->producer_lock, flags); + + while (wr) { + err = post_one_recv(rq, wr); + if (unlikely(err)) { + *bad_wr = wr; + break; + } + wr = wr->next; + } + + spin_unlock_irqrestore(&rq->producer_lock, flags); + +err1: + return err; +} + +static struct ib_cq *rxe_create_cq(struct ib_device *dev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + int err; + struct rxe_dev *rxe = to_rdev(dev); + struct rxe_cq *cq; + + if (attr->flags) + return ERR_PTR(-EINVAL); + + err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector, udata); + if (err) + goto err1; + + cq = rxe_alloc(&rxe->cq_pool); + if (!cq) { + err = -ENOMEM; + goto err1; + } + + err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, + context, udata); + if (err) + goto err2; + + return &cq->ibcq; + +err2: + rxe_drop_ref(cq); +err1: + return ERR_PTR(err); +} + +static int rxe_destroy_cq(struct ib_cq *ibcq) +{ + struct rxe_cq *cq = to_rcq(ibcq); + + rxe_drop_ref(cq); + return 0; +} + +static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) +{ + int err; + struct rxe_cq *cq = to_rcq(ibcq); + struct rxe_dev *rxe = to_rdev(ibcq->device); + + err = rxe_cq_chk_attr(rxe, cq, cqe, 0, udata); + if (err) + goto err1; + + err = rxe_cq_resize_queue(cq, cqe, udata); + if (err) + goto err1; + + return 0; + +err1: + return err; +} + +static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + int i; + struct rxe_cq *cq = to_rcq(ibcq); + struct rxe_cqe *cqe; + unsigned long flags; + + spin_lock_irqsave(&cq->cq_lock, flags); + for (i = 0; i < num_entries; i++) { + cqe = queue_head(cq->queue); + if (!cqe) + break; + + memcpy(wc++, &cqe->ibwc, sizeof(*wc)); + advance_consumer(cq->queue); + } + spin_unlock_irqrestore(&cq->cq_lock, flags); + + return i; +} + +static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt) +{ + struct rxe_cq *cq = to_rcq(ibcq); + int count = queue_count(cq->queue); + + return (count > wc_cnt) ? wc_cnt : count; +} + +static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + struct rxe_cq *cq = to_rcq(ibcq); + + if (cq->notify != IB_CQ_NEXT_COMP) + cq->notify = flags & IB_CQ_SOLICITED_MASK; + + return 0; +} + +static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access) +{ + struct rxe_dev *rxe = to_rdev(ibpd->device); + struct rxe_pd *pd = to_rpd(ibpd); + struct rxe_mem *mr; + int err; + + mr = rxe_alloc(&rxe->mr_pool); + if (!mr) { + err = -ENOMEM; + goto err1; + } + + rxe_add_index(mr); + + rxe_add_ref(pd); + + err = rxe_mem_init_dma(rxe, pd, access, mr); + if (err) + goto err2; + + return &mr->ibmr; + +err2: + rxe_drop_ref(pd); + rxe_drop_index(mr); + rxe_drop_ref(mr); +err1: + return ERR_PTR(err); +} + +static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, + u64 start, + u64 length, + u64 iova, + int access, struct ib_udata *udata) +{ + int err; + struct rxe_dev *rxe = to_rdev(ibpd->device); + struct rxe_pd *pd = to_rpd(ibpd); + struct rxe_mem *mr; + + mr = rxe_alloc(&rxe->mr_pool); + if (!mr) { + err = -ENOMEM; + goto err2; + } + + rxe_add_index(mr); + + rxe_add_ref(pd); + + err = rxe_mem_init_user(rxe, pd, start, length, iova, + access, udata, mr); + if (err) + goto err3; + + return &mr->ibmr; + +err3: + rxe_drop_ref(pd); + rxe_drop_index(mr); + rxe_drop_ref(mr); +err2: + return ERR_PTR(err); +} + +static int rxe_dereg_mr(struct ib_mr *ibmr) +{ + struct rxe_mem *mr = to_rmr(ibmr); + + mr->state = RXE_MEM_STATE_ZOMBIE; + rxe_drop_ref(mr->pd); + rxe_drop_index(mr); + rxe_drop_ref(mr); + return 0; +} + +static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, + enum ib_mr_type mr_type, + u32 max_num_sg) +{ + struct rxe_dev *rxe = to_rdev(ibpd->device); + struct rxe_pd *pd = to_rpd(ibpd); + struct rxe_mem *mr; + int err; + + if (mr_type != IB_MR_TYPE_MEM_REG) + return ERR_PTR(-EINVAL); + + mr = rxe_alloc(&rxe->mr_pool); + if (!mr) { + err = -ENOMEM; + goto err1; + } + + rxe_add_index(mr); + + rxe_add_ref(pd); + + err = rxe_mem_init_fast(rxe, pd, max_num_sg, mr); + if (err) + goto err2; + + return &mr->ibmr; + +err2: + rxe_drop_ref(pd); + rxe_drop_index(mr); + rxe_drop_ref(mr); +err1: + return ERR_PTR(err); +} + +static int rxe_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct rxe_mem *mr = to_rmr(ibmr); + struct rxe_map *map; + struct rxe_phys_buf *buf; + + if (unlikely(mr->nbuf == mr->num_buf)) + return -ENOMEM; + + map = mr->map[mr->nbuf / RXE_BUF_PER_MAP]; + buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP]; + + buf->addr = addr; + buf->size = ibmr->page_size; + mr->nbuf++; + + return 0; +} + +static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, + unsigned int *sg_offset) +{ + struct rxe_mem *mr = to_rmr(ibmr); + int n; + + mr->nbuf = 0; + + n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page); + + mr->va = ibmr->iova; + mr->iova = ibmr->iova; + mr->length = ibmr->length; + mr->page_shift = ilog2(ibmr->page_size); + mr->page_mask = ibmr->page_size - 1; + mr->offset = mr->iova & mr->page_mask; + + return n; +} + +static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) +{ + int err; + struct rxe_dev *rxe = to_rdev(ibqp->device); + struct rxe_qp *qp = to_rqp(ibqp); + struct rxe_mc_grp *grp; + + /* takes a ref on grp if successful */ + err = rxe_mcast_get_grp(rxe, mgid, &grp); + if (err) + return err; + + err = rxe_mcast_add_grp_elem(rxe, qp, grp); + + rxe_drop_ref(grp); + return err; +} + +static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) +{ + struct rxe_dev *rxe = to_rdev(ibqp->device); + struct rxe_qp *qp = to_rqp(ibqp); + + return rxe_mcast_drop_grp_elem(rxe, qp, mgid); +} + +static ssize_t rxe_show_parent(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct rxe_dev *rxe = container_of(device, struct rxe_dev, + ib_dev.dev); + char *name; + + name = rxe->ifc_ops->parent_name(rxe, 1); + return snprintf(buf, 16, "%s\n", name); +} + +static DEVICE_ATTR(parent, S_IRUGO, rxe_show_parent, NULL); + +static struct device_attribute *rxe_dev_attributes[] = { + &dev_attr_parent, +}; + +int rxe_register_device(struct rxe_dev *rxe) +{ + int err; + int i; + struct ib_device *dev = &rxe->ib_dev; + + strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX); + strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); + + dev->owner = THIS_MODULE; + dev->node_type = RDMA_NODE_IB_CA; + dev->phys_port_cnt = 1; + dev->num_comp_vectors = RXE_NUM_COMP_VECTORS; + dev->dma_device = rxe->ifc_ops->dma_device(rxe); + dev->local_dma_lkey = 0; + dev->node_guid = rxe->ifc_ops->node_guid(rxe); + dev->dma_ops = &rxe_dma_mapping_ops; + + dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION; + dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) + | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) + | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) + | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) + | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) + | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) + | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) + | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) + | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) + | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) + | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV) + | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) + | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) + | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) + | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) + | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) + | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV) + | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) + | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ) + | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) + | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ) + | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ) + | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) + | BIT_ULL(IB_USER_VERBS_CMD_REG_MR) + | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) + | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) + | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH) + | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH) + | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) + | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) + | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) + ; + + dev->query_device = rxe_query_device; + dev->modify_device = rxe_modify_device; + dev->query_port = rxe_query_port; + dev->modify_port = rxe_modify_port; + dev->get_link_layer = rxe_get_link_layer; + dev->query_gid = rxe_query_gid; + dev->get_netdev = rxe_get_netdev; + dev->add_gid = rxe_add_gid; + dev->del_gid = rxe_del_gid; + dev->query_pkey = rxe_query_pkey; + dev->alloc_ucontext = rxe_alloc_ucontext; + dev->dealloc_ucontext = rxe_dealloc_ucontext; + dev->mmap = rxe_mmap; + dev->get_port_immutable = rxe_port_immutable; + dev->alloc_pd = rxe_alloc_pd; + dev->dealloc_pd = rxe_dealloc_pd; + dev->create_ah = rxe_create_ah; + dev->modify_ah = rxe_modify_ah; + dev->query_ah = rxe_query_ah; + dev->destroy_ah = rxe_destroy_ah; + dev->create_srq = rxe_create_srq; + dev->modify_srq = rxe_modify_srq; + dev->query_srq = rxe_query_srq; + dev->destroy_srq = rxe_destroy_srq; + dev->post_srq_recv = rxe_post_srq_recv; + dev->create_qp = rxe_create_qp; + dev->modify_qp = rxe_modify_qp; + dev->query_qp = rxe_query_qp; + dev->destroy_qp = rxe_destroy_qp; + dev->post_send = rxe_post_send; + dev->post_recv = rxe_post_recv; + dev->create_cq = rxe_create_cq; + dev->destroy_cq = rxe_destroy_cq; + dev->resize_cq = rxe_resize_cq; + dev->poll_cq = rxe_poll_cq; + dev->peek_cq = rxe_peek_cq; + dev->req_notify_cq = rxe_req_notify_cq; + dev->get_dma_mr = rxe_get_dma_mr; + dev->reg_user_mr = rxe_reg_user_mr; + dev->dereg_mr = rxe_dereg_mr; + dev->alloc_mr = rxe_alloc_mr; + dev->map_mr_sg = rxe_map_mr_sg; + dev->attach_mcast = rxe_attach_mcast; + dev->detach_mcast = rxe_detach_mcast; + + err = ib_register_device(dev, NULL); + if (err) { + pr_warn("rxe_register_device failed, err = %d\n", err); + goto err1; + } + + for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) { + err = device_create_file(&dev->dev, rxe_dev_attributes[i]); + if (err) { + pr_warn("device_create_file failed, i = %d, err = %d\n", + i, err); + goto err2; + } + } + + return 0; + +err2: + ib_unregister_device(dev); +err1: + return err; +} + +int rxe_unregister_device(struct rxe_dev *rxe) +{ + int i; + struct ib_device *dev = &rxe->ib_dev; + + for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) + device_remove_file(&dev->dev, rxe_dev_attributes[i]); + + ib_unregister_device(dev); + + return 0; +} diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h new file mode 100644 index 000000000000..cac1d52a08f0 --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -0,0 +1,480 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RXE_VERBS_H +#define RXE_VERBS_H + +#include <linux/interrupt.h> +#include <rdma/rdma_user_rxe.h> +#include "rxe_pool.h" +#include "rxe_task.h" + +static inline int pkey_match(u16 key1, u16 key2) +{ + return (((key1 & 0x7fff) != 0) && + ((key1 & 0x7fff) == (key2 & 0x7fff)) && + ((key1 & 0x8000) || (key2 & 0x8000))) ? 1 : 0; +} + +/* Return >0 if psn_a > psn_b + * 0 if psn_a == psn_b + * <0 if psn_a < psn_b + */ +static inline int psn_compare(u32 psn_a, u32 psn_b) +{ + s32 diff; + + diff = (psn_a - psn_b) << 8; + return diff; +} + +struct rxe_ucontext { + struct rxe_pool_entry pelem; + struct ib_ucontext ibuc; +}; + +struct rxe_pd { + struct rxe_pool_entry pelem; + struct ib_pd ibpd; +}; + +struct rxe_ah { + struct rxe_pool_entry pelem; + struct ib_ah ibah; + struct rxe_pd *pd; + struct rxe_av av; +}; + +struct rxe_cqe { + union { + struct ib_wc ibwc; + struct ib_uverbs_wc uibwc; + }; +}; + +struct rxe_cq { + struct rxe_pool_entry pelem; + struct ib_cq ibcq; + struct rxe_queue *queue; + spinlock_t cq_lock; + u8 notify; + int is_user; + struct tasklet_struct comp_task; +}; + +enum wqe_state { + wqe_state_posted, + wqe_state_processing, + wqe_state_pending, + wqe_state_done, + wqe_state_error, +}; + +struct rxe_sq { + int max_wr; + int max_sge; + int max_inline; + spinlock_t sq_lock; /* guard queue */ + struct rxe_queue *queue; +}; + +struct rxe_rq { + int max_wr; + int max_sge; + spinlock_t producer_lock; /* guard queue producer */ + spinlock_t consumer_lock; /* guard queue consumer */ + struct rxe_queue *queue; +}; + +struct rxe_srq { + struct rxe_pool_entry pelem; + struct ib_srq ibsrq; + struct rxe_pd *pd; + struct rxe_rq rq; + u32 srq_num; + + int limit; + int error; +}; + +enum rxe_qp_state { + QP_STATE_RESET, + QP_STATE_INIT, + QP_STATE_READY, + QP_STATE_DRAIN, /* req only */ + QP_STATE_DRAINED, /* req only */ + QP_STATE_ERROR +}; + +extern char *rxe_qp_state_name[]; + +struct rxe_req_info { + enum rxe_qp_state state; + int wqe_index; + u32 psn; + int opcode; + atomic_t rd_atomic; + int wait_fence; + int need_rd_atomic; + int wait_psn; + int need_retry; + int noack_pkts; + struct rxe_task task; +}; + +struct rxe_comp_info { + u32 psn; + int opcode; + int timeout; + int timeout_retry; + u32 retry_cnt; + u32 rnr_retry; + struct rxe_task task; +}; + +enum rdatm_res_state { + rdatm_res_state_next, + rdatm_res_state_new, + rdatm_res_state_replay, +}; + +struct resp_res { + int type; + u32 first_psn; + u32 last_psn; + u32 cur_psn; + enum rdatm_res_state state; + + union { + struct { + struct sk_buff *skb; + } atomic; + struct { + struct rxe_mem *mr; + u64 va_org; + u32 rkey; + u32 length; + u64 va; + u32 resid; + } read; + }; +}; + +struct rxe_resp_info { + enum rxe_qp_state state; + u32 msn; + u32 psn; + int opcode; + int drop_msg; + int goto_error; + int sent_psn_nak; + enum ib_wc_status status; + u8 aeth_syndrome; + + /* Receive only */ + struct rxe_recv_wqe *wqe; + + /* RDMA read / atomic only */ + u64 va; + struct rxe_mem *mr; + u32 resid; + u32 rkey; + u64 atomic_orig; + + /* SRQ only */ + struct { + struct rxe_recv_wqe wqe; + struct ib_sge sge[RXE_MAX_SGE]; + } srq_wqe; + + /* Responder resources. It's a circular list where the oldest + * resource is dropped first. + */ + struct resp_res *resources; + unsigned int res_head; + unsigned int res_tail; + struct resp_res *res; + struct rxe_task task; +}; + +struct rxe_qp { + struct rxe_pool_entry pelem; + struct ib_qp ibqp; + struct ib_qp_attr attr; + unsigned int valid; + unsigned int mtu; + int is_user; + + struct rxe_pd *pd; + struct rxe_srq *srq; + struct rxe_cq *scq; + struct rxe_cq *rcq; + + enum ib_sig_type sq_sig_type; + + struct rxe_sq sq; + struct rxe_rq rq; + + struct socket *sk; + + struct rxe_av pri_av; + struct rxe_av alt_av; + + /* list of mcast groups qp has joined (for cleanup) */ + struct list_head grp_list; + spinlock_t grp_lock; /* guard grp_list */ + + struct sk_buff_head req_pkts; + struct sk_buff_head resp_pkts; + struct sk_buff_head send_pkts; + + struct rxe_req_info req; + struct rxe_comp_info comp; + struct rxe_resp_info resp; + + atomic_t ssn; + atomic_t skb_out; + int need_req_skb; + + /* Timer for retranmitting packet when ACKs have been lost. RC + * only. The requester sets it when it is not already + * started. The responder resets it whenever an ack is + * received. + */ + struct timer_list retrans_timer; + u64 qp_timeout_jiffies; + + /* Timer for handling RNR NAKS. */ + struct timer_list rnr_nak_timer; + + spinlock_t state_lock; /* guard requester and completer */ +}; + +enum rxe_mem_state { + RXE_MEM_STATE_ZOMBIE, + RXE_MEM_STATE_INVALID, + RXE_MEM_STATE_FREE, + RXE_MEM_STATE_VALID, +}; + +enum rxe_mem_type { + RXE_MEM_TYPE_NONE, + RXE_MEM_TYPE_DMA, + RXE_MEM_TYPE_MR, + RXE_MEM_TYPE_FMR, + RXE_MEM_TYPE_MW, +}; + +#define RXE_BUF_PER_MAP (PAGE_SIZE / sizeof(struct rxe_phys_buf)) + +struct rxe_phys_buf { + u64 addr; + u64 size; +}; + +struct rxe_map { + struct rxe_phys_buf buf[RXE_BUF_PER_MAP]; +}; + +struct rxe_mem { + struct rxe_pool_entry pelem; + union { + struct ib_mr ibmr; + struct ib_mw ibmw; + }; + + struct rxe_pd *pd; + struct ib_umem *umem; + + u32 lkey; + u32 rkey; + + enum rxe_mem_state state; + enum rxe_mem_type type; + u64 va; + u64 iova; + size_t length; + u32 offset; + int access; + + int page_shift; + int page_mask; + int map_shift; + int map_mask; + + u32 num_buf; + u32 nbuf; + + u32 max_buf; + u32 num_map; + + struct rxe_map **map; +}; + +struct rxe_mc_grp { + struct rxe_pool_entry pelem; + spinlock_t mcg_lock; /* guard group */ + struct rxe_dev *rxe; + struct list_head qp_list; + union ib_gid mgid; + int num_qp; + u32 qkey; + u16 pkey; +}; + +struct rxe_mc_elem { + struct rxe_pool_entry pelem; + struct list_head qp_list; + struct list_head grp_list; + struct rxe_qp *qp; + struct rxe_mc_grp *grp; +}; + +struct rxe_port { + struct ib_port_attr attr; + u16 *pkey_tbl; + __be64 port_guid; + __be64 subnet_prefix; + spinlock_t port_lock; /* guard port */ + unsigned int mtu_cap; + /* special QPs */ + u32 qp_smi_index; + u32 qp_gsi_index; +}; + +/* callbacks from rdma_rxe to network interface layer */ +struct rxe_ifc_ops { + void (*release)(struct rxe_dev *rxe); + __be64 (*node_guid)(struct rxe_dev *rxe); + __be64 (*port_guid)(struct rxe_dev *rxe); + struct device *(*dma_device)(struct rxe_dev *rxe); + int (*mcast_add)(struct rxe_dev *rxe, union ib_gid *mgid); + int (*mcast_delete)(struct rxe_dev *rxe, union ib_gid *mgid); + int (*prepare)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct sk_buff *skb, u32 *crc); + int (*send)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct sk_buff *skb); + int (*loopback)(struct sk_buff *skb); + struct sk_buff *(*init_packet)(struct rxe_dev *rxe, struct rxe_av *av, + int paylen, struct rxe_pkt_info *pkt); + char *(*parent_name)(struct rxe_dev *rxe, unsigned int port_num); + enum rdma_link_layer (*link_layer)(struct rxe_dev *rxe, + unsigned int port_num); +}; + +struct rxe_dev { + struct ib_device ib_dev; + struct ib_device_attr attr; + int max_ucontext; + int max_inline_data; + struct kref ref_cnt; + struct mutex usdev_lock; + + struct rxe_ifc_ops *ifc_ops; + + struct net_device *ndev; + + int xmit_errors; + + struct rxe_pool uc_pool; + struct rxe_pool pd_pool; + struct rxe_pool ah_pool; + struct rxe_pool srq_pool; + struct rxe_pool qp_pool; + struct rxe_pool cq_pool; + struct rxe_pool mr_pool; + struct rxe_pool mw_pool; + struct rxe_pool mc_grp_pool; + struct rxe_pool mc_elem_pool; + + spinlock_t pending_lock; /* guard pending_mmaps */ + struct list_head pending_mmaps; + + spinlock_t mmap_offset_lock; /* guard mmap_offset */ + int mmap_offset; + + struct rxe_port port; + struct list_head list; +}; + +static inline struct rxe_dev *to_rdev(struct ib_device *dev) +{ + return dev ? container_of(dev, struct rxe_dev, ib_dev) : NULL; +} + +static inline struct rxe_ucontext *to_ruc(struct ib_ucontext *uc) +{ + return uc ? container_of(uc, struct rxe_ucontext, ibuc) : NULL; +} + +static inline struct rxe_pd *to_rpd(struct ib_pd *pd) +{ + return pd ? container_of(pd, struct rxe_pd, ibpd) : NULL; +} + +static inline struct rxe_ah *to_rah(struct ib_ah *ah) +{ + return ah ? container_of(ah, struct rxe_ah, ibah) : NULL; +} + +static inline struct rxe_srq *to_rsrq(struct ib_srq *srq) +{ + return srq ? container_of(srq, struct rxe_srq, ibsrq) : NULL; +} + +static inline struct rxe_qp *to_rqp(struct ib_qp *qp) +{ + return qp ? container_of(qp, struct rxe_qp, ibqp) : NULL; +} + +static inline struct rxe_cq *to_rcq(struct ib_cq *cq) +{ + return cq ? container_of(cq, struct rxe_cq, ibcq) : NULL; +} + +static inline struct rxe_mem *to_rmr(struct ib_mr *mr) +{ + return mr ? container_of(mr, struct rxe_mem, ibmr) : NULL; +} + +static inline struct rxe_mem *to_rmw(struct ib_mw *mw) +{ + return mw ? container_of(mw, struct rxe_mem, ibmw) : NULL; +} + +int rxe_register_device(struct rxe_dev *rxe); +int rxe_unregister_device(struct rxe_dev *rxe); + +void rxe_mc_cleanup(void *arg); + +#endif /* RXE_VERBS_H */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 1502199c8e56..7b6d40ff1acf 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -62,10 +62,8 @@ static void ipoib_get_drvinfo(struct net_device *netdev, { struct ipoib_dev_priv *priv = netdev_priv(netdev); - snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), - "%d.%d.%d", (int)(priv->ca->attrs.fw_ver >> 32), - (int)(priv->ca->attrs.fw_ver >> 16) & 0xffff, - (int)priv->ca->attrs.fw_ver & 0xffff); + ib_get_device_fw_str(priv->ca, drvinfo->fw_version, + sizeof(drvinfo->fw_version)); strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device), sizeof(drvinfo->bus_info)); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 5f58c41ef787..74bcaa064226 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1967,8 +1967,7 @@ int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca) priv->hca_caps = hca->attrs.device_cap_flags; if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { - priv->dev->hw_features = NETIF_F_SG | - NETIF_F_IP_CSUM | NETIF_F_RXCSUM; + priv->dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM; if (priv->hca_caps & IB_DEVICE_UD_TSO) priv->dev->hw_features |= NETIF_F_TSO; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 1e7cbbaa15bd..c55ecb2c3736 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -135,7 +135,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) .cap = { .max_send_wr = ipoib_sendq_size, .max_recv_wr = ipoib_recvq_size, - .max_send_sge = 1, + .max_send_sge = min_t(u32, priv->ca->attrs.max_sge, + MAX_SKB_FRAGS + 1), .max_recv_sge = IPOIB_UD_RX_SG }, .sq_sig_type = IB_SIGNAL_ALL_WR, @@ -205,10 +206,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING) init_attr.create_flags |= IB_QP_CREATE_NETIF_QP; - if (dev->features & NETIF_F_SG) - init_attr.cap.max_send_sge = - min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1); - priv->qp = ib_create_qp(priv->pd, &init_attr); if (IS_ERR(priv->qp)) { printk(KERN_WARNING "%s: failed to create QP\n", ca->name); @@ -234,6 +231,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) priv->rx_wr.next = NULL; priv->rx_wr.sg_list = priv->rx_sge; + if (init_attr.cap.max_send_sge > 1) + dev->features |= NETIF_F_SG; + priv->max_send_sge = init_attr.cap.max_send_sge; return 0; diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index a990c04208c9..ba6be060a476 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -137,8 +137,6 @@ isert_create_qp(struct isert_conn *isert_conn, attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX; attr.cap.max_send_sge = device->ib_device->attrs.max_sge; - isert_conn->max_sge = min(device->ib_device->attrs.max_sge, - device->ib_device->attrs.max_sge_rd); attr.cap.max_recv_sge = 1; attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index e512ba941f2f..fc791efe3a10 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -138,7 +138,6 @@ struct isert_conn { u32 responder_resources; u32 initiator_depth; bool pi_support; - u32 max_sge; struct iser_rx_desc *login_req_buf; char *login_rsp_buf; u64 login_req_dma; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 4a4155640d51..dfa23b075a88 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1601,6 +1601,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) struct ib_qp_init_attr *qp_init; struct srpt_port *sport = ch->sport; struct srpt_device *sdev = sport->sdev; + const struct ib_device_attr *attrs = &sdev->device->attrs; u32 srp_sq_size = sport->port_attrib.srp_sq_size; int ret; @@ -1638,7 +1639,7 @@ retry: */ qp_init->cap.max_send_wr = srp_sq_size / 2; qp_init->cap.max_rdma_ctxs = srp_sq_size / 2; - qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE; + qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE); qp_init->port_num = ch->sport->port; ch->qp = ib_create_qp(sdev->pd, qp_init); @@ -2261,7 +2262,7 @@ static void srpt_queue_response(struct se_cmd *cmd) container_of(cmd, struct srpt_send_ioctx, cmd); struct srpt_rdma_ch *ch = ioctx->ch; struct srpt_device *sdev = ch->sport->sdev; - struct ib_send_wr send_wr, *first_wr = NULL, *bad_wr; + struct ib_send_wr send_wr, *first_wr = &send_wr, *bad_wr; struct ib_sge sge; enum srpt_command_state state; unsigned long flags; @@ -2302,11 +2303,8 @@ static void srpt_queue_response(struct se_cmd *cmd) struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i]; first_wr = rdma_rw_ctx_wrs(&ctx->rw, ch->qp, - ch->sport->port, NULL, - first_wr ? first_wr : &send_wr); + ch->sport->port, NULL, first_wr); } - } else { - first_wr = &send_wr; } if (state != SRPT_STATE_MGMT) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 389030487da7..581878782854 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -106,7 +106,11 @@ enum { SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2, SRPT_DEF_SG_TABLESIZE = 128, - SRPT_DEF_SG_PER_WQE = 16, + /* + * An experimentally determined value that avoids that QP creation + * fails due to "swiotlb buffer is full" on systems using the swiotlb. + */ + SRPT_MAX_SG_PER_WQE = 16, MIN_SRPT_SQ_SIZE = 16, DEF_SRPT_SQ_SIZE = 4096, diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index a529a4535457..83af17ad0f1f 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -115,6 +115,10 @@ static bool sticks_to_null; module_param(sticks_to_null, bool, S_IRUGO); MODULE_PARM_DESC(sticks_to_null, "Do not map sticks at all for unknown pads"); +static bool auto_poweroff = true; +module_param(auto_poweroff, bool, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(auto_poweroff, "Power off wireless controllers on suspend"); + static const struct xpad_device { u16 idVendor; u16 idProduct; @@ -1248,6 +1252,36 @@ static void xpad_stop_input(struct usb_xpad *xpad) usb_kill_urb(xpad->irq_in); } +static void xpad360w_poweroff_controller(struct usb_xpad *xpad) +{ + unsigned long flags; + struct xpad_output_packet *packet = + &xpad->out_packets[XPAD_OUT_CMD_IDX]; + + spin_lock_irqsave(&xpad->odata_lock, flags); + + packet->data[0] = 0x00; + packet->data[1] = 0x00; + packet->data[2] = 0x08; + packet->data[3] = 0xC0; + packet->data[4] = 0x00; + packet->data[5] = 0x00; + packet->data[6] = 0x00; + packet->data[7] = 0x00; + packet->data[8] = 0x00; + packet->data[9] = 0x00; + packet->data[10] = 0x00; + packet->data[11] = 0x00; + packet->len = 12; + packet->pending = true; + + /* Reset the sequence so we send out poweroff now */ + xpad->last_out_packet = -1; + xpad_try_sending_next_out_packet(xpad); + + spin_unlock_irqrestore(&xpad->odata_lock, flags); +} + static int xpad360w_start_input(struct usb_xpad *xpad) { int error; @@ -1590,6 +1624,15 @@ static int xpad_suspend(struct usb_interface *intf, pm_message_t message) * or goes away. */ xpad360w_stop_input(xpad); + + /* + * The wireless adapter is going off now, so the + * gamepads are going to become disconnected. + * Unless explicitly disabled, power them down + * so they don't just sit there flashing. + */ + if (auto_poweroff && xpad->pad_present) + xpad360w_poweroff_controller(xpad); } else { mutex_lock(&input->mutex); if (input->users) diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c index b01966dc7eb3..4b0878f35471 100644 --- a/drivers/input/keyboard/cros_ec_keyb.c +++ b/drivers/input/keyboard/cros_ec_keyb.c @@ -186,7 +186,7 @@ static irqreturn_t cros_ec_keyb_irq(int irq, void *data) if (ret >= 0) cros_ec_keyb_process(ckdev, kb_state, ret); else - dev_err(ec->dev, "failed to get keyboard state: %d\n", ret); + dev_err(ckdev->dev, "failed to get keyboard state: %d\n", ret); return IRQ_HANDLED; } @@ -236,7 +236,7 @@ static void cros_ec_keyb_compute_valid_keys(struct cros_ec_keyb *ckdev) static int cros_ec_keyb_probe(struct platform_device *pdev) { struct cros_ec_device *ec = dev_get_drvdata(pdev->dev.parent); - struct device *dev = ec->dev; + struct device *dev = &pdev->dev; struct cros_ec_keyb *ckdev; struct input_dev *idev; struct device_node *np; @@ -246,23 +246,22 @@ static int cros_ec_keyb_probe(struct platform_device *pdev) if (!np) return -ENODEV; - ckdev = devm_kzalloc(&pdev->dev, sizeof(*ckdev), GFP_KERNEL); + ckdev = devm_kzalloc(dev, sizeof(*ckdev), GFP_KERNEL); if (!ckdev) return -ENOMEM; - err = matrix_keypad_parse_of_params(&pdev->dev, &ckdev->rows, - &ckdev->cols); + err = matrix_keypad_parse_of_params(dev, &ckdev->rows, &ckdev->cols); if (err) return err; - ckdev->valid_keys = devm_kzalloc(&pdev->dev, ckdev->cols, GFP_KERNEL); + ckdev->valid_keys = devm_kzalloc(dev, ckdev->cols, GFP_KERNEL); if (!ckdev->valid_keys) return -ENOMEM; - ckdev->old_kb_state = devm_kzalloc(&pdev->dev, ckdev->cols, GFP_KERNEL); + ckdev->old_kb_state = devm_kzalloc(dev, ckdev->cols, GFP_KERNEL); if (!ckdev->old_kb_state) return -ENOMEM; - idev = devm_input_allocate_device(&pdev->dev); + idev = devm_input_allocate_device(dev); if (!idev) return -ENOMEM; @@ -273,7 +272,7 @@ static int cros_ec_keyb_probe(struct platform_device *pdev) ckdev->ec = ec; ckdev->dev = dev; - dev_set_drvdata(&pdev->dev, ckdev); + dev_set_drvdata(dev, ckdev); idev->name = CROS_EC_DEV_NAME; idev->phys = ec->phys_name; @@ -282,7 +281,7 @@ static int cros_ec_keyb_probe(struct platform_device *pdev) idev->id.bustype = BUS_VIRTUAL; idev->id.version = 1; idev->id.product = 0; - idev->dev.parent = &pdev->dev; + idev->dev.parent = dev; idev->open = cros_ec_keyb_open; idev->close = cros_ec_keyb_close; diff --git a/drivers/input/misc/rotary_encoder.c b/drivers/input/misc/rotary_encoder.c index c7fc8d4fb080..1588aecafff7 100644 --- a/drivers/input/misc/rotary_encoder.c +++ b/drivers/input/misc/rotary_encoder.c @@ -28,6 +28,11 @@ #define DRV_NAME "rotary-encoder" +enum rotary_encoder_encoding { + ROTENC_GRAY, + ROTENC_BINARY, +}; + struct rotary_encoder { struct input_dev *input; @@ -37,6 +42,7 @@ struct rotary_encoder { u32 axis; bool relative_axis; bool rollover; + enum rotary_encoder_encoding encoding; unsigned int pos; @@ -57,8 +63,9 @@ static unsigned int rotary_encoder_get_state(struct rotary_encoder *encoder) for (i = 0; i < encoder->gpios->ndescs; ++i) { int val = gpiod_get_value_cansleep(encoder->gpios->desc[i]); + /* convert from gray encoding to normal */ - if (ret & 1) + if (encoder->encoding == ROTENC_GRAY && ret & 1) val = !val; ret = ret << 1 | val; @@ -213,6 +220,20 @@ static int rotary_encoder_probe(struct platform_device *pdev) encoder->rollover = device_property_read_bool(dev, "rotary-encoder,rollover"); + if (!device_property_present(dev, "rotary-encoder,encoding") || + !device_property_match_string(dev, "rotary-encoder,encoding", + "gray")) { + dev_info(dev, "gray"); + encoder->encoding = ROTENC_GRAY; + } else if (!device_property_match_string(dev, "rotary-encoder,encoding", + "binary")) { + dev_info(dev, "binary"); + encoder->encoding = ROTENC_BINARY; + } else { + dev_err(dev, "unknown encoding setting\n"); + return -EINVAL; + } + device_property_read_u32(dev, "linux,axis", &encoder->axis); encoder->relative_axis = device_property_read_bool(dev, "rotary-encoder,relative-axis"); diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 2f589857a039..d15b33813021 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -4,7 +4,8 @@ * Copyright (c) 2013 ELAN Microelectronics Corp. * * Author: æž—æ”¿ç¶ (Duson Lin) <dusonlin@emc.com.tw> - * Version: 1.6.0 + * Author: KT Liao <kt.liao@emc.com.tw> + * Version: 1.6.2 * * Based on cyapa driver: * copyright (c) 2011-2012 Cypress Semiconductor, Inc. @@ -40,7 +41,7 @@ #include "elan_i2c.h" #define DRIVER_NAME "elan_i2c" -#define ELAN_DRIVER_VERSION "1.6.1" +#define ELAN_DRIVER_VERSION "1.6.2" #define ELAN_VENDOR_ID 0x04f3 #define ETP_MAX_PRESSURE 255 #define ETP_FWIDTH_REDUCE 90 @@ -199,9 +200,41 @@ static int elan_sleep(struct elan_tp_data *data) return error; } +static int elan_query_product(struct elan_tp_data *data) +{ + int error; + + error = data->ops->get_product_id(data->client, &data->product_id); + if (error) + return error; + + error = data->ops->get_sm_version(data->client, &data->ic_type, + &data->sm_version); + if (error) + return error; + + return 0; +} + +static int elan_check_ASUS_special_fw(struct elan_tp_data *data) +{ + if (data->ic_type != 0x0E) + return false; + + switch (data->product_id) { + case 0x05 ... 0x07: + case 0x09: + case 0x13: + return true; + default: + return false; + } +} + static int __elan_initialize(struct elan_tp_data *data) { struct i2c_client *client = data->client; + bool woken_up = false; int error; error = data->ops->initialize(client); @@ -210,6 +243,27 @@ static int __elan_initialize(struct elan_tp_data *data) return error; } + error = elan_query_product(data); + if (error) + return error; + + /* + * Some ASUS devices were shipped with firmware that requires + * touchpads to be woken up first, before attempting to switch + * them into absolute reporting mode. + */ + if (elan_check_ASUS_special_fw(data)) { + error = data->ops->sleep_control(client, false); + if (error) { + dev_err(&client->dev, + "failed to wake device up: %d\n", error); + return error; + } + + msleep(200); + woken_up = true; + } + data->mode |= ETP_ENABLE_ABS; error = data->ops->set_mode(client, data->mode); if (error) { @@ -218,11 +272,13 @@ static int __elan_initialize(struct elan_tp_data *data) return error; } - error = data->ops->sleep_control(client, false); - if (error) { - dev_err(&client->dev, - "failed to wake device up: %d\n", error); - return error; + if (!woken_up) { + error = data->ops->sleep_control(client, false); + if (error) { + dev_err(&client->dev, + "failed to wake device up: %d\n", error); + return error; + } } return 0; @@ -248,10 +304,6 @@ static int elan_query_device_info(struct elan_tp_data *data) { int error; - error = data->ops->get_product_id(data->client, &data->product_id); - if (error) - return error; - error = data->ops->get_version(data->client, false, &data->fw_version); if (error) return error; @@ -261,11 +313,6 @@ static int elan_query_device_info(struct elan_tp_data *data) if (error) return error; - error = data->ops->get_sm_version(data->client, &data->ic_type, - &data->sm_version); - if (error) - return error; - error = data->ops->get_version(data->client, true, &data->iap_version); if (error) return error; diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 615d23ec0d8e..08e252a42480 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -222,12 +222,8 @@ static int elantech_write_reg(struct psmouse *psmouse, unsigned char reg, */ static void elantech_packet_dump(struct psmouse *psmouse) { - int i; - - psmouse_printk(KERN_DEBUG, psmouse, "PS/2 packet ["); - for (i = 0; i < psmouse->pktsize; i++) - printk("%s0x%02x ", i ? ", " : " ", psmouse->packet[i]); - printk("]\n"); + psmouse_printk(KERN_DEBUG, psmouse, "PS/2 packet [%*ph]\n", + psmouse->pktsize, psmouse->packet); } /* diff --git a/drivers/input/rmi4/rmi_bus.c b/drivers/input/rmi4/rmi_bus.c index 253df96be427..a73580654c6b 100644 --- a/drivers/input/rmi4/rmi_bus.c +++ b/drivers/input/rmi4/rmi_bus.c @@ -232,10 +232,7 @@ err_put_device: void rmi_unregister_function(struct rmi_function *fn) { device_del(&fn->dev); - - if (fn->dev.of_node) - of_node_put(fn->dev.of_node); - + of_node_put(fn->dev.of_node); put_device(&fn->dev); } diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 454195709a82..b4d34086e73f 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -1277,6 +1277,7 @@ static int __init i8042_create_kbd_port(void) serio->start = i8042_start; serio->stop = i8042_stop; serio->close = i8042_port_close; + serio->ps2_cmd_mutex = &i8042_mutex; serio->port_data = port; serio->dev.parent = &i8042_platform_device->dev; strlcpy(serio->name, "i8042 KBD port", sizeof(serio->name)); @@ -1373,21 +1374,6 @@ static void i8042_unregister_ports(void) } } -/* - * Checks whether port belongs to i8042 controller. - */ -bool i8042_check_port_owner(const struct serio *port) -{ - int i; - - for (i = 0; i < I8042_NUM_PORTS; i++) - if (i8042_ports[i].serio == port) - return true; - - return false; -} -EXPORT_SYMBOL(i8042_check_port_owner); - static void i8042_free_irqs(void) { if (i8042_aux_irq_registered) diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c index 316f2c897101..83e9c663aa67 100644 --- a/drivers/input/serio/libps2.c +++ b/drivers/input/serio/libps2.c @@ -56,19 +56,17 @@ EXPORT_SYMBOL(ps2_sendbyte); void ps2_begin_command(struct ps2dev *ps2dev) { - mutex_lock(&ps2dev->cmd_mutex); + struct mutex *m = ps2dev->serio->ps2_cmd_mutex ?: &ps2dev->cmd_mutex; - if (i8042_check_port_owner(ps2dev->serio)) - i8042_lock_chip(); + mutex_lock(m); } EXPORT_SYMBOL(ps2_begin_command); void ps2_end_command(struct ps2dev *ps2dev) { - if (i8042_check_port_owner(ps2dev->serio)) - i8042_unlock_chip(); + struct mutex *m = ps2dev->serio->ps2_cmd_mutex ?: &ps2dev->cmd_mutex; - mutex_unlock(&ps2dev->cmd_mutex); + mutex_unlock(m); } EXPORT_SYMBOL(ps2_end_command); diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index ee02dc7422bd..2fb1f430a431 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -1059,6 +1059,31 @@ config TOUCHSCREEN_RM_TS To compile this driver as a module, choose M here: the module will be called raydium_i2c_ts. +config TOUCHSCREEN_SILEAD + tristate "Silead I2C touchscreen" + depends on I2C + help + Say Y here if you have the Silead touchscreen connected to + your system. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called silead. + +config TOUCHSCREEN_SIS_I2C + tristate "SiS 9200 family I2C touchscreen" + depends on I2C + select CRC_ITU_T + depends on GPIOLIB || COMPILE_TEST + help + This enables support for SiS 9200 family over I2C based touchscreens. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called sis_i2c. + config TOUCHSCREEN_ST1232 tristate "Sitronix ST1232 touchscreen controllers" depends on I2C diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile index 3315882905f7..b4373d6be402 100644 --- a/drivers/input/touchscreen/Makefile +++ b/drivers/input/touchscreen/Makefile @@ -64,6 +64,8 @@ obj-$(CONFIG_TOUCHSCREEN_PENMOUNT) += penmount.o obj-$(CONFIG_TOUCHSCREEN_PIXCIR) += pixcir_i2c_ts.o obj-$(CONFIG_TOUCHSCREEN_RM_TS) += raydium_i2c_ts.o obj-$(CONFIG_TOUCHSCREEN_S3C2410) += s3c2410_ts.o +obj-$(CONFIG_TOUCHSCREEN_SILEAD) += silead.o +obj-$(CONFIG_TOUCHSCREEN_SIS_I2C) += sis_i2c.o obj-$(CONFIG_TOUCHSCREEN_ST1232) += st1232.o obj-$(CONFIG_TOUCHSCREEN_STMPE) += stmpe-ts.o obj-$(CONFIG_TOUCHSCREEN_SUN4I) += sun4i-ts.o diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index ddf694b9fffc..fe4848bd1f4c 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -169,7 +169,7 @@ static ssize_t ili210x_calibrate(struct device *dev, return count; } -static DEVICE_ATTR(calibrate, 0644, NULL, ili210x_calibrate); +static DEVICE_ATTR(calibrate, S_IWUSR, NULL, ili210x_calibrate); static struct attribute *ili210x_attributes[] = { &dev_attr_calibrate.attr, diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c new file mode 100644 index 000000000000..7379fe153cf9 --- /dev/null +++ b/drivers/input/touchscreen/silead.c @@ -0,0 +1,565 @@ +/* ------------------------------------------------------------------------- + * Copyright (C) 2014-2015, Intel Corporation + * + * Derived from: + * gslX68X.c + * Copyright (C) 2010-2015, Shanghai Sileadinc Co.Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * ------------------------------------------------------------------------- + */ + +#include <linux/i2c.h> +#include <linux/module.h> +#include <linux/acpi.h> +#include <linux/interrupt.h> +#include <linux/gpio/consumer.h> +#include <linux/delay.h> +#include <linux/firmware.h> +#include <linux/input.h> +#include <linux/input/mt.h> +#include <linux/input/touchscreen.h> +#include <linux/pm.h> +#include <linux/irq.h> + +#include <asm/unaligned.h> + +#define SILEAD_TS_NAME "silead_ts" + +#define SILEAD_REG_RESET 0xE0 +#define SILEAD_REG_DATA 0x80 +#define SILEAD_REG_TOUCH_NR 0x80 +#define SILEAD_REG_POWER 0xBC +#define SILEAD_REG_CLOCK 0xE4 +#define SILEAD_REG_STATUS 0xB0 +#define SILEAD_REG_ID 0xFC +#define SILEAD_REG_MEM_CHECK 0xB0 + +#define SILEAD_STATUS_OK 0x5A5A5A5A +#define SILEAD_TS_DATA_LEN 44 +#define SILEAD_CLOCK 0x04 + +#define SILEAD_CMD_RESET 0x88 +#define SILEAD_CMD_START 0x00 + +#define SILEAD_POINT_DATA_LEN 0x04 +#define SILEAD_POINT_Y_OFF 0x00 +#define SILEAD_POINT_Y_MSB_OFF 0x01 +#define SILEAD_POINT_X_OFF 0x02 +#define SILEAD_POINT_X_MSB_OFF 0x03 +#define SILEAD_TOUCH_ID_MASK 0xF0 + +#define SILEAD_CMD_SLEEP_MIN 10000 +#define SILEAD_CMD_SLEEP_MAX 20000 +#define SILEAD_POWER_SLEEP 20 +#define SILEAD_STARTUP_SLEEP 30 + +#define SILEAD_MAX_FINGERS 10 + +enum silead_ts_power { + SILEAD_POWER_ON = 1, + SILEAD_POWER_OFF = 0 +}; + +struct silead_ts_data { + struct i2c_client *client; + struct gpio_desc *gpio_power; + struct input_dev *input; + char fw_name[64]; + struct touchscreen_properties prop; + u32 max_fingers; + u32 chip_id; + struct input_mt_pos pos[SILEAD_MAX_FINGERS]; + int slots[SILEAD_MAX_FINGERS]; + int id[SILEAD_MAX_FINGERS]; +}; + +struct silead_fw_data { + u32 offset; + u32 val; +}; + +static int silead_ts_request_input_dev(struct silead_ts_data *data) +{ + struct device *dev = &data->client->dev; + int error; + + data->input = devm_input_allocate_device(dev); + if (!data->input) { + dev_err(dev, + "Failed to allocate input device\n"); + return -ENOMEM; + } + + input_set_abs_params(data->input, ABS_MT_POSITION_X, 0, 4095, 0, 0); + input_set_abs_params(data->input, ABS_MT_POSITION_Y, 0, 4095, 0, 0); + touchscreen_parse_properties(data->input, true, &data->prop); + + input_mt_init_slots(data->input, data->max_fingers, + INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED | + INPUT_MT_TRACK); + + data->input->name = SILEAD_TS_NAME; + data->input->phys = "input/ts"; + data->input->id.bustype = BUS_I2C; + + error = input_register_device(data->input); + if (error) { + dev_err(dev, "Failed to register input device: %d\n", error); + return error; + } + + return 0; +} + +static void silead_ts_set_power(struct i2c_client *client, + enum silead_ts_power state) +{ + struct silead_ts_data *data = i2c_get_clientdata(client); + + if (data->gpio_power) { + gpiod_set_value_cansleep(data->gpio_power, state); + msleep(SILEAD_POWER_SLEEP); + } +} + +static void silead_ts_read_data(struct i2c_client *client) +{ + struct silead_ts_data *data = i2c_get_clientdata(client); + struct input_dev *input = data->input; + struct device *dev = &client->dev; + u8 *bufp, buf[SILEAD_TS_DATA_LEN]; + int touch_nr, error, i; + + error = i2c_smbus_read_i2c_block_data(client, SILEAD_REG_DATA, + SILEAD_TS_DATA_LEN, buf); + if (error < 0) { + dev_err(dev, "Data read error %d\n", error); + return; + } + + touch_nr = buf[0]; + if (touch_nr > data->max_fingers) { + dev_warn(dev, "More touches reported then supported %d > %d\n", + touch_nr, data->max_fingers); + touch_nr = data->max_fingers; + } + + bufp = buf + SILEAD_POINT_DATA_LEN; + for (i = 0; i < touch_nr; i++, bufp += SILEAD_POINT_DATA_LEN) { + /* Bits 4-7 are the touch id */ + data->id[i] = (bufp[SILEAD_POINT_X_MSB_OFF] & + SILEAD_TOUCH_ID_MASK) >> 4; + touchscreen_set_mt_pos(&data->pos[i], &data->prop, + get_unaligned_le16(&bufp[SILEAD_POINT_X_OFF]) & 0xfff, + get_unaligned_le16(&bufp[SILEAD_POINT_Y_OFF]) & 0xfff); + } + + input_mt_assign_slots(input, data->slots, data->pos, touch_nr, 0); + + for (i = 0; i < touch_nr; i++) { + input_mt_slot(input, data->slots[i]); + input_mt_report_slot_state(input, MT_TOOL_FINGER, true); + input_report_abs(input, ABS_MT_POSITION_X, data->pos[i].x); + input_report_abs(input, ABS_MT_POSITION_Y, data->pos[i].y); + + dev_dbg(dev, "x=%d y=%d hw_id=%d sw_id=%d\n", data->pos[i].x, + data->pos[i].y, data->id[i], data->slots[i]); + } + + input_mt_sync_frame(input); + input_sync(input); +} + +static int silead_ts_init(struct i2c_client *client) +{ + struct silead_ts_data *data = i2c_get_clientdata(client); + int error; + + error = i2c_smbus_write_byte_data(client, SILEAD_REG_RESET, + SILEAD_CMD_RESET); + if (error) + goto i2c_write_err; + usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX); + + error = i2c_smbus_write_byte_data(client, SILEAD_REG_TOUCH_NR, + data->max_fingers); + if (error) + goto i2c_write_err; + usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX); + + error = i2c_smbus_write_byte_data(client, SILEAD_REG_CLOCK, + SILEAD_CLOCK); + if (error) + goto i2c_write_err; + usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX); + + error = i2c_smbus_write_byte_data(client, SILEAD_REG_RESET, + SILEAD_CMD_START); + if (error) + goto i2c_write_err; + usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX); + + return 0; + +i2c_write_err: + dev_err(&client->dev, "Registers clear error %d\n", error); + return error; +} + +static int silead_ts_reset(struct i2c_client *client) +{ + int error; + + error = i2c_smbus_write_byte_data(client, SILEAD_REG_RESET, + SILEAD_CMD_RESET); + if (error) + goto i2c_write_err; + usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX); + + error = i2c_smbus_write_byte_data(client, SILEAD_REG_CLOCK, + SILEAD_CLOCK); + if (error) + goto i2c_write_err; + usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX); + + error = i2c_smbus_write_byte_data(client, SILEAD_REG_POWER, + SILEAD_CMD_START); + if (error) + goto i2c_write_err; + usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX); + + return 0; + +i2c_write_err: + dev_err(&client->dev, "Chip reset error %d\n", error); + return error; +} + +static int silead_ts_startup(struct i2c_client *client) +{ + int error; + + error = i2c_smbus_write_byte_data(client, SILEAD_REG_RESET, 0x00); + if (error) { + dev_err(&client->dev, "Startup error %d\n", error); + return error; + } + + msleep(SILEAD_STARTUP_SLEEP); + + return 0; +} + +static int silead_ts_load_fw(struct i2c_client *client) +{ + struct device *dev = &client->dev; + struct silead_ts_data *data = i2c_get_clientdata(client); + unsigned int fw_size, i; + const struct firmware *fw; + struct silead_fw_data *fw_data; + int error; + + dev_dbg(dev, "Firmware file name: %s", data->fw_name); + + error = request_firmware(&fw, data->fw_name, dev); + if (error) { + dev_err(dev, "Firmware request error %d\n", error); + return error; + } + + fw_size = fw->size / sizeof(*fw_data); + fw_data = (struct silead_fw_data *)fw->data; + + for (i = 0; i < fw_size; i++) { + error = i2c_smbus_write_i2c_block_data(client, + fw_data[i].offset, + 4, + (u8 *)&fw_data[i].val); + if (error) { + dev_err(dev, "Firmware load error %d\n", error); + break; + } + } + + release_firmware(fw); + return error ?: 0; +} + +static u32 silead_ts_get_status(struct i2c_client *client) +{ + int error; + __le32 status; + + error = i2c_smbus_read_i2c_block_data(client, SILEAD_REG_STATUS, + sizeof(status), (u8 *)&status); + if (error < 0) { + dev_err(&client->dev, "Status read error %d\n", error); + return error; + } + + return le32_to_cpu(status); +} + +static int silead_ts_get_id(struct i2c_client *client) +{ + struct silead_ts_data *data = i2c_get_clientdata(client); + __le32 chip_id; + int error; + + error = i2c_smbus_read_i2c_block_data(client, SILEAD_REG_ID, + sizeof(chip_id), (u8 *)&chip_id); + if (error < 0) { + dev_err(&client->dev, "Chip ID read error %d\n", error); + return error; + } + + data->chip_id = le32_to_cpu(chip_id); + dev_info(&client->dev, "Silead chip ID: 0x%8X", data->chip_id); + + return 0; +} + +static int silead_ts_setup(struct i2c_client *client) +{ + int error; + u32 status; + + silead_ts_set_power(client, SILEAD_POWER_OFF); + silead_ts_set_power(client, SILEAD_POWER_ON); + + error = silead_ts_get_id(client); + if (error) + return error; + + error = silead_ts_init(client); + if (error) + return error; + + error = silead_ts_reset(client); + if (error) + return error; + + error = silead_ts_load_fw(client); + if (error) + return error; + + error = silead_ts_startup(client); + if (error) + return error; + + status = silead_ts_get_status(client); + if (status != SILEAD_STATUS_OK) { + dev_err(&client->dev, + "Initialization error, status: 0x%X\n", status); + return -ENODEV; + } + + return 0; +} + +static irqreturn_t silead_ts_threaded_irq_handler(int irq, void *id) +{ + struct silead_ts_data *data = id; + struct i2c_client *client = data->client; + + silead_ts_read_data(client); + + return IRQ_HANDLED; +} + +static void silead_ts_read_props(struct i2c_client *client) +{ + struct silead_ts_data *data = i2c_get_clientdata(client); + struct device *dev = &client->dev; + const char *str; + int error; + + error = device_property_read_u32(dev, "silead,max-fingers", + &data->max_fingers); + if (error) { + dev_dbg(dev, "Max fingers read error %d\n", error); + data->max_fingers = 5; /* Most devices handle up-to 5 fingers */ + } + + error = device_property_read_string(dev, "touchscreen-fw-name", &str); + if (!error) + snprintf(data->fw_name, sizeof(data->fw_name), "%s", str); + else + dev_dbg(dev, "Firmware file name read error. Using default."); +} + +#ifdef CONFIG_ACPI +static int silead_ts_set_default_fw_name(struct silead_ts_data *data, + const struct i2c_device_id *id) +{ + const struct acpi_device_id *acpi_id; + struct device *dev = &data->client->dev; + int i; + + if (ACPI_HANDLE(dev)) { + acpi_id = acpi_match_device(dev->driver->acpi_match_table, dev); + if (!acpi_id) + return -ENODEV; + + snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw", + acpi_id->id); + + for (i = 0; i < strlen(data->fw_name); i++) + data->fw_name[i] = tolower(data->fw_name[i]); + } else { + snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw", + id->name); + } + + return 0; +} +#else +static int silead_ts_set_default_fw_name(struct silead_ts_data *data, + const struct i2c_device_id *id) +{ + snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw", id->name); + return 0; +} +#endif + +static int silead_ts_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct silead_ts_data *data; + struct device *dev = &client->dev; + int error; + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_I2C | + I2C_FUNC_SMBUS_READ_I2C_BLOCK | + I2C_FUNC_SMBUS_WRITE_I2C_BLOCK)) { + dev_err(dev, "I2C functionality check failed\n"); + return -ENXIO; + } + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + i2c_set_clientdata(client, data); + data->client = client; + + error = silead_ts_set_default_fw_name(data, id); + if (error) + return error; + + silead_ts_read_props(client); + + /* We must have the IRQ provided by DT or ACPI subsytem */ + if (client->irq <= 0) + return -ENODEV; + + /* Power GPIO pin */ + data->gpio_power = gpiod_get_optional(dev, "power", GPIOD_OUT_LOW); + if (IS_ERR(data->gpio_power)) { + if (PTR_ERR(data->gpio_power) != -EPROBE_DEFER) + dev_err(dev, "Shutdown GPIO request failed\n"); + return PTR_ERR(data->gpio_power); + } + + error = silead_ts_setup(client); + if (error) + return error; + + error = silead_ts_request_input_dev(data); + if (error) + return error; + + error = devm_request_threaded_irq(dev, client->irq, + NULL, silead_ts_threaded_irq_handler, + IRQF_ONESHOT, client->name, data); + if (error) { + if (error != -EPROBE_DEFER) + dev_err(dev, "IRQ request failed %d\n", error); + return error; + } + + return 0; +} + +static int __maybe_unused silead_ts_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + silead_ts_set_power(client, SILEAD_POWER_OFF); + return 0; +} + +static int __maybe_unused silead_ts_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + int error, status; + + silead_ts_set_power(client, SILEAD_POWER_ON); + + error = silead_ts_reset(client); + if (error) + return error; + + error = silead_ts_startup(client); + if (error) + return error; + + status = silead_ts_get_status(client); + if (status != SILEAD_STATUS_OK) { + dev_err(dev, "Resume error, status: 0x%02x\n", status); + return -ENODEV; + } + + return 0; +} + +static SIMPLE_DEV_PM_OPS(silead_ts_pm, silead_ts_suspend, silead_ts_resume); + +static const struct i2c_device_id silead_ts_id[] = { + { "gsl1680", 0 }, + { "gsl1688", 0 }, + { "gsl3670", 0 }, + { "gsl3675", 0 }, + { "gsl3692", 0 }, + { "mssl1680", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, silead_ts_id); + +#ifdef CONFIG_ACPI +static const struct acpi_device_id silead_ts_acpi_match[] = { + { "GSL1680", 0 }, + { "GSL1688", 0 }, + { "GSL3670", 0 }, + { "GSL3675", 0 }, + { "GSL3692", 0 }, + { "MSSL1680", 0 }, + { } +}; +MODULE_DEVICE_TABLE(acpi, silead_ts_acpi_match); +#endif + +static struct i2c_driver silead_ts_driver = { + .probe = silead_ts_probe, + .id_table = silead_ts_id, + .driver = { + .name = SILEAD_TS_NAME, + .acpi_match_table = ACPI_PTR(silead_ts_acpi_match), + .pm = &silead_ts_pm, + }, +}; +module_i2c_driver(silead_ts_driver); + +MODULE_AUTHOR("Robert Dolca <robert.dolca@intel.com>"); +MODULE_DESCRIPTION("Silead I2C touchscreen driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/input/touchscreen/sis_i2c.c b/drivers/input/touchscreen/sis_i2c.c new file mode 100644 index 000000000000..8d93f8c9a403 --- /dev/null +++ b/drivers/input/touchscreen/sis_i2c.c @@ -0,0 +1,413 @@ +/* + * Touch Screen driver for SiS 9200 family I2C Touch panels + * + * Copyright (C) 2015 SiS, Inc. + * Copyright (C) 2016 Nextfour Group + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/crc-itu-t.h> +#include <linux/delay.h> +#include <linux/i2c.h> +#include <linux/input.h> +#include <linux/input/mt.h> +#include <linux/interrupt.h> +#include <linux/gpio/consumer.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <asm/unaligned.h> + +#define SIS_I2C_NAME "sis_i2c_ts" + +/* + * The I2C packet format: + * le16 byte count + * u8 Report ID + * <contact data - variable length> + * u8 Number of contacts + * le16 Scan Time (optional) + * le16 CRC + * + * One touch point information consists of 6+ bytes, the order is: + * u8 contact state + * u8 finger id + * le16 x axis + * le16 y axis + * u8 contact width (optional) + * u8 contact height (optional) + * u8 pressure (optional) + * + * Maximum amount of data transmitted in one shot is 64 bytes, if controller + * needs to report more contacts than fit in one packet it will send true + * number of contacts in first packet and 0 as number of contacts in second + * packet. + */ + +#define SIS_MAX_PACKET_SIZE 64 + +#define SIS_PKT_LEN_OFFSET 0 +#define SIS_PKT_REPORT_OFFSET 2 /* Report ID/type */ +#define SIS_PKT_CONTACT_OFFSET 3 /* First contact */ + +#define SIS_SCAN_TIME_LEN 2 + +/* Supported report types */ +#define SIS_ALL_IN_ONE_PACKAGE 0x10 +#define SIS_PKT_IS_TOUCH(x) (((x) & 0x0f) == 0x01) +#define SIS_PKT_IS_HIDI2C(x) (((x) & 0x0f) == 0x06) + +/* Contact properties within report */ +#define SIS_PKT_HAS_AREA(x) ((x) & BIT(4)) +#define SIS_PKT_HAS_PRESSURE(x) ((x) & BIT(5)) +#define SIS_PKT_HAS_SCANTIME(x) ((x) & BIT(6)) + +/* Contact size */ +#define SIS_BASE_LEN_PER_CONTACT 6 +#define SIS_AREA_LEN_PER_CONTACT 2 +#define SIS_PRESSURE_LEN_PER_CONTACT 1 + +/* Offsets within contact data */ +#define SIS_CONTACT_STATUS_OFFSET 0 +#define SIS_CONTACT_ID_OFFSET 1 /* Contact ID */ +#define SIS_CONTACT_X_OFFSET 2 +#define SIS_CONTACT_Y_OFFSET 4 +#define SIS_CONTACT_WIDTH_OFFSET 6 +#define SIS_CONTACT_HEIGHT_OFFSET 7 +#define SIS_CONTACT_PRESSURE_OFFSET(id) (SIS_PKT_HAS_AREA(id) ? 8 : 6) + +/* Individual contact state */ +#define SIS_STATUS_UP 0x0 +#define SIS_STATUS_DOWN 0x3 + +/* Touchscreen parameters */ +#define SIS_MAX_FINGERS 10 +#define SIS_MAX_X 4095 +#define SIS_MAX_Y 4095 +#define SIS_MAX_PRESSURE 255 + +/* Resolution diagonal */ +#define SIS_AREA_LENGTH_LONGER 5792 +/*((SIS_MAX_X^2) + (SIS_MAX_Y^2))^0.5*/ +#define SIS_AREA_LENGTH_SHORT 5792 +#define SIS_AREA_UNIT (5792 / 32) + +struct sis_ts_data { + struct i2c_client *client; + struct input_dev *input; + + struct gpio_desc *attn_gpio; + struct gpio_desc *reset_gpio; + + u8 packet[SIS_MAX_PACKET_SIZE]; +}; + +static int sis_read_packet(struct i2c_client *client, u8 *buf, + unsigned int *num_contacts, + unsigned int *contact_size) +{ + int count_idx; + int ret; + u16 len; + u16 crc, pkg_crc; + u8 report_id; + + ret = i2c_master_recv(client, buf, SIS_MAX_PACKET_SIZE); + if (ret <= 0) + return -EIO; + + len = get_unaligned_le16(&buf[SIS_PKT_LEN_OFFSET]); + if (len > SIS_MAX_PACKET_SIZE) { + dev_err(&client->dev, + "%s: invalid packet length (%d vs %d)\n", + __func__, len, SIS_MAX_PACKET_SIZE); + return -E2BIG; + } + + if (len < 10) + return -EINVAL; + + report_id = buf[SIS_PKT_REPORT_OFFSET]; + count_idx = len - 1; + *contact_size = SIS_BASE_LEN_PER_CONTACT; + + if (report_id != SIS_ALL_IN_ONE_PACKAGE) { + if (SIS_PKT_IS_TOUCH(report_id)) { + /* + * Calculate CRC ignoring packet length + * in the beginning and CRC transmitted + * at the end of the packet. + */ + crc = crc_itu_t(0, buf + 2, len - 2 - 2); + pkg_crc = get_unaligned_le16(&buf[len - 2]); + + if (crc != pkg_crc) { + dev_err(&client->dev, + "%s: CRC Error (%d vs %d)\n", + __func__, crc, pkg_crc); + return -EINVAL; + } + + count_idx -= 2; + + } else if (!SIS_PKT_IS_HIDI2C(report_id)) { + dev_err(&client->dev, + "%s: invalid packet ID %#02x\n", + __func__, report_id); + return -EINVAL; + } + + if (SIS_PKT_HAS_SCANTIME(report_id)) + count_idx -= SIS_SCAN_TIME_LEN; + + if (SIS_PKT_HAS_AREA(report_id)) + *contact_size += SIS_AREA_LEN_PER_CONTACT; + if (SIS_PKT_HAS_PRESSURE(report_id)) + *contact_size += SIS_PRESSURE_LEN_PER_CONTACT; + } + + *num_contacts = buf[count_idx]; + return 0; +} + +static int sis_ts_report_contact(struct sis_ts_data *ts, const u8 *data, u8 id) +{ + struct input_dev *input = ts->input; + int slot; + u8 status = data[SIS_CONTACT_STATUS_OFFSET]; + u8 pressure; + u8 height, width; + u16 x, y; + + if (status != SIS_STATUS_DOWN && status != SIS_STATUS_UP) { + dev_err(&ts->client->dev, "Unexpected touch status: %#02x\n", + data[SIS_CONTACT_STATUS_OFFSET]); + return -EINVAL; + } + + slot = input_mt_get_slot_by_key(input, data[SIS_CONTACT_ID_OFFSET]); + if (slot < 0) + return -ENOENT; + + input_mt_slot(input, slot); + input_mt_report_slot_state(input, MT_TOOL_FINGER, + status == SIS_STATUS_DOWN); + + if (status == SIS_STATUS_DOWN) { + pressure = height = width = 1; + if (id != SIS_ALL_IN_ONE_PACKAGE) { + if (SIS_PKT_HAS_AREA(id)) { + width = data[SIS_CONTACT_WIDTH_OFFSET]; + height = data[SIS_CONTACT_HEIGHT_OFFSET]; + } + + if (SIS_PKT_HAS_PRESSURE(id)) + pressure = + data[SIS_CONTACT_PRESSURE_OFFSET(id)]; + } + + x = get_unaligned_le16(&data[SIS_CONTACT_X_OFFSET]); + y = get_unaligned_le16(&data[SIS_CONTACT_Y_OFFSET]); + + input_report_abs(input, ABS_MT_TOUCH_MAJOR, + width * SIS_AREA_UNIT); + input_report_abs(input, ABS_MT_TOUCH_MINOR, + height * SIS_AREA_UNIT); + input_report_abs(input, ABS_MT_PRESSURE, pressure); + input_report_abs(input, ABS_MT_POSITION_X, x); + input_report_abs(input, ABS_MT_POSITION_Y, y); + } + + return 0; +} + +static void sis_ts_handle_packet(struct sis_ts_data *ts) +{ + const u8 *contact; + unsigned int num_to_report = 0; + unsigned int num_contacts; + unsigned int num_reported; + unsigned int contact_size; + int error; + u8 report_id; + + do { + error = sis_read_packet(ts->client, ts->packet, + &num_contacts, &contact_size); + if (error) + break; + + if (num_to_report == 0) { + num_to_report = num_contacts; + } else if (num_contacts != 0) { + dev_err(&ts->client->dev, + "%s: nonzero (%d) point count in tail packet\n", + __func__, num_contacts); + break; + } + + report_id = ts->packet[SIS_PKT_REPORT_OFFSET]; + contact = &ts->packet[SIS_PKT_CONTACT_OFFSET]; + num_reported = 0; + + while (num_to_report > 0) { + error = sis_ts_report_contact(ts, contact, report_id); + if (error) + break; + + contact += contact_size; + num_to_report--; + num_reported++; + + if (report_id != SIS_ALL_IN_ONE_PACKAGE && + num_reported >= 5) { + /* + * The remainder of contacts is sent + * in the 2nd packet. + */ + break; + } + } + } while (num_to_report > 0); + + input_mt_sync_frame(ts->input); + input_sync(ts->input); +} + +static irqreturn_t sis_ts_irq_handler(int irq, void *dev_id) +{ + struct sis_ts_data *ts = dev_id; + + do { + sis_ts_handle_packet(ts); + } while (ts->attn_gpio && gpiod_get_value_cansleep(ts->attn_gpio)); + + return IRQ_HANDLED; +} + +static void sis_ts_reset(struct sis_ts_data *ts) +{ + if (ts->reset_gpio) { + /* Get out of reset */ + usleep_range(1000, 2000); + gpiod_set_value(ts->reset_gpio, 1); + usleep_range(1000, 2000); + gpiod_set_value(ts->reset_gpio, 0); + msleep(100); + } +} + +static int sis_ts_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct sis_ts_data *ts; + struct input_dev *input; + int error; + + ts = devm_kzalloc(&client->dev, sizeof(*ts), GFP_KERNEL); + if (!ts) + return -ENOMEM; + + ts->client = client; + i2c_set_clientdata(client, ts); + + ts->attn_gpio = devm_gpiod_get_optional(&client->dev, + "attn", GPIOD_IN); + if (IS_ERR(ts->attn_gpio)) { + error = PTR_ERR(ts->attn_gpio); + if (error != -EPROBE_DEFER) + dev_err(&client->dev, + "Failed to get attention GPIO: %d\n", error); + return error; + } + + ts->reset_gpio = devm_gpiod_get_optional(&client->dev, + "reset", GPIOD_OUT_LOW); + if (IS_ERR(ts->reset_gpio)) { + error = PTR_ERR(ts->reset_gpio); + if (error != -EPROBE_DEFER) + dev_err(&client->dev, + "Failed to get reset GPIO: %d\n", error); + return error; + } + + sis_ts_reset(ts); + + ts->input = input = devm_input_allocate_device(&client->dev); + if (!input) { + dev_err(&client->dev, "Failed to allocate input device\n"); + return -ENOMEM; + } + + input->name = "SiS Touchscreen"; + input->id.bustype = BUS_I2C; + + input_set_abs_params(input, ABS_MT_POSITION_X, 0, SIS_MAX_X, 0, 0); + input_set_abs_params(input, ABS_MT_POSITION_Y, 0, SIS_MAX_Y, 0, 0); + input_set_abs_params(input, ABS_MT_PRESSURE, 0, SIS_MAX_PRESSURE, 0, 0); + input_set_abs_params(input, ABS_MT_TOUCH_MAJOR, + 0, SIS_AREA_LENGTH_LONGER, 0, 0); + input_set_abs_params(input, ABS_MT_TOUCH_MINOR, + 0, SIS_AREA_LENGTH_SHORT, 0, 0); + + error = input_mt_init_slots(input, SIS_MAX_FINGERS, INPUT_MT_DIRECT); + if (error) { + dev_err(&client->dev, + "Failed to initialize MT slots: %d\n", error); + return error; + } + + error = devm_request_threaded_irq(&client->dev, client->irq, + NULL, sis_ts_irq_handler, + IRQF_ONESHOT, + client->name, ts); + if (error) { + dev_err(&client->dev, "Failed to request IRQ: %d\n", error); + return error; + } + + error = input_register_device(ts->input); + if (error) { + dev_err(&client->dev, + "Failed to register input device: %d\n", error); + return error; + } + + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id sis_ts_dt_ids[] = { + { .compatible = "sis,9200-ts" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, sis_ts_dt_ids); +#endif + +static const struct i2c_device_id sis_ts_id[] = { + { SIS_I2C_NAME, 0 }, + { "9200-ts", 0 }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(i2c, sis_ts_id); + +static struct i2c_driver sis_ts_driver = { + .driver = { + .name = SIS_I2C_NAME, + .of_match_table = of_match_ptr(sis_ts_dt_ids), + }, + .probe = sis_ts_probe, + .id_table = sis_ts_id, +}; +module_i2c_driver(sis_ts_driver); + +MODULE_DESCRIPTION("SiS 9200 Family Touchscreen Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Mika Penttilä <mika.penttila@nextfour.com>"); diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 33c177ba93be..96de97a46079 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2375,7 +2375,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, static dma_addr_t map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { phys_addr_t paddr = page_to_phys(page) + offset; struct protection_domain *domain; @@ -2398,7 +2398,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, * The exported unmap_single function for dma_ops. */ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { struct protection_domain *domain; struct dma_ops_domain *dma_dom; @@ -2444,7 +2444,7 @@ static int sg_num_pages(struct device *dev, */ static int map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { int mapped_pages = 0, npages = 0, prot = 0, i; struct protection_domain *domain; @@ -2525,7 +2525,7 @@ out_err: */ static void unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct protection_domain *domain; struct dma_ops_domain *dma_dom; @@ -2548,7 +2548,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, */ static void *alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { u64 dma_mask = dev->coherent_dma_mask; struct protection_domain *domain; @@ -2604,7 +2604,7 @@ out_free: */ static void free_coherent(struct device *dev, size_t size, void *virt_addr, dma_addr_t dma_addr, - struct dma_attrs *attrs) + unsigned long attrs) { struct protection_domain *domain; struct dma_ops_domain *dma_dom; diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index ea5a9ebf0f78..00c8a08d56e7 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -68,7 +68,8 @@ void iommu_put_dma_cookie(struct iommu_domain *domain) if (!iovad) return; - put_iova_domain(iovad); + if (iovad->granule) + put_iova_domain(iovad); kfree(iovad); domain->iova_cookie = NULL; } @@ -151,12 +152,15 @@ int dma_direction_to_prot(enum dma_data_direction dir, bool coherent) } } -static struct iova *__alloc_iova(struct iova_domain *iovad, size_t size, +static struct iova *__alloc_iova(struct iommu_domain *domain, size_t size, dma_addr_t dma_limit) { + struct iova_domain *iovad = domain->iova_cookie; unsigned long shift = iova_shift(iovad); unsigned long length = iova_align(iovad, size) >> shift; + if (domain->geometry.force_aperture) + dma_limit = min(dma_limit, domain->geometry.aperture_end); /* * Enforce size-alignment to be safe - there could perhaps be an * attribute to control this per-device, or at least per-domain... @@ -286,7 +290,7 @@ void iommu_dma_free(struct device *dev, struct page **pages, size_t size, * or NULL on failure. */ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, - struct dma_attrs *attrs, int prot, dma_addr_t *handle, + unsigned long attrs, int prot, dma_addr_t *handle, void (*flush_page)(struct device *, const void *, phys_addr_t)) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); @@ -306,7 +310,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, } else { size = ALIGN(size, min_size); } - if (dma_get_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, attrs)) + if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES) alloc_sizes = min_size; count = PAGE_ALIGN(size) >> PAGE_SHIFT; @@ -314,7 +318,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, if (!pages) return NULL; - iova = __alloc_iova(iovad, size, dev->coherent_dma_mask); + iova = __alloc_iova(domain, size, dev->coherent_dma_mask); if (!iova) goto out_free_pages; @@ -386,7 +390,7 @@ dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, phys_addr_t phys = page_to_phys(page) + offset; size_t iova_off = iova_offset(iovad, phys); size_t len = iova_align(iovad, size + iova_off); - struct iova *iova = __alloc_iova(iovad, len, dma_get_mask(dev)); + struct iova *iova = __alloc_iova(domain, len, dma_get_mask(dev)); if (!iova) return DMA_ERROR_CODE; @@ -400,7 +404,7 @@ dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, } void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle); } @@ -538,7 +542,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, prev = s; } - iova = __alloc_iova(iovad, iova_len, dma_get_mask(dev)); + iova = __alloc_iova(domain, iova_len, dma_get_mask(dev)); if (!iova) goto out_restore_sg; @@ -560,7 +564,7 @@ out_restore_sg: } void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { /* * The scatterlist segments are mapped into a single diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index afbaa2c69a59..ebb5bf3ddbd9 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3552,7 +3552,7 @@ error: static dma_addr_t intel_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { return __intel_map_single(dev, page_to_phys(page) + offset, size, dir, *dev->dma_mask); @@ -3711,14 +3711,14 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { intel_unmap(dev, dev_addr, size); } static void *intel_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) + unsigned long attrs) { struct page *page = NULL; int order; @@ -3764,7 +3764,7 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, } static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { int order; struct page *page = virt_to_page(vaddr); @@ -3779,7 +3779,7 @@ static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK; unsigned long nrpages = 0; @@ -3808,7 +3808,7 @@ static int intel_nontranslate_map_sg(struct device *hddev, } static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { int i; struct dmar_domain *domain; diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index 9ed0a8462ccf..3dab13b4a211 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -55,19 +55,19 @@ struct mtk_iommu_data { bool enable_4GB; }; -static int compare_of(struct device *dev, void *data) +static inline int compare_of(struct device *dev, void *data) { return dev->of_node == data; } -static int mtk_iommu_bind(struct device *dev) +static inline int mtk_iommu_bind(struct device *dev) { struct mtk_iommu_data *data = dev_get_drvdata(dev); return component_bind_all(dev, &data->smi_imu); } -static void mtk_iommu_unbind(struct device *dev) +static inline void mtk_iommu_unbind(struct device *dev) { struct mtk_iommu_data *data = dev_get_drvdata(dev); diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 3786d0f21972..c5f33c3bd228 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -359,7 +359,7 @@ static void gic_handle_shared_int(bool chained) pending_reg += gic_reg_step; intrmask_reg += gic_reg_step; - if (!config_enabled(CONFIG_64BIT) || mips_cm_is64) + if (!IS_ENABLED(CONFIG_64BIT) || mips_cm_is64) continue; pending[i] |= (u64)gic_read(pending_reg) << 32; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 69f16f43f8ab..4b177fe11ebb 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -208,7 +208,7 @@ static void bch_data_insert_start(struct closure *cl) * Journal writes are marked REQ_PREFLUSH; if the original write was a * flush, it'll wait on the journal write. */ - bio->bi_rw &= ~(REQ_PREFLUSH|REQ_FUA); + bio->bi_opf &= ~(REQ_PREFLUSH|REQ_FUA); do { unsigned i; @@ -405,7 +405,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) if (!congested && mode == CACHE_MODE_WRITEBACK && op_is_write(bio_op(bio)) && - (bio->bi_rw & REQ_SYNC)) + (bio->bi_opf & REQ_SYNC)) goto rescale; spin_lock(&dc->io_lock); @@ -668,7 +668,7 @@ static inline struct search *search_alloc(struct bio *bio, s->iop.write_prio = 0; s->iop.error = 0; s->iop.flags = 0; - s->iop.flush_journal = (bio->bi_rw & (REQ_PREFLUSH|REQ_FUA)) != 0; + s->iop.flush_journal = (bio->bi_opf & (REQ_PREFLUSH|REQ_FUA)) != 0; s->iop.wq = bcache_wq; return s; @@ -796,8 +796,8 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, goto out_submit; } - if (!(bio->bi_rw & REQ_RAHEAD) && - !(bio->bi_rw & REQ_META) && + if (!(bio->bi_opf & REQ_RAHEAD) && + !(bio->bi_opf & REQ_META) && s->iop.c->gc_stats.in_use < CUTOFF_CACHE_READA) reada = min_t(sector_t, dc->readahead >> 9, bdev_sectors(bio->bi_bdev) - bio_end_sector(bio)); @@ -920,7 +920,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) bch_writeback_add(dc); s->iop.bio = bio; - if (bio->bi_rw & REQ_PREFLUSH) { + if (bio->bi_opf & REQ_PREFLUSH) { /* Also need to send a flush to the backing device */ struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0, dc->disk.bio_split); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 88ef6d14cce3..95a4ca6ce6ff 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -347,7 +347,7 @@ static void uuid_io(struct cache_set *c, int op, unsigned long op_flags, for (i = 0; i < KEY_PTRS(k); i++) { struct bio *bio = bch_bbio_alloc(c); - bio->bi_rw = REQ_SYNC|REQ_META|op_flags; + bio->bi_opf = REQ_SYNC | REQ_META | op_flags; bio->bi_iter.bi_size = KEY_SIZE(k) << 9; bio->bi_end_io = uuid_endio; diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 073a042aed24..301eaf565167 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -57,7 +57,7 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, if (would_skip) return false; - return bio->bi_rw & REQ_SYNC || + return bio->bi_opf & REQ_SYNC || in_use <= CUTOFF_WRITEBACK; } diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 718744db62df..59b2c50562e4 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -788,7 +788,7 @@ static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) spin_lock_irqsave(&cache->lock, flags); if (cache->need_tick_bio && - !(bio->bi_rw & (REQ_FUA | REQ_PREFLUSH)) && + !(bio->bi_opf & (REQ_FUA | REQ_PREFLUSH)) && bio_op(bio) != REQ_OP_DISCARD) { pb->tick = true; cache->need_tick_bio = false; @@ -830,7 +830,7 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio) static int bio_triggers_commit(struct cache *cache, struct bio *bio) { - return bio->bi_rw & (REQ_PREFLUSH | REQ_FUA); + return bio->bi_opf & (REQ_PREFLUSH | REQ_FUA); } /* @@ -1069,7 +1069,7 @@ static void dec_io_migrations(struct cache *cache) static bool discard_or_flush(struct bio *bio) { return bio_op(bio) == REQ_OP_DISCARD || - bio->bi_rw & (REQ_PREFLUSH | REQ_FUA); + bio->bi_opf & (REQ_PREFLUSH | REQ_FUA); } static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell) @@ -1980,7 +1980,7 @@ static void process_deferred_bios(struct cache *cache) bio = bio_list_pop(&bios); - if (bio->bi_rw & REQ_PREFLUSH) + if (bio->bi_opf & REQ_PREFLUSH) process_flush_bio(cache, bio); else if (bio_op(bio) == REQ_OP_DISCARD) process_discard_bio(cache, &structs, bio); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 8f2e3e2ffd26..4e9784b4e0ac 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1136,7 +1136,7 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone) clone->bi_private = io; clone->bi_end_io = crypt_endio; clone->bi_bdev = cc->dev->bdev; - bio_set_op_attrs(clone, bio_op(io->base_bio), io->base_bio->bi_rw); + bio_set_op_attrs(clone, bio_op(io->base_bio), io->base_bio->bi_opf); } static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) @@ -1915,7 +1915,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) * - for REQ_PREFLUSH device-mapper core ensures that no IO is in-flight * - for REQ_OP_DISCARD caller must use flush if IO ordering matters */ - if (unlikely(bio->bi_rw & REQ_PREFLUSH || + if (unlikely(bio->bi_opf & REQ_PREFLUSH || bio_op(bio) == REQ_OP_DISCARD)) { bio->bi_bdev = cc->dev->bdev; if (bio_sectors(bio)) diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index 2faf49d8f4d7..bf2b2676cb8a 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -1542,7 +1542,7 @@ static int era_map(struct dm_target *ti, struct bio *bio) /* * REQ_PREFLUSH bios carry no data, so we're not interested in them. */ - if (!(bio->bi_rw & REQ_PREFLUSH) && + if (!(bio->bi_opf & REQ_PREFLUSH) && (bio_data_dir(bio) == WRITE) && !metadata_current_marked(era->md, block)) { defer_bio(era, bio); diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 29b99fb6a16a..97e446d54a15 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -16,7 +16,7 @@ #define DM_MSG_PREFIX "flakey" #define all_corrupt_bio_flags_match(bio, fc) \ - (((bio)->bi_rw & (fc)->corrupt_bio_flags) == (fc)->corrupt_bio_flags) + (((bio)->bi_opf & (fc)->corrupt_bio_flags) == (fc)->corrupt_bio_flags) /* * Flakey: Used for testing only, simulates intermittent, @@ -266,9 +266,9 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) data[fc->corrupt_bio_byte - 1] = fc->corrupt_bio_value; DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " - "(rw=%c bi_rw=%u bi_sector=%llu cur_bytes=%u)\n", + "(rw=%c bi_opf=%u bi_sector=%llu cur_bytes=%u)\n", bio, fc->corrupt_bio_value, fc->corrupt_bio_byte, - (bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_rw, + (bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_opf, (unsigned long long)bio->bi_iter.bi_sector, bio_bytes); } } @@ -289,10 +289,16 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) pb->bio_submitted = true; /* - * Map reads as normal. + * Map reads as normal only if corrupt_bio_byte set. */ - if (bio_data_dir(bio) == READ) - goto map_bio; + if (bio_data_dir(bio) == READ) { + /* If flags were specified, only corrupt those that match. */ + if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) && + all_corrupt_bio_flags_match(bio, fc)) + goto map_bio; + else + return -EIO; + } /* * Drop writes? @@ -330,12 +336,13 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error) /* * Corrupt successful READs while in down state. - * If flags were specified, only corrupt those that match. */ - if (fc->corrupt_bio_byte && !error && pb->bio_submitted && - (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) && - all_corrupt_bio_flags_match(bio, fc)) - corrupt_bio_data(bio, fc); + if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) { + if (fc->corrupt_bio_byte) + corrupt_bio_data(bio, fc); + else + return -EIO; + } return error; } diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index daa03e41654a..0bf1a12e35fe 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -505,9 +505,9 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp, * New collapsed (a)synchronous interface. * * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug - * the queue with blk_unplug() some time later or set REQ_SYNC in io_req->bi_rw. - * If you fail to do one of these, the IO will be submitted to the disk after - * q->unplug_delay, which defaults to 3ms in blk-settings.c. + * the queue with blk_unplug() some time later or set REQ_SYNC in + * io_req->bi_opf. If you fail to do one of these, the IO will be submitted to + * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c. */ int dm_io(struct dm_io_request *io_req, unsigned num_regions, struct dm_io_region *where, unsigned long *sync_error_bits) diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index b5dbf7a0515e..4ab68033f9d1 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -555,8 +555,8 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio) struct bio_vec bv; size_t alloc_size; int i = 0; - bool flush_bio = (bio->bi_rw & REQ_PREFLUSH); - bool fua_bio = (bio->bi_rw & REQ_FUA); + bool flush_bio = (bio->bi_opf & REQ_PREFLUSH); + bool fua_bio = (bio->bi_opf & REQ_FUA); bool discard_bio = (bio_op(bio) == REQ_OP_DISCARD); pb->block = NULL; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 7eac080fcb18..ac734e5bbe48 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -507,13 +507,27 @@ static bool __must_push_back(struct multipath *m) static bool must_push_back_rq(struct multipath *m) { - return (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) || - __must_push_back(m)); + bool r; + unsigned long flags; + + spin_lock_irqsave(&m->lock, flags); + r = (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) || + __must_push_back(m)); + spin_unlock_irqrestore(&m->lock, flags); + + return r; } static bool must_push_back_bio(struct multipath *m) { - return __must_push_back(m); + bool r; + unsigned long flags; + + spin_lock_irqsave(&m->lock, flags); + r = __must_push_back(m); + spin_unlock_irqrestore(&m->lock, flags); + + return r; } /* @@ -647,7 +661,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m bio->bi_error = 0; bio->bi_bdev = pgpath->path.dev->bdev; - bio->bi_rw |= REQ_FAILFAST_TRANSPORT; + bio->bi_opf |= REQ_FAILFAST_TRANSPORT; if (pgpath->pg->ps.type->start_io) pgpath->pg->ps.type->start_io(&pgpath->pg->ps, @@ -1680,12 +1694,14 @@ static void multipath_postsuspend(struct dm_target *ti) static void multipath_resume(struct dm_target *ti) { struct multipath *m = ti->private; + unsigned long flags; + spin_lock_irqsave(&m->lock, flags); if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); else clear_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); - smp_mb__after_atomic(); + spin_unlock_irqrestore(&m->lock, flags); } /* diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 84983549b5e1..1b9795d75ef8 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1270,7 +1270,7 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as, } rs->md.sync_speed_min = (int)value; } else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_MAX_RECOVERY_RATE))) { - if (test_and_set_bit(__CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags)) { + if (test_and_set_bit(__CTR_FLAG_MAX_RECOVERY_RATE, &rs->ctr_flags)) { rs->ti->error = "Only one max_recovery_rate argument pair allowed"; return -EINVAL; } @@ -1960,6 +1960,7 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev) sb->data_offset = cpu_to_le64(rdev->data_offset); sb->new_data_offset = cpu_to_le64(rdev->new_data_offset); sb->sectors = cpu_to_le64(rdev->sectors); + sb->incompat_features = cpu_to_le32(0); /* Zero out the rest of the payload after the size of the superblock */ memset(sb + 1, 0, rdev->sb_size - sizeof(*sb)); @@ -3577,7 +3578,6 @@ static int raid_preresume(struct dm_target *ti) /* Be prepared for mddev_resume() in raid_resume() */ set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) { - set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); set_bit(MD_RECOVERY_SYNC, &mddev->recovery); mddev->resync_min = mddev->recovery_cp; } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index dac55b254a09..bdf1606f67bc 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -657,7 +657,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio) struct mirror *m; struct dm_io_request io_req = { .bi_op = REQ_OP_WRITE, - .bi_op_flags = bio->bi_rw & WRITE_FLUSH_FUA, + .bi_op_flags = bio->bi_opf & WRITE_FLUSH_FUA, .mem.type = DM_IO_BIO, .mem.ptr.bio = bio, .notify.fn = write_callback, @@ -704,7 +704,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) bio_list_init(&requeue); while ((bio = bio_list_pop(writes))) { - if ((bio->bi_rw & REQ_PREFLUSH) || + if ((bio->bi_opf & REQ_PREFLUSH) || (bio_op(bio) == REQ_OP_DISCARD)) { bio_list_add(&sync, bio); continue; @@ -1217,7 +1217,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio) * If region is not in-sync queue the bio. */ if (!r || (r == -EWOULDBLOCK)) { - if (bio->bi_rw & REQ_RAHEAD) + if (bio->bi_opf & REQ_RAHEAD) return -EWOULDBLOCK; queue_bio(ms, bio, rw); @@ -1253,7 +1253,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) * We need to dec pending if this was a write. */ if (rw == WRITE) { - if (!(bio->bi_rw & REQ_PREFLUSH) && + if (!(bio->bi_opf & REQ_PREFLUSH) && bio_op(bio) != REQ_OP_DISCARD) dm_rh_dec(ms->rh, bio_record->write_region); return error; @@ -1262,7 +1262,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) if (error == -EOPNOTSUPP) goto out; - if ((error == -EWOULDBLOCK) && (bio->bi_rw & REQ_RAHEAD)) + if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD)) goto out; if (unlikely(error)) { diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c index b11813431f31..85c32b22a420 100644 --- a/drivers/md/dm-region-hash.c +++ b/drivers/md/dm-region-hash.c @@ -398,7 +398,7 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio) region_t region = dm_rh_bio_to_region(rh, bio); int recovering = 0; - if (bio->bi_rw & REQ_PREFLUSH) { + if (bio->bi_opf & REQ_PREFLUSH) { rh->flush_failure = 1; return; } @@ -526,7 +526,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios) struct bio *bio; for (bio = bios->head; bio; bio = bio->bi_next) { - if (bio->bi_rw & REQ_PREFLUSH || bio_op(bio) == REQ_OP_DISCARD) + if (bio->bi_opf & REQ_PREFLUSH || bio_op(bio) == REQ_OP_DISCARD) continue; rh_inc(rh, dm_rh_bio_to_region(rh, bio)); } diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 7a9661868496..1ca7463e8bb2 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -78,6 +78,7 @@ void dm_start_queue(struct request_queue *q) if (!q->mq_ops) dm_old_start_queue(q); else { + queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, q); blk_mq_start_stopped_hw_queues(q, true); blk_mq_kick_requeue_list(q); } @@ -101,8 +102,14 @@ void dm_stop_queue(struct request_queue *q) { if (!q->mq_ops) dm_old_stop_queue(q); - else + else { + spin_lock_irq(q->queue_lock); + queue_flag_set(QUEUE_FLAG_STOPPED, q); + spin_unlock_irq(q->queue_lock); + + blk_mq_cancel_requeue_work(q); blk_mq_stop_hw_queues(q); + } } static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md, @@ -864,6 +871,17 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, dm_put_live_table(md, srcu_idx); } + /* + * On suspend dm_stop_queue() handles stopping the blk-mq + * request_queue BUT: even though the hw_queues are marked + * BLK_MQ_S_STOPPED at that point there is still a race that + * is allowing block/blk-mq.c to call ->queue_rq against a + * hctx that it really shouldn't. The following check guards + * against this rarity (albeit _not_ race-free). + */ + if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state))) + return BLK_MQ_RQ_QUEUE_BUSY; + if (ti->type->busy && ti->type->busy(ti)) return BLK_MQ_RQ_QUEUE_BUSY; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index ce2a910709f7..c65feeada864 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1680,7 +1680,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) init_tracked_chunk(bio); - if (bio->bi_rw & REQ_PREFLUSH) { + if (bio->bi_opf & REQ_PREFLUSH) { bio->bi_bdev = s->cow->bdev; return DM_MAPIO_REMAPPED; } @@ -1800,7 +1800,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) init_tracked_chunk(bio); - if (bio->bi_rw & REQ_PREFLUSH) { + if (bio->bi_opf & REQ_PREFLUSH) { if (!dm_bio_get_target_bio_nr(bio)) bio->bi_bdev = s->origin->bdev; else @@ -2286,7 +2286,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio) bio->bi_bdev = o->dev->bdev; - if (unlikely(bio->bi_rw & REQ_PREFLUSH)) + if (unlikely(bio->bi_opf & REQ_PREFLUSH)) return DM_MAPIO_REMAPPED; if (bio_data_dir(bio) != WRITE) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 83f1d4667195..28193a57bf47 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -286,7 +286,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) uint32_t stripe; unsigned target_bio_nr; - if (bio->bi_rw & REQ_PREFLUSH) { + if (bio->bi_opf & REQ_PREFLUSH) { target_bio_nr = dm_bio_get_target_bio_nr(bio); BUG_ON(target_bio_nr >= sc->stripes); bio->bi_bdev = sc->stripe[target_bio_nr].dev->bdev; @@ -383,7 +383,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error) if (!error) return 0; /* I/O complete */ - if ((error == -EWOULDBLOCK) && (bio->bi_rw & REQ_RAHEAD)) + if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD)) return error; if (error == -EOPNOTSUPP) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 197ea2003400..d1c05c12a9db 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -699,7 +699,7 @@ static void remap_to_origin(struct thin_c *tc, struct bio *bio) static int bio_triggers_commit(struct thin_c *tc, struct bio *bio) { - return (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA)) && + return (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) && dm_thin_changed_this_transaction(tc->td); } @@ -870,7 +870,7 @@ static void __inc_remap_and_issue_cell(void *context, struct bio *bio; while ((bio = bio_list_pop(&cell->bios))) { - if (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA) || + if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) || bio_op(bio) == REQ_OP_DISCARD) bio_list_add(&info->defer_bios, bio); else { @@ -1717,7 +1717,7 @@ static void __remap_and_issue_shared_cell(void *context, while ((bio = bio_list_pop(&cell->bios))) { if ((bio_data_dir(bio) == WRITE) || - (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA) || + (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) || bio_op(bio) == REQ_OP_DISCARD)) bio_list_add(&info->defer_bios, bio); else { @@ -2635,7 +2635,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } - if (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA) || + if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) || bio_op(bio) == REQ_OP_DISCARD) { thin_defer_bio_with_throttle(tc, bio); return DM_MAPIO_SUBMITTED; diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index 618b8752dcf1..b616f11d8473 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -37,7 +37,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio) { switch (bio_op(bio)) { case REQ_OP_READ: - if (bio->bi_rw & REQ_RAHEAD) { + if (bio->bi_opf & REQ_RAHEAD) { /* readahead of null bytes only wastes buffer cache */ return -EIO; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 25d1d97154a8..fa9b1cb4438a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -798,12 +798,12 @@ static void dec_pending(struct dm_io *io, int error) if (io_error == DM_ENDIO_REQUEUE) return; - if ((bio->bi_rw & REQ_PREFLUSH) && bio->bi_iter.bi_size) { + if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) { /* * Preflush done for flush with data, reissue * without REQ_PREFLUSH. */ - bio->bi_rw &= ~REQ_PREFLUSH; + bio->bi_opf &= ~REQ_PREFLUSH; queue_io(md, bio); } else { /* done with normal IO or empty flush */ @@ -964,7 +964,7 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) { struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT; - BUG_ON(bio->bi_rw & REQ_PREFLUSH); + BUG_ON(bio->bi_opf & REQ_PREFLUSH); BUG_ON(bi_size > *tio->len_ptr); BUG_ON(n_sectors > bi_size); *tio->len_ptr -= bi_size - n_sectors; @@ -1252,7 +1252,7 @@ static void __split_and_process_bio(struct mapped_device *md, start_io_acct(ci.io); - if (bio->bi_rw & REQ_PREFLUSH) { + if (bio->bi_opf & REQ_PREFLUSH) { ci.bio = &ci.md->flush_bio; ci.sector_count = 0; error = __send_empty_flush(&ci); @@ -1290,7 +1290,7 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio) if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { dm_put_live_table(md, srcu_idx); - if (!(bio->bi_rw & REQ_RAHEAD)) + if (!(bio->bi_opf & REQ_RAHEAD)) queue_io(md, bio); else bio_io_error(bio); @@ -2082,7 +2082,8 @@ static void unlock_fs(struct mapped_device *md) * Caller must hold md->suspend_lock */ static int __dm_suspend(struct mapped_device *md, struct dm_table *map, - unsigned suspend_flags, int interruptible) + unsigned suspend_flags, int interruptible, + int dmf_suspended_flag) { bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG; bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG; @@ -2149,6 +2150,8 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map, * to finish. */ r = dm_wait_for_completion(md, interruptible); + if (!r) + set_bit(dmf_suspended_flag, &md->flags); if (noflush) clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); @@ -2210,12 +2213,10 @@ retry: map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); - r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE); + r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE, DMF_SUSPENDED); if (r) goto out_unlock; - set_bit(DMF_SUSPENDED, &md->flags); - dm_table_postsuspend_targets(map); out_unlock: @@ -2309,9 +2310,8 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla * would require changing .presuspend to return an error -- avoid this * until there is a need for more elaborate variants of internal suspend. */ - (void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE); - - set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); + (void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE, + DMF_SUSPENDED_INTERNALLY); dm_table_postsuspend_targets(map); } diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 70ff888d25d0..86f5d435901d 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -221,7 +221,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio) struct bio *split; sector_t start_sector, end_sector, data_offset; - if (unlikely(bio->bi_rw & REQ_PREFLUSH)) { + if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { md_flush_request(mddev, bio); return; } diff --git a/drivers/md/md.c b/drivers/md/md.c index 2c3ab6f5e6be..d646f6e444f0 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -285,7 +285,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) */ sectors = bio_sectors(bio); /* bio could be mergeable after passing to underlayer */ - bio->bi_rw &= ~REQ_NOMERGE; + bio->bi_opf &= ~REQ_NOMERGE; mddev->pers->make_request(mddev, bio); cpu = part_stat_lock(); @@ -414,7 +414,7 @@ static void md_submit_flush_data(struct work_struct *ws) /* an empty barrier - all done */ bio_endio(bio); else { - bio->bi_rw &= ~REQ_PREFLUSH; + bio->bi_opf &= ~REQ_PREFLUSH; mddev->pers->make_request(mddev, bio); } diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 4974682842ae..673efbd6fc47 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -91,7 +91,7 @@ static void multipath_end_request(struct bio *bio) if (!bio->bi_error) multipath_end_bh_io(mp_bh, 0); - else if (!(bio->bi_rw & REQ_RAHEAD)) { + else if (!(bio->bi_opf & REQ_RAHEAD)) { /* * oops, IO error: */ @@ -112,7 +112,7 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio) struct multipath_bh * mp_bh; struct multipath_info *multipath; - if (unlikely(bio->bi_rw & REQ_PREFLUSH)) { + if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { md_flush_request(mddev, bio); return; } @@ -135,7 +135,7 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio) mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset; mp_bh->bio.bi_bdev = multipath->rdev->bdev; - mp_bh->bio.bi_rw |= REQ_FAILFAST_TRANSPORT; + mp_bh->bio.bi_opf |= REQ_FAILFAST_TRANSPORT; mp_bh->bio.bi_end_io = multipath_end_request; mp_bh->bio.bi_private = mp_bh; generic_make_request(&mp_bh->bio); @@ -360,7 +360,7 @@ static void multipathd(struct md_thread *thread) bio->bi_iter.bi_sector += conf->multipaths[mp_bh->path].rdev->data_offset; bio->bi_bdev = conf->multipaths[mp_bh->path].rdev->bdev; - bio->bi_rw |= REQ_FAILFAST_TRANSPORT; + bio->bi_opf |= REQ_FAILFAST_TRANSPORT; bio->bi_end_io = multipath_end_request; bio->bi_private = mp_bh; generic_make_request(bio); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index c3d439083212..258986a2699d 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -458,7 +458,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) struct md_rdev *tmp_dev; struct bio *split; - if (unlikely(bio->bi_rw & REQ_PREFLUSH)) { + if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { md_flush_request(mddev, bio); return; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 46168ef2e279..21dc00eb1989 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1043,8 +1043,8 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio) unsigned long flags; const int op = bio_op(bio); const int rw = bio_data_dir(bio); - const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); - const unsigned long do_flush_fua = (bio->bi_rw & + const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); + const unsigned long do_flush_fua = (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)); struct md_rdev *blocked_rdev; struct blk_plug_cb *cb; @@ -2318,7 +2318,7 @@ read_more: raid_end_bio_io(r1_bio); } else { const unsigned long do_sync - = r1_bio->master_bio->bi_rw & REQ_SYNC; + = r1_bio->master_bio->bi_opf & REQ_SYNC; if (bio) { r1_bio->bios[r1_bio->read_disk] = mddev->ro ? IO_BLOCKED : NULL; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index ed29fc899f06..0e4efcd10795 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1054,8 +1054,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio) int i; const int op = bio_op(bio); const int rw = bio_data_dir(bio); - const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); - const unsigned long do_fua = (bio->bi_rw & REQ_FUA); + const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); + const unsigned long do_fua = (bio->bi_opf & REQ_FUA); unsigned long flags; struct md_rdev *blocked_rdev; struct blk_plug_cb *cb; @@ -1440,7 +1440,7 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio) struct bio *split; - if (unlikely(bio->bi_rw & REQ_PREFLUSH)) { + if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { md_flush_request(mddev, bio); return; } @@ -2533,7 +2533,7 @@ read_more: return; } - do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC); + do_sync = (r10_bio->master_bio->bi_opf & REQ_SYNC); slot = r10_bio->read_slot; printk_ratelimited( KERN_ERR diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 5504ce2bac06..51f76ddbe265 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -536,7 +536,7 @@ int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio) bio_endio(bio); return 0; } - bio->bi_rw &= ~REQ_PREFLUSH; + bio->bi_opf &= ~REQ_PREFLUSH; return -EAGAIN; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index d189e894b921..8912407a4dd0 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -806,7 +806,7 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh dd_idx = 0; while (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx) dd_idx++; - if (head->dev[dd_idx].towrite->bi_rw != sh->dev[dd_idx].towrite->bi_rw || + if (head->dev[dd_idx].towrite->bi_opf != sh->dev[dd_idx].towrite->bi_opf || bio_op(head->dev[dd_idx].towrite) != bio_op(sh->dev[dd_idx].towrite)) goto unlock_out; @@ -1003,7 +1003,7 @@ again: pr_debug("%s: for %llu schedule op %d on disc %d\n", __func__, (unsigned long long)sh->sector, - bi->bi_rw, i); + bi->bi_opf, i); atomic_inc(&sh->count); if (sh != head_sh) atomic_inc(&head_sh->count); @@ -1014,7 +1014,7 @@ again: bi->bi_iter.bi_sector = (sh->sector + rdev->data_offset); if (test_bit(R5_ReadNoMerge, &head_sh->dev[i].flags)) - bi->bi_rw |= REQ_NOMERGE; + bi->bi_opf |= REQ_NOMERGE; if (test_bit(R5_SkipCopy, &sh->dev[i].flags)) WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); @@ -1055,7 +1055,7 @@ again: pr_debug("%s: for %llu schedule op %d on " "replacement disc %d\n", __func__, (unsigned long long)sh->sector, - rbi->bi_rw, i); + rbi->bi_opf, i); atomic_inc(&sh->count); if (sh != head_sh) atomic_inc(&head_sh->count); @@ -1088,7 +1088,7 @@ again: if (op_is_write(op)) set_bit(STRIPE_DEGRADED, &sh->state); pr_debug("skip op %d on disc %d for sector %llu\n", - bi->bi_rw, i, (unsigned long long)sh->sector); + bi->bi_opf, i, (unsigned long long)sh->sector); clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); } @@ -1619,9 +1619,9 @@ again: while (wbi && wbi->bi_iter.bi_sector < dev->sector + STRIPE_SECTORS) { - if (wbi->bi_rw & REQ_FUA) + if (wbi->bi_opf & REQ_FUA) set_bit(R5_WantFUA, &dev->flags); - if (wbi->bi_rw & REQ_SYNC) + if (wbi->bi_opf & REQ_SYNC) set_bit(R5_SyncIO, &dev->flags); if (bio_op(wbi) == REQ_OP_DISCARD) set_bit(R5_Discard, &dev->flags); @@ -5154,7 +5154,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi) DEFINE_WAIT(w); bool do_prepare; - if (unlikely(bi->bi_rw & REQ_PREFLUSH)) { + if (unlikely(bi->bi_opf & REQ_PREFLUSH)) { int ret = r5l_handle_flush_request(conf->log, bi); if (ret == 0) @@ -5237,7 +5237,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi) (unsigned long long)logical_sector); sh = raid5_get_active_stripe(conf, new_sector, previous, - (bi->bi_rw & REQ_RAHEAD), 0); + (bi->bi_opf & REQ_RAHEAD), 0); if (sh) { if (unlikely(previous)) { /* expansion might have moved on while waiting for a @@ -5305,7 +5305,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi) set_bit(STRIPE_HANDLE, &sh->state); clear_bit(STRIPE_DELAYED, &sh->state); if ((!sh->batch_head || sh == sh->batch_head) && - (bi->bi_rw & REQ_SYNC) && + (bi->bi_opf & REQ_SYNC) && !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) atomic_inc(&conf->preread_active_stripes); release_stripe_plug(mddev, sh); diff --git a/drivers/media/dvb-frontends/cxd2841er.c b/drivers/media/dvb-frontends/cxd2841er.c index 09c39346167f..ffe88bc6b813 100644 --- a/drivers/media/dvb-frontends/cxd2841er.c +++ b/drivers/media/dvb-frontends/cxd2841er.c @@ -3378,20 +3378,28 @@ static int cxd2841er_tune_tc(struct dvb_frontend *fe, ret = cxd2841er_get_carrier_offset_i( priv, p->bandwidth_hz, &carrier_offset); + if (ret) + return ret; break; case SYS_DVBT: ret = cxd2841er_get_carrier_offset_t( priv, p->bandwidth_hz, &carrier_offset); + if (ret) + return ret; break; case SYS_DVBT2: ret = cxd2841er_get_carrier_offset_t2( priv, p->bandwidth_hz, &carrier_offset); + if (ret) + return ret; break; case SYS_DVBC_ANNEX_A: ret = cxd2841er_get_carrier_offset_c( priv, &carrier_offset); + if (ret) + return ret; break; default: dev_dbg(&priv->i2c->dev, @@ -3399,8 +3407,6 @@ static int cxd2841er_tune_tc(struct dvb_frontend *fe, __func__, priv->system); return -EINVAL; } - if (ret) - return ret; dev_dbg(&priv->i2c->dev, "%s(): carrier offset %d\n", __func__, carrier_offset); p->frequency += carrier_offset; diff --git a/drivers/media/i2c/adv7180.c b/drivers/media/i2c/adv7180.c index b77b0a4dbf68..95cbc857f36e 100644 --- a/drivers/media/i2c/adv7180.c +++ b/drivers/media/i2c/adv7180.c @@ -100,7 +100,7 @@ #define ADV7180_REG_IDENT 0x0011 #define ADV7180_ID_7180 0x18 -#define ADV7180_REG_ICONF1 0x0040 +#define ADV7180_REG_ICONF1 0x2040 #define ADV7180_ICONF1_ACTIVE_LOW 0x01 #define ADV7180_ICONF1_PSYNC_ONLY 0x10 #define ADV7180_ICONF1_ACTIVE_TO_CLR 0xC0 @@ -113,15 +113,15 @@ #define ADV7180_IRQ1_LOCK 0x01 #define ADV7180_IRQ1_UNLOCK 0x02 -#define ADV7180_REG_ISR1 0x0042 -#define ADV7180_REG_ICR1 0x0043 -#define ADV7180_REG_IMR1 0x0044 -#define ADV7180_REG_IMR2 0x0048 +#define ADV7180_REG_ISR1 0x2042 +#define ADV7180_REG_ICR1 0x2043 +#define ADV7180_REG_IMR1 0x2044 +#define ADV7180_REG_IMR2 0x2048 #define ADV7180_IRQ3_AD_CHANGE 0x08 -#define ADV7180_REG_ISR3 0x004A -#define ADV7180_REG_ICR3 0x004B -#define ADV7180_REG_IMR3 0x004C -#define ADV7180_REG_IMR4 0x50 +#define ADV7180_REG_ISR3 0x204A +#define ADV7180_REG_ICR3 0x204B +#define ADV7180_REG_IMR3 0x204C +#define ADV7180_REG_IMR4 0x2050 #define ADV7180_REG_NTSC_V_BIT_END 0x00E6 #define ADV7180_NTSC_V_BIT_END_MANUAL_NVEND 0x4F diff --git a/drivers/media/i2c/adv7511.c b/drivers/media/i2c/adv7511.c index 6d7cad54a65d..53030d631653 100644 --- a/drivers/media/i2c/adv7511.c +++ b/drivers/media/i2c/adv7511.c @@ -1041,6 +1041,8 @@ static int adv7511_s_dv_timings(struct v4l2_subdev *sd, struct v4l2_dv_timings *timings) { struct adv7511_state *state = get_adv7511_state(sd); + struct v4l2_bt_timings *bt = &timings->bt; + u32 fps; v4l2_dbg(1, debug, sd, "%s:\n", __func__); @@ -1052,15 +1054,29 @@ static int adv7511_s_dv_timings(struct v4l2_subdev *sd, if the format is one of the CEA or DMT timings. */ v4l2_find_dv_timings_cap(timings, &adv7511_timings_cap, 0, NULL, NULL); - timings->bt.flags &= ~V4L2_DV_FL_REDUCED_FPS; - /* save timings */ state->dv_timings = *timings; /* set h/vsync polarities */ adv7511_wr_and_or(sd, 0x17, 0x9f, - ((timings->bt.polarities & V4L2_DV_VSYNC_POS_POL) ? 0 : 0x40) | - ((timings->bt.polarities & V4L2_DV_HSYNC_POS_POL) ? 0 : 0x20)); + ((bt->polarities & V4L2_DV_VSYNC_POS_POL) ? 0 : 0x40) | + ((bt->polarities & V4L2_DV_HSYNC_POS_POL) ? 0 : 0x20)); + + fps = (u32)bt->pixelclock / (V4L2_DV_BT_FRAME_WIDTH(bt) * V4L2_DV_BT_FRAME_HEIGHT(bt)); + switch (fps) { + case 24: + adv7511_wr_and_or(sd, 0xfb, 0xf9, 1 << 1); + break; + case 25: + adv7511_wr_and_or(sd, 0xfb, 0xf9, 2 << 1); + break; + case 30: + adv7511_wr_and_or(sd, 0xfb, 0xf9, 3 << 1); + break; + default: + adv7511_wr_and_or(sd, 0xfb, 0xf9, 0); + break; + } /* update quantization range based on new dv_timings */ adv7511_set_rgb_quantization_mode(sd, state->rgb_quantization_range_ctrl); diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c index e277b7c23516..c7806ecda2dd 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c @@ -246,7 +246,6 @@ static int mtk_vcodec_probe(struct platform_device *pdev) struct video_device *vfd_enc; struct resource *res; int i, j, ret; - DEFINE_DMA_ATTRS(attrs); dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL); if (!dev) @@ -378,9 +377,6 @@ static int mtk_vcodec_probe(struct platform_device *pdev) goto err_enc_reg; } - /* Avoid the iommu eat big hunks */ - dma_set_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, &attrs); - mtk_v4l2_debug(0, "encoder registered as /dev/video%d", vfd_enc->num); diff --git a/drivers/media/platform/sti/bdisp/bdisp-hw.c b/drivers/media/platform/sti/bdisp/bdisp-hw.c index 3df66d11c795..b7892f3efd98 100644 --- a/drivers/media/platform/sti/bdisp/bdisp-hw.c +++ b/drivers/media/platform/sti/bdisp/bdisp-hw.c @@ -430,14 +430,11 @@ int bdisp_hw_get_and_clear_irq(struct bdisp_dev *bdisp) */ void bdisp_hw_free_nodes(struct bdisp_ctx *ctx) { - if (ctx && ctx->node[0]) { - DEFINE_DMA_ATTRS(attrs); - - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + if (ctx && ctx->node[0]) dma_free_attrs(ctx->bdisp_dev->dev, sizeof(struct bdisp_node) * MAX_NB_NODE, - ctx->node[0], ctx->node_paddr[0], &attrs); - } + ctx->node[0], ctx->node_paddr[0], + DMA_ATTR_WRITE_COMBINE); } /** @@ -455,12 +452,10 @@ int bdisp_hw_alloc_nodes(struct bdisp_ctx *ctx) unsigned int i, node_size = sizeof(struct bdisp_node); void *base; dma_addr_t paddr; - DEFINE_DMA_ATTRS(attrs); /* Allocate all the nodes within a single memory page */ - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); base = dma_alloc_attrs(dev, node_size * MAX_NB_NODE, &paddr, - GFP_KERNEL | GFP_DMA, &attrs); + GFP_KERNEL | GFP_DMA, DMA_ATTR_WRITE_COMBINE); if (!base) { dev_err(dev, "%s no mem\n", __func__); return -ENOMEM; @@ -493,13 +488,9 @@ void bdisp_hw_free_filters(struct device *dev) { int size = (BDISP_HF_NB * NB_H_FILTER) + (BDISP_VF_NB * NB_V_FILTER); - if (bdisp_h_filter[0].virt) { - DEFINE_DMA_ATTRS(attrs); - - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + if (bdisp_h_filter[0].virt) dma_free_attrs(dev, size, bdisp_h_filter[0].virt, - bdisp_h_filter[0].paddr, &attrs); - } + bdisp_h_filter[0].paddr, DMA_ATTR_WRITE_COMBINE); } /** @@ -516,12 +507,11 @@ int bdisp_hw_alloc_filters(struct device *dev) unsigned int i, size; void *base; dma_addr_t paddr; - DEFINE_DMA_ATTRS(attrs); /* Allocate all the filters within a single memory page */ size = (BDISP_HF_NB * NB_H_FILTER) + (BDISP_VF_NB * NB_V_FILTER); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); - base = dma_alloc_attrs(dev, size, &paddr, GFP_KERNEL | GFP_DMA, &attrs); + base = dma_alloc_attrs(dev, size, &paddr, GFP_KERNEL | GFP_DMA, + DMA_ATTR_WRITE_COMBINE); if (!base) return -ENOMEM; diff --git a/drivers/media/platform/vim2m.c b/drivers/media/platform/vim2m.c index 6b17015048ae..cd0ff4a66fdc 100644 --- a/drivers/media/platform/vim2m.c +++ b/drivers/media/platform/vim2m.c @@ -171,6 +171,9 @@ struct vim2m_ctx { int mode; enum v4l2_colorspace colorspace; + enum v4l2_ycbcr_encoding ycbcr_enc; + enum v4l2_xfer_func xfer_func; + enum v4l2_quantization quant; /* Source and destination queue data */ struct vim2m_q_data q_data[2]; @@ -493,6 +496,9 @@ static int vidioc_g_fmt(struct vim2m_ctx *ctx, struct v4l2_format *f) f->fmt.pix.bytesperline = (q_data->width * q_data->fmt->depth) >> 3; f->fmt.pix.sizeimage = q_data->sizeimage; f->fmt.pix.colorspace = ctx->colorspace; + f->fmt.pix.xfer_func = ctx->xfer_func; + f->fmt.pix.ycbcr_enc = ctx->ycbcr_enc; + f->fmt.pix.quantization = ctx->quant; return 0; } @@ -549,6 +555,9 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv, return -EINVAL; } f->fmt.pix.colorspace = ctx->colorspace; + f->fmt.pix.xfer_func = ctx->xfer_func; + f->fmt.pix.ycbcr_enc = ctx->ycbcr_enc; + f->fmt.pix.quantization = ctx->quant; return vidioc_try_fmt(f, fmt); } @@ -630,8 +639,12 @@ static int vidioc_s_fmt_vid_out(struct file *file, void *priv, return ret; ret = vidioc_s_fmt(file2ctx(file), f); - if (!ret) + if (!ret) { ctx->colorspace = f->fmt.pix.colorspace; + ctx->xfer_func = f->fmt.pix.xfer_func; + ctx->ycbcr_enc = f->fmt.pix.ycbcr_enc; + ctx->quant = f->fmt.pix.quantization; + } return ret; } diff --git a/drivers/media/platform/vivid/vivid-cec.c b/drivers/media/platform/vivid/vivid-cec.c index 66aa7292076b..f9f878b8e0a7 100644 --- a/drivers/media/platform/vivid/vivid-cec.c +++ b/drivers/media/platform/vivid/vivid-cec.c @@ -169,7 +169,6 @@ static int vivid_received(struct cec_adapter *adap, struct cec_msg *msg) struct vivid_dev *dev = adap->priv; struct cec_msg reply; u8 dest = cec_msg_destination(msg); - u16 pa; u8 disp_ctl; char osd[14]; @@ -178,15 +177,6 @@ static int vivid_received(struct cec_adapter *adap, struct cec_msg *msg) cec_msg_init(&reply, dest, cec_msg_initiator(msg)); switch (cec_msg_opcode(msg)) { - case CEC_MSG_SET_STREAM_PATH: - if (cec_is_sink(adap)) - return -ENOMSG; - cec_ops_set_stream_path(msg, &pa); - if (pa != adap->phys_addr) - return -ENOMSG; - cec_msg_active_source(&reply, adap->phys_addr); - cec_transmit_msg(adap, &reply, false); - break; case CEC_MSG_SET_OSD_STRING: if (!cec_is_sink(adap)) return -ENOMSG; diff --git a/drivers/media/v4l2-core/videobuf2-dma-contig.c b/drivers/media/v4l2-core/videobuf2-dma-contig.c index 863f658a3fa1..59fa204b15f3 100644 --- a/drivers/media/v4l2-core/videobuf2-dma-contig.c +++ b/drivers/media/v4l2-core/videobuf2-dma-contig.c @@ -27,7 +27,7 @@ struct vb2_dc_buf { unsigned long size; void *cookie; dma_addr_t dma_addr; - struct dma_attrs attrs; + unsigned long attrs; enum dma_data_direction dma_dir; struct sg_table *dma_sgt; struct frame_vector *vec; @@ -130,12 +130,12 @@ static void vb2_dc_put(void *buf_priv) kfree(buf->sgt_base); } dma_free_attrs(buf->dev, buf->size, buf->cookie, buf->dma_addr, - &buf->attrs); + buf->attrs); put_device(buf->dev); kfree(buf); } -static void *vb2_dc_alloc(struct device *dev, const struct dma_attrs *attrs, +static void *vb2_dc_alloc(struct device *dev, unsigned long attrs, unsigned long size, enum dma_data_direction dma_dir, gfp_t gfp_flags) { @@ -146,16 +146,16 @@ static void *vb2_dc_alloc(struct device *dev, const struct dma_attrs *attrs, return ERR_PTR(-ENOMEM); if (attrs) - buf->attrs = *attrs; + buf->attrs = attrs; buf->cookie = dma_alloc_attrs(dev, size, &buf->dma_addr, - GFP_KERNEL | gfp_flags, &buf->attrs); + GFP_KERNEL | gfp_flags, buf->attrs); if (!buf->cookie) { dev_err(dev, "dma_alloc_coherent of size %ld failed\n", size); kfree(buf); return ERR_PTR(-ENOMEM); } - if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, &buf->attrs)) + if ((buf->attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) buf->vaddr = buf->cookie; /* Prevent the device from being released while the buffer is used */ @@ -189,7 +189,7 @@ static int vb2_dc_mmap(void *buf_priv, struct vm_area_struct *vma) vma->vm_pgoff = 0; ret = dma_mmap_attrs(buf->dev, vma, buf->cookie, - buf->dma_addr, buf->size, &buf->attrs); + buf->dma_addr, buf->size, buf->attrs); if (ret) { pr_err("Remapping memory failed, error: %d\n", ret); @@ -372,7 +372,7 @@ static struct sg_table *vb2_dc_get_base_sgt(struct vb2_dc_buf *buf) } ret = dma_get_sgtable_attrs(buf->dev, sgt, buf->cookie, buf->dma_addr, - buf->size, &buf->attrs); + buf->size, buf->attrs); if (ret < 0) { dev_err(buf->dev, "failed to get scatterlist from DMA API\n"); kfree(sgt); @@ -421,15 +421,12 @@ static void vb2_dc_put_userptr(void *buf_priv) struct page **pages; if (sgt) { - DEFINE_DMA_ATTRS(attrs); - - dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs); /* * No need to sync to CPU, it's already synced to the CPU * since the finish() memop will have been called before this. */ dma_unmap_sg_attrs(buf->dev, sgt->sgl, sgt->orig_nents, - buf->dma_dir, &attrs); + buf->dma_dir, DMA_ATTR_SKIP_CPU_SYNC); pages = frame_vector_pages(buf->vec); /* sgt should exist only if vector contains pages... */ BUG_ON(IS_ERR(pages)); @@ -484,9 +481,6 @@ static void *vb2_dc_get_userptr(struct device *dev, unsigned long vaddr, struct sg_table *sgt; unsigned long contig_size; unsigned long dma_align = dma_get_cache_alignment(); - DEFINE_DMA_ATTRS(attrs); - - dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs); /* Only cache aligned DMA transfers are reliable */ if (!IS_ALIGNED(vaddr | size, dma_align)) { @@ -548,7 +542,7 @@ static void *vb2_dc_get_userptr(struct device *dev, unsigned long vaddr, * prepare() memop is called. */ sgt->nents = dma_map_sg_attrs(buf->dev, sgt->sgl, sgt->orig_nents, - buf->dma_dir, &attrs); + buf->dma_dir, DMA_ATTR_SKIP_CPU_SYNC); if (sgt->nents <= 0) { pr_err("failed to map scatterlist\n"); ret = -EIO; @@ -572,7 +566,7 @@ out: fail_map_sg: dma_unmap_sg_attrs(buf->dev, sgt->sgl, sgt->orig_nents, - buf->dma_dir, &attrs); + buf->dma_dir, DMA_ATTR_SKIP_CPU_SYNC); fail_sgt_init: sg_free_table(sgt); @@ -749,7 +743,7 @@ EXPORT_SYMBOL_GPL(vb2_dma_contig_memops); int vb2_dma_contig_set_max_seg_size(struct device *dev, unsigned int size) { if (!dev->dma_parms) { - dev->dma_parms = kzalloc(sizeof(dev->dma_parms), GFP_KERNEL); + dev->dma_parms = kzalloc(sizeof(*dev->dma_parms), GFP_KERNEL); if (!dev->dma_parms) return -ENOMEM; } diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c index a39db8a6db7a..bd82d709ee82 100644 --- a/drivers/media/v4l2-core/videobuf2-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c @@ -95,7 +95,7 @@ static int vb2_dma_sg_alloc_compacted(struct vb2_dma_sg_buf *buf, return 0; } -static void *vb2_dma_sg_alloc(struct device *dev, const struct dma_attrs *dma_attrs, +static void *vb2_dma_sg_alloc(struct device *dev, unsigned long dma_attrs, unsigned long size, enum dma_data_direction dma_dir, gfp_t gfp_flags) { @@ -103,9 +103,6 @@ static void *vb2_dma_sg_alloc(struct device *dev, const struct dma_attrs *dma_at struct sg_table *sgt; int ret; int num_pages; - DEFINE_DMA_ATTRS(attrs); - - dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs); if (WARN_ON(dev == NULL)) return NULL; @@ -144,7 +141,7 @@ static void *vb2_dma_sg_alloc(struct device *dev, const struct dma_attrs *dma_at * prepare() memop is called. */ sgt->nents = dma_map_sg_attrs(buf->dev, sgt->sgl, sgt->orig_nents, - buf->dma_dir, &attrs); + buf->dma_dir, DMA_ATTR_SKIP_CPU_SYNC); if (!sgt->nents) goto fail_map; @@ -179,13 +176,10 @@ static void vb2_dma_sg_put(void *buf_priv) int i = buf->num_pages; if (atomic_dec_and_test(&buf->refcount)) { - DEFINE_DMA_ATTRS(attrs); - - dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs); dprintk(1, "%s: Freeing buffer of %d pages\n", __func__, buf->num_pages); dma_unmap_sg_attrs(buf->dev, sgt->sgl, sgt->orig_nents, - buf->dma_dir, &attrs); + buf->dma_dir, DMA_ATTR_SKIP_CPU_SYNC); if (buf->vaddr) vm_unmap_ram(buf->vaddr, buf->num_pages); sg_free_table(buf->dma_sgt); @@ -228,10 +222,8 @@ static void *vb2_dma_sg_get_userptr(struct device *dev, unsigned long vaddr, { struct vb2_dma_sg_buf *buf; struct sg_table *sgt; - DEFINE_DMA_ATTRS(attrs); struct frame_vector *vec; - dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs); buf = kzalloc(sizeof *buf, GFP_KERNEL); if (!buf) return NULL; @@ -262,7 +254,7 @@ static void *vb2_dma_sg_get_userptr(struct device *dev, unsigned long vaddr, * prepare() memop is called. */ sgt->nents = dma_map_sg_attrs(buf->dev, sgt->sgl, sgt->orig_nents, - buf->dma_dir, &attrs); + buf->dma_dir, DMA_ATTR_SKIP_CPU_SYNC); if (!sgt->nents) goto userptr_fail_map; @@ -286,14 +278,11 @@ static void vb2_dma_sg_put_userptr(void *buf_priv) struct vb2_dma_sg_buf *buf = buf_priv; struct sg_table *sgt = &buf->sg_table; int i = buf->num_pages; - DEFINE_DMA_ATTRS(attrs); - - dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs); dprintk(1, "%s: Releasing userspace buffer of %d pages\n", __func__, buf->num_pages); dma_unmap_sg_attrs(buf->dev, sgt->sgl, sgt->orig_nents, buf->dma_dir, - &attrs); + DMA_ATTR_SKIP_CPU_SYNC); if (buf->vaddr) vm_unmap_ram(buf->vaddr, buf->num_pages); sg_free_table(buf->dma_sgt); diff --git a/drivers/media/v4l2-core/videobuf2-vmalloc.c b/drivers/media/v4l2-core/videobuf2-vmalloc.c index 7e8a07ed8d82..c2820a6e164d 100644 --- a/drivers/media/v4l2-core/videobuf2-vmalloc.c +++ b/drivers/media/v4l2-core/videobuf2-vmalloc.c @@ -33,7 +33,7 @@ struct vb2_vmalloc_buf { static void vb2_vmalloc_put(void *buf_priv); -static void *vb2_vmalloc_alloc(struct device *dev, const struct dma_attrs *attrs, +static void *vb2_vmalloc_alloc(struct device *dev, unsigned long attrs, unsigned long size, enum dma_data_direction dma_dir, gfp_t gfp_flags) { diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 4387ccb79e64..7410c6d9a34d 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -69,5 +69,6 @@ OBJCOPYFLAGS := OBJCOPYFLAGS_lkdtm_rodata_objcopy.o := \ --set-section-flags .text=alloc,readonly \ --rename-section .text=.rodata -$(obj)/lkdtm_rodata_objcopy.o: $(obj)/lkdtm_rodata.o +targets += lkdtm_rodata.o lkdtm_rodata_objcopy.o +$(obj)/lkdtm_rodata_objcopy.o: $(obj)/lkdtm_rodata.o FORCE $(call if_changed,objcopy) diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index bdee9a01ef35..c466ee2b0c97 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -90,8 +90,7 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master, */ mutex_lock(&afu->contexts_lock); idr_preload(GFP_KERNEL); - i = idr_alloc(&ctx->afu->contexts_idr, ctx, - ctx->afu->adapter->native->sl_ops->min_pe, + i = idr_alloc(&ctx->afu->contexts_idr, ctx, ctx->afu->adapter->min_pe, ctx->afu->num_procs, GFP_NOWAIT); idr_preload_end(); mutex_unlock(&afu->contexts_lock); diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index de090533f18c..344a0ff8f8c7 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -561,7 +561,6 @@ struct cxl_service_layer_ops { u64 (*timebase_read)(struct cxl *adapter); int capi_mode; bool needs_reset_before_disable; - int min_pe; }; struct cxl_native { @@ -603,6 +602,7 @@ struct cxl { struct bin_attribute cxl_attr; int adapter_num; int user_irqs; + int min_pe; u64 ps_size; u16 psl_rev; u16 base_image; diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 3bcdaee11ba1..e606fdc4bc9c 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -924,7 +924,7 @@ static irqreturn_t native_irq_multiplexed(int irq, void *data) return fail_psl_irq(afu, &irq_info); } -void native_irq_wait(struct cxl_context *ctx) +static void native_irq_wait(struct cxl_context *ctx) { u64 dsisr; int timeout = 1000; diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index d152e2de8c93..6f0c4ac4b649 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -379,7 +379,7 @@ static int calc_capp_routing(struct pci_dev *dev, u64 *chipid, u64 *capp_unit_id static int init_implementation_adapter_psl_regs(struct cxl *adapter, struct pci_dev *dev) { - u64 psl_dsnctl; + u64 psl_dsnctl, psl_fircntl; u64 chipid; u64 capp_unit_id; int rc; @@ -398,8 +398,11 @@ static int init_implementation_adapter_psl_regs(struct cxl *adapter, struct pci_ cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x20000000200ULL); /* snoop write mask */ cxl_p1_write(adapter, CXL_PSL_SNWRALLOC, 0x00000000FFFFFFFFULL); - /* set fir_accum */ - cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, 0x0800000000000000ULL); + /* set fir_cntl to recommended value for production env */ + psl_fircntl = (0x2ULL << (63-3)); /* ce_report */ + psl_fircntl |= (0x1ULL << (63-6)); /* FIR_report */ + psl_fircntl |= 0x1ULL; /* ce_thresh */ + cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, psl_fircntl); /* for debugging with trace arrays */ cxl_p1_write(adapter, CXL_PSL_TRACE, 0x0000FF7C00000000ULL); @@ -1521,14 +1524,15 @@ static const struct cxl_service_layer_ops xsl_ops = { .write_timebase_ctrl = write_timebase_ctrl_xsl, .timebase_read = timebase_read_xsl, .capi_mode = OPAL_PHB_CAPI_MODE_DMA, - .min_pe = 1, /* Workaround for Mellanox CX4 HW bug */ }; static void set_sl_ops(struct cxl *adapter, struct pci_dev *dev) { if (dev->vendor == PCI_VENDOR_ID_MELLANOX && dev->device == 0x1013) { + /* Mellanox CX-4 */ dev_info(&adapter->dev, "Device uses an XSL\n"); adapter->native->sl_ops = &xsl_ops; + adapter->min_pe = 1; /* Workaround for CX-4 hardware bug */ } else { dev_info(&adapter->dev, "Device uses a PSL\n"); adapter->native->sl_ops = &psl_ops; diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index dee8def1c193..7ada5f1b7bb6 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -221,7 +221,7 @@ int cxl_pci_vphb_add(struct cxl_afu *afu) /* Setup the PHB using arch provided callback */ phb->ops = &cxl_pcie_pci_ops; phb->cfg_addr = NULL; - phb->cfg_data = 0; + phb->cfg_data = NULL; phb->private_data = afu; phb->controller_ops = cxl_pci_controller_ops; diff --git a/drivers/misc/lkdtm_usercopy.c b/drivers/misc/lkdtm_usercopy.c index 5a3fd76eec27..5525a204db93 100644 --- a/drivers/misc/lkdtm_usercopy.c +++ b/drivers/misc/lkdtm_usercopy.c @@ -49,7 +49,7 @@ static noinline void do_usercopy_stack(bool to_user, bool bad_frame) /* This is a pointer to outside our current stack frame. */ if (bad_frame) { - bad_stack = do_usercopy_stack_callee((uintptr_t)bad_stack); + bad_stack = do_usercopy_stack_callee((uintptr_t)&bad_stack); } else { /* Put start address just inside stack. */ bad_stack = task_stack_page(current) + THREAD_SIZE; diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig index 89e5917e1c33..6fd9d367dea7 100644 --- a/drivers/misc/mic/Kconfig +++ b/drivers/misc/mic/Kconfig @@ -146,3 +146,7 @@ config VOP More information about the Intel MIC family as well as the Linux OS and tools for MIC to use with this driver are available from <http://software.intel.com/en-us/mic-developer>. + +if VOP +source "drivers/vhost/Kconfig.vringh" +endif diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c index e047efd83f57..9599d732aff3 100644 --- a/drivers/misc/mic/host/mic_boot.c +++ b/drivers/misc/mic/host/mic_boot.c @@ -38,7 +38,7 @@ static inline struct mic_device *vpdev_to_mdev(struct device *dev) static dma_addr_t _mic_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { void *va = phys_to_virt(page_to_phys(page)) + offset; struct mic_device *mdev = vpdev_to_mdev(dev); @@ -48,7 +48,7 @@ _mic_dma_map_page(struct device *dev, struct page *page, static void _mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct mic_device *mdev = vpdev_to_mdev(dev); @@ -144,7 +144,7 @@ static inline struct mic_device *scdev_to_mdev(struct scif_hw_dev *scdev) static void *__mic_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, - struct dma_attrs *attrs) + unsigned long attrs) { struct scif_hw_dev *scdev = dev_get_drvdata(dev); struct mic_device *mdev = scdev_to_mdev(scdev); @@ -164,7 +164,7 @@ static void *__mic_dma_alloc(struct device *dev, size_t size, } static void __mic_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { struct scif_hw_dev *scdev = dev_get_drvdata(dev); struct mic_device *mdev = scdev_to_mdev(scdev); @@ -176,7 +176,7 @@ static void __mic_dma_free(struct device *dev, size_t size, void *vaddr, static dma_addr_t __mic_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { void *va = phys_to_virt(page_to_phys(page)) + offset; struct scif_hw_dev *scdev = dev_get_drvdata(dev); @@ -188,7 +188,7 @@ __mic_dma_map_page(struct device *dev, struct page *page, unsigned long offset, static void __mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scif_hw_dev *scdev = dev_get_drvdata(dev); struct mic_device *mdev = scdev_to_mdev(scdev); @@ -198,7 +198,7 @@ __mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, static int __mic_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scif_hw_dev *scdev = dev_get_drvdata(dev); struct mic_device *mdev = scdev_to_mdev(scdev); @@ -229,7 +229,7 @@ err: static void __mic_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scif_hw_dev *scdev = dev_get_drvdata(dev); struct mic_device *mdev = scdev_to_mdev(scdev); @@ -327,7 +327,7 @@ static inline struct mic_device *mbdev_to_mdev(struct mbus_device *mbdev) static dma_addr_t mic_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { void *va = phys_to_virt(page_to_phys(page)) + offset; struct mic_device *mdev = dev_get_drvdata(dev->parent); @@ -338,7 +338,7 @@ mic_dma_map_page(struct device *dev, struct page *page, static void mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct mic_device *mdev = dev_get_drvdata(dev->parent); mic_unmap_single(mdev, dma_addr, size); diff --git a/drivers/mtd/bcm47xxpart.c b/drivers/mtd/bcm47xxpart.c index 845dd27d9f41..377947580203 100644 --- a/drivers/mtd/bcm47xxpart.c +++ b/drivers/mtd/bcm47xxpart.c @@ -122,7 +122,7 @@ static int bcm47xxpart_parse(struct mtd_info *master, for (offset = 0; offset <= master->size - blocksize; offset += blocksize) { /* Nothing more in higher memory on BCM47XX (MIPS) */ - if (config_enabled(CONFIG_BCM47XX) && offset >= 0x2000000) + if (IS_ENABLED(CONFIG_BCM47XX) && offset >= 0x2000000) break; if (curr_part >= BCM47XXPART_MAX_PARTS) { diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c index c1aaf0336cf2..903becd31410 100644 --- a/drivers/mtd/ubi/attach.c +++ b/drivers/mtd/ubi/attach.c @@ -175,6 +175,40 @@ static int add_corrupted(struct ubi_attach_info *ai, int pnum, int ec) } /** + * add_fastmap - add a Fastmap related physical eraseblock. + * @ai: attaching information + * @pnum: physical eraseblock number the VID header came from + * @vid_hdr: the volume identifier header + * @ec: erase counter of the physical eraseblock + * + * This function allocates a 'struct ubi_ainf_peb' object for a Fastamp + * physical eraseblock @pnum and adds it to the 'fastmap' list. + * Such blocks can be Fastmap super and data blocks from both the most + * recent Fastmap we're attaching from or from old Fastmaps which will + * be erased. + */ +static int add_fastmap(struct ubi_attach_info *ai, int pnum, + struct ubi_vid_hdr *vid_hdr, int ec) +{ + struct ubi_ainf_peb *aeb; + + aeb = kmem_cache_alloc(ai->aeb_slab_cache, GFP_KERNEL); + if (!aeb) + return -ENOMEM; + + aeb->pnum = pnum; + aeb->vol_id = be32_to_cpu(vidh->vol_id); + aeb->sqnum = be64_to_cpu(vidh->sqnum); + aeb->ec = ec; + list_add(&aeb->u.list, &ai->fastmap); + + dbg_bld("add to fastmap list: PEB %d, vol_id %d, sqnum: %llu", pnum, + aeb->vol_id, aeb->sqnum); + + return 0; +} + +/** * validate_vid_hdr - check volume identifier header. * @ubi: UBI device description object * @vid_hdr: the volume identifier header to check @@ -803,13 +837,26 @@ out_unlock: return err; } +static bool vol_ignored(int vol_id) +{ + switch (vol_id) { + case UBI_LAYOUT_VOLUME_ID: + return true; + } + +#ifdef CONFIG_MTD_UBI_FASTMAP + return ubi_is_fm_vol(vol_id); +#else + return false; +#endif +} + /** * scan_peb - scan and process UBI headers of a PEB. * @ubi: UBI device description object * @ai: attaching information * @pnum: the physical eraseblock number - * @vid: The volume ID of the found volume will be stored in this pointer - * @sqnum: The sqnum of the found volume will be stored in this pointer + * @fast: true if we're scanning for a Fastmap * * This function reads UBI headers of PEB @pnum, checks them, and adds * information about this PEB to the corresponding list or RB-tree in the @@ -817,9 +864,9 @@ out_unlock: * successfully handled and a negative error code in case of failure. */ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, - int pnum, int *vid, unsigned long long *sqnum) + int pnum, bool fast) { - long long uninitialized_var(ec); + long long ec; int err, bitflips = 0, vol_id = -1, ec_err = 0; dbg_bld("scan PEB %d", pnum); @@ -935,6 +982,20 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, */ ai->maybe_bad_peb_count += 1; case UBI_IO_BAD_HDR: + /* + * If we're facing a bad VID header we have to drop *all* + * Fastmap data structures we find. The most recent Fastmap + * could be bad and therefore there is a chance that we attach + * from an old one. On a fine MTD stack a PEB must not render + * bad all of a sudden, but the reality is different. + * So, let's be paranoid and help finding the root cause by + * falling back to scanning mode instead of attaching with a + * bad EBA table and cause data corruption which is hard to + * analyze. + */ + if (fast) + ai->force_full_scan = 1; + if (ec_err) /* * Both headers are corrupted. There is a possibility @@ -991,21 +1052,15 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, } vol_id = be32_to_cpu(vidh->vol_id); - if (vid) - *vid = vol_id; - if (sqnum) - *sqnum = be64_to_cpu(vidh->sqnum); - if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOLUME_ID) { + if (vol_id > UBI_MAX_VOLUMES && !vol_ignored(vol_id)) { int lnum = be32_to_cpu(vidh->lnum); /* Unsupported internal volume */ switch (vidh->compat) { case UBI_COMPAT_DELETE: - if (vol_id != UBI_FM_SB_VOLUME_ID - && vol_id != UBI_FM_DATA_VOLUME_ID) { - ubi_msg(ubi, "\"delete\" compatible internal volume %d:%d found, will remove it", - vol_id, lnum); - } + ubi_msg(ubi, "\"delete\" compatible internal volume %d:%d found, will remove it", + vol_id, lnum); + err = add_to_list(ai, pnum, vol_id, lnum, ec, 1, &ai->erase); if (err) @@ -1037,7 +1092,12 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, if (ec_err) ubi_warn(ubi, "valid VID header but corrupted EC header at PEB %d", pnum); - err = ubi_add_to_av(ubi, ai, pnum, ec, vidh, bitflips); + + if (ubi_is_fm_vol(vol_id)) + err = add_fastmap(ai, pnum, vidh, ec); + else + err = ubi_add_to_av(ubi, ai, pnum, ec, vidh, bitflips); + if (err) return err; @@ -1186,6 +1246,10 @@ static void destroy_ai(struct ubi_attach_info *ai) list_del(&aeb->u.list); kmem_cache_free(ai->aeb_slab_cache, aeb); } + list_for_each_entry_safe(aeb, aeb_tmp, &ai->fastmap, u.list) { + list_del(&aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, aeb); + } /* Destroy the volume RB-tree */ rb = ai->volumes.rb_node; @@ -1245,7 +1309,7 @@ static int scan_all(struct ubi_device *ubi, struct ubi_attach_info *ai, cond_resched(); dbg_gen("process PEB %d", pnum); - err = scan_peb(ubi, ai, pnum, NULL, NULL); + err = scan_peb(ubi, ai, pnum, false); if (err < 0) goto out_vidh; } @@ -1311,6 +1375,7 @@ static struct ubi_attach_info *alloc_ai(void) INIT_LIST_HEAD(&ai->free); INIT_LIST_HEAD(&ai->erase); INIT_LIST_HEAD(&ai->alien); + INIT_LIST_HEAD(&ai->fastmap); ai->volumes = RB_ROOT; ai->aeb_slab_cache = kmem_cache_create("ubi_aeb_slab_cache", sizeof(struct ubi_ainf_peb), @@ -1326,7 +1391,7 @@ static struct ubi_attach_info *alloc_ai(void) #ifdef CONFIG_MTD_UBI_FASTMAP /** - * scan_fastmap - try to find a fastmap and attach from it. + * scan_fast - try to find a fastmap and attach from it. * @ubi: UBI device description object * @ai: attach info object * @@ -1337,52 +1402,58 @@ static struct ubi_attach_info *alloc_ai(void) */ static int scan_fast(struct ubi_device *ubi, struct ubi_attach_info **ai) { - int err, pnum, fm_anchor = -1; - unsigned long long max_sqnum = 0; + int err, pnum; + struct ubi_attach_info *scan_ai; err = -ENOMEM; + scan_ai = alloc_ai(); + if (!scan_ai) + goto out; + ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); if (!ech) - goto out; + goto out_ai; vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); if (!vidh) goto out_ech; for (pnum = 0; pnum < UBI_FM_MAX_START; pnum++) { - int vol_id = -1; - unsigned long long sqnum = -1; cond_resched(); dbg_gen("process PEB %d", pnum); - err = scan_peb(ubi, *ai, pnum, &vol_id, &sqnum); + err = scan_peb(ubi, scan_ai, pnum, true); if (err < 0) goto out_vidh; - - if (vol_id == UBI_FM_SB_VOLUME_ID && sqnum > max_sqnum) { - max_sqnum = sqnum; - fm_anchor = pnum; - } } ubi_free_vid_hdr(ubi, vidh); kfree(ech); - if (fm_anchor < 0) - return UBI_NO_FASTMAP; + if (scan_ai->force_full_scan) + err = UBI_NO_FASTMAP; + else + err = ubi_scan_fastmap(ubi, *ai, scan_ai); - destroy_ai(*ai); - *ai = alloc_ai(); - if (!*ai) - return -ENOMEM; + if (err) { + /* + * Didn't attach via fastmap, do a full scan but reuse what + * we've aready scanned. + */ + destroy_ai(*ai); + *ai = scan_ai; + } else + destroy_ai(scan_ai); - return ubi_scan_fastmap(ubi, *ai, fm_anchor); + return err; out_vidh: ubi_free_vid_hdr(ubi, vidh); out_ech: kfree(ech); +out_ai: + destroy_ai(scan_ai); out: return err; } diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index ef3618299494..0680516bb472 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -874,7 +874,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, for (i = 0; i < UBI_MAX_DEVICES; i++) { ubi = ubi_devices[i]; if (ubi && mtd->index == ubi->mtd->index) { - ubi_err(ubi, "mtd%d is already attached to ubi%d", + pr_err("ubi: mtd%d is already attached to ubi%d", mtd->index, i); return -EEXIST; } @@ -889,7 +889,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, * no sense to attach emulated MTD devices, so we prohibit this. */ if (mtd->type == MTD_UBIVOLUME) { - ubi_err(ubi, "refuse attaching mtd%d - it is already emulated on top of UBI", + pr_err("ubi: refuse attaching mtd%d - it is already emulated on top of UBI", mtd->index); return -EINVAL; } @@ -900,7 +900,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, if (!ubi_devices[ubi_num]) break; if (ubi_num == UBI_MAX_DEVICES) { - ubi_err(ubi, "only %d UBI devices may be created", + pr_err("ubi: only %d UBI devices may be created", UBI_MAX_DEVICES); return -ENFILE; } @@ -910,7 +910,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, /* Make sure ubi_num is not busy */ if (ubi_devices[ubi_num]) { - ubi_err(ubi, "already exists"); + pr_err("ubi: ubi%i already exists", ubi_num); return -EEXIST; } } @@ -992,6 +992,9 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, goto out_detach; } + /* Make device "available" before it becomes accessible via sysfs */ + ubi_devices[ubi_num] = ubi; + err = uif_init(ubi, &ref); if (err) goto out_detach; @@ -1036,7 +1039,6 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, wake_up_process(ubi->bgt_thread); spin_unlock(&ubi->wl_lock); - ubi_devices[ubi_num] = ubi; ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL); return ubi_num; @@ -1047,6 +1049,7 @@ out_uif: ubi_assert(ref); uif_close(ubi); out_detach: + ubi_devices[ubi_num] = NULL; ubi_wl_close(ubi); ubi_free_internal_volumes(ubi); vfree(ubi->vtbl); diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 990898b9dc72..48eb55f344eb 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -15,20 +15,22 @@ */ #include <linux/crc32.h> +#include <linux/bitmap.h> #include "ubi.h" /** * init_seen - allocate memory for used for debugging. * @ubi: UBI device description object */ -static inline int *init_seen(struct ubi_device *ubi) +static inline unsigned long *init_seen(struct ubi_device *ubi) { - int *ret; + unsigned long *ret; if (!ubi_dbg_chk_fastmap(ubi)) return NULL; - ret = kcalloc(ubi->peb_count, sizeof(int), GFP_KERNEL); + ret = kcalloc(BITS_TO_LONGS(ubi->peb_count), sizeof(unsigned long), + GFP_KERNEL); if (!ret) return ERR_PTR(-ENOMEM); @@ -39,7 +41,7 @@ static inline int *init_seen(struct ubi_device *ubi) * free_seen - free the seen logic integer array. * @seen: integer array of @ubi->peb_count size */ -static inline void free_seen(int *seen) +static inline void free_seen(unsigned long *seen) { kfree(seen); } @@ -50,12 +52,12 @@ static inline void free_seen(int *seen) * @pnum: The PEB to be makred as seen * @seen: integer array of @ubi->peb_count size */ -static inline void set_seen(struct ubi_device *ubi, int pnum, int *seen) +static inline void set_seen(struct ubi_device *ubi, int pnum, unsigned long *seen) { if (!ubi_dbg_chk_fastmap(ubi) || !seen) return; - seen[pnum] = 1; + set_bit(pnum, seen); } /** @@ -63,7 +65,7 @@ static inline void set_seen(struct ubi_device *ubi, int pnum, int *seen) * @ubi: UBI device description object * @seen: integer array of @ubi->peb_count size */ -static int self_check_seen(struct ubi_device *ubi, int *seen) +static int self_check_seen(struct ubi_device *ubi, unsigned long *seen) { int pnum, ret = 0; @@ -71,7 +73,7 @@ static int self_check_seen(struct ubi_device *ubi, int *seen) return 0; for (pnum = 0; pnum < ubi->peb_count; pnum++) { - if (!seen[pnum] && ubi->lookuptbl[pnum]) { + if (test_bit(pnum, seen) && ubi->lookuptbl[pnum]) { ubi_err(ubi, "self-check failed for PEB %d, fastmap didn't see it", pnum); ret = -EINVAL; } @@ -578,7 +580,7 @@ static int count_fastmap_pebs(struct ubi_attach_info *ai) list_for_each_entry(aeb, &ai->free, u.list) n++; - ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) + ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) n++; @@ -850,27 +852,57 @@ fail: } /** + * find_fm_anchor - find the most recent Fastmap superblock (anchor) + * @ai: UBI attach info to be filled + */ +static int find_fm_anchor(struct ubi_attach_info *ai) +{ + int ret = -1; + struct ubi_ainf_peb *aeb; + unsigned long long max_sqnum = 0; + + list_for_each_entry(aeb, &ai->fastmap, u.list) { + if (aeb->vol_id == UBI_FM_SB_VOLUME_ID && aeb->sqnum > max_sqnum) { + max_sqnum = aeb->sqnum; + ret = aeb->pnum; + } + } + + return ret; +} + +/** * ubi_scan_fastmap - scan the fastmap. * @ubi: UBI device object * @ai: UBI attach info to be filled - * @fm_anchor: The fastmap starts at this PEB + * @scan_ai: UBI attach info from the first 64 PEBs, + * used to find the most recent Fastmap data structure * * Returns 0 on success, UBI_NO_FASTMAP if no fastmap was found, * UBI_BAD_FASTMAP if one was found but is not usable. * < 0 indicates an internal error. */ int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai, - int fm_anchor) + struct ubi_attach_info *scan_ai) { struct ubi_fm_sb *fmsb, *fmsb2; struct ubi_vid_hdr *vh; struct ubi_ec_hdr *ech; struct ubi_fastmap_layout *fm; - int i, used_blocks, pnum, ret = 0; + struct ubi_ainf_peb *tmp_aeb, *aeb; + int i, used_blocks, pnum, fm_anchor, ret = 0; size_t fm_size; __be32 crc, tmp_crc; unsigned long long sqnum = 0; + fm_anchor = find_fm_anchor(scan_ai); + if (fm_anchor < 0) + return UBI_NO_FASTMAP; + + /* Move all (possible) fastmap blocks into our new attach structure. */ + list_for_each_entry_safe(aeb, tmp_aeb, &scan_ai->fastmap, u.list) + list_move_tail(&aeb->u.list, &ai->fastmap); + down_write(&ubi->fm_protect); memset(ubi->fm_buf, 0, ubi->fm_size); @@ -945,6 +977,13 @@ int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai, goto free_hdr; } + if (i == 0 && pnum != fm_anchor) { + ubi_err(ubi, "Fastmap anchor PEB mismatch: PEB: %i vs. %i", + pnum, fm_anchor); + ret = UBI_BAD_FASTMAP; + goto free_hdr; + } + ret = ubi_io_read_ec_hdr(ubi, pnum, ech, 0); if (ret && ret != UBI_IO_BITFLIPS) { ubi_err(ubi, "unable to read fastmap block# %i EC (PEB: %i)", @@ -1102,7 +1141,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, struct rb_node *tmp_rb; int ret, i, j, free_peb_count, used_peb_count, vol_count; int scrub_peb_count, erase_peb_count; - int *seen_pebs = NULL; + unsigned long *seen_pebs = NULL; fm_raw = ubi->fm_buf; memset(ubi->fm_buf, 0, ubi->fm_size); diff --git a/drivers/mtd/ubi/gluebi.c b/drivers/mtd/ubi/gluebi.c index cb7c075f2144..1cb287ec32ad 100644 --- a/drivers/mtd/ubi/gluebi.c +++ b/drivers/mtd/ubi/gluebi.c @@ -99,9 +99,6 @@ static int gluebi_get_device(struct mtd_info *mtd) struct gluebi_device *gluebi; int ubi_mode = UBI_READONLY; - if (!try_module_get(THIS_MODULE)) - return -ENODEV; - if (mtd->flags & MTD_WRITEABLE) ubi_mode = UBI_READWRITE; @@ -129,7 +126,6 @@ static int gluebi_get_device(struct mtd_info *mtd) ubi_mode); if (IS_ERR(gluebi->desc)) { mutex_unlock(&devices_mutex); - module_put(THIS_MODULE); return PTR_ERR(gluebi->desc); } gluebi->refcnt += 1; @@ -153,7 +149,6 @@ static void gluebi_put_device(struct mtd_info *mtd) gluebi->refcnt -= 1; if (gluebi->refcnt == 0) ubi_close_volume(gluebi->desc); - module_put(THIS_MODULE); mutex_unlock(&devices_mutex); } diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index 10cf3b549959..ff8cafe1e5cd 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -1019,7 +1019,7 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, p = (char *)vid_hdr - ubi->vid_hdr_shift; read_err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, - ubi->vid_hdr_alsize); + ubi->vid_hdr_shift + UBI_VID_HDR_SIZE); if (read_err && read_err != UBI_IO_BITFLIPS && !mtd_is_eccerr(read_err)) return read_err; diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 61d4e99755a4..b616a115c9d3 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -703,6 +703,8 @@ struct ubi_ainf_volume { * @erase: list of physical eraseblocks which have to be erased * @alien: list of physical eraseblocks which should not be used by UBI (e.g., * those belonging to "preserve"-compatible internal volumes) + * @fastmap: list of physical eraseblocks which relate to fastmap (e.g., + * eraseblocks of the current and not yet erased old fastmap blocks) * @corr_peb_count: count of PEBs in the @corr list * @empty_peb_count: count of PEBs which are presumably empty (contain only * 0xFF bytes) @@ -713,6 +715,8 @@ struct ubi_ainf_volume { * @vols_found: number of volumes found * @highest_vol_id: highest volume ID * @is_empty: flag indicating whether the MTD device is empty or not + * @force_full_scan: flag indicating whether we need to do a full scan and drop + all existing Fastmap data structures * @min_ec: lowest erase counter value * @max_ec: highest erase counter value * @max_sqnum: highest sequence number value @@ -731,6 +735,7 @@ struct ubi_attach_info { struct list_head free; struct list_head erase; struct list_head alien; + struct list_head fastmap; int corr_peb_count; int empty_peb_count; int alien_peb_count; @@ -739,6 +744,7 @@ struct ubi_attach_info { int vols_found; int highest_vol_id; int is_empty; + int force_full_scan; int min_ec; int max_ec; unsigned long long max_sqnum; @@ -911,7 +917,7 @@ int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb, size_t ubi_calc_fm_size(struct ubi_device *ubi); int ubi_update_fastmap(struct ubi_device *ubi); int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai, - int fm_anchor); + struct ubi_attach_info *scan_ai); #else static inline int ubi_update_fastmap(struct ubi_device *ubi) { return 0; } #endif @@ -1105,4 +1111,42 @@ static inline int idx2vol_id(const struct ubi_device *ubi, int idx) return idx; } +/** + * ubi_is_fm_vol - check whether a volume ID is a Fastmap volume. + * @vol_id: volume ID + */ +static inline bool ubi_is_fm_vol(int vol_id) +{ + switch (vol_id) { + case UBI_FM_SB_VOLUME_ID: + case UBI_FM_DATA_VOLUME_ID: + return true; + } + + return false; +} + +/** + * ubi_find_fm_block - check whether a PEB is part of the current Fastmap. + * @ubi: UBI device description object + * @pnum: physical eraseblock to look for + * + * This function returns a wear leveling object if @pnum relates to the current + * fastmap, @NULL otherwise. + */ +static inline struct ubi_wl_entry *ubi_find_fm_block(const struct ubi_device *ubi, + int pnum) +{ + int i; + + if (ubi->fm) { + for (i = 0; i < ubi->fm->used_blocks; i++) { + if (ubi->fm->e[i]->pnum == pnum) + return ubi->fm->e[i]; + } + } + + return NULL; +} + #endif /* !__UBI_UBI_H__ */ diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c index 10059dfdc1b6..0138f526474a 100644 --- a/drivers/mtd/ubi/vmt.c +++ b/drivers/mtd/ubi/vmt.c @@ -488,13 +488,6 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) spin_unlock(&ubi->volumes_lock); } - /* Change volume table record */ - vtbl_rec = ubi->vtbl[vol_id]; - vtbl_rec.reserved_pebs = cpu_to_be32(reserved_pebs); - err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); - if (err) - goto out_acc; - if (pebs < 0) { for (i = 0; i < -pebs; i++) { err = ubi_eba_unmap_leb(ubi, vol, reserved_pebs + i); @@ -512,6 +505,24 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) spin_unlock(&ubi->volumes_lock); } + /* + * When we shrink a volume we have to flush all pending (erase) work. + * Otherwise it can happen that upon next attach UBI finds a LEB with + * lnum > highest_lnum and refuses to attach. + */ + if (pebs < 0) { + err = ubi_wl_flush(ubi, vol_id, UBI_ALL); + if (err) + goto out_acc; + } + + /* Change volume table record */ + vtbl_rec = ubi->vtbl[vol_id]; + vtbl_rec.reserved_pebs = cpu_to_be32(reserved_pebs); + err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); + if (err) + goto out_acc; + vol->reserved_pebs = reserved_pebs; if (vol->vol_type == UBI_DYNAMIC_VOLUME) { vol->used_ebs = reserved_pebs; diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 959c7b12e0b1..f4533266d7b2 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1598,19 +1598,44 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) } } - dbg_wl("found %i PEBs", found_pebs); + list_for_each_entry(aeb, &ai->fastmap, u.list) { + cond_resched(); + + e = ubi_find_fm_block(ubi, aeb->pnum); - if (ubi->fm) { - ubi_assert(ubi->good_peb_count == - found_pebs + ubi->fm->used_blocks); + if (e) { + ubi_assert(!ubi->lookuptbl[e->pnum]); + ubi->lookuptbl[e->pnum] = e; + } else { + /* + * Usually old Fastmap PEBs are scheduled for erasure + * and we don't have to care about them but if we face + * an power cut before scheduling them we need to + * take care of them here. + */ + if (ubi->lookuptbl[aeb->pnum]) + continue; + + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) + goto out_free; - for (i = 0; i < ubi->fm->used_blocks; i++) { - e = ubi->fm->e[i]; + e->pnum = aeb->pnum; + e->ec = aeb->ec; + ubi_assert(!ubi->lookuptbl[e->pnum]); ubi->lookuptbl[e->pnum] = e; + if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0)) { + wl_entry_destroy(ubi, e); + goto out_free; + } } + + found_pebs++; } - else - ubi_assert(ubi->good_peb_count == found_pebs); + + dbg_wl("found %i PEBs", found_pebs); + + ubi_assert(ubi->good_peb_count == found_pebs); reserved_pebs = WL_RESERVED_PEBS; ubi_fastmap_init(ubi, &reserved_pebs); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1f276fa30ba6..217e8da0628c 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -152,7 +152,7 @@ module_param(lacp_rate, charp, 0); MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner; " "0 for slow, 1 for fast"); module_param(ad_select, charp, 0); -MODULE_PARM_DESC(ad_select, "803.ad aggregation selection logic; " +MODULE_PARM_DESC(ad_select, "802.3ad aggregation selection logic; " "0 for stable (default), 1 for bandwidth, " "2 for count"); module_param(min_links, int, 0); diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig index 547098086773..f81df91a9ce1 100644 --- a/drivers/net/caif/Kconfig +++ b/drivers/net/caif/Kconfig @@ -52,5 +52,5 @@ config CAIF_VIRTIO The caif driver for CAIF over Virtio. if CAIF_VIRTIO -source "drivers/vhost/Kconfig" +source "drivers/vhost/Kconfig.vringh" endif diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index 4705e2dea423..e0ebe1378cb2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -104,6 +104,8 @@ enum { enum CPL_error { CPL_ERR_NONE = 0, + CPL_ERR_TCAM_PARITY = 1, + CPL_ERR_TCAM_MISS = 2, CPL_ERR_TCAM_FULL = 3, CPL_ERR_BAD_LENGTH = 15, CPL_ERR_BAD_ROUTE = 18, diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 1471e16ba719..f45385f5c6e5 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1299,6 +1299,7 @@ static int dm9000_open(struct net_device *dev) { struct board_info *db = netdev_priv(dev); + unsigned int irq_flags = irq_get_trigger_type(dev->irq); if (netif_msg_ifup(db)) dev_dbg(db->dev, "enabling %s\n", dev->name); @@ -1306,9 +1307,11 @@ dm9000_open(struct net_device *dev) /* If there is no IRQ type specified, tell the user that this is a * problem */ - if (irq_get_trigger_type(dev->irq) == IRQF_TRIGGER_NONE) + if (irq_flags == IRQF_TRIGGER_NONE) dev_warn(db->dev, "WARNING: no IRQ resource flags set.\n"); + irq_flags |= IRQF_SHARED; + /* GPIO0 on pre-activate PHY, Reg 1F is not set by reset */ iow(db, DM9000_GPR, 0); /* REG_1F bit0 activate phyxcer */ mdelay(1); /* delay needs by DM9000B */ @@ -1316,8 +1319,7 @@ dm9000_open(struct net_device *dev) /* Initialize DM9000 board */ dm9000_init_dm9000(dev); - if (request_irq(dev->irq, dm9000_interrupt, IRQF_SHARED, - dev->name, dev)) + if (request_irq(dev->irq, dm9000_interrupt, irq_flags, dev->name, dev)) return -EAGAIN; /* Now that we have an interrupt handler hooked up we can unmask * our interrupts diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c index 1235c7f2564b..1e1eb92998fb 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c @@ -17,7 +17,7 @@ static const struct mac_stats_string g_gmac_stats_string[] = { {"gmac_rx_octets_total_ok", MAC_STATS_FIELD_OFF(rx_good_bytes)}, {"gmac_rx_octets_bad", MAC_STATS_FIELD_OFF(rx_bad_bytes)}, {"gmac_rx_uc_pkts", MAC_STATS_FIELD_OFF(rx_uc_pkts)}, - {"gamc_rx_mc_pkts", MAC_STATS_FIELD_OFF(rx_mc_pkts)}, + {"gmac_rx_mc_pkts", MAC_STATS_FIELD_OFF(rx_mc_pkts)}, {"gmac_rx_bc_pkts", MAC_STATS_FIELD_OFF(rx_bc_pkts)}, {"gmac_rx_pkts_64octets", MAC_STATS_FIELD_OFF(rx_64bytes)}, {"gmac_rx_pkts_65to127", MAC_STATS_FIELD_OFF(rx_65to127)}, diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index 7fd4d54599e4..6b03c8553e59 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -2032,7 +2032,8 @@ const struct e1000_info e1000_82574_info = { | FLAG2_DISABLE_ASPM_L0S | FLAG2_DISABLE_ASPM_L1 | FLAG2_NO_DISABLE_RX - | FLAG2_DMA_BURST, + | FLAG2_DMA_BURST + | FLAG2_CHECK_SYSTIM_OVERFLOW, .pba = 32, .max_hw_frame_size = DEFAULT_JUMBO, .get_variants = e1000_get_variants_82571, @@ -2053,7 +2054,8 @@ const struct e1000_info e1000_82583_info = { | FLAG_HAS_CTRLEXT_ON_LOAD, .flags2 = FLAG2_DISABLE_ASPM_L0S | FLAG2_DISABLE_ASPM_L1 - | FLAG2_NO_DISABLE_RX, + | FLAG2_NO_DISABLE_RX + | FLAG2_CHECK_SYSTIM_OVERFLOW, .pba = 32, .max_hw_frame_size = DEFAULT_JUMBO, .get_variants = e1000_get_variants_82571, diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index ef96cd11d6d2..879cca47b021 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -452,6 +452,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca); #define FLAG2_PCIM2PCI_ARBITER_WA BIT(11) #define FLAG2_DFLT_CRC_STRIPPING BIT(12) #define FLAG2_CHECK_RX_HWTSTAMP BIT(13) +#define FLAG2_CHECK_SYSTIM_OVERFLOW BIT(14) #define E1000_RX_DESC_PS(R, i) \ (&(((union e1000_rx_desc_packet_split *)((R).desc))[i])) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 3e11322d8d58..f3aaca743ea3 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -5885,7 +5885,8 @@ const struct e1000_info e1000_pch_lpt_info = { | FLAG_HAS_JUMBO_FRAMES | FLAG_APME_IN_WUC, .flags2 = FLAG2_HAS_PHY_STATS - | FLAG2_HAS_EEE, + | FLAG2_HAS_EEE + | FLAG2_CHECK_SYSTIM_OVERFLOW, .pba = 26, .max_hw_frame_size = 9022, .get_variants = e1000_get_variants_ich8lan, diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 02f443958f31..7017281ba2dc 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -4303,6 +4303,42 @@ void e1000e_reinit_locked(struct e1000_adapter *adapter) } /** + * e1000e_sanitize_systim - sanitize raw cycle counter reads + * @hw: pointer to the HW structure + * @systim: cycle_t value read, sanitized and returned + * + * Errata for 82574/82583 possible bad bits read from SYSTIMH/L: + * check to see that the time is incrementing at a reasonable + * rate and is a multiple of incvalue. + **/ +static cycle_t e1000e_sanitize_systim(struct e1000_hw *hw, cycle_t systim) +{ + u64 time_delta, rem, temp; + cycle_t systim_next; + u32 incvalue; + int i; + + incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK; + for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) { + /* latch SYSTIMH on read of SYSTIML */ + systim_next = (cycle_t)er32(SYSTIML); + systim_next |= (cycle_t)er32(SYSTIMH) << 32; + + time_delta = systim_next - systim; + temp = time_delta; + /* VMWare users have seen incvalue of zero, don't div / 0 */ + rem = incvalue ? do_div(temp, incvalue) : (time_delta != 0); + + systim = systim_next; + + if ((time_delta < E1000_82574_SYSTIM_EPSILON) && (rem == 0)) + break; + } + + return systim; +} + +/** * e1000e_cyclecounter_read - read raw cycle counter (used by time counter) * @cc: cyclecounter structure **/ @@ -4312,7 +4348,7 @@ static cycle_t e1000e_cyclecounter_read(const struct cyclecounter *cc) cc); struct e1000_hw *hw = &adapter->hw; u32 systimel, systimeh; - cycle_t systim, systim_next; + cycle_t systim; /* SYSTIMH latching upon SYSTIML read does not work well. * This means that if SYSTIML overflows after we read it but before * we read SYSTIMH, the value of SYSTIMH has been incremented and we @@ -4335,33 +4371,9 @@ static cycle_t e1000e_cyclecounter_read(const struct cyclecounter *cc) systim = (cycle_t)systimel; systim |= (cycle_t)systimeh << 32; - if ((hw->mac.type == e1000_82574) || (hw->mac.type == e1000_82583)) { - u64 time_delta, rem, temp; - u32 incvalue; - int i; - - /* errata for 82574/82583 possible bad bits read from SYSTIMH/L - * check to see that the time is incrementing at a reasonable - * rate and is a multiple of incvalue - */ - incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK; - for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) { - /* latch SYSTIMH on read of SYSTIML */ - systim_next = (cycle_t)er32(SYSTIML); - systim_next |= (cycle_t)er32(SYSTIMH) << 32; - - time_delta = systim_next - systim; - temp = time_delta; - /* VMWare users have seen incvalue of zero, don't div / 0 */ - rem = incvalue ? do_div(temp, incvalue) : (time_delta != 0); - - systim = systim_next; + if (adapter->flags2 & FLAG2_CHECK_SYSTIM_OVERFLOW) + systim = e1000e_sanitize_systim(hw, systim); - if ((time_delta < E1000_82574_SYSTIM_EPSILON) && - (rem == 0)) - break; - } - } return systim; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 81c99e1be708..c6ac7a61812f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4554,23 +4554,38 @@ static u8 i40e_get_iscsi_tc_map(struct i40e_pf *pf) **/ static u8 i40e_dcb_get_num_tc(struct i40e_dcbx_config *dcbcfg) { + int i, tc_unused = 0; u8 num_tc = 0; - int i; + u8 ret = 0; /* Scan the ETS Config Priority Table to find * traffic class enabled for a given priority - * and use the traffic class index to get the - * number of traffic classes enabled + * and create a bitmask of enabled TCs */ - for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) { - if (dcbcfg->etscfg.prioritytable[i] > num_tc) - num_tc = dcbcfg->etscfg.prioritytable[i]; - } + for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) + num_tc |= BIT(dcbcfg->etscfg.prioritytable[i]); - /* Traffic class index starts from zero so - * increment to return the actual count + /* Now scan the bitmask to check for + * contiguous TCs starting with TC0 */ - return num_tc + 1; + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + if (num_tc & BIT(i)) { + if (!tc_unused) { + ret++; + } else { + pr_err("Non-contiguous TC - Disabling DCB\n"); + return 1; + } + } else { + tc_unused = 1; + } + } + + /* There is always at least TC0 */ + if (!ret) + ret = 1; + + return ret; } /** diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index e61b647f5f2a..336c103ae374 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -744,7 +744,8 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) } } - shhwtstamps.hwtstamp = ktime_sub_ns(shhwtstamps.hwtstamp, adjust); + shhwtstamps.hwtstamp = + ktime_add_ns(shhwtstamps.hwtstamp, adjust); skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps); dev_kfree_skb_any(adapter->ptp_tx_skb); @@ -767,13 +768,32 @@ void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, struct sk_buff *skb) { __le64 *regval = (__le64 *)va; + struct igb_adapter *adapter = q_vector->adapter; + int adjust = 0; /* The timestamp is recorded in little endian format. * DWORD: 0 1 2 3 * Field: Reserved Reserved SYSTIML SYSTIMH */ - igb_ptp_systim_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb), + igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), le64_to_cpu(regval[1])); + + /* adjust timestamp for the RX latency based on link speed */ + if (adapter->hw.mac.type == e1000_i210) { + switch (adapter->link_speed) { + case SPEED_10: + adjust = IGB_I210_RX_LATENCY_10; + break; + case SPEED_100: + adjust = IGB_I210_RX_LATENCY_100; + break; + case SPEED_1000: + adjust = IGB_I210_RX_LATENCY_1000; + break; + } + } + skb_hwtstamps(skb)->hwtstamp = + ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust); } /** @@ -825,7 +845,7 @@ void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, } } skb_hwtstamps(skb)->hwtstamp = - ktime_add_ns(skb_hwtstamps(skb)->hwtstamp, adjust); + ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust); /* Update the last_rx_timestamp timer in order to enable watchdog check * for error case of latched timestamp on a dropped packet. diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 5418c69a7463..b4f03748adc0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4100,6 +4100,8 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; u32 vlnctrl, i; + vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + switch (hw->mac.type) { case ixgbe_mac_82599EB: case ixgbe_mac_X540: @@ -4112,8 +4114,7 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter) /* fall through */ case ixgbe_mac_82598EB: /* legacy case, we can just disable VLAN filtering */ - vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); - vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN); + vlnctrl &= ~IXGBE_VLNCTRL_VFE; IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); return; } @@ -4125,6 +4126,10 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter) /* Set flag so we don't redo unnecessary work */ adapter->flags2 |= IXGBE_FLAG2_VLAN_PROMISC; + /* For VMDq and SR-IOV we must leave VLAN filtering enabled */ + vlnctrl |= IXGBE_VLNCTRL_VFE; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); + /* Add PF to all active pools */ for (i = IXGBE_VLVF_ENTRIES; --i;) { u32 reg_offset = IXGBE_VLVFB(i * 2 + VMDQ_P(0) / 32); @@ -4191,6 +4196,11 @@ static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; u32 vlnctrl, i; + /* Set VLAN filtering to enabled */ + vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + vlnctrl |= IXGBE_VLNCTRL_VFE; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); + switch (hw->mac.type) { case ixgbe_mac_82599EB: case ixgbe_mac_X540: @@ -4202,10 +4212,6 @@ static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter) break; /* fall through */ case ixgbe_mac_82598EB: - vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); - vlnctrl &= ~IXGBE_VLNCTRL_CFIEN; - vlnctrl |= IXGBE_VLNCTRL_VFE; - IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); return; } @@ -8390,12 +8396,14 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter, struct tcf_exts *exts, u64 *action, u8 *queue) { const struct tc_action *a; + LIST_HEAD(actions); int err; if (tc_no_actions(exts)) return -EINVAL; - tc_for_each_action(a, exts) { + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { /* Drop action */ if (is_tcf_gact_shot(a)) { @@ -9517,6 +9525,7 @@ skip_sriov: /* copy netdev features into list of user selectable features */ netdev->hw_features |= netdev->features | + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_RXALL | diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 994232e03380..1801fd83acc0 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -245,12 +245,16 @@ static int mtk_phy_connect(struct mtk_mac *mac) case PHY_INTERFACE_MODE_MII: ge_mode = 1; break; - case PHY_INTERFACE_MODE_RMII: + case PHY_INTERFACE_MODE_REVMII: ge_mode = 2; break; + case PHY_INTERFACE_MODE_RMII: + if (!mac->id) + goto err_phy; + ge_mode = 3; + break; default: - dev_err(eth->dev, "invalid phy_mode\n"); - return -1; + goto err_phy; } /* put the gmac into the right mode */ @@ -263,13 +267,25 @@ static int mtk_phy_connect(struct mtk_mac *mac) mac->phy_dev->autoneg = AUTONEG_ENABLE; mac->phy_dev->speed = 0; mac->phy_dev->duplex = 0; + + if (of_phy_is_fixed_link(mac->of_node)) + mac->phy_dev->supported |= + SUPPORTED_Pause | SUPPORTED_Asym_Pause; + mac->phy_dev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause; mac->phy_dev->advertising = mac->phy_dev->supported | ADVERTISED_Autoneg; phy_start_aneg(mac->phy_dev); + of_node_put(np); + return 0; + +err_phy: + of_node_put(np); + dev_err(eth->dev, "invalid phy_mode\n"); + return -EINVAL; } static int mtk_mdio_init(struct mtk_eth *eth) @@ -541,15 +557,15 @@ static inline struct mtk_tx_buf *mtk_desc_to_tx_buf(struct mtk_tx_ring *ring, return &ring->buf[idx]; } -static void mtk_tx_unmap(struct device *dev, struct mtk_tx_buf *tx_buf) +static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf) { if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) { - dma_unmap_single(dev, + dma_unmap_single(eth->dev, dma_unmap_addr(tx_buf, dma_addr0), dma_unmap_len(tx_buf, dma_len0), DMA_TO_DEVICE); } else if (tx_buf->flags & MTK_TX_FLAGS_PAGE0) { - dma_unmap_page(dev, + dma_unmap_page(eth->dev, dma_unmap_addr(tx_buf, dma_addr0), dma_unmap_len(tx_buf, dma_len0), DMA_TO_DEVICE); @@ -594,9 +610,9 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, if (skb_vlan_tag_present(skb)) txd4 |= TX_DMA_INS_VLAN | skb_vlan_tag_get(skb); - mapped_addr = dma_map_single(&dev->dev, skb->data, + mapped_addr = dma_map_single(eth->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&dev->dev, mapped_addr))) + if (unlikely(dma_mapping_error(eth->dev, mapped_addr))) return -ENOMEM; WRITE_ONCE(itxd->txd1, mapped_addr); @@ -622,10 +638,10 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, n_desc++; frag_map_size = min(frag_size, MTK_TX_DMA_BUF_LEN); - mapped_addr = skb_frag_dma_map(&dev->dev, frag, offset, + mapped_addr = skb_frag_dma_map(eth->dev, frag, offset, frag_map_size, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&dev->dev, mapped_addr))) + if (unlikely(dma_mapping_error(eth->dev, mapped_addr))) goto err_dma; if (i == nr_frags - 1 && @@ -678,7 +694,7 @@ err_dma: tx_buf = mtk_desc_to_tx_buf(ring, itxd); /* unmap dma */ - mtk_tx_unmap(&dev->dev, tx_buf); + mtk_tx_unmap(eth, tx_buf); itxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU; itxd = mtk_qdma_phys_to_virt(ring, itxd->txd2); @@ -834,11 +850,11 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, netdev->stats.rx_dropped++; goto release_desc; } - dma_addr = dma_map_single(ð->netdev[mac]->dev, + dma_addr = dma_map_single(eth->dev, new_data + NET_SKB_PAD, ring->buf_size, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(&netdev->dev, dma_addr))) { + if (unlikely(dma_mapping_error(eth->dev, dma_addr))) { skb_free_frag(new_data); netdev->stats.rx_dropped++; goto release_desc; @@ -853,7 +869,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, } skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); - dma_unmap_single(&netdev->dev, trxd.rxd1, + dma_unmap_single(eth->dev, trxd.rxd1, ring->buf_size, DMA_FROM_DEVICE); pktlen = RX_DMA_GET_PLEN0(trxd.rxd2); skb->dev = netdev; @@ -935,7 +951,7 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget) done[mac]++; budget--; } - mtk_tx_unmap(eth->dev, tx_buf); + mtk_tx_unmap(eth, tx_buf); ring->last_free = desc; atomic_inc(&ring->free_count); @@ -1090,7 +1106,7 @@ static void mtk_tx_clean(struct mtk_eth *eth) if (ring->buf) { for (i = 0; i < MTK_DMA_SIZE; i++) - mtk_tx_unmap(eth->dev, &ring->buf[i]); + mtk_tx_unmap(eth, &ring->buf[i]); kfree(ring->buf); ring->buf = NULL; } @@ -1748,6 +1764,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) goto free_netdev; } spin_lock_init(&mac->hw_stats->stats_lock); + u64_stats_init(&mac->hw_stats->syncp); mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET; SET_NETDEV_DEV(eth->netdev[id], eth->dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index f4497cf4d06d..d728704d0c7b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -721,6 +721,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 #define QUERY_DEV_CAP_ETH_BACKPL_OFFSET 0x9c +#define QUERY_DEV_CAP_DIAG_RPRT_PER_PORT 0x9c #define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d #define QUERY_DEV_CAP_VXLAN 0x9e #define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0 @@ -935,6 +936,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP; if (field32 & (1 << 7)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT; + MLX4_GET(field32, outbox, QUERY_DEV_CAP_DIAG_RPRT_PER_PORT); + if (field32 & (1 << 17)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT; MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC); if (field & 1<<6) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN; @@ -2457,6 +2461,42 @@ int mlx4_NOP(struct mlx4_dev *dev) MLX4_CMD_NATIVE); } +int mlx4_query_diag_counters(struct mlx4_dev *dev, u8 op_modifier, + const u32 offset[], + u32 value[], size_t array_len, u8 port) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *outbox; + size_t i; + int ret; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + outbox = mailbox->buf; + + ret = mlx4_cmd_box(dev, 0, mailbox->dma, port, op_modifier, + MLX4_CMD_DIAG_RPRT, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (ret) + goto out; + + for (i = 0; i < array_len; i++) { + if (offset[i] > MLX4_MAILBOX_SIZE) { + ret = -EINVAL; + goto out; + } + + MLX4_GET(value[i], outbox, offset[i]); + } + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return ret; +} +EXPORT_SYMBOL(mlx4_query_diag_counters); + int mlx4_get_phys_port_id(struct mlx4_dev *dev) { u8 port; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 0f19b01e3fff..dc8b1cb0fdc8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -318,6 +318,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, u32 *action, u32 *flow_tag) { const struct tc_action *a; + LIST_HEAD(actions); if (tc_no_actions(exts)) return -EINVAL; @@ -325,7 +326,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, *flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; *action = 0; - tc_for_each_action(a, exts) { + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { /* Only support a single action per rule */ if (*action) return -EINVAL; @@ -362,13 +364,15 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, u32 *action, u32 *dest_vport) { const struct tc_action *a; + LIST_HEAD(actions); if (tc_no_actions(exts)) return -EINVAL; *action = 0; - tc_for_each_action(a, exts) { + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { /* Only support a single action per rule */ if (*action) return -EINVAL; @@ -503,6 +507,7 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow; struct tc_action *a; struct mlx5_fc *counter; + LIST_HEAD(actions); u64 bytes; u64 packets; u64 lastuse; @@ -518,7 +523,8 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); - tc_for_each_action(a, f->exts) + tcf_exts_to_list(f->exts, &actions); + list_for_each_entry(a, &actions, list) tcf_action_stats_update(a, bytes, packets, lastuse); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index 04bc522605a0..c07f4d01b70e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -63,12 +63,12 @@ void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) complete(&srq->free); } -static int get_pas_size(void *srqc) +static int get_pas_size(struct mlx5_srq_attr *in) { - u32 log_page_size = MLX5_GET(srqc, srqc, log_page_size) + 12; - u32 log_srq_size = MLX5_GET(srqc, srqc, log_srq_size); - u32 log_rq_stride = MLX5_GET(srqc, srqc, log_rq_stride); - u32 page_offset = MLX5_GET(srqc, srqc, page_offset); + u32 log_page_size = in->log_page_size + 12; + u32 log_srq_size = in->log_size; + u32 log_rq_stride = in->wqe_shift; + u32 page_offset = in->page_offset; u32 po_quanta = 1 << (log_page_size - 6); u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); u32 page_size = 1 << log_page_size; @@ -78,57 +78,58 @@ static int get_pas_size(void *srqc) return rq_num_pas * sizeof(u64); } -static void rmpc_srqc_reformat(void *srqc, void *rmpc, bool srqc_to_rmpc) +static void set_wq(void *wq, struct mlx5_srq_attr *in) { - void *wq = MLX5_ADDR_OF(rmpc, rmpc, wq); - - if (srqc_to_rmpc) { - switch (MLX5_GET(srqc, srqc, state)) { - case MLX5_SRQC_STATE_GOOD: - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); - break; - case MLX5_SRQC_STATE_ERROR: - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_ERR); - break; - default: - pr_warn("%s: %d: Unknown srq state = 0x%x\n", __func__, - __LINE__, MLX5_GET(srqc, srqc, state)); - MLX5_SET(rmpc, rmpc, state, MLX5_GET(srqc, srqc, state)); - } - - MLX5_SET(wq, wq, wq_signature, MLX5_GET(srqc, srqc, wq_signature)); - MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(srqc, srqc, log_page_size)); - MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(srqc, srqc, log_rq_stride) + 4); - MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(srqc, srqc, log_srq_size)); - MLX5_SET(wq, wq, page_offset, MLX5_GET(srqc, srqc, page_offset)); - MLX5_SET(wq, wq, lwm, MLX5_GET(srqc, srqc, lwm)); - MLX5_SET(wq, wq, pd, MLX5_GET(srqc, srqc, pd)); - MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(srqc, srqc, dbr_addr)); - } else { - switch (MLX5_GET(rmpc, rmpc, state)) { - case MLX5_RMPC_STATE_RDY: - MLX5_SET(srqc, srqc, state, MLX5_SRQC_STATE_GOOD); - break; - case MLX5_RMPC_STATE_ERR: - MLX5_SET(srqc, srqc, state, MLX5_SRQC_STATE_ERROR); - break; - default: - pr_warn("%s: %d: Unknown rmp state = 0x%x\n", - __func__, __LINE__, - MLX5_GET(rmpc, rmpc, state)); - MLX5_SET(srqc, srqc, state, - MLX5_GET(rmpc, rmpc, state)); - } - - MLX5_SET(srqc, srqc, wq_signature, MLX5_GET(wq, wq, wq_signature)); - MLX5_SET(srqc, srqc, log_page_size, MLX5_GET(wq, wq, log_wq_pg_sz)); - MLX5_SET(srqc, srqc, log_rq_stride, MLX5_GET(wq, wq, log_wq_stride) - 4); - MLX5_SET(srqc, srqc, log_srq_size, MLX5_GET(wq, wq, log_wq_sz)); - MLX5_SET(srqc, srqc, page_offset, MLX5_GET(wq, wq, page_offset)); - MLX5_SET(srqc, srqc, lwm, MLX5_GET(wq, wq, lwm)); - MLX5_SET(srqc, srqc, pd, MLX5_GET(wq, wq, pd)); - MLX5_SET64(srqc, srqc, dbr_addr, MLX5_GET64(wq, wq, dbr_addr)); - } + MLX5_SET(wq, wq, wq_signature, !!(in->flags + & MLX5_SRQ_FLAG_WQ_SIG)); + MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size); + MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4); + MLX5_SET(wq, wq, log_wq_sz, in->log_size); + MLX5_SET(wq, wq, page_offset, in->page_offset); + MLX5_SET(wq, wq, lwm, in->lwm); + MLX5_SET(wq, wq, pd, in->pd); + MLX5_SET64(wq, wq, dbr_addr, in->db_record); +} + +static void set_srqc(void *srqc, struct mlx5_srq_attr *in) +{ + MLX5_SET(srqc, srqc, wq_signature, !!(in->flags + & MLX5_SRQ_FLAG_WQ_SIG)); + MLX5_SET(srqc, srqc, log_page_size, in->log_page_size); + MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift); + MLX5_SET(srqc, srqc, log_srq_size, in->log_size); + MLX5_SET(srqc, srqc, page_offset, in->page_offset); + MLX5_SET(srqc, srqc, lwm, in->lwm); + MLX5_SET(srqc, srqc, pd, in->pd); + MLX5_SET64(srqc, srqc, dbr_addr, in->db_record); + MLX5_SET(srqc, srqc, xrcd, in->xrcd); + MLX5_SET(srqc, srqc, cqn, in->cqn); +} + +static void get_wq(void *wq, struct mlx5_srq_attr *in) +{ + if (MLX5_GET(wq, wq, wq_signature)) + in->flags &= MLX5_SRQ_FLAG_WQ_SIG; + in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz); + in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4; + in->log_size = MLX5_GET(wq, wq, log_wq_sz); + in->page_offset = MLX5_GET(wq, wq, page_offset); + in->lwm = MLX5_GET(wq, wq, lwm); + in->pd = MLX5_GET(wq, wq, pd); + in->db_record = MLX5_GET64(wq, wq, dbr_addr); +} + +static void get_srqc(void *srqc, struct mlx5_srq_attr *in) +{ + if (MLX5_GET(srqc, srqc, wq_signature)) + in->flags &= MLX5_SRQ_FLAG_WQ_SIG; + in->log_page_size = MLX5_GET(srqc, srqc, log_page_size); + in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride); + in->log_size = MLX5_GET(srqc, srqc, log_srq_size); + in->page_offset = MLX5_GET(srqc, srqc, page_offset); + in->lwm = MLX5_GET(srqc, srqc, lwm); + in->pd = MLX5_GET(srqc, srqc, pd); + in->db_record = MLX5_GET64(srqc, srqc, dbr_addr); } struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) @@ -149,19 +150,36 @@ struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) EXPORT_SYMBOL(mlx5_core_get_srq); static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, int inlen) + struct mlx5_srq_attr *in) { - struct mlx5_create_srq_mbox_out out; + u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0}; + void *create_in; + void *srqc; + void *pas; + int pas_size; + int inlen; int err; - memset(&out, 0, sizeof(out)); + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; + create_in = mlx5_vzalloc(inlen); + if (!create_in) + return -ENOMEM; + + srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry); + pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_SRQ); + set_srqc(srqc, in); + memcpy(pas, in->pas, pas_size); - err = mlx5_cmd_exec_check_status(dev, (u32 *)in, inlen, (u32 *)(&out), - sizeof(out)); + MLX5_SET(create_srq_in, create_in, opcode, + MLX5_CMD_OP_CREATE_SRQ); - srq->srqn = be32_to_cpu(out.srqn) & 0xffffff; + err = mlx5_cmd_exec_check_status(dev, create_in, inlen, create_out, + sizeof(create_out)); + kvfree(create_in); + if (!err) + srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); return err; } @@ -169,67 +187,75 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, static int destroy_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) { - struct mlx5_destroy_srq_mbox_in in; - struct mlx5_destroy_srq_mbox_out out; + u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0}; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ); - in.srqn = cpu_to_be32(srq->srqn); + MLX5_SET(destroy_srq_in, srq_in, opcode, + MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); - return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), sizeof(in), - (u32 *)(&out), sizeof(out)); + return mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in), + srq_out, sizeof(srq_out)); } static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq) { - struct mlx5_arm_srq_mbox_in in; - struct mlx5_arm_srq_mbox_out out; - - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); + /* arm_srq structs missing using identical xrc ones */ + u32 srq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ARM_RQ); - in.hdr.opmod = cpu_to_be16(!!is_srq); - in.srqn = cpu_to_be32(srq->srqn); - in.lwm = cpu_to_be16(lwm); + MLX5_SET(arm_xrc_srq_in, srq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, srq_in, xrc_srqn, srq->srqn); + MLX5_SET(arm_xrc_srq_in, srq_in, lwm, lwm); - return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), - sizeof(in), (u32 *)(&out), - sizeof(out)); + return mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in), + srq_out, sizeof(srq_out)); } static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_query_srq_mbox_out *out) + struct mlx5_srq_attr *out) { - struct mlx5_query_srq_mbox_in in; + u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0}; + u32 *srq_out; + void *srqc; + int err; - memset(&in, 0, sizeof(in)); + srq_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_srq_out)); + if (!srq_out) + return -ENOMEM; - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SRQ); - in.srqn = cpu_to_be32(srq->srqn); + MLX5_SET(query_srq_in, srq_in, opcode, + MLX5_CMD_OP_QUERY_SRQ); + MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn); + err = mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in), + srq_out, + MLX5_ST_SZ_BYTES(query_srq_out)); + if (err) + goto out; - return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), sizeof(in), - (u32 *)out, sizeof(*out)); + srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry); + get_srqc(srqc, out); + if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; +out: + kvfree(srq_out); + return err; } static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, - int srq_inlen) + struct mlx5_srq_attr *in) { u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; void *create_in; - void *srqc; void *xrc_srqc; void *pas; int pas_size; int inlen; int err; - srqc = MLX5_ADDR_OF(create_srq_in, in, srq_context_entry); - pas_size = get_pas_size(srqc); + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; create_in = mlx5_vzalloc(inlen); if (!create_in) @@ -239,7 +265,8 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, xrc_srq_context_entry); pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); - memcpy(xrc_srqc, srqc, MLX5_ST_SZ_BYTES(srqc)); + set_srqc(xrc_srqc, in); + MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); memcpy(pas, in->pas, pas_size); MLX5_SET(create_xrc_srq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); @@ -293,11 +320,10 @@ static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_query_srq_mbox_out *out) + struct mlx5_srq_attr *out) { u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; u32 *xrcsrq_out; - void *srqc; void *xrc_srqc; int err; @@ -317,8 +343,9 @@ static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out, xrc_srq_context_entry); - srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); - memcpy(srqc, xrc_srqc, MLX5_ST_SZ_BYTES(srqc)); + get_srqc(xrc_srqc, out); + if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; out: kvfree(xrcsrq_out); @@ -326,26 +353,27 @@ out: } static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, int srq_inlen) + struct mlx5_srq_attr *in) { void *create_in; void *rmpc; - void *srqc; + void *wq; int pas_size; int inlen; int err; - srqc = MLX5_ADDR_OF(create_srq_in, in, srq_context_entry); - pas_size = get_pas_size(srqc); + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; create_in = mlx5_vzalloc(inlen); if (!create_in) return -ENOMEM; rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + set_wq(wq, in); memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); - rmpc_srqc_reformat(srqc, rmpc, true); err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); @@ -390,11 +418,10 @@ static int arm_rmp_cmd(struct mlx5_core_dev *dev, } static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_query_srq_mbox_out *out) + struct mlx5_srq_attr *out) { u32 *rmp_out; void *rmpc; - void *srqc; int err; rmp_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_rmp_out)); @@ -405,9 +432,10 @@ static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, if (err) goto out; - srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context); - rmpc_srqc_reformat(srqc, rmpc, false); + get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out); + if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY) + out->flags |= MLX5_SRQ_FLAG_ERR; out: kvfree(rmp_out); @@ -416,15 +444,14 @@ out: static int create_srq_split(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, - int inlen, int is_xrc) + struct mlx5_srq_attr *in) { if (!dev->issi) - return create_srq_cmd(dev, srq, in, inlen); + return create_srq_cmd(dev, srq, in); else if (srq->common.res == MLX5_RES_XSRQ) - return create_xrc_srq_cmd(dev, srq, in, inlen); + return create_xrc_srq_cmd(dev, srq, in); else - return create_rmp_cmd(dev, srq, in, inlen); + return create_rmp_cmd(dev, srq, in); } static int destroy_srq_split(struct mlx5_core_dev *dev, @@ -439,15 +466,17 @@ static int destroy_srq_split(struct mlx5_core_dev *dev, } int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, int inlen, - int is_xrc) + struct mlx5_srq_attr *in) { int err; struct mlx5_srq_table *table = &dev->priv.srq_table; - srq->common.res = is_xrc ? MLX5_RES_XSRQ : MLX5_RES_SRQ; + if (in->type == IB_SRQT_XRC) + srq->common.res = MLX5_RES_XSRQ; + else + srq->common.res = MLX5_RES_SRQ; - err = create_srq_split(dev, srq, in, inlen, is_xrc); + err = create_srq_split(dev, srq, in); if (err) return err; @@ -502,7 +531,7 @@ int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) EXPORT_SYMBOL(mlx5_core_destroy_srq); int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_query_srq_mbox_out *out) + struct mlx5_srq_attr *out) { if (!dev->issi) return query_srq_cmd(dev, srq, out); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index 03a5093ffeb7..28274a6fbafe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -85,6 +85,7 @@ int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn) return err; } +EXPORT_SYMBOL(mlx5_core_create_rq); int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen) { @@ -110,6 +111,7 @@ void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn) mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_core_destroy_rq); int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out) { @@ -430,6 +432,7 @@ int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, return err; } +EXPORT_SYMBOL(mlx5_core_create_rqt); int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, int inlen) @@ -455,3 +458,4 @@ void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn) mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_core_destroy_rqt); diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 7ca9201f7dcb..1721098eef13 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3383,6 +3383,15 @@ MLXSW_ITEM32(reg, ritr, ipv4_fe, 0x04, 29, 1); */ MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1); +/* reg_ritr_lb_en + * Loop-back filter enable for unicast packets. + * If the flag is set then loop-back filter for unicast packets is + * implemented on the RIF. Multicast packets are always subject to + * loop-back filtering. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, lb_en, 0x04, 24, 1); + /* reg_ritr_virtual_router * Virtual router ID associated with the router interface. * Access: RW @@ -3484,6 +3493,7 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, mlxsw_reg_ritr_op_set(payload, op); mlxsw_reg_ritr_rif_set(payload, rif); mlxsw_reg_ritr_ipv4_fe_set(payload, 1); + mlxsw_reg_ritr_lb_en_set(payload, 1); mlxsw_reg_ritr_mtu_set(payload, mtu); mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac); } @@ -4000,6 +4010,7 @@ static inline void mlxsw_reg_ralue_pack(char *payload, { MLXSW_REG_ZERO(ralue, payload); mlxsw_reg_ralue_protocol_set(payload, protocol); + mlxsw_reg_ralue_op_set(payload, op); mlxsw_reg_ralue_virtual_router_set(payload, virtual_router); mlxsw_reg_ralue_prefix_len_set(payload, prefix_len); mlxsw_reg_ralue_entry_type_set(payload, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index e1b8f62ccaed..1f8168906811 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -942,8 +942,8 @@ static void mlxsw_sp_port_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_vport) kfree(mlxsw_sp_vport); } -int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, - u16 vid) +static int mlxsw_sp_port_add_vid(struct net_device *dev, + __be16 __always_unused proto, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp_port *mlxsw_sp_vport; @@ -956,16 +956,12 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, if (!vid) return 0; - if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid)) { - netdev_warn(dev, "VID=%d already configured\n", vid); + if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid)) return 0; - } mlxsw_sp_vport = mlxsw_sp_port_vport_create(mlxsw_sp_port, vid); - if (!mlxsw_sp_vport) { - netdev_err(dev, "Failed to create vPort for VID=%d\n", vid); + if (!mlxsw_sp_vport) return -ENOMEM; - } /* When adding the first VLAN interface on a bridged port we need to * transition all the active 802.1Q bridge VLANs to use explicit @@ -973,24 +969,17 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, */ if (list_is_singular(&mlxsw_sp_port->vports_list)) { err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port); - if (err) { - netdev_err(dev, "Failed to set to Virtual mode\n"); + if (err) goto err_port_vp_mode_trans; - } } err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); - if (err) { - netdev_err(dev, "Failed to disable learning for VID=%d\n", vid); + if (err) goto err_port_vid_learning_set; - } err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, untagged); - if (err) { - netdev_err(dev, "Failed to set VLAN membership for VID=%d\n", - vid); + if (err) goto err_port_add_vid; - } return 0; @@ -1010,7 +999,6 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp_port *mlxsw_sp_vport; struct mlxsw_sp_fid *f; - int err; /* VLAN 0 is removed from HW filter when device goes down, but * it is reserved in our case, so simply return. @@ -1019,23 +1007,12 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev, return 0; mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); - if (!mlxsw_sp_vport) { - netdev_warn(dev, "VID=%d does not exist\n", vid); + if (WARN_ON(!mlxsw_sp_vport)) return 0; - } - err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false); - if (err) { - netdev_err(dev, "Failed to set VLAN membership for VID=%d\n", - vid); - return err; - } + mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false); - err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); - if (err) { - netdev_err(dev, "Failed to enable learning for VID=%d\n", vid); - return err; - } + mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); /* Drop FID reference. If this was the last reference the * resources will be freed. @@ -1048,13 +1025,8 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev, * transition all active 802.1Q bridge VLANs to use VID to FID * mappings and set port's mode to VLAN mode. */ - if (list_is_singular(&mlxsw_sp_port->vports_list)) { - err = mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); - if (err) { - netdev_err(dev, "Failed to set to VLAN mode\n"); - return err; - } - } + if (list_is_singular(&mlxsw_sp_port->vports_list)) + mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); mlxsw_sp_port_vport_destroy(mlxsw_sp_vport); @@ -1149,6 +1121,7 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress) { const struct tc_action *a; + LIST_HEAD(actions); int err; if (!tc_single_action(cls->exts)) { @@ -1156,7 +1129,8 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, return -ENOTSUPP; } - tc_for_each_action(a, cls->exts) { + tcf_exts_to_list(cls->exts, &actions); + list_for_each_entry(a, &actions, list) { if (!is_tcf_mirred_mirror(a) || protocol != htons(ETH_P_ALL)) return -ENOTSUPP; @@ -2076,6 +2050,18 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) return 0; } +static int mlxsw_sp_port_pvid_vport_create(struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_port->pvid = 1; + + return mlxsw_sp_port_add_vid(mlxsw_sp_port->dev, 0, 1); +} + +static int mlxsw_sp_port_pvid_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_port) +{ + return mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1); +} + static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, bool split, u8 module, u8 width, u8 lane) { @@ -2191,7 +2177,15 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_port_dcb_init; } + err = mlxsw_sp_port_pvid_vport_create(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create PVID vPort\n", + mlxsw_sp_port->local_port); + goto err_port_pvid_vport_create; + } + mlxsw_sp_port_switchdev_init(mlxsw_sp_port); + mlxsw_sp->ports[local_port] = mlxsw_sp_port; err = register_netdev(dev); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to register netdev\n", @@ -2208,18 +2202,15 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_core_port_init; } - err = mlxsw_sp_port_vlan_init(mlxsw_sp_port); - if (err) - goto err_port_vlan_init; - - mlxsw_sp->ports[local_port] = mlxsw_sp_port; return 0; -err_port_vlan_init: - mlxsw_core_port_fini(&mlxsw_sp_port->core_port); err_core_port_init: unregister_netdev(dev); err_register_netdev: + mlxsw_sp->ports[local_port] = NULL; + mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); + mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port); +err_port_pvid_vport_create: mlxsw_sp_port_dcb_fini(mlxsw_sp_port); err_port_dcb_init: err_port_ets_init: @@ -2227,6 +2218,7 @@ err_port_buffers_init: err_port_admin_status_set: err_port_mtu_set: err_port_speed_by_width_set: + mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); err_port_swid_set: err_port_system_port_mapping_set: err_dev_addr_init: @@ -2246,12 +2238,12 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) if (!mlxsw_sp_port) return; - mlxsw_sp->ports[local_port] = NULL; mlxsw_core_port_fini(&mlxsw_sp_port->core_port); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ - mlxsw_sp_port_dcb_fini(mlxsw_sp_port); - mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1); + mlxsw_sp->ports[local_port] = NULL; mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); + mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port); + mlxsw_sp_port_dcb_fini(mlxsw_sp_port); mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port); free_percpu(mlxsw_sp_port->pcpu_stats); @@ -2663,6 +2655,26 @@ static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = { { .func = mlxsw_sp_rx_listener_func, .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_MTUERROR, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_TTLERROR, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_LBERROR, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_OSPF, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, .trap_id = MLXSW_TRAP_ID_IP2ME, }, { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index f69aa37d1521..ab3feb81bd43 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -536,8 +536,6 @@ int mlxsw_sp_port_vid_to_fid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool is_member, bool untagged); -int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, - u16 vid); int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid, bool set); void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 074cdda7b6f3..237418a0e6e0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -330,7 +330,7 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = { MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(10000), 0, 0), MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 81418d629231..90bb93b037ec 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1651,9 +1651,10 @@ static void mlxsw_sp_router_fib4_add_info_destroy(void const *data) const struct mlxsw_sp_router_fib4_add_info *info = data; struct mlxsw_sp_fib_entry *fib_entry = info->fib_entry; struct mlxsw_sp *mlxsw_sp = info->mlxsw_sp; + struct mlxsw_sp_vr *vr = fib_entry->vr; mlxsw_sp_fib_entry_destroy(fib_entry); - mlxsw_sp_vr_put(mlxsw_sp, fib_entry->vr); + mlxsw_sp_vr_put(mlxsw_sp, vr); kfree(info); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index a1ad5e6bdfa8..d1b59cdfacc1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -450,6 +450,8 @@ void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f) kfree(f); + mlxsw_sp_fid_map(mlxsw_sp, fid, false); + mlxsw_sp_fid_op(mlxsw_sp, fid, false); } @@ -997,13 +999,13 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, } static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid_begin, u16 vid_end, bool init) + u16 vid_begin, u16 vid_end) { struct net_device *dev = mlxsw_sp_port->dev; u16 vid, pvid; int err; - if (!init && !mlxsw_sp_port->bridged) + if (!mlxsw_sp_port->bridged) return -EINVAL; err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, @@ -1014,9 +1016,6 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, return err; } - if (init) - goto out; - pvid = mlxsw_sp_port->pvid; if (pvid >= vid_begin && pvid <= vid_end) { err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0); @@ -1028,7 +1027,6 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end); -out: /* Changing activity bits only if HW operation succeded */ for (vid = vid_begin; vid <= vid_end; vid++) clear_bit(vid, mlxsw_sp_port->active_vlans); @@ -1039,8 +1037,8 @@ out: static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_vlan *vlan) { - return __mlxsw_sp_port_vlans_del(mlxsw_sp_port, - vlan->vid_begin, vlan->vid_end, false); + return __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vlan->vid_begin, + vlan->vid_end); } void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port) @@ -1048,7 +1046,7 @@ void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port) u16 vid; for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) - __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid, false); + __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid); } static int @@ -1546,32 +1544,6 @@ void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_fdb_fini(mlxsw_sp); } -int mlxsw_sp_port_vlan_init(struct mlxsw_sp_port *mlxsw_sp_port) -{ - struct net_device *dev = mlxsw_sp_port->dev; - int err; - - /* Allow only untagged packets to ingress and tag them internally - * with VID 1. - */ - mlxsw_sp_port->pvid = 1; - err = __mlxsw_sp_port_vlans_del(mlxsw_sp_port, 0, VLAN_N_VID - 1, - true); - if (err) { - netdev_err(dev, "Unable to init VLANs\n"); - return err; - } - - /* Add implicit VLAN interface in the device, so that untagged - * packets will be classified to the default vFID. - */ - err = mlxsw_sp_port_add_vid(dev, 0, 1); - if (err) - netdev_err(dev, "Failed to configure default vFID\n"); - - return err; -} - void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port) { mlxsw_sp_port->dev->switchdev_ops = &mlxsw_sp_port_switchdev_ops; diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 470d7696e9fe..ed8e30186400 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -56,6 +56,10 @@ enum { MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34, MLXSW_TRAP_ID_ARPBC = 0x50, MLXSW_TRAP_ID_ARPUC = 0x51, + MLXSW_TRAP_ID_MTUERROR = 0x52, + MLXSW_TRAP_ID_TTLERROR = 0x53, + MLXSW_TRAP_ID_LBERROR = 0x54, + MLXSW_TRAP_ID_OSPF = 0x55, MLXSW_TRAP_ID_IP2ME = 0x5F, MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70, MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90, diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index d2f202eae6b2..b900dfbb57ff 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -52,40 +52,94 @@ static bool qed_dcbx_app_ethtype(u32 app_info_bitmap) DCBX_APP_SF_ETHTYPE); } +static bool qed_dcbx_ieee_app_ethtype(u32 app_info_bitmap) +{ + u8 mfw_val = QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF_IEEE); + + /* Old MFW */ + if (mfw_val == DCBX_APP_SF_IEEE_RESERVED) + return qed_dcbx_app_ethtype(app_info_bitmap); + + return !!(mfw_val == DCBX_APP_SF_IEEE_ETHTYPE); +} + static bool qed_dcbx_app_port(u32 app_info_bitmap) { return !!(QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF) == DCBX_APP_SF_PORT); } -static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id) +static bool qed_dcbx_ieee_app_port(u32 app_info_bitmap, u8 type) { - return !!(qed_dcbx_app_ethtype(app_info_bitmap) && - proto_id == QED_ETH_TYPE_DEFAULT); + u8 mfw_val = QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF_IEEE); + + /* Old MFW */ + if (mfw_val == DCBX_APP_SF_IEEE_RESERVED) + return qed_dcbx_app_port(app_info_bitmap); + + return !!(mfw_val == type || mfw_val == DCBX_APP_SF_IEEE_TCP_UDP_PORT); } -static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id) +static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) { - return !!(qed_dcbx_app_port(app_info_bitmap) && - proto_id == QED_TCP_PORT_ISCSI); + bool ethtype; + + if (ieee) + ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap); + else + ethtype = qed_dcbx_app_ethtype(app_info_bitmap); + + return !!(ethtype && (proto_id == QED_ETH_TYPE_DEFAULT)); } -static bool qed_dcbx_fcoe_tlv(u32 app_info_bitmap, u16 proto_id) +static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) { - return !!(qed_dcbx_app_ethtype(app_info_bitmap) && - proto_id == QED_ETH_TYPE_FCOE); + bool port; + + if (ieee) + port = qed_dcbx_ieee_app_port(app_info_bitmap, + DCBX_APP_SF_IEEE_TCP_PORT); + else + port = qed_dcbx_app_port(app_info_bitmap); + + return !!(port && (proto_id == QED_TCP_PORT_ISCSI)); } -static bool qed_dcbx_roce_tlv(u32 app_info_bitmap, u16 proto_id) +static bool qed_dcbx_fcoe_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) { - return !!(qed_dcbx_app_ethtype(app_info_bitmap) && - proto_id == QED_ETH_TYPE_ROCE); + bool ethtype; + + if (ieee) + ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap); + else + ethtype = qed_dcbx_app_ethtype(app_info_bitmap); + + return !!(ethtype && (proto_id == QED_ETH_TYPE_FCOE)); } -static bool qed_dcbx_roce_v2_tlv(u32 app_info_bitmap, u16 proto_id) +static bool qed_dcbx_roce_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) { - return !!(qed_dcbx_app_port(app_info_bitmap) && - proto_id == QED_UDP_PORT_TYPE_ROCE_V2); + bool ethtype; + + if (ieee) + ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap); + else + ethtype = qed_dcbx_app_ethtype(app_info_bitmap); + + return !!(ethtype && (proto_id == QED_ETH_TYPE_ROCE)); +} + +static bool qed_dcbx_roce_v2_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) +{ + bool port; + + if (ieee) + port = qed_dcbx_ieee_app_port(app_info_bitmap, + DCBX_APP_SF_IEEE_UDP_PORT); + else + port = qed_dcbx_app_port(app_info_bitmap); + + return !!(port && (proto_id == QED_UDP_PORT_TYPE_ROCE_V2)); } static void @@ -164,17 +218,17 @@ qed_dcbx_update_app_info(struct qed_dcbx_results *p_data, static bool qed_dcbx_get_app_protocol_type(struct qed_hwfn *p_hwfn, u32 app_prio_bitmap, - u16 id, enum dcbx_protocol_type *type) + u16 id, enum dcbx_protocol_type *type, bool ieee) { - if (qed_dcbx_fcoe_tlv(app_prio_bitmap, id)) { + if (qed_dcbx_fcoe_tlv(app_prio_bitmap, id, ieee)) { *type = DCBX_PROTOCOL_FCOE; - } else if (qed_dcbx_roce_tlv(app_prio_bitmap, id)) { + } else if (qed_dcbx_roce_tlv(app_prio_bitmap, id, ieee)) { *type = DCBX_PROTOCOL_ROCE; - } else if (qed_dcbx_iscsi_tlv(app_prio_bitmap, id)) { + } else if (qed_dcbx_iscsi_tlv(app_prio_bitmap, id, ieee)) { *type = DCBX_PROTOCOL_ISCSI; - } else if (qed_dcbx_default_tlv(app_prio_bitmap, id)) { + } else if (qed_dcbx_default_tlv(app_prio_bitmap, id, ieee)) { *type = DCBX_PROTOCOL_ETH; - } else if (qed_dcbx_roce_v2_tlv(app_prio_bitmap, id)) { + } else if (qed_dcbx_roce_v2_tlv(app_prio_bitmap, id, ieee)) { *type = DCBX_PROTOCOL_ROCE_V2; } else { *type = DCBX_MAX_PROTOCOL_TYPE; @@ -194,17 +248,18 @@ static int qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn, struct qed_dcbx_results *p_data, struct dcbx_app_priority_entry *p_tbl, - u32 pri_tc_tbl, int count, bool dcbx_enabled) + u32 pri_tc_tbl, int count, u8 dcbx_version) { u8 tc, priority_map; enum dcbx_protocol_type type; + bool enable, ieee; u16 protocol_id; int priority; - bool enable; int i; DP_VERBOSE(p_hwfn, QED_MSG_DCB, "Num APP entries = %d\n", count); + ieee = (dcbx_version == DCBX_CONFIG_VERSION_IEEE); /* Parse APP TLV */ for (i = 0; i < count; i++) { protocol_id = QED_MFW_GET_FIELD(p_tbl[i].entry, @@ -219,7 +274,7 @@ qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn, tc = QED_DCBX_PRIO2TC(pri_tc_tbl, priority); if (qed_dcbx_get_app_protocol_type(p_hwfn, p_tbl[i].entry, - protocol_id, &type)) { + protocol_id, &type, ieee)) { /* ETH always have the enable bit reset, as it gets * vlan information per packet. For other protocols, * should be set according to the dcbx_enabled @@ -275,15 +330,12 @@ static int qed_dcbx_process_mib_info(struct qed_hwfn *p_hwfn) struct dcbx_ets_feature *p_ets; struct qed_hw_info *p_info; u32 pri_tc_tbl, flags; - bool dcbx_enabled; + u8 dcbx_version; int num_entries; int rc = 0; - /* If DCBx version is non zero, then negotiation was - * successfuly performed - */ flags = p_hwfn->p_dcbx_info->operational.flags; - dcbx_enabled = !!QED_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION); + dcbx_version = QED_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION); p_app = &p_hwfn->p_dcbx_info->operational.features.app; p_tbl = p_app->app_pri_tbl; @@ -295,13 +347,13 @@ static int qed_dcbx_process_mib_info(struct qed_hwfn *p_hwfn) num_entries = QED_MFW_GET_FIELD(p_app->flags, DCBX_APP_NUM_ENTRIES); rc = qed_dcbx_process_tlv(p_hwfn, &data, p_tbl, pri_tc_tbl, - num_entries, dcbx_enabled); + num_entries, dcbx_version); if (rc) return rc; p_info->num_tc = QED_MFW_GET_FIELD(p_ets->flags, DCBX_ETS_MAX_TCS); data.pf_id = p_hwfn->rel_pf_id; - data.dcbx_enabled = dcbx_enabled; + data.dcbx_enabled = !!dcbx_version; qed_dcbx_dp_protocol(p_hwfn, &data); @@ -400,7 +452,7 @@ static void qed_dcbx_get_app_data(struct qed_hwfn *p_hwfn, struct dcbx_app_priority_feature *p_app, struct dcbx_app_priority_entry *p_tbl, - struct qed_dcbx_params *p_params) + struct qed_dcbx_params *p_params, bool ieee) { struct qed_app_entry *entry; u8 pri_map; @@ -414,15 +466,46 @@ qed_dcbx_get_app_data(struct qed_hwfn *p_hwfn, DCBX_APP_NUM_ENTRIES); for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) { entry = &p_params->app_entry[i]; - entry->ethtype = !(QED_MFW_GET_FIELD(p_tbl[i].entry, - DCBX_APP_SF)); + if (ieee) { + u8 sf_ieee; + u32 val; + + sf_ieee = QED_MFW_GET_FIELD(p_tbl[i].entry, + DCBX_APP_SF_IEEE); + switch (sf_ieee) { + case DCBX_APP_SF_IEEE_RESERVED: + /* Old MFW */ + val = QED_MFW_GET_FIELD(p_tbl[i].entry, + DCBX_APP_SF); + entry->sf_ieee = val ? + QED_DCBX_SF_IEEE_TCP_UDP_PORT : + QED_DCBX_SF_IEEE_ETHTYPE; + break; + case DCBX_APP_SF_IEEE_ETHTYPE: + entry->sf_ieee = QED_DCBX_SF_IEEE_ETHTYPE; + break; + case DCBX_APP_SF_IEEE_TCP_PORT: + entry->sf_ieee = QED_DCBX_SF_IEEE_TCP_PORT; + break; + case DCBX_APP_SF_IEEE_UDP_PORT: + entry->sf_ieee = QED_DCBX_SF_IEEE_UDP_PORT; + break; + case DCBX_APP_SF_IEEE_TCP_UDP_PORT: + entry->sf_ieee = QED_DCBX_SF_IEEE_TCP_UDP_PORT; + break; + } + } else { + entry->ethtype = !(QED_MFW_GET_FIELD(p_tbl[i].entry, + DCBX_APP_SF)); + } + pri_map = QED_MFW_GET_FIELD(p_tbl[i].entry, DCBX_APP_PRI_MAP); entry->prio = ffs(pri_map) - 1; entry->proto_id = QED_MFW_GET_FIELD(p_tbl[i].entry, DCBX_APP_PROTOCOL_ID); qed_dcbx_get_app_protocol_type(p_hwfn, p_tbl[i].entry, entry->proto_id, - &entry->proto_type); + &entry->proto_type, ieee); } DP_VERBOSE(p_hwfn, QED_MSG_DCB, @@ -483,7 +566,7 @@ qed_dcbx_get_ets_data(struct qed_hwfn *p_hwfn, bw_map[1] = be32_to_cpu(p_ets->tc_bw_tbl[1]); tsa_map[0] = be32_to_cpu(p_ets->tc_tsa_tbl[0]); tsa_map[1] = be32_to_cpu(p_ets->tc_tsa_tbl[1]); - pri_map = be32_to_cpu(p_ets->pri_tc_tbl[0]); + pri_map = p_ets->pri_tc_tbl[0]; for (i = 0; i < QED_MAX_PFC_PRIORITIES; i++) { p_params->ets_tc_bw_tbl[i] = ((u8 *)bw_map)[i]; p_params->ets_tc_tsa_tbl[i] = ((u8 *)tsa_map)[i]; @@ -500,9 +583,9 @@ qed_dcbx_get_common_params(struct qed_hwfn *p_hwfn, struct dcbx_app_priority_feature *p_app, struct dcbx_app_priority_entry *p_tbl, struct dcbx_ets_feature *p_ets, - u32 pfc, struct qed_dcbx_params *p_params) + u32 pfc, struct qed_dcbx_params *p_params, bool ieee) { - qed_dcbx_get_app_data(p_hwfn, p_app, p_tbl, p_params); + qed_dcbx_get_app_data(p_hwfn, p_app, p_tbl, p_params, ieee); qed_dcbx_get_ets_data(p_hwfn, p_ets, p_params); qed_dcbx_get_pfc_data(p_hwfn, pfc, p_params); } @@ -516,7 +599,7 @@ qed_dcbx_get_local_params(struct qed_hwfn *p_hwfn, p_feat = &p_hwfn->p_dcbx_info->local_admin.features; qed_dcbx_get_common_params(p_hwfn, &p_feat->app, p_feat->app.app_pri_tbl, &p_feat->ets, - p_feat->pfc, ¶ms->local.params); + p_feat->pfc, ¶ms->local.params, false); params->local.valid = true; } @@ -529,7 +612,7 @@ qed_dcbx_get_remote_params(struct qed_hwfn *p_hwfn, p_feat = &p_hwfn->p_dcbx_info->remote.features; qed_dcbx_get_common_params(p_hwfn, &p_feat->app, p_feat->app.app_pri_tbl, &p_feat->ets, - p_feat->pfc, ¶ms->remote.params); + p_feat->pfc, ¶ms->remote.params, false); params->remote.valid = true; } @@ -574,7 +657,8 @@ qed_dcbx_get_operational_params(struct qed_hwfn *p_hwfn, qed_dcbx_get_common_params(p_hwfn, &p_feat->app, p_feat->app.app_pri_tbl, &p_feat->ets, - p_feat->pfc, ¶ms->operational.params); + p_feat->pfc, ¶ms->operational.params, + p_operational->ieee); qed_dcbx_get_priority_info(p_hwfn, &p_operational->app_prio, p_results); err = QED_MFW_GET_FIELD(p_feat->app.flags, DCBX_APP_ERROR); p_operational->err = err; @@ -944,7 +1028,6 @@ qed_dcbx_set_ets_data(struct qed_hwfn *p_hwfn, val = (((u32)p_params->ets_pri_tc_tbl[i]) << ((7 - i) * 4)); p_ets->pri_tc_tbl[0] |= val; } - p_ets->pri_tc_tbl[0] = cpu_to_be32(p_ets->pri_tc_tbl[0]); for (i = 0; i < 2; i++) { p_ets->tc_bw_tbl[i] = cpu_to_be32(p_ets->tc_bw_tbl[i]); p_ets->tc_tsa_tbl[i] = cpu_to_be32(p_ets->tc_tsa_tbl[i]); @@ -954,7 +1037,7 @@ qed_dcbx_set_ets_data(struct qed_hwfn *p_hwfn, static void qed_dcbx_set_app_data(struct qed_hwfn *p_hwfn, struct dcbx_app_priority_feature *p_app, - struct qed_dcbx_params *p_params) + struct qed_dcbx_params *p_params, bool ieee) { u32 *entry; int i; @@ -975,12 +1058,36 @@ qed_dcbx_set_app_data(struct qed_hwfn *p_hwfn, for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) { entry = &p_app->app_pri_tbl[i].entry; - *entry &= ~DCBX_APP_SF_MASK; - if (p_params->app_entry[i].ethtype) - *entry |= ((u32)DCBX_APP_SF_ETHTYPE << - DCBX_APP_SF_SHIFT); - else - *entry |= ((u32)DCBX_APP_SF_PORT << DCBX_APP_SF_SHIFT); + if (ieee) { + *entry &= ~DCBX_APP_SF_IEEE_MASK; + switch (p_params->app_entry[i].sf_ieee) { + case QED_DCBX_SF_IEEE_ETHTYPE: + *entry |= ((u32)DCBX_APP_SF_IEEE_ETHTYPE << + DCBX_APP_SF_IEEE_SHIFT); + break; + case QED_DCBX_SF_IEEE_TCP_PORT: + *entry |= ((u32)DCBX_APP_SF_IEEE_TCP_PORT << + DCBX_APP_SF_IEEE_SHIFT); + break; + case QED_DCBX_SF_IEEE_UDP_PORT: + *entry |= ((u32)DCBX_APP_SF_IEEE_UDP_PORT << + DCBX_APP_SF_IEEE_SHIFT); + break; + case QED_DCBX_SF_IEEE_TCP_UDP_PORT: + *entry |= ((u32)DCBX_APP_SF_IEEE_TCP_UDP_PORT << + DCBX_APP_SF_IEEE_SHIFT); + break; + } + } else { + *entry &= ~DCBX_APP_SF_MASK; + if (p_params->app_entry[i].ethtype) + *entry |= ((u32)DCBX_APP_SF_ETHTYPE << + DCBX_APP_SF_SHIFT); + else + *entry |= ((u32)DCBX_APP_SF_PORT << + DCBX_APP_SF_SHIFT); + } + *entry &= ~DCBX_APP_PROTOCOL_ID_MASK; *entry |= ((u32)p_params->app_entry[i].proto_id << DCBX_APP_PROTOCOL_ID_SHIFT); @@ -995,15 +1102,19 @@ qed_dcbx_set_local_params(struct qed_hwfn *p_hwfn, struct dcbx_local_params *local_admin, struct qed_dcbx_set *params) { + bool ieee = false; + local_admin->flags = 0; memcpy(&local_admin->features, &p_hwfn->p_dcbx_info->operational.features, sizeof(local_admin->features)); - if (params->enabled) + if (params->enabled) { local_admin->config = params->ver_num; - else + ieee = !!(params->ver_num & DCBX_CONFIG_VERSION_IEEE); + } else { local_admin->config = DCBX_CONFIG_VERSION_DISABLED; + } if (params->override_flags & QED_DCBX_OVERRIDE_PFC_CFG) qed_dcbx_set_pfc_data(p_hwfn, &local_admin->features.pfc, @@ -1015,7 +1126,7 @@ qed_dcbx_set_local_params(struct qed_hwfn *p_hwfn, if (params->override_flags & QED_DCBX_OVERRIDE_APP_CFG) qed_dcbx_set_app_data(p_hwfn, &local_admin->features.app, - ¶ms->config.params); + ¶ms->config.params, ieee); } int qed_dcbx_config_params(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, @@ -1596,8 +1707,10 @@ static int qed_dcbnl_setapp(struct qed_dev *cdev, if ((entry->ethtype == ethtype) && (entry->proto_id == idval)) break; /* First empty slot */ - if (!entry->proto_id) + if (!entry->proto_id) { + dcbx_set.config.params.num_app_entries++; break; + } } if (i == QED_DCBX_MAX_APP_PROTOCOL) { @@ -2118,8 +2231,10 @@ int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app) (entry->proto_id == app->protocol)) break; /* First empty slot */ - if (!entry->proto_id) + if (!entry->proto_id) { + dcbx_set.config.params.num_app_entries++; break; + } } if (i == QED_DCBX_MAX_APP_PROTOCOL) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 592784019994..6f9d3b831a2a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -6850,6 +6850,14 @@ struct dcbx_app_priority_entry { #define DCBX_APP_SF_SHIFT 8 #define DCBX_APP_SF_ETHTYPE 0 #define DCBX_APP_SF_PORT 1 +#define DCBX_APP_SF_IEEE_MASK 0x0000f000 +#define DCBX_APP_SF_IEEE_SHIFT 12 +#define DCBX_APP_SF_IEEE_RESERVED 0 +#define DCBX_APP_SF_IEEE_ETHTYPE 1 +#define DCBX_APP_SF_IEEE_TCP_PORT 2 +#define DCBX_APP_SF_IEEE_UDP_PORT 3 +#define DCBX_APP_SF_IEEE_TCP_UDP_PORT 4 + #define DCBX_APP_PROTOCOL_ID_MASK 0xffff0000 #define DCBX_APP_PROTOCOL_ID_SHIFT 16 }; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index b4d3b410c32a..421ebdaf1208 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -730,6 +730,7 @@ static void cpsw_rx_handler(void *token, int len, int status) netif_receive_skb(skb); ndev->stats.rx_bytes += len; ndev->stats.rx_packets++; + kmemleak_not_leak(new_skb); } else { ndev->stats.rx_dropped++; new_skb = skb; @@ -1307,6 +1308,7 @@ static int cpsw_ndo_open(struct net_device *ndev) kfree_skb(skb); goto err_cleanup; } + kmemleak_not_leak(skb); } /* continue even if we didn't manage to submit all * receive descs diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index c1403fda874c..fa7b1e42508b 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -645,12 +645,6 @@ struct netvsc_reconfig { u32 event; }; -struct garp_wrk { - struct work_struct dwrk; - struct net_device *netdev; - struct netvsc_device *netvsc_dev; -}; - /* The context of the netvsc device */ struct net_device_context { /* point back to our device context */ @@ -668,7 +662,6 @@ struct net_device_context { struct work_struct work; u32 msg_enable; /* debug level */ - struct garp_wrk gwrk; struct netvsc_stats __percpu *tx_stats; struct netvsc_stats __percpu *rx_stats; @@ -679,6 +672,15 @@ struct net_device_context { /* the device is going away */ bool start_remove; + + /* State to manage the associated VF interface. */ + struct net_device *vf_netdev; + bool vf_inject; + atomic_t vf_use_cnt; + /* 1: allocated, serial number is valid. 0: not allocated */ + u32 vf_alloc; + /* Serial number of the VF to team with */ + u32 vf_serial; }; /* Per netvsc device */ @@ -734,15 +736,7 @@ struct netvsc_device { u32 max_pkt; /* max number of pkt in one send, e.g. 8 */ u32 pkt_align; /* alignment bytes, e.g. 8 */ - /* 1: allocated, serial number is valid. 0: not allocated */ - u32 vf_alloc; - /* Serial number of the VF to team with */ - u32 vf_serial; atomic_t open_cnt; - /* State to manage the associated VF interface. */ - bool vf_inject; - struct net_device *vf_netdev; - atomic_t vf_use_cnt; }; static inline struct netvsc_device * diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 36a7994f2811..8078bc209cac 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -77,13 +77,9 @@ static struct netvsc_device *alloc_net_device(void) init_waitqueue_head(&net_device->wait_drain); net_device->destroy = false; atomic_set(&net_device->open_cnt, 0); - atomic_set(&net_device->vf_use_cnt, 0); net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; - net_device->vf_netdev = NULL; - net_device->vf_inject = false; - return net_device; } @@ -1110,16 +1106,16 @@ static void netvsc_send_table(struct hv_device *hdev, nvscdev->send_table[i] = tab[i]; } -static void netvsc_send_vf(struct netvsc_device *nvdev, +static void netvsc_send_vf(struct net_device_context *net_device_ctx, struct nvsp_message *nvmsg) { - nvdev->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated; - nvdev->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial; + net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated; + net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial; } static inline void netvsc_receive_inband(struct hv_device *hdev, - struct netvsc_device *nvdev, - struct nvsp_message *nvmsg) + struct net_device_context *net_device_ctx, + struct nvsp_message *nvmsg) { switch (nvmsg->hdr.msg_type) { case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE: @@ -1127,7 +1123,7 @@ static inline void netvsc_receive_inband(struct hv_device *hdev, break; case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION: - netvsc_send_vf(nvdev, nvmsg); + netvsc_send_vf(net_device_ctx, nvmsg); break; } } @@ -1140,6 +1136,7 @@ static void netvsc_process_raw_pkt(struct hv_device *device, struct vmpacket_descriptor *desc) { struct nvsp_message *nvmsg; + struct net_device_context *net_device_ctx = netdev_priv(ndev); nvmsg = (struct nvsp_message *)((unsigned long) desc + (desc->offset8 << 3)); @@ -1154,7 +1151,7 @@ static void netvsc_process_raw_pkt(struct hv_device *device, break; case VM_PKT_DATA_INBAND: - netvsc_receive_inband(device, net_device, nvmsg); + netvsc_receive_inband(device, net_device_ctx, nvmsg); break; default: diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 82c8f6d47283..eb2c122ec10a 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -671,20 +671,19 @@ int netvsc_recv_callback(struct hv_device *device_obj, struct sk_buff *skb; struct sk_buff *vf_skb; struct netvsc_stats *rx_stats; - struct netvsc_device *netvsc_dev = net_device_ctx->nvdev; u32 bytes_recvd = packet->total_data_buflen; int ret = 0; if (!net || net->reg_state != NETREG_REGISTERED) return NVSP_STAT_FAIL; - if (READ_ONCE(netvsc_dev->vf_inject)) { - atomic_inc(&netvsc_dev->vf_use_cnt); - if (!READ_ONCE(netvsc_dev->vf_inject)) { + if (READ_ONCE(net_device_ctx->vf_inject)) { + atomic_inc(&net_device_ctx->vf_use_cnt); + if (!READ_ONCE(net_device_ctx->vf_inject)) { /* * We raced; just move on. */ - atomic_dec(&netvsc_dev->vf_use_cnt); + atomic_dec(&net_device_ctx->vf_use_cnt); goto vf_injection_done; } @@ -696,17 +695,19 @@ int netvsc_recv_callback(struct hv_device *device_obj, * the host). Deliver these via the VF interface * in the guest. */ - vf_skb = netvsc_alloc_recv_skb(netvsc_dev->vf_netdev, packet, - csum_info, *data, vlan_tci); + vf_skb = netvsc_alloc_recv_skb(net_device_ctx->vf_netdev, + packet, csum_info, *data, + vlan_tci); if (vf_skb != NULL) { - ++netvsc_dev->vf_netdev->stats.rx_packets; - netvsc_dev->vf_netdev->stats.rx_bytes += bytes_recvd; + ++net_device_ctx->vf_netdev->stats.rx_packets; + net_device_ctx->vf_netdev->stats.rx_bytes += + bytes_recvd; netif_receive_skb(vf_skb); } else { ++net->stats.rx_dropped; ret = NVSP_STAT_FAIL; } - atomic_dec(&netvsc_dev->vf_use_cnt); + atomic_dec(&net_device_ctx->vf_use_cnt); return ret; } @@ -1163,17 +1164,6 @@ static void netvsc_free_netdev(struct net_device *netdev) free_netdev(netdev); } -static void netvsc_notify_peers(struct work_struct *wrk) -{ - struct garp_wrk *gwrk; - - gwrk = container_of(wrk, struct garp_wrk, dwrk); - - netdev_notify_peers(gwrk->netdev); - - atomic_dec(&gwrk->netvsc_dev->vf_use_cnt); -} - static struct net_device *get_netvsc_net_device(char *mac) { struct net_device *dev, *found = NULL; @@ -1216,7 +1206,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (netvsc_dev == NULL) + if (!netvsc_dev || net_device_ctx->vf_netdev) return NOTIFY_DONE; netdev_info(ndev, "VF registering: %s\n", vf_netdev->name); @@ -1224,10 +1214,23 @@ static int netvsc_register_vf(struct net_device *vf_netdev) * Take a reference on the module. */ try_module_get(THIS_MODULE); - netvsc_dev->vf_netdev = vf_netdev; + net_device_ctx->vf_netdev = vf_netdev; return NOTIFY_OK; } +static void netvsc_inject_enable(struct net_device_context *net_device_ctx) +{ + net_device_ctx->vf_inject = true; +} + +static void netvsc_inject_disable(struct net_device_context *net_device_ctx) +{ + net_device_ctx->vf_inject = false; + + /* Wait for currently active users to drain out. */ + while (atomic_read(&net_device_ctx->vf_use_cnt) != 0) + udelay(50); +} static int netvsc_vf_up(struct net_device *vf_netdev) { @@ -1246,11 +1249,11 @@ static int netvsc_vf_up(struct net_device *vf_netdev) net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL)) + if (!netvsc_dev || !net_device_ctx->vf_netdev) return NOTIFY_DONE; netdev_info(ndev, "VF up: %s\n", vf_netdev->name); - netvsc_dev->vf_inject = true; + netvsc_inject_enable(net_device_ctx); /* * Open the device before switching data path. @@ -1265,15 +1268,8 @@ static int netvsc_vf_up(struct net_device *vf_netdev) netif_carrier_off(ndev); - /* - * Now notify peers. We are scheduling work to - * notify peers; take a reference to prevent - * the VF interface from vanishing. - */ - atomic_inc(&netvsc_dev->vf_use_cnt); - net_device_ctx->gwrk.netdev = vf_netdev; - net_device_ctx->gwrk.netvsc_dev = netvsc_dev; - schedule_work(&net_device_ctx->gwrk.dwrk); + /* Now notify peers through VF device. */ + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vf_netdev); return NOTIFY_OK; } @@ -1296,29 +1292,18 @@ static int netvsc_vf_down(struct net_device *vf_netdev) net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL)) + if (!netvsc_dev || !net_device_ctx->vf_netdev) return NOTIFY_DONE; netdev_info(ndev, "VF down: %s\n", vf_netdev->name); - netvsc_dev->vf_inject = false; - /* - * Wait for currently active users to - * drain out. - */ - - while (atomic_read(&netvsc_dev->vf_use_cnt) != 0) - udelay(50); + netvsc_inject_disable(net_device_ctx); netvsc_switch_datapath(ndev, false); netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name); rndis_filter_close(netvsc_dev); netif_carrier_on(ndev); - /* - * Notify peers. - */ - atomic_inc(&netvsc_dev->vf_use_cnt); - net_device_ctx->gwrk.netdev = ndev; - net_device_ctx->gwrk.netvsc_dev = netvsc_dev; - schedule_work(&net_device_ctx->gwrk.dwrk); + + /* Now notify peers through netvsc device. */ + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev); return NOTIFY_OK; } @@ -1340,11 +1325,11 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (netvsc_dev == NULL) + if (!netvsc_dev || !net_device_ctx->vf_netdev) return NOTIFY_DONE; netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); - - netvsc_dev->vf_netdev = NULL; + netvsc_inject_disable(net_device_ctx); + net_device_ctx->vf_netdev = NULL; module_put(THIS_MODULE); return NOTIFY_OK; } @@ -1392,11 +1377,14 @@ static int netvsc_probe(struct hv_device *dev, INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change); INIT_WORK(&net_device_ctx->work, do_set_multicast); - INIT_WORK(&net_device_ctx->gwrk.dwrk, netvsc_notify_peers); spin_lock_init(&net_device_ctx->lock); INIT_LIST_HEAD(&net_device_ctx->reconfig_events); + atomic_set(&net_device_ctx->vf_use_cnt, 0); + net_device_ctx->vf_netdev = NULL; + net_device_ctx->vf_inject = false; + net->netdev_ops = &device_ops; net->hw_features = NETVSC_HW_FEATURES; @@ -1507,8 +1495,13 @@ static int netvsc_netdev_event(struct notifier_block *this, { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); - /* Avoid Vlan, Bonding dev with same MAC registering as VF */ - if (event_dev->priv_flags & (IFF_802_1Q_VLAN | IFF_BONDING)) + /* Avoid Vlan dev with same MAC registering as VF */ + if (event_dev->priv_flags & IFF_802_1Q_VLAN) + return NOTIFY_DONE; + + /* Avoid Bonding master dev with same MAC registering as VF */ + if (event_dev->priv_flags & IFF_BONDING && + event_dev->flags & IFF_MASTER) return NOTIFY_DONE; switch (event) { diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index d13e6e15d7b5..351e701eb043 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -270,6 +270,7 @@ struct macsec_dev { struct pcpu_secy_stats __percpu *stats; struct list_head secys; struct gro_cells gro_cells; + unsigned int nest_level; }; /** @@ -2699,6 +2700,8 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb, #define MACSEC_FEATURES \ (NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST) +static struct lock_class_key macsec_netdev_addr_lock_key; + static int macsec_dev_init(struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); @@ -2910,6 +2913,13 @@ static int macsec_get_iflink(const struct net_device *dev) return macsec_priv(dev)->real_dev->ifindex; } + +static int macsec_get_nest_level(struct net_device *dev) +{ + return macsec_priv(dev)->nest_level; +} + + static const struct net_device_ops macsec_netdev_ops = { .ndo_init = macsec_dev_init, .ndo_uninit = macsec_dev_uninit, @@ -2923,6 +2933,7 @@ static const struct net_device_ops macsec_netdev_ops = { .ndo_start_xmit = macsec_start_xmit, .ndo_get_stats64 = macsec_get_stats64, .ndo_get_iflink = macsec_get_iflink, + .ndo_get_lock_subclass = macsec_get_nest_level, }; static const struct device_type macsec_type = { @@ -3047,22 +3058,31 @@ static void macsec_del_dev(struct macsec_dev *macsec) } } +static void macsec_common_dellink(struct net_device *dev, struct list_head *head) +{ + struct macsec_dev *macsec = macsec_priv(dev); + struct net_device *real_dev = macsec->real_dev; + + unregister_netdevice_queue(dev, head); + list_del_rcu(&macsec->secys); + macsec_del_dev(macsec); + netdev_upper_dev_unlink(real_dev, dev); + + macsec_generation++; +} + static void macsec_dellink(struct net_device *dev, struct list_head *head) { struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev); - macsec_generation++; + macsec_common_dellink(dev, head); - unregister_netdevice_queue(dev, head); - list_del_rcu(&macsec->secys); if (list_empty(&rxd->secys)) { netdev_rx_handler_unregister(real_dev); kfree(rxd); } - - macsec_del_dev(macsec); } static int register_macsec_dev(struct net_device *real_dev, @@ -3181,6 +3201,16 @@ static int macsec_newlink(struct net *net, struct net_device *dev, dev_hold(real_dev); + macsec->nest_level = dev_get_nest_level(real_dev) + 1; + netdev_lockdep_set_classes(dev); + lockdep_set_class_and_subclass(&dev->addr_list_lock, + &macsec_netdev_addr_lock_key, + macsec_get_nest_level(dev)); + + err = netdev_upper_dev_link(real_dev, dev); + if (err < 0) + goto unregister; + /* need to be already registered so that ->init has run and * the MAC addr is set */ @@ -3193,12 +3223,12 @@ static int macsec_newlink(struct net *net, struct net_device *dev, if (rx_handler && sci_exists(real_dev, sci)) { err = -EBUSY; - goto unregister; + goto unlink; } err = macsec_add_dev(dev, sci, icv_len); if (err) - goto unregister; + goto unlink; if (data) macsec_changelink_common(dev, data); @@ -3213,6 +3243,8 @@ static int macsec_newlink(struct net *net, struct net_device *dev, del_dev: macsec_del_dev(macsec); +unlink: + netdev_upper_dev_unlink(real_dev, dev); unregister: unregister_netdevice(dev); return err; @@ -3382,8 +3414,12 @@ static int macsec_notify(struct notifier_block *this, unsigned long event, rxd = macsec_data_rtnl(real_dev); list_for_each_entry_safe(m, n, &rxd->secys, secys) { - macsec_dellink(m->secy.netdev, &head); + macsec_common_dellink(m->secy.netdev, &head); } + + netdev_rx_handler_unregister(real_dev); + kfree(rxd); + unregister_netdevice_many(&head); break; } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index cd9b53834bf6..3234fcdea317 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1315,7 +1315,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, vlan->dev = dev; vlan->port = port; vlan->set_features = MACVLAN_FEATURES; - vlan->nest_level = dev_get_nest_level(lowerdev, netif_is_macvlan) + 1; + vlan->nest_level = dev_get_nest_level(lowerdev) + 1; vlan->mode = MACVLAN_MODE_VEPA; if (data && data[IFLA_MACVLAN_MODE]) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index a38c0dac514b..070e3290aa6e 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -275,7 +275,6 @@ static void macvtap_put_queue(struct macvtap_queue *q) rtnl_unlock(); synchronize_rcu(); - skb_array_cleanup(&q->skb_array); sock_put(&q->sk); } @@ -533,10 +532,8 @@ static void macvtap_sock_write_space(struct sock *sk) static void macvtap_sock_destruct(struct sock *sk) { struct macvtap_queue *q = container_of(sk, struct macvtap_queue, sk); - struct sk_buff *skb; - while ((skb = skb_array_consume(&q->skb_array)) != NULL) - kfree_skb(skb); + skb_array_cleanup(&q->skb_array); } static int macvtap_open(struct inode *inode, struct file *file) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 1882d9828c99..053e87905b94 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -677,17 +677,28 @@ static void kszphy_get_stats(struct phy_device *phydev, data[i] = kszphy_get_stat(phydev, i); } -static int kszphy_resume(struct phy_device *phydev) +static int kszphy_suspend(struct phy_device *phydev) { - int value; + /* Disable PHY Interrupts */ + if (phy_interrupt_is_valid(phydev)) { + phydev->interrupts = PHY_INTERRUPT_DISABLED; + if (phydev->drv->config_intr) + phydev->drv->config_intr(phydev); + } - mutex_lock(&phydev->lock); + return genphy_suspend(phydev); +} - value = phy_read(phydev, MII_BMCR); - phy_write(phydev, MII_BMCR, value & ~BMCR_PDOWN); +static int kszphy_resume(struct phy_device *phydev) +{ + genphy_resume(phydev); - kszphy_config_intr(phydev); - mutex_unlock(&phydev->lock); + /* Enable PHY Interrupts */ + if (phy_interrupt_is_valid(phydev)) { + phydev->interrupts = PHY_INTERRUPT_ENABLED; + if (phydev->drv->config_intr) + phydev->drv->config_intr(phydev); + } return 0; } @@ -900,7 +911,7 @@ static struct phy_driver ksphy_driver[] = { .get_sset_count = kszphy_get_sset_count, .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, - .suspend = genphy_suspend, + .suspend = kszphy_suspend, .resume = kszphy_resume, }, { .phy_id = PHY_ID_KSZ8061, diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c index 355e1ae665f9..8f0fd41dfd4b 100644 --- a/drivers/net/wireless/ath/ath10k/debug.c +++ b/drivers/net/wireless/ath/ath10k/debug.c @@ -139,11 +139,11 @@ void ath10k_debug_print_hwfw_info(struct ath10k *ar) ar->id.subsystem_vendor, ar->id.subsystem_device); ath10k_info(ar, "kconfig debug %d debugfs %d tracing %d dfs %d testmode %d\n", - config_enabled(CONFIG_ATH10K_DEBUG), - config_enabled(CONFIG_ATH10K_DEBUGFS), - config_enabled(CONFIG_ATH10K_TRACING), - config_enabled(CONFIG_ATH10K_DFS_CERTIFIED), - config_enabled(CONFIG_NL80211_TESTMODE)); + IS_ENABLED(CONFIG_ATH10K_DEBUG), + IS_ENABLED(CONFIG_ATH10K_DEBUGFS), + IS_ENABLED(CONFIG_ATH10K_TRACING), + IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED), + IS_ENABLED(CONFIG_NL80211_TESTMODE)); firmware = ar->normal_mode_fw.fw_file.firmware; if (firmware) @@ -2424,7 +2424,7 @@ int ath10k_debug_register(struct ath10k *ar) debugfs_create_file("nf_cal_period", S_IRUSR | S_IWUSR, ar->debug.debugfs_phy, ar, &fops_nf_cal_period); - if (config_enabled(CONFIG_ATH10K_DFS_CERTIFIED)) { + if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED)) { debugfs_create_file("dfs_simulate_radar", S_IWUSR, ar->debug.debugfs_phy, ar, &fops_simulate_radar); diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index fb8e38df9446..0bbd0a00edcc 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -3039,7 +3039,7 @@ static void ath10k_regd_update(struct ath10k *ar) regpair = ar->ath_common.regulatory.regpair; - if (config_enabled(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) { + if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) { nl_dfs_reg = ar->dfs_detector->region; wmi_dfs_reg = ath10k_mac_get_dfs_region(nl_dfs_reg); } else { @@ -3068,7 +3068,7 @@ static void ath10k_reg_notifier(struct wiphy *wiphy, ath_reg_notifier_apply(wiphy, request, &ar->ath_common.regulatory); - if (config_enabled(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) { + if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) { ath10k_dbg(ar, ATH10K_DBG_REGULATORY, "dfs region 0x%x\n", request->dfs_region); result = ar->dfs_detector->set_dfs_domain(ar->dfs_detector, @@ -7955,7 +7955,7 @@ int ath10k_mac_register(struct ath10k *ar) if (!test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) ar->hw->netdev_features = NETIF_F_HW_CSUM; - if (config_enabled(CONFIG_ATH10K_DFS_CERTIFIED)) { + if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED)) { /* Init ath dfs pattern detector */ ar->ath_common.debug_mask = ATH_DBG_DFS; ar->dfs_detector = dfs_pattern_detector_init(&ar->ath_common, @@ -8003,7 +8003,7 @@ err_unregister: ieee80211_unregister_hw(ar->hw); err_dfs_detector_exit: - if (config_enabled(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) + if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) ar->dfs_detector->exit(ar->dfs_detector); err_free: @@ -8018,7 +8018,7 @@ void ath10k_mac_unregister(struct ath10k *ar) { ieee80211_unregister_hw(ar->hw); - if (config_enabled(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) + if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) ar->dfs_detector->exit(ar->dfs_detector); kfree(ar->mac.sbands[NL80211_BAND_2GHZ].channels); diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 169cd2e783eb..d2462886b75c 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -3704,7 +3704,7 @@ void ath10k_wmi_event_dfs(struct ath10k *ar, phyerr->tsf_timestamp, tsf, buf_len); /* Skip event if DFS disabled */ - if (!config_enabled(CONFIG_ATH10K_DFS_CERTIFIED)) + if (!IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED)) return; ATH10K_DFS_STAT_INC(ar, pulses_total); diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 4ad6284fc37d..72e2ec67768d 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -3881,7 +3881,7 @@ int ath6kl_cfg80211_init(struct ath6kl *ar) BIT(NL80211_IFTYPE_P2P_CLIENT); } - if (config_enabled(CONFIG_ATH6KL_REGDOMAIN) && + if (IS_ENABLED(CONFIG_ATH6KL_REGDOMAIN) && test_bit(ATH6KL_FW_CAPABILITY_REGDOMAIN, ar->fw_capabilities)) { wiphy->reg_notifier = ath6kl_cfg80211_reg_notify; ar->wiphy->features |= NL80211_FEATURE_CELL_BASE_REG_HINTS; diff --git a/drivers/net/wireless/ath/ath9k/common-spectral.c b/drivers/net/wireless/ath/ath9k/common-spectral.c index a8762711ad74..e2512d5bc0e1 100644 --- a/drivers/net/wireless/ath/ath9k/common-spectral.c +++ b/drivers/net/wireless/ath/ath9k/common-spectral.c @@ -731,7 +731,7 @@ void ath9k_cmn_spectral_scan_trigger(struct ath_common *common, struct ath_hw *ah = spec_priv->ah; u32 rxfilter; - if (config_enabled(CONFIG_ATH9K_TX99)) + if (IS_ENABLED(CONFIG_ATH9K_TX99)) return; if (!ath9k_hw_ops(ah)->spectral_scan_trigger) { @@ -806,7 +806,7 @@ static ssize_t write_file_spec_scan_ctl(struct file *file, char buf[32]; ssize_t len; - if (config_enabled(CONFIG_ATH9K_TX99)) + if (IS_ENABLED(CONFIG_ATH9K_TX99)) return -EOPNOTSUPP; len = min(count, sizeof(buf) - 1); @@ -1072,7 +1072,7 @@ static struct rchan_callbacks rfs_spec_scan_cb = { void ath9k_cmn_spectral_deinit_debug(struct ath_spec_scan_priv *spec_priv) { - if (config_enabled(CONFIG_ATH9K_DEBUGFS)) { + if (IS_ENABLED(CONFIG_ATH9K_DEBUGFS)) { relay_close(spec_priv->rfs_chan_spec_scan); spec_priv->rfs_chan_spec_scan = NULL; } diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index edc74fca60aa..cfa3fe82ade3 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -843,7 +843,7 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw) NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE | NL80211_FEATURE_P2P_GO_CTWIN; - if (!config_enabled(CONFIG_ATH9K_TX99)) { + if (!IS_ENABLED(CONFIG_ATH9K_TX99)) { hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_P2P_GO) | BIT(NL80211_IFTYPE_P2P_CLIENT) | diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 7594650f214f..a394622c9022 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1250,7 +1250,7 @@ static int ath9k_add_interface(struct ieee80211_hw *hw, mutex_lock(&sc->mutex); - if (config_enabled(CONFIG_ATH9K_TX99)) { + if (IS_ENABLED(CONFIG_ATH9K_TX99)) { if (sc->cur_chan->nvifs >= 1) { mutex_unlock(&sc->mutex); return -EOPNOTSUPP; @@ -1300,7 +1300,7 @@ static int ath9k_change_interface(struct ieee80211_hw *hw, mutex_lock(&sc->mutex); - if (config_enabled(CONFIG_ATH9K_TX99)) { + if (IS_ENABLED(CONFIG_ATH9K_TX99)) { mutex_unlock(&sc->mutex); return -EOPNOTSUPP; } @@ -1360,7 +1360,7 @@ static void ath9k_enable_ps(struct ath_softc *sc) struct ath_hw *ah = sc->sc_ah; struct ath_common *common = ath9k_hw_common(ah); - if (config_enabled(CONFIG_ATH9K_TX99)) + if (IS_ENABLED(CONFIG_ATH9K_TX99)) return; sc->ps_enabled = true; @@ -1379,7 +1379,7 @@ static void ath9k_disable_ps(struct ath_softc *sc) struct ath_hw *ah = sc->sc_ah; struct ath_common *common = ath9k_hw_common(ah); - if (config_enabled(CONFIG_ATH9K_TX99)) + if (IS_ENABLED(CONFIG_ATH9K_TX99)) return; sc->ps_enabled = false; @@ -1953,7 +1953,7 @@ static int ath9k_get_survey(struct ieee80211_hw *hw, int idx, struct ieee80211_channel *chan; int pos; - if (config_enabled(CONFIG_ATH9K_TX99)) + if (IS_ENABLED(CONFIG_ATH9K_TX99)) return -EOPNOTSUPP; spin_lock_bh(&common->cc_lock); @@ -2003,7 +2003,7 @@ static void ath9k_set_coverage_class(struct ieee80211_hw *hw, struct ath_softc *sc = hw->priv; struct ath_hw *ah = sc->sc_ah; - if (config_enabled(CONFIG_ATH9K_TX99)) + if (IS_ENABLED(CONFIG_ATH9K_TX99)) return; mutex_lock(&sc->mutex); diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c index 32160fca876a..669734252664 100644 --- a/drivers/net/wireless/ath/ath9k/recv.c +++ b/drivers/net/wireless/ath/ath9k/recv.c @@ -377,7 +377,7 @@ u32 ath_calcrxfilter(struct ath_softc *sc) struct ath_common *common = ath9k_hw_common(sc->sc_ah); u32 rfilt; - if (config_enabled(CONFIG_ATH9K_TX99)) + if (IS_ENABLED(CONFIG_ATH9K_TX99)) return 0; rfilt = ATH9K_RX_FILTER_UCAST | ATH9K_RX_FILTER_BCAST diff --git a/drivers/net/wireless/ath/dfs_pattern_detector.c b/drivers/net/wireless/ath/dfs_pattern_detector.c index 2303ef96299d..2f8136d50f78 100644 --- a/drivers/net/wireless/ath/dfs_pattern_detector.c +++ b/drivers/net/wireless/ath/dfs_pattern_detector.c @@ -352,7 +352,7 @@ dfs_pattern_detector_init(struct ath_common *common, { struct dfs_pattern_detector *dpd; - if (!config_enabled(CONFIG_CFG80211_CERTIFICATION_ONUS)) + if (!IS_ENABLED(CONFIG_CFG80211_CERTIFICATION_ONUS)) return NULL; dpd = kmalloc(sizeof(*dpd), GFP_KERNEL); diff --git a/drivers/net/wireless/ath/regd.c b/drivers/net/wireless/ath/regd.c index 7e15ed9ed31f..f8506037736f 100644 --- a/drivers/net/wireless/ath/regd.c +++ b/drivers/net/wireless/ath/regd.c @@ -116,7 +116,7 @@ static const struct ieee80211_regdomain ath_world_regdom_67_68_6A_6C = { static bool dynamic_country_user_possible(struct ath_regulatory *reg) { - if (config_enabled(CONFIG_ATH_REG_DYNAMIC_USER_CERT_TESTING)) + if (IS_ENABLED(CONFIG_ATH_REG_DYNAMIC_USER_CERT_TESTING)) return true; switch (reg->country_code) { @@ -188,7 +188,7 @@ static bool dynamic_country_user_possible(struct ath_regulatory *reg) static bool ath_reg_dyn_country_user_allow(struct ath_regulatory *reg) { - if (!config_enabled(CONFIG_ATH_REG_DYNAMIC_USER_REG_HINTS)) + if (!IS_ENABLED(CONFIG_ATH_REG_DYNAMIC_USER_REG_HINTS)) return false; if (!dynamic_country_user_possible(reg)) return false; diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 40d04ef5da9e..0d5c29ae51de 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -551,13 +551,15 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, size_t count, loff_t *offp) { struct intel_ntb_dev *ndev; + struct pci_dev *pdev; void __iomem *mmio; char *buf; size_t buf_size; ssize_t ret, off; - union { u64 v64; u32 v32; u16 v16; } u; + union { u64 v64; u32 v32; u16 v16; u8 v8; } u; ndev = filp->private_data; + pdev = ndev_pdev(ndev); mmio = ndev->self_mmio; buf_size = min(count, 0x800ul); @@ -632,6 +634,41 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, "Doorbell Bell -\t\t%#llx\n", u.v64); off += scnprintf(buf + off, buf_size - off, + "\nNTB Window Size:\n"); + + pci_read_config_byte(pdev, XEON_PBAR23SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "PBAR23SZ %hhu\n", u.v8); + if (!ndev->bar4_split) { + pci_read_config_byte(pdev, XEON_PBAR45SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "PBAR45SZ %hhu\n", u.v8); + } else { + pci_read_config_byte(pdev, XEON_PBAR4SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "PBAR4SZ %hhu\n", u.v8); + pci_read_config_byte(pdev, XEON_PBAR5SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "PBAR5SZ %hhu\n", u.v8); + } + + pci_read_config_byte(pdev, XEON_SBAR23SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "SBAR23SZ %hhu\n", u.v8); + if (!ndev->bar4_split) { + pci_read_config_byte(pdev, XEON_SBAR45SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "SBAR45SZ %hhu\n", u.v8); + } else { + pci_read_config_byte(pdev, XEON_SBAR4SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "SBAR4SZ %hhu\n", u.v8); + pci_read_config_byte(pdev, XEON_SBAR5SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "SBAR5SZ %hhu\n", u.v8); + } + + off += scnprintf(buf + off, buf_size - off, "\nNTB Incoming XLAT:\n"); u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_xlat, 2)); @@ -669,7 +706,7 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, "LMT45 -\t\t\t%#018llx\n", u.v64); } - if (pdev_is_xeon(ndev->ntb.pdev)) { + if (pdev_is_xeon(pdev)) { if (ntb_topo_is_b2b(ndev->ntb.topo)) { off += scnprintf(buf + off, buf_size - off, "\nNTB Outgoing B2B XLAT:\n"); @@ -750,22 +787,22 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, off += scnprintf(buf + off, buf_size - off, "\nXEON NTB Hardware Errors:\n"); - if (!pci_read_config_word(ndev->ntb.pdev, + if (!pci_read_config_word(pdev, XEON_DEVSTS_OFFSET, &u.v16)) off += scnprintf(buf + off, buf_size - off, "DEVSTS -\t\t%#06x\n", u.v16); - if (!pci_read_config_word(ndev->ntb.pdev, + if (!pci_read_config_word(pdev, XEON_LINK_STATUS_OFFSET, &u.v16)) off += scnprintf(buf + off, buf_size - off, "LNKSTS -\t\t%#06x\n", u.v16); - if (!pci_read_config_dword(ndev->ntb.pdev, + if (!pci_read_config_dword(pdev, XEON_UNCERRSTS_OFFSET, &u.v32)) off += scnprintf(buf + off, buf_size - off, "UNCERRSTS -\t\t%#06x\n", u.v32); - if (!pci_read_config_dword(ndev->ntb.pdev, + if (!pci_read_config_dword(pdev, XEON_CORERRSTS_OFFSET, &u.v32)) off += scnprintf(buf + off, buf_size - off, "CORERRSTS -\t\t%#06x\n", u.v32); diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 2ef9d9130864..d5c5894f252e 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -153,6 +153,7 @@ struct ntb_transport_qp { unsigned int rx_index; unsigned int rx_max_entry; unsigned int rx_max_frame; + unsigned int rx_alloc_entry; dma_cookie_t last_cookie; struct tasklet_struct rxc_db_work; @@ -480,7 +481,9 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, out_offset += snprintf(buf + out_offset, out_count - out_offset, "rx_index - \t%u\n", qp->rx_index); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "rx_max_entry - \t%u\n\n", qp->rx_max_entry); + "rx_max_entry - \t%u\n", qp->rx_max_entry); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry); out_offset += snprintf(buf + out_offset, out_count - out_offset, "tx_bytes - \t%llu\n", qp->tx_bytes); @@ -597,9 +600,12 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, { struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; struct ntb_transport_mw *mw; + struct ntb_dev *ndev = nt->ndev; + struct ntb_queue_entry *entry; unsigned int rx_size, num_qps_mw; unsigned int mw_num, mw_count, qp_count; unsigned int i; + int node; mw_count = nt->mw_count; qp_count = nt->qp_count; @@ -626,6 +632,23 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, qp->rx_max_entry = rx_size / qp->rx_max_frame; qp->rx_index = 0; + /* + * Checking to see if we have more entries than the default. + * We should add additional entries if that is the case so we + * can be in sync with the transport frames. + */ + node = dev_to_node(&ndev->dev); + for (i = qp->rx_alloc_entry; i < qp->rx_max_entry; i++) { + entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node); + if (!entry) + return -ENOMEM; + + entry->qp = qp; + ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, + &qp->rx_free_q); + qp->rx_alloc_entry++; + } + qp->remote_rx_info->entry = qp->rx_max_entry - 1; /* setup the hdr offsets with 0's */ @@ -1037,6 +1060,13 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) int node; int rc, i; + mw_count = ntb_mw_count(ndev); + if (ntb_spad_count(ndev) < (NUM_MWS + 1 + mw_count * 2)) { + dev_err(&ndev->dev, "Not enough scratch pad registers for %s", + NTB_TRANSPORT_NAME); + return -EIO; + } + if (ntb_db_is_unsafe(ndev)) dev_dbg(&ndev->dev, "doorbell is unsafe, proceed anyway...\n"); @@ -1052,8 +1082,6 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) nt->ndev = ndev; - mw_count = ntb_mw_count(ndev); - nt->mw_count = mw_count; nt->mw_vec = kzalloc_node(mw_count * sizeof(*nt->mw_vec), @@ -1722,8 +1750,9 @@ ntb_transport_create_queue(void *data, struct device *client_dev, ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q); } + qp->rx_alloc_entry = NTB_QP_DEF_NUM_ENTRIES; - for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { + for (i = 0; i < qp->tx_max_entry; i++) { entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node); if (!entry) goto err2; @@ -1744,6 +1773,7 @@ err2: while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) kfree(entry); err1: + qp->rx_alloc_entry = 0; while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q))) kfree(entry); if (qp->tx_dma_chan) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 8dfce9c9aad0..6a50f20bf1cd 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -58,6 +58,7 @@ #include <linux/delay.h> #include <linux/sizes.h> #include <linux/ntb.h> +#include <linux/mutex.h> #define DRIVER_NAME "ntb_perf" #define DRIVER_DESCRIPTION "PCIe NTB Performance Measurement Tool" @@ -83,6 +84,10 @@ MODULE_DESCRIPTION(DRIVER_DESCRIPTION); static struct dentry *perf_debugfs_dir; +static unsigned long max_mw_size; +module_param(max_mw_size, ulong, 0644); +MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows"); + static unsigned int seg_order = 19; /* 512K */ module_param(seg_order, uint, 0644); MODULE_PARM_DESC(seg_order, "size order [n^2] of buffer segment for testing"); @@ -117,6 +122,10 @@ struct pthr_ctx { int dma_prep_err; int src_idx; void *srcs[MAX_SRCS]; + wait_queue_head_t *wq; + int status; + u64 copied; + u64 diff_us; }; struct perf_ctx { @@ -124,23 +133,23 @@ struct perf_ctx { spinlock_t db_lock; struct perf_mw mw; bool link_is_up; - struct work_struct link_cleanup; struct delayed_work link_work; + wait_queue_head_t link_wq; struct dentry *debugfs_node_dir; struct dentry *debugfs_run; struct dentry *debugfs_threads; u8 perf_threads; - bool run; + /* mutex ensures only one set of threads run at once */ + struct mutex run_mutex; struct pthr_ctx pthr_ctx[MAX_THREADS]; atomic_t tsync; + atomic_t tdone; }; enum { VERSION = 0, MW_SZ_HIGH, MW_SZ_LOW, - SPAD_MSG, - SPAD_ACK, MAX_SPAD }; @@ -148,10 +157,16 @@ static void perf_link_event(void *ctx) { struct perf_ctx *perf = ctx; - if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) + if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) { schedule_delayed_work(&perf->link_work, 2*HZ); - else - schedule_work(&perf->link_cleanup); + } else { + dev_dbg(&perf->ntb->pdev->dev, "link down\n"); + + if (!perf->link_is_up) + cancel_delayed_work_sync(&perf->link_work); + + perf->link_is_up = false; + } } static void perf_db_event(void *ctx, int vec) @@ -271,6 +286,7 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src, char __iomem *tmp = dst; u64 perf, diff_us; ktime_t kstart, kstop, kdiff; + unsigned long last_sleep = jiffies; chunks = div64_u64(win_size, buf_size); total_chunks = div64_u64(total, buf_size); @@ -286,30 +302,40 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src, } else tmp += buf_size; - /* Probably should schedule every 4GB to prevent soft hang. */ - if (((copied % SZ_4G) == 0) && !use_dma) { + /* Probably should schedule every 5s to prevent soft hang. */ + if (unlikely((jiffies - last_sleep) > 5 * HZ)) { + last_sleep = jiffies; set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1); } + + if (unlikely(kthread_should_stop())) + break; } if (use_dma) { - pr_info("%s: All DMA descriptors submitted\n", current->comm); - while (atomic_read(&pctx->dma_sync) != 0) + pr_debug("%s: All DMA descriptors submitted\n", current->comm); + while (atomic_read(&pctx->dma_sync) != 0) { + if (kthread_should_stop()) + break; msleep(20); + } } kstop = ktime_get(); kdiff = ktime_sub(kstop, kstart); diff_us = ktime_to_us(kdiff); - pr_info("%s: copied %llu bytes\n", current->comm, copied); + pr_debug("%s: copied %llu bytes\n", current->comm, copied); - pr_info("%s: lasted %llu usecs\n", current->comm, diff_us); + pr_debug("%s: lasted %llu usecs\n", current->comm, diff_us); perf = div64_u64(copied, diff_us); - pr_info("%s: MBytes/s: %llu\n", current->comm, perf); + pr_debug("%s: MBytes/s: %llu\n", current->comm, perf); + + pctx->copied = copied; + pctx->diff_us = diff_us; return 0; } @@ -331,7 +357,7 @@ static int ntb_perf_thread(void *data) int rc, node, i; struct dma_chan *dma_chan = NULL; - pr_info("kthread %s starting...\n", current->comm); + pr_debug("kthread %s starting...\n", current->comm); node = dev_to_node(&pdev->dev); @@ -389,7 +415,10 @@ static int ntb_perf_thread(void *data) pctx->srcs[i] = NULL; } - return 0; + atomic_inc(&perf->tdone); + wake_up(pctx->wq); + rc = 0; + goto done; err: for (i = 0; i < MAX_SRCS; i++) { @@ -402,6 +431,16 @@ err: pctx->dma_chan = NULL; } +done: + /* Wait until we are told to stop */ + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; + schedule(); + } + __set_current_state(TASK_RUNNING); + return rc; } @@ -472,6 +511,10 @@ static void perf_link_work(struct work_struct *work) dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__); size = perf->mw.phys_size; + + if (max_mw_size && size > max_mw_size) + size = max_mw_size; + ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size)); ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size)); ntb_peer_spad_write(ndev, VERSION, PERF_VERSION); @@ -496,6 +539,7 @@ static void perf_link_work(struct work_struct *work) goto out1; perf->link_is_up = true; + wake_up(&perf->link_wq); return; @@ -508,18 +552,6 @@ out: msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT)); } -static void perf_link_cleanup(struct work_struct *work) -{ - struct perf_ctx *perf = container_of(work, - struct perf_ctx, - link_cleanup); - - dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__); - - if (!perf->link_is_up) - cancel_delayed_work_sync(&perf->link_work); -} - static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf) { struct perf_mw *mw; @@ -544,16 +576,44 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf, { struct perf_ctx *perf = filp->private_data; char *buf; - ssize_t ret, out_offset; + ssize_t ret, out_off = 0; + struct pthr_ctx *pctx; + int i; + u64 rate; if (!perf) return 0; - buf = kmalloc(64, GFP_KERNEL); + buf = kmalloc(1024, GFP_KERNEL); if (!buf) return -ENOMEM; - out_offset = snprintf(buf, 64, "%d\n", perf->run); - ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset); + + if (mutex_is_locked(&perf->run_mutex)) { + out_off = snprintf(buf, 64, "running\n"); + goto read_from_buf; + } + + for (i = 0; i < MAX_THREADS; i++) { + pctx = &perf->pthr_ctx[i]; + + if (pctx->status == -ENODATA) + break; + + if (pctx->status) { + out_off += snprintf(buf + out_off, 1024 - out_off, + "%d: error %d\n", i, + pctx->status); + continue; + } + + rate = div64_u64(pctx->copied, pctx->diff_us); + out_off += snprintf(buf + out_off, 1024 - out_off, + "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n", + i, pctx->copied, pctx->diff_us, rate); + } + +read_from_buf: + ret = simple_read_from_buffer(ubuf, count, offp, buf, out_off); kfree(buf); return ret; @@ -564,80 +624,90 @@ static void threads_cleanup(struct perf_ctx *perf) struct pthr_ctx *pctx; int i; - perf->run = false; for (i = 0; i < MAX_THREADS; i++) { pctx = &perf->pthr_ctx[i]; if (pctx->thread) { - kthread_stop(pctx->thread); + pctx->status = kthread_stop(pctx->thread); pctx->thread = NULL; } } } +static void perf_clear_thread_status(struct perf_ctx *perf) +{ + int i; + + for (i = 0; i < MAX_THREADS; i++) + perf->pthr_ctx[i].status = -ENODATA; +} + static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf, size_t count, loff_t *offp) { struct perf_ctx *perf = filp->private_data; int node, i; + DECLARE_WAIT_QUEUE_HEAD(wq); - if (!perf->link_is_up) - return 0; + if (wait_event_interruptible(perf->link_wq, perf->link_is_up)) + return -ENOLINK; if (perf->perf_threads == 0) - return 0; + return -EINVAL; - if (atomic_read(&perf->tsync) == 0) - perf->run = false; + if (!mutex_trylock(&perf->run_mutex)) + return -EBUSY; - if (perf->run) - threads_cleanup(perf); - else { - perf->run = true; + perf_clear_thread_status(perf); - if (perf->perf_threads > MAX_THREADS) { - perf->perf_threads = MAX_THREADS; - pr_info("Reset total threads to: %u\n", MAX_THREADS); - } + if (perf->perf_threads > MAX_THREADS) { + perf->perf_threads = MAX_THREADS; + pr_info("Reset total threads to: %u\n", MAX_THREADS); + } - /* no greater than 1M */ - if (seg_order > MAX_SEG_ORDER) { - seg_order = MAX_SEG_ORDER; - pr_info("Fix seg_order to %u\n", seg_order); - } + /* no greater than 1M */ + if (seg_order > MAX_SEG_ORDER) { + seg_order = MAX_SEG_ORDER; + pr_info("Fix seg_order to %u\n", seg_order); + } - if (run_order < seg_order) { - run_order = seg_order; - pr_info("Fix run_order to %u\n", run_order); - } + if (run_order < seg_order) { + run_order = seg_order; + pr_info("Fix run_order to %u\n", run_order); + } - node = dev_to_node(&perf->ntb->pdev->dev); - /* launch kernel thread */ - for (i = 0; i < perf->perf_threads; i++) { - struct pthr_ctx *pctx; - - pctx = &perf->pthr_ctx[i]; - atomic_set(&pctx->dma_sync, 0); - pctx->perf = perf; - pctx->thread = - kthread_create_on_node(ntb_perf_thread, - (void *)pctx, - node, "ntb_perf %d", i); - if (IS_ERR(pctx->thread)) { - pctx->thread = NULL; - goto err; - } else - wake_up_process(pctx->thread); - - if (perf->run == false) - return -ENXIO; - } + node = dev_to_node(&perf->ntb->pdev->dev); + atomic_set(&perf->tdone, 0); + /* launch kernel thread */ + for (i = 0; i < perf->perf_threads; i++) { + struct pthr_ctx *pctx; + + pctx = &perf->pthr_ctx[i]; + atomic_set(&pctx->dma_sync, 0); + pctx->perf = perf; + pctx->wq = &wq; + pctx->thread = + kthread_create_on_node(ntb_perf_thread, + (void *)pctx, + node, "ntb_perf %d", i); + if (IS_ERR(pctx->thread)) { + pctx->thread = NULL; + goto err; + } else { + wake_up_process(pctx->thread); + } } + wait_event_interruptible(wq, + atomic_read(&perf->tdone) == perf->perf_threads); + + threads_cleanup(perf); + mutex_unlock(&perf->run_mutex); return count; err: threads_cleanup(perf); + mutex_unlock(&perf->run_mutex); return -ENXIO; } @@ -688,6 +758,12 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) int node; int rc = 0; + if (ntb_spad_count(ntb) < MAX_SPAD) { + dev_err(&ntb->dev, "Not enough scratch pad registers for %s", + DRIVER_NAME); + return -EIO; + } + node = dev_to_node(&pdev->dev); perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node); @@ -699,11 +775,11 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) perf->ntb = ntb; perf->perf_threads = 1; atomic_set(&perf->tsync, 0); - perf->run = false; + mutex_init(&perf->run_mutex); spin_lock_init(&perf->db_lock); perf_setup_mw(ntb, perf); + init_waitqueue_head(&perf->link_wq); INIT_DELAYED_WORK(&perf->link_work, perf_link_work); - INIT_WORK(&perf->link_cleanup, perf_link_cleanup); rc = ntb_set_ctx(ntb, perf, &perf_ops); if (rc) @@ -717,11 +793,12 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) if (rc) goto err_ctx; + perf_clear_thread_status(perf); + return 0; err_ctx: cancel_delayed_work_sync(&perf->link_work); - cancel_work_sync(&perf->link_cleanup); kfree(perf); err_perf: return rc; @@ -734,8 +811,9 @@ static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb) dev_dbg(&perf->ntb->dev, "%s called\n", __func__); + mutex_lock(&perf->run_mutex); + cancel_delayed_work_sync(&perf->link_work); - cancel_work_sync(&perf->link_cleanup); ntb_clear_ctx(ntb); ntb_link_disable(ntb); diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c index fe1600566981..7d311799fca1 100644 --- a/drivers/ntb/test/ntb_pingpong.c +++ b/drivers/ntb/test/ntb_pingpong.c @@ -61,6 +61,7 @@ #include <linux/pci.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/debugfs.h> #include <linux/ntb.h> @@ -96,8 +97,13 @@ struct pp_ctx { spinlock_t db_lock; struct timer_list db_timer; unsigned long db_delay; + struct dentry *debugfs_node_dir; + struct dentry *debugfs_count; + atomic_t count; }; +static struct dentry *pp_debugfs_dir; + static void pp_ping(unsigned long ctx) { struct pp_ctx *pp = (void *)ctx; @@ -171,10 +177,32 @@ static void pp_db_event(void *ctx, int vec) dev_dbg(&pp->ntb->dev, "Pong vec %d bits %#llx\n", vec, db_bits); + atomic_inc(&pp->count); } spin_unlock_irqrestore(&pp->db_lock, irqflags); } +static int pp_debugfs_setup(struct pp_ctx *pp) +{ + struct pci_dev *pdev = pp->ntb->pdev; + + if (!pp_debugfs_dir) + return -ENODEV; + + pp->debugfs_node_dir = debugfs_create_dir(pci_name(pdev), + pp_debugfs_dir); + if (!pp->debugfs_node_dir) + return -ENODEV; + + pp->debugfs_count = debugfs_create_atomic_t("count", S_IRUSR | S_IWUSR, + pp->debugfs_node_dir, + &pp->count); + if (!pp->debugfs_count) + return -ENODEV; + + return 0; +} + static const struct ntb_ctx_ops pp_ops = { .link_event = pp_link_event, .db_event = pp_db_event, @@ -210,6 +238,7 @@ static int pp_probe(struct ntb_client *client, pp->ntb = ntb; pp->db_bits = 0; + atomic_set(&pp->count, 0); spin_lock_init(&pp->db_lock); setup_timer(&pp->db_timer, pp_ping, (unsigned long)pp); pp->db_delay = msecs_to_jiffies(delay_ms); @@ -218,6 +247,10 @@ static int pp_probe(struct ntb_client *client, if (rc) goto err_ctx; + rc = pp_debugfs_setup(pp); + if (rc) + goto err_ctx; + ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); ntb_link_event(ntb); @@ -234,6 +267,8 @@ static void pp_remove(struct ntb_client *client, { struct pp_ctx *pp = ntb->ctx; + debugfs_remove_recursive(pp->debugfs_node_dir); + ntb_clear_ctx(ntb); del_timer_sync(&pp->db_timer); ntb_link_disable(ntb); @@ -247,4 +282,29 @@ static struct ntb_client pp_client = { .remove = pp_remove, }, }; -module_ntb_client(pp_client); + +static int __init pp_init(void) +{ + int rc; + + if (debugfs_initialized()) + pp_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); + + rc = ntb_register_client(&pp_client); + if (rc) + goto err_client; + + return 0; + +err_client: + debugfs_remove_recursive(pp_debugfs_dir); + return rc; +} +module_init(pp_init); + +static void __exit pp_exit(void) +{ + ntb_unregister_client(&pp_client); + debugfs_remove_recursive(pp_debugfs_dir); +} +module_exit(pp_exit); diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c index 6f5dc6ca673d..61bf2ef87e0e 100644 --- a/drivers/ntb/test/ntb_tool.c +++ b/drivers/ntb/test/ntb_tool.c @@ -59,6 +59,12 @@ * * Eg: check if clearing the doorbell mask generates an interrupt. * + * # Check the link status + * root@self# cat $DBG_DIR/link + * + * # Block until the link is up + * root@self# echo Y > $DBG_DIR/link_event + * * # Set the doorbell mask * root@self# echo 's 1' > $DBG_DIR/mask * @@ -79,6 +85,13 @@ * root@self# cat $DBG_DIR/spad * * Observe that spad 0 and 1 have the values set by the peer. + * + * # Check the memory window translation info + * cat $DBG_DIR/peer_trans0 + * + * # Setup a 16k memory window buffer + * echo 16384 > $DBG_DIR/peer_trans0 + * */ #include <linux/init.h> @@ -89,6 +102,7 @@ #include <linux/dma-mapping.h> #include <linux/pci.h> #include <linux/slab.h> +#include <linux/uaccess.h> #include <linux/ntb.h> @@ -105,11 +119,27 @@ MODULE_VERSION(DRIVER_VERSION); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESCRIPTION); +#define MAX_MWS 16 + static struct dentry *tool_dbgfs; +struct tool_mw { + int idx; + struct tool_ctx *tc; + resource_size_t win_size; + resource_size_t size; + u8 __iomem *local; + u8 *peer; + dma_addr_t peer_dma; + struct dentry *peer_dbg_file; +}; + struct tool_ctx { struct ntb_dev *ntb; struct dentry *dbgfs; + wait_queue_head_t link_wq; + int mw_count; + struct tool_mw mws[MAX_MWS]; }; #define SPAD_FNAME_SIZE 0x10 @@ -135,6 +165,8 @@ static void tool_link_event(void *ctx) dev_dbg(&tc->ntb->dev, "link is %s speed %d width %d\n", up ? "up" : "down", speed, width); + + wake_up(&tc->link_wq); } static void tool_db_event(void *ctx, int vec) @@ -239,7 +271,14 @@ static ssize_t tool_spadfn_read(struct tool_ctx *tc, char __user *ubuf, if (!spad_read_fn) return -EINVAL; - buf_size = min_t(size_t, size, 0x100); + spad_count = ntb_spad_count(tc->ntb); + + /* + * We multiply the number of spads by 15 to get the buffer size + * this is from 3 for the %d, 10 for the largest hex value + * (0x00000000) and 2 for the tab and line feed. + */ + buf_size = min_t(size_t, size, spad_count * 15); buf = kmalloc(buf_size, GFP_KERNEL); if (!buf) @@ -247,7 +286,6 @@ static ssize_t tool_spadfn_read(struct tool_ctx *tc, char __user *ubuf, pos = 0; - spad_count = ntb_spad_count(tc->ntb); for (i = 0; i < spad_count; ++i) { pos += scnprintf(buf + pos, buf_size - pos, "%d\t%#x\n", i, spad_read_fn(tc->ntb, i)); @@ -268,7 +306,7 @@ static ssize_t tool_spadfn_write(struct tool_ctx *tc, { int spad_idx; u32 spad_val; - char *buf; + char *buf, *buf_ptr; int pos, n; ssize_t rc; @@ -288,14 +326,15 @@ static ssize_t tool_spadfn_write(struct tool_ctx *tc, } buf[size] = 0; - - n = sscanf(buf, "%d %i%n", &spad_idx, &spad_val, &pos); + buf_ptr = buf; + n = sscanf(buf_ptr, "%d %i%n", &spad_idx, &spad_val, &pos); while (n == 2) { + buf_ptr += pos; rc = spad_write_fn(tc->ntb, spad_idx, spad_val); if (rc) break; - n = sscanf(buf + pos, "%d %i%n", &spad_idx, &spad_val, &pos); + n = sscanf(buf_ptr, "%d %i%n", &spad_idx, &spad_val, &pos); } if (n < 0) @@ -442,8 +481,384 @@ static TOOL_FOPS_RDWR(tool_peer_spad_fops, tool_peer_spad_read, tool_peer_spad_write); +static ssize_t tool_link_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + char buf[3]; + + buf[0] = ntb_link_is_up(tc->ntb, NULL, NULL) ? 'Y' : 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + + return simple_read_from_buffer(ubuf, size, offp, buf, 2); +} + +static ssize_t tool_link_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + char buf[32]; + size_t buf_size; + bool val; + int rc; + + buf_size = min(size, (sizeof(buf) - 1)); + if (copy_from_user(buf, ubuf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + + rc = strtobool(buf, &val); + if (rc) + return rc; + + if (val) + rc = ntb_link_enable(tc->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); + else + rc = ntb_link_disable(tc->ntb); + + if (rc) + return rc; + + return size; +} + +static TOOL_FOPS_RDWR(tool_link_fops, + tool_link_read, + tool_link_write); + +static ssize_t tool_link_event_write(struct file *filep, + const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + char buf[32]; + size_t buf_size; + bool val; + int rc; + + buf_size = min(size, (sizeof(buf) - 1)); + if (copy_from_user(buf, ubuf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + + rc = strtobool(buf, &val); + if (rc) + return rc; + + if (wait_event_interruptible(tc->link_wq, + ntb_link_is_up(tc->ntb, NULL, NULL) == val)) + return -ERESTART; + + return size; +} + +static TOOL_FOPS_RDWR(tool_link_event_fops, + NULL, + tool_link_event_write); + +static ssize_t tool_mw_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + ssize_t rc; + loff_t pos = *offp; + void *buf; + + if (mw->local == NULL) + return -EIO; + if (pos < 0) + return -EINVAL; + if (pos >= mw->win_size || !size) + return 0; + if (size > mw->win_size - pos) + size = mw->win_size - pos; + + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + memcpy_fromio(buf, mw->local + pos, size); + rc = copy_to_user(ubuf, buf, size); + if (rc == size) { + rc = -EFAULT; + goto err_free; + } + + size -= rc; + *offp = pos + size; + rc = size; + +err_free: + kfree(buf); + + return rc; +} + +static ssize_t tool_mw_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + ssize_t rc; + loff_t pos = *offp; + void *buf; + + if (pos < 0) + return -EINVAL; + if (pos >= mw->win_size || !size) + return 0; + if (size > mw->win_size - pos) + size = mw->win_size - pos; + + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + rc = copy_from_user(buf, ubuf, size); + if (rc == size) { + rc = -EFAULT; + goto err_free; + } + + size -= rc; + *offp = pos + size; + rc = size; + + memcpy_toio(mw->local + pos, buf, size); + +err_free: + kfree(buf); + + return rc; +} + +static TOOL_FOPS_RDWR(tool_mw_fops, + tool_mw_read, + tool_mw_write); + +static ssize_t tool_peer_mw_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + + if (!mw->peer) + return -ENXIO; + + return simple_read_from_buffer(ubuf, size, offp, mw->peer, mw->size); +} + +static ssize_t tool_peer_mw_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + + if (!mw->peer) + return -ENXIO; + + return simple_write_to_buffer(mw->peer, mw->size, offp, ubuf, size); +} + +static TOOL_FOPS_RDWR(tool_peer_mw_fops, + tool_peer_mw_read, + tool_peer_mw_write); + +static int tool_setup_mw(struct tool_ctx *tc, int idx, size_t req_size) +{ + int rc; + struct tool_mw *mw = &tc->mws[idx]; + phys_addr_t base; + resource_size_t size, align, align_size; + char buf[16]; + + if (mw->peer) + return 0; + + rc = ntb_mw_get_range(tc->ntb, idx, &base, &size, &align, + &align_size); + if (rc) + return rc; + + mw->size = min_t(resource_size_t, req_size, size); + mw->size = round_up(mw->size, align); + mw->size = round_up(mw->size, align_size); + mw->peer = dma_alloc_coherent(&tc->ntb->pdev->dev, mw->size, + &mw->peer_dma, GFP_KERNEL); + + if (!mw->peer) + return -ENOMEM; + + rc = ntb_mw_set_trans(tc->ntb, idx, mw->peer_dma, mw->size); + if (rc) + goto err_free_dma; + + snprintf(buf, sizeof(buf), "peer_mw%d", idx); + mw->peer_dbg_file = debugfs_create_file(buf, S_IRUSR | S_IWUSR, + mw->tc->dbgfs, mw, + &tool_peer_mw_fops); + + return 0; + +err_free_dma: + dma_free_coherent(&tc->ntb->pdev->dev, mw->size, + mw->peer, + mw->peer_dma); + mw->peer = NULL; + mw->peer_dma = 0; + mw->size = 0; + + return rc; +} + +static void tool_free_mw(struct tool_ctx *tc, int idx) +{ + struct tool_mw *mw = &tc->mws[idx]; + + if (mw->peer) { + ntb_mw_clear_trans(tc->ntb, idx); + dma_free_coherent(&tc->ntb->pdev->dev, mw->size, + mw->peer, + mw->peer_dma); + } + + mw->peer = NULL; + mw->peer_dma = 0; + + debugfs_remove(mw->peer_dbg_file); + + mw->peer_dbg_file = NULL; +} + +static ssize_t tool_peer_mw_trans_read(struct file *filep, + char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + + char *buf; + size_t buf_size; + ssize_t ret, off = 0; + + phys_addr_t base; + resource_size_t mw_size; + resource_size_t align; + resource_size_t align_size; + + buf_size = min_t(size_t, size, 512); + + buf = kmalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ntb_mw_get_range(mw->tc->ntb, mw->idx, + &base, &mw_size, &align, &align_size); + + off += scnprintf(buf + off, buf_size - off, + "Peer MW %d Information:\n", mw->idx); + + off += scnprintf(buf + off, buf_size - off, + "Physical Address \t%pa[p]\n", + &base); + + off += scnprintf(buf + off, buf_size - off, + "Window Size \t%lld\n", + (unsigned long long)mw_size); + + off += scnprintf(buf + off, buf_size - off, + "Alignment \t%lld\n", + (unsigned long long)align); + + off += scnprintf(buf + off, buf_size - off, + "Size Alignment \t%lld\n", + (unsigned long long)align_size); + + off += scnprintf(buf + off, buf_size - off, + "Ready \t%c\n", + (mw->peer) ? 'Y' : 'N'); + + off += scnprintf(buf + off, buf_size - off, + "Allocated Size \t%zd\n", + (mw->peer) ? (size_t)mw->size : 0); + + ret = simple_read_from_buffer(ubuf, size, offp, buf, off); + kfree(buf); + return ret; +} + +static ssize_t tool_peer_mw_trans_write(struct file *filep, + const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + + char buf[32]; + size_t buf_size; + unsigned long long val; + int rc; + + buf_size = min(size, (sizeof(buf) - 1)); + if (copy_from_user(buf, ubuf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + + rc = kstrtoull(buf, 0, &val); + if (rc) + return rc; + + tool_free_mw(mw->tc, mw->idx); + if (val) + rc = tool_setup_mw(mw->tc, mw->idx, val); + + if (rc) + return rc; + + return size; +} + +static TOOL_FOPS_RDWR(tool_peer_mw_trans_fops, + tool_peer_mw_trans_read, + tool_peer_mw_trans_write); + +static int tool_init_mw(struct tool_ctx *tc, int idx) +{ + struct tool_mw *mw = &tc->mws[idx]; + phys_addr_t base; + int rc; + + rc = ntb_mw_get_range(tc->ntb, idx, &base, &mw->win_size, + NULL, NULL); + if (rc) + return rc; + + mw->tc = tc; + mw->idx = idx; + mw->local = ioremap_wc(base, mw->win_size); + if (!mw->local) + return -EFAULT; + + return 0; +} + +static void tool_free_mws(struct tool_ctx *tc) +{ + int i; + + for (i = 0; i < tc->mw_count; i++) { + tool_free_mw(tc, i); + + if (tc->mws[i].local) + iounmap(tc->mws[i].local); + + tc->mws[i].local = NULL; + } +} + static void tool_setup_dbgfs(struct tool_ctx *tc) { + int i; + /* This modules is useless without dbgfs... */ if (!tool_dbgfs) { tc->dbgfs = NULL; @@ -472,12 +887,31 @@ static void tool_setup_dbgfs(struct tool_ctx *tc) debugfs_create_file("peer_spad", S_IRUSR | S_IWUSR, tc->dbgfs, tc, &tool_peer_spad_fops); + + debugfs_create_file("link", S_IRUSR | S_IWUSR, tc->dbgfs, + tc, &tool_link_fops); + + debugfs_create_file("link_event", S_IWUSR, tc->dbgfs, + tc, &tool_link_event_fops); + + for (i = 0; i < tc->mw_count; i++) { + char buf[30]; + + snprintf(buf, sizeof(buf), "mw%d", i); + debugfs_create_file(buf, S_IRUSR | S_IWUSR, tc->dbgfs, + &tc->mws[i], &tool_mw_fops); + + snprintf(buf, sizeof(buf), "peer_trans%d", i); + debugfs_create_file(buf, S_IRUSR | S_IWUSR, tc->dbgfs, + &tc->mws[i], &tool_peer_mw_trans_fops); + } } static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) { struct tool_ctx *tc; int rc; + int i; if (ntb_db_is_unsafe(ntb)) dev_dbg(&ntb->dev, "doorbell is unsafe\n"); @@ -485,13 +919,21 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) if (ntb_spad_is_unsafe(ntb)) dev_dbg(&ntb->dev, "scratchpad is unsafe\n"); - tc = kmalloc(sizeof(*tc), GFP_KERNEL); + tc = kzalloc(sizeof(*tc), GFP_KERNEL); if (!tc) { rc = -ENOMEM; goto err_tc; } tc->ntb = ntb; + init_waitqueue_head(&tc->link_wq); + + tc->mw_count = min(ntb_mw_count(tc->ntb), MAX_MWS); + for (i = 0; i < tc->mw_count; i++) { + rc = tool_init_mw(tc, i); + if (rc) + goto err_ctx; + } tool_setup_dbgfs(tc); @@ -505,6 +947,7 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) return 0; err_ctx: + tool_free_mws(tc); debugfs_remove_recursive(tc->dbgfs); kfree(tc); err_tc: @@ -515,6 +958,8 @@ static void tool_remove(struct ntb_client *self, struct ntb_dev *ntb) { struct tool_ctx *tc = ntb->ctx; + tool_free_mws(tc); + ntb_clear_ctx(ntb); ntb_link_disable(ntb); diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 9dce03f420eb..368795aad5c9 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1133,11 +1133,11 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip, struct page *page, unsigned int len, unsigned int off, - int rw, sector_t sector) + bool is_write, sector_t sector) { int ret; - if (rw == READ) { + if (!is_write) { ret = btt_read_pg(btt, bip, page, off, sector, len); flush_dcache_page(page); } else { @@ -1155,7 +1155,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) struct bvec_iter iter; unsigned long start; struct bio_vec bvec; - int err = 0, rw; + int err = 0; bool do_acct; /* @@ -1170,7 +1170,6 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) } do_acct = nd_iostat_start(bio, &start); - rw = bio_data_dir(bio); bio_for_each_segment(bvec, bio, iter) { unsigned int len = bvec.bv_len; @@ -1181,11 +1180,12 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) BUG_ON(len % btt->sector_size); err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset, - rw, iter.bi_sector); + op_is_write(bio_op(bio)), iter.bi_sector); if (err) { dev_info(&btt->nd_btt->dev, "io error in %s sector %lld, len %d,\n", - (rw == READ) ? "READ" : "WRITE", + (op_is_write(bio_op(bio))) ? "WRITE" : + "READ", (unsigned long long) iter.bi_sector, len); bio->bi_error = err; break; @@ -1200,12 +1200,12 @@ out: } static int btt_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, int rw) + struct page *page, bool is_write) { struct btt *btt = bdev->bd_disk->private_data; - btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, rw, sector); - page_endio(page, rw & WRITE, 0); + btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector); + page_endio(page, is_write, 0); return 0; } @@ -1269,6 +1269,7 @@ static int btt_blk_init(struct btt *btt) } } set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9); + btt->nd_btt->size = btt->nlba * (u64)btt->sector_size; revalidate_disk(btt->btt_disk); return 0; diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 3fa7919f94a8..97dd2925ed6e 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -140,10 +140,30 @@ static ssize_t namespace_store(struct device *dev, } static DEVICE_ATTR_RW(namespace); +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_btt *nd_btt = to_nd_btt(dev); + ssize_t rc; + + device_lock(dev); + if (dev->driver) + rc = sprintf(buf, "%llu\n", nd_btt->size); + else { + /* no size to convey if the btt instance is disabled */ + rc = -ENXIO; + } + device_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(size); + static struct attribute *nd_btt_attributes[] = { &dev_attr_sector_size.attr, &dev_attr_namespace.attr, &dev_attr_uuid.attr, + &dev_attr_size.attr, NULL, }; diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 40476399d227..8024a0ef86d3 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -143,6 +143,7 @@ struct nd_btt { struct nd_namespace_common *ndns; struct btt *btt; unsigned long lbasize; + u64 size; u8 *uuid; int id; }; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index b511099457db..571a6c7ee2fc 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -67,7 +67,7 @@ static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, } static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, - unsigned int len, unsigned int off, int rw, + unsigned int len, unsigned int off, bool is_write, sector_t sector) { int rc = 0; @@ -79,7 +79,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) bad_pmem = true; - if (rw == READ) { + if (!is_write) { if (unlikely(bad_pmem)) rc = -EIO; else { @@ -128,13 +128,13 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) struct pmem_device *pmem = q->queuedata; struct nd_region *nd_region = to_region(pmem); - if (bio->bi_rw & REQ_FLUSH) + if (bio->bi_opf & REQ_FLUSH) nvdimm_flush(nd_region); do_acct = nd_iostat_start(bio, &start); bio_for_each_segment(bvec, bio, iter) { rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, - bvec.bv_offset, bio_data_dir(bio), + bvec.bv_offset, op_is_write(bio_op(bio)), iter.bi_sector); if (rc) { bio->bi_error = rc; @@ -144,7 +144,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) if (do_acct) nd_iostat_end(bio, start); - if (bio->bi_rw & REQ_FUA) + if (bio->bi_opf & REQ_FUA) nvdimm_flush(nd_region); bio_endio(bio); @@ -152,12 +152,12 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) } static int pmem_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, int rw) + struct page *page, bool is_write) { struct pmem_device *pmem = bdev->bd_queue->queuedata; int rc; - rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector); + rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, is_write, sector); /* * The ->rw_page interface is subtle and tricky. The core @@ -166,7 +166,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, * caused by double completion. */ if (rc == 0) - page_endio(page, rw & WRITE, 0); + page_endio(page, is_write, 0); return rc; } diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d7c33f9361aa..8dcf5a960951 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1543,15 +1543,10 @@ static void nvme_disable_io_queues(struct nvme_dev *dev) reinit_completion(&dev->ioq_wait); retry: timeout = ADMIN_TIMEOUT; - for (; i > 0; i--) { - struct nvme_queue *nvmeq = dev->queues[i]; - - if (!pass) - nvme_suspend_queue(nvmeq); - if (nvme_delete_queue(nvmeq, opcode)) + for (; i > 0; i--, sent++) + if (nvme_delete_queue(dev->queues[i], opcode)) break; - ++sent; - } + while (sent--) { timeout = wait_for_completion_io_timeout(&dev->ioq_wait, timeout); if (timeout == 0) @@ -1693,11 +1688,12 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_stop_queues(&dev->ctrl); csts = readl(dev->bar + NVME_REG_CSTS); } + + for (i = dev->queue_count - 1; i > 0; i--) + nvme_suspend_queue(dev->queues[i]); + if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) { - for (i = dev->queue_count - 1; i >= 0; i--) { - struct nvme_queue *nvmeq = dev->queues[i]; - nvme_suspend_queue(nvmeq); - } + nvme_suspend_queue(dev->queues[0]); } else { nvme_disable_io_queues(dev); nvme_disable_admin_queue(dev, shutdown); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 3e3ce2b0424e..8d2875b4c56d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -12,13 +12,11 @@ * more details. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/delay.h> #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/string.h> -#include <linux/jiffies.h> #include <linux/atomic.h> #include <linux/blk-mq.h> #include <linux/types.h> @@ -26,7 +24,6 @@ #include <linux/mutex.h> #include <linux/scatterlist.h> #include <linux/nvme.h> -#include <linux/t10-pi.h> #include <asm/unaligned.h> #include <rdma/ib_verbs.h> @@ -169,7 +166,6 @@ MODULE_PARM_DESC(register_always, static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event); static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc); -static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl); /* XXX: really should move to a generic header sooner or later.. */ static inline void put_unaligned_le24(u32 val, u8 *p) @@ -687,11 +683,6 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) list_del(&ctrl->list); mutex_unlock(&nvme_rdma_ctrl_mutex); - if (ctrl->ctrl.tagset) { - blk_cleanup_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(&ctrl->tag_set); - nvme_rdma_dev_put(ctrl->device); - } kfree(ctrl->queues); nvmf_free_options(nctrl->opts); free_ctrl: @@ -748,8 +739,11 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); - if (ctrl->queue_count > 1) + if (ctrl->queue_count > 1) { nvme_start_queues(&ctrl->ctrl); + nvme_queue_scan(&ctrl->ctrl); + nvme_queue_async_events(&ctrl->ctrl); + } dev_info(ctrl->ctrl.device, "Successfully reconnected\n"); @@ -1269,7 +1263,7 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue) { struct nvme_rdma_ctrl *ctrl = queue->ctrl; struct rdma_conn_param param = { }; - struct nvme_rdma_cm_req priv; + struct nvme_rdma_cm_req priv = { }; int ret; param.qp_num = queue->qp->qp_num; @@ -1318,37 +1312,39 @@ out_destroy_queue_ib: * that caught the event. Since we hold the callout until the controller * deletion is completed, we'll deadlock if the controller deletion will * call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership - * of destroying this queue before-hand, destroy the queue resources - * after the controller deletion completed with the exception of destroying - * the cm_id implicitely by returning a non-zero rc to the callout. + * of destroying this queue before-hand, destroy the queue resources, + * then queue the controller deletion which won't destroy this queue and + * we destroy the cm_id implicitely by returning a non-zero rc to the callout. */ static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue) { struct nvme_rdma_ctrl *ctrl = queue->ctrl; - int ret, ctrl_deleted = 0; + int ret; - /* First disable the queue so ctrl delete won't free it */ - if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) - goto out; + /* Own the controller deletion */ + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) + return 0; - /* delete the controller */ - ret = __nvme_rdma_del_ctrl(ctrl); - if (!ret) { - dev_warn(ctrl->ctrl.device, - "Got rdma device removal event, deleting ctrl\n"); - flush_work(&ctrl->delete_work); + dev_warn(ctrl->ctrl.device, + "Got rdma device removal event, deleting ctrl\n"); - /* Return non-zero so the cm_id will destroy implicitly */ - ctrl_deleted = 1; + /* Get rid of reconnect work if its running */ + cancel_delayed_work_sync(&ctrl->reconnect_work); + /* Disable the queue so ctrl delete won't free it */ + if (test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) { /* Free this queue ourselves */ - rdma_disconnect(queue->cm_id); - ib_drain_qp(queue->qp); + nvme_rdma_stop_queue(queue); nvme_rdma_destroy_queue_ib(queue); + + /* Return non-zero so the cm_id will destroy implicitly */ + ret = 1; } -out: - return ctrl_deleted; + /* Queue controller deletion */ + queue_work(nvme_rdma_wq, &ctrl->delete_work); + flush_work(&ctrl->delete_work); + return ret; } static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, @@ -1648,7 +1644,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl) nvme_rdma_free_io_queues(ctrl); } - if (ctrl->ctrl.state == NVME_CTRL_LIVE) + if (test_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[0].flags)) nvme_shutdown_ctrl(&ctrl->ctrl); blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); @@ -1657,15 +1653,27 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl) nvme_rdma_destroy_admin_queue(ctrl); } +static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) +{ + nvme_uninit_ctrl(&ctrl->ctrl); + if (shutdown) + nvme_rdma_shutdown_ctrl(ctrl); + + if (ctrl->ctrl.tagset) { + blk_cleanup_queue(ctrl->ctrl.connect_q); + blk_mq_free_tag_set(&ctrl->tag_set); + nvme_rdma_dev_put(ctrl->device); + } + + nvme_put_ctrl(&ctrl->ctrl); +} + static void nvme_rdma_del_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, delete_work); - nvme_remove_namespaces(&ctrl->ctrl); - nvme_rdma_shutdown_ctrl(ctrl); - nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); + __nvme_rdma_remove_ctrl(ctrl, true); } static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl) @@ -1698,9 +1706,7 @@ static void nvme_rdma_remove_ctrl_work(struct work_struct *work) struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, delete_work); - nvme_remove_namespaces(&ctrl->ctrl); - nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); + __nvme_rdma_remove_ctrl(ctrl, false); } static void nvme_rdma_reset_ctrl_work(struct work_struct *work) @@ -1739,6 +1745,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) if (ctrl->queue_count > 1) { nvme_start_queues(&ctrl->ctrl); nvme_queue_scan(&ctrl->ctrl); + nvme_queue_async_events(&ctrl->ctrl); } return; diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 2fac17a5ad53..47c564b5a289 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -13,7 +13,6 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> -#include <linux/random.h> #include <generated/utsrelease.h> #include "nvmet.h" @@ -83,7 +82,6 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvme_id_ctrl *id; - u64 serial; u16 status = 0; id = kzalloc(sizeof(*id), GFP_KERNEL); @@ -96,10 +94,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) id->vid = 0; id->ssvid = 0; - /* generate a random serial number as our controllers are ephemeral: */ - get_random_bytes(&serial, sizeof(serial)); memset(id->sn, ' ', sizeof(id->sn)); - snprintf(id->sn, sizeof(id->sn), "%llx", serial); + snprintf(id->sn, sizeof(id->sn), "%llx", ctrl->serial); memset(id->mn, ' ', sizeof(id->mn)); strncpy((char *)id->mn, "Linux", sizeof(id->mn)); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 8a891ca53367..6559d5afa7bf 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -13,6 +13,7 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> +#include <linux/random.h> #include "nvmet.h" static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; @@ -728,6 +729,9 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE); memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE); + /* generate a random serial number as our controllers are ephemeral: */ + get_random_bytes(&ctrl->serial, sizeof(ctrl->serial)); + kref_init(&ctrl->ref); ctrl->subsys = subsys; diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 94e782987cc9..7affd40a6b33 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -414,9 +414,8 @@ static void nvme_loop_del_ctrl_work(struct work_struct *work) struct nvme_loop_ctrl *ctrl = container_of(work, struct nvme_loop_ctrl, delete_work); - nvme_remove_namespaces(&ctrl->ctrl); - nvme_loop_shutdown_ctrl(ctrl); nvme_uninit_ctrl(&ctrl->ctrl); + nvme_loop_shutdown_ctrl(ctrl); nvme_put_ctrl(&ctrl->ctrl); } @@ -501,7 +500,6 @@ out_free_queues: nvme_loop_destroy_admin_queue(ctrl); out_disable: dev_warn(ctrl->ctrl.device, "Removing after reset failure\n"); - nvme_remove_namespaces(&ctrl->ctrl); nvme_uninit_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl); } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 57dd6d834c28..76b6eedccaf9 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -113,6 +113,7 @@ struct nvmet_ctrl { struct mutex lock; u64 cap; + u64 serial; u32 cc; u32 csts; diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index e06d504bdf0c..b4d648536c3e 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -77,6 +77,7 @@ enum nvmet_rdma_queue_state { NVMET_RDMA_Q_CONNECTING, NVMET_RDMA_Q_LIVE, NVMET_RDMA_Q_DISCONNECTING, + NVMET_RDMA_IN_DEVICE_REMOVAL, }; struct nvmet_rdma_queue { @@ -615,15 +616,10 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp, if (!len) return 0; - /* use the already allocated data buffer if possible */ - if (len <= NVMET_RDMA_INLINE_DATA_SIZE && rsp->queue->host_qid) { - nvmet_rdma_use_inline_sg(rsp, len, 0); - } else { - status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt, - len); - if (status) - return status; - } + status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt, + len); + if (status) + return status; ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num, rsp->req.sg, rsp->req.sg_cnt, 0, addr, key, @@ -984,7 +980,10 @@ static void nvmet_rdma_release_queue_work(struct work_struct *w) struct nvmet_rdma_device *dev = queue->dev; nvmet_rdma_free_queue(queue); - rdma_destroy_id(cm_id); + + if (queue->state != NVMET_RDMA_IN_DEVICE_REMOVAL) + rdma_destroy_id(cm_id); + kref_put(&dev->ref, nvmet_rdma_free_dev); } @@ -1233,8 +1232,9 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) switch (queue->state) { case NVMET_RDMA_Q_CONNECTING: case NVMET_RDMA_Q_LIVE: - disconnect = true; queue->state = NVMET_RDMA_Q_DISCONNECTING; + case NVMET_RDMA_IN_DEVICE_REMOVAL: + disconnect = true; break; case NVMET_RDMA_Q_DISCONNECTING: break; @@ -1272,6 +1272,62 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, schedule_work(&queue->release_work); } +/** + * nvme_rdma_device_removal() - Handle RDMA device removal + * @queue: nvmet rdma queue (cm id qp_context) + * @addr: nvmet address (cm_id context) + * + * DEVICE_REMOVAL event notifies us that the RDMA device is about + * to unplug so we should take care of destroying our RDMA resources. + * This event will be generated for each allocated cm_id. + * + * Note that this event can be generated on a normal queue cm_id + * and/or a device bound listener cm_id (where in this case + * queue will be null). + * + * we claim ownership on destroying the cm_id. For queues we move + * the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for port + * we nullify the priv to prevent double cm_id destruction and destroying + * the cm_id implicitely by returning a non-zero rc to the callout. + */ +static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, + struct nvmet_rdma_queue *queue) +{ + unsigned long flags; + + if (!queue) { + struct nvmet_port *port = cm_id->context; + + /* + * This is a listener cm_id. Make sure that + * future remove_port won't invoke a double + * cm_id destroy. use atomic xchg to make sure + * we don't compete with remove_port. + */ + if (xchg(&port->priv, NULL) != cm_id) + return 0; + } else { + /* + * This is a queue cm_id. Make sure that + * release queue will not destroy the cm_id + * and schedule all ctrl queues removal (only + * if the queue is not disconnecting already). + */ + spin_lock_irqsave(&queue->state_lock, flags); + if (queue->state != NVMET_RDMA_Q_DISCONNECTING) + queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL; + spin_unlock_irqrestore(&queue->state_lock, flags); + nvmet_rdma_queue_disconnect(queue); + flush_scheduled_work(); + } + + /* + * We need to return 1 so that the core will destroy + * it's own ID. What a great API design.. + */ + return 1; +} + static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { @@ -1294,20 +1350,11 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id, break; case RDMA_CM_EVENT_ADDR_CHANGE: case RDMA_CM_EVENT_DISCONNECTED: - case RDMA_CM_EVENT_DEVICE_REMOVAL: case RDMA_CM_EVENT_TIMEWAIT_EXIT: - /* - * We can get the device removal callback even for a - * CM ID that we aren't actually using. In that case - * the context pointer is NULL, so we shouldn't try - * to disconnect a non-existing queue. But we also - * need to return 1 so that the core will destroy - * it's own ID. What a great API design.. - */ - if (queue) - nvmet_rdma_queue_disconnect(queue); - else - ret = 1; + nvmet_rdma_queue_disconnect(queue); + break; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + ret = nvmet_rdma_device_removal(cm_id, queue); break; case RDMA_CM_EVENT_REJECTED: case RDMA_CM_EVENT_UNREACHABLE: @@ -1396,9 +1443,10 @@ out_destroy_id: static void nvmet_rdma_remove_port(struct nvmet_port *port) { - struct rdma_cm_id *cm_id = port->priv; + struct rdma_cm_id *cm_id = xchg(&port->priv, NULL); - rdma_destroy_id(cm_id); + if (cm_id) + rdma_destroy_id(cm_id); } static struct nvmet_fabrics_ops nvmet_rdma_ops = { diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 765390e3ed8d..8aa197691074 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -499,8 +499,24 @@ EXPORT_SYMBOL_GPL(of_platform_default_populate); static int __init of_platform_default_populate_init(void) { - if (of_have_populated_dt()) - of_platform_default_populate(NULL, NULL, NULL); + struct device_node *node; + + if (!of_have_populated_dt()) + return -ENODEV; + + /* + * Handle ramoops explicitly, since it is inside /reserved-memory, + * which lacks a "compatible" property. + */ + node = of_find_node_by_path("/reserved-memory"); + if (node) { + node = of_find_compatible_node(node, NULL, "ramoops"); + if (node) + of_platform_device_create(node, NULL, NULL); + } + + /* Populate everything else. */ + of_platform_default_populate(NULL, NULL, NULL); return 0; } diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index e24b05996a1b..3ed6238f8f6e 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -790,7 +790,7 @@ ccio_map_single(struct device *dev, void *addr, size_t size, static dma_addr_t ccio_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { return ccio_map_single(dev, page_address(page) + offset, size, direction); @@ -806,7 +806,7 @@ ccio_map_page(struct device *dev, struct page *page, unsigned long offset, */ static void ccio_unmap_page(struct device *dev, dma_addr_t iova, size_t size, - enum dma_data_direction direction, struct dma_attrs *attrs) + enum dma_data_direction direction, unsigned long attrs) { struct ioc *ioc; unsigned long flags; @@ -844,7 +844,7 @@ ccio_unmap_page(struct device *dev, dma_addr_t iova, size_t size, */ static void * ccio_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { void *ret; #if 0 @@ -878,9 +878,9 @@ ccio_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, */ static void ccio_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { - ccio_unmap_page(dev, dma_handle, size, 0, NULL); + ccio_unmap_page(dev, dma_handle, size, 0, 0); free_pages((unsigned long)cpu_addr, get_order(size)); } @@ -907,7 +907,7 @@ ccio_free(struct device *dev, size_t size, void *cpu_addr, */ static int ccio_map_sg(struct device *dev, struct scatterlist *sglist, int nents, - enum dma_data_direction direction, struct dma_attrs *attrs) + enum dma_data_direction direction, unsigned long attrs) { struct ioc *ioc; int coalesced, filled = 0; @@ -984,7 +984,7 @@ ccio_map_sg(struct device *dev, struct scatterlist *sglist, int nents, */ static void ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, - enum dma_data_direction direction, struct dma_attrs *attrs) + enum dma_data_direction direction, unsigned long attrs) { struct ioc *ioc; @@ -1004,7 +1004,7 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, ioc->usg_pages += sg_dma_len(sglist) >> PAGE_SHIFT; #endif ccio_unmap_page(dev, sg_dma_address(sglist), - sg_dma_len(sglist), direction, NULL); + sg_dma_len(sglist), direction, 0); ++sglist; } diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index 42ec4600b7e4..151b86b6d2e2 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -783,7 +783,7 @@ sba_map_single(struct device *dev, void *addr, size_t size, static dma_addr_t sba_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { return sba_map_single(dev, page_address(page) + offset, size, direction); @@ -801,7 +801,7 @@ sba_map_page(struct device *dev, struct page *page, unsigned long offset, */ static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size, - enum dma_data_direction direction, struct dma_attrs *attrs) + enum dma_data_direction direction, unsigned long attrs) { struct ioc *ioc; #if DELAYED_RESOURCE_CNT > 0 @@ -876,7 +876,7 @@ sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size, * See Documentation/DMA-API-HOWTO.txt */ static void *sba_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, struct dma_attrs *attrs) + gfp_t gfp, unsigned long attrs) { void *ret; @@ -908,9 +908,9 @@ static void *sba_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_handle */ static void sba_free(struct device *hwdev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { - sba_unmap_page(hwdev, dma_handle, size, 0, NULL); + sba_unmap_page(hwdev, dma_handle, size, 0, 0); free_pages((unsigned long) vaddr, get_order(size)); } @@ -943,7 +943,7 @@ int dump_run_sg = 0; */ static int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, - enum dma_data_direction direction, struct dma_attrs *attrs) + enum dma_data_direction direction, unsigned long attrs) { struct ioc *ioc; int coalesced, filled = 0; @@ -1026,7 +1026,7 @@ sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, */ static void sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, - enum dma_data_direction direction, struct dma_attrs *attrs) + enum dma_data_direction direction, unsigned long attrs) { struct ioc *ioc; #ifdef ASSERT_PDIR_SANITY @@ -1051,7 +1051,7 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, while (sg_dma_len(sglist) && nents--) { sba_unmap_page(dev, sg_dma_address(sglist), sg_dma_len(sglist), - direction, NULL); + direction, 0); #ifdef SBA_COLLECT_STATS ioc->usg_pages += ((sg_dma_address(sglist) & ~IOVP_MASK) + sg_dma_len(sglist) + IOVP_SIZE - 1) >> PAGE_SHIFT; ioc->usingle_calls--; /* kluge since call is unmap_sg() */ diff --git a/drivers/pci/ecam.c b/drivers/pci/ecam.c index 66e0d718472f..43ed08dd8b01 100644 --- a/drivers/pci/ecam.c +++ b/drivers/pci/ecam.c @@ -27,7 +27,7 @@ * since we have enough virtual address range available. On 32-bit, we * ioremap the config space for each bus individually. */ -static const bool per_bus_mapping = !config_enabled(CONFIG_64BIT); +static const bool per_bus_mapping = !IS_ENABLED(CONFIG_64BIT); /* * Create a PCI config space window diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 6a33ddcfa20b..a46b585fae31 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -675,7 +675,8 @@ static void acpiphp_check_bridge(struct acpiphp_bridge *bridge) if (bridge->is_going_away) return; - pm_runtime_get_sync(&bridge->pci_dev->dev); + if (bridge->pci_dev) + pm_runtime_get_sync(&bridge->pci_dev->dev); list_for_each_entry(slot, &bridge->slots, node) { struct pci_bus *bus = slot->bus; @@ -697,7 +698,8 @@ static void acpiphp_check_bridge(struct acpiphp_bridge *bridge) } } - pm_runtime_put(&bridge->pci_dev->dev); + if (bridge->pci_dev) + pm_runtime_put(&bridge->pci_dev->dev); } /* diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index a02981efdad5..eafa6138a6b8 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1411,6 +1411,8 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) pci_msi_domain_update_chip_ops(info); + info->flags |= MSI_FLAG_ACTIVATE_EARLY; + domain = msi_create_irq_domain(fwnode, info, parent); if (!domain) return NULL; diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 6ccb994bdfcb..c494613c1909 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -688,7 +688,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) return 0; } -static DEFINE_MUTEX(arm_pmu_mutex); +static DEFINE_SPINLOCK(arm_pmu_lock); static LIST_HEAD(arm_pmu_list); /* @@ -701,7 +701,7 @@ static int arm_perf_starting_cpu(unsigned int cpu) { struct arm_pmu *pmu; - mutex_lock(&arm_pmu_mutex); + spin_lock(&arm_pmu_lock); list_for_each_entry(pmu, &arm_pmu_list, entry) { if (!cpumask_test_cpu(cpu, &pmu->supported_cpus)) @@ -709,7 +709,7 @@ static int arm_perf_starting_cpu(unsigned int cpu) if (pmu->reset) pmu->reset(pmu); } - mutex_unlock(&arm_pmu_mutex); + spin_unlock(&arm_pmu_lock); return 0; } @@ -821,9 +821,9 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu) if (!cpu_hw_events) return -ENOMEM; - mutex_lock(&arm_pmu_mutex); + spin_lock(&arm_pmu_lock); list_add_tail(&cpu_pmu->entry, &arm_pmu_list); - mutex_unlock(&arm_pmu_mutex); + spin_unlock(&arm_pmu_lock); err = cpu_pm_pmu_register(cpu_pmu); if (err) @@ -859,9 +859,9 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu) return 0; out_unregister: - mutex_lock(&arm_pmu_mutex); + spin_lock(&arm_pmu_lock); list_del(&cpu_pmu->entry); - mutex_unlock(&arm_pmu_mutex); + spin_unlock(&arm_pmu_lock); free_percpu(cpu_hw_events); return err; } @@ -869,9 +869,9 @@ out_unregister: static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) { cpu_pm_pmu_unregister(cpu_pmu); - mutex_lock(&arm_pmu_mutex); + spin_lock(&arm_pmu_lock); list_del(&cpu_pmu->entry); - mutex_unlock(&arm_pmu_mutex); + spin_unlock(&arm_pmu_lock); free_percpu(cpu_pmu->hw_events); } @@ -967,11 +967,12 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu) /* If we didn't manage to parse anything, try the interrupt affinity */ if (cpumask_weight(&pmu->supported_cpus) == 0) { - if (!using_spi) { + int irq = platform_get_irq(pdev, 0); + + if (irq_is_percpu(irq)) { /* If using PPIs, check the affinity of the partition */ - int ret, irq; + int ret; - irq = platform_get_irq(pdev, 0); ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus); if (ret) { kfree(irqs); diff --git a/drivers/pinctrl/intel/pinctrl-merrifield.c b/drivers/pinctrl/intel/pinctrl-merrifield.c index eb4990ff26ca..7fb765642ee7 100644 --- a/drivers/pinctrl/intel/pinctrl-merrifield.c +++ b/drivers/pinctrl/intel/pinctrl-merrifield.c @@ -11,6 +11,7 @@ #include <linux/bitops.h> #include <linux/err.h> +#include <linux/io.h> #include <linux/module.h> #include <linux/platform_device.h> #include <linux/pinctrl/pinconf.h> diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index 11623c6b0cb3..44e69c963f5d 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -727,13 +727,7 @@ static int meson_pinctrl_probe(struct platform_device *pdev) return PTR_ERR(pc->pcdev); } - ret = meson_gpiolib_register(pc); - if (ret) { - pinctrl_unregister(pc->pcdev); - return ret; - } - - return 0; + return meson_gpiolib_register(pc); } static struct platform_driver meson_pinctrl_driver = { diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 634b4d30eefb..b3e772390ab6 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -43,17 +43,6 @@ static int amd_gpio_direction_input(struct gpio_chip *gc, unsigned offset) spin_lock_irqsave(&gpio_dev->lock, flags); pin_reg = readl(gpio_dev->base + offset * 4); - /* - * Suppose BIOS or Bootloader sets specific debounce for the - * GPIO. if not, set debounce to be 2.75ms and remove glitch. - */ - if ((pin_reg & DB_TMR_OUT_MASK) == 0) { - pin_reg |= 0xf; - pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF); - pin_reg |= DB_TYPE_REMOVE_GLITCH << DB_CNTRL_OFF; - pin_reg &= ~BIT(DB_TMR_LARGE_OFF); - } - pin_reg &= ~BIT(OUTPUT_ENABLE_OFF); writel(pin_reg, gpio_dev->base + offset * 4); spin_unlock_irqrestore(&gpio_dev->lock, flags); @@ -326,15 +315,6 @@ static void amd_gpio_irq_enable(struct irq_data *d) spin_lock_irqsave(&gpio_dev->lock, flags); pin_reg = readl(gpio_dev->base + (d->hwirq)*4); - /* - Suppose BIOS or Bootloader sets specific debounce for the - GPIO. if not, set debounce to be 2.75ms. - */ - if ((pin_reg & DB_TMR_OUT_MASK) == 0) { - pin_reg |= 0xf; - pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF); - pin_reg &= ~BIT(DB_TMR_LARGE_OFF); - } pin_reg |= BIT(INTERRUPT_ENABLE_OFF); pin_reg |= BIT(INTERRUPT_MASK_OFF); writel(pin_reg, gpio_dev->base + (d->hwirq)*4); diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c index c6d410ef8de0..7bad200bd67c 100644 --- a/drivers/pinctrl/pinctrl-pistachio.c +++ b/drivers/pinctrl/pinctrl-pistachio.c @@ -1432,7 +1432,6 @@ static int pistachio_pinctrl_probe(struct platform_device *pdev) { struct pistachio_pinctrl *pctl; struct resource *res; - int ret; pctl = devm_kzalloc(&pdev->dev, sizeof(*pctl), GFP_KERNEL); if (!pctl) @@ -1464,13 +1463,7 @@ static int pistachio_pinctrl_probe(struct platform_device *pdev) return PTR_ERR(pctl->pctldev); } - ret = pistachio_gpio_register(pctl); - if (ret < 0) { - pinctrl_unregister(pctl->pctldev); - return ret; - } - - return 0; + return pistachio_gpio_register(pctl); } static struct platform_driver pistachio_pinctrl_driver = { diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index b6e161f71b26..6c084b266651 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -380,3 +380,20 @@ int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev, return ret; } EXPORT_SYMBOL(cros_ec_cmd_xfer); + +int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev, + struct cros_ec_command *msg) +{ + int ret; + + ret = cros_ec_cmd_xfer(ec_dev, msg); + if (ret < 0) { + dev_err(ec_dev->dev, "Command xfer error (err:%d)\n", ret); + } else if (msg->result != EC_RES_SUCCESS) { + dev_dbg(ec_dev->dev, "Command result (err: %d)\n", msg->result); + return -EPROTO; + } + + return ret; +} +EXPORT_SYMBOL(cros_ec_cmd_xfer_status); diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c index d2bc092defd7..da2fe18162e1 100644 --- a/drivers/platform/x86/dell-wmi.c +++ b/drivers/platform/x86/dell-wmi.c @@ -110,8 +110,8 @@ static const struct key_entry dell_wmi_keymap_type_0000[] __initconst = { /* BIOS error detected */ { KE_IGNORE, 0xe00d, { KEY_RESERVED } }, - /* Unknown, defined in ACPI DSDT */ - /* { KE_IGNORE, 0xe00e, { KEY_RESERVED } }, */ + /* Battery was removed or inserted */ + { KE_IGNORE, 0xe00e, { KEY_RESERVED } }, /* Wifi Catcher */ { KE_KEY, 0xe011, { KEY_PROG2 } }, diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c index bedb361746a0..c38a5b9733c8 100644 --- a/drivers/pnp/pnpbios/core.c +++ b/drivers/pnp/pnpbios/core.c @@ -60,6 +60,7 @@ #include <linux/delay.h> #include <linux/acpi.h> #include <linux/freezer.h> +#include <linux/kmod.h> #include <linux/kthread.h> #include <asm/page.h> diff --git a/drivers/power/max17042_battery.c b/drivers/power/max17042_battery.c index 9c65f134d447..da7a75f82489 100644 --- a/drivers/power/max17042_battery.c +++ b/drivers/power/max17042_battery.c @@ -457,13 +457,16 @@ static inline void max17042_write_model_data(struct max17042_chip *chip, } static inline void max17042_read_model_data(struct max17042_chip *chip, - u8 addr, u32 *data, int size) + u8 addr, u16 *data, int size) { struct regmap *map = chip->regmap; int i; + u32 tmp; - for (i = 0; i < size; i++) - regmap_read(map, addr + i, &data[i]); + for (i = 0; i < size; i++) { + regmap_read(map, addr + i, &tmp); + data[i] = (u16)tmp; + } } static inline int max17042_model_data_compare(struct max17042_chip *chip, @@ -486,7 +489,7 @@ static int max17042_init_model(struct max17042_chip *chip) { int ret; int table_size = ARRAY_SIZE(chip->pdata->config_data->cell_char_tbl); - u32 *temp_data; + u16 *temp_data; temp_data = kcalloc(table_size, sizeof(*temp_data), GFP_KERNEL); if (!temp_data) @@ -501,7 +504,7 @@ static int max17042_init_model(struct max17042_chip *chip) ret = max17042_model_data_compare( chip, chip->pdata->config_data->cell_char_tbl, - (u16 *)temp_data, + temp_data, table_size); max10742_lock_model(chip); @@ -514,7 +517,7 @@ static int max17042_verify_model_lock(struct max17042_chip *chip) { int i; int table_size = ARRAY_SIZE(chip->pdata->config_data->cell_char_tbl); - u32 *temp_data; + u16 *temp_data; int ret = 0; temp_data = kcalloc(table_size, sizeof(*temp_data), GFP_KERNEL); diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig index 3bfac539334b..c74c3f67b8da 100644 --- a/drivers/power/reset/Kconfig +++ b/drivers/power/reset/Kconfig @@ -200,8 +200,8 @@ config REBOOT_MODE config SYSCON_REBOOT_MODE tristate "Generic SYSCON regmap reboot mode driver" depends on OF + depends on MFD_SYSCON select REBOOT_MODE - select MFD_SYSCON help Say y here will enable reboot mode driver. This will get reboot mode arguments and store it in SYSCON mapped diff --git a/drivers/power/reset/hisi-reboot.c b/drivers/power/reset/hisi-reboot.c index 9ab7f562a83b..f69387e12c1e 100644 --- a/drivers/power/reset/hisi-reboot.c +++ b/drivers/power/reset/hisi-reboot.c @@ -53,13 +53,16 @@ static int hisi_reboot_probe(struct platform_device *pdev) if (of_property_read_u32(np, "reboot-offset", &reboot_offset) < 0) { pr_err("failed to find reboot-offset property\n"); + iounmap(base); return -EINVAL; } err = register_restart_handler(&hisi_restart_nb); - if (err) + if (err) { dev_err(&pdev->dev, "cannot register restart handler (err=%d)\n", err); + iounmap(base); + } return err; } diff --git a/drivers/power/tps65217_charger.c b/drivers/power/tps65217_charger.c index 73dfae41def8..4c56e54af6ac 100644 --- a/drivers/power/tps65217_charger.c +++ b/drivers/power/tps65217_charger.c @@ -206,6 +206,7 @@ static int tps65217_charger_probe(struct platform_device *pdev) if (!charger) return -ENOMEM; + platform_set_drvdata(pdev, charger); charger->tps = tps; charger->dev = &pdev->dev; diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index c182efc62c7b..80a566a00d04 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -74,6 +74,16 @@ config PWM_ATMEL_TCB To compile this driver as a module, choose M here: the module will be called pwm-atmel-tcb. +config PWM_BCM_IPROC + tristate "iProc PWM support" + depends on ARCH_BCM_IPROC + help + Generic PWM framework driver for Broadcom iProc PWM block. This + block is used in Broadcom iProc SoC's. + + To compile this driver as a module, choose M here: the module + will be called pwm-bcm-iproc. + config PWM_BCM_KONA tristate "Kona PWM support" depends on ARCH_BCM_MOBILE @@ -137,6 +147,13 @@ config PWM_CRC Generic PWM framework driver for Crystalcove (CRC) PMIC based PWM control. +config PWM_CROS_EC + tristate "ChromeOS EC PWM driver" + depends on MFD_CROS_EC + help + PWM driver for exposing a PWM attached to the ChromeOS Embedded + Controller. + config PWM_EP93XX tristate "Cirrus Logic EP93xx PWM support" depends on ARCH_EP93XX @@ -305,7 +322,7 @@ config PWM_PXA config PWM_RCAR tristate "Renesas R-Car PWM support" - depends on ARCH_RCAR_GEN1 || ARCH_RCAR_GEN2 || COMPILE_TEST + depends on ARCH_RENESAS || COMPILE_TEST depends on HAS_IOMEM help This driver exposes the PWM Timer controller found in Renesas @@ -362,6 +379,13 @@ config PWM_STI To compile this driver as a module, choose M here: the module will be called pwm-sti. +config PWM_STMPE + bool "STMPE expander PWM export" + depends on MFD_STMPE + help + This enables support for the PWMs found in the STMPE I/O + expanders. + config PWM_SUN4I tristate "Allwinner PWM support" depends on ARCH_SUNXI || COMPILE_TEST diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile index dd35bc121a18..feef1dd29f73 100644 --- a/drivers/pwm/Makefile +++ b/drivers/pwm/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_PWM_AB8500) += pwm-ab8500.o obj-$(CONFIG_PWM_ATMEL) += pwm-atmel.o obj-$(CONFIG_PWM_ATMEL_HLCDC_PWM) += pwm-atmel-hlcdc.o obj-$(CONFIG_PWM_ATMEL_TCB) += pwm-atmel-tcb.o +obj-$(CONFIG_PWM_BCM_IPROC) += pwm-bcm-iproc.o obj-$(CONFIG_PWM_BCM_KONA) += pwm-bcm-kona.o obj-$(CONFIG_PWM_BCM2835) += pwm-bcm2835.o obj-$(CONFIG_PWM_BERLIN) += pwm-berlin.o @@ -11,6 +12,7 @@ obj-$(CONFIG_PWM_BFIN) += pwm-bfin.o obj-$(CONFIG_PWM_BRCMSTB) += pwm-brcmstb.o obj-$(CONFIG_PWM_CLPS711X) += pwm-clps711x.o obj-$(CONFIG_PWM_CRC) += pwm-crc.o +obj-$(CONFIG_PWM_CROS_EC) += pwm-cros-ec.o obj-$(CONFIG_PWM_EP93XX) += pwm-ep93xx.o obj-$(CONFIG_PWM_FSL_FTM) += pwm-fsl-ftm.o obj-$(CONFIG_PWM_IMG) += pwm-img.o @@ -34,6 +36,7 @@ obj-$(CONFIG_PWM_ROCKCHIP) += pwm-rockchip.o obj-$(CONFIG_PWM_SAMSUNG) += pwm-samsung.o obj-$(CONFIG_PWM_SPEAR) += pwm-spear.o obj-$(CONFIG_PWM_STI) += pwm-sti.o +obj-$(CONFIG_PWM_STMPE) += pwm-stmpe.o obj-$(CONFIG_PWM_SUN4I) += pwm-sun4i.o obj-$(CONFIG_PWM_TEGRA) += pwm-tegra.o obj-$(CONFIG_PWM_TIECAP) += pwm-tiecap.o diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index ed337a8c34ab..0dbd29e287db 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -526,6 +526,33 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state) EXPORT_SYMBOL_GPL(pwm_apply_state); /** + * pwm_capture() - capture and report a PWM signal + * @pwm: PWM device + * @result: structure to fill with capture result + * @timeout: time to wait, in milliseconds, before giving up on capture + * + * Returns: 0 on success or a negative error code on failure. + */ +int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result, + unsigned long timeout) +{ + int err; + + if (!pwm || !pwm->chip->ops) + return -EINVAL; + + if (!pwm->chip->ops->capture) + return -ENOSYS; + + mutex_lock(&pwm_lock); + err = pwm->chip->ops->capture(pwm->chip, pwm, result, timeout); + mutex_unlock(&pwm_lock); + + return err; +} +EXPORT_SYMBOL_GPL(pwm_capture); + +/** * pwm_adjust_config() - adjust the current PWM config to the PWM arguments * @pwm: PWM device * diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c index 0e4bd4e8e582..e6b8b1b7e6ba 100644 --- a/drivers/pwm/pwm-atmel.c +++ b/drivers/pwm/pwm-atmel.c @@ -64,7 +64,8 @@ struct atmel_pwm_chip { void __iomem *base; unsigned int updated_pwms; - struct mutex isr_lock; /* ISR is cleared when read, ensure only one thread does that */ + /* ISR is cleared when read, ensure only one thread does that */ + struct mutex isr_lock; void (*config)(struct pwm_chip *chip, struct pwm_device *pwm, unsigned long dty, unsigned long prd); @@ -271,6 +272,16 @@ static void atmel_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) mutex_unlock(&atmel_pwm->isr_lock); atmel_pwm_writel(atmel_pwm, PWM_DIS, 1 << pwm->hwpwm); + /* + * Wait for the PWM channel disable operation to be effective before + * stopping the clock. + */ + timeout = jiffies + 2 * HZ; + + while ((atmel_pwm_readl(atmel_pwm, PWM_SR) & (1 << pwm->hwpwm)) && + time_before(jiffies, timeout)) + usleep_range(10, 100); + clk_disable(atmel_pwm->clk); } @@ -324,21 +335,14 @@ MODULE_DEVICE_TABLE(of, atmel_pwm_dt_ids); static inline const struct atmel_pwm_data * atmel_pwm_get_driver_data(struct platform_device *pdev) { - if (pdev->dev.of_node) { - const struct of_device_id *match; - - match = of_match_device(atmel_pwm_dt_ids, &pdev->dev); - if (!match) - return NULL; + const struct platform_device_id *id; - return match->data; - } else { - const struct platform_device_id *id; + if (pdev->dev.of_node) + return of_device_get_match_data(&pdev->dev); - id = platform_get_device_id(pdev); + id = platform_get_device_id(pdev); - return (struct atmel_pwm_data *)id->driver_data; - } + return (struct atmel_pwm_data *)id->driver_data; } static int atmel_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-bcm-iproc.c b/drivers/pwm/pwm-bcm-iproc.c new file mode 100644 index 000000000000..d961a8207b1c --- /dev/null +++ b/drivers/pwm/pwm-bcm-iproc.c @@ -0,0 +1,277 @@ +/* + * Copyright (C) 2016 Broadcom + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/math64.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/pwm.h> + +#define IPROC_PWM_CTRL_OFFSET 0x00 +#define IPROC_PWM_CTRL_TYPE_SHIFT(ch) (15 + (ch)) +#define IPROC_PWM_CTRL_POLARITY_SHIFT(ch) (8 + (ch)) +#define IPROC_PWM_CTRL_EN_SHIFT(ch) (ch) + +#define IPROC_PWM_PERIOD_OFFSET(ch) (0x04 + ((ch) << 3)) +#define IPROC_PWM_PERIOD_MIN 0x02 +#define IPROC_PWM_PERIOD_MAX 0xffff + +#define IPROC_PWM_DUTY_CYCLE_OFFSET(ch) (0x08 + ((ch) << 3)) +#define IPROC_PWM_DUTY_CYCLE_MIN 0x00 +#define IPROC_PWM_DUTY_CYCLE_MAX 0xffff + +#define IPROC_PWM_PRESCALE_OFFSET 0x24 +#define IPROC_PWM_PRESCALE_BITS 0x06 +#define IPROC_PWM_PRESCALE_SHIFT(ch) ((3 - (ch)) * \ + IPROC_PWM_PRESCALE_BITS) +#define IPROC_PWM_PRESCALE_MASK(ch) (IPROC_PWM_PRESCALE_MAX << \ + IPROC_PWM_PRESCALE_SHIFT(ch)) +#define IPROC_PWM_PRESCALE_MIN 0x00 +#define IPROC_PWM_PRESCALE_MAX 0x3f + +struct iproc_pwmc { + struct pwm_chip chip; + void __iomem *base; + struct clk *clk; +}; + +static inline struct iproc_pwmc *to_iproc_pwmc(struct pwm_chip *chip) +{ + return container_of(chip, struct iproc_pwmc, chip); +} + +static void iproc_pwmc_enable(struct iproc_pwmc *ip, unsigned int channel) +{ + u32 value; + + value = readl(ip->base + IPROC_PWM_CTRL_OFFSET); + value |= 1 << IPROC_PWM_CTRL_EN_SHIFT(channel); + writel(value, ip->base + IPROC_PWM_CTRL_OFFSET); + + /* must be a 400 ns delay between clearing and setting enable bit */ + ndelay(400); +} + +static void iproc_pwmc_disable(struct iproc_pwmc *ip, unsigned int channel) +{ + u32 value; + + value = readl(ip->base + IPROC_PWM_CTRL_OFFSET); + value &= ~(1 << IPROC_PWM_CTRL_EN_SHIFT(channel)); + writel(value, ip->base + IPROC_PWM_CTRL_OFFSET); + + /* must be a 400 ns delay between clearing and setting enable bit */ + ndelay(400); +} + +static void iproc_pwmc_get_state(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state) +{ + struct iproc_pwmc *ip = to_iproc_pwmc(chip); + u64 tmp, multi, rate; + u32 value, prescale; + + rate = clk_get_rate(ip->clk); + + value = readl(ip->base + IPROC_PWM_CTRL_OFFSET); + + if (value & BIT(IPROC_PWM_CTRL_EN_SHIFT(pwm->hwpwm))) + state->enabled = true; + else + state->enabled = false; + + if (value & BIT(IPROC_PWM_CTRL_POLARITY_SHIFT(pwm->hwpwm))) + state->polarity = PWM_POLARITY_NORMAL; + else + state->polarity = PWM_POLARITY_INVERSED; + + value = readl(ip->base + IPROC_PWM_PRESCALE_OFFSET); + prescale = value >> IPROC_PWM_PRESCALE_SHIFT(pwm->hwpwm); + prescale &= IPROC_PWM_PRESCALE_MAX; + + multi = NSEC_PER_SEC * (prescale + 1); + + value = readl(ip->base + IPROC_PWM_PERIOD_OFFSET(pwm->hwpwm)); + tmp = (value & IPROC_PWM_PERIOD_MAX) * multi; + state->period = div64_u64(tmp, rate); + + value = readl(ip->base + IPROC_PWM_DUTY_CYCLE_OFFSET(pwm->hwpwm)); + tmp = (value & IPROC_PWM_PERIOD_MAX) * multi; + state->duty_cycle = div64_u64(tmp, rate); +} + +static int iproc_pwmc_apply(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state) +{ + unsigned long prescale = IPROC_PWM_PRESCALE_MIN; + struct iproc_pwmc *ip = to_iproc_pwmc(chip); + u32 value, period, duty; + u64 rate; + + rate = clk_get_rate(ip->clk); + + /* + * Find period count, duty count and prescale to suit duty_cycle and + * period. This is done according to formulas described below: + * + * period_ns = 10^9 * (PRESCALE + 1) * PC / PWM_CLK_RATE + * duty_ns = 10^9 * (PRESCALE + 1) * DC / PWM_CLK_RATE + * + * PC = (PWM_CLK_RATE * period_ns) / (10^9 * (PRESCALE + 1)) + * DC = (PWM_CLK_RATE * duty_ns) / (10^9 * (PRESCALE + 1)) + */ + while (1) { + u64 value, div; + + div = NSEC_PER_SEC * (prescale + 1); + value = rate * state->period; + period = div64_u64(value, div); + value = rate * state->duty_cycle; + duty = div64_u64(value, div); + + if (period < IPROC_PWM_PERIOD_MIN || + duty < IPROC_PWM_DUTY_CYCLE_MIN) + return -EINVAL; + + if (period <= IPROC_PWM_PERIOD_MAX && + duty <= IPROC_PWM_DUTY_CYCLE_MAX) + break; + + /* Otherwise, increase prescale and recalculate counts */ + if (++prescale > IPROC_PWM_PRESCALE_MAX) + return -EINVAL; + } + + iproc_pwmc_disable(ip, pwm->hwpwm); + + /* Set prescale */ + value = readl(ip->base + IPROC_PWM_PRESCALE_OFFSET); + value &= ~IPROC_PWM_PRESCALE_MASK(pwm->hwpwm); + value |= prescale << IPROC_PWM_PRESCALE_SHIFT(pwm->hwpwm); + writel(value, ip->base + IPROC_PWM_PRESCALE_OFFSET); + + /* set period and duty cycle */ + writel(period, ip->base + IPROC_PWM_PERIOD_OFFSET(pwm->hwpwm)); + writel(duty, ip->base + IPROC_PWM_DUTY_CYCLE_OFFSET(pwm->hwpwm)); + + /* set polarity */ + value = readl(ip->base + IPROC_PWM_CTRL_OFFSET); + + if (state->polarity == PWM_POLARITY_NORMAL) + value |= 1 << IPROC_PWM_CTRL_POLARITY_SHIFT(pwm->hwpwm); + else + value &= ~(1 << IPROC_PWM_CTRL_POLARITY_SHIFT(pwm->hwpwm)); + + writel(value, ip->base + IPROC_PWM_CTRL_OFFSET); + + if (state->enabled) + iproc_pwmc_enable(ip, pwm->hwpwm); + + return 0; +} + +static const struct pwm_ops iproc_pwm_ops = { + .apply = iproc_pwmc_apply, + .get_state = iproc_pwmc_get_state, +}; + +static int iproc_pwmc_probe(struct platform_device *pdev) +{ + struct iproc_pwmc *ip; + struct resource *res; + unsigned int i; + u32 value; + int ret; + + ip = devm_kzalloc(&pdev->dev, sizeof(*ip), GFP_KERNEL); + if (!ip) + return -ENOMEM; + + platform_set_drvdata(pdev, ip); + + ip->chip.dev = &pdev->dev; + ip->chip.ops = &iproc_pwm_ops; + ip->chip.base = -1; + ip->chip.npwm = 4; + ip->chip.of_xlate = of_pwm_xlate_with_flags; + ip->chip.of_pwm_n_cells = 3; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ip->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(ip->base)) + return PTR_ERR(ip->base); + + ip->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(ip->clk)) { + dev_err(&pdev->dev, "failed to get clock: %ld\n", + PTR_ERR(ip->clk)); + return PTR_ERR(ip->clk); + } + + ret = clk_prepare_enable(ip->clk); + if (ret < 0) { + dev_err(&pdev->dev, "failed to enable clock: %d\n", ret); + return ret; + } + + /* Set full drive and normal polarity for all channels */ + value = readl(ip->base + IPROC_PWM_CTRL_OFFSET); + + for (i = 0; i < ip->chip.npwm; i++) { + value &= ~(1 << IPROC_PWM_CTRL_TYPE_SHIFT(i)); + value |= 1 << IPROC_PWM_CTRL_POLARITY_SHIFT(i); + } + + writel(value, ip->base + IPROC_PWM_CTRL_OFFSET); + + ret = pwmchip_add(&ip->chip); + if (ret < 0) { + dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret); + clk_disable_unprepare(ip->clk); + } + + return ret; +} + +static int iproc_pwmc_remove(struct platform_device *pdev) +{ + struct iproc_pwmc *ip = platform_get_drvdata(pdev); + + clk_disable_unprepare(ip->clk); + + return pwmchip_remove(&ip->chip); +} + +static const struct of_device_id bcm_iproc_pwmc_dt[] = { + { .compatible = "brcm,iproc-pwm" }, + { }, +}; +MODULE_DEVICE_TABLE(of, bcm_iproc_pwmc_dt); + +static struct platform_driver iproc_pwmc_driver = { + .driver = { + .name = "bcm-iproc-pwm", + .of_match_table = bcm_iproc_pwmc_dt, + }, + .probe = iproc_pwmc_probe, + .remove = iproc_pwmc_remove, +}; +module_platform_driver(iproc_pwmc_driver); + +MODULE_AUTHOR("Yendapally Reddy Dhananjaya Reddy <yendapally.reddy@broadcom.com>"); +MODULE_DESCRIPTION("Broadcom iProc PWM driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pwm/pwm-cros-ec.c b/drivers/pwm/pwm-cros-ec.c new file mode 100644 index 000000000000..99b9acc1a420 --- /dev/null +++ b/drivers/pwm/pwm-cros-ec.c @@ -0,0 +1,260 @@ +/* + * Copyright (C) 2016 Google, Inc + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2, as published by + * the Free Software Foundation. + * + * Expose a PWM controlled by the ChromeOS EC to the host processor. + */ + +#include <linux/module.h> +#include <linux/mfd/cros_ec.h> +#include <linux/mfd/cros_ec_commands.h> +#include <linux/platform_device.h> +#include <linux/pwm.h> +#include <linux/slab.h> + +/** + * struct cros_ec_pwm_device - Driver data for EC PWM + * + * @dev: Device node + * @ec: Pointer to EC device + * @chip: PWM controller chip + */ +struct cros_ec_pwm_device { + struct device *dev; + struct cros_ec_device *ec; + struct pwm_chip chip; +}; + +static inline struct cros_ec_pwm_device *pwm_to_cros_ec_pwm(struct pwm_chip *c) +{ + return container_of(c, struct cros_ec_pwm_device, chip); +} + +static int cros_ec_pwm_set_duty(struct cros_ec_device *ec, u8 index, u16 duty) +{ + struct { + struct cros_ec_command msg; + struct ec_params_pwm_set_duty params; + } buf; + struct ec_params_pwm_set_duty *params = &buf.params; + struct cros_ec_command *msg = &buf.msg; + + memset(&buf, 0, sizeof(buf)); + + msg->version = 0; + msg->command = EC_CMD_PWM_SET_DUTY; + msg->insize = 0; + msg->outsize = sizeof(*params); + + params->duty = duty; + params->pwm_type = EC_PWM_TYPE_GENERIC; + params->index = index; + + return cros_ec_cmd_xfer_status(ec, msg); +} + +static int __cros_ec_pwm_get_duty(struct cros_ec_device *ec, u8 index, + u32 *result) +{ + struct { + struct cros_ec_command msg; + union { + struct ec_params_pwm_get_duty params; + struct ec_response_pwm_get_duty resp; + }; + } buf; + struct ec_params_pwm_get_duty *params = &buf.params; + struct ec_response_pwm_get_duty *resp = &buf.resp; + struct cros_ec_command *msg = &buf.msg; + int ret; + + memset(&buf, 0, sizeof(buf)); + + msg->version = 0; + msg->command = EC_CMD_PWM_GET_DUTY; + msg->insize = sizeof(*params); + msg->outsize = sizeof(*resp); + + params->pwm_type = EC_PWM_TYPE_GENERIC; + params->index = index; + + ret = cros_ec_cmd_xfer_status(ec, msg); + if (result) + *result = msg->result; + if (ret < 0) + return ret; + + return resp->duty; +} + +static int cros_ec_pwm_get_duty(struct cros_ec_device *ec, u8 index) +{ + return __cros_ec_pwm_get_duty(ec, index, NULL); +} + +static int cros_ec_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state) +{ + struct cros_ec_pwm_device *ec_pwm = pwm_to_cros_ec_pwm(chip); + int duty_cycle; + + /* The EC won't let us change the period */ + if (state->period != EC_PWM_MAX_DUTY) + return -EINVAL; + + /* + * EC doesn't separate the concept of duty cycle and enabled, but + * kernel does. Translate. + */ + duty_cycle = state->enabled ? state->duty_cycle : 0; + + return cros_ec_pwm_set_duty(ec_pwm->ec, pwm->hwpwm, duty_cycle); +} + +static void cros_ec_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state) +{ + struct cros_ec_pwm_device *ec_pwm = pwm_to_cros_ec_pwm(chip); + int ret; + + ret = cros_ec_pwm_get_duty(ec_pwm->ec, pwm->hwpwm); + if (ret < 0) { + dev_err(chip->dev, "error getting initial duty: %d\n", ret); + return; + } + + state->enabled = (ret > 0); + state->period = EC_PWM_MAX_DUTY; + + /* Note that "disabled" and "duty cycle == 0" are treated the same */ + state->duty_cycle = ret; +} + +static struct pwm_device * +cros_ec_pwm_xlate(struct pwm_chip *pc, const struct of_phandle_args *args) +{ + struct pwm_device *pwm; + + if (args->args[0] >= pc->npwm) + return ERR_PTR(-EINVAL); + + pwm = pwm_request_from_chip(pc, args->args[0], NULL); + if (IS_ERR(pwm)) + return pwm; + + /* The EC won't let us change the period */ + pwm->args.period = EC_PWM_MAX_DUTY; + + return pwm; +} + +static const struct pwm_ops cros_ec_pwm_ops = { + .get_state = cros_ec_pwm_get_state, + .apply = cros_ec_pwm_apply, + .owner = THIS_MODULE, +}; + +static int cros_ec_num_pwms(struct cros_ec_device *ec) +{ + int i, ret; + + /* The index field is only 8 bits */ + for (i = 0; i <= U8_MAX; i++) { + u32 result = 0; + + ret = __cros_ec_pwm_get_duty(ec, i, &result); + /* We want to parse EC protocol errors */ + if (ret < 0 && !(ret == -EPROTO && result)) + return ret; + + /* + * We look for SUCCESS, INVALID_COMMAND, or INVALID_PARAM + * responses; everything else is treated as an error. + */ + if (result == EC_RES_INVALID_COMMAND) + return -ENODEV; + else if (result == EC_RES_INVALID_PARAM) + return i; + else if (result) + return -EPROTO; + } + + return U8_MAX; +} + +static int cros_ec_pwm_probe(struct platform_device *pdev) +{ + struct cros_ec_device *ec = dev_get_drvdata(pdev->dev.parent); + struct device *dev = &pdev->dev; + struct cros_ec_pwm_device *ec_pwm; + struct pwm_chip *chip; + int ret; + + if (!ec) { + dev_err(dev, "no parent EC device\n"); + return -EINVAL; + } + + ec_pwm = devm_kzalloc(dev, sizeof(*ec_pwm), GFP_KERNEL); + if (!ec_pwm) + return -ENOMEM; + chip = &ec_pwm->chip; + ec_pwm->ec = ec; + + /* PWM chip */ + chip->dev = dev; + chip->ops = &cros_ec_pwm_ops; + chip->of_xlate = cros_ec_pwm_xlate; + chip->of_pwm_n_cells = 1; + chip->base = -1; + ret = cros_ec_num_pwms(ec); + if (ret < 0) { + dev_err(dev, "Couldn't find PWMs: %d\n", ret); + return ret; + } + chip->npwm = ret; + dev_dbg(dev, "Probed %u PWMs\n", chip->npwm); + + ret = pwmchip_add(chip); + if (ret < 0) { + dev_err(dev, "cannot register PWM: %d\n", ret); + return ret; + } + + platform_set_drvdata(pdev, ec_pwm); + + return ret; +} + +static int cros_ec_pwm_remove(struct platform_device *dev) +{ + struct cros_ec_pwm_device *ec_pwm = platform_get_drvdata(dev); + struct pwm_chip *chip = &ec_pwm->chip; + + return pwmchip_remove(chip); +} + +#ifdef CONFIG_OF +static const struct of_device_id cros_ec_pwm_of_match[] = { + { .compatible = "google,cros-ec-pwm" }, + {}, +}; +MODULE_DEVICE_TABLE(of, cros_ec_pwm_of_match); +#endif + +static struct platform_driver cros_ec_pwm_driver = { + .probe = cros_ec_pwm_probe, + .remove = cros_ec_pwm_remove, + .driver = { + .name = "cros-ec-pwm", + .of_match_table = of_match_ptr(cros_ec_pwm_of_match), + }, +}; +module_platform_driver(cros_ec_pwm_driver); + +MODULE_ALIAS("platform:cros-ec-pwm"); +MODULE_DESCRIPTION("ChromeOS EC PWM driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pwm/pwm-lpc32xx.c b/drivers/pwm/pwm-lpc32xx.c index 4d470c1a406a..a9b3cff96aac 100644 --- a/drivers/pwm/pwm-lpc32xx.c +++ b/drivers/pwm/pwm-lpc32xx.c @@ -25,6 +25,7 @@ struct lpc32xx_pwm_chip { }; #define PWM_ENABLE BIT(31) +#define PWM_PIN_LEVEL BIT(30) #define to_lpc32xx_pwm_chip(_chip) \ container_of(_chip, struct lpc32xx_pwm_chip, chip) @@ -103,6 +104,7 @@ static int lpc32xx_pwm_probe(struct platform_device *pdev) struct lpc32xx_pwm_chip *lpc32xx; struct resource *res; int ret; + u32 val; lpc32xx = devm_kzalloc(&pdev->dev, sizeof(*lpc32xx), GFP_KERNEL); if (!lpc32xx) @@ -128,6 +130,11 @@ static int lpc32xx_pwm_probe(struct platform_device *pdev) return ret; } + /* When PWM is disable, configure the output to the default value */ + val = readl(lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2)); + val &= ~PWM_PIN_LEVEL; + writel(val, lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2)); + platform_set_drvdata(pdev, lpc32xx); return 0; diff --git a/drivers/pwm/pwm-lpss-pci.c b/drivers/pwm/pwm-lpss-pci.c index 7160e8ab38a4..3622f093490e 100644 --- a/drivers/pwm/pwm-lpss-pci.c +++ b/drivers/pwm/pwm-lpss-pci.c @@ -76,6 +76,7 @@ static const struct pci_device_id pwm_lpss_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x0ac8), (unsigned long)&pwm_lpss_bxt_info}, { PCI_VDEVICE(INTEL, 0x0f08), (unsigned long)&pwm_lpss_byt_info}, { PCI_VDEVICE(INTEL, 0x0f09), (unsigned long)&pwm_lpss_byt_info}, + { PCI_VDEVICE(INTEL, 0x11a5), (unsigned long)&pwm_lpss_bxt_info}, { PCI_VDEVICE(INTEL, 0x1ac8), (unsigned long)&pwm_lpss_bxt_info}, { PCI_VDEVICE(INTEL, 0x2288), (unsigned long)&pwm_lpss_bsw_info}, { PCI_VDEVICE(INTEL, 0x2289), (unsigned long)&pwm_lpss_bsw_info}, diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index 295b963dbddb..72c0bce5a75c 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -27,7 +27,6 @@ #define PWM_SW_UPDATE BIT(30) #define PWM_BASE_UNIT_SHIFT 8 #define PWM_ON_TIME_DIV_MASK 0x000000ff -#define PWM_DIVISION_CORRECTION 0x2 /* Size of each PWM register space if multiple */ #define PWM_SIZE 0x400 @@ -92,8 +91,8 @@ static int pwm_lpss_config(struct pwm_chip *chip, struct pwm_device *pwm, int duty_ns, int period_ns) { struct pwm_lpss_chip *lpwm = to_lpwm(chip); - u8 on_time_div; - unsigned long c, base_unit_range; + unsigned long long on_time_div; + unsigned long c = lpwm->info->clk_rate, base_unit_range; unsigned long long base_unit, freq = NSEC_PER_SEC; u32 ctrl; @@ -101,21 +100,18 @@ static int pwm_lpss_config(struct pwm_chip *chip, struct pwm_device *pwm, /* * The equation is: - * base_unit = ((freq / c) * base_unit_range) + correction + * base_unit = round(base_unit_range * freq / c) */ base_unit_range = BIT(lpwm->info->base_unit_bits); - base_unit = freq * base_unit_range; + freq *= base_unit_range; - c = lpwm->info->clk_rate; - if (!c) - return -EINVAL; - - do_div(base_unit, c); - base_unit += PWM_DIVISION_CORRECTION; + base_unit = DIV_ROUND_CLOSEST_ULL(freq, c); if (duty_ns <= 0) duty_ns = 1; - on_time_div = 255 - (255 * duty_ns / period_ns); + on_time_div = 255ULL * duty_ns; + do_div(on_time_div, period_ns); + on_time_div = 255ULL - on_time_div; pm_runtime_get_sync(chip->dev); @@ -169,6 +165,7 @@ struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r, const struct pwm_lpss_boardinfo *info) { struct pwm_lpss_chip *lpwm; + unsigned long c; int ret; lpwm = devm_kzalloc(dev, sizeof(*lpwm), GFP_KERNEL); @@ -180,6 +177,11 @@ struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r, return ERR_CAST(lpwm->regs); lpwm->info = info; + + c = lpwm->info->clk_rate; + if (!c) + return ERR_PTR(-EINVAL); + lpwm->chip.dev = dev; lpwm->chip.ops = &pwm_lpss_ops; lpwm->chip.base = -1; diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c index 7d9cc9049522..ef89df1f7336 100644 --- a/drivers/pwm/pwm-rockchip.c +++ b/drivers/pwm/pwm-rockchip.c @@ -47,10 +47,14 @@ struct rockchip_pwm_regs { struct rockchip_pwm_data { struct rockchip_pwm_regs regs; unsigned int prescaler; + bool supports_polarity; const struct pwm_ops *ops; void (*set_enable)(struct pwm_chip *chip, - struct pwm_device *pwm, bool enable); + struct pwm_device *pwm, bool enable, + enum pwm_polarity polarity); + void (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state); }; static inline struct rockchip_pwm_chip *to_rockchip_pwm_chip(struct pwm_chip *c) @@ -59,7 +63,8 @@ static inline struct rockchip_pwm_chip *to_rockchip_pwm_chip(struct pwm_chip *c) } static void rockchip_pwm_set_enable_v1(struct pwm_chip *chip, - struct pwm_device *pwm, bool enable) + struct pwm_device *pwm, bool enable, + enum pwm_polarity polarity) { struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); u32 enable_conf = PWM_CTRL_OUTPUT_EN | PWM_CTRL_TIMER_EN; @@ -75,15 +80,29 @@ static void rockchip_pwm_set_enable_v1(struct pwm_chip *chip, writel_relaxed(val, pc->base + pc->data->regs.ctrl); } +static void rockchip_pwm_get_state_v1(struct pwm_chip *chip, + struct pwm_device *pwm, + struct pwm_state *state) +{ + struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); + u32 enable_conf = PWM_CTRL_OUTPUT_EN | PWM_CTRL_TIMER_EN; + u32 val; + + val = readl_relaxed(pc->base + pc->data->regs.ctrl); + if ((val & enable_conf) == enable_conf) + state->enabled = true; +} + static void rockchip_pwm_set_enable_v2(struct pwm_chip *chip, - struct pwm_device *pwm, bool enable) + struct pwm_device *pwm, bool enable, + enum pwm_polarity polarity) { struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); u32 enable_conf = PWM_OUTPUT_LEFT | PWM_LP_DISABLE | PWM_ENABLE | PWM_CONTINUOUS; u32 val; - if (pwm_get_polarity(pwm) == PWM_POLARITY_INVERSED) + if (polarity == PWM_POLARITY_INVERSED) enable_conf |= PWM_DUTY_NEGATIVE | PWM_INACTIVE_POSITIVE; else enable_conf |= PWM_DUTY_POSITIVE | PWM_INACTIVE_NEGATIVE; @@ -98,13 +117,59 @@ static void rockchip_pwm_set_enable_v2(struct pwm_chip *chip, writel_relaxed(val, pc->base + pc->data->regs.ctrl); } +static void rockchip_pwm_get_state_v2(struct pwm_chip *chip, + struct pwm_device *pwm, + struct pwm_state *state) +{ + struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); + u32 enable_conf = PWM_OUTPUT_LEFT | PWM_LP_DISABLE | PWM_ENABLE | + PWM_CONTINUOUS; + u32 val; + + val = readl_relaxed(pc->base + pc->data->regs.ctrl); + if ((val & enable_conf) != enable_conf) + return; + + state->enabled = true; + + if (!(val & PWM_DUTY_POSITIVE)) + state->polarity = PWM_POLARITY_INVERSED; +} + +static void rockchip_pwm_get_state(struct pwm_chip *chip, + struct pwm_device *pwm, + struct pwm_state *state) +{ + struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); + unsigned long clk_rate; + u64 tmp; + int ret; + + ret = clk_enable(pc->clk); + if (ret) + return; + + clk_rate = clk_get_rate(pc->clk); + + tmp = readl_relaxed(pc->base + pc->data->regs.period); + tmp *= pc->data->prescaler * NSEC_PER_SEC; + state->period = DIV_ROUND_CLOSEST_ULL(tmp, clk_rate); + + tmp = readl_relaxed(pc->base + pc->data->regs.duty); + tmp *= pc->data->prescaler * NSEC_PER_SEC; + state->duty_cycle = DIV_ROUND_CLOSEST_ULL(tmp, clk_rate); + + pc->data->get_state(chip, pwm, state); + + clk_disable(pc->clk); +} + static int rockchip_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, int duty_ns, int period_ns) { struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); unsigned long period, duty; u64 clk_rate, div; - int ret; clk_rate = clk_get_rate(pc->clk); @@ -114,74 +179,72 @@ static int rockchip_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, * default prescaler value for all practical clock rate values. */ div = clk_rate * period_ns; - do_div(div, pc->data->prescaler * NSEC_PER_SEC); - period = div; + period = DIV_ROUND_CLOSEST_ULL(div, + pc->data->prescaler * NSEC_PER_SEC); div = clk_rate * duty_ns; - do_div(div, pc->data->prescaler * NSEC_PER_SEC); - duty = div; - - ret = clk_enable(pc->clk); - if (ret) - return ret; + duty = DIV_ROUND_CLOSEST_ULL(div, pc->data->prescaler * NSEC_PER_SEC); writel(period, pc->base + pc->data->regs.period); writel(duty, pc->base + pc->data->regs.duty); - writel(0, pc->base + pc->data->regs.cntr); - - clk_disable(pc->clk); - - return 0; -} - -static int rockchip_pwm_set_polarity(struct pwm_chip *chip, - struct pwm_device *pwm, - enum pwm_polarity polarity) -{ - /* - * No action needed here because pwm->polarity will be set by the core - * and the core will only change polarity when the PWM is not enabled. - * We'll handle things in set_enable(). - */ return 0; } -static int rockchip_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) +static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state) { struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); + struct pwm_state curstate; + bool enabled; int ret; + pwm_get_state(pwm, &curstate); + enabled = curstate.enabled; + ret = clk_enable(pc->clk); if (ret) return ret; - pc->data->set_enable(chip, pwm, true); + if (state->polarity != curstate.polarity && enabled) { + pc->data->set_enable(chip, pwm, false, state->polarity); + enabled = false; + } - return 0; -} + ret = rockchip_pwm_config(chip, pwm, state->duty_cycle, state->period); + if (ret) { + if (enabled != curstate.enabled) + pc->data->set_enable(chip, pwm, !enabled, + state->polarity); -static void rockchip_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) -{ - struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); + goto out; + } + + if (state->enabled != enabled) + pc->data->set_enable(chip, pwm, state->enabled, + state->polarity); - pc->data->set_enable(chip, pwm, false); + /* + * Update the state with the real hardware, which can differ a bit + * because of period/duty_cycle approximation. + */ + rockchip_pwm_get_state(chip, pwm, state); +out: clk_disable(pc->clk); + + return ret; } static const struct pwm_ops rockchip_pwm_ops_v1 = { - .config = rockchip_pwm_config, - .enable = rockchip_pwm_enable, - .disable = rockchip_pwm_disable, + .get_state = rockchip_pwm_get_state, + .apply = rockchip_pwm_apply, .owner = THIS_MODULE, }; static const struct pwm_ops rockchip_pwm_ops_v2 = { - .config = rockchip_pwm_config, - .set_polarity = rockchip_pwm_set_polarity, - .enable = rockchip_pwm_enable, - .disable = rockchip_pwm_disable, + .get_state = rockchip_pwm_get_state, + .apply = rockchip_pwm_apply, .owner = THIS_MODULE, }; @@ -195,6 +258,7 @@ static const struct rockchip_pwm_data pwm_data_v1 = { .prescaler = 2, .ops = &rockchip_pwm_ops_v1, .set_enable = rockchip_pwm_set_enable_v1, + .get_state = rockchip_pwm_get_state_v1, }; static const struct rockchip_pwm_data pwm_data_v2 = { @@ -205,8 +269,10 @@ static const struct rockchip_pwm_data pwm_data_v2 = { .ctrl = 0x0c, }, .prescaler = 1, + .supports_polarity = true, .ops = &rockchip_pwm_ops_v2, .set_enable = rockchip_pwm_set_enable_v2, + .get_state = rockchip_pwm_get_state_v2, }; static const struct rockchip_pwm_data pwm_data_vop = { @@ -217,8 +283,10 @@ static const struct rockchip_pwm_data pwm_data_vop = { .ctrl = 0x00, }, .prescaler = 1, + .supports_polarity = true, .ops = &rockchip_pwm_ops_v2, .set_enable = rockchip_pwm_set_enable_v2, + .get_state = rockchip_pwm_get_state_v2, }; static const struct of_device_id rockchip_pwm_dt_ids[] = { @@ -253,7 +321,7 @@ static int rockchip_pwm_probe(struct platform_device *pdev) if (IS_ERR(pc->clk)) return PTR_ERR(pc->clk); - ret = clk_prepare(pc->clk); + ret = clk_prepare_enable(pc->clk); if (ret) return ret; @@ -265,7 +333,7 @@ static int rockchip_pwm_probe(struct platform_device *pdev) pc->chip.base = -1; pc->chip.npwm = 1; - if (pc->data->ops->set_polarity) { + if (pc->data->supports_polarity) { pc->chip.of_xlate = of_pwm_xlate_with_flags; pc->chip.of_pwm_n_cells = 3; } @@ -276,6 +344,10 @@ static int rockchip_pwm_probe(struct platform_device *pdev) dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret); } + /* Keep the PWM clk enabled if the PWM appears to be up and running. */ + if (!pwm_is_enabled(pc->chip.pwms)) + clk_disable(pc->clk); + return ret; } @@ -283,6 +355,20 @@ static int rockchip_pwm_remove(struct platform_device *pdev) { struct rockchip_pwm_chip *pc = platform_get_drvdata(pdev); + /* + * Disable the PWM clk before unpreparing it if the PWM device is still + * running. This should only happen when the last PWM user left it + * enabled, or when nobody requested a PWM that was previously enabled + * by the bootloader. + * + * FIXME: Maybe the core should disable all PWM devices in + * pwmchip_remove(). In this case we'd only have to call + * clk_unprepare() after pwmchip_remove(). + * + */ + if (pwm_is_enabled(pc->chip.pwms)) + clk_disable(pc->clk); + clk_unprepare(pc->clk); return pwmchip_remove(&pc->chip); diff --git a/drivers/pwm/pwm-stmpe.c b/drivers/pwm/pwm-stmpe.c new file mode 100644 index 000000000000..e464582a390a --- /dev/null +++ b/drivers/pwm/pwm-stmpe.c @@ -0,0 +1,319 @@ +/* + * Copyright (C) 2016 Linaro Ltd. + * + * Author: Linus Walleij <linus.walleij@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + */ + +#include <linux/bitops.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/mfd/stmpe.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/pwm.h> +#include <linux/slab.h> + +#define STMPE24XX_PWMCS 0x30 +#define PWMCS_EN_PWM0 BIT(0) +#define PWMCS_EN_PWM1 BIT(1) +#define PWMCS_EN_PWM2 BIT(2) +#define STMPE24XX_PWMIC0 0x38 +#define STMPE24XX_PWMIC1 0x39 +#define STMPE24XX_PWMIC2 0x3a + +#define STMPE_PWM_24XX_PINBASE 21 + +struct stmpe_pwm { + struct stmpe *stmpe; + struct pwm_chip chip; + u8 last_duty; +}; + +static inline struct stmpe_pwm *to_stmpe_pwm(struct pwm_chip *chip) +{ + return container_of(chip, struct stmpe_pwm, chip); +} + +static int stmpe_24xx_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct stmpe_pwm *stmpe_pwm = to_stmpe_pwm(chip); + u8 value; + int ret; + + ret = stmpe_reg_read(stmpe_pwm->stmpe, STMPE24XX_PWMCS); + if (ret < 0) { + dev_err(chip->dev, "error reading PWM#%u control\n", + pwm->hwpwm); + return ret; + } + + value = ret | BIT(pwm->hwpwm); + + ret = stmpe_reg_write(stmpe_pwm->stmpe, STMPE24XX_PWMCS, value); + if (ret) { + dev_err(chip->dev, "error writing PWM#%u control\n", + pwm->hwpwm); + return ret; + } + + return 0; +} + +static void stmpe_24xx_pwm_disable(struct pwm_chip *chip, + struct pwm_device *pwm) +{ + struct stmpe_pwm *stmpe_pwm = to_stmpe_pwm(chip); + u8 value; + int ret; + + ret = stmpe_reg_read(stmpe_pwm->stmpe, STMPE24XX_PWMCS); + if (ret < 0) { + dev_err(chip->dev, "error reading PWM#%u control\n", + pwm->hwpwm); + return; + } + + value = ret & ~BIT(pwm->hwpwm); + + ret = stmpe_reg_write(stmpe_pwm->stmpe, STMPE24XX_PWMCS, value); + if (ret) { + dev_err(chip->dev, "error writing PWM#%u control\n", + pwm->hwpwm); + return; + } +} + +/* STMPE 24xx PWM instructions */ +#define SMAX 0x007f +#define SMIN 0x00ff +#define GTS 0x0000 +#define LOAD BIT(14) /* Only available on 2403 */ +#define RAMPUP 0x0000 +#define RAMPDOWN BIT(7) +#define PRESCALE_512 BIT(14) +#define STEPTIME_1 BIT(8) +#define BRANCH (BIT(15) | BIT(13)) + +static int stmpe_24xx_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, + int duty_ns, int period_ns) +{ + struct stmpe_pwm *stmpe_pwm = to_stmpe_pwm(chip); + unsigned int i, pin; + u16 program[3] = { + SMAX, + GTS, + GTS, + }; + u8 offset; + int ret; + + /* Make sure we are disabled */ + if (pwm_is_enabled(pwm)) { + stmpe_24xx_pwm_disable(chip, pwm); + } else { + /* Connect the PWM to the pin */ + pin = pwm->hwpwm; + + /* On STMPE2401 and 2403 pins 21,22,23 are used */ + if (stmpe_pwm->stmpe->partnum == STMPE2401 || + stmpe_pwm->stmpe->partnum == STMPE2403) + pin += STMPE_PWM_24XX_PINBASE; + + ret = stmpe_set_altfunc(stmpe_pwm->stmpe, BIT(pin), + STMPE_BLOCK_PWM); + if (ret) { + dev_err(chip->dev, "unable to connect PWM#%u to pin\n", + pwm->hwpwm); + return ret; + } + } + + /* STMPE24XX */ + switch (pwm->hwpwm) { + case 0: + offset = STMPE24XX_PWMIC0; + break; + + case 1: + offset = STMPE24XX_PWMIC1; + break; + + case 2: + offset = STMPE24XX_PWMIC1; + break; + + default: + /* Should not happen as npwm is 3 */ + return -ENODEV; + } + + dev_dbg(chip->dev, "PWM#%u: config duty %d ns, period %d ns\n", + pwm->hwpwm, duty_ns, period_ns); + + if (duty_ns == 0) { + if (stmpe_pwm->stmpe->partnum == STMPE2401) + program[0] = SMAX; /* off all the time */ + + if (stmpe_pwm->stmpe->partnum == STMPE2403) + program[0] = LOAD | 0xff; /* LOAD 0xff */ + + stmpe_pwm->last_duty = 0x00; + } else if (duty_ns == period_ns) { + if (stmpe_pwm->stmpe->partnum == STMPE2401) + program[0] = SMIN; /* on all the time */ + + if (stmpe_pwm->stmpe->partnum == STMPE2403) + program[0] = LOAD | 0x00; /* LOAD 0x00 */ + + stmpe_pwm->last_duty = 0xff; + } else { + u8 value, last = stmpe_pwm->last_duty; + unsigned long duty; + + /* + * Counter goes from 0x00 to 0xff repeatedly at 32768 Hz, + * (means a period of 30517 ns) then this is compared to the + * counter from the ramp, if this is >= PWM counter the output + * is high. With LOAD we can define how much of the cycle it + * is on. + * + * Prescale = 0 -> 2 kHz -> T = 1/f = 488281.25 ns + */ + + /* Scale to 0..0xff */ + duty = duty_ns * 256; + duty = DIV_ROUND_CLOSEST(duty, period_ns); + value = duty; + + if (value == last) { + /* Run the old program */ + if (pwm_is_enabled(pwm)) + stmpe_24xx_pwm_enable(chip, pwm); + + return 0; + } else if (stmpe_pwm->stmpe->partnum == STMPE2403) { + /* STMPE2403 can simply set the right PWM value */ + program[0] = LOAD | value; + program[1] = 0x0000; + } else if (stmpe_pwm->stmpe->partnum == STMPE2401) { + /* STMPE2401 need a complex program */ + u16 incdec = 0x0000; + + if (last < value) + /* Count up */ + incdec = RAMPUP | (value - last); + else + /* Count down */ + incdec = RAMPDOWN | (last - value); + + /* Step to desired value, smoothly */ + program[0] = PRESCALE_512 | STEPTIME_1 | incdec; + + /* Loop eternally to 0x00 */ + program[1] = BRANCH; + } + + dev_dbg(chip->dev, + "PWM#%u: value = %02x, last_duty = %02x, program=%04x,%04x,%04x\n", + pwm->hwpwm, value, last, program[0], program[1], + program[2]); + stmpe_pwm->last_duty = value; + } + + /* + * We can write programs of up to 64 16-bit words into this channel. + */ + for (i = 0; i < ARRAY_SIZE(program); i++) { + u8 value; + + value = (program[i] >> 8) & 0xff; + + ret = stmpe_reg_write(stmpe_pwm->stmpe, offset, value); + if (ret) { + dev_err(chip->dev, "error writing register %02x: %d\n", + offset, ret); + return ret; + } + + value = program[i] & 0xff; + + ret = stmpe_reg_write(stmpe_pwm->stmpe, offset, value); + if (ret) { + dev_err(chip->dev, "error writing register %02x: %d\n", + offset, ret); + return ret; + } + } + + /* If we were enabled, re-enable this PWM */ + if (pwm_is_enabled(pwm)) + stmpe_24xx_pwm_enable(chip, pwm); + + /* Sleep for 200ms so we're sure it will take effect */ + msleep(200); + + dev_dbg(chip->dev, "programmed PWM#%u, %u bytes\n", pwm->hwpwm, i); + + return 0; +} + +static const struct pwm_ops stmpe_24xx_pwm_ops = { + .config = stmpe_24xx_pwm_config, + .enable = stmpe_24xx_pwm_enable, + .disable = stmpe_24xx_pwm_disable, + .owner = THIS_MODULE, +}; + +static int __init stmpe_pwm_probe(struct platform_device *pdev) +{ + struct stmpe *stmpe = dev_get_drvdata(pdev->dev.parent); + struct stmpe_pwm *pwm; + int ret; + + pwm = devm_kzalloc(&pdev->dev, sizeof(*pwm), GFP_KERNEL); + if (!pwm) + return -ENOMEM; + + pwm->stmpe = stmpe; + pwm->chip.dev = &pdev->dev; + pwm->chip.base = -1; + + if (stmpe->partnum == STMPE2401 || stmpe->partnum == STMPE2403) { + pwm->chip.ops = &stmpe_24xx_pwm_ops; + pwm->chip.npwm = 3; + } else { + if (stmpe->partnum == STMPE1601) + dev_err(&pdev->dev, "STMPE1601 not yet supported\n"); + else + dev_err(&pdev->dev, "Unknown STMPE PWM\n"); + + return -ENODEV; + } + + ret = stmpe_enable(stmpe, STMPE_BLOCK_PWM); + if (ret) + return ret; + + ret = pwmchip_add(&pwm->chip); + if (ret) { + stmpe_disable(stmpe, STMPE_BLOCK_PWM); + return ret; + } + + platform_set_drvdata(pdev, pwm); + + return 0; +} + +static struct platform_driver stmpe_pwm_driver = { + .driver = { + .name = "stmpe-pwm", + }, +}; +builtin_platform_driver_probe(stmpe_pwm_driver, stmpe_pwm_probe); diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c index d4de0607b502..e4647840cd6e 100644 --- a/drivers/pwm/pwm-tegra.c +++ b/drivers/pwm/pwm-tegra.c @@ -26,9 +26,11 @@ #include <linux/io.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/of_device.h> #include <linux/pwm.h> #include <linux/platform_device.h> #include <linux/slab.h> +#include <linux/reset.h> #define PWM_ENABLE (1 << 31) #define PWM_DUTY_WIDTH 8 @@ -36,15 +38,20 @@ #define PWM_SCALE_WIDTH 13 #define PWM_SCALE_SHIFT 0 -#define NUM_PWM 4 +struct tegra_pwm_soc { + unsigned int num_channels; +}; struct tegra_pwm_chip { - struct pwm_chip chip; - struct device *dev; + struct pwm_chip chip; + struct device *dev; + + struct clk *clk; + struct reset_control*rst; - struct clk *clk; + void __iomem *regs; - void __iomem *mmio_base; + const struct tegra_pwm_soc *soc; }; static inline struct tegra_pwm_chip *to_tegra_pwm_chip(struct pwm_chip *chip) @@ -54,20 +61,20 @@ static inline struct tegra_pwm_chip *to_tegra_pwm_chip(struct pwm_chip *chip) static inline u32 pwm_readl(struct tegra_pwm_chip *chip, unsigned int num) { - return readl(chip->mmio_base + (num << 4)); + return readl(chip->regs + (num << 4)); } static inline void pwm_writel(struct tegra_pwm_chip *chip, unsigned int num, unsigned long val) { - writel(val, chip->mmio_base + (num << 4)); + writel(val, chip->regs + (num << 4)); } static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, int duty_ns, int period_ns) { struct tegra_pwm_chip *pc = to_tegra_pwm_chip(chip); - unsigned long long c; + unsigned long long c = duty_ns; unsigned long rate, hz; u32 val = 0; int err; @@ -77,7 +84,8 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, * per (1 << PWM_DUTY_WIDTH) cycles and make sure to round to the * nearest integer during division. */ - c = duty_ns * ((1 << PWM_DUTY_WIDTH) - 1) + period_ns / 2; + c *= (1 << PWM_DUTY_WIDTH); + c += period_ns / 2; do_div(c, period_ns); val = (u32)c << PWM_DUTY_SHIFT; @@ -176,12 +184,13 @@ static int tegra_pwm_probe(struct platform_device *pdev) if (!pwm) return -ENOMEM; + pwm->soc = of_device_get_match_data(&pdev->dev); pwm->dev = &pdev->dev; r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - pwm->mmio_base = devm_ioremap_resource(&pdev->dev, r); - if (IS_ERR(pwm->mmio_base)) - return PTR_ERR(pwm->mmio_base); + pwm->regs = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(pwm->regs)) + return PTR_ERR(pwm->regs); platform_set_drvdata(pdev, pwm); @@ -189,14 +198,24 @@ static int tegra_pwm_probe(struct platform_device *pdev) if (IS_ERR(pwm->clk)) return PTR_ERR(pwm->clk); + pwm->rst = devm_reset_control_get(&pdev->dev, "pwm"); + if (IS_ERR(pwm->rst)) { + ret = PTR_ERR(pwm->rst); + dev_err(&pdev->dev, "Reset control is not found: %d\n", ret); + return ret; + } + + reset_control_deassert(pwm->rst); + pwm->chip.dev = &pdev->dev; pwm->chip.ops = &tegra_pwm_ops; pwm->chip.base = -1; - pwm->chip.npwm = NUM_PWM; + pwm->chip.npwm = pwm->soc->num_channels; ret = pwmchip_add(&pwm->chip); if (ret < 0) { dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret); + reset_control_assert(pwm->rst); return ret; } @@ -206,12 +225,17 @@ static int tegra_pwm_probe(struct platform_device *pdev) static int tegra_pwm_remove(struct platform_device *pdev) { struct tegra_pwm_chip *pc = platform_get_drvdata(pdev); - int i; + unsigned int i; + int err; if (WARN_ON(!pc)) return -ENODEV; - for (i = 0; i < NUM_PWM; i++) { + err = clk_prepare_enable(pc->clk); + if (err < 0) + return err; + + for (i = 0; i < pc->chip.npwm; i++) { struct pwm_device *pwm = &pc->chip.pwms[i]; if (!pwm_is_enabled(pwm)) @@ -223,12 +247,23 @@ static int tegra_pwm_remove(struct platform_device *pdev) clk_disable_unprepare(pc->clk); } + reset_control_assert(pc->rst); + clk_disable_unprepare(pc->clk); + return pwmchip_remove(&pc->chip); } +static const struct tegra_pwm_soc tegra20_pwm_soc = { + .num_channels = 4, +}; + +static const struct tegra_pwm_soc tegra186_pwm_soc = { + .num_channels = 1, +}; + static const struct of_device_id tegra_pwm_of_match[] = { - { .compatible = "nvidia,tegra20-pwm" }, - { .compatible = "nvidia,tegra30-pwm" }, + { .compatible = "nvidia,tegra20-pwm", .data = &tegra20_pwm_soc }, + { .compatible = "nvidia,tegra186-pwm", .data = &tegra186_pwm_soc }, { } }; diff --git a/drivers/pwm/pwm-tiecap.c b/drivers/pwm/pwm-tiecap.c index 616af764a276..6ec342dd3eea 100644 --- a/drivers/pwm/pwm-tiecap.c +++ b/drivers/pwm/pwm-tiecap.c @@ -27,8 +27,6 @@ #include <linux/pwm.h> #include <linux/of_device.h> -#include "pwm-tipwmss.h" - /* ECAP registers and bits definitions */ #define CAP1 0x08 #define CAP2 0x0C @@ -195,6 +193,7 @@ static const struct pwm_ops ecap_pwm_ops = { }; static const struct of_device_id ecap_of_match[] = { + { .compatible = "ti,am3352-ecap" }, { .compatible = "ti,am33xx-ecap" }, {}, }; @@ -202,11 +201,11 @@ MODULE_DEVICE_TABLE(of, ecap_of_match); static int ecap_pwm_probe(struct platform_device *pdev) { + struct device_node *np = pdev->dev.of_node; int ret; struct resource *r; struct clk *clk; struct ecap_pwm_chip *pc; - u16 status; pc = devm_kzalloc(&pdev->dev, sizeof(*pc), GFP_KERNEL); if (!pc) @@ -214,6 +213,13 @@ static int ecap_pwm_probe(struct platform_device *pdev) clk = devm_clk_get(&pdev->dev, "fck"); if (IS_ERR(clk)) { + if (of_device_is_compatible(np, "ti,am33xx-ecap")) { + dev_warn(&pdev->dev, "Binding is obsolete.\n"); + clk = devm_clk_get(pdev->dev.parent, "fck"); + } + } + + if (IS_ERR(clk)) { dev_err(&pdev->dev, "failed to get clock\n"); return PTR_ERR(clk); } @@ -243,40 +249,15 @@ static int ecap_pwm_probe(struct platform_device *pdev) } pm_runtime_enable(&pdev->dev); - pm_runtime_get_sync(&pdev->dev); - - status = pwmss_submodule_state_change(pdev->dev.parent, - PWMSS_ECAPCLK_EN); - if (!(status & PWMSS_ECAPCLK_EN_ACK)) { - dev_err(&pdev->dev, "PWMSS config space clock enable failed\n"); - ret = -EINVAL; - goto pwmss_clk_failure; - } - - pm_runtime_put_sync(&pdev->dev); platform_set_drvdata(pdev, pc); return 0; - -pwmss_clk_failure: - pm_runtime_put_sync(&pdev->dev); - pm_runtime_disable(&pdev->dev); - pwmchip_remove(&pc->chip); - return ret; } static int ecap_pwm_remove(struct platform_device *pdev) { struct ecap_pwm_chip *pc = platform_get_drvdata(pdev); - pm_runtime_get_sync(&pdev->dev); - /* - * Due to hardware misbehaviour, acknowledge of the stop_req - * is missing. Hence checking of the status bit skipped. - */ - pwmss_submodule_state_change(pdev->dev.parent, PWMSS_ECAPCLK_STOP_REQ); - pm_runtime_put_sync(&pdev->dev); - pm_runtime_disable(&pdev->dev); return pwmchip_remove(&pc->chip); } diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c index 6a41e66015b6..b5c6b0636893 100644 --- a/drivers/pwm/pwm-tiehrpwm.c +++ b/drivers/pwm/pwm-tiehrpwm.c @@ -27,8 +27,6 @@ #include <linux/pm_runtime.h> #include <linux/of_device.h> -#include "pwm-tipwmss.h" - /* EHRPWM registers and bits definitions */ /* Time base module registers */ @@ -426,6 +424,7 @@ static const struct pwm_ops ehrpwm_pwm_ops = { }; static const struct of_device_id ehrpwm_of_match[] = { + { .compatible = "ti,am3352-ehrpwm" }, { .compatible = "ti,am33xx-ehrpwm" }, {}, }; @@ -433,11 +432,11 @@ MODULE_DEVICE_TABLE(of, ehrpwm_of_match); static int ehrpwm_pwm_probe(struct platform_device *pdev) { + struct device_node *np = pdev->dev.of_node; int ret; struct resource *r; struct clk *clk; struct ehrpwm_pwm_chip *pc; - u16 status; pc = devm_kzalloc(&pdev->dev, sizeof(*pc), GFP_KERNEL); if (!pc) @@ -445,6 +444,13 @@ static int ehrpwm_pwm_probe(struct platform_device *pdev) clk = devm_clk_get(&pdev->dev, "fck"); if (IS_ERR(clk)) { + if (of_device_is_compatible(np, "ti,am33xx-ecap")) { + dev_warn(&pdev->dev, "Binding is obsolete.\n"); + clk = devm_clk_get(pdev->dev.parent, "fck"); + } + } + + if (IS_ERR(clk)) { dev_err(&pdev->dev, "failed to get clock\n"); return PTR_ERR(clk); } @@ -487,27 +493,9 @@ static int ehrpwm_pwm_probe(struct platform_device *pdev) } pm_runtime_enable(&pdev->dev); - pm_runtime_get_sync(&pdev->dev); - - status = pwmss_submodule_state_change(pdev->dev.parent, - PWMSS_EPWMCLK_EN); - if (!(status & PWMSS_EPWMCLK_EN_ACK)) { - dev_err(&pdev->dev, "PWMSS config space clock enable failed\n"); - ret = -EINVAL; - goto pwmss_clk_failure; - } - - pm_runtime_put_sync(&pdev->dev); platform_set_drvdata(pdev, pc); return 0; - -pwmss_clk_failure: - pm_runtime_put_sync(&pdev->dev); - pm_runtime_disable(&pdev->dev); - pwmchip_remove(&pc->chip); - clk_unprepare(pc->tbclk); - return ret; } static int ehrpwm_pwm_remove(struct platform_device *pdev) @@ -516,14 +504,6 @@ static int ehrpwm_pwm_remove(struct platform_device *pdev) clk_unprepare(pc->tbclk); - pm_runtime_get_sync(&pdev->dev); - /* - * Due to hardware misbehaviour, acknowledge of the stop_req - * is missing. Hence checking of the status bit skipped. - */ - pwmss_submodule_state_change(pdev->dev.parent, PWMSS_EPWMCLK_STOP_REQ); - pm_runtime_put_sync(&pdev->dev); - pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); return pwmchip_remove(&pc->chip); diff --git a/drivers/pwm/pwm-tipwmss.c b/drivers/pwm/pwm-tipwmss.c index 5cf65a15d021..829f4991c96f 100644 --- a/drivers/pwm/pwm-tipwmss.c +++ b/drivers/pwm/pwm-tipwmss.c @@ -22,32 +22,6 @@ #include <linux/pm_runtime.h> #include <linux/of_device.h> -#include "pwm-tipwmss.h" - -#define PWMSS_CLKCONFIG 0x8 /* Clock gating reg */ -#define PWMSS_CLKSTATUS 0xc /* Clock gating status reg */ - -struct pwmss_info { - void __iomem *mmio_base; - struct mutex pwmss_lock; - u16 pwmss_clkconfig; -}; - -u16 pwmss_submodule_state_change(struct device *dev, int set) -{ - struct pwmss_info *info = dev_get_drvdata(dev); - u16 val; - - mutex_lock(&info->pwmss_lock); - val = readw(info->mmio_base + PWMSS_CLKCONFIG); - val |= set; - writew(val , info->mmio_base + PWMSS_CLKCONFIG); - mutex_unlock(&info->pwmss_lock); - - return readw(info->mmio_base + PWMSS_CLKSTATUS); -} -EXPORT_SYMBOL(pwmss_submodule_state_change); - static const struct of_device_id pwmss_of_match[] = { { .compatible = "ti,am33xx-pwmss" }, {}, @@ -57,24 +31,10 @@ MODULE_DEVICE_TABLE(of, pwmss_of_match); static int pwmss_probe(struct platform_device *pdev) { int ret; - struct resource *r; - struct pwmss_info *info; struct device_node *node = pdev->dev.of_node; - info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL); - if (!info) - return -ENOMEM; - - mutex_init(&info->pwmss_lock); - - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - info->mmio_base = devm_ioremap_resource(&pdev->dev, r); - if (IS_ERR(info->mmio_base)) - return PTR_ERR(info->mmio_base); - pm_runtime_enable(&pdev->dev); pm_runtime_get_sync(&pdev->dev); - platform_set_drvdata(pdev, info); /* Populate all the child nodes here... */ ret = of_platform_populate(node, NULL, NULL, &pdev->dev); @@ -86,30 +46,21 @@ static int pwmss_probe(struct platform_device *pdev) static int pwmss_remove(struct platform_device *pdev) { - struct pwmss_info *info = platform_get_drvdata(pdev); - pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); - mutex_destroy(&info->pwmss_lock); return 0; } #ifdef CONFIG_PM_SLEEP static int pwmss_suspend(struct device *dev) { - struct pwmss_info *info = dev_get_drvdata(dev); - - info->pwmss_clkconfig = readw(info->mmio_base + PWMSS_CLKCONFIG); pm_runtime_put_sync(dev); return 0; } static int pwmss_resume(struct device *dev) { - struct pwmss_info *info = dev_get_drvdata(dev); - pm_runtime_get_sync(dev); - writew(info->pwmss_clkconfig, info->mmio_base + PWMSS_CLKCONFIG); return 0; } #endif diff --git a/drivers/pwm/pwm-tipwmss.h b/drivers/pwm/pwm-tipwmss.h deleted file mode 100644 index 10ad8040408b..000000000000 --- a/drivers/pwm/pwm-tipwmss.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * TI PWM Subsystem driver - * - * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __TIPWMSS_H -#define __TIPWMSS_H - -/* PWM substem clock gating */ -#define PWMSS_ECAPCLK_EN BIT(0) -#define PWMSS_ECAPCLK_STOP_REQ BIT(1) -#define PWMSS_EPWMCLK_EN BIT(8) -#define PWMSS_EPWMCLK_STOP_REQ BIT(9) - -#define PWMSS_ECAPCLK_EN_ACK BIT(0) -#define PWMSS_EPWMCLK_EN_ACK BIT(8) - -#ifdef CONFIG_PWM_TIPWMSS -extern u16 pwmss_submodule_state_change(struct device *dev, int set); -#else -static inline u16 pwmss_submodule_state_change(struct device *dev, int set) -{ - /* return success status value */ - return 0xFFFF; -} -#endif -#endif /* __TIPWMSS_H */ diff --git a/drivers/pwm/sysfs.c b/drivers/pwm/sysfs.c index 01695d48dd54..18ed725594c3 100644 --- a/drivers/pwm/sysfs.c +++ b/drivers/pwm/sysfs.c @@ -208,16 +208,33 @@ static ssize_t polarity_store(struct device *child, return ret ? : size; } +static ssize_t capture_show(struct device *child, + struct device_attribute *attr, + char *buf) +{ + struct pwm_device *pwm = child_to_pwm_device(child); + struct pwm_capture result; + int ret; + + ret = pwm_capture(pwm, &result, jiffies_to_msecs(HZ)); + if (ret) + return ret; + + return sprintf(buf, "%u %u\n", result.period, result.duty_cycle); +} + static DEVICE_ATTR_RW(period); static DEVICE_ATTR_RW(duty_cycle); static DEVICE_ATTR_RW(enable); static DEVICE_ATTR_RW(polarity); +static DEVICE_ATTR_RO(capture); static struct attribute *pwm_attrs[] = { &dev_attr_period.attr, &dev_attr_duty_cycle.attr, &dev_attr_enable.attr, &dev_attr_polarity.attr, + &dev_attr_capture.attr, NULL }; ATTRIBUTE_GROUPS(pwm); diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c index cecc15a880de..3fa17ac8df54 100644 --- a/drivers/rapidio/rio_cm.c +++ b/drivers/rapidio/rio_cm.c @@ -1080,8 +1080,8 @@ static int riocm_send_ack(struct rio_channel *ch) static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id, long timeout) { - struct rio_channel *ch = NULL; - struct rio_channel *new_ch = NULL; + struct rio_channel *ch; + struct rio_channel *new_ch; struct conn_req *req; struct cm_peer *peer; int found = 0; @@ -1155,6 +1155,7 @@ static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id, spin_unlock_bh(&ch->lock); riocm_put_channel(ch); + ch = NULL; kfree(req); down_read(&rdev_sem); @@ -1172,7 +1173,7 @@ static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id, if (!found) { /* If peer device object not found, simply ignore the request */ err = -ENODEV; - goto err_nodev; + goto err_put_new_ch; } new_ch->rdev = peer->rdev; @@ -1184,15 +1185,16 @@ static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id, *new_ch_id = new_ch->id; return new_ch; + +err_put_new_ch: + spin_lock_bh(&idr_lock); + idr_remove(&ch_idr, new_ch->id); + spin_unlock_bh(&idr_lock); + riocm_put_channel(new_ch); + err_put: - riocm_put_channel(ch); -err_nodev: - if (new_ch) { - spin_lock_bh(&idr_lock); - idr_remove(&ch_idr, new_ch->id); - spin_unlock_bh(&idr_lock); - riocm_put_channel(new_ch); - } + if (ch) + riocm_put_channel(ch); *new_ch_id = 0; return ERR_PTR(err); } diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c index 666bc3bb52ef..c24524242da2 100644 --- a/drivers/regulator/pwm-regulator.c +++ b/drivers/regulator/pwm-regulator.c @@ -22,6 +22,12 @@ #include <linux/pwm.h> #include <linux/gpio/consumer.h> +struct pwm_continuous_reg_data { + unsigned int min_uV_dutycycle; + unsigned int max_uV_dutycycle; + unsigned int dutycycle_unit; +}; + struct pwm_regulator_data { /* Shared */ struct pwm_device *pwm; @@ -29,6 +35,9 @@ struct pwm_regulator_data { /* Voltage table */ struct pwm_voltages *duty_cycle_table; + /* Continuous mode info */ + struct pwm_continuous_reg_data continuous; + /* regulator descriptor */ struct regulator_desc desc; @@ -37,9 +46,6 @@ struct pwm_regulator_data { int state; - /* Continuous voltage */ - int volt_uV; - /* Enable GPIO */ struct gpio_desc *enb_gpio; }; @@ -52,10 +58,31 @@ struct pwm_voltages { /** * Voltage table call-backs */ +static void pwm_regulator_init_state(struct regulator_dev *rdev) +{ + struct pwm_regulator_data *drvdata = rdev_get_drvdata(rdev); + struct pwm_state pwm_state; + unsigned int dutycycle; + int i; + + pwm_get_state(drvdata->pwm, &pwm_state); + dutycycle = pwm_get_relative_duty_cycle(&pwm_state, 100); + + for (i = 0; i < rdev->desc->n_voltages; i++) { + if (dutycycle == drvdata->duty_cycle_table[i].dutycycle) { + drvdata->state = i; + return; + } + } +} + static int pwm_regulator_get_voltage_sel(struct regulator_dev *rdev) { struct pwm_regulator_data *drvdata = rdev_get_drvdata(rdev); + if (drvdata->state < 0) + pwm_regulator_init_state(rdev); + return drvdata->state; } @@ -63,16 +90,14 @@ static int pwm_regulator_set_voltage_sel(struct regulator_dev *rdev, unsigned selector) { struct pwm_regulator_data *drvdata = rdev_get_drvdata(rdev); - struct pwm_args pargs; - int dutycycle; + struct pwm_state pstate; int ret; - pwm_get_args(drvdata->pwm, &pargs); - - dutycycle = (pargs.period * - drvdata->duty_cycle_table[selector].dutycycle) / 100; + pwm_init_state(drvdata->pwm, &pstate); + pwm_set_relative_duty_cycle(&pstate, + drvdata->duty_cycle_table[selector].dutycycle, 100); - ret = pwm_config(drvdata->pwm, dutycycle, pargs.period); + ret = pwm_apply_state(drvdata->pwm, &pstate); if (ret) { dev_err(&rdev->dev, "Failed to configure PWM: %d\n", ret); return ret; @@ -129,57 +154,90 @@ static int pwm_regulator_is_enabled(struct regulator_dev *dev) static int pwm_regulator_get_voltage(struct regulator_dev *rdev) { struct pwm_regulator_data *drvdata = rdev_get_drvdata(rdev); + unsigned int min_uV_duty = drvdata->continuous.min_uV_dutycycle; + unsigned int max_uV_duty = drvdata->continuous.max_uV_dutycycle; + unsigned int duty_unit = drvdata->continuous.dutycycle_unit; + int min_uV = rdev->constraints->min_uV; + int max_uV = rdev->constraints->max_uV; + int diff_uV = max_uV - min_uV; + struct pwm_state pstate; + unsigned int diff_duty; + unsigned int voltage; + + pwm_get_state(drvdata->pwm, &pstate); + + voltage = pwm_get_relative_duty_cycle(&pstate, duty_unit); + + /* + * The dutycycle for min_uV might be greater than the one for max_uV. + * This is happening when the user needs an inversed polarity, but the + * PWM device does not support inversing it in hardware. + */ + if (max_uV_duty < min_uV_duty) { + voltage = min_uV_duty - voltage; + diff_duty = min_uV_duty - max_uV_duty; + } else { + voltage = voltage - min_uV_duty; + diff_duty = max_uV_duty - min_uV_duty; + } - return drvdata->volt_uV; + voltage = DIV_ROUND_CLOSEST_ULL((u64)voltage * diff_uV, diff_duty); + + return voltage + min_uV; } static int pwm_regulator_set_voltage(struct regulator_dev *rdev, - int min_uV, int max_uV, - unsigned *selector) + int req_min_uV, int req_max_uV, + unsigned int *selector) { struct pwm_regulator_data *drvdata = rdev_get_drvdata(rdev); + unsigned int min_uV_duty = drvdata->continuous.min_uV_dutycycle; + unsigned int max_uV_duty = drvdata->continuous.max_uV_dutycycle; + unsigned int duty_unit = drvdata->continuous.dutycycle_unit; unsigned int ramp_delay = rdev->constraints->ramp_delay; - struct pwm_args pargs; - unsigned int req_diff = min_uV - rdev->constraints->min_uV; - unsigned int diff; - unsigned int duty_pulse; - u64 req_period; - u32 rem; + int min_uV = rdev->constraints->min_uV; + int max_uV = rdev->constraints->max_uV; + int diff_uV = max_uV - min_uV; + struct pwm_state pstate; int old_uV = pwm_regulator_get_voltage(rdev); + unsigned int diff_duty; + unsigned int dutycycle; int ret; - pwm_get_args(drvdata->pwm, &pargs); - diff = rdev->constraints->max_uV - rdev->constraints->min_uV; + pwm_init_state(drvdata->pwm, &pstate); - /* First try to find out if we get the iduty cycle time which is - * factor of PWM period time. If (request_diff_to_min * pwm_period) - * is perfect divided by voltage_range_diff then it is possible to - * get duty cycle time which is factor of PWM period. This will help - * to get output voltage nearer to requested value as there is no - * calculation loss. + /* + * The dutycycle for min_uV might be greater than the one for max_uV. + * This is happening when the user needs an inversed polarity, but the + * PWM device does not support inversing it in hardware. */ - req_period = req_diff * pargs.period; - div_u64_rem(req_period, diff, &rem); - if (!rem) { - do_div(req_period, diff); - duty_pulse = (unsigned int)req_period; - } else { - duty_pulse = (pargs.period / 100) * ((req_diff * 100) / diff); - } + if (max_uV_duty < min_uV_duty) + diff_duty = min_uV_duty - max_uV_duty; + else + diff_duty = max_uV_duty - min_uV_duty; + + dutycycle = DIV_ROUND_CLOSEST_ULL((u64)(req_min_uV - min_uV) * + diff_duty, + diff_uV); + + if (max_uV_duty < min_uV_duty) + dutycycle = min_uV_duty - dutycycle; + else + dutycycle = min_uV_duty + dutycycle; + + pwm_set_relative_duty_cycle(&pstate, dutycycle, duty_unit); - ret = pwm_config(drvdata->pwm, duty_pulse, pargs.period); + ret = pwm_apply_state(drvdata->pwm, &pstate); if (ret) { dev_err(&rdev->dev, "Failed to configure PWM: %d\n", ret); return ret; } - drvdata->volt_uV = min_uV; - if ((ramp_delay == 0) || !pwm_regulator_is_enabled(rdev)) return 0; /* Ramp delay is in uV/uS. Adjust to uS and delay */ - ramp_delay = DIV_ROUND_UP(abs(min_uV - old_uV), ramp_delay); + ramp_delay = DIV_ROUND_UP(abs(req_min_uV - old_uV), ramp_delay); usleep_range(ramp_delay, ramp_delay + DIV_ROUND_UP(ramp_delay, 10)); return 0; @@ -239,6 +297,7 @@ static int pwm_regulator_init_table(struct platform_device *pdev, return ret; } + drvdata->state = -EINVAL; drvdata->duty_cycle_table = duty_cycle_table; memcpy(&drvdata->ops, &pwm_regulator_voltage_table_ops, sizeof(drvdata->ops)); @@ -251,11 +310,28 @@ static int pwm_regulator_init_table(struct platform_device *pdev, static int pwm_regulator_init_continuous(struct platform_device *pdev, struct pwm_regulator_data *drvdata) { + u32 dutycycle_range[2] = { 0, 100 }; + u32 dutycycle_unit = 100; + memcpy(&drvdata->ops, &pwm_regulator_voltage_continuous_ops, sizeof(drvdata->ops)); drvdata->desc.ops = &drvdata->ops; drvdata->desc.continuous_voltage_range = true; + of_property_read_u32_array(pdev->dev.of_node, + "pwm-dutycycle-range", + dutycycle_range, 2); + of_property_read_u32(pdev->dev.of_node, "pwm-dutycycle-unit", + &dutycycle_unit); + + if (dutycycle_range[0] > dutycycle_unit || + dutycycle_range[1] > dutycycle_unit) + return -EINVAL; + + drvdata->continuous.dutycycle_unit = dutycycle_unit; + drvdata->continuous.min_uV_dutycycle = dutycycle_range[0]; + drvdata->continuous.max_uV_dutycycle = dutycycle_range[1]; + return 0; } @@ -316,11 +392,9 @@ static int pwm_regulator_probe(struct platform_device *pdev) return ret; } - /* - * FIXME: pwm_apply_args() should be removed when switching to the - * atomic PWM API. - */ - pwm_apply_args(drvdata->pwm); + ret = pwm_adjust_config(drvdata->pwm); + if (ret) + return ret; regulator = devm_regulator_register(&pdev->dev, &drvdata->desc, &config); diff --git a/drivers/remoteproc/qcom_q6v5_pil.c b/drivers/remoteproc/qcom_q6v5_pil.c index 24791886219a..2a1b2c7d8f2c 100644 --- a/drivers/remoteproc/qcom_q6v5_pil.c +++ b/drivers/remoteproc/qcom_q6v5_pil.c @@ -349,13 +349,12 @@ static void q6v5proc_halt_axi_port(struct q6v5 *qproc, static int q6v5_mpss_init_image(struct q6v5 *qproc, const struct firmware *fw) { - DEFINE_DMA_ATTRS(attrs); + unsigned long dma_attrs = DMA_ATTR_FORCE_CONTIGUOUS; dma_addr_t phys; void *ptr; int ret; - dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, &attrs); - ptr = dma_alloc_attrs(qproc->dev, fw->size, &phys, GFP_KERNEL, &attrs); + ptr = dma_alloc_attrs(qproc->dev, fw->size, &phys, GFP_KERNEL, dma_attrs); if (!ptr) { dev_err(qproc->dev, "failed to allocate mdt buffer\n"); return -ENOMEM; @@ -372,7 +371,7 @@ static int q6v5_mpss_init_image(struct q6v5 *qproc, const struct firmware *fw) else if (ret < 0) dev_err(qproc->dev, "MPSS header authentication failed: %d\n", ret); - dma_free_attrs(qproc->dev, fw->size, ptr, phys, &attrs); + dma_free_attrs(qproc->dev, fw->size, ptr, phys, dma_attrs); return ret < 0 ? ret : 0; } diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 18639e0cb6e2..e215f50794b6 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -5,6 +5,10 @@ config RTC_LIB bool +config RTC_MC146818_LIB + bool + select RTC_LIB + menuconfig RTC_CLASS bool "Real Time Clock" default n @@ -574,10 +578,10 @@ config RTC_DRV_EM3027 will be called rtc-em3027. config RTC_DRV_RV8803 - tristate "Micro Crystal RV8803" + tristate "Micro Crystal RV8803, Epson RX8900" help - If you say yes here you get support for the Micro Crystal - RV8803 RTC chips. + If you say yes here you get support for the Micro Crystal RV8803 and + Epson RX8900 RTC chips. This driver can also be built as a module. If so, the module will be called rtc-rv8803. @@ -670,6 +674,18 @@ config RTC_DRV_DS1390 This driver can also be built as a module. If so, the module will be called rtc-ds1390. +config RTC_DRV_MAX6916 + tristate "Maxim MAX6916" + help + If you say yes here you will get support for the + Maxim MAX6916 SPI RTC chip. + + This driver only supports the RTC feature, and not other chip + features such as alarms. + + This driver can also be built as a module. If so, the module + will be called rtc-max6916. + config RTC_DRV_R9701 tristate "Epson RTC-9701JE" help @@ -795,8 +811,9 @@ comment "Platform RTC drivers" config RTC_DRV_CMOS tristate "PC-style 'CMOS'" - depends on X86 || ARM || M32R || PPC || MIPS || SPARC64 + depends on X86 || ARM || M32R || PPC || MIPS || SPARC64 || MN10300 default y if X86 + select RTC_MC146818_LIB help Say "yes" here to get direct support for the real time clock found in every PC or ACPI-based system, and some other boards. @@ -815,6 +832,7 @@ config RTC_DRV_CMOS config RTC_DRV_ALPHA bool "Alpha PC-style CMOS" depends on ALPHA + select RTC_MC146818_LIB default y help Direct support for the real-time clock found on every Alpha diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index ea2833723fa9..7cf7ad559c79 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_RTC_LIB) += rtc-lib.o obj-$(CONFIG_RTC_HCTOSYS) += hctosys.o obj-$(CONFIG_RTC_SYSTOHC) += systohc.o obj-$(CONFIG_RTC_CLASS) += rtc-core.o +obj-$(CONFIG_RTC_MC146818_LIB) += rtc-mc146818-lib.o rtc-core-y := class.o interface.o ifdef CONFIG_RTC_DRV_EFI @@ -85,6 +86,7 @@ obj-$(CONFIG_RTC_DRV_M48T59) += rtc-m48t59.o obj-$(CONFIG_RTC_DRV_M48T86) += rtc-m48t86.o obj-$(CONFIG_RTC_DRV_MAX6900) += rtc-max6900.o obj-$(CONFIG_RTC_DRV_MAX6902) += rtc-max6902.o +obj-$(CONFIG_RTC_DRV_MAX6916) += rtc-max6916.o obj-$(CONFIG_RTC_DRV_MAX77686) += rtc-max77686.o obj-$(CONFIG_RTC_DRV_MAX8907) += rtc-max8907.o obj-$(CONFIG_RTC_DRV_MAX8925) += rtc-max8925.o diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 9ef5f6f89f98..84a52db9b05f 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -104,7 +104,17 @@ static int rtc_read_alarm_internal(struct rtc_device *rtc, struct rtc_wkalrm *al else if (!rtc->ops->read_alarm) err = -EINVAL; else { - memset(alarm, 0, sizeof(struct rtc_wkalrm)); + alarm->enabled = 0; + alarm->pending = 0; + alarm->time.tm_sec = -1; + alarm->time.tm_min = -1; + alarm->time.tm_hour = -1; + alarm->time.tm_mday = -1; + alarm->time.tm_mon = -1; + alarm->time.tm_year = -1; + alarm->time.tm_wday = -1; + alarm->time.tm_yday = -1; + alarm->time.tm_isdst = -1; err = rtc->ops->read_alarm(rtc->dev.parent, alarm); } @@ -383,7 +393,7 @@ int rtc_initialize_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) rtc->aie_timer.node.expires = rtc_tm_to_ktime(alarm->time); rtc->aie_timer.period = ktime_set(0, 0); - /* Alarm has to be enabled & in the futrure for us to enqueue it */ + /* Alarm has to be enabled & in the future for us to enqueue it */ if (alarm->enabled && (rtc_tm_to_ktime(now).tv64 < rtc->aie_timer.node.expires.tv64)) { @@ -395,8 +405,6 @@ int rtc_initialize_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) } EXPORT_SYMBOL_GPL(rtc_initialize_alarm); - - int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled) { int err = mutex_lock_interruptible(&rtc->ops_lock); @@ -748,9 +756,23 @@ EXPORT_SYMBOL_GPL(rtc_irq_set_freq); */ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) { + struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue); + struct rtc_time tm; + ktime_t now; + timer->enabled = 1; + __rtc_read_time(rtc, &tm); + now = rtc_tm_to_ktime(tm); + + /* Skip over expired timers */ + while (next) { + if (next->expires.tv64 >= now.tv64) + break; + next = timerqueue_iterate_next(next); + } + timerqueue_add(&rtc->timerqueue, &timer->node); - if (&timer->node == timerqueue_getnext(&rtc->timerqueue)) { + if (!next) { struct rtc_wkalrm alarm; int err; alarm.time = rtc_ktime_to_tm(timer->node.expires); diff --git a/drivers/rtc/rtc-abx80x.c b/drivers/rtc/rtc-abx80x.c index ba0d61934d35..fea9a60b06cf 100644 --- a/drivers/rtc/rtc-abx80x.c +++ b/drivers/rtc/rtc-abx80x.c @@ -643,17 +643,15 @@ static int abx80x_probe(struct i2c_client *client, return err; } - err = devm_add_action(&client->dev, rtc_calib_remove_sysfs_group, - &client->dev); - if (err) { - rtc_calib_remove_sysfs_group(&client->dev); + err = devm_add_action_or_reset(&client->dev, + rtc_calib_remove_sysfs_group, + &client->dev); + if (err) dev_err(&client->dev, "Failed to add sysfs cleanup action: %d\n", err); - return err; - } - return 0; + return err; } static int abx80x_remove(struct i2c_client *client) diff --git a/drivers/rtc/rtc-asm9260.c b/drivers/rtc/rtc-asm9260.c index 355fdb97a006..5219916ce11d 100644 --- a/drivers/rtc/rtc-asm9260.c +++ b/drivers/rtc/rtc-asm9260.c @@ -343,7 +343,6 @@ static struct platform_driver asm9260_rtc_driver = { .remove = asm9260_rtc_remove, .driver = { .name = "asm9260-rtc", - .owner = THIS_MODULE, .of_match_table = asm9260_dt_ids, }, }; diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c index 99732e6f8c3b..7418a763ce52 100644 --- a/drivers/rtc/rtc-at91sam9.c +++ b/drivers/rtc/rtc-at91sam9.c @@ -375,6 +375,7 @@ static int at91_rtc_probe(struct platform_device *pdev) if (!rtc) return -ENOMEM; + spin_lock_init(&rtc->lock); rtc->irq = irq; /* platform setup code should have handled this; sigh */ diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index fbe9c72438e1..43745cac0141 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -43,7 +43,7 @@ #include <linux/of_platform.h> /* this is for "generic access to PC-style RTC" using CMOS_READ/CMOS_WRITE */ -#include <asm-generic/rtc.h> +#include <linux/mc146818rtc.h> struct cmos_rtc { struct rtc_device *rtc; @@ -190,10 +190,10 @@ static inline void cmos_write_bank2(unsigned char val, unsigned char addr) static int cmos_read_time(struct device *dev, struct rtc_time *t) { /* REVISIT: if the clock has a "century" register, use - * that instead of the heuristic in get_rtc_time(). + * that instead of the heuristic in mc146818_get_time(). * That'll make Y3K compatility (year > 2070) easy! */ - get_rtc_time(t); + mc146818_get_time(t); return 0; } @@ -205,7 +205,7 @@ static int cmos_set_time(struct device *dev, struct rtc_time *t) * takes effect exactly 500ms after we write the register. * (Also queueing and other delays before we get this far.) */ - return set_rtc_time(t); + return mc146818_set_time(t); } static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) @@ -220,8 +220,6 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) * Some also support day and month, for alarms up to a year in * the future. */ - t->time.tm_mday = -1; - t->time.tm_mon = -1; spin_lock_irq(&rtc_lock); t->time.tm_sec = CMOS_READ(RTC_SECONDS_ALARM); @@ -272,7 +270,6 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) } } } - t->time.tm_year = -1; t->enabled = !!(rtc_control & RTC_AIE); t->pending = 0; @@ -630,7 +627,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) address_space = 64; #elif defined(__i386__) || defined(__x86_64__) || defined(__arm__) \ || defined(__sparc__) || defined(__mips__) \ - || defined(__powerpc__) + || defined(__powerpc__) || defined(CONFIG_MN10300) address_space = 128; #else #warning Assuming 128 bytes of RTC+NVRAM address space, not 64 bytes. @@ -1142,14 +1139,14 @@ static __init void cmos_of_init(struct platform_device *pdev) if (val) CMOS_WRITE(be32_to_cpup(val), RTC_FREQ_SELECT); - get_rtc_time(&time); + cmos_read_time(&pdev->dev, &time); ret = rtc_valid_tm(&time); if (ret) { struct rtc_time def_time = { .tm_year = 1, .tm_mday = 1, }; - set_rtc_time(&def_time); + cmos_set_time(&pdev->dev, &def_time); } } #else diff --git a/drivers/rtc/rtc-da9052.c b/drivers/rtc/rtc-da9052.c index a20bcf0e33cd..4273377562ec 100644 --- a/drivers/rtc/rtc-da9052.c +++ b/drivers/rtc/rtc-da9052.c @@ -85,6 +85,7 @@ static int da9052_read_alarm(struct da9052_rtc *rtc, struct rtc_time *rtc_tm) rtc_tm->tm_mday = v[0][2] & DA9052_RTC_DAY; rtc_tm->tm_hour = v[0][1] & DA9052_RTC_HOUR; rtc_tm->tm_min = v[0][0] & DA9052_RTC_MIN; + rtc_tm->tm_sec = 0; ret = rtc_valid_tm(rtc_tm); return ret; diff --git a/drivers/rtc/rtc-da9055.c b/drivers/rtc/rtc-da9055.c index 7ec0872d5e3b..678af8648c45 100644 --- a/drivers/rtc/rtc-da9055.c +++ b/drivers/rtc/rtc-da9055.c @@ -74,6 +74,7 @@ static int da9055_read_alarm(struct da9055 *da9055, struct rtc_time *rtc_tm) rtc_tm->tm_mday = v[2] & DA9055_RTC_ALM_DAY; rtc_tm->tm_hour = v[1] & DA9055_RTC_ALM_HOUR; rtc_tm->tm_min = v[0] & DA9055_RTC_ALM_MIN; + rtc_tm->tm_sec = 0; return rtc_valid_tm(rtc_tm); } diff --git a/drivers/rtc/rtc-davinci.c b/drivers/rtc/rtc-davinci.c index c5432bf64e1c..dba60c1dfce2 100644 --- a/drivers/rtc/rtc-davinci.c +++ b/drivers/rtc/rtc-davinci.c @@ -388,6 +388,8 @@ static int davinci_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm) u8 day0, day1; unsigned long flags; + alm->time.tm_sec = 0; + spin_lock_irqsave(&davinci_rtc_lock, flags); davinci_rtcss_calendar_wait(davinci_rtc); diff --git a/drivers/rtc/rtc-ds1286.c b/drivers/rtc/rtc-ds1286.c index 756e509f6ed2..ef75c349dff9 100644 --- a/drivers/rtc/rtc-ds1286.c +++ b/drivers/rtc/rtc-ds1286.c @@ -16,7 +16,7 @@ #include <linux/rtc.h> #include <linux/platform_device.h> #include <linux/bcd.h> -#include <linux/ds1286.h> +#include <linux/rtc/ds1286.h> #include <linux/io.h> #include <linux/slab.h> diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c index 8e41c4613e51..72b22935eb62 100644 --- a/drivers/rtc/rtc-ds1305.c +++ b/drivers/rtc/rtc-ds1305.c @@ -313,13 +313,6 @@ static int ds1305_get_alarm(struct device *dev, struct rtc_wkalrm *alm) alm->time.tm_sec = bcd2bin(buf[DS1305_SEC]); alm->time.tm_min = bcd2bin(buf[DS1305_MIN]); alm->time.tm_hour = bcd2hour(buf[DS1305_HOUR]); - alm->time.tm_mday = -1; - alm->time.tm_mon = -1; - alm->time.tm_year = -1; - /* next three fields are unused by Linux */ - alm->time.tm_wday = -1; - alm->time.tm_mday = -1; - alm->time.tm_isdst = -1; return 0; } diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 821d9c089cdb..8e1c5cb6ece6 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -482,11 +482,6 @@ static int ds1337_read_alarm(struct device *dev, struct rtc_wkalrm *t) t->time.tm_min = bcd2bin(ds1307->regs[1] & 0x7f); t->time.tm_hour = bcd2bin(ds1307->regs[2] & 0x3f); t->time.tm_mday = bcd2bin(ds1307->regs[3] & 0x3f); - t->time.tm_mon = -1; - t->time.tm_year = -1; - t->time.tm_wday = -1; - t->time.tm_yday = -1; - t->time.tm_isdst = -1; /* ... and status */ t->enabled = !!(ds1307->regs[7] & DS1337_BIT_A1IE); @@ -602,6 +597,8 @@ static const struct rtc_class_ops ds13xx_rtc_ops = { * Alarm support for mcp794xx devices. */ +#define MCP794XX_REG_WEEKDAY 0x3 +#define MCP794XX_REG_WEEKDAY_WDAY_MASK 0x7 #define MCP794XX_REG_CONTROL 0x07 # define MCP794XX_BIT_ALM0_EN 0x10 # define MCP794XX_BIT_ALM1_EN 0x20 @@ -1231,13 +1228,16 @@ static int ds1307_probe(struct i2c_client *client, { struct ds1307 *ds1307; int err = -ENODEV; - int tmp; + int tmp, wday; struct chip_desc *chip = &chips[id->driver_data]; struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); bool want_irq = false; bool ds1307_can_wakeup_device = false; unsigned char *buf; struct ds1307_platform_data *pdata = dev_get_platdata(&client->dev); + struct rtc_time tm; + unsigned long timestamp; + irq_handler_t irq_handler = ds1307_irq; static const int bbsqi_bitpos[] = { @@ -1526,6 +1526,27 @@ read_rtc: bin2bcd(tmp)); } + /* + * Some IPs have weekday reset value = 0x1 which might not correct + * hence compute the wday using the current date/month/year values + */ + ds1307_get_time(&client->dev, &tm); + wday = tm.tm_wday; + timestamp = rtc_tm_to_time64(&tm); + rtc_time64_to_tm(timestamp, &tm); + + /* + * Check if reset wday is different from the computed wday + * If different then set the wday which we computed using + * timestamp + */ + if (wday != tm.tm_wday) { + wday = i2c_smbus_read_byte_data(client, MCP794XX_REG_WEEKDAY); + wday = wday & ~MCP794XX_REG_WEEKDAY_WDAY_MASK; + wday = wday | (tm.tm_wday + 1); + i2c_smbus_write_byte_data(client, MCP794XX_REG_WEEKDAY, wday); + } + if (want_irq) { device_set_wakeup_capable(&client->dev, true); set_bit(HAS_ALARM, &ds1307->flags); diff --git a/drivers/rtc/rtc-ds1343.c b/drivers/rtc/rtc-ds1343.c index 23fa9f0cb5e3..895fbeeb47fe 100644 --- a/drivers/rtc/rtc-ds1343.c +++ b/drivers/rtc/rtc-ds1343.c @@ -504,12 +504,6 @@ static int ds1343_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) alarm->time.tm_hour = priv->alarm_hour < 0 ? 0 : priv->alarm_hour; alarm->time.tm_mday = priv->alarm_mday < 0 ? 0 : priv->alarm_mday; - alarm->time.tm_mon = -1; - alarm->time.tm_year = -1; - alarm->time.tm_wday = -1; - alarm->time.tm_yday = -1; - alarm->time.tm_isdst = -1; - out: mutex_unlock(&priv->mutex); return res; diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c index b3ce3c652fcd..ed43b4311660 100644 --- a/drivers/rtc/rtc-ds1685.c +++ b/drivers/rtc/rtc-ds1685.c @@ -103,6 +103,26 @@ ds1685_rtc_bin2bcd(struct ds1685_priv *rtc, u8 val, u8 bin_mask, u8 bcd_mask) } /** + * s1685_rtc_check_mday - check validity of the day of month. + * @rtc: pointer to the ds1685 rtc structure. + * @mday: day of month. + * + * Returns -EDOM if the day of month is not within 1..31 range. + */ +static inline int +ds1685_rtc_check_mday(struct ds1685_priv *rtc, u8 mday) +{ + if (rtc->bcd_mode) { + if (mday < 0x01 || mday > 0x31 || (mday & 0x0f) > 0x09) + return -EDOM; + } else { + if (mday < 1 || mday > 31) + return -EDOM; + } + return 0; +} + +/** * ds1685_rtc_switch_to_bank0 - switch the rtc to bank 0. * @rtc: pointer to the ds1685 rtc structure. */ @@ -377,6 +397,7 @@ ds1685_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) struct platform_device *pdev = to_platform_device(dev); struct ds1685_priv *rtc = platform_get_drvdata(pdev); u8 seconds, minutes, hours, mday, ctrlb, ctrlc; + int ret; /* Fetch the alarm info from the RTC alarm registers. */ ds1685_rtc_begin_data_access(rtc); @@ -388,34 +409,29 @@ ds1685_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) ctrlc = rtc->read(rtc, RTC_CTRL_C); ds1685_rtc_end_data_access(rtc); - /* Check month date. */ - if (!(mday >= 1) && (mday <= 31)) - return -EDOM; + /* Check the month date for validity. */ + ret = ds1685_rtc_check_mday(rtc, mday); + if (ret) + return ret; /* * Check the three alarm bytes. * * The Linux RTC system doesn't support the "don't care" capability * of this RTC chip. We check for it anyways in case support is - * added in the future. + * added in the future and only assign when we care. */ - if (unlikely(seconds >= 0xc0)) - alrm->time.tm_sec = -1; - else + if (likely(seconds < 0xc0)) alrm->time.tm_sec = ds1685_rtc_bcd2bin(rtc, seconds, RTC_SECS_BCD_MASK, RTC_SECS_BIN_MASK); - if (unlikely(minutes >= 0xc0)) - alrm->time.tm_min = -1; - else + if (likely(minutes < 0xc0)) alrm->time.tm_min = ds1685_rtc_bcd2bin(rtc, minutes, RTC_MINS_BCD_MASK, RTC_MINS_BIN_MASK); - if (unlikely(hours >= 0xc0)) - alrm->time.tm_hour = -1; - else + if (likely(hours < 0xc0)) alrm->time.tm_hour = ds1685_rtc_bcd2bin(rtc, hours, RTC_HRS_24_BCD_MASK, RTC_HRS_24_BIN_MASK); @@ -423,11 +439,6 @@ ds1685_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) /* Write the data to rtc_wkalrm. */ alrm->time.tm_mday = ds1685_rtc_bcd2bin(rtc, mday, RTC_MDAY_BCD_MASK, RTC_MDAY_BIN_MASK); - alrm->time.tm_mon = -1; - alrm->time.tm_year = -1; - alrm->time.tm_wday = -1; - alrm->time.tm_yday = -1; - alrm->time.tm_isdst = -1; alrm->enabled = !!(ctrlb & RTC_CTRL_B_AIE); alrm->pending = !!(ctrlc & RTC_CTRL_C_AF); @@ -445,6 +456,7 @@ ds1685_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) struct platform_device *pdev = to_platform_device(dev); struct ds1685_priv *rtc = platform_get_drvdata(pdev); u8 ctrlb, seconds, minutes, hours, mday; + int ret; /* Fetch the alarm info and convert to BCD. */ seconds = ds1685_rtc_bin2bcd(rtc, alrm->time.tm_sec, @@ -461,8 +473,9 @@ ds1685_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) RTC_MDAY_BCD_MASK); /* Check the month date for validity. */ - if (!(mday >= 1) && (mday <= 31)) - return -EDOM; + ret = ds1685_rtc_check_mday(rtc, mday); + if (ret) + return ret; /* * Check the three alarm bytes. diff --git a/drivers/rtc/rtc-ds2404.c b/drivers/rtc/rtc-ds2404.c index 16310fe79d76..9a1582ed7070 100644 --- a/drivers/rtc/rtc-ds2404.c +++ b/drivers/rtc/rtc-ds2404.c @@ -13,7 +13,7 @@ #include <linux/rtc.h> #include <linux/types.h> #include <linux/bcd.h> -#include <linux/rtc-ds2404.h> +#include <linux/platform_data/rtc-ds2404.h> #include <linux/delay.h> #include <linux/gpio.h> #include <linux/slab.h> diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c index 04fbd7fffd0d..b1f20d8c358f 100644 --- a/drivers/rtc/rtc-ds3232.c +++ b/drivers/rtc/rtc-ds3232.c @@ -197,12 +197,6 @@ static int ds3232_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) alarm->time.tm_hour = bcd2bin(buf[2] & 0x7F); alarm->time.tm_mday = bcd2bin(buf[3] & 0x7F); - alarm->time.tm_mon = -1; - alarm->time.tm_year = -1; - alarm->time.tm_wday = -1; - alarm->time.tm_yday = -1; - alarm->time.tm_isdst = -1; - alarm->enabled = !!(control & DS3232_REG_CR_A1IE); alarm->pending = !!(stat & DS3232_REG_SR_A1F); diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c index 96d38609d803..0130afd7fe88 100644 --- a/drivers/rtc/rtc-efi.c +++ b/drivers/rtc/rtc-efi.c @@ -259,6 +259,12 @@ static const struct rtc_class_ops efi_rtc_ops = { static int __init efi_rtc_probe(struct platform_device *dev) { struct rtc_device *rtc; + efi_time_t eft; + efi_time_cap_t cap; + + /* First check if the RTC is usable */ + if (efi.get_time(&eft, &cap) != EFI_SUCCESS) + return -ENODEV; rtc = devm_rtc_device_register(&dev->dev, "rtc-efi", &efi_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-generic.c b/drivers/rtc/rtc-generic.c index d726c6aa96a8..1bf5d2347928 100644 --- a/drivers/rtc/rtc-generic.c +++ b/drivers/rtc/rtc-generic.c @@ -9,44 +9,10 @@ #include <linux/platform_device.h> #include <linux/rtc.h> -#if defined(CONFIG_M68K) || defined(CONFIG_PARISC) || \ - defined(CONFIG_PPC) || defined(CONFIG_SUPERH32) -#include <asm/rtc.h> - -static int generic_get_time(struct device *dev, struct rtc_time *tm) -{ - unsigned int ret = get_rtc_time(tm); - - if (ret & RTC_BATT_BAD) - return -EOPNOTSUPP; - - return rtc_valid_tm(tm); -} - -static int generic_set_time(struct device *dev, struct rtc_time *tm) -{ - if (set_rtc_time(tm) < 0) - return -EOPNOTSUPP; - - return 0; -} - -static const struct rtc_class_ops generic_rtc_ops = { - .read_time = generic_get_time, - .set_time = generic_set_time, -}; -#else -#define generic_rtc_ops *(struct rtc_class_ops*)NULL -#endif - static int __init generic_rtc_probe(struct platform_device *dev) { struct rtc_device *rtc; - const struct rtc_class_ops *ops; - - ops = dev_get_platdata(&dev->dev); - if (!ops) - ops = &generic_rtc_ops; + const struct rtc_class_ops *ops = dev_get_platdata(&dev->dev); rtc = devm_rtc_device_register(&dev->dev, "rtc-generic", ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-hym8563.c b/drivers/rtc/rtc-hym8563.c index 207270376b55..e5ad527cb75e 100644 --- a/drivers/rtc/rtc-hym8563.c +++ b/drivers/rtc/rtc-hym8563.c @@ -198,7 +198,7 @@ static int hym8563_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm) return ret; /* The alarm only has a minute accuracy */ - alm_tm->tm_sec = -1; + alm_tm->tm_sec = 0; alm_tm->tm_min = (buf[0] & HYM8563_ALM_BIT_DISABLE) ? -1 : @@ -213,9 +213,6 @@ static int hym8563_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm) -1 : bcd2bin(buf[3] & HYM8563_WEEKDAY_MASK); - alm_tm->tm_mon = -1; - alm_tm->tm_year = -1; - ret = i2c_smbus_read_byte_data(client, HYM8563_CTL2); if (ret < 0) return ret; diff --git a/drivers/rtc/rtc-isl12057.c b/drivers/rtc/rtc-isl12057.c index 54328d4ac0d3..0e7f0f52bfe4 100644 --- a/drivers/rtc/rtc-isl12057.c +++ b/drivers/rtc/rtc-isl12057.c @@ -245,8 +245,7 @@ static int isl12057_rtc_update_alarm(struct device *dev, int enable) static int isl12057_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) { struct isl12057_rtc_data *data = dev_get_drvdata(dev); - struct rtc_time rtc_tm, *alarm_tm = &alarm->time; - unsigned long rtc_secs, alarm_secs; + struct rtc_time *alarm_tm = &alarm->time; u8 regs[ISL12057_A1_SEC_LEN]; unsigned int ir; int ret; @@ -264,36 +263,6 @@ static int isl12057_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) alarm_tm->tm_min = bcd2bin(regs[1] & 0x7f); alarm_tm->tm_hour = bcd2bin(regs[2] & 0x3f); alarm_tm->tm_mday = bcd2bin(regs[3] & 0x3f); - alarm_tm->tm_wday = -1; - - /* - * The alarm section does not store year/month. We use the ones in rtc - * section as a basis and increment month and then year if needed to get - * alarm after current time. - */ - ret = _isl12057_rtc_read_time(dev, &rtc_tm); - if (ret) - goto err_unlock; - - alarm_tm->tm_year = rtc_tm.tm_year; - alarm_tm->tm_mon = rtc_tm.tm_mon; - - ret = rtc_tm_to_time(&rtc_tm, &rtc_secs); - if (ret) - goto err_unlock; - - ret = rtc_tm_to_time(alarm_tm, &alarm_secs); - if (ret) - goto err_unlock; - - if (alarm_secs < rtc_secs) { - if (alarm_tm->tm_mon == 11) { - alarm_tm->tm_mon = 0; - alarm_tm->tm_year += 1; - } else { - alarm_tm->tm_mon += 1; - } - } ret = regmap_read(data->regmap, ISL12057_REG_INT, &ir); if (ret) { diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c index d1bf93a87200..58698d21c2c3 100644 --- a/drivers/rtc/rtc-m41t80.c +++ b/drivers/rtc/rtc-m41t80.c @@ -244,7 +244,7 @@ static int m41t80_alarm_irq_enable(struct device *dev, unsigned int enabled) retval = i2c_smbus_write_byte_data(client, M41T80_REG_ALARM_MON, flags); if (retval < 0) { - dev_info(dev, "Unable to enable alarm IRQ %d\n", retval); + dev_err(dev, "Unable to enable alarm IRQ %d\n", retval); return retval; } return 0; @@ -320,10 +320,8 @@ static int m41t80_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) alrm->time.tm_sec = bcd2bin(alarmvals[4] & 0x7f); alrm->time.tm_min = bcd2bin(alarmvals[3] & 0x7f); alrm->time.tm_hour = bcd2bin(alarmvals[2] & 0x3f); - alrm->time.tm_wday = -1; alrm->time.tm_mday = bcd2bin(alarmvals[1] & 0x3f); alrm->time.tm_mon = bcd2bin(alarmvals[0] & 0x3f); - alrm->time.tm_year = -1; alrm->enabled = !!(alarmvals[0] & M41T80_ALMON_AFE); alrm->pending = (flags & M41T80_FLAGS_AF) && alrm->enabled; @@ -337,6 +335,30 @@ static struct rtc_class_ops m41t80_rtc_ops = { .proc = m41t80_rtc_proc, }; +#ifdef CONFIG_PM_SLEEP +static int m41t80_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + if (client->irq >= 0 && device_may_wakeup(dev)) + enable_irq_wake(client->irq); + + return 0; +} + +static int m41t80_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + if (client->irq >= 0 && device_may_wakeup(dev)) + disable_irq_wake(client->irq); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(m41t80_pm, m41t80_suspend, m41t80_resume); + static ssize_t flags_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -831,10 +853,9 @@ static int m41t80_probe(struct i2c_client *client, return rc; } - rc = devm_add_action(&client->dev, m41t80_remove_sysfs_group, - &client->dev); + rc = devm_add_action_or_reset(&client->dev, m41t80_remove_sysfs_group, + &client->dev); if (rc) { - m41t80_remove_sysfs_group(&client->dev); dev_err(&client->dev, "Failed to add sysfs cleanup action: %d\n", rc); return rc; @@ -873,6 +894,7 @@ static int m41t80_remove(struct i2c_client *client) static struct i2c_driver m41t80_driver = { .driver = { .name = "rtc-m41t80", + .pm = &m41t80_pm, }, .probe = m41t80_probe, .remove = m41t80_remove, diff --git a/drivers/rtc/rtc-m48t86.c b/drivers/rtc/rtc-m48t86.c index f72b91f2501f..0eeb5714c00f 100644 --- a/drivers/rtc/rtc-m48t86.c +++ b/drivers/rtc/rtc-m48t86.c @@ -16,7 +16,7 @@ #include <linux/module.h> #include <linux/rtc.h> #include <linux/platform_device.h> -#include <linux/m48t86.h> +#include <linux/platform_data/rtc-m48t86.h> #include <linux/bcd.h> #define M48T86_REG_SEC 0x00 diff --git a/drivers/rtc/rtc-max6916.c b/drivers/rtc/rtc-max6916.c new file mode 100644 index 000000000000..623ab27b2757 --- /dev/null +++ b/drivers/rtc/rtc-max6916.c @@ -0,0 +1,164 @@ +/* rtc-max6916.c + * + * Driver for MAXIM max6916 Low Current, SPI Compatible + * Real Time Clock + * + * Author : Venkat Prashanth B U <venkat.prashanth2498@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/platform_device.h> +#include <linux/rtc.h> +#include <linux/spi/spi.h> +#include <linux/bcd.h> + +/* Registers in max6916 rtc */ + +#define MAX6916_SECONDS_REG 0x01 +#define MAX6916_MINUTES_REG 0x02 +#define MAX6916_HOURS_REG 0x03 +#define MAX6916_DATE_REG 0x04 +#define MAX6916_MONTH_REG 0x05 +#define MAX6916_DAY_REG 0x06 +#define MAX6916_YEAR_REG 0x07 +#define MAX6916_CONTROL_REG 0x08 +#define MAX6916_STATUS_REG 0x0C +#define MAX6916_CLOCK_BURST 0x3F + +static int max6916_read_reg(struct device *dev, unsigned char address, + unsigned char *data) +{ + struct spi_device *spi = to_spi_device(dev); + + *data = address | 0x80; + + return spi_write_then_read(spi, data, 1, data, 1); +} + +static int max6916_write_reg(struct device *dev, unsigned char address, + unsigned char data) +{ + struct spi_device *spi = to_spi_device(dev); + unsigned char buf[2]; + + buf[0] = address & 0x7F; + buf[1] = data; + + return spi_write_then_read(spi, buf, 2, NULL, 0); +} + +static int max6916_read_time(struct device *dev, struct rtc_time *dt) +{ + struct spi_device *spi = to_spi_device(dev); + int err; + unsigned char buf[8]; + + buf[0] = MAX6916_CLOCK_BURST | 0x80; + + err = spi_write_then_read(spi, buf, 1, buf, 8); + + if (err) + return err; + + dt->tm_sec = bcd2bin(buf[0]); + dt->tm_min = bcd2bin(buf[1]); + dt->tm_hour = bcd2bin(buf[2] & 0x3F); + dt->tm_mday = bcd2bin(buf[3]); + dt->tm_mon = bcd2bin(buf[4]) - 1; + dt->tm_wday = bcd2bin(buf[5]) - 1; + dt->tm_year = bcd2bin(buf[6]) + 100; + + return rtc_valid_tm(dt); +} + +static int max6916_set_time(struct device *dev, struct rtc_time *dt) +{ + struct spi_device *spi = to_spi_device(dev); + unsigned char buf[9]; + + if (dt->tm_year < 100 || dt->tm_year > 199) { + dev_err(&spi->dev, "Year must be between 2000 and 2099. It's %d.\n", + dt->tm_year + 1900); + return -EINVAL; + } + + buf[0] = MAX6916_CLOCK_BURST & 0x7F; + buf[1] = bin2bcd(dt->tm_sec); + buf[2] = bin2bcd(dt->tm_min); + buf[3] = (bin2bcd(dt->tm_hour) & 0X3F); + buf[4] = bin2bcd(dt->tm_mday); + buf[5] = bin2bcd(dt->tm_mon + 1); + buf[6] = bin2bcd(dt->tm_wday + 1); + buf[7] = bin2bcd(dt->tm_year % 100); + buf[8] = bin2bcd(0x00); + + /* write the rtc settings */ + return spi_write_then_read(spi, buf, 9, NULL, 0); +} + +static const struct rtc_class_ops max6916_rtc_ops = { + .read_time = max6916_read_time, + .set_time = max6916_set_time, +}; + +static int max6916_probe(struct spi_device *spi) +{ + struct rtc_device *rtc; + unsigned char data; + int res; + + /* spi setup with max6916 in mode 3 and bits per word as 8 */ + spi->mode = SPI_MODE_3; + spi->bits_per_word = 8; + spi_setup(spi); + + /* RTC Settings */ + res = max6916_read_reg(&spi->dev, MAX6916_SECONDS_REG, &data); + if (res) + return res; + + /* Disable the write protect of rtc */ + max6916_read_reg(&spi->dev, MAX6916_CONTROL_REG, &data); + data = data & ~(1 << 7); + max6916_write_reg(&spi->dev, MAX6916_CONTROL_REG, data); + + /*Enable oscillator,disable oscillator stop flag, glitch filter*/ + max6916_read_reg(&spi->dev, MAX6916_STATUS_REG, &data); + data = data & 0x1B; + max6916_write_reg(&spi->dev, MAX6916_STATUS_REG, data); + + /* display the settings */ + max6916_read_reg(&spi->dev, MAX6916_CONTROL_REG, &data); + dev_info(&spi->dev, "MAX6916 RTC CTRL Reg = 0x%02x\n", data); + + max6916_read_reg(&spi->dev, MAX6916_STATUS_REG, &data); + dev_info(&spi->dev, "MAX6916 RTC Status Reg = 0x%02x\n", data); + + rtc = devm_rtc_device_register(&spi->dev, "max6916", + &max6916_rtc_ops, THIS_MODULE); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); + + spi_set_drvdata(spi, rtc); + + return 0; +} + +static struct spi_driver max6916_driver = { + .driver = { + .name = "max6916", + }, + .probe = max6916_probe, +}; +module_spi_driver(max6916_driver); + +MODULE_DESCRIPTION("MAX6916 SPI RTC DRIVER"); +MODULE_AUTHOR("Venkat Prashanth B U <venkat.prashanth2498@gmail.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c new file mode 100644 index 000000000000..2f1772a358ca --- /dev/null +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -0,0 +1,198 @@ +#include <linux/bcd.h> +#include <linux/delay.h> +#include <linux/export.h> +#include <linux/mc146818rtc.h> + +#ifdef CONFIG_ACPI +#include <linux/acpi.h> +#endif + +/* + * Returns true if a clock update is in progress + */ +static inline unsigned char mc146818_is_updating(void) +{ + unsigned char uip; + unsigned long flags; + + spin_lock_irqsave(&rtc_lock, flags); + uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP); + spin_unlock_irqrestore(&rtc_lock, flags); + return uip; +} + +unsigned int mc146818_get_time(struct rtc_time *time) +{ + unsigned char ctrl; + unsigned long flags; + unsigned char century = 0; + +#ifdef CONFIG_MACH_DECSTATION + unsigned int real_year; +#endif + + /* + * read RTC once any update in progress is done. The update + * can take just over 2ms. We wait 20ms. There is no need to + * to poll-wait (up to 1s - eeccch) for the falling edge of RTC_UIP. + * If you need to know *exactly* when a second has started, enable + * periodic update complete interrupts, (via ioctl) and then + * immediately read /dev/rtc which will block until you get the IRQ. + * Once the read clears, read the RTC time (again via ioctl). Easy. + */ + if (mc146818_is_updating()) + mdelay(20); + + /* + * Only the values that we read from the RTC are set. We leave + * tm_wday, tm_yday and tm_isdst untouched. Even though the + * RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated + * by the RTC when initially set to a non-zero value. + */ + spin_lock_irqsave(&rtc_lock, flags); + time->tm_sec = CMOS_READ(RTC_SECONDS); + time->tm_min = CMOS_READ(RTC_MINUTES); + time->tm_hour = CMOS_READ(RTC_HOURS); + time->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH); + time->tm_mon = CMOS_READ(RTC_MONTH); + time->tm_year = CMOS_READ(RTC_YEAR); +#ifdef CONFIG_MACH_DECSTATION + real_year = CMOS_READ(RTC_DEC_YEAR); +#endif +#ifdef CONFIG_ACPI + if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && + acpi_gbl_FADT.century) + century = CMOS_READ(acpi_gbl_FADT.century); +#endif + ctrl = CMOS_READ(RTC_CONTROL); + spin_unlock_irqrestore(&rtc_lock, flags); + + if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + { + time->tm_sec = bcd2bin(time->tm_sec); + time->tm_min = bcd2bin(time->tm_min); + time->tm_hour = bcd2bin(time->tm_hour); + time->tm_mday = bcd2bin(time->tm_mday); + time->tm_mon = bcd2bin(time->tm_mon); + time->tm_year = bcd2bin(time->tm_year); + century = bcd2bin(century); + } + +#ifdef CONFIG_MACH_DECSTATION + time->tm_year += real_year - 72; +#endif + + if (century) + time->tm_year += (century - 19) * 100; + + /* + * Account for differences between how the RTC uses the values + * and how they are defined in a struct rtc_time; + */ + if (time->tm_year <= 69) + time->tm_year += 100; + + time->tm_mon--; + + return RTC_24H; +} +EXPORT_SYMBOL_GPL(mc146818_get_time); + +/* Set the current date and time in the real time clock. */ +int mc146818_set_time(struct rtc_time *time) +{ + unsigned long flags; + unsigned char mon, day, hrs, min, sec; + unsigned char save_control, save_freq_select; + unsigned int yrs; +#ifdef CONFIG_MACH_DECSTATION + unsigned int real_yrs, leap_yr; +#endif + unsigned char century = 0; + + yrs = time->tm_year; + mon = time->tm_mon + 1; /* tm_mon starts at zero */ + day = time->tm_mday; + hrs = time->tm_hour; + min = time->tm_min; + sec = time->tm_sec; + + if (yrs > 255) /* They are unsigned */ + return -EINVAL; + + spin_lock_irqsave(&rtc_lock, flags); +#ifdef CONFIG_MACH_DECSTATION + real_yrs = yrs; + leap_yr = ((!((yrs + 1900) % 4) && ((yrs + 1900) % 100)) || + !((yrs + 1900) % 400)); + yrs = 72; + + /* + * We want to keep the year set to 73 until March + * for non-leap years, so that Feb, 29th is handled + * correctly. + */ + if (!leap_yr && mon < 3) { + real_yrs--; + yrs = 73; + } +#endif + +#ifdef CONFIG_ACPI + if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && + acpi_gbl_FADT.century) { + century = (yrs + 1900) / 100; + yrs %= 100; + } +#endif + + /* These limits and adjustments are independent of + * whether the chip is in binary mode or not. + */ + if (yrs > 169) { + spin_unlock_irqrestore(&rtc_lock, flags); + return -EINVAL; + } + + if (yrs >= 100) + yrs -= 100; + + if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) + || RTC_ALWAYS_BCD) { + sec = bin2bcd(sec); + min = bin2bcd(min); + hrs = bin2bcd(hrs); + day = bin2bcd(day); + mon = bin2bcd(mon); + yrs = bin2bcd(yrs); + century = bin2bcd(century); + } + + save_control = CMOS_READ(RTC_CONTROL); + CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); + CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + +#ifdef CONFIG_MACH_DECSTATION + CMOS_WRITE(real_yrs, RTC_DEC_YEAR); +#endif + CMOS_WRITE(yrs, RTC_YEAR); + CMOS_WRITE(mon, RTC_MONTH); + CMOS_WRITE(day, RTC_DAY_OF_MONTH); + CMOS_WRITE(hrs, RTC_HOURS); + CMOS_WRITE(min, RTC_MINUTES); + CMOS_WRITE(sec, RTC_SECONDS); +#ifdef CONFIG_ACPI + if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && + acpi_gbl_FADT.century) + CMOS_WRITE(century, acpi_gbl_FADT.century); +#endif + + CMOS_WRITE(save_control, RTC_CONTROL); + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + + spin_unlock_irqrestore(&rtc_lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(mc146818_set_time); diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c index 0094d9bdd1e6..7334c44fa7c3 100644 --- a/drivers/rtc/rtc-mrst.c +++ b/drivers/rtc/rtc-mrst.c @@ -32,11 +32,11 @@ #include <linux/interrupt.h> #include <linux/spinlock.h> #include <linux/kernel.h> +#include <linux/mc146818rtc.h> #include <linux/module.h> #include <linux/init.h> #include <linux/sfi.h> -#include <asm-generic/rtc.h> #include <asm/intel_scu_ipc.h> #include <asm/intel-mid.h> #include <asm/intel_mid_vrtc.h> @@ -149,14 +149,6 @@ static int mrst_read_alarm(struct device *dev, struct rtc_wkalrm *t) if (mrst->irq <= 0) return -EIO; - /* Basic alarms only support hour, minute, and seconds fields. - * Some also support day and month, for alarms up to a year in - * the future. - */ - t->time.tm_mday = -1; - t->time.tm_mon = -1; - t->time.tm_year = -1; - /* vRTC only supports binary mode */ spin_lock_irq(&rtc_lock); t->time.tm_sec = vrtc_cmos_read(RTC_SECONDS_ALARM); diff --git a/drivers/rtc/rtc-pcf2123.c b/drivers/rtc/rtc-pcf2123.c index f22e060709e5..b4478cc92b55 100644 --- a/drivers/rtc/rtc-pcf2123.c +++ b/drivers/rtc/rtc-pcf2123.c @@ -96,7 +96,7 @@ #define CD_TMR_TE BIT(3) /* Countdown timer enable */ /* PCF2123_REG_OFFSET BITS */ -#define OFFSET_SIGN_BIT BIT(6) /* 2's complement sign bit */ +#define OFFSET_SIGN_BIT 6 /* 2's complement sign bit */ #define OFFSET_COARSE BIT(7) /* Coarse mode offset */ #define OFFSET_STEP (2170) /* Offset step in parts per billion */ @@ -217,7 +217,7 @@ static int pcf2123_read_offset(struct device *dev, long *offset) if (reg & OFFSET_COARSE) reg <<= 1; /* multiply by 2 and sign extend */ else - reg |= (reg & OFFSET_SIGN_BIT) << 1; /* sign extend only */ + reg = sign_extend32(reg, OFFSET_SIGN_BIT); *offset = ((long)reg) * OFFSET_STEP; diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index e8ddbb359d11..efb0a08ac117 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -16,6 +16,16 @@ #include <linux/rtc.h> #include <linux/module.h> +/* + * Information for this driver was pulled from the following datasheets. + * + * http://www.nxp.com/documents/data_sheet/PCF85063A.pdf + * http://www.nxp.com/documents/data_sheet/PCF85063TP.pdf + * + * PCF85063A -- Rev. 6 — 18 November 2015 + * PCF85063TP -- Rev. 4 — 6 May 2015 +*/ + #define PCF85063_REG_CTRL1 0x00 /* status */ #define PCF85063_REG_CTRL1_STOP BIT(5) #define PCF85063_REG_CTRL2 0x01 @@ -55,10 +65,22 @@ static int pcf85063_stop_clock(struct i2c_client *client, u8 *ctrl1) return 0; } -/* - * In the routines that deal directly with the pcf85063 hardware, we use - * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch. - */ +static int pcf85063_start_clock(struct i2c_client *client, u8 ctrl1) +{ + s32 ret; + + /* start the clock */ + ctrl1 &= PCF85063_REG_CTRL1_STOP; + + ret = i2c_smbus_write_byte_data(client, PCF85063_REG_CTRL1, ctrl1); + if (ret < 0) { + dev_err(&client->dev, "Failing to start the clock\n"); + return -EIO; + } + + return 0; +} + static int pcf85063_get_datetime(struct i2c_client *client, struct rtc_time *tm) { int rc; @@ -90,8 +112,7 @@ static int pcf85063_get_datetime(struct i2c_client *client, struct rtc_time *tm) tm->tm_wday = regs[4] & 0x07; tm->tm_mon = bcd2bin(regs[5] & 0x1F) - 1; /* rtc mn 1-12 */ tm->tm_year = bcd2bin(regs[6]); - if (tm->tm_year < 70) - tm->tm_year += 100; /* assume we are in 1970...2069 */ + tm->tm_year += 100; return rtc_valid_tm(tm); } @@ -99,13 +120,17 @@ static int pcf85063_get_datetime(struct i2c_client *client, struct rtc_time *tm) static int pcf85063_set_datetime(struct i2c_client *client, struct rtc_time *tm) { int rc; - u8 regs[8]; + u8 regs[7]; + u8 ctrl1; + + if ((tm->tm_year < 100) || (tm->tm_year > 199)) + return -EINVAL; /* * to accurately set the time, reset the divider chain and keep it in * reset state until all time/date registers are written */ - rc = pcf85063_stop_clock(client, ®s[7]); + rc = pcf85063_stop_clock(client, &ctrl1); if (rc != 0) return rc; @@ -125,14 +150,7 @@ static int pcf85063_set_datetime(struct i2c_client *client, struct rtc_time *tm) regs[5] = bin2bcd(tm->tm_mon + 1); /* year and century */ - regs[6] = bin2bcd(tm->tm_year % 100); - - /* - * after all time/date registers are written, let the 'address auto - * increment' feature wrap around and write register CTRL1 to re-enable - * the clock divider chain again - */ - regs[7] &= ~PCF85063_REG_CTRL1_STOP; + regs[6] = bin2bcd(tm->tm_year - 100); /* write all registers at once */ rc = i2c_smbus_write_i2c_block_data(client, PCF85063_REG_SC, @@ -142,6 +160,15 @@ static int pcf85063_set_datetime(struct i2c_client *client, struct rtc_time *tm) return rc; } + /* + * Write the control register as a separate action since the size of + * the register space is different between the PCF85063TP and + * PCF85063A devices. The rollover point can not be used. + */ + rc = pcf85063_start_clock(client, ctrl1); + if (rc != 0) + return rc; + return 0; } diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index b9ddbb001283..1227ceab61ee 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -341,14 +341,11 @@ static int pcf8563_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *tm) "%s: raw data is min=%02x, hr=%02x, mday=%02x, wday=%02x\n", __func__, buf[0], buf[1], buf[2], buf[3]); + tm->time.tm_sec = 0; tm->time.tm_min = bcd2bin(buf[0] & 0x7F); tm->time.tm_hour = bcd2bin(buf[1] & 0x3F); tm->time.tm_mday = bcd2bin(buf[2] & 0x3F); tm->time.tm_wday = bcd2bin(buf[3] & 0x7); - tm->time.tm_mon = -1; - tm->time.tm_year = -1; - tm->time.tm_yday = -1; - tm->time.tm_isdst = -1; err = pcf8563_get_alarm_mode(client, &tm->enabled, &tm->pending); if (err < 0) diff --git a/drivers/rtc/rtc-rc5t583.c b/drivers/rtc/rtc-rc5t583.c index f28d57788951..68ce77414bdc 100644 --- a/drivers/rtc/rtc-rc5t583.c +++ b/drivers/rtc/rtc-rc5t583.c @@ -128,6 +128,7 @@ static int rc5t583_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm) return ret; } + alm->time.tm_sec = 0; alm->time.tm_min = bcd2bin(alarm_data[0]); alm->time.tm_hour = bcd2bin(alarm_data[1]); alm->time.tm_mday = bcd2bin(alarm_data[2]); diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c index ef86229428fc..c8c757466783 100644 --- a/drivers/rtc/rtc-rs5c372.c +++ b/drivers/rtc/rtc-rs5c372.c @@ -341,12 +341,6 @@ static int rs5c_read_alarm(struct device *dev, struct rtc_wkalrm *t) t->time.tm_sec = 0; t->time.tm_min = bcd2bin(rs5c->regs[RS5C_REG_ALARM_A_MIN] & 0x7f); t->time.tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C_REG_ALARM_A_HOURS]); - t->time.tm_mday = -1; - t->time.tm_mon = -1; - t->time.tm_year = -1; - t->time.tm_wday = -1; - t->time.tm_yday = -1; - t->time.tm_isdst = -1; /* ... and status */ t->enabled = !!(rs5c->regs[RS5C_REG_CTRL1] & RS5C_CTRL1_AALE); diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c index f623038e586e..9a2f6a95d5a7 100644 --- a/drivers/rtc/rtc-rv8803.c +++ b/drivers/rtc/rtc-rv8803.c @@ -13,12 +13,15 @@ #include <linux/bcd.h> #include <linux/bitops.h> +#include <linux/log2.h> #include <linux/i2c.h> #include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/rtc.h> +#define RV8803_I2C_TRY_COUNT 4 + #define RV8803_SEC 0x00 #define RV8803_MIN 0x01 #define RV8803_HOUR 0x02 @@ -56,19 +59,85 @@ struct rv8803_data { u8 ctrl; }; +static int rv8803_read_reg(const struct i2c_client *client, u8 reg) +{ + int try = RV8803_I2C_TRY_COUNT; + s32 ret; + + /* + * There is a 61µs window during which the RTC does not acknowledge I2C + * transfers. In that case, ensure that there are multiple attempts. + */ + do + ret = i2c_smbus_read_byte_data(client, reg); + while ((ret == -ENXIO || ret == -EIO) && --try); + if (ret < 0) + dev_err(&client->dev, "Unable to read register 0x%02x\n", reg); + + return ret; +} + +static int rv8803_read_regs(const struct i2c_client *client, + u8 reg, u8 count, u8 *values) +{ + int try = RV8803_I2C_TRY_COUNT; + s32 ret; + + do + ret = i2c_smbus_read_i2c_block_data(client, reg, count, values); + while ((ret == -ENXIO || ret == -EIO) && --try); + if (ret != count) { + dev_err(&client->dev, + "Unable to read registers 0x%02x..0x%02x\n", + reg, reg + count - 1); + return ret < 0 ? ret : -EIO; + } + + return 0; +} + +static int rv8803_write_reg(const struct i2c_client *client, u8 reg, u8 value) +{ + int try = RV8803_I2C_TRY_COUNT; + s32 ret; + + do + ret = i2c_smbus_write_byte_data(client, reg, value); + while ((ret == -ENXIO || ret == -EIO) && --try); + if (ret) + dev_err(&client->dev, "Unable to write register 0x%02x\n", reg); + + return ret; +} + +static int rv8803_write_regs(const struct i2c_client *client, + u8 reg, u8 count, const u8 *values) +{ + int try = RV8803_I2C_TRY_COUNT; + s32 ret; + + do + ret = i2c_smbus_write_i2c_block_data(client, reg, count, + values); + while ((ret == -ENXIO || ret == -EIO) && --try); + if (ret) + dev_err(&client->dev, + "Unable to write registers 0x%02x..0x%02x\n", + reg, reg + count - 1); + + return ret; +} + static irqreturn_t rv8803_handle_irq(int irq, void *dev_id) { struct i2c_client *client = dev_id; struct rv8803_data *rv8803 = i2c_get_clientdata(client); unsigned long events = 0; - int flags, try = 0; + int flags; mutex_lock(&rv8803->flags_lock); - do { - flags = i2c_smbus_read_byte_data(client, RV8803_FLAG); - try++; - } while ((flags == -ENXIO) && (try < 3)); + flags = rv8803_read_reg(client, RV8803_FLAG); if (flags <= 0) { mutex_unlock(&rv8803->flags_lock); return IRQ_NONE; @@ -100,9 +169,8 @@ static irqreturn_t rv8803_handle_irq(int irq, void *dev_id) if (events) { rtc_update_irq(rv8803->rtc, 1, events); - i2c_smbus_write_byte_data(client, RV8803_FLAG, flags); - i2c_smbus_write_byte_data(rv8803->client, RV8803_CTRL, - rv8803->ctrl); + rv8803_write_reg(client, RV8803_FLAG, flags); + rv8803_write_reg(rv8803->client, RV8803_CTRL, rv8803->ctrl); } mutex_unlock(&rv8803->flags_lock); @@ -118,7 +186,7 @@ static int rv8803_get_time(struct device *dev, struct rtc_time *tm) u8 *date = date1; int ret, flags; - flags = i2c_smbus_read_byte_data(rv8803->client, RV8803_FLAG); + flags = rv8803_read_reg(rv8803->client, RV8803_FLAG); if (flags < 0) return flags; @@ -127,16 +195,14 @@ static int rv8803_get_time(struct device *dev, struct rtc_time *tm) return -EINVAL; } - ret = i2c_smbus_read_i2c_block_data(rv8803->client, RV8803_SEC, - 7, date); - if (ret != 7) - return ret < 0 ? ret : -EIO; + ret = rv8803_read_regs(rv8803->client, RV8803_SEC, 7, date); + if (ret) + return ret; if ((date1[RV8803_SEC] & 0x7f) == bin2bcd(59)) { - ret = i2c_smbus_read_i2c_block_data(rv8803->client, RV8803_SEC, - 7, date2); - if (ret != 7) - return ret < 0 ? ret : -EIO; + ret = rv8803_read_regs(rv8803->client, RV8803_SEC, 7, date2); + if (ret) + return ret; if ((date2[RV8803_SEC] & 0x7f) != bin2bcd(59)) date = date2; @@ -145,23 +211,33 @@ static int rv8803_get_time(struct device *dev, struct rtc_time *tm) tm->tm_sec = bcd2bin(date[RV8803_SEC] & 0x7f); tm->tm_min = bcd2bin(date[RV8803_MIN] & 0x7f); tm->tm_hour = bcd2bin(date[RV8803_HOUR] & 0x3f); - tm->tm_wday = ffs(date[RV8803_WEEK] & 0x7f); + tm->tm_wday = ilog2(date[RV8803_WEEK] & 0x7f); tm->tm_mday = bcd2bin(date[RV8803_DAY] & 0x3f); tm->tm_mon = bcd2bin(date[RV8803_MONTH] & 0x1f) - 1; tm->tm_year = bcd2bin(date[RV8803_YEAR]) + 100; - return rtc_valid_tm(tm); + return 0; } static int rv8803_set_time(struct device *dev, struct rtc_time *tm) { struct rv8803_data *rv8803 = dev_get_drvdata(dev); u8 date[7]; - int flags, ret; + int ctrl, flags, ret; if ((tm->tm_year < 100) || (tm->tm_year > 199)) return -EINVAL; + ctrl = rv8803_read_reg(rv8803->client, RV8803_CTRL); + if (ctrl < 0) + return ctrl; + + /* Stop the clock */ + ret = rv8803_write_reg(rv8803->client, RV8803_CTRL, + ctrl | RV8803_CTRL_RESET); + if (ret) + return ret; + date[RV8803_SEC] = bin2bcd(tm->tm_sec); date[RV8803_MIN] = bin2bcd(tm->tm_min); date[RV8803_HOUR] = bin2bcd(tm->tm_hour); @@ -170,21 +246,26 @@ static int rv8803_set_time(struct device *dev, struct rtc_time *tm) date[RV8803_MONTH] = bin2bcd(tm->tm_mon + 1); date[RV8803_YEAR] = bin2bcd(tm->tm_year - 100); - ret = i2c_smbus_write_i2c_block_data(rv8803->client, RV8803_SEC, - 7, date); - if (ret < 0) + ret = rv8803_write_regs(rv8803->client, RV8803_SEC, 7, date); + if (ret) + return ret; + + /* Restart the clock */ + ret = rv8803_write_reg(rv8803->client, RV8803_CTRL, + ctrl & ~RV8803_CTRL_RESET); + if (ret) return ret; mutex_lock(&rv8803->flags_lock); - flags = i2c_smbus_read_byte_data(rv8803->client, RV8803_FLAG); + flags = rv8803_read_reg(rv8803->client, RV8803_FLAG); if (flags < 0) { mutex_unlock(&rv8803->flags_lock); return flags; } - ret = i2c_smbus_write_byte_data(rv8803->client, RV8803_FLAG, - flags & ~RV8803_FLAG_V2F); + ret = rv8803_write_reg(rv8803->client, RV8803_FLAG, + flags & ~(RV8803_FLAG_V1F | RV8803_FLAG_V2F)); mutex_unlock(&rv8803->flags_lock); @@ -198,22 +279,18 @@ static int rv8803_get_alarm(struct device *dev, struct rtc_wkalrm *alrm) u8 alarmvals[3]; int flags, ret; - ret = i2c_smbus_read_i2c_block_data(client, RV8803_ALARM_MIN, - 3, alarmvals); - if (ret != 3) - return ret < 0 ? ret : -EIO; + ret = rv8803_read_regs(client, RV8803_ALARM_MIN, 3, alarmvals); + if (ret) + return ret; - flags = i2c_smbus_read_byte_data(client, RV8803_FLAG); + flags = rv8803_read_reg(client, RV8803_FLAG); if (flags < 0) return flags; alrm->time.tm_sec = 0; alrm->time.tm_min = bcd2bin(alarmvals[0] & 0x7f); alrm->time.tm_hour = bcd2bin(alarmvals[1] & 0x3f); - alrm->time.tm_wday = -1; alrm->time.tm_mday = bcd2bin(alarmvals[2] & 0x3f); - alrm->time.tm_mon = -1; - alrm->time.tm_year = -1; alrm->enabled = !!(rv8803->ctrl & RV8803_CTRL_AIE); alrm->pending = (flags & RV8803_FLAG_AF) && alrm->enabled; @@ -239,10 +316,10 @@ static int rv8803_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) mutex_lock(&rv8803->flags_lock); - ret = i2c_smbus_read_i2c_block_data(client, RV8803_FLAG, 2, ctrl); - if (ret != 2) { + ret = rv8803_read_regs(client, RV8803_FLAG, 2, ctrl); + if (ret) { mutex_unlock(&rv8803->flags_lock); - return ret < 0 ? ret : -EIO; + return ret; } alarmvals[0] = bin2bcd(alrm->time.tm_min); @@ -251,8 +328,8 @@ static int rv8803_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) if (rv8803->ctrl & (RV8803_CTRL_AIE | RV8803_CTRL_UIE)) { rv8803->ctrl &= ~(RV8803_CTRL_AIE | RV8803_CTRL_UIE); - err = i2c_smbus_write_byte_data(rv8803->client, RV8803_CTRL, - rv8803->ctrl); + err = rv8803_write_reg(rv8803->client, RV8803_CTRL, + rv8803->ctrl); if (err) { mutex_unlock(&rv8803->flags_lock); return err; @@ -260,13 +337,12 @@ static int rv8803_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) } ctrl[1] &= ~RV8803_FLAG_AF; - err = i2c_smbus_write_byte_data(rv8803->client, RV8803_FLAG, ctrl[1]); + err = rv8803_write_reg(rv8803->client, RV8803_FLAG, ctrl[1]); mutex_unlock(&rv8803->flags_lock); if (err) return err; - err = i2c_smbus_write_i2c_block_data(rv8803->client, RV8803_ALARM_MIN, - 3, alarmvals); + err = rv8803_write_regs(rv8803->client, RV8803_ALARM_MIN, 3, alarmvals); if (err) return err; @@ -276,8 +352,8 @@ static int rv8803_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) if (rv8803->rtc->aie_timer.enabled) rv8803->ctrl |= RV8803_CTRL_AIE; - err = i2c_smbus_write_byte_data(rv8803->client, RV8803_CTRL, - rv8803->ctrl); + err = rv8803_write_reg(rv8803->client, RV8803_CTRL, + rv8803->ctrl); if (err) return err; } @@ -306,21 +382,20 @@ static int rv8803_alarm_irq_enable(struct device *dev, unsigned int enabled) } mutex_lock(&rv8803->flags_lock); - flags = i2c_smbus_read_byte_data(client, RV8803_FLAG); + flags = rv8803_read_reg(client, RV8803_FLAG); if (flags < 0) { mutex_unlock(&rv8803->flags_lock); return flags; } flags &= ~(RV8803_FLAG_AF | RV8803_FLAG_UF); - err = i2c_smbus_write_byte_data(client, RV8803_FLAG, flags); + err = rv8803_write_reg(client, RV8803_FLAG, flags); mutex_unlock(&rv8803->flags_lock); if (err) return err; if (ctrl != rv8803->ctrl) { rv8803->ctrl = ctrl; - err = i2c_smbus_write_byte_data(client, RV8803_CTRL, - rv8803->ctrl); + err = rv8803_write_reg(client, RV8803_CTRL, rv8803->ctrl); if (err) return err; } @@ -336,7 +411,7 @@ static int rv8803_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) switch (cmd) { case RTC_VL_READ: - flags = i2c_smbus_read_byte_data(client, RV8803_FLAG); + flags = rv8803_read_reg(client, RV8803_FLAG); if (flags < 0) return flags; @@ -355,16 +430,16 @@ static int rv8803_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) case RTC_VL_CLR: mutex_lock(&rv8803->flags_lock); - flags = i2c_smbus_read_byte_data(client, RV8803_FLAG); + flags = rv8803_read_reg(client, RV8803_FLAG); if (flags < 0) { mutex_unlock(&rv8803->flags_lock); return flags; } flags &= ~(RV8803_FLAG_V1F | RV8803_FLAG_V2F); - ret = i2c_smbus_write_byte_data(client, RV8803_FLAG, flags); + ret = rv8803_write_reg(client, RV8803_FLAG, flags); mutex_unlock(&rv8803->flags_lock); - if (ret < 0) + if (ret) return ret; return 0; @@ -382,8 +457,8 @@ static ssize_t rv8803_nvram_write(struct file *filp, struct kobject *kobj, struct i2c_client *client = to_i2c_client(dev); int ret; - ret = i2c_smbus_write_byte_data(client, RV8803_RAM, buf[0]); - if (ret < 0) + ret = rv8803_write_reg(client, RV8803_RAM, buf[0]); + if (ret) return ret; return 1; @@ -397,7 +472,7 @@ static ssize_t rv8803_nvram_read(struct file *filp, struct kobject *kobj, struct i2c_client *client = to_i2c_client(dev); int ret; - ret = i2c_smbus_read_byte_data(client, RV8803_RAM); + ret = rv8803_read_reg(client, RV8803_RAM); if (ret < 0) return ret; @@ -427,7 +502,7 @@ static int rv8803_probe(struct i2c_client *client, { struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); struct rv8803_data *rv8803; - int err, flags, try = 0; + int err, flags; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_I2C_BLOCK)) { @@ -444,16 +519,7 @@ static int rv8803_probe(struct i2c_client *client, rv8803->client = client; i2c_set_clientdata(client, rv8803); - /* - * There is a 60µs window where the RTC may not reply on the i2c bus in - * that case, the transfer is not ACKed. In that case, ensure there are - * multiple attempts. - */ - do { - flags = i2c_smbus_read_byte_data(client, RV8803_FLAG); - try++; - } while ((flags == -ENXIO) && (try < 3)); - + flags = rv8803_read_reg(client, RV8803_FLAG); if (flags < 0) return flags; @@ -488,12 +554,7 @@ static int rv8803_probe(struct i2c_client *client, return PTR_ERR(rv8803->rtc); } - try = 0; - do { - err = i2c_smbus_write_byte_data(rv8803->client, RV8803_EXT, - RV8803_EXT_WADA); - try++; - } while ((err == -ENXIO) && (try < 3)); + err = rv8803_write_reg(rv8803->client, RV8803_EXT, RV8803_EXT_WADA); if (err) return err; diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index 772d221ec2d9..7163b91bb773 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -272,15 +272,9 @@ static int rx8010_read_alarm(struct device *dev, struct rtc_wkalrm *t) t->time.tm_min = bcd2bin(alarmvals[0] & 0x7f); t->time.tm_hour = bcd2bin(alarmvals[1] & 0x3f); - if (alarmvals[2] & RX8010_ALARM_AE) - t->time.tm_mday = -1; - else + if (!(alarmvals[2] & RX8010_ALARM_AE)) t->time.tm_mday = bcd2bin(alarmvals[2] & 0x7f); - t->time.tm_wday = -1; - t->time.tm_mon = -1; - t->time.tm_year = -1; - t->enabled = !!(rx8010->ctrlreg & RX8010_CTRL_AIE); t->pending = (flagreg & RX8010_FLAG_AF) && t->enabled; diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c index 9f105efbc546..2b85cc7a24e7 100644 --- a/drivers/rtc/rtc-rx8025.c +++ b/drivers/rtc/rtc-rx8025.c @@ -319,11 +319,6 @@ static int rx8025_read_alarm(struct device *dev, struct rtc_wkalrm *t) t->time.tm_hour = bcd2bin(ald[1] & 0x1f) % 12 + (ald[1] & 0x20 ? 12 : 0); - t->time.tm_wday = -1; - t->time.tm_mday = -1; - t->time.tm_mon = -1; - t->time.tm_year = -1; - dev_dbg(dev, "%s: date: %ds %dm %dh %dmd %dm %dy\n", __func__, t->time.tm_sec, t->time.tm_min, t->time.tm_hour, diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c index f40afdd0e5f5..5dab4665ca3b 100644 --- a/drivers/rtc/rtc-s35390a.c +++ b/drivers/rtc/rtc-s35390a.c @@ -15,6 +15,7 @@ #include <linux/bitrev.h> #include <linux/bcd.h> #include <linux/slab.h> +#include <linux/delay.h> #define S35390A_CMD_STATUS1 0 #define S35390A_CMD_STATUS2 1 @@ -34,10 +35,14 @@ #define S35390A_ALRM_BYTE_HOURS 1 #define S35390A_ALRM_BYTE_MINS 2 +/* flags for STATUS1 */ #define S35390A_FLAG_POC 0x01 #define S35390A_FLAG_BLD 0x02 +#define S35390A_FLAG_INT2 0x04 #define S35390A_FLAG_24H 0x40 #define S35390A_FLAG_RESET 0x80 + +/* flag for STATUS2 */ #define S35390A_FLAG_TEST 0x01 #define S35390A_INT2_MODE_MASK 0xF0 @@ -94,19 +99,63 @@ static int s35390a_get_reg(struct s35390a *s35390a, int reg, char *buf, int len) return 0; } -static int s35390a_reset(struct s35390a *s35390a) +/* + * Returns <0 on error, 0 if rtc is setup fine and 1 if the chip was reset. + * To keep the information if an irq is pending, pass the value read from + * STATUS1 to the caller. + */ +static int s35390a_reset(struct s35390a *s35390a, char *status1) { - char buf[1]; - - if (s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, buf, sizeof(buf)) < 0) - return -EIO; - - if (!(buf[0] & (S35390A_FLAG_POC | S35390A_FLAG_BLD))) + char buf; + int ret; + unsigned initcount = 0; + + ret = s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, status1, 1); + if (ret < 0) + return ret; + + if (*status1 & S35390A_FLAG_POC) + /* + * Do not communicate for 0.5 seconds since the power-on + * detection circuit is in operation. + */ + msleep(500); + else if (!(*status1 & S35390A_FLAG_BLD)) + /* + * If both POC and BLD are unset everything is fine. + */ return 0; - buf[0] |= (S35390A_FLAG_RESET | S35390A_FLAG_24H); - buf[0] &= 0xf0; - return s35390a_set_reg(s35390a, S35390A_CMD_STATUS1, buf, sizeof(buf)); + /* + * At least one of POC and BLD are set, so reinitialise chip. Keeping + * this information in the hardware to know later that the time isn't + * valid is unfortunately not possible because POC and BLD are cleared + * on read. So the reset is best done now. + * + * The 24H bit is kept over reset, so set it already here. + */ +initialize: + *status1 = S35390A_FLAG_24H; + buf = S35390A_FLAG_RESET | S35390A_FLAG_24H; + ret = s35390a_set_reg(s35390a, S35390A_CMD_STATUS1, &buf, 1); + + if (ret < 0) + return ret; + + ret = s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, &buf, 1); + if (ret < 0) + return ret; + + if (buf & (S35390A_FLAG_POC | S35390A_FLAG_BLD)) { + /* Try up to five times to reset the chip */ + if (initcount < 5) { + ++initcount; + goto initialize; + } else + return -EIO; + } + + return 1; } static int s35390a_disable_test_mode(struct s35390a *s35390a) @@ -217,12 +266,12 @@ static int s35390a_set_alarm(struct i2c_client *client, struct rtc_wkalrm *alm) alm->time.tm_min, alm->time.tm_hour, alm->time.tm_mday, alm->time.tm_mon, alm->time.tm_year, alm->time.tm_wday); - /* disable interrupt */ + /* disable interrupt (which deasserts the irq line) */ err = s35390a_set_reg(s35390a, S35390A_CMD_STATUS2, &sts, sizeof(sts)); if (err < 0) return err; - /* clear pending interrupt, if any */ + /* clear pending interrupt (in STATUS1 only), if any */ err = s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, &sts, sizeof(sts)); if (err < 0) return err; @@ -242,6 +291,8 @@ static int s35390a_set_alarm(struct i2c_client *client, struct rtc_wkalrm *alm) if (alm->time.tm_wday != -1) buf[S35390A_ALRM_BYTE_WDAY] = bin2bcd(alm->time.tm_wday) | 0x80; + else + buf[S35390A_ALRM_BYTE_WDAY] = 0; buf[S35390A_ALRM_BYTE_HOURS] = s35390a_hr2reg(s35390a, alm->time.tm_hour) | 0x80; @@ -269,23 +320,43 @@ static int s35390a_read_alarm(struct i2c_client *client, struct rtc_wkalrm *alm) if (err < 0) return err; - if (bitrev8(sts) != S35390A_INT2_MODE_ALARM) - return -EINVAL; + if ((bitrev8(sts) & S35390A_INT2_MODE_MASK) != S35390A_INT2_MODE_ALARM) { + /* + * When the alarm isn't enabled, the register to configure + * the alarm time isn't accessible. + */ + alm->enabled = 0; + return 0; + } else { + alm->enabled = 1; + } err = s35390a_get_reg(s35390a, S35390A_CMD_INT2_REG1, buf, sizeof(buf)); if (err < 0) return err; /* This chip returns the bits of each byte in reverse order */ - for (i = 0; i < 3; ++i) { + for (i = 0; i < 3; ++i) buf[i] = bitrev8(buf[i]); - buf[i] &= ~0x80; - } - alm->time.tm_wday = bcd2bin(buf[S35390A_ALRM_BYTE_WDAY]); - alm->time.tm_hour = s35390a_reg2hr(s35390a, - buf[S35390A_ALRM_BYTE_HOURS]); - alm->time.tm_min = bcd2bin(buf[S35390A_ALRM_BYTE_MINS]); + /* + * B0 of the three matching registers is an enable flag. Iff it is set + * the configured value is used for matching. + */ + if (buf[S35390A_ALRM_BYTE_WDAY] & 0x80) + alm->time.tm_wday = + bcd2bin(buf[S35390A_ALRM_BYTE_WDAY] & ~0x80); + + if (buf[S35390A_ALRM_BYTE_HOURS] & 0x80) + alm->time.tm_hour = + s35390a_reg2hr(s35390a, + buf[S35390A_ALRM_BYTE_HOURS] & ~0x80); + + if (buf[S35390A_ALRM_BYTE_MINS] & 0x80) + alm->time.tm_min = bcd2bin(buf[S35390A_ALRM_BYTE_MINS] & ~0x80); + + /* alarm triggers always at s=0 */ + alm->time.tm_sec = 0; dev_dbg(&client->dev, "%s: alm is mins=%d, hours=%d, wday=%d\n", __func__, alm->time.tm_min, alm->time.tm_hour, @@ -327,11 +398,11 @@ static struct i2c_driver s35390a_driver; static int s35390a_probe(struct i2c_client *client, const struct i2c_device_id *id) { - int err; + int err, err_reset; unsigned int i; struct s35390a *s35390a; struct rtc_time tm; - char buf[1]; + char buf, status1; if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { err = -ENODEV; @@ -360,29 +431,35 @@ static int s35390a_probe(struct i2c_client *client, } } - err = s35390a_reset(s35390a); - if (err < 0) { + err_reset = s35390a_reset(s35390a, &status1); + if (err_reset < 0) { + err = err_reset; dev_err(&client->dev, "error resetting chip\n"); goto exit_dummy; } - err = s35390a_disable_test_mode(s35390a); - if (err < 0) { - dev_err(&client->dev, "error disabling test mode\n"); - goto exit_dummy; - } - - err = s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, buf, sizeof(buf)); - if (err < 0) { - dev_err(&client->dev, "error checking 12/24 hour mode\n"); - goto exit_dummy; - } - if (buf[0] & S35390A_FLAG_24H) + if (status1 & S35390A_FLAG_24H) s35390a->twentyfourhour = 1; else s35390a->twentyfourhour = 0; - if (s35390a_get_datetime(client, &tm) < 0) + if (status1 & S35390A_FLAG_INT2) { + /* disable alarm (and maybe test mode) */ + buf = 0; + err = s35390a_set_reg(s35390a, S35390A_CMD_STATUS2, &buf, 1); + if (err < 0) { + dev_err(&client->dev, "error disabling alarm"); + goto exit_dummy; + } + } else { + err = s35390a_disable_test_mode(s35390a); + if (err < 0) { + dev_err(&client->dev, "error disabling test mode\n"); + goto exit_dummy; + } + } + + if (err_reset > 0 || s35390a_get_datetime(client, &tm) < 0) dev_warn(&client->dev, "clock needs to be set\n"); device_set_wakeup_capable(&client->dev, 1); @@ -395,6 +472,10 @@ static int s35390a_probe(struct i2c_client *client, err = PTR_ERR(s35390a->rtc); goto exit_dummy; } + + if (status1 & S35390A_FLAG_INT2) + rtc_update_irq(s35390a->rtc, 1, RTC_AF); + return 0; exit_dummy: diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index d01ad7e8078e..d44fb34df8fe 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -149,12 +149,14 @@ static int s3c_rtc_setfreq(struct s3c_rtc *info, int freq) if (!is_power_of_2(freq)) return -EINVAL; + s3c_rtc_enable_clk(info); spin_lock_irq(&info->pie_lock); if (info->data->set_freq) info->data->set_freq(info, freq); spin_unlock_irq(&info->pie_lock); + s3c_rtc_disable_clk(info); return 0; } @@ -264,35 +266,23 @@ static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm) /* decode the alarm enable field */ if (alm_en & S3C2410_RTCALM_SECEN) alm_tm->tm_sec = bcd2bin(alm_tm->tm_sec); - else - alm_tm->tm_sec = -1; if (alm_en & S3C2410_RTCALM_MINEN) alm_tm->tm_min = bcd2bin(alm_tm->tm_min); - else - alm_tm->tm_min = -1; if (alm_en & S3C2410_RTCALM_HOUREN) alm_tm->tm_hour = bcd2bin(alm_tm->tm_hour); - else - alm_tm->tm_hour = -1; if (alm_en & S3C2410_RTCALM_DAYEN) alm_tm->tm_mday = bcd2bin(alm_tm->tm_mday); - else - alm_tm->tm_mday = -1; if (alm_en & S3C2410_RTCALM_MONEN) { alm_tm->tm_mon = bcd2bin(alm_tm->tm_mon); alm_tm->tm_mon -= 1; - } else { - alm_tm->tm_mon = -1; } if (alm_en & S3C2410_RTCALM_YEAREN) alm_tm->tm_year = bcd2bin(alm_tm->tm_year); - else - alm_tm->tm_year = -1; return 0; } @@ -577,8 +567,6 @@ static int s3c_rtc_probe(struct platform_device *pdev) s3c_rtc_setfreq(info, 1); - s3c_rtc_disable_clk(info); - return 0; err_nortc: diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c index a45845a571e5..17b6235d67a5 100644 --- a/drivers/rtc/rtc-sh.c +++ b/drivers/rtc/rtc-sh.c @@ -481,7 +481,6 @@ static int sh_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *wkalrm) tm->tm_mon = sh_rtc_read_alarm_value(rtc, RMONAR); if (tm->tm_mon > 0) tm->tm_mon -= 1; /* RTC is 1-12, tm_mon is 0-11 */ - tm->tm_year = 0xffff; wkalrm->enabled = (readb(rtc->regbase + RCR1) & RCR1_AIE) ? 1 : 0; @@ -500,52 +499,13 @@ static inline void sh_rtc_write_alarm_value(struct sh_rtc *rtc, writeb(bin2bcd(value) | AR_ENB, rtc->regbase + reg_off); } -static int sh_rtc_check_alarm(struct rtc_time *tm) -{ - /* - * The original rtc says anything > 0xc0 is "don't care" or "match - * all" - most users use 0xff but rtc-dev uses -1 for the same thing. - * The original rtc doesn't support years - some things use -1 and - * some 0xffff. We use -1 to make out tests easier. - */ - if (tm->tm_year == 0xffff) - tm->tm_year = -1; - if (tm->tm_mon >= 0xff) - tm->tm_mon = -1; - if (tm->tm_mday >= 0xff) - tm->tm_mday = -1; - if (tm->tm_wday >= 0xff) - tm->tm_wday = -1; - if (tm->tm_hour >= 0xff) - tm->tm_hour = -1; - if (tm->tm_min >= 0xff) - tm->tm_min = -1; - if (tm->tm_sec >= 0xff) - tm->tm_sec = -1; - - if (tm->tm_year > 9999 || - tm->tm_mon >= 12 || - tm->tm_mday == 0 || tm->tm_mday >= 32 || - tm->tm_wday >= 7 || - tm->tm_hour >= 24 || - tm->tm_min >= 60 || - tm->tm_sec >= 60) - return -EINVAL; - - return 0; -} - static int sh_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *wkalrm) { struct platform_device *pdev = to_platform_device(dev); struct sh_rtc *rtc = platform_get_drvdata(pdev); unsigned int rcr1; struct rtc_time *tm = &wkalrm->time; - int mon, err; - - err = sh_rtc_check_alarm(tm); - if (unlikely(err < 0)) - return err; + int mon; spin_lock_irq(&rtc->lock); diff --git a/drivers/rtc/rtc-tegra.c b/drivers/rtc/rtc-tegra.c index 60232bd366ef..15ac597d54da 100644 --- a/drivers/rtc/rtc-tegra.c +++ b/drivers/rtc/rtc-tegra.c @@ -179,12 +179,6 @@ static int tegra_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) if (sec == 0) { /* alarm is disabled. */ alarm->enabled = 0; - alarm->time.tm_mon = -1; - alarm->time.tm_mday = -1; - alarm->time.tm_year = -1; - alarm->time.tm_hour = -1; - alarm->time.tm_min = -1; - alarm->time.tm_sec = -1; } else { /* alarm is enabled. */ alarm->enabled = 1; diff --git a/drivers/rtc/rtc-v3020.c b/drivers/rtc/rtc-v3020.c index 7a0436329d6c..1f3117b5a83c 100644 --- a/drivers/rtc/rtc-v3020.c +++ b/drivers/rtc/rtc-v3020.c @@ -25,7 +25,7 @@ #include <linux/rtc.h> #include <linux/types.h> #include <linux/bcd.h> -#include <linux/rtc-v3020.h> +#include <linux/platform_data/rtc-v3020.h> #include <linux/delay.h> #include <linux/gpio.h> #include <linux/slab.h> diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 8973d34ce5ba..fb1b56a71475 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1643,9 +1643,18 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, u8 *sense = NULL; int expires; + cqr = (struct dasd_ccw_req *) intparm; if (IS_ERR(irb)) { switch (PTR_ERR(irb)) { case -EIO: + if (cqr && cqr->status == DASD_CQR_CLEAR_PENDING) { + device = (struct dasd_device *) cqr->startdev; + cqr->status = DASD_CQR_CLEARED; + dasd_device_clear_timer(device); + wake_up(&dasd_flush_wq); + dasd_schedule_device_bh(device); + return; + } break; case -ETIMEDOUT: DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s: " @@ -1661,7 +1670,6 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, } now = get_tod_clock(); - cqr = (struct dasd_ccw_req *) intparm; /* check for conditions that should be handled immediately */ if (!cqr || !(scsw_dstat(&irb->scsw) == (DEV_STAT_CHN_END | DEV_STAT_DEV_END) && diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index fd2eff440098..98bbec44bcd0 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -5078,6 +5078,8 @@ static int dasd_eckd_read_message_buffer(struct dasd_device *device, return PTR_ERR(cqr); } + cqr->lpm = lpum; +retry: cqr->startdev = device; cqr->memdev = device; cqr->block = NULL; @@ -5122,6 +5124,14 @@ static int dasd_eckd_read_message_buffer(struct dasd_device *device, (prssdp + 1); memcpy(messages, message_buf, sizeof(struct dasd_rssd_messages)); + } else if (cqr->lpm) { + /* + * on z/VM we might not be able to do I/O on the requested path + * but instead we get the required information on any path + * so retry with open path mask + */ + cqr->lpm = 0; + goto retry; } else DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "Reading messages failed with rc=%d\n" diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 7ada078ffdd0..6a58bc8f46e2 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -762,7 +762,6 @@ static int io_subchannel_initialize_dev(struct subchannel *sch, priv->state = DEV_STATE_NOT_OPER; priv->dev_id.devno = sch->schib.pmcw.dev; priv->dev_id.ssid = sch->schid.ssid; - priv->schid = sch->schid; INIT_WORK(&priv->todo_work, ccw_device_todo); INIT_LIST_HEAD(&priv->cmb_list); @@ -1000,7 +999,6 @@ static int ccw_device_move_to_sch(struct ccw_device *cdev, put_device(&old_sch->dev); /* Initialize new subchannel. */ spin_lock_irq(sch->lock); - cdev->private->schid = sch->schid; cdev->ccwlock = sch->lock; if (!sch_is_pseudo_sch(sch)) sch_set_cdev(sch, cdev); diff --git a/drivers/s390/cio/device_status.c b/drivers/s390/cio/device_status.c index 15b56a15db15..9bc3512374c9 100644 --- a/drivers/s390/cio/device_status.c +++ b/drivers/s390/cio/device_status.c @@ -26,6 +26,7 @@ static void ccw_device_msg_control_check(struct ccw_device *cdev, struct irb *irb) { + struct subchannel *sch = to_subchannel(cdev->dev.parent); char dbf_text[15]; if (!scsw_is_valid_cstat(&irb->scsw) || @@ -36,10 +37,10 @@ ccw_device_msg_control_check(struct ccw_device *cdev, struct irb *irb) "received" " ... device %04x on subchannel 0.%x.%04x, dev_stat " ": %02X sch_stat : %02X\n", - cdev->private->dev_id.devno, cdev->private->schid.ssid, - cdev->private->schid.sch_no, + cdev->private->dev_id.devno, sch->schid.ssid, + sch->schid.sch_no, scsw_dstat(&irb->scsw), scsw_cstat(&irb->scsw)); - sprintf(dbf_text, "chk%x", cdev->private->schid.sch_no); + sprintf(dbf_text, "chk%x", sch->schid.sch_no); CIO_TRACE_EVENT(0, dbf_text); CIO_HEX_EVENT(0, irb, sizeof(struct irb)); } diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h index 8975060af96c..220f49145b2f 100644 --- a/drivers/s390/cio/io_sch.h +++ b/drivers/s390/cio/io_sch.h @@ -120,7 +120,6 @@ struct ccw_device_private { int state; /* device state */ atomic_t onoff; struct ccw_dev_id dev_id; /* device id */ - struct subchannel_id schid; /* subchannel number */ struct ccw_request req; /* internal I/O request */ int iretry; u8 pgid_valid_mask; /* mask of valid PGIDs */ diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 4bb5262f7aee..71bf9bded485 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -686,6 +686,15 @@ static void qdio_kick_handler(struct qdio_q *q) q->qdio_error = 0; } +static inline int qdio_tasklet_schedule(struct qdio_q *q) +{ + if (likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE)) { + tasklet_schedule(&q->tasklet); + return 0; + } + return -EPERM; +} + static void __qdio_inbound_processing(struct qdio_q *q) { qperf_inc(q, tasklet_inbound); @@ -698,10 +707,8 @@ static void __qdio_inbound_processing(struct qdio_q *q) if (!qdio_inbound_q_done(q)) { /* means poll time is not yet over */ qperf_inc(q, tasklet_inbound_resched); - if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) { - tasklet_schedule(&q->tasklet); + if (!qdio_tasklet_schedule(q)) return; - } } qdio_stop_polling(q); @@ -711,8 +718,7 @@ static void __qdio_inbound_processing(struct qdio_q *q) */ if (!qdio_inbound_q_done(q)) { qperf_inc(q, tasklet_inbound_resched2); - if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) - tasklet_schedule(&q->tasklet); + qdio_tasklet_schedule(q); } } @@ -869,16 +875,15 @@ static void __qdio_outbound_processing(struct qdio_q *q) * is noticed and outbound_handler is called after some time. */ if (qdio_outbound_q_done(q)) - del_timer(&q->u.out.timer); + del_timer_sync(&q->u.out.timer); else - if (!timer_pending(&q->u.out.timer)) + if (!timer_pending(&q->u.out.timer) && + likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE)) mod_timer(&q->u.out.timer, jiffies + 10 * HZ); return; sched: - if (unlikely(q->irq_ptr->state == QDIO_IRQ_STATE_STOPPED)) - return; - tasklet_schedule(&q->tasklet); + qdio_tasklet_schedule(q); } /* outbound tasklet */ @@ -892,9 +897,7 @@ void qdio_outbound_timer(unsigned long data) { struct qdio_q *q = (struct qdio_q *)data; - if (unlikely(q->irq_ptr->state == QDIO_IRQ_STATE_STOPPED)) - return; - tasklet_schedule(&q->tasklet); + qdio_tasklet_schedule(q); } static inline void qdio_check_outbound_after_thinint(struct qdio_q *q) @@ -907,7 +910,7 @@ static inline void qdio_check_outbound_after_thinint(struct qdio_q *q) for_each_output_queue(q->irq_ptr, out, i) if (!qdio_outbound_q_done(out)) - tasklet_schedule(&out->tasklet); + qdio_tasklet_schedule(out); } static void __tiqdio_inbound_processing(struct qdio_q *q) @@ -929,10 +932,8 @@ static void __tiqdio_inbound_processing(struct qdio_q *q) if (!qdio_inbound_q_done(q)) { qperf_inc(q, tasklet_inbound_resched); - if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) { - tasklet_schedule(&q->tasklet); + if (!qdio_tasklet_schedule(q)) return; - } } qdio_stop_polling(q); @@ -942,8 +943,7 @@ static void __tiqdio_inbound_processing(struct qdio_q *q) */ if (!qdio_inbound_q_done(q)) { qperf_inc(q, tasklet_inbound_resched2); - if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) - tasklet_schedule(&q->tasklet); + qdio_tasklet_schedule(q); } } @@ -977,7 +977,7 @@ static void qdio_int_handler_pci(struct qdio_irq *irq_ptr) int i; struct qdio_q *q; - if (unlikely(irq_ptr->state == QDIO_IRQ_STATE_STOPPED)) + if (unlikely(irq_ptr->state != QDIO_IRQ_STATE_ACTIVE)) return; for_each_input_queue(irq_ptr, q, i) { @@ -1003,7 +1003,7 @@ static void qdio_int_handler_pci(struct qdio_irq *irq_ptr) continue; if (need_siga_sync(q) && need_siga_sync_out_after_pci(q)) qdio_siga_sync_q(q); - tasklet_schedule(&q->tasklet); + qdio_tasklet_schedule(q); } } @@ -1066,10 +1066,12 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) { struct qdio_irq *irq_ptr = cdev->private->qdio_data; + struct subchannel_id schid; int cstat, dstat; if (!intparm || !irq_ptr) { - DBF_ERROR("qint:%4x", cdev->private->schid.sch_no); + ccw_device_get_schid(cdev, &schid); + DBF_ERROR("qint:%4x", schid.sch_no); return; } @@ -1122,12 +1124,14 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, int qdio_get_ssqd_desc(struct ccw_device *cdev, struct qdio_ssqd_desc *data) { + struct subchannel_id schid; if (!cdev || !cdev->private) return -EINVAL; - DBF_EVENT("get ssqd:%4x", cdev->private->schid.sch_no); - return qdio_setup_get_ssqd(NULL, &cdev->private->schid, data); + ccw_device_get_schid(cdev, &schid); + DBF_EVENT("get ssqd:%4x", schid.sch_no); + return qdio_setup_get_ssqd(NULL, &schid, data); } EXPORT_SYMBOL_GPL(qdio_get_ssqd_desc); @@ -1141,7 +1145,7 @@ static void qdio_shutdown_queues(struct ccw_device *cdev) tasklet_kill(&q->tasklet); for_each_output_queue(irq_ptr, q, i) { - del_timer(&q->u.out.timer); + del_timer_sync(&q->u.out.timer); tasklet_kill(&q->tasklet); } } @@ -1154,14 +1158,15 @@ static void qdio_shutdown_queues(struct ccw_device *cdev) int qdio_shutdown(struct ccw_device *cdev, int how) { struct qdio_irq *irq_ptr = cdev->private->qdio_data; + struct subchannel_id schid; int rc; - unsigned long flags; if (!irq_ptr) return -ENODEV; WARN_ON_ONCE(irqs_disabled()); - DBF_EVENT("qshutdown:%4x", cdev->private->schid.sch_no); + ccw_device_get_schid(cdev, &schid); + DBF_EVENT("qshutdown:%4x", schid.sch_no); mutex_lock(&irq_ptr->setup_mutex); /* @@ -1184,7 +1189,7 @@ int qdio_shutdown(struct ccw_device *cdev, int how) qdio_shutdown_debug_entries(irq_ptr); /* cleanup subchannel */ - spin_lock_irqsave(get_ccwdev_lock(cdev), flags); + spin_lock_irq(get_ccwdev_lock(cdev)); if (how & QDIO_FLAG_CLEANUP_USING_CLEAR) rc = ccw_device_clear(cdev, QDIO_DOING_CLEANUP); @@ -1198,12 +1203,12 @@ int qdio_shutdown(struct ccw_device *cdev, int how) } qdio_set_state(irq_ptr, QDIO_IRQ_STATE_CLEANUP); - spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); + spin_unlock_irq(get_ccwdev_lock(cdev)); wait_event_interruptible_timeout(cdev->private->wait_q, irq_ptr->state == QDIO_IRQ_STATE_INACTIVE || irq_ptr->state == QDIO_IRQ_STATE_ERR, 10 * HZ); - spin_lock_irqsave(get_ccwdev_lock(cdev), flags); + spin_lock_irq(get_ccwdev_lock(cdev)); no_cleanup: qdio_shutdown_thinint(irq_ptr); @@ -1211,7 +1216,7 @@ no_cleanup: /* restore interrupt handler */ if ((void *)cdev->handler == (void *)qdio_int_handler) cdev->handler = irq_ptr->orig_handler; - spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); + spin_unlock_irq(get_ccwdev_lock(cdev)); qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); mutex_unlock(&irq_ptr->setup_mutex); @@ -1228,11 +1233,13 @@ EXPORT_SYMBOL_GPL(qdio_shutdown); int qdio_free(struct ccw_device *cdev) { struct qdio_irq *irq_ptr = cdev->private->qdio_data; + struct subchannel_id schid; if (!irq_ptr) return -ENODEV; - DBF_EVENT("qfree:%4x", cdev->private->schid.sch_no); + ccw_device_get_schid(cdev, &schid); + DBF_EVENT("qfree:%4x", schid.sch_no); DBF_DEV_EVENT(DBF_ERR, irq_ptr, "dbf abandoned"); mutex_lock(&irq_ptr->setup_mutex); @@ -1251,9 +1258,11 @@ EXPORT_SYMBOL_GPL(qdio_free); */ int qdio_allocate(struct qdio_initialize *init_data) { + struct subchannel_id schid; struct qdio_irq *irq_ptr; - DBF_EVENT("qallocate:%4x", init_data->cdev->private->schid.sch_no); + ccw_device_get_schid(init_data->cdev, &schid); + DBF_EVENT("qallocate:%4x", schid.sch_no); if ((init_data->no_input_qs && !init_data->input_handler) || (init_data->no_output_qs && !init_data->output_handler)) @@ -1331,20 +1340,18 @@ static void qdio_detect_hsicq(struct qdio_irq *irq_ptr) */ int qdio_establish(struct qdio_initialize *init_data) { - struct qdio_irq *irq_ptr; struct ccw_device *cdev = init_data->cdev; - unsigned long saveflags; + struct subchannel_id schid; + struct qdio_irq *irq_ptr; int rc; - DBF_EVENT("qestablish:%4x", cdev->private->schid.sch_no); + ccw_device_get_schid(cdev, &schid); + DBF_EVENT("qestablish:%4x", schid.sch_no); irq_ptr = cdev->private->qdio_data; if (!irq_ptr) return -ENODEV; - if (cdev->private->state != DEV_STATE_ONLINE) - return -EINVAL; - mutex_lock(&irq_ptr->setup_mutex); qdio_setup_irq(init_data); @@ -1361,17 +1368,14 @@ int qdio_establish(struct qdio_initialize *init_data) irq_ptr->ccw.count = irq_ptr->equeue.count; irq_ptr->ccw.cda = (u32)((addr_t)irq_ptr->qdr); - spin_lock_irqsave(get_ccwdev_lock(cdev), saveflags); + spin_lock_irq(get_ccwdev_lock(cdev)); ccw_device_set_options_mask(cdev, 0); rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ESTABLISH, 0, 0); + spin_unlock_irq(get_ccwdev_lock(cdev)); if (rc) { DBF_ERROR("%4x est IO ERR", irq_ptr->schid.sch_no); DBF_ERROR("rc:%4x", rc); - } - spin_unlock_irqrestore(get_ccwdev_lock(cdev), saveflags); - - if (rc) { mutex_unlock(&irq_ptr->setup_mutex); qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); return rc; @@ -1407,19 +1411,17 @@ EXPORT_SYMBOL_GPL(qdio_establish); */ int qdio_activate(struct ccw_device *cdev) { + struct subchannel_id schid; struct qdio_irq *irq_ptr; int rc; - unsigned long saveflags; - DBF_EVENT("qactivate:%4x", cdev->private->schid.sch_no); + ccw_device_get_schid(cdev, &schid); + DBF_EVENT("qactivate:%4x", schid.sch_no); irq_ptr = cdev->private->qdio_data; if (!irq_ptr) return -ENODEV; - if (cdev->private->state != DEV_STATE_ONLINE) - return -EINVAL; - mutex_lock(&irq_ptr->setup_mutex); if (irq_ptr->state == QDIO_IRQ_STATE_INACTIVE) { rc = -EBUSY; @@ -1431,19 +1433,17 @@ int qdio_activate(struct ccw_device *cdev) irq_ptr->ccw.count = irq_ptr->aqueue.count; irq_ptr->ccw.cda = 0; - spin_lock_irqsave(get_ccwdev_lock(cdev), saveflags); + spin_lock_irq(get_ccwdev_lock(cdev)); ccw_device_set_options(cdev, CCWDEV_REPORT_ALL); rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ACTIVATE, 0, DOIO_DENY_PREFETCH); + spin_unlock_irq(get_ccwdev_lock(cdev)); if (rc) { DBF_ERROR("%4x act IO ERR", irq_ptr->schid.sch_no); DBF_ERROR("rc:%4x", rc); - } - spin_unlock_irqrestore(get_ccwdev_lock(cdev), saveflags); - - if (rc) goto out; + } if (is_thinint_irq(irq_ptr)) tiqdio_add_input_queues(irq_ptr); @@ -1585,10 +1585,11 @@ static int handle_outbound(struct qdio_q *q, unsigned int callflags, /* in case of SIGA errors we must process the error immediately */ if (used >= q->u.out.scan_threshold || rc) - tasklet_schedule(&q->tasklet); + qdio_tasklet_schedule(q); else /* free the SBALs in case of no further traffic */ - if (!timer_pending(&q->u.out.timer)) + if (!timer_pending(&q->u.out.timer) && + likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE)) mod_timer(&q->u.out.timer, jiffies + HZ); return rc; } diff --git a/drivers/s390/virtio/Makefile b/drivers/s390/virtio/Makefile index 241891a57caf..df40692a9011 100644 --- a/drivers/s390/virtio/Makefile +++ b/drivers/s390/virtio/Makefile @@ -6,4 +6,8 @@ # it under the terms of the GNU General Public License (version 2 only) # as published by the Free Software Foundation. -obj-$(CONFIG_S390_GUEST) += kvm_virtio.o virtio_ccw.o +s390-virtio-objs := virtio_ccw.o +ifdef CONFIG_S390_GUEST_OLD_TRANSPORT +s390-virtio-objs += kvm_virtio.o +endif +obj-$(CONFIG_S390_GUEST) += $(s390-virtio-objs) diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c index 1d060fd293a3..5e5c11f37b24 100644 --- a/drivers/s390/virtio/kvm_virtio.c +++ b/drivers/s390/virtio/kvm_virtio.c @@ -458,6 +458,8 @@ static int __init kvm_devices_init(void) if (test_devices_support(total_memory_size) < 0) return -ENODEV; + pr_warn("The s390-virtio transport is deprecated. Please switch to a modern host providing virtio-ccw.\n"); + rc = vmem_add_mapping(total_memory_size, PAGE_SIZE); if (rc) return rc; @@ -482,7 +484,7 @@ static int __init kvm_devices_init(void) } /* code for early console output with virtio_console */ -static __init int early_put_chars(u32 vtermno, const char *buf, int count) +static int early_put_chars(u32 vtermno, const char *buf, int count) { char scratch[17]; unsigned int len = count; diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 1918f5483b23..7d1b4317eccc 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -838,6 +838,23 @@ config SCSI_IBMVSCSI To compile this driver as a module, choose M here: the module will be called ibmvscsi. +config SCSI_IBMVSCSIS + tristate "IBM Virtual SCSI Server support" + depends on PPC_PSERIES && TARGET_CORE && SCSI && PCI + help + This is the IBM POWER Virtual SCSI Target Server + This driver uses the SRP protocol for communication betwen servers + guest and/or the host that run on the same server. + More information on VSCSI protocol can be found at www.power.org + + The userspace configuration needed to initialize the driver can be + be found here: + + https://github.com/powervm/ibmvscsis/wiki/Configuration + + To compile this driver as a module, choose M here: the + module will be called ibmvscsis. + config SCSI_IBMVFC tristate "IBM Virtual FC support" depends on PPC_PSERIES && SCSI diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 862ab4efad61..d5397987e731 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -128,6 +128,7 @@ obj-$(CONFIG_SCSI_SNI_53C710) += 53c700.o sni_53c710.o obj-$(CONFIG_SCSI_NSP32) += nsp32.o obj-$(CONFIG_SCSI_IPR) += ipr.o obj-$(CONFIG_SCSI_IBMVSCSI) += ibmvscsi/ +obj-$(CONFIG_SCSI_IBMVSCSIS) += ibmvscsi_tgt/ obj-$(CONFIG_SCSI_IBMVFC) += ibmvscsi/ obj-$(CONFIG_SCSI_HPTIOP) += hptiop.o obj-$(CONFIG_SCSI_STEX) += stex.o diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index 860008d42466..661bb94e2548 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -778,7 +778,7 @@ static void notify_shutdown(struct cxlflash_cfg *cfg, bool wait) { struct afu *afu = cfg->afu; struct device *dev = &cfg->dev->dev; - struct sisl_global_map __iomem *global = &afu->afu_map->global; + struct sisl_global_map __iomem *global; struct dev_dependent_vals *ddv; u64 reg, status; int i, retry_cnt = 0; @@ -787,6 +787,14 @@ static void notify_shutdown(struct cxlflash_cfg *cfg, bool wait) if (!(ddv->flags & CXLFLASH_NOTIFY_SHUTDOWN)) return; + if (!afu || !afu->afu_map) { + dev_dbg(dev, "%s: The problem state area is not mapped\n", + __func__); + return; + } + + global = &afu->afu_map->global; + /* Notify AFU */ for (i = 0; i < NUM_FC_PORTS; i++) { reg = readq_be(&global->fc_regs[i][FC_CONFIG2 / 8]); diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index c8a4305c7662..9bd41a35a78a 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -92,6 +92,8 @@ static struct fcoe_interface static int fcoe_fip_recv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); +static int fcoe_fip_vlan_recv(struct sk_buff *, struct net_device *, + struct packet_type *, struct net_device *); static void fcoe_fip_send(struct fcoe_ctlr *, struct sk_buff *); static void fcoe_update_src_mac(struct fc_lport *, u8 *); @@ -363,6 +365,12 @@ static int fcoe_interface_setup(struct fcoe_interface *fcoe, fcoe->fip_packet_type.dev = netdev; dev_add_pack(&fcoe->fip_packet_type); + if (netdev != real_dev) { + fcoe->fip_vlan_packet_type.func = fcoe_fip_vlan_recv; + fcoe->fip_vlan_packet_type.type = htons(ETH_P_FIP); + fcoe->fip_vlan_packet_type.dev = real_dev; + dev_add_pack(&fcoe->fip_vlan_packet_type); + } return 0; } @@ -450,6 +458,8 @@ static void fcoe_interface_remove(struct fcoe_interface *fcoe) */ __dev_remove_pack(&fcoe->fcoe_packet_type); __dev_remove_pack(&fcoe->fip_packet_type); + if (netdev != fcoe->realdev) + __dev_remove_pack(&fcoe->fip_vlan_packet_type); synchronize_net(); /* Delete secondary MAC addresses */ @@ -520,6 +530,29 @@ static int fcoe_fip_recv(struct sk_buff *skb, struct net_device *netdev, } /** + * fcoe_fip_vlan_recv() - Handler for received FIP VLAN discovery frames + * @skb: The receive skb + * @netdev: The associated net device + * @ptype: The packet_type structure which was used to register this handler + * @orig_dev: The original net_device the the skb was received on. + * (in case dev is a bond) + * + * Returns: 0 for success + */ +static int fcoe_fip_vlan_recv(struct sk_buff *skb, struct net_device *netdev, + struct packet_type *ptype, + struct net_device *orig_dev) +{ + struct fcoe_interface *fcoe; + struct fcoe_ctlr *ctlr; + + fcoe = container_of(ptype, struct fcoe_interface, fip_vlan_packet_type); + ctlr = fcoe_to_ctlr(fcoe); + fcoe_ctlr_recv(ctlr, skb); + return 0; +} + +/** * fcoe_port_send() - Send an Ethernet-encapsulated FIP/FCoE frame * @port: The FCoE port * @skb: The FIP/FCoE packet to be sent @@ -539,7 +572,21 @@ static void fcoe_port_send(struct fcoe_port *port, struct sk_buff *skb) */ static void fcoe_fip_send(struct fcoe_ctlr *fip, struct sk_buff *skb) { - skb->dev = fcoe_from_ctlr(fip)->netdev; + struct fcoe_interface *fcoe = fcoe_from_ctlr(fip); + struct fip_frame { + struct ethhdr eth; + struct fip_header fip; + } __packed *frame; + + /* + * Use default VLAN for FIP VLAN discovery protocol + */ + frame = (struct fip_frame *)skb->data; + if (frame->fip.fip_op == ntohs(FIP_OP_VLAN) && + fcoe->realdev != fcoe->netdev) + skb->dev = fcoe->realdev; + else + skb->dev = fcoe->netdev; fcoe_port_send(lport_priv(fip->lp), skb); } @@ -2448,7 +2495,7 @@ static int __init fcoe_init(void) if (rc) { printk(KERN_ERR "failed to register an fcoe transport, check " "if libfcoe is loaded\n"); - return rc; + goto out_destroy; } mutex_lock(&fcoe_config_mutex); @@ -2471,6 +2518,7 @@ static int __init fcoe_init(void) out_free: mutex_unlock(&fcoe_config_mutex); +out_destroy: destroy_workqueue(fcoe_wq); return rc; } diff --git a/drivers/scsi/fcoe/fcoe.h b/drivers/scsi/fcoe/fcoe.h index 2b53672bf932..6aa4820f6cc0 100644 --- a/drivers/scsi/fcoe/fcoe.h +++ b/drivers/scsi/fcoe/fcoe.h @@ -80,6 +80,7 @@ struct fcoe_interface { struct net_device *realdev; struct packet_type fcoe_packet_type; struct packet_type fip_packet_type; + struct packet_type fip_vlan_packet_type; struct fc_exch_mgr *oem; u8 removed; u8 priority; diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h index 8fae03215a85..5c70a52ad346 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.h +++ b/drivers/scsi/ibmvscsi/ibmvfc.h @@ -26,7 +26,7 @@ #include <linux/list.h> #include <linux/types.h> -#include "viosrp.h" +#include <scsi/viosrp.h> #define IBMVFC_NAME "ibmvfc" #define IBMVFC_DRIVER_VERSION "1.0.11" diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.h b/drivers/scsi/ibmvscsi/ibmvscsi.h index 1067367395cd..e0f6c3aeb4ee 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.h +++ b/drivers/scsi/ibmvscsi/ibmvscsi.h @@ -33,7 +33,7 @@ #include <linux/list.h> #include <linux/completion.h> #include <linux/interrupt.h> -#include "viosrp.h" +#include <scsi/viosrp.h> struct scsi_cmnd; struct Scsi_Host; diff --git a/drivers/scsi/ibmvscsi/viosrp.h b/drivers/scsi/ibmvscsi/viosrp.h deleted file mode 100644 index c1ab8a4c3161..000000000000 --- a/drivers/scsi/ibmvscsi/viosrp.h +++ /dev/null @@ -1,225 +0,0 @@ -/*****************************************************************************/ -/* srp.h -- SCSI RDMA Protocol definitions */ -/* */ -/* Written By: Colin Devilbis, IBM Corporation */ -/* */ -/* Copyright (C) 2003 IBM Corporation */ -/* */ -/* This program is free software; you can redistribute it and/or modify */ -/* it under the terms of the GNU General Public License as published by */ -/* the Free Software Foundation; either version 2 of the License, or */ -/* (at your option) any later version. */ -/* */ -/* This program is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ -/* GNU General Public License for more details. */ -/* */ -/* You should have received a copy of the GNU General Public License */ -/* along with this program; if not, write to the Free Software */ -/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -/* */ -/* */ -/* This file contains structures and definitions for IBM RPA (RS/6000 */ -/* platform architecture) implementation of the SRP (SCSI RDMA Protocol) */ -/* standard. SRP is used on IBM iSeries and pSeries platforms to send SCSI */ -/* commands between logical partitions. */ -/* */ -/* SRP Information Units (IUs) are sent on a "Command/Response Queue" (CRQ) */ -/* between partitions. The definitions in this file are architected, */ -/* and cannot be changed without breaking compatibility with other versions */ -/* of Linux and other operating systems (AIX, OS/400) that talk this protocol*/ -/* between logical partitions */ -/*****************************************************************************/ -#ifndef VIOSRP_H -#define VIOSRP_H -#include <scsi/srp.h> - -#define SRP_VERSION "16.a" -#define SRP_MAX_IU_LEN 256 -#define SRP_MAX_LOC_LEN 32 - -union srp_iu { - struct srp_login_req login_req; - struct srp_login_rsp login_rsp; - struct srp_login_rej login_rej; - struct srp_i_logout i_logout; - struct srp_t_logout t_logout; - struct srp_tsk_mgmt tsk_mgmt; - struct srp_cmd cmd; - struct srp_rsp rsp; - u8 reserved[SRP_MAX_IU_LEN]; -}; - -enum viosrp_crq_headers { - VIOSRP_CRQ_FREE = 0x00, - VIOSRP_CRQ_CMD_RSP = 0x80, - VIOSRP_CRQ_INIT_RSP = 0xC0, - VIOSRP_CRQ_XPORT_EVENT = 0xFF -}; - -enum viosrp_crq_init_formats { - VIOSRP_CRQ_INIT = 0x01, - VIOSRP_CRQ_INIT_COMPLETE = 0x02 -}; - -enum viosrp_crq_formats { - VIOSRP_SRP_FORMAT = 0x01, - VIOSRP_MAD_FORMAT = 0x02, - VIOSRP_OS400_FORMAT = 0x03, - VIOSRP_AIX_FORMAT = 0x04, - VIOSRP_LINUX_FORMAT = 0x05, - VIOSRP_INLINE_FORMAT = 0x06 -}; - -enum viosrp_crq_status { - VIOSRP_OK = 0x0, - VIOSRP_NONRECOVERABLE_ERR = 0x1, - VIOSRP_VIOLATES_MAX_XFER = 0x2, - VIOSRP_PARTNER_PANIC = 0x3, - VIOSRP_DEVICE_BUSY = 0x8, - VIOSRP_ADAPTER_FAIL = 0x10, - VIOSRP_OK2 = 0x99, -}; - -struct viosrp_crq { - u8 valid; /* used by RPA */ - u8 format; /* SCSI vs out-of-band */ - u8 reserved; - u8 status; /* non-scsi failure? (e.g. DMA failure) */ - __be16 timeout; /* in seconds */ - __be16 IU_length; /* in bytes */ - __be64 IU_data_ptr; /* the TCE for transferring data */ -}; - -/* MADs are Management requests above and beyond the IUs defined in the SRP - * standard. - */ -enum viosrp_mad_types { - VIOSRP_EMPTY_IU_TYPE = 0x01, - VIOSRP_ERROR_LOG_TYPE = 0x02, - VIOSRP_ADAPTER_INFO_TYPE = 0x03, - VIOSRP_CAPABILITIES_TYPE = 0x05, - VIOSRP_ENABLE_FAST_FAIL = 0x08, -}; - -enum viosrp_mad_status { - VIOSRP_MAD_SUCCESS = 0x00, - VIOSRP_MAD_NOT_SUPPORTED = 0xF1, - VIOSRP_MAD_FAILED = 0xF7, -}; - -enum viosrp_capability_type { - MIGRATION_CAPABILITIES = 0x01, - RESERVATION_CAPABILITIES = 0x02, -}; - -enum viosrp_capability_support { - SERVER_DOES_NOT_SUPPORTS_CAP = 0x0, - SERVER_SUPPORTS_CAP = 0x01, - SERVER_CAP_DATA = 0x02, -}; - -enum viosrp_reserve_type { - CLIENT_RESERVE_SCSI_2 = 0x01, -}; - -enum viosrp_capability_flag { - CLIENT_MIGRATED = 0x01, - CLIENT_RECONNECT = 0x02, - CAP_LIST_SUPPORTED = 0x04, - CAP_LIST_DATA = 0x08, -}; - -/* - * Common MAD header - */ -struct mad_common { - __be32 type; - __be16 status; - __be16 length; - __be64 tag; -}; - -/* - * All SRP (and MAD) requests normally flow from the - * client to the server. There is no way for the server to send - * an asynchronous message back to the client. The Empty IU is used - * to hang out a meaningless request to the server so that it can respond - * asynchrouously with something like a SCSI AER - */ -struct viosrp_empty_iu { - struct mad_common common; - __be64 buffer; - __be32 port; -}; - -struct viosrp_error_log { - struct mad_common common; - __be64 buffer; -}; - -struct viosrp_adapter_info { - struct mad_common common; - __be64 buffer; -}; - -struct viosrp_fast_fail { - struct mad_common common; -}; - -struct viosrp_capabilities { - struct mad_common common; - __be64 buffer; -}; - -struct mad_capability_common { - __be32 cap_type; - __be16 length; - __be16 server_support; -}; - -struct mad_reserve_cap { - struct mad_capability_common common; - __be32 type; -}; - -struct mad_migration_cap { - struct mad_capability_common common; - __be32 ecl; -}; - -struct capabilities{ - __be32 flags; - char name[SRP_MAX_LOC_LEN]; - char loc[SRP_MAX_LOC_LEN]; - struct mad_migration_cap migration; - struct mad_reserve_cap reserve; -}; - -union mad_iu { - struct viosrp_empty_iu empty_iu; - struct viosrp_error_log error_log; - struct viosrp_adapter_info adapter_info; - struct viosrp_fast_fail fast_fail; - struct viosrp_capabilities capabilities; -}; - -union viosrp_iu { - union srp_iu srp; - union mad_iu mad; -}; - -struct mad_adapter_info_data { - char srp_version[8]; - char partition_name[96]; - __be32 partition_number; -#define SRP_MAD_VERSION_1 1 - __be32 mad_version; -#define SRP_MAD_OS_LINUX 2 -#define SRP_MAD_OS_AIX 3 - __be32 os_type; - __be32 port_max_txu[8]; /* per-port maximum transfer */ -}; - -#endif diff --git a/drivers/scsi/ibmvscsi_tgt/Makefile b/drivers/scsi/ibmvscsi_tgt/Makefile new file mode 100644 index 000000000000..0c060ce64cb0 --- /dev/null +++ b/drivers/scsi/ibmvscsi_tgt/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_SCSI_IBMVSCSIS) += ibmvscsis.o + +ibmvscsis-y := libsrp.o ibmvscsi_tgt.o diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c new file mode 100644 index 000000000000..b29fef9d0f27 --- /dev/null +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -0,0 +1,4087 @@ +/******************************************************************************* + * IBM Virtual SCSI Target Driver + * Copyright (C) 2003-2005 Dave Boutcher (boutcher@us.ibm.com) IBM Corp. + * Santiago Leon (santil@us.ibm.com) IBM Corp. + * Linda Xie (lxie@us.ibm.com) IBM Corp. + * + * Copyright (C) 2005-2011 FUJITA Tomonori <tomof@acm.org> + * Copyright (C) 2010 Nicholas A. Bellinger <nab@kernel.org> + * + * Authors: Bryant G. Ly <bryantly@linux.vnet.ibm.com> + * Authors: Michael Cyr <mikecyr@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + ****************************************************************************/ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/string.h> + +#include <target/target_core_base.h> +#include <target/target_core_fabric.h> + +#include <asm/hvcall.h> +#include <asm/vio.h> + +#include <scsi/viosrp.h> + +#include "ibmvscsi_tgt.h" + +#define IBMVSCSIS_VERSION "v0.2" + +#define INITIAL_SRP_LIMIT 800 +#define DEFAULT_MAX_SECTORS 256 + +static uint max_vdma_size = MAX_H_COPY_RDMA; + +static char system_id[SYS_ID_NAME_LEN] = ""; +static char partition_name[PARTITION_NAMELEN] = "UNKNOWN"; +static uint partition_number = -1; + +/* Adapter list and lock to control it */ +static DEFINE_SPINLOCK(ibmvscsis_dev_lock); +static LIST_HEAD(ibmvscsis_dev_list); + +static long ibmvscsis_parse_command(struct scsi_info *vscsi, + struct viosrp_crq *crq); + +static void ibmvscsis_adapter_idle(struct scsi_info *vscsi); + +static void ibmvscsis_determine_resid(struct se_cmd *se_cmd, + struct srp_rsp *rsp) +{ + u32 residual_count = se_cmd->residual_count; + + if (!residual_count) + return; + + if (se_cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) { + if (se_cmd->data_direction == DMA_TO_DEVICE) { + /* residual data from an underflow write */ + rsp->flags = SRP_RSP_FLAG_DOUNDER; + rsp->data_out_res_cnt = cpu_to_be32(residual_count); + } else if (se_cmd->data_direction == DMA_FROM_DEVICE) { + /* residual data from an underflow read */ + rsp->flags = SRP_RSP_FLAG_DIUNDER; + rsp->data_in_res_cnt = cpu_to_be32(residual_count); + } + } else if (se_cmd->se_cmd_flags & SCF_OVERFLOW_BIT) { + if (se_cmd->data_direction == DMA_TO_DEVICE) { + /* residual data from an overflow write */ + rsp->flags = SRP_RSP_FLAG_DOOVER; + rsp->data_out_res_cnt = cpu_to_be32(residual_count); + } else if (se_cmd->data_direction == DMA_FROM_DEVICE) { + /* residual data from an overflow read */ + rsp->flags = SRP_RSP_FLAG_DIOVER; + rsp->data_in_res_cnt = cpu_to_be32(residual_count); + } + } +} + +/** + * connection_broken() - Determine if the connection to the client is good + * @vscsi: Pointer to our adapter structure + * + * This function attempts to send a ping MAD to the client. If the call to + * queue the request returns H_CLOSED then the connection has been broken + * and the function returns TRUE. + * + * EXECUTION ENVIRONMENT: + * Interrupt or Process environment + */ +static bool connection_broken(struct scsi_info *vscsi) +{ + struct viosrp_crq *crq; + u64 buffer[2] = { 0, 0 }; + long h_return_code; + bool rc = false; + + /* create a PING crq */ + crq = (struct viosrp_crq *)&buffer; + crq->valid = VALID_CMD_RESP_EL; + crq->format = MESSAGE_IN_CRQ; + crq->status = PING; + + h_return_code = h_send_crq(vscsi->dds.unit_id, + cpu_to_be64(buffer[MSG_HI]), + cpu_to_be64(buffer[MSG_LOW])); + + pr_debug("connection_broken: rc %ld\n", h_return_code); + + if (h_return_code == H_CLOSED) + rc = true; + + return rc; +} + +/** + * ibmvscsis_unregister_command_q() - Helper Function-Unregister Command Queue + * @vscsi: Pointer to our adapter structure + * + * This function calls h_free_q then frees the interrupt bit etc. + * It must release the lock before doing so because of the time it can take + * for h_free_crq in PHYP + * NOTE: the caller must make sure that state and or flags will prevent + * interrupt handler from scheduling work. + * NOTE: anyone calling this function may need to set the CRQ_CLOSED flag + * we can't do it here, because we don't have the lock + * + * EXECUTION ENVIRONMENT: + * Process level + */ +static long ibmvscsis_unregister_command_q(struct scsi_info *vscsi) +{ + long qrc; + long rc = ADAPT_SUCCESS; + int ticks = 0; + + do { + qrc = h_free_crq(vscsi->dds.unit_id); + switch (qrc) { + case H_SUCCESS: + break; + + case H_HARDWARE: + case H_PARAMETER: + dev_err(&vscsi->dev, "unregister_command_q: error from h_free_crq %ld\n", + qrc); + rc = ERROR; + break; + + case H_BUSY: + case H_LONG_BUSY_ORDER_1_MSEC: + /* msleep not good for small values */ + usleep_range(1000, 2000); + ticks += 1; + break; + case H_LONG_BUSY_ORDER_10_MSEC: + usleep_range(10000, 20000); + ticks += 10; + break; + case H_LONG_BUSY_ORDER_100_MSEC: + msleep(100); + ticks += 100; + break; + case H_LONG_BUSY_ORDER_1_SEC: + ssleep(1); + ticks += 1000; + break; + case H_LONG_BUSY_ORDER_10_SEC: + ssleep(10); + ticks += 10000; + break; + case H_LONG_BUSY_ORDER_100_SEC: + ssleep(100); + ticks += 100000; + break; + default: + dev_err(&vscsi->dev, "unregister_command_q: unknown error %ld from h_free_crq\n", + qrc); + rc = ERROR; + break; + } + + /* + * dont wait more then 300 seconds + * ticks are in milliseconds more or less + */ + if (ticks > 300000 && qrc != H_SUCCESS) { + rc = ERROR; + dev_err(&vscsi->dev, "Excessive wait for h_free_crq\n"); + } + } while (qrc != H_SUCCESS && rc == ADAPT_SUCCESS); + + pr_debug("Freeing CRQ: phyp rc %ld, rc %ld\n", qrc, rc); + + return rc; +} + +/** + * ibmvscsis_delete_client_info() - Helper function to Delete Client Info + * @vscsi: Pointer to our adapter structure + * @client_closed: True if client closed its queue + * + * Deletes information specific to the client when the client goes away + * + * EXECUTION ENVIRONMENT: + * Interrupt or Process + */ +static void ibmvscsis_delete_client_info(struct scsi_info *vscsi, + bool client_closed) +{ + vscsi->client_cap = 0; + + /* + * Some things we don't want to clear if we're closing the queue, + * because some clients don't resend the host handshake when they + * get a transport event. + */ + if (client_closed) + vscsi->client_data.os_type = 0; +} + +/** + * ibmvscsis_free_command_q() - Free Command Queue + * @vscsi: Pointer to our adapter structure + * + * This function calls unregister_command_q, then clears interrupts and + * any pending interrupt acknowledgments associated with the command q. + * It also clears memory if there is no error. + * + * PHYP did not meet the PAPR architecture so that we must give up the + * lock. This causes a timing hole regarding state change. To close the + * hole this routine does accounting on any change that occurred during + * the time the lock is not held. + * NOTE: must give up and then acquire the interrupt lock, the caller must + * make sure that state and or flags will prevent interrupt handler from + * scheduling work. + * + * EXECUTION ENVIRONMENT: + * Process level, interrupt lock is held + */ +static long ibmvscsis_free_command_q(struct scsi_info *vscsi) +{ + int bytes; + u32 flags_under_lock; + u16 state_under_lock; + long rc = ADAPT_SUCCESS; + + if (!(vscsi->flags & CRQ_CLOSED)) { + vio_disable_interrupts(vscsi->dma_dev); + + state_under_lock = vscsi->new_state; + flags_under_lock = vscsi->flags; + vscsi->phyp_acr_state = 0; + vscsi->phyp_acr_flags = 0; + + spin_unlock_bh(&vscsi->intr_lock); + rc = ibmvscsis_unregister_command_q(vscsi); + spin_lock_bh(&vscsi->intr_lock); + + if (state_under_lock != vscsi->new_state) + vscsi->phyp_acr_state = vscsi->new_state; + + vscsi->phyp_acr_flags = ((~flags_under_lock) & vscsi->flags); + + if (rc == ADAPT_SUCCESS) { + bytes = vscsi->cmd_q.size * PAGE_SIZE; + memset(vscsi->cmd_q.base_addr, 0, bytes); + vscsi->cmd_q.index = 0; + vscsi->flags |= CRQ_CLOSED; + + ibmvscsis_delete_client_info(vscsi, false); + } + + pr_debug("free_command_q: flags 0x%x, state 0x%hx, acr_flags 0x%x, acr_state 0x%hx\n", + vscsi->flags, vscsi->state, vscsi->phyp_acr_flags, + vscsi->phyp_acr_state); + } + return rc; +} + +/** + * ibmvscsis_cmd_q_dequeue() - Get valid Command element + * @mask: Mask to use in case index wraps + * @current_index: Current index into command queue + * @base_addr: Pointer to start of command queue + * + * Returns a pointer to a valid command element or NULL, if the command + * queue is empty + * + * EXECUTION ENVIRONMENT: + * Interrupt environment, interrupt lock held + */ +static struct viosrp_crq *ibmvscsis_cmd_q_dequeue(uint mask, + uint *current_index, + struct viosrp_crq *base_addr) +{ + struct viosrp_crq *ptr; + + ptr = base_addr + *current_index; + + if (ptr->valid) { + *current_index = (*current_index + 1) & mask; + dma_rmb(); + } else { + ptr = NULL; + } + + return ptr; +} + +/** + * ibmvscsis_send_init_message() - send initialize message to the client + * @vscsi: Pointer to our adapter structure + * @format: Which Init Message format to send + * + * EXECUTION ENVIRONMENT: + * Interrupt environment interrupt lock held + */ +static long ibmvscsis_send_init_message(struct scsi_info *vscsi, u8 format) +{ + struct viosrp_crq *crq; + u64 buffer[2] = { 0, 0 }; + long rc; + + crq = (struct viosrp_crq *)&buffer; + crq->valid = VALID_INIT_MSG; + crq->format = format; + rc = h_send_crq(vscsi->dds.unit_id, cpu_to_be64(buffer[MSG_HI]), + cpu_to_be64(buffer[MSG_LOW])); + + return rc; +} + +/** + * ibmvscsis_check_init_msg() - Check init message valid + * @vscsi: Pointer to our adapter structure + * @format: Pointer to return format of Init Message, if any. + * Set to UNUSED_FORMAT if no Init Message in queue. + * + * Checks if an initialize message was queued by the initiatior + * after the queue was created and before the interrupt was enabled. + * + * EXECUTION ENVIRONMENT: + * Process level only, interrupt lock held + */ +static long ibmvscsis_check_init_msg(struct scsi_info *vscsi, uint *format) +{ + struct viosrp_crq *crq; + long rc = ADAPT_SUCCESS; + + crq = ibmvscsis_cmd_q_dequeue(vscsi->cmd_q.mask, &vscsi->cmd_q.index, + vscsi->cmd_q.base_addr); + if (!crq) { + *format = (uint)UNUSED_FORMAT; + } else if (crq->valid == VALID_INIT_MSG && crq->format == INIT_MSG) { + *format = (uint)INIT_MSG; + crq->valid = INVALIDATE_CMD_RESP_EL; + dma_rmb(); + + /* + * the caller has ensured no initialize message was + * sent after the queue was + * created so there should be no other message on the queue. + */ + crq = ibmvscsis_cmd_q_dequeue(vscsi->cmd_q.mask, + &vscsi->cmd_q.index, + vscsi->cmd_q.base_addr); + if (crq) { + *format = (uint)(crq->format); + rc = ERROR; + crq->valid = INVALIDATE_CMD_RESP_EL; + dma_rmb(); + } + } else { + *format = (uint)(crq->format); + rc = ERROR; + crq->valid = INVALIDATE_CMD_RESP_EL; + dma_rmb(); + } + + return rc; +} + +/** + * ibmvscsis_establish_new_q() - Establish new CRQ queue + * @vscsi: Pointer to our adapter structure + * @new_state: New state being established after resetting the queue + * + * Must be called with interrupt lock held. + */ +static long ibmvscsis_establish_new_q(struct scsi_info *vscsi, uint new_state) +{ + long rc = ADAPT_SUCCESS; + uint format; + + vscsi->flags &= PRESERVE_FLAG_FIELDS; + vscsi->rsp_q_timer.timer_pops = 0; + vscsi->debit = 0; + vscsi->credit = 0; + + rc = vio_enable_interrupts(vscsi->dma_dev); + if (rc) { + pr_warn("reset_queue: failed to enable interrupts, rc %ld\n", + rc); + return rc; + } + + rc = ibmvscsis_check_init_msg(vscsi, &format); + if (rc) { + dev_err(&vscsi->dev, "reset_queue: check_init_msg failed, rc %ld\n", + rc); + return rc; + } + + if (format == UNUSED_FORMAT && new_state == WAIT_CONNECTION) { + rc = ibmvscsis_send_init_message(vscsi, INIT_MSG); + switch (rc) { + case H_SUCCESS: + case H_DROPPED: + case H_CLOSED: + rc = ADAPT_SUCCESS; + break; + + case H_PARAMETER: + case H_HARDWARE: + break; + + default: + vscsi->state = UNDEFINED; + rc = H_HARDWARE; + break; + } + } + + return rc; +} + +/** + * ibmvscsis_reset_queue() - Reset CRQ Queue + * @vscsi: Pointer to our adapter structure + * @new_state: New state to establish after resetting the queue + * + * This function calls h_free_q and then calls h_reg_q and does all + * of the bookkeeping to get us back to where we can communicate. + * + * Actually, we don't always call h_free_crq. A problem was discovered + * where one partition would close and reopen his queue, which would + * cause his partner to get a transport event, which would cause him to + * close and reopen his queue, which would cause the original partition + * to get a transport event, etc., etc. To prevent this, we don't + * actually close our queue if the client initiated the reset, (i.e. + * either we got a transport event or we have detected that the client's + * queue is gone) + * + * EXECUTION ENVIRONMENT: + * Process environment, called with interrupt lock held + */ +static void ibmvscsis_reset_queue(struct scsi_info *vscsi, uint new_state) +{ + int bytes; + long rc = ADAPT_SUCCESS; + + pr_debug("reset_queue: flags 0x%x\n", vscsi->flags); + + /* don't reset, the client did it for us */ + if (vscsi->flags & (CLIENT_FAILED | TRANS_EVENT)) { + vscsi->flags &= PRESERVE_FLAG_FIELDS; + vscsi->rsp_q_timer.timer_pops = 0; + vscsi->debit = 0; + vscsi->credit = 0; + vscsi->state = new_state; + vio_enable_interrupts(vscsi->dma_dev); + } else { + rc = ibmvscsis_free_command_q(vscsi); + if (rc == ADAPT_SUCCESS) { + vscsi->state = new_state; + + bytes = vscsi->cmd_q.size * PAGE_SIZE; + rc = h_reg_crq(vscsi->dds.unit_id, + vscsi->cmd_q.crq_token, bytes); + if (rc == H_CLOSED || rc == H_SUCCESS) { + rc = ibmvscsis_establish_new_q(vscsi, + new_state); + } + + if (rc != ADAPT_SUCCESS) { + pr_debug("reset_queue: reg_crq rc %ld\n", rc); + + vscsi->state = ERR_DISCONNECTED; + vscsi->flags |= RESPONSE_Q_DOWN; + ibmvscsis_free_command_q(vscsi); + } + } else { + vscsi->state = ERR_DISCONNECTED; + vscsi->flags |= RESPONSE_Q_DOWN; + } + } +} + +/** + * ibmvscsis_free_cmd_resources() - Free command resources + * @vscsi: Pointer to our adapter structure + * @cmd: Command which is not longer in use + * + * Must be called with interrupt lock held. + */ +static void ibmvscsis_free_cmd_resources(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd) +{ + struct iu_entry *iue = cmd->iue; + + switch (cmd->type) { + case TASK_MANAGEMENT: + case SCSI_CDB: + /* + * When the queue goes down this value is cleared, so it + * cannot be cleared in this general purpose function. + */ + if (vscsi->debit) + vscsi->debit -= 1; + break; + case ADAPTER_MAD: + vscsi->flags &= ~PROCESSING_MAD; + break; + case UNSET_TYPE: + break; + default: + dev_err(&vscsi->dev, "free_cmd_resources unknown type %d\n", + cmd->type); + break; + } + + cmd->iue = NULL; + list_add_tail(&cmd->list, &vscsi->free_cmd); + srp_iu_put(iue); + + if (list_empty(&vscsi->active_q) && list_empty(&vscsi->schedule_q) && + list_empty(&vscsi->waiting_rsp) && (vscsi->flags & WAIT_FOR_IDLE)) { + vscsi->flags &= ~WAIT_FOR_IDLE; + complete(&vscsi->wait_idle); + } +} + +/** + * ibmvscsis_disconnect() - Helper function to disconnect + * @work: Pointer to work_struct, gives access to our adapter structure + * + * An error has occurred or the driver received a Transport event, + * and the driver is requesting that the command queue be de-registered + * in a safe manner. If there is no outstanding I/O then we can stop the + * queue. If we are restarting the queue it will be reflected in the + * the state of the adapter. + * + * EXECUTION ENVIRONMENT: + * Process environment + */ +static void ibmvscsis_disconnect(struct work_struct *work) +{ + struct scsi_info *vscsi = container_of(work, struct scsi_info, + proc_work); + u16 new_state; + bool wait_idle = false; + long rc = ADAPT_SUCCESS; + + spin_lock_bh(&vscsi->intr_lock); + new_state = vscsi->new_state; + vscsi->new_state = 0; + + pr_debug("disconnect: flags 0x%x, state 0x%hx\n", vscsi->flags, + vscsi->state); + + /* + * check which state we are in and see if we + * should transitition to the new state + */ + switch (vscsi->state) { + /* Should never be called while in this state. */ + case NO_QUEUE: + /* + * Can never transition from this state; + * igonore errors and logout. + */ + case UNCONFIGURING: + break; + + /* can transition from this state to UNCONFIGURING */ + case ERR_DISCONNECT: + if (new_state == UNCONFIGURING) + vscsi->state = new_state; + break; + + /* + * Can transition from this state to to unconfiguring + * or err disconnect. + */ + case ERR_DISCONNECT_RECONNECT: + switch (new_state) { + case UNCONFIGURING: + case ERR_DISCONNECT: + vscsi->state = new_state; + break; + + case WAIT_IDLE: + break; + default: + break; + } + break; + + /* can transition from this state to UNCONFIGURING */ + case ERR_DISCONNECTED: + if (new_state == UNCONFIGURING) + vscsi->state = new_state; + break; + + /* + * If this is a transition into an error state. + * a client is attempting to establish a connection + * and has violated the RPA protocol. + * There can be nothing pending on the adapter although + * there can be requests in the command queue. + */ + case WAIT_ENABLED: + case PART_UP_WAIT_ENAB: + switch (new_state) { + case ERR_DISCONNECT: + vscsi->flags |= RESPONSE_Q_DOWN; + vscsi->state = new_state; + vscsi->flags &= ~(SCHEDULE_DISCONNECT | + DISCONNECT_SCHEDULED); + ibmvscsis_free_command_q(vscsi); + break; + case ERR_DISCONNECT_RECONNECT: + ibmvscsis_reset_queue(vscsi, WAIT_ENABLED); + break; + + /* should never happen */ + case WAIT_IDLE: + rc = ERROR; + dev_err(&vscsi->dev, "disconnect: invalid state %d for WAIT_IDLE\n", + vscsi->state); + break; + } + break; + + case WAIT_IDLE: + switch (new_state) { + case ERR_DISCONNECT: + case ERR_DISCONNECT_RECONNECT: + vscsi->state = new_state; + break; + } + break; + + /* + * Initiator has not done a successful srp login + * or has done a successful srp logout ( adapter was not + * busy). In the first case there can be responses queued + * waiting for space on the initiators response queue (MAD) + * The second case the adapter is idle. Assume the worse case, + * i.e. the second case. + */ + case WAIT_CONNECTION: + case CONNECTED: + case SRP_PROCESSING: + wait_idle = true; + vscsi->state = new_state; + break; + + /* can transition from this state to UNCONFIGURING */ + case UNDEFINED: + if (new_state == UNCONFIGURING) + vscsi->state = new_state; + break; + default: + break; + } + + if (wait_idle) { + pr_debug("disconnect start wait, active %d, sched %d\n", + (int)list_empty(&vscsi->active_q), + (int)list_empty(&vscsi->schedule_q)); + if (!list_empty(&vscsi->active_q) || + !list_empty(&vscsi->schedule_q)) { + vscsi->flags |= WAIT_FOR_IDLE; + pr_debug("disconnect flags 0x%x\n", vscsi->flags); + /* + * This routine is can not be called with the interrupt + * lock held. + */ + spin_unlock_bh(&vscsi->intr_lock); + wait_for_completion(&vscsi->wait_idle); + spin_lock_bh(&vscsi->intr_lock); + } + pr_debug("disconnect stop wait\n"); + + ibmvscsis_adapter_idle(vscsi); + } + + spin_unlock_bh(&vscsi->intr_lock); +} + +/** + * ibmvscsis_post_disconnect() - Schedule the disconnect + * @vscsi: Pointer to our adapter structure + * @new_state: State to move to after disconnecting + * @flag_bits: Flags to turn on in adapter structure + * + * If it's already been scheduled, then see if we need to "upgrade" + * the new state (if the one passed in is more "severe" than the + * previous one). + * + * PRECONDITION: + * interrupt lock is held + */ +static void ibmvscsis_post_disconnect(struct scsi_info *vscsi, uint new_state, + uint flag_bits) +{ + uint state; + + /* check the validity of the new state */ + switch (new_state) { + case UNCONFIGURING: + case ERR_DISCONNECT: + case ERR_DISCONNECT_RECONNECT: + case WAIT_IDLE: + break; + + default: + dev_err(&vscsi->dev, "post_disconnect: Invalid new state %d\n", + new_state); + return; + } + + vscsi->flags |= flag_bits; + + pr_debug("post_disconnect: new_state 0x%x, flag_bits 0x%x, vscsi->flags 0x%x, state %hx\n", + new_state, flag_bits, vscsi->flags, vscsi->state); + + if (!(vscsi->flags & (DISCONNECT_SCHEDULED | SCHEDULE_DISCONNECT))) { + vscsi->flags |= SCHEDULE_DISCONNECT; + vscsi->new_state = new_state; + + INIT_WORK(&vscsi->proc_work, ibmvscsis_disconnect); + (void)queue_work(vscsi->work_q, &vscsi->proc_work); + } else { + if (vscsi->new_state) + state = vscsi->new_state; + else + state = vscsi->state; + + switch (state) { + case NO_QUEUE: + case UNCONFIGURING: + break; + + case ERR_DISCONNECTED: + case ERR_DISCONNECT: + case UNDEFINED: + if (new_state == UNCONFIGURING) + vscsi->new_state = new_state; + break; + + case ERR_DISCONNECT_RECONNECT: + switch (new_state) { + case UNCONFIGURING: + case ERR_DISCONNECT: + vscsi->new_state = new_state; + break; + default: + break; + } + break; + + case WAIT_ENABLED: + case PART_UP_WAIT_ENAB: + case WAIT_IDLE: + case WAIT_CONNECTION: + case CONNECTED: + case SRP_PROCESSING: + vscsi->new_state = new_state; + break; + + default: + break; + } + } + + pr_debug("Leaving post_disconnect: flags 0x%x, new_state 0x%x\n", + vscsi->flags, vscsi->new_state); +} + +/** + * ibmvscsis_trans_event() - Handle a Transport Event + * @vscsi: Pointer to our adapter structure + * @crq: Pointer to CRQ entry containing the Transport Event + * + * Do the logic to close the I_T nexus. This function may not + * behave to specification. + * + * EXECUTION ENVIRONMENT: + * Interrupt, interrupt lock held + */ +static long ibmvscsis_trans_event(struct scsi_info *vscsi, + struct viosrp_crq *crq) +{ + long rc = ADAPT_SUCCESS; + + pr_debug("trans_event: format %d, flags 0x%x, state 0x%hx\n", + (int)crq->format, vscsi->flags, vscsi->state); + + switch (crq->format) { + case MIGRATED: + case PARTNER_FAILED: + case PARTNER_DEREGISTER: + ibmvscsis_delete_client_info(vscsi, true); + break; + + default: + rc = ERROR; + dev_err(&vscsi->dev, "trans_event: invalid format %d\n", + (uint)crq->format); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT, + RESPONSE_Q_DOWN); + break; + } + + if (rc == ADAPT_SUCCESS) { + switch (vscsi->state) { + case NO_QUEUE: + case ERR_DISCONNECTED: + case UNDEFINED: + break; + + case UNCONFIGURING: + vscsi->flags |= (RESPONSE_Q_DOWN | TRANS_EVENT); + break; + + case WAIT_ENABLED: + break; + + case WAIT_CONNECTION: + break; + + case CONNECTED: + ibmvscsis_post_disconnect(vscsi, WAIT_IDLE, + (RESPONSE_Q_DOWN | + TRANS_EVENT)); + break; + + case PART_UP_WAIT_ENAB: + vscsi->state = WAIT_ENABLED; + break; + + case SRP_PROCESSING: + if ((vscsi->debit > 0) || + !list_empty(&vscsi->schedule_q) || + !list_empty(&vscsi->waiting_rsp) || + !list_empty(&vscsi->active_q)) { + pr_debug("debit %d, sched %d, wait %d, active %d\n", + vscsi->debit, + (int)list_empty(&vscsi->schedule_q), + (int)list_empty(&vscsi->waiting_rsp), + (int)list_empty(&vscsi->active_q)); + pr_warn("connection lost with outstanding work\n"); + } else { + pr_debug("trans_event: SRP Processing, but no outstanding work\n"); + } + + ibmvscsis_post_disconnect(vscsi, WAIT_IDLE, + (RESPONSE_Q_DOWN | + TRANS_EVENT)); + break; + + case ERR_DISCONNECT: + case ERR_DISCONNECT_RECONNECT: + case WAIT_IDLE: + vscsi->flags |= (RESPONSE_Q_DOWN | TRANS_EVENT); + break; + } + } + + rc = vscsi->flags & SCHEDULE_DISCONNECT; + + pr_debug("Leaving trans_event: flags 0x%x, state 0x%hx, rc %ld\n", + vscsi->flags, vscsi->state, rc); + + return rc; +} + +/** + * ibmvscsis_poll_cmd_q() - Poll Command Queue + * @vscsi: Pointer to our adapter structure + * + * Called to handle command elements that may have arrived while + * interrupts were disabled. + * + * EXECUTION ENVIRONMENT: + * intr_lock must be held + */ +static void ibmvscsis_poll_cmd_q(struct scsi_info *vscsi) +{ + struct viosrp_crq *crq; + long rc; + bool ack = true; + volatile u8 valid; + + pr_debug("poll_cmd_q: flags 0x%x, state 0x%hx, q index %ud\n", + vscsi->flags, vscsi->state, vscsi->cmd_q.index); + + rc = vscsi->flags & SCHEDULE_DISCONNECT; + crq = vscsi->cmd_q.base_addr + vscsi->cmd_q.index; + valid = crq->valid; + dma_rmb(); + + while (valid) { +poll_work: + vscsi->cmd_q.index = + (vscsi->cmd_q.index + 1) & vscsi->cmd_q.mask; + + if (!rc) { + rc = ibmvscsis_parse_command(vscsi, crq); + } else { + if ((uint)crq->valid == VALID_TRANS_EVENT) { + /* + * must service the transport layer events even + * in an error state, dont break out until all + * the consecutive transport events have been + * processed + */ + rc = ibmvscsis_trans_event(vscsi, crq); + } else if (vscsi->flags & TRANS_EVENT) { + /* + * if a tranport event has occurred leave + * everything but transport events on the queue + */ + pr_debug("poll_cmd_q, ignoring\n"); + + /* + * need to decrement the queue index so we can + * look at the elment again + */ + if (vscsi->cmd_q.index) + vscsi->cmd_q.index -= 1; + else + /* + * index is at 0 it just wrapped. + * have it index last element in q + */ + vscsi->cmd_q.index = vscsi->cmd_q.mask; + break; + } + } + + crq->valid = INVALIDATE_CMD_RESP_EL; + + crq = vscsi->cmd_q.base_addr + vscsi->cmd_q.index; + valid = crq->valid; + dma_rmb(); + } + + if (!rc) { + if (ack) { + vio_enable_interrupts(vscsi->dma_dev); + ack = false; + pr_debug("poll_cmd_q, reenabling interrupts\n"); + } + valid = crq->valid; + dma_rmb(); + if (valid) + goto poll_work; + } + + pr_debug("Leaving poll_cmd_q: rc %ld\n", rc); +} + +/** + * ibmvscsis_free_cmd_qs() - Free elements in queue + * @vscsi: Pointer to our adapter structure + * + * Free all of the elements on all queues that are waiting for + * whatever reason. + * + * PRECONDITION: + * Called with interrupt lock held + */ +static void ibmvscsis_free_cmd_qs(struct scsi_info *vscsi) +{ + struct ibmvscsis_cmd *cmd, *nxt; + + pr_debug("free_cmd_qs: waiting_rsp empty %d, timer starter %d\n", + (int)list_empty(&vscsi->waiting_rsp), + vscsi->rsp_q_timer.started); + + list_for_each_entry_safe(cmd, nxt, &vscsi->waiting_rsp, list) { + list_del(&cmd->list); + ibmvscsis_free_cmd_resources(vscsi, cmd); + } +} + +/** + * ibmvscsis_get_free_cmd() - Get free command from list + * @vscsi: Pointer to our adapter structure + * + * Must be called with interrupt lock held. + */ +static struct ibmvscsis_cmd *ibmvscsis_get_free_cmd(struct scsi_info *vscsi) +{ + struct ibmvscsis_cmd *cmd = NULL; + struct iu_entry *iue; + + iue = srp_iu_get(&vscsi->target); + if (iue) { + cmd = list_first_entry_or_null(&vscsi->free_cmd, + struct ibmvscsis_cmd, list); + if (cmd) { + list_del(&cmd->list); + cmd->iue = iue; + cmd->type = UNSET_TYPE; + memset(&cmd->se_cmd, 0, sizeof(cmd->se_cmd)); + } else { + srp_iu_put(iue); + } + } + + return cmd; +} + +/** + * ibmvscsis_adapter_idle() - Helper function to handle idle adapter + * @vscsi: Pointer to our adapter structure + * + * This function is called when the adapter is idle when the driver + * is attempting to clear an error condition. + * The adapter is considered busy if any of its cmd queues + * are non-empty. This function can be invoked + * from the off level disconnect function. + * + * EXECUTION ENVIRONMENT: + * Process environment called with interrupt lock held + */ +static void ibmvscsis_adapter_idle(struct scsi_info *vscsi) +{ + int free_qs = false; + + pr_debug("adapter_idle: flags 0x%x, state 0x%hx\n", vscsi->flags, + vscsi->state); + + /* Only need to free qs if we're disconnecting from client */ + if (vscsi->state != WAIT_CONNECTION || vscsi->flags & TRANS_EVENT) + free_qs = true; + + switch (vscsi->state) { + case ERR_DISCONNECT_RECONNECT: + ibmvscsis_reset_queue(vscsi, WAIT_CONNECTION); + pr_debug("adapter_idle, disc_rec: flags 0x%x\n", vscsi->flags); + break; + + case ERR_DISCONNECT: + ibmvscsis_free_command_q(vscsi); + vscsi->flags &= ~DISCONNECT_SCHEDULED; + vscsi->flags |= RESPONSE_Q_DOWN; + vscsi->state = ERR_DISCONNECTED; + pr_debug("adapter_idle, disc: flags 0x%x, state 0x%hx\n", + vscsi->flags, vscsi->state); + break; + + case WAIT_IDLE: + vscsi->rsp_q_timer.timer_pops = 0; + vscsi->debit = 0; + vscsi->credit = 0; + if (vscsi->flags & TRANS_EVENT) { + vscsi->state = WAIT_CONNECTION; + vscsi->flags &= PRESERVE_FLAG_FIELDS; + } else { + vscsi->state = CONNECTED; + vscsi->flags &= ~DISCONNECT_SCHEDULED; + } + + pr_debug("adapter_idle, wait: flags 0x%x, state 0x%hx\n", + vscsi->flags, vscsi->state); + ibmvscsis_poll_cmd_q(vscsi); + break; + + case ERR_DISCONNECTED: + vscsi->flags &= ~DISCONNECT_SCHEDULED; + pr_debug("adapter_idle, disconnected: flags 0x%x, state 0x%hx\n", + vscsi->flags, vscsi->state); + break; + + default: + dev_err(&vscsi->dev, "adapter_idle: in invalid state %d\n", + vscsi->state); + break; + } + + if (free_qs) + ibmvscsis_free_cmd_qs(vscsi); + + /* + * There is a timing window where we could lose a disconnect request. + * The known path to this window occurs during the DISCONNECT_RECONNECT + * case above: reset_queue calls free_command_q, which will release the + * interrupt lock. During that time, a new post_disconnect call can be + * made with a "more severe" state (DISCONNECT or UNCONFIGURING). + * Because the DISCONNECT_SCHEDULED flag is already set, post_disconnect + * will only set the new_state. Now free_command_q reacquires the intr + * lock and clears the DISCONNECT_SCHEDULED flag (using PRESERVE_FLAG_ + * FIELDS), and the disconnect is lost. This is particularly bad when + * the new disconnect was for UNCONFIGURING, since the unconfigure hangs + * forever. + * Fix is that free command queue sets acr state and acr flags if there + * is a change under the lock + * note free command queue writes to this state it clears it + * before releasing the lock, different drivers call the free command + * queue different times so dont initialize above + */ + if (vscsi->phyp_acr_state != 0) { + /* + * set any bits in flags that may have been cleared by + * a call to free command queue in switch statement + * or reset queue + */ + vscsi->flags |= vscsi->phyp_acr_flags; + ibmvscsis_post_disconnect(vscsi, vscsi->phyp_acr_state, 0); + vscsi->phyp_acr_state = 0; + vscsi->phyp_acr_flags = 0; + + pr_debug("adapter_idle: flags 0x%x, state 0x%hx, acr_flags 0x%x, acr_state 0x%hx\n", + vscsi->flags, vscsi->state, vscsi->phyp_acr_flags, + vscsi->phyp_acr_state); + } + + pr_debug("Leaving adapter_idle: flags 0x%x, state 0x%hx, new_state 0x%x\n", + vscsi->flags, vscsi->state, vscsi->new_state); +} + +/** + * ibmvscsis_copy_crq_packet() - Copy CRQ Packet + * @vscsi: Pointer to our adapter structure + * @cmd: Pointer to command element to use to process the request + * @crq: Pointer to CRQ entry containing the request + * + * Copy the srp information unit from the hosted + * partition using remote dma + * + * EXECUTION ENVIRONMENT: + * Interrupt, interrupt lock held + */ +static long ibmvscsis_copy_crq_packet(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd, + struct viosrp_crq *crq) +{ + struct iu_entry *iue = cmd->iue; + long rc = 0; + u16 len; + + len = be16_to_cpu(crq->IU_length); + if ((len > SRP_MAX_IU_LEN) || (len == 0)) { + dev_err(&vscsi->dev, "copy_crq: Invalid len %d passed", len); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + return SRP_VIOLATION; + } + + rc = h_copy_rdma(len, vscsi->dds.window[REMOTE].liobn, + be64_to_cpu(crq->IU_data_ptr), + vscsi->dds.window[LOCAL].liobn, iue->sbuf->dma); + + switch (rc) { + case H_SUCCESS: + cmd->init_time = mftb(); + iue->remote_token = crq->IU_data_ptr; + iue->iu_len = len; + pr_debug("copy_crq: ioba 0x%llx, init_time 0x%llx\n", + be64_to_cpu(crq->IU_data_ptr), cmd->init_time); + break; + case H_PERMISSION: + if (connection_broken(vscsi)) + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, + (RESPONSE_Q_DOWN | + CLIENT_FAILED)); + else + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, 0); + + dev_err(&vscsi->dev, "copy_crq: h_copy_rdma failed, rc %ld\n", + rc); + break; + case H_DEST_PARM: + case H_SOURCE_PARM: + default: + dev_err(&vscsi->dev, "copy_crq: h_copy_rdma failed, rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + break; + } + + return rc; +} + +/** + * ibmvscsis_adapter_info - Service an Adapter Info MAnagement Data gram + * @vscsi: Pointer to our adapter structure + * @iue: Information Unit containing the Adapter Info MAD request + * + * EXECUTION ENVIRONMENT: + * Interrupt adpater lock is held + */ +static long ibmvscsis_adapter_info(struct scsi_info *vscsi, + struct iu_entry *iue) +{ + struct viosrp_adapter_info *mad = &vio_iu(iue)->mad.adapter_info; + struct mad_adapter_info_data *info; + uint flag_bits = 0; + dma_addr_t token; + long rc; + + mad->common.status = cpu_to_be16(VIOSRP_MAD_SUCCESS); + + if (be16_to_cpu(mad->common.length) > sizeof(*info)) { + mad->common.status = cpu_to_be16(VIOSRP_MAD_FAILED); + return 0; + } + + info = dma_alloc_coherent(&vscsi->dma_dev->dev, sizeof(*info), &token, + GFP_KERNEL); + if (!info) { + dev_err(&vscsi->dev, "bad dma_alloc_coherent %p\n", + iue->target); + mad->common.status = cpu_to_be16(VIOSRP_MAD_FAILED); + return 0; + } + + /* Get remote info */ + rc = h_copy_rdma(be16_to_cpu(mad->common.length), + vscsi->dds.window[REMOTE].liobn, + be64_to_cpu(mad->buffer), + vscsi->dds.window[LOCAL].liobn, token); + + if (rc != H_SUCCESS) { + if (rc == H_PERMISSION) { + if (connection_broken(vscsi)) + flag_bits = (RESPONSE_Q_DOWN | CLIENT_FAILED); + } + pr_warn("adapter_info: h_copy_rdma from client failed, rc %ld\n", + rc); + pr_debug("adapter_info: ioba 0x%llx, flags 0x%x, flag_bits 0x%x\n", + be64_to_cpu(mad->buffer), vscsi->flags, flag_bits); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, + flag_bits); + goto free_dma; + } + + /* + * Copy client info, but ignore partition number, which we + * already got from phyp - unless we failed to get it from + * phyp (e.g. if we're running on a p5 system). + */ + if (vscsi->client_data.partition_number == 0) + vscsi->client_data.partition_number = + be32_to_cpu(info->partition_number); + strncpy(vscsi->client_data.srp_version, info->srp_version, + sizeof(vscsi->client_data.srp_version)); + strncpy(vscsi->client_data.partition_name, info->partition_name, + sizeof(vscsi->client_data.partition_name)); + vscsi->client_data.mad_version = be32_to_cpu(info->mad_version); + vscsi->client_data.os_type = be32_to_cpu(info->os_type); + + /* Copy our info */ + strncpy(info->srp_version, SRP_VERSION, + sizeof(info->srp_version)); + strncpy(info->partition_name, vscsi->dds.partition_name, + sizeof(info->partition_name)); + info->partition_number = cpu_to_be32(vscsi->dds.partition_num); + info->mad_version = cpu_to_be32(MAD_VERSION_1); + info->os_type = cpu_to_be32(LINUX); + memset(&info->port_max_txu[0], 0, sizeof(info->port_max_txu)); + info->port_max_txu[0] = cpu_to_be32(128 * PAGE_SIZE); + + dma_wmb(); + rc = h_copy_rdma(sizeof(*info), vscsi->dds.window[LOCAL].liobn, + token, vscsi->dds.window[REMOTE].liobn, + be64_to_cpu(mad->buffer)); + switch (rc) { + case H_SUCCESS: + break; + + case H_SOURCE_PARM: + case H_DEST_PARM: + case H_PERMISSION: + if (connection_broken(vscsi)) + flag_bits = (RESPONSE_Q_DOWN | CLIENT_FAILED); + default: + dev_err(&vscsi->dev, "adapter_info: h_copy_rdma to client failed, rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, + flag_bits); + break; + } + +free_dma: + dma_free_coherent(&vscsi->dma_dev->dev, sizeof(*info), info, token); + pr_debug("Leaving adapter_info, rc %ld\n", rc); + + return rc; +} + +/** + * ibmvscsis_cap_mad() - Service a Capabilities MAnagement Data gram + * @vscsi: Pointer to our adapter structure + * @iue: Information Unit containing the Capabilities MAD request + * + * NOTE: if you return an error from this routine you must be + * disconnecting or you will cause a hang + * + * EXECUTION ENVIRONMENT: + * Interrupt called with adapter lock held + */ +static int ibmvscsis_cap_mad(struct scsi_info *vscsi, struct iu_entry *iue) +{ + struct viosrp_capabilities *mad = &vio_iu(iue)->mad.capabilities; + struct capabilities *cap; + struct mad_capability_common *common; + dma_addr_t token; + u16 olen, len, status, min_len, cap_len; + u32 flag; + uint flag_bits = 0; + long rc = 0; + + olen = be16_to_cpu(mad->common.length); + /* + * struct capabilities hardcodes a couple capabilities after the + * header, but the capabilities can actually be in any order. + */ + min_len = offsetof(struct capabilities, migration); + if ((olen < min_len) || (olen > PAGE_SIZE)) { + pr_warn("cap_mad: invalid len %d\n", olen); + mad->common.status = cpu_to_be16(VIOSRP_MAD_FAILED); + return 0; + } + + cap = dma_alloc_coherent(&vscsi->dma_dev->dev, olen, &token, + GFP_KERNEL); + if (!cap) { + dev_err(&vscsi->dev, "bad dma_alloc_coherent %p\n", + iue->target); + mad->common.status = cpu_to_be16(VIOSRP_MAD_FAILED); + return 0; + } + rc = h_copy_rdma(olen, vscsi->dds.window[REMOTE].liobn, + be64_to_cpu(mad->buffer), + vscsi->dds.window[LOCAL].liobn, token); + if (rc == H_SUCCESS) { + strncpy(cap->name, dev_name(&vscsi->dma_dev->dev), + SRP_MAX_LOC_LEN); + + len = olen - min_len; + status = VIOSRP_MAD_SUCCESS; + common = (struct mad_capability_common *)&cap->migration; + + while ((len > 0) && (status == VIOSRP_MAD_SUCCESS) && !rc) { + pr_debug("cap_mad: len left %hd, cap type %d, cap len %hd\n", + len, be32_to_cpu(common->cap_type), + be16_to_cpu(common->length)); + + cap_len = be16_to_cpu(common->length); + if (cap_len > len) { + dev_err(&vscsi->dev, "cap_mad: cap len mismatch with total len\n"); + status = VIOSRP_MAD_FAILED; + break; + } + + if (cap_len == 0) { + dev_err(&vscsi->dev, "cap_mad: cap len is 0\n"); + status = VIOSRP_MAD_FAILED; + break; + } + + switch (common->cap_type) { + default: + pr_debug("cap_mad: unsupported capability\n"); + common->server_support = 0; + flag = cpu_to_be32((u32)CAP_LIST_SUPPORTED); + cap->flags &= ~flag; + break; + } + + len = len - cap_len; + common = (struct mad_capability_common *) + ((char *)common + cap_len); + } + + mad->common.status = cpu_to_be16(status); + + dma_wmb(); + rc = h_copy_rdma(olen, vscsi->dds.window[LOCAL].liobn, token, + vscsi->dds.window[REMOTE].liobn, + be64_to_cpu(mad->buffer)); + + if (rc != H_SUCCESS) { + pr_debug("cap_mad: failed to copy to client, rc %ld\n", + rc); + + if (rc == H_PERMISSION) { + if (connection_broken(vscsi)) + flag_bits = (RESPONSE_Q_DOWN | + CLIENT_FAILED); + } + + pr_warn("cap_mad: error copying data to client, rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, + flag_bits); + } + } + + dma_free_coherent(&vscsi->dma_dev->dev, olen, cap, token); + + pr_debug("Leaving cap_mad, rc %ld, client_cap 0x%x\n", + rc, vscsi->client_cap); + + return rc; +} + +/** + * ibmvscsis_process_mad() - Service a MAnagement Data gram + * @vscsi: Pointer to our adapter structure + * @iue: Information Unit containing the MAD request + * + * Must be called with interrupt lock held. + */ +static long ibmvscsis_process_mad(struct scsi_info *vscsi, struct iu_entry *iue) +{ + struct mad_common *mad = (struct mad_common *)&vio_iu(iue)->mad; + struct viosrp_empty_iu *empty; + long rc = ADAPT_SUCCESS; + + switch (be32_to_cpu(mad->type)) { + case VIOSRP_EMPTY_IU_TYPE: + empty = &vio_iu(iue)->mad.empty_iu; + vscsi->empty_iu_id = be64_to_cpu(empty->buffer); + vscsi->empty_iu_tag = be64_to_cpu(empty->common.tag); + mad->status = cpu_to_be16(VIOSRP_MAD_SUCCESS); + break; + case VIOSRP_ADAPTER_INFO_TYPE: + rc = ibmvscsis_adapter_info(vscsi, iue); + break; + case VIOSRP_CAPABILITIES_TYPE: + rc = ibmvscsis_cap_mad(vscsi, iue); + break; + case VIOSRP_ENABLE_FAST_FAIL: + if (vscsi->state == CONNECTED) { + vscsi->fast_fail = true; + mad->status = cpu_to_be16(VIOSRP_MAD_SUCCESS); + } else { + pr_warn("fast fail mad sent after login\n"); + mad->status = cpu_to_be16(VIOSRP_MAD_FAILED); + } + break; + default: + mad->status = cpu_to_be16(VIOSRP_MAD_NOT_SUPPORTED); + break; + } + + return rc; +} + +/** + * srp_snd_msg_failed() - Handle an error when sending a response + * @vscsi: Pointer to our adapter structure + * @rc: The return code from the h_send_crq command + * + * Must be called with interrupt lock held. + */ +static void srp_snd_msg_failed(struct scsi_info *vscsi, long rc) +{ + ktime_t kt; + + if (rc != H_DROPPED) { + ibmvscsis_free_cmd_qs(vscsi); + + if (rc == H_CLOSED) + vscsi->flags |= CLIENT_FAILED; + + /* don't flag the same problem multiple times */ + if (!(vscsi->flags & RESPONSE_Q_DOWN)) { + vscsi->flags |= RESPONSE_Q_DOWN; + if (!(vscsi->state & (ERR_DISCONNECT | + ERR_DISCONNECT_RECONNECT | + ERR_DISCONNECTED | UNDEFINED))) { + dev_err(&vscsi->dev, "snd_msg_failed: setting RESPONSE_Q_DOWN, state 0x%hx, flags 0x%x, rc %ld\n", + vscsi->state, vscsi->flags, rc); + } + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, 0); + } + return; + } + + /* + * The response queue is full. + * If the server is processing SRP requests, i.e. + * the client has successfully done an + * SRP_LOGIN, then it will wait forever for room in + * the queue. However if the system admin + * is attempting to unconfigure the server then one + * or more children will be in a state where + * they are being removed. So if there is even one + * child being removed then the driver assumes + * the system admin is attempting to break the + * connection with the client and MAX_TIMER_POPS + * is honored. + */ + if ((vscsi->rsp_q_timer.timer_pops < MAX_TIMER_POPS) || + (vscsi->state == SRP_PROCESSING)) { + pr_debug("snd_msg_failed: response queue full, flags 0x%x, timer started %d, pops %d\n", + vscsi->flags, (int)vscsi->rsp_q_timer.started, + vscsi->rsp_q_timer.timer_pops); + + /* + * Check if the timer is running; if it + * is not then start it up. + */ + if (!vscsi->rsp_q_timer.started) { + if (vscsi->rsp_q_timer.timer_pops < + MAX_TIMER_POPS) { + kt = ktime_set(0, WAIT_NANO_SECONDS); + } else { + /* + * slide the timeslice if the maximum + * timer pops have already happened + */ + kt = ktime_set(WAIT_SECONDS, 0); + } + + vscsi->rsp_q_timer.started = true; + hrtimer_start(&vscsi->rsp_q_timer.timer, kt, + HRTIMER_MODE_REL); + } + } else { + /* + * TBD: Do we need to worry about this? Need to get + * remove working. + */ + /* + * waited a long time and it appears the system admin + * is bring this driver down + */ + vscsi->flags |= RESPONSE_Q_DOWN; + ibmvscsis_free_cmd_qs(vscsi); + /* + * if the driver is already attempting to disconnect + * from the client and has already logged an error + * trace this event but don't put it in the error log + */ + if (!(vscsi->state & (ERR_DISCONNECT | + ERR_DISCONNECT_RECONNECT | + ERR_DISCONNECTED | UNDEFINED))) { + dev_err(&vscsi->dev, "client crq full too long\n"); + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, + 0); + } + } +} + +/** + * ibmvscsis_send_messages() - Send a Response + * @vscsi: Pointer to our adapter structure + * + * Send a response, first checking the waiting queue. Responses are + * sent in order they are received. If the response cannot be sent, + * because the client queue is full, it stays on the waiting queue. + * + * PRECONDITION: + * Called with interrupt lock held + */ +static void ibmvscsis_send_messages(struct scsi_info *vscsi) +{ + u64 msg_hi = 0; + /* note do not attmempt to access the IU_data_ptr with this pointer + * it is not valid + */ + struct viosrp_crq *crq = (struct viosrp_crq *)&msg_hi; + struct ibmvscsis_cmd *cmd, *nxt; + struct iu_entry *iue; + long rc = ADAPT_SUCCESS; + + if (!(vscsi->flags & RESPONSE_Q_DOWN)) { + list_for_each_entry_safe(cmd, nxt, &vscsi->waiting_rsp, list) { + pr_debug("send_messages cmd %p\n", cmd); + + iue = cmd->iue; + + crq->valid = VALID_CMD_RESP_EL; + crq->format = cmd->rsp.format; + + if (cmd->flags & CMD_FAST_FAIL) + crq->status = VIOSRP_ADAPTER_FAIL; + + crq->IU_length = cpu_to_be16(cmd->rsp.len); + + rc = h_send_crq(vscsi->dma_dev->unit_address, + be64_to_cpu(msg_hi), + be64_to_cpu(cmd->rsp.tag)); + + pr_debug("send_messages: tag 0x%llx, rc %ld\n", + be64_to_cpu(cmd->rsp.tag), rc); + + /* if all ok free up the command element resources */ + if (rc == H_SUCCESS) { + /* some movement has occurred */ + vscsi->rsp_q_timer.timer_pops = 0; + list_del(&cmd->list); + + ibmvscsis_free_cmd_resources(vscsi, cmd); + } else { + srp_snd_msg_failed(vscsi, rc); + break; + } + } + + if (!rc) { + /* + * The timer could pop with the queue empty. If + * this happens, rc will always indicate a + * success; clear the pop count. + */ + vscsi->rsp_q_timer.timer_pops = 0; + } + } else { + ibmvscsis_free_cmd_qs(vscsi); + } +} + +/* Called with intr lock held */ +static void ibmvscsis_send_mad_resp(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd, + struct viosrp_crq *crq) +{ + struct iu_entry *iue = cmd->iue; + struct mad_common *mad = (struct mad_common *)&vio_iu(iue)->mad; + uint flag_bits = 0; + long rc; + + dma_wmb(); + rc = h_copy_rdma(sizeof(struct mad_common), + vscsi->dds.window[LOCAL].liobn, iue->sbuf->dma, + vscsi->dds.window[REMOTE].liobn, + be64_to_cpu(crq->IU_data_ptr)); + if (!rc) { + cmd->rsp.format = VIOSRP_MAD_FORMAT; + cmd->rsp.len = sizeof(struct mad_common); + cmd->rsp.tag = mad->tag; + list_add_tail(&cmd->list, &vscsi->waiting_rsp); + ibmvscsis_send_messages(vscsi); + } else { + pr_debug("Error sending mad response, rc %ld\n", rc); + if (rc == H_PERMISSION) { + if (connection_broken(vscsi)) + flag_bits = (RESPONSE_Q_DOWN | CLIENT_FAILED); + } + dev_err(&vscsi->dev, "mad: failed to copy to client, rc %ld\n", + rc); + + ibmvscsis_free_cmd_resources(vscsi, cmd); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, + flag_bits); + } +} + +/** + * ibmvscsis_mad() - Service a MAnagement Data gram. + * @vscsi: Pointer to our adapter structure + * @crq: Pointer to the CRQ entry containing the MAD request + * + * EXECUTION ENVIRONMENT: + * Interrupt called with adapter lock held + */ +static long ibmvscsis_mad(struct scsi_info *vscsi, struct viosrp_crq *crq) +{ + struct iu_entry *iue; + struct ibmvscsis_cmd *cmd; + struct mad_common *mad; + long rc = ADAPT_SUCCESS; + + switch (vscsi->state) { + /* + * We have not exchanged Init Msgs yet, so this MAD was sent + * before the last Transport Event; client will not be + * expecting a response. + */ + case WAIT_CONNECTION: + pr_debug("mad: in Wait Connection state, ignoring MAD, flags %d\n", + vscsi->flags); + return ADAPT_SUCCESS; + + case SRP_PROCESSING: + case CONNECTED: + break; + + /* + * We should never get here while we're in these states. + * Just log an error and get out. + */ + case UNCONFIGURING: + case WAIT_IDLE: + case ERR_DISCONNECT: + case ERR_DISCONNECT_RECONNECT: + default: + dev_err(&vscsi->dev, "mad: invalid adapter state %d for mad\n", + vscsi->state); + return ADAPT_SUCCESS; + } + + cmd = ibmvscsis_get_free_cmd(vscsi); + if (!cmd) { + dev_err(&vscsi->dev, "mad: failed to get cmd, debit %d\n", + vscsi->debit); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + return ERROR; + } + iue = cmd->iue; + cmd->type = ADAPTER_MAD; + + rc = ibmvscsis_copy_crq_packet(vscsi, cmd, crq); + if (!rc) { + mad = (struct mad_common *)&vio_iu(iue)->mad; + + pr_debug("mad: type %d\n", be32_to_cpu(mad->type)); + + if (be16_to_cpu(mad->length) < 0) { + dev_err(&vscsi->dev, "mad: length is < 0\n"); + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, 0); + rc = SRP_VIOLATION; + } else { + rc = ibmvscsis_process_mad(vscsi, iue); + } + + pr_debug("mad: status %hd, rc %ld\n", be16_to_cpu(mad->status), + rc); + + if (!rc) + ibmvscsis_send_mad_resp(vscsi, cmd, crq); + } else { + ibmvscsis_free_cmd_resources(vscsi, cmd); + } + + pr_debug("Leaving mad, rc %ld\n", rc); + return rc; +} + +/** + * ibmvscsis_login_rsp() - Create/copy a login response notice to the client + * @vscsi: Pointer to our adapter structure + * @cmd: Pointer to the command for the SRP Login request + * + * EXECUTION ENVIRONMENT: + * Interrupt, interrupt lock held + */ +static long ibmvscsis_login_rsp(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd) +{ + struct iu_entry *iue = cmd->iue; + struct srp_login_rsp *rsp = &vio_iu(iue)->srp.login_rsp; + struct format_code *fmt; + uint flag_bits = 0; + long rc = ADAPT_SUCCESS; + + memset(rsp, 0, sizeof(struct srp_login_rsp)); + + rsp->opcode = SRP_LOGIN_RSP; + rsp->req_lim_delta = cpu_to_be32(vscsi->request_limit); + rsp->tag = cmd->rsp.tag; + rsp->max_it_iu_len = cpu_to_be32(SRP_MAX_IU_LEN); + rsp->max_ti_iu_len = cpu_to_be32(SRP_MAX_IU_LEN); + fmt = (struct format_code *)&rsp->buf_fmt; + fmt->buffers = SUPPORTED_FORMATS; + vscsi->credit = 0; + + cmd->rsp.len = sizeof(struct srp_login_rsp); + + dma_wmb(); + rc = h_copy_rdma(cmd->rsp.len, vscsi->dds.window[LOCAL].liobn, + iue->sbuf->dma, vscsi->dds.window[REMOTE].liobn, + be64_to_cpu(iue->remote_token)); + + switch (rc) { + case H_SUCCESS: + break; + + case H_PERMISSION: + if (connection_broken(vscsi)) + flag_bits = RESPONSE_Q_DOWN | CLIENT_FAILED; + dev_err(&vscsi->dev, "login_rsp: error copying to client, rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, + flag_bits); + break; + case H_SOURCE_PARM: + case H_DEST_PARM: + default: + dev_err(&vscsi->dev, "login_rsp: error copying to client, rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + break; + } + + return rc; +} + +/** + * ibmvscsis_srp_login_rej() - Create/copy a login rejection notice to client + * @vscsi: Pointer to our adapter structure + * @cmd: Pointer to the command for the SRP Login request + * @reason: The reason the SRP Login is being rejected, per SRP protocol + * + * EXECUTION ENVIRONMENT: + * Interrupt, interrupt lock held + */ +static long ibmvscsis_srp_login_rej(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd, u32 reason) +{ + struct iu_entry *iue = cmd->iue; + struct srp_login_rej *rej = &vio_iu(iue)->srp.login_rej; + struct format_code *fmt; + uint flag_bits = 0; + long rc = ADAPT_SUCCESS; + + memset(rej, 0, sizeof(*rej)); + + rej->opcode = SRP_LOGIN_REJ; + rej->reason = cpu_to_be32(reason); + rej->tag = cmd->rsp.tag; + fmt = (struct format_code *)&rej->buf_fmt; + fmt->buffers = SUPPORTED_FORMATS; + + cmd->rsp.len = sizeof(*rej); + + dma_wmb(); + rc = h_copy_rdma(cmd->rsp.len, vscsi->dds.window[LOCAL].liobn, + iue->sbuf->dma, vscsi->dds.window[REMOTE].liobn, + be64_to_cpu(iue->remote_token)); + + switch (rc) { + case H_SUCCESS: + break; + case H_PERMISSION: + if (connection_broken(vscsi)) + flag_bits = RESPONSE_Q_DOWN | CLIENT_FAILED; + dev_err(&vscsi->dev, "login_rej: error copying to client, rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, + flag_bits); + break; + case H_SOURCE_PARM: + case H_DEST_PARM: + default: + dev_err(&vscsi->dev, "login_rej: error copying to client, rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + break; + } + + return rc; +} + +static int ibmvscsis_make_nexus(struct ibmvscsis_tport *tport) +{ + char *name = tport->tport_name; + struct ibmvscsis_nexus *nexus; + int rc; + + if (tport->ibmv_nexus) { + pr_debug("tport->ibmv_nexus already exists\n"); + return 0; + } + + nexus = kzalloc(sizeof(*nexus), GFP_KERNEL); + if (!nexus) { + pr_err("Unable to allocate struct ibmvscsis_nexus\n"); + return -ENOMEM; + } + + nexus->se_sess = target_alloc_session(&tport->se_tpg, 0, 0, + TARGET_PROT_NORMAL, name, nexus, + NULL); + if (IS_ERR(nexus->se_sess)) { + rc = PTR_ERR(nexus->se_sess); + goto transport_init_fail; + } + + tport->ibmv_nexus = nexus; + + return 0; + +transport_init_fail: + kfree(nexus); + return rc; +} + +static int ibmvscsis_drop_nexus(struct ibmvscsis_tport *tport) +{ + struct se_session *se_sess; + struct ibmvscsis_nexus *nexus; + + nexus = tport->ibmv_nexus; + if (!nexus) + return -ENODEV; + + se_sess = nexus->se_sess; + if (!se_sess) + return -ENODEV; + + /* + * Release the SCSI I_T Nexus to the emulated ibmvscsis Target Port + */ + transport_deregister_session(se_sess); + tport->ibmv_nexus = NULL; + kfree(nexus); + + return 0; +} + +/** + * ibmvscsis_srp_login() - Process an SRP Login Request + * @vscsi: Pointer to our adapter structure + * @cmd: Command element to use to process the SRP Login request + * @crq: Pointer to CRQ entry containing the SRP Login request + * + * EXECUTION ENVIRONMENT: + * Interrupt, called with interrupt lock held + */ +static long ibmvscsis_srp_login(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd, + struct viosrp_crq *crq) +{ + struct iu_entry *iue = cmd->iue; + struct srp_login_req *req = &vio_iu(iue)->srp.login_req; + struct port_id { + __be64 id_extension; + __be64 io_guid; + } *iport, *tport; + struct format_code *fmt; + u32 reason = 0x0; + long rc = ADAPT_SUCCESS; + + iport = (struct port_id *)req->initiator_port_id; + tport = (struct port_id *)req->target_port_id; + fmt = (struct format_code *)&req->req_buf_fmt; + if (be32_to_cpu(req->req_it_iu_len) > SRP_MAX_IU_LEN) + reason = SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE; + else if (be32_to_cpu(req->req_it_iu_len) < 64) + reason = SRP_LOGIN_REJ_UNABLE_ESTABLISH_CHANNEL; + else if ((be64_to_cpu(iport->id_extension) > (MAX_NUM_PORTS - 1)) || + (be64_to_cpu(tport->id_extension) > (MAX_NUM_PORTS - 1))) + reason = SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL; + else if (req->req_flags & SRP_MULTICHAN_MULTI) + reason = SRP_LOGIN_REJ_MULTI_CHANNEL_UNSUPPORTED; + else if (fmt->buffers & (~SUPPORTED_FORMATS)) + reason = SRP_LOGIN_REJ_UNSUPPORTED_DESCRIPTOR_FMT; + else if ((fmt->buffers | SUPPORTED_FORMATS) == 0) + reason = SRP_LOGIN_REJ_UNSUPPORTED_DESCRIPTOR_FMT; + + if (vscsi->state == SRP_PROCESSING) + reason = SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED; + + rc = ibmvscsis_make_nexus(&vscsi->tport); + if (rc) + reason = SRP_LOGIN_REJ_UNABLE_ESTABLISH_CHANNEL; + + cmd->rsp.format = VIOSRP_SRP_FORMAT; + cmd->rsp.tag = req->tag; + + pr_debug("srp_login: reason 0x%x\n", reason); + + if (reason) + rc = ibmvscsis_srp_login_rej(vscsi, cmd, reason); + else + rc = ibmvscsis_login_rsp(vscsi, cmd); + + if (!rc) { + if (!reason) + vscsi->state = SRP_PROCESSING; + + list_add_tail(&cmd->list, &vscsi->waiting_rsp); + ibmvscsis_send_messages(vscsi); + } else { + ibmvscsis_free_cmd_resources(vscsi, cmd); + } + + pr_debug("Leaving srp_login, rc %ld\n", rc); + return rc; +} + +/** + * ibmvscsis_srp_i_logout() - Helper Function to close I_T Nexus + * @vscsi: Pointer to our adapter structure + * @cmd: Command element to use to process the Implicit Logout request + * @crq: Pointer to CRQ entry containing the Implicit Logout request + * + * Do the logic to close the I_T nexus. This function may not + * behave to specification. + * + * EXECUTION ENVIRONMENT: + * Interrupt, interrupt lock held + */ +static long ibmvscsis_srp_i_logout(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd, + struct viosrp_crq *crq) +{ + struct iu_entry *iue = cmd->iue; + struct srp_i_logout *log_out = &vio_iu(iue)->srp.i_logout; + long rc = ADAPT_SUCCESS; + + if ((vscsi->debit > 0) || !list_empty(&vscsi->schedule_q) || + !list_empty(&vscsi->waiting_rsp)) { + dev_err(&vscsi->dev, "i_logout: outstanding work\n"); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT, 0); + } else { + cmd->rsp.format = SRP_FORMAT; + cmd->rsp.tag = log_out->tag; + cmd->rsp.len = sizeof(struct mad_common); + list_add_tail(&cmd->list, &vscsi->waiting_rsp); + ibmvscsis_send_messages(vscsi); + + ibmvscsis_post_disconnect(vscsi, WAIT_IDLE, 0); + } + + return rc; +} + +/* Called with intr lock held */ +static void ibmvscsis_srp_cmd(struct scsi_info *vscsi, struct viosrp_crq *crq) +{ + struct ibmvscsis_cmd *cmd; + struct iu_entry *iue; + struct srp_cmd *srp; + struct srp_tsk_mgmt *tsk; + long rc; + + if (vscsi->request_limit - vscsi->debit <= 0) { + /* Client has exceeded request limit */ + dev_err(&vscsi->dev, "Client exceeded the request limit (%d), debit %d\n", + vscsi->request_limit, vscsi->debit); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + return; + } + + cmd = ibmvscsis_get_free_cmd(vscsi); + if (!cmd) { + dev_err(&vscsi->dev, "srp_cmd failed to get cmd, debit %d\n", + vscsi->debit); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + return; + } + iue = cmd->iue; + srp = &vio_iu(iue)->srp.cmd; + + rc = ibmvscsis_copy_crq_packet(vscsi, cmd, crq); + if (rc) { + ibmvscsis_free_cmd_resources(vscsi, cmd); + return; + } + + if (vscsi->state == SRP_PROCESSING) { + switch (srp->opcode) { + case SRP_LOGIN_REQ: + rc = ibmvscsis_srp_login(vscsi, cmd, crq); + break; + + case SRP_TSK_MGMT: + tsk = &vio_iu(iue)->srp.tsk_mgmt; + pr_debug("tsk_mgmt tag: %llu (0x%llx)\n", tsk->tag, + tsk->tag); + cmd->rsp.tag = tsk->tag; + vscsi->debit += 1; + cmd->type = TASK_MANAGEMENT; + list_add_tail(&cmd->list, &vscsi->schedule_q); + queue_work(vscsi->work_q, &cmd->work); + break; + + case SRP_CMD: + pr_debug("srp_cmd tag: %llu (0x%llx)\n", srp->tag, + srp->tag); + cmd->rsp.tag = srp->tag; + vscsi->debit += 1; + cmd->type = SCSI_CDB; + /* + * We want to keep track of work waiting for + * the workqueue. + */ + list_add_tail(&cmd->list, &vscsi->schedule_q); + queue_work(vscsi->work_q, &cmd->work); + break; + + case SRP_I_LOGOUT: + rc = ibmvscsis_srp_i_logout(vscsi, cmd, crq); + break; + + case SRP_CRED_RSP: + case SRP_AER_RSP: + default: + ibmvscsis_free_cmd_resources(vscsi, cmd); + dev_err(&vscsi->dev, "invalid srp cmd, opcode %d\n", + (uint)srp->opcode); + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, 0); + break; + } + } else if (srp->opcode == SRP_LOGIN_REQ && vscsi->state == CONNECTED) { + rc = ibmvscsis_srp_login(vscsi, cmd, crq); + } else { + ibmvscsis_free_cmd_resources(vscsi, cmd); + dev_err(&vscsi->dev, "Invalid state %d to handle srp cmd\n", + vscsi->state); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + } +} + +/** + * ibmvscsis_ping_response() - Respond to a ping request + * @vscsi: Pointer to our adapter structure + * + * Let the client know that the server is alive and waiting on + * its native I/O stack. + * If any type of error occurs from the call to queue a ping + * response then the client is either not accepting or receiving + * interrupts. Disconnect with an error. + * + * EXECUTION ENVIRONMENT: + * Interrupt, interrupt lock held + */ +static long ibmvscsis_ping_response(struct scsi_info *vscsi) +{ + struct viosrp_crq *crq; + u64 buffer[2] = { 0, 0 }; + long rc; + + crq = (struct viosrp_crq *)&buffer; + crq->valid = VALID_CMD_RESP_EL; + crq->format = (u8)MESSAGE_IN_CRQ; + crq->status = PING_RESPONSE; + + rc = h_send_crq(vscsi->dds.unit_id, cpu_to_be64(buffer[MSG_HI]), + cpu_to_be64(buffer[MSG_LOW])); + + switch (rc) { + case H_SUCCESS: + break; + case H_CLOSED: + vscsi->flags |= CLIENT_FAILED; + case H_DROPPED: + vscsi->flags |= RESPONSE_Q_DOWN; + case H_REMOTE_PARM: + dev_err(&vscsi->dev, "ping_response: h_send_crq failed, rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + break; + default: + dev_err(&vscsi->dev, "ping_response: h_send_crq returned unknown rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT, 0); + break; + } + + return rc; +} + +/** + * ibmvscsis_handle_init_compl_msg() - Respond to an Init Complete Message + * @vscsi: Pointer to our adapter structure + * + * Must be called with interrupt lock held. + */ +static long ibmvscsis_handle_init_compl_msg(struct scsi_info *vscsi) +{ + long rc = ADAPT_SUCCESS; + + switch (vscsi->state) { + case NO_QUEUE: + case ERR_DISCONNECT: + case ERR_DISCONNECT_RECONNECT: + case ERR_DISCONNECTED: + case UNCONFIGURING: + case UNDEFINED: + rc = ERROR; + break; + + case WAIT_CONNECTION: + vscsi->state = CONNECTED; + break; + + case WAIT_IDLE: + case SRP_PROCESSING: + case CONNECTED: + case WAIT_ENABLED: + case PART_UP_WAIT_ENAB: + default: + rc = ERROR; + dev_err(&vscsi->dev, "init_msg: invalid state %d to get init compl msg\n", + vscsi->state); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + break; + } + + return rc; +} + +/** + * ibmvscsis_handle_init_msg() - Respond to an Init Message + * @vscsi: Pointer to our adapter structure + * + * Must be called with interrupt lock held. + */ +static long ibmvscsis_handle_init_msg(struct scsi_info *vscsi) +{ + long rc = ADAPT_SUCCESS; + + switch (vscsi->state) { + case WAIT_ENABLED: + vscsi->state = PART_UP_WAIT_ENAB; + break; + + case WAIT_CONNECTION: + rc = ibmvscsis_send_init_message(vscsi, INIT_COMPLETE_MSG); + switch (rc) { + case H_SUCCESS: + vscsi->state = CONNECTED; + break; + + case H_PARAMETER: + dev_err(&vscsi->dev, "init_msg: failed to send, rc %ld\n", + rc); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT, 0); + break; + + case H_DROPPED: + dev_err(&vscsi->dev, "init_msg: failed to send, rc %ld\n", + rc); + rc = ERROR; + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, 0); + break; + + case H_CLOSED: + pr_warn("init_msg: failed to send, rc %ld\n", rc); + rc = 0; + break; + } + break; + + case UNDEFINED: + rc = ERROR; + break; + + case UNCONFIGURING: + break; + + case PART_UP_WAIT_ENAB: + case CONNECTED: + case SRP_PROCESSING: + case WAIT_IDLE: + case NO_QUEUE: + case ERR_DISCONNECT: + case ERR_DISCONNECT_RECONNECT: + case ERR_DISCONNECTED: + default: + rc = ERROR; + dev_err(&vscsi->dev, "init_msg: invalid state %d to get init msg\n", + vscsi->state); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + break; + } + + return rc; +} + +/** + * ibmvscsis_init_msg() - Respond to an init message + * @vscsi: Pointer to our adapter structure + * @crq: Pointer to CRQ element containing the Init Message + * + * EXECUTION ENVIRONMENT: + * Interrupt, interrupt lock held + */ +static long ibmvscsis_init_msg(struct scsi_info *vscsi, struct viosrp_crq *crq) +{ + long rc = ADAPT_SUCCESS; + + pr_debug("init_msg: state 0x%hx\n", vscsi->state); + + rc = h_vioctl(vscsi->dds.unit_id, H_GET_PARTNER_INFO, + (u64)vscsi->map_ioba | ((u64)PAGE_SIZE << 32), 0, 0, 0, + 0); + if (rc == H_SUCCESS) { + vscsi->client_data.partition_number = + be64_to_cpu(*(u64 *)vscsi->map_buf); + pr_debug("init_msg, part num %d\n", + vscsi->client_data.partition_number); + } else { + pr_debug("init_msg h_vioctl rc %ld\n", rc); + rc = ADAPT_SUCCESS; + } + + if (crq->format == INIT_MSG) { + rc = ibmvscsis_handle_init_msg(vscsi); + } else if (crq->format == INIT_COMPLETE_MSG) { + rc = ibmvscsis_handle_init_compl_msg(vscsi); + } else { + rc = ERROR; + dev_err(&vscsi->dev, "init_msg: invalid format %d\n", + (uint)crq->format); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + } + + return rc; +} + +/** + * ibmvscsis_parse_command() - Parse an element taken from the cmd rsp queue. + * @vscsi: Pointer to our adapter structure + * @crq: Pointer to CRQ element containing the SRP request + * + * This function will return success if the command queue element is valid + * and the srp iu or MAD request it pointed to was also valid. That does + * not mean that an error was not returned to the client. + * + * EXECUTION ENVIRONMENT: + * Interrupt, intr lock held + */ +static long ibmvscsis_parse_command(struct scsi_info *vscsi, + struct viosrp_crq *crq) +{ + long rc = ADAPT_SUCCESS; + + switch (crq->valid) { + case VALID_CMD_RESP_EL: + switch (crq->format) { + case OS400_FORMAT: + case AIX_FORMAT: + case LINUX_FORMAT: + case MAD_FORMAT: + if (vscsi->flags & PROCESSING_MAD) { + rc = ERROR; + dev_err(&vscsi->dev, "parse_command: already processing mad\n"); + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, + 0); + } else { + vscsi->flags |= PROCESSING_MAD; + rc = ibmvscsis_mad(vscsi, crq); + } + break; + + case SRP_FORMAT: + ibmvscsis_srp_cmd(vscsi, crq); + break; + + case MESSAGE_IN_CRQ: + if (crq->status == PING) + ibmvscsis_ping_response(vscsi); + break; + + default: + dev_err(&vscsi->dev, "parse_command: invalid format %d\n", + (uint)crq->format); + ibmvscsis_post_disconnect(vscsi, + ERR_DISCONNECT_RECONNECT, 0); + break; + } + break; + + case VALID_TRANS_EVENT: + rc = ibmvscsis_trans_event(vscsi, crq); + break; + + case VALID_INIT_MSG: + rc = ibmvscsis_init_msg(vscsi, crq); + break; + + default: + dev_err(&vscsi->dev, "parse_command: invalid valid field %d\n", + (uint)crq->valid); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + break; + } + + /* + * Return only what the interrupt handler cares + * about. Most errors we keep right on trucking. + */ + rc = vscsi->flags & SCHEDULE_DISCONNECT; + + return rc; +} + +static int read_dma_window(struct scsi_info *vscsi) +{ + struct vio_dev *vdev = vscsi->dma_dev; + const __be32 *dma_window; + const __be32 *prop; + + /* TODO Using of_parse_dma_window would be better, but it doesn't give + * a way to read multiple windows without already knowing the size of + * a window or the number of windows. + */ + dma_window = (const __be32 *)vio_get_attribute(vdev, + "ibm,my-dma-window", + NULL); + if (!dma_window) { + pr_err("Couldn't find ibm,my-dma-window property\n"); + return -1; + } + + vscsi->dds.window[LOCAL].liobn = be32_to_cpu(*dma_window); + dma_window++; + + prop = (const __be32 *)vio_get_attribute(vdev, "ibm,#dma-address-cells", + NULL); + if (!prop) { + pr_warn("Couldn't find ibm,#dma-address-cells property\n"); + dma_window++; + } else { + dma_window += be32_to_cpu(*prop); + } + + prop = (const __be32 *)vio_get_attribute(vdev, "ibm,#dma-size-cells", + NULL); + if (!prop) { + pr_warn("Couldn't find ibm,#dma-size-cells property\n"); + dma_window++; + } else { + dma_window += be32_to_cpu(*prop); + } + + /* dma_window should point to the second window now */ + vscsi->dds.window[REMOTE].liobn = be32_to_cpu(*dma_window); + + return 0; +} + +static struct ibmvscsis_tport *ibmvscsis_lookup_port(const char *name) +{ + struct ibmvscsis_tport *tport = NULL; + struct vio_dev *vdev; + struct scsi_info *vscsi; + + spin_lock_bh(&ibmvscsis_dev_lock); + list_for_each_entry(vscsi, &ibmvscsis_dev_list, list) { + vdev = vscsi->dma_dev; + if (!strcmp(dev_name(&vdev->dev), name)) { + tport = &vscsi->tport; + break; + } + } + spin_unlock_bh(&ibmvscsis_dev_lock); + + return tport; +} + +/** + * ibmvscsis_parse_cmd() - Parse SRP Command + * @vscsi: Pointer to our adapter structure + * @cmd: Pointer to command element with SRP command + * + * Parse the srp command; if it is valid then submit it to tcm. + * Note: The return code does not reflect the status of the SCSI CDB. + * + * EXECUTION ENVIRONMENT: + * Process level + */ +static void ibmvscsis_parse_cmd(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd) +{ + struct iu_entry *iue = cmd->iue; + struct srp_cmd *srp = (struct srp_cmd *)iue->sbuf->buf; + struct ibmvscsis_nexus *nexus; + u64 data_len = 0; + enum dma_data_direction dir; + int attr = 0; + int rc = 0; + + nexus = vscsi->tport.ibmv_nexus; + /* + * additional length in bytes. Note that the SRP spec says that + * additional length is in 4-byte words, but technically the + * additional length field is only the upper 6 bits of the byte. + * The lower 2 bits are reserved. If the lower 2 bits are 0 (as + * all reserved fields should be), then interpreting the byte as + * an int will yield the length in bytes. + */ + if (srp->add_cdb_len & 0x03) { + dev_err(&vscsi->dev, "parse_cmd: reserved bits set in IU\n"); + spin_lock_bh(&vscsi->intr_lock); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + ibmvscsis_free_cmd_resources(vscsi, cmd); + spin_unlock_bh(&vscsi->intr_lock); + return; + } + + if (srp_get_desc_table(srp, &dir, &data_len)) { + dev_err(&vscsi->dev, "0x%llx: parsing SRP descriptor table failed.\n", + srp->tag); + goto fail; + return; + } + + cmd->rsp.sol_not = srp->sol_not; + + switch (srp->task_attr) { + case SRP_SIMPLE_TASK: + attr = TCM_SIMPLE_TAG; + break; + case SRP_ORDERED_TASK: + attr = TCM_ORDERED_TAG; + break; + case SRP_HEAD_TASK: + attr = TCM_HEAD_TAG; + break; + case SRP_ACA_TASK: + attr = TCM_ACA_TAG; + break; + default: + dev_err(&vscsi->dev, "Invalid task attribute %d\n", + srp->task_attr); + goto fail; + } + + cmd->se_cmd.tag = be64_to_cpu(srp->tag); + + spin_lock_bh(&vscsi->intr_lock); + list_add_tail(&cmd->list, &vscsi->active_q); + spin_unlock_bh(&vscsi->intr_lock); + + srp->lun.scsi_lun[0] &= 0x3f; + + pr_debug("calling submit_cmd, se_cmd %p, lun 0x%llx, cdb 0x%x, attr:%d\n", + &cmd->se_cmd, scsilun_to_int(&srp->lun), (int)srp->cdb[0], + attr); + + rc = target_submit_cmd(&cmd->se_cmd, nexus->se_sess, srp->cdb, + cmd->sense_buf, scsilun_to_int(&srp->lun), + data_len, attr, dir, 0); + if (rc) { + dev_err(&vscsi->dev, "target_submit_cmd failed, rc %d\n", rc); + goto fail; + } + return; + +fail: + spin_lock_bh(&vscsi->intr_lock); + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + spin_unlock_bh(&vscsi->intr_lock); +} + +/** + * ibmvscsis_parse_task() - Parse SRP Task Management Request + * @vscsi: Pointer to our adapter structure + * @cmd: Pointer to command element with SRP task management request + * + * Parse the srp task management request; if it is valid then submit it to tcm. + * Note: The return code does not reflect the status of the task management + * request. + * + * EXECUTION ENVIRONMENT: + * Processor level + */ +static void ibmvscsis_parse_task(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd) +{ + struct iu_entry *iue = cmd->iue; + struct srp_tsk_mgmt *srp_tsk = &vio_iu(iue)->srp.tsk_mgmt; + int tcm_type; + u64 tag_to_abort = 0; + int rc = 0; + struct ibmvscsis_nexus *nexus; + + nexus = vscsi->tport.ibmv_nexus; + + cmd->rsp.sol_not = srp_tsk->sol_not; + + switch (srp_tsk->tsk_mgmt_func) { + case SRP_TSK_ABORT_TASK: + tcm_type = TMR_ABORT_TASK; + tag_to_abort = be64_to_cpu(srp_tsk->task_tag); + break; + case SRP_TSK_ABORT_TASK_SET: + tcm_type = TMR_ABORT_TASK_SET; + break; + case SRP_TSK_CLEAR_TASK_SET: + tcm_type = TMR_CLEAR_TASK_SET; + break; + case SRP_TSK_LUN_RESET: + tcm_type = TMR_LUN_RESET; + break; + case SRP_TSK_CLEAR_ACA: + tcm_type = TMR_CLEAR_ACA; + break; + default: + dev_err(&vscsi->dev, "unknown task mgmt func %d\n", + srp_tsk->tsk_mgmt_func); + cmd->se_cmd.se_tmr_req->response = + TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED; + rc = -1; + break; + } + + if (!rc) { + cmd->se_cmd.tag = be64_to_cpu(srp_tsk->tag); + + spin_lock_bh(&vscsi->intr_lock); + list_add_tail(&cmd->list, &vscsi->active_q); + spin_unlock_bh(&vscsi->intr_lock); + + srp_tsk->lun.scsi_lun[0] &= 0x3f; + + pr_debug("calling submit_tmr, func %d\n", + srp_tsk->tsk_mgmt_func); + rc = target_submit_tmr(&cmd->se_cmd, nexus->se_sess, NULL, + scsilun_to_int(&srp_tsk->lun), srp_tsk, + tcm_type, GFP_KERNEL, tag_to_abort, 0); + if (rc) { + dev_err(&vscsi->dev, "target_submit_tmr failed, rc %d\n", + rc); + cmd->se_cmd.se_tmr_req->response = + TMR_FUNCTION_REJECTED; + } + } + + if (rc) + transport_send_check_condition_and_sense(&cmd->se_cmd, 0, 0); +} + +static void ibmvscsis_scheduler(struct work_struct *work) +{ + struct ibmvscsis_cmd *cmd = container_of(work, struct ibmvscsis_cmd, + work); + struct scsi_info *vscsi = cmd->adapter; + + spin_lock_bh(&vscsi->intr_lock); + + /* Remove from schedule_q */ + list_del(&cmd->list); + + /* Don't submit cmd if we're disconnecting */ + if (vscsi->flags & (SCHEDULE_DISCONNECT | DISCONNECT_SCHEDULED)) { + ibmvscsis_free_cmd_resources(vscsi, cmd); + + /* ibmvscsis_disconnect might be waiting for us */ + if (list_empty(&vscsi->active_q) && + list_empty(&vscsi->schedule_q) && + (vscsi->flags & WAIT_FOR_IDLE)) { + vscsi->flags &= ~WAIT_FOR_IDLE; + complete(&vscsi->wait_idle); + } + + spin_unlock_bh(&vscsi->intr_lock); + return; + } + + spin_unlock_bh(&vscsi->intr_lock); + + switch (cmd->type) { + case SCSI_CDB: + ibmvscsis_parse_cmd(vscsi, cmd); + break; + case TASK_MANAGEMENT: + ibmvscsis_parse_task(vscsi, cmd); + break; + default: + dev_err(&vscsi->dev, "scheduler, invalid cmd type %d\n", + cmd->type); + spin_lock_bh(&vscsi->intr_lock); + ibmvscsis_free_cmd_resources(vscsi, cmd); + spin_unlock_bh(&vscsi->intr_lock); + break; + } +} + +static int ibmvscsis_alloc_cmds(struct scsi_info *vscsi, int num) +{ + struct ibmvscsis_cmd *cmd; + int i; + + INIT_LIST_HEAD(&vscsi->free_cmd); + vscsi->cmd_pool = kcalloc(num, sizeof(struct ibmvscsis_cmd), + GFP_KERNEL); + if (!vscsi->cmd_pool) + return -ENOMEM; + + for (i = 0, cmd = (struct ibmvscsis_cmd *)vscsi->cmd_pool; i < num; + i++, cmd++) { + cmd->adapter = vscsi; + INIT_WORK(&cmd->work, ibmvscsis_scheduler); + list_add_tail(&cmd->list, &vscsi->free_cmd); + } + + return 0; +} + +static void ibmvscsis_free_cmds(struct scsi_info *vscsi) +{ + kfree(vscsi->cmd_pool); + vscsi->cmd_pool = NULL; + INIT_LIST_HEAD(&vscsi->free_cmd); +} + +/** + * ibmvscsis_service_wait_q() - Service Waiting Queue + * @timer: Pointer to timer which has expired + * + * This routine is called when the timer pops to service the waiting + * queue. Elements on the queue have completed, their responses have been + * copied to the client, but the client's response queue was full so + * the queue message could not be sent. The routine grabs the proper locks + * and calls send messages. + * + * EXECUTION ENVIRONMENT: + * called at interrupt level + */ +static enum hrtimer_restart ibmvscsis_service_wait_q(struct hrtimer *timer) +{ + struct timer_cb *p_timer = container_of(timer, struct timer_cb, timer); + struct scsi_info *vscsi = container_of(p_timer, struct scsi_info, + rsp_q_timer); + + spin_lock_bh(&vscsi->intr_lock); + p_timer->timer_pops += 1; + p_timer->started = false; + ibmvscsis_send_messages(vscsi); + spin_unlock_bh(&vscsi->intr_lock); + + return HRTIMER_NORESTART; +} + +static long ibmvscsis_alloctimer(struct scsi_info *vscsi) +{ + struct timer_cb *p_timer; + + p_timer = &vscsi->rsp_q_timer; + hrtimer_init(&p_timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + + p_timer->timer.function = ibmvscsis_service_wait_q; + p_timer->started = false; + p_timer->timer_pops = 0; + + return ADAPT_SUCCESS; +} + +static void ibmvscsis_freetimer(struct scsi_info *vscsi) +{ + struct timer_cb *p_timer; + + p_timer = &vscsi->rsp_q_timer; + + (void)hrtimer_cancel(&p_timer->timer); + + p_timer->started = false; + p_timer->timer_pops = 0; +} + +static irqreturn_t ibmvscsis_interrupt(int dummy, void *data) +{ + struct scsi_info *vscsi = data; + + vio_disable_interrupts(vscsi->dma_dev); + tasklet_schedule(&vscsi->work_task); + + return IRQ_HANDLED; +} + +/** + * ibmvscsis_check_q() - Helper function to Check Init Message Valid + * @vscsi: Pointer to our adapter structure + * + * Checks if a initialize message was queued by the initiatior + * while the timing window was open. This function is called from + * probe after the CRQ is created and interrupts are enabled. + * It would only be used by adapters who wait for some event before + * completing the init handshake with the client. For ibmvscsi, this + * event is waiting for the port to be enabled. + * + * EXECUTION ENVIRONMENT: + * Process level only, interrupt lock held + */ +static long ibmvscsis_check_q(struct scsi_info *vscsi) +{ + uint format; + long rc; + + rc = ibmvscsis_check_init_msg(vscsi, &format); + if (rc) + ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT_RECONNECT, 0); + else if (format == UNUSED_FORMAT) + vscsi->state = WAIT_ENABLED; + else + vscsi->state = PART_UP_WAIT_ENAB; + + return rc; +} + +/** + * ibmvscsis_enable_change_state() - Set new state based on enabled status + * @vscsi: Pointer to our adapter structure + * + * This function determines our new state now that we are enabled. This + * may involve sending an Init Complete message to the client. + * + * Must be called with interrupt lock held. + */ +static long ibmvscsis_enable_change_state(struct scsi_info *vscsi) +{ + long rc = ADAPT_SUCCESS; + +handle_state_change: + switch (vscsi->state) { + case WAIT_ENABLED: + rc = ibmvscsis_send_init_message(vscsi, INIT_MSG); + switch (rc) { + case H_SUCCESS: + case H_DROPPED: + case H_CLOSED: + vscsi->state = WAIT_CONNECTION; + rc = ADAPT_SUCCESS; + break; + + case H_PARAMETER: + break; + + case H_HARDWARE: + break; + + default: + vscsi->state = UNDEFINED; + rc = H_HARDWARE; + break; + } + break; + case PART_UP_WAIT_ENAB: + rc = ibmvscsis_send_init_message(vscsi, INIT_COMPLETE_MSG); + switch (rc) { + case H_SUCCESS: + vscsi->state = CONNECTED; + rc = ADAPT_SUCCESS; + break; + + case H_DROPPED: + case H_CLOSED: + vscsi->state = WAIT_ENABLED; + goto handle_state_change; + + case H_PARAMETER: + break; + + case H_HARDWARE: + break; + + default: + rc = H_HARDWARE; + break; + } + break; + + case WAIT_CONNECTION: + case WAIT_IDLE: + case SRP_PROCESSING: + case CONNECTED: + rc = ADAPT_SUCCESS; + break; + /* should not be able to get here */ + case UNCONFIGURING: + rc = ERROR; + vscsi->state = UNDEFINED; + break; + + /* driver should never allow this to happen */ + case ERR_DISCONNECT: + case ERR_DISCONNECT_RECONNECT: + default: + dev_err(&vscsi->dev, "in invalid state %d during enable_change_state\n", + vscsi->state); + rc = ADAPT_SUCCESS; + break; + } + + return rc; +} + +/** + * ibmvscsis_create_command_q() - Create Command Queue + * @vscsi: Pointer to our adapter structure + * @num_cmds: Currently unused. In the future, may be used to determine + * the size of the CRQ. + * + * Allocates memory for command queue maps remote memory into an ioba + * initializes the command response queue + * + * EXECUTION ENVIRONMENT: + * Process level only + */ +static long ibmvscsis_create_command_q(struct scsi_info *vscsi, int num_cmds) +{ + long rc = 0; + int pages; + struct vio_dev *vdev = vscsi->dma_dev; + + /* We might support multiple pages in the future, but just 1 for now */ + pages = 1; + + vscsi->cmd_q.size = pages; + + vscsi->cmd_q.base_addr = + (struct viosrp_crq *)get_zeroed_page(GFP_KERNEL); + if (!vscsi->cmd_q.base_addr) + return -ENOMEM; + + vscsi->cmd_q.mask = ((uint)pages * CRQ_PER_PAGE) - 1; + + vscsi->cmd_q.crq_token = dma_map_single(&vdev->dev, + vscsi->cmd_q.base_addr, + PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(&vdev->dev, vscsi->cmd_q.crq_token)) { + free_page((unsigned long)vscsi->cmd_q.base_addr); + return -ENOMEM; + } + + rc = h_reg_crq(vscsi->dds.unit_id, vscsi->cmd_q.crq_token, PAGE_SIZE); + if (rc) { + if (rc == H_CLOSED) { + vscsi->state = WAIT_ENABLED; + rc = 0; + } else { + dma_unmap_single(&vdev->dev, vscsi->cmd_q.crq_token, + PAGE_SIZE, DMA_BIDIRECTIONAL); + free_page((unsigned long)vscsi->cmd_q.base_addr); + rc = -ENODEV; + } + } else { + vscsi->state = WAIT_ENABLED; + } + + return rc; +} + +/** + * ibmvscsis_destroy_command_q - Destroy Command Queue + * @vscsi: Pointer to our adapter structure + * + * Releases memory for command queue and unmaps mapped remote memory. + * + * EXECUTION ENVIRONMENT: + * Process level only + */ +static void ibmvscsis_destroy_command_q(struct scsi_info *vscsi) +{ + dma_unmap_single(&vscsi->dma_dev->dev, vscsi->cmd_q.crq_token, + PAGE_SIZE, DMA_BIDIRECTIONAL); + free_page((unsigned long)vscsi->cmd_q.base_addr); + vscsi->cmd_q.base_addr = NULL; + vscsi->state = NO_QUEUE; +} + +static u8 ibmvscsis_fast_fail(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd) +{ + struct iu_entry *iue = cmd->iue; + struct se_cmd *se_cmd = &cmd->se_cmd; + struct srp_cmd *srp = (struct srp_cmd *)iue->sbuf->buf; + struct scsi_sense_hdr sshdr; + u8 rc = se_cmd->scsi_status; + + if (vscsi->fast_fail && (READ_CMD(srp->cdb) || WRITE_CMD(srp->cdb))) + if (scsi_normalize_sense(se_cmd->sense_buffer, + se_cmd->scsi_sense_length, &sshdr)) + if (sshdr.sense_key == HARDWARE_ERROR && + (se_cmd->residual_count == 0 || + se_cmd->residual_count == se_cmd->data_length)) { + rc = NO_SENSE; + cmd->flags |= CMD_FAST_FAIL; + } + + return rc; +} + +/** + * srp_build_response() - Build an SRP response buffer + * @vscsi: Pointer to our adapter structure + * @cmd: Pointer to command for which to send the response + * @len_p: Where to return the length of the IU response sent. This + * is needed to construct the CRQ response. + * + * Build the SRP response buffer and copy it to the client's memory space. + */ +static long srp_build_response(struct scsi_info *vscsi, + struct ibmvscsis_cmd *cmd, uint *len_p) +{ + struct iu_entry *iue = cmd->iue; + struct se_cmd *se_cmd = &cmd->se_cmd; + struct srp_rsp *rsp; + uint len; + u32 rsp_code; + char *data; + u32 *tsk_status; + long rc = ADAPT_SUCCESS; + + spin_lock_bh(&vscsi->intr_lock); + + rsp = &vio_iu(iue)->srp.rsp; + len = sizeof(*rsp); + memset(rsp, 0, len); + data = rsp->data; + + rsp->opcode = SRP_RSP; + + if (vscsi->credit > 0 && vscsi->state == SRP_PROCESSING) + rsp->req_lim_delta = cpu_to_be32(vscsi->credit); + else + rsp->req_lim_delta = cpu_to_be32(1 + vscsi->credit); + rsp->tag = cmd->rsp.tag; + rsp->flags = 0; + + if (cmd->type == SCSI_CDB) { + rsp->status = ibmvscsis_fast_fail(vscsi, cmd); + if (rsp->status) { + pr_debug("build_resp: cmd %p, scsi status %d\n", cmd, + (int)rsp->status); + ibmvscsis_determine_resid(se_cmd, rsp); + if (se_cmd->scsi_sense_length && se_cmd->sense_buffer) { + rsp->sense_data_len = + cpu_to_be32(se_cmd->scsi_sense_length); + rsp->flags |= SRP_RSP_FLAG_SNSVALID; + len += se_cmd->scsi_sense_length; + memcpy(data, se_cmd->sense_buffer, + se_cmd->scsi_sense_length); + } + rsp->sol_not = (cmd->rsp.sol_not & UCSOLNT) >> + UCSOLNT_RESP_SHIFT; + } else if (cmd->flags & CMD_FAST_FAIL) { + pr_debug("build_resp: cmd %p, fast fail\n", cmd); + rsp->sol_not = (cmd->rsp.sol_not & UCSOLNT) >> + UCSOLNT_RESP_SHIFT; + } else { + rsp->sol_not = (cmd->rsp.sol_not & SCSOLNT) >> + SCSOLNT_RESP_SHIFT; + } + } else { + /* this is task management */ + rsp->status = 0; + rsp->resp_data_len = cpu_to_be32(4); + rsp->flags |= SRP_RSP_FLAG_RSPVALID; + + switch (se_cmd->se_tmr_req->response) { + case TMR_FUNCTION_COMPLETE: + case TMR_TASK_DOES_NOT_EXIST: + rsp_code = SRP_TASK_MANAGEMENT_FUNCTION_COMPLETE; + rsp->sol_not = (cmd->rsp.sol_not & SCSOLNT) >> + SCSOLNT_RESP_SHIFT; + break; + case TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED: + case TMR_LUN_DOES_NOT_EXIST: + rsp_code = SRP_TASK_MANAGEMENT_FUNCTION_NOT_SUPPORTED; + rsp->sol_not = (cmd->rsp.sol_not & UCSOLNT) >> + UCSOLNT_RESP_SHIFT; + break; + case TMR_FUNCTION_FAILED: + case TMR_FUNCTION_REJECTED: + default: + rsp_code = SRP_TASK_MANAGEMENT_FUNCTION_FAILED; + rsp->sol_not = (cmd->rsp.sol_not & UCSOLNT) >> + UCSOLNT_RESP_SHIFT; + break; + } + + tsk_status = (u32 *)data; + *tsk_status = cpu_to_be32(rsp_code); + data = (char *)(tsk_status + 1); + len += 4; + } + + dma_wmb(); + rc = h_copy_rdma(len, vscsi->dds.window[LOCAL].liobn, iue->sbuf->dma, + vscsi->dds.window[REMOTE].liobn, + be64_to_cpu(iue->remote_token)); + + switch (rc) { + case H_SUCCESS: + vscsi->credit = 0; + *len_p = len; + break; + case H_PERMISSION: + if (connection_broken(vscsi)) + vscsi->flags |= RESPONSE_Q_DOWN | CLIENT_FAILED; + + dev_err(&vscsi->dev, "build_response: error copying to client, rc %ld, flags 0x%x, state 0x%hx\n", + rc, vscsi->flags, vscsi->state); + break; + case H_SOURCE_PARM: + case H_DEST_PARM: + default: + dev_err(&vscsi->dev, "build_response: error copying to client, rc %ld\n", + rc); + break; + } + + spin_unlock_bh(&vscsi->intr_lock); + + return rc; +} + +static int ibmvscsis_rdma(struct ibmvscsis_cmd *cmd, struct scatterlist *sg, + int nsg, struct srp_direct_buf *md, int nmd, + enum dma_data_direction dir, unsigned int bytes) +{ + struct iu_entry *iue = cmd->iue; + struct srp_target *target = iue->target; + struct scsi_info *vscsi = target->ldata; + struct scatterlist *sgp; + dma_addr_t client_ioba, server_ioba; + ulong buf_len; + ulong client_len, server_len; + int md_idx; + long tx_len; + long rc = 0; + + pr_debug("rdma: dir %d, bytes 0x%x\n", dir, bytes); + + if (bytes == 0) + return 0; + + sgp = sg; + client_len = 0; + server_len = 0; + md_idx = 0; + tx_len = bytes; + + do { + if (client_len == 0) { + if (md_idx >= nmd) { + dev_err(&vscsi->dev, "rdma: ran out of client memory descriptors\n"); + rc = -EIO; + break; + } + client_ioba = be64_to_cpu(md[md_idx].va); + client_len = be32_to_cpu(md[md_idx].len); + } + if (server_len == 0) { + if (!sgp) { + dev_err(&vscsi->dev, "rdma: ran out of scatter/gather list\n"); + rc = -EIO; + break; + } + server_ioba = sg_dma_address(sgp); + server_len = sg_dma_len(sgp); + } + + buf_len = tx_len; + + if (buf_len > client_len) + buf_len = client_len; + + if (buf_len > server_len) + buf_len = server_len; + + if (buf_len > max_vdma_size) + buf_len = max_vdma_size; + + if (dir == DMA_TO_DEVICE) { + /* read from client */ + rc = h_copy_rdma(buf_len, + vscsi->dds.window[REMOTE].liobn, + client_ioba, + vscsi->dds.window[LOCAL].liobn, + server_ioba); + } else { + /* write to client */ + struct srp_cmd *srp = (struct srp_cmd *)iue->sbuf->buf; + + if (!READ_CMD(srp->cdb)) + print_hex_dump_bytes(" data:", DUMP_PREFIX_NONE, + sg_virt(sgp), buf_len); + /* The h_copy_rdma will cause phyp, running in another + * partition, to read memory, so we need to make sure + * the data has been written out, hence these syncs. + */ + /* ensure that everything is in memory */ + isync(); + /* ensure that memory has been made visible */ + dma_wmb(); + rc = h_copy_rdma(buf_len, + vscsi->dds.window[LOCAL].liobn, + server_ioba, + vscsi->dds.window[REMOTE].liobn, + client_ioba); + } + switch (rc) { + case H_SUCCESS: + break; + case H_PERMISSION: + case H_SOURCE_PARM: + case H_DEST_PARM: + if (connection_broken(vscsi)) { + spin_lock_bh(&vscsi->intr_lock); + vscsi->flags |= + (RESPONSE_Q_DOWN | CLIENT_FAILED); + spin_unlock_bh(&vscsi->intr_lock); + } + dev_err(&vscsi->dev, "rdma: h_copy_rdma failed, rc %ld\n", + rc); + break; + + default: + dev_err(&vscsi->dev, "rdma: unknown error %ld from h_copy_rdma\n", + rc); + break; + } + + if (!rc) { + tx_len -= buf_len; + if (tx_len) { + client_len -= buf_len; + if (client_len == 0) + md_idx++; + else + client_ioba += buf_len; + + server_len -= buf_len; + if (server_len == 0) + sgp = sg_next(sgp); + else + server_ioba += buf_len; + } else { + break; + } + } + } while (!rc); + + return rc; +} + +/** + * ibmvscsis_handle_crq() - Handle CRQ + * @data: Pointer to our adapter structure + * + * Read the command elements from the command queue and copy the payloads + * associated with the command elements to local memory and execute the + * SRP requests. + * + * Note: this is an edge triggered interrupt. It can not be shared. + */ +static void ibmvscsis_handle_crq(unsigned long data) +{ + struct scsi_info *vscsi = (struct scsi_info *)data; + struct viosrp_crq *crq; + long rc; + bool ack = true; + volatile u8 valid; + + spin_lock_bh(&vscsi->intr_lock); + + pr_debug("got interrupt\n"); + + /* + * if we are in a path where we are waiting for all pending commands + * to complete because we received a transport event and anything in + * the command queue is for a new connection, do nothing + */ + if (TARGET_STOP(vscsi)) { + vio_enable_interrupts(vscsi->dma_dev); + + pr_debug("handle_crq, don't process: flags 0x%x, state 0x%hx\n", + vscsi->flags, vscsi->state); + spin_unlock_bh(&vscsi->intr_lock); + return; + } + + rc = vscsi->flags & SCHEDULE_DISCONNECT; + crq = vscsi->cmd_q.base_addr + vscsi->cmd_q.index; + valid = crq->valid; + dma_rmb(); + + while (valid) { + /* + * These are edege triggered interrupts. After dropping out of + * the while loop, the code must check for work since an + * interrupt could be lost, and an elment be left on the queue, + * hence the label. + */ +cmd_work: + vscsi->cmd_q.index = + (vscsi->cmd_q.index + 1) & vscsi->cmd_q.mask; + + if (!rc) { + rc = ibmvscsis_parse_command(vscsi, crq); + } else { + if ((uint)crq->valid == VALID_TRANS_EVENT) { + /* + * must service the transport layer events even + * in an error state, dont break out until all + * the consecutive transport events have been + * processed + */ + rc = ibmvscsis_trans_event(vscsi, crq); + } else if (vscsi->flags & TRANS_EVENT) { + /* + * if a tranport event has occurred leave + * everything but transport events on the queue + */ + pr_debug("handle_crq, ignoring\n"); + + /* + * need to decrement the queue index so we can + * look at the elment again + */ + if (vscsi->cmd_q.index) + vscsi->cmd_q.index -= 1; + else + /* + * index is at 0 it just wrapped. + * have it index last element in q + */ + vscsi->cmd_q.index = vscsi->cmd_q.mask; + break; + } + } + + crq->valid = INVALIDATE_CMD_RESP_EL; + + crq = vscsi->cmd_q.base_addr + vscsi->cmd_q.index; + valid = crq->valid; + dma_rmb(); + } + + if (!rc) { + if (ack) { + vio_enable_interrupts(vscsi->dma_dev); + ack = false; + pr_debug("handle_crq, reenabling interrupts\n"); + } + valid = crq->valid; + dma_rmb(); + if (valid) + goto cmd_work; + } else { + pr_debug("handle_crq, error: flags 0x%x, state 0x%hx, crq index 0x%x\n", + vscsi->flags, vscsi->state, vscsi->cmd_q.index); + } + + pr_debug("Leaving handle_crq: schedule_q empty %d, flags 0x%x, state 0x%hx\n", + (int)list_empty(&vscsi->schedule_q), vscsi->flags, + vscsi->state); + + spin_unlock_bh(&vscsi->intr_lock); +} + +static int ibmvscsis_probe(struct vio_dev *vdev, + const struct vio_device_id *id) +{ + struct scsi_info *vscsi; + int rc = 0; + long hrc = 0; + char wq_name[24]; + + vscsi = kzalloc(sizeof(*vscsi), GFP_KERNEL); + if (!vscsi) { + rc = -ENOMEM; + pr_err("probe: allocation of adapter failed\n"); + return rc; + } + + vscsi->dma_dev = vdev; + vscsi->dev = vdev->dev; + INIT_LIST_HEAD(&vscsi->schedule_q); + INIT_LIST_HEAD(&vscsi->waiting_rsp); + INIT_LIST_HEAD(&vscsi->active_q); + + snprintf(vscsi->tport.tport_name, 256, "%s", dev_name(&vdev->dev)); + + pr_debug("probe tport_name: %s\n", vscsi->tport.tport_name); + + rc = read_dma_window(vscsi); + if (rc) + goto free_adapter; + pr_debug("Probe: liobn 0x%x, riobn 0x%x\n", + vscsi->dds.window[LOCAL].liobn, + vscsi->dds.window[REMOTE].liobn); + + strcpy(vscsi->eye, "VSCSI "); + strncat(vscsi->eye, vdev->name, MAX_EYE); + + vscsi->dds.unit_id = vdev->unit_address; + + spin_lock_bh(&ibmvscsis_dev_lock); + list_add_tail(&vscsi->list, &ibmvscsis_dev_list); + spin_unlock_bh(&ibmvscsis_dev_lock); + + /* + * TBD: How do we determine # of cmds to request? Do we know how + * many "children" we have? + */ + vscsi->request_limit = INITIAL_SRP_LIMIT; + rc = srp_target_alloc(&vscsi->target, &vdev->dev, vscsi->request_limit, + SRP_MAX_IU_LEN); + if (rc) + goto rem_list; + + vscsi->target.ldata = vscsi; + + rc = ibmvscsis_alloc_cmds(vscsi, vscsi->request_limit); + if (rc) { + dev_err(&vscsi->dev, "alloc_cmds failed, rc %d, num %d\n", + rc, vscsi->request_limit); + goto free_target; + } + + /* + * Note: the lock is used in freeing timers, so must initialize + * first so that ordering in case of error is correct. + */ + spin_lock_init(&vscsi->intr_lock); + + rc = ibmvscsis_alloctimer(vscsi); + if (rc) { + dev_err(&vscsi->dev, "probe: alloctimer failed, rc %d\n", rc); + goto free_cmds; + } + + rc = ibmvscsis_create_command_q(vscsi, 256); + if (rc) { + dev_err(&vscsi->dev, "probe: create_command_q failed, rc %d\n", + rc); + goto free_timer; + } + + vscsi->map_buf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!vscsi->map_buf) { + rc = -ENOMEM; + dev_err(&vscsi->dev, "probe: allocating cmd buffer failed\n"); + goto destroy_queue; + } + + vscsi->map_ioba = dma_map_single(&vdev->dev, vscsi->map_buf, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&vdev->dev, vscsi->map_ioba)) { + dev_err(&vscsi->dev, "probe: error mapping command buffer\n"); + goto free_buf; + } + + hrc = h_vioctl(vscsi->dds.unit_id, H_GET_PARTNER_INFO, + (u64)vscsi->map_ioba | ((u64)PAGE_SIZE << 32), 0, 0, 0, + 0); + if (hrc == H_SUCCESS) + vscsi->client_data.partition_number = + be64_to_cpu(*(u64 *)vscsi->map_buf); + /* + * We expect the VIOCTL to fail if we're configured as "any + * client can connect" and the client isn't activated yet. + * We'll make the call again when he sends an init msg. + */ + pr_debug("probe hrc %ld, client partition num %d\n", + hrc, vscsi->client_data.partition_number); + + tasklet_init(&vscsi->work_task, ibmvscsis_handle_crq, + (unsigned long)vscsi); + + init_completion(&vscsi->wait_idle); + + snprintf(wq_name, 24, "ibmvscsis%s", dev_name(&vdev->dev)); + vscsi->work_q = create_workqueue(wq_name); + if (!vscsi->work_q) { + rc = -ENOMEM; + dev_err(&vscsi->dev, "create_workqueue failed\n"); + goto unmap_buf; + } + + rc = request_irq(vdev->irq, ibmvscsis_interrupt, 0, "ibmvscsis", vscsi); + if (rc) { + rc = -EPERM; + dev_err(&vscsi->dev, "probe: request_irq failed, rc %d\n", rc); + goto destroy_WQ; + } + + spin_lock_bh(&vscsi->intr_lock); + vio_enable_interrupts(vdev); + if (rc) { + dev_err(&vscsi->dev, "enabling interrupts failed, rc %d\n", rc); + rc = -ENODEV; + spin_unlock_bh(&vscsi->intr_lock); + goto free_irq; + } + + if (ibmvscsis_check_q(vscsi)) { + rc = ERROR; + dev_err(&vscsi->dev, "probe: check_q failed, rc %d\n", rc); + spin_unlock_bh(&vscsi->intr_lock); + goto disable_interrupt; + } + spin_unlock_bh(&vscsi->intr_lock); + + dev_set_drvdata(&vdev->dev, vscsi); + + return 0; + +disable_interrupt: + vio_disable_interrupts(vdev); +free_irq: + free_irq(vdev->irq, vscsi); +destroy_WQ: + destroy_workqueue(vscsi->work_q); +unmap_buf: + dma_unmap_single(&vdev->dev, vscsi->map_ioba, PAGE_SIZE, + DMA_BIDIRECTIONAL); +free_buf: + kfree(vscsi->map_buf); +destroy_queue: + tasklet_kill(&vscsi->work_task); + ibmvscsis_unregister_command_q(vscsi); + ibmvscsis_destroy_command_q(vscsi); +free_timer: + ibmvscsis_freetimer(vscsi); +free_cmds: + ibmvscsis_free_cmds(vscsi); +free_target: + srp_target_free(&vscsi->target); +rem_list: + spin_lock_bh(&ibmvscsis_dev_lock); + list_del(&vscsi->list); + spin_unlock_bh(&ibmvscsis_dev_lock); +free_adapter: + kfree(vscsi); + + return rc; +} + +static int ibmvscsis_remove(struct vio_dev *vdev) +{ + struct scsi_info *vscsi = dev_get_drvdata(&vdev->dev); + + pr_debug("remove (%s)\n", dev_name(&vscsi->dma_dev->dev)); + + /* + * TBD: Need to handle if there are commands on the waiting_rsp q + * Actually, can there still be cmds outstanding to tcm? + */ + + vio_disable_interrupts(vdev); + free_irq(vdev->irq, vscsi); + destroy_workqueue(vscsi->work_q); + dma_unmap_single(&vdev->dev, vscsi->map_ioba, PAGE_SIZE, + DMA_BIDIRECTIONAL); + kfree(vscsi->map_buf); + tasklet_kill(&vscsi->work_task); + ibmvscsis_unregister_command_q(vscsi); + ibmvscsis_destroy_command_q(vscsi); + ibmvscsis_freetimer(vscsi); + ibmvscsis_free_cmds(vscsi); + srp_target_free(&vscsi->target); + spin_lock_bh(&ibmvscsis_dev_lock); + list_del(&vscsi->list); + spin_unlock_bh(&ibmvscsis_dev_lock); + kfree(vscsi); + + return 0; +} + +static ssize_t system_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%s\n", system_id); +} + +static ssize_t partition_number_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%x\n", partition_number); +} + +static ssize_t unit_address_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct scsi_info *vscsi = container_of(dev, struct scsi_info, dev); + + return snprintf(buf, PAGE_SIZE, "%x\n", vscsi->dma_dev->unit_address); +} + +static int ibmvscsis_get_system_info(void) +{ + struct device_node *rootdn, *vdevdn; + const char *id, *model, *name; + const uint *num; + + rootdn = of_find_node_by_path("/"); + if (!rootdn) + return -ENOENT; + + model = of_get_property(rootdn, "model", NULL); + id = of_get_property(rootdn, "system-id", NULL); + if (model && id) + snprintf(system_id, sizeof(system_id), "%s-%s", model, id); + + name = of_get_property(rootdn, "ibm,partition-name", NULL); + if (name) + strncpy(partition_name, name, sizeof(partition_name)); + + num = of_get_property(rootdn, "ibm,partition-no", NULL); + if (num) + partition_number = *num; + + of_node_put(rootdn); + + vdevdn = of_find_node_by_path("/vdevice"); + if (vdevdn) { + const uint *mvds; + + mvds = of_get_property(vdevdn, "ibm,max-virtual-dma-size", + NULL); + if (mvds) + max_vdma_size = *mvds; + of_node_put(vdevdn); + } + + return 0; +} + +static char *ibmvscsis_get_fabric_name(void) +{ + return "ibmvscsis"; +} + +static char *ibmvscsis_get_fabric_wwn(struct se_portal_group *se_tpg) +{ + struct ibmvscsis_tport *tport = + container_of(se_tpg, struct ibmvscsis_tport, se_tpg); + + return tport->tport_name; +} + +static u16 ibmvscsis_get_tag(struct se_portal_group *se_tpg) +{ + struct ibmvscsis_tport *tport = + container_of(se_tpg, struct ibmvscsis_tport, se_tpg); + + return tport->tport_tpgt; +} + +static u32 ibmvscsis_get_default_depth(struct se_portal_group *se_tpg) +{ + return 1; +} + +static int ibmvscsis_check_true(struct se_portal_group *se_tpg) +{ + return 1; +} + +static int ibmvscsis_check_false(struct se_portal_group *se_tpg) +{ + return 0; +} + +static u32 ibmvscsis_tpg_get_inst_index(struct se_portal_group *se_tpg) +{ + return 1; +} + +static int ibmvscsis_check_stop_free(struct se_cmd *se_cmd) +{ + return target_put_sess_cmd(se_cmd); +} + +static void ibmvscsis_release_cmd(struct se_cmd *se_cmd) +{ + struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd, + se_cmd); + struct scsi_info *vscsi = cmd->adapter; + + pr_debug("release_cmd %p, flags %d\n", se_cmd, cmd->flags); + + spin_lock_bh(&vscsi->intr_lock); + /* Remove from active_q */ + list_del(&cmd->list); + list_add_tail(&cmd->list, &vscsi->waiting_rsp); + ibmvscsis_send_messages(vscsi); + spin_unlock_bh(&vscsi->intr_lock); +} + +static u32 ibmvscsis_sess_get_index(struct se_session *se_sess) +{ + return 0; +} + +static int ibmvscsis_write_pending(struct se_cmd *se_cmd) +{ + struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd, + se_cmd); + struct iu_entry *iue = cmd->iue; + int rc; + + pr_debug("write_pending, se_cmd %p, length 0x%x\n", + se_cmd, se_cmd->data_length); + + rc = srp_transfer_data(cmd, &vio_iu(iue)->srp.cmd, ibmvscsis_rdma, + 1, 1); + if (rc) { + pr_err("srp_transfer_data() failed: %d\n", rc); + return -EAGAIN; + } + /* + * We now tell TCM to add this WRITE CDB directly into the TCM storage + * object execution queue. + */ + target_execute_cmd(se_cmd); + return 0; +} + +static int ibmvscsis_write_pending_status(struct se_cmd *se_cmd) +{ + return 0; +} + +static void ibmvscsis_set_default_node_attrs(struct se_node_acl *nacl) +{ +} + +static int ibmvscsis_get_cmd_state(struct se_cmd *se_cmd) +{ + return 0; +} + +static int ibmvscsis_queue_data_in(struct se_cmd *se_cmd) +{ + struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd, + se_cmd); + struct iu_entry *iue = cmd->iue; + struct scsi_info *vscsi = cmd->adapter; + char *sd; + uint len = 0; + int rc; + + pr_debug("queue_data_in, se_cmd %p, length 0x%x\n", + se_cmd, se_cmd->data_length); + + rc = srp_transfer_data(cmd, &vio_iu(iue)->srp.cmd, ibmvscsis_rdma, 1, + 1); + if (rc) { + pr_err("srp_transfer_data failed: %d\n", rc); + sd = se_cmd->sense_buffer; + se_cmd->scsi_sense_length = 18; + memset(se_cmd->sense_buffer, 0, se_cmd->scsi_sense_length); + /* Logical Unit Communication Time-out asc/ascq = 0x0801 */ + scsi_build_sense_buffer(0, se_cmd->sense_buffer, MEDIUM_ERROR, + 0x08, 0x01); + } + + srp_build_response(vscsi, cmd, &len); + cmd->rsp.format = SRP_FORMAT; + cmd->rsp.len = len; + + return 0; +} + +static int ibmvscsis_queue_status(struct se_cmd *se_cmd) +{ + struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd, + se_cmd); + struct scsi_info *vscsi = cmd->adapter; + uint len; + + pr_debug("queue_status %p\n", se_cmd); + + srp_build_response(vscsi, cmd, &len); + cmd->rsp.format = SRP_FORMAT; + cmd->rsp.len = len; + + return 0; +} + +static void ibmvscsis_queue_tm_rsp(struct se_cmd *se_cmd) +{ + struct ibmvscsis_cmd *cmd = container_of(se_cmd, struct ibmvscsis_cmd, + se_cmd); + struct scsi_info *vscsi = cmd->adapter; + uint len; + + pr_debug("queue_tm_rsp %p, status %d\n", + se_cmd, (int)se_cmd->se_tmr_req->response); + + srp_build_response(vscsi, cmd, &len); + cmd->rsp.format = SRP_FORMAT; + cmd->rsp.len = len; +} + +static void ibmvscsis_aborted_task(struct se_cmd *se_cmd) +{ + /* TBD: What (if anything) should we do here? */ + pr_debug("ibmvscsis_aborted_task %p\n", se_cmd); +} + +static struct se_wwn *ibmvscsis_make_tport(struct target_fabric_configfs *tf, + struct config_group *group, + const char *name) +{ + struct ibmvscsis_tport *tport; + + tport = ibmvscsis_lookup_port(name); + if (tport) { + tport->tport_proto_id = SCSI_PROTOCOL_SRP; + pr_debug("make_tport(%s), pointer:%p, tport_id:%x\n", + name, tport, tport->tport_proto_id); + return &tport->tport_wwn; + } + + return ERR_PTR(-EINVAL); +} + +static void ibmvscsis_drop_tport(struct se_wwn *wwn) +{ + struct ibmvscsis_tport *tport = container_of(wwn, + struct ibmvscsis_tport, + tport_wwn); + + pr_debug("drop_tport(%s)\n", + config_item_name(&tport->tport_wwn.wwn_group.cg_item)); +} + +static struct se_portal_group *ibmvscsis_make_tpg(struct se_wwn *wwn, + struct config_group *group, + const char *name) +{ + struct ibmvscsis_tport *tport = + container_of(wwn, struct ibmvscsis_tport, tport_wwn); + int rc; + + tport->releasing = false; + + rc = core_tpg_register(&tport->tport_wwn, &tport->se_tpg, + tport->tport_proto_id); + if (rc) + return ERR_PTR(rc); + + return &tport->se_tpg; +} + +static void ibmvscsis_drop_tpg(struct se_portal_group *se_tpg) +{ + struct ibmvscsis_tport *tport = container_of(se_tpg, + struct ibmvscsis_tport, + se_tpg); + + tport->releasing = true; + tport->enabled = false; + + /* + * Release the virtual I_T Nexus for this ibmvscsis TPG + */ + ibmvscsis_drop_nexus(tport); + /* + * Deregister the se_tpg from TCM.. + */ + core_tpg_deregister(se_tpg); +} + +static ssize_t ibmvscsis_wwn_version_show(struct config_item *item, + char *page) +{ + return scnprintf(page, PAGE_SIZE, "%s\n", IBMVSCSIS_VERSION); +} +CONFIGFS_ATTR_RO(ibmvscsis_wwn_, version); + +static struct configfs_attribute *ibmvscsis_wwn_attrs[] = { + &ibmvscsis_wwn_attr_version, + NULL, +}; + +static ssize_t ibmvscsis_tpg_enable_show(struct config_item *item, + char *page) +{ + struct se_portal_group *se_tpg = to_tpg(item); + struct ibmvscsis_tport *tport = container_of(se_tpg, + struct ibmvscsis_tport, + se_tpg); + + return snprintf(page, PAGE_SIZE, "%d\n", (tport->enabled) ? 1 : 0); +} + +static ssize_t ibmvscsis_tpg_enable_store(struct config_item *item, + const char *page, size_t count) +{ + struct se_portal_group *se_tpg = to_tpg(item); + struct ibmvscsis_tport *tport = container_of(se_tpg, + struct ibmvscsis_tport, + se_tpg); + struct scsi_info *vscsi = container_of(tport, struct scsi_info, tport); + unsigned long tmp; + int rc; + long lrc; + + rc = kstrtoul(page, 0, &tmp); + if (rc < 0) { + pr_err("Unable to extract srpt_tpg_store_enable\n"); + return -EINVAL; + } + + if ((tmp != 0) && (tmp != 1)) { + pr_err("Illegal value for srpt_tpg_store_enable\n"); + return -EINVAL; + } + + if (tmp) { + tport->enabled = true; + spin_lock_bh(&vscsi->intr_lock); + lrc = ibmvscsis_enable_change_state(vscsi); + if (lrc) + pr_err("enable_change_state failed, rc %ld state %d\n", + lrc, vscsi->state); + spin_unlock_bh(&vscsi->intr_lock); + } else { + tport->enabled = false; + } + + pr_debug("tpg_enable_store, state %d\n", vscsi->state); + + return count; +} +CONFIGFS_ATTR(ibmvscsis_tpg_, enable); + +static struct configfs_attribute *ibmvscsis_tpg_attrs[] = { + &ibmvscsis_tpg_attr_enable, + NULL, +}; + +static const struct target_core_fabric_ops ibmvscsis_ops = { + .module = THIS_MODULE, + .name = "ibmvscsis", + .get_fabric_name = ibmvscsis_get_fabric_name, + .tpg_get_wwn = ibmvscsis_get_fabric_wwn, + .tpg_get_tag = ibmvscsis_get_tag, + .tpg_get_default_depth = ibmvscsis_get_default_depth, + .tpg_check_demo_mode = ibmvscsis_check_true, + .tpg_check_demo_mode_cache = ibmvscsis_check_true, + .tpg_check_demo_mode_write_protect = ibmvscsis_check_false, + .tpg_check_prod_mode_write_protect = ibmvscsis_check_false, + .tpg_get_inst_index = ibmvscsis_tpg_get_inst_index, + .check_stop_free = ibmvscsis_check_stop_free, + .release_cmd = ibmvscsis_release_cmd, + .sess_get_index = ibmvscsis_sess_get_index, + .write_pending = ibmvscsis_write_pending, + .write_pending_status = ibmvscsis_write_pending_status, + .set_default_node_attributes = ibmvscsis_set_default_node_attrs, + .get_cmd_state = ibmvscsis_get_cmd_state, + .queue_data_in = ibmvscsis_queue_data_in, + .queue_status = ibmvscsis_queue_status, + .queue_tm_rsp = ibmvscsis_queue_tm_rsp, + .aborted_task = ibmvscsis_aborted_task, + /* + * Setup function pointers for logic in target_core_fabric_configfs.c + */ + .fabric_make_wwn = ibmvscsis_make_tport, + .fabric_drop_wwn = ibmvscsis_drop_tport, + .fabric_make_tpg = ibmvscsis_make_tpg, + .fabric_drop_tpg = ibmvscsis_drop_tpg, + + .tfc_wwn_attrs = ibmvscsis_wwn_attrs, + .tfc_tpg_base_attrs = ibmvscsis_tpg_attrs, +}; + +static void ibmvscsis_dev_release(struct device *dev) {}; + +static struct class_attribute ibmvscsis_class_attrs[] = { + __ATTR_NULL, +}; + +static struct device_attribute dev_attr_system_id = + __ATTR(system_id, S_IRUGO, system_id_show, NULL); + +static struct device_attribute dev_attr_partition_number = + __ATTR(partition_number, S_IRUGO, partition_number_show, NULL); + +static struct device_attribute dev_attr_unit_address = + __ATTR(unit_address, S_IRUGO, unit_address_show, NULL); + +static struct attribute *ibmvscsis_dev_attrs[] = { + &dev_attr_system_id.attr, + &dev_attr_partition_number.attr, + &dev_attr_unit_address.attr, +}; +ATTRIBUTE_GROUPS(ibmvscsis_dev); + +static struct class ibmvscsis_class = { + .name = "ibmvscsis", + .dev_release = ibmvscsis_dev_release, + .class_attrs = ibmvscsis_class_attrs, + .dev_groups = ibmvscsis_dev_groups, +}; + +static struct vio_device_id ibmvscsis_device_table[] = { + { "v-scsi-host", "IBM,v-scsi-host" }, + { "", "" } +}; +MODULE_DEVICE_TABLE(vio, ibmvscsis_device_table); + +static struct vio_driver ibmvscsis_driver = { + .name = "ibmvscsis", + .id_table = ibmvscsis_device_table, + .probe = ibmvscsis_probe, + .remove = ibmvscsis_remove, +}; + +/* + * ibmvscsis_init() - Kernel Module initialization + * + * Note: vio_register_driver() registers callback functions, and at least one + * of those callback functions calls TCM - Linux IO Target Subsystem, thus + * the SCSI Target template must be registered before vio_register_driver() + * is called. + */ +static int __init ibmvscsis_init(void) +{ + int rc = 0; + + rc = ibmvscsis_get_system_info(); + if (rc) { + pr_err("rc %d from get_system_info\n", rc); + goto out; + } + + rc = class_register(&ibmvscsis_class); + if (rc) { + pr_err("failed class register\n"); + goto out; + } + + rc = target_register_template(&ibmvscsis_ops); + if (rc) { + pr_err("rc %d from target_register_template\n", rc); + goto unregister_class; + } + + rc = vio_register_driver(&ibmvscsis_driver); + if (rc) { + pr_err("rc %d from vio_register_driver\n", rc); + goto unregister_target; + } + + return 0; + +unregister_target: + target_unregister_template(&ibmvscsis_ops); +unregister_class: + class_unregister(&ibmvscsis_class); +out: + return rc; +} + +static void __exit ibmvscsis_exit(void) +{ + pr_info("Unregister IBM virtual SCSI host driver\n"); + vio_unregister_driver(&ibmvscsis_driver); + target_unregister_template(&ibmvscsis_ops); + class_unregister(&ibmvscsis_class); +} + +MODULE_DESCRIPTION("IBMVSCSIS fabric driver"); +MODULE_AUTHOR("Bryant G. Ly and Michael Cyr"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(IBMVSCSIS_VERSION); +module_init(ibmvscsis_init); +module_exit(ibmvscsis_exit); diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h new file mode 100644 index 000000000000..981a0c992b6c --- /dev/null +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h @@ -0,0 +1,346 @@ +/******************************************************************************* + * IBM Virtual SCSI Target Driver + * Copyright (C) 2003-2005 Dave Boutcher (boutcher@us.ibm.com) IBM Corp. + * Santiago Leon (santil@us.ibm.com) IBM Corp. + * Linda Xie (lxie@us.ibm.com) IBM Corp. + * + * Copyright (C) 2005-2011 FUJITA Tomonori <tomof@acm.org> + * Copyright (C) 2010 Nicholas A. Bellinger <nab@kernel.org> + * Copyright (C) 2016 Bryant G. Ly <bryantly@linux.vnet.ibm.com> IBM Corp. + * + * Authors: Bryant G. Ly <bryantly@linux.vnet.ibm.com> + * Authors: Michael Cyr <mikecyr@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + ****************************************************************************/ + +#ifndef __H_IBMVSCSI_TGT +#define __H_IBMVSCSI_TGT + +#include "libsrp.h" + +#define SYS_ID_NAME_LEN 64 +#define PARTITION_NAMELEN 96 +#define IBMVSCSIS_NAMELEN 32 + +#define MSG_HI 0 +#define MSG_LOW 1 + +#define MAX_CMD_Q_PAGES 4 +#define CRQ_PER_PAGE (PAGE_SIZE / sizeof(struct viosrp_crq)) +/* in terms of number of elements */ +#define DEFAULT_CMD_Q_SIZE CRQ_PER_PAGE +#define MAX_CMD_Q_SIZE (DEFAULT_CMD_Q_SIZE * MAX_CMD_Q_PAGES) + +#define SRP_VIOLATION 0x102 /* general error code */ + +/* + * SRP buffer formats defined as of 16.a supported by this driver. + */ +#define SUPPORTED_FORMATS ((SRP_DATA_DESC_DIRECT << 1) | \ + (SRP_DATA_DESC_INDIRECT << 1)) + +#define SCSI_LUN_ADDR_METHOD_FLAT 1 + +struct dma_window { + u32 liobn; /* Unique per vdevice */ + u64 tce_base; /* Physical location of the TCE table */ + u64 tce_size; /* Size of the TCE table in bytes */ +}; + +struct target_dds { + u64 unit_id; /* 64 bit will force alignment */ +#define NUM_DMA_WINDOWS 2 +#define LOCAL 0 +#define REMOTE 1 + struct dma_window window[NUM_DMA_WINDOWS]; + + /* root node property "ibm,partition-no" */ + uint partition_num; + char partition_name[PARTITION_NAMELEN]; +}; + +#define MAX_NUM_PORTS 1 +#define MAX_H_COPY_RDMA (128 * 1024) + +#define MAX_EYE 64 + +/* Return codes */ +#define ADAPT_SUCCESS 0L +/* choose error codes that do not conflict with PHYP */ +#define ERROR -40L + +struct format_code { + u8 reserved; + u8 buffers; +}; + +struct client_info { +#define SRP_VERSION "16.a" + char srp_version[8]; + /* root node property ibm,partition-name */ + char partition_name[PARTITION_NAMELEN]; + /* root node property ibm,partition-no */ + u32 partition_number; + /* initially 1 */ + u32 mad_version; + u32 os_type; +}; + +/* + * Changing this constant changes the number of seconds to wait before + * considering the client will never service its queue again. + */ +#define SECONDS_TO_CONSIDER_FAILED 30 +/* + * These constants set the polling period used to determine if the client + * has freed at least one element in the response queue. + */ +#define WAIT_SECONDS 1 +#define WAIT_NANO_SECONDS 5000 +#define MAX_TIMER_POPS ((1000000 / WAIT_NANO_SECONDS) * \ + SECONDS_TO_CONSIDER_FAILED) +/* + * general purpose timer control block + * which can be used for multiple functions + */ +struct timer_cb { + struct hrtimer timer; + /* + * how long has it been since the client + * serviced the queue. The variable is incrmented + * in the service_wait_q routine and cleared + * in send messages + */ + int timer_pops; + /* the timer is started */ + bool started; +}; + +struct cmd_queue { + /* kva */ + struct viosrp_crq *base_addr; + dma_addr_t crq_token; + /* used to maintain index */ + uint mask; + /* current element */ + uint index; + int size; +}; + +#define SCSOLNT_RESP_SHIFT 1 +#define UCSOLNT_RESP_SHIFT 2 + +#define SCSOLNT BIT(SCSOLNT_RESP_SHIFT) +#define UCSOLNT BIT(UCSOLNT_RESP_SHIFT) + +enum cmd_type { + SCSI_CDB = 0x01, + TASK_MANAGEMENT = 0x02, + /* MAD or addressed to port 0 */ + ADAPTER_MAD = 0x04, + UNSET_TYPE = 0x08, +}; + +struct iu_rsp { + u8 format; + u8 sol_not; + u16 len; + /* tag is just to help client identify cmd, so don't translate be/le */ + u64 tag; +}; + +struct ibmvscsis_cmd { + struct list_head list; + /* Used for TCM Core operations */ + struct se_cmd se_cmd; + struct iu_entry *iue; + struct iu_rsp rsp; + struct work_struct work; + struct scsi_info *adapter; + /* Sense buffer that will be mapped into outgoing status */ + unsigned char sense_buf[TRANSPORT_SENSE_BUFFER]; + u64 init_time; +#define CMD_FAST_FAIL BIT(0) + u32 flags; + char type; +}; + +struct ibmvscsis_nexus { + struct se_session *se_sess; +}; + +struct ibmvscsis_tport { + /* SCSI protocol the tport is providing */ + u8 tport_proto_id; + /* ASCII formatted WWPN for SRP Target port */ + char tport_name[IBMVSCSIS_NAMELEN]; + /* Returned by ibmvscsis_make_tport() */ + struct se_wwn tport_wwn; + /* Returned by ibmvscsis_make_tpg() */ + struct se_portal_group se_tpg; + /* ibmvscsis port target portal group tag for TCM */ + u16 tport_tpgt; + /* Pointer to TCM session for I_T Nexus */ + struct ibmvscsis_nexus *ibmv_nexus; + bool enabled; + bool releasing; +}; + +struct scsi_info { + struct list_head list; + char eye[MAX_EYE]; + + /* commands waiting for space on repsonse queue */ + struct list_head waiting_rsp; +#define NO_QUEUE 0x00 +#define WAIT_ENABLED 0X01 + /* driver has received an initialize command */ +#define PART_UP_WAIT_ENAB 0x02 +#define WAIT_CONNECTION 0x04 + /* have established a connection */ +#define CONNECTED 0x08 + /* at least one port is processing SRP IU */ +#define SRP_PROCESSING 0x10 + /* remove request received */ +#define UNCONFIGURING 0x20 + /* disconnect by letting adapter go idle, no error */ +#define WAIT_IDLE 0x40 + /* disconnecting to clear an error */ +#define ERR_DISCONNECT 0x80 + /* disconnect to clear error state, then come back up */ +#define ERR_DISCONNECT_RECONNECT 0x100 + /* disconnected after clearing an error */ +#define ERR_DISCONNECTED 0x200 + /* A series of errors caused unexpected errors */ +#define UNDEFINED 0x400 + u16 state; + int fast_fail; + struct target_dds dds; + char *cmd_pool; + /* list of free commands */ + struct list_head free_cmd; + /* command elements ready for scheduler */ + struct list_head schedule_q; + /* commands sent to TCM */ + struct list_head active_q; + caddr_t *map_buf; + /* ioba of map buffer */ + dma_addr_t map_ioba; + /* allowable number of outstanding SRP requests */ + int request_limit; + /* extra credit */ + int credit; + /* outstanding transactions against credit limit */ + int debit; + + /* allow only one outstanding mad request */ +#define PROCESSING_MAD 0x00002 + /* Waiting to go idle */ +#define WAIT_FOR_IDLE 0x00004 + /* H_REG_CRQ called */ +#define CRQ_CLOSED 0x00010 + /* detected that client has failed */ +#define CLIENT_FAILED 0x00040 + /* detected that transport event occurred */ +#define TRANS_EVENT 0x00080 + /* don't attempt to send anything to the client */ +#define RESPONSE_Q_DOWN 0x00100 + /* request made to schedule disconnect handler */ +#define SCHEDULE_DISCONNECT 0x00400 + /* disconnect handler is scheduled */ +#define DISCONNECT_SCHEDULED 0x00800 + u32 flags; + /* adapter lock */ + spinlock_t intr_lock; + /* information needed to manage command queue */ + struct cmd_queue cmd_q; + /* used in hcall to copy response back into srp buffer */ + u64 empty_iu_id; + /* used in crq, to tag what iu the response is for */ + u64 empty_iu_tag; + uint new_state; + /* control block for the response queue timer */ + struct timer_cb rsp_q_timer; + /* keep last client to enable proper accounting */ + struct client_info client_data; + /* what can this client do */ + u32 client_cap; + /* + * The following two fields capture state and flag changes that + * can occur when the lock is given up. In the orginal design, + * the lock was held during calls into phyp; + * however, phyp did not meet PAPR architecture. This is + * a work around. + */ + u16 phyp_acr_state; + u32 phyp_acr_flags; + + struct workqueue_struct *work_q; + struct completion wait_idle; + struct device dev; + struct vio_dev *dma_dev; + struct srp_target target; + struct ibmvscsis_tport tport; + struct tasklet_struct work_task; + struct work_struct proc_work; +}; + +/* + * Provide a constant that allows software to detect the adapter is + * disconnecting from the client from one of several states. + */ +#define IS_DISCONNECTING (UNCONFIGURING | ERR_DISCONNECT_RECONNECT | \ + ERR_DISCONNECT) + +/* + * Provide a constant that can be used with interrupt handling that + * essentially lets the interrupt handler know that all requests should + * be thrown out, + */ +#define DONT_PROCESS_STATE (IS_DISCONNECTING | UNDEFINED | \ + ERR_DISCONNECTED | WAIT_IDLE) + +/* + * If any of these flag bits are set then do not allow the interrupt + * handler to schedule the off level handler. + */ +#define BLOCK (DISCONNECT_SCHEDULED) + +/* State and transition events that stop the interrupt handler */ +#define TARGET_STOP(VSCSI) (long)(((VSCSI)->state & DONT_PROCESS_STATE) | \ + ((VSCSI)->flags & BLOCK)) + +/* flag bit that are not reset during disconnect */ +#define PRESERVE_FLAG_FIELDS 0 + +#define vio_iu(IUE) ((union viosrp_iu *)((IUE)->sbuf->buf)) + +#define READ_CMD(cdb) (((cdb)[0] & 0x1F) == 8) +#define WRITE_CMD(cdb) (((cdb)[0] & 0x1F) == 0xA) + +#ifndef H_GET_PARTNER_INFO +#define H_GET_PARTNER_INFO 0x0000000000000008LL +#endif + +#define h_copy_rdma(l, sa, sb, da, db) \ + plpar_hcall_norets(H_COPY_RDMA, l, sa, sb, da, db) +#define h_vioctl(u, o, a, u1, u2, u3, u4) \ + plpar_hcall_norets(H_VIOCTL, u, o, a, u1, u2) +#define h_reg_crq(ua, tok, sz) \ + plpar_hcall_norets(H_REG_CRQ, ua, tok, sz) +#define h_free_crq(ua) \ + plpar_hcall_norets(H_FREE_CRQ, ua) +#define h_send_crq(ua, d1, d2) \ + plpar_hcall_norets(H_SEND_CRQ, ua, d1, d2) + +#endif diff --git a/drivers/scsi/ibmvscsi_tgt/libsrp.c b/drivers/scsi/ibmvscsi_tgt/libsrp.c new file mode 100644 index 000000000000..5a4cc28ca5ff --- /dev/null +++ b/drivers/scsi/ibmvscsi_tgt/libsrp.c @@ -0,0 +1,427 @@ +/******************************************************************************* + * SCSI RDMA Protocol lib functions + * + * Copyright (C) 2006 FUJITA Tomonori <tomof@acm.org> + * Copyright (C) 2016 Bryant G. Ly <bryantly@linux.vnet.ibm.com> IBM Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + ***********************************************************************/ + +#define pr_fmt(fmt) "libsrp: " fmt + +#include <linux/printk.h> +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/kfifo.h> +#include <linux/scatterlist.h> +#include <linux/dma-mapping.h> +#include <linux/module.h> +#include <scsi/srp.h> +#include <target/target_core_base.h> +#include "libsrp.h" +#include "ibmvscsi_tgt.h" + +static int srp_iu_pool_alloc(struct srp_queue *q, size_t max, + struct srp_buf **ring) +{ + struct iu_entry *iue; + int i; + + q->pool = kcalloc(max, sizeof(struct iu_entry *), GFP_KERNEL); + if (!q->pool) + return -ENOMEM; + q->items = kcalloc(max, sizeof(struct iu_entry), GFP_KERNEL); + if (!q->items) + goto free_pool; + + spin_lock_init(&q->lock); + kfifo_init(&q->queue, (void *)q->pool, max * sizeof(void *)); + + for (i = 0, iue = q->items; i < max; i++) { + kfifo_in(&q->queue, (void *)&iue, sizeof(void *)); + iue->sbuf = ring[i]; + iue++; + } + return 0; + +free_pool: + kfree(q->pool); + return -ENOMEM; +} + +static void srp_iu_pool_free(struct srp_queue *q) +{ + kfree(q->items); + kfree(q->pool); +} + +static struct srp_buf **srp_ring_alloc(struct device *dev, + size_t max, size_t size) +{ + struct srp_buf **ring; + int i; + + ring = kcalloc(max, sizeof(struct srp_buf *), GFP_KERNEL); + if (!ring) + return NULL; + + for (i = 0; i < max; i++) { + ring[i] = kzalloc(sizeof(*ring[i]), GFP_KERNEL); + if (!ring[i]) + goto out; + ring[i]->buf = dma_alloc_coherent(dev, size, &ring[i]->dma, + GFP_KERNEL); + if (!ring[i]->buf) + goto out; + } + return ring; + +out: + for (i = 0; i < max && ring[i]; i++) { + if (ring[i]->buf) { + dma_free_coherent(dev, size, ring[i]->buf, + ring[i]->dma); + } + kfree(ring[i]); + } + kfree(ring); + + return NULL; +} + +static void srp_ring_free(struct device *dev, struct srp_buf **ring, + size_t max, size_t size) +{ + int i; + + for (i = 0; i < max; i++) { + dma_free_coherent(dev, size, ring[i]->buf, ring[i]->dma); + kfree(ring[i]); + } + kfree(ring); +} + +int srp_target_alloc(struct srp_target *target, struct device *dev, + size_t nr, size_t iu_size) +{ + int err; + + spin_lock_init(&target->lock); + + target->dev = dev; + + target->srp_iu_size = iu_size; + target->rx_ring_size = nr; + target->rx_ring = srp_ring_alloc(target->dev, nr, iu_size); + if (!target->rx_ring) + return -ENOMEM; + err = srp_iu_pool_alloc(&target->iu_queue, nr, target->rx_ring); + if (err) + goto free_ring; + + dev_set_drvdata(target->dev, target); + return 0; + +free_ring: + srp_ring_free(target->dev, target->rx_ring, nr, iu_size); + return -ENOMEM; +} + +void srp_target_free(struct srp_target *target) +{ + dev_set_drvdata(target->dev, NULL); + srp_ring_free(target->dev, target->rx_ring, target->rx_ring_size, + target->srp_iu_size); + srp_iu_pool_free(&target->iu_queue); +} + +struct iu_entry *srp_iu_get(struct srp_target *target) +{ + struct iu_entry *iue = NULL; + + if (kfifo_out_locked(&target->iu_queue.queue, (void *)&iue, + sizeof(void *), + &target->iu_queue.lock) != sizeof(void *)) { + WARN_ONCE(1, "unexpected fifo state"); + return NULL; + } + if (!iue) + return iue; + iue->target = target; + iue->flags = 0; + return iue; +} + +void srp_iu_put(struct iu_entry *iue) +{ + kfifo_in_locked(&iue->target->iu_queue.queue, (void *)&iue, + sizeof(void *), &iue->target->iu_queue.lock); +} + +static int srp_direct_data(struct ibmvscsis_cmd *cmd, struct srp_direct_buf *md, + enum dma_data_direction dir, srp_rdma_t rdma_io, + int dma_map, int ext_desc) +{ + struct iu_entry *iue = NULL; + struct scatterlist *sg = NULL; + int err, nsg = 0, len; + + if (dma_map) { + iue = cmd->iue; + sg = cmd->se_cmd.t_data_sg; + nsg = dma_map_sg(iue->target->dev, sg, cmd->se_cmd.t_data_nents, + DMA_BIDIRECTIONAL); + if (!nsg) { + pr_err("fail to map %p %d\n", iue, + cmd->se_cmd.t_data_nents); + return 0; + } + len = min(cmd->se_cmd.data_length, be32_to_cpu(md->len)); + } else { + len = be32_to_cpu(md->len); + } + + err = rdma_io(cmd, sg, nsg, md, 1, dir, len); + + if (dma_map) + dma_unmap_sg(iue->target->dev, sg, nsg, DMA_BIDIRECTIONAL); + + return err; +} + +static int srp_indirect_data(struct ibmvscsis_cmd *cmd, struct srp_cmd *srp_cmd, + struct srp_indirect_buf *id, + enum dma_data_direction dir, srp_rdma_t rdma_io, + int dma_map, int ext_desc) +{ + struct iu_entry *iue = NULL; + struct srp_direct_buf *md = NULL; + struct scatterlist dummy, *sg = NULL; + dma_addr_t token = 0; + int err = 0; + int nmd, nsg = 0, len; + + if (dma_map || ext_desc) { + iue = cmd->iue; + sg = cmd->se_cmd.t_data_sg; + } + + nmd = be32_to_cpu(id->table_desc.len) / sizeof(struct srp_direct_buf); + + if ((dir == DMA_FROM_DEVICE && nmd == srp_cmd->data_in_desc_cnt) || + (dir == DMA_TO_DEVICE && nmd == srp_cmd->data_out_desc_cnt)) { + md = &id->desc_list[0]; + goto rdma; + } + + if (ext_desc && dma_map) { + md = dma_alloc_coherent(iue->target->dev, + be32_to_cpu(id->table_desc.len), + &token, GFP_KERNEL); + if (!md) { + pr_err("Can't get dma memory %u\n", + be32_to_cpu(id->table_desc.len)); + return -ENOMEM; + } + + sg_init_one(&dummy, md, be32_to_cpu(id->table_desc.len)); + sg_dma_address(&dummy) = token; + sg_dma_len(&dummy) = be32_to_cpu(id->table_desc.len); + err = rdma_io(cmd, &dummy, 1, &id->table_desc, 1, DMA_TO_DEVICE, + be32_to_cpu(id->table_desc.len)); + if (err) { + pr_err("Error copying indirect table %d\n", err); + goto free_mem; + } + } else { + pr_err("This command uses external indirect buffer\n"); + return -EINVAL; + } + +rdma: + if (dma_map) { + nsg = dma_map_sg(iue->target->dev, sg, cmd->se_cmd.t_data_nents, + DMA_BIDIRECTIONAL); + if (!nsg) { + pr_err("fail to map %p %d\n", iue, + cmd->se_cmd.t_data_nents); + err = -EIO; + goto free_mem; + } + len = min(cmd->se_cmd.data_length, be32_to_cpu(id->len)); + } else { + len = be32_to_cpu(id->len); + } + + err = rdma_io(cmd, sg, nsg, md, nmd, dir, len); + + if (dma_map) + dma_unmap_sg(iue->target->dev, sg, nsg, DMA_BIDIRECTIONAL); + +free_mem: + if (token && dma_map) { + dma_free_coherent(iue->target->dev, + be32_to_cpu(id->table_desc.len), md, token); + } + return err; +} + +static int data_out_desc_size(struct srp_cmd *cmd) +{ + int size = 0; + u8 fmt = cmd->buf_fmt >> 4; + + switch (fmt) { + case SRP_NO_DATA_DESC: + break; + case SRP_DATA_DESC_DIRECT: + size = sizeof(struct srp_direct_buf); + break; + case SRP_DATA_DESC_INDIRECT: + size = sizeof(struct srp_indirect_buf) + + sizeof(struct srp_direct_buf) * cmd->data_out_desc_cnt; + break; + default: + pr_err("client error. Invalid data_out_format %x\n", fmt); + break; + } + return size; +} + +/* + * TODO: this can be called multiple times for a single command if it + * has very long data. + */ +int srp_transfer_data(struct ibmvscsis_cmd *cmd, struct srp_cmd *srp_cmd, + srp_rdma_t rdma_io, int dma_map, int ext_desc) +{ + struct srp_direct_buf *md; + struct srp_indirect_buf *id; + enum dma_data_direction dir; + int offset, err = 0; + u8 format; + + if (!cmd->se_cmd.t_data_nents) + return 0; + + offset = srp_cmd->add_cdb_len & ~3; + + dir = srp_cmd_direction(srp_cmd); + if (dir == DMA_FROM_DEVICE) + offset += data_out_desc_size(srp_cmd); + + if (dir == DMA_TO_DEVICE) + format = srp_cmd->buf_fmt >> 4; + else + format = srp_cmd->buf_fmt & ((1U << 4) - 1); + + switch (format) { + case SRP_NO_DATA_DESC: + break; + case SRP_DATA_DESC_DIRECT: + md = (struct srp_direct_buf *)(srp_cmd->add_data + offset); + err = srp_direct_data(cmd, md, dir, rdma_io, dma_map, ext_desc); + break; + case SRP_DATA_DESC_INDIRECT: + id = (struct srp_indirect_buf *)(srp_cmd->add_data + offset); + err = srp_indirect_data(cmd, srp_cmd, id, dir, rdma_io, dma_map, + ext_desc); + break; + default: + pr_err("Unknown format %d %x\n", dir, format); + err = -EINVAL; + } + + return err; +} + +u64 srp_data_length(struct srp_cmd *cmd, enum dma_data_direction dir) +{ + struct srp_direct_buf *md; + struct srp_indirect_buf *id; + u64 len = 0; + uint offset = cmd->add_cdb_len & ~3; + u8 fmt; + + if (dir == DMA_TO_DEVICE) { + fmt = cmd->buf_fmt >> 4; + } else { + fmt = cmd->buf_fmt & ((1U << 4) - 1); + offset += data_out_desc_size(cmd); + } + + switch (fmt) { + case SRP_NO_DATA_DESC: + break; + case SRP_DATA_DESC_DIRECT: + md = (struct srp_direct_buf *)(cmd->add_data + offset); + len = be32_to_cpu(md->len); + break; + case SRP_DATA_DESC_INDIRECT: + id = (struct srp_indirect_buf *)(cmd->add_data + offset); + len = be32_to_cpu(id->len); + break; + default: + pr_err("invalid data format %x\n", fmt); + break; + } + return len; +} + +int srp_get_desc_table(struct srp_cmd *srp_cmd, enum dma_data_direction *dir, + u64 *data_len) +{ + struct srp_indirect_buf *idb; + struct srp_direct_buf *db; + uint add_cdb_offset; + int rc; + + /* + * The pointer computations below will only be compiled correctly + * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check + * whether srp_cmd::add_data has been declared as a byte pointer. + */ + BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0) + && !__same_type(srp_cmd->add_data[0], (u8)0)); + + BUG_ON(!dir); + BUG_ON(!data_len); + + rc = 0; + *data_len = 0; + + *dir = DMA_NONE; + + if (srp_cmd->buf_fmt & 0xf) + *dir = DMA_FROM_DEVICE; + else if (srp_cmd->buf_fmt >> 4) + *dir = DMA_TO_DEVICE; + + add_cdb_offset = srp_cmd->add_cdb_len & ~3; + if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) || + ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) { + db = (struct srp_direct_buf *)(srp_cmd->add_data + + add_cdb_offset); + *data_len = be32_to_cpu(db->len); + } else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) || + ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) { + idb = (struct srp_indirect_buf *)(srp_cmd->add_data + + add_cdb_offset); + + *data_len = be32_to_cpu(idb->len); + } + return rc; +} + +MODULE_DESCRIPTION("SCSI RDMA Protocol lib functions"); +MODULE_AUTHOR("FUJITA Tomonori"); +MODULE_LICENSE("GPL"); diff --git a/drivers/scsi/ibmvscsi_tgt/libsrp.h b/drivers/scsi/ibmvscsi_tgt/libsrp.h new file mode 100644 index 000000000000..4696f331453e --- /dev/null +++ b/drivers/scsi/ibmvscsi_tgt/libsrp.h @@ -0,0 +1,123 @@ +#ifndef __LIBSRP_H__ +#define __LIBSRP_H__ + +#include <linux/list.h> +#include <linux/kfifo.h> +#include <scsi/srp.h> + +enum srp_valid { + INVALIDATE_CMD_RESP_EL = 0, + VALID_CMD_RESP_EL = 0x80, + VALID_INIT_MSG = 0xC0, + VALID_TRANS_EVENT = 0xFF +}; + +enum srp_format { + SRP_FORMAT = 1, + MAD_FORMAT = 2, + OS400_FORMAT = 3, + AIX_FORMAT = 4, + LINUX_FORMAT = 5, + MESSAGE_IN_CRQ = 6 +}; + +enum srp_init_msg { + INIT_MSG = 1, + INIT_COMPLETE_MSG = 2 +}; + +enum srp_trans_event { + UNUSED_FORMAT = 0, + PARTNER_FAILED = 1, + PARTNER_DEREGISTER = 2, + MIGRATED = 6 +}; + +enum srp_status { + HEADER_DESCRIPTOR = 0xF1, + PING = 0xF5, + PING_RESPONSE = 0xF6 +}; + +enum srp_mad_version { + MAD_VERSION_1 = 1 +}; + +enum srp_os_type { + OS400 = 1, + LINUX = 2, + AIX = 3, + OFW = 4 +}; + +enum srp_task_attributes { + SRP_SIMPLE_TASK = 0, + SRP_HEAD_TASK = 1, + SRP_ORDERED_TASK = 2, + SRP_ACA_TASK = 4 +}; + +enum { + SRP_TASK_MANAGEMENT_FUNCTION_COMPLETE = 0, + SRP_REQUEST_FIELDS_INVALID = 2, + SRP_TASK_MANAGEMENT_FUNCTION_NOT_SUPPORTED = 4, + SRP_TASK_MANAGEMENT_FUNCTION_FAILED = 5 +}; + +struct srp_buf { + dma_addr_t dma; + void *buf; +}; + +struct srp_queue { + void *pool; + void *items; + struct kfifo queue; + spinlock_t lock; +}; + +struct srp_target { + struct device *dev; + + spinlock_t lock; + struct list_head cmd_queue; + + size_t srp_iu_size; + struct srp_queue iu_queue; + size_t rx_ring_size; + struct srp_buf **rx_ring; + + void *ldata; +}; + +struct iu_entry { + struct srp_target *target; + + struct list_head ilist; + dma_addr_t remote_token; + unsigned long flags; + + struct srp_buf *sbuf; + u16 iu_len; +}; + +struct ibmvscsis_cmd; + +typedef int (srp_rdma_t)(struct ibmvscsis_cmd *, struct scatterlist *, int, + struct srp_direct_buf *, int, + enum dma_data_direction, unsigned int); +int srp_target_alloc(struct srp_target *, struct device *, size_t, size_t); +void srp_target_free(struct srp_target *); +struct iu_entry *srp_iu_get(struct srp_target *); +void srp_iu_put(struct iu_entry *); +int srp_transfer_data(struct ibmvscsis_cmd *, struct srp_cmd *, + srp_rdma_t, int, int); +u64 srp_data_length(struct srp_cmd *cmd, enum dma_data_direction dir); +int srp_get_desc_table(struct srp_cmd *srp_cmd, enum dma_data_direction *dir, + u64 *data_len); +static inline int srp_cmd_direction(struct srp_cmd *cmd) +{ + return (cmd->buf_fmt >> 4) ? DMA_TO_DEVICE : DMA_FROM_DEVICE; +} + +#endif diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 1f539c288ae8..17d04c702e1b 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -3288,6 +3288,11 @@ static void ipr_worker_thread(struct work_struct *work) return; } + if (!ioa_cfg->scan_enabled) { + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + return; + } + restart: do { did_work = 0; @@ -10214,6 +10219,7 @@ static int ipr_probe_ioa(struct pci_dev *pdev, if (!ioa_cfg->reset_work_q) { dev_err(&pdev->dev, "Couldn't register reset workqueue\n"); + rc = -ENOMEM; goto out_free_irq; } } else @@ -10362,6 +10368,7 @@ static void ipr_remove(struct pci_dev *pdev) static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) { struct ipr_ioa_cfg *ioa_cfg; + unsigned long flags; int rc, i; rc = ipr_probe_ioa(pdev, dev_id); @@ -10403,8 +10410,11 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) __ipr_remove(pdev); return rc; } + spin_lock_irqsave(ioa_cfg->host->host_lock, flags); + ioa_cfg->scan_enabled = 1; + schedule_work(&ioa_cfg->work_q); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); - scsi_scan_host(ioa_cfg->host); ioa_cfg->iopoll_weight = ioa_cfg->chip_cfg->iopoll_weight; if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { @@ -10414,7 +10424,8 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) } } - schedule_work(&ioa_cfg->work_q); + scsi_scan_host(ioa_cfg->host); + return 0; } diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index 1d42c7464dfc..cdb51960b53c 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -1478,6 +1478,7 @@ struct ipr_ioa_cfg { u8 in_ioa_bringdown:1; u8 ioa_unit_checked:1; u8 dump_taken:1; + u8 scan_enabled:1; u8 scan_done:1; u8 needs_hard_reset:1; u8 dual_raid:1; diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index a5655d571906..d197aa176dee 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -3877,7 +3877,7 @@ int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba, uint32_t tag; uint16_t hwq; - if (shost_use_blk_mq(cmnd->device->host)) { + if (cmnd && shost_use_blk_mq(cmnd->device->host)) { tag = blk_mq_unique_tag(cmnd->request); hwq = blk_mq_unique_tag_to_hwq(tag); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 351d08ace24a..7080ce2920fd 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1323,21 +1323,18 @@ lpfc_sli_ringtxcmpl_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, { lockdep_assert_held(&phba->hbalock); + BUG_ON(!piocb || !piocb->vport); + list_add_tail(&piocb->list, &pring->txcmplq); piocb->iocb_flag |= LPFC_IO_ON_TXCMPLQ; if ((unlikely(pring->ringno == LPFC_ELS_RING)) && (piocb->iocb.ulpCommand != CMD_ABORT_XRI_CN) && (piocb->iocb.ulpCommand != CMD_CLOSE_XRI_CN) && - (!(piocb->vport->load_flag & FC_UNLOADING))) { - if (!piocb->vport) - BUG(); - else - mod_timer(&piocb->vport->els_tmofunc, - jiffies + - msecs_to_jiffies(1000 * (phba->fc_ratov << 1))); - } - + (!(piocb->vport->load_flag & FC_UNLOADING))) + mod_timer(&piocb->vport->els_tmofunc, + jiffies + + msecs_to_jiffies(1000 * (phba->fc_ratov << 1))); return 0; } diff --git a/drivers/ssb/driver_gpio.c b/drivers/ssb/driver_gpio.c index 180e027b1c8a..796e22037bc4 100644 --- a/drivers/ssb/driver_gpio.c +++ b/drivers/ssb/driver_gpio.c @@ -23,7 +23,7 @@ **************************************************/ #if IS_ENABLED(CONFIG_SSB_EMBEDDED) -static int ssb_gpio_to_irq(struct gpio_chip *chip, unsigned gpio) +static int ssb_gpio_to_irq(struct gpio_chip *chip, unsigned int gpio) { struct ssb_bus *bus = gpiochip_get_data(chip); @@ -38,14 +38,14 @@ static int ssb_gpio_to_irq(struct gpio_chip *chip, unsigned gpio) * ChipCommon **************************************************/ -static int ssb_gpio_chipco_get_value(struct gpio_chip *chip, unsigned gpio) +static int ssb_gpio_chipco_get_value(struct gpio_chip *chip, unsigned int gpio) { struct ssb_bus *bus = gpiochip_get_data(chip); return !!ssb_chipco_gpio_in(&bus->chipco, 1 << gpio); } -static void ssb_gpio_chipco_set_value(struct gpio_chip *chip, unsigned gpio, +static void ssb_gpio_chipco_set_value(struct gpio_chip *chip, unsigned int gpio, int value) { struct ssb_bus *bus = gpiochip_get_data(chip); @@ -54,7 +54,7 @@ static void ssb_gpio_chipco_set_value(struct gpio_chip *chip, unsigned gpio, } static int ssb_gpio_chipco_direction_input(struct gpio_chip *chip, - unsigned gpio) + unsigned int gpio) { struct ssb_bus *bus = gpiochip_get_data(chip); @@ -63,7 +63,7 @@ static int ssb_gpio_chipco_direction_input(struct gpio_chip *chip, } static int ssb_gpio_chipco_direction_output(struct gpio_chip *chip, - unsigned gpio, int value) + unsigned int gpio, int value) { struct ssb_bus *bus = gpiochip_get_data(chip); @@ -72,7 +72,7 @@ static int ssb_gpio_chipco_direction_output(struct gpio_chip *chip, return 0; } -static int ssb_gpio_chipco_request(struct gpio_chip *chip, unsigned gpio) +static int ssb_gpio_chipco_request(struct gpio_chip *chip, unsigned int gpio) { struct ssb_bus *bus = gpiochip_get_data(chip); @@ -85,7 +85,7 @@ static int ssb_gpio_chipco_request(struct gpio_chip *chip, unsigned gpio) return 0; } -static void ssb_gpio_chipco_free(struct gpio_chip *chip, unsigned gpio) +static void ssb_gpio_chipco_free(struct gpio_chip *chip, unsigned int gpio) { struct ssb_bus *bus = gpiochip_get_data(chip); @@ -256,14 +256,14 @@ static int ssb_gpio_chipco_init(struct ssb_bus *bus) #ifdef CONFIG_SSB_DRIVER_EXTIF -static int ssb_gpio_extif_get_value(struct gpio_chip *chip, unsigned gpio) +static int ssb_gpio_extif_get_value(struct gpio_chip *chip, unsigned int gpio) { struct ssb_bus *bus = gpiochip_get_data(chip); return !!ssb_extif_gpio_in(&bus->extif, 1 << gpio); } -static void ssb_gpio_extif_set_value(struct gpio_chip *chip, unsigned gpio, +static void ssb_gpio_extif_set_value(struct gpio_chip *chip, unsigned int gpio, int value) { struct ssb_bus *bus = gpiochip_get_data(chip); @@ -272,7 +272,7 @@ static void ssb_gpio_extif_set_value(struct gpio_chip *chip, unsigned gpio, } static int ssb_gpio_extif_direction_input(struct gpio_chip *chip, - unsigned gpio) + unsigned int gpio) { struct ssb_bus *bus = gpiochip_get_data(chip); @@ -281,7 +281,7 @@ static int ssb_gpio_extif_direction_input(struct gpio_chip *chip, } static int ssb_gpio_extif_direction_output(struct gpio_chip *chip, - unsigned gpio, int value) + unsigned int gpio, int value) { struct ssb_bus *bus = gpiochip_get_data(chip); diff --git a/drivers/staging/emxx_udc/Kconfig b/drivers/staging/emxx_udc/Kconfig index cc3402020487..d7577096fb25 100644 --- a/drivers/staging/emxx_udc/Kconfig +++ b/drivers/staging/emxx_udc/Kconfig @@ -1,5 +1,5 @@ config USB_EMXX - bool "EMXX USB Function Device Controller" + tristate "EMXX USB Function Device Controller" depends on USB_GADGET && (ARCH_SHMOBILE || (ARM && COMPILE_TEST)) help The Emma Mobile series of SoCs from Renesas Electronics and diff --git a/drivers/staging/emxx_udc/emxx_udc.c b/drivers/staging/emxx_udc/emxx_udc.c index 3bd91758b2da..3b56b2826263 100644 --- a/drivers/staging/emxx_udc/emxx_udc.c +++ b/drivers/staging/emxx_udc/emxx_udc.c @@ -15,7 +15,7 @@ */ #include <linux/kernel.h> -#include <linux/init.h> +#include <linux/module.h> #include <linux/platform_device.h> #include <linux/delay.h> #include <linux/ioport.h> @@ -39,9 +39,11 @@ #include "emxx_udc.h" +#define DRIVER_DESC "EMXX UDC driver" #define DMA_ADDR_INVALID (~(dma_addr_t)0) static const char driver_name[] = "emxx_udc"; +static const char driver_desc[] = DRIVER_DESC; /*===========================================================================*/ /* Prototype */ @@ -3296,6 +3298,28 @@ static void nbu2ss_drv_shutdown(struct platform_device *pdev) } /*-------------------------------------------------------------------------*/ +static int nbu2ss_drv_remove(struct platform_device *pdev) +{ + struct nbu2ss_udc *udc; + struct nbu2ss_ep *ep; + int i; + + udc = &udc_controller; + + for (i = 0; i < NUM_ENDPOINTS; i++) { + ep = &udc->ep[i]; + if (ep->virt_buf) + dma_free_coherent(NULL, PAGE_SIZE, + (void *)ep->virt_buf, ep->phys_buf); + } + + /* Interrupt Handler - Release */ + free_irq(INT_VBUS, udc); + + return 0; +} + +/*-------------------------------------------------------------------------*/ static int nbu2ss_drv_suspend(struct platform_device *pdev, pm_message_t state) { struct nbu2ss_udc *udc; @@ -3347,12 +3371,16 @@ static int nbu2ss_drv_resume(struct platform_device *pdev) static struct platform_driver udc_driver = { .probe = nbu2ss_drv_probe, .shutdown = nbu2ss_drv_shutdown, + .remove = nbu2ss_drv_remove, .suspend = nbu2ss_drv_suspend, .resume = nbu2ss_drv_resume, .driver = { - .name = driver_name, - .suppress_bind_attrs = true, + .name = driver_name, }, }; -builtin_platform_driver(udc_driver); +module_platform_driver(udc_driver); + +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_AUTHOR("Renesas Electronics Corporation"); +MODULE_LICENSE("GPL"); diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c index 581a63a0a63e..463b1a360733 100644 --- a/drivers/staging/lustre/lustre/llite/dcache.c +++ b/drivers/staging/lustre/lustre/llite/dcache.c @@ -78,7 +78,7 @@ static void ll_release(struct dentry *de) * INVALID) so d_lookup() matches it, but we have no lock on it (so * lock_match() fails) and we spin around real_lookup(). */ -static int ll_dcompare(const struct dentry *parent, const struct dentry *dentry, +static int ll_dcompare(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c index 8e52722266fe..c1cb6b19e724 100644 --- a/drivers/staging/lustre/lustre/llite/statahead.c +++ b/drivers/staging/lustre/lustre/llite/statahead.c @@ -781,7 +781,7 @@ static int sa_args_init(struct inode *dir, struct inode *child, struct ll_sa_entry *entry, struct md_enqueue_info **pmi, struct ldlm_enqueue_info **pei) { - struct qstr *qstr = &entry->se_qstr; + const struct qstr *qstr = &entry->se_qstr; struct ll_inode_info *lli = ll_i2info(dir); struct md_enqueue_info *minfo; struct ldlm_enqueue_info *einfo; @@ -1340,7 +1340,7 @@ enum { static int is_first_dirent(struct inode *dir, struct dentry *dentry) { struct ll_dir_chain chain; - struct qstr *target = &dentry->d_name; + const struct qstr *target = &dentry->d_name; struct page *page; __u64 pos = 0; int dot_de; diff --git a/drivers/staging/media/Kconfig b/drivers/staging/media/Kconfig index cae42e56f270..7292f23954df 100644 --- a/drivers/staging/media/Kconfig +++ b/drivers/staging/media/Kconfig @@ -16,7 +16,7 @@ menuconfig STAGING_MEDIA If in doubt, say N here. -if STAGING_MEDIA +if STAGING_MEDIA && MEDIA_SUPPORT # Please keep them in alphabetic order source "drivers/staging/media/bcm2048/Kconfig" diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c index 9fffddb7ac7e..b2393bbacb26 100644 --- a/drivers/staging/media/cec/cec-adap.c +++ b/drivers/staging/media/cec/cec-adap.c @@ -1252,7 +1252,7 @@ int __cec_s_log_addrs(struct cec_adapter *adap, return -EINVAL; } /* Zero unused part of the feature array */ - memset(features + i, 0, feature_sz - i); + memset(features + i + 1, 0, feature_sz - i - 1); } if (log_addrs->cec_version >= CEC_OP_CEC_VERSION_2_0) { diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 50f3d3a0dd7b..39b928c2849d 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -492,7 +492,8 @@ void iscsit_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd) bool scsi_cmd = (cmd->iscsi_opcode == ISCSI_OP_SCSI_CMD); spin_lock_bh(&conn->cmd_lock); - if (!list_empty(&cmd->i_conn_node)) + if (!list_empty(&cmd->i_conn_node) && + !(cmd->se_cmd.transport_state & CMD_T_FABRIC_STOP)) list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); @@ -4034,6 +4035,7 @@ int iscsi_target_rx_thread(void *arg) static void iscsit_release_commands_from_conn(struct iscsi_conn *conn) { + LIST_HEAD(tmp_list); struct iscsi_cmd *cmd = NULL, *cmd_tmp = NULL; struct iscsi_session *sess = conn->sess; /* @@ -4042,18 +4044,26 @@ static void iscsit_release_commands_from_conn(struct iscsi_conn *conn) * has been reset -> returned sleeping pre-handler state. */ spin_lock_bh(&conn->cmd_lock); - list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_conn_node) { + list_splice_init(&conn->conn_cmd_list, &tmp_list); + list_for_each_entry(cmd, &tmp_list, i_conn_node) { + struct se_cmd *se_cmd = &cmd->se_cmd; + + if (se_cmd->se_tfo != NULL) { + spin_lock(&se_cmd->t_state_lock); + se_cmd->transport_state |= CMD_T_FABRIC_STOP; + spin_unlock(&se_cmd->t_state_lock); + } + } + spin_unlock_bh(&conn->cmd_lock); + + list_for_each_entry_safe(cmd, cmd_tmp, &tmp_list, i_conn_node) { list_del_init(&cmd->i_conn_node); - spin_unlock_bh(&conn->cmd_lock); iscsit_increment_maxcmdsn(cmd, sess); - iscsit_free_cmd(cmd, true); - spin_lock_bh(&conn->cmd_lock); } - spin_unlock_bh(&conn->cmd_lock); } static void iscsit_stop_timers_for_cmds( diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index b5212f0f9571..adf419fa4291 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1371,8 +1371,9 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) } login->zero_tsih = zero_tsih; - conn->sess->se_sess->sup_prot_ops = - conn->conn_transport->iscsit_get_sup_prot_ops(conn); + if (conn->sess) + conn->sess->se_sess->sup_prot_ops = + conn->conn_transport->iscsit_get_sup_prot_ops(conn); tpg = conn->tpg; if (!tpg) { diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index a4046ca6e60d..6b423485c5d6 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -821,13 +821,15 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) * in ATA and we need to set TPE=1 */ bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib, - struct request_queue *q, int block_size) + struct request_queue *q) { + int block_size = queue_logical_block_size(q); + if (!blk_queue_discard(q)) return false; - attrib->max_unmap_lba_count = (q->limits.max_discard_sectors << 9) / - block_size; + attrib->max_unmap_lba_count = + q->limits.max_discard_sectors >> (ilog2(block_size) - 9); /* * Currently hardcoded to 1 in Linux/SCSI code.. */ diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 75f0f08b2a34..d545993df18b 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -161,8 +161,7 @@ static int fd_configure_device(struct se_device *dev) dev_size, div_u64(dev_size, fd_dev->fd_block_size), fd_dev->fd_block_size); - if (target_configure_unmap_from_queue(&dev->dev_attrib, q, - fd_dev->fd_block_size)) + if (target_configure_unmap_from_queue(&dev->dev_attrib, q)) pr_debug("IFILE: BLOCK Discard support available," " disabled by default\n"); /* @@ -523,7 +522,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, */ if (cmd->data_length > FD_MAX_BYTES) { pr_err("FILEIO: Not able to process I/O of %u bytes due to" - "FD_MAX_BYTES: %u iovec count limitiation\n", + "FD_MAX_BYTES: %u iovec count limitation\n", cmd->data_length, FD_MAX_BYTES); return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 22af12f8b8eb..372d744315f3 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -121,8 +121,7 @@ static int iblock_configure_device(struct se_device *dev) dev->dev_attrib.hw_max_sectors = queue_max_hw_sectors(q); dev->dev_attrib.hw_queue_depth = q->nr_requests; - if (target_configure_unmap_from_queue(&dev->dev_attrib, q, - dev->dev_attrib.hw_block_size)) + if (target_configure_unmap_from_queue(&dev->dev_attrib, q)) pr_debug("IBLOCK: BLOCK Discard support available," " disabled by default\n"); @@ -389,7 +388,7 @@ iblock_execute_sync_cache(struct se_cmd *cmd) bio = bio_alloc(GFP_KERNEL, 0); bio->bi_end_io = iblock_end_io_flush; bio->bi_bdev = ib_dev->ibd_bd; - bio->bi_rw = WRITE_FLUSH; + bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); if (!immed) bio->bi_private = cmd; submit_bio(bio); diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index fc91e85f54ba..e2c970a9d61c 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -146,6 +146,7 @@ sense_reason_t target_cmd_size_check(struct se_cmd *cmd, unsigned int size); void target_qf_do_work(struct work_struct *work); bool target_check_wce(struct se_device *dev); bool target_check_fua(struct se_device *dev); +void __target_execute_cmd(struct se_cmd *, bool); /* target_core_stat.c */ void target_stat_setup_dev_default_groups(struct se_device *); diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index a9057aa07176..04f616b3ba0a 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -602,7 +602,7 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool succes cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT; spin_unlock_irq(&cmd->t_state_lock); - __target_execute_cmd(cmd); + __target_execute_cmd(cmd, false); kfree(buf); return ret; diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 5ab3967dda43..6094a6beddde 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -754,7 +754,15 @@ EXPORT_SYMBOL(target_complete_cmd); void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int length) { - if (scsi_status == SAM_STAT_GOOD && length < cmd->data_length) { + if (scsi_status != SAM_STAT_GOOD) { + return; + } + + /* + * Calculate new residual count based upon length of SCSI data + * transferred. + */ + if (length < cmd->data_length) { if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) { cmd->residual_count += cmd->data_length - length; } else { @@ -763,6 +771,12 @@ void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int len } cmd->data_length = length; + } else if (length > cmd->data_length) { + cmd->se_cmd_flags |= SCF_OVERFLOW_BIT; + cmd->residual_count = length - cmd->data_length; + } else { + cmd->se_cmd_flags &= ~(SCF_OVERFLOW_BIT | SCF_UNDERFLOW_BIT); + cmd->residual_count = 0; } target_complete_cmd(cmd, scsi_status); @@ -1303,23 +1317,6 @@ target_setup_cmd_from_cdb(struct se_cmd *cmd, unsigned char *cdb) trace_target_sequencer_start(cmd); - /* - * Check for an existing UNIT ATTENTION condition - */ - ret = target_scsi3_ua_check(cmd); - if (ret) - return ret; - - ret = target_alua_state_check(cmd); - if (ret) - return ret; - - ret = target_check_reservation(cmd); - if (ret) { - cmd->scsi_status = SAM_STAT_RESERVATION_CONFLICT; - return ret; - } - ret = dev->transport->parse_cdb(cmd); if (ret == TCM_UNSUPPORTED_SCSI_OPCODE) pr_warn_ratelimited("%s/%s: Unsupported SCSI Opcode 0x%02x, sending CHECK_CONDITION.\n", @@ -1761,20 +1758,45 @@ queue_full: } EXPORT_SYMBOL(transport_generic_request_failure); -void __target_execute_cmd(struct se_cmd *cmd) +void __target_execute_cmd(struct se_cmd *cmd, bool do_checks) { sense_reason_t ret; - if (cmd->execute_cmd) { - ret = cmd->execute_cmd(cmd); - if (ret) { - spin_lock_irq(&cmd->t_state_lock); - cmd->transport_state &= ~(CMD_T_BUSY|CMD_T_SENT); - spin_unlock_irq(&cmd->t_state_lock); + if (!cmd->execute_cmd) { + ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + goto err; + } + if (do_checks) { + /* + * Check for an existing UNIT ATTENTION condition after + * target_handle_task_attr() has done SAM task attr + * checking, and possibly have already defered execution + * out to target_restart_delayed_cmds() context. + */ + ret = target_scsi3_ua_check(cmd); + if (ret) + goto err; - transport_generic_request_failure(cmd, ret); + ret = target_alua_state_check(cmd); + if (ret) + goto err; + + ret = target_check_reservation(cmd); + if (ret) { + cmd->scsi_status = SAM_STAT_RESERVATION_CONFLICT; + goto err; } } + + ret = cmd->execute_cmd(cmd); + if (!ret) + return; +err: + spin_lock_irq(&cmd->t_state_lock); + cmd->transport_state &= ~(CMD_T_BUSY|CMD_T_SENT); + spin_unlock_irq(&cmd->t_state_lock); + + transport_generic_request_failure(cmd, ret); } static int target_write_prot_action(struct se_cmd *cmd) @@ -1819,6 +1841,8 @@ static bool target_handle_task_attr(struct se_cmd *cmd) if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return false; + cmd->se_cmd_flags |= SCF_TASK_ATTR_SET; + /* * Check for the existence of HEAD_OF_QUEUE, and if true return 1 * to allow the passed struct se_cmd list of tasks to the front of the list. @@ -1899,7 +1923,7 @@ void target_execute_cmd(struct se_cmd *cmd) return; } - __target_execute_cmd(cmd); + __target_execute_cmd(cmd, true); } EXPORT_SYMBOL(target_execute_cmd); @@ -1923,7 +1947,7 @@ static void target_restart_delayed_cmds(struct se_device *dev) list_del(&cmd->se_delayed_node); spin_unlock(&dev->delayed_cmd_lock); - __target_execute_cmd(cmd); + __target_execute_cmd(cmd, true); if (cmd->sam_task_attr == TCM_ORDERED_TAG) break; @@ -1941,6 +1965,9 @@ static void transport_complete_task_attr(struct se_cmd *cmd) if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return; + if (!(cmd->se_cmd_flags & SCF_TASK_ATTR_SET)) + goto restart; + if (cmd->sam_task_attr == TCM_SIMPLE_TAG) { atomic_dec_mb(&dev->simple_cmds); dev->dev_cur_ordered_id++; @@ -1957,7 +1984,7 @@ static void transport_complete_task_attr(struct se_cmd *cmd) pr_debug("Incremented dev_cur_ordered_id: %u for ORDERED\n", dev->dev_cur_ordered_id); } - +restart: target_restart_delayed_cmds(dev); } @@ -2557,15 +2584,10 @@ static void target_release_cmd_kref(struct kref *kref) bool fabric_stop; spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); - if (list_empty(&se_cmd->se_cmd_list)) { - spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); - target_free_cmd_mem(se_cmd); - se_cmd->se_tfo->release_cmd(se_cmd); - return; - } spin_lock(&se_cmd->t_state_lock); - fabric_stop = (se_cmd->transport_state & CMD_T_FABRIC_STOP); + fabric_stop = (se_cmd->transport_state & CMD_T_FABRIC_STOP) && + (se_cmd->transport_state & CMD_T_ABORTED); spin_unlock(&se_cmd->t_state_lock); if (se_cmd->cmd_wait_set || fabric_stop) { diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c index f5186a744399..6ffbb603d912 100644 --- a/drivers/target/tcm_fc/tfc_sess.c +++ b/drivers/target/tcm_fc/tfc_sess.c @@ -91,6 +91,7 @@ static void ft_tport_delete(struct ft_tport *tport) ft_sess_delete_all(tport); lport = tport->lport; + lport->service_params &= ~FCP_SPPF_TARG_FCN; BUG_ON(tport != lport->prov[FC_TYPE_FCP]); RCU_INIT_POINTER(lport->prov[FC_TYPE_FCP], NULL); @@ -110,6 +111,7 @@ void ft_lport_add(struct fc_lport *lport, void *arg) { mutex_lock(&ft_lport_lock); ft_tport_get(lport); + lport->service_params |= FCP_SPPF_TARG_FCN; mutex_unlock(&ft_lport_lock); } diff --git a/drivers/thermal/clock_cooling.c b/drivers/thermal/clock_cooling.c index 1b4ff0f4c716..ed5dd0e88657 100644 --- a/drivers/thermal/clock_cooling.c +++ b/drivers/thermal/clock_cooling.c @@ -426,6 +426,7 @@ clock_cooling_register(struct device *dev, const char *clock_name) if (!ccdev) return ERR_PTR(-ENOMEM); + mutex_init(&ccdev->lock); ccdev->dev = dev; ccdev->clk = devm_clk_get(dev, clock_name); if (IS_ERR(ccdev->clk)) diff --git a/drivers/thermal/fair_share.c b/drivers/thermal/fair_share.c index 34fe36504a55..68bd1b569118 100644 --- a/drivers/thermal/fair_share.c +++ b/drivers/thermal/fair_share.c @@ -116,7 +116,9 @@ static int fair_share_throttle(struct thermal_zone_device *tz, int trip) instance->target = get_target_state(tz, cdev, percentage, cur_trip_level); + mutex_lock(&instance->cdev->lock); instance->cdev->updated = false; + mutex_unlock(&instance->cdev->lock); thermal_cdev_update(cdev); } return 0; diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c index fc52016d4e85..bb118a152cbb 100644 --- a/drivers/thermal/gov_bang_bang.c +++ b/drivers/thermal/gov_bang_bang.c @@ -71,7 +71,9 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) dev_dbg(&instance->cdev->device, "target=%d\n", (int)instance->target); + mutex_lock(&instance->cdev->lock); instance->cdev->updated = false; /* cdev needs update */ + mutex_unlock(&instance->cdev->lock); } mutex_unlock(&tz->lock); diff --git a/drivers/thermal/intel_pch_thermal.c b/drivers/thermal/intel_pch_thermal.c index 6a6ec1c95a7a..9b4815e81b0d 100644 --- a/drivers/thermal/intel_pch_thermal.c +++ b/drivers/thermal/intel_pch_thermal.c @@ -21,6 +21,7 @@ #include <linux/init.h> #include <linux/pci.h> #include <linux/thermal.h> +#include <linux/pm.h> /* Intel PCH thermal Device IDs */ #define PCH_THERMAL_DID_WPT 0x9CA4 /* Wildcat Point */ @@ -65,6 +66,7 @@ struct pch_thermal_device { unsigned long crt_temp; int hot_trip_id; unsigned long hot_temp; + bool bios_enabled; }; static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips) @@ -75,8 +77,10 @@ static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips) *nr_trips = 0; /* Check if BIOS has already enabled thermal sensor */ - if (WPT_TSS_TSDSS & readb(ptd->hw_base + WPT_TSS)) + if (WPT_TSS_TSDSS & readb(ptd->hw_base + WPT_TSS)) { + ptd->bios_enabled = true; goto read_trips; + } tsel = readb(ptd->hw_base + WPT_TSEL); /* @@ -130,9 +134,39 @@ static int pch_wpt_get_temp(struct pch_thermal_device *ptd, int *temp) return 0; } +static int pch_wpt_suspend(struct pch_thermal_device *ptd) +{ + u8 tsel; + + if (ptd->bios_enabled) + return 0; + + tsel = readb(ptd->hw_base + WPT_TSEL); + + writeb(tsel & 0xFE, ptd->hw_base + WPT_TSEL); + + return 0; +} + +static int pch_wpt_resume(struct pch_thermal_device *ptd) +{ + u8 tsel; + + if (ptd->bios_enabled) + return 0; + + tsel = readb(ptd->hw_base + WPT_TSEL); + + writeb(tsel | WPT_TSEL_ETS, ptd->hw_base + WPT_TSEL); + + return 0; +} + struct pch_dev_ops { int (*hw_init)(struct pch_thermal_device *ptd, int *nr_trips); int (*get_temp)(struct pch_thermal_device *ptd, int *temp); + int (*suspend)(struct pch_thermal_device *ptd); + int (*resume)(struct pch_thermal_device *ptd); }; @@ -140,6 +174,8 @@ struct pch_dev_ops { static const struct pch_dev_ops pch_dev_ops_wpt = { .hw_init = pch_wpt_init, .get_temp = pch_wpt_get_temp, + .suspend = pch_wpt_suspend, + .resume = pch_wpt_resume, }; static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp) @@ -269,6 +305,22 @@ static void intel_pch_thermal_remove(struct pci_dev *pdev) pci_disable_device(pdev); } +static int intel_pch_thermal_suspend(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct pch_thermal_device *ptd = pci_get_drvdata(pdev); + + return ptd->ops->suspend(ptd); +} + +static int intel_pch_thermal_resume(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct pch_thermal_device *ptd = pci_get_drvdata(pdev); + + return ptd->ops->resume(ptd); +} + static struct pci_device_id intel_pch_thermal_id[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WPT) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL) }, @@ -276,11 +328,17 @@ static struct pci_device_id intel_pch_thermal_id[] = { }; MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id); +static const struct dev_pm_ops intel_pch_pm_ops = { + .suspend = intel_pch_thermal_suspend, + .resume = intel_pch_thermal_resume, +}; + static struct pci_driver intel_pch_thermal_driver = { .name = "intel_pch_thermal", .id_table = intel_pch_thermal_id, .probe = intel_pch_thermal_probe, .remove = intel_pch_thermal_remove, + .driver.pm = &intel_pch_pm_ops, }; module_pci_driver(intel_pch_thermal_driver); diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index 015ce2eb6eb7..0e4dc0afcfd2 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -388,7 +388,7 @@ static int clamp_thread(void *arg) int sleeptime; unsigned long target_jiffies; unsigned int guard; - unsigned int compensation = 0; + unsigned int compensated_ratio; int interval; /* jiffies to sleep for each attempt */ unsigned int duration_jiffies = msecs_to_jiffies(duration); unsigned int window_size_now; @@ -409,8 +409,11 @@ static int clamp_thread(void *arg) * c-states, thus we need to compensate the injected idle ratio * to achieve the actual target reported by the HW. */ - compensation = get_compensation(target_ratio); - interval = duration_jiffies*100/(target_ratio+compensation); + compensated_ratio = target_ratio + + get_compensation(target_ratio); + if (compensated_ratio <= 0) + compensated_ratio = 1; + interval = duration_jiffies * 100 / compensated_ratio; /* align idle time */ target_jiffies = roundup(jiffies, interval); @@ -647,8 +650,8 @@ static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev, goto exit_set; } else if (set_target_ratio > 0 && new_target_ratio == 0) { pr_info("Stop forced idle injection\n"); - set_target_ratio = 0; end_power_clamp(); + set_target_ratio = 0; } else /* adjust currently running */ { set_target_ratio = new_target_ratio; /* make new set_target_ratio visible to other cpus */ diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c index 2f1a863a8e15..b4d3116cfdaf 100644 --- a/drivers/thermal/power_allocator.c +++ b/drivers/thermal/power_allocator.c @@ -529,7 +529,9 @@ static void allow_maximum_power(struct thermal_zone_device *tz) continue; instance->target = 0; + mutex_lock(&instance->cdev->lock); instance->cdev->updated = false; + mutex_unlock(&instance->cdev->lock); thermal_cdev_update(instance->cdev); } } diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c index ea9366ad3e6b..bcef2e7c4ec9 100644 --- a/drivers/thermal/step_wise.c +++ b/drivers/thermal/step_wise.c @@ -175,7 +175,9 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) update_passive_instance(tz, trip_type, -1); instance->initialized = true; + mutex_lock(&instance->cdev->lock); instance->cdev->updated = false; /* cdev needs update */ + mutex_unlock(&instance->cdev->lock); } mutex_unlock(&tz->lock); diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 5133cd1e10b7..e2fc6161dded 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1093,7 +1093,9 @@ int power_actor_set_power(struct thermal_cooling_device *cdev, return ret; instance->target = state; + mutex_lock(&cdev->lock); cdev->updated = false; + mutex_unlock(&cdev->lock); thermal_cdev_update(cdev); return 0; @@ -1623,11 +1625,13 @@ void thermal_cdev_update(struct thermal_cooling_device *cdev) struct thermal_instance *instance; unsigned long target = 0; + mutex_lock(&cdev->lock); /* cooling device is updated*/ - if (cdev->updated) + if (cdev->updated) { + mutex_unlock(&cdev->lock); return; + } - mutex_lock(&cdev->lock); /* Make sure cdev enters the deepest cooling state */ list_for_each_entry(instance, &cdev->thermal_instances, cdev_node) { dev_dbg(&cdev->device, "zone%d->target=%lu\n", @@ -1637,9 +1641,9 @@ void thermal_cdev_update(struct thermal_cooling_device *cdev) if (instance->target > target) target = instance->target; } - mutex_unlock(&cdev->lock); cdev->ops->set_cur_state(cdev, target); cdev->updated = true; + mutex_unlock(&cdev->lock); trace_cdev_update(cdev, target); dev_dbg(&cdev->device, "set to state %lu\n", target); } diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c index 06fd2ed9ef9d..c41c7742903a 100644 --- a/drivers/thermal/thermal_hwmon.c +++ b/drivers/thermal/thermal_hwmon.c @@ -232,6 +232,7 @@ int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) return result; } +EXPORT_SYMBOL_GPL(thermal_add_hwmon_sysfs); void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) { @@ -270,3 +271,4 @@ void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) hwmon_device_unregister(hwmon->device); kfree(hwmon); } +EXPORT_SYMBOL_GPL(thermal_remove_hwmon_sysfs); diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c index 1519d2ca7705..73137f4aac20 100644 --- a/drivers/tty/serial/ar933x_uart.c +++ b/drivers/tty/serial/ar933x_uart.c @@ -54,7 +54,7 @@ struct ar933x_uart_port { static inline bool ar933x_uart_console_enabled(void) { - return config_enabled(CONFIG_SERIAL_AR933X_CONSOLE); + return IS_ENABLED(CONFIG_SERIAL_AR933X_CONSOLE); } static inline unsigned int ar933x_uart_read(struct ar933x_uart_port *up, @@ -636,7 +636,7 @@ static int ar933x_uart_probe(struct platform_device *pdev) int ret; np = pdev->dev.of_node; - if (config_enabled(CONFIG_OF) && np) { + if (IS_ENABLED(CONFIG_OF) && np) { id = of_alias_get_id(np, "serial"); if (id < 0) { dev_err(&pdev->dev, "unable to get alias id, err=%d\n", diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 96a70789b4c2..4d6a5c672a3d 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -496,12 +496,10 @@ static int cp210x_write_reg_block(struct usb_serial_port *port, u8 req, void *dmabuf; int result; - dmabuf = kmalloc(bufsize, GFP_KERNEL); + dmabuf = kmemdup(buf, bufsize, GFP_KERNEL); if (!dmabuf) return -ENOMEM; - memcpy(dmabuf, buf, bufsize); - result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), req, REQTYPE_HOST_TO_INTERFACE, 0, port_priv->bInterfaceNumber, dmabuf, bufsize, diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index ae8c0365abd6..944de657a07a 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -350,6 +350,7 @@ void usb_serial_generic_read_bulk_callback(struct urb *urb) struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; unsigned long flags; + int status = urb->status; int i; for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) { @@ -360,22 +361,22 @@ void usb_serial_generic_read_bulk_callback(struct urb *urb) dev_dbg(&port->dev, "%s - urb %d, len %d\n", __func__, i, urb->actual_length); - switch (urb->status) { + switch (status) { case 0: break; case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: dev_dbg(&port->dev, "%s - urb stopped: %d\n", - __func__, urb->status); + __func__, status); return; case -EPIPE: dev_err(&port->dev, "%s - urb stopped: %d\n", - __func__, urb->status); + __func__, status); return; default: dev_dbg(&port->dev, "%s - nonzero urb status: %d\n", - __func__, urb->status); + __func__, status); goto resubmit; } @@ -399,6 +400,7 @@ void usb_serial_generic_write_bulk_callback(struct urb *urb) { unsigned long flags; struct usb_serial_port *port = urb->context; + int status = urb->status; int i; for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) { @@ -410,22 +412,22 @@ void usb_serial_generic_write_bulk_callback(struct urb *urb) set_bit(i, &port->write_urbs_free); spin_unlock_irqrestore(&port->lock, flags); - switch (urb->status) { + switch (status) { case 0: break; case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: dev_dbg(&port->dev, "%s - urb stopped: %d\n", - __func__, urb->status); + __func__, status); return; case -EPIPE: dev_err_console(port, "%s - urb stopped: %d\n", - __func__, urb->status); + __func__, status); return; default: dev_err_console(port, "%s - nonzero urb status: %d\n", - __func__, urb->status); + __func__, status); goto resubmit; } diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index d96d423d00e6..8e07536c233a 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -273,6 +273,7 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_LE922_USBCFG5 0x1045 #define TELIT_PRODUCT_LE920 0x1200 #define TELIT_PRODUCT_LE910 0x1201 +#define TELIT_PRODUCT_LE910_USBCFG4 0x1206 /* ZTE PRODUCTS */ #define ZTE_VENDOR_ID 0x19d2 @@ -1198,6 +1199,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = (kernel_ulong_t)&telit_le910_blacklist }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), + .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920), .driver_info = (kernel_ulong_t)&telit_le920_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */ diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index e7dbbef2af2a..07b4bf01061d 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -1,5 +1,4 @@ -/* vi: ts=8 sw=8 - * +/* * TI 3410/5052 USB Serial Driver * * Copyright (C) 2004 Texas Instruments @@ -35,9 +34,238 @@ #include <linux/usb.h> #include <linux/usb/serial.h> -#include "ti_usb_3410_5052.h" - -/* Defines */ +/* Configuration ids */ +#define TI_BOOT_CONFIG 1 +#define TI_ACTIVE_CONFIG 2 + +/* Vendor and product ids */ +#define TI_VENDOR_ID 0x0451 +#define IBM_VENDOR_ID 0x04b3 +#define TI_3410_PRODUCT_ID 0x3410 +#define IBM_4543_PRODUCT_ID 0x4543 +#define IBM_454B_PRODUCT_ID 0x454b +#define IBM_454C_PRODUCT_ID 0x454c +#define TI_3410_EZ430_ID 0xF430 /* TI ez430 development tool */ +#define TI_5052_BOOT_PRODUCT_ID 0x5052 /* no EEPROM, no firmware */ +#define TI_5152_BOOT_PRODUCT_ID 0x5152 /* no EEPROM, no firmware */ +#define TI_5052_EEPROM_PRODUCT_ID 0x505A /* EEPROM, no firmware */ +#define TI_5052_FIRMWARE_PRODUCT_ID 0x505F /* firmware is running */ +#define FRI2_PRODUCT_ID 0x5053 /* Fish River Island II */ + +/* Multi-Tech vendor and product ids */ +#define MTS_VENDOR_ID 0x06E0 +#define MTS_GSM_NO_FW_PRODUCT_ID 0xF108 +#define MTS_CDMA_NO_FW_PRODUCT_ID 0xF109 +#define MTS_CDMA_PRODUCT_ID 0xF110 +#define MTS_GSM_PRODUCT_ID 0xF111 +#define MTS_EDGE_PRODUCT_ID 0xF112 +#define MTS_MT9234MU_PRODUCT_ID 0xF114 +#define MTS_MT9234ZBA_PRODUCT_ID 0xF115 +#define MTS_MT9234ZBAOLD_PRODUCT_ID 0x0319 + +/* Abbott Diabetics vendor and product ids */ +#define ABBOTT_VENDOR_ID 0x1a61 +#define ABBOTT_STEREO_PLUG_ID 0x3410 +#define ABBOTT_PRODUCT_ID ABBOTT_STEREO_PLUG_ID +#define ABBOTT_STRIP_PORT_ID 0x3420 + +/* Honeywell vendor and product IDs */ +#define HONEYWELL_VENDOR_ID 0x10ac +#define HONEYWELL_HGI80_PRODUCT_ID 0x0102 /* Honeywell HGI80 */ + +/* Moxa UPORT 11x0 vendor and product IDs */ +#define MXU1_VENDOR_ID 0x110a +#define MXU1_1110_PRODUCT_ID 0x1110 +#define MXU1_1130_PRODUCT_ID 0x1130 +#define MXU1_1150_PRODUCT_ID 0x1150 +#define MXU1_1151_PRODUCT_ID 0x1151 +#define MXU1_1131_PRODUCT_ID 0x1131 + +/* Commands */ +#define TI_GET_VERSION 0x01 +#define TI_GET_PORT_STATUS 0x02 +#define TI_GET_PORT_DEV_INFO 0x03 +#define TI_GET_CONFIG 0x04 +#define TI_SET_CONFIG 0x05 +#define TI_OPEN_PORT 0x06 +#define TI_CLOSE_PORT 0x07 +#define TI_START_PORT 0x08 +#define TI_STOP_PORT 0x09 +#define TI_TEST_PORT 0x0A +#define TI_PURGE_PORT 0x0B +#define TI_RESET_EXT_DEVICE 0x0C +#define TI_WRITE_DATA 0x80 +#define TI_READ_DATA 0x81 +#define TI_REQ_TYPE_CLASS 0x82 + +/* Module identifiers */ +#define TI_I2C_PORT 0x01 +#define TI_IEEE1284_PORT 0x02 +#define TI_UART1_PORT 0x03 +#define TI_UART2_PORT 0x04 +#define TI_RAM_PORT 0x05 + +/* Modem status */ +#define TI_MSR_DELTA_CTS 0x01 +#define TI_MSR_DELTA_DSR 0x02 +#define TI_MSR_DELTA_RI 0x04 +#define TI_MSR_DELTA_CD 0x08 +#define TI_MSR_CTS 0x10 +#define TI_MSR_DSR 0x20 +#define TI_MSR_RI 0x40 +#define TI_MSR_CD 0x80 +#define TI_MSR_DELTA_MASK 0x0F +#define TI_MSR_MASK 0xF0 + +/* Line status */ +#define TI_LSR_OVERRUN_ERROR 0x01 +#define TI_LSR_PARITY_ERROR 0x02 +#define TI_LSR_FRAMING_ERROR 0x04 +#define TI_LSR_BREAK 0x08 +#define TI_LSR_ERROR 0x0F +#define TI_LSR_RX_FULL 0x10 +#define TI_LSR_TX_EMPTY 0x20 + +/* Line control */ +#define TI_LCR_BREAK 0x40 + +/* Modem control */ +#define TI_MCR_LOOP 0x04 +#define TI_MCR_DTR 0x10 +#define TI_MCR_RTS 0x20 + +/* Mask settings */ +#define TI_UART_ENABLE_RTS_IN 0x0001 +#define TI_UART_DISABLE_RTS 0x0002 +#define TI_UART_ENABLE_PARITY_CHECKING 0x0008 +#define TI_UART_ENABLE_DSR_OUT 0x0010 +#define TI_UART_ENABLE_CTS_OUT 0x0020 +#define TI_UART_ENABLE_X_OUT 0x0040 +#define TI_UART_ENABLE_XA_OUT 0x0080 +#define TI_UART_ENABLE_X_IN 0x0100 +#define TI_UART_ENABLE_DTR_IN 0x0800 +#define TI_UART_DISABLE_DTR 0x1000 +#define TI_UART_ENABLE_MS_INTS 0x2000 +#define TI_UART_ENABLE_AUTO_START_DMA 0x4000 + +/* Parity */ +#define TI_UART_NO_PARITY 0x00 +#define TI_UART_ODD_PARITY 0x01 +#define TI_UART_EVEN_PARITY 0x02 +#define TI_UART_MARK_PARITY 0x03 +#define TI_UART_SPACE_PARITY 0x04 + +/* Stop bits */ +#define TI_UART_1_STOP_BITS 0x00 +#define TI_UART_1_5_STOP_BITS 0x01 +#define TI_UART_2_STOP_BITS 0x02 + +/* Bits per character */ +#define TI_UART_5_DATA_BITS 0x00 +#define TI_UART_6_DATA_BITS 0x01 +#define TI_UART_7_DATA_BITS 0x02 +#define TI_UART_8_DATA_BITS 0x03 + +/* 232/485 modes */ +#define TI_UART_232 0x00 +#define TI_UART_485_RECEIVER_DISABLED 0x01 +#define TI_UART_485_RECEIVER_ENABLED 0x02 + +/* Pipe transfer mode and timeout */ +#define TI_PIPE_MODE_CONTINUOUS 0x01 +#define TI_PIPE_MODE_MASK 0x03 +#define TI_PIPE_TIMEOUT_MASK 0x7C +#define TI_PIPE_TIMEOUT_ENABLE 0x80 + +/* Config struct */ +struct ti_uart_config { + __u16 wBaudRate; + __u16 wFlags; + __u8 bDataBits; + __u8 bParity; + __u8 bStopBits; + char cXon; + char cXoff; + __u8 bUartMode; +} __packed; + +/* Get port status */ +struct ti_port_status { + __u8 bCmdCode; + __u8 bModuleId; + __u8 bErrorCode; + __u8 bMSR; + __u8 bLSR; +} __packed; + +/* Purge modes */ +#define TI_PURGE_OUTPUT 0x00 +#define TI_PURGE_INPUT 0x80 + +/* Read/Write data */ +#define TI_RW_DATA_ADDR_SFR 0x10 +#define TI_RW_DATA_ADDR_IDATA 0x20 +#define TI_RW_DATA_ADDR_XDATA 0x30 +#define TI_RW_DATA_ADDR_CODE 0x40 +#define TI_RW_DATA_ADDR_GPIO 0x50 +#define TI_RW_DATA_ADDR_I2C 0x60 +#define TI_RW_DATA_ADDR_FLASH 0x70 +#define TI_RW_DATA_ADDR_DSP 0x80 + +#define TI_RW_DATA_UNSPECIFIED 0x00 +#define TI_RW_DATA_BYTE 0x01 +#define TI_RW_DATA_WORD 0x02 +#define TI_RW_DATA_DOUBLE_WORD 0x04 + +struct ti_write_data_bytes { + __u8 bAddrType; + __u8 bDataType; + __u8 bDataCounter; + __be16 wBaseAddrHi; + __be16 wBaseAddrLo; + __u8 bData[0]; +} __packed; + +struct ti_read_data_request { + __u8 bAddrType; + __u8 bDataType; + __u8 bDataCounter; + __be16 wBaseAddrHi; + __be16 wBaseAddrLo; +} __packed; + +struct ti_read_data_bytes { + __u8 bCmdCode; + __u8 bModuleId; + __u8 bErrorCode; + __u8 bData[0]; +} __packed; + +/* Interrupt struct */ +struct ti_interrupt { + __u8 bICode; + __u8 bIInfo; +} __packed; + +/* Interrupt codes */ +#define TI_CODE_HARDWARE_ERROR 0xFF +#define TI_CODE_DATA_ERROR 0x03 +#define TI_CODE_MODEM_STATUS 0x04 + +/* Download firmware max packet size */ +#define TI_DOWNLOAD_MAX_PACKET_SIZE 64 + +/* Firmware image header */ +struct ti_firmware_header { + __le16 wLength; + __u8 bCheckSum; +} __packed; + +/* UART addresses */ +#define TI_UART1_BASE_ADDR 0xFFA0 /* UART 1 base address */ +#define TI_UART2_BASE_ADDR 0xFFB0 /* UART 2 base address */ +#define TI_UART_OFFSET_LCR 0x0002 /* UART MCR register offset */ +#define TI_UART_OFFSET_MCR 0x0004 /* UART MCR register offset */ #define TI_DRIVER_AUTHOR "Al Borchers <alborchers@steinerpoint.com>" #define TI_DRIVER_DESC "TI USB 3410/5052 Serial Driver" @@ -58,9 +286,6 @@ #define TI_EXTRA_VID_PID_COUNT 5 - -/* Structures */ - struct ti_port { int tp_is_open; __u8 tp_msr; @@ -84,9 +309,6 @@ struct ti_device { int td_urb_error; }; - -/* Function Declarations */ - static int ti_startup(struct usb_serial *serial); static void ti_release(struct usb_serial *serial); static int ti_port_probe(struct usb_serial_port *port); @@ -136,13 +358,8 @@ static int ti_write_byte(struct usb_serial_port *port, struct ti_device *tdev, static int ti_download_firmware(struct ti_device *tdev); - -/* Data */ - -/* module parameters */ static int closing_wait = TI_DEFAULT_CLOSING_WAIT; -/* supported devices */ static const struct usb_device_id ti_id_table_3410[] = { { USB_DEVICE(TI_VENDOR_ID, TI_3410_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_3410_EZ430_ID) }, @@ -174,7 +391,7 @@ static const struct usb_device_id ti_id_table_5052[] = { { USB_DEVICE(TI_VENDOR_ID, TI_5152_BOOT_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5052_EEPROM_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5052_FIRMWARE_PRODUCT_ID) }, - { } /* terminator */ + { } }; static const struct usb_device_id ti_id_table_combined[] = { @@ -275,8 +492,6 @@ static struct usb_serial_driver * const serial_drivers[] = { &ti_1port_device, &ti_2port_device, NULL }; -/* Module */ - MODULE_AUTHOR(TI_DRIVER_AUTHOR); MODULE_DESCRIPTION(TI_DRIVER_DESC); MODULE_LICENSE("GPL"); @@ -302,8 +517,6 @@ MODULE_DEVICE_TABLE(usb, ti_id_table_combined); module_usb_serial_driver(serial_drivers, ti_id_table_combined); -/* Functions */ - static int ti_startup(struct usb_serial *serial) { struct ti_device *tdev; @@ -319,7 +532,6 @@ static int ti_startup(struct usb_serial *serial) dev->descriptor.bNumConfigurations, dev->actconfig->desc.bConfigurationValue); - /* create device structure */ tdev = kzalloc(sizeof(struct ti_device), GFP_KERNEL); if (!tdev) return -ENOMEM; @@ -435,7 +647,7 @@ static int ti_open(struct tty_struct *tty, struct usb_serial_port *port) struct urb *urb; int port_number; int status; - __u16 open_settings = (__u8)(TI_PIPE_MODE_CONTINOUS | + __u16 open_settings = (__u8)(TI_PIPE_MODE_CONTINUOUS | TI_PIPE_TIMEOUT_ENABLE | (TI_TRANSFER_TIMEOUT << 2)); @@ -954,6 +1166,15 @@ static void ti_break(struct tty_struct *tty, int break_state) dev_dbg(&port->dev, "%s - error setting break, %d\n", __func__, status); } +static int ti_get_port_from_code(unsigned char code) +{ + return (code >> 4) - 3; +} + +static int ti_get_func_from_code(unsigned char code) +{ + return code & 0x0f; +} static void ti_interrupt_callback(struct urb *urb) { @@ -995,8 +1216,8 @@ static void ti_interrupt_callback(struct urb *urb) goto exit; } - port_number = TI_GET_PORT_FROM_CODE(data[0]); - function = TI_GET_FUNC_FROM_CODE(data[0]); + port_number = ti_get_port_from_code(data[0]); + function = ti_get_func_from_code(data[0]); dev_dbg(dev, "%s - port_number %d, function %d, data 0x%02X\n", __func__, port_number, function, data[1]); diff --git a/drivers/usb/serial/ti_usb_3410_5052.h b/drivers/usb/serial/ti_usb_3410_5052.h deleted file mode 100644 index bbfd3a184600..000000000000 --- a/drivers/usb/serial/ti_usb_3410_5052.h +++ /dev/null @@ -1,259 +0,0 @@ -/* vi: ts=8 sw=8 - * - * TI 3410/5052 USB Serial Driver Header - * - * Copyright (C) 2004 Texas Instruments - * - * This driver is based on the Linux io_ti driver, which is - * Copyright (C) 2000-2002 Inside Out Networks - * Copyright (C) 2001-2002 Greg Kroah-Hartman - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * For questions or problems with this driver, contact Texas Instruments - * technical support, or Al Borchers <alborchers@steinerpoint.com>, or - * Peter Berger <pberger@brimson.com>. - */ - -#ifndef _TI_3410_5052_H_ -#define _TI_3410_5052_H_ - -/* Configuration ids */ -#define TI_BOOT_CONFIG 1 -#define TI_ACTIVE_CONFIG 2 - -/* Vendor and product ids */ -#define TI_VENDOR_ID 0x0451 -#define IBM_VENDOR_ID 0x04b3 -#define TI_3410_PRODUCT_ID 0x3410 -#define IBM_4543_PRODUCT_ID 0x4543 -#define IBM_454B_PRODUCT_ID 0x454b -#define IBM_454C_PRODUCT_ID 0x454c -#define TI_3410_EZ430_ID 0xF430 /* TI ez430 development tool */ -#define TI_5052_BOOT_PRODUCT_ID 0x5052 /* no EEPROM, no firmware */ -#define TI_5152_BOOT_PRODUCT_ID 0x5152 /* no EEPROM, no firmware */ -#define TI_5052_EEPROM_PRODUCT_ID 0x505A /* EEPROM, no firmware */ -#define TI_5052_FIRMWARE_PRODUCT_ID 0x505F /* firmware is running */ -#define FRI2_PRODUCT_ID 0x5053 /* Fish River Island II */ - -/* Multi-Tech vendor and product ids */ -#define MTS_VENDOR_ID 0x06E0 -#define MTS_GSM_NO_FW_PRODUCT_ID 0xF108 -#define MTS_CDMA_NO_FW_PRODUCT_ID 0xF109 -#define MTS_CDMA_PRODUCT_ID 0xF110 -#define MTS_GSM_PRODUCT_ID 0xF111 -#define MTS_EDGE_PRODUCT_ID 0xF112 -#define MTS_MT9234MU_PRODUCT_ID 0xF114 -#define MTS_MT9234ZBA_PRODUCT_ID 0xF115 -#define MTS_MT9234ZBAOLD_PRODUCT_ID 0x0319 - -/* Abbott Diabetics vendor and product ids */ -#define ABBOTT_VENDOR_ID 0x1a61 -#define ABBOTT_STEREO_PLUG_ID 0x3410 -#define ABBOTT_PRODUCT_ID ABBOTT_STEREO_PLUG_ID -#define ABBOTT_STRIP_PORT_ID 0x3420 - -/* Honeywell vendor and product IDs */ -#define HONEYWELL_VENDOR_ID 0x10ac -#define HONEYWELL_HGI80_PRODUCT_ID 0x0102 /* Honeywell HGI80 */ - -/* Moxa UPORT 11x0 vendor and product IDs */ -#define MXU1_VENDOR_ID 0x110a -#define MXU1_1110_PRODUCT_ID 0x1110 -#define MXU1_1130_PRODUCT_ID 0x1130 -#define MXU1_1131_PRODUCT_ID 0x1131 -#define MXU1_1150_PRODUCT_ID 0x1150 -#define MXU1_1151_PRODUCT_ID 0x1151 - -/* Commands */ -#define TI_GET_VERSION 0x01 -#define TI_GET_PORT_STATUS 0x02 -#define TI_GET_PORT_DEV_INFO 0x03 -#define TI_GET_CONFIG 0x04 -#define TI_SET_CONFIG 0x05 -#define TI_OPEN_PORT 0x06 -#define TI_CLOSE_PORT 0x07 -#define TI_START_PORT 0x08 -#define TI_STOP_PORT 0x09 -#define TI_TEST_PORT 0x0A -#define TI_PURGE_PORT 0x0B -#define TI_RESET_EXT_DEVICE 0x0C -#define TI_WRITE_DATA 0x80 -#define TI_READ_DATA 0x81 -#define TI_REQ_TYPE_CLASS 0x82 - -/* Module identifiers */ -#define TI_I2C_PORT 0x01 -#define TI_IEEE1284_PORT 0x02 -#define TI_UART1_PORT 0x03 -#define TI_UART2_PORT 0x04 -#define TI_RAM_PORT 0x05 - -/* Modem status */ -#define TI_MSR_DELTA_CTS 0x01 -#define TI_MSR_DELTA_DSR 0x02 -#define TI_MSR_DELTA_RI 0x04 -#define TI_MSR_DELTA_CD 0x08 -#define TI_MSR_CTS 0x10 -#define TI_MSR_DSR 0x20 -#define TI_MSR_RI 0x40 -#define TI_MSR_CD 0x80 -#define TI_MSR_DELTA_MASK 0x0F -#define TI_MSR_MASK 0xF0 - -/* Line status */ -#define TI_LSR_OVERRUN_ERROR 0x01 -#define TI_LSR_PARITY_ERROR 0x02 -#define TI_LSR_FRAMING_ERROR 0x04 -#define TI_LSR_BREAK 0x08 -#define TI_LSR_ERROR 0x0F -#define TI_LSR_RX_FULL 0x10 -#define TI_LSR_TX_EMPTY 0x20 - -/* Line control */ -#define TI_LCR_BREAK 0x40 - -/* Modem control */ -#define TI_MCR_LOOP 0x04 -#define TI_MCR_DTR 0x10 -#define TI_MCR_RTS 0x20 - -/* Mask settings */ -#define TI_UART_ENABLE_RTS_IN 0x0001 -#define TI_UART_DISABLE_RTS 0x0002 -#define TI_UART_ENABLE_PARITY_CHECKING 0x0008 -#define TI_UART_ENABLE_DSR_OUT 0x0010 -#define TI_UART_ENABLE_CTS_OUT 0x0020 -#define TI_UART_ENABLE_X_OUT 0x0040 -#define TI_UART_ENABLE_XA_OUT 0x0080 -#define TI_UART_ENABLE_X_IN 0x0100 -#define TI_UART_ENABLE_DTR_IN 0x0800 -#define TI_UART_DISABLE_DTR 0x1000 -#define TI_UART_ENABLE_MS_INTS 0x2000 -#define TI_UART_ENABLE_AUTO_START_DMA 0x4000 - -/* Parity */ -#define TI_UART_NO_PARITY 0x00 -#define TI_UART_ODD_PARITY 0x01 -#define TI_UART_EVEN_PARITY 0x02 -#define TI_UART_MARK_PARITY 0x03 -#define TI_UART_SPACE_PARITY 0x04 - -/* Stop bits */ -#define TI_UART_1_STOP_BITS 0x00 -#define TI_UART_1_5_STOP_BITS 0x01 -#define TI_UART_2_STOP_BITS 0x02 - -/* Bits per character */ -#define TI_UART_5_DATA_BITS 0x00 -#define TI_UART_6_DATA_BITS 0x01 -#define TI_UART_7_DATA_BITS 0x02 -#define TI_UART_8_DATA_BITS 0x03 - -/* 232/485 modes */ -#define TI_UART_232 0x00 -#define TI_UART_485_RECEIVER_DISABLED 0x01 -#define TI_UART_485_RECEIVER_ENABLED 0x02 - -/* Pipe transfer mode and timeout */ -#define TI_PIPE_MODE_CONTINOUS 0x01 -#define TI_PIPE_MODE_MASK 0x03 -#define TI_PIPE_TIMEOUT_MASK 0x7C -#define TI_PIPE_TIMEOUT_ENABLE 0x80 - -/* Config struct */ -struct ti_uart_config { - __u16 wBaudRate; - __u16 wFlags; - __u8 bDataBits; - __u8 bParity; - __u8 bStopBits; - char cXon; - char cXoff; - __u8 bUartMode; -} __attribute__((packed)); - -/* Get port status */ -struct ti_port_status { - __u8 bCmdCode; - __u8 bModuleId; - __u8 bErrorCode; - __u8 bMSR; - __u8 bLSR; -} __attribute__((packed)); - -/* Purge modes */ -#define TI_PURGE_OUTPUT 0x00 -#define TI_PURGE_INPUT 0x80 - -/* Read/Write data */ -#define TI_RW_DATA_ADDR_SFR 0x10 -#define TI_RW_DATA_ADDR_IDATA 0x20 -#define TI_RW_DATA_ADDR_XDATA 0x30 -#define TI_RW_DATA_ADDR_CODE 0x40 -#define TI_RW_DATA_ADDR_GPIO 0x50 -#define TI_RW_DATA_ADDR_I2C 0x60 -#define TI_RW_DATA_ADDR_FLASH 0x70 -#define TI_RW_DATA_ADDR_DSP 0x80 - -#define TI_RW_DATA_UNSPECIFIED 0x00 -#define TI_RW_DATA_BYTE 0x01 -#define TI_RW_DATA_WORD 0x02 -#define TI_RW_DATA_DOUBLE_WORD 0x04 - -struct ti_write_data_bytes { - __u8 bAddrType; - __u8 bDataType; - __u8 bDataCounter; - __be16 wBaseAddrHi; - __be16 wBaseAddrLo; - __u8 bData[0]; -} __attribute__((packed)); - -struct ti_read_data_request { - __u8 bAddrType; - __u8 bDataType; - __u8 bDataCounter; - __be16 wBaseAddrHi; - __be16 wBaseAddrLo; -} __attribute__((packed)); - -struct ti_read_data_bytes { - __u8 bCmdCode; - __u8 bModuleId; - __u8 bErrorCode; - __u8 bData[0]; -} __attribute__((packed)); - -/* Interrupt struct */ -struct ti_interrupt { - __u8 bICode; - __u8 bIInfo; -} __attribute__((packed)); - -/* Interrupt codes */ -#define TI_GET_PORT_FROM_CODE(c) (((c) >> 4) - 3) -#define TI_GET_FUNC_FROM_CODE(c) ((c) & 0x0f) -#define TI_CODE_HARDWARE_ERROR 0xFF -#define TI_CODE_DATA_ERROR 0x03 -#define TI_CODE_MODEM_STATUS 0x04 - -/* Download firmware max packet size */ -#define TI_DOWNLOAD_MAX_PACKET_SIZE 64 - -/* Firmware image header */ -struct ti_firmware_header { - __le16 wLength; - __u8 bCheckSum; -} __attribute__((packed)); - -/* UART addresses */ -#define TI_UART1_BASE_ADDR 0xFFA0 /* UART 1 base address */ -#define TI_UART2_BASE_ADDR 0xFFB0 /* UART 2 base address */ -#define TI_UART_OFFSET_LCR 0x0002 /* UART MCR register offset */ -#define TI_UART_OFFSET_MCR 0x0004 /* UART MCR register offset */ - -#endif /* _TI_3410_5052_H_ */ diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 15ecfc9c5f6c..152b43822ef1 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -564,67 +564,80 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_device *vdev, } static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx, - uint32_t flags, void *data) + unsigned int count, uint32_t flags, + void *data) { - int32_t fd = *(int32_t *)data; - - if (!(flags & VFIO_IRQ_SET_DATA_TYPE_MASK)) - return -EINVAL; - /* DATA_NONE/DATA_BOOL enables loopback testing */ if (flags & VFIO_IRQ_SET_DATA_NONE) { - if (*ctx) - eventfd_signal(*ctx, 1); - return 0; + if (*ctx) { + if (count) { + eventfd_signal(*ctx, 1); + } else { + eventfd_ctx_put(*ctx); + *ctx = NULL; + } + return 0; + } } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { - uint8_t trigger = *(uint8_t *)data; + uint8_t trigger; + + if (!count) + return -EINVAL; + + trigger = *(uint8_t *)data; if (trigger && *ctx) eventfd_signal(*ctx, 1); - return 0; - } - /* Handle SET_DATA_EVENTFD */ - if (fd == -1) { - if (*ctx) - eventfd_ctx_put(*ctx); - *ctx = NULL; return 0; - } else if (fd >= 0) { - struct eventfd_ctx *efdctx; - efdctx = eventfd_ctx_fdget(fd); - if (IS_ERR(efdctx)) - return PTR_ERR(efdctx); - if (*ctx) - eventfd_ctx_put(*ctx); - *ctx = efdctx; + } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { + int32_t fd; + + if (!count) + return -EINVAL; + + fd = *(int32_t *)data; + if (fd == -1) { + if (*ctx) + eventfd_ctx_put(*ctx); + *ctx = NULL; + } else if (fd >= 0) { + struct eventfd_ctx *efdctx; + + efdctx = eventfd_ctx_fdget(fd); + if (IS_ERR(efdctx)) + return PTR_ERR(efdctx); + + if (*ctx) + eventfd_ctx_put(*ctx); + + *ctx = efdctx; + } return 0; - } else - return -EINVAL; + } + + return -EINVAL; } static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, void *data) { - if (index != VFIO_PCI_ERR_IRQ_INDEX) + if (index != VFIO_PCI_ERR_IRQ_INDEX || start != 0 || count > 1) return -EINVAL; - /* - * We should sanitize start & count, but that wasn't caught - * originally, so this IRQ index must forever ignore them :-( - */ - - return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger, flags, data); + return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger, + count, flags, data); } static int vfio_pci_set_req_trigger(struct vfio_pci_device *vdev, unsigned index, unsigned start, unsigned count, uint32_t flags, void *data) { - if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count != 1) + if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count > 1) return -EINVAL; - return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger, flags, data); + return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger, + count, flags, data); } int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags, diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 533eaf04f12f..40764ecad9ce 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -2,7 +2,6 @@ config VHOST_NET tristate "Host kernel accelerator for virtio net" depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP) select VHOST - select VHOST_RING ---help--- This kernel module can be loaded in host kernel to accelerate guest networking with virtio_net. Not to be confused with virtio_net @@ -15,17 +14,24 @@ config VHOST_SCSI tristate "VHOST_SCSI TCM fabric driver" depends on TARGET_CORE && EVENTFD && m select VHOST - select VHOST_RING default n ---help--- Say M here to enable the vhost_scsi TCM fabric module for use with virtio-scsi guests -config VHOST_RING - tristate +config VHOST_VSOCK + tristate "vhost virtio-vsock driver" + depends on VSOCKETS && EVENTFD + select VIRTIO_VSOCKETS_COMMON + select VHOST + default n ---help--- - This option is selected by any driver which needs to access - the host side of a virtio ring. + This kernel module can be loaded in the host kernel to provide AF_VSOCK + sockets for communicating with guests. The guests must have the + virtio_transport.ko driver loaded to use the virtio-vsock device. + + To compile this driver as a module, choose M here: the module will be called + vhost_vsock. config VHOST tristate diff --git a/drivers/vhost/Kconfig.vringh b/drivers/vhost/Kconfig.vringh new file mode 100644 index 000000000000..6a4490c09d7f --- /dev/null +++ b/drivers/vhost/Kconfig.vringh @@ -0,0 +1,5 @@ +config VHOST_RING + tristate + ---help--- + This option is selected by any driver which needs to access + the host side of a virtio ring. diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile index e0441c34db1c..6b012b986b57 100644 --- a/drivers/vhost/Makefile +++ b/drivers/vhost/Makefile @@ -4,5 +4,9 @@ vhost_net-y := net.o obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o vhost_scsi-y := scsi.o +obj-$(CONFIG_VHOST_VSOCK) += vhost_vsock.o +vhost_vsock-y := vsock.o + obj-$(CONFIG_VHOST_RING) += vringh.o + obj-$(CONFIG_VHOST) += vhost.o diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index e032ca397371..5dc128a8da83 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -61,7 +61,8 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;" enum { VHOST_NET_FEATURES = VHOST_FEATURES | (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | - (1ULL << VIRTIO_NET_F_MRG_RXBUF) + (1ULL << VIRTIO_NET_F_MRG_RXBUF) | + (1ULL << VIRTIO_F_IOMMU_PLATFORM) }; enum { @@ -334,7 +335,7 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net, { unsigned long uninitialized_var(endtime); int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), - out_num, in_num, NULL, NULL); + out_num, in_num, NULL, NULL); if (r == vq->num && vq->busyloop_timeout) { preempt_disable(); @@ -344,7 +345,7 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net, cpu_relax_lowlatency(); preempt_enable(); r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), - out_num, in_num, NULL, NULL); + out_num, in_num, NULL, NULL); } return r; @@ -377,6 +378,9 @@ static void handle_tx(struct vhost_net *net) if (!sock) goto out; + if (!vq_iotlb_prefetch(vq)) + goto out; + vhost_disable_notify(&net->dev, vq); hdr_size = nvq->vhost_hlen; @@ -652,6 +656,10 @@ static void handle_rx(struct vhost_net *net) sock = vq->private_data; if (!sock) goto out; + + if (!vq_iotlb_prefetch(vq)) + goto out; + vhost_disable_notify(&net->dev, vq); vhost_net_disable_vq(net, vq); @@ -1052,20 +1060,20 @@ static long vhost_net_reset_owner(struct vhost_net *n) struct socket *tx_sock = NULL; struct socket *rx_sock = NULL; long err; - struct vhost_memory *memory; + struct vhost_umem *umem; mutex_lock(&n->dev.mutex); err = vhost_dev_check_owner(&n->dev); if (err) goto done; - memory = vhost_dev_reset_owner_prepare(); - if (!memory) { + umem = vhost_dev_reset_owner_prepare(); + if (!umem) { err = -ENOMEM; goto done; } vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_flush(n); - vhost_dev_reset_owner(&n->dev, memory); + vhost_dev_reset_owner(&n->dev, umem); vhost_net_vq_reset(n); done: mutex_unlock(&n->dev.mutex); @@ -1096,10 +1104,14 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features) } mutex_lock(&n->dev.mutex); if ((features & (1 << VHOST_F_LOG_ALL)) && - !vhost_log_access_ok(&n->dev)) { - mutex_unlock(&n->dev.mutex); - return -EFAULT; + !vhost_log_access_ok(&n->dev)) + goto out_unlock; + + if ((features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) { + if (vhost_init_device_iotlb(&n->dev, true)) + goto out_unlock; } + for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { mutex_lock(&n->vqs[i].vq.mutex); n->vqs[i].vq.acked_features = features; @@ -1109,6 +1121,10 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features) } mutex_unlock(&n->dev.mutex); return 0; + +out_unlock: + mutex_unlock(&n->dev.mutex); + return -EFAULT; } static long vhost_net_set_owner(struct vhost_net *n) @@ -1182,9 +1198,40 @@ static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl, } #endif +static ssize_t vhost_net_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + struct vhost_net *n = file->private_data; + struct vhost_dev *dev = &n->dev; + int noblock = file->f_flags & O_NONBLOCK; + + return vhost_chr_read_iter(dev, to, noblock); +} + +static ssize_t vhost_net_chr_write_iter(struct kiocb *iocb, + struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct vhost_net *n = file->private_data; + struct vhost_dev *dev = &n->dev; + + return vhost_chr_write_iter(dev, from); +} + +static unsigned int vhost_net_chr_poll(struct file *file, poll_table *wait) +{ + struct vhost_net *n = file->private_data; + struct vhost_dev *dev = &n->dev; + + return vhost_chr_poll(file, dev, wait); +} + static const struct file_operations vhost_net_fops = { .owner = THIS_MODULE, .release = vhost_net_release, + .read_iter = vhost_net_chr_read_iter, + .write_iter = vhost_net_chr_write_iter, + .poll = vhost_net_chr_poll, .unlocked_ioctl = vhost_net_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = vhost_net_compat_ioctl, diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 388eec4e1a90..97fb2f8fa930 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -220,20 +220,20 @@ static long vhost_test_reset_owner(struct vhost_test *n) { void *priv = NULL; long err; - struct vhost_memory *memory; + struct vhost_umem *umem; mutex_lock(&n->dev.mutex); err = vhost_dev_check_owner(&n->dev); if (err) goto done; - memory = vhost_dev_reset_owner_prepare(); - if (!memory) { + umem = vhost_dev_reset_owner_prepare(); + if (!umem) { err = -ENOMEM; goto done; } vhost_test_stop(n, &priv); vhost_test_flush(n); - vhost_dev_reset_owner(&n->dev, memory); + vhost_dev_reset_owner(&n->dev, umem); done: mutex_unlock(&n->dev.mutex); return err; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 669fef1e2bb6..c6f2d89c0e97 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -27,6 +27,7 @@ #include <linux/cgroup.h> #include <linux/module.h> #include <linux/sort.h> +#include <linux/interval_tree_generic.h> #include "vhost.h" @@ -34,6 +35,10 @@ static ushort max_mem_regions = 64; module_param(max_mem_regions, ushort, 0444); MODULE_PARM_DESC(max_mem_regions, "Maximum number of memory regions in memory map. (default: 64)"); +static int max_iotlb_entries = 2048; +module_param(max_iotlb_entries, int, 0444); +MODULE_PARM_DESC(max_iotlb_entries, + "Maximum number of iotlb entries. (default: 2048)"); enum { VHOST_MEMORY_F_LOG = 0x1, @@ -42,6 +47,10 @@ enum { #define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num]) #define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num]) +INTERVAL_TREE_DEFINE(struct vhost_umem_node, + rb, __u64, __subtree_last, + START, LAST, , vhost_umem_interval_tree); + #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY static void vhost_disable_cross_endian(struct vhost_virtqueue *vq) { @@ -131,6 +140,19 @@ static void vhost_reset_is_le(struct vhost_virtqueue *vq) vq->is_le = virtio_legacy_is_little_endian(); } +struct vhost_flush_struct { + struct vhost_work work; + struct completion wait_event; +}; + +static void vhost_flush_work(struct vhost_work *work) +{ + struct vhost_flush_struct *s; + + s = container_of(work, struct vhost_flush_struct, work); + complete(&s->wait_event); +} + static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { @@ -155,11 +177,9 @@ static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync, void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn) { - INIT_LIST_HEAD(&work->node); + clear_bit(VHOST_WORK_QUEUED, &work->flags); work->fn = fn; init_waitqueue_head(&work->done); - work->flushing = 0; - work->queue_seq = work->done_seq = 0; } EXPORT_SYMBOL_GPL(vhost_work_init); @@ -211,31 +231,17 @@ void vhost_poll_stop(struct vhost_poll *poll) } EXPORT_SYMBOL_GPL(vhost_poll_stop); -static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work, - unsigned seq) -{ - int left; - - spin_lock_irq(&dev->work_lock); - left = seq - work->done_seq; - spin_unlock_irq(&dev->work_lock); - return left <= 0; -} - void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work) { - unsigned seq; - int flushing; + struct vhost_flush_struct flush; - spin_lock_irq(&dev->work_lock); - seq = work->queue_seq; - work->flushing++; - spin_unlock_irq(&dev->work_lock); - wait_event(work->done, vhost_work_seq_done(dev, work, seq)); - spin_lock_irq(&dev->work_lock); - flushing = --work->flushing; - spin_unlock_irq(&dev->work_lock); - BUG_ON(flushing < 0); + if (dev->worker) { + init_completion(&flush.wait_event); + vhost_work_init(&flush.work, vhost_flush_work); + + vhost_work_queue(dev, &flush.work); + wait_for_completion(&flush.wait_event); + } } EXPORT_SYMBOL_GPL(vhost_work_flush); @@ -249,16 +255,16 @@ EXPORT_SYMBOL_GPL(vhost_poll_flush); void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) { - unsigned long flags; + if (!dev->worker) + return; - spin_lock_irqsave(&dev->work_lock, flags); - if (list_empty(&work->node)) { - list_add_tail(&work->node, &dev->work_list); - work->queue_seq++; - spin_unlock_irqrestore(&dev->work_lock, flags); + if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) { + /* We can only add the work to the list after we're + * sure it was not in the list. + */ + smp_mb(); + llist_add(&work->node, &dev->work_list); wake_up_process(dev->worker); - } else { - spin_unlock_irqrestore(&dev->work_lock, flags); } } EXPORT_SYMBOL_GPL(vhost_work_queue); @@ -266,7 +272,7 @@ EXPORT_SYMBOL_GPL(vhost_work_queue); /* A lockless hint for busy polling code to exit the loop */ bool vhost_has_work(struct vhost_dev *dev) { - return !list_empty(&dev->work_list); + return !llist_empty(&dev->work_list); } EXPORT_SYMBOL_GPL(vhost_has_work); @@ -300,17 +306,18 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->call_ctx = NULL; vq->call = NULL; vq->log_ctx = NULL; - vq->memory = NULL; vhost_reset_is_le(vq); vhost_disable_cross_endian(vq); vq->busyloop_timeout = 0; + vq->umem = NULL; + vq->iotlb = NULL; } static int vhost_worker(void *data) { struct vhost_dev *dev = data; - struct vhost_work *work = NULL; - unsigned uninitialized_var(seq); + struct vhost_work *work, *work_next; + struct llist_node *node; mm_segment_t oldfs = get_fs(); set_fs(USER_DS); @@ -320,35 +327,25 @@ static int vhost_worker(void *data) /* mb paired w/ kthread_stop */ set_current_state(TASK_INTERRUPTIBLE); - spin_lock_irq(&dev->work_lock); - if (work) { - work->done_seq = seq; - if (work->flushing) - wake_up_all(&work->done); - } - if (kthread_should_stop()) { - spin_unlock_irq(&dev->work_lock); __set_current_state(TASK_RUNNING); break; } - if (!list_empty(&dev->work_list)) { - work = list_first_entry(&dev->work_list, - struct vhost_work, node); - list_del_init(&work->node); - seq = work->queue_seq; - } else - work = NULL; - spin_unlock_irq(&dev->work_lock); - if (work) { + node = llist_del_all(&dev->work_list); + if (!node) + schedule(); + + node = llist_reverse_order(node); + /* make sure flag is seen after deletion */ + smp_wmb(); + llist_for_each_entry_safe(work, work_next, node, node) { + clear_bit(VHOST_WORK_QUEUED, &work->flags); __set_current_state(TASK_RUNNING); work->fn(work); if (need_resched()) schedule(); - } else - schedule(); - + } } unuse_mm(dev->mm); set_fs(oldfs); @@ -407,11 +404,16 @@ void vhost_dev_init(struct vhost_dev *dev, mutex_init(&dev->mutex); dev->log_ctx = NULL; dev->log_file = NULL; - dev->memory = NULL; + dev->umem = NULL; + dev->iotlb = NULL; dev->mm = NULL; - spin_lock_init(&dev->work_lock); - INIT_LIST_HEAD(&dev->work_list); dev->worker = NULL; + init_llist_head(&dev->work_list); + init_waitqueue_head(&dev->wait); + INIT_LIST_HEAD(&dev->read_list); + INIT_LIST_HEAD(&dev->pending_list); + spin_lock_init(&dev->iotlb_lock); + for (i = 0; i < dev->nvqs; ++i) { vq = dev->vqs[i]; @@ -512,27 +514,36 @@ err_mm: } EXPORT_SYMBOL_GPL(vhost_dev_set_owner); -struct vhost_memory *vhost_dev_reset_owner_prepare(void) +static void *vhost_kvzalloc(unsigned long size) { - return kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL); + void *n = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + + if (!n) + n = vzalloc(size); + return n; +} + +struct vhost_umem *vhost_dev_reset_owner_prepare(void) +{ + return vhost_kvzalloc(sizeof(struct vhost_umem)); } EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare); /* Caller should have device mutex */ -void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory *memory) +void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_umem *umem) { int i; vhost_dev_cleanup(dev, true); /* Restore memory to default empty mapping. */ - memory->nregions = 0; - dev->memory = memory; + INIT_LIST_HEAD(&umem->umem_list); + dev->umem = umem; /* We don't need VQ locks below since vhost_dev_cleanup makes sure * VQs aren't running. */ for (i = 0; i < dev->nvqs; ++i) - dev->vqs[i]->memory = memory; + dev->vqs[i]->umem = umem; } EXPORT_SYMBOL_GPL(vhost_dev_reset_owner); @@ -549,6 +560,47 @@ void vhost_dev_stop(struct vhost_dev *dev) } EXPORT_SYMBOL_GPL(vhost_dev_stop); +static void vhost_umem_free(struct vhost_umem *umem, + struct vhost_umem_node *node) +{ + vhost_umem_interval_tree_remove(node, &umem->umem_tree); + list_del(&node->link); + kfree(node); + umem->numem--; +} + +static void vhost_umem_clean(struct vhost_umem *umem) +{ + struct vhost_umem_node *node, *tmp; + + if (!umem) + return; + + list_for_each_entry_safe(node, tmp, &umem->umem_list, link) + vhost_umem_free(umem, node); + + kvfree(umem); +} + +static void vhost_clear_msg(struct vhost_dev *dev) +{ + struct vhost_msg_node *node, *n; + + spin_lock(&dev->iotlb_lock); + + list_for_each_entry_safe(node, n, &dev->read_list, node) { + list_del(&node->node); + kfree(node); + } + + list_for_each_entry_safe(node, n, &dev->pending_list, node) { + list_del(&node->node); + kfree(node); + } + + spin_unlock(&dev->iotlb_lock); +} + /* Caller should have device mutex if and only if locked is set */ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) { @@ -575,9 +627,13 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) fput(dev->log_file); dev->log_file = NULL; /* No one will access memory at this point */ - kvfree(dev->memory); - dev->memory = NULL; - WARN_ON(!list_empty(&dev->work_list)); + vhost_umem_clean(dev->umem); + dev->umem = NULL; + vhost_umem_clean(dev->iotlb); + dev->iotlb = NULL; + vhost_clear_msg(dev); + wake_up_interruptible_poll(&dev->wait, POLLIN | POLLRDNORM); + WARN_ON(!llist_empty(&dev->work_list)); if (dev->worker) { kthread_stop(dev->worker); dev->worker = NULL; @@ -601,26 +657,34 @@ static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz) (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8); } +static bool vhost_overflow(u64 uaddr, u64 size) +{ + /* Make sure 64 bit math will not overflow. */ + return uaddr > ULONG_MAX || size > ULONG_MAX || uaddr > ULONG_MAX - size; +} + /* Caller should have vq mutex and device mutex. */ -static int vq_memory_access_ok(void __user *log_base, struct vhost_memory *mem, +static int vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem, int log_all) { - int i; + struct vhost_umem_node *node; - if (!mem) + if (!umem) return 0; - for (i = 0; i < mem->nregions; ++i) { - struct vhost_memory_region *m = mem->regions + i; - unsigned long a = m->userspace_addr; - if (m->memory_size > ULONG_MAX) + list_for_each_entry(node, &umem->umem_list, link) { + unsigned long a = node->userspace_addr; + + if (vhost_overflow(node->userspace_addr, node->size)) return 0; - else if (!access_ok(VERIFY_WRITE, (void __user *)a, - m->memory_size)) + + + if (!access_ok(VERIFY_WRITE, (void __user *)a, + node->size)) return 0; else if (log_all && !log_access_ok(log_base, - m->guest_phys_addr, - m->memory_size)) + node->start, + node->size)) return 0; } return 1; @@ -628,7 +692,7 @@ static int vq_memory_access_ok(void __user *log_base, struct vhost_memory *mem, /* Can we switch to this memory table? */ /* Caller should have device mutex but not vq mutex */ -static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, +static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem, int log_all) { int i; @@ -641,7 +705,8 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, log = log_all || vhost_has_feature(d->vqs[i], VHOST_F_LOG_ALL); /* If ring is inactive, will check when it's enabled. */ if (d->vqs[i]->private_data) - ok = vq_memory_access_ok(d->vqs[i]->log_base, mem, log); + ok = vq_memory_access_ok(d->vqs[i]->log_base, + umem, log); else ok = 1; mutex_unlock(&d->vqs[i]->mutex); @@ -651,12 +716,385 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, return 1; } +static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, + struct iovec iov[], int iov_size, int access); + +static int vhost_copy_to_user(struct vhost_virtqueue *vq, void *to, + const void *from, unsigned size) +{ + int ret; + + if (!vq->iotlb) + return __copy_to_user(to, from, size); + else { + /* This function should be called after iotlb + * prefetch, which means we're sure that all vq + * could be access through iotlb. So -EAGAIN should + * not happen in this case. + */ + /* TODO: more fast path */ + struct iov_iter t; + ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov, + ARRAY_SIZE(vq->iotlb_iov), + VHOST_ACCESS_WO); + if (ret < 0) + goto out; + iov_iter_init(&t, WRITE, vq->iotlb_iov, ret, size); + ret = copy_to_iter(from, size, &t); + if (ret == size) + ret = 0; + } +out: + return ret; +} + +static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to, + void *from, unsigned size) +{ + int ret; + + if (!vq->iotlb) + return __copy_from_user(to, from, size); + else { + /* This function should be called after iotlb + * prefetch, which means we're sure that vq + * could be access through iotlb. So -EAGAIN should + * not happen in this case. + */ + /* TODO: more fast path */ + struct iov_iter f; + ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov, + ARRAY_SIZE(vq->iotlb_iov), + VHOST_ACCESS_RO); + if (ret < 0) { + vq_err(vq, "IOTLB translation failure: uaddr " + "%p size 0x%llx\n", from, + (unsigned long long) size); + goto out; + } + iov_iter_init(&f, READ, vq->iotlb_iov, ret, size); + ret = copy_from_iter(to, size, &f); + if (ret == size) + ret = 0; + } + +out: + return ret; +} + +static void __user *__vhost_get_user(struct vhost_virtqueue *vq, + void *addr, unsigned size) +{ + int ret; + + /* This function should be called after iotlb + * prefetch, which means we're sure that vq + * could be access through iotlb. So -EAGAIN should + * not happen in this case. + */ + /* TODO: more fast path */ + ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov, + ARRAY_SIZE(vq->iotlb_iov), + VHOST_ACCESS_RO); + if (ret < 0) { + vq_err(vq, "IOTLB translation failure: uaddr " + "%p size 0x%llx\n", addr, + (unsigned long long) size); + return NULL; + } + + if (ret != 1 || vq->iotlb_iov[0].iov_len != size) { + vq_err(vq, "Non atomic userspace memory access: uaddr " + "%p size 0x%llx\n", addr, + (unsigned long long) size); + return NULL; + } + + return vq->iotlb_iov[0].iov_base; +} + +#define vhost_put_user(vq, x, ptr) \ +({ \ + int ret = -EFAULT; \ + if (!vq->iotlb) { \ + ret = __put_user(x, ptr); \ + } else { \ + __typeof__(ptr) to = \ + (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \ + if (to != NULL) \ + ret = __put_user(x, to); \ + else \ + ret = -EFAULT; \ + } \ + ret; \ +}) + +#define vhost_get_user(vq, x, ptr) \ +({ \ + int ret; \ + if (!vq->iotlb) { \ + ret = __get_user(x, ptr); \ + } else { \ + __typeof__(ptr) from = \ + (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \ + if (from != NULL) \ + ret = __get_user(x, from); \ + else \ + ret = -EFAULT; \ + } \ + ret; \ +}) + +static void vhost_dev_lock_vqs(struct vhost_dev *d) +{ + int i = 0; + for (i = 0; i < d->nvqs; ++i) + mutex_lock(&d->vqs[i]->mutex); +} + +static void vhost_dev_unlock_vqs(struct vhost_dev *d) +{ + int i = 0; + for (i = 0; i < d->nvqs; ++i) + mutex_unlock(&d->vqs[i]->mutex); +} + +static int vhost_new_umem_range(struct vhost_umem *umem, + u64 start, u64 size, u64 end, + u64 userspace_addr, int perm) +{ + struct vhost_umem_node *tmp, *node = kmalloc(sizeof(*node), GFP_ATOMIC); + + if (!node) + return -ENOMEM; + + if (umem->numem == max_iotlb_entries) { + tmp = list_first_entry(&umem->umem_list, typeof(*tmp), link); + vhost_umem_free(umem, tmp); + } + + node->start = start; + node->size = size; + node->last = end; + node->userspace_addr = userspace_addr; + node->perm = perm; + INIT_LIST_HEAD(&node->link); + list_add_tail(&node->link, &umem->umem_list); + vhost_umem_interval_tree_insert(node, &umem->umem_tree); + umem->numem++; + + return 0; +} + +static void vhost_del_umem_range(struct vhost_umem *umem, + u64 start, u64 end) +{ + struct vhost_umem_node *node; + + while ((node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, + start, end))) + vhost_umem_free(umem, node); +} + +static void vhost_iotlb_notify_vq(struct vhost_dev *d, + struct vhost_iotlb_msg *msg) +{ + struct vhost_msg_node *node, *n; + + spin_lock(&d->iotlb_lock); + + list_for_each_entry_safe(node, n, &d->pending_list, node) { + struct vhost_iotlb_msg *vq_msg = &node->msg.iotlb; + if (msg->iova <= vq_msg->iova && + msg->iova + msg->size - 1 > vq_msg->iova && + vq_msg->type == VHOST_IOTLB_MISS) { + vhost_poll_queue(&node->vq->poll); + list_del(&node->node); + kfree(node); + } + } + + spin_unlock(&d->iotlb_lock); +} + +static int umem_access_ok(u64 uaddr, u64 size, int access) +{ + unsigned long a = uaddr; + + /* Make sure 64 bit math will not overflow. */ + if (vhost_overflow(uaddr, size)) + return -EFAULT; + + if ((access & VHOST_ACCESS_RO) && + !access_ok(VERIFY_READ, (void __user *)a, size)) + return -EFAULT; + if ((access & VHOST_ACCESS_WO) && + !access_ok(VERIFY_WRITE, (void __user *)a, size)) + return -EFAULT; + return 0; +} + +int vhost_process_iotlb_msg(struct vhost_dev *dev, + struct vhost_iotlb_msg *msg) +{ + int ret = 0; + + vhost_dev_lock_vqs(dev); + switch (msg->type) { + case VHOST_IOTLB_UPDATE: + if (!dev->iotlb) { + ret = -EFAULT; + break; + } + if (umem_access_ok(msg->uaddr, msg->size, msg->perm)) { + ret = -EFAULT; + break; + } + if (vhost_new_umem_range(dev->iotlb, msg->iova, msg->size, + msg->iova + msg->size - 1, + msg->uaddr, msg->perm)) { + ret = -ENOMEM; + break; + } + vhost_iotlb_notify_vq(dev, msg); + break; + case VHOST_IOTLB_INVALIDATE: + vhost_del_umem_range(dev->iotlb, msg->iova, + msg->iova + msg->size - 1); + break; + default: + ret = -EINVAL; + break; + } + + vhost_dev_unlock_vqs(dev); + return ret; +} +ssize_t vhost_chr_write_iter(struct vhost_dev *dev, + struct iov_iter *from) +{ + struct vhost_msg_node node; + unsigned size = sizeof(struct vhost_msg); + size_t ret; + int err; + + if (iov_iter_count(from) < size) + return 0; + ret = copy_from_iter(&node.msg, size, from); + if (ret != size) + goto done; + + switch (node.msg.type) { + case VHOST_IOTLB_MSG: + err = vhost_process_iotlb_msg(dev, &node.msg.iotlb); + if (err) + ret = err; + break; + default: + ret = -EINVAL; + break; + } + +done: + return ret; +} +EXPORT_SYMBOL(vhost_chr_write_iter); + +unsigned int vhost_chr_poll(struct file *file, struct vhost_dev *dev, + poll_table *wait) +{ + unsigned int mask = 0; + + poll_wait(file, &dev->wait, wait); + + if (!list_empty(&dev->read_list)) + mask |= POLLIN | POLLRDNORM; + + return mask; +} +EXPORT_SYMBOL(vhost_chr_poll); + +ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to, + int noblock) +{ + DEFINE_WAIT(wait); + struct vhost_msg_node *node; + ssize_t ret = 0; + unsigned size = sizeof(struct vhost_msg); + + if (iov_iter_count(to) < size) + return 0; + + while (1) { + if (!noblock) + prepare_to_wait(&dev->wait, &wait, + TASK_INTERRUPTIBLE); + + node = vhost_dequeue_msg(dev, &dev->read_list); + if (node) + break; + if (noblock) { + ret = -EAGAIN; + break; + } + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + if (!dev->iotlb) { + ret = -EBADFD; + break; + } + + schedule(); + } + + if (!noblock) + finish_wait(&dev->wait, &wait); + + if (node) { + ret = copy_to_iter(&node->msg, size, to); + + if (ret != size || node->msg.type != VHOST_IOTLB_MISS) { + kfree(node); + return ret; + } + + vhost_enqueue_msg(dev, &dev->pending_list, node); + } + + return ret; +} +EXPORT_SYMBOL_GPL(vhost_chr_read_iter); + +static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access) +{ + struct vhost_dev *dev = vq->dev; + struct vhost_msg_node *node; + struct vhost_iotlb_msg *msg; + + node = vhost_new_msg(vq, VHOST_IOTLB_MISS); + if (!node) + return -ENOMEM; + + msg = &node->msg.iotlb; + msg->type = VHOST_IOTLB_MISS; + msg->iova = iova; + msg->perm = access; + + vhost_enqueue_msg(dev, &dev->read_list, node); + + return 0; +} + static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, struct vring_desc __user *desc, struct vring_avail __user *avail, struct vring_used __user *used) + { size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; + return access_ok(VERIFY_READ, desc, num * sizeof *desc) && access_ok(VERIFY_READ, avail, sizeof *avail + num * sizeof *avail->ring + s) && @@ -664,11 +1102,59 @@ static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, sizeof *used + num * sizeof *used->ring + s); } +static int iotlb_access_ok(struct vhost_virtqueue *vq, + int access, u64 addr, u64 len) +{ + const struct vhost_umem_node *node; + struct vhost_umem *umem = vq->iotlb; + u64 s = 0, size; + + while (len > s) { + node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, + addr, + addr + len - 1); + if (node == NULL || node->start > addr) { + vhost_iotlb_miss(vq, addr, access); + return false; + } else if (!(node->perm & access)) { + /* Report the possible access violation by + * request another translation from userspace. + */ + return false; + } + + size = node->size - addr + node->start; + s += size; + addr += size; + } + + return true; +} + +int vq_iotlb_prefetch(struct vhost_virtqueue *vq) +{ + size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; + unsigned int num = vq->num; + + if (!vq->iotlb) + return 1; + + return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc, + num * sizeof *vq->desc) && + iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail, + sizeof *vq->avail + + num * sizeof *vq->avail->ring + s) && + iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used, + sizeof *vq->used + + num * sizeof *vq->used->ring + s); +} +EXPORT_SYMBOL_GPL(vq_iotlb_prefetch); + /* Can we log writes? */ /* Caller should have device mutex but not vq mutex */ int vhost_log_access_ok(struct vhost_dev *dev) { - return memory_access_ok(dev, dev->memory, 1); + return memory_access_ok(dev, dev->umem, 1); } EXPORT_SYMBOL_GPL(vhost_log_access_ok); @@ -679,7 +1165,7 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq, { size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; - return vq_memory_access_ok(log_base, vq->memory, + return vq_memory_access_ok(log_base, vq->umem, vhost_has_feature(vq, VHOST_F_LOG_ALL)) && (!vq->log_used || log_access_ok(log_base, vq->log_addr, sizeof *vq->used + @@ -690,33 +1176,36 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq, /* Caller should have vq mutex and device mutex */ int vhost_vq_access_ok(struct vhost_virtqueue *vq) { + if (vq->iotlb) { + /* When device IOTLB was used, the access validation + * will be validated during prefetching. + */ + return 1; + } return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used) && vq_log_access_ok(vq, vq->log_base); } EXPORT_SYMBOL_GPL(vhost_vq_access_ok); -static int vhost_memory_reg_sort_cmp(const void *p1, const void *p2) +static struct vhost_umem *vhost_umem_alloc(void) { - const struct vhost_memory_region *r1 = p1, *r2 = p2; - if (r1->guest_phys_addr < r2->guest_phys_addr) - return 1; - if (r1->guest_phys_addr > r2->guest_phys_addr) - return -1; - return 0; -} + struct vhost_umem *umem = vhost_kvzalloc(sizeof(*umem)); -static void *vhost_kvzalloc(unsigned long size) -{ - void *n = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!umem) + return NULL; - if (!n) - n = vzalloc(size); - return n; + umem->umem_tree = RB_ROOT; + umem->numem = 0; + INIT_LIST_HEAD(&umem->umem_list); + + return umem; } static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) { - struct vhost_memory mem, *newmem, *oldmem; + struct vhost_memory mem, *newmem; + struct vhost_memory_region *region; + struct vhost_umem *newumem, *oldumem; unsigned long size = offsetof(struct vhost_memory, regions); int i; @@ -736,24 +1225,47 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) kvfree(newmem); return -EFAULT; } - sort(newmem->regions, newmem->nregions, sizeof(*newmem->regions), - vhost_memory_reg_sort_cmp, NULL); - if (!memory_access_ok(d, newmem, 0)) { + newumem = vhost_umem_alloc(); + if (!newumem) { kvfree(newmem); - return -EFAULT; + return -ENOMEM; } - oldmem = d->memory; - d->memory = newmem; + + for (region = newmem->regions; + region < newmem->regions + mem.nregions; + region++) { + if (vhost_new_umem_range(newumem, + region->guest_phys_addr, + region->memory_size, + region->guest_phys_addr + + region->memory_size - 1, + region->userspace_addr, + VHOST_ACCESS_RW)) + goto err; + } + + if (!memory_access_ok(d, newumem, 0)) + goto err; + + oldumem = d->umem; + d->umem = newumem; /* All memory accesses are done under some VQ mutex. */ for (i = 0; i < d->nvqs; ++i) { mutex_lock(&d->vqs[i]->mutex); - d->vqs[i]->memory = newmem; + d->vqs[i]->umem = newumem; mutex_unlock(&d->vqs[i]->mutex); } - kvfree(oldmem); + + kvfree(newmem); + vhost_umem_clean(oldumem); return 0; + +err: + vhost_umem_clean(newumem); + kvfree(newmem); + return -EFAULT; } long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) @@ -974,6 +1486,30 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) } EXPORT_SYMBOL_GPL(vhost_vring_ioctl); +int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled) +{ + struct vhost_umem *niotlb, *oiotlb; + int i; + + niotlb = vhost_umem_alloc(); + if (!niotlb) + return -ENOMEM; + + oiotlb = d->iotlb; + d->iotlb = niotlb; + + for (i = 0; i < d->nvqs; ++i) { + mutex_lock(&d->vqs[i]->mutex); + d->vqs[i]->iotlb = niotlb; + mutex_unlock(&d->vqs[i]->mutex); + } + + vhost_umem_clean(oiotlb); + + return 0; +} +EXPORT_SYMBOL_GPL(vhost_init_device_iotlb); + /* Caller must have device mutex */ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) { @@ -1056,28 +1592,6 @@ done: } EXPORT_SYMBOL_GPL(vhost_dev_ioctl); -static const struct vhost_memory_region *find_region(struct vhost_memory *mem, - __u64 addr, __u32 len) -{ - const struct vhost_memory_region *reg; - int start = 0, end = mem->nregions; - - while (start < end) { - int slot = start + (end - start) / 2; - reg = mem->regions + slot; - if (addr >= reg->guest_phys_addr) - end = slot; - else - start = slot + 1; - } - - reg = mem->regions + start; - if (addr >= reg->guest_phys_addr && - reg->guest_phys_addr + reg->memory_size > addr) - return reg; - return NULL; -} - /* TODO: This is really inefficient. We need something like get_user() * (instruction directly accesses the data, with an exception table entry * returning -EFAULT). See Documentation/x86/exception-tables.txt. @@ -1156,7 +1670,8 @@ EXPORT_SYMBOL_GPL(vhost_log_write); static int vhost_update_used_flags(struct vhost_virtqueue *vq) { void __user *used; - if (__put_user(cpu_to_vhost16(vq, vq->used_flags), &vq->used->flags) < 0) + if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags), + &vq->used->flags) < 0) return -EFAULT; if (unlikely(vq->log_used)) { /* Make sure the flag is seen before log. */ @@ -1174,7 +1689,8 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq) static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) { - if (__put_user(cpu_to_vhost16(vq, vq->avail_idx), vhost_avail_event(vq))) + if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx), + vhost_avail_event(vq))) return -EFAULT; if (unlikely(vq->log_used)) { void __user *used; @@ -1208,15 +1724,20 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq) if (r) goto err; vq->signalled_used_valid = false; - if (!access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx)) { + if (!vq->iotlb && + !access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx)) { r = -EFAULT; goto err; } - r = __get_user(last_used_idx, &vq->used->idx); - if (r) + r = vhost_get_user(vq, last_used_idx, &vq->used->idx); + if (r) { + vq_err(vq, "Can't access used idx at %p\n", + &vq->used->idx); goto err; + } vq->last_used_idx = vhost16_to_cpu(vq, last_used_idx); return 0; + err: vq->is_le = is_le; return r; @@ -1224,36 +1745,48 @@ err: EXPORT_SYMBOL_GPL(vhost_vq_init_access); static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, - struct iovec iov[], int iov_size) + struct iovec iov[], int iov_size, int access) { - const struct vhost_memory_region *reg; - struct vhost_memory *mem; + const struct vhost_umem_node *node; + struct vhost_dev *dev = vq->dev; + struct vhost_umem *umem = dev->iotlb ? dev->iotlb : dev->umem; struct iovec *_iov; u64 s = 0; int ret = 0; - mem = vq->memory; while ((u64)len > s) { u64 size; if (unlikely(ret >= iov_size)) { ret = -ENOBUFS; break; } - reg = find_region(mem, addr, len); - if (unlikely(!reg)) { - ret = -EFAULT; + + node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, + addr, addr + len - 1); + if (node == NULL || node->start > addr) { + if (umem != dev->iotlb) { + ret = -EFAULT; + break; + } + ret = -EAGAIN; + break; + } else if (!(node->perm & access)) { + ret = -EPERM; break; } + _iov = iov + ret; - size = reg->memory_size - addr + reg->guest_phys_addr; + size = node->size - addr + node->start; _iov->iov_len = min((u64)len - s, size); _iov->iov_base = (void __user *)(unsigned long) - (reg->userspace_addr + addr - reg->guest_phys_addr); + (node->userspace_addr + addr - node->start); s += size; addr += size; ++ret; } + if (ret == -EAGAIN) + vhost_iotlb_miss(vq, addr, access); return ret; } @@ -1288,7 +1821,7 @@ static int get_indirect(struct vhost_virtqueue *vq, unsigned int i = 0, count, found = 0; u32 len = vhost32_to_cpu(vq, indirect->len); struct iov_iter from; - int ret; + int ret, access; /* Sanity check */ if (unlikely(len % sizeof desc)) { @@ -1300,9 +1833,10 @@ static int get_indirect(struct vhost_virtqueue *vq, } ret = translate_desc(vq, vhost64_to_cpu(vq, indirect->addr), len, vq->indirect, - UIO_MAXIOV); + UIO_MAXIOV, VHOST_ACCESS_RO); if (unlikely(ret < 0)) { - vq_err(vq, "Translation failure %d in indirect.\n", ret); + if (ret != -EAGAIN) + vq_err(vq, "Translation failure %d in indirect.\n", ret); return ret; } iov_iter_init(&from, READ, vq->indirect, ret, len); @@ -1340,16 +1874,22 @@ static int get_indirect(struct vhost_virtqueue *vq, return -EINVAL; } + if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) + access = VHOST_ACCESS_WO; + else + access = VHOST_ACCESS_RO; + ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr), vhost32_to_cpu(vq, desc.len), iov + iov_count, - iov_size - iov_count); + iov_size - iov_count, access); if (unlikely(ret < 0)) { - vq_err(vq, "Translation failure %d indirect idx %d\n", - ret, i); + if (ret != -EAGAIN) + vq_err(vq, "Translation failure %d indirect idx %d\n", + ret, i); return ret; } /* If this is an input descriptor, increment that count. */ - if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) { + if (access == VHOST_ACCESS_WO) { *in_num += ret; if (unlikely(log)) { log[*log_num].addr = vhost64_to_cpu(vq, desc.addr); @@ -1388,11 +1928,11 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, u16 last_avail_idx; __virtio16 avail_idx; __virtio16 ring_head; - int ret; + int ret, access; /* Check it isn't doing very strange things with descriptor numbers. */ last_avail_idx = vq->last_avail_idx; - if (unlikely(__get_user(avail_idx, &vq->avail->idx))) { + if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) { vq_err(vq, "Failed to access avail idx at %p\n", &vq->avail->idx); return -EFAULT; @@ -1414,8 +1954,8 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, /* Grab the next descriptor number they're advertising, and increment * the index we've seen. */ - if (unlikely(__get_user(ring_head, - &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) { + if (unlikely(vhost_get_user(vq, ring_head, + &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) { vq_err(vq, "Failed to read head: idx %d address %p\n", last_avail_idx, &vq->avail->ring[last_avail_idx % vq->num]); @@ -1450,7 +1990,8 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, i, vq->num, head); return -EINVAL; } - ret = __copy_from_user(&desc, vq->desc + i, sizeof desc); + ret = vhost_copy_from_user(vq, &desc, vq->desc + i, + sizeof desc); if (unlikely(ret)) { vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", i, vq->desc + i); @@ -1461,22 +2002,28 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, out_num, in_num, log, log_num, &desc); if (unlikely(ret < 0)) { - vq_err(vq, "Failure detected " - "in indirect descriptor at idx %d\n", i); + if (ret != -EAGAIN) + vq_err(vq, "Failure detected " + "in indirect descriptor at idx %d\n", i); return ret; } continue; } + if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) + access = VHOST_ACCESS_WO; + else + access = VHOST_ACCESS_RO; ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr), vhost32_to_cpu(vq, desc.len), iov + iov_count, - iov_size - iov_count); + iov_size - iov_count, access); if (unlikely(ret < 0)) { - vq_err(vq, "Translation failure %d descriptor idx %d\n", - ret, i); + if (ret != -EAGAIN) + vq_err(vq, "Translation failure %d descriptor idx %d\n", + ret, i); return ret; } - if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) { + if (access == VHOST_ACCESS_WO) { /* If this is an input descriptor, * increment that count. */ *in_num += ret; @@ -1538,15 +2085,15 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, start = vq->last_used_idx & (vq->num - 1); used = vq->used->ring + start; if (count == 1) { - if (__put_user(heads[0].id, &used->id)) { + if (vhost_put_user(vq, heads[0].id, &used->id)) { vq_err(vq, "Failed to write used id"); return -EFAULT; } - if (__put_user(heads[0].len, &used->len)) { + if (vhost_put_user(vq, heads[0].len, &used->len)) { vq_err(vq, "Failed to write used len"); return -EFAULT; } - } else if (__copy_to_user(used, heads, count * sizeof *used)) { + } else if (vhost_copy_to_user(vq, used, heads, count * sizeof *used)) { vq_err(vq, "Failed to write used"); return -EFAULT; } @@ -1590,7 +2137,8 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, /* Make sure buffer is written before we update index. */ smp_wmb(); - if (__put_user(cpu_to_vhost16(vq, vq->last_used_idx), &vq->used->idx)) { + if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx), + &vq->used->idx)) { vq_err(vq, "Failed to increment used idx"); return -EFAULT; } @@ -1622,7 +2170,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { __virtio16 flags; - if (__get_user(flags, &vq->avail->flags)) { + if (vhost_get_user(vq, flags, &vq->avail->flags)) { vq_err(vq, "Failed to get flags"); return true; } @@ -1636,7 +2184,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) if (unlikely(!v)) return true; - if (__get_user(event, vhost_used_event(vq))) { + if (vhost_get_user(vq, event, vhost_used_event(vq))) { vq_err(vq, "Failed to get used event idx"); return true; } @@ -1678,7 +2226,7 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) __virtio16 avail_idx; int r; - r = __get_user(avail_idx, &vq->avail->idx); + r = vhost_get_user(vq, avail_idx, &vq->avail->idx); if (r) return false; @@ -1713,7 +2261,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) /* They could have slipped one in as we were doing that: make * sure it's written, then check again. */ smp_mb(); - r = __get_user(avail_idx, &vq->avail->idx); + r = vhost_get_user(vq, avail_idx, &vq->avail->idx); if (r) { vq_err(vq, "Failed to check avail idx at %p: %d\n", &vq->avail->idx, r); @@ -1741,6 +2289,47 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) } EXPORT_SYMBOL_GPL(vhost_disable_notify); +/* Create a new message. */ +struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type) +{ + struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL); + if (!node) + return NULL; + node->vq = vq; + node->msg.type = type; + return node; +} +EXPORT_SYMBOL_GPL(vhost_new_msg); + +void vhost_enqueue_msg(struct vhost_dev *dev, struct list_head *head, + struct vhost_msg_node *node) +{ + spin_lock(&dev->iotlb_lock); + list_add_tail(&node->node, head); + spin_unlock(&dev->iotlb_lock); + + wake_up_interruptible_poll(&dev->wait, POLLIN | POLLRDNORM); +} +EXPORT_SYMBOL_GPL(vhost_enqueue_msg); + +struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev, + struct list_head *head) +{ + struct vhost_msg_node *node = NULL; + + spin_lock(&dev->iotlb_lock); + if (!list_empty(head)) { + node = list_first_entry(head, struct vhost_msg_node, + node); + list_del(&node->node); + } + spin_unlock(&dev->iotlb_lock); + + return node; +} +EXPORT_SYMBOL_GPL(vhost_dequeue_msg); + + static int __init vhost_init(void) { return 0; diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index d36d8beb3351..78f3c5fc02e4 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -15,13 +15,15 @@ struct vhost_work; typedef void (*vhost_work_fn_t)(struct vhost_work *work); +#define VHOST_WORK_QUEUED 1 struct vhost_work { - struct list_head node; + struct llist_node node; vhost_work_fn_t fn; wait_queue_head_t done; int flushing; unsigned queue_seq; unsigned done_seq; + unsigned long flags; }; /* Poll a file (eventfd or socket) */ @@ -53,6 +55,27 @@ struct vhost_log { u64 len; }; +#define START(node) ((node)->start) +#define LAST(node) ((node)->last) + +struct vhost_umem_node { + struct rb_node rb; + struct list_head link; + __u64 start; + __u64 last; + __u64 size; + __u64 userspace_addr; + __u32 perm; + __u32 flags_padding; + __u64 __subtree_last; +}; + +struct vhost_umem { + struct rb_root umem_tree; + struct list_head umem_list; + int numem; +}; + /* The virtqueue structure describes a queue attached to a device. */ struct vhost_virtqueue { struct vhost_dev *dev; @@ -98,10 +121,12 @@ struct vhost_virtqueue { u64 log_addr; struct iovec iov[UIO_MAXIOV]; + struct iovec iotlb_iov[64]; struct iovec *indirect; struct vring_used_elem *heads; /* Protected by virtqueue mutex. */ - struct vhost_memory *memory; + struct vhost_umem *umem; + struct vhost_umem *iotlb; void *private_data; u64 acked_features; /* Log write descriptors */ @@ -118,25 +143,35 @@ struct vhost_virtqueue { u32 busyloop_timeout; }; +struct vhost_msg_node { + struct vhost_msg msg; + struct vhost_virtqueue *vq; + struct list_head node; +}; + struct vhost_dev { - struct vhost_memory *memory; struct mm_struct *mm; struct mutex mutex; struct vhost_virtqueue **vqs; int nvqs; struct file *log_file; struct eventfd_ctx *log_ctx; - spinlock_t work_lock; - struct list_head work_list; + struct llist_head work_list; struct task_struct *worker; + struct vhost_umem *umem; + struct vhost_umem *iotlb; + spinlock_t iotlb_lock; + struct list_head read_list; + struct list_head pending_list; + wait_queue_head_t wait; }; void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs); long vhost_dev_set_owner(struct vhost_dev *dev); bool vhost_dev_has_owner(struct vhost_dev *dev); long vhost_dev_check_owner(struct vhost_dev *); -struct vhost_memory *vhost_dev_reset_owner_prepare(void); -void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_memory *); +struct vhost_umem *vhost_dev_reset_owner_prepare(void); +void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_umem *); void vhost_dev_cleanup(struct vhost_dev *, bool locked); void vhost_dev_stop(struct vhost_dev *); long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp); @@ -165,6 +200,21 @@ bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, unsigned int log_num, u64 len); +int vq_iotlb_prefetch(struct vhost_virtqueue *vq); + +struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type); +void vhost_enqueue_msg(struct vhost_dev *dev, + struct list_head *head, + struct vhost_msg_node *node); +struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev, + struct list_head *head); +unsigned int vhost_chr_poll(struct file *file, struct vhost_dev *dev, + poll_table *wait); +ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to, + int noblock); +ssize_t vhost_chr_write_iter(struct vhost_dev *dev, + struct iov_iter *from); +int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled); #define vq_err(vq, fmt, ...) do { \ pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \ diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c new file mode 100644 index 000000000000..e3b30ea9ece5 --- /dev/null +++ b/drivers/vhost/vsock.c @@ -0,0 +1,723 @@ +/* + * vhost transport for vsock + * + * Copyright (C) 2013-2015 Red Hat, Inc. + * Author: Asias He <asias@redhat.com> + * Stefan Hajnoczi <stefanha@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ +#include <linux/miscdevice.h> +#include <linux/atomic.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/vmalloc.h> +#include <net/sock.h> +#include <linux/virtio_vsock.h> +#include <linux/vhost.h> + +#include <net/af_vsock.h> +#include "vhost.h" + +#define VHOST_VSOCK_DEFAULT_HOST_CID 2 + +enum { + VHOST_VSOCK_FEATURES = VHOST_FEATURES, +}; + +/* Used to track all the vhost_vsock instances on the system. */ +static DEFINE_SPINLOCK(vhost_vsock_lock); +static LIST_HEAD(vhost_vsock_list); + +struct vhost_vsock { + struct vhost_dev dev; + struct vhost_virtqueue vqs[2]; + + /* Link to global vhost_vsock_list, protected by vhost_vsock_lock */ + struct list_head list; + + struct vhost_work send_pkt_work; + spinlock_t send_pkt_list_lock; + struct list_head send_pkt_list; /* host->guest pending packets */ + + atomic_t queued_replies; + + u32 guest_cid; +}; + +static u32 vhost_transport_get_local_cid(void) +{ + return VHOST_VSOCK_DEFAULT_HOST_CID; +} + +static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) +{ + struct vhost_vsock *vsock; + + spin_lock_bh(&vhost_vsock_lock); + list_for_each_entry(vsock, &vhost_vsock_list, list) { + u32 other_cid = vsock->guest_cid; + + /* Skip instances that have no CID yet */ + if (other_cid == 0) + continue; + + if (other_cid == guest_cid) { + spin_unlock_bh(&vhost_vsock_lock); + return vsock; + } + } + spin_unlock_bh(&vhost_vsock_lock); + + return NULL; +} + +static void +vhost_transport_do_send_pkt(struct vhost_vsock *vsock, + struct vhost_virtqueue *vq) +{ + struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; + bool added = false; + bool restart_tx = false; + + mutex_lock(&vq->mutex); + + if (!vq->private_data) + goto out; + + /* Avoid further vmexits, we're already processing the virtqueue */ + vhost_disable_notify(&vsock->dev, vq); + + for (;;) { + struct virtio_vsock_pkt *pkt; + struct iov_iter iov_iter; + unsigned out, in; + size_t nbytes; + size_t len; + int head; + + spin_lock_bh(&vsock->send_pkt_list_lock); + if (list_empty(&vsock->send_pkt_list)) { + spin_unlock_bh(&vsock->send_pkt_list_lock); + vhost_enable_notify(&vsock->dev, vq); + break; + } + + pkt = list_first_entry(&vsock->send_pkt_list, + struct virtio_vsock_pkt, list); + list_del_init(&pkt->list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + + head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + &out, &in, NULL, NULL); + if (head < 0) { + spin_lock_bh(&vsock->send_pkt_list_lock); + list_add(&pkt->list, &vsock->send_pkt_list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + break; + } + + if (head == vq->num) { + spin_lock_bh(&vsock->send_pkt_list_lock); + list_add(&pkt->list, &vsock->send_pkt_list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + + /* We cannot finish yet if more buffers snuck in while + * re-enabling notify. + */ + if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { + vhost_disable_notify(&vsock->dev, vq); + continue; + } + break; + } + + if (out) { + virtio_transport_free_pkt(pkt); + vq_err(vq, "Expected 0 output buffers, got %u\n", out); + break; + } + + len = iov_length(&vq->iov[out], in); + iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len); + + nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); + if (nbytes != sizeof(pkt->hdr)) { + virtio_transport_free_pkt(pkt); + vq_err(vq, "Faulted on copying pkt hdr\n"); + break; + } + + nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter); + if (nbytes != pkt->len) { + virtio_transport_free_pkt(pkt); + vq_err(vq, "Faulted on copying pkt buf\n"); + break; + } + + vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); + added = true; + + if (pkt->reply) { + int val; + + val = atomic_dec_return(&vsock->queued_replies); + + /* Do we have resources to resume tx processing? */ + if (val + 1 == tx_vq->num) + restart_tx = true; + } + + virtio_transport_free_pkt(pkt); + } + if (added) + vhost_signal(&vsock->dev, vq); + +out: + mutex_unlock(&vq->mutex); + + if (restart_tx) + vhost_poll_queue(&tx_vq->poll); +} + +static void vhost_transport_send_pkt_work(struct vhost_work *work) +{ + struct vhost_virtqueue *vq; + struct vhost_vsock *vsock; + + vsock = container_of(work, struct vhost_vsock, send_pkt_work); + vq = &vsock->vqs[VSOCK_VQ_RX]; + + vhost_transport_do_send_pkt(vsock, vq); +} + +static int +vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt) +{ + struct vhost_vsock *vsock; + struct vhost_virtqueue *vq; + int len = pkt->len; + + /* Find the vhost_vsock according to guest context id */ + vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid)); + if (!vsock) { + virtio_transport_free_pkt(pkt); + return -ENODEV; + } + + vq = &vsock->vqs[VSOCK_VQ_RX]; + + if (pkt->reply) + atomic_inc(&vsock->queued_replies); + + spin_lock_bh(&vsock->send_pkt_list_lock); + list_add_tail(&pkt->list, &vsock->send_pkt_list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + + vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); + return len; +} + +static struct virtio_vsock_pkt * +vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, + unsigned int out, unsigned int in) +{ + struct virtio_vsock_pkt *pkt; + struct iov_iter iov_iter; + size_t nbytes; + size_t len; + + if (in != 0) { + vq_err(vq, "Expected 0 input buffers, got %u\n", in); + return NULL; + } + + pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return NULL; + + len = iov_length(vq->iov, out); + iov_iter_init(&iov_iter, WRITE, vq->iov, out, len); + + nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); + if (nbytes != sizeof(pkt->hdr)) { + vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n", + sizeof(pkt->hdr), nbytes); + kfree(pkt); + return NULL; + } + + if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM) + pkt->len = le32_to_cpu(pkt->hdr.len); + + /* No payload */ + if (!pkt->len) + return pkt; + + /* The pkt is too big */ + if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { + kfree(pkt); + return NULL; + } + + pkt->buf = kmalloc(pkt->len, GFP_KERNEL); + if (!pkt->buf) { + kfree(pkt); + return NULL; + } + + nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter); + if (nbytes != pkt->len) { + vq_err(vq, "Expected %u byte payload, got %zu bytes\n", + pkt->len, nbytes); + virtio_transport_free_pkt(pkt); + return NULL; + } + + return pkt; +} + +/* Is there space left for replies to rx packets? */ +static bool vhost_vsock_more_replies(struct vhost_vsock *vsock) +{ + struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX]; + int val; + + smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */ + val = atomic_read(&vsock->queued_replies); + + return val < vq->num; +} + +static void vhost_vsock_handle_tx_kick(struct vhost_work *work) +{ + struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, + poll.work); + struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, + dev); + struct virtio_vsock_pkt *pkt; + int head; + unsigned int out, in; + bool added = false; + + mutex_lock(&vq->mutex); + + if (!vq->private_data) + goto out; + + vhost_disable_notify(&vsock->dev, vq); + for (;;) { + u32 len; + + if (!vhost_vsock_more_replies(vsock)) { + /* Stop tx until the device processes already + * pending replies. Leave tx virtqueue + * callbacks disabled. + */ + goto no_more_replies; + } + + head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + &out, &in, NULL, NULL); + if (head < 0) + break; + + if (head == vq->num) { + if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { + vhost_disable_notify(&vsock->dev, vq); + continue; + } + break; + } + + pkt = vhost_vsock_alloc_pkt(vq, out, in); + if (!pkt) { + vq_err(vq, "Faulted on pkt\n"); + continue; + } + + len = pkt->len; + + /* Only accept correctly addressed packets */ + if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid) + virtio_transport_recv_pkt(pkt); + else + virtio_transport_free_pkt(pkt); + + vhost_add_used(vq, head, sizeof(pkt->hdr) + len); + added = true; + } + +no_more_replies: + if (added) + vhost_signal(&vsock->dev, vq); + +out: + mutex_unlock(&vq->mutex); +} + +static void vhost_vsock_handle_rx_kick(struct vhost_work *work) +{ + struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, + poll.work); + struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, + dev); + + vhost_transport_do_send_pkt(vsock, vq); +} + +static int vhost_vsock_start(struct vhost_vsock *vsock) +{ + size_t i; + int ret; + + mutex_lock(&vsock->dev.mutex); + + ret = vhost_dev_check_owner(&vsock->dev); + if (ret) + goto err; + + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { + struct vhost_virtqueue *vq = &vsock->vqs[i]; + + mutex_lock(&vq->mutex); + + if (!vhost_vq_access_ok(vq)) { + ret = -EFAULT; + mutex_unlock(&vq->mutex); + goto err_vq; + } + + if (!vq->private_data) { + vq->private_data = vsock; + vhost_vq_init_access(vq); + } + + mutex_unlock(&vq->mutex); + } + + mutex_unlock(&vsock->dev.mutex); + return 0; + +err_vq: + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { + struct vhost_virtqueue *vq = &vsock->vqs[i]; + + mutex_lock(&vq->mutex); + vq->private_data = NULL; + mutex_unlock(&vq->mutex); + } +err: + mutex_unlock(&vsock->dev.mutex); + return ret; +} + +static int vhost_vsock_stop(struct vhost_vsock *vsock) +{ + size_t i; + int ret; + + mutex_lock(&vsock->dev.mutex); + + ret = vhost_dev_check_owner(&vsock->dev); + if (ret) + goto err; + + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { + struct vhost_virtqueue *vq = &vsock->vqs[i]; + + mutex_lock(&vq->mutex); + vq->private_data = NULL; + mutex_unlock(&vq->mutex); + } + +err: + mutex_unlock(&vsock->dev.mutex); + return ret; +} + +static void vhost_vsock_free(struct vhost_vsock *vsock) +{ + kvfree(vsock); +} + +static int vhost_vsock_dev_open(struct inode *inode, struct file *file) +{ + struct vhost_virtqueue **vqs; + struct vhost_vsock *vsock; + int ret; + + /* This struct is large and allocation could fail, fall back to vmalloc + * if there is no other way. + */ + vsock = kzalloc(sizeof(*vsock), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!vsock) { + vsock = vmalloc(sizeof(*vsock)); + if (!vsock) + return -ENOMEM; + } + + vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL); + if (!vqs) { + ret = -ENOMEM; + goto out; + } + + atomic_set(&vsock->queued_replies, 0); + + vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX]; + vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX]; + vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick; + vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick; + + vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs)); + + file->private_data = vsock; + spin_lock_init(&vsock->send_pkt_list_lock); + INIT_LIST_HEAD(&vsock->send_pkt_list); + vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work); + + spin_lock_bh(&vhost_vsock_lock); + list_add_tail(&vsock->list, &vhost_vsock_list); + spin_unlock_bh(&vhost_vsock_lock); + return 0; + +out: + vhost_vsock_free(vsock); + return ret; +} + +static void vhost_vsock_flush(struct vhost_vsock *vsock) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) + if (vsock->vqs[i].handle_kick) + vhost_poll_flush(&vsock->vqs[i].poll); + vhost_work_flush(&vsock->dev, &vsock->send_pkt_work); +} + +static void vhost_vsock_reset_orphans(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + /* vmci_transport.c doesn't take sk_lock here either. At least we're + * under vsock_table_lock so the sock cannot disappear while we're + * executing. + */ + + if (!vhost_vsock_get(vsk->local_addr.svm_cid)) { + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ECONNRESET; + sk->sk_error_report(sk); + } +} + +static int vhost_vsock_dev_release(struct inode *inode, struct file *file) +{ + struct vhost_vsock *vsock = file->private_data; + + spin_lock_bh(&vhost_vsock_lock); + list_del(&vsock->list); + spin_unlock_bh(&vhost_vsock_lock); + + /* Iterating over all connections for all CIDs to find orphans is + * inefficient. Room for improvement here. */ + vsock_for_each_connected_socket(vhost_vsock_reset_orphans); + + vhost_vsock_stop(vsock); + vhost_vsock_flush(vsock); + vhost_dev_stop(&vsock->dev); + + spin_lock_bh(&vsock->send_pkt_list_lock); + while (!list_empty(&vsock->send_pkt_list)) { + struct virtio_vsock_pkt *pkt; + + pkt = list_first_entry(&vsock->send_pkt_list, + struct virtio_vsock_pkt, list); + list_del_init(&pkt->list); + virtio_transport_free_pkt(pkt); + } + spin_unlock_bh(&vsock->send_pkt_list_lock); + + vhost_dev_cleanup(&vsock->dev, false); + kfree(vsock->dev.vqs); + vhost_vsock_free(vsock); + return 0; +} + +static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid) +{ + struct vhost_vsock *other; + + /* Refuse reserved CIDs */ + if (guest_cid <= VMADDR_CID_HOST || + guest_cid == U32_MAX) + return -EINVAL; + + /* 64-bit CIDs are not yet supported */ + if (guest_cid > U32_MAX) + return -EINVAL; + + /* Refuse if CID is already in use */ + other = vhost_vsock_get(guest_cid); + if (other && other != vsock) + return -EADDRINUSE; + + spin_lock_bh(&vhost_vsock_lock); + vsock->guest_cid = guest_cid; + spin_unlock_bh(&vhost_vsock_lock); + + return 0; +} + +static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) +{ + struct vhost_virtqueue *vq; + int i; + + if (features & ~VHOST_VSOCK_FEATURES) + return -EOPNOTSUPP; + + mutex_lock(&vsock->dev.mutex); + if ((features & (1 << VHOST_F_LOG_ALL)) && + !vhost_log_access_ok(&vsock->dev)) { + mutex_unlock(&vsock->dev.mutex); + return -EFAULT; + } + + for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { + vq = &vsock->vqs[i]; + mutex_lock(&vq->mutex); + vq->acked_features = features; + mutex_unlock(&vq->mutex); + } + mutex_unlock(&vsock->dev.mutex); + return 0; +} + +static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, + unsigned long arg) +{ + struct vhost_vsock *vsock = f->private_data; + void __user *argp = (void __user *)arg; + u64 guest_cid; + u64 features; + int start; + int r; + + switch (ioctl) { + case VHOST_VSOCK_SET_GUEST_CID: + if (copy_from_user(&guest_cid, argp, sizeof(guest_cid))) + return -EFAULT; + return vhost_vsock_set_cid(vsock, guest_cid); + case VHOST_VSOCK_SET_RUNNING: + if (copy_from_user(&start, argp, sizeof(start))) + return -EFAULT; + if (start) + return vhost_vsock_start(vsock); + else + return vhost_vsock_stop(vsock); + case VHOST_GET_FEATURES: + features = VHOST_VSOCK_FEATURES; + if (copy_to_user(argp, &features, sizeof(features))) + return -EFAULT; + return 0; + case VHOST_SET_FEATURES: + if (copy_from_user(&features, argp, sizeof(features))) + return -EFAULT; + return vhost_vsock_set_features(vsock, features); + default: + mutex_lock(&vsock->dev.mutex); + r = vhost_dev_ioctl(&vsock->dev, ioctl, argp); + if (r == -ENOIOCTLCMD) + r = vhost_vring_ioctl(&vsock->dev, ioctl, argp); + else + vhost_vsock_flush(vsock); + mutex_unlock(&vsock->dev.mutex); + return r; + } +} + +static const struct file_operations vhost_vsock_fops = { + .owner = THIS_MODULE, + .open = vhost_vsock_dev_open, + .release = vhost_vsock_dev_release, + .llseek = noop_llseek, + .unlocked_ioctl = vhost_vsock_dev_ioctl, +}; + +static struct miscdevice vhost_vsock_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "vhost-vsock", + .fops = &vhost_vsock_fops, +}; + +static struct virtio_transport vhost_transport = { + .transport = { + .get_local_cid = vhost_transport_get_local_cid, + + .init = virtio_transport_do_socket_init, + .destruct = virtio_transport_destruct, + .release = virtio_transport_release, + .connect = virtio_transport_connect, + .shutdown = virtio_transport_shutdown, + + .dgram_enqueue = virtio_transport_dgram_enqueue, + .dgram_dequeue = virtio_transport_dgram_dequeue, + .dgram_bind = virtio_transport_dgram_bind, + .dgram_allow = virtio_transport_dgram_allow, + + .stream_enqueue = virtio_transport_stream_enqueue, + .stream_dequeue = virtio_transport_stream_dequeue, + .stream_has_data = virtio_transport_stream_has_data, + .stream_has_space = virtio_transport_stream_has_space, + .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, + .stream_is_active = virtio_transport_stream_is_active, + .stream_allow = virtio_transport_stream_allow, + + .notify_poll_in = virtio_transport_notify_poll_in, + .notify_poll_out = virtio_transport_notify_poll_out, + .notify_recv_init = virtio_transport_notify_recv_init, + .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, + .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, + .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, + .notify_send_init = virtio_transport_notify_send_init, + .notify_send_pre_block = virtio_transport_notify_send_pre_block, + .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, + .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, + + .set_buffer_size = virtio_transport_set_buffer_size, + .set_min_buffer_size = virtio_transport_set_min_buffer_size, + .set_max_buffer_size = virtio_transport_set_max_buffer_size, + .get_buffer_size = virtio_transport_get_buffer_size, + .get_min_buffer_size = virtio_transport_get_min_buffer_size, + .get_max_buffer_size = virtio_transport_get_max_buffer_size, + }, + + .send_pkt = vhost_transport_send_pkt, +}; + +static int __init vhost_vsock_init(void) +{ + int ret; + + ret = vsock_core_init(&vhost_transport.transport); + if (ret < 0) + return ret; + return misc_register(&vhost_vsock_misc); +}; + +static void __exit vhost_vsock_exit(void) +{ + misc_deregister(&vhost_vsock_misc); + vsock_core_exit(); +}; + +module_init(vhost_vsock_init); +module_exit(vhost_vsock_exit); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Asias He"); +MODULE_DESCRIPTION("vhost transport for vsock "); diff --git a/drivers/video/fbdev/omap2/omapfb/omapfb-main.c b/drivers/video/fbdev/omap2/omapfb/omapfb-main.c index 2fb90cb6803f..1d7c012f09db 100644 --- a/drivers/video/fbdev/omap2/omapfb/omapfb-main.c +++ b/drivers/video/fbdev/omap2/omapfb/omapfb-main.c @@ -1332,7 +1332,7 @@ static void omapfb_free_fbmem(struct fb_info *fbi) } dma_free_attrs(fbdev->dev, rg->size, rg->token, rg->dma_handle, - &rg->attrs); + rg->attrs); rg->token = NULL; rg->vaddr = NULL; @@ -1370,7 +1370,7 @@ static int omapfb_alloc_fbmem(struct fb_info *fbi, unsigned long size, struct omapfb2_device *fbdev = ofbi->fbdev; struct omapfb2_mem_region *rg; void *token; - DEFINE_DMA_ATTRS(attrs); + unsigned long attrs; dma_addr_t dma_handle; int r; @@ -1386,15 +1386,15 @@ static int omapfb_alloc_fbmem(struct fb_info *fbi, unsigned long size, size = PAGE_ALIGN(size); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + attrs = DMA_ATTR_WRITE_COMBINE; if (ofbi->rotation_type == OMAP_DSS_ROT_VRFB) - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); + attrs |= DMA_ATTR_NO_KERNEL_MAPPING; DBG("allocating %lu bytes for fb %d\n", size, ofbi->id); token = dma_alloc_attrs(fbdev->dev, size, &dma_handle, - GFP_KERNEL, &attrs); + GFP_KERNEL, attrs); if (token == NULL) { dev_err(fbdev->dev, "failed to allocate framebuffer\n"); @@ -1408,7 +1408,7 @@ static int omapfb_alloc_fbmem(struct fb_info *fbi, unsigned long size, r = omap_vrfb_request_ctx(&rg->vrfb); if (r) { dma_free_attrs(fbdev->dev, size, token, dma_handle, - &attrs); + attrs); dev_err(fbdev->dev, "vrfb create ctx failed\n"); return r; } diff --git a/drivers/video/fbdev/omap2/omapfb/omapfb.h b/drivers/video/fbdev/omap2/omapfb/omapfb.h index bcb9ff4a607d..555487d6dbea 100644 --- a/drivers/video/fbdev/omap2/omapfb/omapfb.h +++ b/drivers/video/fbdev/omap2/omapfb/omapfb.h @@ -28,7 +28,6 @@ #endif #include <linux/rwsem.h> -#include <linux/dma-attrs.h> #include <linux/dma-mapping.h> #include <video/omapfb_dss.h> @@ -51,7 +50,7 @@ extern bool omapfb_debug; struct omapfb2_mem_region { int id; - struct dma_attrs attrs; + unsigned long attrs; void *token; dma_addr_t dma_handle; u32 paddr; diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 888d5f8322ce..4e7003db12c4 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -207,6 +207,8 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) num = min(num, ARRAY_SIZE(vb->pfns)); mutex_lock(&vb->balloon_lock); + /* We can't release more pages than taken */ + num = min(num, (size_t)vb->num_pages); for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { page = balloon_page_dequeue(vb_dev_info); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index ca6bfddaacad..e383ecdaca59 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -117,7 +117,10 @@ struct vring_virtqueue { #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) /* - * The interaction between virtio and a possible IOMMU is a mess. + * Modern virtio devices have feature bits to specify whether they need a + * quirk and bypass the IOMMU. If not there, just use the DMA API. + * + * If there, the interaction between virtio and DMA API is messy. * * On most systems with virtio, physical addresses match bus addresses, * and it doesn't particularly matter whether we use the DMA API. @@ -133,10 +136,18 @@ struct vring_virtqueue { * * For the time being, we preserve historic behavior and bypass the DMA * API. + * + * TODO: install a per-device DMA ops structure that does the right thing + * taking into account all the above quirks, and use the DMA API + * unconditionally on data path. */ static bool vring_use_dma_api(struct virtio_device *vdev) { + if (!virtio_has_iommu_quirk(vdev)) + return true; + + /* Otherwise, we are left to guess. */ /* * In theory, it's possible to have a buggy QEMU-supposed * emulated Q35 IOMMU and Xen enabled at the same time. On @@ -316,6 +327,8 @@ static inline int virtqueue_add(struct virtqueue *_vq, * host should service the ring ASAP. */ if (out_sgs) vq->notify(&vq->vq); + if (indirect) + kfree(desc); END_USE(vq); return -ENOSPC; } @@ -415,6 +428,7 @@ unmap_release: if (indirect) kfree(desc); + END_USE(vq); return -EIO; } @@ -1099,6 +1113,8 @@ void vring_transport_features(struct virtio_device *vdev) break; case VIRTIO_F_VERSION_1: break; + case VIRTIO_F_IOMMU_PLATFORM: + break; default: /* We don't understand this bit. */ __virtio_clear_bit(vdev, i); diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 7399782c0998..87e6035c9e81 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -294,7 +294,7 @@ error: void * xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) + unsigned long attrs) { void *ret; int order = get_order(size); @@ -346,7 +346,7 @@ EXPORT_SYMBOL_GPL(xen_swiotlb_alloc_coherent); void xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, - dma_addr_t dev_addr, struct dma_attrs *attrs) + dma_addr_t dev_addr, unsigned long attrs) { int order = get_order(size); phys_addr_t phys; @@ -378,7 +378,7 @@ EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent); dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { phys_addr_t map, phys = page_to_phys(page) + offset; dma_addr_t dev_addr = xen_phys_to_bus(phys); @@ -434,7 +434,7 @@ EXPORT_SYMBOL_GPL(xen_swiotlb_map_page); */ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { phys_addr_t paddr = xen_bus_to_phys(dev_addr); @@ -462,7 +462,7 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr, void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { xen_unmap_single(hwdev, dev_addr, size, dir, attrs); } @@ -538,7 +538,7 @@ EXPORT_SYMBOL_GPL(xen_swiotlb_sync_single_for_device); int xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int i; @@ -599,7 +599,7 @@ EXPORT_SYMBOL_GPL(xen_swiotlb_map_sg_attrs); void xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct scatterlist *sg; int i; |