author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-21 17:16:01 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-21 17:16:01 -0700
commit | 69d1dea852b54eecd8ad2ec92a7fd371e9aec4bd (patch)
tree | 4ab2be8e9d5d7eccd68e0fbf3aeea242a0e56613
parent | 616355cc818c6ddadc393fdfd4491f94458cb715 (diff)
parent | ae53aea611b7a532a52ba966281a8b7a8cfd008a (diff)
Merge tag 'for-5.18/drivers-2022-03-18' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:
- NVMe updates via Christoph:
- add vectored-io support for user-passthrough (Kanchan Joshi)
- add verbose error logging (Alan Adamson)
- support buffered I/O on block devices in nvmet (Chaitanya
Kulkarni)
- central discovery controller support (Martin Belanger)
- fix and extend the globally unique identifier validation
(Christoph)
- move away from the deprecated IDA APIs (Sagi Grimberg)
- misc code cleanup (Keith Busch, Max Gurtovoy, Qinghua Jin,
Chaitanya Kulkarni)
- add lockdep annotations for in-kernel sockets (Chris Leech)
- use vmalloc for ANA log buffer (Hannes Reinecke)
- kerneldoc fixes (Chaitanya Kulkarni)
- cleanups (Guoqing Jiang, Chaitanya Kulkarni, Christoph)
- warn about shared namespaces without multipathing (Christoph)
- MD updates via Song with a set of cleanups (Christoph, Mariusz, Paul,
Erik, Dirk)
- loop cleanups and queue depth configuration (Chaitanya)
- null_blk cleanups and fixes (Chaitanya)
- Use descriptive init/exit names in virtio_blk (Randy)
- Use bvec_kmap_local() in drivers (Christoph); a before/after sketch follows this list
- bcache fixes (Mingzhe)
- xen blk-front persistent grant speedups (Juergen)
- rnbd fix and cleanup (Gioh)
- Misc fixes (Christophe, Colin)
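
The bvec_kmap_local() item above covers most of the small per-driver hunks in this pull (simdisk, aoe, drbd, bcache, nvdimm). As an illustration only, here is a minimal sketch of the conversion pattern; the helper example_transfer() and the two walk functions are hypothetical stand-ins, not code from any of the patched drivers:

```c
#include <linux/bio.h>
#include <linux/bvec.h>
#include <linux/highmem.h>

/* Hypothetical stand-in for a driver's real per-segment work. */
static void example_transfer(void *buf, unsigned int len)
{
}

/* Old pattern: map the page atomically and apply bv_offset by hand. */
static void example_walk_old(struct bio *bio)
{
	struct bio_vec bvec;
	struct bvec_iter iter;

	bio_for_each_segment(bvec, bio, iter) {
		char *buf = kmap_atomic(bvec.bv_page) + bvec.bv_offset;

		example_transfer(buf, bvec.bv_len);
		kunmap_atomic(buf);
	}
}

/* New pattern: bvec_kmap_local() folds in bv_offset and maps via
 * kmap_local_page(), so the mapping is thread/CPU local rather than
 * atomic. */
static void example_walk_new(struct bio *bio)
{
	struct bio_vec bvec;
	struct bvec_iter iter;

	bio_for_each_segment(bvec, bio, iter) {
		char *buf = bvec_kmap_local(&bvec);

		example_transfer(buf, bvec.bv_len);
		kunmap_local(buf);
	}
}
```

Beyond dropping the manual bv_offset arithmetic, the practical difference is that a kmap_local mapping remains valid across preemption on the owning CPU, whereas kmap_atomic() disabled pagefaults (and preemption) for the duration of the mapping.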
* tag 'for-5.18/drivers-2022-03-18' of git://git.kernel.dk/linux-block: (76 commits)
virtio_blk: eliminate anonymous module_init & module_exit
nvme: warn about shared namespaces without CONFIG_NVME_MULTIPATH
nvme: remove nvme_alloc_request and nvme_alloc_request_qid
nvme: cleanup how disk->disk_name is assigned
nvmet: move the call to nvmet_ns_changed out of nvmet_ns_revalidate
nvmet: use snprintf() with PAGE_SIZE in configfs
nvmet: don't fold lines
nvmet-rdma: fix kernel-doc warning for nvmet_rdma_device_removal
nvmet-fc: fix kernel-doc warning for nvmet_fc_unregister_targetport
nvmet-fc: fix kernel-doc warning for nvmet_fc_register_targetport
nvme-tcp: lockdep: annotate in-kernel sockets
nvme-tcp: don't fold the line
nvme-tcp: don't initialize ret variable
nvme-multipath: call bio_io_error in nvme_ns_head_submit_bio
nvme-multipath: use vmalloc for ANA log buffer
xen/blkfront: speed up purge_persistent_grants()
raid5: initialize the stripe_head embedded bios as needed
raid5-cache: statically allocate the recovery ra bio
raid5-cache: fully initialize flush_bio when needed
raid5-ppl: fully initialize the bio in ppl_new_iounit
...
58 files changed, 819 insertions, 437 deletions
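
One note for readers of the core.c hunks below: with "nvme: remove nvme_alloc_request and nvme_alloc_request_qid", callers allocate the request through blk-mq directly and then call the newly exported nvme_init_request(). A minimal caller-side sketch, assuming a hypothetical example_submit_sync() and omitting the actual submission path:

```c
#include <linux/blk-mq.h>
#include <linux/err.h>
#include "nvme.h"	/* nvme_init_request(), nvme_req_op() (driver-private header) */

static int example_submit_sync(struct request_queue *q,
			       struct nvme_command *cmd,
			       blk_mq_req_flags_t flags)
{
	struct request *req;

	/* Previously: req = nvme_alloc_request(q, cmd, flags); */
	req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags);
	if (IS_ERR(req))
		return PTR_ERR(req);

	/* The second half of the removed wrapper is now an exported helper. */
	nvme_init_request(req, cmd);

	/* ... set timeout, execute (e.g. blk_execute_rq()), read the result ... */

	blk_mq_free_request(req);
	return 0;
}
```

This keeps the allocation step visible to the caller, so qid-aware paths can choose blk_mq_alloc_request_hctx() themselves (as __nvme_submit_sync_cmd() does in the diff below) while sharing the command setup in one helper.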
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index 8eb6ad1a3a1d..0f0e0724397f 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -108,13 +108,13 @@ static void simdisk_submit_bio(struct bio *bio) sector_t sector = bio->bi_iter.bi_sector; bio_for_each_segment(bvec, bio, iter) { - char *buffer = kmap_atomic(bvec.bv_page) + bvec.bv_offset; + char *buffer = bvec_kmap_local(&bvec); unsigned len = bvec.bv_len >> SECTOR_SHIFT; simdisk_transfer(dev, sector, len, buffer, bio_data_dir(bio) == WRITE); sector += len; - kunmap_atomic(buffer); + kunmap_local(buffer); } bio_endio(bio); diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index cc11f89a0928..384073ef2323 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -1018,9 +1018,9 @@ bvcpy(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter, long cnt) iter.bi_size = cnt; __bio_for_each_segment(bv, bio, iter, iter) { - char *p = kmap_atomic(bv.bv_page) + bv.bv_offset; + char *p = bvec_kmap_local(&bv); skb_copy_bits(skb, soff, p, bv.bv_len); - kunmap_atomic(p); + kunmap_local(p); soff += bv.bv_len; } } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 04e3ec12d8b4..fa00cf2ea952 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2017,10 +2017,10 @@ static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_req D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector); bio_for_each_segment(bvec, bio, iter) { - void *mapped = kmap(bvec.bv_page) + bvec.bv_offset; + void *mapped = bvec_kmap_local(&bvec); expect = min_t(int, data_size, bvec.bv_len); err = drbd_recv_all_warn(peer_device->connection, mapped, expect); - kunmap(bvec.bv_page); + kunmap_local(mapped); if (err) return err; data_size -= expect; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index a5e04b38006b..1b48c8172a07 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -326,9 +326,9 @@ void drbd_csum_bio(struct crypto_shash *tfm, struct bio *bio, void *digest) bio_for_each_segment(bvec, bio, iter) { u8 *src; - src = kmap_atomic(bvec.bv_page); - crypto_shash_update(desc, src + bvec.bv_offset, bvec.bv_len); - kunmap_atomic(src); + src = bvec_kmap_local(&bvec); + crypto_shash_update(desc, src, bvec.bv_len); + kunmap_local(src); /* REQ_OP_WRITE_SAME has only one segment, * checksum the payload only once. */ diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 19c2d0327e15..8c647532e3ce 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2485,11 +2485,9 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2) } if (CT(raw_cmd->cmd[COMMAND]) == FD_READ) - memcpy_to_page(bv.bv_page, bv.bv_offset, dma_buffer, - size); + memcpy_to_bvec(&bv, dma_buffer); else - memcpy_from_page(dma_buffer, bv.bv_page, bv.bv_offset, - size); + memcpy_from_bvec(dma_buffer, &bv); remaining -= size; dma_buffer += size; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 19fe19eaa50e..3e636a75c83a 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -86,6 +86,7 @@ #include <linux/uaccess.h> #define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ) +#define LOOP_DEFAULT_HW_Q_DEPTH (128) static DEFINE_IDR(loop_index_idr); static DEFINE_MUTEX(loop_ctl_mutex); @@ -309,12 +310,11 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos, * a.k.a. 
discard/zerorange. */ struct file *file = lo->lo_backing_file; - struct request_queue *q = lo->lo_queue; int ret; mode |= FALLOC_FL_KEEP_SIZE; - if (!blk_queue_discard(q)) { + if (!blk_queue_discard(lo->lo_queue)) { ret = -EOPNOTSUPP; goto out; } @@ -328,8 +328,7 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos, static int lo_req_flush(struct loop_device *lo, struct request *rq) { - struct file *file = lo->lo_backing_file; - int ret = vfs_fsync(file, 0); + int ret = vfs_fsync(lo->lo_backing_file, 0); if (unlikely(ret && ret != -EINVAL)) ret = -EIO; @@ -681,33 +680,33 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf) static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf) { - return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset); + return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_offset); } static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf) { - return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit); + return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit); } static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf) { int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR); - return sprintf(buf, "%s\n", autoclear ? "1" : "0"); + return sysfs_emit(buf, "%s\n", autoclear ? "1" : "0"); } static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf) { int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN); - return sprintf(buf, "%s\n", partscan ? "1" : "0"); + return sysfs_emit(buf, "%s\n", partscan ? "1" : "0"); } static ssize_t loop_attr_dio_show(struct loop_device *lo, char *buf) { int dio = (lo->lo_flags & LO_FLAGS_DIRECT_IO); - return sprintf(buf, "%s\n", dio ? "1" : "0"); + return sysfs_emit(buf, "%s\n", dio ? "1" : "0"); } LOOP_ATTR_RO(backing_file); @@ -1261,7 +1260,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) { /* If any pages were dirtied after invalidate_bdev(), try again */ err = -EAGAIN; - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", + pr_warn("%s: loop%d (%s) still has dirty pages (nrpages=%lu)\n", __func__, lo->lo_number, lo->lo_file_name, lo->lo_device->bd_inode->i_mapping->nrpages); goto out_unfreeze; @@ -1481,7 +1480,7 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) /* invalidate_bdev should have truncated all the pages */ if (lo->lo_device->bd_inode->i_mapping->nrpages) { err = -EAGAIN; - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", + pr_warn("%s: loop%d (%s) still has dirty pages (nrpages=%lu)\n", __func__, lo->lo_number, lo->lo_file_name, lo->lo_device->bd_inode->i_mapping->nrpages); goto out_unfreeze; @@ -1786,6 +1785,24 @@ module_param(max_loop, int, 0444); MODULE_PARM_DESC(max_loop, "Maximum number of loop devices"); module_param(max_part, int, 0444); MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device"); + +static int hw_queue_depth = LOOP_DEFAULT_HW_Q_DEPTH; + +static int loop_set_hw_queue_depth(const char *s, const struct kernel_param *p) +{ + int ret = kstrtoint(s, 10, &hw_queue_depth); + + return (ret || (hw_queue_depth < 1)) ? 
-EINVAL : 0; +} + +static const struct kernel_param_ops loop_hw_qdepth_param_ops = { + .set = loop_set_hw_queue_depth, + .get = param_get_int, +}; + +device_param_cb(hw_queue_depth, &loop_hw_qdepth_param_ops, &hw_queue_depth, 0444); +MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 128"); + MODULE_LICENSE("GPL"); MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR); @@ -1980,7 +1997,7 @@ static int loop_add(int i) lo->tag_set.ops = &loop_mq_ops; lo->tag_set.nr_hw_queues = 1; - lo->tag_set.queue_depth = 128; + lo->tag_set.queue_depth = hw_queue_depth; lo->tag_set.numa_node = NUMA_NO_NODE; lo->tag_set.cmd_size = sizeof(struct loop_cmd); lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING | @@ -2074,6 +2091,7 @@ static void loop_remove(struct loop_device *lo) del_gendisk(lo->lo_disk); blk_cleanup_disk(lo->lo_disk); blk_mq_free_tag_set(&lo->tag_set); + mutex_lock(&loop_ctl_mutex); idr_remove(&loop_index_idr, lo->lo_number); mutex_unlock(&loop_ctl_mutex); diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 13004beb48ca..05b1120e6623 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -431,9 +431,10 @@ static ssize_t nullb_device_power_store(struct config_item *item, if (!dev->power && newp) { if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags)) return count; - if (null_add_dev(dev)) { + ret = null_add_dev(dev); + if (ret) { clear_bit(NULLB_DEV_FL_UP, &dev->flags); - return -ENOMEM; + return ret; } set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags); @@ -719,26 +720,25 @@ static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) return NULL; } -static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait) +static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, struct bio *bio) { struct nullb_cmd *cmd; DEFINE_WAIT(wait); - cmd = __alloc_cmd(nq); - if (cmd || !can_wait) - return cmd; - do { - prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE); + /* + * This avoids multiple return statements, multiple calls to + * __alloc_cmd() and a fast path call to prepare_to_wait(). 
+ */ cmd = __alloc_cmd(nq); - if (cmd) - break; - + if (cmd) { + cmd->bio = bio; + return cmd; + } + prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE); io_schedule(); + finish_wait(&nq->wait, &wait); } while (1); - - finish_wait(&nq->wait, &wait); - return cmd; } static void end_cmd(struct nullb_cmd *cmd) @@ -777,24 +777,22 @@ static void null_complete_rq(struct request *rq) end_cmd(blk_mq_rq_to_pdu(rq)); } -static struct nullb_page *null_alloc_page(gfp_t gfp_flags) +static struct nullb_page *null_alloc_page(void) { struct nullb_page *t_page; - t_page = kmalloc(sizeof(struct nullb_page), gfp_flags); + t_page = kmalloc(sizeof(struct nullb_page), GFP_NOIO); if (!t_page) - goto out; + return NULL; - t_page->page = alloc_pages(gfp_flags, 0); - if (!t_page->page) - goto out_freepage; + t_page->page = alloc_pages(GFP_NOIO, 0); + if (!t_page->page) { + kfree(t_page); + return NULL; + } memset(t_page->bitmap, 0, sizeof(t_page->bitmap)); return t_page; -out_freepage: - kfree(t_page); -out: - return NULL; } static void null_free_page(struct nullb_page *t_page) @@ -932,7 +930,7 @@ static struct nullb_page *null_insert_page(struct nullb *nullb, spin_unlock_irq(&nullb->lock); - t_page = null_alloc_page(GFP_NOIO); + t_page = null_alloc_page(); if (!t_page) goto out_lock; @@ -1476,12 +1474,8 @@ static void null_submit_bio(struct bio *bio) sector_t nr_sectors = bio_sectors(bio); struct nullb *nullb = bio->bi_bdev->bd_disk->private_data; struct nullb_queue *nq = nullb_to_queue(nullb); - struct nullb_cmd *cmd; - - cmd = alloc_cmd(nq, 1); - cmd->bio = bio; - null_handle_cmd(cmd, sector, nr_sectors, bio_op(bio)); + null_handle_cmd(alloc_cmd(nq, bio), sector, nr_sectors, bio_op(bio)); } static bool should_timeout_request(struct request *rq) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 58304f978e10..2f378684b735 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -23,7 +23,6 @@ MODULE_LICENSE("GPL"); static int rnbd_client_major; static DEFINE_IDA(index_ida); -static DEFINE_MUTEX(ida_lock); static DEFINE_MUTEX(sess_lock); static LIST_HEAD(sess_list); @@ -55,9 +54,7 @@ static void rnbd_clt_put_dev(struct rnbd_clt_dev *dev) if (!refcount_dec_and_test(&dev->refcount)) return; - mutex_lock(&ida_lock); - ida_simple_remove(&index_ida, dev->clt_device_id); - mutex_unlock(&ida_lock); + ida_free(&index_ida, dev->clt_device_id); kfree(dev->hw_queues); kfree(dev->pathname); rnbd_clt_put_sess(dev->sess); @@ -87,7 +84,6 @@ static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev, dev->discard_granularity = le32_to_cpu(rsp->discard_granularity); dev->discard_alignment = le32_to_cpu(rsp->discard_alignment); dev->secure_discard = le16_to_cpu(rsp->secure_discard); - dev->rotational = rsp->rotational; dev->wc = !!(rsp->cache_policy & RNBD_WRITEBACK); dev->fua = !!(rsp->cache_policy & RNBD_FUA); @@ -1262,9 +1258,9 @@ find_and_get_or_create_sess(const char *sessname, struct rtrs_clt_ops rtrs_ops; sess = find_or_create_sess(sessname, &first); - if (sess == ERR_PTR(-ENOMEM)) + if (sess == ERR_PTR(-ENOMEM)) { return ERR_PTR(-ENOMEM); - else if ((nr_poll_queues && !first) || (!nr_poll_queues && sess->nr_poll_queues)) { + } else if ((nr_poll_queues && !first) || (!nr_poll_queues && sess->nr_poll_queues)) { /* * A device MUST have its own session to use the polling-mode. * It must fail to map new device with the same session. 
@@ -1410,8 +1406,10 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx) dev->read_only = false; } - if (!dev->rotational) - blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue); + /* + * Network device does not need rotational + */ + blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue); err = add_disk(dev->gd); if (err) blk_cleanup_disk(dev->gd); @@ -1459,10 +1457,8 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess, goto out_alloc; } - mutex_lock(&ida_lock); - ret = ida_simple_get(&index_ida, 0, 1 << (MINORBITS - RNBD_PART_BITS), - GFP_KERNEL); - mutex_unlock(&ida_lock); + ret = ida_alloc_max(&index_ida, 1 << (MINORBITS - RNBD_PART_BITS), + GFP_KERNEL); if (ret < 0) { pr_err("Failed to initialize device '%s' from session %s, allocating idr failed, err: %d\n", pathname, sess->sessname, ret); @@ -1610,13 +1606,13 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname, } rnbd_clt_info(dev, - "map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_write_same_sectors: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, rotational: %d, wc: %d, fua: %d)\n", + "map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_write_same_sectors: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, wc: %d, fua: %d)\n", dev->gd->disk_name, dev->nsectors, dev->logical_block_size, dev->physical_block_size, dev->max_write_same_sectors, dev->max_discard_sectors, dev->discard_granularity, dev->discard_alignment, dev->secure_discard, dev->max_segments, - dev->max_hw_sectors, dev->rotational, dev->wc, dev->fua); + dev->max_hw_sectors, dev->wc, dev->fua); mutex_unlock(&dev->lock); rnbd_clt_put_sess(sess); diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h index 0c2cae7f39b9..62bf7c3fa63c 100644 --- a/drivers/block/rnbd/rnbd-clt.h +++ b/drivers/block/rnbd/rnbd-clt.h @@ -118,7 +118,6 @@ struct rnbd_clt_dev { enum rnbd_access_mode access_mode; u32 nr_poll_queues; bool read_only; - bool rotational; bool wc; bool fua; u32 max_hw_sectors; diff --git a/drivers/block/rnbd/rnbd-proto.h b/drivers/block/rnbd/rnbd-proto.h index de5d5a8df81d..c4a68b3a1cbe 100644 --- a/drivers/block/rnbd/rnbd-proto.h +++ b/drivers/block/rnbd/rnbd-proto.h @@ -128,7 +128,7 @@ enum rnbd_cache_policy { * @logical_block_size: logical block size device supports in bytes * @max_segments: max segments hardware support in one transfer * @secure_discard: supports secure discard - * @rotation: is a rotational disc? + * @obsolete_rotational: obsolete, not in used. * @cache_policy: support write-back caching or FUA? 
*/ struct rnbd_msg_open_rsp { @@ -144,7 +144,7 @@ struct rnbd_msg_open_rsp { __le16 logical_block_size; __le16 max_segments; __le16 secure_discard; - u8 rotational; + u8 obsolete_rotational; u8 cache_policy; u8 reserved[10]; }; diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index 132e950685d5..6499efae5c43 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -558,7 +558,6 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp, cpu_to_le32(rnbd_dev_get_discard_alignment(rnbd_dev)); rsp->secure_discard = cpu_to_le16(rnbd_dev_get_secure_discard(rnbd_dev)); - rsp->rotational = !blk_queue_nonrot(q); rsp->cache_policy = 0; if (test_bit(QUEUE_FLAG_WC, &q->queue_flags)) rsp->cache_policy |= RNBD_WRITEBACK; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 35399a9eae0e..a8bcf3f664af 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1020,7 +1020,7 @@ static struct virtio_driver virtio_blk = { #endif }; -static int __init init(void) +static int __init virtio_blk_init(void) { int error; @@ -1046,14 +1046,14 @@ out_destroy_workqueue: return error; } -static void __exit fini(void) +static void __exit virtio_blk_fini(void) { unregister_virtio_driver(&virtio_blk); unregister_blkdev(major, "virtblk"); destroy_workqueue(virtblk_wq); } -module_init(init); -module_exit(fini); +module_init(virtio_blk_init); +module_exit(virtio_blk_fini); MODULE_DEVICE_TABLE(virtio, id_table); MODULE_DESCRIPTION("Virtio block driver"); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 03b5fb341e58..85fc550508cc 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2533,6 +2533,7 @@ static void purge_persistent_grants(struct blkfront_info *info) for_each_rinfo(info, rinfo, i) { struct grant *gnt_list_entry, *tmp; + LIST_HEAD(grants); spin_lock_irqsave(&rinfo->ring_lock, flags); @@ -2550,9 +2551,11 @@ static void purge_persistent_grants(struct blkfront_info *info) list_del(&gnt_list_entry->node); rinfo->persistent_gnts_c--; gnt_list_entry->gref = GRANT_INVALID_REF; - list_add_tail(&gnt_list_entry->node, &rinfo->grants); + list_add_tail(&gnt_list_entry->node, &grants); } + list_splice_tail(&grants, &rinfo->grants); + spin_unlock_irqrestore(&rinfo->ring_lock, flags); } } diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a3a5e1e71326..e9474b02012d 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1331,12 +1331,10 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, goto out; if (is_partial_io(bvec)) { - void *dst = kmap_atomic(bvec->bv_page); void *src = kmap_atomic(page); - memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len); + memcpy_to_bvec(bvec, src + offset); kunmap_atomic(src); - kunmap_atomic(dst); } out: if (is_partial_io(bvec)) @@ -1467,7 +1465,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, { int ret; struct page *page = NULL; - void *src; struct bio_vec vec; vec = *bvec; @@ -1485,11 +1482,9 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, if (ret) goto out; - src = kmap_atomic(bvec->bv_page); dst = kmap_atomic(page); - memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len); + memcpy_from_bvec(dst + offset, bvec); kunmap_atomic(dst); - kunmap_atomic(src); vec.bv_page = page; vec.bv_len = PAGE_SIZE; diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 88c573eeb598..ad9f16689419 100644 --- 
a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -2060,9 +2060,11 @@ int bch_btree_check(struct cache_set *c) } } + /* + * Must wait for all threads to stop. + */ wait_event_interruptible(check_state->wait, - atomic_read(&check_state->started) == 0 || - test_bit(CACHE_SET_IO_DISABLE, &c->flags)); + atomic_read(&check_state->started) == 0); for (i = 0; i < check_state->total_threads; i++) { if (check_state->infos[i].result) { diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 6869e010475a..fdd0194f84dd 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -44,10 +44,10 @@ static void bio_csum(struct bio *bio, struct bkey *k) uint64_t csum = 0; bio_for_each_segment(bv, bio, iter) { - void *d = kmap(bv.bv_page) + bv.bv_offset; + void *d = bvec_kmap_local(&bv); csum = crc64_be(csum, d, bv.bv_len); - kunmap(bv.bv_page); + kunmap_local(d); } k->ptr[KEY_PTRS(k)] = csum & (~0ULL >> 1); diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index d42301e6309d..9ee0005874cd 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -585,10 +585,13 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode, sectors_dirty = atomic_add_return(s, d->stripe_sectors_dirty + stripe); - if (sectors_dirty == d->stripe_size) - set_bit(stripe, d->full_dirty_stripes); - else - clear_bit(stripe, d->full_dirty_stripes); + if (sectors_dirty == d->stripe_size) { + if (!test_bit(stripe, d->full_dirty_stripes)) + set_bit(stripe, d->full_dirty_stripes); + } else { + if (test_bit(stripe, d->full_dirty_stripes)) + clear_bit(stripe, d->full_dirty_stripes); + } nr_sectors -= s; stripe_offset = 0; @@ -998,9 +1001,11 @@ void bch_sectors_dirty_init(struct bcache_device *d) } } + /* + * Must wait for all threads to stop. + */ wait_event_interruptible(state->wait, - atomic_read(&state->started) == 0 || - test_bit(CACHE_SET_IO_DISABLE, &c->flags)); + atomic_read(&state->started) == 0); out: kfree(state); diff --git a/drivers/md/md.c b/drivers/md/md.c index f210a55af201..309b3af906ad 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9582,7 +9582,7 @@ static int md_notify_reboot(struct notifier_block *this, * driver, we do want to have a safe RAID driver ... 
*/ if (need_delay) - mdelay(1000*1); + msleep(1000); return NOTIFY_DONE; } diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c index 83f9a4f3d82e..e61f6cad4e08 100644 --- a/drivers/md/raid1-10.c +++ b/drivers/md/raid1-10.c @@ -28,6 +28,11 @@ struct resync_pages { struct page *pages[RESYNC_PAGES]; }; +struct raid1_plug_cb { + struct blk_plug_cb cb; + struct bio_list pending; +}; + static void rbio_pool_free(void *rbio, void *data) { kfree(rbio); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 03477e971699..934186724d21 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -824,7 +824,6 @@ static void flush_pending_writes(struct r1conf *conf) struct bio *bio; bio = bio_list_get(&conf->pending_bio_list); - conf->pending_count = 0; spin_unlock_irq(&conf->device_lock); /* @@ -1167,12 +1166,6 @@ free_pages: bio_put(behind_bio); } -struct raid1_plug_cb { - struct blk_plug_cb cb; - struct bio_list pending; - int pending_cnt; -}; - static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule) { struct raid1_plug_cb *plug = container_of(cb, struct raid1_plug_cb, @@ -1184,7 +1177,6 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule) if (from_schedule || current->bio_list) { spin_lock_irq(&conf->device_lock); bio_list_merge(&conf->pending_bio_list, &plug->pending); - conf->pending_count += plug->pending_cnt; spin_unlock_irq(&conf->device_lock); wake_up(&conf->wait_barrier); md_wakeup_thread(mddev->thread); @@ -1588,11 +1580,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, plug = NULL; if (plug) { bio_list_add(&plug->pending, mbio); - plug->pending_cnt++; } else { spin_lock_irqsave(&conf->device_lock, flags); bio_list_add(&conf->pending_bio_list, mbio); - conf->pending_count++; spin_unlock_irqrestore(&conf->device_lock, flags); md_wakeup_thread(mddev->thread); } @@ -3057,7 +3047,6 @@ static struct r1conf *setup_conf(struct mddev *mddev) init_waitqueue_head(&conf->wait_barrier); bio_list_init(&conf->pending_bio_list); - conf->pending_count = 0; conf->recovery_disabled = mddev->recovery_disabled - 1; err = -EIO; diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index ccf10e59b116..ebb6788820e7 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -87,7 +87,6 @@ struct r1conf { /* queue pending writes to be submitted on unplug */ struct bio_list pending_bio_list; - int pending_count; /* for use when syncing mirrors: * We don't allow both normal IO and resync/recovery IO at diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5dd2e17e1d0e..b369ebb965a9 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -861,7 +861,6 @@ static void flush_pending_writes(struct r10conf *conf) struct bio *bio; bio = bio_list_get(&conf->pending_bio_list); - conf->pending_count = 0; spin_unlock_irq(&conf->device_lock); /* @@ -1054,16 +1053,9 @@ static sector_t choose_data_offset(struct r10bio *r10_bio, return rdev->new_data_offset; } -struct raid10_plug_cb { - struct blk_plug_cb cb; - struct bio_list pending; - int pending_cnt; -}; - static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) { - struct raid10_plug_cb *plug = container_of(cb, struct raid10_plug_cb, - cb); + struct raid1_plug_cb *plug = container_of(cb, struct raid1_plug_cb, cb); struct mddev *mddev = plug->cb.data; struct r10conf *conf = mddev->private; struct bio *bio; @@ -1071,7 +1063,6 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) if (from_schedule || current->bio_list) { spin_lock_irq(&conf->device_lock); 
bio_list_merge(&conf->pending_bio_list, &plug->pending); - conf->pending_count += plug->pending_cnt; spin_unlock_irq(&conf->device_lock); wake_up(&conf->wait_barrier); md_wakeup_thread(mddev->thread); @@ -1238,7 +1229,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, const unsigned long do_fua = (bio->bi_opf & REQ_FUA); unsigned long flags; struct blk_plug_cb *cb; - struct raid10_plug_cb *plug = NULL; + struct raid1_plug_cb *plug = NULL; struct r10conf *conf = mddev->private; struct md_rdev *rdev; int devnum = r10_bio->devs[n_copy].devnum; @@ -1280,16 +1271,14 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug)); if (cb) - plug = container_of(cb, struct raid10_plug_cb, cb); + plug = container_of(cb, struct raid1_plug_cb, cb); else plug = NULL; if (plug) { bio_list_add(&plug->pending, mbio); - plug->pending_cnt++; } else { spin_lock_irqsave(&conf->device_lock, flags); bio_list_add(&conf->pending_bio_list, mbio); - conf->pending_count++; spin_unlock_irqrestore(&conf->device_lock, flags); md_wakeup_thread(mddev->thread); } diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index c34bb196790e..5c0804d8bb1f 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -75,7 +75,6 @@ struct r10conf { /* queue pending writes and submit them on unplug */ struct bio_list pending_bio_list; - int pending_count; spinlock_t resync_lock; atomic_t nr_pending; diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 86e2bb89d9c7..a7d50ff9020a 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1266,6 +1266,8 @@ static void r5l_log_flush_endio(struct bio *bio) r5l_io_run_stripes(io); list_splice_tail_init(&log->flushing_ios, &log->finished_ios); spin_unlock_irqrestore(&log->io_list_lock, flags); + + bio_uninit(bio); } /* @@ -1301,7 +1303,7 @@ void r5l_flush_stripe_to_raid(struct r5l_log *log) if (!do_flush) return; - bio_reset(&log->flush_bio, log->rdev->bdev, + bio_init(&log->flush_bio, log->rdev->bdev, NULL, 0, REQ_OP_WRITE | REQ_PREFLUSH); log->flush_bio.bi_end_io = r5l_log_flush_endio; submit_bio(&log->flush_bio); @@ -1621,10 +1623,10 @@ struct r5l_recovery_ctx { * just copy data from the pool. 
*/ struct page *ra_pool[R5L_RECOVERY_PAGE_POOL_SIZE]; + struct bio_vec ra_bvec[R5L_RECOVERY_PAGE_POOL_SIZE]; sector_t pool_offset; /* offset of first page in the pool */ int total_pages; /* total allocated pages */ int valid_pages; /* pages with valid data */ - struct bio *ra_bio; /* bio to do the read ahead */ }; static int r5l_recovery_allocate_ra_pool(struct r5l_log *log, @@ -1632,11 +1634,6 @@ static int r5l_recovery_allocate_ra_pool(struct r5l_log *log, { struct page *page; - ctx->ra_bio = bio_alloc_bioset(NULL, BIO_MAX_VECS, 0, GFP_KERNEL, - &log->bs); - if (!ctx->ra_bio) - return -ENOMEM; - ctx->valid_pages = 0; ctx->total_pages = 0; while (ctx->total_pages < R5L_RECOVERY_PAGE_POOL_SIZE) { @@ -1648,10 +1645,8 @@ static int r5l_recovery_allocate_ra_pool(struct r5l_log *log, ctx->total_pages += 1; } - if (ctx->total_pages == 0) { - bio_put(ctx->ra_bio); + if (ctx->total_pages == 0) return -ENOMEM; - } ctx->pool_offset = 0; return 0; @@ -1664,7 +1659,6 @@ static void r5l_recovery_free_ra_pool(struct r5l_log *log, for (i = 0; i < ctx->total_pages; ++i) put_page(ctx->ra_pool[i]); - bio_put(ctx->ra_bio); } /* @@ -1677,15 +1671,19 @@ static int r5l_recovery_fetch_ra_pool(struct r5l_log *log, struct r5l_recovery_ctx *ctx, sector_t offset) { - bio_reset(ctx->ra_bio, log->rdev->bdev, REQ_OP_READ); - ctx->ra_bio->bi_iter.bi_sector = log->rdev->data_offset + offset; + struct bio bio; + int ret; + + bio_init(&bio, log->rdev->bdev, ctx->ra_bvec, + R5L_RECOVERY_PAGE_POOL_SIZE, REQ_OP_READ); + bio.bi_iter.bi_sector = log->rdev->data_offset + offset; ctx->valid_pages = 0; ctx->pool_offset = offset; while (ctx->valid_pages < ctx->total_pages) { - bio_add_page(ctx->ra_bio, - ctx->ra_pool[ctx->valid_pages], PAGE_SIZE, 0); + __bio_add_page(&bio, ctx->ra_pool[ctx->valid_pages], PAGE_SIZE, + 0); ctx->valid_pages += 1; offset = r5l_ring_add(log, offset, BLOCK_SECTORS); @@ -1694,7 +1692,9 @@ static int r5l_recovery_fetch_ra_pool(struct r5l_log *log, break; } - return submit_bio_wait(ctx->ra_bio); + ret = submit_bio_wait(&bio); + bio_uninit(&bio); + return ret; } /* @@ -3105,7 +3105,6 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) INIT_LIST_HEAD(&log->io_end_ios); INIT_LIST_HEAD(&log->flushing_ios); INIT_LIST_HEAD(&log->finished_ios); - bio_init(&log->flush_bio, NULL, NULL, 0, 0); log->io_kc = KMEM_CACHE(r5l_io_unit, 0); if (!log->io_kc) diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index bbb5673104ec..ea4cd8dd4dc3 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -250,7 +250,8 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log, INIT_LIST_HEAD(&io->stripe_list); atomic_set(&io->pending_stripes, 0); atomic_set(&io->pending_flushes, 0); - bio_init(&io->bio, NULL, io->biovec, PPL_IO_INLINE_BVECS, 0); + bio_init(&io->bio, log->rdev->bdev, io->biovec, PPL_IO_INLINE_BVECS, + REQ_OP_WRITE | REQ_FUA); pplhdr = page_address(io->header_page); clear_page(pplhdr); @@ -463,8 +464,6 @@ static void ppl_submit_iounit(struct ppl_io_unit *io) bio->bi_end_io = ppl_log_endio; - bio->bi_opf = REQ_OP_WRITE | REQ_FUA; - bio_set_dev(bio, log->rdev->bdev); bio->bi_iter.bi_sector = log->next_io_sector; bio_add_page(bio, io->header_page, PAGE_SIZE, 0); bio->bi_write_hint = ppl_conf->write_hint; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 8891aaba6596..8bd5f06390ea 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1060,6 +1060,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) int i, disks = sh->disks; struct 
stripe_head *head_sh = sh; struct bio_list pending_bios = BIO_EMPTY_LIST; + struct r5dev *dev; bool should_defer; might_sleep(); @@ -1094,8 +1095,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) op_flags |= REQ_SYNC; again: - bi = &sh->dev[i].req; - rbi = &sh->dev[i].rreq; /* For writing to replacement */ + dev = &sh->dev[i]; + bi = &dev->req; + rbi = &dev->rreq; /* For writing to replacement */ rcu_read_lock(); rrdev = rcu_dereference(conf->disks[i].replacement); @@ -1171,8 +1173,7 @@ again: set_bit(STRIPE_IO_STARTED, &sh->state); - bio_set_dev(bi, rdev->bdev); - bio_set_op_attrs(bi, op, op_flags); + bio_init(bi, rdev->bdev, &dev->vec, 1, op | op_flags); bi->bi_end_io = op_is_write(op) ? raid5_end_write_request : raid5_end_read_request; @@ -1238,8 +1239,7 @@ again: set_bit(STRIPE_IO_STARTED, &sh->state); - bio_set_dev(rbi, rrdev->bdev); - bio_set_op_attrs(rbi, op, op_flags); + bio_init(rbi, rrdev->bdev, &dev->rvec, 1, op | op_flags); BUG_ON(!op_is_write(op)); rbi->bi_end_io = raid5_end_write_request; rbi->bi_private = sh; @@ -2294,7 +2294,6 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, int disks, struct r5conf *conf) { struct stripe_head *sh; - int i; sh = kmem_cache_zalloc(sc, gfp); if (sh) { @@ -2307,12 +2306,6 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, atomic_set(&sh->count, 1); sh->raid_conf = conf; sh->log_start = MaxSector; - for (i = 0; i < disks; i++) { - struct r5dev *dev = &sh->dev[i]; - - bio_init(&dev->req, NULL, &dev->vec, 1, 0); - bio_init(&dev->rreq, NULL, &dev->rvec, 1, 0); - } if (raid5_has_ppl(conf)) { sh->ppl_page = alloc_page(gfp); @@ -2677,7 +2670,6 @@ static void raid5_end_read_request(struct bio * bi) (unsigned long long)sh->sector, i, atomic_read(&sh->count), bi->bi_status); if (i == disks) { - bio_reset(bi, NULL, 0); BUG(); return; } @@ -2785,7 +2777,7 @@ static void raid5_end_read_request(struct bio * bi) } } rdev_dec_pending(rdev, conf->mddev); - bio_reset(bi, NULL, 0); + bio_uninit(bi); clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); raid5_release_stripe(sh); @@ -2823,7 +2815,6 @@ static void raid5_end_write_request(struct bio *bi) (unsigned long long)sh->sector, i, atomic_read(&sh->count), bi->bi_status); if (i == disks) { - bio_reset(bi, NULL, 0); BUG(); return; } @@ -2860,7 +2851,7 @@ static void raid5_end_write_request(struct bio *bi) if (sh->batch_head && bi->bi_status && !replacement) set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state); - bio_reset(bi, NULL, 0); + bio_uninit(bi); if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags)) clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index c1db43524d75..0a3873833594 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -88,10 +88,9 @@ static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk, */ cur_len = min(len, bv.bv_len); - iobuf = kmap_atomic(bv.bv_page); - err = ndbr->do_io(ndbr, dev_offset, iobuf + bv.bv_offset, - cur_len, rw); - kunmap_atomic(iobuf); + iobuf = bvec_kmap_local(&bv); + err = ndbr->do_io(ndbr, dev_offset, iobuf, cur_len, rw); + kunmap_local(iobuf); if (err) return err; diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index cbd994f7f1fe..9613e54c7a67 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1163,17 +1163,15 @@ static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip, */ cur_len = min(len, bv.bv_len); - mem = 
kmap_atomic(bv.bv_page); + mem = bvec_kmap_local(&bv); if (rw) - ret = arena_write_bytes(arena, meta_nsoff, - mem + bv.bv_offset, cur_len, + ret = arena_write_bytes(arena, meta_nsoff, mem, cur_len, NVDIMM_IO_ATOMIC); else - ret = arena_read_bytes(arena, meta_nsoff, - mem + bv.bv_offset, cur_len, + ret = arena_read_bytes(arena, meta_nsoff, mem, cur_len, NVDIMM_IO_ATOMIC); - kunmap_atomic(mem); + kunmap_local(mem); if (ret) return ret; diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index dc0450ca23a3..d6d056963c06 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -24,6 +24,14 @@ config NVME_MULTIPATH /dev/nvmeXnY device will show up for each NVMe namespace, even if it is accessible through multiple controllers. +config NVME_VERBOSE_ERRORS + bool "NVMe verbose error reporting" + depends on NVME_CORE + help + This option enables verbose reporting for NVMe errors. The + error translation table will grow the kernel image size by + about 4 KB. + config NVME_HWMON bool "NVMe hardware monitoring" depends on (NVME_CORE=y && HWMON=y) || (NVME_CORE=m && HWMON) diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index dfaacd472e5d..476c5c988496 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_NVME_RDMA) += nvme-rdma.o obj-$(CONFIG_NVME_FC) += nvme-fc.o obj-$(CONFIG_NVME_TCP) += nvme-tcp.o -nvme-core-y := core.o ioctl.o +nvme-core-y := core.o ioctl.o constants.o nvme-core-$(CONFIG_TRACING) += trace.o nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o nvme-core-$(CONFIG_BLK_DEV_ZONED) += zns.o diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c new file mode 100644 index 000000000000..7d49eb34b348 --- /dev/null +++ b/drivers/nvme/host/constants.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NVM Express device driver verbose errors + * Copyright (c) 2022, Oracle and/or its affiliates + */ + +#include <linux/blkdev.h> +#include "nvme.h" + +#ifdef CONFIG_NVME_VERBOSE_ERRORS +static const char * const nvme_ops[] = { + [nvme_cmd_flush] = "Flush", + [nvme_cmd_write] = "Write", + [nvme_cmd_read] = "Read", + [nvme_cmd_write_uncor] = "Write Uncorrectable", + [nvme_cmd_compare] = "Compare", + [nvme_cmd_write_zeroes] = "Write Zeros", + [nvme_cmd_dsm] = "Dataset Management", + [nvme_cmd_verify] = "Verify", + [nvme_cmd_resv_register] = "Reservation Register", + [nvme_cmd_resv_report] = "Reservation Report", + [nvme_cmd_resv_acquire] = "Reservation Acquire", + [nvme_cmd_resv_release] = "Reservation Release", + [nvme_cmd_zone_mgmt_send] = "Zone Management Send", + [nvme_cmd_zone_mgmt_recv] = "Zone Management Receive", + [nvme_cmd_zone_append] = "Zone Management Append", +}; + +static const char * const nvme_admin_ops[] = { + [nvme_admin_delete_sq] = "Delete SQ", + [nvme_admin_create_sq] = "Create SQ", + [nvme_admin_get_log_page] = "Get Log Page", + [nvme_admin_delete_cq] = "Delete CQ", + [nvme_admin_create_cq] = "Create CQ", + [nvme_admin_identify] = "Identify", + [nvme_admin_abort_cmd] = "Abort Command", + [nvme_admin_set_features] = "Set Features", + [nvme_admin_get_features] = "Get Features", + [nvme_admin_async_event] = "Async Event", + [nvme_admin_ns_mgmt] = "Namespace Management", + [nvme_admin_activate_fw] = "Activate Firmware", + [nvme_admin_download_fw] = "Download Firmware", + [nvme_admin_dev_self_test] = "Device Self Test", + [nvme_admin_ns_attach] = "Namespace Attach", + [nvme_admin_keep_alive] = "Keep Alive", + [nvme_admin_directive_send] = "Directive 
Send", + [nvme_admin_directive_recv] = "Directive Receive", + [nvme_admin_virtual_mgmt] = "Virtual Management", + [nvme_admin_nvme_mi_send] = "NVMe Send MI", + [nvme_admin_nvme_mi_recv] = "NVMe Receive MI", + [nvme_admin_dbbuf] = "Doorbell Buffer Config", + [nvme_admin_format_nvm] = "Format NVM", + [nvme_admin_security_send] = "Security Send", + [nvme_admin_security_recv] = "Security Receive", + [nvme_admin_sanitize_nvm] = "Sanitize NVM", + [nvme_admin_get_lba_status] = "Get LBA Status", +}; + +static const char * const nvme_statuses[] = { + [NVME_SC_SUCCESS] = "Success", + [NVME_SC_INVALID_OPCODE] = "Invalid Command Opcode", + [NVME_SC_INVALID_FIELD] = "Invalid Field in Command", + [NVME_SC_CMDID_CONFLICT] = "Command ID Conflict", + [NVME_SC_DATA_XFER_ERROR] = "Data Transfer Error", + [NVME_SC_POWER_LOSS] = "Commands Aborted due to Power Loss Notification", + [NVME_SC_INTERNAL] = "Internal Error", + [NVME_SC_ABORT_REQ] = "Command Abort Requested", + [NVME_SC_ABORT_QUEUE] = "Command Aborted due to SQ Deletion", + [NVME_SC_FUSED_FAIL] = "Command Aborted due to Failed Fused Command", + [NVME_SC_FUSED_MISSING] = "Command Aborted due to Missing Fused Command", + [NVME_SC_INVALID_NS] = "Invalid Namespace or Format", + [NVME_SC_CMD_SEQ_ERROR] = "Command Sequence Error", + [NVME_SC_SGL_INVALID_LAST] = "Invalid SGL Segment Descriptor", + [NVME_SC_SGL_INVALID_COUNT] = "Invalid Number of SGL Descriptors", + [NVME_SC_SGL_INVALID_DATA] = "Data SGL Length Invalid", + [NVME_SC_SGL_INVALID_METADATA] = "Metadata SGL Length Invalid", + [NVME_SC_SGL_INVALID_TYPE] = "SGL Descriptor Type Invalid", + [NVME_SC_CMB_INVALID_USE] = "Invalid Use of Controller Memory Buffer", + [NVME_SC_PRP_INVALID_OFFSET] = "PRP Offset Invalid", + [NVME_SC_ATOMIC_WU_EXCEEDED] = "Atomic Write Unit Exceeded", + [NVME_SC_OP_DENIED] = "Operation Denied", + [NVME_SC_SGL_INVALID_OFFSET] = "SGL Offset Invalid", + [NVME_SC_RESERVED] = "Reserved", + [NVME_SC_HOST_ID_INCONSIST] = "Host Identifier Inconsistent Format", + [NVME_SC_KA_TIMEOUT_EXPIRED] = "Keep Alive Timeout Expired", + [NVME_SC_KA_TIMEOUT_INVALID] = "Keep Alive Timeout Invalid", + [NVME_SC_ABORTED_PREEMPT_ABORT] = "Command Aborted due to Preempt and Abort", + [NVME_SC_SANITIZE_FAILED] = "Sanitize Failed", + [NVME_SC_SANITIZE_IN_PROGRESS] = "Sanitize In Progress", + [NVME_SC_SGL_INVALID_GRANULARITY] = "SGL Data Block Granularity Invalid", + [NVME_SC_CMD_NOT_SUP_CMB_QUEUE] = "Command Not Supported for Queue in CMB", + [NVME_SC_NS_WRITE_PROTECTED] = "Namespace is Write Protected", + [NVME_SC_CMD_INTERRUPTED] = "Command Interrupted", + [NVME_SC_TRANSIENT_TR_ERR] = "Transient Transport Error", + [NVME_SC_INVALID_IO_CMD_SET] = "Invalid IO Command Set", + [NVME_SC_LBA_RANGE] = "LBA Out of Range", + [NVME_SC_CAP_EXCEEDED] = "Capacity Exceeded", + [NVME_SC_NS_NOT_READY] = "Namespace Not Ready", + [NVME_SC_RESERVATION_CONFLICT] = "Reservation Conflict", + [NVME_SC_FORMAT_IN_PROGRESS] = "Format In Progress", + [NVME_SC_CQ_INVALID] = "Completion Queue Invalid", + [NVME_SC_QID_INVALID] = "Invalid Queue Identifier", + [NVME_SC_QUEUE_SIZE] = "Invalid Queue Size", + [NVME_SC_ABORT_LIMIT] = "Abort Command Limit Exceeded", + [NVME_SC_ABORT_MISSING] = "Reserved", /* XXX */ + [NVME_SC_ASYNC_LIMIT] = "Asynchronous Event Request Limit Exceeded", + [NVME_SC_FIRMWARE_SLOT] = "Invalid Firmware Slot", + [NVME_SC_FIRMWARE_IMAGE] = "Invalid Firmware Image", + [NVME_SC_INVALID_VECTOR] = "Invalid Interrupt Vector", + [NVME_SC_INVALID_LOG_PAGE] = "Invalid Log Page", + [NVME_SC_INVALID_FORMAT] = "Invalid 
Format", + [NVME_SC_FW_NEEDS_CONV_RESET] = "Firmware Activation Requires Conventional Reset", + [NVME_SC_INVALID_QUEUE] = "Invalid Queue Deletion", + [NVME_SC_FEATURE_NOT_SAVEABLE] = "Feature Identifier Not Saveable", + [NVME_SC_FEATURE_NOT_CHANGEABLE] = "Feature Not Changeable", + [NVME_SC_FEATURE_NOT_PER_NS] = "Feature Not Namespace Specific", + [NVME_SC_FW_NEEDS_SUBSYS_RESET] = "Firmware Activation Requires NVM Subsystem Reset", + [NVME_SC_FW_NEEDS_RESET] = "Firmware Activation Requires Reset", + [NVME_SC_FW_NEEDS_MAX_TIME] = "Firmware Activation Requires Maximum Time Violation", + [NVME_SC_FW_ACTIVATE_PROHIBITED] = "Firmware Activation Prohibited", + [NVME_SC_OVERLAPPING_RANGE] = "Overlapping Range", + [NVME_SC_NS_INSUFFICIENT_CAP] = "Namespace Insufficient Capacity", + [NVME_SC_NS_ID_UNAVAILABLE] = "Namespace Identifier Unavailable", + [NVME_SC_NS_ALREADY_ATTACHED] = "Namespace Already Attached", + [NVME_SC_NS_IS_PRIVATE] = "Namespace Is Private", + [NVME_SC_NS_NOT_ATTACHED] = "Namespace Not Attached", + [NVME_SC_THIN_PROV_NOT_SUPP] = "Thin Provisioning Not Supported", + [NVME_SC_CTRL_LIST_INVALID] = "Controller List Invalid", + [NVME_SC_SELT_TEST_IN_PROGRESS] = "Device Self-test In Progress", + [NVME_SC_BP_WRITE_PROHIBITED] = "Boot Partition Write Prohibited", + [NVME_SC_CTRL_ID_INVALID] = "Invalid Controller Identifier", + [NVME_SC_SEC_CTRL_STATE_INVALID] = "Invalid Secondary Controller State", + [NVME_SC_CTRL_RES_NUM_INVALID] = "Invalid Number of Controller Resources", + [NVME_SC_RES_ID_INVALID] = "Invalid Resource Identifier", + [NVME_SC_PMR_SAN_PROHIBITED] = "Sanitize Prohibited", + [NVME_SC_ANA_GROUP_ID_INVALID] = "ANA Group Identifier Invalid", + [NVME_SC_ANA_ATTACH_FAILED] = "ANA Attach Failed", + [NVME_SC_BAD_ATTRIBUTES] = "Conflicting Attributes", + [NVME_SC_INVALID_PI] = "Invalid Protection Information", + [NVME_SC_READ_ONLY] = "Attempted Write to Read Only Range", + [NVME_SC_ONCS_NOT_SUPPORTED] = "ONCS Not Supported", + [NVME_SC_ZONE_BOUNDARY_ERROR] = "Zoned Boundary Error", + [NVME_SC_ZONE_FULL] = "Zone Is Full", + [NVME_SC_ZONE_READ_ONLY] = "Zone Is Read Only", + [NVME_SC_ZONE_OFFLINE] = "Zone Is Offline", + [NVME_SC_ZONE_INVALID_WRITE] = "Zone Invalid Write", + [NVME_SC_ZONE_TOO_MANY_ACTIVE] = "Too Many Active Zones", + [NVME_SC_ZONE_TOO_MANY_OPEN] = "Too Many Open Zones", + [NVME_SC_ZONE_INVALID_TRANSITION] = "Invalid Zone State Transition", + [NVME_SC_WRITE_FAULT] = "Write Fault", + [NVME_SC_READ_ERROR] = "Unrecovered Read Error", + [NVME_SC_GUARD_CHECK] = "End-to-end Guard Check Error", + [NVME_SC_APPTAG_CHECK] = "End-to-end Application Tag Check Error", + [NVME_SC_REFTAG_CHECK] = "End-to-end Reference Tag Check Error", + [NVME_SC_COMPARE_FAILED] = "Compare Failure", + [NVME_SC_ACCESS_DENIED] = "Access Denied", + [NVME_SC_UNWRITTEN_BLOCK] = "Deallocated or Unwritten Logical Block", + [NVME_SC_ANA_PERSISTENT_LOSS] = "Asymmetric Access Persistent Loss", + [NVME_SC_ANA_INACCESSIBLE] = "Asymmetric Access Inaccessible", + [NVME_SC_ANA_TRANSITION] = "Asymmetric Access Transition", + [NVME_SC_HOST_PATH_ERROR] = "Host Pathing Error", +}; + +const unsigned char *nvme_get_error_status_str(u16 status) +{ + status &= 0x7ff; + if (status < ARRAY_SIZE(nvme_statuses) && nvme_statuses[status]) + return nvme_statuses[status & 0x7ff]; + return "Unknown"; +} + +const unsigned char *nvme_get_opcode_str(u8 opcode) +{ + if (opcode < ARRAY_SIZE(nvme_ops) && nvme_ops[opcode]) + return nvme_ops[opcode]; + return "Unknown"; +} + +const unsigned char *nvme_get_admin_opcode_str(u8 opcode) +{ + 
if (opcode < ARRAY_SIZE(nvme_admin_ops) && nvme_admin_ops[opcode]) + return nvme_admin_ops[opcode]; + return "Unknown"; +} +#endif /* CONFIG_NVME_VERBOSE_ERRORS */ diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fd4720d37cc0..cd6eac8e3dd6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -299,6 +299,37 @@ static void nvme_retry_req(struct request *req) blk_mq_delay_kick_requeue_list(req->q, delay); } +static void nvme_log_error(struct request *req) +{ + struct nvme_ns *ns = req->q->queuedata; + struct nvme_request *nr = nvme_req(req); + + if (ns) { + pr_err_ratelimited("%s: %s(0x%x) @ LBA %llu, %llu blocks, %s (sct 0x%x / sc 0x%x) %s%s\n", + ns->disk ? ns->disk->disk_name : "?", + nvme_get_opcode_str(nr->cmd->common.opcode), + nr->cmd->common.opcode, + (unsigned long long)nvme_sect_to_lba(ns, blk_rq_pos(req)), + (unsigned long long)blk_rq_bytes(req) >> ns->lba_shift, + nvme_get_error_status_str(nr->status), + nr->status >> 8 & 7, /* Status Code Type */ + nr->status & 0xff, /* Status Code */ + nr->status & NVME_SC_MORE ? "MORE " : "", + nr->status & NVME_SC_DNR ? "DNR " : ""); + return; + } + + pr_err_ratelimited("%s: %s(0x%x), %s (sct 0x%x / sc 0x%x) %s%s\n", + dev_name(nr->ctrl->device), + nvme_get_admin_opcode_str(nr->cmd->common.opcode), + nr->cmd->common.opcode, + nvme_get_error_status_str(nr->status), + nr->status >> 8 & 7, /* Status Code Type */ + nr->status & 0xff, /* Status Code */ + nr->status & NVME_SC_MORE ? "MORE " : "", + nr->status & NVME_SC_DNR ? "DNR " : ""); +} + enum nvme_disposition { COMPLETE, RETRY, @@ -339,6 +370,8 @@ static inline void nvme_end_req(struct request *req) { blk_status_t status = nvme_error_status(nvme_req(req)->status); + if (unlikely(nvme_req(req)->status != NVME_SC_SUCCESS)) + nvme_log_error(req); nvme_end_req_zoned(req); nvme_trace_bio_complete(req); blk_mq_end_request(req, status); @@ -562,7 +595,7 @@ static void nvme_free_ns_head(struct kref *ref) container_of(ref, struct nvme_ns_head, ref); nvme_mpath_remove_disk(head); - ida_simple_remove(&head->subsys->ns_ida, head->instance); + ida_free(&head->subsys->ns_ida, head->instance); cleanup_srcu_struct(&head->srcu); nvme_put_subsystem(head->subsys); kfree(head); @@ -607,13 +640,8 @@ static inline void nvme_clear_nvme_request(struct request *req) req->rq_flags |= RQF_DONTPREP; } -static inline unsigned int nvme_req_op(struct nvme_command *cmd) -{ - return nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN; -} - -static inline void nvme_init_request(struct request *req, - struct nvme_command *cmd) +/* initialize a passthrough request */ +void nvme_init_request(struct request *req, struct nvme_command *cmd) { if (req->q->queuedata) req->timeout = NVME_IO_TIMEOUT; @@ -629,30 +657,7 @@ static inline void nvme_init_request(struct request *req, nvme_clear_nvme_request(req); memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd)); } - -struct request *nvme_alloc_request(struct request_queue *q, - struct nvme_command *cmd, blk_mq_req_flags_t flags) -{ - struct request *req; - - req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags); - if (!IS_ERR(req)) - nvme_init_request(req, cmd); - return req; -} -EXPORT_SYMBOL_GPL(nvme_alloc_request); - -static struct request *nvme_alloc_request_qid(struct request_queue *q, - struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid) -{ - struct request *req; - - req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags, - qid ? 
qid - 1 : 0); - if (!IS_ERR(req)) - nvme_init_request(req, cmd); - return req; -} +EXPORT_SYMBOL_GPL(nvme_init_request); /* * For something we're not in a state to send to the device the default action @@ -758,6 +763,7 @@ static int nvme_get_stream_params(struct nvme_ctrl *ctrl, static int nvme_configure_directives(struct nvme_ctrl *ctrl) { struct streams_directive_params s; + u16 nssa; int ret; if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES)) @@ -773,14 +779,16 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl) if (ret) goto out_disable_stream; - ctrl->nssa = le16_to_cpu(s.nssa); - if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) { + nssa = le16_to_cpu(s.nssa); + if (nssa < BLK_MAX_WRITE_HINTS - 1) { dev_info(ctrl->device, "too few streams (%u) available\n", - ctrl->nssa); + nssa); + /* this condition is not an error: streams are optional */ + ret = 0; goto out_disable_stream; } - ctrl->nr_streams = min_t(u16, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1); + ctrl->nr_streams = min_t(u16, nssa, BLK_MAX_WRITE_HINTS - 1); dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams); return 0; @@ -1050,8 +1058,7 @@ EXPORT_SYMBOL_GPL(nvme_setup_cmd); * >0: nvme controller's cqe status response * <0: kernel error in lieu of controller response */ -static int nvme_execute_rq(struct gendisk *disk, struct request *rq, - bool at_head) +static int nvme_execute_rq(struct request *rq, bool at_head) { blk_status_t status; @@ -1076,11 +1083,14 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, int ret; if (qid == NVME_QID_ANY) - req = nvme_alloc_request(q, cmd, flags); + req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags); else - req = nvme_alloc_request_qid(q, cmd, flags, qid); + req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags, + qid ? qid - 1 : 0); + if (IS_ERR(req)) return PTR_ERR(req); + nvme_init_request(req, cmd); if (timeout) req->timeout = timeout; @@ -1091,7 +1101,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, goto out; } - ret = nvme_execute_rq(NULL, req, at_head); + ret = nvme_execute_rq(req, at_head); if (result && ret >= 0) *result = nvme_req(req)->result; out: @@ -1207,12 +1217,11 @@ int nvme_execute_passthru_rq(struct request *rq) struct nvme_command *cmd = nvme_req(rq)->cmd; struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl; struct nvme_ns *ns = rq->q->queuedata; - struct gendisk *disk = ns ? 
ns->disk : NULL; u32 effects; int ret; effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); - ret = nvme_execute_rq(disk, rq, false); + ret = nvme_execute_rq(rq, false); if (effects) /* nothing to be done for zero cmd effects */ nvme_passthru_end(ctrl, effects, cmd, ret); @@ -1271,14 +1280,15 @@ static void nvme_keep_alive_work(struct work_struct *work) return; } - rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, - BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); + rq = blk_mq_alloc_request(ctrl->admin_q, nvme_req_op(&ctrl->ka_cmd), + BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); if (IS_ERR(rq)) { /* allocation failure, reset the controller */ dev_err(ctrl->device, "keep-alive failed: %ld\n", PTR_ERR(rq)); nvme_reset_ctrl(ctrl); return; } + nvme_init_request(rq, &ctrl->ka_cmd); rq->timeout = ctrl->kato * HZ; rq->end_io_data = ctrl; @@ -1683,13 +1693,6 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); } -static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids) -{ - return !uuid_is_null(&ids->uuid) || - memchr_inv(ids->nguid, 0, sizeof(ids->nguid)) || - memchr_inv(ids->eui64, 0, sizeof(ids->eui64)); -} - static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) { return uuid_equal(&a->uuid, &b->uuid) && @@ -1977,7 +1980,7 @@ static char nvme_pr_type(enum pr_type type) default: return 0; } -}; +} static int nvme_send_ns_head_pr_command(struct block_device *bdev, struct nvme_command *c, u8 data[16]) @@ -2565,7 +2568,7 @@ static void nvme_release_subsystem(struct device *dev) container_of(dev, struct nvme_subsystem, dev); if (subsys->instance >= 0) - ida_simple_remove(&nvme_instance_ida, subsys->instance); + ida_free(&nvme_instance_ida, subsys->instance); kfree(subsys); } @@ -2990,6 +2993,9 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->max_namespaces = le32_to_cpu(id->mnan); ctrl->ctratt = le32_to_cpu(id->ctratt); + ctrl->cntrltype = id->cntrltype; + ctrl->dctype = id->dctype; + if (id->rtd3e) { /* us -> s */ u32 transition_time = le32_to_cpu(id->rtd3e) / USEC_PER_SEC; @@ -3523,6 +3529,40 @@ static ssize_t nvme_ctrl_fast_io_fail_tmo_store(struct device *dev, static DEVICE_ATTR(fast_io_fail_tmo, S_IRUGO | S_IWUSR, nvme_ctrl_fast_io_fail_tmo_show, nvme_ctrl_fast_io_fail_tmo_store); +static ssize_t cntrltype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + static const char * const type[] = { + [NVME_CTRL_IO] = "io\n", + [NVME_CTRL_DISC] = "discovery\n", + [NVME_CTRL_ADMIN] = "admin\n", + }; + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + if (ctrl->cntrltype > NVME_CTRL_ADMIN || !type[ctrl->cntrltype]) + return sysfs_emit(buf, "reserved\n"); + + return sysfs_emit(buf, type[ctrl->cntrltype]); +} +static DEVICE_ATTR_RO(cntrltype); + +static ssize_t dctype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + static const char * const type[] = { + [NVME_DCTYPE_NOT_REPORTED] = "none\n", + [NVME_DCTYPE_DDC] = "ddc\n", + [NVME_DCTYPE_CDC] = "cdc\n", + }; + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + if (ctrl->dctype > NVME_DCTYPE_CDC || !type[ctrl->dctype]) + return sysfs_emit(buf, "reserved\n"); + + return sysfs_emit(buf, type[ctrl->dctype]); +} +static DEVICE_ATTR_RO(dctype); + static struct attribute *nvme_dev_attrs[] = { &dev_attr_reset_controller.attr, &dev_attr_rescan_controller.attr, @@ -3544,6 +3584,8 @@ static struct attribute *nvme_dev_attrs[] = { &dev_attr_reconnect_delay.attr, &dev_attr_fast_io_fail_tmo.attr, 
&dev_attr_kato.attr, + &dev_attr_cntrltype.attr, + &dev_attr_dctype.attr, NULL }; @@ -3598,16 +3640,24 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, return NULL; } -static int __nvme_check_ids(struct nvme_subsystem *subsys, - struct nvme_ns_head *new) +static int nvme_subsys_check_duplicate_ids(struct nvme_subsystem *subsys, + struct nvme_ns_ids *ids) { + bool has_uuid = !uuid_is_null(&ids->uuid); + bool has_nguid = memchr_inv(ids->nguid, 0, sizeof(ids->nguid)); + bool has_eui64 = memchr_inv(ids->eui64, 0, sizeof(ids->eui64)); struct nvme_ns_head *h; lockdep_assert_held(&subsys->lock); list_for_each_entry(h, &subsys->nsheads, entry) { - if (nvme_ns_ids_valid(&new->ids) && - nvme_ns_ids_equal(&new->ids, &h->ids)) + if (has_uuid && uuid_equal(&ids->uuid, &h->ids.uuid)) + return -EINVAL; + if (has_nguid && + memcmp(&ids->nguid, &h->ids.nguid, sizeof(ids->nguid)) == 0) + return -EINVAL; + if (has_eui64 && + memcmp(&ids->eui64, &h->ids.eui64, sizeof(ids->eui64)) == 0) return -EINVAL; } @@ -3616,7 +3666,7 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys, static void nvme_cdev_rel(struct device *dev) { - ida_simple_remove(&nvme_ns_chr_minor_ida, MINOR(dev->devt)); + ida_free(&nvme_ns_chr_minor_ida, MINOR(dev->devt)); } void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device) @@ -3630,7 +3680,7 @@ int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device, { int minor, ret; - minor = ida_simple_get(&nvme_ns_chr_minor_ida, 0, 0, GFP_KERNEL); + minor = ida_alloc(&nvme_ns_chr_minor_ida, GFP_KERNEL); if (minor < 0) return minor; cdev_device->devt = MKDEV(MAJOR(nvme_ns_chr_devt), minor); @@ -3693,7 +3743,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, head = kzalloc(size, GFP_KERNEL); if (!head) goto out; - ret = ida_simple_get(&ctrl->subsys->ns_ida, 1, 0, GFP_KERNEL); + ret = ida_alloc_min(&ctrl->subsys->ns_ida, 1, GFP_KERNEL); if (ret < 0) goto out_free_head; head->instance = ret; @@ -3706,13 +3756,6 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, head->ids = *ids; kref_init(&head->ref); - ret = __nvme_check_ids(ctrl->subsys, head); - if (ret) { - dev_err(ctrl->device, - "duplicate IDs for nsid %d\n", nsid); - goto out_cleanup_srcu; - } - if (head->ids.csi) { ret = nvme_get_effects_log(ctrl, head->ids.csi, &head->effects); if (ret) @@ -3732,7 +3775,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, out_cleanup_srcu: cleanup_srcu_struct(&head->srcu); out_ida_remove: - ida_simple_remove(&ctrl->subsys->ns_ida, head->instance); + ida_free(&ctrl->subsys->ns_ida, head->instance); out_free_head: kfree(head); out: @@ -3741,16 +3784,56 @@ out: return ERR_PTR(ret); } +static int nvme_global_check_duplicate_ids(struct nvme_subsystem *this, + struct nvme_ns_ids *ids) +{ + struct nvme_subsystem *s; + int ret = 0; + + /* + * Note that this check is racy as we try to avoid holding the global + * lock over the whole ns_head creation. But it is only intended as + * a sanity check anyway. 
+ */ + mutex_lock(&nvme_subsystems_lock); + list_for_each_entry(s, &nvme_subsystems, entry) { + if (s == this) + continue; + mutex_lock(&s->lock); + ret = nvme_subsys_check_duplicate_ids(s, ids); + mutex_unlock(&s->lock); + if (ret) + break; + } + mutex_unlock(&nvme_subsystems_lock); + + return ret; +} + static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, struct nvme_ns_ids *ids, bool is_shared) { struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_ns_head *head = NULL; - int ret = 0; + int ret; + + ret = nvme_global_check_duplicate_ids(ctrl->subsys, ids); + if (ret) { + dev_err(ctrl->device, + "globally duplicate IDs for nsid %d\n", nsid); + return ret; + } mutex_lock(&ctrl->subsys->lock); head = nvme_find_ns_head(ctrl->subsys, nsid); if (!head) { + ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, ids); + if (ret) { + dev_err(ctrl->device, + "duplicate IDs in subsystem for nsid %d\n", + nsid); + goto out_unlock; + } head = nvme_alloc_ns_head(ctrl, nsid, ids); if (IS_ERR(head)) { ret = PTR_ERR(head); @@ -3770,6 +3853,14 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, nsid); goto out_put_ns_head; } + + if (!multipath && !list_empty(&head->list)) { + dev_warn(ctrl->device, + "Found shared namespace %d, but multipathing not supported.\n", + nsid); + dev_warn_once(ctrl->device, + "Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0\n."); + } } list_add_tail_rcu(&ns->siblings, &head->list); @@ -3858,13 +3949,27 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, goto out_cleanup_disk; /* - * Without the multipath code enabled, multiple controller per - * subsystems are visible as devices and thus we cannot use the - * subsystem instance. + * If multipathing is enabled, the device name for all disks and not + * just those that represent shared namespaces needs to be based on the + * subsystem instance. Using the controller instance for private + * namespaces could lead to naming collisions between shared and private + * namespaces if they don't use a common numbering scheme. + * + * If multipathing is not enabled, disk names must use the controller + * instance as shared namespaces will show up as multiple block + * devices. 
*/ - if (!nvme_mpath_set_disk_name(ns, disk->disk_name, &disk->flags)) + if (ns->head->disk) { + sprintf(disk->disk_name, "nvme%dc%dn%d", ctrl->subsys->instance, + ctrl->instance, ns->head->instance); + disk->flags |= GENHD_FL_HIDDEN; + } else if (multipath) { + sprintf(disk->disk_name, "nvme%dn%d", ctrl->subsys->instance, + ns->head->instance); + } else { sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); + } if (nvme_update_ns_info(ns, id)) goto out_unlink_ns; @@ -4229,6 +4334,13 @@ static int nvme_class_uevent(struct device *dev, struct kobj_uevent_env *env) return ret; } +static void nvme_change_uevent(struct nvme_ctrl *ctrl, char *envdata) +{ + char *envp[2] = { envdata, NULL }; + + kobject_uevent_env(&ctrl->device->kobj, KOBJ_CHANGE, envp); +} + static void nvme_aen_uevent(struct nvme_ctrl *ctrl) { char *envp[2] = { NULL, NULL }; @@ -4403,6 +4515,8 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) nvme_queue_scan(ctrl); nvme_start_queues(ctrl); } + + nvme_change_uevent(ctrl, "NVME_EVENT=connected"); } EXPORT_SYMBOL_GPL(nvme_start_ctrl); @@ -4436,7 +4550,7 @@ static void nvme_free_ctrl(struct device *dev) struct nvme_subsystem *subsys = ctrl->subsys; if (!subsys || ctrl->instance != subsys->instance) - ida_simple_remove(&nvme_instance_ida, ctrl->instance); + ida_free(&nvme_instance_ida, ctrl->instance); nvme_free_cels(ctrl); nvme_mpath_uninit(ctrl); @@ -4495,7 +4609,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, goto out; } - ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL); + ret = ida_alloc(&nvme_instance_ida, GFP_KERNEL); if (ret < 0) goto out; ctrl->instance = ret; @@ -4536,7 +4650,7 @@ out_free_name: nvme_put_ctrl(ctrl); kfree_const(ctrl->device->kobj.name); out_release_instance: - ida_simple_remove(&nvme_instance_ida, ctrl->instance); + ida_free(&nvme_instance_ida, ctrl->instance); out: if (ctrl->discard_page) __free_page(ctrl->discard_page); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index f79a66d4e22c..ee79a6d639b4 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -144,11 +144,10 @@ EXPORT_SYMBOL_GPL(nvmf_get_address); */ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) { - struct nvme_command cmd; + struct nvme_command cmd = { }; union nvme_result res; int ret; - memset(&cmd, 0, sizeof(cmd)); cmd.prop_get.opcode = nvme_fabrics_command; cmd.prop_get.fctype = nvme_fabrics_type_property_get; cmd.prop_get.offset = cpu_to_le32(off); @@ -272,7 +271,7 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl, int err_sctype = errval & ~NVME_SC_DNR; switch (err_sctype) { - case (NVME_SC_CONNECT_INVALID_PARAM): + case NVME_SC_CONNECT_INVALID_PARAM: if (offset >> 16) { char *inv_data = "Connect Invalid Data Parameter"; @@ -873,7 +872,7 @@ static int nvmf_check_required_opts(struct nvmf_ctrl_options *opts, unsigned int required_opts) { if ((opts->mask & required_opts) != required_opts) { - int i; + unsigned int i; for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) { if ((opt_tokens[i].token & required_opts) && @@ -923,7 +922,7 @@ static int nvmf_check_allowed_opts(struct nvmf_ctrl_options *opts, unsigned int allowed_opts) { if (opts->mask & ~allowed_opts) { - int i; + unsigned int i; for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) { if ((opt_tokens[i].token & opts->mask) && diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 71b3108c22f0..080f85f4105f 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -259,7 +259,7 @@ 
nvme_fc_free_lport(struct kref *ref) complete(&nvme_fc_unload_proceed); spin_unlock_irqrestore(&nvme_fc_lock, flags); - ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num); + ida_free(&nvme_fc_local_port_cnt, lport->localport.port_num); ida_destroy(&lport->endp_cnt); put_device(lport->dev); @@ -399,7 +399,7 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, goto out_reghost_failed; } - idx = ida_simple_get(&nvme_fc_local_port_cnt, 0, 0, GFP_KERNEL); + idx = ida_alloc(&nvme_fc_local_port_cnt, GFP_KERNEL); if (idx < 0) { ret = -ENOSPC; goto out_fail_kfree; @@ -439,7 +439,7 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, return 0; out_ida_put: - ida_simple_remove(&nvme_fc_local_port_cnt, idx); + ida_free(&nvme_fc_local_port_cnt, idx); out_fail_kfree: kfree(newrec); out_reghost_failed: @@ -535,7 +535,7 @@ nvme_fc_free_rport(struct kref *ref) spin_unlock_irqrestore(&nvme_fc_lock, flags); WARN_ON(!list_empty(&rport->disc_list)); - ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num); + ida_free(&lport->endp_cnt, rport->remoteport.port_num); kfree(rport); @@ -713,7 +713,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, goto out_lport_put; } - idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL); + idx = ida_alloc(&lport->endp_cnt, GFP_KERNEL); if (idx < 0) { ret = -ENOSPC; goto out_kfree_rport; @@ -2393,7 +2393,7 @@ nvme_fc_ctrl_free(struct kref *ref) put_device(ctrl->dev); nvme_fc_rport_put(ctrl->rport); - ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); + ida_free(&nvme_fc_ctrl_cnt, ctrl->cnum); if (ctrl->ctrl.opts) nvmf_free_options(ctrl->ctrl.opts); kfree(ctrl); @@ -2916,11 +2916,9 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) ctrl->ctrl.tagset = &ctrl->tag_set; - ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); - if (IS_ERR(ctrl->ctrl.connect_q)) { - ret = PTR_ERR(ctrl->ctrl.connect_q); + ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl)); + if (ret) goto out_free_tag_set; - } ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); if (ret) @@ -3472,7 +3470,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, goto out_fail; } - idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL); + idx = ida_alloc(&nvme_fc_ctrl_cnt, GFP_KERNEL); if (idx < 0) { ret = -ENOSPC; goto out_free_ctrl; @@ -3635,7 +3633,7 @@ out_free_queues: kfree(ctrl->queues); out_free_ida: put_device(ctrl->dev); - ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); + ida_free(&nvme_fc_ctrl_cnt, ctrl->cnum); out_free_ctrl: kfree(ctrl); out_fail: diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 22314962842d..554566371ffa 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -56,7 +56,7 @@ out: static int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, void __user *ubuffer, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, - u32 meta_seed, u64 *result, unsigned timeout) + u32 meta_seed, u64 *result, unsigned timeout, bool vec) { bool write = nvme_is_write(cmd); struct nvme_ns *ns = q->queuedata; @@ -66,17 +66,32 @@ static int nvme_submit_user_cmd(struct request_queue *q, void *meta = NULL; int ret; - req = nvme_alloc_request(q, cmd, 0); + req = blk_mq_alloc_request(q, nvme_req_op(cmd), 0); if (IS_ERR(req)) return PTR_ERR(req); + nvme_init_request(req, cmd); if (timeout) req->timeout = timeout; nvme_req(req)->flags |= NVME_REQ_USERCMD; if (ubuffer && bufflen) { - ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, + if (!vec) 
+ ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, GFP_KERNEL); + else { + struct iovec fast_iov[UIO_FASTIOV]; + struct iovec *iov = fast_iov; + struct iov_iter iter; + + ret = import_iovec(rq_data_dir(req), ubuffer, bufflen, + UIO_FASTIOV, &iov, &iter); + if (ret < 0) + goto out; + ret = blk_rq_map_user_iov(q, req, NULL, &iter, + GFP_KERNEL); + kfree(iov); + } if (ret) goto out; bio = req->bio; @@ -170,7 +185,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) return nvme_submit_user_cmd(ns->queue, &c, nvme_to_user_ptr(io.addr), length, - metadata, meta_len, lower_32_bits(io.slba), NULL, 0); + metadata, meta_len, lower_32_bits(io.slba), NULL, 0, + false); } static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, @@ -224,7 +240,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, nvme_to_user_ptr(cmd.addr), cmd.data_len, nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, - 0, &result, timeout); + 0, &result, timeout, false); if (status >= 0) { if (put_user(result, &ucmd->result)) @@ -235,7 +251,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, } static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, - struct nvme_passthru_cmd64 __user *ucmd) + struct nvme_passthru_cmd64 __user *ucmd, bool vec) { struct nvme_passthru_cmd64 cmd; struct nvme_command c; @@ -270,7 +286,7 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, nvme_to_user_ptr(cmd.addr), cmd.data_len, nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, - 0, &cmd.result, timeout); + 0, &cmd.result, timeout, vec); if (status >= 0) { if (put_user(cmd.result, &ucmd->result)) @@ -296,7 +312,7 @@ static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, case NVME_IOCTL_ADMIN_CMD: return nvme_user_cmd(ctrl, NULL, argp); case NVME_IOCTL_ADMIN64_CMD: - return nvme_user_cmd64(ctrl, NULL, argp); + return nvme_user_cmd64(ctrl, NULL, argp, false); default: return sed_ioctl(ctrl->opal_dev, cmd, argp); } @@ -340,7 +356,9 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, case NVME_IOCTL_SUBMIT_IO: return nvme_submit_io(ns, argp); case NVME_IOCTL_IO64_CMD: - return nvme_user_cmd64(ns->ctrl, ns, argp); + return nvme_user_cmd64(ns->ctrl, ns, argp, false); + case NVME_IOCTL_IO64_CMD_VEC: + return nvme_user_cmd64(ns->ctrl, ns, argp, true); default: return -ENOTTY; } @@ -480,7 +498,7 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd, case NVME_IOCTL_ADMIN_CMD: return nvme_user_cmd(ctrl, NULL, argp); case NVME_IOCTL_ADMIN64_CMD: - return nvme_user_cmd64(ctrl, NULL, argp); + return nvme_user_cmd64(ctrl, NULL, argp, false); case NVME_IOCTL_IO_CMD: return nvme_dev_user_cmd(ctrl, argp); case NVME_IOCTL_RESET: diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index ff775235534c..1b31f19e1053 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -5,10 +5,11 @@ #include <linux/backing-dev.h> #include <linux/moduleparam.h> +#include <linux/vmalloc.h> #include <trace/events/block.h> #include "nvme.h" -static bool multipath = true; +bool multipath = true; module_param(multipath, bool, 0444); MODULE_PARM_DESC(multipath, "turn on native support for multiple controllers per subsystem"); @@ -79,28 +80,6 @@ void nvme_mpath_start_freeze(struct nvme_subsystem *subsys) blk_freeze_queue_start(h->disk->queue); } -/* - * If multipathing 
is enabled we need to always use the subsystem instance - * number for numbering our devices to avoid conflicts between subsystems that - * have multiple controllers and thus use the multipath-aware subsystem node - * and those that have a single controller and use the controller node - * directly. - */ -bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, int *flags) -{ - if (!multipath) - return false; - if (!ns->head->disk) { - sprintf(disk_name, "nvme%dn%d", ns->ctrl->subsys->instance, - ns->head->instance); - return true; - } - sprintf(disk_name, "nvme%dc%dn%d", ns->ctrl->subsys->instance, - ns->ctrl->instance, ns->head->instance); - *flags = GENHD_FL_HIDDEN; - return true; -} - void nvme_failover_req(struct request *req) { struct nvme_ns *ns = req->q->queuedata; @@ -386,8 +365,7 @@ static void nvme_ns_head_submit_bio(struct bio *bio) } else { dev_warn_ratelimited(dev, "no available path - failing I/O\n"); - bio->bi_status = BLK_STS_IOERR; - bio_endio(bio); + bio_io_error(bio); } srcu_read_unlock(&head->srcu, srcu_idx); @@ -898,7 +876,7 @@ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) if (ana_log_size > ctrl->ana_log_size) { nvme_mpath_stop(ctrl); nvme_mpath_uninit(ctrl); - ctrl->ana_log_buf = kmalloc(ana_log_size, GFP_KERNEL); + ctrl->ana_log_buf = kvmalloc(ana_log_size, GFP_KERNEL); if (!ctrl->ana_log_buf) return -ENOMEM; } @@ -915,7 +893,7 @@ out_uninit: void nvme_mpath_uninit(struct nvme_ctrl *ctrl) { - kfree(ctrl->ana_log_buf); + kvfree(ctrl->ana_log_buf); ctrl->ana_log_buf = NULL; ctrl->ana_log_size = 0; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a162f6c6da6e..1ea908d43e17 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -280,7 +280,6 @@ struct nvme_ctrl { u16 crdt[3]; u16 oncs; u16 oacs; - u16 nssa; u16 nr_streams; u16 sqsize; u32 max_namespaces; @@ -349,6 +348,9 @@ struct nvme_ctrl { unsigned long discard_page_busy; struct nvme_fault_inject fault_inject; + + enum nvme_ctrl_type cntrltype; + enum nvme_dctype dctype; }; enum nvme_iopolicy { @@ -696,9 +698,13 @@ void nvme_wait_freeze(struct nvme_ctrl *ctrl); int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout); void nvme_start_freeze(struct nvme_ctrl *ctrl); +static inline unsigned int nvme_req_op(struct nvme_command *cmd) +{ + return nvme_is_write(cmd) ? 
REQ_OP_DRV_OUT : REQ_OP_DRV_IN; +} + #define NVME_QID_ANY -1 -struct request *nvme_alloc_request(struct request_queue *q, - struct nvme_command *cmd, blk_mq_req_flags_t flags); +void nvme_init_request(struct request *req, struct nvme_command *cmd); void nvme_cleanup_cmd(struct request *req); blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req); blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl, @@ -768,7 +774,6 @@ void nvme_mpath_unfreeze(struct nvme_subsystem *subsys); void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys); void nvme_mpath_start_freeze(struct nvme_subsystem *subsys); void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys); -bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, int *flags); void nvme_failover_req(struct request *req); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); @@ -791,20 +796,17 @@ static inline void nvme_trace_bio_complete(struct request *req) trace_block_bio_complete(ns->head->disk->queue, req->bio); } +extern bool multipath; extern struct device_attribute dev_attr_ana_grpid; extern struct device_attribute dev_attr_ana_state; extern struct device_attribute subsys_attr_iopolicy; #else +#define multipath false static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl) { return false; } -static inline bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, - int *flags) -{ - return false; -} static inline void nvme_failover_req(struct request *req) { } @@ -894,6 +896,14 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) } #endif +static inline int nvme_ctrl_init_connect_q(struct nvme_ctrl *ctrl) +{ + ctrl->connect_q = blk_mq_init_queue(ctrl->tagset); + if (IS_ERR(ctrl->connect_q)) + return PTR_ERR(ctrl->connect_q); + return 0; +} + static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) { return dev_to_disk(dev)->private_data; @@ -930,4 +940,23 @@ static inline bool nvme_multi_css(struct nvme_ctrl *ctrl) return (ctrl->ctrl_config & NVME_CC_CSS_MASK) == NVME_CC_CSS_CSI; } +#ifdef CONFIG_NVME_VERBOSE_ERRORS +const unsigned char *nvme_get_error_status_str(u16 status); +const unsigned char *nvme_get_opcode_str(u8 opcode); +const unsigned char *nvme_get_admin_opcode_str(u8 opcode); +#else /* CONFIG_NVME_VERBOSE_ERRORS */ +static inline const unsigned char *nvme_get_error_status_str(u16 status) +{ + return "I/O Error"; +} +static inline const unsigned char *nvme_get_opcode_str(u8 opcode) +{ + return "I/O Cmd"; +} +static inline const unsigned char *nvme_get_admin_opcode_str(u8 opcode) +{ + return "Admin Cmd"; +} +#endif /* CONFIG_NVME_VERBOSE_ERRORS */ + #endif /* _NVME_H */ diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6a99ed680915..9f4f3884fefe 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -424,8 +424,9 @@ static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, return 0; } -static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req, - unsigned int hctx_idx, unsigned int numa_node) +static int nvme_pci_init_request(struct blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node) { struct nvme_dev *dev = set->driver_data; struct nvme_iod *iod = blk_mq_rq_to_pdu(req); @@ -1428,12 +1429,13 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) "I/O %d QID %d timeout, aborting\n", req->tag, nvmeq->qid); - abort_req = 
nvme_alloc_request(dev->ctrl.admin_q, &cmd, - BLK_MQ_REQ_NOWAIT); + abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, nvme_req_op(&cmd), + BLK_MQ_REQ_NOWAIT); if (IS_ERR(abort_req)) { atomic_inc(&dev->ctrl.abort_limit); return BLK_EH_RESET_TIMER; } + nvme_init_request(abort_req, &cmd); abort_req->end_io_data = NULL; blk_execute_rq_nowait(abort_req, false, abort_endio); @@ -1722,7 +1724,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = { .queue_rq = nvme_queue_rq, .complete = nvme_pci_complete_rq, .init_hctx = nvme_admin_init_hctx, - .init_request = nvme_init_request, + .init_request = nvme_pci_init_request, .timeout = nvme_timeout, }; @@ -1732,7 +1734,7 @@ static const struct blk_mq_ops nvme_mq_ops = { .complete = nvme_pci_complete_rq, .commit_rqs = nvme_commit_rqs, .init_hctx = nvme_init_hctx, - .init_request = nvme_init_request, + .init_request = nvme_pci_init_request, .map_queues = nvme_pci_map_queues, .timeout = nvme_timeout, .poll = nvme_poll, @@ -2475,9 +2477,10 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) cmd.delete_queue.opcode = opcode; cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid); - req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT); + req = blk_mq_alloc_request(q, nvme_req_op(&cmd), BLK_MQ_REQ_NOWAIT); if (IS_ERR(req)) return PTR_ERR(req); + nvme_init_request(req, &cmd); req->end_io_data = nvmeq; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 9c55e4be8a39..d9f19d901313 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -978,11 +978,9 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) goto out_free_io_queues; } - ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); - if (IS_ERR(ctrl->ctrl.connect_q)) { - ret = PTR_ERR(ctrl->ctrl.connect_q); + ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl)); + if (ret) goto out_free_tag_set; - } } ret = nvme_rdma_start_io_queues(ctrl); @@ -1283,6 +1281,22 @@ static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue, return ib_post_send(queue->qp, &wr, NULL); } +static void nvme_rdma_dma_unmap_req(struct ib_device *ibdev, struct request *rq) +{ + struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); + + if (blk_integrity_rq(rq)) { + ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl, + req->metadata_sgl->nents, rq_dma_dir(rq)); + sg_free_table_chained(&req->metadata_sgl->sg_table, + NVME_INLINE_METADATA_SG_CNT); + } + + ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, + rq_dma_dir(rq)); + sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT); +} + static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, struct request *rq) { @@ -1294,13 +1308,6 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, if (!blk_rq_nr_phys_segments(rq)) return; - if (blk_integrity_rq(rq)) { - ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl, - req->metadata_sgl->nents, rq_dma_dir(rq)); - sg_free_table_chained(&req->metadata_sgl->sg_table, - NVME_INLINE_METADATA_SG_CNT); - } - if (req->use_sig_mr) pool = &queue->qp->sig_mrs; @@ -1309,9 +1316,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, req->mr = NULL; } - ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, - rq_dma_dir(rq)); - sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT); + nvme_rdma_dma_unmap_req(ibdev, rq); } static int nvme_rdma_set_sg_null(struct nvme_command *c) @@ -1522,22 +1527,11 @@ mr_put: return -EINVAL; } -static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, 
- struct request *rq, struct nvme_command *c) +static int nvme_rdma_dma_map_req(struct ib_device *ibdev, struct request *rq, + int *count, int *pi_count) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); - struct nvme_rdma_device *dev = queue->device; - struct ib_device *ibdev = dev->dev; - int pi_count = 0; - int count, ret; - - req->num_sge = 1; - refcount_set(&req->ref, 2); /* send and recv completions */ - - c->common.flags |= NVME_CMD_SGL_METABUF; - - if (!blk_rq_nr_phys_segments(rq)) - return nvme_rdma_set_sg_null(c); + int ret; req->data_sgl.sg_table.sgl = (struct scatterlist *)(req + 1); ret = sg_alloc_table_chained(&req->data_sgl.sg_table, @@ -1549,9 +1543,9 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, req->data_sgl.nents = blk_rq_map_sg(rq->q, rq, req->data_sgl.sg_table.sgl); - count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl, - req->data_sgl.nents, rq_dma_dir(rq)); - if (unlikely(count <= 0)) { + *count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl, + req->data_sgl.nents, rq_dma_dir(rq)); + if (unlikely(*count <= 0)) { ret = -EIO; goto out_free_table; } @@ -1570,16 +1564,50 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q, rq->bio, req->metadata_sgl->sg_table.sgl); - pi_count = ib_dma_map_sg(ibdev, - req->metadata_sgl->sg_table.sgl, - req->metadata_sgl->nents, - rq_dma_dir(rq)); - if (unlikely(pi_count <= 0)) { + *pi_count = ib_dma_map_sg(ibdev, + req->metadata_sgl->sg_table.sgl, + req->metadata_sgl->nents, + rq_dma_dir(rq)); + if (unlikely(*pi_count <= 0)) { ret = -EIO; goto out_free_pi_table; } } + return 0; + +out_free_pi_table: + sg_free_table_chained(&req->metadata_sgl->sg_table, + NVME_INLINE_METADATA_SG_CNT); +out_unmap_sg: + ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, + rq_dma_dir(rq)); +out_free_table: + sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT); + return ret; +} + +static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, + struct request *rq, struct nvme_command *c) +{ + struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); + struct nvme_rdma_device *dev = queue->device; + struct ib_device *ibdev = dev->dev; + int pi_count = 0; + int count, ret; + + req->num_sge = 1; + refcount_set(&req->ref, 2); /* send and recv completions */ + + c->common.flags |= NVME_CMD_SGL_METABUF; + + if (!blk_rq_nr_phys_segments(rq)) + return nvme_rdma_set_sg_null(c); + + ret = nvme_rdma_dma_map_req(ibdev, rq, &count, &pi_count); + if (unlikely(ret)) + return ret; + if (req->use_sig_mr) { ret = nvme_rdma_map_sg_pi(queue, req, c, count, pi_count); goto out; @@ -1603,23 +1631,12 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, ret = nvme_rdma_map_sg_fr(queue, req, c, count); out: if (unlikely(ret)) - goto out_unmap_pi_sg; + goto out_dma_unmap_req; return 0; -out_unmap_pi_sg: - if (blk_integrity_rq(rq)) - ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl, - req->metadata_sgl->nents, rq_dma_dir(rq)); -out_free_pi_table: - if (blk_integrity_rq(rq)) - sg_free_table_chained(&req->metadata_sgl->sg_table, - NVME_INLINE_METADATA_SG_CNT); -out_unmap_sg: - ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, - rq_dma_dir(rq)); -out_free_table: - sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT); +out_dma_unmap_req: + nvme_rdma_dma_unmap_req(ibdev, rq); return ret; } diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 65e00c64a588..ad3a2bf2f1e9 100644 --- 
a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -30,6 +30,44 @@ static int so_priority; module_param(so_priority, int, 0644); MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority"); +#ifdef CONFIG_DEBUG_LOCK_ALLOC +/* lockdep can detect a circular dependency of the form + * sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock + * because dependencies are tracked for both nvme-tcp and user contexts. Using + * a separate class prevents lockdep from conflating nvme-tcp socket use with + * user-space socket API use. + */ +static struct lock_class_key nvme_tcp_sk_key[2]; +static struct lock_class_key nvme_tcp_slock_key[2]; + +static void nvme_tcp_reclassify_socket(struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (WARN_ON_ONCE(!sock_allow_reclassification(sk))) + return; + + switch (sk->sk_family) { + case AF_INET: + sock_lock_init_class_and_name(sk, "slock-AF_INET-NVME", + &nvme_tcp_slock_key[0], + "sk_lock-AF_INET-NVME", + &nvme_tcp_sk_key[0]); + break; + case AF_INET6: + sock_lock_init_class_and_name(sk, "slock-AF_INET6-NVME", + &nvme_tcp_slock_key[1], + "sk_lock-AF_INET6-NVME", + &nvme_tcp_sk_key[1]); + break; + default: + WARN_ON_ONCE(1); + } +} +#else +static void nvme_tcp_reclassify_socket(struct socket *sock) { } +#endif + enum nvme_tcp_send_state { NVME_TCP_SEND_CMD_PDU = 0, NVME_TCP_SEND_H2C_PDU, @@ -1469,6 +1507,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, goto err_destroy_mutex; } + nvme_tcp_reclassify_socket(queue->sock); + /* Single syn retry */ tcp_sock_set_syncnt(queue->sock->sk, 1); @@ -1716,7 +1756,7 @@ static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl) static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl) { - int i, ret = 0; + int i, ret; for (i = 1; i < ctrl->queue_count; i++) { ret = nvme_tcp_start_queue(ctrl, i); @@ -1756,8 +1796,7 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) int i, ret; for (i = 1; i < ctrl->queue_count; i++) { - ret = nvme_tcp_alloc_queue(ctrl, i, - ctrl->sqsize + 1); + ret = nvme_tcp_alloc_queue(ctrl, i, ctrl->sqsize + 1); if (ret) goto out_free_queues; } @@ -1867,11 +1906,9 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) goto out_free_io_queues; } - ctrl->connect_q = blk_mq_init_queue(ctrl->tagset); - if (IS_ERR(ctrl->connect_q)) { - ret = PTR_ERR(ctrl->connect_q); + ret = nvme_ctrl_init_connect_q(ctrl); + if (ret) goto out_free_tag_set; - } } ret = nvme_tcp_start_io_queues(ctrl); diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 6fb24746de06..46d0dab686dd 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -511,7 +511,11 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) goto done; } - nvmet_ns_revalidate(req->ns); + if (nvmet_ns_revalidate(req->ns)) { + mutex_lock(&req->ns->subsys->lock); + nvmet_ns_changed(req->ns->subsys, req->ns->nsid); + mutex_unlock(&req->ns->subsys->lock); + } /* * nuse = ncap = nsze isn't always true, but we have no way to find diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 496d775c6770..8fedd1e052fe 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -60,10 +60,11 @@ static ssize_t nvmet_addr_adrfam_show(struct config_item *item, char *page) for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) { if (nvmet_addr_family[i].type == adrfam) - return sprintf(page, "%s\n", nvmet_addr_family[i].name); + return snprintf(page, PAGE_SIZE, "%s\n", + 
nvmet_addr_family[i].name); } - return sprintf(page, "\n"); + return snprintf(page, PAGE_SIZE, "\n"); } static ssize_t nvmet_addr_adrfam_store(struct config_item *item, @@ -93,10 +94,9 @@ CONFIGFS_ATTR(nvmet_, addr_adrfam); static ssize_t nvmet_addr_portid_show(struct config_item *item, char *page) { - struct nvmet_port *port = to_nvmet_port(item); + __le16 portid = to_nvmet_port(item)->disc_addr.portid; - return snprintf(page, PAGE_SIZE, "%d\n", - le16_to_cpu(port->disc_addr.portid)); + return snprintf(page, PAGE_SIZE, "%d\n", le16_to_cpu(portid)); } static ssize_t nvmet_addr_portid_store(struct config_item *item, @@ -124,8 +124,7 @@ static ssize_t nvmet_addr_traddr_show(struct config_item *item, { struct nvmet_port *port = to_nvmet_port(item); - return snprintf(page, PAGE_SIZE, "%s\n", - port->disc_addr.traddr); + return snprintf(page, PAGE_SIZE, "%s\n", port->disc_addr.traddr); } static ssize_t nvmet_addr_traddr_store(struct config_item *item, @@ -162,10 +161,11 @@ static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page) for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) { if (treq == nvmet_addr_treq[i].type) - return sprintf(page, "%s\n", nvmet_addr_treq[i].name); + return snprintf(page, PAGE_SIZE, "%s\n", + nvmet_addr_treq[i].name); } - return sprintf(page, "\n"); + return snprintf(page, PAGE_SIZE, "\n"); } static ssize_t nvmet_addr_treq_store(struct config_item *item, @@ -199,8 +199,7 @@ static ssize_t nvmet_addr_trsvcid_show(struct config_item *item, { struct nvmet_port *port = to_nvmet_port(item); - return snprintf(page, PAGE_SIZE, "%s\n", - port->disc_addr.trsvcid); + return snprintf(page, PAGE_SIZE, "%s\n", port->disc_addr.trsvcid); } static ssize_t nvmet_addr_trsvcid_store(struct config_item *item, @@ -284,7 +283,8 @@ static ssize_t nvmet_addr_trtype_show(struct config_item *item, for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) { if (port->disc_addr.trtype == nvmet_transport[i].type) - return sprintf(page, "%s\n", nvmet_transport[i].name); + return snprintf(page, PAGE_SIZE, + "%s\n", nvmet_transport[i].name); } return sprintf(page, "\n"); @@ -586,7 +586,8 @@ static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item, mutex_unlock(&ns->subsys->lock); return -EINVAL; } - nvmet_ns_revalidate(ns); + if (nvmet_ns_revalidate(ns)) + nvmet_ns_changed(ns->subsys, ns->nsid); mutex_unlock(&ns->subsys->lock); return count; } diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 626caf6f1e4b..64c2d2f3e25c 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -531,7 +531,7 @@ static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl, ns->nsid); } -void nvmet_ns_revalidate(struct nvmet_ns *ns) +bool nvmet_ns_revalidate(struct nvmet_ns *ns) { loff_t oldsize = ns->size; @@ -540,8 +540,7 @@ void nvmet_ns_revalidate(struct nvmet_ns *ns) else nvmet_file_ns_revalidate(ns); - if (oldsize != ns->size) - nvmet_ns_changed(ns->subsys, ns->nsid); + return oldsize != ns->size; } int nvmet_ns_enable(struct nvmet_ns *ns) @@ -1400,7 +1399,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, if (subsys->cntlid_min > subsys->cntlid_max) goto out_free_sqs; - ret = ida_simple_get(&cntlid_ida, + ret = ida_alloc_range(&cntlid_ida, subsys->cntlid_min, subsys->cntlid_max, GFP_KERNEL); if (ret < 0) { @@ -1459,7 +1458,7 @@ static void nvmet_ctrl_free(struct kref *ref) flush_work(&ctrl->async_event_work); cancel_work_sync(&ctrl->fatal_err_work); - ida_simple_remove(&cntlid_ida, ctrl->cntlid); + ida_free(&cntlid_ida, 
ctrl->cntlid); nvmet_async_events_free(ctrl); kfree(ctrl->sqs); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 22b5108168a6..de90001fc5c4 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1115,7 +1115,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) if (!assoc) return NULL; - idx = ida_simple_get(&tgtport->assoc_cnt, 0, 0, GFP_KERNEL); + idx = ida_alloc(&tgtport->assoc_cnt, GFP_KERNEL); if (idx < 0) goto out_free_assoc; @@ -1157,7 +1157,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) out_put: nvmet_fc_tgtport_put(tgtport); out_ida: - ida_simple_remove(&tgtport->assoc_cnt, idx); + ida_free(&tgtport->assoc_cnt, idx); out_free_assoc: kfree(assoc); return NULL; @@ -1183,7 +1183,7 @@ nvmet_fc_target_assoc_free(struct kref *ref) /* if pending Rcv Disconnect Association LS, send rsp now */ if (oldls) nvmet_fc_xmt_ls_rsp(tgtport, oldls); - ida_simple_remove(&tgtport->assoc_cnt, assoc->a_id); + ida_free(&tgtport->assoc_cnt, assoc->a_id); dev_info(tgtport->dev, "{%d:%d} Association freed\n", tgtport->fc_target_port.port_num, assoc->a_id); @@ -1341,7 +1341,7 @@ nvmet_fc_portentry_rebind_tgt(struct nvmet_fc_tgtport *tgtport) } /** - * nvme_fc_register_targetport - transport entry point called by an + * nvmet_fc_register_targetport - transport entry point called by an * LLDD to register the existence of a local * NVME subystem FC port. * @pinfo: pointer to information about the port to be registered @@ -1383,7 +1383,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, goto out_regtgt_failed; } - idx = ida_simple_get(&nvmet_fc_tgtport_cnt, 0, 0, GFP_KERNEL); + idx = ida_alloc(&nvmet_fc_tgtport_cnt, GFP_KERNEL); if (idx < 0) { ret = -ENOSPC; goto out_fail_kfree; @@ -1433,7 +1433,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, out_free_newrec: put_device(dev); out_ida_put: - ida_simple_remove(&nvmet_fc_tgtport_cnt, idx); + ida_free(&nvmet_fc_tgtport_cnt, idx); out_fail_kfree: kfree(newrec); out_regtgt_failed: @@ -1460,7 +1460,7 @@ nvmet_fc_free_tgtport(struct kref *ref) /* let the LLDD know we've finished tearing it down */ tgtport->ops->targetport_delete(&tgtport->fc_target_port); - ida_simple_remove(&nvmet_fc_tgtport_cnt, + ida_free(&nvmet_fc_tgtport_cnt, tgtport->fc_target_port.port_num); ida_destroy(&tgtport->assoc_cnt); @@ -1604,7 +1604,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) } /** - * nvme_fc_unregister_targetport - transport entry point called by an + * nvmet_fc_unregister_targetport - transport entry point called by an * LLDD to deregister/remove a previously * registered a local NVME subsystem FC port. * @target_port: pointer to the (registered) target port that is to be diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 95c2bbb0b2f5..e9194804ddee 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -76,6 +76,14 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns) { int ret; + /* + * When buffered_io namespace attribute is enabled that means user want + * this block device to be used as a file, so block device can take + * an advantage of cache. 
+ */ + if (ns->buffered_io) + return -ENOTBLK; + ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE, NULL); if (IS_ERR(ns->bdev)) { diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 6be6e59d273b..6485dc8eb974 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -14,16 +14,9 @@ #define NVMET_MAX_MPOOL_BVEC 16 #define NVMET_MIN_MPOOL_OBJ 16 -int nvmet_file_ns_revalidate(struct nvmet_ns *ns) +void nvmet_file_ns_revalidate(struct nvmet_ns *ns) { - struct kstat stat; - int ret; - - ret = vfs_getattr(&ns->file->f_path, &stat, STATX_SIZE, - AT_STATX_FORCE_SYNC); - if (!ret) - ns->size = stat.size; - return ret; + ns->size = i_size_read(ns->file->f_mapping->host); } void nvmet_file_ns_disable(struct nvmet_ns *ns) @@ -43,7 +36,7 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns) int nvmet_file_ns_enable(struct nvmet_ns *ns) { int flags = O_RDWR | O_LARGEFILE; - int ret; + int ret = 0; if (!ns->buffered_io) flags |= O_DIRECT; @@ -57,9 +50,7 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns) return ret; } - ret = nvmet_file_ns_revalidate(ns); - if (ret) - goto err; + nvmet_file_ns_revalidate(ns); /* * i_blkbits can be greater than the universally accepted upper bound, diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index eb1094254c82..23f9d6f88804 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -543,11 +543,9 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) if (ret) goto out_destroy_queues; - ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); - if (IS_ERR(ctrl->ctrl.connect_q)) { - ret = PTR_ERR(ctrl->ctrl.connect_q); + ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl)); + if (ret) goto out_free_tagset; - } ret = nvme_loop_connect_io_queues(ctrl); if (ret) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index af193423c10b..d910c6aad4b6 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -541,8 +541,8 @@ u16 nvmet_bdev_flush(struct nvmet_req *req); u16 nvmet_file_flush(struct nvmet_req *req); void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid); void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns); -int nvmet_file_ns_revalidate(struct nvmet_ns *ns); -void nvmet_ns_revalidate(struct nvmet_ns *ns); +void nvmet_file_ns_revalidate(struct nvmet_ns *ns); +bool nvmet_ns_revalidate(struct nvmet_ns *ns); u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts); bool nvmet_bdev_zns_enable(struct nvmet_ns *ns); diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index a810bf569fff..a4de1e0d518b 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -254,11 +254,12 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) timeout = nvmet_req_subsys(req)->admin_timeout; } - rq = nvme_alloc_request(q, req->cmd, 0); + rq = blk_mq_alloc_request(q, nvme_req_op(req->cmd), 0); if (IS_ERR(rq)) { status = NVME_SC_INTERNAL; goto out_put_ns; } + nvme_init_request(rq, req->cmd); if (timeout) rq->timeout = timeout; diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 1deb4043e242..2446d0918a41 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1356,7 +1356,7 @@ static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue) !queue->host_qid); } nvmet_rdma_free_rsps(queue); - ida_simple_remove(&nvmet_rdma_queue_ida, queue->idx); + ida_free(&nvmet_rdma_queue_ida, queue->idx); kfree(queue); 
} @@ -1459,7 +1459,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, spin_lock_init(&queue->rsps_lock); INIT_LIST_HEAD(&queue->queue_list); - queue->idx = ida_simple_get(&nvmet_rdma_queue_ida, 0, 0, GFP_KERNEL); + queue->idx = ida_alloc(&nvmet_rdma_queue_ida, GFP_KERNEL); if (queue->idx < 0) { ret = NVME_RDMA_CM_NO_RSC; goto out_destroy_sq; @@ -1510,7 +1510,7 @@ out_free_cmds: out_free_responses: nvmet_rdma_free_rsps(queue); out_ida_remove: - ida_simple_remove(&nvmet_rdma_queue_ida, queue->idx); + ida_free(&nvmet_rdma_queue_ida, queue->idx); out_destroy_sq: nvmet_sq_destroy(&queue->nvme_sq); out_free_queue: @@ -1703,7 +1703,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, } /** - * nvme_rdma_device_removal() - Handle RDMA device removal + * nvmet_rdma_device_removal() - Handle RDMA device removal * @cm_id: rdma_cm id, used for nvmet port * @queue: nvmet rdma queue (cm id qp_context) * diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 7c1c43ce466b..83ca577f72be 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1473,7 +1473,7 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) nvmet_tcp_free_cmds(queue); if (queue->hdr_digest || queue->data_digest) nvmet_tcp_free_crypto(queue); - ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx); + ida_free(&nvmet_tcp_queue_ida, queue->idx); page = virt_to_head_page(queue->pf_cache.va); __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias); @@ -1613,7 +1613,7 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, init_llist_head(&queue->resp_list); INIT_LIST_HEAD(&queue->resp_send_list); - queue->idx = ida_simple_get(&nvmet_tcp_queue_ida, 0, 0, GFP_KERNEL); + queue->idx = ida_alloc(&nvmet_tcp_queue_ida, GFP_KERNEL); if (queue->idx < 0) { ret = queue->idx; goto out_free_queue; @@ -1646,7 +1646,7 @@ out_destroy_sq: out_free_connect: nvmet_tcp_free_cmd(&queue->connect); out_ida_remove: - ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx); + ida_free(&nvmet_tcp_queue_ida, queue->idx); out_free_queue: kfree(queue); return ret; diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index 3e421217a7ad..e34718b09550 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -123,7 +123,11 @@ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req) goto done; } - nvmet_ns_revalidate(req->ns); + if (nvmet_ns_revalidate(req->ns)) { + mutex_lock(&req->ns->subsys->lock); + nvmet_ns_changed(req->ns->subsys, req->ns->nsid); + mutex_unlock(&req->ns->subsys->lock); + } zsze = (bdev_zone_sectors(req->ns->bdev) << 9) >> req->ns->blksize_shift; id_zns->lbafe[0].zsze = cpu_to_le64(zsze); diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index cb909edb76c4..5358a5facdee 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -721,7 +721,7 @@ enum { * * Fields with static values for the port. Initialized by the * port_info struct supplied to the registration call. 
- * @port_num: NVME-FC transport subsytem port number + * @port_num: NVME-FC transport subsystem port number * @node_name: FC WWNN for the port * @port_name: FC WWPN for the port * @private: pointer to memory allocated alongside the local port diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 855dd9b3e84b..9dbc3ef4daf7 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -43,6 +43,12 @@ enum nvme_ctrl_type { NVME_CTRL_ADMIN = 3, /* Administrative controller */ }; +enum nvme_dctype { + NVME_DCTYPE_NOT_REPORTED = 0, + NVME_DCTYPE_DDC = 1, /* Direct Discovery Controller */ + NVME_DCTYPE_CDC = 2, /* Central Discovery Controller */ +}; + /* Address Family codes for Discovery Log Page entry ADRFAM field */ enum { NVMF_ADDR_FAMILY_PCI = 0, /* PCIe */ @@ -320,7 +326,9 @@ struct nvme_id_ctrl { __le16 icdoff; __u8 ctrattr; __u8 msdbd; - __u8 rsvd1804[244]; + __u8 rsvd1804[2]; + __u8 dctype; + __u8 rsvd1807[241]; struct nvme_id_power_state psd[32]; __u8 vs[1024]; }; @@ -1636,6 +1644,7 @@ enum { NVME_SC_HOST_ABORTED_CMD = 0x371, NVME_SC_CRD = 0x1800, + NVME_SC_MORE = 0x2000, NVME_SC_DNR = 0x4000, }; diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h index d99b5a772698..b2e43185e3b5 100644 --- a/include/uapi/linux/nvme_ioctl.h +++ b/include/uapi/linux/nvme_ioctl.h @@ -55,7 +55,10 @@ struct nvme_passthru_cmd64 { __u64 metadata; __u64 addr; __u32 metadata_len; - __u32 data_len; + union { + __u32 data_len; /* for non-vectored io */ + __u32 vec_cnt; /* for vectored io */ + }; __u32 cdw10; __u32 cdw11; __u32 cdw12; @@ -78,5 +81,6 @@ struct nvme_passthru_cmd64 { #define NVME_IOCTL_RESCAN _IO('N', 0x46) #define NVME_IOCTL_ADMIN64_CMD _IOWR('N', 0x47, struct nvme_passthru_cmd64) #define NVME_IOCTL_IO64_CMD _IOWR('N', 0x48, struct nvme_passthru_cmd64) +#define NVME_IOCTL_IO64_CMD_VEC _IOWR('N', 0x49, struct nvme_passthru_cmd64) #endif /* _UAPI_LINUX_NVME_IOCTL_H */ diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index a4c7cd74cff5..4fb7700a741b 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -4,6 +4,8 @@ # from userspace. # +pound := \# + CC = gcc OPTFLAGS = -O2 # Adjust as desired CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS) @@ -42,7 +44,7 @@ else ifeq ($(HAS_NEON),yes) OBJS += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 else - HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\ + HAS_ALTIVEC := $(shell printf '$(pound)include <altivec.h>\nvector int a;\n' |\ gcc -c -x c - >/dev/null && rm ./-.o && echo yes) ifeq ($(HAS_ALTIVEC),yes) CFLAGS += -I../../../arch/powerpc/include diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c index a3cf071941ab..841a55242aba 100644 --- a/lib/raid6/test/test.c +++ b/lib/raid6/test/test.c @@ -19,7 +19,6 @@ #define NDISKS 16 /* Including P and Q */ const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); -struct raid6_calls raid6_call; char *dataptrs[NDISKS]; char data[NDISKS][PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); diff --git a/lib/raid6/vpermxor.uc b/lib/raid6/vpermxor.uc index 10475dc423c1..1bfb127fbfe8 100644 --- a/lib/raid6/vpermxor.uc +++ b/lib/raid6/vpermxor.uc @@ -24,9 +24,9 @@ #ifdef CONFIG_ALTIVEC #include <altivec.h> +#include <asm/ppc-opcode.h> #ifdef __KERNEL__ #include <asm/cputable.h> -#include <asm/ppc-opcode.h> #include <asm/switch_to.h> #endif |
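
The removal of nvme_alloc_request() means every submitter in this series now open-codes the allocation: pick the passthrough op with nvme_req_op(), allocate via blk_mq_alloc_request() (or blk_mq_alloc_request_hctx() with qid - 1 when the command must land on a specific queue, as __nvme_submit_sync_cmd() does above), then attach the NVMe command with nvme_init_request(). A minimal sketch of that pattern for a synchronous admin command; the helper name example_sync_admin_cmd() is hypothetical and error handling is reduced to the essentials:

#include <linux/blk-mq.h>
#include "nvme.h"	/* drivers/nvme/host/nvme.h */

static int example_sync_admin_cmd(struct nvme_ctrl *ctrl,
				  struct nvme_command *cmd)
{
	struct request *req;
	blk_status_t status;

	req = blk_mq_alloc_request(ctrl->admin_q, nvme_req_op(cmd), 0);
	if (IS_ERR(req))
		return PTR_ERR(req);
	nvme_init_request(req, cmd);	/* attach the NVMe command to the request */

	status = blk_execute_rq(req, false);
	blk_mq_free_request(req);
	return blk_status_to_errno(status);
}

The split keeps the generic block-layer allocation visible at every call site, while the now-exported nvme_init_request() carries only the NVMe-specific setup.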
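
Another change repeated across the NVMe host and target code here is the move off the deprecated ida_simple_get()/ida_simple_remove() helpers onto ida_alloc(), ida_alloc_min(), ida_alloc_range() and ida_free(). A small sketch of the mapping, using a hypothetical example_ida rather than any IDA from the patches; note that ida_simple_get() took an exclusive end while ida_alloc_range() takes an inclusive maximum, and that both families return the new id or a negative errno:

#include <linux/idr.h>

static DEFINE_IDA(example_ida);

static int example_get_instance(void)
{
	/* was: ida_simple_get(&example_ida, 0, 0, GFP_KERNEL) */
	return ida_alloc(&example_ida, GFP_KERNEL);	/* any free id >= 0 */
}

static int example_get_nonzero_instance(void)
{
	/* was: ida_simple_get(&example_ida, 1, 0, GFP_KERNEL) */
	return ida_alloc_min(&example_ida, 1, GFP_KERNEL);	/* lowest free id >= 1 */
}

static void example_put_instance(int id)
{
	/* was: ida_simple_remove(&example_ida, id) */
	ida_free(&example_ida, id);
}

Bounded allocations, such as the controller IDs in nvmet_alloc_ctrl() above, use ida_alloc_range() with an explicit minimum and maximum instead.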
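
The uapi addition at the end, NVME_IOCTL_IO64_CMD_VEC, reuses struct nvme_passthru_cmd64 but interprets addr as a pointer to an array of struct iovec, with the element count in the new vec_cnt union member; the kernel side hands both to import_iovec() in nvme_submit_user_cmd() above. A minimal user-space sketch of a vectored read follows; the device path /dev/nvme0n1, namespace 1, a 512-byte LBA format and the plain Read opcode are assumptions for illustration, the caller needs CAP_SYS_ADMIN, and the headers must be new enough to carry the 5.18 ioctl definition:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/uio.h>
#include <unistd.h>
#include <linux/nvme_ioctl.h>

int main(void)
{
	char buf0[2048], buf1[2048];
	struct iovec iov[2] = {
		{ .iov_base = buf0, .iov_len = sizeof(buf0) },
		{ .iov_base = buf1, .iov_len = sizeof(buf1) },
	};
	struct nvme_passthru_cmd64 cmd = {
		.opcode  = 0x02,		/* NVMe Read */
		.nsid    = 1,			/* assumed namespace */
		.addr    = (uintptr_t)iov,	/* iovec array, not a flat buffer */
		.vec_cnt = 2,			/* number of iovec entries */
		.cdw10   = 0,			/* starting LBA, low 32 bits */
		.cdw12   = 7,			/* NLB is zero-based: 8 blocks = 4 KiB */
	};
	int fd, err;

	fd = open("/dev/nvme0n1", O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	err = ioctl(fd, NVME_IOCTL_IO64_CMD_VEC, &cmd);
	if (err < 0)
		perror("NVME_IOCTL_IO64_CMD_VEC");
	else if (err)
		fprintf(stderr, "NVMe status: 0x%x\n", err);
	close(fd);
	return err ? 1 : 0;
}

Because data_len and vec_cnt share storage, existing non-vectored callers are unaffected; only the new ioctl number treats addr as an iovec array.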