summary | refs | log | tree | commit | diff
path: root/drivers
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2021-04-28 14:39:37 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2021-04-28 14:39:37 -0700
commit fc0586062816559defb14c947319ef8c4c326fb3 (patch)
tree 5ca73bd1fc9de596a11e6d3549fd8fbf6f87dafc /drivers
parent 6c0029211382011af508273c4fc98a732f841d95 (diff)
parent 8324fbae75ce65fc2eb960a8434799dca48248ac (diff)
Merge tag 'for-5.13/drivers-2021-04-27' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:

 - MD changes via Song:
    - raid5 POWER fix
    - raid1 failure fix
    - UAF fix for md cluster
    - mddev_find_or_alloc() clean up
    - Fix NULL pointer deref with external bitmap
    - Performance improvement for raid10 discard requests
    - Fix missing information of /proc/mdstat

 - rsxx const qualifier removal (Arnd)

 - Expose allocated brd pages (Calvin)

 - rnbd via Gioh Kim:
    - Change maintainer
    - Change domain address of maintainers' email
    - Add polling IO mode and document update
    - Fix memory leak and some bug detected by static code analysis tools
    - Code refactoring

 - Series of floppy cleanups/fixes (Denis)

 - s390 dasd fixes (Julian)

 - kerneldoc fixes (Lee)

 - null_blk double free (Lv)

 - null_blk virtual boundary addition (Max)

 - Remove xsysace driver (Michal)

 - umem driver removal (Davidlohr)

 - ataflop fixes (Dan)

 - Revalidate disk removal (Christoph)

 - Bounce buffer cleanups (Christoph)

 - Mark lightnvm as deprecated (Christoph)

 - mtip32xx init cleanups (Shixin)

 - Various fixes (Tian, Gustavo, Coly, Yang, Zhang, Zhiqiang)

* tag 'for-5.13/drivers-2021-04-27' of git://git.kernel.dk/linux-block: (143 commits)
  async_xor: increase src_offs when dropping destination page
  drivers/block/null_blk/main: Fix a double free in null_init.
  md/raid1: properly indicate failure when ending a failed write request
  md-cluster: fix use-after-free issue when removing rdev
  nvme: introduce generic per-namespace chardev
  nvme: cleanup nvme_configure_apst
  nvme: do not try to reconfigure APST when the controller is not live
  nvme: add 'kato' sysfs attribute
  nvme: sanitize KATO setting
  nvmet: avoid queuing keep-alive timer if it is disabled
  brd: expose number of allocated pages in debugfs
  ataflop: fix off by one in ataflop_probe()
  ataflop: potential out of bounds in do_format()
  drbd: Fix fall-through warnings for Clang
  block/rnbd: Use strscpy instead of strlcpy
  block/rnbd-clt-sysfs: Remove copy buffer overlap in rnbd_clt_get_path_name
  block/rnbd-clt: Remove max_segment_size
  block/rnbd-clt: Generate kobject_uevent when the rnbd device state changes
  block/rnbd-srv: Remove unused arguments of rnbd_srv_rdma_ev
  Documentation/ABI/rnbd-clt: Add description for nr_poll_queues
  ...
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/block/Kconfig 25
-rw-r--r-- drivers/block/Makefile 2
-rw-r--r-- drivers/block/ataflop.c 16
-rw-r--r-- drivers/block/brd.c 19
-rw-r--r-- drivers/block/drbd/drbd_interval.c 8
-rw-r--r-- drivers/block/drbd/drbd_main.c 35
-rw-r--r-- drivers/block/drbd/drbd_nl.c 17
-rw-r--r-- drivers/block/drbd/drbd_receiver.c 27
-rw-r--r-- drivers/block/drbd/drbd_req.c 1
-rw-r--r-- drivers/block/drbd/drbd_state.c 7
-rw-r--r-- drivers/block/floppy.c 159
-rw-r--r-- drivers/block/mtip32xx/mtip32xx.c 13
-rw-r--r-- drivers/block/null_blk/main.c 12
-rw-r--r-- drivers/block/null_blk/null_blk.h 1
-rw-r--r-- drivers/block/null_blk/zoned.c 1
-rw-r--r-- drivers/block/paride/pd.c 11
-rw-r--r-- drivers/block/rnbd/rnbd-clt-sysfs.c 84
-rw-r--r-- drivers/block/rnbd/rnbd-clt.c 171
-rw-r--r-- drivers/block/rnbd/rnbd-clt.h 6
-rw-r--r-- drivers/block/rnbd/rnbd-srv-sysfs.c 5
-rw-r--r-- drivers/block/rnbd/rnbd-srv.c 69
-rw-r--r-- drivers/block/rnbd/rnbd-srv.h 3
-rw-r--r-- drivers/block/rsxx/core.c 2
-rw-r--r-- drivers/block/swim.c 2
-rw-r--r-- drivers/block/swim3.c 34
-rw-r--r-- drivers/block/umem.c 1130
-rw-r--r-- drivers/block/umem.h 132
-rw-r--r-- drivers/block/xen-blkfront.c 6
-rw-r--r-- drivers/block/xsysace.c 1273
-rw-r--r-- drivers/cdrom/gdrom.c 5
-rw-r--r-- drivers/infiniband/ulp/rtrs/rtrs-clt.c 75
-rw-r--r-- drivers/infiniband/ulp/rtrs/rtrs-clt.h 1
-rw-r--r-- drivers/infiniband/ulp/rtrs/rtrs-pri.h 1
-rw-r--r-- drivers/infiniband/ulp/rtrs/rtrs-srv.c 4
-rw-r--r-- drivers/infiniband/ulp/rtrs/rtrs.h 13
-rw-r--r-- drivers/lightnvm/Kconfig 4
-rw-r--r-- drivers/lightnvm/core.c 4
-rw-r--r-- drivers/md/bcache/alloc.c 5
-rw-r--r-- drivers/md/bcache/bcache.h 11
-rw-r--r-- drivers/md/bcache/btree.c 4
-rw-r--r-- drivers/md/bcache/debug.c 2
-rw-r--r-- drivers/md/bcache/extents.c 4
-rw-r--r-- drivers/md/bcache/features.c 2
-rw-r--r-- drivers/md/bcache/io.c 4
-rw-r--r-- drivers/md/bcache/journal.c 6
-rw-r--r-- drivers/md/bcache/super.c 25
-rw-r--r-- drivers/md/bcache/util.h 2
-rw-r--r-- drivers/md/bcache/writeback.c 11
-rw-r--r-- drivers/md/md-bitmap.c 2
-rw-r--r-- drivers/md/md.c 206
-rw-r--r-- drivers/md/md.h 2
-rw-r--r-- drivers/md/raid0.c 14
-rw-r--r-- drivers/md/raid1.c 2
-rw-r--r-- drivers/md/raid10.c 434
-rw-r--r-- drivers/md/raid10.h 1
-rw-r--r-- drivers/nvme/host/Makefile 2
-rw-r--r-- drivers/nvme/host/core.c 1076
-rw-r--r-- drivers/nvme/host/fabrics.c 4
-rw-r--r-- drivers/nvme/host/fc.c 14
-rw-r--r-- drivers/nvme/host/ioctl.c 481
-rw-r--r-- drivers/nvme/host/lightnvm.c 10
-rw-r--r-- drivers/nvme/host/multipath.c 114
-rw-r--r-- drivers/nvme/host/nvme.h 64
-rw-r--r-- drivers/nvme/host/pci.c 30
-rw-r--r-- drivers/nvme/host/rdma.c 7
-rw-r--r-- drivers/nvme/host/tcp.c 16
-rw-r--r-- drivers/nvme/host/zns.c 4
-rw-r--r-- drivers/nvme/target/admin-cmd.c 14
-rw-r--r-- drivers/nvme/target/configfs.c 6
-rw-r--r-- drivers/nvme/target/core.c 33
-rw-r--r-- drivers/nvme/target/discovery.c 6
-rw-r--r-- drivers/nvme/target/fabrics-cmd.c 17
-rw-r--r-- drivers/nvme/target/fc.c 78
-rw-r--r-- drivers/nvme/target/loop.c 6
-rw-r--r-- drivers/nvme/target/nvmet.h 8
-rw-r--r-- drivers/nvme/target/tcp.c 79
-rw-r--r-- drivers/s390/block/dasd.c 17
-rw-r--r-- drivers/s390/block/dasd_devmap.c 15
-rw-r--r-- drivers/s390/block/dasd_eckd.c 1
-rw-r--r-- drivers/s390/block/dasd_fba.c 10
-rw-r--r-- drivers/s390/block/dasd_int.h 3
81 files changed, 2236 insertions, 3994 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index fd236158f32d..63056cfd4b62 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -50,7 +50,7 @@ config MAC_FLOPPY
config BLK_DEV_SWIM
tristate "Support for SWIM Macintosh floppy"
- depends on M68K && MAC
+ depends on M68K && MAC && !HIGHMEM
help
You should select this option if you want floppy support
and you don't have a II, IIfx, Q900, Q950 or AV series.
@@ -121,23 +121,6 @@ source "drivers/block/mtip32xx/Kconfig"
source "drivers/block/zram/Kconfig"
-config BLK_DEV_UMEM
- tristate "Micro Memory MM5415 Battery Backed RAM support"
- depends on PCI
- help
- Saying Y here will include support for the MM5415 family of
- battery backed (Non-volatile) RAM cards.
- <http://www.umem.com/>
-
- The cards appear as block devices that can be partitioned into
- as many as 15 partitions.
-
- To compile this driver as a module, choose M here: the
- module will be called umem.
-
- The umem driver has not yet been allocated a MAJOR number, so
- one is chosen dynamically.
-
config BLK_DEV_UBD
bool "Virtual block device"
depends on UML
@@ -378,12 +361,6 @@ config SUNVDC
source "drivers/s390/block/Kconfig"
-config XILINX_SYSACE
- tristate "Xilinx SystemACE support"
- depends on 4xx || MICROBLAZE
- help
- Include support for the Xilinx SystemACE CompactFlash interface
-
config XEN_BLKDEV_FRONTEND
tristate "Xen virtual block device support"
depends on XEN
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index e3e3f1c79a82..bc68817ef496 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -20,11 +20,9 @@ obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o
obj-$(CONFIG_N64CART) += n64cart.o
obj-$(CONFIG_BLK_DEV_RAM) += brd.o
obj-$(CONFIG_BLK_DEV_LOOP) += loop.o
-obj-$(CONFIG_XILINX_SYSACE) += xsysace.o
obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o
obj-$(CONFIG_SUNVDC) += sunvdc.o
-obj-$(CONFIG_BLK_DEV_UMEM) += umem.o
obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o
obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 104b713f4055..d601e49f80e0 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -729,8 +729,12 @@ static int do_format(int drive, int type, struct atari_format_descr *desc)
unsigned long flags;
int ret;
- if (type)
+ if (type) {
type--;
+ if (type >= NUM_DISK_MINORS ||
+ minor2disktype[type].drive_types > DriveType)
+ return -EINVAL;
+ }
q = unit[drive].disk[type]->queue;
blk_mq_freeze_queue(q);
@@ -742,11 +746,6 @@ static int do_format(int drive, int type, struct atari_format_descr *desc)
local_irq_restore(flags);
if (type) {
- if (type >= NUM_DISK_MINORS ||
- minor2disktype[type].drive_types > DriveType) {
- ret = -EINVAL;
- goto out;
- }
type = minor2disktype[type].index;
UDT = &atari_disk_type[type];
}
@@ -2002,7 +2001,10 @@ static void ataflop_probe(dev_t dev)
int drive = MINOR(dev) & 3;
int type = MINOR(dev) >> 2;
- if (drive >= FD_MAX_UNITS || type > NUM_DISK_MINORS)
+ if (type)
+ type--;
+
+ if (drive >= FD_MAX_UNITS || type >= NUM_DISK_MINORS)
return;
mutex_lock(&ataflop_probe_lock);
if (!unit[drive].disk[type]) {
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 18bf99906662..6e622c1327ee 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -22,6 +22,7 @@
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
+#include <linux/debugfs.h>
#include <linux/uaccess.h>
@@ -48,6 +49,7 @@ struct brd_device {
*/
spinlock_t brd_lock;
struct radix_tree_root brd_pages;
+ u64 brd_nr_pages;
};
/*
@@ -116,6 +118,8 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
page = radix_tree_lookup(&brd->brd_pages, idx);
BUG_ON(!page);
BUG_ON(page->index != idx);
+ } else {
+ brd->brd_nr_pages++;
}
spin_unlock(&brd->brd_lock);
@@ -365,11 +369,13 @@ __setup("ramdisk_size=", ramdisk_size);
*/
static LIST_HEAD(brd_devices);
static DEFINE_MUTEX(brd_devices_mutex);
+static struct dentry *brd_debugfs_dir;
static struct brd_device *brd_alloc(int i)
{
struct brd_device *brd;
struct gendisk *disk;
+ char buf[DISK_NAME_LEN];
brd = kzalloc(sizeof(*brd), GFP_KERNEL);
if (!brd)
@@ -382,6 +388,11 @@ static struct brd_device *brd_alloc(int i)
if (!brd->brd_queue)
goto out_free_dev;
+ snprintf(buf, DISK_NAME_LEN, "ram%d", i);
+ if (!IS_ERR_OR_NULL(brd_debugfs_dir))
+ debugfs_create_u64(buf, 0444, brd_debugfs_dir,
+ &brd->brd_nr_pages);
+
/* This is so fdisk will align partitions on 4k, because of
* direct_access API needing 4k alignment, returning a PFN
* (This is only a problem on very small devices <= 4M,
@@ -397,7 +408,7 @@ static struct brd_device *brd_alloc(int i)
disk->fops = &brd_fops;
disk->private_data = brd;
disk->flags = GENHD_FL_EXT_DEVT;
- sprintf(disk->disk_name, "ram%d", i);
+ strlcpy(disk->disk_name, buf, DISK_NAME_LEN);
set_capacity(disk, rd_size * 2);
/* Tell the block layer that this is not a rotational device */
@@ -495,6 +506,8 @@ static int __init brd_init(void)
brd_check_and_reset_par();
+ brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
+
mutex_lock(&brd_devices_mutex);
for (i = 0; i < rd_nr; i++) {
brd = brd_alloc(i);
@@ -519,6 +532,8 @@ static int __init brd_init(void)
return 0;
out_free:
+ debugfs_remove_recursive(brd_debugfs_dir);
+
list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
list_del(&brd->brd_list);
brd_free(brd);
@@ -534,6 +549,8 @@ static void __exit brd_exit(void)
{
struct brd_device *brd, *next;
+ debugfs_remove_recursive(brd_debugfs_dir);
+
list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
brd_del_one(brd);
diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c
index 651bd0236a99..f07b4378388b 100644
--- a/drivers/block/drbd/drbd_interval.c
+++ b/drivers/block/drbd/drbd_interval.c
@@ -3,7 +3,7 @@
#include <linux/rbtree_augmented.h>
#include "drbd_interval.h"
-/**
+/*
* interval_end - return end of @node
*/
static inline
@@ -18,7 +18,7 @@ sector_t interval_end(struct rb_node *node)
RB_DECLARE_CALLBACKS_MAX(static, augment_callbacks,
struct drbd_interval, rb, sector_t, end, NODE_END);
-/**
+/*
* drbd_insert_interval - insert a new interval into a tree
*/
bool
@@ -56,6 +56,7 @@ drbd_insert_interval(struct rb_root *root, struct drbd_interval *this)
/**
* drbd_contains_interval - check if a tree contains a given interval
+ * @root: red black tree root
* @sector: start sector of @interval
* @interval: may not be a valid pointer
*
@@ -88,7 +89,7 @@ drbd_contains_interval(struct rb_root *root, sector_t sector,
return false;
}
-/**
+/*
* drbd_remove_interval - remove an interval from a tree
*/
void
@@ -99,6 +100,7 @@ drbd_remove_interval(struct rb_root *root, struct drbd_interval *this)
/**
* drbd_find_overlap - search for an interval overlapping with [sector, sector + size)
+ * @root: red black tree root
* @sector: start sector
* @size: size, aligned to 512 bytes
*
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 25cd8a2f729d..de463773b530 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -125,7 +125,7 @@ struct bio_set drbd_io_bio_set;
member of struct page.
*/
struct page *drbd_pp_pool;
-spinlock_t drbd_pp_lock;
+DEFINE_SPINLOCK(drbd_pp_lock);
int drbd_pp_vacant;
wait_queue_head_t drbd_pp_wait;
@@ -268,7 +268,7 @@ void tl_restart(struct drbd_connection *connection, enum drbd_req_event what)
/**
* tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
- * @device: DRBD device.
+ * @connection: DRBD connection.
*
* This is called after the connection to the peer was lost. The storage covered
* by the requests on the transfer gets marked as our of sync. Called from the
@@ -479,7 +479,7 @@ int conn_lowest_minor(struct drbd_connection *connection)
}
#ifdef CONFIG_SMP
-/**
+/*
* drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
*
* Forces all threads of a resource onto the same CPU. This is beneficial for
@@ -518,7 +518,6 @@ static void drbd_calc_cpu_mask(cpumask_var_t *cpu_mask)
/**
* drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread
- * @device: DRBD device.
* @thi: drbd_thread object
*
* call in the "main loop" of _all_ threads, no need for any mutex, current won't die
@@ -538,7 +537,7 @@ void drbd_thread_current_set_cpu(struct drbd_thread *thi)
#define drbd_calc_cpu_mask(A) ({})
#endif
-/**
+/*
* drbd_header_size - size of a packet header
*
* The header size is a multiple of 8, so any payload following the header is
@@ -1193,7 +1192,7 @@ static int fill_bitmap_rle_bits(struct drbd_device *device,
return len;
}
-/**
+/*
* send_bitmap_rle_or_plain
*
* Return 0 when done, 1 when another iteration is needed, and a negative error
@@ -1324,11 +1323,11 @@ void drbd_send_b_ack(struct drbd_connection *connection, u32 barrier_nr, u32 set
/**
* _drbd_send_ack() - Sends an ack packet
- * @device: DRBD device.
- * @cmd: Packet command code.
- * @sector: sector, needs to be in big endian byte order
- * @blksize: size in byte, needs to be in big endian byte order
- * @block_id: Id, big endian byte order
+ * @peer_device: DRBD peer device.
+ * @cmd: Packet command code.
+ * @sector: sector, needs to be in big endian byte order
+ * @blksize: size in byte, needs to be in big endian byte order
+ * @block_id: Id, big endian byte order
*/
static int _drbd_send_ack(struct drbd_peer_device *peer_device, enum drbd_packet cmd,
u64 sector, u32 blksize, u64 block_id)
@@ -1370,9 +1369,9 @@ void drbd_send_ack_rp(struct drbd_peer_device *peer_device, enum drbd_packet cmd
/**
* drbd_send_ack() - Sends an ack packet
- * @device: DRBD device
- * @cmd: packet command code
- * @peer_req: peer request
+ * @peer_device: DRBD peer device
+ * @cmd: packet command code
+ * @peer_req: peer request
*/
int drbd_send_ack(struct drbd_peer_device *peer_device, enum drbd_packet cmd,
struct drbd_peer_request *peer_req)
@@ -1882,7 +1881,7 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock,
return sent;
}
-/**
+/*
* drbd_send_all - Send an entire buffer
*
* Returns 0 upon success and a negative error value otherwise.
@@ -2161,9 +2160,6 @@ static int drbd_create_mempools(void)
if (ret)
goto Enomem;
- /* drbd's page pool */
- spin_lock_init(&drbd_pp_lock);
-
for (i = 0; i < number; i++) {
page = alloc_page(GFP_HIGHUSER);
if (!page)
@@ -3509,6 +3505,7 @@ static int w_bitmap_io(struct drbd_work *w, int unused)
* @io_fn: IO callback to be called when bitmap IO is possible
* @done: callback to be called after the bitmap IO was performed
* @why: Descriptive text of the reason for doing the IO
+ * @flags: Bitmap flags
*
* While IO on the bitmap happens we freeze application IO thus we ensure
* that drbd_set_out_of_sync() can not be called. This function MAY ONLY be
@@ -3554,6 +3551,7 @@ void drbd_queue_bitmap_io(struct drbd_device *device,
* @device: DRBD device.
* @io_fn: IO callback to be called when bitmap IO is possible
* @why: Descriptive text of the reason for doing the IO
+ * @flags: Bitmap flags
*
* freezes application IO while that the actual IO operations runs. This
* functions MAY NOT be called from worker context.
@@ -3657,7 +3655,6 @@ const char *cmdname(enum drbd_packet cmd)
[P_RS_CANCEL] = "RSCancel",
[P_CONN_ST_CHG_REQ] = "conn_st_chg_req",
[P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply",
- [P_RETRY_WRITE] = "retry_write",
[P_PROTOCOL_UPDATE] = "protocol_update",
[P_RS_THIN_REQ] = "rs_thin_req",
[P_RS_DEALLOCATED] = "rs_deallocated",
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index bf7de4c7b96c..e7d0e637e632 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -790,9 +790,11 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&adm_ctx.resource->adm_mutex);
if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
- retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
+ retcode = (enum drbd_ret_code)drbd_set_role(adm_ctx.device,
+ R_PRIMARY, parms.assume_uptodate);
else
- retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
+ retcode = (enum drbd_ret_code)drbd_set_role(adm_ctx.device,
+ R_SECONDARY, 0);
mutex_unlock(&adm_ctx.resource->adm_mutex);
genl_lock();
@@ -916,7 +918,7 @@ void drbd_resume_io(struct drbd_device *device)
wake_up(&device->misc_wait);
}
-/**
+/*
* drbd_determine_dev_size() - Sets the right device size obeying all constraints
* @device: DRBD device.
*
@@ -1134,7 +1136,7 @@ drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev,
return size;
}
-/**
+/*
* drbd_check_al_size() - Ensures that the AL is of the right size
* @device: DRBD device.
*
@@ -1962,7 +1964,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
drbd_flush_workqueue(&connection->sender_work);
rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
- retcode = rv; /* FIXME: Type mismatch. */
+ retcode = (enum drbd_ret_code)rv;
drbd_resume_io(device);
if (rv < SS_SUCCESS)
goto fail;
@@ -2687,7 +2689,8 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
}
rcu_read_unlock();
- retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
+ retcode = (enum drbd_ret_code)conn_request_state(connection,
+ NS(conn, C_UNCONNECTED), CS_VERBOSE);
conn_reconfig_done(connection);
mutex_unlock(&adm_ctx.resource->adm_mutex);
@@ -2800,7 +2803,7 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&adm_ctx.resource->adm_mutex);
rv = conn_try_disconnect(connection, parms.force_disconnect);
if (rv < SS_SUCCESS)
- retcode = rv; /* FIXME: Type mismatch. */
+ retcode = (enum drbd_ret_code)rv;
else
retcode = NO_ERROR;
mutex_unlock(&adm_ctx.resource->adm_mutex);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index c3f09a122f20..69284ebba786 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -242,9 +242,9 @@ static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
/**
* drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
- * @device: DRBD device.
- * @number: number of pages requested
- * @retry: whether to retry, if not enough pages are available right now
+ * @peer_device: DRBD device.
+ * @number: number of pages requested
+ * @retry: whether to retry, if not enough pages are available right now
*
* Tries to allocate number pages, first from our own page pool, then from
* the kernel.
@@ -1352,7 +1352,7 @@ static void drbd_flush(struct drbd_connection *connection)
/**
* drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
- * @device: DRBD device.
+ * @connection: DRBD connection.
* @epoch: Epoch object.
* @ev: Epoch event.
*/
@@ -1441,9 +1441,8 @@ max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
return wo;
}
-/**
+/*
* drbd_bump_write_ordering() - Fall back to an other write ordering method
- * @connection: DRBD connection.
* @wo: Write ordering method to try.
*/
void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
@@ -1619,11 +1618,10 @@ static void drbd_issue_peer_wsame(struct drbd_device *device,
}
-/**
+/*
* drbd_submit_peer_request()
* @device: DRBD device.
* @peer_req: peer request
- * @rw: flag field, see bio->bi_opf
*
* May spread the pages to multiple bios,
* depending on bio_add_page restrictions.
@@ -3048,7 +3046,7 @@ out_free_e:
return -EIO;
}
-/**
+/*
* drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
*/
static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
@@ -3131,7 +3129,7 @@ static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold
return rv;
}
-/**
+/*
* drbd_asb_recover_1p - Recover after split-brain with one remaining primary
*/
static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
@@ -3188,7 +3186,7 @@ static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold
return rv;
}
-/**
+/*
* drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
*/
static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
@@ -4672,7 +4670,7 @@ static int receive_sync_uuid(struct drbd_connection *connection, struct packet_i
return 0;
}
-/**
+/*
* receive_bitmap_plain
*
* Return 0 when done, 1 when another iteration is needed, and a negative error
@@ -4724,7 +4722,7 @@ static int dcbp_get_pad_bits(struct p_compressed_bm *p)
return (p->encoding >> 4) & 0x7;
}
-/**
+/*
* recv_bm_rle_bits
*
* Return 0 when done, 1 when another iteration is needed, and a negative error
@@ -4793,7 +4791,7 @@ recv_bm_rle_bits(struct drbd_peer_device *peer_device,
return (s != c->bm_bits);
}
-/**
+/*
* decode_bitmap_c
*
* Return 0 when done, 1 when another iteration is needed, and a negative error
@@ -5865,6 +5863,7 @@ static int got_NegRSDReply(struct drbd_connection *connection, struct packet_inf
switch (pi->cmd) {
case P_NEG_RS_DREPLY:
drbd_rs_failed_io(device, sector, size);
+ break;
case P_RS_CANCEL:
break;
default:
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 9398c2c2cb2d..13beb98a7c5a 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -753,6 +753,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
case WRITE_ACKED_BY_PEER_AND_SIS:
req->rq_state |= RQ_NET_SIS;
+ fallthrough;
case WRITE_ACKED_BY_PEER:
/* Normal operation protocol C: successfully written on peer.
* During resync, even in protocol != C,
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 0067d328f0b5..b8a27818ab3f 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -904,9 +904,9 @@ out:
* is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible
* This function limits state transitions that may be declined by DRBD. I.e.
* user requests (aka soft transitions).
- * @device: DRBD device.
- * @ns: new state.
* @os: old state.
+ * @ns: new state.
+ * @connection: DRBD connection.
*/
static enum drbd_state_rv
is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_connection *connection)
@@ -1044,7 +1044,7 @@ static void print_sanitize_warnings(struct drbd_device *device, enum sanitize_st
* @device: DRBD device.
* @os: old state.
* @ns: new state.
- * @warn_sync_abort:
+ * @warn: placeholder for returned state warning.
*
* When we loose connection, we have to set the state of the peers disk (pdsk)
* to D_UNKNOWN. This rule and many more along those lines are in this function.
@@ -1696,6 +1696,7 @@ static bool lost_contact_to_peer_data(enum drbd_disk_state os, enum drbd_disk_st
* @os: old state.
* @ns: new state.
* @flags: Flags
+ * @state_change: state change to broadcast
*/
static void after_state_ch(struct drbd_device *device, union drbd_state os,
union drbd_state ns, enum chg_state_flags flags,
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 4aa9683ee0c1..8a9d22207c59 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -145,8 +145,6 @@
* Better audit of register_blkdev.
*/
-#undef FLOPPY_SILENT_DCL_CLEAR
-
#define REALLY_SLOW_IO
#define DEBUGT 2
@@ -2399,11 +2397,10 @@ static void rw_interrupt(void)
probing = 0;
}
- if (CT(raw_cmd->cmd[COMMAND]) != FD_READ ||
- raw_cmd->kernel_data == bio_data(current_req->bio)) {
+ if (CT(raw_cmd->cmd[COMMAND]) != FD_READ) {
/* transfer directly from buffer */
cont->done(1);
- } else if (CT(raw_cmd->cmd[COMMAND]) == FD_READ) {
+ } else {
buffer_track = raw_cmd->track;
buffer_drive = current_drive;
INFBOUND(buffer_max, nr_sectors + fsector_t);
@@ -2411,27 +2408,6 @@ static void rw_interrupt(void)
cont->redo();
}
-/* Compute maximal contiguous buffer size. */
-static int buffer_chain_size(void)
-{
- struct bio_vec bv;
- int size;
- struct req_iterator iter;
- char *base;
-
- base = bio_data(current_req->bio);
- size = 0;
-
- rq_for_each_segment(bv, current_req, iter) {
- if (page_address(bv.bv_page) + bv.bv_offset != base + size)
- break;
-
- size += bv.bv_len;
- }
-
- return size >> 9;
-}
-
/* Compute the maximal transfer size */
static int transfer_size(int ssize, int max_sector, int max_size)
{
@@ -2453,7 +2429,6 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
{
int remaining; /* number of transferred 512-byte sectors */
struct bio_vec bv;
- char *buffer;
char *dma_buffer;
int size;
struct req_iterator iter;
@@ -2492,8 +2467,6 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
size = bv.bv_len;
SUPBOUND(size, remaining);
-
- buffer = page_address(bv.bv_page) + bv.bv_offset;
if (dma_buffer + size >
floppy_track_buffer + (max_buffer_sectors << 10) ||
dma_buffer < floppy_track_buffer) {
@@ -2509,13 +2482,13 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
pr_info("write\n");
break;
}
- if (((unsigned long)buffer) % 512)
- DPRINT("%p buffer not aligned\n", buffer);
if (CT(raw_cmd->cmd[COMMAND]) == FD_READ)
- memcpy(buffer, dma_buffer, size);
+ memcpy_to_page(bv.bv_page, bv.bv_offset, dma_buffer,
+ size);
else
- memcpy(dma_buffer, buffer, size);
+ memcpy_from_page(dma_buffer, bv.bv_page, bv.bv_offset,
+ size);
remaining -= size;
dma_buffer += size;
@@ -2690,54 +2663,6 @@ static int make_raw_rw_request(void)
raw_cmd->flags &= ~FD_RAW_WRITE;
raw_cmd->flags |= FD_RAW_READ;
raw_cmd->cmd[COMMAND] = FM_MODE(_floppy, FD_READ);
- } else if ((unsigned long)bio_data(current_req->bio) < MAX_DMA_ADDRESS) {
- unsigned long dma_limit;
- int direct, indirect;
-
- indirect =
- transfer_size(ssize, max_sector,
- max_buffer_sectors * 2) - fsector_t;
-
- /*
- * Do NOT use minimum() here---MAX_DMA_ADDRESS is 64 bits wide
- * on a 64 bit machine!
- */
- max_size = buffer_chain_size();
- dma_limit = (MAX_DMA_ADDRESS -
- ((unsigned long)bio_data(current_req->bio))) >> 9;
- if ((unsigned long)max_size > dma_limit)
- max_size = dma_limit;
- /* 64 kb boundaries */
- if (CROSS_64KB(bio_data(current_req->bio), max_size << 9))
- max_size = (K_64 -
- ((unsigned long)bio_data(current_req->bio)) %
- K_64) >> 9;
- direct = transfer_size(ssize, max_sector, max_size) - fsector_t;
- /*
- * We try to read tracks, but if we get too many errors, we
- * go back to reading just one sector at a time.
- *
- * This means we should be able to read a sector even if there
- * are other bad sectors on this track.
- */
- if (!direct ||
- (indirect * 2 > direct * 3 &&
- *errors < drive_params[current_drive].max_errors.read_track &&
- ((!probing ||
- (drive_params[current_drive].read_track & (1 << drive_state[current_drive].probed_format)))))) {
- max_size = blk_rq_sectors(current_req);
- } else {
- raw_cmd->kernel_data = bio_data(current_req->bio);
- raw_cmd->length = current_count_sectors << 9;
- if (raw_cmd->length == 0) {
- DPRINT("%s: zero dma transfer attempted\n", __func__);
- DPRINT("indirect=%d direct=%d fsector_t=%d\n",
- indirect, direct, fsector_t);
- return 0;
- }
- virtualdmabug_workaround();
- return 2;
- }
}
if (CT(raw_cmd->cmd[COMMAND]) == FD_READ)
@@ -2781,19 +2706,17 @@ static int make_raw_rw_request(void)
raw_cmd->length = ((raw_cmd->length - 1) | (ssize - 1)) + 1;
raw_cmd->length <<= 9;
if ((raw_cmd->length < current_count_sectors << 9) ||
- (raw_cmd->kernel_data != bio_data(current_req->bio) &&
- CT(raw_cmd->cmd[COMMAND]) == FD_WRITE &&
+ (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE &&
(aligned_sector_t + (raw_cmd->length >> 9) > buffer_max ||
aligned_sector_t < buffer_min)) ||
raw_cmd->length % (128 << raw_cmd->cmd[SIZECODE]) ||
raw_cmd->length <= 0 || current_count_sectors <= 0) {
DPRINT("fractionary current count b=%lx s=%lx\n",
raw_cmd->length, current_count_sectors);
- if (raw_cmd->kernel_data != bio_data(current_req->bio))
- pr_info("addr=%d, length=%ld\n",
- (int)((raw_cmd->kernel_data -
- floppy_track_buffer) >> 9),
- current_count_sectors);
+ pr_info("addr=%d, length=%ld\n",
+ (int)((raw_cmd->kernel_data -
+ floppy_track_buffer) >> 9),
+ current_count_sectors);
pr_info("st=%d ast=%d mse=%d msi=%d\n",
fsector_t, aligned_sector_t, max_sector, max_size);
pr_info("ssize=%x SIZECODE=%d\n", ssize, raw_cmd->cmd[SIZECODE]);
@@ -2807,31 +2730,21 @@ static int make_raw_rw_request(void)
return 0;
}
- if (raw_cmd->kernel_data != bio_data(current_req->bio)) {
- if (raw_cmd->kernel_data < floppy_track_buffer ||
- current_count_sectors < 0 ||
- raw_cmd->length < 0 ||
- raw_cmd->kernel_data + raw_cmd->length >
- floppy_track_buffer + (max_buffer_sectors << 10)) {
- DPRINT("buffer overrun in schedule dma\n");
- pr_info("fsector_t=%d buffer_min=%d current_count=%ld\n",
- fsector_t, buffer_min, raw_cmd->length >> 9);
- pr_info("current_count_sectors=%ld\n",
- current_count_sectors);
- if (CT(raw_cmd->cmd[COMMAND]) == FD_READ)
- pr_info("read\n");
- if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE)
- pr_info("write\n");
- return 0;
- }
- } else if (raw_cmd->length > blk_rq_bytes(current_req) ||
- current_count_sectors > blk_rq_sectors(current_req)) {
- DPRINT("buffer overrun in direct transfer\n");
+ if (raw_cmd->kernel_data < floppy_track_buffer ||
+ current_count_sectors < 0 ||
+ raw_cmd->length < 0 ||
+ raw_cmd->kernel_data + raw_cmd->length >
+ floppy_track_buffer + (max_buffer_sectors << 10)) {
+ DPRINT("buffer overrun in schedule dma\n");
+ pr_info("fsector_t=%d buffer_min=%d current_count=%ld\n",
+ fsector_t, buffer_min, raw_cmd->length >> 9);
+ pr_info("current_count_sectors=%ld\n",
+ current_count_sectors);
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_READ)
+ pr_info("read\n");
+ if (CT(raw_cmd->cmd[COMMAND]) == FD_WRITE)
+ pr_info("write\n");
return 0;
- } else if (raw_cmd->length < current_count_sectors << 9) {
- DPRINT("more sectors than bytes\n");
- pr_info("bytes=%ld\n", raw_cmd->length >> 9);
- pr_info("sectors=%ld\n", current_count_sectors);
}
if (raw_cmd->length == 0) {
DPRINT("zero dma transfer attempted from make_raw_request\n");
@@ -3073,8 +2986,6 @@ static const char *drive_name(int type, int drive)
/* raw commands */
static void raw_cmd_done(int flag)
{
- int i;
-
if (!flag) {
raw_cmd->flags |= FD_RAW_FAILURE;
raw_cmd->flags |= FD_RAW_HARDFAILURE;
@@ -3082,8 +2993,7 @@ static void raw_cmd_done(int flag)
raw_cmd->reply_count = inr;
if (raw_cmd->reply_count > FD_RAW_REPLY_SIZE)
raw_cmd->reply_count = 0;
- for (i = 0; i < raw_cmd->reply_count; i++)
- raw_cmd->reply[i] = reply_buffer[i];
+ memcpy(raw_cmd->reply, reply_buffer, raw_cmd->reply_count);
if (raw_cmd->flags & (FD_RAW_READ | FD_RAW_WRITE)) {
unsigned long flags;
@@ -3175,7 +3085,6 @@ static int raw_cmd_copyin(int cmd, void __user *param,
{
struct floppy_raw_cmd *ptr;
int ret;
- int i;
*rcmd = NULL;
@@ -3194,8 +3103,7 @@ loop:
if (ptr->cmd_count > FD_RAW_CMD_FULLSIZE)
return -EINVAL;
- for (i = 0; i < FD_RAW_REPLY_SIZE; i++)
- ptr->reply[i] = 0;
+ memset(ptr->reply, 0, FD_RAW_REPLY_SIZE);
ptr->resultcode = 0;
if (ptr->flags & (FD_RAW_READ | FD_RAW_WRITE)) {
@@ -4317,7 +4225,7 @@ static char __init get_fdc_version(int fdc)
r = result(fdc);
if (r <= 0x00)
return FDC_NONE; /* No FDC present ??? */
- if ((r == 1) && (reply_buffer[0] == 0x80)) {
+ if ((r == 1) && (reply_buffer[ST0] == 0x80)) {
pr_info("FDC %d is an 8272A\n", fdc);
return FDC_8272A; /* 8272a/765 don't know DUMPREGS */
}
@@ -4342,12 +4250,12 @@ static char __init get_fdc_version(int fdc)
output_byte(fdc, FD_UNLOCK);
r = result(fdc);
- if ((r == 1) && (reply_buffer[0] == 0x80)) {
+ if ((r == 1) && (reply_buffer[ST0] == 0x80)) {
pr_info("FDC %d is a pre-1991 82077\n", fdc);
return FDC_82077_ORIG; /* Pre-1991 82077, doesn't know
* LOCK/UNLOCK */
}
- if ((r != 1) || (reply_buffer[0] != 0x00)) {
+ if ((r != 1) || (reply_buffer[ST0] != 0x00)) {
pr_info("FDC %d init: UNLOCK: unexpected return of %d bytes.\n",
fdc, r);
return FDC_UNKNOWN;
@@ -4359,11 +4267,11 @@ static char __init get_fdc_version(int fdc)
fdc, r);
return FDC_UNKNOWN;
}
- if (reply_buffer[0] == 0x80) {
+ if (reply_buffer[ST0] == 0x80) {
pr_info("FDC %d is a post-1991 82077\n", fdc);
return FDC_82077; /* Revised 82077AA passes all the tests */
}
- switch (reply_buffer[0] >> 5) {
+ switch (reply_buffer[ST0] >> 5) {
case 0x0:
/* Either a 82078-1 or a 82078SL running at 5Volt */
pr_info("FDC %d is an 82078.\n", fdc);
@@ -4379,7 +4287,7 @@ static char __init get_fdc_version(int fdc)
return FDC_87306;
default:
pr_info("FDC %d init: 82078 variant with unknown PARTID=%d.\n",
- fdc, reply_buffer[0] >> 5);
+ fdc, reply_buffer[ST0] >> 5);
return FDC_82078_UNKN;
}
} /* get_fdc_version */
@@ -4597,7 +4505,6 @@ static int floppy_alloc_disk(unsigned int drive, unsigned int type)
return err;
}
- blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
blk_queue_max_hw_sectors(disk->queue, 64);
disk->major = FLOPPY_MAJOR;
disk->first_minor = TOMINOR(drive) | (type << 2);
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 3be0dbc674bd..589cb0f1e030 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -95,9 +95,9 @@
/* Device instance number, incremented each time a device is probed. */
static int instance;
-static struct list_head online_list;
-static struct list_head removing_list;
-static spinlock_t dev_lock;
+static LIST_HEAD(online_list); /* list head initialised at definition, so no runtime INIT_LIST_HEAD() call is needed */
+static LIST_HEAD(removing_list); /* list head initialised at definition, same as online_list */
+static DEFINE_SPINLOCK(dev_lock); /* spinlock initialised at definition, so no runtime spin_lock_init() call is needed */
/*
* Global variable used to hold the major block device number
@@ -1213,7 +1213,7 @@ static int mtip_standby_immediate(struct mtip_port *port)
{
int rv;
struct host_to_dev_fis fis;
- unsigned long start;
+ unsigned long __maybe_unused start;
unsigned int timeout;
/* Build the FIS. */
@@ -4363,11 +4363,6 @@ static int __init mtip_init(void)
pr_info(MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n");
- spin_lock_init(&dev_lock);
-
- INIT_LIST_HEAD(&online_list);
- INIT_LIST_HEAD(&removing_list);
-
/* Allocate a major block device number to use with this driver. */
error = register_blkdev(0, MTIP_DRV_NAME);
if (error <= 0) {
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 51bfd7737552..5f006d9e1472 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -84,6 +84,10 @@ enum {
NULL_Q_MQ = 2,
};
+static bool g_virt_boundary = false; /* module-wide default; null_alloc_dev() copies it into each new device's virt_boundary field */
+module_param_named(virt_boundary, g_virt_boundary, bool, 0444); /* exposed under the name "virt_boundary" with permissions 0444 */
+MODULE_PARM_DESC(virt_boundary, "Require a virtual boundary for the device. Default: False");
+
static int g_no_sched;
module_param_named(no_sched, g_no_sched, int, 0444);
MODULE_PARM_DESC(no_sched, "No io scheduler");
@@ -366,6 +370,7 @@ NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL);
NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
+NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
{
@@ -486,6 +491,7 @@ static struct configfs_attribute *nullb_device_attrs[] = {
&nullb_device_attr_zone_nr_conv,
&nullb_device_attr_zone_max_open,
&nullb_device_attr_zone_max_active,
+ &nullb_device_attr_virt_boundary,
NULL,
};
@@ -539,7 +545,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
static ssize_t memb_group_features_show(struct config_item *item, char *page)
{
return snprintf(page, PAGE_SIZE,
- "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active,blocksize,max_sectors\n");
+ "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active,blocksize,max_sectors,virt_boundary\n");
}
CONFIGFS_ATTR_RO(memb_group_, features);
@@ -605,6 +611,7 @@ static struct nullb_device *null_alloc_dev(void)
dev->zone_nr_conv = g_zone_nr_conv;
dev->zone_max_open = g_zone_max_open;
dev->zone_max_active = g_zone_max_active;
+ dev->virt_boundary = g_virt_boundary;
return dev;
}
@@ -1896,6 +1903,9 @@ static int null_add_dev(struct nullb_device *dev)
BLK_DEF_MAX_SECTORS);
blk_queue_max_hw_sectors(nullb->q, dev->max_sectors);
+ if (dev->virt_boundary)
+ blk_queue_virt_boundary(nullb->q, PAGE_SIZE - 1);
+
null_config_discard(nullb);
sprintf(nullb->disk_name, "nullb%d", nullb->index);
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index 4876d5adb12d..64bef125d1df 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -97,6 +97,7 @@ struct nullb_device {
bool memory_backed; /* if data is stored in memory */
bool discard; /* if support discard */
bool zoned; /* if device is zoned */
+ bool virt_boundary; /* virtual boundary on/off for the device */
};
struct nullb {
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index bfcab1c782b5..dae54dd1aeac 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -180,6 +180,7 @@ int null_register_zoned_dev(struct nullb *nullb)
void null_free_zoned_dev(struct nullb_device *dev)
{
kvfree(dev->zones);
+ dev->zones = NULL; /* clear the pointer right after kvfree() so dev->zones no longer refers to freed memory */
}
int null_report_zones(struct gendisk *disk, sector_t sector,
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 897acda20ac8..828a45ffe0e7 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -859,16 +859,6 @@ static unsigned int pd_check_events(struct gendisk *p, unsigned int clearing)
return r ? DISK_EVENT_MEDIA_CHANGE : 0;
}
-static int pd_revalidate(struct gendisk *p)
-{
- struct pd_unit *disk = p->private_data;
- if (pd_special_command(disk, pd_identify) == 0)
- set_capacity(p, disk->capacity);
- else
- set_capacity(p, 0);
- return 0;
-}
-
static const struct block_device_operations pd_fops = {
.owner = THIS_MODULE,
.open = pd_open,
@@ -877,7 +867,6 @@ static const struct block_device_operations pd_fops = {
.compat_ioctl = pd_ioctl,
.getgeo = pd_getgeo,
.check_events = pd_check_events,
- .revalidate_disk= pd_revalidate
};
/* probing */
diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c
index d4aa6bfc9555..324afdd63a96 100644
--- a/drivers/block/rnbd/rnbd-clt-sysfs.c
+++ b/drivers/block/rnbd/rnbd-clt-sysfs.c
@@ -34,6 +34,7 @@ enum {
RNBD_OPT_DEV_PATH = 1 << 2,
RNBD_OPT_ACCESS_MODE = 1 << 3,
RNBD_OPT_SESSNAME = 1 << 6,
+ RNBD_OPT_NR_POLL_QUEUES = 1 << 7,
};
static const unsigned int rnbd_opt_mandatory[] = {
@@ -42,12 +43,13 @@ static const unsigned int rnbd_opt_mandatory[] = {
};
static const match_table_t rnbd_opt_tokens = {
- {RNBD_OPT_PATH, "path=%s" },
- {RNBD_OPT_DEV_PATH, "device_path=%s"},
- {RNBD_OPT_DEST_PORT, "dest_port=%d" },
- {RNBD_OPT_ACCESS_MODE, "access_mode=%s"},
- {RNBD_OPT_SESSNAME, "sessname=%s" },
- {RNBD_OPT_ERR, NULL },
+ {RNBD_OPT_PATH, "path=%s" },
+ {RNBD_OPT_DEV_PATH, "device_path=%s" },
+ {RNBD_OPT_DEST_PORT, "dest_port=%d" },
+ {RNBD_OPT_ACCESS_MODE, "access_mode=%s" },
+ {RNBD_OPT_SESSNAME, "sessname=%s" },
+ {RNBD_OPT_NR_POLL_QUEUES, "nr_poll_queues=%d" }, /* integer-valued option; rnbd_clt_parse_map_options() reads it with match_int() */
+ {RNBD_OPT_ERR, NULL },
};
struct rnbd_map_options {
@@ -57,6 +59,7 @@ struct rnbd_map_options {
char *pathname;
u16 *dest_port;
enum rnbd_access_mode *access_mode;
+ u32 *nr_poll_queues;
};
static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt,
@@ -68,7 +71,7 @@ static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt,
int opt_mask = 0;
int token;
int ret = -EINVAL;
- int i, dest_port;
+ int i, dest_port, nr_poll_queues;
int p_cnt = 0;
options = kstrdup(buf, GFP_KERNEL);
@@ -96,7 +99,7 @@ static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt,
kfree(p);
goto out;
}
- strlcpy(opt->sessname, p, NAME_MAX);
+ strscpy(opt->sessname, p, NAME_MAX);
kfree(p);
break;
@@ -139,7 +142,7 @@ static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt,
kfree(p);
goto out;
}
- strlcpy(opt->pathname, p, NAME_MAX);
+ strscpy(opt->pathname, p, NAME_MAX);
kfree(p);
break;
@@ -178,6 +181,19 @@ static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt,
kfree(p);
break;
+ case RNBD_OPT_NR_POLL_QUEUES:
+ if (match_int(args, &nr_poll_queues) || nr_poll_queues < -1 ||
+ nr_poll_queues > (int)nr_cpu_ids) {
+ pr_err("bad nr_poll_queues parameter '%d'\n",
+ nr_poll_queues);
+ ret = -EINVAL;
+ goto out;
+ }
+ if (nr_poll_queues == -1)
+ nr_poll_queues = nr_cpu_ids;
+ *opt->nr_poll_queues = nr_poll_queues;
+ break;
+
default:
pr_err("map_device: Unknown parameter or missing value '%s'\n",
p);
@@ -227,6 +243,19 @@ static ssize_t state_show(struct kobject *kobj,
static struct kobj_attribute rnbd_clt_state_attr = __ATTR_RO(state);
+static ssize_t nr_poll_queues_show(struct kobject *kobj, /* sysfs show handler: writes the device's nr_poll_queues value into page */
+ struct kobj_attribute *attr, char *page)
+{
+ struct rnbd_clt_dev *dev;
+
+ dev = container_of(kobj, struct rnbd_clt_dev, kobj); /* kobj is the member embedded in struct rnbd_clt_dev; recover the enclosing device */
+
+ return sysfs_emit(page, "%d\n", dev->nr_poll_queues); /* NOTE(review): nr_poll_queues is declared u32 in struct rnbd_clt_dev, so "%u" would match the type - confirm */
+}
+
+static struct kobj_attribute rnbd_clt_nr_poll_queues =
+ __ATTR_RO(nr_poll_queues); /* attribute object that rnbd_dev_attrs[] references via .attr */
+
static ssize_t mapping_path_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
@@ -421,6 +450,7 @@ static struct attribute *rnbd_dev_attrs[] = {
&rnbd_clt_state_attr.attr,
&rnbd_clt_session_attr.attr,
&rnbd_clt_access_mode.attr,
+ &rnbd_clt_nr_poll_queues.attr,
NULL,
};
@@ -432,10 +462,14 @@ void rnbd_clt_remove_dev_symlink(struct rnbd_clt_dev *dev)
* i.e. rnbd_clt_unmap_dev_store() leading to a sysfs warning because
* of sysfs link already was removed already.
*/
- if (dev->blk_symlink_name && try_module_get(THIS_MODULE)) {
- sysfs_remove_link(rnbd_devs_kobj, dev->blk_symlink_name);
+ if (dev->blk_symlink_name) {
+ if (try_module_get(THIS_MODULE)) {
+ sysfs_remove_link(rnbd_devs_kobj, dev->blk_symlink_name);
+ module_put(THIS_MODULE);
+ }
+ /* It should be freed always. */
kfree(dev->blk_symlink_name);
- module_put(THIS_MODULE);
+ dev->blk_symlink_name = NULL;
}
}
@@ -456,6 +490,7 @@ static int rnbd_clt_add_dev_kobj(struct rnbd_clt_dev *dev)
ret);
kobject_put(&dev->kobj);
}
+ kobject_uevent(gd_kobj, KOBJ_ONLINE);
return ret;
}
@@ -465,7 +500,7 @@ static ssize_t rnbd_clt_map_device_show(struct kobject *kobj,
char *page)
{
return scnprintf(page, PAGE_SIZE,
- "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
+ "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>] [nr_poll_queues=<number of queues>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
attr->attr.name);
}
@@ -475,15 +510,11 @@ static int rnbd_clt_get_path_name(struct rnbd_clt_dev *dev, char *buf,
int ret;
char pathname[NAME_MAX], *s;
- strlcpy(pathname, dev->pathname, sizeof(pathname));
+ strscpy(pathname, dev->pathname, sizeof(pathname));
while ((s = strchr(pathname, '/')))
s[0] = '!';
- ret = snprintf(buf, len, "%s", pathname);
- if (ret >= len)
- return -ENAMETOOLONG;
-
- ret = snprintf(buf, len, "%s@%s", buf, dev->sess->sessname);
+ ret = snprintf(buf, len, "%s@%s", pathname, dev->sess->sessname);
if (ret >= len)
return -ENAMETOOLONG;
@@ -537,6 +568,7 @@ static ssize_t rnbd_clt_map_device_store(struct kobject *kobj,
char sessname[NAME_MAX];
enum rnbd_access_mode access_mode = RNBD_ACCESS_RW;
u16 port_nr = RTRS_PORT;
+ u32 nr_poll_queues = 0;
struct sockaddr_storage *addrs;
struct rtrs_addr paths[6];
@@ -548,6 +580,7 @@ static ssize_t rnbd_clt_map_device_store(struct kobject *kobj,
opt.pathname = pathname;
opt.dest_port = &port_nr;
opt.access_mode = &access_mode;
+ opt.nr_poll_queues = &nr_poll_queues;
addrs = kcalloc(ARRAY_SIZE(paths) * 2, sizeof(*addrs), GFP_KERNEL);
if (!addrs)
return -ENOMEM;
@@ -561,12 +594,13 @@ static ssize_t rnbd_clt_map_device_store(struct kobject *kobj,
if (ret)
goto out;
- pr_info("Mapping device %s on session %s, (access_mode: %s)\n",
+ pr_info("Mapping device %s on session %s, (access_mode: %s, nr_poll_queues: %d)\n",
pathname, sessname,
- rnbd_access_mode_str(access_mode));
+ rnbd_access_mode_str(access_mode),
+ nr_poll_queues);
dev = rnbd_clt_map_device(sessname, paths, path_cnt, port_nr, pathname,
- access_mode);
+ access_mode, nr_poll_queues);
if (IS_ERR(dev)) {
ret = PTR_ERR(dev);
goto out;
@@ -639,13 +673,9 @@ cls_destroy:
return err;
}
-void rnbd_clt_destroy_default_group(void)
-{
- sysfs_remove_group(&rnbd_dev->kobj, &default_attr_group);
-}
-
void rnbd_clt_destroy_sysfs_files(void)
{
+ sysfs_remove_group(&rnbd_dev->kobj, &default_attr_group);
kobject_del(rnbd_devs_kobj);
kobject_put(rnbd_devs_kobj);
device_destroy(rnbd_dev_class, MKDEV(0, 0));
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index 45a470076652..c01786afe1b1 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -110,6 +110,7 @@ static int rnbd_clt_change_capacity(struct rnbd_clt_dev *dev,
static int process_msg_open_rsp(struct rnbd_clt_dev *dev,
struct rnbd_msg_open_rsp *rsp)
{
+ struct kobject *gd_kobj;
int err = 0;
mutex_lock(&dev->lock);
@@ -128,6 +129,8 @@ static int process_msg_open_rsp(struct rnbd_clt_dev *dev,
*/
if (dev->nsectors != nsectors)
rnbd_clt_change_capacity(dev, nsectors);
+ gd_kobj = &disk_to_dev(dev->gd)->kobj;
+ kobject_uevent(gd_kobj, KOBJ_ONLINE);
rnbd_clt_info(dev, "Device online, device remapped successfully\n");
}
err = rnbd_clt_set_dev_attr(dev, rsp);
@@ -312,13 +315,11 @@ static void rnbd_rerun_all_if_idle(struct rnbd_clt_session *sess)
static struct rtrs_permit *rnbd_get_permit(struct rnbd_clt_session *sess,
enum rtrs_clt_con_type con_type,
- int wait)
+ enum wait_type wait)
{
struct rtrs_permit *permit;
- permit = rtrs_clt_get_permit(sess->rtrs, con_type,
- wait ? RTRS_PERMIT_WAIT :
- RTRS_PERMIT_NOWAIT);
+ permit = rtrs_clt_get_permit(sess->rtrs, con_type, wait);
if (likely(permit))
/* We have a subtle rare case here, when all permits can be
* consumed before busy counter increased. This is safe,
@@ -344,7 +345,7 @@ static void rnbd_put_permit(struct rnbd_clt_session *sess,
static struct rnbd_iu *rnbd_get_iu(struct rnbd_clt_session *sess,
enum rtrs_clt_con_type con_type,
- int wait)
+ enum wait_type wait)
{
struct rnbd_iu *iu;
struct rtrs_permit *permit;
@@ -354,9 +355,7 @@ static struct rnbd_iu *rnbd_get_iu(struct rnbd_clt_session *sess,
return NULL;
}
- permit = rnbd_get_permit(sess, con_type,
- wait ? RTRS_PERMIT_WAIT :
- RTRS_PERMIT_NOWAIT);
+ permit = rnbd_get_permit(sess, con_type, wait);
if (unlikely(!permit)) {
kfree(iu);
return NULL;
@@ -435,16 +434,11 @@ static void msg_conf(void *priv, int errno)
schedule_work(&iu->work);
}
-enum wait_type {
- NO_WAIT = 0,
- WAIT = 1
-};
-
static int send_usr_msg(struct rtrs_clt *rtrs, int dir,
struct rnbd_iu *iu, struct kvec *vec,
size_t len, struct scatterlist *sg, unsigned int sg_len,
void (*conf)(struct work_struct *work),
- int *errno, enum wait_type wait)
+ int *errno, int wait)
{
int err;
struct rtrs_clt_req_ops req_ops;
@@ -476,7 +470,8 @@ static void msg_close_conf(struct work_struct *work)
rnbd_clt_put_dev(dev);
}
-static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait)
+static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id,
+ enum wait_type wait)
{
struct rnbd_clt_session *sess = dev->sess;
struct rnbd_msg_close msg;
@@ -530,7 +525,7 @@ static void msg_open_conf(struct work_struct *work)
* If server thinks its fine, but we fail to process
* then be nice and send a close to server.
*/
- (void)send_msg_close(dev, device_id, NO_WAIT);
+ send_msg_close(dev, device_id, RTRS_PERMIT_NOWAIT);
}
}
kfree(rsp);
@@ -554,7 +549,7 @@ static void msg_sess_info_conf(struct work_struct *work)
rnbd_clt_put_sess(sess);
}
-static int send_msg_open(struct rnbd_clt_dev *dev, bool wait)
+static int send_msg_open(struct rnbd_clt_dev *dev, enum wait_type wait)
{
struct rnbd_clt_session *sess = dev->sess;
struct rnbd_msg_open_rsp *rsp;
@@ -583,7 +578,7 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait)
msg.hdr.type = cpu_to_le16(RNBD_MSG_OPEN);
msg.access_mode = dev->access_mode;
- strlcpy(msg.dev_name, dev->pathname, sizeof(msg.dev_name));
+ strscpy(msg.dev_name, dev->pathname, sizeof(msg.dev_name));
WARN_ON(!rnbd_clt_get_dev(dev));
err = send_usr_msg(sess->rtrs, READ, iu,
@@ -601,7 +596,7 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait)
return err;
}
-static int send_msg_sess_info(struct rnbd_clt_session *sess, bool wait)
+static int send_msg_sess_info(struct rnbd_clt_session *sess, enum wait_type wait)
{
struct rnbd_msg_sess_info_rsp *rsp;
struct rnbd_msg_sess_info msg;
@@ -657,14 +652,18 @@ put_iu:
static void set_dev_states_to_disconnected(struct rnbd_clt_session *sess)
{
struct rnbd_clt_dev *dev;
+ struct kobject *gd_kobj;
mutex_lock(&sess->lock);
list_for_each_entry(dev, &sess->devs_list, list) {
rnbd_clt_err(dev, "Device disconnected.\n");
mutex_lock(&dev->lock);
- if (dev->dev_state == DEV_STATE_MAPPED)
+ if (dev->dev_state == DEV_STATE_MAPPED) {
dev->dev_state = DEV_STATE_MAPPED_DISCONNECTED;
+ gd_kobj = &disk_to_dev(dev->gd)->kobj;
+ kobject_uevent(gd_kobj, KOBJ_OFFLINE);
+ }
mutex_unlock(&dev->lock);
}
mutex_unlock(&sess->lock);
@@ -687,7 +686,7 @@ static void remap_devs(struct rnbd_clt_session *sess)
* be asynchronous.
*/
- err = send_msg_sess_info(sess, NO_WAIT);
+ err = send_msg_sess_info(sess, RTRS_PERMIT_NOWAIT);
if (err) {
pr_err("send_msg_sess_info(\"%s\"): %d\n", sess->sessname, err);
return;
@@ -711,7 +710,7 @@ static void remap_devs(struct rnbd_clt_session *sess)
continue;
rnbd_clt_info(dev, "session reconnected, remapping device\n");
- err = send_msg_open(dev, NO_WAIT);
+ err = send_msg_open(dev, RTRS_PERMIT_NOWAIT);
if (err) {
rnbd_clt_err(dev, "send_msg_open(): %d\n", err);
break;
@@ -801,7 +800,7 @@ static struct rnbd_clt_session *alloc_sess(const char *sessname)
sess = kzalloc_node(sizeof(*sess), GFP_KERNEL, NUMA_NO_NODE);
if (!sess)
return ERR_PTR(-ENOMEM);
- strlcpy(sess->sessname, sessname, sizeof(sess->sessname));
+ strscpy(sess->sessname, sessname, sizeof(sess->sessname));
atomic_set(&sess->busy, 0);
mutex_init(&sess->lock);
INIT_LIST_HEAD(&sess->devs_list);
@@ -918,6 +917,7 @@ again:
return NULL;
}
+/* caller is responsible for initializing 'first' to false */
static struct
rnbd_clt_session *find_or_create_sess(const char *sessname, bool *first)
{
@@ -933,8 +933,7 @@ rnbd_clt_session *find_or_create_sess(const char *sessname, bool *first)
}
list_add(&sess->list, &sess_list);
*first = true;
- } else
- *first = false;
+ }
mutex_unlock(&sess_lock);
return sess;
@@ -1173,9 +1172,54 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx,
return ret;
}
+static int rnbd_rdma_poll(struct blk_mq_hw_ctx *hctx) /* installed as rnbd_mq_ops.poll; forwards the poll request for this hardware context to RTRS */
+{
+ struct rnbd_queue *q = hctx->driver_data; /* the rnbd queue is read back from the hctx's driver_data */
+ struct rnbd_clt_dev *dev = q->dev;
+ int cnt;
+
+ cnt = rtrs_clt_rdma_cq_direct(dev->sess->rtrs, hctx->queue_num); /* RTRS is told which queue via hctx->queue_num; presumably cnt is a completion count - TODO confirm against rtrs */
+ return cnt; /* the RTRS result is returned unchanged */
+}
+
+static int rnbd_rdma_map_queues(struct blk_mq_tag_set *set) /* installed as rnbd_mq_ops.map_queues; fills the DEFAULT and READ maps and, if the session has poll queues, the POLL map */
+{
+ struct rnbd_clt_session *sess = set->driver_data; /* setup_mq_tags() stores the session in tag_set->driver_data */
+
+ /* DEFAULT and READ share one range: same offset (0) and same count */
+ set->map[HCTX_TYPE_DEFAULT].nr_queues = num_online_cpus();
+ set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
+ set->map[HCTX_TYPE_READ].nr_queues = num_online_cpus();
+ set->map[HCTX_TYPE_READ].queue_offset = 0;
+ blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
+ blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
+
+ if (sess->nr_poll_queues) {
+ /* dedicated poll queues, placed directly after the shared range */
+ set->map[HCTX_TYPE_POLL].nr_queues = sess->nr_poll_queues;
+ set->map[HCTX_TYPE_POLL].queue_offset = set->map[HCTX_TYPE_READ].queue_offset +
+ set->map[HCTX_TYPE_READ].nr_queues; /* READ offset + READ count = first index past the shared queues */
+ blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
+ pr_info("[session=%s] mapped %d/%d/%d default/read/poll queues.\n",
+ sess->sessname,
+ set->map[HCTX_TYPE_DEFAULT].nr_queues,
+ set->map[HCTX_TYPE_READ].nr_queues,
+ set->map[HCTX_TYPE_POLL].nr_queues);
+ } else {
+ pr_info("[session=%s] mapped %d/%d default/read queues.\n", /* no poll queues requested: the POLL map is left untouched */
+ sess->sessname,
+ set->map[HCTX_TYPE_DEFAULT].nr_queues,
+ set->map[HCTX_TYPE_READ].nr_queues);
+ }
+
+ return 0; /* this function has no failure path */
+}
+
static struct blk_mq_ops rnbd_mq_ops = {
.queue_rq = rnbd_queue_rq,
.complete = rnbd_softirq_done_fn,
+ .map_queues = rnbd_rdma_map_queues, /* lays out the DEFAULT/READ maps and the optional POLL map */
+ .poll = rnbd_rdma_poll, /* forwards to rtrs_clt_rdma_cq_direct() for the polled hardware context */
};
static int setup_mq_tags(struct rnbd_clt_session *sess)
@@ -1189,7 +1233,15 @@ static int setup_mq_tags(struct rnbd_clt_session *sess)
tag_set->flags = BLK_MQ_F_SHOULD_MERGE |
BLK_MQ_F_TAG_QUEUE_SHARED;
tag_set->cmd_size = sizeof(struct rnbd_iu) + RNBD_RDMA_SGL_SIZE;
- tag_set->nr_hw_queues = num_online_cpus();
+
+ /* for HCTX_TYPE_DEFAULT, HCTX_TYPE_READ, HCTX_TYPE_POLL */
+ tag_set->nr_maps = sess->nr_poll_queues ? HCTX_MAX_TYPES : 2;
+ /*
+ * HCTX_TYPE_DEFAULT and HCTX_TYPE_READ share one set of queues
+ * others are for HCTX_TYPE_POLL
+ */
+ tag_set->nr_hw_queues = num_online_cpus() + sess->nr_poll_queues;
+ tag_set->driver_data = sess;
return blk_mq_alloc_tag_set(tag_set);
}
@@ -1197,18 +1249,27 @@ static int setup_mq_tags(struct rnbd_clt_session *sess)
static struct rnbd_clt_session *
find_and_get_or_create_sess(const char *sessname,
const struct rtrs_addr *paths,
- size_t path_cnt, u16 port_nr)
+ size_t path_cnt, u16 port_nr, u32 nr_poll_queues)
{
struct rnbd_clt_session *sess;
struct rtrs_attrs attrs;
int err;
- bool first;
+ bool first = false;
struct rtrs_clt_ops rtrs_ops;
sess = find_or_create_sess(sessname, &first);
if (sess == ERR_PTR(-ENOMEM))
return ERR_PTR(-ENOMEM);
- else if (!first)
+ else if ((nr_poll_queues && !first) || (!nr_poll_queues && sess->nr_poll_queues)) {
+ /*
+ * A device MUST have its own session to use the polling-mode.
+ * It must fail to map new device with the same session.
+ */
+ err = -EINVAL;
+ goto put_sess;
+ }
+
+ if (!first)
return sess;
if (!path_cnt) {
@@ -1228,8 +1289,7 @@ find_and_get_or_create_sess(const char *sessname,
paths, path_cnt, port_nr,
0, /* Do not use pdu of rtrs */
RECONNECT_DELAY, BMAX_SEGMENTS,
- BLK_MAX_SEGMENT_SIZE,
- MAX_RECONNECTS);
+ MAX_RECONNECTS, nr_poll_queues);
if (IS_ERR(sess->rtrs)) {
err = PTR_ERR(sess->rtrs);
goto wake_up_and_put;
@@ -1237,12 +1297,13 @@ find_and_get_or_create_sess(const char *sessname,
rtrs_clt_query(sess->rtrs, &attrs);
sess->max_io_size = attrs.max_io_size;
sess->queue_depth = attrs.queue_depth;
+ sess->nr_poll_queues = nr_poll_queues;
err = setup_mq_tags(sess);
if (err)
goto close_rtrs;
- err = send_msg_sess_info(sess, WAIT);
+ err = send_msg_sess_info(sess, RTRS_PERMIT_WAIT);
if (err)
goto close_rtrs;
@@ -1352,12 +1413,12 @@ static void rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
if (!dev->rotational)
blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue);
+ add_disk(dev->gd);
}
-static int rnbd_client_setup_device(struct rnbd_clt_session *sess,
- struct rnbd_clt_dev *dev, int idx)
+static int rnbd_client_setup_device(struct rnbd_clt_dev *dev)
{
- int err;
+ int err, idx = dev->clt_device_id;
dev->size = dev->nsectors * dev->logical_block_size;
@@ -1380,7 +1441,8 @@ static int rnbd_client_setup_device(struct rnbd_clt_session *sess,
static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
enum rnbd_access_mode access_mode,
- const char *pathname)
+ const char *pathname,
+ u32 nr_poll_queues)
{
struct rnbd_clt_dev *dev;
int ret;
@@ -1389,7 +1451,12 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
if (!dev)
return ERR_PTR(-ENOMEM);
- dev->hw_queues = kcalloc(nr_cpu_ids, sizeof(*dev->hw_queues),
+ /*
+ * nr_cpu_ids: the number of softirq queues
+ * nr_poll_queues: the number of polling queues
+ */
+ dev->hw_queues = kcalloc(nr_cpu_ids + nr_poll_queues,
+ sizeof(*dev->hw_queues),
GFP_KERNEL);
if (!dev->hw_queues) {
ret = -ENOMEM;
@@ -1415,6 +1482,7 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
dev->clt_device_id = ret;
dev->sess = sess;
dev->access_mode = access_mode;
+ dev->nr_poll_queues = nr_poll_queues;
mutex_init(&dev->lock);
refcount_set(&dev->refcount, 1);
dev->dev_state = DEV_STATE_INIT;
@@ -1471,14 +1539,13 @@ static bool exists_devpath(const char *pathname, const char *sessname)
return found;
}
-static bool insert_dev_if_not_exists_devpath(const char *pathname,
- struct rnbd_clt_session *sess,
- struct rnbd_clt_dev *dev)
+static bool insert_dev_if_not_exists_devpath(struct rnbd_clt_dev *dev)
{
bool found;
+ struct rnbd_clt_session *sess = dev->sess;
mutex_lock(&sess_lock);
- found = __exists_dev(pathname, sess->sessname);
+ found = __exists_dev(dev->pathname, sess->sessname);
if (!found) {
mutex_lock(&sess->lock);
list_add_tail(&dev->list, &sess->devs_list);
@@ -1502,7 +1569,8 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
struct rtrs_addr *paths,
size_t path_cnt, u16 port_nr,
const char *pathname,
- enum rnbd_access_mode access_mode)
+ enum rnbd_access_mode access_mode,
+ u32 nr_poll_queues)
{
struct rnbd_clt_session *sess;
struct rnbd_clt_dev *dev;
@@ -1511,22 +1579,22 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
if (unlikely(exists_devpath(pathname, sessname)))
return ERR_PTR(-EEXIST);
- sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr);
+ sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr, nr_poll_queues);
if (IS_ERR(sess))
return ERR_CAST(sess);
- dev = init_dev(sess, access_mode, pathname);
+ dev = init_dev(sess, access_mode, pathname, nr_poll_queues);
if (IS_ERR(dev)) {
pr_err("map_device: failed to map device '%s' from session %s, can't initialize device, err: %ld\n",
pathname, sess->sessname, PTR_ERR(dev));
ret = PTR_ERR(dev);
goto put_sess;
}
- if (insert_dev_if_not_exists_devpath(pathname, sess, dev)) {
+ if (insert_dev_if_not_exists_devpath(dev)) {
ret = -EEXIST;
goto put_dev;
}
- ret = send_msg_open(dev, WAIT);
+ ret = send_msg_open(dev, RTRS_PERMIT_WAIT);
if (ret) {
rnbd_clt_err(dev,
"map_device: failed, can't open remote device, err: %d\n",
@@ -1536,7 +1604,7 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
mutex_lock(&dev->lock);
pr_debug("Opened remote device: session=%s, path='%s'\n",
sess->sessname, pathname);
- ret = rnbd_client_setup_device(sess, dev, dev->clt_device_id);
+ ret = rnbd_client_setup_device(dev);
if (ret) {
rnbd_clt_err(dev,
"map_device: Failed to configure device, err: %d\n",
@@ -1555,14 +1623,12 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
dev->max_hw_sectors, dev->rotational, dev->wc, dev->fua);
mutex_unlock(&dev->lock);
-
- add_disk(dev->gd);
rnbd_clt_put_sess(sess);
return dev;
send_close:
- send_msg_close(dev, dev->device_id, WAIT);
+ send_msg_close(dev, dev->device_id, RTRS_PERMIT_WAIT);
del_dev:
delete_dev(dev);
put_dev:
@@ -1622,7 +1688,7 @@ int rnbd_clt_unmap_device(struct rnbd_clt_dev *dev, bool force,
destroy_sysfs(dev, sysfs_self);
destroy_gen_disk(dev);
if (was_mapped && sess->rtrs)
- send_msg_close(dev, dev->device_id, WAIT);
+ send_msg_close(dev, dev->device_id, RTRS_PERMIT_WAIT);
rnbd_clt_info(dev, "Device is unmapped\n");
@@ -1656,7 +1722,7 @@ int rnbd_clt_remap_device(struct rnbd_clt_dev *dev)
mutex_unlock(&dev->lock);
if (!err) {
rnbd_clt_info(dev, "Remapping device.\n");
- err = send_msg_open(dev, WAIT);
+ err = send_msg_open(dev, RTRS_PERMIT_WAIT);
if (err)
rnbd_clt_err(dev, "remap_device: %d\n", err);
}
@@ -1678,7 +1744,6 @@ static void rnbd_destroy_sessions(void)
struct rnbd_clt_dev *dev, *tn;
/* Firstly forbid access through sysfs interface */
- rnbd_clt_destroy_default_group();
rnbd_clt_destroy_sysfs_files();
/*
diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h
index 537d499dad3b..451e7383738f 100644
--- a/drivers/block/rnbd/rnbd-clt.h
+++ b/drivers/block/rnbd/rnbd-clt.h
@@ -90,6 +90,7 @@ struct rnbd_clt_session {
int queue_depth;
u32 max_io_size;
struct blk_mq_tag_set tag_set;
+ u32 nr_poll_queues;
struct mutex lock; /* protects state and devs_list */
struct list_head devs_list; /* list of struct rnbd_clt_dev */
refcount_t refcount;
@@ -118,6 +119,7 @@ struct rnbd_clt_dev {
enum rnbd_clt_dev_state dev_state;
char *pathname;
enum rnbd_access_mode access_mode;
+ u32 nr_poll_queues;
bool read_only;
bool rotational;
bool wc;
@@ -147,7 +149,8 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
struct rtrs_addr *paths,
size_t path_cnt, u16 port_nr,
const char *pathname,
- enum rnbd_access_mode access_mode);
+ enum rnbd_access_mode access_mode,
+ u32 nr_poll_queues);
int rnbd_clt_unmap_device(struct rnbd_clt_dev *dev, bool force,
const struct attribute *sysfs_self);
@@ -159,7 +162,6 @@ int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, size_t newsize);
int rnbd_clt_create_sysfs_files(void);
void rnbd_clt_destroy_sysfs_files(void);
-void rnbd_clt_destroy_default_group(void);
void rnbd_clt_remove_dev_symlink(struct rnbd_clt_dev *dev);
diff --git a/drivers/block/rnbd/rnbd-srv-sysfs.c b/drivers/block/rnbd/rnbd-srv-sysfs.c
index 05ffe488ddc6..acf5fced11ef 100644
--- a/drivers/block/rnbd/rnbd-srv-sysfs.c
+++ b/drivers/block/rnbd/rnbd-srv-sysfs.c
@@ -147,10 +147,7 @@ static ssize_t rnbd_srv_dev_session_force_close_store(struct kobject *kobj,
}
rnbd_srv_info(sess_dev, "force close requested\n");
-
- /* first remove sysfs itself to avoid deadlock */
- sysfs_remove_file_self(&sess_dev->kobj, &attr->attr);
- rnbd_srv_sess_dev_force_close(sess_dev);
+ rnbd_srv_sess_dev_force_close(sess_dev, attr);
return count;
}
diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
index a6a68d44f517..899dd9d7c10b 100644
--- a/drivers/block/rnbd/rnbd-srv.c
+++ b/drivers/block/rnbd/rnbd-srv.c
@@ -114,8 +114,7 @@ rnbd_get_sess_dev(int dev_id, struct rnbd_srv_session *srv_sess)
return sess_dev;
}
-static int process_rdma(struct rtrs_srv *sess,
- struct rnbd_srv_session *srv_sess,
+static int process_rdma(struct rnbd_srv_session *srv_sess,
struct rtrs_srv_op *id, void *data, u32 datalen,
const void *usr, size_t usrlen)
{
@@ -178,8 +177,10 @@ err:
return err;
}
-static void destroy_device(struct rnbd_srv_dev *dev)
+static void destroy_device(struct kref *kref)
{
+ struct rnbd_srv_dev *dev = container_of(kref, struct rnbd_srv_dev, kref);
+
WARN_ONCE(!list_empty(&dev->sess_dev_list),
"Device %s is being destroyed but still in use!\n",
dev->id);
@@ -198,18 +199,9 @@ static void destroy_device(struct rnbd_srv_dev *dev)
kfree(dev);
}
-static void destroy_device_cb(struct kref *kref)
-{
- struct rnbd_srv_dev *dev;
-
- dev = container_of(kref, struct rnbd_srv_dev, kref);
-
- destroy_device(dev);
-}
-
static void rnbd_put_srv_dev(struct rnbd_srv_dev *dev)
{
- kref_put(&dev->kref, destroy_device_cb);
+ kref_put(&dev->kref, destroy_device);
}
void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id)
@@ -306,7 +298,7 @@ static int create_sess(struct rtrs_srv *rtrs)
mutex_unlock(&sess_lock);
srv_sess->rtrs = rtrs;
- strlcpy(srv_sess->sessname, sessname, sizeof(srv_sess->sessname));
+ strscpy(srv_sess->sessname, sessname, sizeof(srv_sess->sessname));
rtrs_srv_set_sess_priv(rtrs, srv_sess);
@@ -336,18 +328,22 @@ static int rnbd_srv_link_ev(struct rtrs_srv *rtrs,
}
}
-void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev)
+void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev,
+ struct kobj_attribute *attr)
{
struct rnbd_srv_session *sess = sess_dev->sess;
sess_dev->keep_id = true;
- mutex_lock(&sess->lock);
+ /* It is already started to close by client's close message. */
+ if (!mutex_trylock(&sess->lock))
+ return;
+ /* first remove sysfs itself to avoid deadlock */
+ sysfs_remove_file_self(&sess_dev->kobj, &attr->attr);
rnbd_srv_destroy_dev_session_sysfs(sess_dev);
mutex_unlock(&sess->lock);
}
-static int process_msg_close(struct rtrs_srv *rtrs,
- struct rnbd_srv_session *srv_sess,
+static int process_msg_close(struct rnbd_srv_session *srv_sess,
void *data, size_t datalen, const void *usr,
size_t usrlen)
{
@@ -366,20 +362,18 @@ static int process_msg_close(struct rtrs_srv *rtrs,
return 0;
}
-static int process_msg_open(struct rtrs_srv *rtrs,
- struct rnbd_srv_session *srv_sess,
+static int process_msg_open(struct rnbd_srv_session *srv_sess,
const void *msg, size_t len,
void *data, size_t datalen);
-static int process_msg_sess_info(struct rtrs_srv *rtrs,
- struct rnbd_srv_session *srv_sess,
+static int process_msg_sess_info(struct rnbd_srv_session *srv_sess,
const void *msg, size_t len,
void *data, size_t datalen);
-static int rnbd_srv_rdma_ev(struct rtrs_srv *rtrs, void *priv,
- struct rtrs_srv_op *id, int dir,
- void *data, size_t datalen, const void *usr,
- size_t usrlen)
+static int rnbd_srv_rdma_ev(void *priv,
+ struct rtrs_srv_op *id, int dir,
+ void *data, size_t datalen, const void *usr,
+ size_t usrlen)
{
struct rnbd_srv_session *srv_sess = priv;
const struct rnbd_msg_hdr *hdr = usr;
@@ -393,19 +387,16 @@ static int rnbd_srv_rdma_ev(struct rtrs_srv *rtrs, void *priv,
switch (type) {
case RNBD_MSG_IO:
- return process_rdma(rtrs, srv_sess, id, data, datalen, usr,
- usrlen);
+ return process_rdma(srv_sess, id, data, datalen, usr, usrlen);
case RNBD_MSG_CLOSE:
- ret = process_msg_close(rtrs, srv_sess, data, datalen,
- usr, usrlen);
+ ret = process_msg_close(srv_sess, data, datalen, usr, usrlen);
break;
case RNBD_MSG_OPEN:
- ret = process_msg_open(rtrs, srv_sess, usr, usrlen,
- data, datalen);
+ ret = process_msg_open(srv_sess, usr, usrlen, data, datalen);
break;
case RNBD_MSG_SESS_INFO:
- ret = process_msg_sess_info(rtrs, srv_sess, usr, usrlen,
- data, datalen);
+ ret = process_msg_sess_info(srv_sess, usr, usrlen, data,
+ datalen);
break;
default:
pr_warn("Received unexpected message type %d with dir %d from session %s\n",
@@ -446,7 +437,7 @@ static struct rnbd_srv_dev *rnbd_srv_init_srv_dev(const char *id)
if (!dev)
return ERR_PTR(-ENOMEM);
- strlcpy(dev->id, id, sizeof(dev->id));
+ strscpy(dev->id, id, sizeof(dev->id));
kref_init(&dev->kref);
INIT_LIST_HEAD(&dev->sess_dev_list);
mutex_init(&dev->lock);
@@ -598,7 +589,7 @@ rnbd_srv_create_set_sess_dev(struct rnbd_srv_session *srv_sess,
kref_init(&sdev->kref);
- strlcpy(sdev->pathname, open_msg->dev_name, sizeof(sdev->pathname));
+ strscpy(sdev->pathname, open_msg->dev_name, sizeof(sdev->pathname));
sdev->rnbd_dev = rnbd_dev;
sdev->sess = srv_sess;
@@ -658,8 +649,7 @@ static char *rnbd_srv_get_full_path(struct rnbd_srv_session *srv_sess,
return full_path;
}
-static int process_msg_sess_info(struct rtrs_srv *rtrs,
- struct rnbd_srv_session *srv_sess,
+static int process_msg_sess_info(struct rnbd_srv_session *srv_sess,
const void *msg, size_t len,
void *data, size_t datalen)
{
@@ -700,8 +690,7 @@ find_srv_sess_dev(struct rnbd_srv_session *srv_sess, const char *dev_name)
return NULL;
}
-static int process_msg_open(struct rtrs_srv *rtrs,
- struct rnbd_srv_session *srv_sess,
+static int process_msg_open(struct rnbd_srv_session *srv_sess,
const void *msg, size_t len,
void *data, size_t datalen)
{
diff --git a/drivers/block/rnbd/rnbd-srv.h b/drivers/block/rnbd/rnbd-srv.h
index b157371c25ed..98ddc31eb408 100644
--- a/drivers/block/rnbd/rnbd-srv.h
+++ b/drivers/block/rnbd/rnbd-srv.h
@@ -64,7 +64,8 @@ struct rnbd_srv_sess_dev {
enum rnbd_access_mode access_mode;
};
-void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev);
+void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev,
+ struct kobj_attribute *attr);
/* rnbd-srv-sysfs.c */
int rnbd_srv_create_dev_sysfs(struct rnbd_srv_dev *dev,
diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index 227e1be4c6f9..83636714b8d7 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -392,7 +392,7 @@ static irqreturn_t rsxx_isr(int irq, void *pdata)
}
/*----------------- Card Event Handler -------------------*/
-static const char * const rsxx_card_state_to_str(unsigned int state)
+static const char *rsxx_card_state_to_str(unsigned int state)
{
static const char * const state_strings[] = {
"Unknown", "Shutdown", "Starting", "Formatting",
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index cc6a0bc6c005..2917b21f48ff 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -816,8 +816,6 @@ static int swim_floppy_init(struct swim_priv *swd)
}
swd->unit[drive].disk->queue = q;
- blk_queue_bounce_limit(swd->unit[drive].disk->queue,
- BLK_BOUNCE_HIGH);
swd->unit[drive].disk->queue->queuedata = &swd->unit[drive];
swd->unit[drive].swd = swd;
}
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index c2d922d125e2..a515d0c1d2cb 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -234,7 +234,6 @@ static unsigned short write_postamble[] = {
};
static void seek_track(struct floppy_state *fs, int n);
-static void init_dma(struct dbdma_cmd *cp, int cmd, void *buf, int count);
static void act(struct floppy_state *fs);
static void scan_timeout(struct timer_list *t);
static void seek_timeout(struct timer_list *t);
@@ -404,12 +403,28 @@ static inline void seek_track(struct floppy_state *fs, int n)
fs->settle_time = 0;
}
+/*
+ * XXX: this is a horrible hack, but at least allows ppc32 to get
+ * out of defining virt_to_bus, and this driver out of using the
+ * deprecated block layer bounce buffering for highmem addresses
+ * for no good reason.
+ */
+static unsigned long swim3_phys_to_bus(phys_addr_t paddr)
+{
+ return paddr + PCI_DRAM_OFFSET;
+}
+
+static phys_addr_t swim3_bio_phys(struct bio *bio)
+{
+ return page_to_phys(bio_page(bio)) + bio_offset(bio);
+}
+
static inline void init_dma(struct dbdma_cmd *cp, int cmd,
- void *buf, int count)
+ phys_addr_t paddr, int count)
{
cp->req_count = cpu_to_le16(count);
cp->command = cpu_to_le16(cmd);
- cp->phy_addr = cpu_to_le32(virt_to_bus(buf));
+ cp->phy_addr = cpu_to_le32(swim3_phys_to_bus(paddr));
cp->xfer_status = 0;
}
@@ -441,16 +456,18 @@ static inline void setup_transfer(struct floppy_state *fs)
out_8(&sw->sector, fs->req_sector);
out_8(&sw->nsect, n);
out_8(&sw->gap3, 0);
- out_le32(&dr->cmdptr, virt_to_bus(cp));
+ out_le32(&dr->cmdptr, swim3_phys_to_bus(virt_to_phys(cp)));
if (rq_data_dir(req) == WRITE) {
/* Set up 3 dma commands: write preamble, data, postamble */
- init_dma(cp, OUTPUT_MORE, write_preamble, sizeof(write_preamble));
+ init_dma(cp, OUTPUT_MORE, virt_to_phys(write_preamble),
+ sizeof(write_preamble));
++cp;
- init_dma(cp, OUTPUT_MORE, bio_data(req->bio), 512);
+ init_dma(cp, OUTPUT_MORE, swim3_bio_phys(req->bio), 512);
++cp;
- init_dma(cp, OUTPUT_LAST, write_postamble, sizeof(write_postamble));
+ init_dma(cp, OUTPUT_LAST, virt_to_phys(write_postamble),
+ sizeof(write_postamble));
} else {
- init_dma(cp, INPUT_LAST, bio_data(req->bio), n * 512);
+ init_dma(cp, INPUT_LAST, swim3_bio_phys(req->bio), n * 512);
}
++cp;
out_le16(&cp->command, DBDMA_STOP);
@@ -1201,7 +1218,6 @@ static int swim3_attach(struct macio_dev *mdev,
disk->queue = NULL;
goto out_put_disk;
}
- blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
disk->queue->queuedata = fs;
rc = swim3_add_device(mdev, floppy_count);
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
deleted file mode 100644
index 664280f23bee..000000000000
--- a/drivers/block/umem.c
+++ /dev/null
@@ -1,1130 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * mm.c - Micro Memory(tm) PCI memory board block device driver - v2.3
- *
- * (C) 2001 San Mehat <nettwerk@valinux.com>
- * (C) 2001 Johannes Erdfelt <jerdfelt@valinux.com>
- * (C) 2001 NeilBrown <neilb@cse.unsw.edu.au>
- *
- * This driver for the Micro Memory PCI Memory Module with Battery Backup
- * is Copyright Micro Memory Inc 2001-2002. All rights reserved.
- *
- * This driver provides a standard block device interface for Micro Memory(tm)
- * PCI based RAM boards.
- * 10/05/01: Phap Nguyen - Rebuilt the driver
- * 10/22/01: Phap Nguyen - v2.1 Added disk partitioning
- * 29oct2001:NeilBrown - Use make_request_fn instead of request_fn
- * - use stand disk partitioning (so fdisk works).
- * 08nov2001:NeilBrown - change driver name from "mm" to "umem"
- * - incorporate into main kernel
- * 08apr2002:NeilBrown - Move some of interrupt handle to tasklet
- * - use spin_lock_bh instead of _irq
- * - Never block on make_request. queue
- * bh's instead.
- * - unregister umem from devfs at mod unload
- * - Change version to 2.3
- * 07Nov2001:Phap Nguyen - Select pci read command: 06, 12, 15 (Decimal)
- * 07Jan2002: P. Nguyen - Used PCI Memory Write & Invalidate for DMA
- * 15May2002:NeilBrown - convert to bio for 2.5
- * 17May2002:NeilBrown - remove init_mem initialisation. Instead detect
- * - a sequence of writes that cover the card, and
- * - set initialised bit then.
- */
-
-#undef DEBUG /* #define DEBUG if you want debugging info (pr_debug) */
-#include <linux/fs.h>
-#include <linux/bio.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/gfp.h>
-#include <linux/ioctl.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/timer.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-
-#include <linux/fcntl.h> /* O_ACCMODE */
-#include <linux/hdreg.h> /* HDIO_GETGEO */
-
-#include "umem.h"
-
-#include <linux/uaccess.h>
-#include <asm/io.h>
-
-#define MM_MAXCARDS 4
-#define MM_RAHEAD 2 /* two sectors */
-#define MM_BLKSIZE 1024 /* 1k blocks */
-#define MM_HARDSECT 512 /* 512-byte hardware sectors */
-#define MM_SHIFT 6 /* max 64 partitions on 4 cards */
-
-/*
- * Version Information
- */
-
-#define DRIVER_NAME "umem"
-#define DRIVER_VERSION "v2.3"
-#define DRIVER_AUTHOR "San Mehat, Johannes Erdfelt, NeilBrown"
-#define DRIVER_DESC "Micro Memory(tm) PCI memory board block driver"
-
-static int debug;
-/* #define HW_TRACE(x) writeb(x,cards[0].csr_remap + MEMCTRLSTATUS_MAGIC) */
-#define HW_TRACE(x)
-
-#define DEBUG_LED_ON_TRANSFER 0x01
-#define DEBUG_BATTERY_POLLING 0x02
-
-module_param(debug, int, 0644);
-MODULE_PARM_DESC(debug, "Debug bitmask");
-
-static int pci_read_cmd = 0x0C; /* Read Multiple */
-module_param(pci_read_cmd, int, 0);
-MODULE_PARM_DESC(pci_read_cmd, "PCI read command");
-
-static int pci_write_cmd = 0x0F; /* Write and Invalidate */
-module_param(pci_write_cmd, int, 0);
-MODULE_PARM_DESC(pci_write_cmd, "PCI write command");
-
-static int pci_cmds;
-
-static int major_nr;
-
-#include <linux/blkdev.h>
-#include <linux/blkpg.h>
-
-struct cardinfo {
- struct pci_dev *dev;
-
- unsigned char __iomem *csr_remap;
- unsigned int mm_size; /* size in kbytes */
-
- unsigned int init_size; /* initial segment, in sectors,
- * that we know to
- * have been written
- */
- struct bio *bio, *currentbio, **biotail;
- struct bvec_iter current_iter;
-
- struct request_queue *queue;
-
- struct mm_page {
- dma_addr_t page_dma;
- struct mm_dma_desc *desc;
- int cnt, headcnt;
- struct bio *bio, **biotail;
- struct bvec_iter iter;
- } mm_pages[2];
-#define DESC_PER_PAGE ((PAGE_SIZE*2)/sizeof(struct mm_dma_desc))
-
- int Active, Ready;
-
- struct tasklet_struct tasklet;
- unsigned int dma_status;
-
- struct {
- int good;
- int warned;
- unsigned long last_change;
- } battery[2];
-
- spinlock_t lock;
- int check_batteries;
-
- int flags;
-};
-
-static struct cardinfo cards[MM_MAXCARDS];
-static struct timer_list battery_timer;
-
-static int num_cards;
-
-static struct gendisk *mm_gendisk[MM_MAXCARDS];
-
-static void check_batteries(struct cardinfo *card);
-
-static int get_userbit(struct cardinfo *card, int bit)
-{
- unsigned char led;
-
- led = readb(card->csr_remap + MEMCTRLCMD_LEDCTRL);
- return led & bit;
-}
-
-static int set_userbit(struct cardinfo *card, int bit, unsigned char state)
-{
- unsigned char led;
-
- led = readb(card->csr_remap + MEMCTRLCMD_LEDCTRL);
- if (state)
- led |= bit;
- else
- led &= ~bit;
- writeb(led, card->csr_remap + MEMCTRLCMD_LEDCTRL);
-
- return 0;
-}
-
-/*
- * NOTE: For the power LED, use the LED_POWER_* macros since they differ
- */
-static void set_led(struct cardinfo *card, int shift, unsigned char state)
-{
- unsigned char led;
-
- led = readb(card->csr_remap + MEMCTRLCMD_LEDCTRL);
- if (state == LED_FLIP)
- led ^= (1<<shift);
- else {
- led &= ~(0x03 << shift);
- led |= (state << shift);
- }
- writeb(led, card->csr_remap + MEMCTRLCMD_LEDCTRL);
-
-}
-
-#ifdef MM_DIAG
-static void dump_regs(struct cardinfo *card)
-{
- unsigned char *p;
- int i, i1;
-
- p = card->csr_remap;
- for (i = 0; i < 8; i++) {
- printk(KERN_DEBUG "%p ", p);
-
- for (i1 = 0; i1 < 16; i1++)
- printk("%02x ", *p++);
-
- printk("\n");
- }
-}
-#endif
-
-static void dump_dmastat(struct cardinfo *card, unsigned int dmastat)
-{
- dev_printk(KERN_DEBUG, &card->dev->dev, "DMAstat - ");
- if (dmastat & DMASCR_ANY_ERR)
- printk(KERN_CONT "ANY_ERR ");
- if (dmastat & DMASCR_MBE_ERR)
- printk(KERN_CONT "MBE_ERR ");
- if (dmastat & DMASCR_PARITY_ERR_REP)
- printk(KERN_CONT "PARITY_ERR_REP ");
- if (dmastat & DMASCR_PARITY_ERR_DET)
- printk(KERN_CONT "PARITY_ERR_DET ");
- if (dmastat & DMASCR_SYSTEM_ERR_SIG)
- printk(KERN_CONT "SYSTEM_ERR_SIG ");
- if (dmastat & DMASCR_TARGET_ABT)
- printk(KERN_CONT "TARGET_ABT ");
- if (dmastat & DMASCR_MASTER_ABT)
- printk(KERN_CONT "MASTER_ABT ");
- if (dmastat & DMASCR_CHAIN_COMPLETE)
- printk(KERN_CONT "CHAIN_COMPLETE ");
- if (dmastat & DMASCR_DMA_COMPLETE)
- printk(KERN_CONT "DMA_COMPLETE ");
- printk("\n");
-}
-
-/*
- * Theory of request handling
- *
- * Each bio is assigned to one mm_dma_desc - which may not be enough FIXME
- * We have two pages of mm_dma_desc, holding about 64 descriptors
- * each. These are allocated at init time.
- * One page is "Ready" and is either full, or can have request added.
- * The other page might be "Active", which DMA is happening on it.
- *
- * Whenever IO on the active page completes, the Ready page is activated
- * and the ex-Active page is clean out and made Ready.
- * Otherwise the Ready page is only activated when it becomes full.
- *
- * If a request arrives while both pages a full, it is queued, and b_rdev is
- * overloaded to record whether it was a read or a write.
- *
- * The interrupt handler only polls the device to clear the interrupt.
- * The processing of the result is done in a tasklet.
- */
-
-static void mm_start_io(struct cardinfo *card)
-{
- /* we have the lock, we know there is
- * no IO active, and we know that card->Active
- * is set
- */
- struct mm_dma_desc *desc;
- struct mm_page *page;
- int offset;
-
- /* make the last descriptor end the chain */
- page = &card->mm_pages[card->Active];
- pr_debug("start_io: %d %d->%d\n",
- card->Active, page->headcnt, page->cnt - 1);
- desc = &page->desc[page->cnt-1];
-
- desc->control_bits |= cpu_to_le32(DMASCR_CHAIN_COMP_EN);
- desc->control_bits &= ~cpu_to_le32(DMASCR_CHAIN_EN);
- desc->sem_control_bits = desc->control_bits;
-
-
- if (debug & DEBUG_LED_ON_TRANSFER)
- set_led(card, LED_REMOVE, LED_ON);
-
- desc = &page->desc[page->headcnt];
- writel(0, card->csr_remap + DMA_PCI_ADDR);
- writel(0, card->csr_remap + DMA_PCI_ADDR + 4);
-
- writel(0, card->csr_remap + DMA_LOCAL_ADDR);
- writel(0, card->csr_remap + DMA_LOCAL_ADDR + 4);
-
- writel(0, card->csr_remap + DMA_TRANSFER_SIZE);
- writel(0, card->csr_remap + DMA_TRANSFER_SIZE + 4);
-
- writel(0, card->csr_remap + DMA_SEMAPHORE_ADDR);
- writel(0, card->csr_remap + DMA_SEMAPHORE_ADDR + 4);
-
- offset = ((char *)desc) - ((char *)page->desc);
- writel(cpu_to_le32((page->page_dma+offset) & 0xffffffff),
- card->csr_remap + DMA_DESCRIPTOR_ADDR);
- /* Force the value to u64 before shifting otherwise >> 32 is undefined C
- * and on some ports will do nothing ! */
- writel(cpu_to_le32(((u64)page->page_dma)>>32),
- card->csr_remap + DMA_DESCRIPTOR_ADDR + 4);
-
- /* Go, go, go */
- writel(cpu_to_le32(DMASCR_GO | DMASCR_CHAIN_EN | pci_cmds),
- card->csr_remap + DMA_STATUS_CTRL);
-}
-
-static int add_bio(struct cardinfo *card);
-
-static void activate(struct cardinfo *card)
-{
- /* if No page is Active, and Ready is
- * not empty, then switch Ready page
- * to active and start IO.
- * Then add any bh's that are available to Ready
- */
-
- do {
- while (add_bio(card))
- ;
-
- if (card->Active == -1 &&
- card->mm_pages[card->Ready].cnt > 0) {
- card->Active = card->Ready;
- card->Ready = 1-card->Ready;
- mm_start_io(card);
- }
-
- } while (card->Active == -1 && add_bio(card));
-}
-
-static inline void reset_page(struct mm_page *page)
-{
- page->cnt = 0;
- page->headcnt = 0;
- page->bio = NULL;
- page->biotail = &page->bio;
-}
-
-/*
- * If there is room on Ready page, take
- * one bh off list and add it.
- * return 1 if there was room, else 0.
- */
-static int add_bio(struct cardinfo *card)
-{
- struct mm_page *p;
- struct mm_dma_desc *desc;
- dma_addr_t dma_handle;
- int offset;
- struct bio *bio;
- struct bio_vec vec;
-
- bio = card->currentbio;
- if (!bio && card->bio) {
- card->currentbio = card->bio;
- card->current_iter = card->bio->bi_iter;
- card->bio = card->bio->bi_next;
- if (card->bio == NULL)
- card->biotail = &card->bio;
- card->currentbio->bi_next = NULL;
- return 1;
- }
- if (!bio)
- return 0;
-
- if (card->mm_pages[card->Ready].cnt >= DESC_PER_PAGE)
- return 0;
-
- vec = bio_iter_iovec(bio, card->current_iter);
-
- dma_handle = dma_map_page(&card->dev->dev,
- vec.bv_page,
- vec.bv_offset,
- vec.bv_len,
- bio_op(bio) == REQ_OP_READ ?
- DMA_FROM_DEVICE : DMA_TO_DEVICE);
-
- p = &card->mm_pages[card->Ready];
- desc = &p->desc[p->cnt];
- p->cnt++;
- if (p->bio == NULL)
- p->iter = card->current_iter;
- if ((p->biotail) != &bio->bi_next) {
- *(p->biotail) = bio;
- p->biotail = &(bio->bi_next);
- bio->bi_next = NULL;
- }
-
- desc->data_dma_handle = dma_handle;
-
- desc->pci_addr = cpu_to_le64((u64)desc->data_dma_handle);
- desc->local_addr = cpu_to_le64(card->current_iter.bi_sector << 9);
- desc->transfer_size = cpu_to_le32(vec.bv_len);
- offset = (((char *)&desc->sem_control_bits) - ((char *)p->desc));
- desc->sem_addr = cpu_to_le64((u64)(p->page_dma+offset));
- desc->zero1 = desc->zero2 = 0;
- offset = (((char *)(desc+1)) - ((char *)p->desc));
- desc->next_desc_addr = cpu_to_le64(p->page_dma+offset);
- desc->control_bits = cpu_to_le32(DMASCR_GO|DMASCR_ERR_INT_EN|
- DMASCR_PARITY_INT_EN|
- DMASCR_CHAIN_EN |
- DMASCR_SEM_EN |
- pci_cmds);
- if (bio_op(bio) == REQ_OP_WRITE)
- desc->control_bits |= cpu_to_le32(DMASCR_TRANSFER_READ);
- desc->sem_control_bits = desc->control_bits;
-
-
- bio_advance_iter(bio, &card->current_iter, vec.bv_len);
- if (!card->current_iter.bi_size)
- card->currentbio = NULL;
-
- return 1;
-}
-
-static void process_page(unsigned long data)
-{
- /* check if any of the requests in the page are DMA_COMPLETE,
- * and deal with them appropriately.
- * If we find a descriptor without DMA_COMPLETE in the semaphore, then
- * dma must have hit an error on that descriptor, so use dma_status
- * instead and assume that all following descriptors must be re-tried.
- */
- struct mm_page *page;
- struct bio *return_bio = NULL;
- struct cardinfo *card = (struct cardinfo *)data;
- unsigned int dma_status = card->dma_status;
-
- spin_lock(&card->lock);
- if (card->Active < 0)
- goto out_unlock;
- page = &card->mm_pages[card->Active];
-
- while (page->headcnt < page->cnt) {
- struct bio *bio = page->bio;
- struct mm_dma_desc *desc = &page->desc[page->headcnt];
- int control = le32_to_cpu(desc->sem_control_bits);
- int last = 0;
- struct bio_vec vec;
-
- if (!(control & DMASCR_DMA_COMPLETE)) {
- control = dma_status;
- last = 1;
- }
-
- page->headcnt++;
- vec = bio_iter_iovec(bio, page->iter);
- bio_advance_iter(bio, &page->iter, vec.bv_len);
-
- if (!page->iter.bi_size) {
- page->bio = bio->bi_next;
- if (page->bio)
- page->iter = page->bio->bi_iter;
- }
-
- dma_unmap_page(&card->dev->dev, desc->data_dma_handle,
- vec.bv_len,
- (control & DMASCR_TRANSFER_READ) ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
- if (control & DMASCR_HARD_ERROR) {
- /* error */
- bio->bi_status = BLK_STS_IOERR;
- dev_printk(KERN_WARNING, &card->dev->dev,
- "I/O error on sector %d/%d\n",
- le32_to_cpu(desc->local_addr)>>9,
- le32_to_cpu(desc->transfer_size));
- dump_dmastat(card, control);
- } else if (op_is_write(bio_op(bio)) &&
- le32_to_cpu(desc->local_addr) >> 9 ==
- card->init_size) {
- card->init_size += le32_to_cpu(desc->transfer_size) >> 9;
- if (card->init_size >> 1 >= card->mm_size) {
- dev_printk(KERN_INFO, &card->dev->dev,
- "memory now initialised\n");
- set_userbit(card, MEMORY_INITIALIZED, 1);
- }
- }
- if (bio != page->bio) {
- bio->bi_next = return_bio;
- return_bio = bio;
- }
-
- if (last)
- break;
- }
-
- if (debug & DEBUG_LED_ON_TRANSFER)
- set_led(card, LED_REMOVE, LED_OFF);
-
- if (card->check_batteries) {
- card->check_batteries = 0;
- check_batteries(card);
- }
- if (page->headcnt >= page->cnt) {
- reset_page(page);
- card->Active = -1;
- activate(card);
- } else {
- /* haven't finished with this one yet */
- pr_debug("do some more\n");
- mm_start_io(card);
- }
- out_unlock:
- spin_unlock(&card->lock);
-
- while (return_bio) {
- struct bio *bio = return_bio;
-
- return_bio = bio->bi_next;
- bio->bi_next = NULL;
- bio_endio(bio);
- }
-}
-
-static void mm_unplug(struct blk_plug_cb *cb, bool from_schedule)
-{
- struct cardinfo *card = cb->data;
-
- spin_lock_irq(&card->lock);
- activate(card);
- spin_unlock_irq(&card->lock);
- kfree(cb);
-}
-
-static int mm_check_plugged(struct cardinfo *card)
-{
- return !!blk_check_plugged(mm_unplug, card, sizeof(struct blk_plug_cb));
-}
-
-static blk_qc_t mm_submit_bio(struct bio *bio)
-{
- struct cardinfo *card = bio->bi_bdev->bd_disk->private_data;
-
- pr_debug("mm_make_request %llu %u\n",
- (unsigned long long)bio->bi_iter.bi_sector,
- bio->bi_iter.bi_size);
-
- blk_queue_split(&bio);
-
- spin_lock_irq(&card->lock);
- *card->biotail = bio;
- bio->bi_next = NULL;
- card->biotail = &bio->bi_next;
- if (op_is_sync(bio->bi_opf) || !mm_check_plugged(card))
- activate(card);
- spin_unlock_irq(&card->lock);
-
- return BLK_QC_T_NONE;
-}
-
-static irqreturn_t mm_interrupt(int irq, void *__card)
-{
- struct cardinfo *card = (struct cardinfo *) __card;
- unsigned int dma_status;
- unsigned short cfg_status;
-
-HW_TRACE(0x30);
-
- dma_status = le32_to_cpu(readl(card->csr_remap + DMA_STATUS_CTRL));
-
- if (!(dma_status & (DMASCR_ERROR_MASK | DMASCR_CHAIN_COMPLETE))) {
- /* interrupt wasn't for me ... */
- return IRQ_NONE;
- }
-
- /* clear COMPLETION interrupts */
- if (card->flags & UM_FLAG_NO_BYTE_STATUS)
- writel(cpu_to_le32(DMASCR_DMA_COMPLETE|DMASCR_CHAIN_COMPLETE),
- card->csr_remap + DMA_STATUS_CTRL);
- else
- writeb((DMASCR_DMA_COMPLETE|DMASCR_CHAIN_COMPLETE) >> 16,
- card->csr_remap + DMA_STATUS_CTRL + 2);
-
- /* log errors and clear interrupt status */
- if (dma_status & DMASCR_ANY_ERR) {
- unsigned int data_log1, data_log2;
- unsigned int addr_log1, addr_log2;
- unsigned char stat, count, syndrome, check;
-
- stat = readb(card->csr_remap + MEMCTRLCMD_ERRSTATUS);
-
- data_log1 = le32_to_cpu(readl(card->csr_remap +
- ERROR_DATA_LOG));
- data_log2 = le32_to_cpu(readl(card->csr_remap +
- ERROR_DATA_LOG + 4));
- addr_log1 = le32_to_cpu(readl(card->csr_remap +
- ERROR_ADDR_LOG));
- addr_log2 = readb(card->csr_remap + ERROR_ADDR_LOG + 4);
-
- count = readb(card->csr_remap + ERROR_COUNT);
- syndrome = readb(card->csr_remap + ERROR_SYNDROME);
- check = readb(card->csr_remap + ERROR_CHECK);
-
- dump_dmastat(card, dma_status);
-
- if (stat & 0x01)
- dev_printk(KERN_ERR, &card->dev->dev,
- "Memory access error detected (err count %d)\n",
- count);
- if (stat & 0x02)
- dev_printk(KERN_ERR, &card->dev->dev,
- "Multi-bit EDC error\n");
-
- dev_printk(KERN_ERR, &card->dev->dev,
- "Fault Address 0x%02x%08x, Fault Data 0x%08x%08x\n",
- addr_log2, addr_log1, data_log2, data_log1);
- dev_printk(KERN_ERR, &card->dev->dev,
- "Fault Check 0x%02x, Fault Syndrome 0x%02x\n",
- check, syndrome);
-
- writeb(0, card->csr_remap + ERROR_COUNT);
- }
-
- if (dma_status & DMASCR_PARITY_ERR_REP) {
- dev_printk(KERN_ERR, &card->dev->dev,
- "PARITY ERROR REPORTED\n");
- pci_read_config_word(card->dev, PCI_STATUS, &cfg_status);
- pci_write_config_word(card->dev, PCI_STATUS, cfg_status);
- }
-
- if (dma_status & DMASCR_PARITY_ERR_DET) {
- dev_printk(KERN_ERR, &card->dev->dev,
- "PARITY ERROR DETECTED\n");
- pci_read_config_word(card->dev, PCI_STATUS, &cfg_status);
- pci_write_config_word(card->dev, PCI_STATUS, cfg_status);
- }
-
- if (dma_status & DMASCR_SYSTEM_ERR_SIG) {
- dev_printk(KERN_ERR, &card->dev->dev, "SYSTEM ERROR\n");
- pci_read_config_word(card->dev, PCI_STATUS, &cfg_status);
- pci_write_config_word(card->dev, PCI_STATUS, cfg_status);
- }
-
- if (dma_status & DMASCR_TARGET_ABT) {
- dev_printk(KERN_ERR, &card->dev->dev, "TARGET ABORT\n");
- pci_read_config_word(card->dev, PCI_STATUS, &cfg_status);
- pci_write_config_word(card->dev, PCI_STATUS, cfg_status);
- }
-
- if (dma_status & DMASCR_MASTER_ABT) {
- dev_printk(KERN_ERR, &card->dev->dev, "MASTER ABORT\n");
- pci_read_config_word(card->dev, PCI_STATUS, &cfg_status);
- pci_write_config_word(card->dev, PCI_STATUS, cfg_status);
- }
-
- /* and process the DMA descriptors */
- card->dma_status = dma_status;
- tasklet_schedule(&card->tasklet);
-
-HW_TRACE(0x36);
-
- return IRQ_HANDLED;
-}
-
-/*
- * If both batteries are good, no LED
- * If either battery has been warned, solid LED
- * If both batteries are bad, flash the LED quickly
- * If either battery is bad, flash the LED semi quickly
- */
-static void set_fault_to_battery_status(struct cardinfo *card)
-{
- if (card->battery[0].good && card->battery[1].good)
- set_led(card, LED_FAULT, LED_OFF);
- else if (card->battery[0].warned || card->battery[1].warned)
- set_led(card, LED_FAULT, LED_ON);
- else if (!card->battery[0].good && !card->battery[1].good)
- set_led(card, LED_FAULT, LED_FLASH_7_0);
- else
- set_led(card, LED_FAULT, LED_FLASH_3_5);
-}
-
-static void init_battery_timer(void);
-
-static int check_battery(struct cardinfo *card, int battery, int status)
-{
- if (status != card->battery[battery].good) {
- card->battery[battery].good = !card->battery[battery].good;
- card->battery[battery].last_change = jiffies;
-
- if (card->battery[battery].good) {
- dev_printk(KERN_ERR, &card->dev->dev,
- "Battery %d now good\n", battery + 1);
- card->battery[battery].warned = 0;
- } else
- dev_printk(KERN_ERR, &card->dev->dev,
- "Battery %d now FAILED\n", battery + 1);
-
- return 1;
- } else if (!card->battery[battery].good &&
- !card->battery[battery].warned &&
- time_after_eq(jiffies, card->battery[battery].last_change +
- (HZ * 60 * 60 * 5))) {
- dev_printk(KERN_ERR, &card->dev->dev,
- "Battery %d still FAILED after 5 hours\n", battery + 1);
- card->battery[battery].warned = 1;
-
- return 1;
- }
-
- return 0;
-}
-
-static void check_batteries(struct cardinfo *card)
-{
- /* NOTE: this must *never* be called while the card
- * is doing (bus-to-card) DMA, or you will need the
- * reset switch
- */
- unsigned char status;
- int ret1, ret2;
-
- status = readb(card->csr_remap + MEMCTRLSTATUS_BATTERY);
- if (debug & DEBUG_BATTERY_POLLING)
- dev_printk(KERN_DEBUG, &card->dev->dev,
- "checking battery status, 1 = %s, 2 = %s\n",
- (status & BATTERY_1_FAILURE) ? "FAILURE" : "OK",
- (status & BATTERY_2_FAILURE) ? "FAILURE" : "OK");
-
- ret1 = check_battery(card, 0, !(status & BATTERY_1_FAILURE));
- ret2 = check_battery(card, 1, !(status & BATTERY_2_FAILURE));
-
- if (ret1 || ret2)
- set_fault_to_battery_status(card);
-}
-
-static void check_all_batteries(struct timer_list *unused)
-{
- int i;
-
- for (i = 0; i < num_cards; i++)
- if (!(cards[i].flags & UM_FLAG_NO_BATT)) {
- struct cardinfo *card = &cards[i];
- spin_lock_bh(&card->lock);
- if (card->Active >= 0)
- card->check_batteries = 1;
- else
- check_batteries(card);
- spin_unlock_bh(&card->lock);
- }
-
- init_battery_timer();
-}
-
-static void init_battery_timer(void)
-{
- timer_setup(&battery_timer, check_all_batteries, 0);
- battery_timer.expires = jiffies + (HZ * 60);
- add_timer(&battery_timer);
-}
-
-static void del_battery_timer(void)
-{
- del_timer(&battery_timer);
-}
-
-/*
- * Note no locks taken out here. In a worst case scenario, we could drop
- * a chunk of system memory. But that should never happen, since validation
- * happens at open or mount time, when locks are held.
- *
- * That's crap, since doing that while some partitions are opened
- * or mounted will give you really nasty results.
- */
-static int mm_revalidate(struct gendisk *disk)
-{
- struct cardinfo *card = disk->private_data;
- set_capacity(disk, card->mm_size << 1);
- return 0;
-}
-
-static int mm_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
- struct cardinfo *card = bdev->bd_disk->private_data;
- int size = card->mm_size * (1024 / MM_HARDSECT);
-
- /*
- * get geometry: we have to fake one... trim the size to a
- * multiple of 2048 (1M): tell we have 32 sectors, 64 heads,
- * whatever cylinders.
- */
- geo->heads = 64;
- geo->sectors = 32;
- geo->cylinders = size / (geo->heads * geo->sectors);
- return 0;
-}
-
-static const struct block_device_operations mm_fops = {
- .owner = THIS_MODULE,
- .submit_bio = mm_submit_bio,
- .getgeo = mm_getgeo,
- .revalidate_disk = mm_revalidate,
-};
-
-static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
-{
- int ret;
- struct cardinfo *card = &cards[num_cards];
- unsigned char mem_present;
- unsigned char batt_status;
- unsigned int saved_bar, data;
- unsigned long csr_base;
- unsigned long csr_len;
- int magic_number;
- static int printed_version;
-
- if (!printed_version++)
- printk(KERN_INFO DRIVER_VERSION " : " DRIVER_DESC "\n");
-
- ret = pci_enable_device(dev);
- if (ret)
- return ret;
-
- pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0xF8);
- pci_set_master(dev);
-
- card->dev = dev;
-
- csr_base = pci_resource_start(dev, 0);
- csr_len = pci_resource_len(dev, 0);
- if (!csr_base || !csr_len)
- return -ENODEV;
-
- dev_printk(KERN_INFO, &dev->dev,
- "Micro Memory(tm) controller found (PCI Mem Module (Battery Backup))\n");
-
- if (dma_set_mask(&dev->dev, DMA_BIT_MASK(64)) &&
- dma_set_mask(&dev->dev, DMA_BIT_MASK(32))) {
- dev_printk(KERN_WARNING, &dev->dev, "NO suitable DMA found\n");
- return -ENOMEM;
- }
-
- ret = pci_request_regions(dev, DRIVER_NAME);
- if (ret) {
- dev_printk(KERN_ERR, &card->dev->dev,
- "Unable to request memory region\n");
- goto failed_req_csr;
- }
-
- card->csr_remap = ioremap(csr_base, csr_len);
- if (!card->csr_remap) {
- dev_printk(KERN_ERR, &card->dev->dev,
- "Unable to remap memory region\n");
- ret = -ENOMEM;
-
- goto failed_remap_csr;
- }
-
- dev_printk(KERN_INFO, &card->dev->dev,
- "CSR 0x%08lx -> 0x%p (0x%lx)\n",
- csr_base, card->csr_remap, csr_len);
-
- switch (card->dev->device) {
- case 0x5415:
- card->flags |= UM_FLAG_NO_BYTE_STATUS | UM_FLAG_NO_BATTREG;
- magic_number = 0x59;
- break;
-
- case 0x5425:
- card->flags |= UM_FLAG_NO_BYTE_STATUS;
- magic_number = 0x5C;
- break;
-
- case 0x6155:
- card->flags |= UM_FLAG_NO_BYTE_STATUS |
- UM_FLAG_NO_BATTREG | UM_FLAG_NO_BATT;
- magic_number = 0x99;
- break;
-
- default:
- magic_number = 0x100;
- break;
- }
-
- if (readb(card->csr_remap + MEMCTRLSTATUS_MAGIC) != magic_number) {
- dev_printk(KERN_ERR, &card->dev->dev, "Magic number invalid\n");
- ret = -ENOMEM;
- goto failed_magic;
- }
-
- card->mm_pages[0].desc = dma_alloc_coherent(&card->dev->dev,
- PAGE_SIZE * 2, &card->mm_pages[0].page_dma, GFP_KERNEL);
- card->mm_pages[1].desc = dma_alloc_coherent(&card->dev->dev,
- PAGE_SIZE * 2, &card->mm_pages[1].page_dma, GFP_KERNEL);
- if (card->mm_pages[0].desc == NULL ||
- card->mm_pages[1].desc == NULL) {
- dev_printk(KERN_ERR, &card->dev->dev, "alloc failed\n");
- ret = -ENOMEM;
- goto failed_alloc;
- }
- reset_page(&card->mm_pages[0]);
- reset_page(&card->mm_pages[1]);
- card->Ready = 0; /* page 0 is ready */
- card->Active = -1; /* no page is active */
- card->bio = NULL;
- card->biotail = &card->bio;
- spin_lock_init(&card->lock);
-
- card->queue = blk_alloc_queue(NUMA_NO_NODE);
- if (!card->queue) {
- ret = -ENOMEM;
- goto failed_alloc;
- }
-
- tasklet_init(&card->tasklet, process_page, (unsigned long)card);
-
- card->check_batteries = 0;
-
- mem_present = readb(card->csr_remap + MEMCTRLSTATUS_MEMORY);
- switch (mem_present) {
- case MEM_128_MB:
- card->mm_size = 1024 * 128;
- break;
- case MEM_256_MB:
- card->mm_size = 1024 * 256;
- break;
- case MEM_512_MB:
- card->mm_size = 1024 * 512;
- break;
- case MEM_1_GB:
- card->mm_size = 1024 * 1024;
- break;
- case MEM_2_GB:
- card->mm_size = 1024 * 2048;
- break;
- default:
- card->mm_size = 0;
- break;
- }
-
- /* Clear the LED's we control */
- set_led(card, LED_REMOVE, LED_OFF);
- set_led(card, LED_FAULT, LED_OFF);
-
- batt_status = readb(card->csr_remap + MEMCTRLSTATUS_BATTERY);
-
- card->battery[0].good = !(batt_status & BATTERY_1_FAILURE);
- card->battery[1].good = !(batt_status & BATTERY_2_FAILURE);
- card->battery[0].last_change = card->battery[1].last_change = jiffies;
-
- if (card->flags & UM_FLAG_NO_BATT)
- dev_printk(KERN_INFO, &card->dev->dev,
- "Size %d KB\n", card->mm_size);
- else {
- dev_printk(KERN_INFO, &card->dev->dev,
- "Size %d KB, Battery 1 %s (%s), Battery 2 %s (%s)\n",
- card->mm_size,
- batt_status & BATTERY_1_DISABLED ? "Disabled" : "Enabled",
- card->battery[0].good ? "OK" : "FAILURE",
- batt_status & BATTERY_2_DISABLED ? "Disabled" : "Enabled",
- card->battery[1].good ? "OK" : "FAILURE");
-
- set_fault_to_battery_status(card);
- }
-
- pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, &saved_bar);
- data = 0xffffffff;
- pci_write_config_dword(dev, PCI_BASE_ADDRESS_1, data);
- pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, &data);
- pci_write_config_dword(dev, PCI_BASE_ADDRESS_1, saved_bar);
- data &= 0xfffffff0;
- data = ~data;
- data += 1;
-
- if (request_irq(dev->irq, mm_interrupt, IRQF_SHARED, DRIVER_NAME,
- card)) {
- dev_printk(KERN_ERR, &card->dev->dev,
- "Unable to allocate IRQ\n");
- ret = -ENODEV;
- goto failed_req_irq;
- }
-
- dev_printk(KERN_INFO, &card->dev->dev,
- "Window size %d bytes, IRQ %d\n", data, dev->irq);
-
- pci_set_drvdata(dev, card);
-
- if (pci_write_cmd != 0x0F) /* If not Memory Write & Invalidate */
- pci_write_cmd = 0x07; /* then Memory Write command */
-
- if (pci_write_cmd & 0x08) { /* use Memory Write and Invalidate */
- unsigned short cfg_command;
- pci_read_config_word(dev, PCI_COMMAND, &cfg_command);
- cfg_command |= 0x10; /* Memory Write & Invalidate Enable */
- pci_write_config_word(dev, PCI_COMMAND, cfg_command);
- }
- pci_cmds = (pci_read_cmd << 28) | (pci_write_cmd << 24);
-
- num_cards++;
-
- if (!get_userbit(card, MEMORY_INITIALIZED)) {
- dev_printk(KERN_INFO, &card->dev->dev,
- "memory NOT initialized. Consider over-writing whole device.\n");
- card->init_size = 0;
- } else {
- dev_printk(KERN_INFO, &card->dev->dev,
- "memory already initialized\n");
- card->init_size = card->mm_size;
- }
-
- /* Enable ECC */
- writeb(EDC_STORE_CORRECT, card->csr_remap + MEMCTRLCMD_ERRCTRL);
-
- return 0;
-
- failed_req_irq:
- failed_alloc:
- if (card->mm_pages[0].desc)
- dma_free_coherent(&card->dev->dev, PAGE_SIZE * 2,
- card->mm_pages[0].desc,
- card->mm_pages[0].page_dma);
- if (card->mm_pages[1].desc)
- dma_free_coherent(&card->dev->dev, PAGE_SIZE * 2,
- card->mm_pages[1].desc,
- card->mm_pages[1].page_dma);
- failed_magic:
- iounmap(card->csr_remap);
- failed_remap_csr:
- pci_release_regions(dev);
- failed_req_csr:
-
- return ret;
-}
-
-static void mm_pci_remove(struct pci_dev *dev)
-{
- struct cardinfo *card = pci_get_drvdata(dev);
-
- tasklet_kill(&card->tasklet);
- free_irq(dev->irq, card);
- iounmap(card->csr_remap);
-
- if (card->mm_pages[0].desc)
- dma_free_coherent(&card->dev->dev, PAGE_SIZE * 2,
- card->mm_pages[0].desc,
- card->mm_pages[0].page_dma);
- if (card->mm_pages[1].desc)
- dma_free_coherent(&card->dev->dev, PAGE_SIZE * 2,
- card->mm_pages[1].desc,
- card->mm_pages[1].page_dma);
- blk_cleanup_queue(card->queue);
-
- pci_release_regions(dev);
- pci_disable_device(dev);
-}
-
-static const struct pci_device_id mm_pci_ids[] = {
- {PCI_DEVICE(PCI_VENDOR_ID_MICRO_MEMORY, PCI_DEVICE_ID_MICRO_MEMORY_5415CN)},
- {PCI_DEVICE(PCI_VENDOR_ID_MICRO_MEMORY, PCI_DEVICE_ID_MICRO_MEMORY_5425CN)},
- {PCI_DEVICE(PCI_VENDOR_ID_MICRO_MEMORY, PCI_DEVICE_ID_MICRO_MEMORY_6155)},
- {
- .vendor = 0x8086,
- .device = 0xB555,
- .subvendor = 0x1332,
- .subdevice = 0x5460,
- .class = 0x050000,
- .class_mask = 0,
- }, { /* end: all zeroes */ }
-};
-
-MODULE_DEVICE_TABLE(pci, mm_pci_ids);
-
-static struct pci_driver mm_pci_driver = {
- .name = DRIVER_NAME,
- .id_table = mm_pci_ids,
- .probe = mm_pci_probe,
- .remove = mm_pci_remove,
-};
-
-static int __init mm_init(void)
-{
- int retval, i;
- int err;
-
- retval = pci_register_driver(&mm_pci_driver);
- if (retval)
- return -ENOMEM;
-
- err = major_nr = register_blkdev(0, DRIVER_NAME);
- if (err < 0) {
- pci_unregister_driver(&mm_pci_driver);
- return -EIO;
- }
-
- for (i = 0; i < num_cards; i++) {
- mm_gendisk[i] = alloc_disk(1 << MM_SHIFT);
- if (!mm_gendisk[i])
- goto out;
- }
-
- for (i = 0; i < num_cards; i++) {
- struct gendisk *disk = mm_gendisk[i];
- sprintf(disk->disk_name, "umem%c", 'a'+i);
- spin_lock_init(&cards[i].lock);
- disk->major = major_nr;
- disk->first_minor = i << MM_SHIFT;
- disk->fops = &mm_fops;
- disk->private_data = &cards[i];
- disk->queue = cards[i].queue;
- set_capacity(disk, cards[i].mm_size << 1);
- add_disk(disk);
- }
-
- init_battery_timer();
- printk(KERN_INFO "MM: desc_per_page = %ld\n", DESC_PER_PAGE);
-/* printk("mm_init: Done. 10-19-01 9:00\n"); */
- return 0;
-
-out:
- pci_unregister_driver(&mm_pci_driver);
- unregister_blkdev(major_nr, DRIVER_NAME);
- while (i--)
- put_disk(mm_gendisk[i]);
- return -ENOMEM;
-}
-
-static void __exit mm_cleanup(void)
-{
- int i;
-
- del_battery_timer();
-
- for (i = 0; i < num_cards ; i++) {
- del_gendisk(mm_gendisk[i]);
- put_disk(mm_gendisk[i]);
- }
-
- pci_unregister_driver(&mm_pci_driver);
-
- unregister_blkdev(major_nr, DRIVER_NAME);
-}
-
-module_init(mm_init);
-module_exit(mm_cleanup);
-
-MODULE_AUTHOR(DRIVER_AUTHOR);
-MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_LICENSE("GPL");
diff --git a/drivers/block/umem.h b/drivers/block/umem.h
deleted file mode 100644
index 58384978ff05..000000000000
--- a/drivers/block/umem.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-
-/*
- * This file contains defines for the
- * Micro Memory MM5415
- * family PCI Memory Module with Battery Backup.
- *
- * Copyright Micro Memory INC 2001. All rights reserved.
- */
-
-#ifndef _DRIVERS_BLOCK_MM_H
-#define _DRIVERS_BLOCK_MM_H
-
-
-#define IRQ_TIMEOUT (1 * HZ)
-
-/* CSR register definition */
-#define MEMCTRLSTATUS_MAGIC 0x00
-#define MM_MAGIC_VALUE (unsigned char)0x59
-
-#define MEMCTRLSTATUS_BATTERY 0x04
-#define BATTERY_1_DISABLED 0x01
-#define BATTERY_1_FAILURE 0x02
-#define BATTERY_2_DISABLED 0x04
-#define BATTERY_2_FAILURE 0x08
-
-#define MEMCTRLSTATUS_MEMORY 0x07
-#define MEM_128_MB 0xfe
-#define MEM_256_MB 0xfc
-#define MEM_512_MB 0xf8
-#define MEM_1_GB 0xf0
-#define MEM_2_GB 0xe0
-
-#define MEMCTRLCMD_LEDCTRL 0x08
-#define LED_REMOVE 2
-#define LED_FAULT 4
-#define LED_POWER 6
-#define LED_FLIP 255
-#define LED_OFF 0x00
-#define LED_ON 0x01
-#define LED_FLASH_3_5 0x02
-#define LED_FLASH_7_0 0x03
-#define LED_POWER_ON 0x00
-#define LED_POWER_OFF 0x01
-#define USER_BIT1 0x01
-#define USER_BIT2 0x02
-
-#define MEMORY_INITIALIZED USER_BIT1
-
-#define MEMCTRLCMD_ERRCTRL 0x0C
-#define EDC_NONE_DEFAULT 0x00
-#define EDC_NONE 0x01
-#define EDC_STORE_READ 0x02
-#define EDC_STORE_CORRECT 0x03
-
-#define MEMCTRLCMD_ERRCNT 0x0D
-#define MEMCTRLCMD_ERRSTATUS 0x0E
-
-#define ERROR_DATA_LOG 0x20
-#define ERROR_ADDR_LOG 0x28
-#define ERROR_COUNT 0x3D
-#define ERROR_SYNDROME 0x3E
-#define ERROR_CHECK 0x3F
-
-#define DMA_PCI_ADDR 0x40
-#define DMA_LOCAL_ADDR 0x48
-#define DMA_TRANSFER_SIZE 0x50
-#define DMA_DESCRIPTOR_ADDR 0x58
-#define DMA_SEMAPHORE_ADDR 0x60
-#define DMA_STATUS_CTRL 0x68
-#define DMASCR_GO 0x00001
-#define DMASCR_TRANSFER_READ 0x00002
-#define DMASCR_CHAIN_EN 0x00004
-#define DMASCR_SEM_EN 0x00010
-#define DMASCR_DMA_COMP_EN 0x00020
-#define DMASCR_CHAIN_COMP_EN 0x00040
-#define DMASCR_ERR_INT_EN 0x00080
-#define DMASCR_PARITY_INT_EN 0x00100
-#define DMASCR_ANY_ERR 0x00800
-#define DMASCR_MBE_ERR 0x01000
-#define DMASCR_PARITY_ERR_REP 0x02000
-#define DMASCR_PARITY_ERR_DET 0x04000
-#define DMASCR_SYSTEM_ERR_SIG 0x08000
-#define DMASCR_TARGET_ABT 0x10000
-#define DMASCR_MASTER_ABT 0x20000
-#define DMASCR_DMA_COMPLETE 0x40000
-#define DMASCR_CHAIN_COMPLETE 0x80000
-
-/*
-3.SOME PCs HAVE HOST BRIDGES WHICH APPARENTLY DO NOT CORRECTLY HANDLE
-READ-LINE (0xE) OR READ-MULTIPLE (0xC) PCI COMMAND CODES DURING DMA
-TRANSFERS. IN OTHER SYSTEMS THESE COMMAND CODES WILL CAUSE THE HOST BRIDGE
-TO ALLOW LONGER BURSTS DURING DMA READ OPERATIONS. THE UPPER FOUR BITS
-(31..28) OF THE DMA CSR HAVE BEEN MADE PROGRAMMABLE, SO THAT EITHER A 0x6,
-AN 0xE OR A 0xC CAN BE WRITTEN TO THEM TO SET THE COMMAND CODE USED DURING
-DMA READ OPERATIONS.
-*/
-#define DMASCR_READ 0x60000000
-#define DMASCR_READLINE 0xE0000000
-#define DMASCR_READMULTI 0xC0000000
-
-
-#define DMASCR_ERROR_MASK (DMASCR_MASTER_ABT | DMASCR_TARGET_ABT | DMASCR_SYSTEM_ERR_SIG | DMASCR_PARITY_ERR_DET | DMASCR_MBE_ERR | DMASCR_ANY_ERR)
-#define DMASCR_HARD_ERROR (DMASCR_MASTER_ABT | DMASCR_TARGET_ABT | DMASCR_SYSTEM_ERR_SIG | DMASCR_PARITY_ERR_DET | DMASCR_MBE_ERR)
-
-#define WINDOWMAP_WINNUM 0x7B
-
-#define DMA_READ_FROM_HOST 0
-#define DMA_WRITE_TO_HOST 1
-
-struct mm_dma_desc {
- __le64 pci_addr;
- __le64 local_addr;
- __le32 transfer_size;
- u32 zero1;
- __le64 next_desc_addr;
- __le64 sem_addr;
- __le32 control_bits;
- u32 zero2;
-
- dma_addr_t data_dma_handle;
-
- /* Copy of the bits */
- __le64 sem_control_bits;
-} __attribute__((aligned(8)));
-
-/* bits for card->flags */
-#define UM_FLAG_DMA_IN_REGS 1
-#define UM_FLAG_NO_BYTE_STATUS 2
-#define UM_FLAG_NO_BATTREG 4
-#define UM_FLAG_NO_BATT 8
-#endif
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 06c4efd97780..10df39a8b18d 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1949,7 +1949,7 @@ module_param(feature_persistent, bool, 0644);
MODULE_PARM_DESC(feature_persistent,
"Enables the persistent grants feature");
-/**
+/*
* Entry point to this code when a new device is created. Allocate the basic
* structures and the ring buffer for communication with the backend, and
* inform the backend of the appropriate details for those. Switch to
@@ -2075,7 +2075,7 @@ static int blkif_recover(struct blkfront_info *info)
return 0;
}
-/**
+/*
* We are reconnecting to the backend, due to a suspend/resume, or a backend
* driver restart. We tear down our blkif structure and recreate it, but
* leave the device-layer structures intact so that this is transparent to the
@@ -2440,7 +2440,7 @@ fail:
return;
}
-/**
+/*
* Callback received when the backend's state changes.
*/
static void blkback_changed(struct xenbus_device *dev,
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
deleted file mode 100644
index eb8ef65778c3..000000000000
--- a/drivers/block/xsysace.c
+++ /dev/null
@@ -1,1273 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Xilinx SystemACE device driver
- *
- * Copyright 2007 Secret Lab Technologies Ltd.
- */
-
-/*
- * The SystemACE chip is designed to configure FPGAs by loading an FPGA
- * bitstream from a file on a CF card and squirting it into FPGAs connected
- * to the SystemACE JTAG chain. It also has the advantage of providing an
- * MPU interface which can be used to control the FPGA configuration process
- * and to use the attached CF card for general purpose storage.
- *
- * This driver is a block device driver for the SystemACE.
- *
- * Initialization:
- * The driver registers itself as a platform_device driver at module
- * load time. The platform bus will take care of calling the
- * ace_probe() method for all SystemACE instances in the system. Any
- * number of SystemACE instances are supported. ace_probe() calls
- * ace_setup() which initialized all data structures, reads the CF
- * id structure and registers the device.
- *
- * Processing:
- * Just about all of the heavy lifting in this driver is performed by
- * a Finite State Machine (FSM). The driver needs to wait on a number
- * of events; some raised by interrupts, some which need to be polled
- * for. Describing all of the behaviour in a FSM seems to be the
- * easiest way to keep the complexity low and make it easy to
- * understand what the driver is doing. If the block ops or the
- * request function need to interact with the hardware, then they
- * simply need to flag the request and kick of FSM processing.
- *
- * The FSM itself is atomic-safe code which can be run from any
- * context. The general process flow is:
- * 1. obtain the ace->lock spinlock.
- * 2. loop on ace_fsm_dostate() until the ace->fsm_continue flag is
- * cleared.
- * 3. release the lock.
- *
- * Individual states do not sleep in any way. If a condition needs to
- * be waited for then the state much clear the fsm_continue flag and
- * either schedule the FSM to be run again at a later time, or expect
- * an interrupt to call the FSM when the desired condition is met.
- *
- * In normal operation, the FSM is processed at interrupt context
- * either when the driver's tasklet is scheduled, or when an irq is
- * raised by the hardware. The tasklet can be scheduled at any time.
- * The request method in particular schedules the tasklet when a new
- * request has been indicated by the block layer. Once started, the
- * FSM proceeds as far as it can processing the request until it
- * needs on a hardware event. At this point, it must yield execution.
- *
- * A state has two options when yielding execution:
- * 1. ace_fsm_yield()
- * - Call if need to poll for event.
- * - clears the fsm_continue flag to exit the processing loop
- * - reschedules the tasklet to run again as soon as possible
- * 2. ace_fsm_yieldirq()
- * - Call if an irq is expected from the HW
- * - clears the fsm_continue flag to exit the processing loop
- * - does not reschedule the tasklet so the FSM will not be processed
- * again until an irq is received.
- * After calling a yield function, the state must return control back
- * to the FSM main loop.
- *
- * Additionally, the driver maintains a kernel timer which can process
- * the FSM. If the FSM gets stalled, typically due to a missed
- * interrupt, then the kernel timer will expire and the driver can
- * continue where it left off.
- *
- * To Do:
- * - Add FPGA configuration control interface.
- * - Request major number from lanana
- */
-
-#undef DEBUG
-
-#include <linux/module.h>
-#include <linux/ctype.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/slab.h>
-#include <linux/blk-mq.h>
-#include <linux/mutex.h>
-#include <linux/ata.h>
-#include <linux/hdreg.h>
-#include <linux/platform_device.h>
-#if defined(CONFIG_OF)
-#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
-#endif
-
-MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
-MODULE_DESCRIPTION("Xilinx SystemACE device driver");
-MODULE_LICENSE("GPL");
-
-/* SystemACE register definitions */
-#define ACE_BUSMODE (0x00)
-
-#define ACE_STATUS (0x04)
-#define ACE_STATUS_CFGLOCK (0x00000001)
-#define ACE_STATUS_MPULOCK (0x00000002)
-#define ACE_STATUS_CFGERROR (0x00000004) /* config controller error */
-#define ACE_STATUS_CFCERROR (0x00000008) /* CF controller error */
-#define ACE_STATUS_CFDETECT (0x00000010)
-#define ACE_STATUS_DATABUFRDY (0x00000020)
-#define ACE_STATUS_DATABUFMODE (0x00000040)
-#define ACE_STATUS_CFGDONE (0x00000080)
-#define ACE_STATUS_RDYFORCFCMD (0x00000100)
-#define ACE_STATUS_CFGMODEPIN (0x00000200)
-#define ACE_STATUS_CFGADDR_MASK (0x0000e000)
-#define ACE_STATUS_CFBSY (0x00020000)
-#define ACE_STATUS_CFRDY (0x00040000)
-#define ACE_STATUS_CFDWF (0x00080000)
-#define ACE_STATUS_CFDSC (0x00100000)
-#define ACE_STATUS_CFDRQ (0x00200000)
-#define ACE_STATUS_CFCORR (0x00400000)
-#define ACE_STATUS_CFERR (0x00800000)
-
-#define ACE_ERROR (0x08)
-#define ACE_CFGLBA (0x0c)
-#define ACE_MPULBA (0x10)
-
-#define ACE_SECCNTCMD (0x14)
-#define ACE_SECCNTCMD_RESET (0x0100)
-#define ACE_SECCNTCMD_IDENTIFY (0x0200)
-#define ACE_SECCNTCMD_READ_DATA (0x0300)
-#define ACE_SECCNTCMD_WRITE_DATA (0x0400)
-#define ACE_SECCNTCMD_ABORT (0x0600)
-
-#define ACE_VERSION (0x16)
-#define ACE_VERSION_REVISION_MASK (0x00FF)
-#define ACE_VERSION_MINOR_MASK (0x0F00)
-#define ACE_VERSION_MAJOR_MASK (0xF000)
-
-#define ACE_CTRL (0x18)
-#define ACE_CTRL_FORCELOCKREQ (0x0001)
-#define ACE_CTRL_LOCKREQ (0x0002)
-#define ACE_CTRL_FORCECFGADDR (0x0004)
-#define ACE_CTRL_FORCECFGMODE (0x0008)
-#define ACE_CTRL_CFGMODE (0x0010)
-#define ACE_CTRL_CFGSTART (0x0020)
-#define ACE_CTRL_CFGSEL (0x0040)
-#define ACE_CTRL_CFGRESET (0x0080)
-#define ACE_CTRL_DATABUFRDYIRQ (0x0100)
-#define ACE_CTRL_ERRORIRQ (0x0200)
-#define ACE_CTRL_CFGDONEIRQ (0x0400)
-#define ACE_CTRL_RESETIRQ (0x0800)
-#define ACE_CTRL_CFGPROG (0x1000)
-#define ACE_CTRL_CFGADDR_MASK (0xe000)
-
-#define ACE_FATSTAT (0x1c)
-
-#define ACE_NUM_MINORS 16
-#define ACE_SECTOR_SIZE (512)
-#define ACE_FIFO_SIZE (32)
-#define ACE_BUF_PER_SECTOR (ACE_SECTOR_SIZE / ACE_FIFO_SIZE)
-
-#define ACE_BUS_WIDTH_8 0
-#define ACE_BUS_WIDTH_16 1
-
-struct ace_reg_ops;
-
-struct ace_device {
- /* driver state data */
- int id;
- int media_change;
- int users;
- struct list_head list;
-
- /* finite state machine data */
- struct tasklet_struct fsm_tasklet;
- uint fsm_task; /* Current activity (ACE_TASK_*) */
- uint fsm_state; /* Current state (ACE_FSM_STATE_*) */
- uint fsm_continue_flag; /* cleared to exit FSM mainloop */
- uint fsm_iter_num;
- struct timer_list stall_timer;
-
- /* Transfer state/result, use for both id and block request */
- struct request *req; /* request being processed */
- void *data_ptr; /* pointer to I/O buffer */
- int data_count; /* number of buffers remaining */
- int data_result; /* Result of transfer; 0 := success */
-
- int id_req_count; /* count of id requests */
- int id_result;
- struct completion id_completion; /* used when id req finishes */
- int in_irq;
-
- /* Details of hardware device */
- resource_size_t physaddr;
- void __iomem *baseaddr;
- int irq;
- int bus_width; /* 0 := 8 bit; 1 := 16 bit */
- struct ace_reg_ops *reg_ops;
- int lock_count;
-
- /* Block device data structures */
- spinlock_t lock;
- struct device *dev;
- struct request_queue *queue;
- struct gendisk *gd;
- struct blk_mq_tag_set tag_set;
- struct list_head rq_list;
-
- /* Inserted CF card parameters */
- u16 cf_id[ATA_ID_WORDS];
-};
-
-static DEFINE_MUTEX(xsysace_mutex);
-static int ace_major;
-
-/* ---------------------------------------------------------------------
- * Low level register access
- */
-
-struct ace_reg_ops {
- u16(*in) (struct ace_device * ace, int reg);
- void (*out) (struct ace_device * ace, int reg, u16 val);
- void (*datain) (struct ace_device * ace);
- void (*dataout) (struct ace_device * ace);
-};
-
-/* 8 Bit bus width */
-static u16 ace_in_8(struct ace_device *ace, int reg)
-{
- void __iomem *r = ace->baseaddr + reg;
- return in_8(r) | (in_8(r + 1) << 8);
-}
-
-static void ace_out_8(struct ace_device *ace, int reg, u16 val)
-{
- void __iomem *r = ace->baseaddr + reg;
- out_8(r, val);
- out_8(r + 1, val >> 8);
-}
-
-static void ace_datain_8(struct ace_device *ace)
-{
- void __iomem *r = ace->baseaddr + 0x40;
- u8 *dst = ace->data_ptr;
- int i = ACE_FIFO_SIZE;
- while (i--)
- *dst++ = in_8(r++);
- ace->data_ptr = dst;
-}
-
-static void ace_dataout_8(struct ace_device *ace)
-{
- void __iomem *r = ace->baseaddr + 0x40;
- u8 *src = ace->data_ptr;
- int i = ACE_FIFO_SIZE;
- while (i--)
- out_8(r++, *src++);
- ace->data_ptr = src;
-}
-
-static struct ace_reg_ops ace_reg_8_ops = {
- .in = ace_in_8,
- .out = ace_out_8,
- .datain = ace_datain_8,
- .dataout = ace_dataout_8,
-};
-
-/* 16 bit big endian bus attachment */
-static u16 ace_in_be16(struct ace_device *ace, int reg)
-{
- return in_be16(ace->baseaddr + reg);
-}
-
-static void ace_out_be16(struct ace_device *ace, int reg, u16 val)
-{
- out_be16(ace->baseaddr + reg, val);
-}
-
-static void ace_datain_be16(struct ace_device *ace)
-{
- int i = ACE_FIFO_SIZE / 2;
- u16 *dst = ace->data_ptr;
- while (i--)
- *dst++ = in_le16(ace->baseaddr + 0x40);
- ace->data_ptr = dst;
-}
-
-static void ace_dataout_be16(struct ace_device *ace)
-{
- int i = ACE_FIFO_SIZE / 2;
- u16 *src = ace->data_ptr;
- while (i--)
- out_le16(ace->baseaddr + 0x40, *src++);
- ace->data_ptr = src;
-}
-
-/* 16 bit little endian bus attachment */
-static u16 ace_in_le16(struct ace_device *ace, int reg)
-{
- return in_le16(ace->baseaddr + reg);
-}
-
-static void ace_out_le16(struct ace_device *ace, int reg, u16 val)
-{
- out_le16(ace->baseaddr + reg, val);
-}
-
-static void ace_datain_le16(struct ace_device *ace)
-{
- int i = ACE_FIFO_SIZE / 2;
- u16 *dst = ace->data_ptr;
- while (i--)
- *dst++ = in_be16(ace->baseaddr + 0x40);
- ace->data_ptr = dst;
-}
-
-static void ace_dataout_le16(struct ace_device *ace)
-{
- int i = ACE_FIFO_SIZE / 2;
- u16 *src = ace->data_ptr;
- while (i--)
- out_be16(ace->baseaddr + 0x40, *src++);
- ace->data_ptr = src;
-}
-
-static struct ace_reg_ops ace_reg_be16_ops = {
- .in = ace_in_be16,
- .out = ace_out_be16,
- .datain = ace_datain_be16,
- .dataout = ace_dataout_be16,
-};
-
-static struct ace_reg_ops ace_reg_le16_ops = {
- .in = ace_in_le16,
- .out = ace_out_le16,
- .datain = ace_datain_le16,
- .dataout = ace_dataout_le16,
-};
-
-static inline u16 ace_in(struct ace_device *ace, int reg)
-{
- return ace->reg_ops->in(ace, reg);
-}
-
-static inline u32 ace_in32(struct ace_device *ace, int reg)
-{
- return ace_in(ace, reg) | (ace_in(ace, reg + 2) << 16);
-}
-
-static inline void ace_out(struct ace_device *ace, int reg, u16 val)
-{
- ace->reg_ops->out(ace, reg, val);
-}
-
-static inline void ace_out32(struct ace_device *ace, int reg, u32 val)
-{
- ace_out(ace, reg, val);
- ace_out(ace, reg + 2, val >> 16);
-}
-
-/* ---------------------------------------------------------------------
- * Debug support functions
- */
-
-#if defined(DEBUG)
-static void ace_dump_mem(void *base, int len)
-{
- const char *ptr = base;
- int i, j;
-
- for (i = 0; i < len; i += 16) {
- printk(KERN_INFO "%.8x:", i);
- for (j = 0; j < 16; j++) {
- if (!(j % 4))
- printk(" ");
- printk("%.2x", ptr[i + j]);
- }
- printk(" ");
- for (j = 0; j < 16; j++)
- printk("%c", isprint(ptr[i + j]) ? ptr[i + j] : '.');
- printk("\n");
- }
-}
-#else
-static inline void ace_dump_mem(void *base, int len)
-{
-}
-#endif
-
-static void ace_dump_regs(struct ace_device *ace)
-{
- dev_info(ace->dev,
- " ctrl: %.8x seccnt/cmd: %.4x ver:%.4x\n"
- " status:%.8x mpu_lba:%.8x busmode:%4x\n"
- " error: %.8x cfg_lba:%.8x fatstat:%.4x\n",
- ace_in32(ace, ACE_CTRL),
- ace_in(ace, ACE_SECCNTCMD),
- ace_in(ace, ACE_VERSION),
- ace_in32(ace, ACE_STATUS),
- ace_in32(ace, ACE_MPULBA),
- ace_in(ace, ACE_BUSMODE),
- ace_in32(ace, ACE_ERROR),
- ace_in32(ace, ACE_CFGLBA), ace_in(ace, ACE_FATSTAT));
-}
-
-static void ace_fix_driveid(u16 *id)
-{
-#if defined(__BIG_ENDIAN)
- int i;
-
- /* All half words have wrong byte order; swap the bytes */
- for (i = 0; i < ATA_ID_WORDS; i++, id++)
- *id = le16_to_cpu(*id);
-#endif
-}
-
-/* ---------------------------------------------------------------------
- * Finite State Machine (FSM) implementation
- */
-
-/* FSM tasks; used to direct state transitions */
-#define ACE_TASK_IDLE 0
-#define ACE_TASK_IDENTIFY 1
-#define ACE_TASK_READ 2
-#define ACE_TASK_WRITE 3
-#define ACE_FSM_NUM_TASKS 4
-
-/* FSM state definitions */
-#define ACE_FSM_STATE_IDLE 0
-#define ACE_FSM_STATE_REQ_LOCK 1
-#define ACE_FSM_STATE_WAIT_LOCK 2
-#define ACE_FSM_STATE_WAIT_CFREADY 3
-#define ACE_FSM_STATE_IDENTIFY_PREPARE 4
-#define ACE_FSM_STATE_IDENTIFY_TRANSFER 5
-#define ACE_FSM_STATE_IDENTIFY_COMPLETE 6
-#define ACE_FSM_STATE_REQ_PREPARE 7
-#define ACE_FSM_STATE_REQ_TRANSFER 8
-#define ACE_FSM_STATE_REQ_COMPLETE 9
-#define ACE_FSM_STATE_ERROR 10
-#define ACE_FSM_NUM_STATES 11
-
-/* Set flag to exit FSM loop and reschedule tasklet */
-static inline void ace_fsm_yieldpoll(struct ace_device *ace)
-{
- tasklet_schedule(&ace->fsm_tasklet);
- ace->fsm_continue_flag = 0;
-}
-
-static inline void ace_fsm_yield(struct ace_device *ace)
-{
- dev_dbg(ace->dev, "%s()\n", __func__);
- ace_fsm_yieldpoll(ace);
-}
-
-/* Set flag to exit FSM loop and wait for IRQ to reschedule tasklet */
-static inline void ace_fsm_yieldirq(struct ace_device *ace)
-{
- dev_dbg(ace->dev, "ace_fsm_yieldirq()\n");
-
- if (ace->irq > 0)
- ace->fsm_continue_flag = 0;
- else
- ace_fsm_yieldpoll(ace);
-}
-
-static bool ace_has_next_request(struct request_queue *q)
-{
- struct ace_device *ace = q->queuedata;
-
- return !list_empty(&ace->rq_list);
-}
-
-/* Get the next read/write request; ending requests that we don't handle */
-static struct request *ace_get_next_request(struct request_queue *q)
-{
- struct ace_device *ace = q->queuedata;
- struct request *rq;
-
- rq = list_first_entry_or_null(&ace->rq_list, struct request, queuelist);
- if (rq) {
- list_del_init(&rq->queuelist);
- blk_mq_start_request(rq);
- }
-
- return NULL;
-}
-
-static void ace_fsm_dostate(struct ace_device *ace)
-{
- struct request *req;
- u32 status;
- u16 val;
- int count;
-
-#if defined(DEBUG)
- dev_dbg(ace->dev, "fsm_state=%i, id_req_count=%i\n",
- ace->fsm_state, ace->id_req_count);
-#endif
-
- /* Verify that there is actually a CF in the slot. If not, then
- * bail out back to the idle state and wake up all the waiters */
- status = ace_in32(ace, ACE_STATUS);
- if ((status & ACE_STATUS_CFDETECT) == 0) {
- ace->fsm_state = ACE_FSM_STATE_IDLE;
- ace->media_change = 1;
- set_capacity(ace->gd, 0);
- dev_info(ace->dev, "No CF in slot\n");
-
- /* Drop all in-flight and pending requests */
- if (ace->req) {
- blk_mq_end_request(ace->req, BLK_STS_IOERR);
- ace->req = NULL;
- }
- while ((req = ace_get_next_request(ace->queue)) != NULL)
- blk_mq_end_request(req, BLK_STS_IOERR);
-
- /* Drop back to IDLE state and notify waiters */
- ace->fsm_state = ACE_FSM_STATE_IDLE;
- ace->id_result = -EIO;
- while (ace->id_req_count) {
- complete(&ace->id_completion);
- ace->id_req_count--;
- }
- }
-
- switch (ace->fsm_state) {
- case ACE_FSM_STATE_IDLE:
- /* See if there is anything to do */
- if (ace->id_req_count || ace_has_next_request(ace->queue)) {
- ace->fsm_iter_num++;
- ace->fsm_state = ACE_FSM_STATE_REQ_LOCK;
- mod_timer(&ace->stall_timer, jiffies + HZ);
- if (!timer_pending(&ace->stall_timer))
- add_timer(&ace->stall_timer);
- break;
- }
- del_timer(&ace->stall_timer);
- ace->fsm_continue_flag = 0;
- break;
-
- case ACE_FSM_STATE_REQ_LOCK:
- if (ace_in(ace, ACE_STATUS) & ACE_STATUS_MPULOCK) {
- /* Already have the lock, jump to next state */
- ace->fsm_state = ACE_FSM_STATE_WAIT_CFREADY;
- break;
- }
-
- /* Request the lock */
- val = ace_in(ace, ACE_CTRL);
- ace_out(ace, ACE_CTRL, val | ACE_CTRL_LOCKREQ);
- ace->fsm_state = ACE_FSM_STATE_WAIT_LOCK;
- break;
-
- case ACE_FSM_STATE_WAIT_LOCK:
- if (ace_in(ace, ACE_STATUS) & ACE_STATUS_MPULOCK) {
- /* got the lock; move to next state */
- ace->fsm_state = ACE_FSM_STATE_WAIT_CFREADY;
- break;
- }
-
- /* wait a bit for the lock */
- ace_fsm_yield(ace);
- break;
-
- case ACE_FSM_STATE_WAIT_CFREADY:
- status = ace_in32(ace, ACE_STATUS);
- if (!(status & ACE_STATUS_RDYFORCFCMD) ||
- (status & ACE_STATUS_CFBSY)) {
- /* CF card isn't ready; it needs to be polled */
- ace_fsm_yield(ace);
- break;
- }
-
- /* Device is ready for command; determine what to do next */
- if (ace->id_req_count)
- ace->fsm_state = ACE_FSM_STATE_IDENTIFY_PREPARE;
- else
- ace->fsm_state = ACE_FSM_STATE_REQ_PREPARE;
- break;
-
- case ACE_FSM_STATE_IDENTIFY_PREPARE:
- /* Send identify command */
- ace->fsm_task = ACE_TASK_IDENTIFY;
- ace->data_ptr = ace->cf_id;
- ace->data_count = ACE_BUF_PER_SECTOR;
- ace_out(ace, ACE_SECCNTCMD, ACE_SECCNTCMD_IDENTIFY);
-
- /* As per datasheet, put config controller in reset */
- val = ace_in(ace, ACE_CTRL);
- ace_out(ace, ACE_CTRL, val | ACE_CTRL_CFGRESET);
-
- /* irq handler takes over from this point; wait for the
- * transfer to complete */
- ace->fsm_state = ACE_FSM_STATE_IDENTIFY_TRANSFER;
- ace_fsm_yieldirq(ace);
- break;
-
- case ACE_FSM_STATE_IDENTIFY_TRANSFER:
- /* Check that the sysace is ready to receive data */
- status = ace_in32(ace, ACE_STATUS);
- if (status & ACE_STATUS_CFBSY) {
- dev_dbg(ace->dev, "CFBSY set; t=%i iter=%i dc=%i\n",
- ace->fsm_task, ace->fsm_iter_num,
- ace->data_count);
- ace_fsm_yield(ace);
- break;
- }
- if (!(status & ACE_STATUS_DATABUFRDY)) {
- ace_fsm_yield(ace);
- break;
- }
-
- /* Transfer the next buffer */
- ace->reg_ops->datain(ace);
- ace->data_count--;
-
- /* If there are still buffers to be transfers; jump out here */
- if (ace->data_count != 0) {
- ace_fsm_yieldirq(ace);
- break;
- }
-
- /* transfer finished; kick state machine */
- dev_dbg(ace->dev, "identify finished\n");
- ace->fsm_state = ACE_FSM_STATE_IDENTIFY_COMPLETE;
- break;
-
- case ACE_FSM_STATE_IDENTIFY_COMPLETE:
- ace_fix_driveid(ace->cf_id);
- ace_dump_mem(ace->cf_id, 512); /* Debug: Dump out disk ID */
-
- if (ace->data_result) {
- /* Error occurred, disable the disk */
- ace->media_change = 1;
- set_capacity(ace->gd, 0);
- dev_err(ace->dev, "error fetching CF id (%i)\n",
- ace->data_result);
- } else {
- ace->media_change = 0;
-
- /* Record disk parameters */
- set_capacity(ace->gd,
- ata_id_u32(ace->cf_id, ATA_ID_LBA_CAPACITY));
- dev_info(ace->dev, "capacity: %i sectors\n",
- ata_id_u32(ace->cf_id, ATA_ID_LBA_CAPACITY));
- }
-
- /* We're done, drop to IDLE state and notify waiters */
- ace->fsm_state = ACE_FSM_STATE_IDLE;
- ace->id_result = ace->data_result;
- while (ace->id_req_count) {
- complete(&ace->id_completion);
- ace->id_req_count--;
- }
- break;
-
- case ACE_FSM_STATE_REQ_PREPARE:
- req = ace_get_next_request(ace->queue);
- if (!req) {
- ace->fsm_state = ACE_FSM_STATE_IDLE;
- break;
- }
-
- /* Okay, it's a data request, set it up for transfer */
- dev_dbg(ace->dev,
- "request: sec=%llx hcnt=%x, ccnt=%x, dir=%i\n",
- (unsigned long long)blk_rq_pos(req),
- blk_rq_sectors(req), blk_rq_cur_sectors(req),
- rq_data_dir(req));
-
- ace->req = req;
- ace->data_ptr = bio_data(req->bio);
- ace->data_count = blk_rq_cur_sectors(req) * ACE_BUF_PER_SECTOR;
- ace_out32(ace, ACE_MPULBA, blk_rq_pos(req) & 0x0FFFFFFF);
-
- count = blk_rq_sectors(req);
- if (rq_data_dir(req)) {
- /* Kick off write request */
- dev_dbg(ace->dev, "write data\n");
- ace->fsm_task = ACE_TASK_WRITE;
- ace_out(ace, ACE_SECCNTCMD,
- count | ACE_SECCNTCMD_WRITE_DATA);
- } else {
- /* Kick off read request */
- dev_dbg(ace->dev, "read data\n");
- ace->fsm_task = ACE_TASK_READ;
- ace_out(ace, ACE_SECCNTCMD,
- count | ACE_SECCNTCMD_READ_DATA);
- }
-
- /* As per datasheet, put config controller in reset */
- val = ace_in(ace, ACE_CTRL);
- ace_out(ace, ACE_CTRL, val | ACE_CTRL_CFGRESET);
-
- /* Move to the transfer state. The systemace will raise
- * an interrupt once there is something to do
- */
- ace->fsm_state = ACE_FSM_STATE_REQ_TRANSFER;
- if (ace->fsm_task == ACE_TASK_READ)
- ace_fsm_yieldirq(ace); /* wait for data ready */
- break;
-
- case ACE_FSM_STATE_REQ_TRANSFER:
- /* Check that the sysace is ready to receive data */
- status = ace_in32(ace, ACE_STATUS);
- if (status & ACE_STATUS_CFBSY) {
- dev_dbg(ace->dev,
- "CFBSY set; t=%i iter=%i c=%i dc=%i irq=%i\n",
- ace->fsm_task, ace->fsm_iter_num,
- blk_rq_cur_sectors(ace->req) * 16,
- ace->data_count, ace->in_irq);
- ace_fsm_yield(ace); /* need to poll CFBSY bit */
- break;
- }
- if (!(status & ACE_STATUS_DATABUFRDY)) {
- dev_dbg(ace->dev,
- "DATABUF not set; t=%i iter=%i c=%i dc=%i irq=%i\n",
- ace->fsm_task, ace->fsm_iter_num,
- blk_rq_cur_sectors(ace->req) * 16,
- ace->data_count, ace->in_irq);
- ace_fsm_yieldirq(ace);
- break;
- }
-
- /* Transfer the next buffer */
- if (ace->fsm_task == ACE_TASK_WRITE)
- ace->reg_ops->dataout(ace);
- else
- ace->reg_ops->datain(ace);
- ace->data_count--;
-
- /* If there are still buffers to be transfers; jump out here */
- if (ace->data_count != 0) {
- ace_fsm_yieldirq(ace);
- break;
- }
-
- /* bio finished; is there another one? */
- if (blk_update_request(ace->req, BLK_STS_OK,
- blk_rq_cur_bytes(ace->req))) {
- /* dev_dbg(ace->dev, "next block; h=%u c=%u\n",
- * blk_rq_sectors(ace->req),
- * blk_rq_cur_sectors(ace->req));
- */
- ace->data_ptr = bio_data(ace->req->bio);
- ace->data_count = blk_rq_cur_sectors(ace->req) * 16;
- ace_fsm_yieldirq(ace);
- break;
- }
-
- ace->fsm_state = ACE_FSM_STATE_REQ_COMPLETE;
- break;
-
- case ACE_FSM_STATE_REQ_COMPLETE:
- ace->req = NULL;
-
- /* Finished request; go to idle state */
- ace->fsm_state = ACE_FSM_STATE_IDLE;
- break;
-
- default:
- ace->fsm_state = ACE_FSM_STATE_IDLE;
- break;
- }
-}
-
-static void ace_fsm_tasklet(unsigned long data)
-{
- struct ace_device *ace = (void *)data;
- unsigned long flags;
-
- spin_lock_irqsave(&ace->lock, flags);
-
- /* Loop over state machine until told to stop */
- ace->fsm_continue_flag = 1;
- while (ace->fsm_continue_flag)
- ace_fsm_dostate(ace);
-
- spin_unlock_irqrestore(&ace->lock, flags);
-}
-
-static void ace_stall_timer(struct timer_list *t)
-{
- struct ace_device *ace = from_timer(ace, t, stall_timer);
- unsigned long flags;
-
- dev_warn(ace->dev,
- "kicking stalled fsm; state=%i task=%i iter=%i dc=%i\n",
- ace->fsm_state, ace->fsm_task, ace->fsm_iter_num,
- ace->data_count);
- spin_lock_irqsave(&ace->lock, flags);
-
- /* Rearm the stall timer *before* entering FSM (which may then
- * delete the timer) */
- mod_timer(&ace->stall_timer, jiffies + HZ);
-
- /* Loop over state machine until told to stop */
- ace->fsm_continue_flag = 1;
- while (ace->fsm_continue_flag)
- ace_fsm_dostate(ace);
-
- spin_unlock_irqrestore(&ace->lock, flags);
-}
-
-/* ---------------------------------------------------------------------
- * Interrupt handling routines
- */
-static int ace_interrupt_checkstate(struct ace_device *ace)
-{
- u32 sreg = ace_in32(ace, ACE_STATUS);
- u16 creg = ace_in(ace, ACE_CTRL);
-
- /* Check for error occurrence */
- if ((sreg & (ACE_STATUS_CFGERROR | ACE_STATUS_CFCERROR)) &&
- (creg & ACE_CTRL_ERRORIRQ)) {
- dev_err(ace->dev, "transfer failure\n");
- ace_dump_regs(ace);
- return -EIO;
- }
-
- return 0;
-}
-
-static irqreturn_t ace_interrupt(int irq, void *dev_id)
-{
- u16 creg;
- struct ace_device *ace = dev_id;
-
- /* be safe and get the lock */
- spin_lock(&ace->lock);
- ace->in_irq = 1;
-
- /* clear the interrupt */
- creg = ace_in(ace, ACE_CTRL);
- ace_out(ace, ACE_CTRL, creg | ACE_CTRL_RESETIRQ);
- ace_out(ace, ACE_CTRL, creg);
-
- /* check for IO failures */
- if (ace_interrupt_checkstate(ace))
- ace->data_result = -EIO;
-
- if (ace->fsm_task == 0) {
- dev_err(ace->dev,
- "spurious irq; stat=%.8x ctrl=%.8x cmd=%.4x\n",
- ace_in32(ace, ACE_STATUS), ace_in32(ace, ACE_CTRL),
- ace_in(ace, ACE_SECCNTCMD));
- dev_err(ace->dev, "fsm_task=%i fsm_state=%i data_count=%i\n",
- ace->fsm_task, ace->fsm_state, ace->data_count);
- }
-
- /* Loop over state machine until told to stop */
- ace->fsm_continue_flag = 1;
- while (ace->fsm_continue_flag)
- ace_fsm_dostate(ace);
-
- /* done with interrupt; drop the lock */
- ace->in_irq = 0;
- spin_unlock(&ace->lock);
-
- return IRQ_HANDLED;
-}
-
-/* ---------------------------------------------------------------------
- * Block ops
- */
-static blk_status_t ace_queue_rq(struct blk_mq_hw_ctx *hctx,
- const struct blk_mq_queue_data *bd)
-{
- struct ace_device *ace = hctx->queue->queuedata;
- struct request *req = bd->rq;
-
- if (blk_rq_is_passthrough(req)) {
- blk_mq_start_request(req);
- return BLK_STS_IOERR;
- }
-
- spin_lock_irq(&ace->lock);
- list_add_tail(&req->queuelist, &ace->rq_list);
- spin_unlock_irq(&ace->lock);
-
- tasklet_schedule(&ace->fsm_tasklet);
- return BLK_STS_OK;
-}
-
-static unsigned int ace_check_events(struct gendisk *gd, unsigned int clearing)
-{
- struct ace_device *ace = gd->private_data;
- dev_dbg(ace->dev, "ace_check_events(): %i\n", ace->media_change);
-
- return ace->media_change ? DISK_EVENT_MEDIA_CHANGE : 0;
-}
-
-static void ace_media_changed(struct ace_device *ace)
-{
- unsigned long flags;
-
- dev_dbg(ace->dev, "requesting cf id and scheduling tasklet\n");
-
- spin_lock_irqsave(&ace->lock, flags);
- ace->id_req_count++;
- spin_unlock_irqrestore(&ace->lock, flags);
-
- tasklet_schedule(&ace->fsm_tasklet);
- wait_for_completion(&ace->id_completion);
-
- dev_dbg(ace->dev, "revalidate complete\n");
-}
-
-static int ace_open(struct block_device *bdev, fmode_t mode)
-{
- struct ace_device *ace = bdev->bd_disk->private_data;
- unsigned long flags;
-
- dev_dbg(ace->dev, "ace_open() users=%i\n", ace->users + 1);
-
- mutex_lock(&xsysace_mutex);
- spin_lock_irqsave(&ace->lock, flags);
- ace->users++;
- spin_unlock_irqrestore(&ace->lock, flags);
-
- if (bdev_check_media_change(bdev) && ace->media_change)
- ace_media_changed(ace);
- mutex_unlock(&xsysace_mutex);
-
- return 0;
-}
-
-static void ace_release(struct gendisk *disk, fmode_t mode)
-{
- struct ace_device *ace = disk->private_data;
- unsigned long flags;
- u16 val;
-
- dev_dbg(ace->dev, "ace_release() users=%i\n", ace->users - 1);
-
- mutex_lock(&xsysace_mutex);
- spin_lock_irqsave(&ace->lock, flags);
- ace->users--;
- if (ace->users == 0) {
- val = ace_in(ace, ACE_CTRL);
- ace_out(ace, ACE_CTRL, val & ~ACE_CTRL_LOCKREQ);
- }
- spin_unlock_irqrestore(&ace->lock, flags);
- mutex_unlock(&xsysace_mutex);
-}
-
-static int ace_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
- struct ace_device *ace = bdev->bd_disk->private_data;
- u16 *cf_id = ace->cf_id;
-
- dev_dbg(ace->dev, "ace_getgeo()\n");
-
- geo->heads = cf_id[ATA_ID_HEADS];
- geo->sectors = cf_id[ATA_ID_SECTORS];
- geo->cylinders = cf_id[ATA_ID_CYLS];
-
- return 0;
-}
-
-static const struct block_device_operations ace_fops = {
- .owner = THIS_MODULE,
- .open = ace_open,
- .release = ace_release,
- .check_events = ace_check_events,
- .getgeo = ace_getgeo,
-};
-
-static const struct blk_mq_ops ace_mq_ops = {
- .queue_rq = ace_queue_rq,
-};
-
-/* --------------------------------------------------------------------
- * SystemACE device setup/teardown code
- */
-static int ace_setup(struct ace_device *ace)
-{
- u16 version;
- u16 val;
- int rc;
-
- dev_dbg(ace->dev, "ace_setup(ace=0x%p)\n", ace);
- dev_dbg(ace->dev, "physaddr=0x%llx irq=%i\n",
- (unsigned long long)ace->physaddr, ace->irq);
-
- spin_lock_init(&ace->lock);
- init_completion(&ace->id_completion);
- INIT_LIST_HEAD(&ace->rq_list);
-
- /*
- * Map the device
- */
- ace->baseaddr = ioremap(ace->physaddr, 0x80);
- if (!ace->baseaddr)
- goto err_ioremap;
-
- /*
- * Initialize the state machine tasklet and stall timer
- */
- tasklet_init(&ace->fsm_tasklet, ace_fsm_tasklet, (unsigned long)ace);
- timer_setup(&ace->stall_timer, ace_stall_timer, 0);
-
- /*
- * Initialize the request queue
- */
- ace->queue = blk_mq_init_sq_queue(&ace->tag_set, &ace_mq_ops, 2,
- BLK_MQ_F_SHOULD_MERGE);
- if (IS_ERR(ace->queue)) {
- rc = PTR_ERR(ace->queue);
- ace->queue = NULL;
- goto err_blk_initq;
- }
- ace->queue->queuedata = ace;
-
- blk_queue_logical_block_size(ace->queue, 512);
- blk_queue_bounce_limit(ace->queue, BLK_BOUNCE_HIGH);
-
- /*
- * Allocate and initialize GD structure
- */
- ace->gd = alloc_disk(ACE_NUM_MINORS);
- if (!ace->gd)
- goto err_alloc_disk;
-
- ace->gd->major = ace_major;
- ace->gd->first_minor = ace->id * ACE_NUM_MINORS;
- ace->gd->fops = &ace_fops;
- ace->gd->events = DISK_EVENT_MEDIA_CHANGE;
- ace->gd->queue = ace->queue;
- ace->gd->private_data = ace;
- snprintf(ace->gd->disk_name, 32, "xs%c", ace->id + 'a');
-
- /* set bus width */
- if (ace->bus_width == ACE_BUS_WIDTH_16) {
- /* 0x0101 should work regardless of endianess */
- ace_out_le16(ace, ACE_BUSMODE, 0x0101);
-
- /* read it back to determine endianess */
- if (ace_in_le16(ace, ACE_BUSMODE) == 0x0001)
- ace->reg_ops = &ace_reg_le16_ops;
- else
- ace->reg_ops = &ace_reg_be16_ops;
- } else {
- ace_out_8(ace, ACE_BUSMODE, 0x00);
- ace->reg_ops = &ace_reg_8_ops;
- }
-
- /* Make sure version register is sane */
- version = ace_in(ace, ACE_VERSION);
- if ((version == 0) || (version == 0xFFFF))
- goto err_read;
-
- /* Put sysace in a sane state by clearing most control reg bits */
- ace_out(ace, ACE_CTRL, ACE_CTRL_FORCECFGMODE |
- ACE_CTRL_DATABUFRDYIRQ | ACE_CTRL_ERRORIRQ);
-
- /* Now we can hook up the irq handler */
- if (ace->irq > 0) {
- rc = request_irq(ace->irq, ace_interrupt, 0, "systemace", ace);
- if (rc) {
- /* Failure - fall back to polled mode */
- dev_err(ace->dev, "request_irq failed\n");
- ace->irq = rc;
- }
- }
-
- /* Enable interrupts */
- val = ace_in(ace, ACE_CTRL);
- val |= ACE_CTRL_DATABUFRDYIRQ | ACE_CTRL_ERRORIRQ;
- ace_out(ace, ACE_CTRL, val);
-
- /* Print the identification */
- dev_info(ace->dev, "Xilinx SystemACE revision %i.%i.%i\n",
- (version >> 12) & 0xf, (version >> 8) & 0x0f, version & 0xff);
- dev_dbg(ace->dev, "physaddr 0x%llx, mapped to 0x%p, irq=%i\n",
- (unsigned long long) ace->physaddr, ace->baseaddr, ace->irq);
-
- ace->media_change = 1;
- ace_media_changed(ace);
-
- /* Make the sysace device 'live' */
- add_disk(ace->gd);
-
- return 0;
-
-err_read:
- /* prevent double queue cleanup */
- ace->gd->queue = NULL;
- put_disk(ace->gd);
-err_alloc_disk:
- blk_cleanup_queue(ace->queue);
- blk_mq_free_tag_set(&ace->tag_set);
-err_blk_initq:
- iounmap(ace->baseaddr);
-err_ioremap:
- dev_info(ace->dev, "xsysace: error initializing device at 0x%llx\n",
- (unsigned long long) ace->physaddr);
- return -ENOMEM;
-}
-
-static void ace_teardown(struct ace_device *ace)
-{
- if (ace->gd) {
- del_gendisk(ace->gd);
- put_disk(ace->gd);
- }
-
- if (ace->queue) {
- blk_cleanup_queue(ace->queue);
- blk_mq_free_tag_set(&ace->tag_set);
- }
-
- tasklet_kill(&ace->fsm_tasklet);
-
- if (ace->irq > 0)
- free_irq(ace->irq, ace);
-
- iounmap(ace->baseaddr);
-}
-
-static int ace_alloc(struct device *dev, int id, resource_size_t physaddr,
- int irq, int bus_width)
-{
- struct ace_device *ace;
- int rc;
- dev_dbg(dev, "ace_alloc(%p)\n", dev);
-
- /* Allocate and initialize the ace device structure */
- ace = kzalloc(sizeof(struct ace_device), GFP_KERNEL);
- if (!ace) {
- rc = -ENOMEM;
- goto err_alloc;
- }
-
- ace->dev = dev;
- ace->id = id;
- ace->physaddr = physaddr;
- ace->irq = irq;
- ace->bus_width = bus_width;
-
- /* Call the setup code */
- rc = ace_setup(ace);
- if (rc)
- goto err_setup;
-
- dev_set_drvdata(dev, ace);
- return 0;
-
-err_setup:
- dev_set_drvdata(dev, NULL);
- kfree(ace);
-err_alloc:
- dev_err(dev, "could not initialize device, err=%i\n", rc);
- return rc;
-}
-
-static void ace_free(struct device *dev)
-{
- struct ace_device *ace = dev_get_drvdata(dev);
- dev_dbg(dev, "ace_free(%p)\n", dev);
-
- if (ace) {
- ace_teardown(ace);
- dev_set_drvdata(dev, NULL);
- kfree(ace);
- }
-}
-
-/* ---------------------------------------------------------------------
- * Platform Bus Support
- */
-
-static int ace_probe(struct platform_device *dev)
-{
- int bus_width = ACE_BUS_WIDTH_16; /* FIXME: should not be hard coded */
- resource_size_t physaddr;
- struct resource *res;
- u32 id = dev->id;
- int irq;
- int i;
-
- dev_dbg(&dev->dev, "ace_probe(%p)\n", dev);
-
- /* device id and bus width */
- if (of_property_read_u32(dev->dev.of_node, "port-number", &id))
- id = 0;
- if (of_find_property(dev->dev.of_node, "8-bit", NULL))
- bus_width = ACE_BUS_WIDTH_8;
-
- res = platform_get_resource(dev, IORESOURCE_MEM, 0);
- if (!res)
- return -EINVAL;
-
- physaddr = res->start;
- if (!physaddr)
- return -ENODEV;
-
- irq = platform_get_irq_optional(dev, 0);
-
- /* Call the bus-independent setup code */
- return ace_alloc(&dev->dev, id, physaddr, irq, bus_width);
-}
-
-/*
- * Platform bus remove() method
- */
-static int ace_remove(struct platform_device *dev)
-{
- ace_free(&dev->dev);
- return 0;
-}
-
-#if defined(CONFIG_OF)
-/* Match table for of_platform binding */
-static const struct of_device_id ace_of_match[] = {
- { .compatible = "xlnx,opb-sysace-1.00.b", },
- { .compatible = "xlnx,opb-sysace-1.00.c", },
- { .compatible = "xlnx,xps-sysace-1.00.a", },
- { .compatible = "xlnx,sysace", },
- {},
-};
-MODULE_DEVICE_TABLE(of, ace_of_match);
-#else /* CONFIG_OF */
-#define ace_of_match NULL
-#endif /* CONFIG_OF */
-
-static struct platform_driver ace_platform_driver = {
- .probe = ace_probe,
- .remove = ace_remove,
- .driver = {
- .name = "xsysace",
- .of_match_table = ace_of_match,
- },
-};
-
-/* ---------------------------------------------------------------------
- * Module init/exit routines
- */
-static int __init ace_init(void)
-{
- int rc;
-
- ace_major = register_blkdev(ace_major, "xsysace");
- if (ace_major <= 0) {
- rc = -ENOMEM;
- goto err_blk;
- }
-
- rc = platform_driver_register(&ace_platform_driver);
- if (rc)
- goto err_plat;
-
- pr_info("Xilinx SystemACE device driver, major=%i\n", ace_major);
- return 0;
-
-err_plat:
- unregister_blkdev(ace_major, "xsysace");
-err_blk:
- printk(KERN_ERR "xsysace: registration failed; err=%i\n", rc);
- return rc;
-}
-module_init(ace_init);
-
-static void __exit ace_exit(void)
-{
- pr_debug("Unregistering Xilinx SystemACE driver\n");
- platform_driver_unregister(&ace_platform_driver);
- unregister_blkdev(ace_major, "xsysace");
-}
-module_exit(ace_exit);
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 9874fc1c815b..742b4a0932e3 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -583,7 +583,8 @@ static blk_status_t gdrom_readdisk_dma(struct request *req)
read_command->cmd[1] = 0x20;
block = blk_rq_pos(req)/GD_TO_BLK + GD_SESSION_OFFSET;
block_cnt = blk_rq_sectors(req)/GD_TO_BLK;
- __raw_writel(virt_to_phys(bio_data(req->bio)), GDROM_DMA_STARTADDR_REG);
+ __raw_writel(page_to_phys(bio_page(req->bio)) + bio_offset(req->bio),
+ GDROM_DMA_STARTADDR_REG);
__raw_writel(block_cnt * GDROM_HARD_SECTOR, GDROM_DMA_LENGTH_REG);
__raw_writel(1, GDROM_DMA_DIRECTION_REG);
__raw_writel(1, GDROM_DMA_ENABLE_REG);
@@ -789,8 +790,6 @@ static int probe_gdrom(struct platform_device *devptr)
goto probe_fail_requestq;
}
- blk_queue_bounce_limit(gd.gdrom_rq, BLK_BOUNCE_HIGH);
-
err = probe_gdrom_setupqueue();
if (err)
goto probe_fail_toc;
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index 6734329cca33..b74a872387c4 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -103,11 +103,11 @@ static inline void __rtrs_put_permit(struct rtrs_clt *clt,
* up earlier.
*
* Context:
- * Can sleep if @wait == RTRS_TAG_WAIT
+ * Can sleep if @wait == RTRS_PERMIT_WAIT
*/
struct rtrs_permit *rtrs_clt_get_permit(struct rtrs_clt *clt,
enum rtrs_clt_con_type con_type,
- int can_wait)
+ enum wait_type can_wait)
{
struct rtrs_permit *permit;
DEFINE_WAIT(wait);
@@ -174,7 +174,7 @@ struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_sess *sess,
int id = 0;
if (likely(permit->con_type == RTRS_IO_CON))
- id = (permit->cpu_id % (sess->s.con_num - 1)) + 1;
+ id = (permit->cpu_id % (sess->s.irq_con_num - 1)) + 1;
return to_clt_con(sess->s.con[id]);
}
@@ -1400,23 +1400,29 @@ static void rtrs_clt_close_work(struct work_struct *work);
static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt,
const struct rtrs_addr *path,
size_t con_num, u16 max_segments,
- size_t max_segment_size)
+ u32 nr_poll_queues)
{
struct rtrs_clt_sess *sess;
int err = -ENOMEM;
int cpu;
+ size_t total_con;
sess = kzalloc(sizeof(*sess), GFP_KERNEL);
if (!sess)
goto err;
- /* Extra connection for user messages */
- con_num += 1;
-
- sess->s.con = kcalloc(con_num, sizeof(*sess->s.con), GFP_KERNEL);
+ /*
+ * irqmode and poll
+ * +1: Extra connection for user messages
+ */
+ total_con = con_num + nr_poll_queues + 1;
+ sess->s.con = kcalloc(total_con, sizeof(*sess->s.con), GFP_KERNEL);
if (!sess->s.con)
goto err_free_sess;
+ sess->s.con_num = total_con;
+ sess->s.irq_con_num = con_num + 1;
+
sess->stats = kzalloc(sizeof(*sess->stats), GFP_KERNEL);
if (!sess->stats)
goto err_free_con;
@@ -1435,9 +1441,8 @@ static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt,
memcpy(&sess->s.src_addr, path->src,
rdma_addr_size((struct sockaddr *)path->src));
strlcpy(sess->s.sessname, clt->sessname, sizeof(sess->s.sessname));
- sess->s.con_num = con_num;
sess->clt = clt;
- sess->max_pages_per_mr = max_segments * max_segment_size >> 12;
+ sess->max_pages_per_mr = max_segments;
init_waitqueue_head(&sess->state_wq);
sess->state = RTRS_CLT_CONNECTING;
atomic_set(&sess->connected_cnt, 0);
@@ -1576,9 +1581,14 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
}
cq_size = max_send_wr + max_recv_wr;
cq_vector = con->cpu % sess->s.dev->ib_dev->num_comp_vectors;
- err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge,
- cq_vector, cq_size, max_send_wr,
- max_recv_wr, IB_POLL_SOFTIRQ);
+ if (con->c.cid >= sess->s.irq_con_num)
+ err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge,
+ cq_vector, cq_size, max_send_wr,
+ max_recv_wr, IB_POLL_DIRECT);
+ else
+ err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge,
+ cq_vector, cq_size, max_send_wr,
+ max_recv_wr, IB_POLL_SOFTIRQ);
/*
* In case of error we do not bother to clean previous allocations,
* since destroy_con_cq_qp() must be called.
@@ -2528,7 +2538,6 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num,
void (*link_ev)(void *priv,
enum rtrs_clt_link_ev ev),
unsigned int max_segments,
- size_t max_segment_size,
unsigned int reconnect_delay_sec,
unsigned int max_reconnect_attempts)
{
@@ -2558,7 +2567,6 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num,
clt->port = port;
clt->pdu_sz = pdu_sz;
clt->max_segments = max_segments;
- clt->max_segment_size = max_segment_size;
clt->reconnect_delay_sec = reconnect_delay_sec;
clt->max_reconnect_attempts = max_reconnect_attempts;
clt->priv = priv;
@@ -2628,9 +2636,9 @@ static void free_clt(struct rtrs_clt *clt)
* @pdu_sz: Size of extra payload which can be accessed after permit allocation.
* @reconnect_delay_sec: time between reconnect tries
* @max_segments: Max. number of segments per IO request
- * @max_segment_size: Max. size of one segment
* @max_reconnect_attempts: Number of times to reconnect on error before giving
* up, 0 for * disabled, -1 for forever
+ * @nr_poll_queues: number of polling-mode connections, created with the IB_POLL_DIRECT flag
*
* Starts session establishment with the rtrs_server. The function can block
* up to ~2000ms before it returns.
@@ -2643,8 +2651,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
size_t paths_num, u16 port,
size_t pdu_sz, u8 reconnect_delay_sec,
u16 max_segments,
- size_t max_segment_size,
- s16 max_reconnect_attempts)
+ s16 max_reconnect_attempts, u32 nr_poll_queues)
{
struct rtrs_clt_sess *sess, *tmp;
struct rtrs_clt *clt;
@@ -2652,7 +2659,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
clt = alloc_clt(sessname, paths_num, port, pdu_sz, ops->priv,
ops->link_ev,
- max_segments, max_segment_size, reconnect_delay_sec,
+ max_segments, reconnect_delay_sec,
max_reconnect_attempts);
if (IS_ERR(clt)) {
err = PTR_ERR(clt);
@@ -2662,7 +2669,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
struct rtrs_clt_sess *sess;
sess = alloc_sess(clt, &paths[i], nr_cpu_ids,
- max_segments, max_segment_size);
+ max_segments, nr_poll_queues);
if (IS_ERR(sess)) {
err = PTR_ERR(sess);
goto close_all_sess;
@@ -2887,6 +2894,31 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops,
}
EXPORT_SYMBOL(rtrs_clt_request);
+/**
+ * rtrs_clt_rdma_cq_direct() - directly poll one connection's completion queue
+ * @clt:	session pointer
+ * @index:	selects the connection to poll; connection @index + 1 of each
+ *		path is used
+ *
+ * Walks the paths of @clt under rcu_read_lock(), skips every path that is not
+ * in the RTRS_CLT_CONNECTED state, and calls ib_process_cq_direct() on the
+ * selected connection's CQ. The walk stops at the first path for which that
+ * call returns a non-zero value.
+ *
+ * Return: the value returned by the last ib_process_cq_direct() call, or -1
+ * if no connected path was found and therefore nothing was polled.
+ */
+int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index)
+{
+	/*
+	 * Must be initialised: when there is no path, or no path is
+	 * connected, the loop below never assigns it, and returning an
+	 * indeterminate value is undefined behaviour. A negative value lets
+	 * the caller tell "nothing could be polled" apart from "polled, but
+	 * nothing completed" (0).
+	 */
+	int cnt = -1;
+	struct rtrs_con *con;
+	struct rtrs_clt_sess *sess;
+	struct path_it it;
+
+	rcu_read_lock();
+	for (path_it_init(&it, clt);
+	     (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) {
+		if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED)
+			continue;
+
+		/* Slot 0 is skipped, hence the +1 offset into the array. */
+		con = sess->s.con[index + 1];
+		cnt = ib_process_cq_direct(con->cq, -1);
+		if (cnt)
+			break;
+	}
+	path_it_deinit(&it);
+	rcu_read_unlock();
+
+	return cnt;
+}
+EXPORT_SYMBOL(rtrs_clt_rdma_cq_direct);
+
/**
* rtrs_clt_query() - queries RTRS session attributes
*@clt: session pointer
@@ -2915,8 +2947,7 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt,
struct rtrs_clt_sess *sess;
int err;
- sess = alloc_sess(clt, addr, nr_cpu_ids, clt->max_segments,
- clt->max_segment_size);
+ sess = alloc_sess(clt, addr, nr_cpu_ids, clt->max_segments, 0);
if (IS_ERR(sess))
return PTR_ERR(sess);
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
index 692bc83e1f09..98ba5d0a48b8 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
@@ -166,7 +166,6 @@ struct rtrs_clt {
unsigned int max_reconnect_attempts;
unsigned int reconnect_delay_sec;
unsigned int max_segments;
- size_t max_segment_size;
void *permits;
unsigned long *permits_map;
size_t queue_depth;
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
index 8caad0a2322b..00eb45053339 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
@@ -101,6 +101,7 @@ struct rtrs_sess {
uuid_t uuid;
struct rtrs_con **con;
unsigned int con_num;
+ unsigned int irq_con_num;
unsigned int recon_cnt;
struct rtrs_ib_dev *dev;
int dev_ref;
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index d071809e3ed2..f7aa2a7e7442 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -998,7 +998,7 @@ static void process_read(struct rtrs_srv_con *con,
usr_len = le16_to_cpu(msg->usr_len);
data_len = off - usr_len;
data = page_address(srv->chunks[buf_id]);
- ret = ctx->ops.rdma_ev(srv, srv->priv, id, READ, data, data_len,
+ ret = ctx->ops.rdma_ev(srv->priv, id, READ, data, data_len,
data + data_len, usr_len);
if (unlikely(ret)) {
@@ -1051,7 +1051,7 @@ static void process_write(struct rtrs_srv_con *con,
usr_len = le16_to_cpu(req->usr_len);
data_len = off - usr_len;
data = page_address(srv->chunks[buf_id]);
- ret = ctx->ops.rdma_ev(srv, srv->priv, id, WRITE, data, data_len,
+ ret = ctx->ops.rdma_ev(srv->priv, id, WRITE, data, data_len,
data + data_len, usr_len);
if (unlikely(ret)) {
rtrs_err_rl(s,
diff --git a/drivers/infiniband/ulp/rtrs/rtrs.h b/drivers/infiniband/ulp/rtrs/rtrs.h
index 8738e90e715a..bebaa94c4728 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs.h
@@ -58,14 +58,13 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
size_t path_cnt, u16 port,
size_t pdu_sz, u8 reconnect_delay_sec,
u16 max_segments,
- size_t max_segment_size,
- s16 max_reconnect_attempts);
+ s16 max_reconnect_attempts, u32 nr_poll_queues);
void rtrs_clt_close(struct rtrs_clt *sess);
-enum {
+enum wait_type {
RTRS_PERMIT_NOWAIT = 0,
- RTRS_PERMIT_WAIT = 1,
+ RTRS_PERMIT_WAIT = 1
};
/**
@@ -81,7 +80,7 @@ enum rtrs_clt_con_type {
struct rtrs_permit *rtrs_clt_get_permit(struct rtrs_clt *sess,
enum rtrs_clt_con_type con_type,
- int wait);
+ enum wait_type wait);
void rtrs_clt_put_permit(struct rtrs_clt *sess, struct rtrs_permit *permit);
@@ -103,6 +102,7 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops,
struct rtrs_clt *sess, struct rtrs_permit *permit,
const struct kvec *vec, size_t nr, size_t len,
struct scatterlist *sg, unsigned int sg_cnt);
+int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index);
/**
* rtrs_attrs - RTRS session attributes
@@ -138,7 +138,6 @@ struct rtrs_srv_ops {
* message for the data transfer will be sent to
* the client.
- * @sess: Session
* @priv: Private data set by rtrs_srv_set_sess_priv()
* @id: internal RTRS operation id
* @dir: READ/WRITE
@@ -152,7 +151,7 @@ struct rtrs_srv_ops {
* @usr: The extra user message sent by the client (%vec)
* @usrlen: Size of the user message
*/
- int (*rdma_ev)(struct rtrs_srv *sess, void *priv,
+ int (*rdma_ev)(void *priv,
struct rtrs_srv_op *id, int dir,
void *data, size_t datalen, const void *usr,
size_t usrlen);
diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig
index 4c2ce210c123..04caa0f2d445 100644
--- a/drivers/lightnvm/Kconfig
+++ b/drivers/lightnvm/Kconfig
@@ -4,7 +4,7 @@
#
menuconfig NVM
- bool "Open-Channel SSD target support"
+ bool "Open-Channel SSD target support (DEPRECATED)"
depends on BLOCK
help
Say Y here to get to enable Open-channel SSDs.
@@ -15,6 +15,8 @@ menuconfig NVM
If you say N, all options in this submenu will be skipped and disabled
only do this if you know what you are doing.
+ This code is deprecated and will be removed in Linux 5.15.
+
if NVM
config NVM_PBLK
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 28ddcaa5358b..40a948c08a0b 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -1174,6 +1174,8 @@ int nvm_register(struct nvm_dev *dev)
{
int ret, exp_pool_size;
+ pr_warn_once("lightnvm support is deprecated and will be removed in Linux 5.15.\n");
+
if (!dev->q || !dev->ops) {
kref_put(&dev->ref, nvm_free);
return -EINVAL;
@@ -1257,7 +1259,7 @@ static long nvm_ioctl_info(struct file *file, void __user *arg)
info = memdup_user(arg, sizeof(struct nvm_ioctl_info));
if (IS_ERR(info))
- return -EFAULT;
+ return PTR_ERR(info);
info->version[0] = NVM_VERSION_MAJOR;
info->version[1] = NVM_VERSION_MINOR;
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 8c371d5eef8e..097577ae3c47 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -482,8 +482,7 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k)
unsigned int i;
for (i = 0; i < KEY_PTRS(k); i++)
- __bch_bucket_free(PTR_CACHE(c, k, i),
- PTR_BUCKET(c, k, i));
+ __bch_bucket_free(c->cache, PTR_BUCKET(c, k, i));
}
int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
@@ -674,7 +673,7 @@ bool bch_alloc_sectors(struct cache_set *c,
SET_PTR_OFFSET(&b->key, i, PTR_OFFSET(&b->key, i) + sectors);
atomic_long_add(sectors,
- &PTR_CACHE(c, &b->key, i)->sectors_written);
+ &c->cache->sectors_written);
}
if (b->sectors_free < c->cache->sb.block_size)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 848dd4db1659..0a4551e165ab 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -804,13 +804,6 @@ static inline sector_t bucket_remainder(struct cache_set *c, sector_t s)
return s & (c->cache->sb.bucket_size - 1);
}
-static inline struct cache *PTR_CACHE(struct cache_set *c,
- const struct bkey *k,
- unsigned int ptr)
-{
- return c->cache;
-}
-
static inline size_t PTR_BUCKET_NR(struct cache_set *c,
const struct bkey *k,
unsigned int ptr)
@@ -822,7 +815,7 @@ static inline struct bucket *PTR_BUCKET(struct cache_set *c,
const struct bkey *k,
unsigned int ptr)
{
- return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr);
+ return c->cache->buckets + PTR_BUCKET_NR(c, k, ptr);
}
static inline uint8_t gen_after(uint8_t a, uint8_t b)
@@ -841,7 +834,7 @@ static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
unsigned int i)
{
- return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
+ return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && c->cache;
}
/* Btree key macros */
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index fe6dce125aba..183a58c89377 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -426,7 +426,7 @@ void __bch_btree_node_write(struct btree *b, struct closure *parent)
do_btree_node_write(b);
atomic_long_add(set_blocks(i, block_bytes(b->c->cache)) * b->c->cache->sb.block_size,
- &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);
+ &b->c->cache->btree_sectors_written);
b->written += set_blocks(i, block_bytes(b->c->cache));
}
@@ -1161,7 +1161,7 @@ static void make_btree_freeing_key(struct btree *b, struct bkey *k)
for (i = 0; i < KEY_PTRS(k); i++)
SET_PTR_GEN(k, i,
- bch_inc_gen(PTR_CACHE(b->c, &b->key, i),
+ bch_inc_gen(b->c->cache,
PTR_BUCKET(b->c, &b->key, i)));
mutex_unlock(&b->c->bucket_lock);
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 63e809f38e3f..116edda845c3 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -50,7 +50,7 @@ void bch_btree_verify(struct btree *b)
v->keys.ops = b->keys.ops;
bio = bch_bbio_alloc(b->c);
- bio_set_dev(bio, PTR_CACHE(b->c, &b->key, 0)->bdev);
+ bio_set_dev(bio, b->c->cache->bdev);
bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0);
bio->bi_iter.bi_size = KEY_SIZE(&v->key) << 9;
bio->bi_opf = REQ_OP_READ | REQ_META;
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index f4658a1f37b8..d626ffcbecb9 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -50,7 +50,7 @@ static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
for (i = 0; i < KEY_PTRS(k); i++)
if (ptr_available(c, k, i)) {
- struct cache *ca = PTR_CACHE(c, k, i);
+ struct cache *ca = c->cache;
size_t bucket = PTR_BUCKET_NR(c, k, i);
size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
@@ -71,7 +71,7 @@ static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
for (i = 0; i < KEY_PTRS(k); i++)
if (ptr_available(c, k, i)) {
- struct cache *ca = PTR_CACHE(c, k, i);
+ struct cache *ca = c->cache;
size_t bucket = PTR_BUCKET_NR(c, k, i);
size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
diff --git a/drivers/md/bcache/features.c b/drivers/md/bcache/features.c
index d636b7b2d070..6d2b7b84a7b7 100644
--- a/drivers/md/bcache/features.c
+++ b/drivers/md/bcache/features.c
@@ -19,7 +19,7 @@ struct feature {
static struct feature feature_list[] = {
{BCH_FEATURE_INCOMPAT, BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE,
"large_bucket"},
- {0, 0, 0 },
+ {0, 0, NULL },
};
#define compose_feature_string(type) \
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index dad71a6b7889..e4388fe3ab7e 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -36,7 +36,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
struct bbio *b = container_of(bio, struct bbio, bio);
bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0);
- bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);
+ bio_set_dev(bio, c->cache->bdev);
b->submit_time_us = local_clock_us();
closure_bio_submit(c, bio, bio->bi_private);
@@ -137,7 +137,7 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
blk_status_t error, const char *m)
{
struct bbio *b = container_of(bio, struct bbio, bio);
- struct cache *ca = PTR_CACHE(c, &b->key, 0);
+ struct cache *ca = c->cache;
int is_read = (bio_data_dir(bio) == READ ? 1 : 0);
unsigned int threshold = op_is_write(bio_op(bio))
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index c6613e817333..61bd79babf7a 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -111,7 +111,7 @@ reread: left = ca->sb.bucket_size - offset;
* Check from the oldest jset for last_seq. If
* i->j.seq < j->last_seq, it means the oldest jset
* in list is expired and useless, remove it from
- * this list. Otherwise, j is a condidate jset for
+ * this list. Otherwise, j is a candidate jset for
* further following checks.
*/
while (!list_empty(list)) {
@@ -498,7 +498,7 @@ static void btree_flush_write(struct cache_set *c)
* - If there are matched nodes recorded in btree_nodes[],
* they are clean now (this is why and how the oldest
* journal entry can be reclaimed). These selected nodes
- * will be ignored and skipped in the folowing for-loop.
+ * will be ignored and skipped in the following for-loop.
*/
if (((btree_current_write(b)->journal - fifo_front_p) &
mask) != 0) {
@@ -768,7 +768,7 @@ static void journal_write_unlocked(struct closure *cl)
w->data->csum = csum_set(w->data);
for (i = 0; i < KEY_PTRS(k); i++) {
- ca = PTR_CACHE(c, k, i);
+ ca = c->cache;
bio = &ca->journal.bio;
atomic_long_add(sectors, &ca->meta_sectors_written);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 03e1fe4de53d..2b6d6e9cd680 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1052,6 +1052,7 @@ static int cached_dev_status_update(void *arg)
int bch_cached_dev_run(struct cached_dev *dc)
{
+ int ret = 0;
struct bcache_device *d = &dc->disk;
char *buf = kmemdup_nul(dc->sb.label, SB_LABEL_SIZE, GFP_KERNEL);
char *env[] = {
@@ -1064,19 +1065,15 @@ int bch_cached_dev_run(struct cached_dev *dc)
if (dc->io_disable) {
pr_err("I/O disabled on cached dev %s\n",
dc->backing_dev_name);
- kfree(env[1]);
- kfree(env[2]);
- kfree(buf);
- return -EIO;
+ ret = -EIO;
+ goto out;
}
if (atomic_xchg(&dc->running, 1)) {
- kfree(env[1]);
- kfree(env[2]);
- kfree(buf);
pr_info("cached dev %s is running already\n",
dc->backing_dev_name);
- return -EBUSY;
+ ret = -EBUSY;
+ goto out;
}
if (!d->c &&
@@ -1097,15 +1094,13 @@ int bch_cached_dev_run(struct cached_dev *dc)
* only class / kset properties are persistent
*/
kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env);
- kfree(env[1]);
- kfree(env[2]);
- kfree(buf);
if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
sysfs_create_link(&disk_to_dev(d->disk)->kobj,
&d->kobj, "bcache")) {
pr_err("Couldn't create bcache dev <-> disk sysfs symlinks\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
dc->status_update_thread = kthread_run(cached_dev_status_update,
@@ -1114,7 +1109,11 @@ int bch_cached_dev_run(struct cached_dev *dc)
pr_warn("failed to create bcache_status_update kthread, continue to run without monitoring backing device status\n");
}
- return 0;
+out:
+ kfree(env[1]);
+ kfree(env[2]);
+ kfree(buf);
+ return ret;
}
/*
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index c029f7443190..bca4a7c97da7 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -27,7 +27,7 @@ struct closure;
#else /* DEBUG */
-#define EBUG_ON(cond) do { if (cond); } while (0)
+#define EBUG_ON(cond) do { if (cond) do {} while (0); } while (0)
#define atomic_dec_bug(v) atomic_dec(v)
#define atomic_inc_bug(v, i) atomic_inc(v)
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 82d4e0880a99..8120da278161 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -110,13 +110,13 @@ static void __update_writeback_rate(struct cached_dev *dc)
int64_t fps;
if (c->gc_stats.in_use <= BCH_WRITEBACK_FRAGMENT_THRESHOLD_MID) {
- fp_term = dc->writeback_rate_fp_term_low *
+ fp_term = (int64_t)dc->writeback_rate_fp_term_low *
(c->gc_stats.in_use - BCH_WRITEBACK_FRAGMENT_THRESHOLD_LOW);
} else if (c->gc_stats.in_use <= BCH_WRITEBACK_FRAGMENT_THRESHOLD_HIGH) {
- fp_term = dc->writeback_rate_fp_term_mid *
+ fp_term = (int64_t)dc->writeback_rate_fp_term_mid *
(c->gc_stats.in_use - BCH_WRITEBACK_FRAGMENT_THRESHOLD_MID);
} else {
- fp_term = dc->writeback_rate_fp_term_high *
+ fp_term = (int64_t)dc->writeback_rate_fp_term_high *
(c->gc_stats.in_use - BCH_WRITEBACK_FRAGMENT_THRESHOLD_HIGH);
}
fps = div_s64(dirty, dirty_buckets) * fp_term;
@@ -416,7 +416,7 @@ static void read_dirty_endio(struct bio *bio)
struct dirty_io *io = w->private;
/* is_read = 1 */
- bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0),
+ bch_count_io_errors(io->dc->disk.c->cache,
bio->bi_status, 1,
"reading dirty data from cache");
@@ -510,8 +510,7 @@ static void read_dirty(struct cached_dev *dc)
dirty_init(w);
bio_set_op_attrs(&io->bio, REQ_OP_READ, 0);
io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0);
- bio_set_dev(&io->bio,
- PTR_CACHE(dc->disk.c, &w->key, 0)->bdev);
+ bio_set_dev(&io->bio, dc->disk.c->cache->bdev);
io->bio.bi_end_io = read_dirty_endio;
if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL))
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index 200c5d0f08bf..ea3130e11680 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -1722,6 +1722,8 @@ void md_bitmap_flush(struct mddev *mddev)
md_bitmap_daemon_work(mddev);
bitmap->daemon_lastrun -= sleep;
md_bitmap_daemon_work(mddev);
+ if (mddev->bitmap_info.external)
+ md_super_wait(mddev);
md_bitmap_update_sb(bitmap);
}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 21da0c48f6c2..49f897fbb89b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -734,78 +734,94 @@ void mddev_init(struct mddev *mddev)
}
EXPORT_SYMBOL_GPL(mddev_init);
+static struct mddev *mddev_find_locked(dev_t unit)
+{
+ struct mddev *mddev;
+
+ list_for_each_entry(mddev, &all_mddevs, all_mddevs)
+ if (mddev->unit == unit)
+ return mddev;
+
+ return NULL;
+}
+
+/* find an unused unit number */
+static dev_t mddev_alloc_unit(void)
+{
+ static int next_minor = 512;
+ int start = next_minor;
+ bool is_free = 0;
+ dev_t dev = 0;
+
+ while (!is_free) {
+ dev = MKDEV(MD_MAJOR, next_minor);
+ next_minor++;
+ if (next_minor > MINORMASK)
+ next_minor = 0;
+ if (next_minor == start)
+ return 0; /* Oh dear, all in use. */
+ is_free = !mddev_find_locked(dev);
+ }
+
+ return dev;
+}
+
static struct mddev *mddev_find(dev_t unit)
{
- struct mddev *mddev, *new = NULL;
+ struct mddev *mddev;
- if (unit && MAJOR(unit) != MD_MAJOR)
- unit &= ~((1<<MdpMinorShift)-1);
+ if (MAJOR(unit) != MD_MAJOR)
+ unit &= ~((1 << MdpMinorShift) - 1);
- retry:
spin_lock(&all_mddevs_lock);
+ mddev = mddev_find_locked(unit);
+ if (mddev)
+ mddev_get(mddev);
+ spin_unlock(&all_mddevs_lock);
- if (unit) {
- list_for_each_entry(mddev, &all_mddevs, all_mddevs)
- if (mddev->unit == unit) {
- mddev_get(mddev);
- spin_unlock(&all_mddevs_lock);
- kfree(new);
- return mddev;
- }
+ return mddev;
+}
- if (new) {
- list_add(&new->all_mddevs, &all_mddevs);
- spin_unlock(&all_mddevs_lock);
- new->hold_active = UNTIL_IOCTL;
- return new;
- }
- } else if (new) {
- /* find an unused unit number */
- static int next_minor = 512;
- int start = next_minor;
- int is_free = 0;
- int dev = 0;
- while (!is_free) {
- dev = MKDEV(MD_MAJOR, next_minor);
- next_minor++;
- if (next_minor > MINORMASK)
- next_minor = 0;
- if (next_minor == start) {
- /* Oh dear, all in use. */
- spin_unlock(&all_mddevs_lock);
- kfree(new);
- return NULL;
- }
+static struct mddev *mddev_alloc(dev_t unit)
+{
+ struct mddev *new;
+ int error;
- is_free = 1;
- list_for_each_entry(mddev, &all_mddevs, all_mddevs)
- if (mddev->unit == dev) {
- is_free = 0;
- break;
- }
- }
- new->unit = dev;
- new->md_minor = MINOR(dev);
- new->hold_active = UNTIL_STOP;
- list_add(&new->all_mddevs, &all_mddevs);
- spin_unlock(&all_mddevs_lock);
- return new;
- }
- spin_unlock(&all_mddevs_lock);
+ if (unit && MAJOR(unit) != MD_MAJOR)
+ unit &= ~((1 << MdpMinorShift) - 1);
new = kzalloc(sizeof(*new), GFP_KERNEL);
if (!new)
- return NULL;
-
- new->unit = unit;
- if (MAJOR(unit) == MD_MAJOR)
- new->md_minor = MINOR(unit);
- else
- new->md_minor = MINOR(unit) >> MdpMinorShift;
-
+ return ERR_PTR(-ENOMEM);
mddev_init(new);
- goto retry;
+ spin_lock(&all_mddevs_lock);
+ if (unit) {
+ error = -EEXIST;
+ if (mddev_find_locked(unit))
+ goto out_free_new;
+ new->unit = unit;
+ if (MAJOR(unit) == MD_MAJOR)
+ new->md_minor = MINOR(unit);
+ else
+ new->md_minor = MINOR(unit) >> MdpMinorShift;
+ new->hold_active = UNTIL_IOCTL;
+ } else {
+ error = -ENODEV;
+ new->unit = mddev_alloc_unit();
+ if (!new->unit)
+ goto out_free_new;
+ new->md_minor = MINOR(new->unit);
+ new->hold_active = UNTIL_STOP;
+ }
+
+ list_add(&new->all_mddevs, &all_mddevs);
+ spin_unlock(&all_mddevs_lock);
+ return new;
+out_free_new:
+ spin_unlock(&all_mddevs_lock);
+ kfree(new);
+ return ERR_PTR(error);
}
static struct attribute_group md_redundancy_group;
@@ -5644,29 +5660,29 @@ static int md_alloc(dev_t dev, char *name)
* writing to /sys/module/md_mod/parameters/new_array.
*/
static DEFINE_MUTEX(disks_mutex);
- struct mddev *mddev = mddev_find(dev);
+ struct mddev *mddev;
struct gendisk *disk;
int partitioned;
int shift;
int unit;
- int error;
+ int error ;
- if (!mddev)
- return -ENODEV;
-
- partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
- shift = partitioned ? MdpMinorShift : 0;
- unit = MINOR(mddev->unit) >> shift;
-
- /* wait for any previous instance of this device to be
- * completely removed (mddev_delayed_delete).
+ /*
+ * Wait for any previous instance of this device to be completely
+ * removed (mddev_delayed_delete).
*/
flush_workqueue(md_misc_wq);
mutex_lock(&disks_mutex);
- error = -EEXIST;
- if (mddev->gendisk)
- goto abort;
+ mddev = mddev_alloc(dev);
+ if (IS_ERR(mddev)) {
+ mutex_unlock(&disks_mutex);
+ return PTR_ERR(mddev);
+ }
+
+ partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
+ shift = partitioned ? MdpMinorShift : 0;
+ unit = MINOR(mddev->unit) >> shift;
if (name && !dev) {
/* Need to ensure that 'name' is not a duplicate.
@@ -5678,6 +5694,7 @@ static int md_alloc(dev_t dev, char *name)
if (mddev2->gendisk &&
strcmp(mddev2->gendisk->disk_name, name) == 0) {
spin_unlock(&all_mddevs_lock);
+ error = -EEXIST;
goto abort;
}
spin_unlock(&all_mddevs_lock);
@@ -6524,11 +6541,9 @@ static void autorun_devices(int part)
md_probe(dev);
mddev = mddev_find(dev);
- if (!mddev || !mddev->gendisk) {
- if (mddev)
- mddev_put(mddev);
+ if (!mddev)
break;
- }
+
if (mddev_lock(mddev))
pr_warn("md: %s locked, cannot run\n", mdname(mddev));
else if (mddev->raid_disks || mddev->major_version
@@ -7821,8 +7836,7 @@ static int md_open(struct block_device *bdev, fmode_t mode)
/* Wait until bdev->bd_disk is definitely gone */
if (work_pending(&mddev->del_work))
flush_workqueue(md_misc_wq);
- /* Then retry the open from the top */
- return -ERESTARTSYS;
+ return -EBUSY;
}
BUG_ON(mddev != bdev->bd_disk->private_data);
@@ -8153,7 +8167,11 @@ static void *md_seq_start(struct seq_file *seq, loff_t *pos)
loff_t l = *pos;
struct mddev *mddev;
- if (l >= 0x10000)
+ if (l == 0x10000) {
+ ++*pos;
+ return (void *)2;
+ }
+ if (l > 0x10000)
return NULL;
if (!l--)
/* header */
@@ -8575,6 +8593,26 @@ void md_write_end(struct mddev *mddev)
EXPORT_SYMBOL(md_write_end);
+/* This is used by raid0 and raid10 */
+void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
+ struct bio *bio, sector_t start, sector_t size)
+{
+ struct bio *discard_bio = NULL;
+
+ if (__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO, 0,
+ &discard_bio) || !discard_bio)
+ return;
+
+ bio_chain(discard_bio, bio);
+ bio_clone_blkg_association(discard_bio, bio);
+ if (mddev->gendisk)
+ trace_block_bio_remap(discard_bio,
+ disk_devt(mddev->gendisk),
+ bio->bi_iter.bi_sector);
+ submit_bio_noacct(discard_bio);
+}
+EXPORT_SYMBOL_GPL(md_submit_discard_bio);
+
/* md_allow_write(mddev)
* Calling this ensures that the array is marked 'active' so that writes
* may proceed without blocking. It is important to call this before
@@ -9251,11 +9289,11 @@ void md_check_recovery(struct mddev *mddev)
}
if (mddev_is_clustered(mddev)) {
- struct md_rdev *rdev;
+ struct md_rdev *rdev, *tmp;
/* kick the device if another node issued a
* remove disk.
*/
- rdev_for_each(rdev, mddev) {
+ rdev_for_each_safe(rdev, tmp, mddev) {
if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
rdev->raid_disk < 0)
md_kick_rdev_from_array(rdev);
@@ -9569,7 +9607,7 @@ err_wq:
static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
{
struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
- struct md_rdev *rdev2;
+ struct md_rdev *rdev2, *tmp;
int role, ret;
char b[BDEVNAME_SIZE];
@@ -9586,7 +9624,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
}
/* Check for change of roles in the active devices */
- rdev_for_each(rdev2, mddev) {
+ rdev_for_each_safe(rdev2, tmp, mddev) {
if (test_bit(Faulty, &rdev2->flags))
continue;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index bcbba1b5ec4a..fb7eab58cfd5 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -713,6 +713,8 @@ extern void md_write_end(struct mddev *mddev);
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
extern void md_finish_reshape(struct mddev *mddev);
+void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
+ struct bio *bio, sector_t start, sector_t size);
extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 67f157f2525d..e5d7411cba9b 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -477,7 +477,6 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
for (disk = 0; disk < zone->nb_dev; disk++) {
sector_t dev_start, dev_end;
- struct bio *discard_bio = NULL;
struct md_rdev *rdev;
if (disk < start_disk_index)
@@ -500,18 +499,9 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
rdev = conf->devlist[(zone - conf->strip_zone) *
conf->strip_zone[0].nb_dev + disk];
- if (__blkdev_issue_discard(rdev->bdev,
+ md_submit_discard_bio(mddev, rdev, bio,
dev_start + zone->dev_start + rdev->data_offset,
- dev_end - dev_start, GFP_NOIO, 0, &discard_bio) ||
- !discard_bio)
- continue;
- bio_chain(discard_bio, bio);
- bio_clone_blkg_association(discard_bio, bio);
- if (mddev->gendisk)
- trace_block_bio_remap(discard_bio,
- disk_devt(mddev->gendisk),
- bio->bi_iter.bi_sector);
- submit_bio_noacct(discard_bio);
+ dev_end - dev_start);
}
bio_endio(bio);
}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index d2378765dc15..ced076ba560e 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -478,6 +478,8 @@ static void raid1_end_write_request(struct bio *bio)
if (!test_bit(Faulty, &rdev->flags))
set_bit(R1BIO_WriteError, &r1_bio->state);
else {
+ /* Fail the request */
+ set_bit(R1BIO_Degraded, &r1_bio->state);
/* Finished with this branch */
r1_bio->bios[mirror] = NULL;
to_put = bio;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index a9ae7d113492..13f5e6b2a73d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -91,7 +91,7 @@ static inline struct r10bio *get_resync_r10bio(struct bio *bio)
static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
{
struct r10conf *conf = data;
- int size = offsetof(struct r10bio, devs[conf->copies]);
+ int size = offsetof(struct r10bio, devs[conf->geo.raid_disks]);
/* allocate a r10bio with room for raid_disks entries in the
* bios array */
@@ -238,7 +238,7 @@ static void put_all_bios(struct r10conf *conf, struct r10bio *r10_bio)
{
int i;
- for (i = 0; i < conf->copies; i++) {
+ for (i = 0; i < conf->geo.raid_disks; i++) {
struct bio **bio = & r10_bio->devs[i].bio;
if (!BIO_SPECIAL(*bio))
bio_put(*bio);
@@ -327,7 +327,7 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
int slot;
int repl = 0;
- for (slot = 0; slot < conf->copies; slot++) {
+ for (slot = 0; slot < conf->geo.raid_disks; slot++) {
if (r10_bio->devs[slot].bio == bio)
break;
if (r10_bio->devs[slot].repl_bio == bio) {
@@ -336,7 +336,6 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
}
}
- BUG_ON(slot == conf->copies);
update_head_pos(slot, r10_bio);
if (slotp)
@@ -1274,12 +1273,77 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
}
}
+static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
+{
+ int i;
+ struct r10conf *conf = mddev->private;
+ struct md_rdev *blocked_rdev;
+
+retry_wait:
+ blocked_rdev = NULL;
+ rcu_read_lock();
+ for (i = 0; i < conf->copies; i++) {
+ struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+ struct md_rdev *rrdev = rcu_dereference(
+ conf->mirrors[i].replacement);
+ if (rdev == rrdev)
+ rrdev = NULL;
+ if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+ atomic_inc(&rdev->nr_pending);
+ blocked_rdev = rdev;
+ break;
+ }
+ if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
+ atomic_inc(&rrdev->nr_pending);
+ blocked_rdev = rrdev;
+ break;
+ }
+
+ if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
+ sector_t first_bad;
+ sector_t dev_sector = r10_bio->devs[i].addr;
+ int bad_sectors;
+ int is_bad;
+
+ /*
+ * A discard request ignores the write result, so it
+ * doesn't need to wait for a blocked disk here.
+ */
+ if (!r10_bio->sectors)
+ continue;
+
+ is_bad = is_badblock(rdev, dev_sector, r10_bio->sectors,
+ &first_bad, &bad_sectors);
+ if (is_bad < 0) {
+ /*
+ * Mustn't write here until the bad block
+ * is acknowledged
+ */
+ atomic_inc(&rdev->nr_pending);
+ set_bit(BlockedBadBlocks, &rdev->flags);
+ blocked_rdev = rdev;
+ break;
+ }
+ }
+ }
+ rcu_read_unlock();
+
+ if (unlikely(blocked_rdev)) {
+ /* Have to wait for this device to get unblocked, then retry */
+ allow_barrier(conf);
+ raid10_log(conf->mddev, "%s wait rdev %d blocked",
+ __func__, blocked_rdev->raid_disk);
+ md_wait_for_blocked_rdev(blocked_rdev, mddev);
+ wait_barrier(conf);
+ goto retry_wait;
+ }
+}
+
static void raid10_write_request(struct mddev *mddev, struct bio *bio,
struct r10bio *r10_bio)
{
struct r10conf *conf = mddev->private;
int i;
- struct md_rdev *blocked_rdev;
sector_t sectors;
int max_sectors;
@@ -1337,8 +1401,9 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
r10_bio->read_slot = -1; /* make sure repl_bio gets freed */
raid10_find_phys(conf, r10_bio);
-retry_write:
- blocked_rdev = NULL;
+
+ wait_blocked_dev(mddev, r10_bio);
+
rcu_read_lock();
max_sectors = r10_bio->sectors;
@@ -1349,16 +1414,6 @@ retry_write:
conf->mirrors[d].replacement);
if (rdev == rrdev)
rrdev = NULL;
- if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
- atomic_inc(&rdev->nr_pending);
- blocked_rdev = rdev;
- break;
- }
- if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
- atomic_inc(&rrdev->nr_pending);
- blocked_rdev = rrdev;
- break;
- }
if (rdev && (test_bit(Faulty, &rdev->flags)))
rdev = NULL;
if (rrdev && (test_bit(Faulty, &rrdev->flags)))
@@ -1379,15 +1434,6 @@ retry_write:
is_bad = is_badblock(rdev, dev_sector, max_sectors,
&first_bad, &bad_sectors);
- if (is_bad < 0) {
- /* Mustn't write here until the bad block
- * is acknowledged
- */
- atomic_inc(&rdev->nr_pending);
- set_bit(BlockedBadBlocks, &rdev->flags);
- blocked_rdev = rdev;
- break;
- }
if (is_bad && first_bad <= dev_sector) {
/* Cannot write here at all */
bad_sectors -= (dev_sector - first_bad);
@@ -1423,35 +1469,6 @@ retry_write:
}
rcu_read_unlock();
- if (unlikely(blocked_rdev)) {
- /* Have to wait for this device to get unblocked, then retry */
- int j;
- int d;
-
- for (j = 0; j < i; j++) {
- if (r10_bio->devs[j].bio) {
- d = r10_bio->devs[j].devnum;
- rdev_dec_pending(conf->mirrors[d].rdev, mddev);
- }
- if (r10_bio->devs[j].repl_bio) {
- struct md_rdev *rdev;
- d = r10_bio->devs[j].devnum;
- rdev = conf->mirrors[d].replacement;
- if (!rdev) {
- /* Race with remove_disk */
- smp_mb();
- rdev = conf->mirrors[d].rdev;
- }
- rdev_dec_pending(rdev, mddev);
- }
- }
- allow_barrier(conf);
- raid10_log(conf->mddev, "wait rdev %d blocked", blocked_rdev->raid_disk);
- md_wait_for_blocked_rdev(blocked_rdev, mddev);
- wait_barrier(conf);
- goto retry_write;
- }
-
if (max_sectors < r10_bio->sectors)
r10_bio->sectors = max_sectors;
@@ -1492,7 +1509,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
r10_bio->sector = bio->bi_iter.bi_sector;
r10_bio->state = 0;
r10_bio->read_slot = -1;
- memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * conf->copies);
+ memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) *
+ conf->geo.raid_disks);
if (bio_data_dir(bio) == READ)
raid10_read_request(mddev, bio, r10_bio);
@@ -1500,6 +1518,304 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
raid10_write_request(mddev, bio, r10_bio);
}
+static void raid_end_discard_bio(struct r10bio *r10bio)
+{
+ struct r10conf *conf = r10bio->mddev->private;
+ struct r10bio *first_r10bio;
+
+ while (atomic_dec_and_test(&r10bio->remaining)) {
+
+ allow_barrier(conf);
+
+ if (!test_bit(R10BIO_Discard, &r10bio->state)) {
+ first_r10bio = (struct r10bio *)r10bio->master_bio;
+ free_r10bio(r10bio);
+ r10bio = first_r10bio;
+ } else {
+ md_write_end(r10bio->mddev);
+ bio_endio(r10bio->master_bio);
+ free_r10bio(r10bio);
+ break;
+ }
+ }
+}
+
+static void raid10_end_discard_request(struct bio *bio)
+{
+ struct r10bio *r10_bio = bio->bi_private;
+ struct r10conf *conf = r10_bio->mddev->private;
+ struct md_rdev *rdev = NULL;
+ int dev;
+ int slot, repl;
+
+ /*
+ * We don't care about the return value of the discard bio
+ */
+ if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
+ set_bit(R10BIO_Uptodate, &r10_bio->state);
+
+ dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
+ if (repl)
+ rdev = conf->mirrors[dev].replacement;
+ if (!rdev) {
+ /*
+ * raid10_remove_disk uses smp_mb to make sure rdev is set to
+ * replacement before setting replacement to NULL. It can read
+ * rdev first without barrier protection even if replacement is NULL
+ */
+ smp_rmb();
+ rdev = conf->mirrors[dev].rdev;
+ }
+
+ raid_end_discard_bio(r10_bio);
+ rdev_dec_pending(rdev, conf->mddev);
+}
+
+/*
+ * There are some limitations on handling a discard bio:
+ * 1st, the discard size is bigger than stripe_size*2.
+ * 2nd, if the discard bio spans reshape progress, we use the old way to
+ * handle the discard bio
+ */
+static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
+{
+ struct r10conf *conf = mddev->private;
+ struct geom *geo = &conf->geo;
+ int far_copies = geo->far_copies;
+ bool first_copy = true;
+ struct r10bio *r10_bio, *first_r10bio;
+ struct bio *split;
+ int disk;
+ sector_t chunk;
+ unsigned int stripe_size;
+ unsigned int stripe_data_disks;
+ sector_t split_size;
+ sector_t bio_start, bio_end;
+ sector_t first_stripe_index, last_stripe_index;
+ sector_t start_disk_offset;
+ unsigned int start_disk_index;
+ sector_t end_disk_offset;
+ unsigned int end_disk_index;
+ unsigned int remainder;
+
+ if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
+ return -EAGAIN;
+
+ wait_barrier(conf);
+
+ /*
+ * Check for reshape again in case a reshape started after checking
+ * MD_RECOVERY_RESHAPE and before wait_barrier
+ */
+ if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
+ goto out;
+
+ if (geo->near_copies)
+ stripe_data_disks = geo->raid_disks / geo->near_copies +
+ geo->raid_disks % geo->near_copies;
+ else
+ stripe_data_disks = geo->raid_disks;
+
+ stripe_size = stripe_data_disks << geo->chunk_shift;
+
+ bio_start = bio->bi_iter.bi_sector;
+ bio_end = bio_end_sector(bio);
+
+ /*
+ * A discard bio may be smaller than the stripe size, or may cross a
+ * stripe boundary with a discard region larger than one stripe. For far
+ * offset layout, if the discard region is not aligned with the stripe
+ * size, there is a hole when we submit the discard bio to a member disk.
+ * For simplicity, we only handle a discard bio whose discard region
+ * is bigger than stripe_size * 2
+ */
+ if (bio_sectors(bio) < stripe_size*2)
+ goto out;
+
+ /*
+ * Keep bio aligned with stripe size.
+ */
+ div_u64_rem(bio_start, stripe_size, &remainder);
+ if (remainder) {
+ split_size = stripe_size - remainder;
+ split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split);
+ bio_chain(split, bio);
+ allow_barrier(conf);
+ /* Resend the first split part */
+ submit_bio_noacct(split);
+ wait_barrier(conf);
+ }
+ div_u64_rem(bio_end, stripe_size, &remainder);
+ if (remainder) {
+ split_size = bio_sectors(bio) - remainder;
+ split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split);
+ bio_chain(split, bio);
+ allow_barrier(conf);
+ /* Resend the second split part */
+ submit_bio_noacct(bio);
+ bio = split;
+ wait_barrier(conf);
+ }
+
+ bio_start = bio->bi_iter.bi_sector;
+ bio_end = bio_end_sector(bio);
+
+ /*
+ * Raid10 uses chunk as the unit to store data. It's similar to raid0.
+ * One stripe contains the chunks from all member disks (one chunk from
+ * one disk at the same HBA address). For layout detail, see 'man md 4'
+ */
+ chunk = bio_start >> geo->chunk_shift;
+ chunk *= geo->near_copies;
+ first_stripe_index = chunk;
+ start_disk_index = sector_div(first_stripe_index, geo->raid_disks);
+ if (geo->far_offset)
+ first_stripe_index *= geo->far_copies;
+ start_disk_offset = (bio_start & geo->chunk_mask) +
+ (first_stripe_index << geo->chunk_shift);
+
+ chunk = bio_end >> geo->chunk_shift;
+ chunk *= geo->near_copies;
+ last_stripe_index = chunk;
+ end_disk_index = sector_div(last_stripe_index, geo->raid_disks);
+ if (geo->far_offset)
+ last_stripe_index *= geo->far_copies;
+ end_disk_offset = (bio_end & geo->chunk_mask) +
+ (last_stripe_index << geo->chunk_shift);
+
+retry_discard:
+ r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO);
+ r10_bio->mddev = mddev;
+ r10_bio->state = 0;
+ r10_bio->sectors = 0;
+ memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * geo->raid_disks);
+ wait_blocked_dev(mddev, r10_bio);
+
+ /*
+ * For far layout it needs more than one r10bio to cover all regions.
+ * Inspired by raid10_sync_request, we can use the first r10bio->master_bio
+ * to record the discard bio. Other r10bio->master_bio record the first
+ * r10bio. The first r10bio is only released after all other r10bios finish.
+ * The discard bio returns only after the first r10bio finishes
+ */
+ if (first_copy) {
+ r10_bio->master_bio = bio;
+ set_bit(R10BIO_Discard, &r10_bio->state);
+ first_copy = false;
+ first_r10bio = r10_bio;
+ } else
+ r10_bio->master_bio = (struct bio *)first_r10bio;
+
+ rcu_read_lock();
+ for (disk = 0; disk < geo->raid_disks; disk++) {
+ struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
+ struct md_rdev *rrdev = rcu_dereference(
+ conf->mirrors[disk].replacement);
+
+ r10_bio->devs[disk].bio = NULL;
+ r10_bio->devs[disk].repl_bio = NULL;
+
+ if (rdev && (test_bit(Faulty, &rdev->flags)))
+ rdev = NULL;
+ if (rrdev && (test_bit(Faulty, &rrdev->flags)))
+ rrdev = NULL;
+ if (!rdev && !rrdev)
+ continue;
+
+ if (rdev) {
+ r10_bio->devs[disk].bio = bio;
+ atomic_inc(&rdev->nr_pending);
+ }
+ if (rrdev) {
+ r10_bio->devs[disk].repl_bio = bio;
+ atomic_inc(&rrdev->nr_pending);
+ }
+ }
+ rcu_read_unlock();
+
+ atomic_set(&r10_bio->remaining, 1);
+ for (disk = 0; disk < geo->raid_disks; disk++) {
+ sector_t dev_start, dev_end;
+ struct bio *mbio, *rbio = NULL;
+ struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
+ struct md_rdev *rrdev = rcu_dereference(
+ conf->mirrors[disk].replacement);
+
+ /*
+ * Now start to calculate the start and end address for each disk.
+ * The space between dev_start and dev_end is the discard region.
+ *
+ * For dev_start, it needs to consider three conditions:
+ * 1st, the disk is before start_disk, you can imagine the disk in
+ * the next stripe. So the dev_start is the start address of next
+ * stripe.
+ * 2nd, the disk is after start_disk, it means the disk is at the
+ * same stripe of first disk
+ * 3rd, the first disk itself, we can use start_disk_offset directly
+ */
+ if (disk < start_disk_index)
+ dev_start = (first_stripe_index + 1) * mddev->chunk_sectors;
+ else if (disk > start_disk_index)
+ dev_start = first_stripe_index * mddev->chunk_sectors;
+ else
+ dev_start = start_disk_offset;
+
+ if (disk < end_disk_index)
+ dev_end = (last_stripe_index + 1) * mddev->chunk_sectors;
+ else if (disk > end_disk_index)
+ dev_end = last_stripe_index * mddev->chunk_sectors;
+ else
+ dev_end = end_disk_offset;
+
+ /*
+ * It only handles discard bio whose size is >= stripe size, so
+ * dev_end > dev_start all the time
+ */
+ if (r10_bio->devs[disk].bio) {
+ mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+ mbio->bi_end_io = raid10_end_discard_request;
+ mbio->bi_private = r10_bio;
+ r10_bio->devs[disk].bio = mbio;
+ r10_bio->devs[disk].devnum = disk;
+ atomic_inc(&r10_bio->remaining);
+ md_submit_discard_bio(mddev, rdev, mbio,
+ dev_start + choose_data_offset(r10_bio, rdev),
+ dev_end - dev_start);
+ bio_endio(mbio);
+ }
+ if (r10_bio->devs[disk].repl_bio) {
+ rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
+ rbio->bi_end_io = raid10_end_discard_request;
+ rbio->bi_private = r10_bio;
+ r10_bio->devs[disk].repl_bio = rbio;
+ r10_bio->devs[disk].devnum = disk;
+ atomic_inc(&r10_bio->remaining);
+ md_submit_discard_bio(mddev, rrdev, rbio,
+ dev_start + choose_data_offset(r10_bio, rrdev),
+ dev_end - dev_start);
+ bio_endio(rbio);
+ }
+ }
+
+ if (!geo->far_offset && --far_copies) {
+ first_stripe_index += geo->stride >> geo->chunk_shift;
+ start_disk_offset += geo->stride;
+ last_stripe_index += geo->stride >> geo->chunk_shift;
+ end_disk_offset += geo->stride;
+ atomic_inc(&first_r10bio->remaining);
+ raid_end_discard_bio(r10_bio);
+ wait_barrier(conf);
+ goto retry_discard;
+ }
+
+ raid_end_discard_bio(r10_bio);
+
+ return 0;
+out:
+ allow_barrier(conf);
+ return -EAGAIN;
+}
+
static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
{
struct r10conf *conf = mddev->private;
@@ -1514,6 +1830,10 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
if (!md_write_start(mddev, bio))
return false;
+ if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
+ if (!raid10_handle_discard(mddev, bio))
+ return true;
+
/*
* If this request crosses a chunk boundary, we need to split
* it.
@@ -3753,7 +4073,7 @@ static int raid10_run(struct mddev *mddev)
if (mddev->queue) {
blk_queue_max_discard_sectors(mddev->queue,
- mddev->chunk_sectors);
+ UINT_MAX);
blk_queue_max_write_same_sectors(mddev->queue, 0);
blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 79cd2b7d3128..1461fd55311b 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -179,5 +179,6 @@ enum r10bio_state {
R10BIO_Previous,
/* failfast devices did receive failfast requests. */
R10BIO_FailFast,
+ R10BIO_Discard,
};
#endif
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index d7f6a87687b8..cbc509784b2e 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_NVME_RDMA) += nvme-rdma.o
obj-$(CONFIG_NVME_FC) += nvme-fc.o
obj-$(CONFIG_NVME_TCP) += nvme-tcp.o
-nvme-core-y := core.o
+nvme-core-y := core.o ioctl.o
nvme-core-$(CONFIG_TRACING) += trace.o
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
nvme-core-$(CONFIG_NVM) += lightnvm.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 5eaaa51a5e30..b6f7815fa239 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -89,6 +89,10 @@ static dev_t nvme_ctrl_base_chr_devt;
static struct class *nvme_class;
static struct class *nvme_subsys_class;
+static DEFINE_IDA(nvme_ns_chr_minor_ida);
+static dev_t nvme_ns_chr_devt;
+static struct class *nvme_ns_chr_class;
+
static void nvme_put_subsystem(struct nvme_subsystem *subsys);
static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
unsigned nsid);
@@ -112,7 +116,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
set_capacity_and_notify(ns->disk, 0);
}
-static void nvme_queue_scan(struct nvme_ctrl *ctrl)
+void nvme_queue_scan(struct nvme_ctrl *ctrl)
{
/*
* Only new queue scan work when admin and IO queues are both alive
@@ -179,7 +183,7 @@ int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_reset_ctrl);
-static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
+int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
{
int ret;
@@ -549,7 +553,12 @@ static void nvme_free_ns_head(struct kref *ref)
kfree(head);
}
-static void nvme_put_ns_head(struct nvme_ns_head *head)
+bool nvme_tryget_ns_head(struct nvme_ns_head *head)
+{
+ return kref_get_unless_zero(&head->ref);
+}
+
+void nvme_put_ns_head(struct nvme_ns_head *head)
{
kref_put(&head->ref, nvme_free_ns_head);
}
@@ -575,11 +584,12 @@ EXPORT_SYMBOL_NS_GPL(nvme_put_ns, NVME_TARGET_PASSTHRU);
static inline void nvme_clear_nvme_request(struct request *req)
{
- if (!(req->rq_flags & RQF_DONTPREP)) {
- nvme_req(req)->retries = 0;
- nvme_req(req)->flags = 0;
- req->rq_flags |= RQF_DONTPREP;
- }
+ struct nvme_command *cmd = nvme_req(req)->cmd;
+
+ memset(cmd, 0, sizeof(*cmd));
+ nvme_req(req)->retries = 0;
+ nvme_req(req)->flags = 0;
+ req->rq_flags |= RQF_DONTPREP;
}
static inline unsigned int nvme_req_op(struct nvme_command *cmd)
@@ -595,9 +605,12 @@ static inline void nvme_init_request(struct request *req,
else /* no queuedata implies admin queue */
req->timeout = NVME_ADMIN_TIMEOUT;
+ /* passthru commands should let the driver set the SGL flags */
+ cmd->common.flags &= ~NVME_CMD_SGL_ALL;
+
req->cmd_flags |= REQ_FAILFAST_DRIVER;
nvme_clear_nvme_request(req);
- nvme_req(req)->cmd = cmd;
+ memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd));
}
struct request *nvme_alloc_request(struct request_queue *q,
@@ -726,14 +739,6 @@ static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9;
}
-static void nvme_setup_passthrough(struct request *req,
- struct nvme_command *cmd)
-{
- memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd));
- /* passthru commands should let the driver set the SGL flags */
- cmd->common.flags &= ~NVME_CMD_SGL_ALL;
-}
-
static inline void nvme_setup_flush(struct nvme_ns *ns,
struct nvme_command *cmnd)
{
@@ -888,18 +893,18 @@ void nvme_cleanup_cmd(struct request *req)
}
EXPORT_SYMBOL_GPL(nvme_cleanup_cmd);
-blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
- struct nvme_command *cmd)
+blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
{
+ struct nvme_command *cmd = nvme_req(req)->cmd;
blk_status_t ret = BLK_STS_OK;
- nvme_clear_nvme_request(req);
+ if (!(req->rq_flags & RQF_DONTPREP))
+ nvme_clear_nvme_request(req);
- memset(cmd, 0, sizeof(*cmd));
switch (req_op(req)) {
case REQ_OP_DRV_IN:
case REQ_OP_DRV_OUT:
- nvme_setup_passthrough(req, cmd);
+ /* these are set up prior to execution in nvme_init_request() */
break;
case REQ_OP_FLUSH:
nvme_setup_flush(ns, cmd);
@@ -1020,40 +1025,6 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
}
EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
-static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf,
- unsigned len, u32 seed, bool write)
-{
- struct bio_integrity_payload *bip;
- int ret = -ENOMEM;
- void *buf;
-
- buf = kmalloc(len, GFP_KERNEL);
- if (!buf)
- goto out;
-
- ret = -EFAULT;
- if (write && copy_from_user(buf, ubuf, len))
- goto out_free_meta;
-
- bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
- if (IS_ERR(bip)) {
- ret = PTR_ERR(bip);
- goto out_free_meta;
- }
-
- bip->bip_iter.bi_size = len;
- bip->bip_iter.bi_sector = seed;
- ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
- offset_in_page(buf));
- if (ret == len)
- return buf;
- ret = -ENOMEM;
-out_free_meta:
- kfree(buf);
-out:
- return ERR_PTR(ret);
-}
-
static u32 nvme_known_admin_effects(u8 opcode)
{
switch (opcode) {
@@ -1076,9 +1047,9 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
if (ns->head->effects)
effects = le32_to_cpu(ns->head->effects->iocs[opcode]);
if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))
- dev_warn(ctrl->device,
- "IO command:%02x has unhandled effects:%08x\n",
- opcode, effects);
+ dev_warn_once(ctrl->device,
+ "IO command:%02x has unhandled effects:%08x\n",
+ opcode, effects);
return 0;
}
@@ -1120,7 +1091,7 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
mutex_unlock(&ctrl->scan_lock);
}
if (effects & NVME_CMD_EFFECTS_CCC)
- nvme_init_identify(ctrl);
+ nvme_init_ctrl_finish(ctrl);
if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) {
nvme_queue_scan(ctrl);
flush_work(&ctrl->scan_work);
@@ -1137,68 +1108,20 @@ void nvme_execute_passthru_rq(struct request *rq)
effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode);
blk_execute_rq(disk, rq, 0);
- nvme_passthru_end(ctrl, effects);
+ if (effects) /* nothing to be done for zero cmd effects */
+ nvme_passthru_end(ctrl, effects);
}
EXPORT_SYMBOL_NS_GPL(nvme_execute_passthru_rq, NVME_TARGET_PASSTHRU);
-static int nvme_submit_user_cmd(struct request_queue *q,
- struct nvme_command *cmd, void __user *ubuffer,
- unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
- u32 meta_seed, u64 *result, unsigned timeout)
+/*
+ * Recommended frequency for KATO commands per NVMe 1.4 section 7.12.1:
+ *
+ * The host should send Keep Alive commands at half of the Keep Alive Timeout
+ * accounting for transport roundtrip times [..].
+ */
+static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl)
{
- bool write = nvme_is_write(cmd);
- struct nvme_ns *ns = q->queuedata;
- struct block_device *bdev = ns ? ns->disk->part0 : NULL;
- struct request *req;
- struct bio *bio = NULL;
- void *meta = NULL;
- int ret;
-
- req = nvme_alloc_request(q, cmd, 0);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- if (timeout)
- req->timeout = timeout;
- nvme_req(req)->flags |= NVME_REQ_USERCMD;
-
- if (ubuffer && bufflen) {
- ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
- GFP_KERNEL);
- if (ret)
- goto out;
- bio = req->bio;
- if (bdev)
- bio_set_dev(bio, bdev);
- if (bdev && meta_buffer && meta_len) {
- meta = nvme_add_user_metadata(bio, meta_buffer, meta_len,
- meta_seed, write);
- if (IS_ERR(meta)) {
- ret = PTR_ERR(meta);
- goto out_unmap;
- }
- req->cmd_flags |= REQ_INTEGRITY;
- }
- }
-
- nvme_execute_passthru_rq(req);
- if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
- ret = -EINTR;
- else
- ret = nvme_req(req)->status;
- if (result)
- *result = le64_to_cpu(nvme_req(req)->result.u64);
- if (meta && !ret && !write) {
- if (copy_to_user(meta_buffer, meta, meta_len))
- ret = -EFAULT;
- }
- kfree(meta);
- out_unmap:
- if (bio)
- blk_rq_unmap_user(bio);
- out:
- blk_mq_free_request(req);
- return ret;
+ queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ / 2);
}
static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
@@ -1223,7 +1146,7 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
startka = true;
spin_unlock_irqrestore(&ctrl->lock, flags);
if (startka)
- queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
+ nvme_queue_keep_alive_work(ctrl);
}
static void nvme_keep_alive_work(struct work_struct *work)
@@ -1237,7 +1160,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
dev_dbg(ctrl->device,
"reschedule traffic based keep-alive timer\n");
ctrl->comp_seen = false;
- queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
+ nvme_queue_keep_alive_work(ctrl);
return;
}
@@ -1260,7 +1183,7 @@ static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
if (unlikely(ctrl->kato == 0))
return;
- queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
+ nvme_queue_keep_alive_work(ctrl);
}
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
@@ -1536,170 +1459,6 @@ static void nvme_enable_aen(struct nvme_ctrl *ctrl)
}
/*
- * Convert integer values from ioctl structures to user pointers, silently
- * ignoring the upper bits in the compat case to match behaviour of 32-bit
- * kernels.
- */
-static void __user *nvme_to_user_ptr(uintptr_t ptrval)
-{
- if (in_compat_syscall())
- ptrval = (compat_uptr_t)ptrval;
- return (void __user *)ptrval;
-}
-
-static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
-{
- struct nvme_user_io io;
- struct nvme_command c;
- unsigned length, meta_len;
- void __user *metadata;
-
- if (copy_from_user(&io, uio, sizeof(io)))
- return -EFAULT;
- if (io.flags)
- return -EINVAL;
-
- switch (io.opcode) {
- case nvme_cmd_write:
- case nvme_cmd_read:
- case nvme_cmd_compare:
- break;
- default:
- return -EINVAL;
- }
-
- length = (io.nblocks + 1) << ns->lba_shift;
-
- if ((io.control & NVME_RW_PRINFO_PRACT) &&
- ns->ms == sizeof(struct t10_pi_tuple)) {
- /*
- * Protection information is stripped/inserted by the
- * controller.
- */
- if (nvme_to_user_ptr(io.metadata))
- return -EINVAL;
- meta_len = 0;
- metadata = NULL;
- } else {
- meta_len = (io.nblocks + 1) * ns->ms;
- metadata = nvme_to_user_ptr(io.metadata);
- }
-
- if (ns->features & NVME_NS_EXT_LBAS) {
- length += meta_len;
- meta_len = 0;
- } else if (meta_len) {
- if ((io.metadata & 3) || !io.metadata)
- return -EINVAL;
- }
-
- memset(&c, 0, sizeof(c));
- c.rw.opcode = io.opcode;
- c.rw.flags = io.flags;
- c.rw.nsid = cpu_to_le32(ns->head->ns_id);
- c.rw.slba = cpu_to_le64(io.slba);
- c.rw.length = cpu_to_le16(io.nblocks);
- c.rw.control = cpu_to_le16(io.control);
- c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
- c.rw.reftag = cpu_to_le32(io.reftag);
- c.rw.apptag = cpu_to_le16(io.apptag);
- c.rw.appmask = cpu_to_le16(io.appmask);
-
- return nvme_submit_user_cmd(ns->queue, &c,
- nvme_to_user_ptr(io.addr), length,
- metadata, meta_len, lower_32_bits(io.slba), NULL, 0);
-}
-
-static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
- struct nvme_passthru_cmd __user *ucmd)
-{
- struct nvme_passthru_cmd cmd;
- struct nvme_command c;
- unsigned timeout = 0;
- u64 result;
- int status;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
- if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
- return -EFAULT;
- if (cmd.flags)
- return -EINVAL;
-
- memset(&c, 0, sizeof(c));
- c.common.opcode = cmd.opcode;
- c.common.flags = cmd.flags;
- c.common.nsid = cpu_to_le32(cmd.nsid);
- c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
- c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
- c.common.cdw10 = cpu_to_le32(cmd.cdw10);
- c.common.cdw11 = cpu_to_le32(cmd.cdw11);
- c.common.cdw12 = cpu_to_le32(cmd.cdw12);
- c.common.cdw13 = cpu_to_le32(cmd.cdw13);
- c.common.cdw14 = cpu_to_le32(cmd.cdw14);
- c.common.cdw15 = cpu_to_le32(cmd.cdw15);
-
- if (cmd.timeout_ms)
- timeout = msecs_to_jiffies(cmd.timeout_ms);
-
- status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
- nvme_to_user_ptr(cmd.addr), cmd.data_len,
- nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
- 0, &result, timeout);
-
- if (status >= 0) {
- if (put_user(result, &ucmd->result))
- return -EFAULT;
- }
-
- return status;
-}
-
-static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
- struct nvme_passthru_cmd64 __user *ucmd)
-{
- struct nvme_passthru_cmd64 cmd;
- struct nvme_command c;
- unsigned timeout = 0;
- int status;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
- if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
- return -EFAULT;
- if (cmd.flags)
- return -EINVAL;
-
- memset(&c, 0, sizeof(c));
- c.common.opcode = cmd.opcode;
- c.common.flags = cmd.flags;
- c.common.nsid = cpu_to_le32(cmd.nsid);
- c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
- c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
- c.common.cdw10 = cpu_to_le32(cmd.cdw10);
- c.common.cdw11 = cpu_to_le32(cmd.cdw11);
- c.common.cdw12 = cpu_to_le32(cmd.cdw12);
- c.common.cdw13 = cpu_to_le32(cmd.cdw13);
- c.common.cdw14 = cpu_to_le32(cmd.cdw14);
- c.common.cdw15 = cpu_to_le32(cmd.cdw15);
-
- if (cmd.timeout_ms)
- timeout = msecs_to_jiffies(cmd.timeout_ms);
-
- status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
- nvme_to_user_ptr(cmd.addr), cmd.data_len,
- nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
- 0, &cmd.result, timeout);
-
- if (status >= 0) {
- if (put_user(cmd.result, &ucmd->result))
- return -EFAULT;
- }
-
- return status;
-}
-
-/*
* Issue ioctl requests on the first available path. Note that unlike normal
* block layer requests we will not retry failed request on another controller.
*/
@@ -1729,136 +1488,12 @@ void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
srcu_read_unlock(&head->srcu, idx);
}
-static bool is_ctrl_ioctl(unsigned int cmd)
-{
- if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
- return true;
- if (is_sed_ioctl(cmd))
- return true;
- return false;
-}
-
-static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
- void __user *argp,
- struct nvme_ns_head *head,
- int srcu_idx)
-{
- struct nvme_ctrl *ctrl = ns->ctrl;
- int ret;
-
- nvme_get_ctrl(ns->ctrl);
- nvme_put_ns_from_disk(head, srcu_idx);
-
- switch (cmd) {
- case NVME_IOCTL_ADMIN_CMD:
- ret = nvme_user_cmd(ctrl, NULL, argp);
- break;
- case NVME_IOCTL_ADMIN64_CMD:
- ret = nvme_user_cmd64(ctrl, NULL, argp);
- break;
- default:
- ret = sed_ioctl(ctrl->opal_dev, cmd, argp);
- break;
- }
- nvme_put_ctrl(ctrl);
- return ret;
-}
-
-static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg)
-{
- struct nvme_ns_head *head = NULL;
- void __user *argp = (void __user *)arg;
- struct nvme_ns *ns;
- int srcu_idx, ret;
-
- ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
- if (unlikely(!ns))
- return -EWOULDBLOCK;
-
- /*
- * Handle ioctls that apply to the controller instead of the namespace
- * seperately and drop the ns SRCU reference early. This avoids a
- * deadlock when deleting namespaces using the passthrough interface.
- */
- if (is_ctrl_ioctl(cmd))
- return nvme_handle_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
-
- switch (cmd) {
- case NVME_IOCTL_ID:
- force_successful_syscall_return();
- ret = ns->head->ns_id;
- break;
- case NVME_IOCTL_IO_CMD:
- ret = nvme_user_cmd(ns->ctrl, ns, argp);
- break;
- case NVME_IOCTL_SUBMIT_IO:
- ret = nvme_submit_io(ns, argp);
- break;
- case NVME_IOCTL_IO64_CMD:
- ret = nvme_user_cmd64(ns->ctrl, ns, argp);
- break;
- default:
- if (ns->ndev)
- ret = nvme_nvm_ioctl(ns, cmd, arg);
- else
- ret = -ENOTTY;
- }
-
- nvme_put_ns_from_disk(head, srcu_idx);
- return ret;
-}
-
-#ifdef CONFIG_COMPAT
-struct nvme_user_io32 {
- __u8 opcode;
- __u8 flags;
- __u16 control;
- __u16 nblocks;
- __u16 rsvd;
- __u64 metadata;
- __u64 addr;
- __u64 slba;
- __u32 dsmgmt;
- __u32 reftag;
- __u16 apptag;
- __u16 appmask;
-} __attribute__((__packed__));
-
-#define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32)
-
-static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg)
+static int nvme_ns_open(struct nvme_ns *ns)
{
- /*
- * Corresponds to the difference of NVME_IOCTL_SUBMIT_IO
- * between 32 bit programs and 64 bit kernel.
- * The cause is that the results of sizeof(struct nvme_user_io),
- * which is used to define NVME_IOCTL_SUBMIT_IO,
- * are not same between 32 bit compiler and 64 bit compiler.
- * NVME_IOCTL_SUBMIT_IO32 is for 64 bit kernel handling
- * NVME_IOCTL_SUBMIT_IO issued from 32 bit programs.
- * Other IOCTL numbers are same between 32 bit and 64 bit.
- * So there is nothing to do regarding to other IOCTL numbers.
- */
- if (cmd == NVME_IOCTL_SUBMIT_IO32)
- return nvme_ioctl(bdev, mode, NVME_IOCTL_SUBMIT_IO, arg);
- return nvme_ioctl(bdev, mode, cmd, arg);
-}
-#else
-#define nvme_compat_ioctl NULL
-#endif /* CONFIG_COMPAT */
-
-static int nvme_open(struct block_device *bdev, fmode_t mode)
-{
- struct nvme_ns *ns = bdev->bd_disk->private_data;
-
-#ifdef CONFIG_NVME_MULTIPATH
/* should never be called due to GENHD_FL_HIDDEN */
- if (WARN_ON_ONCE(ns->head->disk))
+ if (WARN_ON_ONCE(nvme_ns_head_multipath(ns->head)))
goto fail;
-#endif
if (!kref_get_unless_zero(&ns->kref))
goto fail;
if (!try_module_get(ns->ctrl->ops->module))
@@ -1872,15 +1507,24 @@ fail:
return -ENXIO;
}
-static void nvme_release(struct gendisk *disk, fmode_t mode)
+static void nvme_ns_release(struct nvme_ns *ns)
{
- struct nvme_ns *ns = disk->private_data;
module_put(ns->ctrl->ops->module);
nvme_put_ns(ns);
}
-static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+static int nvme_open(struct block_device *bdev, fmode_t mode)
+{
+ return nvme_ns_open(bdev->bd_disk->private_data);
+}
+
+static void nvme_release(struct gendisk *disk, fmode_t mode)
+{
+ nvme_ns_release(disk->private_data);
+}
+
+int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
/* some standard values */
geo->heads = 1 << 6;
@@ -1929,7 +1573,7 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
struct request_queue *queue = disk->queue;
u32 size = queue_logical_block_size(queue);
- if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) {
+ if (ctrl->max_discard_sectors == 0) {
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue);
return;
}
@@ -1947,27 +1591,13 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue))
return;
- blk_queue_max_discard_sectors(queue, UINT_MAX);
- blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
+ blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors);
+ blk_queue_max_discard_segments(queue, ctrl->max_discard_segments);
if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
}
-/*
- * Even though NVMe spec explicitly states that MDTS is not applicable to the
- * write-zeroes, we are cautious and limit the size to the controllers
- * max_hw_sectors value, which is based on the MDTS field and possibly other
- * limiting factors.
- */
-static void nvme_config_write_zeroes(struct request_queue *q,
- struct nvme_ctrl *ctrl)
-{
- if ((ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) &&
- !(ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
- blk_queue_max_write_zeroes_sectors(q, ctrl->max_hw_sectors);
-}
-
static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids)
{
return !uuid_is_null(&ids->uuid) ||
@@ -2137,7 +1767,8 @@ static void nvme_update_disk_info(struct gendisk *disk,
set_capacity_and_notify(disk, capacity);
nvme_config_discard(disk, ns);
- nvme_config_write_zeroes(disk->queue, ns->ctrl);
+ blk_queue_max_write_zeroes_sectors(disk->queue,
+ ns->ctrl->max_zeroes_sectors);
set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) ||
test_bit(NVME_NS_FORCE_RO, &ns->flags));
@@ -2206,11 +1837,10 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
if (blk_queue_is_zoned(ns->queue)) {
ret = nvme_revalidate_zones(ns);
if (ret && !nvme_first_scan(ns->disk))
- return ret;
+ goto out;
}
-#ifdef CONFIG_NVME_MULTIPATH
- if (ns->head->disk) {
+ if (nvme_ns_head_multipath(ns->head)) {
blk_mq_freeze_queue(ns->head->disk->queue);
nvme_update_disk_info(ns->head->disk, ns, id);
blk_stack_limits(&ns->head->disk->queue->limits,
@@ -2218,11 +1848,19 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
blk_queue_update_readahead(ns->head->disk->queue);
blk_mq_unfreeze_queue(ns->head->disk->queue);
}
-#endif
return 0;
out_unfreeze:
blk_mq_unfreeze_queue(ns->disk->queue);
+out:
+ /*
+ * If probing fails due to an unsupported feature, hide the block device,
+ * but still allow other access.
+ */
+ if (ret == -ENODEV) {
+ ns->disk->flags |= GENHD_FL_HIDDEN;
+ ret = 0;
+ }
return ret;
}
@@ -2303,22 +1941,25 @@ static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
enum pr_type type, bool abort)
{
u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1);
+
return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
}
static int nvme_pr_clear(struct block_device *bdev, u64 key)
{
u32 cdw10 = 1 | (key ? 1 << 3 : 0);
+
return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
}
static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
{
u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0);
+
return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
}
-static const struct pr_ops nvme_pr_ops = {
+const struct pr_ops nvme_pr_ops = {
.pr_register = nvme_pr_register,
.pr_reserve = nvme_pr_reserve,
.pr_release = nvme_pr_release,
@@ -2351,7 +1992,6 @@ EXPORT_SYMBOL_GPL(nvme_sec_submit);
static const struct block_device_operations nvme_bdev_ops = {
.owner = THIS_MODULE,
.ioctl = nvme_ioctl,
- .compat_ioctl = nvme_compat_ioctl,
.open = nvme_open,
.release = nvme_release,
.getgeo = nvme_getgeo,
@@ -2360,31 +2000,25 @@ static const struct block_device_operations nvme_bdev_ops = {
};
#ifdef CONFIG_NVME_MULTIPATH
-static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode)
+struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys)
{
- struct nvme_ns_head *head = bdev->bd_disk->private_data;
-
- if (!kref_get_unless_zero(&head->ref))
- return -ENXIO;
- return 0;
-}
+ struct nvme_ctrl *ctrl;
+ int ret;
-static void nvme_ns_head_release(struct gendisk *disk, fmode_t mode)
-{
- nvme_put_ns_head(disk->private_data);
+ ret = mutex_lock_killable(&nvme_subsystems_lock);
+ if (ret)
+ return ERR_PTR(ret);
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+ if (ctrl->state == NVME_CTRL_LIVE)
+ goto found;
+ }
+ mutex_unlock(&nvme_subsystems_lock);
+ return ERR_PTR(-EWOULDBLOCK);
+found:
+ nvme_get_ctrl(ctrl);
+ mutex_unlock(&nvme_subsystems_lock);
+ return ctrl;
}
-
-const struct block_device_operations nvme_ns_head_ops = {
- .owner = THIS_MODULE,
- .submit_bio = nvme_ns_head_submit_bio,
- .open = nvme_ns_head_open,
- .release = nvme_ns_head_release,
- .ioctl = nvme_ioctl,
- .compat_ioctl = nvme_compat_ioctl,
- .getgeo = nvme_getgeo,
- .report_zones = nvme_report_zones,
- .pr_ops = &nvme_pr_ops,
-};
#endif /* CONFIG_NVME_MULTIPATH */
static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
@@ -2541,28 +2175,28 @@ static int nvme_configure_acre(struct nvme_ctrl *ctrl)
return ret;
}
+/*
+ * APST (Autonomous Power State Transition) lets us program a table of power
+ * state transitions that the controller will perform automatically.
+ * We configure it with a simple heuristic: we are willing to spend at most 2%
+ * of the time transitioning between power states. Therefore, when running in
+ * any given state, we will enter the next lower-power non-operational state
+ * after waiting 50 * (enlat + exlat) microseconds, as long as that state's exit
+ * latency is under the requested maximum latency.
+ *
+ * We will not autonomously enter any non-operational state for which the total
+ * latency exceeds ps_max_latency_us.
+ *
+ * Users can set ps_max_latency_us to zero to turn off APST.
+ */
static int nvme_configure_apst(struct nvme_ctrl *ctrl)
{
- /*
- * APST (Autonomous Power State Transition) lets us program a
- * table of power state transitions that the controller will
- * perform automatically. We configure it with a simple
- * heuristic: we are willing to spend at most 2% of the time
- * transitioning between power states. Therefore, when running
- * in any given state, we will enter the next lower-power
- * non-operational state after waiting 50 * (enlat + exlat)
- * microseconds, as long as that state's exit latency is under
- * the requested maximum latency.
- *
- * We will not autonomously enter any non-operational state for
- * which the total latency exceeds ps_max_latency_us. Users
- * can set ps_max_latency_us to zero to turn off APST.
- */
-
- unsigned apste;
struct nvme_feat_auto_pst *table;
+ unsigned apste = 0;
u64 max_lat_us = 0;
+ __le64 target = 0;
int max_ps = -1;
+ int state;
int ret;
/*
@@ -2583,83 +2217,72 @@ static int nvme_configure_apst(struct nvme_ctrl *ctrl)
if (!ctrl->apst_enabled || ctrl->ps_max_latency_us == 0) {
/* Turn off APST. */
- apste = 0;
dev_dbg(ctrl->device, "APST disabled\n");
- } else {
- __le64 target = cpu_to_le64(0);
- int state;
-
- /*
- * Walk through all states from lowest- to highest-power.
- * According to the spec, lower-numbered states use more
- * power. NPSS, despite the name, is the index of the
- * lowest-power state, not the number of states.
- */
- for (state = (int)ctrl->npss; state >= 0; state--) {
- u64 total_latency_us, exit_latency_us, transition_ms;
-
- if (target)
- table->entries[state] = target;
-
- /*
- * Don't allow transitions to the deepest state
- * if it's quirked off.
- */
- if (state == ctrl->npss &&
- (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS))
- continue;
-
- /*
- * Is this state a useful non-operational state for
- * higher-power states to autonomously transition to?
- */
- if (!(ctrl->psd[state].flags &
- NVME_PS_FLAGS_NON_OP_STATE))
- continue;
-
- exit_latency_us =
- (u64)le32_to_cpu(ctrl->psd[state].exit_lat);
- if (exit_latency_us > ctrl->ps_max_latency_us)
- continue;
+ goto done;
+ }
- total_latency_us =
- exit_latency_us +
- le32_to_cpu(ctrl->psd[state].entry_lat);
+ /*
+ * Walk through all states from lowest- to highest-power.
+ * According to the spec, lower-numbered states use more power. NPSS,
+ * despite the name, is the index of the lowest-power state, not the
+ * number of states.
+ */
+ for (state = (int)ctrl->npss; state >= 0; state--) {
+ u64 total_latency_us, exit_latency_us, transition_ms;
- /*
- * This state is good. Use it as the APST idle
- * target for higher power states.
- */
- transition_ms = total_latency_us + 19;
- do_div(transition_ms, 20);
- if (transition_ms > (1 << 24) - 1)
- transition_ms = (1 << 24) - 1;
+ if (target)
+ table->entries[state] = target;
- target = cpu_to_le64((state << 3) |
- (transition_ms << 8));
+ /*
+ * Don't allow transitions to the deepest state if it's quirked
+ * off.
+ */
+ if (state == ctrl->npss &&
+ (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS))
+ continue;
- if (max_ps == -1)
- max_ps = state;
+ /*
+ * Is this state a useful non-operational state for higher-power
+ * states to autonomously transition to?
+ */
+ if (!(ctrl->psd[state].flags & NVME_PS_FLAGS_NON_OP_STATE))
+ continue;
- if (total_latency_us > max_lat_us)
- max_lat_us = total_latency_us;
- }
+ exit_latency_us = (u64)le32_to_cpu(ctrl->psd[state].exit_lat);
+ if (exit_latency_us > ctrl->ps_max_latency_us)
+ continue;
- apste = 1;
+ total_latency_us = exit_latency_us +
+ le32_to_cpu(ctrl->psd[state].entry_lat);
- if (max_ps == -1) {
- dev_dbg(ctrl->device, "APST enabled but no non-operational states are available\n");
- } else {
- dev_dbg(ctrl->device, "APST enabled: max PS = %d, max round-trip latency = %lluus, table = %*phN\n",
- max_ps, max_lat_us, (int)sizeof(*table), table);
- }
+ /*
+ * This state is good. Use it as the APST idle target for
+ * higher power states.
+ */
+ transition_ms = total_latency_us + 19;
+ do_div(transition_ms, 20);
+ if (transition_ms > (1 << 24) - 1)
+ transition_ms = (1 << 24) - 1;
+
+ target = cpu_to_le64((state << 3) | (transition_ms << 8));
+ if (max_ps == -1)
+ max_ps = state;
+ if (total_latency_us > max_lat_us)
+ max_lat_us = total_latency_us;
}
+ if (max_ps == -1)
+ dev_dbg(ctrl->device, "APST enabled but no non-operational states are available\n");
+ else
+ dev_dbg(ctrl->device, "APST enabled: max PS = %d, max round-trip latency = %lluus, table = %*phN\n",
+ max_ps, max_lat_us, (int)sizeof(*table), table);
+ apste = 1;
+
+done:
ret = nvme_set_features(ctrl, NVME_FEAT_AUTO_PST, apste,
table, sizeof(*table), NULL);
if (ret)
dev_err(ctrl->device, "failed to set APST feature (%d)\n", ret);
-
kfree(table);
return ret;
}
@@ -2681,7 +2304,8 @@ static void nvme_set_latency_tolerance(struct device *dev, s32 val)
if (ctrl->ps_max_latency_us != latency) {
ctrl->ps_max_latency_us = latency;
- nvme_configure_apst(ctrl);
+ if (ctrl->state == NVME_CTRL_LIVE)
+ nvme_configure_apst(ctrl);
}
}
@@ -2854,8 +2478,8 @@ static ssize_t subsys_##field##_show(struct device *dev, \
{ \
struct nvme_subsystem *subsys = \
container_of(dev, struct nvme_subsystem, dev); \
- return sprintf(buf, "%.*s\n", \
- (int)sizeof(subsys->field), subsys->field); \
+ return sysfs_emit(buf, "%.*s\n", \
+ (int)sizeof(subsys->field), subsys->field); \
} \
static SUBSYS_ATTR_RO(field, S_IRUGO, subsys_##field##_show);
@@ -3038,28 +2662,74 @@ out:
return 0;
}
-/*
- * Initialize the cached copies of the Identify data and various controller
- * register in our nvme_ctrl structure. This should be called as soon as
- * the admin queue is fully up and running.
- */
-int nvme_init_identify(struct nvme_ctrl *ctrl)
+static inline u32 nvme_mps_to_sectors(struct nvme_ctrl *ctrl, u32 units)
{
- struct nvme_id_ctrl *id;
- int ret, page_shift;
- u32 max_hw_sectors;
- bool prev_apst_enabled;
+ u32 page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12, val;
- ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
- if (ret) {
- dev_err(ctrl->device, "Reading VS failed (%d)\n", ret);
- return ret;
+ if (check_shl_overflow(1U, units + page_shift - 9, &val))
+ return UINT_MAX;
+ return val;
+}
+
+static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
+{
+ struct nvme_command c = { };
+ struct nvme_id_ctrl_nvm *id;
+ int ret;
+
+ if (ctrl->oncs & NVME_CTRL_ONCS_DSM) {
+ ctrl->max_discard_sectors = UINT_MAX;
+ ctrl->max_discard_segments = NVME_DSM_MAX_RANGES;
+ } else {
+ ctrl->max_discard_sectors = 0;
+ ctrl->max_discard_segments = 0;
}
- page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
- ctrl->sqsize = min_t(u16, NVME_CAP_MQES(ctrl->cap), ctrl->sqsize);
- if (ctrl->vs >= NVME_VS(1, 1, 0))
- ctrl->subsystem = NVME_CAP_NSSRC(ctrl->cap);
+ /*
+ * Even though the NVMe spec explicitly states that MDTS is not applicable
+ * to write-zeroes, we are cautious and limit the size to the
+ * controller's max_hw_sectors value, which is based on the MDTS field
+ * and possibly other limiting factors.
+ */
+ if ((ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) &&
+ !(ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
+ ctrl->max_zeroes_sectors = ctrl->max_hw_sectors;
+ else
+ ctrl->max_zeroes_sectors = 0;
+
+ if (nvme_ctrl_limited_cns(ctrl))
+ return 0;
+
+ id = kzalloc(sizeof(*id), GFP_KERNEL);
+ if (!id)
+ return 0;
+
+ c.identify.opcode = nvme_admin_identify;
+ c.identify.cns = NVME_ID_CNS_CS_CTRL;
+ c.identify.csi = NVME_CSI_NVM;
+
+ ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
+ if (ret)
+ goto free_data;
+
+ if (id->dmrl)
+ ctrl->max_discard_segments = id->dmrl;
+ if (id->dmrsl)
+ ctrl->max_discard_sectors = le32_to_cpu(id->dmrsl);
+ if (id->wzsl)
+ ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl);
+
+free_data:
+ kfree(id);
+ return ret;
+}
+
+static int nvme_init_identify(struct nvme_ctrl *ctrl)
+{
+ struct nvme_id_ctrl *id;
+ u32 max_hw_sectors;
+ bool prev_apst_enabled;
+ int ret;
ret = nvme_identify_ctrl(ctrl, &id);
if (ret) {
@@ -3077,7 +2747,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->cntlid = le16_to_cpu(id->cntlid);
if (!ctrl->identified) {
- int i;
+ unsigned int i;
ret = nvme_init_subsystem(ctrl, id);
if (ret)
@@ -3116,7 +2786,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
atomic_set(&ctrl->abort_limit, id->acl + 1);
ctrl->vwc = id->vwc;
if (id->mdts)
- max_hw_sectors = 1 << (id->mdts + page_shift - 9);
+ max_hw_sectors = nvme_mps_to_sectors(ctrl, id->mdts);
else
max_hw_sectors = UINT_MAX;
ctrl->max_hw_sectors =
@@ -3190,20 +2860,51 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
}
ret = nvme_mpath_init(ctrl, id);
- kfree(id);
-
if (ret < 0)
- return ret;
+ goto out_free;
if (ctrl->apst_enabled && !prev_apst_enabled)
dev_pm_qos_expose_latency_tolerance(ctrl->device);
else if (!ctrl->apst_enabled && prev_apst_enabled)
dev_pm_qos_hide_latency_tolerance(ctrl->device);
+out_free:
+ kfree(id);
+ return ret;
+}
+
+/*
+ * Initialize the cached copies of the Identify data and various controller
+ * registers in our nvme_ctrl structure. This should be called as soon as
+ * the admin queue is fully up and running.
+ */
+int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
+{
+ int ret;
+
+ ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
+ if (ret) {
+ dev_err(ctrl->device, "Reading VS failed (%d)\n", ret);
+ return ret;
+ }
+
+ ctrl->sqsize = min_t(u16, NVME_CAP_MQES(ctrl->cap), ctrl->sqsize);
+
+ if (ctrl->vs >= NVME_VS(1, 1, 0))
+ ctrl->subsystem = NVME_CAP_NSSRC(ctrl->cap);
+
+ ret = nvme_init_identify(ctrl);
+ if (ret)
+ return ret;
+
+ ret = nvme_init_non_mdts_limits(ctrl);
+ if (ret < 0)
+ return ret;
+
ret = nvme_configure_apst(ctrl);
if (ret < 0)
return ret;
-
+
ret = nvme_configure_timestamp(ctrl);
if (ret < 0)
return ret;
@@ -3225,12 +2926,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->identified = true;
return 0;
-
-out_free:
- kfree(id);
- return ret;
}
-EXPORT_SYMBOL_GPL(nvme_init_identify);
+EXPORT_SYMBOL_GPL(nvme_init_ctrl_finish);
static int nvme_dev_open(struct inode *inode, struct file *file)
{
@@ -3264,65 +2961,6 @@ static int nvme_dev_release(struct inode *inode, struct file *file)
return 0;
}
-static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
-{
- struct nvme_ns *ns;
- int ret;
-
- down_read(&ctrl->namespaces_rwsem);
- if (list_empty(&ctrl->namespaces)) {
- ret = -ENOTTY;
- goto out_unlock;
- }
-
- ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
- if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
- dev_warn(ctrl->device,
- "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
- ret = -EINVAL;
- goto out_unlock;
- }
-
- dev_warn(ctrl->device,
- "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
- kref_get(&ns->kref);
- up_read(&ctrl->namespaces_rwsem);
-
- ret = nvme_user_cmd(ctrl, ns, argp);
- nvme_put_ns(ns);
- return ret;
-
-out_unlock:
- up_read(&ctrl->namespaces_rwsem);
- return ret;
-}
-
-static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- struct nvme_ctrl *ctrl = file->private_data;
- void __user *argp = (void __user *)arg;
-
- switch (cmd) {
- case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(ctrl, NULL, argp);
- case NVME_IOCTL_ADMIN64_CMD:
- return nvme_user_cmd64(ctrl, NULL, argp);
- case NVME_IOCTL_IO_CMD:
- return nvme_dev_user_cmd(ctrl, argp);
- case NVME_IOCTL_RESET:
- dev_warn(ctrl->device, "resetting controller\n");
- return nvme_reset_ctrl_sync(ctrl);
- case NVME_IOCTL_SUBSYS_RESET:
- return nvme_reset_subsystem(ctrl);
- case NVME_IOCTL_RESCAN:
- nvme_queue_scan(ctrl);
- return 0;
- default:
- return -ENOTTY;
- }
-}
-
static const struct file_operations nvme_dev_fops = {
.owner = THIS_MODULE,
.open = nvme_dev_open,
@@ -3376,13 +3014,13 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
int model_len = sizeof(subsys->model);
if (!uuid_is_null(&ids->uuid))
- return sprintf(buf, "uuid.%pU\n", &ids->uuid);
+ return sysfs_emit(buf, "uuid.%pU\n", &ids->uuid);
if (memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
- return sprintf(buf, "eui.%16phN\n", ids->nguid);
+ return sysfs_emit(buf, "eui.%16phN\n", ids->nguid);
if (memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
- return sprintf(buf, "eui.%8phN\n", ids->eui64);
+ return sysfs_emit(buf, "eui.%8phN\n", ids->eui64);
while (serial_len > 0 && (subsys->serial[serial_len - 1] == ' ' ||
subsys->serial[serial_len - 1] == '\0'))
@@ -3391,7 +3029,7 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
subsys->model[model_len - 1] == '\0'))
model_len--;
- return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n", subsys->vendor_id,
+ return sysfs_emit(buf, "nvme.%04x-%*phN-%*phN-%08x\n", subsys->vendor_id,
serial_len, subsys->serial, model_len, subsys->model,
head->ns_id);
}
@@ -3400,7 +3038,7 @@ static DEVICE_ATTR_RO(wwid);
static ssize_t nguid_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
- return sprintf(buf, "%pU\n", dev_to_ns_head(dev)->ids.nguid);
+ return sysfs_emit(buf, "%pU\n", dev_to_ns_head(dev)->ids.nguid);
}
static DEVICE_ATTR_RO(nguid);
@@ -3415,23 +3053,23 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
if (uuid_is_null(&ids->uuid)) {
printk_ratelimited(KERN_WARNING
"No UUID available providing old NGUID\n");
- return sprintf(buf, "%pU\n", ids->nguid);
+ return sysfs_emit(buf, "%pU\n", ids->nguid);
}
- return sprintf(buf, "%pU\n", &ids->uuid);
+ return sysfs_emit(buf, "%pU\n", &ids->uuid);
}
static DEVICE_ATTR_RO(uuid);
static ssize_t eui_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
- return sprintf(buf, "%8ph\n", dev_to_ns_head(dev)->ids.eui64);
+ return sysfs_emit(buf, "%8ph\n", dev_to_ns_head(dev)->ids.eui64);
}
static DEVICE_ATTR_RO(eui);
static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
- return sprintf(buf, "%d\n", dev_to_ns_head(dev)->ns_id);
+ return sysfs_emit(buf, "%d\n", dev_to_ns_head(dev)->ns_id);
}
static DEVICE_ATTR_RO(nsid);
@@ -3496,7 +3134,7 @@ static ssize_t field##_show(struct device *dev, \
struct device_attribute *attr, char *buf) \
{ \
struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \
- return sprintf(buf, "%.*s\n", \
+ return sysfs_emit(buf, "%.*s\n", \
(int)sizeof(ctrl->subsys->field), ctrl->subsys->field); \
} \
static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
@@ -3510,7 +3148,7 @@ static ssize_t field##_show(struct device *dev, \
struct device_attribute *attr, char *buf) \
{ \
struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \
- return sprintf(buf, "%d\n", ctrl->field); \
+ return sysfs_emit(buf, "%d\n", ctrl->field); \
} \
static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
@@ -3518,6 +3156,7 @@ nvme_show_int_function(cntlid);
nvme_show_int_function(numa_node);
nvme_show_int_function(queue_count);
nvme_show_int_function(sqsize);
+nvme_show_int_function(kato);
static ssize_t nvme_sysfs_delete(struct device *dev,
struct device_attribute *attr, const char *buf,
@@ -3558,9 +3197,9 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) &&
state_name[ctrl->state])
- return sprintf(buf, "%s\n", state_name[ctrl->state]);
+ return sysfs_emit(buf, "%s\n", state_name[ctrl->state]);
- return sprintf(buf, "unknown state\n");
+ return sysfs_emit(buf, "unknown state\n");
}
static DEVICE_ATTR(state, S_IRUGO, nvme_sysfs_show_state, NULL);
@@ -3612,9 +3251,9 @@ static ssize_t nvme_ctrl_loss_tmo_show(struct device *dev,
struct nvmf_ctrl_options *opts = ctrl->opts;
if (ctrl->opts->max_reconnects == -1)
- return sprintf(buf, "off\n");
- return sprintf(buf, "%d\n",
- opts->max_reconnects * opts->reconnect_delay);
+ return sysfs_emit(buf, "off\n");
+ return sysfs_emit(buf, "%d\n",
+ opts->max_reconnects * opts->reconnect_delay);
}
static ssize_t nvme_ctrl_loss_tmo_store(struct device *dev,
@@ -3628,7 +3267,7 @@ static ssize_t nvme_ctrl_loss_tmo_store(struct device *dev,
if (err)
return -EINVAL;
- else if (ctrl_loss_tmo < 0)
+ if (ctrl_loss_tmo < 0)
opts->max_reconnects = -1;
else
opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
@@ -3644,8 +3283,8 @@ static ssize_t nvme_ctrl_reconnect_delay_show(struct device *dev,
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
if (ctrl->opts->reconnect_delay == -1)
- return sprintf(buf, "off\n");
- return sprintf(buf, "%d\n", ctrl->opts->reconnect_delay);
+ return sysfs_emit(buf, "off\n");
+ return sysfs_emit(buf, "%d\n", ctrl->opts->reconnect_delay);
}
static ssize_t nvme_ctrl_reconnect_delay_store(struct device *dev,
@@ -3665,6 +3304,36 @@ static ssize_t nvme_ctrl_reconnect_delay_store(struct device *dev,
static DEVICE_ATTR(reconnect_delay, S_IRUGO | S_IWUSR,
nvme_ctrl_reconnect_delay_show, nvme_ctrl_reconnect_delay_store);
+static ssize_t nvme_ctrl_fast_io_fail_tmo_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ if (ctrl->opts->fast_io_fail_tmo == -1)
+ return sysfs_emit(buf, "off\n");
+ return sysfs_emit(buf, "%d\n", ctrl->opts->fast_io_fail_tmo);
+}
+
+static ssize_t nvme_ctrl_fast_io_fail_tmo_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ struct nvmf_ctrl_options *opts = ctrl->opts;
+ int fast_io_fail_tmo, err;
+
+ err = kstrtoint(buf, 10, &fast_io_fail_tmo);
+ if (err)
+ return -EINVAL;
+
+ if (fast_io_fail_tmo < 0)
+ opts->fast_io_fail_tmo = -1;
+ else
+ opts->fast_io_fail_tmo = fast_io_fail_tmo;
+ return count;
+}
+static DEVICE_ATTR(fast_io_fail_tmo, S_IRUGO | S_IWUSR,
+ nvme_ctrl_fast_io_fail_tmo_show, nvme_ctrl_fast_io_fail_tmo_store);
+
static struct attribute *nvme_dev_attrs[] = {
&dev_attr_reset_controller.attr,
&dev_attr_rescan_controller.attr,
@@ -3684,6 +3353,8 @@ static struct attribute *nvme_dev_attrs[] = {
&dev_attr_hostid.attr,
&dev_attr_ctrl_loss_tmo.attr,
&dev_attr_reconnect_delay.attr,
+ &dev_attr_fast_io_fail_tmo.attr,
+ &dev_attr_kato.attr,
NULL
};
@@ -3705,6 +3376,8 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
return 0;
if (a == &dev_attr_reconnect_delay.attr && !ctrl->opts)
return 0;
+ if (a == &dev_attr_fast_io_fail_tmo.attr && !ctrl->opts)
+ return 0;
return a->mode;
}
@@ -3727,7 +3400,7 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys,
lockdep_assert_held(&subsys->lock);
list_for_each_entry(h, &subsys->nsheads, entry) {
- if (h->ns_id == nsid && kref_get_unless_zero(&h->ref))
+ if (h->ns_id == nsid && nvme_tryget_ns_head(h))
return h;
}
@@ -3750,6 +3423,66 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys,
return 0;
}
+void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device)
+{
+ cdev_device_del(cdev, cdev_device);
+ ida_simple_remove(&nvme_ns_chr_minor_ida, MINOR(cdev_device->devt));
+}
+
+int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
+ const struct file_operations *fops, struct module *owner)
+{
+ int minor, ret;
+
+ minor = ida_simple_get(&nvme_ns_chr_minor_ida, 0, 0, GFP_KERNEL);
+ if (minor < 0)
+ return minor;
+ cdev_device->devt = MKDEV(MAJOR(nvme_ns_chr_devt), minor);
+ cdev_device->class = nvme_ns_chr_class;
+ device_initialize(cdev_device);
+ cdev_init(cdev, fops);
+ cdev->owner = owner;
+ ret = cdev_device_add(cdev, cdev_device);
+ if (ret)
+ ida_simple_remove(&nvme_ns_chr_minor_ida, minor);
+ return ret;
+}
+
+static int nvme_ns_chr_open(struct inode *inode, struct file *file)
+{
+ return nvme_ns_open(container_of(inode->i_cdev, struct nvme_ns, cdev));
+}
+
+static int nvme_ns_chr_release(struct inode *inode, struct file *file)
+{
+ nvme_ns_release(container_of(inode->i_cdev, struct nvme_ns, cdev));
+ return 0;
+}
+
+static const struct file_operations nvme_ns_chr_fops = {
+ .owner = THIS_MODULE,
+ .open = nvme_ns_chr_open,
+ .release = nvme_ns_chr_release,
+ .unlocked_ioctl = nvme_ns_chr_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
+};
+
+static int nvme_add_ns_cdev(struct nvme_ns *ns)
+{
+ int ret;
+
+ ns->cdev_device.parent = ns->ctrl->device;
+ ret = dev_set_name(&ns->cdev_device, "ng%dn%d",
+ ns->ctrl->instance, ns->head->instance);
+ if (ret)
+ return ret;
+ ret = nvme_cdev_add(&ns->cdev, &ns->cdev_device, &nvme_ns_chr_fops,
+ ns->ctrl->ops->module);
+ if (ret)
+ kfree_const(ns->cdev_device.kobj.name);
+ return ret;
+}
+
static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
unsigned nsid, struct nvme_ns_ids *ids)
{
@@ -3890,8 +3623,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
struct nvme_ns *ns;
struct gendisk *disk;
struct nvme_id_ns *id;
- char disk_name[DISK_NAME_LEN];
- int node = ctrl->numa_node, flags = GENHD_FL_EXT_DEVT;
+ int node = ctrl->numa_node;
if (nvme_identify_ns(ctrl, nsid, ids, &id))
return;
@@ -3917,7 +3649,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
if (nvme_init_ns_head(ns, nsid, ids, id->nmic & NVME_NS_NMIC_SHARED))
goto out_free_queue;
- nvme_set_disk_name(disk_name, ns, ctrl, &flags);
disk = alloc_disk_node(0, node);
if (!disk)
@@ -3926,15 +3657,22 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
disk->fops = &nvme_bdev_ops;
disk->private_data = ns;
disk->queue = ns->queue;
- disk->flags = flags;
- memcpy(disk->disk_name, disk_name, DISK_NAME_LEN);
+ disk->flags = GENHD_FL_EXT_DEVT;
+ /*
+ * Without the multipath code enabled, multiple controllers per
+ * subsystem are visible as devices and thus we cannot use the
+ * subsystem instance.
+ */
+ if (!nvme_mpath_set_disk_name(ns, disk->disk_name, &disk->flags))
+ sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance,
+ ns->head->instance);
ns->disk = disk;
if (nvme_update_ns_info(ns, id))
goto out_put_disk;
if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
- if (nvme_nvm_register(ns, disk_name, node)) {
+ if (nvme_nvm_register(ns, disk->disk_name, node)) {
dev_warn(ctrl->device, "LightNVM init failure\n");
goto out_put_disk;
}
@@ -3947,6 +3685,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
nvme_get_ctrl(ctrl);
device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups);
+ if (!nvme_ns_head_multipath(ns->head))
+ nvme_add_ns_cdev(ns);
nvme_mpath_add_disk(ns, id);
nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
@@ -3991,6 +3731,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */
if (ns->disk->flags & GENHD_FL_UP) {
+ if (!nvme_ns_head_multipath(ns->head))
+ nvme_cdev_del(&ns->cdev, &ns->cdev_device);
del_gendisk(ns->disk);
blk_cleanup_queue(ns->queue);
if (blk_get_integrity(ns->disk))
@@ -4735,6 +4477,7 @@ static inline void _nvme_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE);
+ BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_nvm) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
@@ -4780,8 +4523,24 @@ static int __init nvme_core_init(void)
result = PTR_ERR(nvme_subsys_class);
goto destroy_class;
}
+
+ result = alloc_chrdev_region(&nvme_ns_chr_devt, 0, NVME_MINORS,
+ "nvme-generic");
+ if (result < 0)
+ goto destroy_subsys_class;
+
+ nvme_ns_chr_class = class_create(THIS_MODULE, "nvme-generic");
+ if (IS_ERR(nvme_ns_chr_class)) {
+ result = PTR_ERR(nvme_ns_chr_class);
+ goto unregister_generic_ns;
+ }
+
return 0;
+unregister_generic_ns:
+ unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS);
+destroy_subsys_class:
+ class_destroy(nvme_subsys_class);
destroy_class:
class_destroy(nvme_class);
unregister_chrdev:
@@ -4798,12 +4557,15 @@ out:
static void __exit nvme_core_exit(void)
{
+ class_destroy(nvme_ns_chr_class);
class_destroy(nvme_subsys_class);
class_destroy(nvme_class);
+ unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS);
unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
destroy_workqueue(nvme_delete_wq);
destroy_workqueue(nvme_reset_wq);
destroy_workqueue(nvme_wq);
+ ida_destroy(&nvme_ns_chr_minor_ida);
ida_destroy(&nvme_instance_ida);
}
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 604ab0e5a2ad..13c2747e3d00 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -379,10 +379,8 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
/*
* Set keep-alive timeout in seconds granularity (ms * 1000)
- * and add a grace period for controller kato enforcement
*/
- cmd.connect.kato = ctrl->kato ?
- cpu_to_le32((ctrl->kato + NVME_KATO_GRACE) * 1000) : 0;
+ cmd.connect.kato = cpu_to_le32(ctrl->kato * 1000);
if (ctrl->opts->disable_sqflow)
cmd.connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 6ffa8de2a0d7..9b9b7be0f412 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1708,7 +1708,7 @@ restart:
*
* If this routine returns error, the LLDD should abort the exchange.
*
- * @remoteport: pointer to the (registered) remote port that the LS
+ * @portptr: pointer to the (registered) remote port that the LS
* was received from. The remoteport is associated with
* a specific localport.
* @lsrsp: pointer to a nvmefc_ls_rsp response structure to be
@@ -2128,6 +2128,7 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
op->op.fcp_req.first_sgl = op->sgl;
op->op.fcp_req.private = &op->priv[0];
nvme_req(rq)->ctrl = &ctrl->ctrl;
+ nvme_req(rq)->cmd = &op->op.cmd_iu.sqe;
return res;
}
@@ -2759,8 +2760,6 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_fc_ctrl *ctrl = queue->ctrl;
struct request *rq = bd->rq;
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
- struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
- struct nvme_command *sqe = &cmdiu->sqe;
enum nvmefc_fcp_datadir io_dir;
bool queue_ready = test_bit(NVME_FC_Q_LIVE, &queue->flags);
u32 data_len;
@@ -2770,7 +2769,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
- ret = nvme_setup_cmd(ns, rq, sqe);
+ ret = nvme_setup_cmd(ns, rq);
if (ret)
return ret;
@@ -3086,7 +3085,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
- ret = nvme_init_identify(&ctrl->ctrl);
+ ret = nvme_init_ctrl_finish(&ctrl->ctrl);
if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
goto out_disconnect_admin_queue;
@@ -3100,6 +3099,11 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
}
/* FC-NVME supports normal SGL Data Block Descriptors */
+ if (!(ctrl->ctrl.sgls & ((1 << 0) | (1 << 1)))) {
+ dev_err(ctrl->ctrl.device,
+ "Mandatory sgls are not supported!\n");
+ goto out_disconnect_admin_queue;
+ }
if (opts->queue_size > ctrl->ctrl.maxcmd) {
/* warn if maxcmd is lower than queue_size */
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
new file mode 100644
index 000000000000..502f8e4a2a1f
--- /dev/null
+++ b/drivers/nvme/host/ioctl.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2011-2014, Intel Corporation.
+ * Copyright (c) 2017-2021 Christoph Hellwig.
+ */
+#include <linux/ptrace.h> /* for force_successful_syscall_return */
+#include <linux/nvme_ioctl.h>
+#include "nvme.h"
+
+/*
+ * Convert integer values from ioctl structures to user pointers, silently
+ * ignoring the upper bits in the compat case to match behaviour of 32-bit
+ * kernels.
+ */
+static void __user *nvme_to_user_ptr(uintptr_t ptrval)
+{
+ if (in_compat_syscall())
+ ptrval = (compat_uptr_t)ptrval;
+ return (void __user *)ptrval;
+}
+
+static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf,
+ unsigned len, u32 seed, bool write)
+{
+ struct bio_integrity_payload *bip;
+ int ret = -ENOMEM;
+ void *buf;
+
+ buf = kmalloc(len, GFP_KERNEL);
+ if (!buf)
+ goto out;
+
+ ret = -EFAULT;
+ if (write && copy_from_user(buf, ubuf, len))
+ goto out_free_meta;
+
+ bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
+ if (IS_ERR(bip)) {
+ ret = PTR_ERR(bip);
+ goto out_free_meta;
+ }
+
+ bip->bip_iter.bi_size = len;
+ bip->bip_iter.bi_sector = seed;
+ ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
+ offset_in_page(buf));
+ if (ret == len)
+ return buf;
+ ret = -ENOMEM;
+out_free_meta:
+ kfree(buf);
+out:
+ return ERR_PTR(ret);
+}
+
+static int nvme_submit_user_cmd(struct request_queue *q,
+ struct nvme_command *cmd, void __user *ubuffer,
+ unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
+ u32 meta_seed, u64 *result, unsigned timeout)
+{
+ bool write = nvme_is_write(cmd);
+ struct nvme_ns *ns = q->queuedata;
+ struct block_device *bdev = ns ? ns->disk->part0 : NULL;
+ struct request *req;
+ struct bio *bio = NULL;
+ void *meta = NULL;
+ int ret;
+
+ req = nvme_alloc_request(q, cmd, 0);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ if (timeout)
+ req->timeout = timeout;
+ nvme_req(req)->flags |= NVME_REQ_USERCMD;
+
+ if (ubuffer && bufflen) {
+ ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
+ GFP_KERNEL);
+ if (ret)
+ goto out;
+ bio = req->bio;
+ if (bdev)
+ bio_set_dev(bio, bdev);
+ if (bdev && meta_buffer && meta_len) {
+ meta = nvme_add_user_metadata(bio, meta_buffer, meta_len,
+ meta_seed, write);
+ if (IS_ERR(meta)) {
+ ret = PTR_ERR(meta);
+ goto out_unmap;
+ }
+ req->cmd_flags |= REQ_INTEGRITY;
+ }
+ }
+
+ nvme_execute_passthru_rq(req);
+ if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
+ ret = -EINTR;
+ else
+ ret = nvme_req(req)->status;
+ if (result)
+ *result = le64_to_cpu(nvme_req(req)->result.u64);
+ if (meta && !ret && !write) {
+ if (copy_to_user(meta_buffer, meta, meta_len))
+ ret = -EFAULT;
+ }
+ kfree(meta);
+ out_unmap:
+ if (bio)
+ blk_rq_unmap_user(bio);
+ out:
+ blk_mq_free_request(req);
+ return ret;
+}
+
+
+static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
+{
+ struct nvme_user_io io;
+ struct nvme_command c;
+ unsigned length, meta_len;
+ void __user *metadata;
+
+ if (copy_from_user(&io, uio, sizeof(io)))
+ return -EFAULT;
+ if (io.flags)
+ return -EINVAL;
+
+ switch (io.opcode) {
+ case nvme_cmd_write:
+ case nvme_cmd_read:
+ case nvme_cmd_compare:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ length = (io.nblocks + 1) << ns->lba_shift;
+
+ if ((io.control & NVME_RW_PRINFO_PRACT) &&
+ ns->ms == sizeof(struct t10_pi_tuple)) {
+ /*
+ * Protection information is stripped/inserted by the
+ * controller.
+ */
+ if (nvme_to_user_ptr(io.metadata))
+ return -EINVAL;
+ meta_len = 0;
+ metadata = NULL;
+ } else {
+ meta_len = (io.nblocks + 1) * ns->ms;
+ metadata = nvme_to_user_ptr(io.metadata);
+ }
+
+ if (ns->features & NVME_NS_EXT_LBAS) {
+ length += meta_len;
+ meta_len = 0;
+ } else if (meta_len) {
+ if ((io.metadata & 3) || !io.metadata)
+ return -EINVAL;
+ }
+
+ memset(&c, 0, sizeof(c));
+ c.rw.opcode = io.opcode;
+ c.rw.flags = io.flags;
+ c.rw.nsid = cpu_to_le32(ns->head->ns_id);
+ c.rw.slba = cpu_to_le64(io.slba);
+ c.rw.length = cpu_to_le16(io.nblocks);
+ c.rw.control = cpu_to_le16(io.control);
+ c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
+ c.rw.reftag = cpu_to_le32(io.reftag);
+ c.rw.apptag = cpu_to_le16(io.apptag);
+ c.rw.appmask = cpu_to_le16(io.appmask);
+
+ return nvme_submit_user_cmd(ns->queue, &c,
+ nvme_to_user_ptr(io.addr), length,
+ metadata, meta_len, lower_32_bits(io.slba), NULL, 0);
+}
+
+static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+ struct nvme_passthru_cmd __user *ucmd)
+{
+ struct nvme_passthru_cmd cmd;
+ struct nvme_command c;
+ unsigned timeout = 0;
+ u64 result;
+ int status;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
+ return -EFAULT;
+ if (cmd.flags)
+ return -EINVAL;
+ if (ns && cmd.nsid != ns->head->ns_id) {
+ dev_err(ctrl->device,
+ "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n",
+ current->comm, cmd.nsid, ns->head->ns_id);
+ return -EINVAL;
+ }
+
+ memset(&c, 0, sizeof(c));
+ c.common.opcode = cmd.opcode;
+ c.common.flags = cmd.flags;
+ c.common.nsid = cpu_to_le32(cmd.nsid);
+ c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
+ c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
+ c.common.cdw10 = cpu_to_le32(cmd.cdw10);
+ c.common.cdw11 = cpu_to_le32(cmd.cdw11);
+ c.common.cdw12 = cpu_to_le32(cmd.cdw12);
+ c.common.cdw13 = cpu_to_le32(cmd.cdw13);
+ c.common.cdw14 = cpu_to_le32(cmd.cdw14);
+ c.common.cdw15 = cpu_to_le32(cmd.cdw15);
+
+ if (cmd.timeout_ms)
+ timeout = msecs_to_jiffies(cmd.timeout_ms);
+
+ status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
+ nvme_to_user_ptr(cmd.addr), cmd.data_len,
+ nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
+ 0, &result, timeout);
+
+ if (status >= 0) {
+ if (put_user(result, &ucmd->result))
+ return -EFAULT;
+ }
+
+ return status;
+}
+
+static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+ struct nvme_passthru_cmd64 __user *ucmd)
+{
+ struct nvme_passthru_cmd64 cmd;
+ struct nvme_command c;
+ unsigned timeout = 0;
+ int status;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
+ return -EFAULT;
+ if (cmd.flags)
+ return -EINVAL;
+ if (ns && cmd.nsid != ns->head->ns_id) {
+ dev_err(ctrl->device,
+ "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n",
+ current->comm, cmd.nsid, ns->head->ns_id);
+ return -EINVAL;
+ }
+
+ memset(&c, 0, sizeof(c));
+ c.common.opcode = cmd.opcode;
+ c.common.flags = cmd.flags;
+ c.common.nsid = cpu_to_le32(cmd.nsid);
+ c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
+ c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
+ c.common.cdw10 = cpu_to_le32(cmd.cdw10);
+ c.common.cdw11 = cpu_to_le32(cmd.cdw11);
+ c.common.cdw12 = cpu_to_le32(cmd.cdw12);
+ c.common.cdw13 = cpu_to_le32(cmd.cdw13);
+ c.common.cdw14 = cpu_to_le32(cmd.cdw14);
+ c.common.cdw15 = cpu_to_le32(cmd.cdw15);
+
+ if (cmd.timeout_ms)
+ timeout = msecs_to_jiffies(cmd.timeout_ms);
+
+ status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
+ nvme_to_user_ptr(cmd.addr), cmd.data_len,
+ nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
+ 0, &cmd.result, timeout);
+
+ if (status >= 0) {
+ if (put_user(cmd.result, &ucmd->result))
+ return -EFAULT;
+ }
+
+ return status;
+}
+
+static bool is_ctrl_ioctl(unsigned int cmd)
+{
+ if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
+ return true;
+ if (is_sed_ioctl(cmd))
+ return true;
+ return false;
+}
+
+static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
+ void __user *argp)
+{
+ switch (cmd) {
+ case NVME_IOCTL_ADMIN_CMD:
+ return nvme_user_cmd(ctrl, NULL, argp);
+ case NVME_IOCTL_ADMIN64_CMD:
+ return nvme_user_cmd64(ctrl, NULL, argp);
+ default:
+ return sed_ioctl(ctrl->opal_dev, cmd, argp);
+ }
+}
+
+#ifdef COMPAT_FOR_U64_ALIGNMENT
+struct nvme_user_io32 {
+ __u8 opcode;
+ __u8 flags;
+ __u16 control;
+ __u16 nblocks;
+ __u16 rsvd;
+ __u64 metadata;
+ __u64 addr;
+ __u64 slba;
+ __u32 dsmgmt;
+ __u32 reftag;
+ __u16 apptag;
+ __u16 appmask;
+} __attribute__((__packed__));
+#define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32)
+#endif /* COMPAT_FOR_U64_ALIGNMENT */
+
+static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
+ void __user *argp)
+{
+ switch (cmd) {
+ case NVME_IOCTL_ID:
+ force_successful_syscall_return();
+ return ns->head->ns_id;
+ case NVME_IOCTL_IO_CMD:
+ return nvme_user_cmd(ns->ctrl, ns, argp);
+ /*
+ * struct nvme_user_io can have different padding on some 32-bit ABIs.
+ * Just accept the compat version as all fields that are used are the
+ * same size and at the same offset.
+ */
+#ifdef COMPAT_FOR_U64_ALIGNMENT
+ case NVME_IOCTL_SUBMIT_IO32:
+#endif
+ case NVME_IOCTL_SUBMIT_IO:
+ return nvme_submit_io(ns, argp);
+ case NVME_IOCTL_IO64_CMD:
+ return nvme_user_cmd64(ns->ctrl, ns, argp);
+ default:
+ if (!ns->ndev)
+ return -ENOTTY;
+ return nvme_nvm_ioctl(ns, cmd, argp);
+ }
+}
+
+static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg)
+{
+ if (is_ctrl_ioctl(cmd))
+ return nvme_ctrl_ioctl(ns->ctrl, cmd, arg);
+ return nvme_ns_ioctl(ns, cmd, arg);
+}
+
+int nvme_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ struct nvme_ns *ns = bdev->bd_disk->private_data;
+
+ return __nvme_ioctl(ns, cmd, (void __user *)arg);
+}
+
+long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct nvme_ns *ns =
+ container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev);
+
+ return __nvme_ioctl(ns, cmd, (void __user *)arg);
+}
+
+#ifdef CONFIG_NVME_MULTIPATH
+static int nvme_ns_head_ctrl_ioctl(struct nvme_ns_head *head,
+ unsigned int cmd, void __user *argp)
+{
+ struct nvme_ctrl *ctrl = nvme_find_get_live_ctrl(head->subsys);
+ int ret;
+
+ if (IS_ERR(ctrl))
+ return PTR_ERR(ctrl);
+ ret = nvme_ctrl_ioctl(ctrl, cmd, argp);
+ nvme_put_ctrl(ctrl);
+ return ret;
+}
+
+static int nvme_ns_head_ns_ioctl(struct nvme_ns_head *head,
+ unsigned int cmd, void __user *argp)
+{
+ int srcu_idx = srcu_read_lock(&head->srcu);
+ struct nvme_ns *ns = nvme_find_path(head);
+ int ret = -EWOULDBLOCK;
+
+ if (ns)
+ ret = nvme_ns_ioctl(ns, cmd, argp);
+ srcu_read_unlock(&head->srcu, srcu_idx);
+ return ret;
+}
+
+int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ struct nvme_ns_head *head = bdev->bd_disk->private_data;
+ void __user *argp = (void __user *)arg;
+
+ if (is_ctrl_ioctl(cmd))
+ return nvme_ns_head_ctrl_ioctl(head, cmd, argp);
+ return nvme_ns_head_ns_ioctl(head, cmd, argp);
+}
+
+long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct cdev *cdev = file_inode(file)->i_cdev;
+ struct nvme_ns_head *head =
+ container_of(cdev, struct nvme_ns_head, cdev);
+ void __user *argp = (void __user *)arg;
+
+ if (is_ctrl_ioctl(cmd))
+ return nvme_ns_head_ctrl_ioctl(head, cmd, argp);
+ return nvme_ns_head_ns_ioctl(head, cmd, argp);
+}
+#endif /* CONFIG_NVME_MULTIPATH */
+
+static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
+{
+ struct nvme_ns *ns;
+ int ret;
+
+ down_read(&ctrl->namespaces_rwsem);
+ if (list_empty(&ctrl->namespaces)) {
+ ret = -ENOTTY;
+ goto out_unlock;
+ }
+
+ ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
+ if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
+ dev_warn(ctrl->device,
+ "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ dev_warn(ctrl->device,
+ "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
+ kref_get(&ns->kref);
+ up_read(&ctrl->namespaces_rwsem);
+
+ ret = nvme_user_cmd(ctrl, ns, argp);
+ nvme_put_ns(ns);
+ return ret;
+
+out_unlock:
+ up_read(&ctrl->namespaces_rwsem);
+ return ret;
+}
+
+long nvme_dev_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct nvme_ctrl *ctrl = file->private_data;
+ void __user *argp = (void __user *)arg;
+
+ switch (cmd) {
+ case NVME_IOCTL_ADMIN_CMD:
+ return nvme_user_cmd(ctrl, NULL, argp);
+ case NVME_IOCTL_ADMIN64_CMD:
+ return nvme_user_cmd64(ctrl, NULL, argp);
+ case NVME_IOCTL_IO_CMD:
+ return nvme_dev_user_cmd(ctrl, argp);
+ case NVME_IOCTL_RESET:
+ dev_warn(ctrl->device, "resetting controller\n");
+ return nvme_reset_ctrl_sync(ctrl);
+ case NVME_IOCTL_SUBSYS_RESET:
+ return nvme_reset_subsystem(ctrl);
+ case NVME_IOCTL_RESCAN:
+ nvme_queue_scan(ctrl);
+ return 0;
+ default:
+ return -ENOTTY;
+ }
+}
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index f6ca2fbb711e..e9d9ad47f70f 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -930,15 +930,15 @@ static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin,
return ret;
}
-int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg)
+int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp)
{
switch (cmd) {
case NVME_NVM_IOCTL_ADMIN_VIO:
- return nvme_nvm_user_vcmd(ns, 1, (void __user *)arg);
+ return nvme_nvm_user_vcmd(ns, 1, argp);
case NVME_NVM_IOCTL_IO_VIO:
- return nvme_nvm_user_vcmd(ns, 0, (void __user *)arg);
+ return nvme_nvm_user_vcmd(ns, 0, argp);
case NVME_NVM_IOCTL_SUBMIT_VIO:
- return nvme_nvm_submit_vio(ns, (void __user *)arg);
+ return nvme_nvm_submit_vio(ns, argp);
default:
return -ENOTTY;
}
@@ -1240,7 +1240,7 @@ static struct attribute *nvm_dev_attrs[] = {
static umode_t nvm_dev_attrs_visible(struct kobject *kobj,
struct attribute *attr, int index)
{
- struct device *dev = container_of(kobj, struct device, kobj);
+ struct device *dev = kobj_to_dev(kobj);
struct gendisk *disk = dev_to_disk(dev);
struct nvme_ns *ns = disk->private_data;
struct nvm_dev *ndev = ns->ndev;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index a1d476e1ac02..0d0de3433f37 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -50,19 +50,19 @@ void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
* and those that have a single controller and use the controller node
* directly.
*/
-void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
- struct nvme_ctrl *ctrl, int *flags)
-{
- if (!multipath) {
- sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
- } else if (ns->head->disk) {
- sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
- ctrl->instance, ns->head->instance);
- *flags = GENHD_FL_HIDDEN;
- } else {
- sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance,
- ns->head->instance);
+bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, int *flags)
+{
+ if (!multipath)
+ return false;
+ if (!ns->head->disk) {
+ sprintf(disk_name, "nvme%dn%d", ns->ctrl->subsys->instance,
+ ns->head->instance);
+ return true;
}
+ sprintf(disk_name, "nvme%dc%dn%d", ns->ctrl->subsys->instance,
+ ns->ctrl->instance, ns->head->instance);
+ *flags = GENHD_FL_HIDDEN;
+ return true;
}
void nvme_failover_req(struct request *req)
@@ -294,7 +294,7 @@ static bool nvme_available_path(struct nvme_ns_head *head)
return false;
}
-blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
+static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
{
struct nvme_ns_head *head = bio->bi_bdev->bd_disk->private_data;
struct device *dev = disk_to_dev(head->disk);
@@ -334,6 +334,71 @@ blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
return ret;
}
+static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode)
+{
+ if (!nvme_tryget_ns_head(bdev->bd_disk->private_data))
+ return -ENXIO;
+ return 0;
+}
+
+static void nvme_ns_head_release(struct gendisk *disk, fmode_t mode)
+{
+ nvme_put_ns_head(disk->private_data);
+}
+
+const struct block_device_operations nvme_ns_head_ops = {
+ .owner = THIS_MODULE,
+ .submit_bio = nvme_ns_head_submit_bio,
+ .open = nvme_ns_head_open,
+ .release = nvme_ns_head_release,
+ .ioctl = nvme_ns_head_ioctl,
+ .getgeo = nvme_getgeo,
+ .report_zones = nvme_report_zones,
+ .pr_ops = &nvme_pr_ops,
+};
+
+static inline struct nvme_ns_head *cdev_to_ns_head(struct cdev *cdev)
+{
+ return container_of(cdev, struct nvme_ns_head, cdev);
+}
+
+static int nvme_ns_head_chr_open(struct inode *inode, struct file *file)
+{
+ if (!nvme_tryget_ns_head(cdev_to_ns_head(inode->i_cdev)))
+ return -ENXIO;
+ return 0;
+}
+
+static int nvme_ns_head_chr_release(struct inode *inode, struct file *file)
+{
+ nvme_put_ns_head(cdev_to_ns_head(inode->i_cdev));
+ return 0;
+}
+
+static const struct file_operations nvme_ns_head_chr_fops = {
+ .owner = THIS_MODULE,
+ .open = nvme_ns_head_chr_open,
+ .release = nvme_ns_head_chr_release,
+ .unlocked_ioctl = nvme_ns_head_chr_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
+};
+
+static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
+{
+ int ret;
+
+ head->cdev_device.parent = &head->subsys->dev;
+ ret = dev_set_name(&head->cdev_device, "ng%dn%d",
+ head->subsys->instance, head->instance);
+ if (ret)
+ return ret;
+ ret = nvme_cdev_add(&head->cdev, &head->cdev_device,
+ &nvme_ns_head_chr_fops, THIS_MODULE);
+ if (ret)
+ kfree_const(head->cdev_device.kobj.name);
+ return ret;
+}
+
static void nvme_requeue_work(struct work_struct *work)
{
struct nvme_ns_head *head =
@@ -412,9 +477,11 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
if (!head->disk)
return;
- if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
+ if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
device_add_disk(&head->subsys->dev, head->disk,
nvme_ns_id_attr_groups);
+ nvme_add_ns_head_cdev(head);
+ }
mutex_lock(&head->lock);
if (nvme_path_is_optimized(ns)) {
@@ -602,8 +669,8 @@ static ssize_t nvme_subsys_iopolicy_show(struct device *dev,
struct nvme_subsystem *subsys =
container_of(dev, struct nvme_subsystem, dev);
- return sprintf(buf, "%s\n",
- nvme_iopolicy_names[READ_ONCE(subsys->iopolicy)]);
+ return sysfs_emit(buf, "%s\n",
+ nvme_iopolicy_names[READ_ONCE(subsys->iopolicy)]);
}
static ssize_t nvme_subsys_iopolicy_store(struct device *dev,
@@ -628,7 +695,7 @@ SUBSYS_ATTR_RW(iopolicy, S_IRUGO | S_IWUSR,
static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
- return sprintf(buf, "%d\n", nvme_get_ns_from_dev(dev)->ana_grpid);
+ return sysfs_emit(buf, "%d\n", nvme_get_ns_from_dev(dev)->ana_grpid);
}
DEVICE_ATTR_RO(ana_grpid);
@@ -637,7 +704,7 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr,
{
struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
- return sprintf(buf, "%s\n", nvme_ana_state_names[ns->ana_state]);
+ return sysfs_emit(buf, "%s\n", nvme_ana_state_names[ns->ana_state]);
}
DEVICE_ATTR_RO(ana_state);
@@ -668,9 +735,13 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
if (desc.state) {
/* found the group desc: update */
nvme_update_ns_ana_state(&desc, ns);
+ } else {
+ /* group desc not found: trigger a re-read */
+ set_bit(NVME_NS_ANA_PENDING, &ns->flags);
+ queue_work(nvme_wq, &ns->ctrl->ana_work);
}
} else {
- ns->ana_state = NVME_ANA_OPTIMIZED;
+ ns->ana_state = NVME_ANA_OPTIMIZED;
nvme_mpath_set_live(ns);
}
@@ -687,8 +758,10 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
if (!head->disk)
return;
- if (head->disk->flags & GENHD_FL_UP)
+ if (head->disk->flags & GENHD_FL_UP) {
+ nvme_cdev_del(&head->cdev, &head->cdev_device);
del_gendisk(head->disk);
+ }
blk_set_queue_dying(head->disk->queue);
/* make sure all pending bios are cleaned up */
kblockd_schedule_work(&head->requeue_work);
@@ -758,4 +831,3 @@ void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
kfree(ctrl->ana_log_buf);
ctrl->ana_log_buf = NULL;
}
-
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 07b34175c6ce..773dde5b231d 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -27,7 +27,6 @@ extern unsigned int admin_timeout;
#define NVME_ADMIN_TIMEOUT (admin_timeout * HZ)
#define NVME_DEFAULT_KATO 5
-#define NVME_KATO_GRACE 10
#ifdef CONFIG_ARCH_NO_SG_CHAIN
#define NVME_INLINE_SG_CNT 0
@@ -276,6 +275,9 @@ struct nvme_ctrl {
u32 max_hw_sectors;
u32 max_segments;
u32 max_integrity_segments;
+ u32 max_discard_sectors;
+ u32 max_discard_segments;
+ u32 max_zeroes_sectors;
#ifdef CONFIG_BLK_DEV_ZONED
u32 max_zone_append;
#endif
@@ -410,8 +412,12 @@ struct nvme_ns_head {
bool shared;
int instance;
struct nvme_effects_log *effects;
-#ifdef CONFIG_NVME_MULTIPATH
+
+ struct cdev cdev;
+ struct device cdev_device;
+
struct gendisk *disk;
+#ifdef CONFIG_NVME_MULTIPATH
struct bio_list requeue_list;
spinlock_t requeue_lock;
struct work_struct requeue_work;
@@ -422,6 +428,11 @@ struct nvme_ns_head {
#endif
};
+static inline bool nvme_ns_head_multipath(struct nvme_ns_head *head)
+{
+ return IS_ENABLED(CONFIG_NVME_MULTIPATH) && head->disk;
+}
+
enum nvme_ns_features {
NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */
NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */
@@ -457,6 +468,9 @@ struct nvme_ns {
#define NVME_NS_ANA_PENDING 2
#define NVME_NS_FORCE_RO 3
+ struct cdev cdev;
+ struct device cdev_device;
+
struct nvme_fault_inject fault_inject;
};
@@ -599,7 +613,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
void nvme_start_ctrl(struct nvme_ctrl *ctrl);
void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
-int nvme_init_identify(struct nvme_ctrl *ctrl);
+int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl);
void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
@@ -623,8 +637,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl);
struct request *nvme_alloc_request(struct request_queue *q,
struct nvme_command *cmd, blk_mq_req_flags_t flags);
void nvme_cleanup_cmd(struct request *req);
-blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
- struct nvme_command *cmd);
+blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req);
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void *buf, unsigned bufflen);
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
@@ -640,16 +653,34 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid,
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
+int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
-
+void nvme_queue_scan(struct nvme_ctrl *ctrl);
int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
void *log, size_t size, u64 offset);
struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
struct nvme_ns_head **head, int *srcu_idx);
void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx);
+bool nvme_tryget_ns_head(struct nvme_ns_head *head);
+void nvme_put_ns_head(struct nvme_ns_head *head);
+struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys);
+int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
+ const struct file_operations *fops, struct module *owner);
+void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device);
+int nvme_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg);
+long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg);
+long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg);
+long nvme_dev_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg);
+int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
extern const struct attribute_group *nvme_ns_id_attr_groups[];
+extern const struct pr_ops nvme_pr_ops;
extern const struct block_device_operations nvme_ns_head_ops;
#ifdef CONFIG_NVME_MULTIPATH
@@ -661,8 +692,7 @@ static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
void nvme_mpath_unfreeze(struct nvme_subsystem *subsys);
void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys);
void nvme_mpath_start_freeze(struct nvme_subsystem *subsys);
-void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
- struct nvme_ctrl *ctrl, int *flags);
+bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, int *flags);
void nvme_failover_req(struct request *req);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
@@ -674,7 +704,6 @@ void nvme_mpath_stop(struct nvme_ctrl *ctrl);
bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
-blk_qc_t nvme_ns_head_submit_bio(struct bio *bio);
static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
{
@@ -701,16 +730,11 @@ static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
{
return false;
}
-/*
- * Without the multipath code enabled, multiple controller per subsystems are
- * visible as devices and thus we cannot use the subsystem instance.
- */
-static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
- struct nvme_ctrl *ctrl, int *flags)
+static inline bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name,
+ int *flags)
{
- sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
+ return false;
}
-
static inline void nvme_failover_req(struct request *req)
{
}
@@ -745,7 +769,7 @@ static inline void nvme_trace_bio_complete(struct request *req)
static inline int nvme_mpath_init(struct nvme_ctrl *ctrl,
struct nvme_id_ctrl *id)
{
- if (ctrl->subsys->cmic & (1 << 3))
+ if (ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA)
dev_warn(ctrl->device,
"Please enable CONFIG_NVME_MULTIPATH for full support of multi-port devices.\n");
return 0;
@@ -798,7 +822,7 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
void nvme_nvm_unregister(struct nvme_ns *ns);
extern const struct attribute_group nvme_nvm_attr_group;
-int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg);
+int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp);
#else
static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name,
int node)
@@ -808,7 +832,7 @@ static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name,
static inline void nvme_nvm_unregister(struct nvme_ns *ns) {};
static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd,
- unsigned long arg)
+ void __user *argp)
{
return -ENOTTY;
}
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 7249ae74f71f..09d4c5f99fc3 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -224,6 +224,7 @@ struct nvme_queue {
*/
struct nvme_iod {
struct nvme_request req;
+ struct nvme_command cmd;
struct nvme_queue *nvmeq;
bool use_sgl;
int aborted;
@@ -429,6 +430,7 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
iod->nvmeq = nvmeq;
nvme_req(req)->ctrl = &dev->ctrl;
+ nvme_req(req)->cmd = &iod->cmd;
return 0;
}
@@ -852,7 +854,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
return nvme_setup_prp_simple(dev, req,
&cmnd->rw, &bv);
- if (iod->nvmeq->qid &&
+ if (iod->nvmeq->qid && sgl_threshold &&
dev->ctrl.sgls & ((1 << 0) | (1 << 1)))
return nvme_setup_sgl_simple(dev, req,
&cmnd->rw, &bv);
@@ -917,7 +919,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_dev *dev = nvmeq->dev;
struct request *req = bd->rq;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- struct nvme_command cmnd;
+ struct nvme_command *cmnd = &iod->cmd;
blk_status_t ret;
iod->aborted = 0;
@@ -931,24 +933,24 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)))
return BLK_STS_IOERR;
- ret = nvme_setup_cmd(ns, req, &cmnd);
+ ret = nvme_setup_cmd(ns, req);
if (ret)
return ret;
if (blk_rq_nr_phys_segments(req)) {
- ret = nvme_map_data(dev, req, &cmnd);
+ ret = nvme_map_data(dev, req, cmnd);
if (ret)
goto out_free_cmd;
}
if (blk_integrity_rq(req)) {
- ret = nvme_map_metadata(dev, req, &cmnd);
+ ret = nvme_map_metadata(dev, req, cmnd);
if (ret)
goto out_unmap_data;
}
blk_mq_start_request(req);
- nvme_submit_cmd(nvmeq, &cmnd, bd->last);
+ nvme_submit_cmd(nvmeq, cmnd, bd->last);
return BLK_STS_OK;
out_unmap_data:
nvme_unmap_data(dev, req);
@@ -1060,18 +1062,10 @@ static inline int nvme_process_cq(struct nvme_queue *nvmeq)
static irqreturn_t nvme_irq(int irq, void *data)
{
struct nvme_queue *nvmeq = data;
- irqreturn_t ret = IRQ_NONE;
- /*
- * The rmb/wmb pair ensures we see all updates from a previous run of
- * the irq handler, even if that was on another CPU.
- */
- rmb();
if (nvme_process_cq(nvmeq))
- ret = IRQ_HANDLED;
- wmb();
-
- return ret;
+ return IRQ_HANDLED;
+ return IRQ_NONE;
}
static irqreturn_t nvme_irq_check(int irq, void *data)
@@ -2178,7 +2172,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
if (nr_io_queues == 0)
return 0;
-
+
clear_bit(NVMEQ_ENABLED, &adminq->flags);
if (dev->cmb_use_sqes) {
@@ -2653,7 +2647,7 @@ static void nvme_reset_work(struct work_struct *work)
*/
dev->ctrl.max_integrity_segments = 1;
- result = nvme_init_identify(&dev->ctrl);
+ result = nvme_init_ctrl_finish(&dev->ctrl);
if (result)
goto out;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index be905d4fdb47..660c774fa9e1 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -314,6 +314,7 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
NVME_RDMA_DATA_SGL_SIZE;
req->queue = queue;
+ nvme_req(rq)->cmd = req->sqe.data;
return 0;
}
@@ -920,7 +921,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
- error = nvme_init_identify(&ctrl->ctrl);
+ error = nvme_init_ctrl_finish(&ctrl->ctrl);
if (error)
goto out_quiesce_queue;
@@ -2041,7 +2042,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
struct request *rq = bd->rq;
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_qe *sqe = &req->sqe;
- struct nvme_command *c = sqe->data;
+ struct nvme_command *c = nvme_req(rq)->cmd;
struct ib_device *dev;
bool queue_ready = test_bit(NVME_RDMA_Q_LIVE, &queue->flags);
blk_status_t ret;
@@ -2064,7 +2065,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
ib_dma_sync_single_for_cpu(dev, sqe->dma,
sizeof(struct nvme_command), DMA_TO_DEVICE);
- ret = nvme_setup_cmd(ns, rq, c);
+ ret = nvme_setup_cmd(ns, rq);
if (ret)
goto unmap_qe;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index a0f00cb8f9f3..75435cdb156c 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -417,6 +417,7 @@ static int nvme_tcp_init_request(struct blk_mq_tag_set *set,
{
struct nvme_tcp_ctrl *ctrl = set->driver_data;
struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
+ struct nvme_tcp_cmd_pdu *pdu;
int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
struct nvme_tcp_queue *queue = &ctrl->queues[queue_idx];
u8 hdgst = nvme_tcp_hdgst_len(queue);
@@ -427,8 +428,10 @@ static int nvme_tcp_init_request(struct blk_mq_tag_set *set,
if (!req->pdu)
return -ENOMEM;
+ pdu = req->pdu;
req->queue = queue;
nvme_req(rq)->ctrl = &ctrl->ctrl;
+ nvme_req(rq)->cmd = &pdu->cmd;
return 0;
}
@@ -874,7 +877,7 @@ static void nvme_tcp_state_change(struct sock *sk)
{
struct nvme_tcp_queue *queue;
- read_lock(&sk->sk_callback_lock);
+ read_lock_bh(&sk->sk_callback_lock);
queue = sk->sk_user_data;
if (!queue)
goto done;
@@ -895,7 +898,7 @@ static void nvme_tcp_state_change(struct sock *sk)
queue->state_change(sk);
done:
- read_unlock(&sk->sk_callback_lock);
+ read_unlock_bh(&sk->sk_callback_lock);
}
static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
@@ -1885,7 +1888,7 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
blk_mq_unquiesce_queue(ctrl->admin_q);
- error = nvme_init_identify(ctrl);
+ error = nvme_init_ctrl_finish(ctrl);
if (error)
goto out_quiesce_queue;
@@ -1973,6 +1976,11 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
goto destroy_admin;
}
+ if (!(ctrl->sgls & ((1 << 0) | (1 << 1)))) {
+ dev_err(ctrl->device, "Mandatory sgls are not supported!\n");
+ goto destroy_admin;
+ }
+
if (opts->queue_size > ctrl->sqsize + 1)
dev_warn(ctrl->device,
"queue_size %zu > ctrl sqsize %u, clamping down\n",
@@ -2269,7 +2277,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
u8 hdgst = nvme_tcp_hdgst_len(queue), ddgst = 0;
blk_status_t ret;
- ret = nvme_setup_cmd(ns, rq, &pdu->cmd);
+ ret = nvme_setup_cmd(ns, rq);
if (ret)
return ret;
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index bc2f344f0ae0..475dd45c3db4 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -96,7 +96,7 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
dev_warn(ns->ctrl->device,
"zone operations:%x not supported for namespace:%u\n",
le16_to_cpu(id->zoc), ns->head->ns_id);
- status = -EINVAL;
+ status = -ENODEV;
goto free_data;
}
@@ -105,7 +105,7 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
dev_warn(ns->ctrl->device,
"invalid zone size:%llu for namespace:%u\n",
ns->zsze, ns->head->ns_id);
- status = -EINVAL;
+ status = -ENODEV;
goto free_data;
}
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index fe6b8aa90b53..d2a26ff3f7b3 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -513,7 +513,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
default:
id->nuse = id->nsze;
break;
- }
+ }
if (req->ns->bdev)
nvmet_bdev_set_limits(req->ns->bdev, id);
@@ -919,15 +919,21 @@ void nvmet_execute_async_event(struct nvmet_req *req)
void nvmet_execute_keep_alive(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ u16 status = 0;
if (!nvmet_check_transfer_len(req, 0))
return;
+ if (!ctrl->kato) {
+ status = NVME_SC_KA_TIMEOUT_INVALID;
+ goto out;
+ }
+
pr_debug("ctrl %d update keep-alive timer for %d secs\n",
ctrl->cntlid, ctrl->kato);
-
mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
- nvmet_req_complete(req, 0);
+out:
+ nvmet_req_complete(req, status);
}
u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
@@ -940,7 +946,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
if (nvmet_req_subsys(req)->type == NVME_NQN_DISC)
return nvmet_parse_discovery_cmd(req);
- ret = nvmet_check_ctrl_status(req, cmd);
+ ret = nvmet_check_ctrl_status(req);
if (unlikely(ret))
return ret;
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index e5dbd1923b7b..65a0cf99f557 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -1149,6 +1149,12 @@ static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys,
if (!len)
return -EINVAL;
+ if (len > NVMET_MN_MAX_SIZE) {
+ pr_err("Model number size can not exceed %d Bytes\n",
+ NVMET_MN_MAX_SIZE);
+ return -EINVAL;
+ }
+
for (pos = 0; pos < len; pos++) {
if (!nvmet_is_ascii(page[pos]))
return -EINVAL;
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index a027433b8be8..25cc2ee8de3f 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -864,10 +864,9 @@ static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
- struct nvme_command *cmd = req->cmd;
u16 ret;
- ret = nvmet_check_ctrl_status(req, cmd);
+ ret = nvmet_check_ctrl_status(req);
if (unlikely(ret))
return ret;
@@ -1190,19 +1189,19 @@ static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
ctrl->cap |= NVMET_QUEUE_SIZE - 1;
}
-u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
- struct nvmet_req *req, struct nvmet_ctrl **ret)
+struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
+ const char *hostnqn, u16 cntlid,
+ struct nvmet_req *req)
{
+ struct nvmet_ctrl *ctrl = NULL;
struct nvmet_subsys *subsys;
- struct nvmet_ctrl *ctrl;
- u16 status = 0;
subsys = nvmet_find_get_subsys(req->port, subsysnqn);
if (!subsys) {
pr_warn("connect request for invalid subsystem %s!\n",
subsysnqn);
req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
- return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+ goto out;
}
mutex_lock(&subsys->lock);
@@ -1215,33 +1214,34 @@ u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
if (!kref_get_unless_zero(&ctrl->ref))
continue;
- *ret = ctrl;
- goto out;
+ /* ctrl found */
+ goto found;
}
}
+ ctrl = NULL; /* ctrl not found */
pr_warn("could not find controller %d for subsys %s / host %s\n",
cntlid, subsysnqn, hostnqn);
req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
- status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
-out:
+found:
mutex_unlock(&subsys->lock);
nvmet_subsys_put(subsys);
- return status;
+out:
+ return ctrl;
}
-u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
+u16 nvmet_check_ctrl_status(struct nvmet_req *req)
{
if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
- cmd->common.opcode, req->sq->qid);
+ req->cmd->common.opcode, req->sq->qid);
return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
}
if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
- cmd->common.opcode, req->sq->qid);
+ req->cmd->common.opcode, req->sq->qid);
return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
}
return 0;
@@ -1322,10 +1322,10 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
pr_warn("connect request for invalid subsystem %s!\n",
subsysnqn);
req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
+ req->error_loc = offsetof(struct nvme_common_command, dptr);
goto out;
}
- status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
down_read(&nvmet_config_sem);
if (!nvmet_host_allowed(subsys, hostnqn)) {
pr_info("connect by host %s for subsystem %s not allowed\n",
@@ -1333,6 +1333,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
up_read(&nvmet_config_sem);
status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
+ req->error_loc = offsetof(struct nvme_common_command, dptr);
goto out_put_subsystem;
}
up_read(&nvmet_config_sem);
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 682854e0e079..4845d12e374a 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -178,12 +178,14 @@ static void nvmet_execute_disc_get_log_page(struct nvmet_req *req)
if (req->cmd->get_log_page.lid != NVME_LOG_DISC) {
req->error_loc =
offsetof(struct nvme_get_log_page_command, lid);
- status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
goto out;
}
/* Spec requires dword aligned offsets */
if (offset & 0x3) {
+ req->error_loc =
+ offsetof(struct nvme_get_log_page_command, lpo);
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
goto out;
}
@@ -250,7 +252,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req)
if (req->cmd->identify.cns != NVME_ID_CNS_CTRL) {
req->error_loc = offsetof(struct nvme_identify, cns);
- status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
goto out;
}
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 42bd12b8bf00..1420a8e3e0b1 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -190,12 +190,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req,
le32_to_cpu(c->kato), &ctrl);
- if (status) {
- if (status == (NVME_SC_INVALID_FIELD | NVME_SC_DNR))
- req->error_loc =
- offsetof(struct nvme_common_command, opcode);
+ if (status)
goto out;
- }
ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support;
@@ -222,7 +218,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
{
struct nvmf_connect_command *c = &req->cmd->connect;
struct nvmf_connect_data *d;
- struct nvmet_ctrl *ctrl = NULL;
+ struct nvmet_ctrl *ctrl;
u16 qid = le16_to_cpu(c->qid);
u16 status = 0;
@@ -249,11 +245,12 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
goto out;
}
- status = nvmet_ctrl_find_get(d->subsysnqn, d->hostnqn,
- le16_to_cpu(d->cntlid),
- req, &ctrl);
- if (status)
+ ctrl = nvmet_ctrl_find_get(d->subsysnqn, d->hostnqn,
+ le16_to_cpu(d->cntlid), req);
+ if (!ctrl) {
+ status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
goto out;
+ }
if (unlikely(qid > ctrl->subsys->max_qid)) {
pr_warn("invalid queue id (%d)\n", qid);
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index d375745fc4ed..19e113240fff 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1021,60 +1021,75 @@ nvmet_fc_free_hostport(struct nvmet_fc_hostport *hostport)
}
static struct nvmet_fc_hostport *
+nvmet_fc_match_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
+{
+ struct nvmet_fc_hostport *host;
+
+ lockdep_assert_held(&tgtport->lock);
+
+ list_for_each_entry(host, &tgtport->host_list, host_list) {
+ if (host->hosthandle == hosthandle && !host->invalid) {
+ if (nvmet_fc_hostport_get(host))
+ return (host);
+ }
+ }
+
+ return NULL;
+}
+
+static struct nvmet_fc_hostport *
nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
{
- struct nvmet_fc_hostport *newhost, *host, *match = NULL;
+ struct nvmet_fc_hostport *newhost, *match = NULL;
unsigned long flags;
/* if LLDD not implemented, leave as NULL */
if (!hosthandle)
return NULL;
- /* take reference for what will be the newly allocated hostport */
+ /*
+ * take reference for what will be the newly allocated hostport if
+ * we end up using a new allocation
+ */
if (!nvmet_fc_tgtport_get(tgtport))
return ERR_PTR(-EINVAL);
+ spin_lock_irqsave(&tgtport->lock, flags);
+ match = nvmet_fc_match_hostport(tgtport, hosthandle);
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+
+ if (match) {
+ /* no new allocation - release reference */
+ nvmet_fc_tgtport_put(tgtport);
+ return match;
+ }
+
newhost = kzalloc(sizeof(*newhost), GFP_KERNEL);
if (!newhost) {
- spin_lock_irqsave(&tgtport->lock, flags);
- list_for_each_entry(host, &tgtport->host_list, host_list) {
- if (host->hosthandle == hosthandle && !host->invalid) {
- if (nvmet_fc_hostport_get(host)) {
- match = host;
- break;
- }
- }
- }
- spin_unlock_irqrestore(&tgtport->lock, flags);
- /* no allocation - release reference */
+ /* no new allocation - release reference */
nvmet_fc_tgtport_put(tgtport);
- return (match) ? match : ERR_PTR(-ENOMEM);
+ return ERR_PTR(-ENOMEM);
}
- newhost->tgtport = tgtport;
- newhost->hosthandle = hosthandle;
- INIT_LIST_HEAD(&newhost->host_list);
- kref_init(&newhost->ref);
-
spin_lock_irqsave(&tgtport->lock, flags);
- list_for_each_entry(host, &tgtport->host_list, host_list) {
- if (host->hosthandle == hosthandle && !host->invalid) {
- if (nvmet_fc_hostport_get(host)) {
- match = host;
- break;
- }
- }
- }
+ match = nvmet_fc_match_hostport(tgtport, hosthandle);
if (match) {
+ /* new allocation not needed */
kfree(newhost);
- newhost = NULL;
- /* releasing allocation - release reference */
+ newhost = match;
+ /* no new allocation - release reference */
nvmet_fc_tgtport_put(tgtport);
- } else
+ } else {
+ newhost->tgtport = tgtport;
+ newhost->hosthandle = hosthandle;
+ INIT_LIST_HEAD(&newhost->host_list);
+ kref_init(&newhost->ref);
+
list_add_tail(&newhost->host_list, &tgtport->host_list);
+ }
spin_unlock_irqrestore(&tgtport->lock, flags);
- return (match) ? match : newhost;
+ return newhost;
}
static void
@@ -1996,6 +2011,7 @@ nvmet_fc_handle_ls_rqst_work(struct work_struct *work)
*
* @target_port: pointer to the (registered) target port the LS was
* received on.
+ * @hosthandle: pointer to the host specific data, gets stored in iod.
* @lsrsp: pointer to a lsrsp structure to be used to reference
* the exchange corresponding to the LS.
* @lsreqbuf: pointer to the buffer containing the LS Request
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 3e189e753bcf..6665da3b634f 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -141,7 +141,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready))
return nvmf_fail_nonready_command(&queue->ctrl->ctrl, req);
- ret = nvme_setup_cmd(ns, req, &iod->cmd);
+ ret = nvme_setup_cmd(ns, req);
if (ret)
return ret;
@@ -205,8 +205,10 @@ static int nvme_loop_init_request(struct blk_mq_tag_set *set,
unsigned int numa_node)
{
struct nvme_loop_ctrl *ctrl = set->driver_data;
+ struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
nvme_req(req)->ctrl = &ctrl->ctrl;
+ nvme_req(req)->cmd = &iod->cmd;
return nvme_loop_init_iod(ctrl, blk_mq_rq_to_pdu(req),
(set == &ctrl->tag_set) ? hctx_idx + 1 : 0);
}
@@ -396,7 +398,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
- error = nvme_init_identify(&ctrl->ctrl);
+ error = nvme_init_ctrl_finish(&ctrl->ctrl);
if (error)
goto out_cleanup_queue;
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 4b84edb49f22..5566ed403576 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -27,6 +27,7 @@
#define NVMET_ERROR_LOG_SLOTS 128
#define NVMET_NO_ERROR_LOC ((u16)-1)
#define NVMET_DEFAULT_CTRL_MODEL "Linux"
+#define NVMET_MN_MAX_SIZE 40
/*
* Supported optional AENs:
@@ -428,10 +429,11 @@ void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl);
void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new);
u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp);
-u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
- struct nvmet_req *req, struct nvmet_ctrl **ret);
+struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
+ const char *hostnqn, u16 cntlid,
+ struct nvmet_req *req);
void nvmet_ctrl_put(struct nvmet_ctrl *ctrl);
-u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd);
+u16 nvmet_check_ctrl_status(struct nvmet_req *req);
struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
enum nvme_subsys_type type);
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index d658c6e8263a..f9f34f6caf5e 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -29,6 +29,16 @@ static int so_priority;
module_param(so_priority, int, 0644);
MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority");
+/* Define a time period (in usecs) that io_work() shall sample an activated
+ * queue before determining it to be idle. This optional module behavior
+ * can enable NIC solutions that support socket optimized packet processing
+ * using advanced interrupt moderation techniques.
+ */
+static int idle_poll_period_usecs;
+module_param(idle_poll_period_usecs, int, 0644);
+MODULE_PARM_DESC(idle_poll_period_usecs,
+ "nvmet tcp io_work poll till idle time period in usecs");
+
#define NVMET_TCP_RECV_BUDGET 8
#define NVMET_TCP_SEND_BUDGET 8
#define NVMET_TCP_IO_WORK_BUDGET 64
@@ -119,6 +129,8 @@ struct nvmet_tcp_queue {
struct ahash_request *snd_hash;
struct ahash_request *rcv_hash;
+ unsigned long poll_end;
+
spinlock_t state_lock;
enum nvmet_tcp_queue_state state;
@@ -525,11 +537,36 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req)
struct nvmet_tcp_cmd *cmd =
container_of(req, struct nvmet_tcp_cmd, req);
struct nvmet_tcp_queue *queue = cmd->queue;
+ struct nvme_sgl_desc *sgl;
+ u32 len;
+
+ if (unlikely(cmd == queue->cmd)) {
+ sgl = &cmd->req.cmd->common.dptr.sgl;
+ len = le32_to_cpu(sgl->length);
+
+ /*
+ * Wait for inline data before processing the response.
+ * Avoid using helpers, this might happen before
+ * nvmet_req_init is completed.
+ */
+ if (queue->rcv_state == NVMET_TCP_RECV_PDU &&
+ len && len < cmd->req.port->inline_data_size &&
+ nvme_is_write(cmd->req.cmd))
+ return;
+ }
llist_add(&cmd->lentry, &queue->resp_list);
queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &cmd->queue->io_work);
}
+static void nvmet_tcp_execute_request(struct nvmet_tcp_cmd *cmd)
+{
+ if (unlikely(cmd->flags & NVMET_TCP_F_INIT_FAILED))
+ nvmet_tcp_queue_response(&cmd->req);
+ else
+ cmd->req.execute(&cmd->req);
+}
+
static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
{
u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
@@ -961,7 +998,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
le32_to_cpu(req->cmd->common.dptr.sgl.length));
nvmet_tcp_handle_req_failure(queue, queue->cmd, req);
- return -EAGAIN;
+ return 0;
}
ret = nvmet_tcp_map_data(queue->cmd);
@@ -1104,10 +1141,8 @@ static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue)
return 0;
}
- if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) &&
- cmd->rbytes_done == cmd->req.transfer_len) {
- cmd->req.execute(&cmd->req);
- }
+ if (cmd->rbytes_done == cmd->req.transfer_len)
+ nvmet_tcp_execute_request(cmd);
nvmet_prepare_receive_pdu(queue);
return 0;
@@ -1144,9 +1179,9 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue)
goto out;
}
- if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) &&
- cmd->rbytes_done == cmd->req.transfer_len)
- cmd->req.execute(&cmd->req);
+ if (cmd->rbytes_done == cmd->req.transfer_len)
+ nvmet_tcp_execute_request(cmd);
+
ret = 0;
out:
nvmet_prepare_receive_pdu(queue);
@@ -1216,6 +1251,23 @@ static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue)
spin_unlock(&queue->state_lock);
}
+static inline void nvmet_tcp_arm_queue_deadline(struct nvmet_tcp_queue *queue)
+{
+ queue->poll_end = jiffies + usecs_to_jiffies(idle_poll_period_usecs);
+}
+
+static bool nvmet_tcp_check_queue_deadline(struct nvmet_tcp_queue *queue,
+ int ops)
+{
+ if (!idle_poll_period_usecs)
+ return false;
+
+ if (ops)
+ nvmet_tcp_arm_queue_deadline(queue);
+
+ return !time_after(jiffies, queue->poll_end);
+}
+
static void nvmet_tcp_io_work(struct work_struct *w)
{
struct nvmet_tcp_queue *queue =
@@ -1241,9 +1293,10 @@ static void nvmet_tcp_io_work(struct work_struct *w)
} while (pending && ops < NVMET_TCP_IO_WORK_BUDGET);
/*
- * We exahusted our budget, requeue our selves
+ * Requeue the worker if idle deadline period is in progress or any
+ * ops activity was recorded during the do-while loop above.
*/
- if (pending)
+ if (nvmet_tcp_check_queue_deadline(queue, ops) || pending)
queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
}
@@ -1434,7 +1487,7 @@ static void nvmet_tcp_state_change(struct sock *sk)
{
struct nvmet_tcp_queue *queue;
- write_lock_bh(&sk->sk_callback_lock);
+ read_lock_bh(&sk->sk_callback_lock);
queue = sk->sk_user_data;
if (!queue)
goto done;
@@ -1452,7 +1505,7 @@ static void nvmet_tcp_state_change(struct sock *sk)
queue->idx, sk->sk_state);
}
done:
- write_unlock_bh(&sk->sk_callback_lock);
+ read_unlock_bh(&sk->sk_callback_lock);
}
static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
@@ -1501,6 +1554,8 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
sock->sk->sk_state_change = nvmet_tcp_state_change;
queue->write_space = sock->sk->sk_write_space;
sock->sk->sk_write_space = nvmet_tcp_write_space;
+ if (idle_poll_period_usecs)
+ nvmet_tcp_arm_queue_deadline(queue);
queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
}
write_unlock_bh(&sock->sk->sk_callback_lock);
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 3a945abf268c..c8df75e99f4c 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -3439,15 +3439,6 @@ static void dasd_generic_auto_online(void *data, async_cookie_t cookie)
*/
int dasd_generic_probe(struct ccw_device *cdev)
{
- int ret;
-
- ret = dasd_add_sysfs_files(cdev);
- if (ret) {
- DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s",
- "dasd_generic_probe: could not add "
- "sysfs entries");
- return ret;
- }
cdev->handler = &dasd_int_handler;
/*
@@ -3488,15 +3479,13 @@ void dasd_generic_remove(struct ccw_device *cdev)
struct dasd_block *block;
device = dasd_device_from_cdev(cdev);
- if (IS_ERR(device)) {
- dasd_remove_sysfs_files(cdev);
+ if (IS_ERR(device))
return;
- }
+
if (test_and_set_bit(DASD_FLAG_OFFLINE, &device->flags) &&
!test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
/* Already doing offline processing */
dasd_put_device(device);
- dasd_remove_sysfs_files(cdev);
return;
}
/*
@@ -3515,8 +3504,6 @@ void dasd_generic_remove(struct ccw_device *cdev)
*/
if (block)
dasd_free_block(block);
-
- dasd_remove_sysfs_files(cdev);
}
EXPORT_SYMBOL_GPL(dasd_generic_remove);
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index 03d27ee9cac6..2c40fe15da55 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -1772,12 +1772,13 @@ static const struct attribute_group ext_pool_attr_group = {
.attrs = ext_pool_attrs,
};
-static const struct attribute_group *dasd_attr_groups[] = {
+const struct attribute_group *dasd_dev_groups[] = {
&dasd_attr_group,
&capacity_attr_group,
&ext_pool_attr_group,
NULL,
};
+EXPORT_SYMBOL_GPL(dasd_dev_groups);
/*
* Return value of the specified feature.
@@ -1895,18 +1896,6 @@ void dasd_path_remove_kobjects(struct dasd_device *device)
}
EXPORT_SYMBOL(dasd_path_remove_kobjects);
-int dasd_add_sysfs_files(struct ccw_device *cdev)
-{
- return sysfs_create_groups(&cdev->dev.kobj, dasd_attr_groups);
-}
-
-void
-dasd_remove_sysfs_files(struct ccw_device *cdev)
-{
- sysfs_remove_groups(&cdev->dev.kobj, dasd_attr_groups);
-}
-
-
int
dasd_devmap_init(void)
{
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 65eb87cbbb9b..a6ac505cbdd7 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -6630,6 +6630,7 @@ static struct ccw_driver dasd_eckd_driver = {
.driver = {
.name = "dasd-eckd",
.owner = THIS_MODULE,
+ .dev_groups = dasd_dev_groups,
},
.ids = dasd_eckd_ids,
.probe = dasd_eckd_probe,
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index 1aeb68794ce8..4789410885e4 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -54,13 +54,6 @@ static struct ccw_device_id dasd_fba_ids[] = {
MODULE_DEVICE_TABLE(ccw, dasd_fba_ids);
-static struct ccw_driver dasd_fba_driver; /* see below */
-static int
-dasd_fba_probe(struct ccw_device *cdev)
-{
- return dasd_generic_probe(cdev);
-}
-
static int
dasd_fba_set_online(struct ccw_device *cdev)
{
@@ -71,9 +64,10 @@ static struct ccw_driver dasd_fba_driver = {
.driver = {
.name = "dasd-fba",
.owner = THIS_MODULE,
+ .dev_groups = dasd_dev_groups,
},
.ids = dasd_fba_ids,
- .probe = dasd_fba_probe,
+ .probe = dasd_generic_probe,
.remove = dasd_generic_remove,
.set_offline = dasd_generic_set_offline,
.set_online = dasd_fba_set_online,
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index b8a04c42d1d2..1c59b0e86a9f 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -854,8 +854,7 @@ void dasd_delete_device(struct dasd_device *);
int dasd_get_feature(struct ccw_device *, int);
int dasd_set_feature(struct ccw_device *, int, int);
-int dasd_add_sysfs_files(struct ccw_device *);
-void dasd_remove_sysfs_files(struct ccw_device *);
+extern const struct attribute_group *dasd_dev_groups[];
void dasd_path_create_kobj(struct dasd_device *, int);
void dasd_path_create_kobjects(struct dasd_device *);
void dasd_path_remove_kobjects(struct dasd_device *);