diff options
-rw-r--r-- | Documentation/block/switching-sched.txt | 18 | ||||
-rw-r--r-- | Documentation/cgroup-v1/blkio-controller.txt | 96 | ||||
-rw-r--r-- | block/Kconfig | 1 | ||||
-rw-r--r-- | block/blk-mq-debugfs.c | 145 | ||||
-rw-r--r-- | block/blk-mq-debugfs.h | 36 | ||||
-rw-r--r-- | block/blk-mq-sched.c | 1 | ||||
-rw-r--r-- | drivers/ata/libata-core.c | 9 | ||||
-rw-r--r-- | drivers/block/null_blk_zoned.c | 4 | ||||
-rw-r--r-- | drivers/block/ps3vram.c | 2 | ||||
-rw-r--r-- | drivers/md/bcache/bset.c | 16 | ||||
-rw-r--r-- | drivers/md/bcache/bset.h | 34 | ||||
-rw-r--r-- | drivers/md/bcache/sysfs.c | 7 | ||||
-rw-r--r-- | fs/io_uring.c | 4 |
13 files changed, 114 insertions, 259 deletions
diff --git a/Documentation/block/switching-sched.txt b/Documentation/block/switching-sched.txt index 3b2612e342f1..7977f6fb8b20 100644 --- a/Documentation/block/switching-sched.txt +++ b/Documentation/block/switching-sched.txt @@ -13,11 +13,9 @@ you can do so by typing: # mount none /sys -t sysfs -As of the Linux 2.6.10 kernel, it is now possible to change the -IO scheduler for a given block device on the fly (thus making it possible, -for instance, to set the CFQ scheduler for the system default, but -set a specific device to use the deadline or noop schedulers - which -can improve that device's throughput). +It is possible to change the IO scheduler for a given block device on +the fly to select one of mq-deadline, none, bfq, or kyber schedulers - +which can improve that device's throughput. To set a specific scheduler, simply do this: @@ -30,8 +28,8 @@ The list of defined schedulers can be found by simply doing a "cat /sys/block/DEV/queue/scheduler" - the list of valid names will be displayed, with the currently selected scheduler in brackets: -# cat /sys/block/hda/queue/scheduler -noop deadline [cfq] -# echo deadline > /sys/block/hda/queue/scheduler -# cat /sys/block/hda/queue/scheduler -noop [deadline] cfq +# cat /sys/block/sda/queue/scheduler +[mq-deadline] kyber bfq none +# echo none >/sys/block/sda/queue/scheduler +# cat /sys/block/sda/queue/scheduler +[none] mq-deadline kyber bfq diff --git a/Documentation/cgroup-v1/blkio-controller.txt b/Documentation/cgroup-v1/blkio-controller.txt index 673dc34d3f78..d1a1b7bdd03a 100644 --- a/Documentation/cgroup-v1/blkio-controller.txt +++ b/Documentation/cgroup-v1/blkio-controller.txt @@ -8,61 +8,13 @@ both at leaf nodes as well as at intermediate nodes in a storage hierarchy. Plan is to use the same cgroup based management interface for blkio controller and based on user options switch IO policies in the background. -Currently two IO control policies are implemented. First one is proportional -weight time based division of disk policy. It is implemented in CFQ. Hence -this policy takes effect only on leaf nodes when CFQ is being used. The second -one is throttling policy which can be used to specify upper IO rate limits -on devices. This policy is implemented in generic block layer and can be -used on leaf nodes as well as higher level logical devices like device mapper. +One IO control policy is throttling policy which can be used to +specify upper IO rate limits on devices. This policy is implemented in +generic block layer and can be used on leaf nodes as well as higher +level logical devices like device mapper. HOWTO ===== -Proportional Weight division of bandwidth ------------------------------------------ -You can do a very simple testing of running two dd threads in two different -cgroups. Here is what you can do. - -- Enable Block IO controller - CONFIG_BLK_CGROUP=y - -- Enable group scheduling in CFQ - CONFIG_CFQ_GROUP_IOSCHED=y - -- Compile and boot into kernel and mount IO controller (blkio); see - cgroups.txt, Why are cgroups needed?. - - mount -t tmpfs cgroup_root /sys/fs/cgroup - mkdir /sys/fs/cgroup/blkio - mount -t cgroup -o blkio none /sys/fs/cgroup/blkio - -- Create two cgroups - mkdir -p /sys/fs/cgroup/blkio/test1/ /sys/fs/cgroup/blkio/test2 - -- Set weights of group test1 and test2 - echo 1000 > /sys/fs/cgroup/blkio/test1/blkio.weight - echo 500 > /sys/fs/cgroup/blkio/test2/blkio.weight - -- Create two same size files (say 512MB each) on same disk (file1, file2) and - launch two dd threads in different cgroup to read those files. - - sync - echo 3 > /proc/sys/vm/drop_caches - - dd if=/mnt/sdb/zerofile1 of=/dev/null & - echo $! > /sys/fs/cgroup/blkio/test1/tasks - cat /sys/fs/cgroup/blkio/test1/tasks - - dd if=/mnt/sdb/zerofile2 of=/dev/null & - echo $! > /sys/fs/cgroup/blkio/test2/tasks - cat /sys/fs/cgroup/blkio/test2/tasks - -- At macro level, first dd should finish first. To get more precise data, keep - on looking at (with the help of script), at blkio.disk_time and - blkio.disk_sectors files of both test1 and test2 groups. This will tell how - much disk time (in milliseconds), each group got and how many sectors each - group dispatched to the disk. We provide fairness in terms of disk time, so - ideally io.disk_time of cgroups should be in proportion to the weight. - Throttling/Upper Limit policy ----------------------------- - Enable Block IO controller @@ -94,7 +46,7 @@ Throttling/Upper Limit policy Hierarchical Cgroups ==================== -Both CFQ and throttling implement hierarchy support; however, +Throttling implements hierarchy support; however, throttling's hierarchy support is enabled iff "sane_behavior" is enabled from cgroup side, which currently is a development option and not publicly available. @@ -107,9 +59,8 @@ If somebody created a hierarchy like as follows. | test3 -CFQ by default and throttling with "sane_behavior" will handle the -hierarchy correctly. For details on CFQ hierarchy support, refer to -Documentation/block/cfq-iosched.txt. For throttling, all limits apply +Throttling with "sane_behavior" will handle the +hierarchy correctly. For throttling, all limits apply to the whole subtree while all statistics are local to the IOs directly generated by tasks in that cgroup. @@ -130,10 +81,6 @@ CONFIG_DEBUG_BLK_CGROUP - Debug help. Right now some additional stats file show up in cgroup if this option is enabled. -CONFIG_CFQ_GROUP_IOSCHED - - Enables group scheduling in CFQ. Currently only 1 level of group - creation is allowed. - CONFIG_BLK_DEV_THROTTLING - Enable block device throttling support in block layer. @@ -344,32 +291,3 @@ Common files among various policies - blkio.reset_stats - Writing an int to this file will result in resetting all the stats for that cgroup. - -CFQ sysfs tunable -================= -/sys/block/<disk>/queue/iosched/slice_idle ------------------------------------------- -On a faster hardware CFQ can be slow, especially with sequential workload. -This happens because CFQ idles on a single queue and single queue might not -drive deeper request queue depths to keep the storage busy. In such scenarios -one can try setting slice_idle=0 and that would switch CFQ to IOPS -(IO operations per second) mode on NCQ supporting hardware. - -That means CFQ will not idle between cfq queues of a cfq group and hence be -able to driver higher queue depth and achieve better throughput. That also -means that cfq provides fairness among groups in terms of IOPS and not in -terms of disk time. - -/sys/block/<disk>/queue/iosched/group_idle ------------------------------------------- -If one disables idling on individual cfq queues and cfq service trees by -setting slice_idle=0, group_idle kicks in. That means CFQ will still idle -on the group in an attempt to provide fairness among groups. - -By default group_idle is same as slice_idle and does not do anything if -slice_idle is enabled. - -One can experience an overall throughput drop if you have created multiple -groups and put applications in that group which are not driving enough -IO to keep disk busy. In that case set group_idle=0, and CFQ will not idle -on individual groups and throughput should improve. diff --git a/block/Kconfig b/block/Kconfig index 1b220101a9cb..2466dcc3ef1d 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -73,6 +73,7 @@ config BLK_DEV_INTEGRITY config BLK_DEV_ZONED bool "Zoned block device support" + select MQ_IOSCHED_DEADLINE ---help--- Block layer zoned block device support. This option enables support for ZAC/ZBC host-managed and host-aware zoned block devices. diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 6aea0ebc3a73..2489ddbb21db 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -821,38 +821,28 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = { {}, }; -static bool debugfs_create_files(struct dentry *parent, void *data, +static void debugfs_create_files(struct dentry *parent, void *data, const struct blk_mq_debugfs_attr *attr) { if (IS_ERR_OR_NULL(parent)) - return false; + return; d_inode(parent)->i_private = data; - for (; attr->name; attr++) { - if (!debugfs_create_file(attr->name, attr->mode, parent, - (void *)attr, &blk_mq_debugfs_fops)) - return false; - } - return true; + for (; attr->name; attr++) + debugfs_create_file(attr->name, attr->mode, parent, + (void *)attr, &blk_mq_debugfs_fops); } -int blk_mq_debugfs_register(struct request_queue *q) +void blk_mq_debugfs_register(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; int i; - if (!blk_debugfs_root) - return -ENOENT; - q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent), blk_debugfs_root); - if (!q->debugfs_dir) - return -ENOMEM; - if (!debugfs_create_files(q->debugfs_dir, q, - blk_mq_debugfs_queue_attrs)) - goto err; + debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs); /* * blk_mq_init_sched() attempted to do this already, but q->debugfs_dir @@ -864,11 +854,10 @@ int blk_mq_debugfs_register(struct request_queue *q) /* Similarly, blk_mq_init_hctx() couldn't do this previously. */ queue_for_each_hw_ctx(q, hctx, i) { - if (!hctx->debugfs_dir && blk_mq_debugfs_register_hctx(q, hctx)) - goto err; - if (q->elevator && !hctx->sched_debugfs_dir && - blk_mq_debugfs_register_sched_hctx(q, hctx)) - goto err; + if (!hctx->debugfs_dir) + blk_mq_debugfs_register_hctx(q, hctx); + if (q->elevator && !hctx->sched_debugfs_dir) + blk_mq_debugfs_register_sched_hctx(q, hctx); } if (q->rq_qos) { @@ -879,12 +868,6 @@ int blk_mq_debugfs_register(struct request_queue *q) rqos = rqos->next; } } - - return 0; - -err: - blk_mq_debugfs_unregister(q); - return -ENOMEM; } void blk_mq_debugfs_unregister(struct request_queue *q) @@ -894,52 +877,32 @@ void blk_mq_debugfs_unregister(struct request_queue *q) q->debugfs_dir = NULL; } -static int blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, - struct blk_mq_ctx *ctx) +static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, + struct blk_mq_ctx *ctx) { struct dentry *ctx_dir; char name[20]; snprintf(name, sizeof(name), "cpu%u", ctx->cpu); ctx_dir = debugfs_create_dir(name, hctx->debugfs_dir); - if (!ctx_dir) - return -ENOMEM; - if (!debugfs_create_files(ctx_dir, ctx, blk_mq_debugfs_ctx_attrs)) - return -ENOMEM; - - return 0; + debugfs_create_files(ctx_dir, ctx, blk_mq_debugfs_ctx_attrs); } -int blk_mq_debugfs_register_hctx(struct request_queue *q, - struct blk_mq_hw_ctx *hctx) +void blk_mq_debugfs_register_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) { struct blk_mq_ctx *ctx; char name[20]; int i; - if (!q->debugfs_dir) - return -ENOENT; - snprintf(name, sizeof(name), "hctx%u", hctx->queue_num); hctx->debugfs_dir = debugfs_create_dir(name, q->debugfs_dir); - if (!hctx->debugfs_dir) - return -ENOMEM; - - if (!debugfs_create_files(hctx->debugfs_dir, hctx, - blk_mq_debugfs_hctx_attrs)) - goto err; - - hctx_for_each_ctx(hctx, ctx, i) { - if (blk_mq_debugfs_register_ctx(hctx, ctx)) - goto err; - } - return 0; + debugfs_create_files(hctx->debugfs_dir, hctx, blk_mq_debugfs_hctx_attrs); -err: - blk_mq_debugfs_unregister_hctx(hctx); - return -ENOMEM; + hctx_for_each_ctx(hctx, ctx, i) + blk_mq_debugfs_register_ctx(hctx, ctx); } void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) @@ -949,17 +912,13 @@ void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) hctx->debugfs_dir = NULL; } -int blk_mq_debugfs_register_hctxs(struct request_queue *q) +void blk_mq_debugfs_register_hctxs(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; int i; - queue_for_each_hw_ctx(q, hctx, i) { - if (blk_mq_debugfs_register_hctx(q, hctx)) - return -ENOMEM; - } - - return 0; + queue_for_each_hw_ctx(q, hctx, i) + blk_mq_debugfs_register_hctx(q, hctx); } void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) @@ -971,29 +930,16 @@ void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) blk_mq_debugfs_unregister_hctx(hctx); } -int blk_mq_debugfs_register_sched(struct request_queue *q) +void blk_mq_debugfs_register_sched(struct request_queue *q) { struct elevator_type *e = q->elevator->type; - if (!q->debugfs_dir) - return -ENOENT; - if (!e->queue_debugfs_attrs) - return 0; + return; q->sched_debugfs_dir = debugfs_create_dir("sched", q->debugfs_dir); - if (!q->sched_debugfs_dir) - return -ENOMEM; - if (!debugfs_create_files(q->sched_debugfs_dir, q, - e->queue_debugfs_attrs)) - goto err; - - return 0; - -err: - blk_mq_debugfs_unregister_sched(q); - return -ENOMEM; + debugfs_create_files(q->sched_debugfs_dir, q, e->queue_debugfs_attrs); } void blk_mq_debugfs_unregister_sched(struct request_queue *q) @@ -1008,36 +954,22 @@ void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) rqos->debugfs_dir = NULL; } -int blk_mq_debugfs_register_rqos(struct rq_qos *rqos) +void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) { struct request_queue *q = rqos->q; const char *dir_name = rq_qos_id_to_name(rqos->id); - if (!q->debugfs_dir) - return -ENOENT; - if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs) - return 0; + return; - if (!q->rqos_debugfs_dir) { + if (!q->rqos_debugfs_dir) q->rqos_debugfs_dir = debugfs_create_dir("rqos", q->debugfs_dir); - if (!q->rqos_debugfs_dir) - return -ENOMEM; - } rqos->debugfs_dir = debugfs_create_dir(dir_name, rqos->q->rqos_debugfs_dir); - if (!rqos->debugfs_dir) - return -ENOMEM; - if (!debugfs_create_files(rqos->debugfs_dir, rqos, - rqos->ops->debugfs_attrs)) - goto err; - return 0; - err: - blk_mq_debugfs_unregister_rqos(rqos); - return -ENOMEM; + debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs); } void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) @@ -1046,27 +978,18 @@ void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) q->rqos_debugfs_dir = NULL; } -int blk_mq_debugfs_register_sched_hctx(struct request_queue *q, - struct blk_mq_hw_ctx *hctx) +void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) { struct elevator_type *e = q->elevator->type; - if (!hctx->debugfs_dir) - return -ENOENT; - if (!e->hctx_debugfs_attrs) - return 0; + return; hctx->sched_debugfs_dir = debugfs_create_dir("sched", hctx->debugfs_dir); - if (!hctx->sched_debugfs_dir) - return -ENOMEM; - - if (!debugfs_create_files(hctx->sched_debugfs_dir, hctx, - e->hctx_debugfs_attrs)) - return -ENOMEM; - - return 0; + debugfs_create_files(hctx->sched_debugfs_dir, hctx, + e->hctx_debugfs_attrs); } void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index 8c9012a578c1..a68aa6041a10 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -18,74 +18,68 @@ struct blk_mq_debugfs_attr { int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq); int blk_mq_debugfs_rq_show(struct seq_file *m, void *v); -int blk_mq_debugfs_register(struct request_queue *q); +void blk_mq_debugfs_register(struct request_queue *q); void blk_mq_debugfs_unregister(struct request_queue *q); -int blk_mq_debugfs_register_hctx(struct request_queue *q, - struct blk_mq_hw_ctx *hctx); +void blk_mq_debugfs_register_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx); -int blk_mq_debugfs_register_hctxs(struct request_queue *q); +void blk_mq_debugfs_register_hctxs(struct request_queue *q); void blk_mq_debugfs_unregister_hctxs(struct request_queue *q); -int blk_mq_debugfs_register_sched(struct request_queue *q); +void blk_mq_debugfs_register_sched(struct request_queue *q); void blk_mq_debugfs_unregister_sched(struct request_queue *q); -int blk_mq_debugfs_register_sched_hctx(struct request_queue *q, +void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx); -int blk_mq_debugfs_register_rqos(struct rq_qos *rqos); +void blk_mq_debugfs_register_rqos(struct rq_qos *rqos); void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos); void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q); #else -static inline int blk_mq_debugfs_register(struct request_queue *q) +static inline void blk_mq_debugfs_register(struct request_queue *q) { - return 0; } static inline void blk_mq_debugfs_unregister(struct request_queue *q) { } -static inline int blk_mq_debugfs_register_hctx(struct request_queue *q, - struct blk_mq_hw_ctx *hctx) +static inline void blk_mq_debugfs_register_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) { - return 0; } static inline void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) { } -static inline int blk_mq_debugfs_register_hctxs(struct request_queue *q) +static inline void blk_mq_debugfs_register_hctxs(struct request_queue *q) { - return 0; } static inline void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) { } -static inline int blk_mq_debugfs_register_sched(struct request_queue *q) +static inline void blk_mq_debugfs_register_sched(struct request_queue *q) { - return 0; } static inline void blk_mq_debugfs_unregister_sched(struct request_queue *q) { } -static inline int blk_mq_debugfs_register_sched_hctx(struct request_queue *q, - struct blk_mq_hw_ctx *hctx) +static inline void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) { - return 0; } static inline void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) { } -static inline int blk_mq_debugfs_register_rqos(struct rq_qos *rqos) +static inline void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) { - return 0; } static inline void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 500cb04901cc..2766066a15db 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -555,7 +555,6 @@ void blk_mq_sched_free_requests(struct request_queue *q) int i; lockdep_assert_held(&q->sysfs_lock); - WARN_ON(!q->elevator); queue_for_each_hw_ctx(q, hctx, i) { if (hctx->sched_tags) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index aaa57e0c809d..4a2dff303865 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4460,9 +4460,12 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "ST3320[68]13AS", "SD1[5-9]", ATA_HORKAGE_NONCQ | ATA_HORKAGE_FIRMWARE_WARN }, - /* drives which fail FPDMA_AA activation (some may freeze afterwards) */ - { "ST1000LM024 HN-M101MBB", "2AR10001", ATA_HORKAGE_BROKEN_FPDMA_AA }, - { "ST1000LM024 HN-M101MBB", "2BA30001", ATA_HORKAGE_BROKEN_FPDMA_AA }, + /* drives which fail FPDMA_AA activation (some may freeze afterwards) + the ST disks also have LPM issues */ + { "ST1000LM024 HN-M101MBB", "2AR10001", ATA_HORKAGE_BROKEN_FPDMA_AA | + ATA_HORKAGE_NOLPM, }, + { "ST1000LM024 HN-M101MBB", "2BA30001", ATA_HORKAGE_BROKEN_FPDMA_AA | + ATA_HORKAGE_NOLPM, }, { "VB0250EAVER", "HPG7", ATA_HORKAGE_BROKEN_FPDMA_AA }, /* Blacklist entries taken from Silicon Image 3124/3132 diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c index 5d1c261a2cfd..fca0c97ff1aa 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk_zoned.c @@ -74,10 +74,6 @@ int null_zone_report(struct gendisk *disk, sector_t sector, struct nullb_device *dev = nullb->dev; unsigned int zno, nrz = 0; - if (!dev->zoned) - /* Not a zoned null device */ - return -EOPNOTSUPP; - zno = null_zone_no(dev, sector); if (zno < dev->nr_zones) { nrz = min_t(unsigned int, *nr_zones, dev->nr_zones - zno); diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 4c7f51b1eda9..4628e1a27a2b 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -767,7 +767,7 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev) strlcpy(gendisk->disk_name, DEVICE_NAME, sizeof(gendisk->disk_name)); set_capacity(gendisk, priv->size >> 9); - dev_info(&dev->core, "%s: Using %lu MiB of GPU memory\n", + dev_info(&dev->core, "%s: Using %llu MiB of GPU memory\n", gendisk->disk_name, get_capacity(gendisk) >> 11); device_add_disk(&dev->core, gendisk, NULL); diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 8f07fa6e1739..268f1b685084 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -887,12 +887,22 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k, struct bset *i = bset_tree_last(b)->data; struct bkey *m, *prev = NULL; struct btree_iter iter; + struct bkey preceding_key_on_stack = ZERO_KEY; + struct bkey *preceding_key_p = &preceding_key_on_stack; BUG_ON(b->ops->is_extents && !KEY_SIZE(k)); - m = bch_btree_iter_init(b, &iter, b->ops->is_extents - ? PRECEDING_KEY(&START_KEY(k)) - : PRECEDING_KEY(k)); + /* + * If k has preceding key, preceding_key_p will be set to address + * of k's preceding key; otherwise preceding_key_p will be set + * to NULL inside preceding_key(). + */ + if (b->ops->is_extents) + preceding_key(&START_KEY(k), &preceding_key_p); + else + preceding_key(k, &preceding_key_p); + + m = bch_btree_iter_init(b, &iter, preceding_key_p); if (b->ops->insert_fixup(b, k, &iter, replace_key)) return status; diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index bac76aabca6d..c71365e7c1fa 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -434,20 +434,26 @@ static inline bool bch_cut_back(const struct bkey *where, struct bkey *k) return __bch_cut_back(where, k); } -#define PRECEDING_KEY(_k) \ -({ \ - struct bkey *_ret = NULL; \ - \ - if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \ - _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \ - \ - if (!_ret->low) \ - _ret->high--; \ - _ret->low--; \ - } \ - \ - _ret; \ -}) +/* + * Pointer '*preceding_key_p' points to a memory object to store preceding + * key of k. If the preceding key does not exist, set '*preceding_key_p' to + * NULL. So the caller of preceding_key() needs to take care of memory + * which '*preceding_key_p' pointed to before calling preceding_key(). + * Currently the only caller of preceding_key() is bch_btree_insert_key(), + * and it points to an on-stack variable, so the memory release is handled + * by stackframe itself. + */ +static inline void preceding_key(struct bkey *k, struct bkey **preceding_key_p) +{ + if (KEY_INODE(k) || KEY_OFFSET(k)) { + (**preceding_key_p) = KEY(KEY_INODE(k), KEY_OFFSET(k), 0); + if (!(*preceding_key_p)->low) + (*preceding_key_p)->high--; + (*preceding_key_p)->low--; + } else { + (*preceding_key_p) = NULL; + } +} static inline bool bch_ptr_invalid(struct btree_keys *b, const struct bkey *k) { diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 6cd44d3cf906..bfb437ffb13c 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -431,8 +431,13 @@ STORE(bch_cached_dev) bch_writeback_queue(dc); } + /* + * Only set BCACHE_DEV_WB_RUNNING when cached device attached to + * a cache set, otherwise it doesn't make sense. + */ if (attr == &sysfs_writeback_percent) - if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) + if ((dc->disk.c != NULL) && + (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))) schedule_delayed_work(&dc->writeback_rate_update, dc->writeback_rate_update_seconds * HZ); diff --git a/fs/io_uring.c b/fs/io_uring.c index 0fbb486a320e..86a2bd721900 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2777,8 +2777,10 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) io_eventfd_unregister(ctx); #if defined(CONFIG_UNIX) - if (ctx->ring_sock) + if (ctx->ring_sock) { + ctx->ring_sock->file = NULL; /* so that iput() is called */ sock_release(ctx->ring_sock); + } #endif io_mem_free(ctx->sq_ring); |