diff options
author | Jens Axboe <axboe@kernel.dk> | 2020-05-09 16:13:58 -0600 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2020-05-09 16:13:58 -0600 |
commit | 873f1c8df7e0dac2156a722f3c7848a30693a242 (patch) | |
tree | 629e95d72af5be4cfd843e2c369b59c3de5baae6 | |
parent | 8b075e5ba459c7afdd7b2fde14cbc01c51e25eac (diff) | |
parent | 59c7c3caaaf8750df4ec3255082f15eb4e371514 (diff) |
Merge branch 'block-5.7' into for-5.8/block
Pull in block-5.7 fixes for 5.8. Mostly to resolve a conflict with
the blk-iocost changes, but we also need the base of the bdi
use-after-free as well as we build on top of it.
* block-5.7:
nvme: fix possible hang when ns scanning fails during error recovery
nvme-pci: fix "slimmer CQ head update"
bdi: add a ->dev_name field to struct backing_dev_info
bdi: use bdi_dev_name() to get device name
bdi: move bdi_dev_name out of line
vboxsf: don't use the source name in the bdi name
iocost: protect iocg->abs_vdebt with iocg->waitq.lock
block: remove the bd_openers checks in blk_drop_partitions
nvme: prevent double free in nvme_alloc_ns() error handling
null_blk: Cleanup zoned device initialization
null_blk: Fix zoned command handling
block: remove unused header
blk-iocost: Fix error on iocost_ioc_vrate_adj
bdev: Reduce time holding bd_mutex in sync in blkdev_close()
buffer: remove useless comment and WB_REASON_FREE_MORE_MEM, reason.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- | block/bfq-iosched.c | 6 | ||||
-rw-r--r-- | block/blk-cgroup.c | 2 | ||||
-rw-r--r-- | block/blk-iocost.c | 121 | ||||
-rw-r--r-- | block/partitions/core.c | 2 | ||||
-rw-r--r-- | drivers/block/null_blk.h | 29 | ||||
-rw-r--r-- | drivers/block/null_blk_main.c | 62 | ||||
-rw-r--r-- | drivers/block/null_blk_zoned.c | 45 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 4 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 6 | ||||
-rw-r--r-- | fs/block_dev.c | 11 | ||||
-rw-r--r-- | fs/buffer.c | 2 | ||||
-rw-r--r-- | fs/ceph/debugfs.c | 2 | ||||
-rw-r--r-- | fs/vboxsf/super.c | 2 | ||||
-rw-r--r-- | include/linux/backing-dev-defs.h | 2 | ||||
-rw-r--r-- | include/linux/backing-dev.h | 9 | ||||
-rw-r--r-- | include/trace/events/iocost.h | 6 | ||||
-rw-r--r-- | include/trace/events/wbt.h | 8 | ||||
-rw-r--r-- | include/trace/events/writeback.h | 1 | ||||
-rw-r--r-- | mm/backing-dev.c | 13 | ||||
-rw-r--r-- | tools/cgroup/iocost_monitor.py | 7 |
20 files changed, 209 insertions, 131 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 78ba57efd16b..3d411716d7ee 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -123,6 +123,7 @@ #include <linux/ioprio.h> #include <linux/sbitmap.h> #include <linux/delay.h> +#include <linux/backing-dev.h> #include "blk.h" #include "blk-mq.h" @@ -4976,8 +4977,9 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio); switch (ioprio_class) { default: - dev_err(bfqq->bfqd->queue->backing_dev_info->dev, - "bfq: bad prio class %d\n", ioprio_class); + pr_err("bdi %s: bfq: bad prio class %d\n", + bdi_dev_name(bfqq->bfqd->queue->backing_dev_info), + ioprio_class); /* fall through */ case IOPRIO_CLASS_NONE: /* diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 0a63c6cbbcb1..0ecc897b225c 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -496,7 +496,7 @@ const char *blkg_dev_name(struct blkcg_gq *blkg) { /* some drivers (floppy) instantiate a queue w/o disk registered */ if (blkg->q->backing_dev_info->dev) - return dev_name(blkg->q->backing_dev_info->dev); + return bdi_dev_name(blkg->q->backing_dev_info); return NULL; } diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 9a667dd75eef..12eefb3d113f 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -467,7 +467,7 @@ struct ioc_gq { */ atomic64_t vtime; atomic64_t done_vtime; - atomic64_t abs_vdebt; + u64 abs_vdebt; u64 last_vtime; /* @@ -1143,7 +1143,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now) struct iocg_wake_ctx ctx = { .iocg = iocg }; u64 margin_ns = (u64)(ioc->period_us * WAITQ_TIMER_MARGIN_PCT / 100) * NSEC_PER_USEC; - u64 abs_vdebt, vdebt, vshortage, expires, oexpires; + u64 vdebt, vshortage, expires, oexpires; s64 vbudget; u32 hw_inuse; @@ -1153,18 +1153,15 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now) vbudget = now->vnow - atomic64_read(&iocg->vtime); /* pay off debt */ - abs_vdebt = atomic64_read(&iocg->abs_vdebt); - vdebt = abs_cost_to_cost(abs_vdebt, hw_inuse); + vdebt = abs_cost_to_cost(iocg->abs_vdebt, hw_inuse); if (vdebt && vbudget > 0) { u64 delta = min_t(u64, vbudget, vdebt); u64 abs_delta = min(cost_to_abs_cost(delta, hw_inuse), - abs_vdebt); + iocg->abs_vdebt); atomic64_add(delta, &iocg->vtime); atomic64_add(delta, &iocg->done_vtime); - atomic64_sub(abs_delta, &iocg->abs_vdebt); - if (WARN_ON_ONCE(atomic64_read(&iocg->abs_vdebt) < 0)) - atomic64_set(&iocg->abs_vdebt, 0); + iocg->abs_vdebt -= abs_delta; } /* @@ -1220,12 +1217,18 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now) u64 delta_ns, expires, oexpires; u32 hw_inuse; + lockdep_assert_held(&iocg->waitq.lock); + /* debt-adjust vtime */ current_hweight(iocg, NULL, &hw_inuse); - vtime += abs_cost_to_cost(atomic64_read(&iocg->abs_vdebt), hw_inuse); + vtime += abs_cost_to_cost(iocg->abs_vdebt, hw_inuse); - /* clear or maintain depending on the overage */ - if (time_before_eq64(vtime, now->vnow)) { + /* + * Clear or maintain depending on the overage. Non-zero vdebt is what + * guarantees that @iocg is online and future iocg_kick_delay() will + * clear use_delay. Don't leave it on when there's no vdebt. + */ + if (!iocg->abs_vdebt || time_before_eq64(vtime, now->vnow)) { blkcg_clear_delay(blkg); return false; } @@ -1254,9 +1257,12 @@ static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer) { struct ioc_gq *iocg = container_of(timer, struct ioc_gq, delay_timer); struct ioc_now now; + unsigned long flags; + spin_lock_irqsave(&iocg->waitq.lock, flags); ioc_now(iocg->ioc, &now); iocg_kick_delay(iocg, &now); + spin_unlock_irqrestore(&iocg->waitq.lock, flags); return HRTIMER_NORESTART; } @@ -1364,14 +1370,13 @@ static void ioc_timer_fn(struct timer_list *timer) * should have woken up in the last period and expire idle iocgs. */ list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) { - if (!waitqueue_active(&iocg->waitq) && - !atomic64_read(&iocg->abs_vdebt) && !iocg_is_idle(iocg)) + if (!waitqueue_active(&iocg->waitq) && iocg->abs_vdebt && + !iocg_is_idle(iocg)) continue; spin_lock(&iocg->waitq.lock); - if (waitqueue_active(&iocg->waitq) || - atomic64_read(&iocg->abs_vdebt)) { + if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt) { /* might be oversleeping vtime / hweight changes, kick */ iocg_kick_waitq(iocg, &now); iocg_kick_delay(iocg, &now); @@ -1587,7 +1592,7 @@ skip_surplus_transfers: vrate_min, vrate_max); } - trace_iocost_ioc_vrate_adj(ioc, vrate, &missed_ppm, rq_wait_pct, + trace_iocost_ioc_vrate_adj(ioc, vrate, missed_ppm, rq_wait_pct, nr_lagging, nr_shortages, nr_surpluses); @@ -1596,7 +1601,7 @@ skip_surplus_transfers: ioc->period_us * vrate * INUSE_MARGIN_PCT, 100); } else if (ioc->busy_level != prev_busy_level || nr_lagging) { trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate), - &missed_ppm, rq_wait_pct, nr_lagging, + missed_ppm, rq_wait_pct, nr_lagging, nr_shortages, nr_surpluses); } @@ -1739,28 +1744,49 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) * tests are racy but the races aren't systemic - we only miss once * in a while which is fine. */ - if (!waitqueue_active(&iocg->waitq) && - !atomic64_read(&iocg->abs_vdebt) && + if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt && time_before_eq64(vtime + cost, now.vnow)) { iocg_commit_bio(iocg, bio, cost); return; } /* - * We're over budget. If @bio has to be issued regardless, - * remember the abs_cost instead of advancing vtime. - * iocg_kick_waitq() will pay off the debt before waking more IOs. + * We activated above but w/o any synchronization. Deactivation is + * synchronized with waitq.lock and we won't get deactivated as long + * as we're waiting or has debt, so we're good if we're activated + * here. In the unlikely case that we aren't, just issue the IO. + */ + spin_lock_irq(&iocg->waitq.lock); + + if (unlikely(list_empty(&iocg->active_list))) { + spin_unlock_irq(&iocg->waitq.lock); + iocg_commit_bio(iocg, bio, cost); + return; + } + + /* + * We're over budget. If @bio has to be issued regardless, remember + * the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay + * off the debt before waking more IOs. + * * This way, the debt is continuously paid off each period with the - * actual budget available to the cgroup. If we just wound vtime, - * we would incorrectly use the current hw_inuse for the entire - * amount which, for example, can lead to the cgroup staying - * blocked for a long time even with substantially raised hw_inuse. + * actual budget available to the cgroup. If we just wound vtime, we + * would incorrectly use the current hw_inuse for the entire amount + * which, for example, can lead to the cgroup staying blocked for a + * long time even with substantially raised hw_inuse. + * + * An iocg with vdebt should stay online so that the timer can keep + * deducting its vdebt and [de]activate use_delay mechanism + * accordingly. We don't want to race against the timer trying to + * clear them and leave @iocg inactive w/ dangling use_delay heavily + * penalizing the cgroup and its descendants. */ if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) { - atomic64_add(abs_cost, &iocg->abs_vdebt); + iocg->abs_vdebt += abs_cost; if (iocg_kick_delay(iocg, &now)) blkcg_schedule_throttle(rqos->q, (bio->bi_opf & REQ_SWAP) == REQ_SWAP); + spin_unlock_irq(&iocg->waitq.lock); return; } @@ -1777,20 +1803,6 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) * All waiters are on iocg->waitq and the wait states are * synchronized using waitq.lock. */ - spin_lock_irq(&iocg->waitq.lock); - - /* - * We activated above but w/o any synchronization. Deactivation is - * synchronized with waitq.lock and we won't get deactivated as - * long as we're waiting, so we're good if we're activated here. - * In the unlikely case that we are deactivated, just issue the IO. - */ - if (unlikely(list_empty(&iocg->active_list))) { - spin_unlock_irq(&iocg->waitq.lock); - iocg_commit_bio(iocg, bio, cost); - return; - } - init_waitqueue_func_entry(&wait.wait, iocg_wake_fn); wait.wait.private = current; wait.bio = bio; @@ -1822,6 +1834,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq, struct ioc_now now; u32 hw_inuse; u64 abs_cost, cost; + unsigned long flags; /* bypass if disabled or for root cgroup */ if (!ioc->enabled || !iocg->level) @@ -1841,15 +1854,28 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq, iocg->cursor = bio_end; /* - * Charge if there's enough vtime budget and the existing request - * has cost assigned. Otherwise, account it as debt. See debt - * handling in ioc_rqos_throttle() for details. + * Charge if there's enough vtime budget and the existing request has + * cost assigned. */ if (rq->bio && rq->bio->bi_iocost_cost && - time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow)) + time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow)) { iocg_commit_bio(iocg, bio, cost); - else - atomic64_add(abs_cost, &iocg->abs_vdebt); + return; + } + + /* + * Otherwise, account it as debt if @iocg is online, which it should + * be for the vast majority of cases. See debt handling in + * ioc_rqos_throttle() for details. + */ + spin_lock_irqsave(&iocg->waitq.lock, flags); + if (likely(!list_empty(&iocg->active_list))) { + iocg->abs_vdebt += abs_cost; + iocg_kick_delay(iocg, &now); + } else { + iocg_commit_bio(iocg, bio, cost); + } + spin_unlock_irqrestore(&iocg->waitq.lock, flags); } static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio) @@ -2021,7 +2047,6 @@ static void ioc_pd_init(struct blkg_policy_data *pd) iocg->ioc = ioc; atomic64_set(&iocg->vtime, now.vnow); atomic64_set(&iocg->done_vtime, now.vnow); - atomic64_set(&iocg->abs_vdebt, 0); atomic64_set(&iocg->active_period, atomic64_read(&ioc->cur_period)); INIT_LIST_HEAD(&iocg->active_list); iocg->hweight_active = HWEIGHT_WHOLE; diff --git a/block/partitions/core.c b/block/partitions/core.c index c085bf85509b..873999e2e2f2 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -610,7 +610,7 @@ int blk_drop_partitions(struct block_device *bdev) if (!disk_part_scan_enabled(bdev->bd_disk)) return 0; - if (bdev->bd_part_count || bdev->bd_openers > 1) + if (bdev->bd_part_count) return -EBUSY; sync_blockdev(bdev); diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h index 62b660821dbc..81b311c9d781 100644 --- a/drivers/block/null_blk.h +++ b/drivers/block/null_blk.h @@ -85,26 +85,35 @@ struct nullb { char disk_name[DISK_NAME_LEN]; }; +blk_status_t null_process_cmd(struct nullb_cmd *cmd, + enum req_opf op, sector_t sector, + unsigned int nr_sectors); + #ifdef CONFIG_BLK_DEV_ZONED -int null_zone_init(struct nullb_device *dev); -void null_zone_exit(struct nullb_device *dev); +int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q); +int null_register_zoned_dev(struct nullb *nullb); +void null_free_zoned_dev(struct nullb_device *dev); int null_report_zones(struct gendisk *disk, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); -blk_status_t null_handle_zoned(struct nullb_cmd *cmd, - enum req_opf op, sector_t sector, - sector_t nr_sectors); +blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, + enum req_opf op, sector_t sector, + sector_t nr_sectors); size_t null_zone_valid_read_len(struct nullb *nullb, sector_t sector, unsigned int len); #else -static inline int null_zone_init(struct nullb_device *dev) +static inline int null_init_zoned_dev(struct nullb_device *dev, + struct request_queue *q) { pr_err("CONFIG_BLK_DEV_ZONED not enabled\n"); return -EINVAL; } -static inline void null_zone_exit(struct nullb_device *dev) {} -static inline blk_status_t null_handle_zoned(struct nullb_cmd *cmd, - enum req_opf op, sector_t sector, - sector_t nr_sectors) +static inline int null_register_zoned_dev(struct nullb *nullb) +{ + return -ENODEV; +} +static inline void null_free_zoned_dev(struct nullb_device *dev) {} +static inline blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, + enum req_opf op, sector_t sector, sector_t nr_sectors) { return BLK_STS_NOTSUPP; } diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 4e1c0712278e..8efd8778e209 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -580,7 +580,7 @@ static void null_free_dev(struct nullb_device *dev) if (!dev) return; - null_zone_exit(dev); + null_free_zoned_dev(dev); badblocks_exit(&dev->badblocks); kfree(dev); } @@ -1276,6 +1276,25 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd) } } +blk_status_t null_process_cmd(struct nullb_cmd *cmd, + enum req_opf op, sector_t sector, + unsigned int nr_sectors) +{ + struct nullb_device *dev = cmd->nq->dev; + blk_status_t ret; + + if (dev->badblocks.shift != -1) { + ret = null_handle_badblocks(cmd, sector, nr_sectors); + if (ret != BLK_STS_OK) + return ret; + } + + if (dev->memory_backed) + return null_handle_memory_backed(cmd, op); + + return BLK_STS_OK; +} + static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector, sector_t nr_sectors, enum req_opf op) { @@ -1294,17 +1313,11 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector, goto out; } - if (nullb->dev->badblocks.shift != -1) { - cmd->error = null_handle_badblocks(cmd, sector, nr_sectors); - if (cmd->error != BLK_STS_OK) - goto out; - } - - if (dev->memory_backed) - cmd->error = null_handle_memory_backed(cmd, op); - - if (!cmd->error && dev->zoned) - cmd->error = null_handle_zoned(cmd, op, sector, nr_sectors); + if (dev->zoned) + cmd->error = null_process_zoned_cmd(cmd, op, + sector, nr_sectors); + else + cmd->error = null_process_cmd(cmd, op, sector, nr_sectors); out: nullb_complete_cmd(cmd); @@ -1605,19 +1618,12 @@ static int null_gendisk_register(struct nullb *nullb) disk->queue = nullb->q; strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN); -#ifdef CONFIG_BLK_DEV_ZONED if (nullb->dev->zoned) { - if (queue_is_mq(nullb->q)) { - int ret = blk_revalidate_disk_zones(disk); - if (ret) - return ret; - } else { - blk_queue_chunk_sectors(nullb->q, - nullb->dev->zone_size_sects); - nullb->q->nr_zones = blkdev_nr_zones(disk); - } + int ret = null_register_zoned_dev(nullb); + + if (ret) + return ret; } -#endif add_disk(disk); return 0; @@ -1773,14 +1779,9 @@ static int null_add_dev(struct nullb_device *dev) } if (dev->zoned) { - rv = null_zone_init(dev); + rv = null_init_zoned_dev(dev, nullb->q); if (rv) goto out_cleanup_blk_queue; - - nullb->q->limits.zoned = BLK_ZONED_HM; - blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, nullb->q); - blk_queue_required_elevator_features(nullb->q, - ELEVATOR_F_ZBD_SEQ_WRITE); } nullb->q->queuedata = nullb; @@ -1809,8 +1810,7 @@ static int null_add_dev(struct nullb_device *dev) return 0; out_cleanup_zone: - if (dev->zoned) - null_zone_exit(dev); + null_free_zoned_dev(dev); out_cleanup_blk_queue: blk_cleanup_queue(nullb->q); out_cleanup_tags: diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c index 673618d8222a..9e4bcdad1a80 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk_zoned.c @@ -13,7 +13,7 @@ static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect) return sect >> ilog2(dev->zone_size_sects); } -int null_zone_init(struct nullb_device *dev) +int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) { sector_t dev_size = (sector_t)dev->size * 1024 * 1024; sector_t sector = 0; @@ -61,10 +61,27 @@ int null_zone_init(struct nullb_device *dev) sector += dev->zone_size_sects; } + q->limits.zoned = BLK_ZONED_HM; + blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); + blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE); + + return 0; +} + +int null_register_zoned_dev(struct nullb *nullb) +{ + struct request_queue *q = nullb->q; + + if (queue_is_mq(q)) + return blk_revalidate_disk_zones(nullb->disk); + + blk_queue_chunk_sectors(q, nullb->dev->zone_size_sects); + q->nr_zones = blkdev_nr_zones(nullb->disk); + return 0; } -void null_zone_exit(struct nullb_device *dev) +void null_free_zoned_dev(struct nullb_device *dev) { kvfree(dev->zones); } @@ -126,11 +143,16 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, struct nullb_device *dev = cmd->nq->dev; unsigned int zno = null_zone_no(dev, sector); struct blk_zone *zone = &dev->zones[zno]; + blk_status_t ret; + + trace_nullb_zone_op(cmd, zno, zone->cond); + + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) + return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); switch (zone->cond) { case BLK_ZONE_COND_FULL: /* Cannot write to a full zone */ - cmd->error = BLK_STS_IOERR; return BLK_STS_IOERR; case BLK_ZONE_COND_EMPTY: case BLK_ZONE_COND_IMP_OPEN: @@ -143,19 +165,18 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, if (zone->cond != BLK_ZONE_COND_EXP_OPEN) zone->cond = BLK_ZONE_COND_IMP_OPEN; + ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); + if (ret != BLK_STS_OK) + return ret; + zone->wp += nr_sectors; if (zone->wp == zone->start + zone->len) zone->cond = BLK_ZONE_COND_FULL; - break; - case BLK_ZONE_COND_NOT_WP: - break; + return BLK_STS_OK; default: /* Invalid zone condition */ return BLK_STS_IOERR; } - - trace_nullb_zone_op(cmd, zno, zone->cond); - return BLK_STS_OK; } static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, @@ -216,8 +237,8 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, return BLK_STS_OK; } -blk_status_t null_handle_zoned(struct nullb_cmd *cmd, enum req_opf op, - sector_t sector, sector_t nr_sectors) +blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op, + sector_t sector, sector_t nr_sectors) { switch (op) { case REQ_OP_WRITE: @@ -229,6 +250,6 @@ blk_status_t null_handle_zoned(struct nullb_cmd *cmd, enum req_opf op, case REQ_OP_ZONE_FINISH: return null_zone_mgmt(cmd, op, sector); default: - return BLK_STS_OK; + return null_process_cmd(cmd, op, sector, nr_sectors); } } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 91c1bd659947..f3c037f5a9ba 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1110,7 +1110,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, * Don't treat an error as fatal, as we potentially already * have a NGUID or EUI-64. */ - if (status > 0) + if (status > 0 && !(status & NVME_SC_DNR)) status = 0; goto free_data; } @@ -3642,6 +3642,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) return; out_put_disk: + /* prevent double queue cleanup */ + ns->disk->queue = NULL; put_disk(ns->disk); out_unlink_ns: mutex_lock(&ctrl->subsys->lock); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 4e79e412b276..e13c370de830 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -973,9 +973,13 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) { - if (++nvmeq->cq_head == nvmeq->q_depth) { + u16 tmp = nvmeq->cq_head + 1; + + if (tmp == nvmeq->q_depth) { nvmeq->cq_head = 0; nvmeq->cq_phase ^= 1; + } else { + nvmeq->cq_head = tmp; } } diff --git a/fs/block_dev.c b/fs/block_dev.c index 5eb30a474f6d..7cbb7b79935e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -19,7 +19,6 @@ #include <linux/module.h> #include <linux/blkpg.h> #include <linux/magic.h> -#include <linux/dax.h> #include <linux/buffer_head.h> #include <linux/swap.h> #include <linux/pagevec.h> @@ -1876,6 +1875,16 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) struct gendisk *disk = bdev->bd_disk; struct block_device *victim = NULL; + /* + * Sync early if it looks like we're the last one. If someone else + * opens the block device between now and the decrement of bd_openers + * then we did a sync that we didn't need to, but that's not the end + * of the world and we want to avoid long (could be several minute) + * syncs while holding the mutex. + */ + if (bdev->bd_openers == 1) + sync_blockdev(bdev); + mutex_lock_nested(&bdev->bd_mutex, for_part); if (for_part) bdev->bd_part_count--; diff --git a/fs/buffer.c b/fs/buffer.c index 599a0bf7257b..a60f60396cfa 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -967,7 +967,7 @@ grow_dev_page(struct block_device *bdev, sector_t block, struct page *page; struct buffer_head *bh; sector_t end_block; - int ret = 0; /* Will call free_more_memory() */ + int ret = 0; gfp_t gfp_mask; gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp; diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 481ac97b4d25..dcaed75de9e6 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -271,7 +271,7 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) &congestion_kb_fops); snprintf(name, sizeof(name), "../../bdi/%s", - dev_name(fsc->sb->s_bdi->dev)); + bdi_dev_name(fsc->sb->s_bdi)); fsc->debugfs_bdi = debugfs_create_symlink("bdi", fsc->client->debugfs_dir, diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index 675e26989376..8fe03b4a0d2b 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -164,7 +164,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) goto fail_free; } - err = super_setup_bdi_name(sb, "vboxsf-%s.%d", fc->source, sbi->bdi_id); + err = super_setup_bdi_name(sb, "vboxsf-%d", sbi->bdi_id); if (err) goto fail_free; diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 4fc87dee005a..7367150f962a 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -54,7 +54,6 @@ enum wb_reason { WB_REASON_SYNC, WB_REASON_PERIODIC, WB_REASON_LAPTOP_TIMER, - WB_REASON_FREE_MORE_MEM, WB_REASON_FS_FREE_SPACE, /* * There is no bdi forker thread any more and works are done @@ -220,6 +219,7 @@ struct backing_dev_info { wait_queue_head_t wb_waitq; struct device *dev; + char dev_name[64]; struct device *owner; struct timer_list laptop_mode_wb_timer; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index f88197c1ffc2..c9ad5c3b7b4b 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -505,13 +505,6 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) (1 << WB_async_congested)); } -extern const char *bdi_unknown_name; - -static inline const char *bdi_dev_name(struct backing_dev_info *bdi) -{ - if (!bdi || !bdi->dev) - return bdi_unknown_name; - return dev_name(bdi->dev); -} +const char *bdi_dev_name(struct backing_dev_info *bdi); #endif /* _LINUX_BACKING_DEV_H */ diff --git a/include/trace/events/iocost.h b/include/trace/events/iocost.h index 7ecaa65b7106..c2f580fd371b 100644 --- a/include/trace/events/iocost.h +++ b/include/trace/events/iocost.h @@ -130,7 +130,7 @@ DEFINE_EVENT(iocg_inuse_update, iocost_inuse_reset, TRACE_EVENT(iocost_ioc_vrate_adj, - TP_PROTO(struct ioc *ioc, u64 new_vrate, u32 (*missed_ppm)[2], + TP_PROTO(struct ioc *ioc, u64 new_vrate, u32 *missed_ppm, u32 rq_wait_pct, int nr_lagging, int nr_shortages, int nr_surpluses), @@ -155,8 +155,8 @@ TRACE_EVENT(iocost_ioc_vrate_adj, __entry->old_vrate = atomic64_read(&ioc->vtime_rate);; __entry->new_vrate = new_vrate; __entry->busy_level = ioc->busy_level; - __entry->read_missed_ppm = (*missed_ppm)[READ]; - __entry->write_missed_ppm = (*missed_ppm)[WRITE]; + __entry->read_missed_ppm = missed_ppm[READ]; + __entry->write_missed_ppm = missed_ppm[WRITE]; __entry->rq_wait_pct = rq_wait_pct; __entry->nr_lagging = nr_lagging; __entry->nr_shortages = nr_shortages; diff --git a/include/trace/events/wbt.h b/include/trace/events/wbt.h index 784814160197..9c66e59d859c 100644 --- a/include/trace/events/wbt.h +++ b/include/trace/events/wbt.h @@ -33,7 +33,7 @@ TRACE_EVENT(wbt_stat, ), TP_fast_assign( - strlcpy(__entry->name, dev_name(bdi->dev), + strlcpy(__entry->name, bdi_dev_name(bdi), ARRAY_SIZE(__entry->name)); __entry->rmean = stat[0].mean; __entry->rmin = stat[0].min; @@ -68,7 +68,7 @@ TRACE_EVENT(wbt_lat, ), TP_fast_assign( - strlcpy(__entry->name, dev_name(bdi->dev), + strlcpy(__entry->name, bdi_dev_name(bdi), ARRAY_SIZE(__entry->name)); __entry->lat = div_u64(lat, 1000); ), @@ -105,7 +105,7 @@ TRACE_EVENT(wbt_step, ), TP_fast_assign( - strlcpy(__entry->name, dev_name(bdi->dev), + strlcpy(__entry->name, bdi_dev_name(bdi), ARRAY_SIZE(__entry->name)); __entry->msg = msg; __entry->step = step; @@ -141,7 +141,7 @@ TRACE_EVENT(wbt_timer, ), TP_fast_assign( - strlcpy(__entry->name, dev_name(bdi->dev), + strlcpy(__entry->name, bdi_dev_name(bdi), ARRAY_SIZE(__entry->name)); __entry->status = status; __entry->step = step; diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index d94def25e4dc..85a33bea76f1 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -36,7 +36,6 @@ EM( WB_REASON_SYNC, "sync") \ EM( WB_REASON_PERIODIC, "periodic") \ EM( WB_REASON_LAPTOP_TIMER, "laptop_timer") \ - EM( WB_REASON_FREE_MORE_MEM, "free_more_memory") \ EM( WB_REASON_FS_FREE_SPACE, "fs_free_space") \ EMe(WB_REASON_FORKER_THREAD, "forker_thread") diff --git a/mm/backing-dev.c b/mm/backing-dev.c index c81b4f3a7268..efc5b83acd2d 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -21,7 +21,7 @@ struct backing_dev_info noop_backing_dev_info = { EXPORT_SYMBOL_GPL(noop_backing_dev_info); static struct class *bdi_class; -const char *bdi_unknown_name = "(unknown)"; +static const char *bdi_unknown_name = "(unknown)"; /* * bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU @@ -938,7 +938,8 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args) if (bdi->dev) /* The driver needs to use separate queues per device */ return 0; - dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args); + vsnprintf(bdi->dev_name, sizeof(bdi->dev_name), fmt, args); + dev = device_create(bdi_class, NULL, MKDEV(0, 0), bdi, bdi->dev_name); if (IS_ERR(dev)) return PTR_ERR(dev); @@ -1043,6 +1044,14 @@ void bdi_put(struct backing_dev_info *bdi) } EXPORT_SYMBOL(bdi_put); +const char *bdi_dev_name(struct backing_dev_info *bdi) +{ + if (!bdi || !bdi->dev) + return bdi_unknown_name; + return bdi->dev_name; +} +EXPORT_SYMBOL_GPL(bdi_dev_name); + static wait_queue_head_t congestion_wqh[2] = { __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) diff --git a/tools/cgroup/iocost_monitor.py b/tools/cgroup/iocost_monitor.py index 188b3379b9a1..3c21de88af9e 100644 --- a/tools/cgroup/iocost_monitor.py +++ b/tools/cgroup/iocost_monitor.py @@ -160,7 +160,12 @@ class IocgStat: else: self.inflight_pct = 0 - self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000 + # vdebt used to be an atomic64_t and is now u64, support both + try: + self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000 + except: + self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000 + self.use_delay = blkg.use_delay.counter.value_() self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000 |