diff options
Diffstat (limited to 'fs')
53 files changed, 600 insertions, 552 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index a89f3cfe3c7d..c202930086ed 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -333,10 +333,6 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, goto err_names; init_rwsem(&v9ses->rename_sem); - rc = bdi_setup_and_register(&v9ses->bdi, "9p"); - if (rc) - goto err_names; - v9ses->uid = INVALID_UID; v9ses->dfltuid = V9FS_DEFUID; v9ses->dfltgid = V9FS_DEFGID; @@ -345,7 +341,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, if (IS_ERR(v9ses->clnt)) { rc = PTR_ERR(v9ses->clnt); p9_debug(P9_DEBUG_ERROR, "problem initializing 9p client\n"); - goto err_bdi; + goto err_names; } v9ses->flags = V9FS_ACCESS_USER; @@ -415,8 +411,6 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, err_clnt: p9_client_destroy(v9ses->clnt); -err_bdi: - bdi_destroy(&v9ses->bdi); err_names: kfree(v9ses->uname); kfree(v9ses->aname); @@ -445,8 +439,6 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) kfree(v9ses->uname); kfree(v9ses->aname); - bdi_destroy(&v9ses->bdi); - spin_lock(&v9fs_sessionlist_lock); list_del(&v9ses->slist); spin_unlock(&v9fs_sessionlist_lock); diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 443d12e02043..76eaf49abd3a 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -114,7 +114,6 @@ struct v9fs_session_info { kuid_t uid; /* if ACCESS_SINGLE, the uid that has access */ struct p9_client *clnt; /* 9p client */ struct list_head slist; /* list of sessions registered with v9fs */ - struct backing_dev_info bdi; struct rw_semaphore rename_sem; }; diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index de3ed8629196..a0965fb587a5 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -72,10 +72,12 @@ static int v9fs_set_super(struct super_block *s, void *data) * */ -static void +static int v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, int flags, void *data) { + int ret; + sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize_bits = fls(v9ses->maxdata - 1); sb->s_blocksize = 1 << sb->s_blocksize_bits; @@ -85,7 +87,11 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, sb->s_xattr = v9fs_xattr_handlers; } else sb->s_op = &v9fs_super_ops; - sb->s_bdi = &v9ses->bdi; + + ret = super_setup_bdi(sb); + if (ret) + return ret; + if (v9ses->cache) sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_SIZE; @@ -99,6 +105,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, #endif save_mount_options(sb, data); + return 0; } /** @@ -138,7 +145,9 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, retval = PTR_ERR(sb); goto clunk_fid; } - v9fs_fill_super(sb, v9ses, flags, data); + retval = v9fs_fill_super(sb, v9ses, flags, data); + if (retval) + goto release_sb; if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) sb->s_d_op = &v9fs_cached_dentry_operations; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index a6901360fb81..393672997cc2 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -318,7 +318,6 @@ struct afs_volume { unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */ struct afs_server *servers[8]; /* servers on which volume resides (ordered) */ struct rw_semaphore server_sem; /* lock for accessing current server */ - struct backing_dev_info bdi; }; /* diff --git a/fs/afs/super.c b/fs/afs/super.c index fbdb022b75a2..c79633e5cfd8 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -319,7 +319,10 @@ static int afs_fill_super(struct super_block *sb, sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = AFS_FS_MAGIC; sb->s_op = &afs_super_ops; - sb->s_bdi = &as->volume->bdi; + ret = super_setup_bdi(sb); + if (ret) + return ret; + sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; strlcpy(sb->s_id, as->volume->vlocation->vldb.name, sizeof(sb->s_id)); /* allocate the root inode and dentry */ diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 546f9d01710b..db73d6dad02b 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -106,11 +106,6 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params) volume->cell = params->cell; volume->vid = vlocation->vldb.vid[params->type]; - volume->bdi.ra_pages = VM_MAX_READAHEAD*1024/PAGE_SIZE; - ret = bdi_setup_and_register(&volume->bdi, "afs"); - if (ret) - goto error_bdi; - init_rwsem(&volume->server_sem); /* look up all the applicable server records */ @@ -156,8 +151,6 @@ error: return ERR_PTR(ret); error_discard: - bdi_destroy(&volume->bdi); -error_bdi: up_write(¶ms->cell->vl_sem); for (loop = volume->nservers - 1; loop >= 0; loop--) @@ -207,7 +200,6 @@ void afs_put_volume(struct afs_volume *volume) for (loop = volume->nservers - 1; loop >= 0; loop--) afs_put_server(volume->servers[loop]); - bdi_destroy(&volume->bdi); kfree(volume); _leave(" [destroyed]"); diff --git a/fs/block_dev.c b/fs/block_dev.c index 2eca00ec4370..9ccabe3bb7de 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -885,6 +885,8 @@ static void bdev_evict_inode(struct inode *inode) spin_lock(&bdev_lock); list_del_init(&bdev->bd_list); spin_unlock(&bdev_lock); + /* Detach inode from wb early as bdi_put() may free bdi->wb */ + inode_detach_wb(inode); if (bdev->bd_bdi != &noop_backing_dev_info) { bdi_put(bdev->bd_bdi); bdev->bd_bdi = &noop_backing_dev_info; @@ -1451,7 +1453,6 @@ int revalidate_disk(struct gendisk *disk) if (disk->fops->revalidate_disk) ret = disk->fops->revalidate_disk(disk); - blk_integrity_revalidate(disk); bdev = bdget_disk(disk, 0); if (!bdev) return ret; @@ -1556,8 +1557,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_disk = disk; bdev->bd_queue = disk->queue; bdev->bd_contains = bdev; - if (bdev->bd_bdi == &noop_backing_dev_info) - bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info); if (!partno) { ret = -ENXIO; @@ -1622,6 +1621,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) } bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); } + + if (bdev->bd_bdi == &noop_backing_dev_info) + bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info); } else { if (bdev->bd_contains == bdev) { ret = 0; @@ -1653,8 +1655,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_disk = NULL; bdev->bd_part = NULL; bdev->bd_queue = NULL; - bdi_put(bdev->bd_bdi); - bdev->bd_bdi = &noop_backing_dev_info; if (bdev != bdev->bd_contains) __blkdev_put(bdev->bd_contains, mode, 1); bdev->bd_contains = NULL; @@ -1876,12 +1876,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) kill_bdev(bdev); bdev_write_inode(bdev); - /* - * Detaching bdev inode from its wb in __destroy_inode() - * is too late: the queue which embeds its bdi (along with - * root wb) can be gone as soon as we put_disk() below. - */ - inode_detach_wb(bdev->bd_inode); } if (bdev->bd_contains == bdev) { if (disk->fops->release) @@ -2074,7 +2068,6 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, loff_t len) { struct block_device *bdev = I_BDEV(bdev_file_inode(file)); - struct request_queue *q = bdev_get_queue(bdev); struct address_space *mapping; loff_t end = start + len - 1; loff_t isize; @@ -2110,18 +2103,13 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, case FALLOC_FL_ZERO_RANGE: case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE: error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, - GFP_KERNEL, false); + GFP_KERNEL, BLKDEV_ZERO_NOUNMAP); break; case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE: - /* Only punch if the device can do zeroing discard. */ - if (!blk_queue_discard(q) || !q->limits.discard_zeroes_data) - return -EOPNOTSUPP; - error = blkdev_issue_discard(bdev, start >> 9, len >> 9, - GFP_KERNEL, 0); + error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, + GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK); break; case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE: - if (!blk_queue_discard(q)) - return -EOPNOTSUPP; error = blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0); break; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c4115901d906..3e21211e99c3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -810,7 +810,6 @@ struct btrfs_fs_info { struct btrfs_super_block *super_for_commit; struct super_block *sb; struct inode *btree_inode; - struct backing_dev_info bdi; struct mutex tree_log_mutex; struct mutex transaction_kthread_mutex; struct mutex cleaner_mutex; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index eb1ee7b6f532..061c1d1f774f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1808,21 +1808,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) return ret; } -static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) -{ - int err; - - err = bdi_setup_and_register(bdi, "btrfs"); - if (err) - return err; - - bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; - bdi->congested_fn = btrfs_congested_fn; - bdi->congested_data = info; - bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK; - return 0; -} - /* * called by the kthread helper functions to finally call the bio end_io * functions. This is where read checksum verification actually happens @@ -2601,16 +2586,10 @@ int open_ctree(struct super_block *sb, goto fail; } - ret = setup_bdi(fs_info, &fs_info->bdi); - if (ret) { - err = ret; - goto fail_srcu; - } - ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL); if (ret) { err = ret; - goto fail_bdi; + goto fail_srcu; } fs_info->dirty_metadata_batch = PAGE_SIZE * (1 + ilog2(nr_cpu_ids)); @@ -2718,7 +2697,6 @@ int open_ctree(struct super_block *sb, sb->s_blocksize = 4096; sb->s_blocksize_bits = blksize_bits(4096); - sb->s_bdi = &fs_info->bdi; btrfs_init_btree_inode(fs_info); @@ -2915,9 +2893,12 @@ int open_ctree(struct super_block *sb, goto fail_sb_buffer; } - fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); - fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, - SZ_4M / PAGE_SIZE); + sb->s_bdi->congested_fn = btrfs_congested_fn; + sb->s_bdi->congested_data = fs_info; + sb->s_bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK; + sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; + sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super); + sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE); sb->s_blocksize = sectorsize; sb->s_blocksize_bits = blksize_bits(sectorsize); @@ -3285,8 +3266,6 @@ fail_delalloc_bytes: percpu_counter_destroy(&fs_info->delalloc_bytes); fail_dirty_metadata_bytes: percpu_counter_destroy(&fs_info->dirty_metadata_bytes); -fail_bdi: - bdi_destroy(&fs_info->bdi); fail_srcu: cleanup_srcu_struct(&fs_info->subvol_srcu); fail: @@ -4007,7 +3986,6 @@ void close_ctree(struct btrfs_fs_info *fs_info) percpu_counter_destroy(&fs_info->dirty_metadata_bytes); percpu_counter_destroy(&fs_info->delalloc_bytes); percpu_counter_destroy(&fs_info->bio_counter); - bdi_destroy(&fs_info->bdi); cleanup_srcu_struct(&fs_info->subvol_srcu); btrfs_free_stripe_hash_table(fs_info); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index a59801dc2a34..afbea61d957e 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1042,9 +1042,12 @@ static void report_reserved_underflow(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup, u64 num_bytes) { - btrfs_warn(fs_info, +#ifdef CONFIG_BTRFS_DEBUG + WARN_ON(qgroup->reserved < num_bytes); + btrfs_debug(fs_info, "qgroup %llu reserved space underflow, have: %llu, to free: %llu", qgroup->qgroupid, qgroup->reserved, num_bytes); +#endif qgroup->reserved = 0; } /* @@ -1075,7 +1078,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, qgroup->excl += sign * num_bytes; qgroup->excl_cmpr += sign * num_bytes; if (sign > 0) { - if (WARN_ON(qgroup->reserved < num_bytes)) + if (qgroup->reserved < num_bytes) report_reserved_underflow(fs_info, qgroup, num_bytes); else qgroup->reserved -= num_bytes; @@ -1100,7 +1103,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, WARN_ON(sign < 0 && qgroup->excl < num_bytes); qgroup->excl += sign * num_bytes; if (sign > 0) { - if (WARN_ON(qgroup->reserved < num_bytes)) + if (qgroup->reserved < num_bytes) report_reserved_underflow(fs_info, qgroup, num_bytes); else @@ -2469,7 +2472,7 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, qg = unode_aux_to_qgroup(unode); - if (WARN_ON(qg->reserved < num_bytes)) + if (qg->reserved < num_bytes) report_reserved_underflow(fs_info, qg, num_bytes); else qg->reserved -= num_bytes; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9530a333d302..72a053c9a7f0 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1136,6 +1136,13 @@ static int btrfs_fill_super(struct super_block *sb, #endif sb->s_flags |= MS_I_VERSION; sb->s_iflags |= SB_I_CGROUPWB; + + err = super_setup_bdi(sb); + if (err) { + btrfs_err(fs_info, "super_setup_bdi failed"); + return err; + } + err = open_ctree(sb, fs_devices, (char *)data); if (err) { btrfs_err(fs_info, "open_ctree failed"); diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 1a3e1b40799a..9ecb2fd348cb 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -578,7 +578,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) writeback_stat = atomic_long_inc_return(&fsc->writeback_count); if (writeback_stat > CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb)) - set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC); + set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); set_page_writeback(page); err = ceph_osdc_writepages(osdc, ceph_vino(inode), @@ -700,7 +700,7 @@ static void writepages_finish(struct ceph_osd_request *req) if (atomic_long_dec_return(&fsc->writeback_count) < CONGESTION_OFF_THRESH( fsc->mount_options->congestion_kb)) - clear_bdi_congested(&fsc->backing_dev_info, + clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); if (rc < 0) @@ -979,7 +979,7 @@ get_more_pages: if (atomic_long_inc_return(&fsc->writeback_count) > CONGESTION_ON_THRESH( fsc->mount_options->congestion_kb)) { - set_bdi_congested(&fsc->backing_dev_info, + set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); } diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index f2ae393e2c31..3ef11bc8d728 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -251,7 +251,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) goto out; snprintf(name, sizeof(name), "../../bdi/%s", - dev_name(fsc->backing_dev_info.dev)); + dev_name(fsc->sb->s_bdi->dev)); fsc->debugfs_bdi = debugfs_create_symlink("bdi", fsc->client->debugfs_dir, diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 0ec8d0114e57..a8c81b2052ca 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -579,10 +579,6 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, atomic_long_set(&fsc->writeback_count, 0); - err = bdi_init(&fsc->backing_dev_info); - if (err < 0) - goto fail_client; - err = -ENOMEM; /* * The number of concurrent works can be high but they don't need @@ -590,7 +586,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, */ fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1); if (fsc->wb_wq == NULL) - goto fail_bdi; + goto fail_client; fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1); if (fsc->pg_inv_wq == NULL) goto fail_wb_wq; @@ -624,8 +620,6 @@ fail_pg_inv_wq: destroy_workqueue(fsc->pg_inv_wq); fail_wb_wq: destroy_workqueue(fsc->wb_wq); -fail_bdi: - bdi_destroy(&fsc->backing_dev_info); fail_client: ceph_destroy_client(fsc->client); fail: @@ -643,8 +637,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc) destroy_workqueue(fsc->pg_inv_wq); destroy_workqueue(fsc->trunc_wq); - bdi_destroy(&fsc->backing_dev_info); - mempool_destroy(fsc->wb_pagevec_pool); destroy_mount_options(fsc->mount_options); @@ -937,33 +929,32 @@ static int ceph_compare_super(struct super_block *sb, void *data) */ static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); -static int ceph_register_bdi(struct super_block *sb, - struct ceph_fs_client *fsc) +static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) { int err; + err = super_setup_bdi_name(sb, "ceph-%ld", + atomic_long_inc_return(&bdi_seq)); + if (err) + return err; + /* set ra_pages based on rasize mount option? */ if (fsc->mount_options->rasize >= PAGE_SIZE) - fsc->backing_dev_info.ra_pages = + sb->s_bdi->ra_pages = (fsc->mount_options->rasize + PAGE_SIZE - 1) >> PAGE_SHIFT; else - fsc->backing_dev_info.ra_pages = - VM_MAX_READAHEAD * 1024 / PAGE_SIZE; + sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; if (fsc->mount_options->rsize > fsc->mount_options->rasize && fsc->mount_options->rsize >= PAGE_SIZE) - fsc->backing_dev_info.io_pages = + sb->s_bdi->io_pages = (fsc->mount_options->rsize + PAGE_SIZE - 1) >> PAGE_SHIFT; else if (fsc->mount_options->rsize == 0) - fsc->backing_dev_info.io_pages = ULONG_MAX; + sb->s_bdi->io_pages = ULONG_MAX; - err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld", - atomic_long_inc_return(&bdi_seq)); - if (!err) - sb->s_bdi = &fsc->backing_dev_info; - return err; + return 0; } static struct dentry *ceph_mount(struct file_system_type *fs_type, @@ -1018,7 +1009,7 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type, dout("get_sb got existing client %p\n", fsc); } else { dout("get_sb using new client %p\n", fsc); - err = ceph_register_bdi(sb, fsc); + err = ceph_setup_bdi(sb, fsc); if (err < 0) { res = ERR_PTR(err); goto out_splat; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index fe6b9cfc4013..176186b12457 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -92,8 +92,6 @@ struct ceph_fs_client { struct workqueue_struct *trunc_wq; atomic_long_t writeback_count; - struct backing_dev_info backing_dev_info; - #ifdef CONFIG_DEBUG_FS struct dentry *debugfs_dentry_lru, *debugfs_caps; struct dentry *debugfs_congestion_kb; diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 07ed81cf1552..cbd216b57239 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -68,7 +68,6 @@ struct cifs_sb_info { umode_t mnt_dir_mode; unsigned int mnt_cifs_flags; char *mountdata; /* options received at mount time or via DFS refs */ - struct backing_dev_info bdi; struct delayed_work prune_tlinks; struct rcu_head rcu; char *prepath; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index dd3f5fabfdf6..34fee9fb7e4f 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -138,7 +138,12 @@ cifs_read_super(struct super_block *sb) sb->s_magic = CIFS_MAGIC_NUMBER; sb->s_op = &cifs_super_ops; sb->s_xattr = cifs_xattr_handlers; - sb->s_bdi = &cifs_sb->bdi; + rc = super_setup_bdi(sb); + if (rc) + goto out_no_root; + /* tune readahead according to rsize */ + sb->s_bdi->ra_pages = cifs_sb->rsize / PAGE_SIZE; + sb->s_blocksize = CIFS_MAX_MSGSIZE; sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */ inode = cifs_root_iget(sb); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d95744d8b8ab..0f2781677ccc 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3690,10 +3690,6 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) int referral_walks_count = 0; #endif - rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs"); - if (rc) - return rc; - #ifdef CONFIG_CIFS_DFS_UPCALL try_mount_again: /* cleanup activities if we're chasing a referral */ @@ -3721,7 +3717,6 @@ try_mount_again: server = cifs_get_tcp_session(volume_info); if (IS_ERR(server)) { rc = PTR_ERR(server); - bdi_destroy(&cifs_sb->bdi); goto out; } if ((volume_info->max_credits < 20) || @@ -3778,9 +3773,6 @@ try_mount_again: cifs_sb->wsize = server->ops->negotiate_wsize(tcon, volume_info); cifs_sb->rsize = server->ops->negotiate_rsize(tcon, volume_info); - /* tune readahead according to rsize */ - cifs_sb->bdi.ra_pages = cifs_sb->rsize / PAGE_SIZE; - remote_path_check: #ifdef CONFIG_CIFS_DFS_UPCALL /* @@ -3897,7 +3889,6 @@ mount_fail_check: cifs_put_smb_ses(ses); else cifs_put_tcp_session(server, 0); - bdi_destroy(&cifs_sb->bdi); } out: @@ -4100,7 +4091,6 @@ cifs_umount(struct cifs_sb_info *cifs_sb) } spin_unlock(&cifs_sb->tlink_tree_lock); - bdi_destroy(&cifs_sb->bdi); kfree(cifs_sb->mountdata); kfree(cifs_sb->prepath); call_rcu(&cifs_sb->rcu, delayed_free); diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 2dea594da199..6058df380cc0 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -183,10 +183,6 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) goto unlock_out; } - error = bdi_setup_and_register(&vc->bdi, "coda"); - if (error) - goto unlock_out; - vc->vc_sb = sb; mutex_unlock(&vc->vc_mutex); @@ -197,7 +193,10 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = CODA_SUPER_MAGIC; sb->s_op = &coda_super_operations; sb->s_d_op = &coda_dentry_operations; - sb->s_bdi = &vc->bdi; + + error = super_setup_bdi(sb); + if (error) + goto error; /* get root fid from Venus: this needs the root inode */ error = venus_rootfid(sb, &fid); @@ -228,7 +227,6 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) error: mutex_lock(&vc->vc_mutex); - bdi_destroy(&vc->bdi); vc->vc_sb = NULL; sb->s_fs_info = NULL; unlock_out: @@ -240,7 +238,6 @@ static void coda_put_super(struct super_block *sb) { struct venus_comm *vcp = coda_vcp(sb); mutex_lock(&vcp->vc_mutex); - bdi_destroy(&vcp->bdi); vcp->vc_sb = NULL; sb->s_fs_info = NULL; mutex_unlock(&vcp->vc_mutex); @@ -991,7 +991,7 @@ int __dax_zero_page_range(struct block_device *bdev, sector_t sector, sector_t start_sector = dax.sector + (offset >> 9); return blkdev_issue_zeroout(bdev, start_sector, - length >> 9, GFP_NOFS, true); + length >> 9, GFP_NOFS, 0); } else { if (dax_map_atomic(bdev, &dax) < 0) return PTR_ERR(dax.addr); diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 95c1c8d34539..9c351bf757b2 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -349,7 +349,6 @@ struct ecryptfs_mount_crypt_stat { struct ecryptfs_sb_info { struct super_block *wsi_sb; struct ecryptfs_mount_crypt_stat mount_crypt_stat; - struct backing_dev_info bdi; }; /* file private data. */ diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 151872dcc1f4..9014479d0160 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -519,12 +519,11 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags goto out; } - rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs"); + rc = super_setup_bdi(s); if (rc) goto out1; ecryptfs_set_superblock_private(s, sbi); - s->s_bdi = &sbi->bdi; /* ->kill_sb() will take care of sbi after that point */ sbi = NULL; @@ -633,7 +632,6 @@ static void ecryptfs_kill_block_super(struct super_block *sb) if (!sb_info) return; ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat); - bdi_destroy(&sb_info->bdi); kmem_cache_free(ecryptfs_sb_info_cache, sb_info); } diff --git a/fs/exec.c b/fs/exec.c index 65145a3df065..72934df68471 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1320,6 +1320,7 @@ void setup_new_exec(struct linux_binprm * bprm) else set_dumpable(current->mm, suid_dumpable); + arch_setup_new_exec(); perf_event_exec(); __set_task_comm(current, kbasename(bprm->filename), true); diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 2e86086bc940..5dc392404559 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -64,7 +64,6 @@ struct exofs_dev { * our extension to the in-memory superblock */ struct exofs_sb_info { - struct backing_dev_info bdi; /* register our bdi with VFS */ struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/ int s_timeout; /* timeout for OSD operations */ uint64_t s_nextid; /* highest object ID used */ diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 1076a4233b39..819624cfc8da 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -464,7 +464,6 @@ static void exofs_put_super(struct super_block *sb) sbi->one_comp.obj.partition); exofs_sysfs_sb_del(sbi); - bdi_destroy(&sbi->bdi); exofs_free_sbi(sbi); sb->s_fs_info = NULL; } @@ -809,8 +808,12 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) __sbi_read_stats(sbi); /* set up operation vectors */ - sbi->bdi.ra_pages = __ra_pages(&sbi->layout); - sb->s_bdi = &sbi->bdi; + ret = super_setup_bdi(sb); + if (ret) { + EXOFS_DBGMSG("Failed to super_setup_bdi\n"); + goto free_sbi; + } + sb->s_bdi->ra_pages = __ra_pages(&sbi->layout); sb->s_fs_info = sbi; sb->s_op = &exofs_sops; sb->s_export_op = &exofs_export_ops; @@ -836,14 +839,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) goto free_sbi; } - ret = bdi_setup_and_register(&sbi->bdi, "exofs"); - if (ret) { - EXOFS_DBGMSG("Failed to bdi_setup_and_register\n"); - dput(sb->s_root); - sb->s_root = NULL; - goto free_sbi; - } - exofs_sysfs_dbg_print(); _exofs_print_device("Mounting", opts->dev_name, ore_comp_dev(&sbi->oc, 0), diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index b681b43c766e..c2d7f3a92679 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -382,9 +382,9 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) wake_up(&fc->blocked_waitq); if (fc->num_background == fc->congestion_threshold && - fc->connected && fc->bdi_initialized) { - clear_bdi_congested(&fc->bdi, BLK_RW_SYNC); - clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC); + fc->connected && fc->sb) { + clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC); + clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC); } fc->num_background--; fc->active_background--; @@ -573,10 +573,9 @@ void fuse_request_send_background_locked(struct fuse_conn *fc, fc->num_background++; if (fc->num_background == fc->max_background) fc->blocked = 1; - if (fc->num_background == fc->congestion_threshold && - fc->bdi_initialized) { - set_bdi_congested(&fc->bdi, BLK_RW_SYNC); - set_bdi_congested(&fc->bdi, BLK_RW_ASYNC); + if (fc->num_background == fc->congestion_threshold && fc->sb) { + set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC); + set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC); } list_add_tail(&req->list, &fc->bg_queue); flush_bg_queue(fc); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 32ac2c9b09c0..f33341d9501a 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -527,9 +527,6 @@ struct fuse_conn { /** Filesystem supports NFS exporting. Only set in INIT */ unsigned export_support:1; - /** Set if bdi is valid */ - unsigned bdi_initialized:1; - /** write-back cache policy (default is write-through) */ unsigned writeback_cache:1; @@ -631,9 +628,6 @@ struct fuse_conn { /** Negotiated minor version */ unsigned minor; - /** Backing dev info */ - struct backing_dev_info bdi; - /** Entry on the fuse_conn_list */ struct list_head entry; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6fe6a88ecb4a..73cf05135252 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -386,12 +386,6 @@ static void fuse_send_destroy(struct fuse_conn *fc) } } -static void fuse_bdi_destroy(struct fuse_conn *fc) -{ - if (fc->bdi_initialized) - bdi_destroy(&fc->bdi); -} - static void fuse_put_super(struct super_block *sb) { struct fuse_conn *fc = get_fuse_conn_super(sb); @@ -403,7 +397,6 @@ static void fuse_put_super(struct super_block *sb) list_del(&fc->entry); fuse_ctl_remove_conn(fc); mutex_unlock(&fuse_mutex); - fuse_bdi_destroy(fc); fuse_conn_put(fc); } @@ -928,7 +921,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->no_flock = 1; } - fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); + fc->sb->s_bdi->ra_pages = + min(fc->sb->s_bdi->ra_pages, ra_pages); fc->minor = arg->minor; fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; fc->max_write = max_t(unsigned, 4096, fc->max_write); @@ -944,7 +938,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->major = FUSE_KERNEL_VERSION; arg->minor = FUSE_KERNEL_MINOR_VERSION; - arg->max_readahead = fc->bdi.ra_pages * PAGE_SIZE; + arg->max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE; arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | @@ -976,27 +970,18 @@ static void fuse_free_conn(struct fuse_conn *fc) static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) { int err; + char *suffix = ""; - fc->bdi.name = "fuse"; - fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_SIZE; - /* fuse does it's own writeback accounting */ - fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT; - - err = bdi_init(&fc->bdi); + if (sb->s_bdev) + suffix = "-fuseblk"; + err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev), + MINOR(fc->dev), suffix); if (err) return err; - fc->bdi_initialized = 1; - - if (sb->s_bdev) { - err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk", - MAJOR(fc->dev), MINOR(fc->dev)); - } else { - err = bdi_register_dev(&fc->bdi, fc->dev); - } - - if (err) - return err; + sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_SIZE; + /* fuse does it's own writeback accounting */ + sb->s_bdi->capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT; /* * For a single fuse filesystem use max 1% of dirty + @@ -1010,7 +995,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) * * /sys/class/bdi/<bdi>/max_ratio */ - bdi_set_max_ratio(&fc->bdi, 1); + bdi_set_max_ratio(sb->s_bdi, 1); return 0; } @@ -1113,8 +1098,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (err) goto err_dev_free; - sb->s_bdi = &fc->bdi; - /* Handle umasking inside the fuse code */ if (sb->s_flags & MS_POSIXACL) fc->dont_mask = 1; @@ -1182,7 +1165,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) err_dev_free: fuse_dev_free(fud); err_put_conn: - fuse_bdi_destroy(fc); fuse_conn_put(fc); err_fput: fput(file); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index b108e7ba81af..ed67548b286c 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -23,6 +23,7 @@ #include <linux/quotaops.h> #include <linux/lockdep.h> #include <linux/module.h> +#include <linux/backing-dev.h> #include "gfs2.h" #include "incore.h" @@ -1222,12 +1223,7 @@ static int set_gfs2_super(struct super_block *s, void *data) { s->s_bdev = data; s->s_dev = s->s_bdev->bd_dev; - - /* - * We set the bdi here to the queue backing, file systems can - * overwrite this in ->fill_super() - */ - s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info; + s->s_bdi = bdi_get(s->s_bdev->bd_bdi); return 0; } diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index d5606099712a..6d0f14c86099 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -554,12 +554,11 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) sb->s_magic = NCP_SUPER_MAGIC; sb->s_op = &ncp_sops; sb->s_d_op = &ncp_dentry_operations; - sb->s_bdi = &server->bdi; server = NCP_SBP(sb); memset(server, 0, sizeof(*server)); - error = bdi_setup_and_register(&server->bdi, "ncpfs"); + error = super_setup_bdi(sb); if (error) goto out_fput; @@ -568,7 +567,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) if (data.info_fd != -1) { struct socket *info_sock = sockfd_lookup(data.info_fd, &error); if (!info_sock) - goto out_bdi; + goto out_fput; server->info_sock = info_sock; error = -EBADFD; if (info_sock->type != SOCK_STREAM) @@ -746,8 +745,6 @@ out_nls: out_fput2: if (server->info_sock) sockfd_put(server->info_sock); -out_bdi: - bdi_destroy(&server->bdi); out_fput: sockfd_put(sock); out: @@ -788,7 +785,6 @@ static void ncp_put_super(struct super_block *sb) kill_pid(server->m.wdog_pid, SIGTERM, 1); put_pid(server->m.wdog_pid); - bdi_destroy(&server->bdi); kfree(server->priv.data); kfree(server->auth.object_name); vfree(server->rxbuf); diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h index 55e26fd80886..366fd63cc506 100644 --- a/fs/ncpfs/ncp_fs_sb.h +++ b/fs/ncpfs/ncp_fs_sb.h @@ -143,7 +143,6 @@ struct ncp_server { size_t len; __u8 data[128]; } unexpected_packet; - struct backing_dev_info bdi; }; extern void ncp_tcp_rcv_proc(struct work_struct *work); diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 390ada8741bc..04d15a0045e3 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -761,9 +761,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->rsize = NFS_MAX_FILE_IO_SIZE; server->rpages = (server->rsize + PAGE_SIZE - 1) >> PAGE_SHIFT; - server->backing_dev_info.name = "nfs"; - server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; - if (server->wsize > max_rpc_payload) server->wsize = max_rpc_payload; if (server->wsize > NFS_MAX_FILE_IO_SIZE) @@ -917,12 +914,6 @@ struct nfs_server *nfs_alloc_server(void) return NULL; } - if (bdi_init(&server->backing_dev_info)) { - nfs_free_iostats(server->io_stats); - kfree(server); - return NULL; - } - ida_init(&server->openowner_id); ida_init(&server->lockowner_id); pnfs_init_server(server); @@ -953,7 +944,6 @@ void nfs_free_server(struct nfs_server *server) ida_destroy(&server->lockowner_id); ida_destroy(&server->openowner_id); nfs_free_iostats(server->io_stats); - bdi_destroy(&server->backing_dev_info); kfree(server); nfs_release_automount_timer(); dprintk("<-- nfs_free_server()\n"); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7b38fedb7e03..9dc65d7ae754 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -139,7 +139,7 @@ struct nfs_mount_request { }; struct nfs_mount_info { - void (*fill_super)(struct super_block *, struct nfs_mount_info *); + int (*fill_super)(struct super_block *, struct nfs_mount_info *); int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *); struct nfs_parsed_mount_data *parsed; struct nfs_clone_mount *cloned; @@ -407,7 +407,7 @@ struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void * struct dentry * nfs_xdev_mount_common(struct file_system_type *, int, const char *, struct nfs_mount_info *); void nfs_kill_super(struct super_block *); -void nfs_fill_super(struct super_block *, struct nfs_mount_info *); +int nfs_fill_super(struct super_block *, struct nfs_mount_info *); extern struct rpc_stat nfs_rpcstat; @@ -458,7 +458,7 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); /* super.c */ -void nfs_clone_super(struct super_block *, struct nfs_mount_info *); +int nfs_clone_super(struct super_block *, struct nfs_mount_info *); void nfs_umount_begin(struct super_block *); int nfs_statfs(struct dentry *, struct kstatfs *); int nfs_show_options(struct seq_file *, struct dentry *); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 54e0f9f2dd94..dc69314d455e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2315,18 +2315,17 @@ inline void nfs_initialise_sb(struct super_block *sb) sb->s_blocksize = nfs_block_bits(server->wsize, &sb->s_blocksize_bits); - sb->s_bdi = &server->backing_dev_info; - nfs_super_set_maxbytes(sb, server->maxfilesize); } /* * Finish setting up an NFS2/3 superblock */ -void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) +int nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) { struct nfs_parsed_mount_data *data = mount_info->parsed; struct nfs_server *server = NFS_SB(sb); + int ret; sb->s_blocksize_bits = 0; sb->s_blocksize = 0; @@ -2344,13 +2343,21 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) } nfs_initialise_sb(sb); + + ret = super_setup_bdi_name(sb, "%u:%u", MAJOR(server->s_dev), + MINOR(server->s_dev)); + if (ret) + return ret; + sb->s_bdi->ra_pages = server->rpages * NFS_MAX_READAHEAD; + return 0; + } EXPORT_SYMBOL_GPL(nfs_fill_super); /* * Finish setting up a cloned NFS2/3/4 superblock */ -void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info) +int nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info) { const struct super_block *old_sb = mount_info->cloned->sb; struct nfs_server *server = NFS_SB(sb); @@ -2370,6 +2377,10 @@ void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info) } nfs_initialise_sb(sb); + + sb->s_bdi = bdi_get(old_sb->s_bdi); + + return 0; } static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) @@ -2522,11 +2533,6 @@ static void nfs_get_cache_cookie(struct super_block *sb, } #endif -static int nfs_bdi_register(struct nfs_server *server) -{ - return bdi_register_dev(&server->backing_dev_info, server->s_dev); -} - int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, struct nfs_mount_info *mount_info) { @@ -2594,17 +2600,14 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server, nfs_free_server(server); server = NULL; } else { - error = nfs_bdi_register(server); - if (error) { - mntroot = ERR_PTR(error); - goto error_splat_super; - } server->super = s; } if (!s->s_root) { /* initial superblock/root creation */ - mount_info->fill_super(s, mount_info); + error = mount_info->fill_super(s, mount_info); + if (error) + goto error_splat_super; nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index abb2c8a3be42..cc341fc7fd44 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -263,16 +263,15 @@ int nfs_congestion_kb; static void nfs_set_page_writeback(struct page *page) { - struct nfs_server *nfss = NFS_SERVER(page_file_mapping(page)->host); + struct inode *inode = page_file_mapping(page)->host; + struct nfs_server *nfss = NFS_SERVER(inode); int ret = test_set_page_writeback(page); WARN_ON_ONCE(ret != 0); if (atomic_long_inc_return(&nfss->writeback) > - NFS_CONGESTION_ON_THRESH) { - set_bdi_congested(&nfss->backing_dev_info, - BLK_RW_ASYNC); - } + NFS_CONGESTION_ON_THRESH) + set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); } static void nfs_end_page_writeback(struct nfs_page *req) @@ -285,7 +284,7 @@ static void nfs_end_page_writeback(struct nfs_page *req) end_page_writeback(req->wb_page); if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) - clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); + clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); } @@ -1808,7 +1807,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) } nfss = NFS_SERVER(data->inode); if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) - clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); + clear_bdi_congested(inode_to_bdi(data->inode), BLK_RW_ASYNC); nfs_init_cinfo(&cinfo, data->inode, data->dreq); nfs_commit_end(cinfo.mds); diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 92b4b41d19d2..fb5213afc854 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -242,10 +242,11 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev, req->cmd[4] = bufflen & 0xff; req->cmd_len = COMMAND_SIZE(INQUIRY); - error = blk_execute_rq(rq->q, NULL, rq, 1); - if (error) { + blk_execute_rq(rq->q, NULL, rq, 1); + if (req->result) { pr_err("pNFS: INQUIRY 0x83 failed with: %x\n", - rq->errors); + req->result); + error = -EIO; goto out_put_request; } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index dba2ff8eaa68..452334694a5d 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -358,6 +358,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, { unsigned int len, v, hdr, dlen; u32 max_blocksize = svc_max_payload(rqstp); + struct kvec *head = rqstp->rq_arg.head; + struct kvec *tail = rqstp->rq_arg.tail; p = decode_fh(p, &args->fh); if (!p) @@ -367,6 +369,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, args->count = ntohl(*p++); args->stable = ntohl(*p++); len = args->len = ntohl(*p++); + if ((void *)p > head->iov_base + head->iov_len) + return 0; /* * The count must equal the amount of data passed. */ @@ -377,9 +381,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, * Check to make sure that we got the right number of * bytes. */ - hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; - dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len - + rqstp->rq_arg.tail[0].iov_len - hdr; + hdr = (void*)p - head->iov_base; + dlen = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len - hdr; /* * Round the length of the data which was specified up to * the next multiple of XDR units and then compare that @@ -396,7 +399,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, len = args->len = max_blocksize; } rqstp->rq_vec[0].iov_base = (void*)p; - rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; + rqstp->rq_vec[0].iov_len = head->iov_len - hdr; v = 0; while (len > rqstp->rq_vec[v].iov_len) { len -= rqstp->rq_vec[v].iov_len; @@ -471,6 +474,8 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, /* first copy and check from the first page */ old = (char*)p; vec = &rqstp->rq_arg.head[0]; + if ((void *)old > vec->iov_base + vec->iov_len) + return 0; avail = vec->iov_len - (old - (char*)vec->iov_base); while (len && avail && *old) { *new++ = *old++; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 31e1f9593457..59979f0bbd4b 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -747,6 +747,37 @@ static __be32 map_new_errors(u32 vers, __be32 nfserr) return nfserr; } +/* + * A write procedure can have a large argument, and a read procedure can + * have a large reply, but no NFSv2 or NFSv3 procedure has argument and + * reply that can both be larger than a page. The xdr code has taken + * advantage of this assumption to be a sloppy about bounds checking in + * some cases. Pending a rewrite of the NFSv2/v3 xdr code to fix that + * problem, we enforce these assumptions here: + */ +static bool nfs_request_too_big(struct svc_rqst *rqstp, + struct svc_procedure *proc) +{ + /* + * The ACL code has more careful bounds-checking and is not + * susceptible to this problem: + */ + if (rqstp->rq_prog != NFS_PROGRAM) + return false; + /* + * Ditto NFSv4 (which can in theory have argument and reply both + * more than a page): + */ + if (rqstp->rq_vers >= 4) + return false; + /* The reply will be small, we're OK: */ + if (proc->pc_xdrressize > 0 && + proc->pc_xdrressize < XDR_QUADLEN(PAGE_SIZE)) + return false; + + return rqstp->rq_arg.len > PAGE_SIZE; +} + int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) { @@ -759,6 +790,11 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) rqstp->rq_vers, rqstp->rq_proc); proc = rqstp->rq_procinfo; + if (nfs_request_too_big(rqstp, proc)) { + dprintk("nfsd: NFSv%d argument too large\n", rqstp->rq_vers); + *statp = rpc_garbage_args; + return 1; + } /* * Give the xdr decoder a chance to change this if it wants * (necessary in the NFSv4.0 compound case) diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 41b468a6a90f..de07ff625777 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -280,6 +280,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_writeargs *args) { unsigned int len, hdr, dlen; + struct kvec *head = rqstp->rq_arg.head; int v; p = decode_fh(p, &args->fh); @@ -300,9 +301,10 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, * Check to make sure that we got the right number of * bytes. */ - hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; - dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len - - hdr; + hdr = (void*)p - head->iov_base; + if (hdr > head->iov_len) + return 0; + dlen = head->iov_len + rqstp->rq_arg.page_len - hdr; /* * Round the length of the data which was specified up to @@ -316,7 +318,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, return 0; rqstp->rq_vec[0].iov_base = (void*)p; - rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; + rqstp->rq_vec[0].iov_len = head->iov_len - hdr; v = 0; while (len > rqstp->rq_vec[v].iov_len) { len -= rqstp->rq_vec[v].iov_len; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 19d50f600e8d..9aaf6ca77569 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1004,7 +1004,7 @@ out_nfserr: else err = nfserrno(host_err); if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) - tsk_restore_flags(current, pflags, PF_LESS_THROTTLE); + current_restore_flags(pflags, PF_LESS_THROTTLE); return err; } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index e1872f36147f..926682981d61 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1068,7 +1068,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_time_gran = 1; sb->s_max_links = NILFS_LINK_MAX; - sb->s_bdi = bdev_get_queue(sb->s_bdev)->backing_dev_info; + sb->s_bdi = bdi_get(sb->s_bdev->bd_bdi); err = load_nilfs(nilfs, sb); if (err) diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index d0ab7e56d0b4..5b51c31c892d 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1460,27 +1460,10 @@ static void o2net_rx_until_empty(struct work_struct *work) static int o2net_set_nodelay(struct socket *sock) { - int ret, val = 1; - mm_segment_t oldfs; + int val = 1; - oldfs = get_fs(); - set_fs(KERNEL_DS); - - /* - * Dear unsuspecting programmer, - * - * Don't use sock_setsockopt() for SOL_TCP. It doesn't check its level - * argument and assumes SOL_SOCKET so, say, your TCP_NODELAY will - * silently turn into SO_DEBUG. - * - * Yours, - * Keeper of hilariously fragile interfaces. - */ - ret = sock->ops->setsockopt(sock, SOL_TCP, TCP_NODELAY, - (char __user *)&val, sizeof(val)); - - set_fs(oldfs); - return ret; + return kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, + (void *)&val, sizeof(val)); } static int o2net_set_usertimeout(struct socket *sock) @@ -1488,7 +1471,7 @@ static int o2net_set_usertimeout(struct socket *sock) int user_timeout = O2NET_TCP_USER_TIMEOUT; return kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT, - (char *)&user_timeout, sizeof(user_timeout)); + (void *)&user_timeout, sizeof(user_timeout)); } static void o2net_initialize_handshake(void) diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c index 899d0ba0bd6c..06aab07b6bb7 100644 --- a/fs/pstore/ftrace.c +++ b/fs/pstore/ftrace.c @@ -37,6 +37,12 @@ static void notrace pstore_ftrace_call(unsigned long ip, { unsigned long flags; struct pstore_ftrace_record rec = {}; + struct pstore_record record = { + .type = PSTORE_TYPE_FTRACE, + .buf = (char *)&rec, + .size = sizeof(rec), + .psi = psinfo, + }; if (unlikely(oops_in_progress)) return; @@ -47,8 +53,7 @@ static void notrace pstore_ftrace_call(unsigned long ip, rec.parent_ip = parent_ip; pstore_ftrace_write_timestamp(&rec, pstore_ftrace_stamp++); pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id()); - psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec, - 0, sizeof(rec), psinfo); + psinfo->write(&record); local_irq_restore(flags); } @@ -117,7 +122,7 @@ void pstore_register_ftrace(void) { struct dentry *file; - if (!psinfo->write_buf) + if (!psinfo->write) return; pstore_ftrace_dir = debugfs_create_dir("pstore", NULL); diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 57c0646479f5..792a4e5f9226 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -47,12 +47,8 @@ static LIST_HEAD(allpstore); struct pstore_private { struct list_head list; - struct pstore_info *psi; - enum pstore_type_id type; - u64 id; - int count; - ssize_t size; - char data[]; + struct pstore_record *record; + size_t total_size; }; struct pstore_ftrace_seq_data { @@ -63,6 +59,17 @@ struct pstore_ftrace_seq_data { #define REC_SIZE sizeof(struct pstore_ftrace_record) +static void free_pstore_private(struct pstore_private *private) +{ + if (!private) + return; + if (private->record) { + kfree(private->record->buf); + kfree(private->record); + } + kfree(private); +} + static void *pstore_ftrace_seq_start(struct seq_file *s, loff_t *pos) { struct pstore_private *ps = s->private; @@ -72,9 +79,9 @@ static void *pstore_ftrace_seq_start(struct seq_file *s, loff_t *pos) if (!data) return NULL; - data->off = ps->size % REC_SIZE; + data->off = ps->total_size % REC_SIZE; data->off += *pos * REC_SIZE; - if (data->off + REC_SIZE > ps->size) { + if (data->off + REC_SIZE > ps->total_size) { kfree(data); return NULL; } @@ -94,7 +101,7 @@ static void *pstore_ftrace_seq_next(struct seq_file *s, void *v, loff_t *pos) struct pstore_ftrace_seq_data *data = v; data->off += REC_SIZE; - if (data->off + REC_SIZE > ps->size) + if (data->off + REC_SIZE > ps->total_size) return NULL; (*pos)++; @@ -105,7 +112,9 @@ static int pstore_ftrace_seq_show(struct seq_file *s, void *v) { struct pstore_private *ps = s->private; struct pstore_ftrace_seq_data *data = v; - struct pstore_ftrace_record *rec = (void *)(ps->data + data->off); + struct pstore_ftrace_record *rec; + + rec = (struct pstore_ftrace_record *)(ps->record->buf + data->off); seq_printf(s, "CPU:%d ts:%llu %08lx %08lx %pf <- %pF\n", pstore_ftrace_decode_cpu(rec), @@ -125,7 +134,7 @@ static const struct seq_operations pstore_ftrace_seq_ops = { static int pstore_check_syslog_permissions(struct pstore_private *ps) { - switch (ps->type) { + switch (ps->record->type) { case PSTORE_TYPE_DMESG: case PSTORE_TYPE_CONSOLE: return check_syslog_permissions(SYSLOG_ACTION_READ_ALL, @@ -141,9 +150,10 @@ static ssize_t pstore_file_read(struct file *file, char __user *userbuf, struct seq_file *sf = file->private_data; struct pstore_private *ps = sf->private; - if (ps->type == PSTORE_TYPE_FTRACE) + if (ps->record->type == PSTORE_TYPE_FTRACE) return seq_read(file, userbuf, count, ppos); - return simple_read_from_buffer(userbuf, count, ppos, ps->data, ps->size); + return simple_read_from_buffer(userbuf, count, ppos, + ps->record->buf, ps->total_size); } static int pstore_file_open(struct inode *inode, struct file *file) @@ -157,7 +167,7 @@ static int pstore_file_open(struct inode *inode, struct file *file) if (err) return err; - if (ps->type == PSTORE_TYPE_FTRACE) + if (ps->record->type == PSTORE_TYPE_FTRACE) sops = &pstore_ftrace_seq_ops; err = seq_open(file, sops); @@ -193,20 +203,19 @@ static const struct file_operations pstore_file_operations = { static int pstore_unlink(struct inode *dir, struct dentry *dentry) { struct pstore_private *p = d_inode(dentry)->i_private; + struct pstore_record *record = p->record; int err; err = pstore_check_syslog_permissions(p); if (err) return err; - if (p->psi->erase) { - mutex_lock(&p->psi->read_mutex); - p->psi->erase(p->type, p->id, p->count, - d_inode(dentry)->i_ctime, p->psi); - mutex_unlock(&p->psi->read_mutex); - } else { + if (!record->psi->erase) return -EPERM; - } + + mutex_lock(&record->psi->read_mutex); + record->psi->erase(record); + mutex_unlock(&record->psi->read_mutex); return simple_unlink(dir, dentry); } @@ -221,7 +230,7 @@ static void pstore_evict_inode(struct inode *inode) spin_lock_irqsave(&allpstore_lock, flags); list_del(&p->list); spin_unlock_irqrestore(&allpstore_lock, flags); - kfree(p); + free_pstore_private(p); } } @@ -302,23 +311,23 @@ bool pstore_is_mounted(void) * Load it up with "size" bytes of data from "buf". * Set the mtime & ctime to the date that this record was originally stored. */ -int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, - char *data, bool compressed, size_t size, - struct timespec time, struct pstore_info *psi) +int pstore_mkfile(struct dentry *root, struct pstore_record *record) { - struct dentry *root = pstore_sb->s_root; struct dentry *dentry; struct inode *inode; int rc = 0; char name[PSTORE_NAMELEN]; struct pstore_private *private, *pos; unsigned long flags; + size_t size = record->size + record->ecc_notice_size; + + WARN_ON(!inode_is_locked(d_inode(root))); spin_lock_irqsave(&allpstore_lock, flags); list_for_each_entry(pos, &allpstore, list) { - if (pos->type == type && - pos->id == id && - pos->psi == psi) { + if (pos->record->type == record->type && + pos->record->id == record->id && + pos->record->psi == record->psi) { rc = -EEXIST; break; } @@ -328,72 +337,74 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, return rc; rc = -ENOMEM; - inode = pstore_get_inode(pstore_sb); + inode = pstore_get_inode(root->d_sb); if (!inode) goto fail; inode->i_mode = S_IFREG | 0444; inode->i_fop = &pstore_file_operations; - private = kmalloc(sizeof *private + size, GFP_KERNEL); + private = kzalloc(sizeof(*private), GFP_KERNEL); if (!private) goto fail_alloc; - private->type = type; - private->id = id; - private->count = count; - private->psi = psi; + private->record = record; - switch (type) { + switch (record->type) { case PSTORE_TYPE_DMESG: scnprintf(name, sizeof(name), "dmesg-%s-%lld%s", - psname, id, compressed ? ".enc.z" : ""); + record->psi->name, record->id, + record->compressed ? ".enc.z" : ""); break; case PSTORE_TYPE_CONSOLE: - scnprintf(name, sizeof(name), "console-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "console-%s-%lld", + record->psi->name, record->id); break; case PSTORE_TYPE_FTRACE: - scnprintf(name, sizeof(name), "ftrace-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "ftrace-%s-%lld", + record->psi->name, record->id); break; case PSTORE_TYPE_MCE: - scnprintf(name, sizeof(name), "mce-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "mce-%s-%lld", + record->psi->name, record->id); break; case PSTORE_TYPE_PPC_RTAS: - scnprintf(name, sizeof(name), "rtas-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "rtas-%s-%lld", + record->psi->name, record->id); break; case PSTORE_TYPE_PPC_OF: scnprintf(name, sizeof(name), "powerpc-ofw-%s-%lld", - psname, id); + record->psi->name, record->id); break; case PSTORE_TYPE_PPC_COMMON: scnprintf(name, sizeof(name), "powerpc-common-%s-%lld", - psname, id); + record->psi->name, record->id); break; case PSTORE_TYPE_PMSG: - scnprintf(name, sizeof(name), "pmsg-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "pmsg-%s-%lld", + record->psi->name, record->id); break; case PSTORE_TYPE_PPC_OPAL: - sprintf(name, "powerpc-opal-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "powerpc-opal-%s-%lld", + record->psi->name, record->id); break; case PSTORE_TYPE_UNKNOWN: - scnprintf(name, sizeof(name), "unknown-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "unknown-%s-%lld", + record->psi->name, record->id); break; default: scnprintf(name, sizeof(name), "type%d-%s-%lld", - type, psname, id); + record->type, record->psi->name, record->id); break; } - inode_lock(d_inode(root)); - dentry = d_alloc_name(root, name); if (!dentry) - goto fail_lockedalloc; + goto fail_private; - memcpy(private->data, data, size); - inode->i_size = private->size = size; + inode->i_size = private->total_size = size; inode->i_private = private; - if (time.tv_sec) - inode->i_mtime = inode->i_ctime = time; + if (record->time.tv_sec) + inode->i_mtime = inode->i_ctime = record->time; d_add(dentry, inode); @@ -401,13 +412,10 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, list_add(&private->list, &allpstore); spin_unlock_irqrestore(&allpstore_lock, flags); - inode_unlock(d_inode(root)); - return 0; -fail_lockedalloc: - inode_unlock(d_inode(root)); - kfree(private); +fail_private: + free_pstore_private(private); fail_alloc: iput(inode); @@ -415,6 +423,27 @@ fail: return rc; } +/* + * Read all the records from the persistent store. Create + * files in our filesystem. Don't warn about -EEXIST errors + * when we are re-scanning the backing store looking to add new + * error records. + */ +void pstore_get_records(int quiet) +{ + struct pstore_info *psi = psinfo; + struct dentry *root; + + if (!psi || !pstore_sb) + return; + + root = pstore_sb->s_root; + + inode_lock(d_inode(root)); + pstore_get_backend_records(psi, root, quiet); + inode_unlock(d_inode(root)); +} + static int pstore_fill_super(struct super_block *sb, void *data, int silent) { struct inode *inode; diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index da416e6591c9..c416e653dc4f 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h @@ -25,10 +25,10 @@ extern struct pstore_info *psinfo; extern void pstore_set_kmsg_bytes(int); extern void pstore_get_records(int); -extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id, - int count, char *data, bool compressed, - size_t size, struct timespec time, - struct pstore_info *psi); +extern void pstore_get_backend_records(struct pstore_info *psi, + struct dentry *root, int quiet); +extern int pstore_mkfile(struct dentry *root, + struct pstore_record *record); extern bool pstore_is_mounted(void); #endif diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index efab7b64925b..d468eec9b8a6 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -267,7 +267,7 @@ static void free_zlib(void) big_oops_buf_sz = 0; } -static struct pstore_zbackend backend_zlib = { +static const struct pstore_zbackend backend_zlib = { .compress = compress_zlib, .decompress = decompress_zlib, .allocate = allocate_zlib, @@ -328,7 +328,7 @@ static void free_lzo(void) big_oops_buf_sz = 0; } -static struct pstore_zbackend backend_lzo = { +static const struct pstore_zbackend backend_lzo = { .compress = compress_lzo, .decompress = decompress_lzo, .allocate = allocate_lzo, @@ -393,7 +393,7 @@ static void free_lz4(void) big_oops_buf_sz = 0; } -static struct pstore_zbackend backend_lz4 = { +static const struct pstore_zbackend backend_lz4 = { .compress = compress_lz4, .decompress = decompress_lz4, .allocate = allocate_lz4, @@ -402,7 +402,7 @@ static struct pstore_zbackend backend_lz4 = { }; #endif -static struct pstore_zbackend *zbackend = +static const struct pstore_zbackend *zbackend = #if defined(CONFIG_PSTORE_ZLIB_COMPRESS) &backend_zlib; #elif defined(CONFIG_PSTORE_LZO_COMPRESS) @@ -484,7 +484,6 @@ static void pstore_dump(struct kmsg_dumper *dumper, { unsigned long total = 0; const char *why; - u64 id; unsigned int part = 1; unsigned long flags = 0; int is_locked; @@ -506,48 +505,59 @@ static void pstore_dump(struct kmsg_dumper *dumper, oopscount++; while (total < kmsg_bytes) { char *dst; - unsigned long size; - int hsize; + size_t dst_size; + int header_size; int zipped_len = -1; - size_t len; - bool compressed = false; - size_t total_len; + size_t dump_size; + struct pstore_record record = { + .type = PSTORE_TYPE_DMESG, + .count = oopscount, + .reason = reason, + .part = part, + .compressed = false, + .buf = psinfo->buf, + .psi = psinfo, + }; if (big_oops_buf && is_locked) { dst = big_oops_buf; - size = big_oops_buf_sz; + dst_size = big_oops_buf_sz; } else { dst = psinfo->buf; - size = psinfo->bufsize; + dst_size = psinfo->bufsize; } - hsize = sprintf(dst, "%s#%d Part%u\n", why, oopscount, part); - size -= hsize; + /* Write dump header. */ + header_size = snprintf(dst, dst_size, "%s#%d Part%u\n", why, + oopscount, part); + dst_size -= header_size; - if (!kmsg_dump_get_buffer(dumper, true, dst + hsize, - size, &len)) + /* Write dump contents. */ + if (!kmsg_dump_get_buffer(dumper, true, dst + header_size, + dst_size, &dump_size)) break; if (big_oops_buf && is_locked) { zipped_len = pstore_compress(dst, psinfo->buf, - hsize + len, psinfo->bufsize); + header_size + dump_size, + psinfo->bufsize); if (zipped_len > 0) { - compressed = true; - total_len = zipped_len; + record.compressed = true; + record.size = zipped_len; } else { - total_len = copy_kmsg_to_buffer(hsize, len); + record.size = copy_kmsg_to_buffer(header_size, + dump_size); } } else { - total_len = hsize + len; + record.size = header_size + dump_size; } - ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part, - oopscount, compressed, total_len, psinfo); + ret = psinfo->write(&record); if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted()) pstore_new_entry = 1; - total += total_len; + total += record.size; part++; } if (is_locked) @@ -577,8 +587,11 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c) const char *e = s + c; while (s < e) { + struct pstore_record record = { + .type = PSTORE_TYPE_CONSOLE, + .psi = psinfo, + }; unsigned long flags; - u64 id; if (c > psinfo->bufsize) c = psinfo->bufsize; @@ -589,8 +602,9 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c) } else { spin_lock_irqsave(&psinfo->buf_lock, flags); } - psinfo->write_buf(PSTORE_TYPE_CONSOLE, 0, &id, 0, - s, 0, c, psinfo); + record.buf = (char *)s; + record.size = c; + psinfo->write(&record); spin_unlock_irqrestore(&psinfo->buf_lock, flags); s += c; c = e - s; @@ -618,48 +632,30 @@ static void pstore_register_console(void) {} static void pstore_unregister_console(void) {} #endif -static int pstore_write_compat(enum pstore_type_id type, - enum kmsg_dump_reason reason, - u64 *id, unsigned int part, int count, - bool compressed, size_t size, - struct pstore_info *psi) -{ - return psi->write_buf(type, reason, id, part, psinfo->buf, compressed, - size, psi); -} - -static int pstore_write_buf_user_compat(enum pstore_type_id type, - enum kmsg_dump_reason reason, - u64 *id, unsigned int part, - const char __user *buf, - bool compressed, size_t size, - struct pstore_info *psi) -{ - unsigned long flags = 0; - size_t i, bufsize = size; - long ret = 0; - - if (unlikely(!access_ok(VERIFY_READ, buf, size))) - return -EFAULT; - if (bufsize > psinfo->bufsize) - bufsize = psinfo->bufsize; - spin_lock_irqsave(&psinfo->buf_lock, flags); - for (i = 0; i < size; ) { - size_t c = min(size - i, bufsize); - - ret = __copy_from_user(psinfo->buf, buf + i, c); - if (unlikely(ret != 0)) { - ret = -EFAULT; - break; - } - ret = psi->write_buf(type, reason, id, part, psinfo->buf, - compressed, c, psi); - if (unlikely(ret < 0)) - break; - i += c; +static int pstore_write_user_compat(struct pstore_record *record, + const char __user *buf) +{ + int ret = 0; + + if (record->buf) + return -EINVAL; + + record->buf = kmalloc(record->size, GFP_KERNEL); + if (!record->buf) + return -ENOMEM; + + if (unlikely(copy_from_user(record->buf, buf, record->size))) { + ret = -EFAULT; + goto out; } - spin_unlock_irqrestore(&psinfo->buf_lock, flags); - return unlikely(ret < 0) ? ret : size; + + ret = record->psi->write(record); + +out: + kfree(record->buf); + record->buf = NULL; + + return unlikely(ret < 0) ? ret : record->size; } /* @@ -673,19 +669,35 @@ int pstore_register(struct pstore_info *psi) { struct module *owner = psi->owner; - if (backend && strcmp(backend, psi->name)) + if (backend && strcmp(backend, psi->name)) { + pr_warn("ignoring unexpected backend '%s'\n", psi->name); return -EPERM; + } + + /* Sanity check flags. */ + if (!psi->flags) { + pr_warn("backend '%s' must support at least one frontend\n", + psi->name); + return -EINVAL; + } + + /* Check for required functions. */ + if (!psi->read || !psi->write) { + pr_warn("backend '%s' must implement read() and write()\n", + psi->name); + return -EINVAL; + } spin_lock(&pstore_lock); if (psinfo) { + pr_warn("backend '%s' already loaded: ignoring '%s'\n", + psinfo->name, psi->name); spin_unlock(&pstore_lock); return -EBUSY; } - if (!psi->write) - psi->write = pstore_write_compat; - if (!psi->write_buf_user) - psi->write_buf_user = pstore_write_buf_user_compat; + if (!psi->write_user) + psi->write_user = pstore_write_user_compat; psinfo = psi; mutex_init(&psinfo->read_mutex); spin_unlock(&pstore_lock); @@ -709,6 +721,7 @@ int pstore_register(struct pstore_info *psi) if (psi->flags & PSTORE_FLAGS_PMSG) pstore_register_pmsg(); + /* Start watching for new records, if desired. */ if (pstore_update_ms >= 0) { pstore_timer.expires = jiffies + msecs_to_jiffies(pstore_update_ms); @@ -721,16 +734,21 @@ int pstore_register(struct pstore_info *psi) */ backend = psi->name; - module_put(owner); - pr_info("Registered %s as persistent store backend\n", psi->name); + module_put(owner); + return 0; } EXPORT_SYMBOL_GPL(pstore_register); void pstore_unregister(struct pstore_info *psi) { + /* Stop timer and make sure all work has finished. */ + pstore_update_ms = -1; + del_timer_sync(&pstore_timer); + flush_work(&pstore_work); + if (psi->flags & PSTORE_FLAGS_PMSG) pstore_unregister_pmsg(); if (psi->flags & PSTORE_FLAGS_FTRACE) @@ -747,66 +765,99 @@ void pstore_unregister(struct pstore_info *psi) } EXPORT_SYMBOL_GPL(pstore_unregister); +static void decompress_record(struct pstore_record *record) +{ + int unzipped_len; + char *decompressed; + + /* Only PSTORE_TYPE_DMESG support compression. */ + if (!record->compressed || record->type != PSTORE_TYPE_DMESG) { + pr_warn("ignored compressed record type %d\n", record->type); + return; + } + + /* No compression method has created the common buffer. */ + if (!big_oops_buf) { + pr_warn("no decompression buffer allocated\n"); + return; + } + + unzipped_len = pstore_decompress(record->buf, big_oops_buf, + record->size, big_oops_buf_sz); + if (unzipped_len <= 0) { + pr_err("decompression failed: %d\n", unzipped_len); + return; + } + + /* Build new buffer for decompressed contents. */ + decompressed = kmalloc(unzipped_len + record->ecc_notice_size, + GFP_KERNEL); + if (!decompressed) { + pr_err("decompression ran out of memory\n"); + return; + } + memcpy(decompressed, big_oops_buf, unzipped_len); + + /* Append ECC notice to decompressed buffer. */ + memcpy(decompressed + unzipped_len, record->buf + record->size, + record->ecc_notice_size); + + /* Swap out compresed contents with decompressed contents. */ + kfree(record->buf); + record->buf = decompressed; + record->size = unzipped_len; + record->compressed = false; +} + /* - * Read all the records from the persistent store. Create + * Read all the records from one persistent store backend. Create * files in our filesystem. Don't warn about -EEXIST errors * when we are re-scanning the backing store looking to add new * error records. */ -void pstore_get_records(int quiet) -{ - struct pstore_info *psi = psinfo; - char *buf = NULL; - ssize_t size; - u64 id; - int count; - enum pstore_type_id type; - struct timespec time; - int failed = 0, rc; - bool compressed; - int unzipped_len = -1; - ssize_t ecc_notice_size = 0; - - if (!psi) +void pstore_get_backend_records(struct pstore_info *psi, + struct dentry *root, int quiet) +{ + int failed = 0; + + if (!psi || !root) return; mutex_lock(&psi->read_mutex); if (psi->open && psi->open(psi)) goto out; - while ((size = psi->read(&id, &type, &count, &time, &buf, &compressed, - &ecc_notice_size, psi)) > 0) { - if (compressed && (type == PSTORE_TYPE_DMESG)) { - if (big_oops_buf) - unzipped_len = pstore_decompress(buf, - big_oops_buf, size, - big_oops_buf_sz); - - if (unzipped_len > 0) { - if (ecc_notice_size) - memcpy(big_oops_buf + unzipped_len, - buf + size, ecc_notice_size); - kfree(buf); - buf = big_oops_buf; - size = unzipped_len; - compressed = false; - } else { - pr_err("decompression failed;returned %d\n", - unzipped_len); - compressed = true; - } + /* + * Backend callback read() allocates record.buf. decompress_record() + * may reallocate record.buf. On success, pstore_mkfile() will keep + * the record.buf, so free it only on failure. + */ + for (;;) { + struct pstore_record *record; + int rc; + + record = kzalloc(sizeof(*record), GFP_KERNEL); + if (!record) { + pr_err("out of memory creating record\n"); + break; + } + record->psi = psi; + + record->size = psi->read(record); + + /* No more records left in backend? */ + if (record->size <= 0) + break; + + decompress_record(record); + rc = pstore_mkfile(root, record); + if (rc) { + /* pstore_mkfile() did not take record, so free it. */ + kfree(record->buf); + kfree(record); + if (rc != -EEXIST || !quiet) + failed++; } - rc = pstore_mkfile(type, psi->name, id, count, buf, - compressed, size + ecc_notice_size, - time, psi); - if (unzipped_len < 0) { - /* Free buffer other than big oops */ - kfree(buf); - buf = NULL; - } else - unzipped_len = -1; - if (rc && (rc != -EEXIST || !quiet)) - failed++; } if (psi->close) psi->close(psi); @@ -830,7 +881,9 @@ static void pstore_timefunc(unsigned long dummy) schedule_work(&pstore_work); } - mod_timer(&pstore_timer, jiffies + msecs_to_jiffies(pstore_update_ms)); + if (pstore_update_ms >= 0) + mod_timer(&pstore_timer, + jiffies + msecs_to_jiffies(pstore_update_ms)); } module_param(backend, charp, 0444); diff --git a/fs/pstore/pmsg.c b/fs/pstore/pmsg.c index 78f6176c020f..209755e0d7c8 100644 --- a/fs/pstore/pmsg.c +++ b/fs/pstore/pmsg.c @@ -15,7 +15,6 @@ #include <linux/device.h> #include <linux/fs.h> #include <linux/uaccess.h> -#include <linux/vmalloc.h> #include "internal.h" static DEFINE_MUTEX(pmsg_lock); @@ -23,19 +22,22 @@ static DEFINE_MUTEX(pmsg_lock); static ssize_t write_pmsg(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - u64 id; + struct pstore_record record = { + .type = PSTORE_TYPE_PMSG, + .size = count, + .psi = psinfo, + }; int ret; if (!count) return 0; - /* check outside lock, page in any data. write_buf_user also checks */ + /* check outside lock, page in any data. write_user also checks */ if (!access_ok(VERIFY_READ, buf, count)) return -EFAULT; mutex_lock(&pmsg_lock); - ret = psinfo->write_buf_user(PSTORE_TYPE_PMSG, 0, &id, 0, buf, 0, count, - psinfo); + ret = psinfo->write_user(&record, buf); mutex_unlock(&pmsg_lock); return ret ? ret : count; } diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 11f918d34b1e..5523df7f17ef 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -235,35 +235,34 @@ static ssize_t ftrace_log_combine(struct persistent_ram_zone *dest, return 0; } -static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, - int *count, struct timespec *time, - char **buf, bool *compressed, - ssize_t *ecc_notice_size, - struct pstore_info *psi) +static ssize_t ramoops_pstore_read(struct pstore_record *record) { ssize_t size = 0; - struct ramoops_context *cxt = psi->data; + struct ramoops_context *cxt = record->psi->data; struct persistent_ram_zone *prz = NULL; int header_length = 0; bool free_prz = false; - /* Ramoops headers provide time stamps for PSTORE_TYPE_DMESG, but + /* + * Ramoops headers provide time stamps for PSTORE_TYPE_DMESG, but * PSTORE_TYPE_CONSOLE and PSTORE_TYPE_FTRACE don't currently have * valid time stamps, so it is initialized to zero. */ - time->tv_sec = 0; - time->tv_nsec = 0; - *compressed = false; + record->time.tv_sec = 0; + record->time.tv_nsec = 0; + record->compressed = false; /* Find the next valid persistent_ram_zone for DMESG */ while (cxt->dump_read_cnt < cxt->max_dump_cnt && !prz) { prz = ramoops_get_next_prz(cxt->dprzs, &cxt->dump_read_cnt, - cxt->max_dump_cnt, id, type, + cxt->max_dump_cnt, &record->id, + &record->type, PSTORE_TYPE_DMESG, 1); if (!prz_ok(prz)) continue; header_length = ramoops_read_kmsg_hdr(persistent_ram_old(prz), - time, compressed); + &record->time, + &record->compressed); /* Clear and skip this DMESG record if it has no valid header */ if (!header_length) { persistent_ram_free_old(prz); @@ -274,18 +273,20 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, if (!prz_ok(prz)) prz = ramoops_get_next_prz(&cxt->cprz, &cxt->console_read_cnt, - 1, id, type, PSTORE_TYPE_CONSOLE, 0); + 1, &record->id, &record->type, + PSTORE_TYPE_CONSOLE, 0); if (!prz_ok(prz)) prz = ramoops_get_next_prz(&cxt->mprz, &cxt->pmsg_read_cnt, - 1, id, type, PSTORE_TYPE_PMSG, 0); + 1, &record->id, &record->type, + PSTORE_TYPE_PMSG, 0); /* ftrace is last since it may want to dynamically allocate memory. */ if (!prz_ok(prz)) { if (!(cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)) { prz = ramoops_get_next_prz(cxt->fprzs, - &cxt->ftrace_read_cnt, 1, id, type, - PSTORE_TYPE_FTRACE, 0); + &cxt->ftrace_read_cnt, 1, &record->id, + &record->type, PSTORE_TYPE_FTRACE, 0); } else { /* * Build a new dummy record which combines all the @@ -302,8 +303,10 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, while (cxt->ftrace_read_cnt < cxt->max_ftrace_cnt) { prz_next = ramoops_get_next_prz(cxt->fprzs, &cxt->ftrace_read_cnt, - cxt->max_ftrace_cnt, id, - type, PSTORE_TYPE_FTRACE, 0); + cxt->max_ftrace_cnt, + &record->id, + &record->type, + PSTORE_TYPE_FTRACE, 0); if (!prz_ok(prz_next)) continue; @@ -316,7 +319,7 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, if (size) goto out; } - *id = 0; + record->id = 0; prz = tmp_prz; } } @@ -329,17 +332,19 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, size = persistent_ram_old_size(prz) - header_length; /* ECC correction notice */ - *ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0); + record->ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0); - *buf = kmalloc(size + *ecc_notice_size + 1, GFP_KERNEL); - if (*buf == NULL) { + record->buf = kmalloc(size + record->ecc_notice_size + 1, GFP_KERNEL); + if (record->buf == NULL) { size = -ENOMEM; goto out; } - memcpy(*buf, (char *)persistent_ram_old(prz) + header_length, size); + memcpy(record->buf, (char *)persistent_ram_old(prz) + header_length, + size); - persistent_ram_ecc_string(prz, *buf + size, *ecc_notice_size + 1); + persistent_ram_ecc_string(prz, record->buf + size, + record->ecc_notice_size + 1); out: if (free_prz) { @@ -373,23 +378,18 @@ static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz, return len; } -static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, - enum kmsg_dump_reason reason, - u64 *id, unsigned int part, - const char *buf, - bool compressed, size_t size, - struct pstore_info *psi) +static int notrace ramoops_pstore_write(struct pstore_record *record) { - struct ramoops_context *cxt = psi->data; + struct ramoops_context *cxt = record->psi->data; struct persistent_ram_zone *prz; - size_t hlen; + size_t size, hlen; - if (type == PSTORE_TYPE_CONSOLE) { + if (record->type == PSTORE_TYPE_CONSOLE) { if (!cxt->cprz) return -ENOMEM; - persistent_ram_write(cxt->cprz, buf, size); + persistent_ram_write(cxt->cprz, record->buf, record->size); return 0; - } else if (type == PSTORE_TYPE_FTRACE) { + } else if (record->type == PSTORE_TYPE_FTRACE) { int zonenum; if (!cxt->fprzs) @@ -402,33 +402,36 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, else zonenum = 0; - persistent_ram_write(cxt->fprzs[zonenum], buf, size); + persistent_ram_write(cxt->fprzs[zonenum], record->buf, + record->size); return 0; - } else if (type == PSTORE_TYPE_PMSG) { + } else if (record->type == PSTORE_TYPE_PMSG) { pr_warn_ratelimited("PMSG shouldn't call %s\n", __func__); return -EINVAL; } - if (type != PSTORE_TYPE_DMESG) + if (record->type != PSTORE_TYPE_DMESG) return -EINVAL; - /* Out of the various dmesg dump types, ramoops is currently designed + /* + * Out of the various dmesg dump types, ramoops is currently designed * to only store crash logs, rather than storing general kernel logs. */ - if (reason != KMSG_DUMP_OOPS && - reason != KMSG_DUMP_PANIC) + if (record->reason != KMSG_DUMP_OOPS && + record->reason != KMSG_DUMP_PANIC) return -EINVAL; /* Skip Oopes when configured to do so. */ - if (reason == KMSG_DUMP_OOPS && !cxt->dump_oops) + if (record->reason == KMSG_DUMP_OOPS && !cxt->dump_oops) return -EINVAL; - /* Explicitly only take the first part of any new crash. + /* + * Explicitly only take the first part of any new crash. * If our buffer is larger than kmsg_bytes, this can never happen, * and if our buffer is smaller than kmsg_bytes, we don't want the * report split across multiple records. */ - if (part != 1) + if (record->part != 1) return -ENOSPC; if (!cxt->dprzs) @@ -436,53 +439,50 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, prz = cxt->dprzs[cxt->dump_write_cnt]; - hlen = ramoops_write_kmsg_hdr(prz, compressed); + /* Build header and append record contents. */ + hlen = ramoops_write_kmsg_hdr(prz, record->compressed); + size = record->size; if (size + hlen > prz->buffer_size) size = prz->buffer_size - hlen; - persistent_ram_write(prz, buf, size); + persistent_ram_write(prz, record->buf, size); cxt->dump_write_cnt = (cxt->dump_write_cnt + 1) % cxt->max_dump_cnt; return 0; } -static int notrace ramoops_pstore_write_buf_user(enum pstore_type_id type, - enum kmsg_dump_reason reason, - u64 *id, unsigned int part, - const char __user *buf, - bool compressed, size_t size, - struct pstore_info *psi) +static int notrace ramoops_pstore_write_user(struct pstore_record *record, + const char __user *buf) { - if (type == PSTORE_TYPE_PMSG) { - struct ramoops_context *cxt = psi->data; + if (record->type == PSTORE_TYPE_PMSG) { + struct ramoops_context *cxt = record->psi->data; if (!cxt->mprz) return -ENOMEM; - return persistent_ram_write_user(cxt->mprz, buf, size); + return persistent_ram_write_user(cxt->mprz, buf, record->size); } return -EINVAL; } -static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, int count, - struct timespec time, struct pstore_info *psi) +static int ramoops_pstore_erase(struct pstore_record *record) { - struct ramoops_context *cxt = psi->data; + struct ramoops_context *cxt = record->psi->data; struct persistent_ram_zone *prz; - switch (type) { + switch (record->type) { case PSTORE_TYPE_DMESG: - if (id >= cxt->max_dump_cnt) + if (record->id >= cxt->max_dump_cnt) return -EINVAL; - prz = cxt->dprzs[id]; + prz = cxt->dprzs[record->id]; break; case PSTORE_TYPE_CONSOLE: prz = cxt->cprz; break; case PSTORE_TYPE_FTRACE: - if (id >= cxt->max_ftrace_cnt) + if (record->id >= cxt->max_ftrace_cnt) return -EINVAL; - prz = cxt->fprzs[id]; + prz = cxt->fprzs[record->id]; break; case PSTORE_TYPE_PMSG: prz = cxt->mprz; @@ -503,8 +503,8 @@ static struct ramoops_context oops_cxt = { .name = "ramoops", .open = ramoops_pstore_open, .read = ramoops_pstore_read, - .write_buf = ramoops_pstore_write_buf, - .write_buf_user = ramoops_pstore_write_buf_user, + .write = ramoops_pstore_write, + .write_user = ramoops_pstore_write_user, .erase = ramoops_pstore_erase, }, }; diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index bc927e30bdcc..e11672aa4575 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -532,7 +532,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, } /* Initialize general buffer state. */ - prz->buffer_lock = __RAW_SPIN_LOCK_UNLOCKED(buffer_lock); + raw_spin_lock_init(&prz->buffer_lock); prz->flags = flags; ret = persistent_ram_buffer_map(start, size, prz, memtype); diff --git a/fs/super.c b/fs/super.c index b8b6a086c03b..adb0c0de428c 100644 --- a/fs/super.c +++ b/fs/super.c @@ -446,6 +446,10 @@ void generic_shutdown_super(struct super_block *sb) hlist_del_init(&sb->s_instances); spin_unlock(&sb_lock); up_write(&sb->s_umount); + if (sb->s_bdi != &noop_backing_dev_info) { + bdi_put(sb->s_bdi); + sb->s_bdi = &noop_backing_dev_info; + } } EXPORT_SYMBOL(generic_shutdown_super); @@ -1049,12 +1053,8 @@ static int set_bdev_super(struct super_block *s, void *data) { s->s_bdev = data; s->s_dev = s->s_bdev->bd_dev; + s->s_bdi = bdi_get(s->s_bdev->bd_bdi); - /* - * We set the bdi here to the queue backing, file systems can - * overwrite this in ->fill_super() - */ - s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info; return 0; } @@ -1256,6 +1256,49 @@ out: } /* + * Setup private BDI for given superblock. It gets automatically cleaned up + * in generic_shutdown_super(). + */ +int super_setup_bdi_name(struct super_block *sb, char *fmt, ...) +{ + struct backing_dev_info *bdi; + int err; + va_list args; + + bdi = bdi_alloc(GFP_KERNEL); + if (!bdi) + return -ENOMEM; + + bdi->name = sb->s_type->name; + + va_start(args, fmt); + err = bdi_register_va(bdi, fmt, args); + va_end(args); + if (err) { + bdi_put(bdi); + return err; + } + WARN_ON(sb->s_bdi != &noop_backing_dev_info); + sb->s_bdi = bdi; + + return 0; +} +EXPORT_SYMBOL(super_setup_bdi_name); + +/* + * Setup private BDI for given superblock. I gets automatically cleaned up + * in generic_shutdown_super(). + */ +int super_setup_bdi(struct super_block *sb) +{ + static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); + + return super_setup_bdi_name(sb, "%.28s-%ld", sb->s_type->name, + atomic_long_inc_return(&bdi_seq)); +} +EXPORT_SYMBOL(super_setup_bdi); + +/* * This is an internal function, please use sb_end_{write,pagefault,intwrite} * instead. */ diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index b73811bd7676..cf4cc99b75b5 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1827,7 +1827,6 @@ static void ubifs_put_super(struct super_block *sb) } ubifs_umount(c); - bdi_destroy(&c->bdi); ubi_close_volume(c->ubi); mutex_unlock(&c->umount_mutex); } @@ -2019,29 +2018,25 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) goto out; } + err = ubifs_parse_options(c, data, 0); + if (err) + goto out_close; + /* * UBIFS provides 'backing_dev_info' in order to disable read-ahead. For * UBIFS, I/O is not deferred, it is done immediately in readpage, * which means the user would have to wait not just for their own I/O * but the read-ahead I/O as well i.e. completely pointless. * - * Read-ahead will be disabled because @c->bdi.ra_pages is 0. + * Read-ahead will be disabled because @sb->s_bdi->ra_pages is 0. Also + * @sb->s_bdi->capabilities are initialized to 0 so there won't be any + * writeback happening. */ - c->bdi.name = "ubifs", - c->bdi.capabilities = 0; - err = bdi_init(&c->bdi); + err = super_setup_bdi_name(sb, "ubifs_%d_%d", c->vi.ubi_num, + c->vi.vol_id); if (err) goto out_close; - err = bdi_register(&c->bdi, NULL, "ubifs_%d_%d", - c->vi.ubi_num, c->vi.vol_id); - if (err) - goto out_bdi; - - err = ubifs_parse_options(c, data, 0); - if (err) - goto out_bdi; - sb->s_bdi = &c->bdi; sb->s_fs_info = c; sb->s_magic = UBIFS_SUPER_MAGIC; sb->s_blocksize = UBIFS_BLOCK_SIZE; @@ -2080,8 +2075,6 @@ out_umount: ubifs_umount(c); out_unlock: mutex_unlock(&c->umount_mutex); -out_bdi: - bdi_destroy(&c->bdi); out_close: ubi_close_volume(c->ubi); out: diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 4d57e488038e..4da10a6d702a 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -972,7 +972,6 @@ struct ubifs_debug_info; * struct ubifs_info - UBIFS file-system description data structure * (per-superblock). * @vfs_sb: VFS @struct super_block object - * @bdi: backing device info object to make VFS happy and disable read-ahead * * @highest_inum: highest used inode number * @max_sqnum: current global sequence number @@ -1220,7 +1219,6 @@ struct ubifs_debug_info; */ struct ubifs_info { struct super_block *vfs_sb; - struct backing_dev_info bdi; ino_t highest_inum; unsigned long long max_sqnum; @@ -1461,7 +1459,6 @@ extern const struct inode_operations ubifs_file_inode_operations; extern const struct file_operations ubifs_dir_operations; extern const struct inode_operations ubifs_dir_inode_operations; extern const struct inode_operations ubifs_symlink_inode_operations; -extern struct backing_dev_info ubifs_backing_dev_info; extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; /* io.c */ diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 828532ce0adc..8795e9cd867c 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -81,7 +81,7 @@ xfs_zero_extent( return blkdev_issue_zeroout(xfs_find_bdev_for_inode(VFS_I(ip)), block << (mp->m_super->s_blocksize_bits - 9), count_fsb << (mp->m_super->s_blocksize_bits - 9), - GFP_NOFS, true); + GFP_NOFS, 0); } int |