From be3a5d233922d73f27002ce2767f6ec03c3f473d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Jun 2015 19:51:55 +0800 Subject: NFSv.2/pnfs Add a LAYOUTSTATS rpc function Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs42.h | 9 +++- fs/nfs/nfs42proc.c | 27 ++++++++++++ fs/nfs/nfs42xdr.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 4 +- fs/nfs/nfs4xdr.c | 1 + 6 files changed, 161 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 7afb8947dfdf..ff66ae700b89 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -5,11 +5,18 @@ #ifndef __LINUX_FS_NFS_NFS4_2_H #define __LINUX_FS_NFS_NFS4_2_H +/* + * FIXME: four LAYOUTSTATS calls per compound at most! Do we need to support + * more? Need to consider not to pre-alloc too much for a compound. + */ +#define PNFS_LAYOUTSTATS_MAXDEV (4) + /* nfs4.2proc.c */ int nfs42_proc_allocate(struct file *, loff_t, loff_t); int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); - +int nfs42_proc_layoutstats_generic(struct nfs_server *, + struct nfs42_layoutstat_data *); /* nfs4.2xdr.h */ extern struct rpc_procinfo nfs4_2_procedures[]; diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 3a9e75235f30..ac929685350b 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -165,3 +165,30 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes); } + +static const struct rpc_call_ops nfs42_layoutstat_ops = { +}; + +int nfs42_proc_layoutstats_generic(struct nfs_server *server, + struct nfs42_layoutstat_data *data) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTSTATS], + .rpc_argp = &data->args, + .rpc_resp = &data->res, + }; + struct rpc_task_setup task_setup = { + .rpc_client = server->client, + .rpc_message = &msg, + .callback_ops = &nfs42_layoutstat_ops, + .callback_data = data, + .flags = RPC_TASK_ASYNC, + }; + struct rpc_task *task; + + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + task = rpc_run_task(&task_setup); + if (IS_ERR(task)) + return PTR_ERR(task); + return 0; +} diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 1a25b27248f2..9aae0205ef11 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -4,6 +4,8 @@ #ifndef __LINUX_FS_NFS_NFS4_2XDR_H #define __LINUX_FS_NFS_NFS4_2XDR_H +#include "nfs42.h" + #define encode_fallocate_maxsz (encode_stateid_maxsz + \ 2 /* offset */ + \ 2 /* length */) @@ -22,6 +24,16 @@ 1 /* whence */ + \ 2 /* offset */ + \ 2 /* length */) +#define encode_io_info_maxsz 4 +#define encode_layoutstats_maxsz (op_decode_hdr_maxsz + \ + 2 /* offset */ + \ + 2 /* length */ + \ + encode_stateid_maxsz + \ + encode_io_info_maxsz + \ + encode_io_info_maxsz + \ + 1 /* opaque devaddr4 length */ + \ + XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE)) +#define decode_layoutstats_maxsz (op_decode_hdr_maxsz) #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -45,6 +57,14 @@ #define NFS4_dec_seek_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ decode_seek_maxsz) +#define NFS4_enc_layoutstats_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + PNFS_LAYOUTSTATS_MAXDEV * encode_layoutstats_maxsz) +#define NFS4_dec_layoutstats_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz) static void encode_fallocate(struct xdr_stream *xdr, @@ -81,6 +101,33 @@ static void encode_seek(struct xdr_stream *xdr, encode_uint32(xdr, args->sa_what); } +static void encode_layoutstats(struct xdr_stream *xdr, + struct nfs42_layoutstat_args *args, + struct nfs42_layoutstat_devinfo *devinfo, + struct compound_hdr *hdr) +{ + __be32 *p; + + encode_op_hdr(xdr, OP_LAYOUTSTATS, decode_layoutstats_maxsz, hdr); + p = reserve_space(xdr, 8 + 8); + p = xdr_encode_hyper(p, devinfo->offset); + p = xdr_encode_hyper(p, devinfo->length); + encode_nfs4_stateid(xdr, &args->stateid); + p = reserve_space(xdr, 4*8 + NFS4_DEVICEID4_SIZE + 4); + p = xdr_encode_hyper(p, devinfo->read_count); + p = xdr_encode_hyper(p, devinfo->read_bytes); + p = xdr_encode_hyper(p, devinfo->write_count); + p = xdr_encode_hyper(p, devinfo->write_bytes); + p = xdr_encode_opaque_fixed(p, devinfo->dev_id.data, + NFS4_DEVICEID4_SIZE); + /* Encode layoutupdate4 */ + *p++ = cpu_to_be32(devinfo->layout_type); + if (devinfo->layoutstats_encode != NULL) + devinfo->layoutstats_encode(xdr, args, devinfo); + else + encode_uint32(xdr, 0); +} + /* * Encode ALLOCATE request */ @@ -137,6 +184,28 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode LAYOUTSTATS request + */ +static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_layoutstat_args *args) +{ + int i; + + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + WARN_ON(args->num_dev > PNFS_LAYOUTSTATS_MAXDEV); + for (i = 0; i < args->num_dev; i++) + encode_layoutstats(xdr, args, &args->devinfo[i], &hdr); + encode_nops(&hdr); +} + static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) { return decode_op_hdr(xdr, OP_ALLOCATE); @@ -169,6 +238,28 @@ out_overflow: return -EIO; } +static int decode_layoutstats(struct xdr_stream *xdr, + struct nfs42_layoutstat_res *res) +{ + int status; + __be32 *p; + + status = decode_op_hdr(xdr, OP_LAYOUTSTATS); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + + res->rpc_status = be32_to_cpup(p++); + return 0; + +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + /* * Decode ALLOCATE request */ @@ -246,4 +337,35 @@ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp, out: return status; } + +/* + * Decode LAYOUTSTATS request + */ +static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_layoutstat_res *res) +{ + struct compound_hdr hdr; + int status, i; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + WARN_ON(res->num_dev > PNFS_LAYOUTSTATS_MAXDEV); + for (i = 0; i < res->num_dev; i++) { + status = decode_layoutstats(xdr, res); + if (status) + goto out; + } +out: + res->rpc_status = status; + return status; +} + #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index fdef424b0cd3..ea3bee919a76 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -233,6 +233,7 @@ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, struct rpc_message *, struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); +extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e9cd45f1d60f..643ce3a91b22 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -476,8 +476,8 @@ struct nfs4_call_sync_data { struct nfs4_sequence_res *seq_res; }; -static void nfs4_init_sequence(struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, int cache_reply) +void nfs4_init_sequence(struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, int cache_reply) { args->sa_slot = NULL; args->sa_cache_this = cache_reply; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index a07555d6968c..558cd65dbdb7 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7431,6 +7431,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(SEEK, enc_seek, dec_seek), PROC(ALLOCATE, enc_allocate, dec_allocate), PROC(DEALLOCATE, enc_deallocate, dec_deallocate), + PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats), #endif /* CONFIG_NFS_V4_2 */ }; -- cgit v1.2.3-58-ga151 From 1b4a4bd82c308aed36f13fff4beb33d3840cfa0e Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:51:56 +0800 Subject: pNFS: fill in nfs42_layoutstat_ops Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index ac929685350b..a0530418255f 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -10,6 +10,11 @@ #include #include "nfs4_fs.h" #include "nfs42.h" +#include "iostat.h" +#include "pnfs.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file, fmode_t fmode) @@ -166,7 +171,44 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes); } +static void +nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs42_layoutstat_data *data = calldata; + struct nfs_server *server = NFS_SERVER(data->args.inode); + + nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args, + &data->res.seq_res, task); +} + +static void +nfs42_layoutstat_done(struct rpc_task *task, void *calldata) +{ + struct nfs42_layoutstat_data *data = calldata; + + if (!nfs4_sequence_done(task, &data->res.seq_res)) + return; + + /* well, we don't care about errors at all! */ + if (task->tk_status) + dprintk("%s server returns %d\n", __func__, task->tk_status); +} + +static void +nfs42_layoutstat_release(void *calldata) +{ + struct nfs42_layoutstat_data *data = calldata; + + pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout); + nfs_iput_and_deactive(data->inode); + kfree(data->args.devinfo); + kfree(data); +} + static const struct rpc_call_ops nfs42_layoutstat_ops = { + .rpc_call_prepare = nfs42_layoutstat_prepare, + .rpc_call_done = nfs42_layoutstat_done, + .rpc_release = nfs42_layoutstat_release, }; int nfs42_proc_layoutstats_generic(struct nfs_server *server, @@ -186,6 +228,11 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server, }; struct rpc_task *task; + data->inode = nfs_igrab_and_active(data->args.inode); + if (!data->inode) { + nfs42_layoutstat_release(data); + return -EAGAIN; + } nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); task = rpc_run_task(&task_setup); if (IS_ERR(task)) -- cgit v1.2.3-58-ga151 From 8733408d6ed713d080c325262d7b51a780136d41 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:51:57 +0800 Subject: pnfs: add pnfs_report_layoutstat helper function Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 4 ++++ fs/nfs/pnfs.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/pnfs.h | 3 +++ 3 files changed, 56 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index a0530418255f..ee0248340a42 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -198,6 +198,10 @@ static void nfs42_layoutstat_release(void *calldata) { struct nfs42_layoutstat_data *data = calldata; + struct nfs_server *nfss = NFS_SERVER(data->args.inode); + + if (nfss->pnfs_curr_ld->cleanup_layoutstats) + nfss->pnfs_curr_ld->cleanup_layoutstats(data); pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout); nfs_iput_and_deactive(data->inode); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d47c188682b1..6279cff7df74 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -35,6 +35,7 @@ #include "iostat.h" #include "nfs4trace.h" #include "delegation.h" +#include "nfs42.h" #define NFSDBG_FACILITY NFSDBG_PNFS #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) @@ -2250,3 +2251,51 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) } return thp; } + +int +pnfs_report_layoutstat(struct inode *inode) +{ + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs42_layoutstat_data *data; + struct pnfs_layout_hdr *hdr; + int status = 0; + + if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats) + goto out; + + spin_lock(&inode->i_lock); + if (!NFS_I(inode)->layout) { + spin_unlock(&inode->i_lock); + goto out; + } + hdr = NFS_I(inode)->layout; + pnfs_get_layout_hdr(hdr); + spin_unlock(&inode->i_lock); + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) { + status = -ENOMEM; + goto out_put; + } + + data->args.fh = NFS_FH(inode); + data->args.inode = inode; + nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid); + status = ld->prepare_layoutstats(&data->args); + if (status) + goto out_free; + + status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data); + +out: + dprintk("%s returns %d\n", __func__, status); + return status; + +out_free: + kfree(data); +out_put: + pnfs_put_layout_hdr(hdr); + goto out; +} +EXPORT_SYMBOL_GPL(pnfs_report_layoutstat); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1e6308f82fc3..0a4723945db2 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -178,6 +178,8 @@ struct pnfs_layoutdriver_type { void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo, struct xdr_stream *xdr, const struct nfs4_layoutcommit_args *args); + int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args); + void (*cleanup_layoutstats) (struct nfs42_layoutstat_data *data); }; struct pnfs_layout_hdr { @@ -290,6 +292,7 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); void pnfs_error_mark_layout_for_return(struct inode *inode, struct pnfs_layout_segment *lseg); +int pnfs_report_layoutstat(struct inode *inode); /* nfs4_deviceid_flags */ enum { -- cgit v1.2.3-58-ga151 From 27797d1bb36bf9e21fa14094f23ab785b442bd76 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Jun 2015 19:51:58 +0800 Subject: pNFS/flexfiles: Remove unused struct members user_name, group_name Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 070f20445b2d..275e4b735247 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -48,8 +48,6 @@ struct nfs4_ff_layout_mirror { u32 fh_versions_cnt; struct nfs_fh *fh_versions; nfs4_stateid stateid; - struct nfs4_string user_name; - struct nfs4_string group_name; u32 uid; u32 gid; struct rpc_cred *cred; -- cgit v1.2.3-58-ga151 From abcb7bfc9fdecd1550b98a5e13d40bfba6c0649b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Jun 2015 19:51:59 +0800 Subject: pNFS/flexfiles: add layoutstats tracking Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 192 +++++++++++++++++++++++++++++++-- fs/nfs/flexfilelayout/flexfilelayout.h | 22 ++++ 2 files changed, 203 insertions(+), 11 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 0f1410c94827..8ac31fbf040a 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -412,6 +412,130 @@ ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls) return 1; } +static void +nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer) +{ + ktime_t old, new; + + /* + * Note: careful here! + * If the counter is zero, then we must not increment it until after + * we've set the start_time. + * If we were instead to use atomic_inc_return(), then another + * request might come in, bump, and then call end_busy_timer() + * before we've set the timer->start_time. + */ + old = timer->start_time; + if (atomic_inc_not_zero(&timer->n_ops) == 0) { + new = ktime_get(); + cmpxchg(&timer->start_time.tv64, old.tv64, new.tv64); + atomic_inc(&timer->n_ops); + } +} + +static ktime_t +nfs4_ff_end_busy_timer(struct nfs4_ff_busy_timer *timer) +{ + ktime_t start, now; + + now = ktime_get(); + start.tv64 = xchg(&timer->start_time.tv64, now.tv64); + atomic_dec(&timer->n_ops); + return ktime_sub(now, start); +} + +static ktime_t +nfs4_ff_layout_calc_completion_time(struct rpc_task *task) +{ + return ktime_sub(ktime_get(), task->tk_start); +} + +static void +nfs4_ff_layoutstat_start_io(struct nfs4_ff_layoutstat *layoutstat) +{ + nfs4_ff_start_busy_timer(&layoutstat->busy_timer); +} + +static void +nfs4_ff_layout_stat_io_update_requested(struct nfs4_ff_layoutstat *layoutstat, + __u64 requested) +{ + struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat; + + iostat->ops_requested++; + iostat->bytes_requested += requested; +} + +static void +nfs4_ff_layout_stat_io_update_completed(struct nfs4_ff_layoutstat *layoutstat, + __u64 requested, + __u64 completed, + ktime_t time_completed) +{ + struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat; + ktime_t timer; + + iostat->ops_completed++; + iostat->bytes_completed += completed; + iostat->bytes_not_delivered += requested - completed; + + timer = nfs4_ff_end_busy_timer(&layoutstat->busy_timer); + iostat->total_busy_time = + ktime_add(iostat->total_busy_time, timer); + iostat->aggregate_completion_time = + ktime_add(iostat->aggregate_completion_time, time_completed); +} + +static void +nfs4_ff_layout_stat_io_start_read(struct nfs4_ff_layout_mirror *mirror, + __u64 requested) +{ + spin_lock(&mirror->lock); + nfs4_ff_layoutstat_start_io(&mirror->read_stat); + nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested); + spin_unlock(&mirror->lock); +} + +static void +nfs4_ff_layout_stat_io_end_read(struct rpc_task *task, + struct nfs4_ff_layout_mirror *mirror, + __u64 requested, + __u64 completed) +{ + spin_lock(&mirror->lock); + nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat, + requested, completed, + nfs4_ff_layout_calc_completion_time(task)); + spin_unlock(&mirror->lock); +} + +static void +nfs4_ff_layout_stat_io_start_write(struct nfs4_ff_layout_mirror *mirror, + __u64 requested) +{ + spin_lock(&mirror->lock); + nfs4_ff_layoutstat_start_io(&mirror->write_stat); + nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested); + spin_unlock(&mirror->lock); +} + +static void +nfs4_ff_layout_stat_io_end_write(struct rpc_task *task, + struct nfs4_ff_layout_mirror *mirror, + __u64 requested, + __u64 completed, + enum nfs3_stable_how committed) +{ + if (committed == NFS_UNSTABLE) + requested = completed = 0; + + spin_lock(&mirror->lock); + nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat, + requested, completed, + nfs4_ff_layout_calc_completion_time(task)); + spin_unlock(&mirror->lock); +} + static int ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo, @@ -906,6 +1030,10 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx) static int ff_layout_read_prepare_common(struct rpc_task *task, struct nfs_pgio_header *hdr) { + nfs4_ff_layout_stat_io_start_read( + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count); + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return -EIO; @@ -959,15 +1087,15 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - if (ff_layout_read_prepare_common(task, hdr)) - return; - if (ff_layout_setup_sequence(hdr->ds_clp, &hdr->args.seq_args, &hdr->res.seq_res, task)) return; + if (ff_layout_read_prepare_common(task, hdr)) + return; + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, hdr->args.lock_context, FMODE_READ) == -EIO) rpc_exit(task, -EIO); /* lost lock, terminate I/O */ @@ -979,6 +1107,10 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data) dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); + nfs4_ff_layout_stat_io_end_read(task, + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count, hdr->res.count); + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs4_sequence_done(task, &hdr->res.seq_res); @@ -1080,6 +1212,10 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, static int ff_layout_write_prepare_common(struct rpc_task *task, struct nfs_pgio_header *hdr) { + nfs4_ff_layout_stat_io_start_write( + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count); + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return -EIO; @@ -1113,15 +1249,15 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - if (ff_layout_write_prepare_common(task, hdr)) - return; - if (ff_layout_setup_sequence(hdr->ds_clp, &hdr->args.seq_args, &hdr->res.seq_res, task)) return; + if (ff_layout_write_prepare_common(task, hdr)) + return; + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, hdr->args.lock_context, FMODE_WRITE) == -EIO) rpc_exit(task, -EIO); /* lost lock, terminate I/O */ @@ -1131,6 +1267,11 @@ static void ff_layout_write_call_done(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; + nfs4_ff_layout_stat_io_end_write(task, + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count, hdr->res.count, + hdr->res.verf->committed); + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs4_sequence_done(task, &hdr->res.seq_res); @@ -1149,8 +1290,17 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data) &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]); } +static void ff_layout_commit_prepare_common(struct rpc_task *task, + struct nfs_commit_data *cdata) +{ + nfs4_ff_layout_stat_io_start_write( + FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), + 0); +} + static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data) { + ff_layout_commit_prepare_common(task, data); rpc_call_start(task); } @@ -1158,10 +1308,30 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data) { struct nfs_commit_data *wdata = data; - ff_layout_setup_sequence(wdata->ds_clp, + if (ff_layout_setup_sequence(wdata->ds_clp, &wdata->args.seq_args, &wdata->res.seq_res, - task); + task)) + return; + ff_layout_commit_prepare_common(task, data); +} + +static void ff_layout_commit_done(struct rpc_task *task, void *data) +{ + struct nfs_commit_data *cdata = data; + struct nfs_page *req; + __u64 count = 0; + + if (task->tk_status == 0) { + list_for_each_entry(req, &cdata->pages, wb_list) + count += req->wb_bytes; + } + + nfs4_ff_layout_stat_io_end_write(task, + FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), + count, count, NFS_FILE_SYNC); + + pnfs_generic_write_commit_done(task, data); } static void ff_layout_commit_count_stats(struct rpc_task *task, void *data) @@ -1202,14 +1372,14 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v4 = { static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = { .rpc_call_prepare = ff_layout_commit_prepare_v3, - .rpc_call_done = pnfs_generic_write_commit_done, + .rpc_call_done = ff_layout_commit_done, .rpc_count_stats = ff_layout_commit_count_stats, .rpc_release = pnfs_generic_commit_release, }; static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = { .rpc_call_prepare = ff_layout_commit_prepare_v4, - .rpc_call_done = pnfs_generic_write_commit_done, + .rpc_call_done = ff_layout_commit_done, .rpc_count_stats = ff_layout_commit_count_stats, .rpc_release = pnfs_generic_commit_release, }; @@ -1253,7 +1423,6 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) fh = nfs4_ff_layout_select_ds_fh(lseg, idx); if (fh) hdr->args.fh = fh; - /* * Note that if we ever decide to split across DSes, * then we may need to handle dense-like offsets. @@ -1382,6 +1551,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); if (fh) data->args.fh = fh; + return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops, vers == 3 ? &ff_layout_commit_call_ops_v3 : &ff_layout_commit_call_ops_v4, diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 275e4b735247..f7493f7cf13c 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -41,6 +41,26 @@ struct nfs4_ff_layout_ds_err { struct nfs4_deviceid deviceid; }; +struct nfs4_ff_io_stat { + __u64 ops_requested; + __u64 bytes_requested; + __u64 ops_completed; + __u64 bytes_completed; + __u64 bytes_not_delivered; + ktime_t total_busy_time; + ktime_t aggregate_completion_time; +}; + +struct nfs4_ff_busy_timer { + ktime_t start_time; + atomic_t n_ops; +}; + +struct nfs4_ff_layoutstat { + struct nfs4_ff_io_stat io_stat; + struct nfs4_ff_busy_timer busy_timer; +}; + struct nfs4_ff_layout_mirror { u32 ds_count; u32 efficiency; @@ -52,6 +72,8 @@ struct nfs4_ff_layout_mirror { u32 gid; struct rpc_cred *cred; spinlock_t lock; + struct nfs4_ff_layoutstat read_stat; + struct nfs4_ff_layoutstat write_stat; }; struct nfs4_ff_layout_segment { -- cgit v1.2.3-58-ga151 From d983803d3818a7aef3200147c72a45f573d84537 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:52:00 +0800 Subject: pNFS/flexfiles: track when layout is first used So that we can report cumulative time since the beginning of statistics collection of the layout. Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 10 +++++++--- fs/nfs/flexfilelayout/flexfilelayout.h | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 8ac31fbf040a..4215278c9c76 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -451,9 +451,13 @@ nfs4_ff_layout_calc_completion_time(struct rpc_task *task) } static void -nfs4_ff_layoutstat_start_io(struct nfs4_ff_layoutstat *layoutstat) +nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, + struct nfs4_ff_layoutstat *layoutstat) { + static const ktime_t notime = {0}; + nfs4_ff_start_busy_timer(&layoutstat->busy_timer); + cmpxchg(&mirror->start_time, notime, ktime_get()); } static void @@ -491,7 +495,7 @@ nfs4_ff_layout_stat_io_start_read(struct nfs4_ff_layout_mirror *mirror, __u64 requested) { spin_lock(&mirror->lock); - nfs4_ff_layoutstat_start_io(&mirror->read_stat); + nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat); nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested); spin_unlock(&mirror->lock); } @@ -514,7 +518,7 @@ nfs4_ff_layout_stat_io_start_write(struct nfs4_ff_layout_mirror *mirror, __u64 requested) { spin_lock(&mirror->lock); - nfs4_ff_layoutstat_start_io(&mirror->write_stat); + nfs4_ff_layoutstat_start_io(mirror, &mirror->write_stat); nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested); spin_unlock(&mirror->lock); } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index f7493f7cf13c..0e7366f44d11 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -74,6 +74,7 @@ struct nfs4_ff_layout_mirror { spinlock_t lock; struct nfs4_ff_layoutstat read_stat; struct nfs4_ff_layoutstat write_stat; + ktime_t start_time; }; struct nfs4_ff_layout_segment { -- cgit v1.2.3-58-ga151 From ad4dc53e6496ba0b3c18aeda6f3367d9800b6ebf Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:52:01 +0800 Subject: pnfs/flexfiles: add ff_layout_prepare_layoutstats It fills in the generic part of LAYOUTSTATS call. One thing to note is that we don't really track if IO is continuous or not. So just fake to use the completed bytes for it. Still missing flexfiles specific part, which will be included in the next patch. Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 71 ++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 4215278c9c76..cd999af504d9 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -20,6 +20,7 @@ #include "../nfs4trace.h" #include "../iostat.h" #include "../nfs.h" +#include "../nfs42.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -1659,6 +1660,75 @@ out: dprintk("%s: Return\n", __func__); } +static bool +ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, + struct pnfs_layout_segment *pls, + int *dev_count, int dev_limit) +{ + struct nfs4_ff_layout_mirror *mirror; + struct nfs4_deviceid_node *dev; + struct nfs42_layoutstat_devinfo *devinfo; + int i; + + for (i = 0; i <= FF_LAYOUT_MIRROR_COUNT(pls); i++) { + if (*dev_count >= dev_limit) + break; + mirror = FF_LAYOUT_COMP(pls, i); + dev = FF_LAYOUT_DEVID_NODE(pls, i); + devinfo = &args->devinfo[*dev_count]; + memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE); + devinfo->offset = pls->pls_range.offset; + devinfo->length = pls->pls_range.length; + /* well, we don't really know if IO is continuous or not! */ + devinfo->read_count = mirror->read_stat.io_stat.bytes_completed; + devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed; + devinfo->write_count = mirror->write_stat.io_stat.bytes_completed; + devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed; + devinfo->layout_type = LAYOUT_FLEX_FILES; + devinfo->layoutstats_encode = NULL; + devinfo->layout_private = NULL; + + ++(*dev_count); + } + + return *dev_count < dev_limit; +} + +static int +ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args) +{ + struct pnfs_layout_segment *pls; + int dev_count = 0; + + spin_lock(&args->inode->i_lock); + list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) { + dev_count += FF_LAYOUT_MIRROR_COUNT(pls); + } + spin_unlock(&args->inode->i_lock); + /* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */ + if (dev_count > PNFS_LAYOUTSTATS_MAXDEV) { + dprintk("%s: truncating devinfo to limit (%d:%d)\n", + __func__, dev_count, PNFS_LAYOUTSTATS_MAXDEV); + dev_count = PNFS_LAYOUTSTATS_MAXDEV; + } + args->devinfo = kmalloc(dev_count * sizeof(*args->devinfo), GFP_KERNEL); + if (!args->devinfo) + return -ENOMEM; + + dev_count = 0; + spin_lock(&args->inode->i_lock); + list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) { + if (!ff_layout_mirror_prepare_stats(args, pls, &dev_count, + PNFS_LAYOUTSTATS_MAXDEV)) { + break; + } + } + spin_unlock(&args->inode->i_lock); + args->num_dev = dev_count; + + return 0; +} + static struct pnfs_layoutdriver_type flexfilelayout_type = { .id = LAYOUT_FLEX_FILES, .name = "LAYOUT_FLEX_FILES", @@ -1681,6 +1751,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .alloc_deviceid_node = ff_layout_alloc_deviceid_node, .encode_layoutreturn = ff_layout_encode_layoutreturn, .sync = pnfs_nfs_generic_sync, + .prepare_layoutstats = ff_layout_prepare_layoutstats, }; static int __init nfs4flexfilelayout_init(void) -- cgit v1.2.3-58-ga151 From 27c430644369ccd9a2272492ba6d0e85e2e4800b Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:52:02 +0800 Subject: pnfs/flexfiles: encode LAYOUTSTATS flexfiles specific data Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 178 ++++++++++++++++++++++++++++++++- fs/nfs/flexfilelayout/flexfilelayout.h | 1 + 2 files changed, 177 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index cd999af504d9..6bcb6d6c9dc3 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -272,6 +272,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, spin_lock_init(&fls->mirror_array[i]->lock); fls->mirror_array[i]->ds_count = ds_count; + fls->mirror_array[i]->lseg = &fls->generic_hdr; /* deviceid */ rc = decode_deviceid(&stream, &devid); @@ -1660,6 +1661,161 @@ out: dprintk("%s: Return\n", __func__); } +static int +ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen) +{ + const struct sockaddr_in *sin = (struct sockaddr_in *)sap; + + return snprintf(buf, buflen, "%pI4", &sin->sin_addr); +} + +static size_t +ff_layout_ntop6_noscopeid(const struct sockaddr *sap, char *buf, + const int buflen) +{ + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + const struct in6_addr *addr = &sin6->sin6_addr; + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded ANY address + */ + if (ipv6_addr_any(addr)) + return snprintf(buf, buflen, "::"); + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded loopback address + */ + if (ipv6_addr_loopback(addr)) + return snprintf(buf, buflen, "::1"); + + /* + * RFC 4291, Section 2.2.3 + * + * Special presentation address format for mapped v4 + * addresses. + */ + if (ipv6_addr_v4mapped(addr)) + return snprintf(buf, buflen, "::ffff:%pI4", + &addr->s6_addr32[3]); + + /* + * RFC 4291, Section 2.2.1 + */ + return snprintf(buf, buflen, "%pI6c", addr); +} + +/* Derived from rpc_sockaddr2uaddr */ +static void +ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da) +{ + struct sockaddr *sap = (struct sockaddr *)&da->da_addr; + char portbuf[RPCBIND_MAXUADDRPLEN]; + char addrbuf[RPCBIND_MAXUADDRLEN]; + char *netid; + unsigned short port; + int len, netid_len; + __be32 *p; + + switch (sap->sa_family) { + case AF_INET: + if (ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0) + return; + port = ntohs(((struct sockaddr_in *)sap)->sin_port); + netid = "tcp"; + netid_len = 3; + break; + case AF_INET6: + if (ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0) + return; + port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port); + netid = "tcp6"; + netid_len = 4; + break; + default: + /* we only support tcp and tcp6 */ + WARN_ON_ONCE(1); + return; + } + + snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff); + len = strlcat(addrbuf, portbuf, sizeof(addrbuf)); + + p = xdr_reserve_space(xdr, 4 + netid_len); + xdr_encode_opaque(p, netid, netid_len); + + p = xdr_reserve_space(xdr, 4 + len); + xdr_encode_opaque(p, addrbuf, len); +} + +static void +ff_layout_encode_nfstime(struct xdr_stream *xdr, + ktime_t t) +{ + struct timespec64 ts; + __be32 *p; + + p = xdr_reserve_space(xdr, 12); + ts = ktime_to_timespec64(t); + p = xdr_encode_hyper(p, ts.tv_sec); + *p++ = cpu_to_be32(ts.tv_nsec); +} + +static void +ff_layout_encode_io_latency(struct xdr_stream *xdr, + struct nfs4_ff_io_stat *stat) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 5 * 8); + p = xdr_encode_hyper(p, stat->ops_requested); + p = xdr_encode_hyper(p, stat->bytes_requested); + p = xdr_encode_hyper(p, stat->ops_completed); + p = xdr_encode_hyper(p, stat->bytes_completed); + p = xdr_encode_hyper(p, stat->bytes_not_delivered); + ff_layout_encode_nfstime(xdr, stat->total_busy_time); + ff_layout_encode_nfstime(xdr, stat->aggregate_completion_time); +} + +static void +ff_layout_encode_layoutstats(struct xdr_stream *xdr, + struct nfs42_layoutstat_args *args, + struct nfs42_layoutstat_devinfo *devinfo) +{ + struct nfs4_ff_layout_mirror *mirror = devinfo->layout_private; + struct nfs4_pnfs_ds_addr *da; + struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds; + struct nfs_fh *fh = &mirror->fh_versions[0]; + __be32 *p, *start; + + da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node); + dprintk("%s: DS %s: encoding address %s\n", + __func__, ds->ds_remotestr, da->da_remotestr); + /* layoutupdate length */ + start = xdr_reserve_space(xdr, 4); + /* netaddr4 */ + ff_layout_encode_netaddr(xdr, da); + /* nfs_fh4 */ + p = xdr_reserve_space(xdr, 4 + fh->size); + xdr_encode_opaque(p, fh->data, fh->size); + /* ff_io_latency4 read */ + spin_lock(&mirror->lock); + ff_layout_encode_io_latency(xdr, &mirror->read_stat.io_stat); + /* ff_io_latency4 write */ + ff_layout_encode_io_latency(xdr, &mirror->write_stat.io_stat); + spin_unlock(&mirror->lock); + /* nfstime4 */ + ff_layout_encode_nfstime(xdr, ktime_sub(ktime_get(), mirror->start_time)); + /* bool */ + p = xdr_reserve_space(xdr, 4); + *p = cpu_to_be32(false); + + *start = cpu_to_be32((xdr->p - start - 1) * 4); +} + static bool ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, struct pnfs_layout_segment *pls, @@ -1674,6 +1830,8 @@ ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, if (*dev_count >= dev_limit) break; mirror = FF_LAYOUT_COMP(pls, i); + if (!mirror || !mirror->mirror_ds) + continue; dev = FF_LAYOUT_DEVID_NODE(pls, i); devinfo = &args->devinfo[*dev_count]; memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE); @@ -1685,8 +1843,10 @@ ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, devinfo->write_count = mirror->write_stat.io_stat.bytes_completed; devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed; devinfo->layout_type = LAYOUT_FLEX_FILES; - devinfo->layoutstats_encode = NULL; - devinfo->layout_private = NULL; + devinfo->layoutstats_encode = ff_layout_encode_layoutstats; + devinfo->layout_private = mirror; + /* lseg refcount put in cleanup_layoutstats */ + pnfs_get_lseg(pls); ++(*dev_count); } @@ -1729,6 +1889,19 @@ ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args) return 0; } +static void +ff_layout_cleanup_layoutstats(struct nfs42_layoutstat_data *data) +{ + struct nfs4_ff_layout_mirror *mirror; + int i; + + for (i = 0; i < data->args.num_dev; i++) { + mirror = data->args.devinfo[i].layout_private; + data->args.devinfo[i].layout_private = NULL; + pnfs_put_lseg(mirror->lseg); + } +} + static struct pnfs_layoutdriver_type flexfilelayout_type = { .id = LAYOUT_FLEX_FILES, .name = "LAYOUT_FLEX_FILES", @@ -1752,6 +1925,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .encode_layoutreturn = ff_layout_encode_layoutreturn, .sync = pnfs_nfs_generic_sync, .prepare_layoutstats = ff_layout_prepare_layoutstats, + .cleanup_layoutstats = ff_layout_cleanup_layoutstats, }; static int __init nfs4flexfilelayout_init(void) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 0e7366f44d11..7e248874f46d 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -75,6 +75,7 @@ struct nfs4_ff_layout_mirror { struct nfs4_ff_layoutstat read_stat; struct nfs4_ff_layoutstat write_stat; ktime_t start_time; + struct pnfs_layout_segment *lseg; /* back pointer */ }; struct nfs4_ff_layout_segment { -- cgit v1.2.3-58-ga151 From 1bfe3b259ff2e579b2acb190f874397d53274497 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:52:03 +0800 Subject: nfs42: serialize LAYOUTSTATS calls of the same file There is no need to report concurrently. Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 3 +++ fs/nfs/pnfs.c | 7 +++++++ include/linux/nfs_fs.h | 1 + 3 files changed, 11 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index ee0248340a42..06c74cd93875 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -204,6 +204,9 @@ nfs42_layoutstat_release(void *calldata) nfss->pnfs_curr_ld->cleanup_layoutstats(data); pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout); + smp_mb__before_atomic(); + clear_bit(NFS_INO_LAYOUTSTATS, &NFS_I(data->args.inode)->flags); + smp_mb__after_atomic(); nfs_iput_and_deactive(data->inode); kfree(data->args.devinfo); kfree(data); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6279cff7df74..5b5224341bcd 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2257,6 +2257,7 @@ pnfs_report_layoutstat(struct inode *inode) { struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; struct nfs_server *server = NFS_SERVER(inode); + struct nfs_inode *nfsi = NFS_I(inode); struct nfs42_layoutstat_data *data; struct pnfs_layout_hdr *hdr; int status = 0; @@ -2264,6 +2265,9 @@ pnfs_report_layoutstat(struct inode *inode) if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats) goto out; + if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags)) + goto out; + spin_lock(&inode->i_lock); if (!NFS_I(inode)->layout) { spin_unlock(&inode->i_lock); @@ -2296,6 +2300,9 @@ out_free: kfree(data); out_put: pnfs_put_layout_hdr(hdr); + smp_mb__before_atomic(); + clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags); + smp_mb__after_atomic(); goto out; } EXPORT_SYMBOL_GPL(pnfs_report_layoutstat); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b95f914ce083..f91b5ade30c9 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -219,6 +219,7 @@ struct nfs_inode { #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ +#define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { -- cgit v1.2.3-58-ga151 From 97ba375b5df43790aa82c256f7f16ee8eb95c272 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:52:04 +0800 Subject: pnfs/flexfiles: report layoutstat regularly As a simple scheme, report every minute if IO is still going on. Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 27 +++++++++++++++++++++++---- fs/nfs/flexfilelayout/flexfilelayout.h | 6 +++++- 2 files changed, 28 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 6bcb6d6c9dc3..d3b3b6236711 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -452,14 +452,23 @@ nfs4_ff_layout_calc_completion_time(struct rpc_task *task) return ktime_sub(ktime_get(), task->tk_start); } -static void +static bool nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, struct nfs4_ff_layoutstat *layoutstat) { static const ktime_t notime = {0}; + ktime_t now = ktime_get(); nfs4_ff_start_busy_timer(&layoutstat->busy_timer); - cmpxchg(&mirror->start_time, notime, ktime_get()); + cmpxchg(&mirror->start_time.tv64, notime.tv64, now.tv64); + cmpxchg(&mirror->last_report_time.tv64, notime.tv64, now.tv64); + if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >= + FF_LAYOUTSTATS_REPORT_INTERVAL) { + mirror->last_report_time = now; + return true; + } + + return false; } static void @@ -496,10 +505,15 @@ static void nfs4_ff_layout_stat_io_start_read(struct nfs4_ff_layout_mirror *mirror, __u64 requested) { + bool report; + spin_lock(&mirror->lock); - nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat); + report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat); nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested); spin_unlock(&mirror->lock); + + if (report) + pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode); } static void @@ -519,10 +533,15 @@ static void nfs4_ff_layout_stat_io_start_write(struct nfs4_ff_layout_mirror *mirror, __u64 requested) { + bool report; + spin_lock(&mirror->lock); - nfs4_ff_layoutstat_start_io(mirror, &mirror->write_stat); + report = nfs4_ff_layoutstat_start_io(mirror , &mirror->write_stat); nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested); spin_unlock(&mirror->lock); + + if (report) + pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode); } static void diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 7e248874f46d..6fcd8d5e8e3d 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -15,6 +15,9 @@ * due to network error etc. */ #define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096 +/* LAYOUTSTATS report interval in ms */ +#define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L) + struct nfs4_ff_ds_version { u32 version; u32 minor_version; @@ -62,6 +65,7 @@ struct nfs4_ff_layoutstat { }; struct nfs4_ff_layout_mirror { + struct pnfs_layout_segment *lseg; /* back pointer */ u32 ds_count; u32 efficiency; struct nfs4_ff_layout_ds *mirror_ds; @@ -75,7 +79,7 @@ struct nfs4_ff_layout_mirror { struct nfs4_ff_layoutstat read_stat; struct nfs4_ff_layoutstat write_stat; ktime_t start_time; - struct pnfs_layout_segment *lseg; /* back pointer */ + ktime_t last_report_time; }; struct nfs4_ff_layout_segment { -- cgit v1.2.3-58-ga151 From 865a7ecb2131a3ba26cc1d49daf18275375109f0 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 25 Jun 2015 18:19:32 +0800 Subject: nfs: provide pnfs_report_layoutstat when NFS42 is disabled kbuild test robot reported: fs/built-in.o: In function `pnfs_report_layoutstat': >> (.text+0x151a1c): undefined reference to `nfs42_proc_layoutstats_generic' Reported-by: kbuild test robot Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 ++ fs/nfs/pnfs.h | 12 ++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 5b5224341bcd..40bacebb5b97 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2252,6 +2252,7 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) return thp; } +#if IS_ENABLED(CONFIG_NFS_V4_2) int pnfs_report_layoutstat(struct inode *inode) { @@ -2306,3 +2307,4 @@ out_put: goto out; } EXPORT_SYMBOL_GPL(pnfs_report_layoutstat); +#endif diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 0a4723945db2..3e6ab7bfbabd 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -292,8 +292,6 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); void pnfs_error_mark_layout_for_return(struct inode *inode, struct pnfs_layout_segment *lseg); -int pnfs_report_layoutstat(struct inode *inode); - /* nfs4_deviceid_flags */ enum { NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ @@ -692,4 +690,14 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void) #endif /* CONFIG_NFS_V4_1 */ +#if IS_ENABLED(CONFIG_NFS_V4_2) +int pnfs_report_layoutstat(struct inode *inode); +#else +static inline int +pnfs_report_layoutstat(struct inode *inode) +{ + return 0; +} +#endif + #endif /* FS_NFS_PNFS_H */ -- cgit v1.2.3-58-ga151 From 9bbd9bb40cfae5ff35710e88866fcadb0e8f91ed Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 26 Jun 2015 09:45:49 +0800 Subject: pnfs/flexfiles: protect ktime manipulation with mirror lock It looks as if xchg() and cmpxchg() are not available for 64-bit integers on sparc32: > New breakage seen in linux-next today: > > ERROR: "__xchg_called_with_bad_pointer" [fs/nfs/flexfilelayout/nfs_layout_flexfiles.ko] undefined! > ERROR: "__cmpxchg_called_with_bad_pointer" [fs/nfs/flexfilelayout/nfs_layout_flexfiles.ko] undefined! > make[2]: *** [__modpost] Error 1 > make[1]: *** [modules] Error 2 Given that mirror ktime manipulation is already under mirror->lock, let's make use of the fact. Reported-by: Paul Gortmaker Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index d3b3b6236711..85b4234b8090 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -417,21 +417,9 @@ ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls) static void nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer) { - ktime_t old, new; - - /* - * Note: careful here! - * If the counter is zero, then we must not increment it until after - * we've set the start_time. - * If we were instead to use atomic_inc_return(), then another - * request might come in, bump, and then call end_busy_timer() - * before we've set the timer->start_time. - */ - old = timer->start_time; - if (atomic_inc_not_zero(&timer->n_ops) == 0) { - new = ktime_get(); - cmpxchg(&timer->start_time.tv64, old.tv64, new.tv64); - atomic_inc(&timer->n_ops); + /* first IO request? */ + if (atomic_inc_return(&timer->n_ops) == 1) { + timer->start_time = ktime_get(); } } @@ -440,9 +428,12 @@ nfs4_ff_end_busy_timer(struct nfs4_ff_busy_timer *timer) { ktime_t start, now; + if (atomic_dec_return(&timer->n_ops) < 0) + WARN_ON_ONCE(1); + now = ktime_get(); - start.tv64 = xchg(&timer->start_time.tv64, now.tv64); - atomic_dec(&timer->n_ops); + start = timer->start_time; + timer->start_time = now; return ktime_sub(now, start); } @@ -460,8 +451,10 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, ktime_t now = ktime_get(); nfs4_ff_start_busy_timer(&layoutstat->busy_timer); - cmpxchg(&mirror->start_time.tv64, notime.tv64, now.tv64); - cmpxchg(&mirror->last_report_time.tv64, notime.tv64, now.tv64); + if (ktime_equal(mirror->start_time, notime)) + mirror->start_time = now; + if (ktime_equal(mirror->last_report_time, notime)) + mirror->last_report_time = now; if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >= FF_LAYOUTSTATS_REPORT_INTERVAL) { mirror->last_report_time = now; -- cgit v1.2.3-58-ga151