From 022cae36551ba805bf82084047945b2901d75f36 Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Fri, 28 Jan 2011 14:11:13 -0800 Subject: [net/9p] Preparation and helper functions for zero copy This patch prepares the p9_fcall structure for zero copy. The added fields pass the payload buffer information to the transport layer. In addition it adds a 'private' field for the transport layer to store mapped/pinned page information so that it can be freed/unpinned during req_done. This patch also creates trans_common.[ch] to house helper functions. It adds the following helper functions. p9_release_req_pages - Release pages after the transaction. p9_nr_pages - Return number of pages needed to accommodate the payload. p9_payload_gup - Translates user buffer into kernel pages. Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- net/9p/Makefile | 1 + net/9p/protocol.c | 4 +++ net/9p/trans_common.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++ net/9p/trans_common.h | 29 +++++++++++++++ 4 files changed, 131 insertions(+) create mode 100644 net/9p/trans_common.c create mode 100644 net/9p/trans_common.h (limited to 'net') diff --git a/net/9p/Makefile b/net/9p/Makefile index 198a640d53a6..a0874cc1f718 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o util.o \ protocol.o \ trans_fd.o \ + trans_common.o \ 9pnet_virtio-objs := \ trans_virtio.o \ diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 1e308f210928..d888847db56e 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -606,6 +606,10 @@ void p9pdu_reset(struct p9_fcall *pdu) { pdu->offset = 0; pdu->size = 0; + pdu->private = NULL; + pdu->pubuf = NULL; + pdu->pkbuf = NULL; + pdu->pbuf_size = 0; } int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c new file mode 100644 index 000000000000..d62b9aa58df8 --- /dev/null +++ b/net/9p/trans_common.c @@ 
-0,0 +1,97 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Venkateswararao Jujjuri + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +#include +#include +#include +#include +#include +#include "trans_common.h" + +/** + * p9_release_req_pages - Release pages after the transaction. + * @*private: PDU's private page of struct trans_rpage_info + */ +void +p9_release_req_pages(struct trans_rpage_info *rpinfo) +{ + int i = 0; + + while (rpinfo->rp_data[i] && rpinfo->rp_nr_pages--) { + put_page(rpinfo->rp_data[i]); + i++; + } +} +EXPORT_SYMBOL(p9_release_req_pages); + +/** + * p9_nr_pages - Return number of pages needed to accomodate the payload. + */ +int +p9_nr_pages(struct p9_req_t *req) +{ + int start_page, end_page; + start_page = (unsigned long long)req->tc->pubuf >> PAGE_SHIFT; + end_page = ((unsigned long long)req->tc->pubuf + req->tc->pbuf_size + + PAGE_SIZE - 1) >> PAGE_SHIFT; + return end_page - start_page; +} +EXPORT_SYMBOL(p9_nr_pages); + +/** + * payload_gup - Translates user buffer into kernel pages and + * pins them either for read/write through get_user_pages_fast(). + * @req: Request to be sent to server. + * @pdata_off: data offset into the first page after translation (gup). + * @pdata_len: Total length of the IO. gup may not return requested # of pages. + * @nr_pages: number of pages to accomodate the payload + * @rw: Indicates if the pages are for read or write. 
+ */ +int +p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len, + int nr_pages, u8 rw) +{ + uint32_t first_page_bytes = 0; + uint32_t pdata_mapped_pages; + struct trans_rpage_info *rpinfo; + + *pdata_off = (size_t)req->tc->pubuf & (PAGE_SIZE-1); + + if (*pdata_off) + first_page_bytes = min((PAGE_SIZE - *pdata_off), + req->tc->pbuf_size); + + rpinfo = req->tc->private; + pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf, + nr_pages, rw, &rpinfo->rp_data[0]); + + if (pdata_mapped_pages < 0) { + printk(KERN_ERR "get_user_pages_fast failed:%d udata:%p" + "nr_pages:%d\n", pdata_mapped_pages, + req->tc->pubuf, nr_pages); + pdata_mapped_pages = 0; + return -EIO; + } + rpinfo->rp_nr_pages = pdata_mapped_pages; + if (*pdata_off) { + *pdata_len = first_page_bytes; + *pdata_len += min((req->tc->pbuf_size - *pdata_len), + ((size_t)pdata_mapped_pages - 1) << PAGE_SHIFT); + } else { + *pdata_len = min(req->tc->pbuf_size, + (size_t)pdata_mapped_pages << PAGE_SHIFT); + } + return 0; +} +EXPORT_SYMBOL(p9_payload_gup); diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h new file mode 100644 index 000000000000..04977e0ad938 --- /dev/null +++ b/net/9p/trans_common.h @@ -0,0 +1,29 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Venkateswararao Jujjuri + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +/** + * struct trans_rpage_info - To store mapped page information in PDU. + * @rp_alloc:Set if this structure is allocd, not a reuse unused space in pdu. 
+ * @rp_nr_pages: Number of mapped pages + * @rp_data: Array of page pointers + */ +struct trans_rpage_info { + u8 rp_alloc; + int rp_nr_pages; + struct page *rp_data[0]; +}; + +void p9_release_req_pages(struct trans_rpage_info *); +int p9_payload_gup(struct p9_req_t *, size_t *, int *, int, u8); +int p9_nr_pages(struct p9_req_t *); -- cgit v1.2.3-58-ga151 From 9bb6c10a4ed48aef49a7243a6f798694722cf380 Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Wed, 2 Feb 2011 17:52:46 -0800 Subject: [net/9p] Assign type of transaction to tc->pdu->id which is otherwise unused. This will be used by the transport layer to determine the outgoing request type. The transport layer uses this information to correctly place the mapped pages in the PDU. Patches following this will make use of this to achieve zero copy. Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- net/9p/protocol.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/9p/protocol.c b/net/9p/protocol.c index d888847db56e..5936c50d0c93 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -579,6 +579,7 @@ EXPORT_SYMBOL(p9stat_read); int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type) { + pdu->id = type; return p9pdu_writef(pdu, 0, "dbw", 0, type, tag); } -- cgit v1.2.3-58-ga151 From 4038866dab4e461e0ef144458bad9d70ce0c98c1 Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Fri, 28 Jan 2011 15:22:36 -0800 Subject: [net/9p] Add gup/zero_copy support to VirtIO transport layer. Modify p9_virtio_request() and req_done() functions to support additional payload sent down to the transport layer through tc->pubuf and tc->pkbuf. 
Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- net/9p/trans_common.h | 3 ++ net/9p/trans_virtio.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 126 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h index 04977e0ad938..76309223bb02 100644 --- a/net/9p/trans_common.h +++ b/net/9p/trans_common.h @@ -12,6 +12,9 @@ * */ +/* TRUE if it is user context */ +#define P9_IS_USER_CONTEXT (!segment_eq(get_fs(), KERNEL_DS)) + /** * struct trans_rpage_info - To store mapped page information in PDU. * @rp_alloc:Set if this structure is allocd, not a reuse unused space in pdu. diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index c8f3f72ab20e..4b236de132da 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -45,6 +45,7 @@ #include #include #include +#include "trans_common.h" #define VIRTQUEUE_NUM 128 @@ -155,6 +156,14 @@ static void req_done(struct virtqueue *vq) rc->tag); req = p9_tag_lookup(chan->client, rc->tag); req->status = REQ_STATUS_RCVD; + if (req->tc->private) { + struct trans_rpage_info *rp = req->tc->private; + /*Release pages */ + p9_release_req_pages(rp); + if (rp->rp_alloc) + kfree(rp); + req->tc->private = NULL; + } p9_client_cb(chan->client, req); } else { spin_unlock_irqrestore(&chan->lock, flags); @@ -202,6 +211,38 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) return 1; } +/** + * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer, + * this takes a list of pages. + * @sg: scatter/gather list to pack into + * @start: which segment of the sg_list to start at + * @pdata_off: Offset into the first page + * @**pdata: a list of pages to add into sg. 
+ * @count: amount of data to pack into the scatter/gather list + */ +static int +pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, + struct page **pdata, int count) +{ + int s; + int i = 0; + int index = start; + + if (pdata_off) { + s = min((int)(PAGE_SIZE - pdata_off), count); + sg_set_page(&sg[index++], pdata[i++], s, pdata_off); + count -= s; + } + + while (count) { + BUG_ON(index > limit); + s = min((int)PAGE_SIZE, count); + sg_set_page(&sg[index++], pdata[i++], s, 0); + count -= s; + } + return index-start; +} + /** * p9_virtio_request - issue a request * @client: client instance issuing the request @@ -212,22 +253,97 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) static int p9_virtio_request(struct p9_client *client, struct p9_req_t *req) { - int in, out; + int in, out, inp, outp; struct virtio_chan *chan = client->trans; char *rdata = (char *)req->rc+sizeof(struct p9_fcall); unsigned long flags; - int err; + size_t pdata_off = 0; + struct trans_rpage_info *rpinfo = NULL; + int err, pdata_len = 0; P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); req_retry: req->status = REQ_STATUS_SENT; + if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) { + int nr_pages = p9_nr_pages(req); + int rpinfo_size = sizeof(struct trans_rpage_info) + + sizeof(struct page *) * nr_pages; + + if (rpinfo_size <= (req->tc->capacity - req->tc->size)) { + /* We can use sdata */ + req->tc->private = req->tc->sdata + req->tc->size; + rpinfo = (struct trans_rpage_info *)req->tc->private; + rpinfo->rp_alloc = 0; + } else { + req->tc->private = kmalloc(rpinfo_size, GFP_NOFS); + if (!req->tc->private) { + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: " + "private kmalloc returned NULL"); + return -ENOMEM; + } + rpinfo = (struct trans_rpage_info *)req->tc->private; + rpinfo->rp_alloc = 1; + } + + err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages, + req->tc->id == P9_TREAD ? 
1 : 0); + if (err < 0) { + if (rpinfo->rp_alloc) + kfree(rpinfo); + return err; + } + } + spin_lock_irqsave(&chan->lock, flags); + + /* Handle out VirtIO ring buffers */ out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, - req->tc->size); - in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, - client->msize); + req->tc->size); + + if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) { + /* We have additional write payload buffer to take care */ + if (req->tc->pubuf && P9_IS_USER_CONTEXT) { + outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, + pdata_off, rpinfo->rp_data, pdata_len); + } else { + char *pbuf = req->tc->pubuf ? req->tc->pubuf : + req->tc->pkbuf; + outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf, + req->tc->pbuf_size); + } + out += outp; + } + + /* Handle in VirtIO ring buffers */ + if (req->tc->pbuf_size && + ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) { + /* + * Take care of additional Read payload. + * 11 is the read/write header = PDU Header(7) + IO Size (4). + * Arrange in such a way that server places header in the + * alloced memory and payload onto the user buffer. + */ + inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11); + /* + * Running executables in the filesystem may result in + * a read request with kernel buffer as opposed to user buffer. + */ + if (req->tc->pubuf && P9_IS_USER_CONTEXT) { + in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM, + pdata_off, rpinfo->rp_data, pdata_len); + } else { + char *pbuf = req->tc->pubuf ? 
req->tc->pubuf : + req->tc->pkbuf; + in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM, + pbuf, req->tc->pbuf_size); + } + in += inp; + } else { + in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, + client->msize); + } err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); if (err < 0) { @@ -246,6 +362,8 @@ req_retry: P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: " "virtio rpc add_buf returned failure"); + if (rpinfo && rpinfo->rp_alloc) + kfree(rpinfo); return -EIO; } } -- cgit v1.2.3-58-ga151 From 6f69c395cefb26ebba2c9bb725296a3a4a9200ec Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Sun, 6 Feb 2011 12:08:01 -0800 Subject: [net/9p] Add preferences to transport layer. This patch adds a preferences field to the p9_trans_module. Through this field, the transport layer can now express its preference about the payload, i.e. whether the payload needs to be part of the PDU or whether it prefers the payload to be sent separately so that the transport layer can handle it in a better way. Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- include/net/9p/transport.h | 9 +++++++++ net/9p/trans_virtio.c | 1 + 2 files changed, 10 insertions(+) (limited to 'net') diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h index 6d5886efb102..82868f18c573 100644 --- a/include/net/9p/transport.h +++ b/include/net/9p/transport.h @@ -26,11 +26,19 @@ #ifndef NET_9P_TRANSPORT_H #define NET_9P_TRANSPORT_H +#define P9_TRANS_PREF_PAYLOAD_MASK 0x1 + +/* Default. 
Add Payload to PDU before sending it down to transport layer */ +#define P9_TRANS_PREF_PAYLOAD_DEF 0x0 +/* Send pay load seperately to transport layer along with PDU.*/ +#define P9_TRANS_PREF_PAYLOAD_SEP 0x1 + /** * struct p9_trans_module - transport module interface * @list: used to maintain a list of currently available transports * @name: the human-readable name of the transport * @maxsize: transport provided maximum packet size + * @pref: Preferences of this transport * @def: set if this transport should be considered the default * @create: member function to create a new connection on this transport * @request: member function to issue a request to the transport @@ -47,6 +55,7 @@ struct p9_trans_module { struct list_head list; char *name; /* name of transport */ int maxsize; /* max message size of transport */ + int pref; /* Preferences of this transport */ int def; /* this transport should be default */ struct module *owner; int (*create)(struct p9_client *, const char *, char *); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 4b236de132da..9b550ed9c711 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -566,6 +566,7 @@ static struct p9_trans_module p9_virtio_trans = { .request = p9_virtio_request, .cancel = p9_virtio_cancel, .maxsize = PAGE_SIZE*16, + .pref = P9_TRANS_PREF_PAYLOAD_SEP, .def = 0, .owner = THIS_MODULE, }; -- cgit v1.2.3-58-ga151 From bb2f8a55153ec58e66a496224504ac9be919c8f1 Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Fri, 28 Jan 2011 17:05:59 -0800 Subject: [net/9p] Read side zerocopy changes for 9P2000.L protocol. Modify p9_client_read() to check the transport preference and act accordingly. If the preference is P9_TRANS_PREF_PAYLOAD_SEP, send the payload separately instead of putting it directly on PDU. 
Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 26 ++++++++++++++++++-------- net/9p/protocol.c | 21 +++++++++++++++++++++ 2 files changed, 39 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index a848bca9fbff..82079f902f56 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1270,7 +1270,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, if (count < rsize) rsize = count; - req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize); + /* Don't bother zerocopy form small IO (< 1024) */ + if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == + P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { + req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset, + rsize, data, udata); + } else { + req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, + rsize); + } if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1284,13 +1292,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); - if (data) { - memmove(data, dataptr, count); - } else { - err = copy_to_user(udata, dataptr, count); - if (err) { - err = -EFAULT; - goto free_and_error; + if (!req->tc->pbuf_size) { + if (data) { + memmove(data, dataptr, count); + } else { + err = copy_to_user(udata, dataptr, count); + if (err) { + err = -EFAULT; + goto free_and_error; + } } } p9_free_req(clnt, req); diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 5936c50d0c93..7bca2421bfc8 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -114,6 +114,17 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) return size - len; } +static size_t +pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata, + size_t size) +{ + BUG_ON(pdu->size > P9_IOHDRSZ); + pdu->pubuf = (char __user *)udata; + pdu->pkbuf = (char *)kdata; + pdu->pbuf_size = size; + 
return 0; +} + /* b - int8_t w - int16_t @@ -445,6 +456,16 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, errcode = -EFAULT; } break; + case 'E':{ + int32_t cnt = va_arg(ap, int32_t); + const char *k = va_arg(ap, const void *); + const char *u = va_arg(ap, const void *); + errcode = p9pdu_writef(pdu, proto_version, "d", + cnt); + if (!errcode && pdu_write_urw(pdu, k, u, cnt)) + errcode = -EFAULT; + } + break; case 'U':{ int32_t count = va_arg(ap, int32_t); const char __user *udata = -- cgit v1.2.3-58-ga151 From 1fc52481c2b886c445bb167dfd16ee6de6922ef7 Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Sun, 13 Feb 2011 16:23:59 -0800 Subject: [net/9p] Write side zerocopy changes for 9P2000.L protocol. Modify p9_client_write() to check the transport preference and act accordingly. If the preference is P9_TRANS_PREF_PAYLOAD_SEP, send the payload separately instead of putting it directly on PDU. Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 82079f902f56..412c52e1de74 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1333,12 +1333,21 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, if (count < rsize) rsize = count; - if (data) - req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, offset, - rsize, data); - else - req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, offset, - rsize, udata); + + /* Don't bother zerocopy form small IO (< 1024) */ + if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == + P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { + req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset, + rsize, data, udata); + } else { + + if (data) + req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, + offset, rsize, data); + else + req = p9_client_rpc(clnt, P9_TWRITE, "dqU", 
fid->fid, + offset, rsize, udata); + } if (IS_ERR(req)) { err = PTR_ERR(req); goto error; -- cgit v1.2.3-58-ga151 From 2c66523fd290edeea26cbe8cedd0af167d0f7e5f Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Wed, 16 Feb 2011 18:43:20 -0800 Subject: [net/9p] readdir zerocopy changes for 9P2000.L protocol. Modify p9_client_readdir() to check the transport preference and act accordingly. If the preference is P9_TRANS_PREF_PAYLOAD_SEP, send the payload separately instead of putting it directly on the PDU. Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 11 +++++++++-- net/9p/protocol.c | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 412c52e1de74..6e07ef494ff2 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1735,7 +1735,14 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) if (count < rsize) rsize = count; - req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, offset, rsize); + if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == + P9_TRANS_PREF_PAYLOAD_SEP) { + req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid, + offset, rsize, data); + } else { + req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, + offset, rsize); + } if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1749,7 +1756,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); - if (data) + if (!req->tc->pbuf_size && data) memmove(data, dataptr, count); p9_free_req(clnt, req); diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 7bca2421bfc8..2ce515b859b3 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -125,6 +125,15 @@ pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata, return 0; } +static size_t +pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size) 
+{ + BUG_ON(pdu->size > P9_READDIRHDRSZ); + pdu->pkbuf = (char *)kdata; + pdu->pbuf_size = size; + return 0; +} + /* b - int8_t w - int16_t @@ -466,6 +475,15 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, errcode = -EFAULT; } break; + case 'F':{ + int32_t cnt = va_arg(ap, int32_t); + const char *k = va_arg(ap, const void *); + errcode = p9pdu_writef(pdu, proto_version, "d", + cnt); + if (!errcode && pdu_write_readdir(pdu, k, cnt)) + errcode = -EFAULT; + } + break; case 'U':{ int32_t count = va_arg(ap, int32_t); const char __user *udata = -- cgit v1.2.3-58-ga151 From ca41bb3e21d7b3cb2079e225e3a7e62e6c776518 Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Tue, 1 Feb 2011 20:04:59 -0800 Subject: [net/9p] Handle Zero Copy TREAD/RERROR case in !dotl case. This takes care of copying out error buffers from user buffer payloads when we are using zero copy. This happens because the only payload buffer the server has to respond to the request is the user buffer given for the zero copy read. Because we only use zerocopy when the amount of data to transfer is greater than a certain size (currently 4K) and error strings are limited to ERRMAX (currently 128) we don't need to worry about there being sufficient space for the error to fit in the payload. 
Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 64 ++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 6e07ef494ff2..251abb1699c4 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -443,6 +443,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) { int8_t type; int err; + int ecode; err = p9_parse_header(req->rc, NULL, &type, NULL, 0); if (err) { @@ -450,36 +451,53 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) return err; } - if (type == P9_RERROR || type == P9_RLERROR) { - int ecode; - - if (!p9_is_proto_dotl(c)) { - char *ename; + if (type != P9_RERROR && type != P9_RLERROR) + return 0; - err = p9pdu_readf(req->rc, c->proto_version, "s?d", - &ename, &ecode); - if (err) - goto out_err; + if (!p9_is_proto_dotl(c)) { + char *ename; + + if (req->tc->pbuf_size) { + /* Handle user buffers */ + size_t len = req->rc->size - req->rc->offset; + if (req->tc->pubuf) { + /* User Buffer */ + err = copy_from_user( + &req->rc->sdata[req->rc->offset], + req->tc->pubuf, len); + if (err) { + err = -EFAULT; + goto out_err; + } + } else { + /* Kernel Buffer */ + memmove(&req->rc->sdata[req->rc->offset], + req->tc->pkbuf, len); + } + } + err = p9pdu_readf(req->rc, c->proto_version, "s?d", + &ename, &ecode); + if (err) + goto out_err; - if (p9_is_proto_dotu(c)) - err = -ecode; + if (p9_is_proto_dotu(c)) + err = -ecode; - if (!err || !IS_ERR_VALUE(err)) { - err = p9_errstr2errno(ename, strlen(ename)); + if (!err || !IS_ERR_VALUE(err)) { + err = p9_errstr2errno(ename, strlen(ename)); - P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename); + P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, + ename); - kfree(ename); - } - } else { - err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); - err = -ecode; - - P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR 
(%d)\n", -ecode); + kfree(ename); } + } else { + err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); + err = -ecode; + + P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + } - } else - err = 0; return err; -- cgit v1.2.3-58-ga151 From f735195d51e10b2550097f7b0ac12219060e962b Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Wed, 16 Feb 2011 12:54:22 -0800 Subject: [net/9p] Small non-IO PDUs for zero-copy supporting transports. If a transport prefers payload to be sent separate from the PDU (P9_TRANS_PREF_PAYLOAD_SEP), there is no need to allocate msize PDU buffers (struct p9_fcall). This patch allocates buffers of only up to 4k for this kind of transport, and there won't be any change to the legacy transports. Hence, this patch on top of the zero copy changes allows the user to specify higher msizes through the mount option without hogging the kernel heap. Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- include/net/9p/9p.h | 2 +- net/9p/client.c | 23 +++++++++++++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 7aefa6d975ac..eaa45f932970 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -688,7 +688,7 @@ struct p9_rwstat { * @id: protocol operating identifier of type &p9_msg_t * @tag: transaction id of the request * @offset: used by marshalling routines to track currentposition in buffer - * @capacity: used by marshalling routines to track total capacity + * @capacity: used by marshalling routines to track total malloc'd capacity * @pubuf: Payload user buffer given by the caller * @pubuf: Payload kernel buffer given by the caller * @pbuf_size: pubuf/pkbuf(only one will be !NULL) size to be read/write. 
diff --git a/net/9p/client.c b/net/9p/client.c index 251abb1699c4..43ec78af4547 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -229,10 +229,23 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) return ERR_PTR(-ENOMEM); } init_waitqueue_head(req->wq); - req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_KERNEL); - req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_KERNEL); + if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == + P9_TRANS_PREF_PAYLOAD_SEP) { + int alloc_msize = min(c->msize, 4096); + req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, + GFP_KERNEL); + req->tc->capacity = alloc_msize; + req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, + GFP_KERNEL); + req->rc->capacity = alloc_msize; + } else { + req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, + GFP_KERNEL); + req->tc->capacity = c->msize; + req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, + GFP_KERNEL); + req->rc->capacity = c->msize; + } if ((!req->tc) || (!req->rc)) { printk(KERN_ERR "Couldn't grow tag array\n"); kfree(req->tc); @@ -243,9 +256,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) return ERR_PTR(-ENOMEM); } req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); - req->tc->capacity = c->msize; req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); - req->rc->capacity = c->msize; } p9pdu_reset(req->tc); -- cgit v1.2.3-58-ga151 From c0aa4caf4c8b87fa85c67538974a14b07e81a23f Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Feb 2011 17:03:59 +0530 Subject: net/9p: Implement syncfs 9P operation Signed-off-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- include/net/9p/9p.h | 2 ++ include/net/9p/client.h | 1 + net/9p/client.c | 21 +++++++++++++++++++++ 3 files changed, 24 insertions(+) (limited to 'net') diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index eaa45f932970..6b75a6971346 100644 --- a/include/net/9p/9p.h +++ 
b/include/net/9p/9p.h @@ -139,6 +139,8 @@ do { \ */ enum p9_msg_t { + P9_TSYNCFS = 0, + P9_RSYNCFS, P9_TLERROR = 6, P9_RLERROR, P9_TSTATFS = 8, diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 83ba6a4d58a3..0a30977e3c1f 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -230,6 +230,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, gid_t gid, struct p9_qid *qid); int p9_client_clunk(struct p9_fid *fid); int p9_client_fsync(struct p9_fid *fid, int datasync); +int p9_client_sync_fs(struct p9_fid *fid); int p9_client_remove(struct p9_fid *fid); int p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, u32 count); diff --git a/net/9p/client.c b/net/9p/client.c index 43ec78af4547..347ec0cd2718 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1220,6 +1220,27 @@ error: } EXPORT_SYMBOL(p9_client_fsync); +int p9_client_sync_fs(struct p9_fid *fid) +{ + int err = 0; + struct p9_req_t *req; + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TSYNC_FS fid %d\n", fid->fid); + + clnt = fid->clnt; + req = p9_client_rpc(clnt, P9_TSYNCFS, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RSYNCFS fid %d\n", fid->fid); + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_sync_fs); + int p9_client_clunk(struct p9_fid *fid) { int err; -- cgit v1.2.3-58-ga151