From df46b9a44ceb5af2ea2351ce8e28ae7bd840b00f Mon Sep 17 00:00:00 2001
From: Mike Christie
Date: Mon, 20 Jun 2005 14:04:44 +0200
Subject: [PATCH] Add blk_rq_map_kern()

Add blk_rq_map_kern, which takes a kernel buffer and maps it into a
request and bio. This can be used by the dm hw_handlers, the old
sg_scsi_ioctl, and one day scsi special requests, so that all requests
coming into scsi will have bios. With every request carrying a bio,
scsi should be able to use scatter lists for all IO and use block
layer functions.

Signed-off-by: Jens Axboe
---
 fs/bio.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
(limited to 'fs')

diff --git a/fs/bio.c b/fs/bio.c
index 3a1472acc361..707b9af2dd01 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -701,6 +701,71 @@ void bio_unmap_user(struct bio *bio)
 	bio_put(bio);
 }
 
+static struct bio *__bio_map_kern(request_queue_t *q, void *data,
+				  unsigned int len, unsigned int gfp_mask)
+{
+	unsigned long kaddr = (unsigned long)data;
+	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	unsigned long start = kaddr >> PAGE_SHIFT;
+	const int nr_pages = end - start;
+	int offset, i;
+	struct bio *bio;
+
+	bio = bio_alloc(gfp_mask, nr_pages);
+	if (!bio)
+		return ERR_PTR(-ENOMEM);
+
+	offset = offset_in_page(kaddr);
+	for (i = 0; i < nr_pages; i++) {
+		unsigned int bytes = PAGE_SIZE - offset;
+
+		if (len <= 0)
+			break;
+
+		if (bytes > len)
+			bytes = len;
+
+		if (__bio_add_page(q, bio, virt_to_page(data), bytes,
+				   offset) < bytes)
+			break;
+
+		data += bytes;
+		len -= bytes;
+		offset = 0;
+	}
+
+	return bio;
+}
+
+/**
+ * bio_map_kern - map kernel address into bio
+ * @q: the request_queue_t for the bio
+ * @data: pointer to buffer to map
+ * @len: length in bytes
+ * @gfp_mask: allocation flags for bio allocation
+ *
+ * Map the kernel address into a bio suitable for io to a block
+ * device. Returns an error pointer in case of error.
+ */
+struct bio *bio_map_kern(request_queue_t *q, void *data, unsigned int len,
+			 unsigned int gfp_mask)
+{
+	struct bio *bio;
+
+	bio = __bio_map_kern(q, data, len, gfp_mask);
+	if (IS_ERR(bio))
+		return bio;
+
+	if (bio->bi_size == len)
+		return bio;
+
+	/*
+	 * Don't support partial mappings.
+	 */
+	bio_put(bio);
+	return ERR_PTR(-EINVAL);
+}
+
 /*
  * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
  * for performing direct-IO in BIOs.
@@ -1088,6 +1153,7 @@ EXPORT_SYMBOL(bio_add_page);
 EXPORT_SYMBOL(bio_get_nr_vecs);
 EXPORT_SYMBOL(bio_map_user);
 EXPORT_SYMBOL(bio_unmap_user);
+EXPORT_SYMBOL(bio_map_kern);
 EXPORT_SYMBOL(bio_pair_release);
 EXPORT_SYMBOL(bio_split);
 EXPORT_SYMBOL(bio_split_pool);
-- 
cgit v1.2.3-58-ga151
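For context, here is a minimal sketch of how a 2.6-era caller might drive the
new helper. It is not part of the patch: the queue, block device, and sector
arguments are illustrative assumptions, and a real caller would normally go
through blk_rq_map_kern() so the bio ends up attached to a request. Note also
that at this point in the series no bi_end_io is set on the mapped bio; the
next patch wires one up.

static int submit_kernel_buf(request_queue_t *q, struct block_device *bdev,
			     void *buf, unsigned int len, sector_t sector)
{
	struct bio *bio;

	/* buf must be a kmalloc'ed lowmem buffer: __bio_map_kern() uses
	 * virt_to_page(), so vmalloc memory cannot be mapped this way */
	bio = bio_map_kern(q, buf, len, GFP_KERNEL);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	bio->bi_bdev = bdev;
	bio->bi_sector = sector;
	submit_bio(READ, bio);	/* completion arrives via bio->bi_end_io */
	return 0;
}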
From b823825e8e09aac6dc1ca362cd5639a87329d636 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Mon, 20 Jun 2005 14:05:27 +0200
Subject: [PATCH] Keep the bio end_io parts inside of bio.c for blk_rq_map_kern()

Signed-off-by: Jens Axboe
---
 drivers/block/ll_rw_blk.c | 11 -----------
 fs/bio.c                  | 11 +++++++++++
 2 files changed, 11 insertions(+), 11 deletions(-)
(limited to 'fs')

diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index e30a3c93b70c..1471aca6fa18 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -2177,16 +2177,6 @@ int blk_rq_unmap_user(struct request *rq, struct bio *bio, unsigned int ulen)
 
 EXPORT_SYMBOL(blk_rq_unmap_user);
 
-static int blk_rq_map_kern_endio(struct bio *bio, unsigned int bytes_done,
-				 int error)
-{
-	if (bio->bi_size)
-		return 1;
-
-	bio_put(bio);
-	return 0;
-}
-
 /**
  * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage
  * @q:		request queue where request should be inserted
@@ -2213,7 +2203,6 @@ struct request *blk_rq_map_kern(request_queue_t *q, int rw, void *kbuf,
 	if (!IS_ERR(bio)) {
 		if (rw)
 			bio->bi_rw |= (1 << BIO_RW);
-		bio->bi_end_io = blk_rq_map_kern_endio;
 
 		rq->bio = rq->biotail = bio;
 		blk_rq_bio_prep(q, rq, bio);
diff --git a/fs/bio.c b/fs/bio.c
index 707b9af2dd01..c0d9140e470c 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -701,6 +701,16 @@ void bio_unmap_user(struct bio *bio)
 	bio_put(bio);
 }
 
+static int bio_map_kern_endio(struct bio *bio, unsigned int bytes_done, int err)
+{
+	if (bio->bi_size)
+		return 1;
+
+	bio_put(bio);
+	return 0;
+}
+
+
 static struct bio *__bio_map_kern(request_queue_t *q, void *data,
 				  unsigned int len, unsigned int gfp_mask)
 {
@@ -734,6 +744,7 @@ static struct bio *__bio_map_kern(request_queue_t *q, void *data,
 		offset = 0;
 	}
 
+	bio->bi_end_io = bio_map_kern_endio;
 	return bio;
 }
 
-- 
cgit v1.2.3-58-ga151
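The handler moved above follows the 2.6-era bi_end_io contract: a bio may
complete in pieces, so the handler must return 1 while bio->bi_size is still
non-zero, and may only tear the bio down on the final call. As a sketch of
that contract, a hypothetical caller-supplied handler that wakes a waiter on
final completion could look like this (my_end_io and the completion stashed
in bi_private are illustrative, not from the patch):

static int my_end_io(struct bio *bio, unsigned int bytes_done, int error)
{
	/* partial completion: more bytes outstanding, keep the bio alive */
	if (bio->bi_size)
		return 1;

	/* final completion: wake the submitter and drop the reference */
	complete((struct completion *) bio->bi_private);
	bio_put(bio);
	return 0;
}

Keeping bio_map_kern_endio() in bio.c puts the final bio_put() next to the
code that allocated the bio, so blk_rq_map_kern() callers no longer need to
supply their own handler.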
From f1970baf6d74e03bd32072ab453f2fc01bc1b8d3 Mon Sep 17 00:00:00 2001
From: James Bottomley
Date: Mon, 20 Jun 2005 14:06:52 +0200
Subject: [PATCH] Add scatter-gather support for the block layer SG_IO

Signed-off-by: Jens Axboe
---
 drivers/block/ll_rw_blk.c  |  64 ++++++++++++++++--
 drivers/block/scsi_ioctl.c |  34 +++++----
 fs/bio.c                   | 150 ++++++++++++++++++++++++++++-------------
 include/linux/bio.h        |   4 ++
 include/linux/blkdev.h     |   1 +
 5 files changed, 191 insertions(+), 62 deletions(-)
(limited to 'fs')

diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index 42c4f3651cf8..874e46fc3748 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -2148,6 +2148,50 @@ int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf,
 
 EXPORT_SYMBOL(blk_rq_map_user);
 
+/**
+ * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage
+ * @q:		request queue where request should be inserted
+ * @rq:		request to map data to
+ * @iov:	pointer to the iovec
+ * @iov_count:	number of elements in the iovec
+ *
+ * Description:
+ *    Data will be mapped directly for zero copy io, if possible. Otherwise
+ *    a kernel bounce buffer is used.
+ *
+ *    A matching blk_rq_unmap_user() must be issued at the end of io, while
+ *    still in process context.
+ *
+ *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
+ *    before being submitted to the device, as pages mapped may be out of
+ *    reach. It's the caller's responsibility to make sure this happens. The
+ *    original bio must be passed back in to blk_rq_unmap_user() for proper
+ *    unmapping.
+ */
+int blk_rq_map_user_iov(request_queue_t *q, struct request *rq,
+			struct sg_iovec *iov, int iov_count)
+{
+	struct bio *bio;
+
+	if (!iov || iov_count <= 0)
+		return -EINVAL;
+
+	/* we don't allow misaligned data like bio_map_user() does. If the
+	 * user is using sg, they're expected to know the alignment constraints
+	 * and respect them accordingly */
+	bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq) == READ);
+	if (IS_ERR(bio))
+		return PTR_ERR(bio);
+
+	rq->bio = rq->biotail = bio;
+	blk_rq_bio_prep(q, rq, bio);
+	rq->buffer = rq->data = NULL;
+	rq->data_len = bio->bi_size;
+	return 0;
+}
+
+EXPORT_SYMBOL(blk_rq_map_user_iov);
+
 /**
  * blk_rq_unmap_user - unmap a request with user data
  * @rq:		request to be unmapped
@@ -2207,6 +2251,19 @@ int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf,
 
 EXPORT_SYMBOL(blk_rq_map_kern);
 
+void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
+			   struct request *rq, int at_head,
+			   void (*done)(struct request *))
+{
+	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
+
+	rq->rq_disk = bd_disk;
+	rq->flags |= REQ_NOMERGE;
+	rq->end_io = done;
+	elv_add_request(q, rq, where, 1);
+	generic_unplug_device(q);
+}
+
 /**
  * blk_execute_rq - insert a request into queue for execution
  * @q:		queue to insert the request in
@@ -2224,8 +2281,6 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
 	char sense[SCSI_SENSE_BUFFERSIZE];
 	int err = 0;
 
-	rq->rq_disk = bd_disk;
-
 	/*
 	 * we need an extra reference to the request, so we can look at
 	 * it after io completion
@@ -2238,11 +2293,8 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
 		rq->sense_len = 0;
 	}
 
-	rq->flags |= REQ_NOMERGE;
 	rq->waiting = &wait;
-	rq->end_io = blk_end_sync_rq;
-	elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
-	generic_unplug_device(q);
+	blk_execute_rq_nowait(q, bd_disk, rq, 0, blk_end_sync_rq);
 	wait_for_completion(&wait);
 	rq->waiting = NULL;
 
diff --git a/drivers/block/scsi_ioctl.c b/drivers/block/scsi_ioctl.c
index 93c4ca874be3..09a7e73a0812 100644
--- a/drivers/block/scsi_ioctl.c
+++ b/drivers/block/scsi_ioctl.c
@@ -231,17 +231,11 @@ static int sg_io(struct file *file, request_queue_t *q,
 	if (verify_command(file, cmd))
 		return -EPERM;
 
-	/*
-	 * we'll do that later
-	 */
-	if (hdr->iovec_count)
-		return -EOPNOTSUPP;
-
 	if (hdr->dxfer_len > (q->max_sectors << 9))
 		return -EIO;
 
 	reading = writing = 0;
-	if (hdr->dxfer_len) {
+	if (hdr->dxfer_len)
 		switch (hdr->dxfer_direction) {
 		default:
 			return -EINVAL;
@@ -261,11 +255,29 @@ static int sg_io(struct file *file, request_queue_t *q,
 	if (!rq)
 		return -ENOMEM;
 
-	if (reading || writing) {
-		ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len);
-		if (ret)
+	if (hdr->iovec_count) {
+		const int size = sizeof(struct sg_iovec) * hdr->iovec_count;
+		struct sg_iovec *iov;
+
+		iov = kmalloc(size, GFP_KERNEL);
+		if (!iov) {
+			ret = -ENOMEM;
 			goto out;
-	}
+		}
+
+		if (copy_from_user(iov, hdr->dxferp, size)) {
+			kfree(iov);
+			ret = -EFAULT;
+			goto out;
+		}
+
+		ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count);
+		kfree(iov);
+	} else if (hdr->dxfer_len)
+		ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len);
+
+	if (ret)
+		goto out;
 
 	/*
 	 * fill in request structure
diff --git a/fs/bio.c b/fs/bio.c
index c0d9140e470c..24e4045788e2 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
+#include <scsi/sg.h>		/* for struct sg_iovec */
 
 #define BIO_POOL_SIZE 256
 
@@ -549,22 +550,34 @@ out_bmd:
 	return ERR_PTR(ret);
 }
 
-static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
-				  unsigned long uaddr, unsigned int len,
-				  int write_to_vm)
+static struct bio *__bio_map_user_iov(request_queue_t *q,
+				      struct block_device *bdev,
+				      struct sg_iovec *iov, int iov_count,
+				      int write_to_vm)
 {
-	unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	unsigned long start = uaddr >> PAGE_SHIFT;
-	const int nr_pages = end - start;
-	int ret, offset, i;
+	int i, j;
+	int nr_pages = 0;
 	struct page **pages;
 	struct bio *bio;
+	int cur_page = 0;
+	int ret, offset;
 
-	/*
-	 * transfer and buffer must be aligned to at least hardsector
-	 * size for now, in the future we can relax this restriction
-	 */
-	if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q)))
+	for (i = 0; i < iov_count; i++) {
+		unsigned long uaddr = (unsigned long)iov[i].iov_base;
+		unsigned long len = iov[i].iov_len;
+		unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+		unsigned long start = uaddr >> PAGE_SHIFT;
+
+		nr_pages += end - start;
+		/*
+		 * transfer and buffer must be aligned to at least hardsector
+		 * size for now, in the future we can relax this restriction
+		 */
+		if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q)))
+			return ERR_PTR(-EINVAL);
+	}
+
+	if (!nr_pages)
 		return ERR_PTR(-EINVAL);
 
 	bio = bio_alloc(GFP_KERNEL, nr_pages);
@@ -576,42 +589,54 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev
 	if (!pages)
 		goto out;
 
-	down_read(&current->mm->mmap_sem);
-	ret = get_user_pages(current, current->mm, uaddr, nr_pages,
-			     write_to_vm, 0, pages, NULL);
-	up_read(&current->mm->mmap_sem);
-
-	if (ret < nr_pages)
-		goto out;
-
-	bio->bi_bdev = bdev;
-
-	offset = uaddr & ~PAGE_MASK;
-	for (i = 0; i < nr_pages; i++) {
-		unsigned int bytes = PAGE_SIZE - offset;
-
-		if (len <= 0)
-			break;
-
-		if (bytes > len)
-			bytes = len;
+	memset(pages, 0, nr_pages * sizeof(struct page *));
+
+	for (i = 0; i < iov_count; i++) {
+		unsigned long uaddr = (unsigned long)iov[i].iov_base;
+		unsigned long len = iov[i].iov_len;
+		unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+		unsigned long start = uaddr >> PAGE_SHIFT;
+		const int local_nr_pages = end - start;
+		const int page_limit = cur_page + local_nr_pages;
+
+		down_read(&current->mm->mmap_sem);
+		ret = get_user_pages(current, current->mm, uaddr,
+				     local_nr_pages,
+				     write_to_vm, 0, &pages[cur_page], NULL);
+		up_read(&current->mm->mmap_sem);
+
+		if (ret < local_nr_pages)
+			goto out_unmap;
+
+
+		offset = uaddr & ~PAGE_MASK;
+		for (j = cur_page; j < page_limit; j++) {
+			unsigned int bytes = PAGE_SIZE - offset;
+
+			if (len <= 0)
+				break;
+
+			if (bytes > len)
+				bytes = len;
+
+			/*
+			 * sorry...
+			 */
+			if (__bio_add_page(q, bio, pages[j], bytes, offset) < bytes)
+				break;
+
+			len -= bytes;
+			offset = 0;
+		}
+		cur_page = j;
 
 		/*
-		 * sorry...
+		 * release the pages we didn't map into the bio, if any
 		 */
-		if (__bio_add_page(q, bio, pages[i], bytes, offset) < bytes)
-			break;
-
-		len -= bytes;
-		offset = 0;
+		while (j < page_limit)
+			page_cache_release(pages[j++]);
 	}
 
-	/*
-	 * release the pages we didn't map into the bio, if any
-	 */
-	while (i < nr_pages)
-		page_cache_release(pages[i++]);
-
 	kfree(pages);
 
 	/*
@@ -620,9 +645,17 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev
 	if (!write_to_vm)
 		bio->bi_rw |= (1 << BIO_RW);
 
+	bio->bi_bdev = bdev;
 	bio->bi_flags |= (1 << BIO_USER_MAPPED);
 	return bio;
-out:
+
+ out_unmap:
+	for (i = 0; i < nr_pages; i++) {
+		if(!pages[i])
+			break;
+		page_cache_release(pages[i]);
+	}
+ out:
 	kfree(pages);
 	bio_put(bio);
 	return ERR_PTR(ret);
@@ -641,10 +674,34 @@ out:
  */
 struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev,
 			 unsigned long uaddr, unsigned int len, int write_to_vm)
+{
+	struct sg_iovec iov;
+
+	iov.iov_base = (__user void *)uaddr;
+	iov.iov_len = len;
+
+	return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm);
+}
+
+/**
+ * bio_map_user_iov - map user sg_iovec table into bio
+ * @q: the request_queue_t for the bio
+ * @bdev: destination block device
+ * @iov:	the iovec.
+ * @iov_count:	number of elements in the iovec
+ * @write_to_vm:	bool indicating writing to pages or not
+ *
+ * Map the user space address into a bio suitable for io to a block
+ * device. Returns an error pointer in case of error.
+ */
+struct bio *bio_map_user_iov(request_queue_t *q, struct block_device *bdev,
+			     struct sg_iovec *iov, int iov_count,
+			     int write_to_vm)
 {
 	struct bio *bio;
+	int len = 0, i;
 
-	bio = __bio_map_user(q, bdev, uaddr, len, write_to_vm);
+	bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm);
 	if (IS_ERR(bio))
 		return bio;
 
@@ -657,6 +714,9 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev,
 	 */
 	bio_get(bio);
 
+	for (i = 0; i < iov_count; i++)
+		len += iov[i].iov_len;
+
 	if (bio->bi_size == len)
 		return bio;
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 1dd2bc2e84ae..ebcd03ba2e20 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -281,6 +281,10 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,
 			unsigned int);
 extern int bio_get_nr_vecs(struct block_device *);
 extern struct bio *bio_map_user(struct request_queue *, struct block_device *,
 				unsigned long, unsigned int, int);
+struct sg_iovec;
+extern struct bio *bio_map_user_iov(struct request_queue *,
+				    struct block_device *,
+				    struct sg_iovec *, int, int);
 extern void bio_unmap_user(struct bio *);
 extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
 				unsigned int);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index fc0dce078616..0430ea3e5f2e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -561,6 +561,7 @@ extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *);
 extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int);
 extern int blk_rq_unmap_user(struct bio *, unsigned int);
 extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, unsigned int);
+extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_iovec *, int);
 extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *);
 
 static inline request_queue_t *bdev_get_queue(struct block_device *bdev)
-- 
cgit v1.2.3-58-ga151
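With the series applied, the block layer SG_IO path honours iovec_count, so
userspace can scatter one transfer across several buffers. A rough userspace
sketch follows, assuming an already-opened block device fd; the READ(10)
command, LBA, and sizes are illustrative, and the buffer is sector-aligned
because blk_rq_map_user_iov() rejects iovecs that violate
queue_dma_alignment():

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <scsi/sg.h>

/* Read two sectors from LBA 0 into two buffers with a single SG_IO call. */
static int read_two_sectors_iov(int fd)
{
	unsigned char cdb[10] = { 0x28, 0, 0, 0, 0, 0, 0, 0, 2, 0 }; /* READ(10), 2 blocks */
	unsigned char sense[32];
	struct sg_iovec iov[2];
	struct sg_io_hdr hdr;
	unsigned char *buf;
	void *mem;
	int ret;

	/* one aligned allocation, split across two iovec elements */
	if (posix_memalign(&mem, 512, 1024))
		return -1;
	buf = mem;

	iov[0].iov_base = buf;		iov[0].iov_len = 512;
	iov[1].iov_base = buf + 512;	iov[1].iov_len = 512;

	memset(&hdr, 0, sizeof(hdr));
	hdr.interface_id = 'S';
	hdr.dxfer_direction = SG_DXFER_FROM_DEV;
	hdr.cmd_len = sizeof(cdb);
	hdr.cmdp = cdb;
	hdr.iovec_count = 2;		/* dxferp now points at the iovec table */
	hdr.dxfer_len = 1024;
	hdr.dxferp = iov;
	hdr.sbp = sense;
	hdr.mx_sb_len = sizeof(sense);
	hdr.timeout = 5000;		/* milliseconds */

	ret = ioctl(fd, SG_IO, &hdr);
	/* on success, consume buf[0..1023] here */
	free(mem);
	return ret;
}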