From a22b169df1b9f259391cf3b8ad8bfeea3d7be3f1 Mon Sep 17 00:00:00 2001 From: Vasily Tarasov Date: Wed, 11 Oct 2006 09:24:27 +0200 Subject: [PATCH] block layer: elevator_find function cleanup We can easily produce search through the elevator list without introducing additional elevator_type variable. Signed-off-by: Vasily Tarasov Signed-off-by: Jens Axboe --- block/elevator.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'block') diff --git a/block/elevator.c b/block/elevator.c index 487dd3da8853..d8030a84773a 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -93,21 +93,18 @@ static inline int elv_try_merge(struct request *__rq, struct bio *bio) static struct elevator_type *elevator_find(const char *name) { - struct elevator_type *e = NULL; + struct elevator_type *e; struct list_head *entry; list_for_each(entry, &elv_list) { - struct elevator_type *__e; - __e = list_entry(entry, struct elevator_type, list); + e = list_entry(entry, struct elevator_type, list); - if (!strcmp(__e->elevator_name, name)) { - e = __e; - break; - } + if (!strcmp(e->elevator_name, name)) + return e; } - return e; + return NULL; } static void elevator_put(struct elevator_type *e) -- cgit v1.2.3-58-ga151 From c5841642242e9ae817275e09b36b298456dc17d2 Mon Sep 17 00:00:00 2001 From: Vasily Tarasov Date: Wed, 11 Oct 2006 13:26:30 +0200 Subject: [PATCH] block layer: elv_iosched_show should get elv_list_lock elv_iosched_show function iterates other elv_list, hence elv_list_lock should be got. Signed-off-by: Vasily Tarasov Signed-off-by: Vasily Tarasov --- block/elevator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/elevator.c b/block/elevator.c index d8030a84773a..8ccd163254b8 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -1085,7 +1085,7 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name) struct list_head *entry; int len = 0; - spin_lock_irq(q->queue_lock); + spin_lock_irq(&elv_list_lock); list_for_each(entry, &elv_list) { struct elevator_type *__e; @@ -1095,7 +1095,7 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name) else len += sprintf(name+len, "%s ", __e->elevator_name); } - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&elv_list_lock); len += sprintf(len+name, "\n"); return len; -- cgit v1.2.3-58-ga151 From 79e2de4bc53d7ca2a8eedee49e4a92479b4b530e Mon Sep 17 00:00:00 2001 From: Thomas Maier Date: Thu, 19 Oct 2006 23:28:15 -0700 Subject: [PATCH] export clear_queue_congested and set_queue_congested Export the clear_queue_congested() and set_queue_congested() functions located in ll_rw_blk.c The functions are renamed to blk_clear_queue_congested() and blk_set_queue_congested(). (needed in the pktcdvd driver's bio write congestion control) Signed-off-by: Thomas Maier Cc: Peter Osterlund Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/ll_rw_blk.c | 20 ++++++++++---------- include/linux/blkdev.h | 2 ++ 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'block') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index c847e17e5caa..132a858ce2c5 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -117,7 +117,7 @@ static void blk_queue_congestion_threshold(struct request_queue *q) * congested queues, and wake up anyone who was waiting for requests to be * put back. */ -static void clear_queue_congested(request_queue_t *q, int rw) +void blk_clear_queue_congested(request_queue_t *q, int rw) { enum bdi_state bit; wait_queue_head_t *wqh = &congestion_wqh[rw]; @@ -128,18 +128,20 @@ static void clear_queue_congested(request_queue_t *q, int rw) if (waitqueue_active(wqh)) wake_up(wqh); } +EXPORT_SYMBOL(blk_clear_queue_congested); /* * A queue has just entered congestion. Flag that in the queue's VM-visible * state flags and increment the global gounter of congested queues. */ -static void set_queue_congested(request_queue_t *q, int rw) +void blk_set_queue_congested(request_queue_t *q, int rw) { enum bdi_state bit; bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; set_bit(bit, &q->backing_dev_info.state); } +EXPORT_SYMBOL(blk_set_queue_congested); /** * blk_get_backing_dev_info - get the address of a queue's backing_dev_info @@ -159,7 +161,6 @@ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) ret = &q->backing_dev_info; return ret; } - EXPORT_SYMBOL(blk_get_backing_dev_info); void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data) @@ -167,7 +168,6 @@ void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data) q->activity_fn = fn; q->activity_data = data; } - EXPORT_SYMBOL(blk_queue_activity_fn); /** @@ -2067,7 +2067,7 @@ static void __freed_request(request_queue_t *q, int rw) struct request_list *rl = &q->rq; if (rl->count[rw] < queue_congestion_off_threshold(q)) - clear_queue_congested(q, rw); + blk_clear_queue_congested(q, rw); if (rl->count[rw] + 1 <= q->nr_requests) { if (waitqueue_active(&rl->wait[rw])) @@ -2137,7 +2137,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, } } } - set_queue_congested(q, rw); + blk_set_queue_congested(q, rw); } /* @@ -3765,14 +3765,14 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) blk_queue_congestion_threshold(q); if (rl->count[READ] >= queue_congestion_on_threshold(q)) - set_queue_congested(q, READ); + blk_set_queue_congested(q, READ); else if (rl->count[READ] < queue_congestion_off_threshold(q)) - clear_queue_congested(q, READ); + blk_clear_queue_congested(q, READ); if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) - set_queue_congested(q, WRITE); + blk_set_queue_congested(q, WRITE); else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) - clear_queue_congested(q, WRITE); + blk_clear_queue_congested(q, WRITE); if (rl->count[READ] >= q->nr_requests) { blk_set_queue_full(q, READ); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d370d2cfe138..9575e3a5ff2a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -651,6 +651,8 @@ extern void blk_recount_segments(request_queue_t *, struct bio *); extern int scsi_cmd_ioctl(struct file *, struct gendisk *, unsigned int, void __user *); extern int sg_scsi_ioctl(struct file *, struct request_queue *, struct gendisk *, struct scsi_ioctl_command __user *); +extern void blk_clear_queue_congested(request_queue_t *q, int rw); +extern void blk_set_queue_congested(request_queue_t *q, int rw); extern void blk_start_queue(request_queue_t *q); extern void blk_stop_queue(request_queue_t *q); extern void blk_sync_queue(struct request_queue *q); -- cgit v1.2.3-58-ga151 From 3fcfab16c5b86eaa3db3a9a31adba550c5b67141 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 19 Oct 2006 23:28:16 -0700 Subject: [PATCH] separate bdi congestion functions from queue congestion functions Separate out the concept of "queue congestion" from "backing-dev congestion". Congestion is a backing-dev concept, not a queue concept. The blk_* congestion functions are retained, as wrappers around the core backing-dev congestion functions. This proper layering is needed so that NFS can cleanly use the congestion functions, and so that CONFIG_BLOCK=n actually links. Cc: "Thomas Maier" Cc: "Jens Axboe" Cc: Trond Myklebust Cc: David Howells Cc: Peter Osterlund Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/lib/usercopy.c | 3 +- block/ll_rw_blk.c | 71 --------------------------------------------- drivers/md/dm-crypt.c | 3 +- fs/fat/file.c | 3 +- fs/nfs/write.c | 4 ++- fs/reiserfs/journal.c | 3 +- fs/xfs/linux-2.6/kmem.c | 5 ++-- fs/xfs/linux-2.6/xfs_buf.c | 3 +- include/linux/backing-dev.h | 7 +++++ include/linux/blkdev.h | 24 ++++++++++++--- include/linux/writeback.h | 1 - mm/Makefile | 3 +- mm/backing-dev.c | 69 +++++++++++++++++++++++++++++++++++++++++++ mm/page-writeback.c | 17 +++-------- mm/page_alloc.c | 5 ++-- mm/shmem.c | 3 +- mm/vmscan.c | 6 ++-- 17 files changed, 126 insertions(+), 104 deletions(-) create mode 100644 mm/backing-dev.c (limited to 'block') diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c index 258df6b4d7d7..d22cfc9d656c 100644 --- a/arch/i386/lib/usercopy.c +++ b/arch/i386/lib/usercopy.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -741,7 +742,7 @@ survive: if (retval == -ENOMEM && is_init(current)) { up_read(¤t->mm->mmap_sem); - blk_congestion_wait(WRITE, HZ/50); + congestion_wait(WRITE, HZ/50); goto survive; } diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 132a858ce2c5..136066583c68 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -56,11 +56,6 @@ static kmem_cache_t *requestq_cachep; */ static kmem_cache_t *iocontext_cachep; -static wait_queue_head_t congestion_wqh[2] = { - __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), - __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) - }; - /* * Controlling structure to kblockd */ @@ -112,37 +107,6 @@ static void blk_queue_congestion_threshold(struct request_queue *q) q->nr_congestion_off = nr; } -/* - * A queue has just exitted congestion. Note this in the global counter of - * congested queues, and wake up anyone who was waiting for requests to be - * put back. - */ -void blk_clear_queue_congested(request_queue_t *q, int rw) -{ - enum bdi_state bit; - wait_queue_head_t *wqh = &congestion_wqh[rw]; - - bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; - clear_bit(bit, &q->backing_dev_info.state); - smp_mb__after_clear_bit(); - if (waitqueue_active(wqh)) - wake_up(wqh); -} -EXPORT_SYMBOL(blk_clear_queue_congested); - -/* - * A queue has just entered congestion. Flag that in the queue's VM-visible - * state flags and increment the global gounter of congested queues. - */ -void blk_set_queue_congested(request_queue_t *q, int rw) -{ - enum bdi_state bit; - - bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; - set_bit(bit, &q->backing_dev_info.state); -} -EXPORT_SYMBOL(blk_set_queue_congested); - /** * blk_get_backing_dev_info - get the address of a queue's backing_dev_info * @bdev: device @@ -2755,41 +2719,6 @@ void blk_end_sync_rq(struct request *rq, int error) } EXPORT_SYMBOL(blk_end_sync_rq); -/** - * blk_congestion_wait - wait for a queue to become uncongested - * @rw: READ or WRITE - * @timeout: timeout in jiffies - * - * Waits for up to @timeout jiffies for a queue (any queue) to exit congestion. - * If no queues are congested then just wait for the next request to be - * returned. - */ -long blk_congestion_wait(int rw, long timeout) -{ - long ret; - DEFINE_WAIT(wait); - wait_queue_head_t *wqh = &congestion_wqh[rw]; - - prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); - ret = io_schedule_timeout(timeout); - finish_wait(wqh, &wait); - return ret; -} - -EXPORT_SYMBOL(blk_congestion_wait); - -/** - * blk_congestion_end - wake up sleepers on a congestion queue - * @rw: READ or WRITE - */ -void blk_congestion_end(int rw) -{ - wait_queue_head_t *wqh = &congestion_wqh[rw]; - - if (waitqueue_active(wqh)) - wake_up(wqh); -} - /* * Has to be called with the request spinlock acquired */ diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 655d816760e5..a625576fdeeb 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -602,7 +603,7 @@ static void process_write(struct crypt_io *io) /* out of memory -> run queues */ if (remaining) - blk_congestion_wait(bio_data_dir(clone), HZ/100); + congestion_wait(bio_data_dir(clone), HZ/100); } } diff --git a/fs/fat/file.c b/fs/fat/file.c index f4b8f8b3fbdd..8337451e7897 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -13,6 +13,7 @@ #include #include #include +#include #include int fat_generic_ioctl(struct inode *inode, struct file *filp, @@ -118,7 +119,7 @@ static int fat_file_release(struct inode *inode, struct file *filp) if ((filp->f_mode & FMODE_WRITE) && MSDOS_SB(inode->i_sb)->options.flush) { fat_flush_inodes(inode->i_sb, inode, NULL); - blk_congestion_wait(WRITE, HZ/10); + congestion_wait(WRITE, HZ/10); } return 0; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f6675d2c386c..ca92ac36fe9d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -57,6 +57,8 @@ #include #include #include +#include + #include #include @@ -395,7 +397,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) out: clear_bit(BDI_write_congested, &bdi->state); wake_up_all(&nfs_write_congestion); - writeback_congestion_end(); + congestion_end(WRITE); return err; } diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index ad8cbc49883a..85ce23268302 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -53,6 +53,7 @@ #include #include #include +#include /* gets a struct reiserfs_journal_list * from a list head */ #define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ @@ -970,7 +971,7 @@ int reiserfs_async_progress_wait(struct super_block *s) DEFINE_WAIT(wait); struct reiserfs_journal *j = SB_JOURNAL(s); if (atomic_read(&j->j_async_throttle)) - blk_congestion_wait(WRITE, HZ / 10); + congestion_wait(WRITE, HZ / 10); return 0; } diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index d59737589815..004baf600611 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "time.h" #include "kmem.h" @@ -53,7 +54,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags) printk(KERN_ERR "XFS: possible memory allocation " "deadlock in %s (mode:0x%x)\n", __FUNCTION__, lflags); - blk_congestion_wait(WRITE, HZ/50); + congestion_wait(WRITE, HZ/50); } while (1); } @@ -131,7 +132,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) printk(KERN_ERR "XFS: possible memory allocation " "deadlock in %s (mode:0x%x)\n", __FUNCTION__, lflags); - blk_congestion_wait(WRITE, HZ/50); + congestion_wait(WRITE, HZ/50); } while (1); } diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 9bbadafdcb00..db5f5a3608ca 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "xfs_linux.h" STATIC kmem_zone_t *xfs_buf_zone; @@ -395,7 +396,7 @@ _xfs_buf_lookup_pages( XFS_STATS_INC(xb_page_retries); xfsbufd_wakeup(0, gfp_mask); - blk_congestion_wait(WRITE, HZ/50); + congestion_wait(WRITE, HZ/50); goto retry; } diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index f7a1390d67f5..7011d6255593 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -10,6 +10,8 @@ #include +struct page; + /* * Bits in backing_dev_info.state */ @@ -88,6 +90,11 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) (1 << BDI_write_congested)); } +void clear_bdi_congested(struct backing_dev_info *bdi, int rw); +void set_bdi_congested(struct backing_dev_info *bdi, int rw); +long congestion_wait(int rw, long timeout); +void congestion_end(int rw); + #define bdi_cap_writeback_dirty(bdi) \ (!((bdi)->capabilities & BDI_CAP_NO_WRITEBACK)) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9575e3a5ff2a..7bfcde2d5578 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -651,8 +651,26 @@ extern void blk_recount_segments(request_queue_t *, struct bio *); extern int scsi_cmd_ioctl(struct file *, struct gendisk *, unsigned int, void __user *); extern int sg_scsi_ioctl(struct file *, struct request_queue *, struct gendisk *, struct scsi_ioctl_command __user *); -extern void blk_clear_queue_congested(request_queue_t *q, int rw); -extern void blk_set_queue_congested(request_queue_t *q, int rw); + +/* + * A queue has just exitted congestion. Note this in the global counter of + * congested queues, and wake up anyone who was waiting for requests to be + * put back. + */ +static inline void blk_clear_queue_congested(request_queue_t *q, int rw) +{ + clear_bdi_congested(&q->backing_dev_info, rw); +} + +/* + * A queue has just entered congestion. Flag that in the queue's VM-visible + * state flags and increment the global gounter of congested queues. + */ +static inline void blk_set_queue_congested(request_queue_t *q, int rw) +{ + set_bdi_congested(&q->backing_dev_info, rw); +} + extern void blk_start_queue(request_queue_t *q); extern void blk_stop_queue(request_queue_t *q); extern void blk_sync_queue(struct request_queue *q); @@ -767,10 +785,8 @@ extern int blk_queue_init_tags(request_queue_t *, int, struct blk_queue_tag *); extern void blk_queue_free_tags(request_queue_t *); extern int blk_queue_resize_tags(request_queue_t *, int); extern void blk_queue_invalidate_tags(request_queue_t *); -extern long blk_congestion_wait(int rw, long timeout); extern struct blk_queue_tag *blk_init_tags(int); extern void blk_free_tags(struct blk_queue_tag *); -extern void blk_congestion_end(int rw); static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, int tag) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index a341c8032866..fc35e6bdfb93 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -85,7 +85,6 @@ int wakeup_pdflush(long nr_pages); void laptop_io_completion(void); void laptop_sync_completion(void); void throttle_vm_writeout(void); -void writeback_congestion_end(void); /* These are exported to sysctl. */ extern int dirty_background_ratio; diff --git a/mm/Makefile b/mm/Makefile index 12b3a4eee88d..f3c077eb0b8e 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -10,7 +10,8 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ page_alloc.o page-writeback.o pdflush.o \ readahead.o swap.o truncate.o vmscan.o \ - prio_tree.o util.o mmzone.o vmstat.o $(mmu-y) + prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ + $(mmu-y) ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy) obj-y += bounce.o diff --git a/mm/backing-dev.c b/mm/backing-dev.c new file mode 100644 index 000000000000..f50a2811f9dc --- /dev/null +++ b/mm/backing-dev.c @@ -0,0 +1,69 @@ + +#include +#include +#include +#include +#include + +static wait_queue_head_t congestion_wqh[2] = { + __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), + __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) + }; + + +void clear_bdi_congested(struct backing_dev_info *bdi, int rw) +{ + enum bdi_state bit; + wait_queue_head_t *wqh = &congestion_wqh[rw]; + + bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; + clear_bit(bit, &bdi->state); + smp_mb__after_clear_bit(); + if (waitqueue_active(wqh)) + wake_up(wqh); +} +EXPORT_SYMBOL(clear_bdi_congested); + +void set_bdi_congested(struct backing_dev_info *bdi, int rw) +{ + enum bdi_state bit; + + bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; + set_bit(bit, &bdi->state); +} +EXPORT_SYMBOL(set_bdi_congested); + +/** + * congestion_wait - wait for a backing_dev to become uncongested + * @rw: READ or WRITE + * @timeout: timeout in jiffies + * + * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit + * write congestion. If no backing_devs are congested then just wait for the + * next write to be completed. + */ +long congestion_wait(int rw, long timeout) +{ + long ret; + DEFINE_WAIT(wait); + wait_queue_head_t *wqh = &congestion_wqh[rw]; + + prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); + ret = io_schedule_timeout(timeout); + finish_wait(wqh, &wait); + return ret; +} +EXPORT_SYMBOL(congestion_wait); + +/** + * congestion_end - wake up sleepers on a congested backing_dev_info + * @rw: READ or WRITE + */ +void congestion_end(int rw) +{ + wait_queue_head_t *wqh = &congestion_wqh[rw]; + + if (waitqueue_active(wqh)) + wake_up(wqh); +} +EXPORT_SYMBOL(congestion_end); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index a0f339057449..8d9b19f239c3 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -222,7 +222,7 @@ static void balance_dirty_pages(struct address_space *mapping) if (pages_written >= write_chunk) break; /* We've done our duty */ } - blk_congestion_wait(WRITE, HZ/10); + congestion_wait(WRITE, HZ/10); } if (nr_reclaimable + global_page_state(NR_WRITEBACK) @@ -314,7 +314,7 @@ void throttle_vm_writeout(void) if (global_page_state(NR_UNSTABLE_NFS) + global_page_state(NR_WRITEBACK) <= dirty_thresh) break; - blk_congestion_wait(WRITE, HZ/10); + congestion_wait(WRITE, HZ/10); } } @@ -351,7 +351,7 @@ static void background_writeout(unsigned long _min_pages) min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { /* Wrote less than expected */ - blk_congestion_wait(WRITE, HZ/10); + congestion_wait(WRITE, HZ/10); if (!wbc.encountered_congestion) break; } @@ -422,7 +422,7 @@ static void wb_kupdate(unsigned long arg) writeback_inodes(&wbc); if (wbc.nr_to_write > 0) { if (wbc.encountered_congestion) - blk_congestion_wait(WRITE, HZ/10); + congestion_wait(WRITE, HZ/10); else break; /* All the old data is written */ } @@ -955,15 +955,6 @@ int test_set_page_writeback(struct page *page) } EXPORT_SYMBOL(test_set_page_writeback); -/* - * Wakes up tasks that are being throttled due to writeback congestion - */ -void writeback_congestion_end(void) -{ - blk_congestion_end(WRITE); -} -EXPORT_SYMBOL(writeback_congestion_end); - /* * Return true if any of the pages in the mapping are marged with the * passed tag. diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 40db96a655d0..afee38f04d84 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -1050,7 +1051,7 @@ nofail_alloc: if (page) goto got_pg; if (gfp_mask & __GFP_NOFAIL) { - blk_congestion_wait(WRITE, HZ/50); + congestion_wait(WRITE, HZ/50); goto nofail_alloc; } } @@ -1113,7 +1114,7 @@ rebalance: do_retry = 1; } if (do_retry) { - blk_congestion_wait(WRITE, HZ/50); + congestion_wait(WRITE, HZ/50); goto rebalance; } diff --git a/mm/shmem.c b/mm/shmem.c index b378f66cf2f9..4959535fc14c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -1131,7 +1132,7 @@ repeat: page_cache_release(swappage); if (error == -ENOMEM) { /* let kswapd refresh zone for GFP_ATOMICs */ - blk_congestion_wait(WRITE, HZ/50); + congestion_wait(WRITE, HZ/50); } goto repeat; } diff --git a/mm/vmscan.c b/mm/vmscan.c index af73c14f9d88..f05527bf792b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1059,7 +1059,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask) /* Take a nap, wait for some writeback to complete */ if (sc.nr_scanned && priority < DEF_PRIORITY - 2) - blk_congestion_wait(WRITE, HZ/10); + congestion_wait(WRITE, HZ/10); } /* top priority shrink_caches still had more to do? don't OOM, then */ if (!sc.all_unreclaimable) @@ -1214,7 +1214,7 @@ scan: * another pass across the zones. */ if (total_scanned && priority < DEF_PRIORITY - 2) - blk_congestion_wait(WRITE, HZ/10); + congestion_wait(WRITE, HZ/10); /* * We do this so kswapd doesn't build up large priorities for @@ -1458,7 +1458,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) goto out; if (sc.nr_scanned && prio < DEF_PRIORITY - 2) - blk_congestion_wait(WRITE, HZ / 10); + congestion_wait(WRITE, HZ / 10); } lru_pages = 0; -- cgit v1.2.3-58-ga151 From 0261d6886eb5822867a5310dc1e4479b940a1942 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 30 Oct 2006 19:07:48 +0100 Subject: [PATCH] CFQ: use irq safe locking in cfq_cic_link() If cfq_set_request() is called for a new process AND a non-fs io request (so that __GFP_WAIT may not be set), cfq_cic_link() may use spin_lock_irq() and spin_unlock_irq() with interrupts already disabled. Fix is to always use irq safe locking in cfq_cic_link() Acked-By: Arjan van de Ven Acked-by: Ingo Molnar Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- block/cfq-iosched.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index d3d76136f53a..5c3da894a56c 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1362,6 +1362,7 @@ cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, struct rb_node **p; struct rb_node *parent; struct cfq_io_context *__cic; + unsigned long flags; void *k; cic->ioc = ioc; @@ -1391,9 +1392,9 @@ restart: rb_link_node(&cic->rb_node, parent, p); rb_insert_color(&cic->rb_node, &ioc->cic_root); - spin_lock_irq(cfqd->queue->queue_lock); + spin_lock_irqsave(cfqd->queue->queue_lock, flags); list_add(&cic->queue_list, &cfqd->cic_list); - spin_unlock_irq(cfqd->queue->queue_lock); + spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); } /* -- cgit v1.2.3-58-ga151 From c1b707d253fe918b92882cff1dbd926b47e14fd2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 30 Oct 2006 19:54:23 +0100 Subject: [PATCH] CFQ: bad locking in changed_ioprio() When the ioprio code recently got juggled a bit, a bug was introduced. changed_ioprio() is no longer called with interrupts disabled, so using plain spin_lock() on the queue_lock is a bug. Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- block/cfq-iosched.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 5c3da894a56c..25c4e7ed0d00 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1215,11 +1215,12 @@ static inline void changed_ioprio(struct cfq_io_context *cic) { struct cfq_data *cfqd = cic->key; struct cfq_queue *cfqq; + unsigned long flags; if (unlikely(!cfqd)) return; - spin_lock(cfqd->queue->queue_lock); + spin_lock_irqsave(cfqd->queue->queue_lock, flags); cfqq = cic->cfqq[ASYNC]; if (cfqq) { @@ -1236,7 +1237,7 @@ static inline void changed_ioprio(struct cfq_io_context *cic) if (cfqq) cfq_mark_cfqq_prio_changed(cfqq); - spin_unlock(cfqd->queue->queue_lock); + spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); } static void cfq_ioc_set_ioprio(struct io_context *ioc) -- cgit v1.2.3-58-ga151 From 5ddfe9691c91a244e8d1be597b6428fcefd58103 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 30 Oct 2006 22:07:21 -0800 Subject: [PATCH] md: check bio address after mapping through partitions. Partitions are not limited to live within a device. So we should range check after partition mapping. Note that 'maxsector' was being used for two different things. I have split off the second usage into 'old_sector' so that maxsector can be still be used for it's primary usage later in the function. Cc: Jens Axboe Signed-off-by: Neil Brown Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/ll_rw_blk.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 136066583c68..c7b1dac8bee9 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -2999,6 +2999,7 @@ void generic_make_request(struct bio *bio) { request_queue_t *q; sector_t maxsector; + sector_t old_sector; int ret, nr_sectors = bio_sectors(bio); dev_t old_dev; @@ -3027,7 +3028,7 @@ void generic_make_request(struct bio *bio) * NOTE: we don't repeat the blk_size check for each new device. * Stacking drivers are expected to know what they are doing. */ - maxsector = -1; + old_sector = -1; old_dev = 0; do { char b[BDEVNAME_SIZE]; @@ -3061,15 +3062,30 @@ end_io: */ blk_partition_remap(bio); - if (maxsector != -1) + if (old_sector != -1) blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, - maxsector); + old_sector); blk_add_trace_bio(q, bio, BLK_TA_QUEUE); - maxsector = bio->bi_sector; + old_sector = bio->bi_sector; old_dev = bio->bi_bdev->bd_dev; + maxsector = bio->bi_bdev->bd_inode->i_size >> 9; + if (maxsector) { + sector_t sector = bio->bi_sector; + + if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { + /* + * This may well happen - partitions are not checked + * to make sure they are within the size of the + * whole device. + */ + handle_bad_sector(bio); + goto end_io; + } + } + ret = q->make_request_fn(q, bio); } while (ret); } -- cgit v1.2.3-58-ga151 From 5fccbf61be2a7f32d2002b04afca4c5009612a58 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 31 Oct 2006 14:21:55 +0100 Subject: [PATCH] CFQ: request <-> request merging rr_list fixup In very rare circumstances would we be pruning a merged request and at the same time delete the implicated cfqq from the rr_list, and not readd it when the merged request got added. This could cause io stalls until that process issued io again. Fix it up by putting the rr_list add handling into cfq_add_rq_rb(), identical to how pruning is handled in cfq_del_rq_rb(). This fixes a hang reproducible with fsx-linux. Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- block/cfq-iosched.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 25c4e7ed0d00..1d9c3c70a9a0 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -456,6 +456,9 @@ static void cfq_add_rq_rb(struct request *rq) */ while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL) cfq_dispatch_insert(cfqd->queue, __alias); + + if (!cfq_cfqq_on_rr(cfqq)) + cfq_add_cfqq_rr(cfqd, cfqq); } static inline void @@ -1652,9 +1655,6 @@ static void cfq_insert_request(request_queue_t *q, struct request *rq) cfq_add_rq_rb(rq); - if (!cfq_cfqq_on_rr(cfqq)) - cfq_add_cfqq_rr(cfqd, cfqq); - list_add_tail(&rq->queuelist, &cfqq->fifo); cfq_rq_enqueued(cfqd, cfqq, rq); -- cgit v1.2.3-58-ga151 From df66b8552be5fdab5c4b4d53ee08b99388b9bd02 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 2 Nov 2006 22:06:56 -0800 Subject: [PATCH] tidy "md: check bio address after mapping through partitions" Neil's xterms are too wide. Cc: Neil Brown Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/ll_rw_blk.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index c7b1dac8bee9..9eaee6640535 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -3075,11 +3075,12 @@ end_io: if (maxsector) { sector_t sector = bio->bi_sector; - if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { + if (maxsector < nr_sectors || + maxsector - nr_sectors < sector) { /* - * This may well happen - partitions are not checked - * to make sure they are within the size of the - * whole device. + * This may well happen - partitions are not + * checked to make sure they are within the size + * of the whole device. */ handle_bad_sector(bio); goto end_io; -- cgit v1.2.3-58-ga151 From 616e8a091a035c0bd9b871695f4af191df123caa Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 13 Nov 2006 18:04:59 +0100 Subject: [PATCH] Fix bad data direction in SG_IO Contrary to what the name misleads you to believe, SG_DXFER_TO_FROM_DEV is really just a normal read seen from the device side. This patch fixes http://lkml.org/lkml/2006/10/13/100 Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- block/scsi_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 2dc326421a24..a19338e6215d 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -246,10 +246,10 @@ static int sg_io(struct file *file, request_queue_t *q, switch (hdr->dxfer_direction) { default: return -EINVAL; - case SG_DXFER_TO_FROM_DEV: case SG_DXFER_TO_DEV: writing = 1; break; + case SG_DXFER_TO_FROM_DEV: case SG_DXFER_FROM_DEV: break; } -- cgit v1.2.3-58-ga151 From 097b8457dafe7efc22201b4062e2d1e82e494067 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 16 Nov 2006 01:19:31 -0800 Subject: [PATCH] scsi: clear garbage after CDBs on SG_IO ATAPI devices transfer fixed number of bytes for CDBs (12 or 16). Some ATAPI devices choke when shorter CDB is used and the left bytes contain garbage. Block SG_IO cleared left bytes but SCSI SG_IO didn't. This patch makes SCSI SG_IO clear it and simplify CDB clearing in block SG_IO. Signed-off-by: Tejun Heo Cc: Mathieu Fluhr Cc: James Bottomley Cc: Douglas Gilbert Acked-by: Jens Axboe Cc: Acked-by: Jeff Garzik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/scsi_ioctl.c | 3 +-- drivers/scsi/scsi_lib.c | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index a19338e6215d..e55a75621437 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -286,9 +286,8 @@ static int sg_io(struct file *file, request_queue_t *q, * fill in request structure */ rq->cmd_len = hdr->cmd_len; + memset(rq->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */ memcpy(rq->cmd, cmd, hdr->cmd_len); - if (sizeof(rq->cmd) != hdr->cmd_len) - memset(rq->cmd + hdr->cmd_len, 0, sizeof(rq->cmd) - hdr->cmd_len); memset(sense, 0, sizeof(sense)); rq->sense = sense; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index d2c02df12fdc..3ac4890ce086 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -410,6 +410,7 @@ int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd, goto free_req; req->cmd_len = cmd_len; + memset(req->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */ memcpy(req->cmd, cmd, req->cmd_len); req->sense = sioc->sense; req->sense_len = 0; -- cgit v1.2.3-58-ga151