summaryrefslogtreecommitdiff
path: root/fs/buffer.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-06-25 16:00:17 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-06-25 16:00:17 -0700
commite4bc13adfd016fc1036838170288b5680d1a98b0 (patch)
tree8d2cb749397749439732f3a827cb7f2336408337 /fs/buffer.c
parentad90fb97515b732bc27a0109baa10af636c3c8cd (diff)
parent3e1534cf4a2a8278e811e7c84a79da1a02347b8b (diff)
Merge branch 'for-4.2/writeback' of git://git.kernel.dk/linux-block
Pull cgroup writeback support from Jens Axboe: "This is the big pull request for adding cgroup writeback support. This code has been in development for a long time, and it has been simmering in for-next for a good chunk of this cycle too. This is one of those problems that has been talked about for at least half a decade, finally there's a solution and code to go with it. Also see last weeks writeup on LWN: http://lwn.net/Articles/648292/" * 'for-4.2/writeback' of git://git.kernel.dk/linux-block: (85 commits) writeback, blkio: add documentation for cgroup writeback support vfs, writeback: replace FS_CGROUP_WRITEBACK with SB_I_CGROUPWB writeback: do foreign inode detection iff cgroup writeback is enabled v9fs: fix error handling in v9fs_session_init() bdi: fix wrong error return value in cgwb_create() buffer: remove unusued 'ret' variable writeback: disassociate inodes from dying bdi_writebacks writeback: implement foreign cgroup inode bdi_writeback switching writeback: add lockdep annotation to inode_to_wb() writeback: use unlocked_inode_to_wb transaction in inode_congested() writeback: implement unlocked_inode_to_wb transaction and use it for stat updates writeback: implement [locked_]inode_to_wb_and_lock_list() writeback: implement foreign cgroup inode detection writeback: make writeback_control track the inode being written back writeback: relocate wb[_try]_get(), wb_put(), inode_{attach|detach}_wb() mm: vmscan: disable memcg direct reclaim stalling if cgroup writeback support is in use writeback: implement memcg writeback domain based throttling writeback: reset wb_domain->dirty_limit[_tstmp] when memcg domain size changes writeback: implement memcg wb_domain writeback: update wb_over_bg_thresh() to use wb_domain aware operations ...
Diffstat (limited to 'fs/buffer.c')
-rw-r--r--fs/buffer.c64
1 files changed, 49 insertions, 15 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index f96173ad62d9..1cf7a53a0277 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -30,6 +30,7 @@
#include <linux/quotaops.h>
#include <linux/highmem.h>
#include <linux/export.h>
+#include <linux/backing-dev.h>
#include <linux/writeback.h>
#include <linux/hash.h>
#include <linux/suspend.h>
@@ -44,6 +45,9 @@
#include <trace/events/block.h>
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
+static int submit_bh_wbc(int rw, struct buffer_head *bh,
+ unsigned long bio_flags,
+ struct writeback_control *wbc);
#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
@@ -623,21 +627,22 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
*
* If warn is true, then emit a warning if the page is not uptodate and has
* not been truncated.
+ *
+ * The caller must hold mem_cgroup_begin_page_stat() lock.
*/
-static void __set_page_dirty(struct page *page,
- struct address_space *mapping, int warn)
+static void __set_page_dirty(struct page *page, struct address_space *mapping,
+ struct mem_cgroup *memcg, int warn)
{
unsigned long flags;
spin_lock_irqsave(&mapping->tree_lock, flags);
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(warn && !PageUptodate(page));
- account_page_dirtied(page, mapping);
+ account_page_dirtied(page, mapping, memcg);
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
}
spin_unlock_irqrestore(&mapping->tree_lock, flags);
- __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}
/*
@@ -668,6 +673,7 @@ static void __set_page_dirty(struct page *page,
int __set_page_dirty_buffers(struct page *page)
{
int newly_dirty;
+ struct mem_cgroup *memcg;
struct address_space *mapping = page_mapping(page);
if (unlikely(!mapping))
@@ -683,11 +689,22 @@ int __set_page_dirty_buffers(struct page *page)
bh = bh->b_this_page;
} while (bh != head);
}
+ /*
+ * Use mem_group_begin_page_stat() to keep PageDirty synchronized with
+ * per-memcg dirty page counters.
+ */
+ memcg = mem_cgroup_begin_page_stat(page);
newly_dirty = !TestSetPageDirty(page);
spin_unlock(&mapping->private_lock);
if (newly_dirty)
- __set_page_dirty(page, mapping, 1);
+ __set_page_dirty(page, mapping, memcg, 1);
+
+ mem_cgroup_end_page_stat(memcg);
+
+ if (newly_dirty)
+ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+
return newly_dirty;
}
EXPORT_SYMBOL(__set_page_dirty_buffers);
@@ -1158,11 +1175,18 @@ void mark_buffer_dirty(struct buffer_head *bh)
if (!test_set_buffer_dirty(bh)) {
struct page *page = bh->b_page;
+ struct address_space *mapping = NULL;
+ struct mem_cgroup *memcg;
+
+ memcg = mem_cgroup_begin_page_stat(page);
if (!TestSetPageDirty(page)) {
- struct address_space *mapping = page_mapping(page);
+ mapping = page_mapping(page);
if (mapping)
- __set_page_dirty(page, mapping, 0);
+ __set_page_dirty(page, mapping, memcg, 0);
}
+ mem_cgroup_end_page_stat(memcg);
+ if (mapping)
+ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}
}
EXPORT_SYMBOL(mark_buffer_dirty);
@@ -1684,8 +1708,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
struct buffer_head *bh, *head;
unsigned int blocksize, bbits;
int nr_underway = 0;
- int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
- WRITE_SYNC : WRITE);
+ int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
head = create_page_buffers(page, inode,
(1 << BH_Dirty)|(1 << BH_Uptodate));
@@ -1774,7 +1797,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
- submit_bh(write_op, bh);
+ submit_bh_wbc(write_op, bh, 0, wbc);
nr_underway++;
}
bh = next;
@@ -1828,7 +1851,7 @@ recover:
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
clear_buffer_dirty(bh);
- submit_bh(write_op, bh);
+ submit_bh_wbc(write_op, bh, 0, wbc);
nr_underway++;
}
bh = next;
@@ -2993,7 +3016,8 @@ void guard_bio_eod(int rw, struct bio *bio)
}
}
-int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
+static int submit_bh_wbc(int rw, struct buffer_head *bh,
+ unsigned long bio_flags, struct writeback_control *wbc)
{
struct bio *bio;
@@ -3015,6 +3039,11 @@ int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
*/
bio = bio_alloc(GFP_NOIO, 1);
+ if (wbc) {
+ wbc_init_bio(wbc, bio);
+ wbc_account_io(wbc, bh->b_page, bh->b_size);
+ }
+
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev;
bio->bi_io_vec[0].bv_page = bh->b_page;
@@ -3039,11 +3068,16 @@ int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
submit_bio(rw, bio);
return 0;
}
+
+int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
+{
+ return submit_bh_wbc(rw, bh, bio_flags, NULL);
+}
EXPORT_SYMBOL_GPL(_submit_bh);
int submit_bh(int rw, struct buffer_head *bh)
{
- return _submit_bh(rw, bh, 0);
+ return submit_bh_wbc(rw, bh, 0, NULL);
}
EXPORT_SYMBOL(submit_bh);
@@ -3232,8 +3266,8 @@ int try_to_free_buffers(struct page *page)
* to synchronise against __set_page_dirty_buffers and prevent the
* dirty bit from being lost.
*/
- if (ret && TestClearPageDirty(page))
- account_page_cleaned(page, mapping);
+ if (ret)
+ cancel_dirty_page(page);
spin_unlock(&mapping->private_lock);
out:
if (buffers_to_free) {