diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-21 19:21:20 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-21 19:21:20 -0700 |
commit | 62c8d922783a0fa41a9b4ca004f0467d6ca9be48 (patch) | |
tree | f9090aa5e65edab8528a0b2bb5d29a629a6e439d | |
parent | 06930b94d19a8641f8a2dc9d6ec27e2a5a39d17c (diff) | |
parent | 500242ac6152b8f20903f043a86e6fdd51478845 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-nmw
Pull GFS2 changes from Steven Whitehouse.
* git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-nmw: (24 commits)
GFS2: Fix quota adjustment return code
GFS2: Add rgrp information to block_alloc trace point
GFS2: Eliminate unused "new" parameter to gfs2_meta_indirect_buffer
GFS2: Update glock doc to add new stats info
GFS2: Update main gfs2 doc
GFS2: Remove redundant metadata block type check
GFS2: Fix sgid propagation when using ACLs
GFS2: eliminate log elements and simplify
GFS2: Eliminate vestigial sd_log_le_rg
GFS2: Eliminate needless parameter from function gfs2_setbit
GFS2: Log code fixes
GFS2: Remove unused argument from gfs2_internal_read
GFS2: Remove bd_list_tr
GFS2: Remove duplicate log code
GFS2: Clean up log write code path
GFS2: Use variable rather than qa to determine if unstuff necessary
GFS2: Change variable blk to biblk
GFS2: Fix function parameter comments in rgrp.c
GFS2: Eliminate offset parameter to gfs2_setbit
GFS2: Use slab for block reservation memory
...
-rw-r--r-- | Documentation/filesystems/gfs2-glocks.txt | 119 | ||||
-rw-r--r-- | Documentation/filesystems/gfs2.txt | 9 | ||||
-rw-r--r-- | fs/gfs2/acl.c | 12 | ||||
-rw-r--r-- | fs/gfs2/aops.c | 18 | ||||
-rw-r--r-- | fs/gfs2/bmap.c | 10 | ||||
-rw-r--r-- | fs/gfs2/file.c | 12 | ||||
-rw-r--r-- | fs/gfs2/glops.c | 6 | ||||
-rw-r--r-- | fs/gfs2/incore.h | 26 | ||||
-rw-r--r-- | fs/gfs2/inode.h | 3 | ||||
-rw-r--r-- | fs/gfs2/log.c | 103 | ||||
-rw-r--r-- | fs/gfs2/log.h | 2 | ||||
-rw-r--r-- | fs/gfs2/lops.c | 520 | ||||
-rw-r--r-- | fs/gfs2/lops.h | 14 | ||||
-rw-r--r-- | fs/gfs2/main.c | 26 | ||||
-rw-r--r-- | fs/gfs2/meta_io.c | 28 | ||||
-rw-r--r-- | fs/gfs2/meta_io.h | 4 | ||||
-rw-r--r-- | fs/gfs2/ops_fstype.c | 1 | ||||
-rw-r--r-- | fs/gfs2/quota.c | 6 | ||||
-rw-r--r-- | fs/gfs2/rgrp.c | 102 | ||||
-rw-r--r-- | fs/gfs2/trace_gfs2.h | 16 | ||||
-rw-r--r-- | fs/gfs2/trans.c | 44 | ||||
-rw-r--r-- | fs/gfs2/util.c | 3 | ||||
-rw-r--r-- | fs/gfs2/util.h | 3 |
23 files changed, 588 insertions, 499 deletions
diff --git a/Documentation/filesystems/gfs2-glocks.txt b/Documentation/filesystems/gfs2-glocks.txt index 0494f78d87e4..fcc79957be63 100644 --- a/Documentation/filesystems/gfs2-glocks.txt +++ b/Documentation/filesystems/gfs2-glocks.txt @@ -61,7 +61,9 @@ go_unlock | Called on the final local unlock of a lock go_dump | Called to print content of object for debugfs file, or on | error to dump glock to the log. go_type | The type of the glock, LM_TYPE_..... -go_min_hold_time | The minimum hold time +go_callback | Called if the DLM sends a callback to drop this lock +go_flags | GLOF_ASPACE is set, if the glock has an address space + | associated with it The minimum hold time for each lock is the time after a remote lock grant for which we ignore remote demote requests. This is in order to @@ -89,6 +91,7 @@ go_demote_ok | Sometimes | Yes go_lock | Yes | No go_unlock | Yes | No go_dump | Sometimes | Yes +go_callback | Sometimes (N/A) | Yes N.B. Operations must not drop either the bit lock or the spinlock if its held on entry. go_dump and do_demote_ok must never block. @@ -111,4 +114,118 @@ itself (locking order as above), and the other, known as the iopen glock is used in conjunction with the i_nlink field in the inode to determine the lifetime of the inode in question. Locking of inodes is on a per-inode basis. Locking of rgrps is on a per rgrp basis. +In general we prefer to lock local locks prior to cluster locks. + + Glock Statistics + ------------------ + +The stats are divided into two sets: those relating to the +super block and those relating to an individual glock. The +super block stats are done on a per cpu basis in order to +try and reduce the overhead of gathering them. They are also +further divided by glock type. All timings are in nanoseconds. + +In the case of both the super block and glock statistics, +the same information is gathered in each case. The super +block timing statistics are used to provide default values for +the glock timing statistics, so that newly created glocks +should have, as far as possible, a sensible starting point. +The per-glock counters are initialised to zero when the +glock is created. The per-glock statistics are lost when +the glock is ejected from memory. + +The statistics are divided into three pairs of mean and +variance, plus two counters. The mean/variance pairs are +smoothed exponential estimates and the algorithm used is +one which will be very familiar to those used to calculation +of round trip times in network code. See "TCP/IP Illustrated, +Volume 1", W. Richard Stevens, sect 21.3, "Round-Trip Time Measurement", +p. 299 and onwards. Also, Volume 2, Sect. 25.10, p. 838 and onwards. +Unlike the TCP/IP Illustrated case, the mean and variance are +not scaled, but are in units of integer nanoseconds. + +The three pairs of mean/variance measure the following +things: + + 1. DLM lock time (non-blocking requests) + 2. DLM lock time (blocking requests) + 3. Inter-request time (again to the DLM) + +A non-blocking request is one which will complete right +away, whatever the state of the DLM lock in question. That +currently means any requests when (a) the current state of +the lock is exclusive, i.e. a lock demotion (b) the requested +state is either null or unlocked (again, a demotion) or (c) the +"try lock" flag is set. A blocking request covers all the other +lock requests. + +There are two counters. The first is there primarily to show +how many lock requests have been made, and thus how much data +has gone into the mean/variance calculations. The other counter +is counting queuing of holders at the top layer of the glock +code. Hopefully that number will be a lot larger than the number +of dlm lock requests issued. + +So why gather these statistics? There are several reasons +we'd like to get a better idea of these timings: + +1. To be able to better set the glock "min hold time" +2. To spot performance issues more easily +3. To improve the algorithm for selecting resource groups for +allocation (to base it on lock wait time, rather than blindly +using a "try lock") + +Due to the smoothing action of the updates, a step change in +some input quantity being sampled will only fully be taken +into account after 8 samples (or 4 for the variance) and this +needs to be carefully considered when interpreting the +results. + +Knowing both the time it takes a lock request to complete and +the average time between lock requests for a glock means we +can compute the total percentage of the time for which the +node is able to use a glock vs. time that the rest of the +cluster has its share. That will be very useful when setting +the lock min hold time. + +Great care has been taken to ensure that we +measure exactly the quantities that we want, as accurately +as possible. There are always inaccuracies in any +measuring system, but I hope this is as accurate as we +can reasonably make it. + +Per sb stats can be found here: +/sys/kernel/debug/gfs2/<fsname>/sbstats +Per glock stats can be found here: +/sys/kernel/debug/gfs2/<fsname>/glstats + +Assuming that debugfs is mounted on /sys/kernel/debug and also +that <fsname> is replaced with the name of the gfs2 filesystem +in question. + +The abbreviations used in the output as are follows: + +srtt - Smoothed round trip time for non-blocking dlm requests +srttvar - Variance estimate for srtt +srttb - Smoothed round trip time for (potentially) blocking dlm requests +srttvarb - Variance estimate for srttb +sirt - Smoothed inter-request time (for dlm requests) +sirtvar - Variance estimate for sirt +dlm - Number of dlm requests made (dcnt in glstats file) +queue - Number of glock requests queued (qcnt in glstats file) + +The sbstats file contains a set of these stats for each glock type (so 8 lines +for each type) and for each cpu (one column per cpu). The glstats file contains +a set of these stats for each glock in a similar format to the glocks file, but +using the format mean/variance for each of the timing stats. + +The gfs2_glock_lock_time tracepoint prints out the current values of the stats +for the glock in question, along with some addition information on each dlm +reply that is received: + +status - The status of the dlm request +flags - The dlm request flags +tdiff - The time taken by this specific request +(remaining fields as per above list) + diff --git a/Documentation/filesystems/gfs2.txt b/Documentation/filesystems/gfs2.txt index 4cda926628aa..cc4f2306609e 100644 --- a/Documentation/filesystems/gfs2.txt +++ b/Documentation/filesystems/gfs2.txt @@ -1,7 +1,7 @@ Global File System ------------------ -http://sources.redhat.com/cluster/wiki/ +https://fedorahosted.org/cluster/wiki/HomePage GFS is a cluster file system. It allows a cluster of computers to simultaneously use a block device that is shared between them (with FC, @@ -30,7 +30,8 @@ needed, simply: If you are using Fedora, you need to install the gfs2-utils package and, for lock_dlm, you will also need to install the cman package -and write a cluster.conf as per the documentation. +and write a cluster.conf as per the documentation. For F17 and above +cman has been replaced by the dlm package. GFS2 is not on-disk compatible with previous versions of GFS, but it is pretty close. @@ -39,8 +40,6 @@ The following man pages can be found at the URL above: fsck.gfs2 to repair a filesystem gfs2_grow to expand a filesystem online gfs2_jadd to add journals to a filesystem online - gfs2_tool to manipulate, examine and tune a filesystem - gfs2_quota to examine and change quota values in a filesystem + tunegfs2 to manipulate, examine and tune a filesystem gfs2_convert to convert a gfs filesystem to gfs2 in-place - mount.gfs2 to help mount(8) mount a filesystem mkfs.gfs2 to make a filesystem diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 230eb0f005b6..bd4a5892c93c 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -73,12 +73,8 @@ static int gfs2_set_mode(struct inode *inode, umode_t mode) int error = 0; if (mode != inode->i_mode) { - struct iattr iattr; - - iattr.ia_valid = ATTR_MODE; - iattr.ia_mode = mode; - - error = gfs2_setattr_simple(inode, &iattr); + inode->i_mode = mode; + mark_inode_dirty(inode); } return error; @@ -126,9 +122,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode) return PTR_ERR(acl); if (!acl) { mode &= ~current_umask(); - if (mode != inode->i_mode) - error = gfs2_set_mode(inode, mode); - return error; + return gfs2_set_mode(inode, mode); } if (S_ISDIR(inode->i_mode)) { diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 9b2ff0e851b1..e80a464850c8 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -36,8 +36,8 @@ #include "glops.h" -void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, - unsigned int from, unsigned int to) +static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, + unsigned int from, unsigned int to) { struct buffer_head *head = page_buffers(page); unsigned int bsize = head->b_size; @@ -517,15 +517,14 @@ out: /** * gfs2_internal_read - read an internal file * @ip: The gfs2 inode - * @ra_state: The readahead state (or NULL for no readahead) * @buf: The buffer to fill * @pos: The file position * @size: The amount to read * */ -int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, - char *buf, loff_t *pos, unsigned size) +int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, + unsigned size) { struct address_space *mapping = ip->i_inode.i_mapping; unsigned long index = *pos / PAGE_CACHE_SIZE; @@ -943,8 +942,8 @@ static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh) clear_buffer_dirty(bh); bd = bh->b_private; if (bd) { - if (!list_empty(&bd->bd_le.le_list) && !buffer_pinned(bh)) - list_del_init(&bd->bd_le.le_list); + if (!list_empty(&bd->bd_list) && !buffer_pinned(bh)) + list_del_init(&bd->bd_list); else gfs2_remove_from_journal(bh, current->journal_info, 0); } @@ -1084,10 +1083,9 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) bd = bh->b_private; if (bd) { gfs2_assert_warn(sdp, bd->bd_bh == bh); - gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr)); - if (!list_empty(&bd->bd_le.le_list)) { + if (!list_empty(&bd->bd_list)) { if (!buffer_pinned(bh)) - list_del_init(&bd->bd_le.le_list); + list_del_init(&bd->bd_list); else bd = NULL; } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 03c04febe26f..dab54099dd98 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -324,7 +324,7 @@ static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp) if (!dblock) return x + 1; - ret = gfs2_meta_indirect_buffer(ip, x+1, dblock, 0, &mp->mp_bh[x+1]); + ret = gfs2_meta_indirect_buffer(ip, x+1, dblock, &mp->mp_bh[x+1]); if (ret) return ret; } @@ -882,7 +882,7 @@ static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh, top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0]; bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs; } else { - error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh); + error = gfs2_meta_indirect_buffer(ip, height, block, &bh); if (error) return error; @@ -1169,6 +1169,7 @@ static int do_grow(struct inode *inode, u64 size) struct buffer_head *dibh; struct gfs2_qadata *qa = NULL; int error; + int unstuff = 0; if (gfs2_is_stuffed(ip) && (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { @@ -1183,13 +1184,14 @@ static int do_grow(struct inode *inode, u64 size) error = gfs2_inplace_reserve(ip, 1); if (error) goto do_grow_qunlock; + unstuff = 1; } error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT, 0); if (error) goto do_grow_release; - if (qa) { + if (unstuff) { error = gfs2_unstuff_dinode(ip, NULL); if (error) goto do_end_trans; @@ -1208,7 +1210,7 @@ static int do_grow(struct inode *inode, u64 size) do_end_trans: gfs2_trans_end(sdp); do_grow_release: - if (qa) { + if (unstuff) { gfs2_inplace_release(ip); do_grow_qunlock: gfs2_quota_unlock(ip); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index a3d2c9ee8d66..31b199f6efc1 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -558,14 +558,14 @@ fail: } /** - * gfs2_close - called to close a struct file + * gfs2_release - called to close a struct file * @inode: the inode the struct file belongs to * @file: the struct file being closed * * Returns: errno */ -static int gfs2_close(struct inode *inode, struct file *file) +static int gfs2_release(struct inode *inode, struct file *file) { struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; struct gfs2_file *fp; @@ -1005,7 +1005,7 @@ const struct file_operations gfs2_file_fops = { .unlocked_ioctl = gfs2_ioctl, .mmap = gfs2_mmap, .open = gfs2_open, - .release = gfs2_close, + .release = gfs2_release, .fsync = gfs2_fsync, .lock = gfs2_lock, .flock = gfs2_flock, @@ -1019,7 +1019,7 @@ const struct file_operations gfs2_dir_fops = { .readdir = gfs2_readdir, .unlocked_ioctl = gfs2_ioctl, .open = gfs2_open, - .release = gfs2_close, + .release = gfs2_release, .fsync = gfs2_fsync, .lock = gfs2_lock, .flock = gfs2_flock, @@ -1037,7 +1037,7 @@ const struct file_operations gfs2_file_fops_nolock = { .unlocked_ioctl = gfs2_ioctl, .mmap = gfs2_mmap, .open = gfs2_open, - .release = gfs2_close, + .release = gfs2_release, .fsync = gfs2_fsync, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, @@ -1049,7 +1049,7 @@ const struct file_operations gfs2_dir_fops_nolock = { .readdir = gfs2_readdir, .unlocked_ioctl = gfs2_ioctl, .open = gfs2_open, - .release = gfs2_close, + .release = gfs2_release, .fsync = gfs2_fsync, .llseek = default_llseek, }; diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 1656df7aacd2..4bdcf3784187 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -94,7 +94,6 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) /* A shortened, inline version of gfs2_trans_begin() */ tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); tr.tr_ip = (unsigned long)__builtin_return_address(0); - INIT_LIST_HEAD(&tr.tr_list_buf); gfs2_log_reserve(sdp, tr.tr_reserved); BUG_ON(current->journal_info); current->journal_info = &tr; @@ -379,11 +378,6 @@ int gfs2_inode_refresh(struct gfs2_inode *ip) if (error) return error; - if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) { - brelse(dibh); - return -EIO; - } - error = gfs2_dinode_in(ip, dibh->b_data); brelse(dibh); clear_bit(GIF_INVALID, &ip->i_flags); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 47d0bda5ac2b..aa9949e5de26 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -26,7 +26,7 @@ #define DIO_METADATA 0x00000020 struct gfs2_log_operations; -struct gfs2_log_element; +struct gfs2_bufdata; struct gfs2_holder; struct gfs2_glock; struct gfs2_quota_data; @@ -52,7 +52,7 @@ struct gfs2_log_header_host { */ struct gfs2_log_operations { - void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le); + void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); void (*lo_before_commit) (struct gfs2_sbd *sdp); void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai); void (*lo_before_scan) (struct gfs2_jdesc *jd, @@ -64,11 +64,6 @@ struct gfs2_log_operations { const char *lo_name; }; -struct gfs2_log_element { - struct list_head le_list; - const struct gfs2_log_operations *le_ops; -}; - #define GBF_FULL 1 struct gfs2_bitmap { @@ -118,15 +113,10 @@ TAS_BUFFER_FNS(Zeronew, zeronew) struct gfs2_bufdata { struct buffer_head *bd_bh; struct gfs2_glock *bd_gl; + u64 bd_blkno; - union { - struct list_head list_tr; - u64 blkno; - } u; -#define bd_list_tr u.list_tr -#define bd_blkno u.blkno - - struct gfs2_log_element bd_le; + struct list_head bd_list; + const struct gfs2_log_operations *bd_ops; struct gfs2_ail *bd_ail; struct list_head bd_ail_st_list; @@ -411,13 +401,10 @@ struct gfs2_trans { int tr_touched; - unsigned int tr_num_buf; unsigned int tr_num_buf_new; unsigned int tr_num_databuf_new; unsigned int tr_num_buf_rm; unsigned int tr_num_databuf_rm; - struct list_head tr_list_buf; - unsigned int tr_num_revoke; unsigned int tr_num_revoke_rm; }; @@ -699,7 +686,6 @@ struct gfs2_sbd { struct list_head sd_log_le_buf; struct list_head sd_log_le_revoke; - struct list_head sd_log_le_rg; struct list_head sd_log_le_databuf; struct list_head sd_log_le_ordered; @@ -716,7 +702,9 @@ struct gfs2_sbd { struct rw_semaphore sd_log_flush_lock; atomic_t sd_log_in_flight; + struct bio *sd_log_bio; wait_queue_head_t sd_log_flush_wait; + int sd_log_error; unsigned int sd_log_flush_head; u64 sd_log_flush_wrapped; diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 276e7b52b658..c53c7477f6da 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -17,10 +17,7 @@ extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); extern int gfs2_internal_read(struct gfs2_inode *ip, - struct file_ra_state *ra_state, char *buf, loff_t *pos, unsigned size); -extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, - unsigned int from, unsigned int to); extern void gfs2_set_aops(struct inode *inode); static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 4752eadc7f6e..f4beeb9c81c1 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -32,8 +32,6 @@ #include "dir.h" #include "trace_gfs2.h" -#define PULL 1 - /** * gfs2_struct2blk - compute stuff * @sdp: the filesystem @@ -359,18 +357,6 @@ retry: return 0; } -u64 gfs2_log_bmap(struct gfs2_sbd *sdp, unsigned int lbn) -{ - struct gfs2_journal_extent *je; - - list_for_each_entry(je, &sdp->sd_jdesc->extent_list, extent_list) { - if (lbn >= je->lblock && lbn < je->lblock + je->blocks) - return je->dblock + lbn - je->lblock; - } - - return -1; -} - /** * log_distance - Compute distance between two journal blocks * @sdp: The GFS2 superblock @@ -466,17 +452,6 @@ static unsigned int current_tail(struct gfs2_sbd *sdp) return tail; } -void gfs2_log_incr_head(struct gfs2_sbd *sdp) -{ - BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) && - (sdp->sd_log_flush_head != sdp->sd_log_head)); - - if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) { - sdp->sd_log_flush_head = 0; - sdp->sd_log_flush_wrapped = 1; - } -} - static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) { unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail); @@ -511,8 +486,8 @@ static int bd_cmp(void *priv, struct list_head *a, struct list_head *b) { struct gfs2_bufdata *bda, *bdb; - bda = list_entry(a, struct gfs2_bufdata, bd_le.le_list); - bdb = list_entry(b, struct gfs2_bufdata, bd_le.le_list); + bda = list_entry(a, struct gfs2_bufdata, bd_list); + bdb = list_entry(b, struct gfs2_bufdata, bd_list); if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr) return -1; @@ -530,8 +505,8 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp) gfs2_log_lock(sdp); list_sort(NULL, &sdp->sd_log_le_ordered, &bd_cmp); while (!list_empty(&sdp->sd_log_le_ordered)) { - bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_le.le_list); - list_move(&bd->bd_le.le_list, &written); + bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_list); + list_move(&bd->bd_list, &written); bh = bd->bd_bh; if (!buffer_dirty(bh)) continue; @@ -558,7 +533,7 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp) gfs2_log_lock(sdp); while (!list_empty(&sdp->sd_log_le_ordered)) { - bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_le.le_list); + bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_list); bh = bd->bd_bh; if (buffer_locked(bh)) { get_bh(bh); @@ -568,7 +543,7 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp) gfs2_log_lock(sdp); continue; } - list_del_init(&bd->bd_le.le_list); + list_del_init(&bd->bd_list); } gfs2_log_unlock(sdp); } @@ -580,25 +555,19 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp) * Returns: the initialized log buffer descriptor */ -static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) +static void log_write_header(struct gfs2_sbd *sdp, u32 flags) { - u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head); - struct buffer_head *bh; struct gfs2_log_header *lh; unsigned int tail; u32 hash; - - bh = sb_getblk(sdp->sd_vfs, blkno); - lock_buffer(bh); - memset(bh->b_data, 0, bh->b_size); - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); + int rw = WRITE_FLUSH_FUA | REQ_META; + struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO); + lh = page_address(page); + clear_page(lh); gfs2_ail1_empty(sdp); tail = current_tail(sdp); - lh = (struct gfs2_log_header *)bh->b_data; - memset(lh, 0, sizeof(struct gfs2_log_header)); lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); lh->lh_header.__pad0 = cpu_to_be64(0); @@ -608,31 +577,22 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) lh->lh_flags = cpu_to_be32(flags); lh->lh_tail = cpu_to_be32(tail); lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head); - hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header)); + hash = gfs2_disk_hash(page_address(page), sizeof(struct gfs2_log_header)); lh->lh_hash = cpu_to_be32(hash); - bh->b_end_io = end_buffer_write_sync; - get_bh(bh); if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) { gfs2_ordered_wait(sdp); log_flush_wait(sdp); - submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh); - } else { - submit_bh(WRITE_FLUSH_FUA | REQ_META, bh); + rw = WRITE_SYNC | REQ_META | REQ_PRIO; } - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - gfs2_io_error_bh(sdp, bh); - brelse(bh); + sdp->sd_log_idle = (tail == sdp->sd_log_flush_head); + gfs2_log_write_page(sdp, page); + gfs2_log_flush_bio(sdp, rw); + log_flush_wait(sdp); if (sdp->sd_log_tail != tail) log_pull_tail(sdp, tail); - else - gfs2_assert_withdraw(sdp, !pull); - - sdp->sd_log_idle = (tail == sdp->sd_log_flush_head); - gfs2_log_incr_head(sdp); } /** @@ -678,15 +638,14 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) gfs2_ordered_write(sdp); lops_before_commit(sdp); + gfs2_log_flush_bio(sdp, WRITE); if (sdp->sd_log_head != sdp->sd_log_flush_head) { - log_write_header(sdp, 0, 0); + log_write_header(sdp, 0); } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ - gfs2_log_lock(sdp); atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ trace_gfs2_log_blocks(sdp, -1); - gfs2_log_unlock(sdp); - log_write_header(sdp, 0, PULL); + log_write_header(sdp, 0); } lops_after_commit(sdp, ai); @@ -735,21 +694,6 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) gfs2_log_unlock(sdp); } -static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) -{ - struct list_head *head = &tr->tr_list_buf; - struct gfs2_bufdata *bd; - - gfs2_log_lock(sdp); - while (!list_empty(head)) { - bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr); - list_del_init(&bd->bd_list_tr); - tr->tr_num_buf--; - } - gfs2_log_unlock(sdp); - gfs2_assert_warn(sdp, !tr->tr_num_buf); -} - /** * gfs2_log_commit - Commit a transaction to the log * @sdp: the filesystem @@ -768,8 +712,6 @@ static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { log_refund(sdp, tr); - buf_lo_incore_commit(sdp, tr); - up_read(&sdp->sd_log_flush_lock); if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) || @@ -798,8 +740,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) sdp->sd_log_flush_head = sdp->sd_log_head; sdp->sd_log_flush_wrapped = 0; - log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, - (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL); + log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT); gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks); gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); @@ -854,11 +795,9 @@ int gfs2_logd(void *data) struct gfs2_sbd *sdp = data; unsigned long t = 1; DEFINE_WAIT(wait); - unsigned preflush; while (!kthread_should_stop()) { - preflush = atomic_read(&sdp->sd_log_pinned); if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { gfs2_ail1_empty(sdp); gfs2_log_flush(sdp, NULL); diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index ff07454b582c..3fd5215ea25f 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -52,8 +52,6 @@ extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, unsigned int ssize); extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); -extern void gfs2_log_incr_head(struct gfs2_sbd *sdp); -extern u64 gfs2_log_bmap(struct gfs2_sbd *sdp, unsigned int lbn); extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 6b1efb594d90..852c1be1dd3b 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -127,146 +127,277 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, atomic_dec(&sdp->sd_log_pinned); } - -static inline struct gfs2_log_descriptor *bh_log_desc(struct buffer_head *bh) +static void gfs2_log_incr_head(struct gfs2_sbd *sdp) { - return (struct gfs2_log_descriptor *)bh->b_data; + BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) && + (sdp->sd_log_flush_head != sdp->sd_log_head)); + + if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) { + sdp->sd_log_flush_head = 0; + sdp->sd_log_flush_wrapped = 1; + } } -static inline __be64 *bh_log_ptr(struct buffer_head *bh) +static u64 gfs2_log_bmap(struct gfs2_sbd *sdp) { - struct gfs2_log_descriptor *ld = bh_log_desc(bh); - return (__force __be64 *)(ld + 1); + unsigned int lbn = sdp->sd_log_flush_head; + struct gfs2_journal_extent *je; + u64 block; + + list_for_each_entry(je, &sdp->sd_jdesc->extent_list, extent_list) { + if (lbn >= je->lblock && lbn < je->lblock + je->blocks) { + block = je->dblock + lbn - je->lblock; + gfs2_log_incr_head(sdp); + return block; + } + } + + return -1; } -static inline __be64 *bh_ptr_end(struct buffer_head *bh) +/** + * gfs2_end_log_write_bh - end log write of pagecache data with buffers + * @sdp: The superblock + * @bvec: The bio_vec + * @error: The i/o status + * + * This finds the relavent buffers and unlocks then and sets the + * error flag according to the status of the i/o request. This is + * used when the log is writing data which has an in-place version + * that is pinned in the pagecache. + */ + +static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec, + int error) { - return (__force __be64 *)(bh->b_data + bh->b_size); + struct buffer_head *bh, *next; + struct page *page = bvec->bv_page; + unsigned size; + + bh = page_buffers(page); + size = bvec->bv_len; + while (bh_offset(bh) < bvec->bv_offset) + bh = bh->b_this_page; + do { + if (error) + set_buffer_write_io_error(bh); + unlock_buffer(bh); + next = bh->b_this_page; + size -= bh->b_size; + brelse(bh); + bh = next; + } while(bh && size); } /** - * gfs2_log_write_endio - End of I/O for a log buffer - * @bh: The buffer head - * @uptodate: I/O Status + * gfs2_end_log_write - end of i/o to the log + * @bio: The bio + * @error: Status of i/o request + * + * Each bio_vec contains either data from the pagecache or data + * relating to the log itself. Here we iterate over the bio_vec + * array, processing both kinds of data. * */ -static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate) +static void gfs2_end_log_write(struct bio *bio, int error) { - struct gfs2_sbd *sdp = bh->b_private; - bh->b_private = NULL; + struct gfs2_sbd *sdp = bio->bi_private; + struct bio_vec *bvec; + struct page *page; + int i; - end_buffer_write_sync(bh, uptodate); + if (error) { + sdp->sd_log_error = error; + fs_err(sdp, "Error %d writing to log\n", error); + } + + bio_for_each_segment(bvec, bio, i) { + page = bvec->bv_page; + if (page_has_buffers(page)) + gfs2_end_log_write_bh(sdp, bvec, error); + else + mempool_free(page, gfs2_page_pool); + } + + bio_put(bio); if (atomic_dec_and_test(&sdp->sd_log_in_flight)) wake_up(&sdp->sd_log_flush_wait); } /** - * gfs2_log_get_buf - Get and initialize a buffer to use for log control data - * @sdp: The GFS2 superblock + * gfs2_log_flush_bio - Submit any pending log bio + * @sdp: The superblock + * @rw: The rw flags * - * tReturns: the buffer_head + * Submit any pending part-built or full bio to the block device. If + * there is no pending bio, then this is a no-op. */ -static struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp) +void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw) { - u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head); - struct buffer_head *bh; + if (sdp->sd_log_bio) { + atomic_inc(&sdp->sd_log_in_flight); + submit_bio(rw, sdp->sd_log_bio); + sdp->sd_log_bio = NULL; + } +} - bh = sb_getblk(sdp->sd_vfs, blkno); - lock_buffer(bh); - memset(bh->b_data, 0, bh->b_size); - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); - gfs2_log_incr_head(sdp); - atomic_inc(&sdp->sd_log_in_flight); - bh->b_private = sdp; - bh->b_end_io = gfs2_log_write_endio; +/** + * gfs2_log_alloc_bio - Allocate a new bio for log writing + * @sdp: The superblock + * @blkno: The next device block number we want to write to + * + * This should never be called when there is a cached bio in the + * super block. When it returns, there will be a cached bio in the + * super block which will have as many bio_vecs as the device is + * happy to handle. + * + * Returns: Newly allocated bio + */ - return bh; +static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno) +{ + struct super_block *sb = sdp->sd_vfs; + unsigned nrvecs = bio_get_nr_vecs(sb->s_bdev); + struct bio *bio; + + BUG_ON(sdp->sd_log_bio); + + while (1) { + bio = bio_alloc(GFP_NOIO, nrvecs); + if (likely(bio)) + break; + nrvecs = max(nrvecs/2, 1U); + } + + bio->bi_sector = blkno * (sb->s_blocksize >> 9); + bio->bi_bdev = sb->s_bdev; + bio->bi_end_io = gfs2_end_log_write; + bio->bi_private = sdp; + + sdp->sd_log_bio = bio; + + return bio; } /** - * gfs2_fake_write_endio - - * @bh: The buffer head - * @uptodate: The I/O Status + * gfs2_log_get_bio - Get cached log bio, or allocate a new one + * @sdp: The superblock + * @blkno: The device block number we want to write to + * + * If there is a cached bio, then if the next block number is sequential + * with the previous one, return it, otherwise flush the bio to the + * device. If there is not a cached bio, or we just flushed it, then + * allocate a new one. * + * Returns: The bio to use for log writes */ -static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate) +static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno) { - struct buffer_head *real_bh = bh->b_private; - struct gfs2_bufdata *bd = real_bh->b_private; - struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd; + struct bio *bio = sdp->sd_log_bio; + u64 nblk; + + if (bio) { + nblk = bio->bi_sector + bio_sectors(bio); + nblk >>= sdp->sd_fsb2bb_shift; + if (blkno == nblk) + return bio; + gfs2_log_flush_bio(sdp, WRITE); + } - end_buffer_write_sync(bh, uptodate); - mempool_free(bh, gfs2_bh_pool); - unlock_buffer(real_bh); - brelse(real_bh); - if (atomic_dec_and_test(&sdp->sd_log_in_flight)) - wake_up(&sdp->sd_log_flush_wait); + return gfs2_log_alloc_bio(sdp, blkno); } + /** - * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log + * gfs2_log_write - write to log * @sdp: the filesystem - * @data: the data the buffer_head should point to + * @page: the page to write + * @size: the size of the data to write + * @offset: the offset within the page * - * Returns: the log buffer descriptor + * Try and add the page segment to the current bio. If that fails, + * submit the current bio to the device and create a new one, and + * then add the page segment to that. */ -static struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, - struct buffer_head *real) +static void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page, + unsigned size, unsigned offset) { - u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head); - struct buffer_head *bh; + u64 blkno = gfs2_log_bmap(sdp); + struct bio *bio; + int ret; + + bio = gfs2_log_get_bio(sdp, blkno); + ret = bio_add_page(bio, page, size, offset); + if (ret == 0) { + gfs2_log_flush_bio(sdp, WRITE); + bio = gfs2_log_alloc_bio(sdp, blkno); + ret = bio_add_page(bio, page, size, offset); + WARN_ON(ret == 0); + } +} + +/** + * gfs2_log_write_bh - write a buffer's content to the log + * @sdp: The super block + * @bh: The buffer pointing to the in-place location + * + * This writes the content of the buffer to the next available location + * in the log. The buffer will be unlocked once the i/o to the log has + * completed. + */ - bh = mempool_alloc(gfs2_bh_pool, GFP_NOFS); - atomic_set(&bh->b_count, 1); - bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock); - set_bh_page(bh, real->b_page, bh_offset(real)); - bh->b_blocknr = blkno; - bh->b_size = sdp->sd_sb.sb_bsize; - bh->b_bdev = sdp->sd_vfs->s_bdev; - bh->b_private = real; - bh->b_end_io = gfs2_fake_write_endio; +static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh) +{ + gfs2_log_write(sdp, bh->b_page, bh->b_size, bh_offset(bh)); +} - gfs2_log_incr_head(sdp); - atomic_inc(&sdp->sd_log_in_flight); +/** + * gfs2_log_write_page - write one block stored in a page, into the log + * @sdp: The superblock + * @page: The struct page + * + * This writes the first block-sized part of the page into the log. Note + * that the page must have been allocated from the gfs2_page_pool mempool + * and that after this has been called, ownership has been transferred and + * the page may be freed at any time. + */ - return bh; +void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page) +{ + struct super_block *sb = sdp->sd_vfs; + gfs2_log_write(sdp, page, sb->s_blocksize, 0); } -static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type) +static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type, + u32 ld_length, u32 ld_data1) { - struct buffer_head *bh = gfs2_log_get_buf(sdp); - struct gfs2_log_descriptor *ld = bh_log_desc(bh); + struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO); + struct gfs2_log_descriptor *ld = page_address(page); + clear_page(ld); ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD); ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD); ld->ld_type = cpu_to_be32(ld_type); - ld->ld_length = 0; - ld->ld_data1 = 0; + ld->ld_length = cpu_to_be32(ld_length); + ld->ld_data1 = cpu_to_be32(ld_data1); ld->ld_data2 = 0; - memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); - return bh; + return page; } -static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) +static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) { - struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); struct gfs2_meta_header *mh; struct gfs2_trans *tr; lock_buffer(bd->bd_bh); gfs2_log_lock(sdp); - if (!list_empty(&bd->bd_list_tr)) - goto out; tr = current->journal_info; tr->tr_touched = 1; - tr->tr_num_buf++; - list_add(&bd->bd_list_tr, &tr->tr_list_buf); - if (!list_empty(&le->le_list)) + if (!list_empty(&bd->bd_list)) goto out; set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); @@ -276,62 +407,86 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) mh->__pad0 = cpu_to_be64(0); mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); sdp->sd_log_num_buf++; - list_add(&le->le_list, &sdp->sd_log_le_buf); + list_add(&bd->bd_list, &sdp->sd_log_le_buf); tr->tr_num_buf_new++; out: gfs2_log_unlock(sdp); unlock_buffer(bd->bd_bh); } -static void buf_lo_before_commit(struct gfs2_sbd *sdp) +static void gfs2_check_magic(struct buffer_head *bh) +{ + void *kaddr; + __be32 *ptr; + + clear_buffer_escaped(bh); + kaddr = kmap_atomic(bh->b_page); + ptr = kaddr + bh_offset(bh); + if (*ptr == cpu_to_be32(GFS2_MAGIC)) + set_buffer_escaped(bh); + kunmap_atomic(kaddr); +} + +static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, + unsigned int total, struct list_head *blist, + bool is_databuf) { - struct buffer_head *bh; struct gfs2_log_descriptor *ld; struct gfs2_bufdata *bd1 = NULL, *bd2; - unsigned int total; - unsigned int limit; + struct page *page; unsigned int num; unsigned n; __be64 *ptr; - limit = buf_limit(sdp); - /* for 4k blocks, limit = 503 */ - gfs2_log_lock(sdp); - total = sdp->sd_log_num_buf; - bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list); + bd1 = bd2 = list_prepare_entry(bd1, blist, bd_list); while(total) { num = total; if (total > limit) num = limit; gfs2_log_unlock(sdp); - bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA); + page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA, num + 1, num); + ld = page_address(page); gfs2_log_lock(sdp); - ld = bh_log_desc(bh); - ptr = bh_log_ptr(bh); - ld->ld_length = cpu_to_be32(num + 1); - ld->ld_data1 = cpu_to_be32(num); + ptr = (__be64 *)(ld + 1); n = 0; - list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf, - bd_le.le_list) { + list_for_each_entry_continue(bd1, blist, bd_list) { *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr); + if (is_databuf) { + gfs2_check_magic(bd1->bd_bh); + *ptr++ = cpu_to_be64(buffer_escaped(bd1->bd_bh) ? 1 : 0); + } if (++n >= num) break; } gfs2_log_unlock(sdp); - submit_bh(WRITE_SYNC, bh); + gfs2_log_write_page(sdp, page); gfs2_log_lock(sdp); n = 0; - list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf, - bd_le.le_list) { + list_for_each_entry_continue(bd2, blist, bd_list) { get_bh(bd2->bd_bh); gfs2_log_unlock(sdp); lock_buffer(bd2->bd_bh); - bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); - submit_bh(WRITE_SYNC, bh); + + if (buffer_escaped(bd2->bd_bh)) { + void *kaddr; + page = mempool_alloc(gfs2_page_pool, GFP_NOIO); + ptr = page_address(page); + kaddr = kmap_atomic(bd2->bd_bh->b_page); + memcpy(ptr, kaddr + bh_offset(bd2->bd_bh), + bd2->bd_bh->b_size); + kunmap_atomic(kaddr); + *(__be32 *)ptr = 0; + clear_buffer_escaped(bd2->bd_bh); + unlock_buffer(bd2->bd_bh); + brelse(bd2->bd_bh); + gfs2_log_write_page(sdp, page); + } else { + gfs2_log_write_bh(sdp, bd2->bd_bh); + } gfs2_log_lock(sdp); if (++n >= num) break; @@ -343,14 +498,22 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) gfs2_log_unlock(sdp); } +static void buf_lo_before_commit(struct gfs2_sbd *sdp) +{ + unsigned int limit = buf_limit(sdp); /* 503 for 4k blocks */ + + gfs2_before_commit(sdp, limit, sdp->sd_log_num_buf, + &sdp->sd_log_le_buf, 0); +} + static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) { struct list_head *head = &sdp->sd_log_le_buf; struct gfs2_bufdata *bd; while (!list_empty(head)) { - bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); - list_del_init(&bd->bd_le.le_list); + bd = list_entry(head->next, struct gfs2_bufdata, bd_list); + list_del_init(&bd->bd_list); sdp->sd_log_num_buf--; gfs2_unpin(sdp, bd->bd_bh, ai); @@ -437,9 +600,8 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); } -static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) +static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) { - struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); struct gfs2_glock *gl = bd->bd_gl; struct gfs2_trans *tr; @@ -449,48 +611,48 @@ static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) sdp->sd_log_num_revoke++; atomic_inc(&gl->gl_revokes); set_bit(GLF_LFLUSH, &gl->gl_flags); - list_add(&le->le_list, &sdp->sd_log_le_revoke); + list_add(&bd->bd_list, &sdp->sd_log_le_revoke); } static void revoke_lo_before_commit(struct gfs2_sbd *sdp) { struct gfs2_log_descriptor *ld; struct gfs2_meta_header *mh; - struct buffer_head *bh; unsigned int offset; struct list_head *head = &sdp->sd_log_le_revoke; struct gfs2_bufdata *bd; + struct page *page; + unsigned int length; if (!sdp->sd_log_num_revoke) return; - bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE); - ld = bh_log_desc(bh); - ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, - sizeof(u64))); - ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke); + length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(u64)); + page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke); + ld = page_address(page); offset = sizeof(struct gfs2_log_descriptor); - list_for_each_entry(bd, head, bd_le.le_list) { + list_for_each_entry(bd, head, bd_list) { sdp->sd_log_num_revoke--; if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { - submit_bh(WRITE_SYNC, bh); - bh = gfs2_log_get_buf(sdp); - mh = (struct gfs2_meta_header *)bh->b_data; + gfs2_log_write_page(sdp, page); + page = mempool_alloc(gfs2_page_pool, GFP_NOIO); + mh = page_address(page); + clear_page(mh); mh->mh_magic = cpu_to_be32(GFS2_MAGIC); mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB); mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB); offset = sizeof(struct gfs2_meta_header); } - *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno); + *(__be64 *)(page_address(page) + offset) = cpu_to_be64(bd->bd_blkno); offset += sizeof(u64); } gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); - submit_bh(WRITE_SYNC, bh); + gfs2_log_write_page(sdp, page); } static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) @@ -500,8 +662,8 @@ static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) struct gfs2_glock *gl; while (!list_empty(head)) { - bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); - list_del_init(&bd->bd_le.le_list); + bd = list_entry(head->next, struct gfs2_bufdata, bd_list); + list_del_init(&bd->bd_list); gl = bd->bd_gl; atomic_dec(&gl->gl_revokes); clear_bit(GLF_LFLUSH, &gl->gl_flags); @@ -604,108 +766,33 @@ static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) * blocks, which isn't an enormous overhead but twice as much as * for normal metadata blocks. */ -static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) +static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) { - struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); struct gfs2_trans *tr = current->journal_info; struct address_space *mapping = bd->bd_bh->b_page->mapping; struct gfs2_inode *ip = GFS2_I(mapping->host); lock_buffer(bd->bd_bh); gfs2_log_lock(sdp); - if (tr) { - if (!list_empty(&bd->bd_list_tr)) - goto out; + if (tr) tr->tr_touched = 1; - if (gfs2_is_jdata(ip)) { - tr->tr_num_buf++; - list_add(&bd->bd_list_tr, &tr->tr_list_buf); - } - } - if (!list_empty(&le->le_list)) + if (!list_empty(&bd->bd_list)) goto out; - set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); if (gfs2_is_jdata(ip)) { gfs2_pin(sdp, bd->bd_bh); tr->tr_num_databuf_new++; sdp->sd_log_num_databuf++; - list_add_tail(&le->le_list, &sdp->sd_log_le_databuf); + list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf); } else { - list_add_tail(&le->le_list, &sdp->sd_log_le_ordered); + list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered); } out: gfs2_log_unlock(sdp); unlock_buffer(bd->bd_bh); } -static void gfs2_check_magic(struct buffer_head *bh) -{ - void *kaddr; - __be32 *ptr; - - clear_buffer_escaped(bh); - kaddr = kmap_atomic(bh->b_page); - ptr = kaddr + bh_offset(bh); - if (*ptr == cpu_to_be32(GFS2_MAGIC)) - set_buffer_escaped(bh); - kunmap_atomic(kaddr); -} - -static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, - struct list_head *list, struct list_head *done, - unsigned int n) -{ - struct buffer_head *bh1; - struct gfs2_log_descriptor *ld; - struct gfs2_bufdata *bd; - __be64 *ptr; - - if (!bh) - return; - - ld = bh_log_desc(bh); - ld->ld_length = cpu_to_be32(n + 1); - ld->ld_data1 = cpu_to_be32(n); - - ptr = bh_log_ptr(bh); - - get_bh(bh); - submit_bh(WRITE_SYNC, bh); - gfs2_log_lock(sdp); - while(!list_empty(list)) { - bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list); - list_move_tail(&bd->bd_le.le_list, done); - get_bh(bd->bd_bh); - while (be64_to_cpu(*ptr) != bd->bd_bh->b_blocknr) { - gfs2_log_incr_head(sdp); - ptr += 2; - } - gfs2_log_unlock(sdp); - lock_buffer(bd->bd_bh); - if (buffer_escaped(bd->bd_bh)) { - void *kaddr; - bh1 = gfs2_log_get_buf(sdp); - kaddr = kmap_atomic(bd->bd_bh->b_page); - memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh), - bh1->b_size); - kunmap_atomic(kaddr); - *(__be32 *)bh1->b_data = 0; - clear_buffer_escaped(bd->bd_bh); - unlock_buffer(bd->bd_bh); - brelse(bd->bd_bh); - } else { - bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh); - } - submit_bh(WRITE_SYNC, bh1); - gfs2_log_lock(sdp); - ptr += 2; - } - gfs2_log_unlock(sdp); - brelse(bh); -} - /** * databuf_lo_before_commit - Scan the data buffers, writing as we go * @@ -713,37 +800,10 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, static void databuf_lo_before_commit(struct gfs2_sbd *sdp) { - struct gfs2_bufdata *bd = NULL; - struct buffer_head *bh = NULL; - unsigned int n = 0; - __be64 *ptr = NULL, *end = NULL; - LIST_HEAD(processed); - LIST_HEAD(in_progress); + unsigned int limit = buf_limit(sdp) / 2; - gfs2_log_lock(sdp); - while (!list_empty(&sdp->sd_log_le_databuf)) { - if (ptr == end) { - gfs2_log_unlock(sdp); - gfs2_write_blocks(sdp, bh, &in_progress, &processed, n); - n = 0; - bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_JDATA); - ptr = bh_log_ptr(bh); - end = bh_ptr_end(bh) - 1; - gfs2_log_lock(sdp); - continue; - } - bd = list_entry(sdp->sd_log_le_databuf.next, struct gfs2_bufdata, bd_le.le_list); - list_move_tail(&bd->bd_le.le_list, &in_progress); - gfs2_check_magic(bd->bd_bh); - *ptr++ = cpu_to_be64(bd->bd_bh->b_blocknr); - *ptr++ = cpu_to_be64(buffer_escaped(bh) ? 1 : 0); - n++; - } - gfs2_log_unlock(sdp); - gfs2_write_blocks(sdp, bh, &in_progress, &processed, n); - gfs2_log_lock(sdp); - list_splice(&processed, &sdp->sd_log_le_databuf); - gfs2_log_unlock(sdp); + gfs2_before_commit(sdp, limit, sdp->sd_log_num_databuf, + &sdp->sd_log_le_databuf, 1); } static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, @@ -822,8 +882,8 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) struct gfs2_bufdata *bd; while (!list_empty(head)) { - bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); - list_del_init(&bd->bd_le.le_list); + bd = list_entry(head->next, struct gfs2_bufdata, bd_list); + list_del_init(&bd->bd_list); sdp->sd_log_num_databuf--; gfs2_unpin(sdp, bd->bd_bh, ai); } diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index 3c0b2737658a..954a330585f4 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h @@ -27,6 +27,8 @@ extern const struct gfs2_log_operations gfs2_rg_lops; extern const struct gfs2_log_operations gfs2_databuf_lops; extern const struct gfs2_log_operations *gfs2_log_ops[]; +extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page); +extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw); static inline unsigned int buf_limit(struct gfs2_sbd *sdp) { @@ -44,17 +46,17 @@ static inline unsigned int databuf_limit(struct gfs2_sbd *sdp) return limit; } -static inline void lops_init_le(struct gfs2_log_element *le, +static inline void lops_init_le(struct gfs2_bufdata *bd, const struct gfs2_log_operations *lops) { - INIT_LIST_HEAD(&le->le_list); - le->le_ops = lops; + INIT_LIST_HEAD(&bd->bd_list); + bd->bd_ops = lops; } -static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) +static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) { - if (le->le_ops->lo_add) - le->le_ops->lo_add(sdp, le); + if (bd->bd_ops->lo_add) + bd->bd_ops->lo_add(sdp, bd); } static inline void lops_before_commit(struct gfs2_sbd *sdp) diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 754426b1e52c..6cdb0f2a1b09 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -70,16 +70,6 @@ static void gfs2_init_gl_aspace_once(void *foo) address_space_init_once(mapping); } -static void *gfs2_bh_alloc(gfp_t mask, void *data) -{ - return alloc_buffer_head(mask); -} - -static void gfs2_bh_free(void *ptr, void *data) -{ - return free_buffer_head(ptr); -} - /** * init_gfs2_fs - Register GFS2 as a filesystem * @@ -143,6 +133,12 @@ static int __init init_gfs2_fs(void) if (!gfs2_quotad_cachep) goto fail; + gfs2_rsrv_cachep = kmem_cache_create("gfs2_mblk", + sizeof(struct gfs2_blkreserv), + 0, 0, NULL); + if (!gfs2_rsrv_cachep) + goto fail; + register_shrinker(&qd_shrinker); error = register_filesystem(&gfs2_fs_type); @@ -164,8 +160,8 @@ static int __init init_gfs2_fs(void) if (!gfs2_control_wq) goto fail_recovery; - gfs2_bh_pool = mempool_create(1024, gfs2_bh_alloc, gfs2_bh_free, NULL); - if (!gfs2_bh_pool) + gfs2_page_pool = mempool_create_page_pool(64, 0); + if (!gfs2_page_pool) goto fail_control; gfs2_register_debugfs(); @@ -186,6 +182,9 @@ fail: unregister_shrinker(&qd_shrinker); gfs2_glock_exit(); + if (gfs2_rsrv_cachep) + kmem_cache_destroy(gfs2_rsrv_cachep); + if (gfs2_quotad_cachep) kmem_cache_destroy(gfs2_quotad_cachep); @@ -225,7 +224,8 @@ static void __exit exit_gfs2_fs(void) rcu_barrier(); - mempool_destroy(gfs2_bh_pool); + mempool_destroy(gfs2_page_pool); + kmem_cache_destroy(gfs2_rsrv_cachep); kmem_cache_destroy(gfs2_quotad_cachep); kmem_cache_destroy(gfs2_rgrpd_cachep); kmem_cache_destroy(gfs2_bufdata_cachep); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 181586e673f9..6c1e5d1c404a 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -293,11 +293,10 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, bd->bd_bh = bh; bd->bd_gl = gl; - INIT_LIST_HEAD(&bd->bd_list_tr); if (meta) - lops_init_le(&bd->bd_le, &gfs2_buf_lops); + lops_init_le(bd, &gfs2_buf_lops); else - lops_init_le(&bd->bd_le, &gfs2_databuf_lops); + lops_init_le(bd, &gfs2_databuf_lops); bh->b_private = bd; if (meta) @@ -313,7 +312,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int if (test_clear_buffer_pinned(bh)) { trace_gfs2_pin(bd, 0); atomic_dec(&sdp->sd_log_pinned); - list_del_init(&bd->bd_le.le_list); + list_del_init(&bd->bd_list); if (meta) { gfs2_assert_warn(sdp, sdp->sd_log_num_buf); sdp->sd_log_num_buf--; @@ -375,33 +374,24 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen) * @ip: The GFS2 inode * @height: The level of this buf in the metadata (indir addr) tree (if any) * @num: The block number (device relative) of the buffer - * @new: Non-zero if we may create a new buffer * @bhp: the buffer is returned here * * Returns: errno */ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, - int new, struct buffer_head **bhp) + struct buffer_head **bhp) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_glock *gl = ip->i_gl; struct buffer_head *bh; int ret = 0; + u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI; - if (new) { - BUG_ON(height == 0); - bh = gfs2_meta_new(gl, num); - gfs2_trans_add_bh(ip->i_gl, bh, 1); - gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); - gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); - } else { - u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI; - ret = gfs2_meta_read(gl, num, DIO_WAIT, &bh); - if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) { - brelse(bh); - ret = -EIO; - } + ret = gfs2_meta_read(gl, num, DIO_WAIT, &bh); + if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) { + brelse(bh); + ret = -EIO; } *bhp = bh; return ret; diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index 22c526593131..c30973b07a7c 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -65,12 +65,12 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, - int new, struct buffer_head **bhp); + struct buffer_head **bhp); static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip, struct buffer_head **bhp) { - return gfs2_meta_indirect_buffer(ip, 0, ip->i_no_addr, 0, bhp); + return gfs2_meta_indirect_buffer(ip, 0, ip->i_no_addr, bhp); } struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 6f3a18f9e176..c5871ae40561 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -99,7 +99,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) atomic_set(&sdp->sd_log_pinned, 0); INIT_LIST_HEAD(&sdp->sd_log_le_buf); INIT_LIST_HEAD(&sdp->sd_log_le_revoke); - INIT_LIST_HEAD(&sdp->sd_log_le_rg); INIT_LIST_HEAD(&sdp->sd_log_le_databuf); INIT_LIST_HEAD(&sdp->sd_log_le_ordered); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 6019da3dcaed..b97178e7d397 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -652,7 +652,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, } memset(&q, 0, sizeof(struct gfs2_quota)); - err = gfs2_internal_read(ip, NULL, (char *)&q, &loc, sizeof(q)); + err = gfs2_internal_read(ip, (char *)&q, &loc, sizeof(q)); if (err < 0) return err; @@ -744,7 +744,7 @@ get_a_page: i_size_write(inode, size); inode->i_mtime = inode->i_atime = CURRENT_TIME; mark_inode_dirty(inode); - return err; + return 0; unlock_out: unlock_page(page); @@ -852,7 +852,7 @@ static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd) memset(&q, 0, sizeof(struct gfs2_quota)); pos = qd2offset(qd); - error = gfs2_internal_read(ip, NULL, (char *)&q, &pos, sizeof(q)); + error = gfs2_internal_read(ip, (char *)&q, &pos, sizeof(q)); if (error < 0) return error; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 3df65c9ab73b..f74fb9bd1973 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -70,15 +70,15 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, /** * gfs2_setbit - Set a bit in the bitmaps - * @buffer: the buffer that holds the bitmaps - * @buflen: the length (in bytes) of the buffer + * @rgd: the resource group descriptor + * @buf2: the clone buffer that holds the bitmaps + * @bi: the bitmap structure * @block: the block to set * @new_state: the new state of the block * */ -static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1, - unsigned char *buf2, unsigned int offset, +static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2, struct gfs2_bitmap *bi, u32 block, unsigned char new_state) { @@ -86,8 +86,8 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1, unsigned int buflen = bi->bi_len; const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; - byte1 = buf1 + offset + (block / GFS2_NBBY); - end = buf1 + offset + buflen; + byte1 = bi->bi_bh->b_data + bi->bi_offset + (block / GFS2_NBBY); + end = bi->bi_bh->b_data + bi->bi_offset + buflen; BUG_ON(byte1 >= end); @@ -110,7 +110,7 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1, *byte1 ^= (cur_state ^ new_state) << bit; if (buf2) { - byte2 = buf2 + offset + (block / GFS2_NBBY); + byte2 = buf2 + bi->bi_offset + (block / GFS2_NBBY); cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; *byte2 ^= (cur_state ^ new_state) << bit; } @@ -118,6 +118,7 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1, /** * gfs2_testbit - test a bit in the bitmaps + * @rgd: the resource group descriptor * @buffer: the buffer that holds the bitmaps * @buflen: the length (in bytes) of the buffer * @block: the block to read @@ -179,7 +180,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) /** * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing * a block in a given allocation state. - * @buffer: the buffer that holds the bitmaps + * @buf: the buffer that holds the bitmaps * @len: the length (in bytes) of the buffer * @goal: start search at this block's bit-pair (within @buffer) * @state: GFS2_BLKST_XXX the state of the block we're looking for. @@ -231,6 +232,7 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, /** * gfs2_bitcount - count the number of bits in a certain state + * @rgd: the resource group descriptor * @buffer: the buffer that holds the bitmaps * @buflen: the length (in bytes) of the buffer * @state: the state of the block we're looking for @@ -264,7 +266,6 @@ static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer, /** * gfs2_rgrp_verify - Verify that a resource group is consistent - * @sdp: the filesystem * @rgd: the rgrp * */ @@ -322,7 +323,8 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) /** * gfs2_blk2rgrpd - Find resource group for a given data/meta block number * @sdp: The GFS2 superblock - * @n: The data block number + * @blk: The data block number + * @exact: True if this needs to be an exact match * * Returns: The resource group, or NULL if not found */ @@ -380,7 +382,7 @@ struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp) /** * gfs2_rgrpd_get_next - get the next RG - * @rgd: A RG + * @rgd: the resource group descriptor * * Returns: The next rgrp */ @@ -529,6 +531,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) /** * gfs2_ri_total - Total up the file system space, according to the rindex. + * @sdp: the filesystem * */ u64 gfs2_ri_total(struct gfs2_sbd *sdp) @@ -537,16 +540,14 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp) struct inode *inode = sdp->sd_rindex; struct gfs2_inode *ip = GFS2_I(inode); char buf[sizeof(struct gfs2_rindex)]; - struct file_ra_state ra_state; int error, rgrps; - file_ra_state_init(&ra_state, inode->i_mapping); for (rgrps = 0;; rgrps++) { loff_t pos = rgrps * sizeof(struct gfs2_rindex); if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode)) break; - error = gfs2_internal_read(ip, &ra_state, buf, &pos, + error = gfs2_internal_read(ip, buf, &pos, sizeof(struct gfs2_rindex)); if (error != sizeof(struct gfs2_rindex)) break; @@ -582,13 +583,12 @@ static int rgd_insert(struct gfs2_rgrpd *rgd) /** * read_rindex_entry - Pull in a new resource index entry from the disk - * @gl: The glock covering the rindex inode + * @ip: Pointer to the rindex inode * * Returns: 0 on success, > 0 on EOF, error code otherwise */ -static int read_rindex_entry(struct gfs2_inode *ip, - struct file_ra_state *ra_state) +static int read_rindex_entry(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); @@ -599,7 +599,7 @@ static int read_rindex_entry(struct gfs2_inode *ip, if (pos >= i_size_read(&ip->i_inode)) return 1; - error = gfs2_internal_read(ip, ra_state, (char *)&buf, &pos, + error = gfs2_internal_read(ip, (char *)&buf, &pos, sizeof(struct gfs2_rindex)); if (error != sizeof(struct gfs2_rindex)) @@ -655,13 +655,10 @@ fail: static int gfs2_ri_update(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct inode *inode = &ip->i_inode; - struct file_ra_state ra_state; int error; - file_ra_state_init(&ra_state, inode->i_mapping); do { - error = read_rindex_entry(ip, &ra_state); + error = read_rindex_entry(ip); } while (error == 0); if (error < 0) @@ -741,7 +738,7 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) /** * gfs2_rgrp_go_lock - Read in a RG's header and bitmaps - * @rgd: the struct gfs2_rgrpd describing the RG to read in + * @gh: The glock holder for the resource group * * Read in all of a Resource Group's header and bitmap blocks. * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps. @@ -801,7 +798,7 @@ fail: /** * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get() - * @rgd: the struct gfs2_rgrpd describing the RG to read in + * @gh: The glock holder for the resource group * */ @@ -1002,11 +999,13 @@ struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip) * Returns: the struct gfs2_qadata */ -static struct gfs2_blkreserv *gfs2_blkrsv_get(struct gfs2_inode *ip) +static int gfs2_blkrsv_get(struct gfs2_inode *ip) { BUG_ON(ip->i_res != NULL); - ip->i_res = kzalloc(sizeof(struct gfs2_blkreserv), GFP_NOFS); - return ip->i_res; + ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); + if (!ip->i_res) + return -ENOMEM; + return 0; } /** @@ -1038,6 +1037,8 @@ static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk) /** * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes * @rgd: The rgrp + * @last_unlinked: block address of the last dinode we unlinked + * @skip: block address we should explicitly not unlink * * Returns: 0 if no error * The inode, if one has been found, in inode. @@ -1102,7 +1103,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip /** * get_local_rgrp - Choose and lock a rgrp for allocation * @ip: the inode to reserve space for - * @rgp: the chosen and locked rgrp + * @last_unlinked: the last unlinked block * * Try to acquire rgrp in way which avoids contending with others. * @@ -1164,13 +1165,14 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) static void gfs2_blkrsv_put(struct gfs2_inode *ip) { BUG_ON(ip->i_res == NULL); - kfree(ip->i_res); + kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); ip->i_res = NULL; } /** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for + * @requested: the number of blocks to be reserved * * Returns: errno */ @@ -1179,14 +1181,15 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_blkreserv *rs; - int error = 0; + int error; u64 last_unlinked = NO_BLOCK; int tries = 0; - rs = gfs2_blkrsv_get(ip); - if (!rs) - return -ENOMEM; + error = gfs2_blkrsv_get(ip); + if (error) + return error; + rs = ip->i_res; rs->rs_requested = requested; if (gfs2_assert_warn(sdp, requested)) { error = -EINVAL; @@ -1268,7 +1271,6 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) * @rgd: the resource group descriptor * @goal: the goal block within the RG (start here to search for avail block) * @state: GFS2_BLKST_XXX the before-allocation state to find - * @dinode: TRUE if the first block we allocate is for a dinode * @rbi: address of the pointer to the bitmap containing the block found * * Walk rgrp's bitmap to find bits that represent a block in @state. @@ -1282,13 +1284,12 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) * Returns: the block number found relative to the bitmap rbi */ -static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, - unsigned char state, +static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, unsigned char state, struct gfs2_bitmap **rbi) { struct gfs2_bitmap *bi = NULL; const u32 length = rgd->rd_length; - u32 blk = BFITNOENT; + u32 biblk = BFITNOENT; unsigned int buf, x; const u8 *buffer = NULL; @@ -1325,8 +1326,8 @@ do_search: if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) buffer = bi->bi_clone + bi->bi_offset; - blk = gfs2_bitfit(buffer, bi->bi_len, goal, state); - if (blk != BFITNOENT) + biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); + if (biblk != BFITNOENT) break; if ((goal == 0) && (state == GFS2_BLKST_FREE)) @@ -1339,10 +1340,10 @@ skip: goal = 0; } - if (blk != BFITNOENT) + if (biblk != BFITNOENT) *rbi = bi; - return blk; + return biblk; } /** @@ -1367,8 +1368,8 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, *n = 0; buffer = bi->bi_bh->b_data + bi->bi_offset; gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); - gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, - bi, blk, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); + gfs2_setbit(rgd, bi->bi_clone, bi, blk, + dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); (*n)++; goal = blk; while (*n < elen) { @@ -1378,8 +1379,7 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != GFS2_BLKST_FREE) break; - gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, - bi, goal, GFS2_BLKST_USED); + gfs2_setbit(rgd, bi->bi_clone, bi, goal, GFS2_BLKST_USED); (*n)++; } blk = gfs2_bi2rgd_blk(bi, blk); @@ -1436,8 +1436,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, bi->bi_len); } gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); - gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset, - bi, buf_blk, new_state); + gfs2_setbit(rgd, NULL, bi, buf_blk, new_state); } return rgd; @@ -1557,7 +1556,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, ip->i_inode.i_gid); rgd->rd_free_clone -= *nblocks; - trace_gfs2_block_alloc(ip, block, *nblocks, + trace_gfs2_block_alloc(ip, rgd, block, *nblocks, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); *bn = block; return 0; @@ -1584,7 +1583,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); if (!rgd) return; - trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); + trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE); rgd->rd_free += blen; rgd->rd_flags &= ~GFS2_RGF_TRIMMED; gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); @@ -1622,7 +1621,7 @@ void gfs2_unlink_di(struct inode *inode) rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED); if (!rgd) return; - trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED); + trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); } @@ -1652,7 +1651,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) { gfs2_free_uninit_di(rgd, ip->i_no_addr); - trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE); + trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE); gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); gfs2_meta_wipe(ip, ip->i_no_addr, 1); } @@ -1752,7 +1751,6 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, * and initialize an array of glock holders for them * @rlist: the list of resource groups * @state: the lock state to acquire the RG lock in - * @flags: the modifier flags for the holder structures * * FIXME: Don't use NOFAIL * diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index dfa89cd75534..1b8b81588199 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -457,10 +457,10 @@ TRACE_EVENT(gfs2_bmap, /* Keep track of blocks as they are allocated/freed */ TRACE_EVENT(gfs2_block_alloc, - TP_PROTO(const struct gfs2_inode *ip, u64 block, unsigned len, - u8 block_state), + TP_PROTO(const struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, + u64 block, unsigned len, u8 block_state), - TP_ARGS(ip, block, len, block_state), + TP_ARGS(ip, rgd, block, len, block_state), TP_STRUCT__entry( __field( dev_t, dev ) @@ -468,6 +468,8 @@ TRACE_EVENT(gfs2_block_alloc, __field( u64, inum ) __field( u32, len ) __field( u8, block_state ) + __field( u64, rd_addr ) + __field( u32, rd_free_clone ) ), TP_fast_assign( @@ -476,14 +478,18 @@ TRACE_EVENT(gfs2_block_alloc, __entry->inum = ip->i_no_addr; __entry->len = len; __entry->block_state = block_state; + __entry->rd_addr = rgd->rd_addr; + __entry->rd_free_clone = rgd->rd_free_clone; ), - TP_printk("%u,%u bmap %llu alloc %llu/%lu %s", + TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->inum, (unsigned long long)__entry->start, (unsigned long)__entry->len, - block_state_name(__entry->block_state)) + block_state_name(__entry->block_state), + (unsigned long long)__entry->rd_addr, + __entry->rd_free_clone) ); #endif /* _TRACE_GFS2_H */ diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 86ac75d99d31..ad3e2fb763d7 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -50,8 +50,6 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, if (revokes) tr->tr_reserved += gfs2_struct2blk(sdp, revokes, sizeof(u64)); - INIT_LIST_HEAD(&tr->tr_list_buf); - gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh); error = gfs2_glock_nq(&tr->tr_t_gh); @@ -93,10 +91,21 @@ static void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) up_read(&sdp->sd_log_flush_lock); } +static void gfs2_print_trans(const struct gfs2_trans *tr) +{ + print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip); + printk(KERN_WARNING "GFS2: blocks=%u revokes=%u reserved=%u touched=%d\n", + tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, tr->tr_touched); + printk(KERN_WARNING "GFS2: Buf %u/%u Databuf %u/%u Revoke %u/%u\n", + tr->tr_num_buf_new, tr->tr_num_buf_rm, + tr->tr_num_databuf_new, tr->tr_num_databuf_rm, + tr->tr_num_revoke, tr->tr_num_revoke_rm); +} + void gfs2_trans_end(struct gfs2_sbd *sdp) { struct gfs2_trans *tr = current->journal_info; - + s64 nbuf; BUG_ON(!tr); current->journal_info = NULL; @@ -110,16 +119,13 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) return; } - if (gfs2_assert_withdraw(sdp, tr->tr_num_buf <= tr->tr_blocks)) { - fs_err(sdp, "tr_num_buf = %u, tr_blocks = %u ", - tr->tr_num_buf, tr->tr_blocks); - print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip); - } - if (gfs2_assert_withdraw(sdp, tr->tr_num_revoke <= tr->tr_revokes)) { - fs_err(sdp, "tr_num_revoke = %u, tr_revokes = %u ", - tr->tr_num_revoke, tr->tr_revokes); - print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip); - } + nbuf = tr->tr_num_buf_new + tr->tr_num_databuf_new; + nbuf -= tr->tr_num_buf_rm; + nbuf -= tr->tr_num_databuf_rm; + + if (gfs2_assert_withdraw(sdp, (nbuf <= tr->tr_blocks) && + (tr->tr_num_revoke <= tr->tr_revokes))) + gfs2_print_trans(tr); gfs2_log_commit(sdp, tr); if (tr->tr_t_gh.gh_gl) { @@ -152,16 +158,16 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta) gfs2_attach_bufdata(gl, bh, meta); bd = bh->b_private; } - lops_add(sdp, &bd->bd_le); + lops_add(sdp, bd); } void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) { - BUG_ON(!list_empty(&bd->bd_le.le_list)); + BUG_ON(!list_empty(&bd->bd_list)); BUG_ON(!list_empty(&bd->bd_ail_st_list)); BUG_ON(!list_empty(&bd->bd_ail_gl_list)); - lops_init_le(&bd->bd_le, &gfs2_revoke_lops); - lops_add(sdp, &bd->bd_le); + lops_init_le(bd, &gfs2_revoke_lops); + lops_add(sdp, bd); } void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) @@ -171,9 +177,9 @@ void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) unsigned int n = len; gfs2_log_lock(sdp); - list_for_each_entry_safe(bd, tmp, &sdp->sd_log_le_revoke, bd_le.le_list) { + list_for_each_entry_safe(bd, tmp, &sdp->sd_log_le_revoke, bd_list) { if ((bd->bd_blkno >= blkno) && (bd->bd_blkno < (blkno + len))) { - list_del_init(&bd->bd_le.le_list); + list_del_init(&bd->bd_list); gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke); sdp->sd_log_num_revoke--; kmem_cache_free(gfs2_bufdata_cachep, bd); diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 9e7765e8e7b0..f00d7c5744f6 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -25,7 +25,8 @@ struct kmem_cache *gfs2_inode_cachep __read_mostly; struct kmem_cache *gfs2_bufdata_cachep __read_mostly; struct kmem_cache *gfs2_rgrpd_cachep __read_mostly; struct kmem_cache *gfs2_quotad_cachep __read_mostly; -mempool_t *gfs2_bh_pool __read_mostly; +struct kmem_cache *gfs2_rsrv_cachep __read_mostly; +mempool_t *gfs2_page_pool __read_mostly; void gfs2_assert_i(struct gfs2_sbd *sdp) { diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index a4ce76c67dbb..3586b0dd6aa7 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -152,7 +152,8 @@ extern struct kmem_cache *gfs2_inode_cachep; extern struct kmem_cache *gfs2_bufdata_cachep; extern struct kmem_cache *gfs2_rgrpd_cachep; extern struct kmem_cache *gfs2_quotad_cachep; -extern mempool_t *gfs2_bh_pool; +extern struct kmem_cache *gfs2_rsrv_cachep; +extern mempool_t *gfs2_page_pool; static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, unsigned int *p) |