From 87e99511ea54510ffb60b98001d108794d5037f8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Aug 2010 17:05:45 +0200 Subject: kill BH_Ordered flag Instead of abusing a buffer_head flag just add a variant of sync_dirty_buffer which allows passing the exact type of write flag required. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/buffer_head.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 43e649a72529..72c1cf83eb85 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -32,7 +32,6 @@ enum bh_state_bits { BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ - BH_Ordered, /* ordered write */ BH_Eopnotsupp, /* operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ @@ -125,7 +124,6 @@ BUFFER_FNS(Async_Write, async_write) BUFFER_FNS(Delay, delay) BUFFER_FNS(Boundary, boundary) BUFFER_FNS(Write_EIO, write_io_error) -BUFFER_FNS(Ordered, ordered) BUFFER_FNS(Eopnotsupp, eopnotsupp) BUFFER_FNS(Unwritten, unwritten) @@ -183,6 +181,7 @@ void unlock_buffer(struct buffer_head *bh); void __lock_buffer(struct buffer_head *bh); void ll_rw_block(int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); +int __sync_dirty_buffer(struct buffer_head *bh, int rw); int submit_bh(int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); -- cgit v1.2.3-58-ga151 From 9cb569d601e0b93e01c20a22872270ec663b75f6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Aug 2010 17:06:24 +0200 Subject: remove SWRITE* I/O types These flags aren't real I/O types, but tell ll_rw_block to always lock the buffer instead of giving up on a failed trylock. Instead add a new write_dirty_buffer helper that implements this semantic and use it from the existing SWRITE* callers. Note that the ll_rw_block code had a bug where it didn't promote WRITE_SYNC_PLUG properly, which this patch fixes. In the ufs code clean up the helper that used to call ll_rw_block to mirror sync_dirty_buffer, which is the function it implements for compound buffers. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/buffer.c | 52 +++++++++++++++++++++++++-------------------- fs/fat/misc.c | 4 +++- fs/jbd/checkpoint.c | 4 +++- fs/jbd/journal.c | 2 +- fs/jbd/revoke.c | 2 +- fs/jbd2/checkpoint.c | 4 +++- fs/jbd2/journal.c | 2 +- fs/jbd2/revoke.c | 2 +- fs/reiserfs/journal.c | 2 +- fs/ufs/balloc.c | 24 +++++++-------------- fs/ufs/ialloc.c | 18 ++++++---------- fs/ufs/truncate.c | 18 ++++++---------- fs/ufs/util.c | 20 +++++++---------- fs/ufs/util.h | 3 +-- include/linux/buffer_head.h | 1 + include/linux/fs.h | 9 -------- 16 files changed, 73 insertions(+), 94 deletions(-) (limited to 'include') diff --git a/fs/buffer.c b/fs/buffer.c index 6c8ad977f3d4..3e7dca279d1c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) spin_unlock(lock); /* * Ensure any pending I/O completes so that - * ll_rw_block() actually writes the current - * contents - it is a noop if I/O is still in - * flight on potentially older contents. + * write_dirty_buffer() actually writes the + * current contents - it is a noop if I/O is + * still in flight on potentially older + * contents. */ - ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, WRITE_SYNC_PLUG); /* * Kick off IO for the previous mapping. Note @@ -2949,22 +2950,21 @@ EXPORT_SYMBOL(submit_bh); /** * ll_rw_block: low-level access to block devices (DEPRECATED) - * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead) + * @rw: whether to %READ or %WRITE or maybe %READA (readahead) * @nr: number of &struct buffer_heads in the array * @bhs: array of pointers to &struct buffer_head * * ll_rw_block() takes an array of pointers to &struct buffer_heads, and * requests an I/O operation on them, either a %READ or a %WRITE. The third - * %SWRITE is like %WRITE only we make sure that the *current* data in buffers - * are sent to disk. The fourth %READA option is described in the documentation - * for generic_make_request() which ll_rw_block() calls. + * %READA option is described in the documentation for generic_make_request() + * which ll_rw_block() calls. * * This function drops any buffer that it cannot get a lock on (with the - * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be - * clean when doing a write request, and any buffer that appears to be - * up-to-date when doing read request. Further it marks as clean buffers that - * are processed for writing (the buffer cache won't assume that they are - * actually clean until the buffer gets unlocked). + * BH_Lock state bit), any buffer that appears to be clean when doing a write + * request, and any buffer that appears to be up-to-date when doing read + * request. Further it marks as clean buffers that are processed for + * writing (the buffer cache won't assume that they are actually clean + * until the buffer gets unlocked). * * ll_rw_block sets b_end_io to simple completion handler that marks * the buffer up-to-date (if approriate), unlocks the buffer and wakes @@ -2980,20 +2980,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG) - lock_buffer(bh); - else if (!trylock_buffer(bh)) + if (!trylock_buffer(bh)) continue; - - if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC || - rw == SWRITE_SYNC_PLUG) { + if (rw == WRITE) { if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; get_bh(bh); - if (rw == SWRITE_SYNC) - submit_bh(WRITE_SYNC, bh); - else - submit_bh(WRITE, bh); + submit_bh(WRITE, bh); continue; } } else { @@ -3009,6 +3002,19 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) } EXPORT_SYMBOL(ll_rw_block); +void write_dirty_buffer(struct buffer_head *bh, int rw) +{ + lock_buffer(bh); + if (!test_clear_buffer_dirty(bh)) { + unlock_buffer(bh); + return; + } + bh->b_end_io = end_buffer_write_sync; + get_bh(bh); + submit_bh(rw, bh); +} +EXPORT_SYMBOL(write_dirty_buffer); + /* * For a data-integrity writeout, we need to wait upon any in-progress I/O * and then start new I/O and then wait upon it. The caller must have a ref on diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 1fa23f6ffba5..1736f2356388 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -250,7 +250,9 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) { int i, err = 0; - ll_rw_block(SWRITE, nr_bhs, bhs); + for (i = 0; i < nr_bhs; i++) + write_dirty_buffer(bhs[i], WRITE); + for (i = 0; i < nr_bhs; i++) { wait_on_buffer(bhs[i]); if (buffer_eopnotsupp(bhs[i])) { diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index b0435dd0654d..05a38b9c4c0e 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -254,7 +254,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) { int i; - ll_rw_block(SWRITE, *batch_count, bhs); + for (i = 0; i < *batch_count; i++) + write_dirty_buffer(bhs[i], WRITE); + for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = bhs[i]; clear_buffer_jwrite(bh); diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index f19ce94693d8..2c4b1f109da9 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1024,7 +1024,7 @@ void journal_update_superblock(journal_t *journal, int wait) if (wait) sync_dirty_buffer(bh); else - ll_rw_block(SWRITE, 1, &bh); + write_dirty_buffer(bh, WRITE); out: /* If we have just flushed the log (by marking s_start==0), then diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index ad717328343a..d29018307e2e 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -617,7 +617,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, write_op); } #endif diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 1c23a0f4e8a3..5247e7ffdcb4 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -255,7 +255,9 @@ __flush_batch(journal_t *journal, int *batch_count) { int i; - ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs); + for (i = 0; i < *batch_count; i++) + write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE); + for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = journal->j_chkpt_bhs[i]; clear_buffer_jwrite(bh); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ad5866aaf0f9..0e8014ea6b94 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1124,7 +1124,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) set_buffer_uptodate(bh); } } else - ll_rw_block(SWRITE, 1, &bh); + write_dirty_buffer(bh, WRITE); out: /* If we have just flushed the log (by marking s_start==0), then diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index a360b06af2e3..9ad321fd63fd 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -625,7 +625,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, write_op); } #endif diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 1ec952b1f036..812e2c05aa29 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2311,7 +2311,7 @@ static int journal_read_transaction(struct super_block *sb, /* flush out the real blocks */ for (i = 0; i < get_desc_trans_len(desc); i++) { set_buffer_dirty(real_blocks[i]); - ll_rw_block(SWRITE, 1, real_blocks + i); + write_dirty_buffer(real_blocks[i], WRITE); } for (i = 0; i < get_desc_trans_len(desc); i++) { wait_on_buffer(real_blocks[i]); diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 048484fb10d2..46f7a807bbc1 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -114,10 +114,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; unlock_super (sb); @@ -207,10 +205,8 @@ do_more: ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); if (overflow) { fragment += count; @@ -558,10 +554,8 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment); @@ -680,10 +674,8 @@ cg_found: succed: ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; result += cgno * uspi->s_fpg; diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 428017e018fe..2eabf04af3de 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -113,10 +113,8 @@ void ufs_free_inode (struct inode * inode) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; unlock_super (sb); @@ -156,10 +154,8 @@ static void ufs2_init_inodes_chunk(struct super_block *sb, fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb); ubh_mark_buffer_dirty(UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer(UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); UFSD("EXIT\n"); } @@ -290,10 +286,8 @@ cg_found: } ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; inode->i_ino = cg * uspi->s_ipg + bit; diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 34d5cb135320..a58f9155fc9a 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -243,10 +243,8 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) ubh_bforget(ind_ubh); ind_ubh = NULL; } - if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { - ubh_ll_rw_block(SWRITE, ind_ubh); - ubh_wait_on_buffer (ind_ubh); - } + if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) + ubh_sync_block(ind_ubh); ubh_brelse (ind_ubh); UFSD("EXIT: ino %lu\n", inode->i_ino); @@ -307,10 +305,8 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) ubh_bforget(dind_bh); dind_bh = NULL; } - if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { - ubh_ll_rw_block(SWRITE, dind_bh); - ubh_wait_on_buffer (dind_bh); - } + if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) + ubh_sync_block(dind_bh); ubh_brelse (dind_bh); UFSD("EXIT: ino %lu\n", inode->i_ino); @@ -367,10 +363,8 @@ static int ufs_trunc_tindirect(struct inode *inode) ubh_bforget(tind_bh); tind_bh = NULL; } - if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { - ubh_ll_rw_block(SWRITE, tind_bh); - ubh_wait_on_buffer (tind_bh); - } + if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) + ubh_sync_block(tind_bh); ubh_brelse (tind_bh); UFSD("EXIT: ino %lu\n", inode->i_ino); diff --git a/fs/ufs/util.c b/fs/ufs/util.c index 85a7fc9e4a4e..d2c36d53fe66 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -113,21 +113,17 @@ void ubh_mark_buffer_uptodate (struct ufs_buffer_head * ubh, int flag) } } -void ubh_ll_rw_block(int rw, struct ufs_buffer_head *ubh) +void ubh_sync_block(struct ufs_buffer_head *ubh) { - if (!ubh) - return; + if (ubh) { + unsigned i; - ll_rw_block(rw, ubh->count, ubh->bh); -} + for (i = 0; i < ubh->count; i++) + write_dirty_buffer(ubh->bh[i], WRITE); -void ubh_wait_on_buffer (struct ufs_buffer_head * ubh) -{ - unsigned i; - if (!ubh) - return; - for ( i = 0; i < ubh->count; i++ ) - wait_on_buffer (ubh->bh[i]); + for (i = 0; i < ubh->count; i++) + wait_on_buffer(ubh->bh[i]); + } } void ubh_bforget (struct ufs_buffer_head * ubh) diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 0466036912f1..9f8775ce381c 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -269,8 +269,7 @@ extern void ubh_brelse (struct ufs_buffer_head *); extern void ubh_brelse_uspi (struct ufs_sb_private_info *); extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *); extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int); -extern void ubh_ll_rw_block(int, struct ufs_buffer_head *); -extern void ubh_wait_on_buffer (struct ufs_buffer_head *); +extern void ubh_sync_block(struct ufs_buffer_head *); extern void ubh_bforget (struct ufs_buffer_head *); extern int ubh_buffer_dirty (struct ufs_buffer_head *); #define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 72c1cf83eb85..ec94c12f21da 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -182,6 +182,7 @@ void __lock_buffer(struct buffer_head *bh); void ll_rw_block(int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, int rw); +void write_dirty_buffer(struct buffer_head *bh, int rw); int submit_bh(int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); diff --git a/include/linux/fs.h b/include/linux/fs.h index 9a96b4d83fc1..29f7c975304c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -125,9 +125,6 @@ struct inodes_stat_t { * block layer could (in theory) choose to ignore this * request if it runs into resource problems. * WRITE A normal async write. Device will be plugged. - * SWRITE Like WRITE, but a special case for ll_rw_block() that - * tells it to lock the buffer first. Normally a buffer - * must be locked before doing IO. * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down * the hint that someone will be waiting on this IO * shortly. The device must still be unplugged explicitly, @@ -138,9 +135,6 @@ struct inodes_stat_t { * immediately after submission. The write equivalent * of READ_SYNC. * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. - * SWRITE_SYNC - * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. - * See SWRITE. * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all * previously submitted writes must be safely on storage * before this one is started. Also guarantees that when @@ -155,7 +149,6 @@ struct inodes_stat_t { #define READ 0 #define WRITE RW_MASK #define READA RWA_MASK -#define SWRITE (WRITE | READA) #define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG) #define READ_META (READ | REQ_META) @@ -165,8 +158,6 @@ struct inodes_stat_t { #define WRITE_META (WRITE | REQ_META) #define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_HARDBARRIER) -#define SWRITE_SYNC_PLUG (SWRITE | REQ_SYNC | REQ_NOIDLE) -#define SWRITE_SYNC (SWRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) /* * These aren't really reads or writes, they pass down information about -- cgit v1.2.3-58-ga151 From 2a4419b5b2a77f3f4537c14f7ad7df95770655dd Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:33 +1000 Subject: fs: fs_struct rwlock to spinlock fs: fs_struct rwlock to spinlock struct fs_struct.lock is an rwlock with the read-side used to protect root and pwd members while taking references to them. Taking a reference to a path typically requires just 2 atomic ops, so the critical section is very small. Parallel read-side operations would have cacheline contention on the lock, the dentry, and the vfsmount cachelines, so the rwlock is unlikely to ever give a real parallelism increase. Replace it with a spinlock to avoid one or two atomic operations in typical path lookup fastpath. Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- drivers/staging/pohmelfs/path_entry.c | 8 ++++---- fs/exec.c | 4 ++-- fs/fs_struct.c | 32 ++++++++++++++++---------------- include/linux/fs_struct.h | 14 +++++++------- kernel/fork.c | 10 +++++----- 5 files changed, 34 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c index cdc4dd50d638..8ec83d2dffb7 100644 --- a/drivers/staging/pohmelfs/path_entry.c +++ b/drivers/staging/pohmelfs/path_entry.c @@ -44,9 +44,9 @@ int pohmelfs_construct_path_string(struct pohmelfs_inode *pi, void *data, int le return -ENOENT; } - read_lock(¤t->fs->lock); + spin_lock(¤t->fs->lock); path.mnt = mntget(current->fs->root.mnt); - read_unlock(¤t->fs->lock); + spin_unlock(¤t->fs->lock); path.dentry = d; @@ -91,9 +91,9 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi) return -ENOENT; } - read_lock(¤t->fs->lock); + spin_lock(¤t->fs->lock); root = dget(current->fs->root.dentry); - read_unlock(¤t->fs->lock); + spin_unlock(¤t->fs->lock); spin_lock(&dcache_lock); diff --git a/fs/exec.c b/fs/exec.c index 7761837e4500..5adab2c93eca 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1117,7 +1117,7 @@ int check_unsafe_exec(struct linux_binprm *bprm) bprm->unsafe = tracehook_unsafe_exec(p); n_fs = 1; - write_lock(&p->fs->lock); + spin_lock(&p->fs->lock); rcu_read_lock(); for (t = next_thread(p); t != p; t = next_thread(t)) { if (t->fs == p->fs) @@ -1134,7 +1134,7 @@ int check_unsafe_exec(struct linux_binprm *bprm) res = 1; } } - write_unlock(&p->fs->lock); + spin_unlock(&p->fs->lock); return res; } diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 1ee40eb9a2c0..ed45a9cf5f3d 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -13,11 +13,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path) { struct path old_root; - write_lock(&fs->lock); + spin_lock(&fs->lock); old_root = fs->root; fs->root = *path; path_get(path); - write_unlock(&fs->lock); + spin_unlock(&fs->lock); if (old_root.dentry) path_put(&old_root); } @@ -30,11 +30,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) { struct path old_pwd; - write_lock(&fs->lock); + spin_lock(&fs->lock); old_pwd = fs->pwd; fs->pwd = *path; path_get(path); - write_unlock(&fs->lock); + spin_unlock(&fs->lock); if (old_pwd.dentry) path_put(&old_pwd); @@ -51,7 +51,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) task_lock(p); fs = p->fs; if (fs) { - write_lock(&fs->lock); + spin_lock(&fs->lock); if (fs->root.dentry == old_root->dentry && fs->root.mnt == old_root->mnt) { path_get(new_root); @@ -64,7 +64,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) fs->pwd = *new_root; count++; } - write_unlock(&fs->lock); + spin_unlock(&fs->lock); } task_unlock(p); } while_each_thread(g, p); @@ -87,10 +87,10 @@ void exit_fs(struct task_struct *tsk) if (fs) { int kill; task_lock(tsk); - write_lock(&fs->lock); + spin_lock(&fs->lock); tsk->fs = NULL; kill = !--fs->users; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(tsk); if (kill) free_fs_struct(fs); @@ -104,7 +104,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) if (fs) { fs->users = 1; fs->in_exec = 0; - rwlock_init(&fs->lock); + spin_lock_init(&fs->lock); fs->umask = old->umask; get_fs_root_and_pwd(old, &fs->root, &fs->pwd); } @@ -121,10 +121,10 @@ int unshare_fs_struct(void) return -ENOMEM; task_lock(current); - write_lock(&fs->lock); + spin_lock(&fs->lock); kill = !--fs->users; current->fs = new_fs; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(current); if (kill) @@ -143,7 +143,7 @@ EXPORT_SYMBOL(current_umask); /* to be mentioned only in INIT_TASK */ struct fs_struct init_fs = { .users = 1, - .lock = __RW_LOCK_UNLOCKED(init_fs.lock), + .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), .umask = 0022, }; @@ -156,14 +156,14 @@ void daemonize_fs_struct(void) task_lock(current); - write_lock(&init_fs.lock); + spin_lock(&init_fs.lock); init_fs.users++; - write_unlock(&init_fs.lock); + spin_unlock(&init_fs.lock); - write_lock(&fs->lock); + spin_lock(&fs->lock); current->fs = &init_fs; kill = !--fs->users; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(current); if (kill) diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index eca3d5202138..a42b5bf02f8b 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -5,7 +5,7 @@ struct fs_struct { int users; - rwlock_t lock; + spinlock_t lock; int umask; int in_exec; struct path root, pwd; @@ -23,29 +23,29 @@ extern int unshare_fs_struct(void); static inline void get_fs_root(struct fs_struct *fs, struct path *root) { - read_lock(&fs->lock); + spin_lock(&fs->lock); *root = fs->root; path_get(root); - read_unlock(&fs->lock); + spin_unlock(&fs->lock); } static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd) { - read_lock(&fs->lock); + spin_lock(&fs->lock); *pwd = fs->pwd; path_get(pwd); - read_unlock(&fs->lock); + spin_unlock(&fs->lock); } static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root, struct path *pwd) { - read_lock(&fs->lock); + spin_lock(&fs->lock); *root = fs->root; path_get(root); *pwd = fs->pwd; path_get(pwd); - read_unlock(&fs->lock); + spin_unlock(&fs->lock); } #endif /* _LINUX_FS_STRUCT_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 98b450876f93..856eac3ec52e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -752,13 +752,13 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) struct fs_struct *fs = current->fs; if (clone_flags & CLONE_FS) { /* tsk->fs is already what we want */ - write_lock(&fs->lock); + spin_lock(&fs->lock); if (fs->in_exec) { - write_unlock(&fs->lock); + spin_unlock(&fs->lock); return -EAGAIN; } fs->users++; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); return 0; } tsk->fs = copy_fs_struct(fs); @@ -1676,13 +1676,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) if (new_fs) { fs = current->fs; - write_lock(&fs->lock); + spin_lock(&fs->lock); current->fs = new_fs; if (--fs->users) new_fs = NULL; else new_fs = fs; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); } if (new_mm) { -- cgit v1.2.3-58-ga151 From ee2ffa0dfdd2db19705f2ba1c6a4c0bfe8122dd8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:35 +1000 Subject: fs: cleanup files_lock locking fs: cleanup files_lock locking Lock tty_files with a new spinlock, tty_files_lock; provide helpers to manipulate the per-sb files list; unexport the files_lock spinlock. Cc: linux-kernel@vger.kernel.org Cc: Christoph Hellwig Cc: Alan Cox Acked-by: Andi Kleen Acked-by: Greg Kroah-Hartman Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- drivers/char/pty.c | 6 +++++- drivers/char/tty_io.c | 26 ++++++++++++++++++-------- fs/file_table.c | 42 ++++++++++++++++++------------------------ fs/open.c | 4 ++-- include/linux/fs.h | 7 ++----- include/linux/tty.h | 1 + security/selinux/hooks.c | 4 ++-- 7 files changed, 48 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/drivers/char/pty.c b/drivers/char/pty.c index ad46eae1f9bb..2c64faa8efa4 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -676,7 +676,11 @@ static int ptmx_open(struct inode *inode, struct file *filp) set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ filp->private_data = tty; - file_move(filp, &tty->tty_files); + + file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ + spin_lock(&tty_files_lock); + list_add(&filp->f_u.fu_list, &tty->tty_files); + spin_unlock(&tty_files_lock); retval = devpts_pty_new(inode, tty->link); if (retval) diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 0350c42375a2..cd5b829634ea 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -136,6 +136,9 @@ LIST_HEAD(tty_drivers); /* linked list of tty drivers */ DEFINE_MUTEX(tty_mutex); EXPORT_SYMBOL(tty_mutex); +/* Spinlock to protect the tty->tty_files list */ +DEFINE_SPINLOCK(tty_files_lock); + static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *); static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *); ssize_t redirected_tty_write(struct file *, const char __user *, @@ -235,11 +238,11 @@ static int check_tty_count(struct tty_struct *tty, const char *routine) struct list_head *p; int count = 0; - file_list_lock(); + spin_lock(&tty_files_lock); list_for_each(p, &tty->tty_files) { count++; } - file_list_unlock(); + spin_unlock(&tty_files_lock); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_SLAVE && tty->link && tty->link->count) @@ -519,7 +522,7 @@ void __tty_hangup(struct tty_struct *tty) workqueue with the lock held */ check_tty_count(tty, "tty_hangup"); - file_list_lock(); + spin_lock(&tty_files_lock); /* This breaks for file handles being sent over AF_UNIX sockets ? */ list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) { if (filp->f_op->write == redirected_tty_write) @@ -530,7 +533,7 @@ void __tty_hangup(struct tty_struct *tty) __tty_fasync(-1, filp, 0); /* can't block */ filp->f_op = &hung_up_tty_fops; } - file_list_unlock(); + spin_unlock(&tty_files_lock); tty_ldisc_hangup(tty); @@ -1424,9 +1427,9 @@ static void release_one_tty(struct work_struct *work) tty_driver_kref_put(driver); module_put(driver->owner); - file_list_lock(); + spin_lock(&tty_files_lock); list_del_init(&tty->tty_files); - file_list_unlock(); + spin_unlock(&tty_files_lock); put_pid(tty->pgrp); put_pid(tty->session); @@ -1671,7 +1674,10 @@ int tty_release(struct inode *inode, struct file *filp) * - do_tty_hangup no longer sees this file descriptor as * something that needs to be handled for hangups. */ - file_kill(filp); + spin_lock(&tty_files_lock); + BUG_ON(list_empty(&filp->f_u.fu_list)); + list_del_init(&filp->f_u.fu_list); + spin_unlock(&tty_files_lock); filp->private_data = NULL; /* @@ -1840,7 +1846,11 @@ got_driver: } filp->private_data = tty; - file_move(filp, &tty->tty_files); + BUG_ON(list_empty(&filp->f_u.fu_list)); + file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ + spin_lock(&tty_files_lock); + list_add(&filp->f_u.fu_list, &tty->tty_files); + spin_unlock(&tty_files_lock); check_tty_count(tty, "tty_open"); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) diff --git a/fs/file_table.c b/fs/file_table.c index edecd36fed9b..6f0e62ecfddd 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -32,8 +32,7 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -/* public. Not pretty! */ -__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; @@ -249,7 +248,7 @@ static void __fput(struct file *file) cdev_put(inode->i_cdev); fops_put(file->f_op); put_pid(file->f_owner.pid); - file_kill(file); + file_sb_list_del(file); if (file->f_mode & FMODE_WRITE) drop_file_write_access(file); file->f_path.dentry = NULL; @@ -328,31 +327,29 @@ struct file *fget_light(unsigned int fd, int *fput_needed) return file; } - void put_filp(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { security_file_free(file); - file_kill(file); + file_sb_list_del(file); file_free(file); } } -void file_move(struct file *file, struct list_head *list) +void file_sb_list_add(struct file *file, struct super_block *sb) { - if (!list) - return; - file_list_lock(); - list_move(&file->f_u.fu_list, list); - file_list_unlock(); + spin_lock(&files_lock); + BUG_ON(!list_empty(&file->f_u.fu_list)); + list_add(&file->f_u.fu_list, &sb->s_files); + spin_unlock(&files_lock); } -void file_kill(struct file *file) +void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - file_list_lock(); + spin_lock(&files_lock); list_del_init(&file->f_u.fu_list); - file_list_unlock(); + spin_unlock(&files_lock); } } @@ -361,7 +358,7 @@ int fs_may_remount_ro(struct super_block *sb) struct file *file; /* Check that no files are currently opened for writing. */ - file_list_lock(); + spin_lock(&files_lock); list_for_each_entry(file, &sb->s_files, f_u.fu_list) { struct inode *inode = file->f_path.dentry->d_inode; @@ -373,10 +370,10 @@ int fs_may_remount_ro(struct super_block *sb) if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) goto too_bad; } - file_list_unlock(); + spin_unlock(&files_lock); return 1; /* Tis' cool bro. */ too_bad: - file_list_unlock(); + spin_unlock(&files_lock); return 0; } @@ -392,7 +389,7 @@ void mark_files_ro(struct super_block *sb) struct file *f; retry: - file_list_lock(); + spin_lock(&files_lock); list_for_each_entry(f, &sb->s_files, f_u.fu_list) { struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) @@ -408,16 +405,13 @@ retry: continue; file_release_write(f); mnt = mntget(f->f_path.mnt); - file_list_unlock(); - /* - * This can sleep, so we can't hold - * the file_list_lock() spinlock. - */ + /* This can sleep, so we can't hold the spinlock. */ + spin_unlock(&files_lock); mnt_drop_write(mnt); mntput(mnt); goto retry; } - file_list_unlock(); + spin_unlock(&files_lock); } void __init files_init(unsigned long mempages) diff --git a/fs/open.c b/fs/open.c index 630715f9f73d..d74e1983e8dc 100644 --- a/fs/open.c +++ b/fs/open.c @@ -675,7 +675,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, f->f_path.mnt = mnt; f->f_pos = 0; f->f_op = fops_get(inode->i_fop); - file_move(f, &inode->i_sb->s_files); + file_sb_list_add(f, inode->i_sb); error = security_dentry_open(f, cred); if (error) @@ -721,7 +721,7 @@ cleanup_all: mnt_drop_write(mnt); } } - file_kill(f); + file_sb_list_del(f); f->f_path.dentry = NULL; f->f_path.mnt = NULL; cleanup_file: diff --git a/include/linux/fs.h b/include/linux/fs.h index 29f7c975304c..5a9a9e5a3705 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -944,9 +944,6 @@ struct file { unsigned long f_mnt_write_state; #endif }; -extern spinlock_t files_lock; -#define file_list_lock() spin_lock(&files_lock); -#define file_list_unlock() spin_unlock(&files_lock); #define get_file(x) atomic_long_inc(&(x)->f_count) #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) @@ -2188,8 +2185,8 @@ static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } -extern void file_move(struct file *f, struct list_head *list); -extern void file_kill(struct file *f); +extern void file_sb_list_add(struct file *f, struct super_block *sb); +extern void file_sb_list_del(struct file *f); #ifdef CONFIG_BLOCK extern void submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); diff --git a/include/linux/tty.h b/include/linux/tty.h index 1437da3ddc62..f6b371a2514e 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -470,6 +470,7 @@ extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty); extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty); extern struct mutex tty_mutex; +extern spinlock_t tty_files_lock; extern void tty_write_unlock(struct tty_struct *tty); extern int tty_write_lock(struct tty_struct *tty, int ndelay); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 42043f96e54f..bd7da0f0ccf3 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2170,7 +2170,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, tty = get_current_tty(); if (tty) { - file_list_lock(); + spin_lock(&tty_files_lock); if (!list_empty(&tty->tty_files)) { struct inode *inode; @@ -2186,7 +2186,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, drop_tty = 1; } } - file_list_unlock(); + spin_unlock(&tty_files_lock); tty_kref_put(tty); } /* Reset controlling tty. */ -- cgit v1.2.3-58-ga151 From d996b62a8df1d935b01319bf8defb95b5709f7b8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:36 +1000 Subject: tty: fix fu_list abuse tty: fix fu_list abuse tty code abuses fu_list, which causes a bug in remount,ro handling. If a tty device node is opened on a filesystem, then the last link to the inode removed, the filesystem will be allowed to be remounted readonly. This is because fs_may_remount_ro does not find the 0 link tty inode on the file sb list (because the tty code incorrectly removed it to use for its own purpose). This can result in a filesystem with errors after it is marked "clean". Taking idea from Christoph's initial patch, allocate a tty private struct at file->private_data and put our required list fields in there, linking file and tty. This makes tty nodes behave the same way as other device nodes and avoid meddling with the vfs, and avoids this bug. The error handling is not trivial in the tty code, so for this bugfix, I take the simple approach of using __GFP_NOFAIL and don't worry about memory errors. This is not a problem because our allocator doesn't fail small allocs as a rule anyway. So proper error handling is left as an exercise for tty hackers. [ Arguably filesystem's device inode would ideally be divorced from the driver's pseudo inode when it is opened, but in practice it's not clear whether that will ever be worth implementing. ] Cc: linux-kernel@vger.kernel.org Cc: Christoph Hellwig Cc: Alan Cox Cc: Greg Kroah-Hartman Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- drivers/char/pty.c | 6 +--- drivers/char/tty_io.c | 84 +++++++++++++++++++++++++++++++----------------- fs/internal.h | 2 ++ include/linux/fs.h | 2 -- include/linux/tty.h | 8 +++++ security/selinux/hooks.c | 5 ++- 6 files changed, 69 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/drivers/char/pty.c b/drivers/char/pty.c index 2c64faa8efa4..c350d01716bd 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -675,12 +675,8 @@ static int ptmx_open(struct inode *inode, struct file *filp) } set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ - filp->private_data = tty; - file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ - spin_lock(&tty_files_lock); - list_add(&filp->f_u.fu_list, &tty->tty_files); - spin_unlock(&tty_files_lock); + tty_add_file(tty, filp); retval = devpts_pty_new(inode, tty->link); if (retval) diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index cd5b829634ea..949067a0bd47 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -188,6 +188,41 @@ void free_tty_struct(struct tty_struct *tty) kfree(tty); } +static inline struct tty_struct *file_tty(struct file *file) +{ + return ((struct tty_file_private *)file->private_data)->tty; +} + +/* Associate a new file with the tty structure */ +void tty_add_file(struct tty_struct *tty, struct file *file) +{ + struct tty_file_private *priv; + + /* XXX: must implement proper error handling in callers */ + priv = kmalloc(sizeof(*priv), GFP_KERNEL|__GFP_NOFAIL); + + priv->tty = tty; + priv->file = file; + file->private_data = priv; + + spin_lock(&tty_files_lock); + list_add(&priv->list, &tty->tty_files); + spin_unlock(&tty_files_lock); +} + +/* Delete file from its tty */ +void tty_del_file(struct file *file) +{ + struct tty_file_private *priv = file->private_data; + + spin_lock(&tty_files_lock); + list_del(&priv->list); + spin_unlock(&tty_files_lock); + file->private_data = NULL; + kfree(priv); +} + + #define TTY_NUMBER(tty) ((tty)->index + (tty)->driver->name_base) /** @@ -500,6 +535,7 @@ void __tty_hangup(struct tty_struct *tty) struct file *cons_filp = NULL; struct file *filp, *f = NULL; struct task_struct *p; + struct tty_file_private *priv; int closecount = 0, n; unsigned long flags; int refs = 0; @@ -509,7 +545,7 @@ void __tty_hangup(struct tty_struct *tty) spin_lock(&redirect_lock); - if (redirect && redirect->private_data == tty) { + if (redirect && file_tty(redirect) == tty) { f = redirect; redirect = NULL; } @@ -524,7 +560,8 @@ void __tty_hangup(struct tty_struct *tty) spin_lock(&tty_files_lock); /* This breaks for file handles being sent over AF_UNIX sockets ? */ - list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) { + list_for_each_entry(priv, &tty->tty_files, list) { + filp = priv->file; if (filp->f_op->write == redirected_tty_write) cons_filp = filp; if (filp->f_op->write != tty_write) @@ -892,12 +929,10 @@ static ssize_t tty_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { int i; - struct tty_struct *tty; - struct inode *inode; + struct inode *inode = file->f_path.dentry->d_inode; + struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; - tty = file->private_data; - inode = file->f_path.dentry->d_inode; if (tty_paranoia_check(tty, inode, "tty_read")) return -EIO; if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags))) @@ -1068,12 +1103,11 @@ void tty_write_message(struct tty_struct *tty, char *msg) static ssize_t tty_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct tty_struct *tty; struct inode *inode = file->f_path.dentry->d_inode; + struct tty_struct *tty = file_tty(file); + struct tty_ldisc *ld; ssize_t ret; - struct tty_ldisc *ld; - tty = file->private_data; if (tty_paranoia_check(tty, inode, "tty_write")) return -EIO; if (!tty || !tty->ops->write || @@ -1510,13 +1544,13 @@ static void release_tty(struct tty_struct *tty, int idx) int tty_release(struct inode *inode, struct file *filp) { - struct tty_struct *tty, *o_tty; + struct tty_struct *tty = file_tty(filp); + struct tty_struct *o_tty; int pty_master, tty_closing, o_tty_closing, do_sleep; int devpts; int idx; char buf[64]; - tty = filp->private_data; if (tty_paranoia_check(tty, inode, "tty_release_dev")) return 0; @@ -1674,11 +1708,7 @@ int tty_release(struct inode *inode, struct file *filp) * - do_tty_hangup no longer sees this file descriptor as * something that needs to be handled for hangups. */ - spin_lock(&tty_files_lock); - BUG_ON(list_empty(&filp->f_u.fu_list)); - list_del_init(&filp->f_u.fu_list); - spin_unlock(&tty_files_lock); - filp->private_data = NULL; + tty_del_file(filp); /* * Perform some housekeeping before deciding whether to return. @@ -1845,12 +1875,8 @@ got_driver: return PTR_ERR(tty); } - filp->private_data = tty; - BUG_ON(list_empty(&filp->f_u.fu_list)); - file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ - spin_lock(&tty_files_lock); - list_add(&filp->f_u.fu_list, &tty->tty_files); - spin_unlock(&tty_files_lock); + tty_add_file(tty, filp); + check_tty_count(tty, "tty_open"); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) @@ -1926,11 +1952,10 @@ got_driver: static unsigned int tty_poll(struct file *filp, poll_table *wait) { - struct tty_struct *tty; + struct tty_struct *tty = file_tty(filp); struct tty_ldisc *ld; int ret = 0; - tty = filp->private_data; if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_poll")) return 0; @@ -1943,11 +1968,10 @@ static unsigned int tty_poll(struct file *filp, poll_table *wait) static int __tty_fasync(int fd, struct file *filp, int on) { - struct tty_struct *tty; + struct tty_struct *tty = file_tty(filp); unsigned long flags; int retval = 0; - tty = filp->private_data; if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_fasync")) goto out; @@ -2501,13 +2525,13 @@ EXPORT_SYMBOL(tty_pair_get_pty); */ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct tty_struct *tty, *real_tty; + struct tty_struct *tty = file_tty(file); + struct tty_struct *real_tty; void __user *p = (void __user *)arg; int retval; struct tty_ldisc *ld; struct inode *inode = file->f_dentry->d_inode; - tty = file->private_data; if (tty_paranoia_check(tty, inode, "tty_ioctl")) return -EINVAL; @@ -2629,7 +2653,7 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct inode *inode = file->f_dentry->d_inode; - struct tty_struct *tty = file->private_data; + struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; int retval = -ENOIOCTLCMD; @@ -2721,7 +2745,7 @@ void __do_SAK(struct tty_struct *tty) if (!filp) continue; if (filp->f_op->read == tty_read && - filp->private_data == tty) { + file_tty(filp) == tty) { printk(KERN_NOTICE "SAK: killed process %d" " (%s): fd#%d opened to the tty\n", task_pid_nr(p), p->comm, i); diff --git a/fs/internal.h b/fs/internal.h index 6b706bc60a66..6a5c13a80660 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -80,6 +80,8 @@ extern void chroot_fs_refs(struct path *, struct path *); /* * file_table.c */ +extern void file_sb_list_add(struct file *f, struct super_block *sb); +extern void file_sb_list_del(struct file *f); extern void mark_files_ro(struct super_block *); extern struct file *get_empty_filp(void); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5a9a9e5a3705..5e65add0f163 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2185,8 +2185,6 @@ static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } -extern void file_sb_list_add(struct file *f, struct super_block *sb); -extern void file_sb_list_del(struct file *f); #ifdef CONFIG_BLOCK extern void submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); diff --git a/include/linux/tty.h b/include/linux/tty.h index f6b371a2514e..67d64e6efe7a 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -329,6 +329,13 @@ struct tty_struct { struct tty_port *port; }; +/* Each of a tty's open files has private_data pointing to tty_file_private */ +struct tty_file_private { + struct tty_struct *tty; + struct file *file; + struct list_head list; +}; + /* tty magic number */ #define TTY_MAGIC 0x5401 @@ -458,6 +465,7 @@ extern void proc_clear_tty(struct task_struct *p); extern struct tty_struct *get_current_tty(void); extern void tty_default_fops(struct file_operations *fops); extern struct tty_struct *alloc_tty_struct(void); +extern void tty_add_file(struct tty_struct *tty, struct file *file); extern void free_tty_struct(struct tty_struct *tty); extern void initialize_tty_struct(struct tty_struct *tty, struct tty_driver *driver, int idx); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index bd7da0f0ccf3..4796ddd4e721 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2172,6 +2172,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, if (tty) { spin_lock(&tty_files_lock); if (!list_empty(&tty->tty_files)) { + struct tty_file_private *file_priv; struct inode *inode; /* Revalidate access to controlling tty. @@ -2179,7 +2180,9 @@ static inline void flush_unauthorized_files(const struct cred *cred, than using file_has_perm, as this particular open file may belong to another process and we are only interested in the inode-based check here. */ - file = list_first_entry(&tty->tty_files, struct file, f_u.fu_list); + file_priv = list_first_entry(&tty->tty_files, + struct tty_file_private, list); + file = file_priv->file; inode = file->f_path.dentry->d_inode; if (inode_has_perm(cred, inode, FILE__READ | FILE__WRITE, NULL)) { -- cgit v1.2.3-58-ga151 From 2dc91abe03d8ce6dd7f9251faffafca5f6b9e85d Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:37 +1000 Subject: lglock: introduce special lglock and brlock spin locks lglock: introduce special lglock and brlock spin locks This patch introduces "local-global" locks (lglocks). These can be used to: - Provide fast exclusive access to per-CPU data, with exclusive access to another CPU's data allowed but possibly subject to contention, and to provide very slow exclusive access to all per-CPU data. - Or to provide very fast and scalable read serialisation, and to provide very slow exclusive serialisation of data (not necessarily per-CPU data). Brlocks are also implemented as a short-hand notation for the latter use case. Thanks to Paul for local/global naming convention. Cc: linux-kernel@vger.kernel.org Cc: Al Viro Cc: "Paul E. McKenney" Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- include/linux/lglock.h | 172 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 include/linux/lglock.h (limited to 'include') diff --git a/include/linux/lglock.h b/include/linux/lglock.h new file mode 100644 index 000000000000..b288cb713b90 --- /dev/null +++ b/include/linux/lglock.h @@ -0,0 +1,172 @@ +/* + * Specialised local-global spinlock. Can only be declared as global variables + * to avoid overhead and keep things simple (and we don't want to start using + * these inside dynamically allocated structures). + * + * "local/global locks" (lglocks) can be used to: + * + * - Provide fast exclusive access to per-CPU data, with exclusive access to + * another CPU's data allowed but possibly subject to contention, and to + * provide very slow exclusive access to all per-CPU data. + * - Or to provide very fast and scalable read serialisation, and to provide + * very slow exclusive serialisation of data (not necessarily per-CPU data). + * + * Brlocks are also implemented as a short-hand notation for the latter use + * case. + * + * Copyright 2009, 2010, Nick Piggin, Novell Inc. + */ +#ifndef __LINUX_LGLOCK_H +#define __LINUX_LGLOCK_H + +#include +#include +#include + +/* can make br locks by using local lock for read side, global lock for write */ +#define br_lock_init(name) name##_lock_init() +#define br_read_lock(name) name##_local_lock() +#define br_read_unlock(name) name##_local_unlock() +#define br_write_lock(name) name##_global_lock_online() +#define br_write_unlock(name) name##_global_unlock_online() + +#define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) +#define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) + + +#define lg_lock_init(name) name##_lock_init() +#define lg_local_lock(name) name##_local_lock() +#define lg_local_unlock(name) name##_local_unlock() +#define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu) +#define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) +#define lg_global_lock(name) name##_global_lock() +#define lg_global_unlock(name) name##_global_unlock() +#define lg_global_lock_online(name) name##_global_lock_online() +#define lg_global_unlock_online(name) name##_global_unlock_online() + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +#define LOCKDEP_INIT_MAP lockdep_init_map + +#define DEFINE_LGLOCK_LOCKDEP(name) \ + struct lock_class_key name##_lock_key; \ + struct lockdep_map name##_lock_dep_map; \ + EXPORT_SYMBOL(name##_lock_dep_map) + +#else +#define LOCKDEP_INIT_MAP(a, b, c, d) + +#define DEFINE_LGLOCK_LOCKDEP(name) +#endif + + +#define DECLARE_LGLOCK(name) \ + extern void name##_lock_init(void); \ + extern void name##_local_lock(void); \ + extern void name##_local_unlock(void); \ + extern void name##_local_lock_cpu(int cpu); \ + extern void name##_local_unlock_cpu(int cpu); \ + extern void name##_global_lock(void); \ + extern void name##_global_unlock(void); \ + extern void name##_global_lock_online(void); \ + extern void name##_global_unlock_online(void); \ + +#define DEFINE_LGLOCK(name) \ + \ + DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ + DEFINE_LGLOCK_LOCKDEP(name); \ + \ + void name##_lock_init(void) { \ + int i; \ + LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ + for_each_possible_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ + } \ + } \ + EXPORT_SYMBOL(name##_lock_init); \ + \ + void name##_local_lock(void) { \ + arch_spinlock_t *lock; \ + preempt_disable(); \ + rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ + lock = &__get_cpu_var(name##_lock); \ + arch_spin_lock(lock); \ + } \ + EXPORT_SYMBOL(name##_local_lock); \ + \ + void name##_local_unlock(void) { \ + arch_spinlock_t *lock; \ + rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ + lock = &__get_cpu_var(name##_lock); \ + arch_spin_unlock(lock); \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_local_unlock); \ + \ + void name##_local_lock_cpu(int cpu) { \ + arch_spinlock_t *lock; \ + preempt_disable(); \ + rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ + lock = &per_cpu(name##_lock, cpu); \ + arch_spin_lock(lock); \ + } \ + EXPORT_SYMBOL(name##_local_lock_cpu); \ + \ + void name##_local_unlock_cpu(int cpu) { \ + arch_spinlock_t *lock; \ + rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ + lock = &per_cpu(name##_lock, cpu); \ + arch_spin_unlock(lock); \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_local_unlock_cpu); \ + \ + void name##_global_lock_online(void) { \ + int i; \ + preempt_disable(); \ + rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_lock(lock); \ + } \ + } \ + EXPORT_SYMBOL(name##_global_lock_online); \ + \ + void name##_global_unlock_online(void) { \ + int i; \ + rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_unlock(lock); \ + } \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_global_unlock_online); \ + \ + void name##_global_lock(void) { \ + int i; \ + preempt_disable(); \ + rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_lock(lock); \ + } \ + } \ + EXPORT_SYMBOL(name##_global_lock); \ + \ + void name##_global_unlock(void) { \ + int i; \ + rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_unlock(lock); \ + } \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_global_unlock); +#endif -- cgit v1.2.3-58-ga151 From 6416ccb7899960868f5016751fb81bf25213d24f Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:38 +1000 Subject: fs: scale files_lock fs: scale files_lock Improve scalability of files_lock by adding per-cpu, per-sb files lists, protected with an lglock. The lglock provides fast access to the per-cpu lists to add and remove files. It also provides a snapshot of all the per-cpu lists (although this is very slow). One difficulty with this approach is that a file can be removed from the list by another CPU. We must track which per-cpu list the file is on with a new variale in the file struct (packed into a hole on 64-bit archs). Scalability could suffer if files are frequently removed from different cpu's list. However loads with frequent removal of files imply short interval between adding and removing the files, and the scheduler attempts to avoid moving processes too far away. Also, even in the case of cross-CPU removal, the hardware has much more opportunity to parallelise cacheline transfers with N cachelines than with 1. A worst-case test of 1 CPU allocating files subsequently being freed by N CPUs degenerates to contending on a single lock, which is no worse than before. When more than one CPU are allocating files, even if they are always freed by different CPUs, there will be more parallelism than the single-lock case. Testing results: On a 2 socket, 8 core opteron, I measure the number of times the lock is taken to remove the file, the number of times it is removed by the same CPU that added it, and the number of times it is removed by the same node that added it. Booting: locks= 25049 cpu-hits= 23174 (92.5%) node-hits= 23945 (95.6%) kbuild -j16 locks=2281913 cpu-hits=2208126 (96.8%) node-hits=2252674 (98.7%) dbench 64 locks=4306582 cpu-hits=4287247 (99.6%) node-hits=4299527 (99.8%) So a file is removed from the same CPU it was added by over 90% of the time. It remains within the same node 95% of the time. Tim Chen ran some numbers for a 64 thread Nehalem system performing a compile. throughput 2.6.34-rc2 24.5 +patch 24.9 us sys idle IO wait (in %) 2.6.34-rc2 51.25 28.25 17.25 3.25 +patch 53.75 18.5 19 8.75 So significantly less CPU time spent in kernel code, higher idle time and slightly higher throughput. Single threaded performance difference was within the noise of microbenchmarks. That is not to say penalty does not exist, the code is larger and more memory accesses required so it will be slightly slower. Cc: linux-kernel@vger.kernel.org Cc: Tim Chen Cc: Andi Kleen Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- fs/file_table.c | 108 ++++++++++++++++++++++++++++++++++++++++++++--------- fs/super.c | 18 +++++++++ include/linux/fs.h | 7 ++++ 3 files changed, 115 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/file_table.c b/fs/file_table.c index 6f0e62ecfddd..a04bdd81c11c 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include #include #include @@ -32,7 +34,8 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); +DECLARE_LGLOCK(files_lglock); +DEFINE_LGLOCK(files_lglock); /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; @@ -336,30 +339,98 @@ void put_filp(struct file *file) } } +static inline int file_list_cpu(struct file *file) +{ +#ifdef CONFIG_SMP + return file->f_sb_list_cpu; +#else + return smp_processor_id(); +#endif +} + +/* helper for file_sb_list_add to reduce ifdefs */ +static inline void __file_sb_list_add(struct file *file, struct super_block *sb) +{ + struct list_head *list; +#ifdef CONFIG_SMP + int cpu; + cpu = smp_processor_id(); + file->f_sb_list_cpu = cpu; + list = per_cpu_ptr(sb->s_files, cpu); +#else + list = &sb->s_files; +#endif + list_add(&file->f_u.fu_list, list); +} + +/** + * file_sb_list_add - add a file to the sb's file list + * @file: file to add + * @sb: sb to add it to + * + * Use this function to associate a file with the superblock of the inode it + * refers to. + */ void file_sb_list_add(struct file *file, struct super_block *sb) { - spin_lock(&files_lock); - BUG_ON(!list_empty(&file->f_u.fu_list)); - list_add(&file->f_u.fu_list, &sb->s_files); - spin_unlock(&files_lock); + lg_local_lock(files_lglock); + __file_sb_list_add(file, sb); + lg_local_unlock(files_lglock); } +/** + * file_sb_list_del - remove a file from the sb's file list + * @file: file to remove + * @sb: sb to remove it from + * + * Use this function to remove a file from its superblock. + */ void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - spin_lock(&files_lock); + lg_local_lock_cpu(files_lglock, file_list_cpu(file)); list_del_init(&file->f_u.fu_list); - spin_unlock(&files_lock); + lg_local_unlock_cpu(files_lglock, file_list_cpu(file)); } } +#ifdef CONFIG_SMP + +/* + * These macros iterate all files on all CPUs for a given superblock. + * files_lglock must be held globally. + */ +#define do_file_list_for_each_entry(__sb, __file) \ +{ \ + int i; \ + for_each_possible_cpu(i) { \ + struct list_head *list; \ + list = per_cpu_ptr((__sb)->s_files, i); \ + list_for_each_entry((__file), list, f_u.fu_list) + +#define while_file_list_for_each_entry \ + } \ +} + +#else + +#define do_file_list_for_each_entry(__sb, __file) \ +{ \ + struct list_head *list; \ + list = &(sb)->s_files; \ + list_for_each_entry((__file), list, f_u.fu_list) + +#define while_file_list_for_each_entry \ +} + +#endif + int fs_may_remount_ro(struct super_block *sb) { struct file *file; - /* Check that no files are currently opened for writing. */ - spin_lock(&files_lock); - list_for_each_entry(file, &sb->s_files, f_u.fu_list) { + lg_global_lock(files_lglock); + do_file_list_for_each_entry(sb, file) { struct inode *inode = file->f_path.dentry->d_inode; /* File with pending delete? */ @@ -369,11 +440,11 @@ int fs_may_remount_ro(struct super_block *sb) /* Writeable file? */ if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) goto too_bad; - } - spin_unlock(&files_lock); + } while_file_list_for_each_entry; + lg_global_unlock(files_lglock); return 1; /* Tis' cool bro. */ too_bad: - spin_unlock(&files_lock); + lg_global_unlock(files_lglock); return 0; } @@ -389,8 +460,8 @@ void mark_files_ro(struct super_block *sb) struct file *f; retry: - spin_lock(&files_lock); - list_for_each_entry(f, &sb->s_files, f_u.fu_list) { + lg_global_lock(files_lglock); + do_file_list_for_each_entry(sb, f) { struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) continue; @@ -406,12 +477,12 @@ retry: file_release_write(f); mnt = mntget(f->f_path.mnt); /* This can sleep, so we can't hold the spinlock. */ - spin_unlock(&files_lock); + lg_global_unlock(files_lglock); mnt_drop_write(mnt); mntput(mnt); goto retry; - } - spin_unlock(&files_lock); + } while_file_list_for_each_entry; + lg_global_unlock(files_lglock); } void __init files_init(unsigned long mempages) @@ -431,5 +502,6 @@ void __init files_init(unsigned long mempages) if (files_stat.max_files < NR_FILE) files_stat.max_files = NR_FILE; files_defer_init(); + lg_lock_init(files_lglock); percpu_counter_init(&nr_files, 0); } diff --git a/fs/super.c b/fs/super.c index 9674ab2c8718..8819e3a7ff20 100644 --- a/fs/super.c +++ b/fs/super.c @@ -54,7 +54,22 @@ static struct super_block *alloc_super(struct file_system_type *type) s = NULL; goto out; } +#ifdef CONFIG_SMP + s->s_files = alloc_percpu(struct list_head); + if (!s->s_files) { + security_sb_free(s); + kfree(s); + s = NULL; + goto out; + } else { + int i; + + for_each_possible_cpu(i) + INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); + } +#else INIT_LIST_HEAD(&s->s_files); +#endif INIT_LIST_HEAD(&s->s_instances); INIT_HLIST_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); @@ -108,6 +123,9 @@ out: */ static inline void destroy_super(struct super_block *s) { +#ifdef CONFIG_SMP + free_percpu(s->s_files); +#endif security_sb_free(s); kfree(s->s_subtype); kfree(s->s_options); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5e65add0f163..76041b614758 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -920,6 +920,9 @@ struct file { #define f_vfsmnt f_path.mnt const struct file_operations *f_op; spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ +#ifdef CONFIG_SMP + int f_sb_list_cpu; +#endif atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; @@ -1334,7 +1337,11 @@ struct super_block { struct list_head s_inodes; /* all inodes */ struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ +#ifdef CONFIG_SMP + struct list_head __percpu *s_files; +#else struct list_head s_files; +#endif /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ struct list_head s_dentry_lru; /* unused dentry lru */ int s_nr_dentry_unused; /* # of dentry on lru */ -- cgit v1.2.3-58-ga151