summaryrefslogtreecommitdiff
path: root/fs/jbd2
diff options
context:
space:
mode:
authorRitesh Harjani <riteshh@linux.ibm.com>2022-02-16 12:30:35 +0530
committerTheodore Ts'o <tytso@mit.edu>2022-02-25 21:28:13 -0500
commitf7f497cb702462e8505ff3d8d4e7722ad95626a1 (patch)
tree5bedaec0e0276f9ff333f61affb25ba7c0aa3433 /fs/jbd2
parentcc16eecae687912238ee6efbff71ad31e2bc414e (diff)
jbd2: kill t_handle_lock transaction spinlock
This patch kills t_handle_lock transaction spinlock completely from jbd2. To explain the reasoning, currently there were three sites at which this spinlock was used. 1. jbd2_journal_wait_updates() a. Based on careful code review it can be seen that, we don't need this lock here. This is since we wait for any currently ongoing updates based on a atomic variable t_updates. And we anyway don't take any t_handle_lock while in stop_this_handle(). i.e. write_lock(&journal->j_state_lock() jbd2_journal_wait_updates() stop_this_handle() while (atomic_read(txn->t_updates) { | DEFINE_WAIT(wait); | prepare_to_wait(); | if (atomic_read(txn->t_updates) if (atomic_dec_and_test(txn->t_updates)) write_unlock(&journal->j_state_lock); schedule(); wake_up() write_lock(&journal->j_state_lock); finish_wait(); } txn->t_state = T_COMMIT write_unlock(&journal->j_state_lock); b. Also note that between atomic_inc(&txn->t_updates) in start_this_handle() and jbd2_journal_wait_updates(), the synchronization happens via read_lock(journal->j_state_lock) in start_this_handle(); 2. jbd2_journal_extend() a. jbd2_journal_extend() is called with the handle of each process from task_struct. So no lock required in updating member fields of handle_t b. For member fields of h_transaction, all updates happens only via atomic APIs (which is also within read_lock()). So, no need of this transaction spinlock. 3. update_t_max_wait() Based on Jan suggestion, this can be carefully removed using atomic cmpxchg API. Note that there can be several processes which are waiting for a new transaction to be allocated and started. For doing this only one process will succeed in taking write_lock() and allocating a new txn. After that all of the process will be updating the t_max_wait (max transaction wait time). This can be done via below method w/o taking any locks using atomic cmpxchg. For more details refer [1] new = get_new_val(); old = READ_ONCE(ptr->max_val); while (old < new) old = cmpxchg(&ptr->max_val, old, new); [1]: https://lwn.net/Articles/849237/ Suggested-by: Jan Kara <jack@suse.cz> Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com> Reviewed-by: Jan Kara <jack@suse.cz> Link: https://lore.kernel.org/r/d89e599658b4a1f3893a48c6feded200073037fc.1644992076.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs/jbd2')
-rw-r--r--fs/jbd2/transaction.c28
1 files changed, 9 insertions, 19 deletions
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 259e00046a8b..83801a8be078 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -107,7 +107,6 @@ static void jbd2_get_transaction(journal_t *journal,
transaction->t_start_time = ktime_get();
transaction->t_tid = journal->j_transaction_sequence++;
transaction->t_expires = jiffies + journal->j_commit_interval;
- spin_lock_init(&transaction->t_handle_lock);
atomic_set(&transaction->t_updates, 0);
atomic_set(&transaction->t_outstanding_credits,
jbd2_descriptor_blocks_per_trans(journal) +
@@ -139,24 +138,21 @@ static void jbd2_get_transaction(journal_t *journal,
/*
* Update transaction's maximum wait time, if debugging is enabled.
*
- * In order for t_max_wait to be reliable, it must be protected by a
- * lock. But doing so will mean that start_this_handle() can not be
- * run in parallel on SMP systems, which limits our scalability. So
- * unless debugging is enabled, we no longer update t_max_wait, which
- * means that maximum wait time reported by the jbd2_run_stats
- * tracepoint will always be zero.
+ * t_max_wait is carefully updated here with use of atomic compare exchange.
+ * Note that there could be multiplre threads trying to do this simultaneously
+ * hence using cmpxchg to avoid any use of locks in this case.
*/
static inline void update_t_max_wait(transaction_t *transaction,
unsigned long ts)
{
#ifdef CONFIG_JBD2_DEBUG
+ unsigned long oldts, newts;
if (jbd2_journal_enable_debug &&
time_after(transaction->t_start, ts)) {
- ts = jbd2_time_diff(ts, transaction->t_start);
- spin_lock(&transaction->t_handle_lock);
- if (ts > transaction->t_max_wait)
- transaction->t_max_wait = ts;
- spin_unlock(&transaction->t_handle_lock);
+ newts = jbd2_time_diff(ts, transaction->t_start);
+ oldts = READ_ONCE(transaction->t_max_wait);
+ while (oldts < newts)
+ oldts = cmpxchg(&transaction->t_max_wait, oldts, newts);
}
#endif
}
@@ -690,7 +686,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
DIV_ROUND_UP(
handle->h_revoke_credits_requested,
journal->j_revoke_records_per_block);
- spin_lock(&transaction->t_handle_lock);
wanted = atomic_add_return(nblocks,
&transaction->t_outstanding_credits);
@@ -698,7 +693,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
jbd_debug(3, "denied handle %p %d blocks: "
"transaction too large\n", handle, nblocks);
atomic_sub(nblocks, &transaction->t_outstanding_credits);
- goto unlock;
+ goto error_out;
}
trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
@@ -714,8 +709,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
result = 0;
jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
-unlock:
- spin_unlock(&transaction->t_handle_lock);
error_out:
read_unlock(&journal->j_state_lock);
return result;
@@ -860,15 +853,12 @@ void jbd2_journal_wait_updates(journal_t *journal)
if (!transaction)
break;
- spin_lock(&transaction->t_handle_lock);
prepare_to_wait(&journal->j_wait_updates, &wait,
TASK_UNINTERRUPTIBLE);
if (!atomic_read(&transaction->t_updates)) {
- spin_unlock(&transaction->t_handle_lock);
finish_wait(&journal->j_wait_updates, &wait);
break;
}
- spin_unlock(&transaction->t_handle_lock);
write_unlock(&journal->j_state_lock);
schedule();
finish_wait(&journal->j_wait_updates, &wait);