hugetlb: allocate vma lock for all sharable vmas

The hugetlb vma lock was originally designed to synchronize pmd sharing. As such, it was only necessary to allocate the lock for vmas that were capable of pmd sharing. Later in the development cycle, it was discovered that it could also be used to simplify fault/truncation races as described in [1]. However, a subsequent change to allocate the lock for all vmas that use the page cache was never made. A fault/truncation race could leave pages in a file past i_size until the file is removed. Remove the previous restriction and allocate lock for all VM_MAYSHARE vmas. Warn in the unlikely event of allocation failure. [1] https://lore.kernel.org/lkml/Yxiv0SkMkZ0JWGGp@monkey/#t Link: https://lkml.kernel.org/r/20221005011707.514612-4-mike.kravetz@oracle.com Fixes: "hugetlb: clean up code checking for fault/truncation races" Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> Cc: Axel Rasmussen <axelrasmussen@google.com> Cc: David Hildenbrand <david@redhat.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: James Houghton <jthoughton@google.com> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Mina Almasry <almasrymina@google.com> Cc: Muchun Song <songmuchun@bytedance.com> Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev> Cc: Pasha Tatashin <pasha.tatashin@soleen.com> Cc: Peter Xu <peterx@redhat.com> Cc: Prakash Sangappa <prakash.sangappa@oracle.com> Cc: Sven Schnelle <svens@linux.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
author: Mike Kravetz <mike.kravetz@oracle.com> 2022-10-04 18:17:07 -0700
committer: Andrew Morton <akpm@linux-foundation.org> 2022-10-07 14:28:40 -0700
commit: bbff39cc6cbcb86ccfacb2dcafc79912a9f9df69 (patch)
tree: 8866b90080b9e9042eef198c6aa12d70b5c3c8ed /mm
parent: ecfbd733878da48ed03a5b8a9c301366a03e3cca (diff)
1 files changed, 15 insertions, 35 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 01f3e36caa6c..0ad53ad98e74 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6687,10 +6687,11 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma,
 	return saddr;
 }
 
-static bool __vma_aligned_range_pmd_shareable(struct vm_area_struct *vma,
-				unsigned long start, unsigned long end,
-				bool check_vma_lock)
+bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr)
 {
+	unsigned long start = addr & PUD_MASK;
+	unsigned long end = start + PUD_SIZE;
+
 #ifdef CONFIG_USERFAULTFD
 	if (uffd_disable_huge_pmd_share(vma))
 		return false;
@@ -6700,38 +6701,13 @@ static bool __vma_aligned_range_pmd_shareable(struct vm_area_struct *vma,
 	 */
 	if (!(vma->vm_flags & VM_MAYSHARE))
 		return false;
-	if (check_vma_lock && !vma->vm_private_data)
+	if (!vma->vm_private_data)	/* vma lock required for sharing */
 		return false;
 	if (!range_in_vma(vma, start, end))
 		return false;
 	return true;
 }
 
-static bool vma_pmd_shareable(struct vm_area_struct *vma)
-{
-	unsigned long start = ALIGN(vma->vm_start, PUD_SIZE),
-		      end = ALIGN_DOWN(vma->vm_end, PUD_SIZE);
-
-	if (start >= end)
-		return false;
-
-	return __vma_aligned_range_pmd_shareable(vma, start, end, false);
-}
-
-static bool vma_addr_pmd_shareable(struct vm_area_struct *vma,
-						unsigned long addr)
-{
-	unsigned long start = addr & PUD_MASK;
-	unsigned long end = start + PUD_SIZE;
-
-	return __vma_aligned_range_pmd_shareable(vma, start, end, true);
-}
-
-bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr)
-{
-	return vma_addr_pmd_shareable(vma, addr);
-}
-
 /*
  * Determine if start,end range within vma could be mapped by shared pmd.
  * If yes, adjust start and end to cover range associated with possible
@@ -6880,17 +6856,21 @@ static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
 	if (vma->vm_private_data)
 		return;
 
-	/* Check size/alignment for pmd sharing possible */
-	if (!vma_pmd_shareable(vma))
-		return;
-
 	vma_lock = kmalloc(sizeof(*vma_lock), GFP_KERNEL);
-	if (!vma_lock)
+	if (!vma_lock) {
 		/*
 		 * If we can not allocate structure, then vma can not
-		 * participate in pmd sharing.
+		 * participate in pmd sharing.  This is only a possible
+		 * performance enhancement and memory saving issue.
+		 * However, the lock is also used to synchronize page
+		 * faults with truncation.  If the lock is not present,
+		 * unlikely races could leave pages in a file past i_size
+		 * until the file is removed.  Warn in the unlikely case of
+		 * allocation failure.
 		 */
+		pr_warn_once("HugeTLB: unable to allocate vma specific lock\n");
 		return;
+	}
 
 	kref_init(&vma_lock->refs);
 	init_rwsem(&vma_lock->rw_sema);
author	Mike Kravetz <mike.kravetz@oracle.com>	2022-10-04 18:17:07 -0700
committer	Andrew Morton <akpm@linux-foundation.org>	2022-10-07 14:28:40 -0700
commit	bbff39cc6cbcb86ccfacb2dcafc79912a9f9df69 (patch)
tree	8866b90080b9e9042eef198c6aa12d70b5c3c8ed /mm
parent	ecfbd733878da48ed03a5b8a9c301366a03e3cca (diff)