diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-02-04 07:24:48 +0000 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-02-04 07:24:48 +0000 |
commit | cc12071ff39060fc2e47c58b43e249fe0d0061ee (patch) | |
tree | 085789dec88100d2ac12d9f2c990c00b909d7ee2 /mm | |
parent | 9717c1cea16e3eae81ca226f4c3670bb799b61ad (diff) | |
parent | f3cc4e1d44a813a0685f2e558b78ace3db559722 (diff) |
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
"The rest of MM and the rest of everything else: hotfixes, ipc, misc,
procfs, lib, cleanups, arm"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (67 commits)
ARM: dma-api: fix max_pfn off-by-one error in __dma_supported()
treewide: remove redundant IS_ERR() before error code check
include/linux/cpumask.h: don't calculate length of the input string
lib: new testcases for bitmap_parse{_user}
lib: rework bitmap_parse()
lib: make bitmap_parse_user a wrapper on bitmap_parse
lib: add test for bitmap_parse()
bitops: more BITS_TO_* macros
lib/string: add strnchrnul()
proc: convert everything to "struct proc_ops"
proc: decouple proc from VFS with "struct proc_ops"
asm-generic/tlb: provide MMU_GATHER_TABLE_FREE
asm-generic/tlb: rename HAVE_MMU_GATHER_NO_GATHER
asm-generic/tlb: rename HAVE_MMU_GATHER_PAGE_SIZE
asm-generic/tlb: rename HAVE_RCU_TABLE_FREE
asm-generic/tlb: add missing CONFIG symbol
asm-gemeric/tlb: remove stray function declarations
asm-generic/tlb: avoid potential double flush
mm/mmu_gather: invalidate TLB correctly on batch allocation failure and flush
powerpc/mmu_gather: enable RCU_TABLE_FREE even for !SMP case
...
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig.debug | 21 | ||||
-rw-r--r-- | mm/Makefile | 1 | ||||
-rw-r--r-- | mm/gup.c | 2 | ||||
-rw-r--r-- | mm/hmm.c | 62 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 104 | ||||
-rw-r--r-- | mm/memremap.c | 2 | ||||
-rw-r--r-- | mm/migrate.c | 5 | ||||
-rw-r--r-- | mm/mincore.c | 1 | ||||
-rw-r--r-- | mm/mmu_gather.c | 134 | ||||
-rw-r--r-- | mm/page_alloc.c | 71 | ||||
-rw-r--r-- | mm/pagewalk.c | 163 | ||||
-rw-r--r-- | mm/ptdump.c | 139 | ||||
-rw-r--r-- | mm/slab_common.c | 37 | ||||
-rw-r--r-- | mm/sparse.c | 10 | ||||
-rw-r--r-- | mm/swapfile.c | 14 |
15 files changed, 514 insertions, 252 deletions
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 327b3ebf23bf..0271b22e063f 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -117,3 +117,24 @@ config DEBUG_RODATA_TEST depends on STRICT_KERNEL_RWX ---help--- This option enables a testcase for the setting rodata read-only. + +config GENERIC_PTDUMP + bool + +config PTDUMP_CORE + bool + +config PTDUMP_DEBUGFS + bool "Export kernel pagetable layout to userspace via debugfs" + depends on DEBUG_KERNEL + depends on DEBUG_FS + depends on GENERIC_PTDUMP + select PTDUMP_CORE + help + Say Y here if you want to show the kernel pagetable layout in a + debugfs file. This information is only useful for kernel developers + who are working in architecture specific areas of the kernel. + It is probably not a good idea to enable this feature in a production + kernel. + + If in doubt, say N. diff --git a/mm/Makefile b/mm/Makefile index 32f08e22e824..272e66039e70 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -109,3 +109,4 @@ obj-$(CONFIG_ZONE_DEVICE) += memremap.o obj-$(CONFIG_HMM_MIRROR) += hmm.o obj-$(CONFIG_MEMFD_CREATE) += memfd.o obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o +obj-$(CONFIG_PTDUMP_CORE) += ptdump.o @@ -1792,7 +1792,7 @@ EXPORT_SYMBOL(get_user_pages_unlocked); * Before activating this code, please be aware that the following assumptions * are currently made: * - * *) Either HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table() is used to + * *) Either MMU_GATHER_RCU_TABLE_FREE is enabled, and tlb_remove_table() is used to * free pages containing page tables or TLB flushing requires IPI broadcast. * * *) ptes can be read atomically by the architecture. @@ -186,7 +186,7 @@ static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk, } static int hmm_vma_walk_hole(unsigned long addr, unsigned long end, - struct mm_walk *walk) + __always_unused int depth, struct mm_walk *walk) { struct hmm_vma_walk *hmm_vma_walk = walk->private; struct hmm_range *range = hmm_vma_walk->range; @@ -380,7 +380,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, again: pmd = READ_ONCE(*pmdp); if (pmd_none(pmd)) - return hmm_vma_walk_hole(start, end, walk); + return hmm_vma_walk_hole(start, end, -1, walk); if (thp_migration_supported() && is_pmd_migration_entry(pmd)) { bool fault, write_fault; @@ -474,23 +474,32 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end, { struct hmm_vma_walk *hmm_vma_walk = walk->private; struct hmm_range *range = hmm_vma_walk->range; - unsigned long addr = start, next; - pmd_t *pmdp; + unsigned long addr = start; pud_t pud; - int ret; + int ret = 0; + spinlock_t *ptl = pud_trans_huge_lock(pudp, walk->vma); + + if (!ptl) + return 0; + + /* Normally we don't want to split the huge page */ + walk->action = ACTION_CONTINUE; -again: pud = READ_ONCE(*pudp); - if (pud_none(pud)) - return hmm_vma_walk_hole(start, end, walk); + if (pud_none(pud)) { + ret = hmm_vma_walk_hole(start, end, -1, walk); + goto out_unlock; + } if (pud_huge(pud) && pud_devmap(pud)) { unsigned long i, npages, pfn; uint64_t *pfns, cpu_flags; bool fault, write_fault; - if (!pud_present(pud)) - return hmm_vma_walk_hole(start, end, walk); + if (!pud_present(pud)) { + ret = hmm_vma_walk_hole(start, end, -1, walk); + goto out_unlock; + } i = (addr - range->start) >> PAGE_SHIFT; npages = (end - addr) >> PAGE_SHIFT; @@ -499,16 +508,20 @@ again: cpu_flags = pud_to_hmm_pfn_flags(range, pud); hmm_range_need_fault(hmm_vma_walk, pfns, npages, cpu_flags, &fault, &write_fault); - if (fault || write_fault) - return hmm_vma_walk_hole_(addr, end, fault, - write_fault, walk); + if (fault || write_fault) { + ret = hmm_vma_walk_hole_(addr, end, fault, + write_fault, walk); + goto out_unlock; + } pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); for (i = 0; i < npages; ++i, ++pfn) { hmm_vma_walk->pgmap = get_dev_pagemap(pfn, hmm_vma_walk->pgmap); - if (unlikely(!hmm_vma_walk->pgmap)) - return -EBUSY; + if (unlikely(!hmm_vma_walk->pgmap)) { + ret = -EBUSY; + goto out_unlock; + } pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags; } @@ -517,22 +530,15 @@ again: hmm_vma_walk->pgmap = NULL; } hmm_vma_walk->last = end; - return 0; + goto out_unlock; } - split_huge_pud(walk->vma, pudp, addr); - if (pud_none(*pudp)) - goto again; + /* Ask for the PUD to be split */ + walk->action = ACTION_SUBTREE; - pmdp = pmd_offset(pudp, addr); - do { - next = pmd_addr_end(addr, end); - ret = hmm_vma_walk_pmd(pmdp, addr, next, walk); - if (ret) - return ret; - } while (pmdp++, addr = next, addr != end); - - return 0; +out_unlock: + spin_unlock(ptl); + return ret; } #else #define hmm_vma_walk_pud NULL diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 36d80915ddc2..0a54ffac8c68 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -355,7 +355,7 @@ static unsigned long find_smallest_section_pfn(int nid, struct zone *zone, if (unlikely(pfn_to_nid(start_pfn) != nid)) continue; - if (zone && zone != page_zone(pfn_to_page(start_pfn))) + if (zone != page_zone(pfn_to_page(start_pfn))) continue; return start_pfn; @@ -380,7 +380,7 @@ static unsigned long find_biggest_section_pfn(int nid, struct zone *zone, if (unlikely(pfn_to_nid(pfn) != nid)) continue; - if (zone && zone != page_zone(pfn_to_page(pfn))) + if (zone != page_zone(pfn_to_page(pfn))) continue; return pfn; @@ -392,14 +392,11 @@ static unsigned long find_biggest_section_pfn(int nid, struct zone *zone, static void shrink_zone_span(struct zone *zone, unsigned long start_pfn, unsigned long end_pfn) { - unsigned long zone_start_pfn = zone->zone_start_pfn; - unsigned long z = zone_end_pfn(zone); /* zone_end_pfn namespace clash */ - unsigned long zone_end_pfn = z; unsigned long pfn; int nid = zone_to_nid(zone); zone_span_writelock(zone); - if (zone_start_pfn == start_pfn) { + if (zone->zone_start_pfn == start_pfn) { /* * If the section is smallest section in the zone, it need * shrink zone->zone_start_pfn and zone->zone_spanned_pages. @@ -407,50 +404,30 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn, * for shrinking zone. */ pfn = find_smallest_section_pfn(nid, zone, end_pfn, - zone_end_pfn); + zone_end_pfn(zone)); if (pfn) { + zone->spanned_pages = zone_end_pfn(zone) - pfn; zone->zone_start_pfn = pfn; - zone->spanned_pages = zone_end_pfn - pfn; + } else { + zone->zone_start_pfn = 0; + zone->spanned_pages = 0; } - } else if (zone_end_pfn == end_pfn) { + } else if (zone_end_pfn(zone) == end_pfn) { /* * If the section is biggest section in the zone, it need * shrink zone->spanned_pages. * In this case, we find second biggest valid mem_section for * shrinking zone. */ - pfn = find_biggest_section_pfn(nid, zone, zone_start_pfn, + pfn = find_biggest_section_pfn(nid, zone, zone->zone_start_pfn, start_pfn); if (pfn) - zone->spanned_pages = pfn - zone_start_pfn + 1; - } - - /* - * The section is not biggest or smallest mem_section in the zone, it - * only creates a hole in the zone. So in this case, we need not - * change the zone. But perhaps, the zone has only hole data. Thus - * it check the zone has only hole or not. - */ - pfn = zone_start_pfn; - for (; pfn < zone_end_pfn; pfn += PAGES_PER_SUBSECTION) { - if (unlikely(!pfn_to_online_page(pfn))) - continue; - - if (page_zone(pfn_to_page(pfn)) != zone) - continue; - - /* Skip range to be removed */ - if (pfn >= start_pfn && pfn < end_pfn) - continue; - - /* If we find valid section, we have nothing to do */ - zone_span_writeunlock(zone); - return; + zone->spanned_pages = pfn - zone->zone_start_pfn + 1; + else { + zone->zone_start_pfn = 0; + zone->spanned_pages = 0; + } } - - /* The zone has no valid section */ - zone->zone_start_pfn = 0; - zone->spanned_pages = 0; zone_span_writeunlock(zone); } @@ -490,6 +467,9 @@ void __ref remove_pfn_range_from_zone(struct zone *zone, struct pglist_data *pgdat = zone->zone_pgdat; unsigned long flags; + /* Poison struct pages because they are now uninitialized again. */ + page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages); + #ifdef CONFIG_ZONE_DEVICE /* * Zone shrinking code cannot properly deal with ZONE_DEVICE. So @@ -536,25 +516,20 @@ static void __remove_section(unsigned long pfn, unsigned long nr_pages, void __remove_pages(unsigned long pfn, unsigned long nr_pages, struct vmem_altmap *altmap) { + const unsigned long end_pfn = pfn + nr_pages; + unsigned long cur_nr_pages; unsigned long map_offset = 0; - unsigned long nr, start_sec, end_sec; map_offset = vmem_altmap_offset(altmap); if (check_pfn_span(pfn, nr_pages, "remove")) return; - start_sec = pfn_to_section_nr(pfn); - end_sec = pfn_to_section_nr(pfn + nr_pages - 1); - for (nr = start_sec; nr <= end_sec; nr++) { - unsigned long pfns; - + for (; pfn < end_pfn; pfn += cur_nr_pages) { cond_resched(); - pfns = min(nr_pages, PAGES_PER_SECTION - - (pfn & ~PAGE_SECTION_MASK)); - __remove_section(pfn, pfns, map_offset, altmap); - pfn += pfns; - nr_pages -= pfns; + /* Select all remaining pages up to the next section boundary */ + cur_nr_pages = min(end_pfn - pfn, -(pfn | PAGE_SECTION_MASK)); + __remove_section(pfn, cur_nr_pages, map_offset, altmap); map_offset = 0; } } @@ -1197,14 +1172,13 @@ bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) } /* - * Confirm all pages in a range [start, end) belong to the same zone. - * When true, return its valid [start, end). + * Confirm all pages in a range [start, end) belong to the same zone (skipping + * memory holes). When true, return the zone. */ -int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, - unsigned long *valid_start, unsigned long *valid_end) +struct zone *test_pages_in_a_zone(unsigned long start_pfn, + unsigned long end_pfn) { unsigned long pfn, sec_end_pfn; - unsigned long start, end; struct zone *zone = NULL; struct page *page; int i; @@ -1225,24 +1199,15 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, continue; /* Check if we got outside of the zone */ if (zone && !zone_spans_pfn(zone, pfn + i)) - return 0; + return NULL; page = pfn_to_page(pfn + i); if (zone && page_zone(page) != zone) - return 0; - if (!zone) - start = pfn + i; + return NULL; zone = page_zone(page); - end = pfn + MAX_ORDER_NR_PAGES; } } - if (zone) { - *valid_start = start; - *valid_end = min(end, end_pfn); - return 1; - } else { - return 0; - } + return zone; } /* @@ -1487,7 +1452,6 @@ static int __ref __offline_pages(unsigned long start_pfn, unsigned long offlined_pages = 0; int ret, node, nr_isolate_pageblock; unsigned long flags; - unsigned long valid_start, valid_end; struct zone *zone; struct memory_notify arg; char *reason; @@ -1512,14 +1476,12 @@ static int __ref __offline_pages(unsigned long start_pfn, /* This makes hotplug much easier...and readable. we assume this for now. .*/ - if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, - &valid_end)) { + zone = test_pages_in_a_zone(start_pfn, end_pfn); + if (!zone) { ret = -EINVAL; reason = "multizone range"; goto failed_removal; } - - zone = page_zone(pfn_to_page(valid_start)); node = zone_to_nid(zone); /* set above range as isolated */ diff --git a/mm/memremap.c b/mm/memremap.c index 4c723d2049d5..09b5b7adc773 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -120,6 +120,8 @@ void memunmap_pages(struct dev_pagemap *pgmap) nid = page_to_nid(first_page); mem_hotplug_begin(); + remove_pfn_range_from_zone(page_zone(first_page), PHYS_PFN(res->start), + PHYS_PFN(resource_size(res))); if (pgmap->type == MEMORY_DEVICE_PRIVATE) { __remove_pages(PHYS_PFN(res->start), PHYS_PFN(resource_size(res)), NULL); diff --git a/mm/migrate.c b/mm/migrate.c index edf42ed90030..b1092876e537 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2151,6 +2151,7 @@ out_unlock: #ifdef CONFIG_DEVICE_PRIVATE static int migrate_vma_collect_hole(unsigned long start, unsigned long end, + __always_unused int depth, struct mm_walk *walk) { struct migrate_vma *migrate = walk->private; @@ -2195,7 +2196,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, again: if (pmd_none(*pmdp)) - return migrate_vma_collect_hole(start, end, walk); + return migrate_vma_collect_hole(start, end, -1, walk); if (pmd_trans_huge(*pmdp)) { struct page *page; @@ -2228,7 +2229,7 @@ again: return migrate_vma_collect_skip(start, end, walk); if (pmd_none(*pmdp)) - return migrate_vma_collect_hole(start, end, + return migrate_vma_collect_hole(start, end, -1, walk); } } diff --git a/mm/mincore.c b/mm/mincore.c index 49b6fa2f6aa1..0e6dd9948f1a 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -112,6 +112,7 @@ static int __mincore_unmapped_range(unsigned long addr, unsigned long end, } static int mincore_unmapped_range(unsigned long addr, unsigned long end, + __always_unused int depth, struct mm_walk *walk) { walk->private += __mincore_unmapped_range(addr, end, diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 7d70e5c78f97..a3538cb2bcbe 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -11,7 +11,7 @@ #include <asm/pgalloc.h> #include <asm/tlb.h> -#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER +#ifndef CONFIG_MMU_GATHER_NO_GATHER static bool tlb_next_batch(struct mmu_gather *tlb) { @@ -69,7 +69,7 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_ VM_BUG_ON(!tlb->end); -#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE +#ifdef CONFIG_MMU_GATHER_PAGE_SIZE VM_WARN_ON(tlb->page_size != page_size); #endif @@ -89,58 +89,108 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_ return false; } -#endif /* HAVE_MMU_GATHER_NO_GATHER */ +#endif /* MMU_GATHER_NO_GATHER */ -#ifdef CONFIG_HAVE_RCU_TABLE_FREE +#ifdef CONFIG_MMU_GATHER_TABLE_FREE -/* - * See the comment near struct mmu_table_batch. - */ +static void __tlb_remove_table_free(struct mmu_table_batch *batch) +{ + int i; + + for (i = 0; i < batch->nr; i++) + __tlb_remove_table(batch->tables[i]); + + free_page((unsigned long)batch); +} + +#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE /* - * If we want tlb_remove_table() to imply TLB invalidates. + * Semi RCU freeing of the page directories. + * + * This is needed by some architectures to implement software pagetable walkers. + * + * gup_fast() and other software pagetable walkers do a lockless page-table + * walk and therefore needs some synchronization with the freeing of the page + * directories. The chosen means to accomplish that is by disabling IRQs over + * the walk. + * + * Architectures that use IPIs to flush TLBs will then automagically DTRT, + * since we unlink the page, flush TLBs, free the page. Since the disabling of + * IRQs delays the completion of the TLB flush we can never observe an already + * freed page. + * + * Architectures that do not have this (PPC) need to delay the freeing by some + * other means, this is that means. + * + * What we do is batch the freed directory pages (tables) and RCU free them. + * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling + * holds off grace periods. + * + * However, in order to batch these pages we need to allocate storage, this + * allocation is deep inside the MM code and can thus easily fail on memory + * pressure. To guarantee progress we fall back to single table freeing, see + * the implementation of tlb_remove_table_one(). + * */ -static inline void tlb_table_invalidate(struct mmu_gather *tlb) -{ -#ifndef CONFIG_HAVE_RCU_TABLE_NO_INVALIDATE - /* - * Invalidate page-table caches used by hardware walkers. Then we still - * need to RCU-sched wait while freeing the pages because software - * walkers can still be in-flight. - */ - tlb_flush_mmu_tlbonly(tlb); -#endif -} static void tlb_remove_table_smp_sync(void *arg) { /* Simply deliver the interrupt */ } -static void tlb_remove_table_one(void *table) +static void tlb_remove_table_sync_one(void) { /* * This isn't an RCU grace period and hence the page-tables cannot be * assumed to be actually RCU-freed. * * It is however sufficient for software page-table walkers that rely on - * IRQ disabling. See the comment near struct mmu_table_batch. + * IRQ disabling. */ smp_call_function(tlb_remove_table_smp_sync, NULL, 1); - __tlb_remove_table(table); } static void tlb_remove_table_rcu(struct rcu_head *head) { - struct mmu_table_batch *batch; - int i; + __tlb_remove_table_free(container_of(head, struct mmu_table_batch, rcu)); +} + +static void tlb_remove_table_free(struct mmu_table_batch *batch) +{ + call_rcu(&batch->rcu, tlb_remove_table_rcu); +} - batch = container_of(head, struct mmu_table_batch, rcu); +#else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */ - for (i = 0; i < batch->nr; i++) - __tlb_remove_table(batch->tables[i]); +static void tlb_remove_table_sync_one(void) { } - free_page((unsigned long)batch); +static void tlb_remove_table_free(struct mmu_table_batch *batch) +{ + __tlb_remove_table_free(batch); +} + +#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */ + +/* + * If we want tlb_remove_table() to imply TLB invalidates. + */ +static inline void tlb_table_invalidate(struct mmu_gather *tlb) +{ + if (tlb_needs_table_invalidate()) { + /* + * Invalidate page-table caches used by hardware walkers. Then + * we still need to RCU-sched wait while freeing the pages + * because software walkers can still be in-flight. + */ + tlb_flush_mmu_tlbonly(tlb); + } +} + +static void tlb_remove_table_one(void *table) +{ + tlb_remove_table_sync_one(); + __tlb_remove_table(table); } static void tlb_table_flush(struct mmu_gather *tlb) @@ -149,7 +199,7 @@ static void tlb_table_flush(struct mmu_gather *tlb) if (*batch) { tlb_table_invalidate(tlb); - call_rcu(&(*batch)->rcu, tlb_remove_table_rcu); + tlb_remove_table_free(*batch); *batch = NULL; } } @@ -173,14 +223,22 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) tlb_table_flush(tlb); } -#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ +static inline void tlb_table_init(struct mmu_gather *tlb) +{ + tlb->batch = NULL; +} + +#else /* !CONFIG_MMU_GATHER_TABLE_FREE */ + +static inline void tlb_table_flush(struct mmu_gather *tlb) { } +static inline void tlb_table_init(struct mmu_gather *tlb) { } + +#endif /* CONFIG_MMU_GATHER_TABLE_FREE */ static void tlb_flush_mmu_free(struct mmu_gather *tlb) { -#ifdef CONFIG_HAVE_RCU_TABLE_FREE tlb_table_flush(tlb); -#endif -#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER +#ifndef CONFIG_MMU_GATHER_NO_GATHER tlb_batch_pages_flush(tlb); #endif } @@ -211,7 +269,7 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, /* Is it from 0 to ~0? */ tlb->fullmm = !(start | (end+1)); -#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER +#ifndef CONFIG_MMU_GATHER_NO_GATHER tlb->need_flush_all = 0; tlb->local.next = NULL; tlb->local.nr = 0; @@ -220,10 +278,8 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, tlb->batch_count = 0; #endif -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - tlb->batch = NULL; -#endif -#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE + tlb_table_init(tlb); +#ifdef CONFIG_MMU_GATHER_PAGE_SIZE tlb->page_size = 0; #endif @@ -271,7 +327,7 @@ void tlb_finish_mmu(struct mmu_gather *tlb, tlb_flush_mmu(tlb); -#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER +#ifndef CONFIG_MMU_GATHER_NO_GATHER tlb_batch_list_free(tlb); #endif dec_tlb_flush_pending(tlb->mm); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 15e908ad933b..3c4eb750a199 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5852,18 +5852,11 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn) /* Skip PFNs that belong to non-present sections */ static inline __meminit unsigned long next_pfn(unsigned long pfn) { - unsigned long section_nr; + const unsigned long section_nr = pfn_to_section_nr(++pfn); - section_nr = pfn_to_section_nr(++pfn); if (present_section_nr(section_nr)) return pfn; - - while (++section_nr <= __highest_present_section_nr) { - if (present_section_nr(section_nr)) - return section_nr_to_pfn(section_nr); - } - - return -1; + return section_nr_to_pfn(next_present_section_nr(section_nr)); } #else static inline __meminit unsigned long next_pfn(unsigned long pfn) @@ -5905,18 +5898,20 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, } #endif - for (pfn = start_pfn; pfn < end_pfn; pfn++) { + for (pfn = start_pfn; pfn < end_pfn; ) { /* * There can be holes in boot-time mem_map[]s handed to this * function. They do not exist on hotplugged memory. */ if (context == MEMMAP_EARLY) { if (!early_pfn_valid(pfn)) { - pfn = next_pfn(pfn) - 1; + pfn = next_pfn(pfn); continue; } - if (!early_pfn_in_nid(pfn, nid)) + if (!early_pfn_in_nid(pfn, nid)) { + pfn++; continue; + } if (overlap_memmap_init(zone, &pfn)) continue; if (defer_init(nid, pfn, end_pfn)) @@ -5944,16 +5939,17 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, set_pageblock_migratetype(page, MIGRATE_MOVABLE); cond_resched(); } + pfn++; } } #ifdef CONFIG_ZONE_DEVICE void __ref memmap_init_zone_device(struct zone *zone, unsigned long start_pfn, - unsigned long size, + unsigned long nr_pages, struct dev_pagemap *pgmap) { - unsigned long pfn, end_pfn = start_pfn + size; + unsigned long pfn, end_pfn = start_pfn + nr_pages; struct pglist_data *pgdat = zone->zone_pgdat; struct vmem_altmap *altmap = pgmap_altmap(pgmap); unsigned long zone_idx = zone_idx(zone); @@ -5970,7 +5966,7 @@ void __ref memmap_init_zone_device(struct zone *zone, */ if (altmap) { start_pfn = altmap->base_pfn + vmem_altmap_offset(altmap); - size = end_pfn - start_pfn; + nr_pages = end_pfn - start_pfn; } for (pfn = start_pfn; pfn < end_pfn; pfn++) { @@ -6017,7 +6013,7 @@ void __ref memmap_init_zone_device(struct zone *zone, } pr_info("%s initialised %lu pages in %ums\n", __func__, - size, jiffies_to_msecs(jiffies - start)); + nr_pages, jiffies_to_msecs(jiffies - start)); } #endif @@ -6916,10 +6912,10 @@ void __init free_area_init_node(int nid, unsigned long *zones_size, #if !defined(CONFIG_FLAT_NODE_MEM_MAP) /* - * Zero all valid struct pages in range [spfn, epfn), return number of struct - * pages zeroed + * Initialize all valid struct pages in the range [spfn, epfn) and mark them + * PageReserved(). Return the number of struct pages that were initialized. */ -static u64 zero_pfn_range(unsigned long spfn, unsigned long epfn) +static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn) { unsigned long pfn; u64 pgcnt = 0; @@ -6930,7 +6926,13 @@ static u64 zero_pfn_range(unsigned long spfn, unsigned long epfn) + pageblock_nr_pages - 1; continue; } - mm_zero_struct_page(pfn_to_page(pfn)); + /* + * Use a fake node/zone (0) for now. Some of these pages + * (in memblock.reserved but not in memblock.memory) will + * get re-initialized via reserve_bootmem_region() later. + */ + __init_single_page(pfn_to_page(pfn), pfn, 0, 0); + __SetPageReserved(pfn_to_page(pfn)); pgcnt++; } @@ -6942,14 +6944,15 @@ static u64 zero_pfn_range(unsigned long spfn, unsigned long epfn) * initialized by going through __init_single_page(). But, there are some * struct pages which are reserved in memblock allocator and their fields * may be accessed (for example page_to_pfn() on some configuration accesses - * flags). We must explicitly zero those struct pages. + * flags). We must explicitly initialize those struct pages. * * This function also addresses a similar issue where struct pages are left * uninitialized because the physical address range is not covered by * memblock.memory or memblock.reserved. That could happen when memblock - * layout is manually configured via memmap=. + * layout is manually configured via memmap=, or when the highest physical + * address (max_pfn) does not end on a section boundary. */ -void __init zero_resv_unavail(void) +static void __init init_unavailable_mem(void) { phys_addr_t start, end; u64 i, pgcnt; @@ -6962,10 +6965,20 @@ void __init zero_resv_unavail(void) for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, NULL) { if (next < start) - pgcnt += zero_pfn_range(PFN_DOWN(next), PFN_UP(start)); + pgcnt += init_unavailable_range(PFN_DOWN(next), + PFN_UP(start)); next = end; } - pgcnt += zero_pfn_range(PFN_DOWN(next), max_pfn); + + /* + * Early sections always have a fully populated memmap for the whole + * section - see pfn_valid(). If the last section has holes at the + * end and that section is marked "online", the memmap will be + * considered initialized. Make sure that memmap has a well defined + * state. + */ + pgcnt += init_unavailable_range(PFN_DOWN(next), + round_up(max_pfn, PAGES_PER_SECTION)); /* * Struct pages that do not have backing memory. This could be because @@ -6974,6 +6987,10 @@ void __init zero_resv_unavail(void) if (pgcnt) pr_info("Zeroed struct page in unavailable ranges: %lld pages", pgcnt); } +#else +static inline void __init init_unavailable_mem(void) +{ +} #endif /* !CONFIG_FLAT_NODE_MEM_MAP */ #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP @@ -7403,7 +7420,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) /* Initialise every node */ mminit_verify_pageflags_layout(); setup_nr_node_ids(); - zero_resv_unavail(); + init_unavailable_mem(); for_each_online_node(nid) { pg_data_t *pgdat = NODE_DATA(nid); free_area_init_node(nid, NULL, @@ -7598,7 +7615,7 @@ void __init set_dma_reserve(unsigned long new_dma_reserve) void __init free_area_init(unsigned long *zones_size) { - zero_resv_unavail(); + init_unavailable_mem(); free_area_init_node(0, zones_size, __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL); } diff --git a/mm/pagewalk.c b/mm/pagewalk.c index ea0b9e606ad1..928df1638c30 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -4,26 +4,57 @@ #include <linux/sched.h> #include <linux/hugetlb.h> -static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, - struct mm_walk *walk) +/* + * We want to know the real level where a entry is located ignoring any + * folding of levels which may be happening. For example if p4d is folded then + * a missing entry found at level 1 (p4d) is actually at level 0 (pgd). + */ +static int real_depth(int depth) +{ + if (depth == 3 && PTRS_PER_PMD == 1) + depth = 2; + if (depth == 2 && PTRS_PER_PUD == 1) + depth = 1; + if (depth == 1 && PTRS_PER_P4D == 1) + depth = 0; + return depth; +} + +static int walk_pte_range_inner(pte_t *pte, unsigned long addr, + unsigned long end, struct mm_walk *walk) { - pte_t *pte; - int err = 0; const struct mm_walk_ops *ops = walk->ops; - spinlock_t *ptl; + int err = 0; - pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); for (;;) { err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk); if (err) break; - addr += PAGE_SIZE; - if (addr == end) + if (addr >= end - PAGE_SIZE) break; + addr += PAGE_SIZE; pte++; } + return err; +} + +static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + pte_t *pte; + int err = 0; + spinlock_t *ptl; + + if (walk->no_vma) { + pte = pte_offset_map(pmd, addr); + err = walk_pte_range_inner(pte, addr, end, walk); + pte_unmap(pte); + } else { + pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + err = walk_pte_range_inner(pte, addr, end, walk); + pte_unmap_unlock(pte, ptl); + } - pte_unmap_unlock(pte, ptl); return err; } @@ -34,18 +65,22 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, unsigned long next; const struct mm_walk_ops *ops = walk->ops; int err = 0; + int depth = real_depth(3); pmd = pmd_offset(pud, addr); do { again: next = pmd_addr_end(addr, end); - if (pmd_none(*pmd) || !walk->vma) { + if (pmd_none(*pmd) || (!walk->vma && !walk->no_vma)) { if (ops->pte_hole) - err = ops->pte_hole(addr, next, walk); + err = ops->pte_hole(addr, next, depth, walk); if (err) break; continue; } + + walk->action = ACTION_SUBTREE; + /* * This implies that each ->pmd_entry() handler * needs to know about pmd_trans_huge() pmds @@ -55,16 +90,24 @@ again: if (err) break; + if (walk->action == ACTION_AGAIN) + goto again; + /* * Check this here so we only break down trans_huge * pages when we _need_ to */ - if (!ops->pte_entry) + if ((!walk->vma && (pmd_leaf(*pmd) || !pmd_present(*pmd))) || + walk->action == ACTION_CONTINUE || + !(ops->pte_entry)) continue; - split_huge_pmd(walk->vma, pmd, addr); - if (pmd_trans_unstable(pmd)) - goto again; + if (walk->vma) { + split_huge_pmd(walk->vma, pmd, addr); + if (pmd_trans_unstable(pmd)) + goto again; + } + err = walk_pte_range(pmd, addr, next, walk); if (err) break; @@ -80,37 +123,41 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long next; const struct mm_walk_ops *ops = walk->ops; int err = 0; + int depth = real_depth(2); pud = pud_offset(p4d, addr); do { again: next = pud_addr_end(addr, end); - if (pud_none(*pud) || !walk->vma) { + if (pud_none(*pud) || (!walk->vma && !walk->no_vma)) { if (ops->pte_hole) - err = ops->pte_hole(addr, next, walk); + err = ops->pte_hole(addr, next, depth, walk); if (err) break; continue; } - if (ops->pud_entry) { - spinlock_t *ptl = pud_trans_huge_lock(pud, walk->vma); + walk->action = ACTION_SUBTREE; - if (ptl) { - err = ops->pud_entry(pud, addr, next, walk); - spin_unlock(ptl); - if (err) - break; - continue; - } - } + if (ops->pud_entry) + err = ops->pud_entry(pud, addr, next, walk); + if (err) + break; + + if (walk->action == ACTION_AGAIN) + goto again; - split_huge_pud(walk->vma, pud, addr); + if ((!walk->vma && (pud_leaf(*pud) || !pud_present(*pud))) || + walk->action == ACTION_CONTINUE || + !(ops->pmd_entry || ops->pte_entry)) + continue; + + if (walk->vma) + split_huge_pud(walk->vma, pud, addr); if (pud_none(*pud)) goto again; - if (ops->pmd_entry || ops->pte_entry) - err = walk_pmd_range(pud, addr, next, walk); + err = walk_pmd_range(pud, addr, next, walk); if (err) break; } while (pud++, addr = next, addr != end); @@ -125,18 +172,24 @@ static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, unsigned long next; const struct mm_walk_ops *ops = walk->ops; int err = 0; + int depth = real_depth(1); p4d = p4d_offset(pgd, addr); do { next = p4d_addr_end(addr, end); if (p4d_none_or_clear_bad(p4d)) { if (ops->pte_hole) - err = ops->pte_hole(addr, next, walk); + err = ops->pte_hole(addr, next, depth, walk); if (err) break; continue; } - if (ops->pmd_entry || ops->pte_entry) + if (ops->p4d_entry) { + err = ops->p4d_entry(p4d, addr, next, walk); + if (err) + break; + } + if (ops->pud_entry || ops->pmd_entry || ops->pte_entry) err = walk_pud_range(p4d, addr, next, walk); if (err) break; @@ -153,17 +206,26 @@ static int walk_pgd_range(unsigned long addr, unsigned long end, const struct mm_walk_ops *ops = walk->ops; int err = 0; - pgd = pgd_offset(walk->mm, addr); + if (walk->pgd) + pgd = walk->pgd + pgd_index(addr); + else + pgd = pgd_offset(walk->mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) { if (ops->pte_hole) - err = ops->pte_hole(addr, next, walk); + err = ops->pte_hole(addr, next, 0, walk); if (err) break; continue; } - if (ops->pmd_entry || ops->pte_entry) + if (ops->pgd_entry) { + err = ops->pgd_entry(pgd, addr, next, walk); + if (err) + break; + } + if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry || + ops->pte_entry) err = walk_p4d_range(pgd, addr, next, walk); if (err) break; @@ -199,7 +261,7 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end, if (pte) err = ops->hugetlb_entry(pte, hmask, addr, next, walk); else if (ops->pte_hole) - err = ops->pte_hole(addr, next, walk); + err = ops->pte_hole(addr, next, -1, walk); if (err) break; @@ -243,7 +305,7 @@ static int walk_page_test(unsigned long start, unsigned long end, if (vma->vm_flags & VM_PFNMAP) { int err = 1; if (ops->pte_hole) - err = ops->pte_hole(start, end, walk); + err = ops->pte_hole(start, end, -1, walk); return err ? err : 1; } return 0; @@ -369,6 +431,33 @@ int walk_page_range(struct mm_struct *mm, unsigned long start, return err; } +/* + * Similar to walk_page_range() but can walk any page tables even if they are + * not backed by VMAs. Because 'unusual' entries may be walked this function + * will also not lock the PTEs for the pte_entry() callback. This is useful for + * walking the kernel pages tables or page tables for firmware. + */ +int walk_page_range_novma(struct mm_struct *mm, unsigned long start, + unsigned long end, const struct mm_walk_ops *ops, + pgd_t *pgd, + void *private) +{ + struct mm_walk walk = { + .ops = ops, + .mm = mm, + .pgd = pgd, + .private = private, + .no_vma = true + }; + + if (start >= end || !walk.mm) + return -EINVAL; + + lockdep_assert_held(&walk.mm->mmap_sem); + + return __walk_page_range(start, end, &walk); +} + int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, void *private) { diff --git a/mm/ptdump.c b/mm/ptdump.c new file mode 100644 index 000000000000..26208d0d03b7 --- /dev/null +++ b/mm/ptdump.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/pagewalk.h> +#include <linux/ptdump.h> +#include <linux/kasan.h> + +#ifdef CONFIG_KASAN +/* + * This is an optimization for KASAN=y case. Since all kasan page tables + * eventually point to the kasan_early_shadow_page we could call note_page() + * right away without walking through lower level page tables. This saves + * us dozens of seconds (minutes for 5-level config) while checking for + * W+X mapping or reading kernel_page_tables debugfs file. + */ +static inline int note_kasan_page_table(struct mm_walk *walk, + unsigned long addr) +{ + struct ptdump_state *st = walk->private; + + st->note_page(st, addr, 4, pte_val(kasan_early_shadow_pte[0])); + + walk->action = ACTION_CONTINUE; + + return 0; +} +#endif + +static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + struct ptdump_state *st = walk->private; + pgd_t val = READ_ONCE(*pgd); + +#if CONFIG_PGTABLE_LEVELS > 4 && defined(CONFIG_KASAN) + if (pgd_page(val) == virt_to_page(lm_alias(kasan_early_shadow_p4d))) + return note_kasan_page_table(walk, addr); +#endif + + if (pgd_leaf(val)) + st->note_page(st, addr, 0, pgd_val(val)); + + return 0; +} + +static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + struct ptdump_state *st = walk->private; + p4d_t val = READ_ONCE(*p4d); + +#if CONFIG_PGTABLE_LEVELS > 3 && defined(CONFIG_KASAN) + if (p4d_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pud))) + return note_kasan_page_table(walk, addr); +#endif + + if (p4d_leaf(val)) + st->note_page(st, addr, 1, p4d_val(val)); + + return 0; +} + +static int ptdump_pud_entry(pud_t *pud, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + struct ptdump_state *st = walk->private; + pud_t val = READ_ONCE(*pud); + +#if CONFIG_PGTABLE_LEVELS > 2 && defined(CONFIG_KASAN) + if (pud_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pmd))) + return note_kasan_page_table(walk, addr); +#endif + + if (pud_leaf(val)) + st->note_page(st, addr, 2, pud_val(val)); + + return 0; +} + +static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + struct ptdump_state *st = walk->private; + pmd_t val = READ_ONCE(*pmd); + +#if defined(CONFIG_KASAN) + if (pmd_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pte))) + return note_kasan_page_table(walk, addr); +#endif + + if (pmd_leaf(val)) + st->note_page(st, addr, 3, pmd_val(val)); + + return 0; +} + +static int ptdump_pte_entry(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + struct ptdump_state *st = walk->private; + + st->note_page(st, addr, 4, pte_val(READ_ONCE(*pte))); + + return 0; +} + +static int ptdump_hole(unsigned long addr, unsigned long next, + int depth, struct mm_walk *walk) +{ + struct ptdump_state *st = walk->private; + + st->note_page(st, addr, depth, 0); + + return 0; +} + +static const struct mm_walk_ops ptdump_ops = { + .pgd_entry = ptdump_pgd_entry, + .p4d_entry = ptdump_p4d_entry, + .pud_entry = ptdump_pud_entry, + .pmd_entry = ptdump_pmd_entry, + .pte_entry = ptdump_pte_entry, + .pte_hole = ptdump_hole, +}; + +void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd) +{ + const struct ptdump_range *range = st->range; + + down_read(&mm->mmap_sem); + while (range->start != range->end) { + walk_page_range_novma(mm, range->start, range->end, + &ptdump_ops, pgd, st); + range++; + } + up_read(&mm->mmap_sem); + + /* Flush out the last page */ + st->note_page(st, 0, -1, 0); +} diff --git a/mm/slab_common.c b/mm/slab_common.c index 0d95ddea13b0..1907cb2903c7 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1580,18 +1580,17 @@ static int slabinfo_open(struct inode *inode, struct file *file) return seq_open(file, &slabinfo_op); } -static const struct file_operations proc_slabinfo_operations = { - .open = slabinfo_open, - .read = seq_read, - .write = slabinfo_write, - .llseek = seq_lseek, - .release = seq_release, +static const struct proc_ops slabinfo_proc_ops = { + .proc_open = slabinfo_open, + .proc_read = seq_read, + .proc_write = slabinfo_write, + .proc_lseek = seq_lseek, + .proc_release = seq_release, }; static int __init slab_proc_init(void) { - proc_create("slabinfo", SLABINFO_RIGHTS, NULL, - &proc_slabinfo_operations); + proc_create("slabinfo", SLABINFO_RIGHTS, NULL, &slabinfo_proc_ops); return 0; } module_init(slab_proc_init); @@ -1677,28 +1676,6 @@ static __always_inline void *__do_krealloc(const void *p, size_t new_size, } /** - * __krealloc - like krealloc() but don't free @p. - * @p: object to reallocate memory for. - * @new_size: how many bytes of memory are required. - * @flags: the type of memory to allocate. - * - * This function is like krealloc() except it never frees the originally - * allocated buffer. Use this if you don't want to free the buffer immediately - * like, for example, with RCU. - * - * Return: pointer to the allocated memory or %NULL in case of error - */ -void *__krealloc(const void *p, size_t new_size, gfp_t flags) -{ - if (unlikely(!new_size)) - return ZERO_SIZE_PTR; - - return __do_krealloc(p, new_size, flags); - -} -EXPORT_SYMBOL(__krealloc); - -/** * krealloc - reallocate memory. The contents will remain unchanged. * @p: object to reallocate memory for. * @new_size: how many bytes of memory are required. diff --git a/mm/sparse.c b/mm/sparse.c index 3918fc3eaef1..c184b69460b7 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -198,16 +198,6 @@ static void section_mark_present(struct mem_section *ms) ms->section_mem_map |= SECTION_MARKED_PRESENT; } -static inline unsigned long next_present_section_nr(unsigned long section_nr) -{ - do { - section_nr++; - if (present_section_nr(section_nr)) - return section_nr; - } while ((section_nr <= __highest_present_section_nr)); - - return -1; -} #define for_each_present_section_nr(start, section_nr) \ for (section_nr = next_present_section_nr(start-1); \ ((section_nr != -1) && \ diff --git a/mm/swapfile.c b/mm/swapfile.c index 6febae9ad3cd..2c33ff456ed5 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2796,17 +2796,17 @@ static int swaps_open(struct inode *inode, struct file *file) return 0; } -static const struct file_operations proc_swaps_operations = { - .open = swaps_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, - .poll = swaps_poll, +static const struct proc_ops swaps_proc_ops = { + .proc_open = swaps_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = seq_release, + .proc_poll = swaps_poll, }; static int __init procswaps_init(void) { - proc_create("swaps", 0, NULL, &proc_swaps_operations); + proc_create("swaps", 0, NULL, &swaps_proc_ops); return 0; } __initcall(procswaps_init); |