| author    | Linus Torvalds <torvalds@linux-foundation.org> | 2021-02-22 14:34:00 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-02-22 14:34:00 -0800 |
| commit    | b12b47249688915e987a9a2a393b522f86f6b7ab (patch) | |
| tree      | eae34f7fa64474bb3123f7b69c411ade6127c41f | |
| parent    | 6ff6f86bc4d02949b5688d69de1c89c310d62c44 (diff) | |
| parent    | 82d2c16b350f72aa21ac2a6860c542aa4b43a51e (diff) | |
Merge tag 'powerpc-5.12-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
- A large series adding wrappers for our interrupt handlers, so that
irq/nmi/user tracking can be isolated in the wrappers rather than
spread in each handler.
- Conversion of the 32-bit syscall handling into C.
- A series from Nick to streamline our TLB flushing when using the
Radix MMU.
- Switch to using queued spinlocks by default for 64-bit server CPUs.
- A rework of our PCI probing so that it happens later in boot, when
more generic infrastructure is available.
- Two small fixes to allow 32-bit little-endian processes to run on
64-bit kernels.
- Other smaller features, fixes & cleanups.
Thanks to: Alexey Kardashevskiy, Ananth N Mavinakayanahalli, Aneesh
Kumar K.V, Athira Rajeev, Bhaskar Chowdhury, Cédric Le Goater, Chengyang
Fan, Christophe Leroy, Christopher M. Riedl, Fabiano Rosas, Florian
Fainelli, Frederic Barrat, Ganesh Goudar, Hari Bathini, Jiapeng Chong,
Joseph J Allen, Kajol Jain, Markus Elfring, Michal Suchanek, Nathan
Lynch, Naveen N. Rao, Nicholas Piggin, Oliver O'Halloran, Pingfan Liu,
Po-Hsu Lin, Qian Cai, Ram Pai, Randy Dunlap, Sandipan Das, Stephen
Rothwell, Tyrel Datwyler, Will Springer, Yury Norov, and Zheng Yongjun.
* tag 'powerpc-5.12-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (188 commits)
powerpc/perf: Adds support for programming of Thresholding in P10
powerpc/pci: Remove unimplemented prototypes
powerpc/uaccess: Merge raw_copy_to_user_allowed() into raw_copy_to_user()
powerpc/uaccess: Merge __put_user_size_allowed() into __put_user_size()
powerpc/uaccess: get rid of small constant size cases in raw_copy_{to,from}_user()
powerpc/64: Fix stack trace not displaying final frame
powerpc/time: Remove get_tbl()
powerpc/time: Avoid using get_tbl()
spi: mpc52xx: Avoid using get_tbl()
powerpc/syscall: Avoid storing 'current' in another pointer
powerpc/32: Handle bookE debugging in C in syscall entry/exit
powerpc/syscall: Do not check unsupported scv vector on PPC32
powerpc/32: Remove the counter in global_dbcr0
powerpc/32: Remove verification of MSR_PR on syscall in the ASM entry
powerpc/syscall: implement system call entry/exit logic in C for PPC32
powerpc/32: Always save non volatile GPRs at syscall entry
powerpc/syscall: Change condition to check MSR_RI
powerpc/syscall: Save r3 in regs->orig_r3
powerpc/syscall: Use is_compat_task()
powerpc/syscall: Make interrupt.c buildable on PPC32
...
193 files changed, 2718 insertions, 2133 deletions
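Editor's note: the headline of this merge is the interrupt-wrapper series from the first bullet above. Condensed from the asm/interrupt.h hunk further down in this diff, the pattern looks like the sketch below; the generated wrapper, not each handler body, performs the irq/nmi/user-mode reconciliation on entry and exit (all names here are taken from the diff itself, only the comments are mine):

#define DEFINE_INTERRUPT_HANDLER(func)				\
static __always_inline void ____##func(struct pt_regs *regs);	\
								\
interrupt_handler void func(struct pt_regs *regs)		\
{								\
	struct interrupt_state state;				\
								\
	interrupt_enter_prepare(regs, &state);	/* tracking moved into the wrapper */ \
	____##func(regs);			/* the handler body proper */ \
	interrupt_exit_prepare(regs, &state);			\
}								\
NOKPROBE_SYMBOL(func);						\
								\
static __always_inline void ____##func(struct pt_regs *regs)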
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9141f03060ce..2ffb229d8d2d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -196,7 +196,6 @@ config PPC
     select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
     select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
     select HAVE_CONTEXT_TRACKING if PPC64
-    select HAVE_TIF_NOHZ if PPC64
     select HAVE_DEBUG_KMEMLEAK
     select HAVE_DEBUG_STACKOVERFLOW
     select HAVE_DYNAMIC_FTRACE
@@ -503,18 +502,14 @@ config HOTPLUG_CPU
       Say N if you are unsure.
 
 config PPC_QUEUED_SPINLOCKS
-    bool "Queued spinlocks"
+    bool "Queued spinlocks" if EXPERT
     depends on SMP
+    default PPC_BOOK3S_64
     help
       Say Y here to use queued spinlocks which give better scalability and
       fairness on large SMP and NUMA systems without harming single threaded
       performance.
 
-      This option is currently experimental, the code is more complex and
-      less tested so it defaults to "N" for the moment.
-
-      If unsure, say "N".
-
 config ARCH_CPU_PROBE_RELEASE
     def_bool y
     depends on HOTPLUG_CPU
@@ -718,18 +713,6 @@ config ARCH_MEMORY_PROBE
     def_bool y
     depends on MEMORY_HOTPLUG
 
-config STDBINUTILS
-    bool "Using standard binutils settings"
-    depends on 44x
-    default y
-    help
-      Turning this option off allows you to select 256KB PAGE_SIZE on 44x.
-      Note, that kernel will be able to run only those applications,
-      which had been compiled using binutils later than 2.17.50.0.3 with
-      '-zmax-page-size' set to 256K (the default is 64K). Or, if using
-      the older binutils, you can patch them with a trivial patch, which
-      changes the ELF_MAXPAGESIZE definition from 0x10000 to 0x40000.
-
 choice
     prompt "Page size"
     default PPC_4K_PAGES
@@ -769,17 +752,15 @@ config PPC_64K_PAGES
     select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
 
 config PPC_256K_PAGES
-    bool "256k page size"
-    depends on 44x && !STDBINUTILS
+    bool "256k page size (Requires non-standard binutils settings)"
+    depends on 44x && !PPC_47x
     help
       Make the page size 256k.
 
-      As the ELF standard only requires alignment to support page
-      sizes up to 64k, you will need to compile all of your user
-      space applications with a non-standard binutils settings
-      (see the STDBINUTILS description for details).
-
-      Say N unless you know what you are doing.
+      The kernel will only be able to run applications that have been
+      compiled with '-zmax-page-size' set to 256K (the default is 64K) using
+      binutils later than 2.17.50.0.3, or by patching the ELF_MAXPAGESIZE
+      definition from 0x10000 to 0x40000 in older versions.
 
 endchoice
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index b88900f4832f..ae084357994e 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -88,6 +88,7 @@ config PPC_IRQ_SOFT_MASK_DEBUG
 config XMON
     bool "Include xmon kernel debugger"
     depends on DEBUG_KERNEL
+    select CONSOLE_POLL if SERIAL_CPM_CONSOLE
     help
       Include in-kernel hooks for the xmon kernel monitor/debugger.
       Unless you are intending to debug the kernel, say N here.
diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig
index 72b8f93a9bdd..4bc549c6edc5 100644
--- a/arch/powerpc/configs/44x/akebono_defconfig
+++ b/arch/powerpc/configs/44x/akebono_defconfig
@@ -20,6 +20,7 @@ CONFIG_IRQ_ALL_CPUS=y
 # CONFIG_COMPACTION is not set
 # CONFIG_SUSPEND is not set
 CONFIG_NET=y
+CONFIG_NETDEVICES=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
@@ -40,7 +41,9 @@ CONFIG_BLK_DEV_RAM_SIZE=35000
 # CONFIG_SCSI_PROC_FS is not set
 CONFIG_BLK_DEV_SD=y
 # CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_ATA=y
 # CONFIG_SATA_PMP is not set
+CONFIG_SATA_AHCI_PLATFORM=y
 # CONFIG_ATA_SFF is not set
 # CONFIG_NET_VENDOR_3COM is not set
 # CONFIG_NET_VENDOR_ADAPTEC is not set
@@ -97,6 +100,8 @@ CONFIG_USB_OHCI_HCD=y
 # CONFIG_USB_OHCI_HCD_PCI is not set
 CONFIG_USB_STORAGE=y
 CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_M41T80=y
 CONFIG_EXT2_FS=y
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index d0b832cbbec8..939f3c94c8f3 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -56,35 +56,6 @@ int exit_vmx_usercopy(void);
 int enter_vmx_ops(void);
 void *exit_vmx_ops(void *dest);
 
-/* Traps */
-long machine_check_early(struct pt_regs *regs);
-long hmi_exception_realmode(struct pt_regs *regs);
-void SMIException(struct pt_regs *regs);
-void handle_hmi_exception(struct pt_regs *regs);
-void instruction_breakpoint_exception(struct pt_regs *regs);
-void RunModeException(struct pt_regs *regs);
-void single_step_exception(struct pt_regs *regs);
-void program_check_exception(struct pt_regs *regs);
-void alignment_exception(struct pt_regs *regs);
-void StackOverflow(struct pt_regs *regs);
-void stack_overflow_exception(struct pt_regs *regs);
-void kernel_fp_unavailable_exception(struct pt_regs *regs);
-void altivec_unavailable_exception(struct pt_regs *regs);
-void vsx_unavailable_exception(struct pt_regs *regs);
-void fp_unavailable_tm(struct pt_regs *regs);
-void altivec_unavailable_tm(struct pt_regs *regs);
-void vsx_unavailable_tm(struct pt_regs *regs);
-void facility_unavailable_exception(struct pt_regs *regs);
-void TAUException(struct pt_regs *regs);
-void altivec_assist_exception(struct pt_regs *regs);
-void unrecoverable_exception(struct pt_regs *regs);
-void kernel_bad_stack(struct pt_regs *regs);
-void system_reset_exception(struct pt_regs *regs);
-void machine_check_exception(struct pt_regs *regs);
-void emulation_assist_interrupt(struct pt_regs *regs);
-long do_slb_fault(struct pt_regs *regs, unsigned long ea);
-void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err);
-
 /* signals, syscalls and interrupts */
 long sys_swapcontext(struct ucontext __user *old_ctx,
             struct ucontext __user *new_ctx,
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index a0117a9d5b06..73bc5d2c431d 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -95,12 +95,12 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
     addr &= 0xf0000000;    /* align addr to start of segment */
     barrier();    /* make sure thread.kuap is updated before playing with SRs */
     while (addr < end) {
-        mtsrin(sr, addr);
+        mtsr(sr, addr);
         sr += 0x111;        /* next VSID */
         sr &= 0xf0ffffff;    /* clear VSID overflow */
         addr += 0x10000000;    /* address of next segment */
     }
-    isync();    /* Context sync required after mtsrin() */
+    isync();    /* Context sync required after mtsr() */
 }
 
 static __always_inline void allow_user_access(void __user *to, const void __user *from,
@@ -122,7 +122,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user
     end = min(addr + size, TASK_SIZE);
 
     current->thread.kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf);
-    kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end);    /* Clear Ks */
+    kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end);    /* Clear Ks */
 }
 
 static __always_inline void prevent_user_access(void __user *to, const void __user *from,
@@ -151,7 +151,7 @@ static __always_inline void prevent_user_access(void __user *to, const void __us
     }
 
     current->thread.kuap = 0;
-    kuap_update_sr(mfsrin(addr) | SR_KS, addr, end);    /* set Ks */
+    kuap_update_sr(mfsr(addr) | SR_KS, addr, end);    /* set Ks */
 }
 
 static inline unsigned long prevent_user_access_return(void)
diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
index 685c589e723f..b85f8e114a9c 100644
--- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
@@ -94,7 +94,7 @@ typedef struct {
 } mm_context_t;
 
 void update_bats(void);
-static inline void cleanup_cpu_mmu_context(void) { };
+static inline void cleanup_cpu_mmu_context(void) { }
 
 /* patch sites */
 extern s32 patch__hash_page_A0, patch__hash_page_A1, patch__hash_page_A2;
diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h
index 7d1ef7b9754e..8bd905050896 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -339,7 +339,7 @@ static inline unsigned long get_kuap(void)
      * This has no effect in terms of actually blocking things on hash,
      * so it doesn't break anything.
      */
-    if (!early_mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
+    if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
         return AMR_KUAP_BLOCKED;
 
     return mfspr(SPRN_AMR);
@@ -347,7 +347,7 @@ static inline unsigned long get_kuap(void)
 
 static inline void set_kuap(unsigned long value)
 {
-    if (!early_mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
+    if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
         return;
 
     /*
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 066b1d34c7bc..f911bdb68d8b 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -454,6 +454,8 @@ static inline unsigned long hpt_hash(unsigned long vpn,
 #define HPTE_NOHPTE_UPDATE	0x2
 #define HPTE_USE_KERNEL_KEY	0x4
 
+long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long pa,
+			   unsigned long rlags, unsigned long vflags, int psize, int ssize);
 extern int __hash_page_4K(unsigned long ea, unsigned long access,
 			  unsigned long vsid, pte_t *ptep, unsigned long trap,
 			  unsigned long flags, int ssize, int subpage_prot);
@@ -467,6 +469,8 @@ extern int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 			unsigned long flags);
 extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
 		     unsigned long dsisr);
+void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc);
+int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr, unsigned long msr);
 int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		     pte_t *ptep, unsigned long trap, unsigned long flags,
 		     int ssize, unsigned int shift, unsigned int mmu_psize);
@@ -521,6 +525,7 @@ void slb_dump_contents(struct slb_entry *slb_ptr);
 
 extern void slb_vmalloc_update(void);
 extern void slb_set_size(u16 size);
+void preload_new_slb_context(unsigned long start, unsigned long sp);
 
 #endif /* __ASSEMBLY__ */
 
 /*
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 995bbcdd0ef8..eace8c3f7b0a 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -239,7 +239,7 @@ static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 #ifdef CONFIG_PPC_PSERIES
 extern void radix_init_pseries(void);
 #else
-static inline void radix_init_pseries(void) { };
+static inline void radix_init_pseries(void) { }
 #endif
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index a39886681629..058601efbc8a 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -388,11 +388,28 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 #define ptep_test_and_clear_young(__vma, __addr, __ptep)	\
 ({								\
-	int __r;						\
-	__r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
-	__r;							\
+	__ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
 })
 
+/*
+ * On Book3S CPUs, clearing the accessed bit without a TLB flush
+ * doesn't cause data corruption. [ It could cause incorrect
+ * page aging and the (mistaken) reclaim of hot pages, but the
+ * chance of that should be relatively low. ]
+ *
+ * So as a performance optimization don't flush the TLB when
+ * clearing the accessed bit, it will eventually be flushed by
+ * a context switch or a VM operation anyway. [ In the rare
+ * event of it not getting flushed for a long time the delay
+ * shouldn't really matter because there's no real memory
+ * pressure for swapout to react to. ]
+ */
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+#define ptep_clear_flush_young ptep_test_and_clear_young
+
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+#define pmdp_clear_flush_young pmdp_test_and_clear_young
+
 static inline int __pte_write(pte_t pte)
 {
     return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE));
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 94439e0cefc9..8b33601cdb9d 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -35,7 +35,7 @@ extern void radix__flush_pwc_lpid(unsigned int lpid);
 extern void radix__flush_all_lpid(unsigned int lpid);
 extern void radix__flush_all_lpid_guest(unsigned int lpid);
 #else
-static inline void radix__tlbiel_all(unsigned int action) { WARN_ON(1); };
+static inline void radix__tlbiel_all(unsigned int action) { WARN_ON(1); }
 static inline void radix__flush_tlb_lpid_page(unsigned int lpid,
                           unsigned long addr,
                           unsigned long page_size)
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index dcb5c3839d2f..215973b4cb26 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -31,7 +31,7 @@ static inline void tlbiel_all(void)
         hash__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL);
 }
 #else
-static inline void tlbiel_all(void) { BUG(); };
+static inline void tlbiel_all(void) { BUG(); }
 #endif
 
 static inline void tlbiel_all_lpid(bool radix)
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index 464f8ca8a5c9..d1635ffbb179 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -111,12 +111,15 @@
 #ifndef __ASSEMBLY__
 
 struct pt_regs;
-extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long);
-extern void bad_page_fault(struct pt_regs *, unsigned long, int);
-void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig);
+long do_page_fault(struct pt_regs *);
+long hash__do_page_fault(struct pt_regs *);
+void bad_page_fault(struct pt_regs *, int);
+void __bad_page_fault(struct pt_regs *regs, int sig);
+void do_bad_page_fault_segv(struct pt_regs *regs);
 extern void _exception(int, struct pt_regs *, int, unsigned long);
 extern void _exception_pkey(struct pt_regs *, unsigned long, int);
 extern void die(const char *, struct pt_regs *, long);
+void die_mce(const char *str, struct pt_regs *regs, long err);
 extern bool die_will_crash(void);
 extern void panic_flush_kmsg_start(void);
 extern void panic_flush_kmsg_end(void);
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index 138e46d8c04e..f63495109f63 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -8,6 +8,12 @@
 #include <asm/cputable.h>
 #include <asm/cpu_has_feature.h>
 
+/*
+ * This flag is used to indicate that the page pointed to by a pte is clean
+ * and does not require cleaning before returning it to the user.
+ */
+#define PG_dcache_clean PG_arch_1
+
 #ifdef CONFIG_PPC_BOOK3S_64
 /*
  * Book3s has no ptesync after setting a pte, so without this ptesync it's
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index ed75d1c318e3..504f7fe6711a 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -87,6 +87,17 @@ static notrace inline void account_cpu_user_exit(void)
     acct->starttime_user = tb;
 }
 
+static notrace inline void account_stolen_time(void)
+{
+#ifdef CONFIG_PPC_SPLPAR
+    if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+        struct lppaca *lp = local_paca->lppaca_ptr;
+
+        if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx)))
+            accumulate_stolen_time();
+    }
+#endif
+}
 
 #endif /* __KERNEL__ */
 
 #else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
@@ -96,5 +107,8 @@ static inline void account_cpu_user_entry(void)
 static inline void account_cpu_user_exit(void)
 {
 }
+static notrace inline void account_stolen_time(void)
+{
+}
 
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 #endif /* __POWERPC_CPUTIME_H */
diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
index ec57daf87f40..86a14736c76c 100644
--- a/arch/powerpc/include/asm/debug.h
+++ b/arch/powerpc/include/asm/debug.h
@@ -50,10 +50,6 @@ bool ppc_breakpoint_available(void);
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 extern void do_send_trap(struct pt_regs *regs, unsigned long address,
              unsigned long error_code, int brkpt);
-#else
-
-extern void do_break(struct pt_regs *regs, unsigned long address,
-             unsigned long error_code);
 #endif
 
 #endif /* _ASM_POWERPC_DEBUG_H */
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index aa6a5ef5d483..7604673787d6 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -137,7 +137,7 @@ extern unsigned int __start___fw_ftr_fixup, __stop___fw_ftr_fixup;
 #ifdef CONFIG_PPC_PSERIES
 void pseries_probe_fw_features(void);
 #else
-static inline void pseries_probe_fw_features(void) { };
+static inline void pseries_probe_fw_features(void) { }
 #endif
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 013165e62618..f18c543bc01d 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -17,8 +17,6 @@ extern bool hugetlb_disabled;
 
 void hugetlbpage_init_default(void);
 
-void flush_dcache_icache_hugepage(struct page *page);
-
 int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
                  unsigned long len);
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 0363734ff56e..56a98936a6a9 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -38,6 +38,8 @@
 #define PACA_IRQ_MUST_HARD_MASK	(PACA_IRQ_EE)
 #endif
 
+#endif /* CONFIG_PPC64 */
+
 /*
  * flags for paca->irq_soft_mask
  */
@@ -46,18 +48,56 @@
 #define IRQS_PMI_DISABLED	2
 #define IRQS_ALL_DISABLED	(IRQS_DISABLED | IRQS_PMI_DISABLED)
 
-#endif /* CONFIG_PPC64 */
-
 #ifndef __ASSEMBLY__
 
-extern void replay_system_reset(void);
-extern void replay_soft_interrupts(void);
+static inline void __hard_irq_enable(void)
+{
+    if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x))
+        wrtee(MSR_EE);
+    else if (IS_ENABLED(CONFIG_PPC_8xx))
+        wrtspr(SPRN_EIE);
+    else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+        __mtmsrd(MSR_EE | MSR_RI, 1);
+    else
+        mtmsr(mfmsr() | MSR_EE);
+}
 
-extern void timer_interrupt(struct pt_regs *);
-extern void timer_broadcast_interrupt(void);
-extern void performance_monitor_exception(struct pt_regs *regs);
-extern void WatchdogException(struct pt_regs *regs);
-extern void unknown_exception(struct pt_regs *regs);
+static inline void __hard_irq_disable(void)
+{
+    if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x))
+        wrtee(0);
+    else if (IS_ENABLED(CONFIG_PPC_8xx))
+        wrtspr(SPRN_EID);
+    else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+        __mtmsrd(MSR_RI, 1);
+    else
+        mtmsr(mfmsr() & ~MSR_EE);
+}
+
+static inline void __hard_EE_RI_disable(void)
+{
+    if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x))
+        wrtee(0);
+    else if (IS_ENABLED(CONFIG_PPC_8xx))
+        wrtspr(SPRN_NRI);
+    else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+        __mtmsrd(0, 1);
+    else
+        mtmsr(mfmsr() & ~(MSR_EE | MSR_RI));
+}
+
+static inline void __hard_RI_enable(void)
+{
+    if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x))
+        return;
+
+    if (IS_ENABLED(CONFIG_PPC_8xx))
+        wrtspr(SPRN_EID);
+    else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+        __mtmsrd(MSR_RI, 1);
+    else
+        mtmsr(mfmsr() | MSR_RI);
+}
 
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
@@ -221,18 +261,6 @@ static inline bool arch_irqs_disabled(void)
 
 #endif /* CONFIG_PPC_BOOK3S */
 
-#ifdef CONFIG_PPC_BOOK3E
-#define __hard_irq_enable()	wrtee(MSR_EE)
-#define __hard_irq_disable()	wrtee(0)
-#define __hard_EE_RI_disable()	wrtee(0)
-#define __hard_RI_enable()	do { } while (0)
-#else
-#define __hard_irq_enable()	__mtmsrd(MSR_EE|MSR_RI, 1)
-#define __hard_irq_disable()	__mtmsrd(MSR_RI, 1)
-#define __hard_EE_RI_disable()	__mtmsrd(0, 1)
-#define __hard_RI_enable()	__mtmsrd(MSR_RI, 1)
-#endif
-
 #define hard_irq_disable()	do {			\
     unsigned long flags;				\
     __hard_irq_disable();				\
@@ -296,8 +324,17 @@ extern void irq_set_pending_from_srr1(unsigned long srr1);
 
 extern void force_external_irq_replay(void);
 
+static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)
+{
+    regs->softe = val;
+}
 #else /* CONFIG_PPC64 */
 
+static inline notrace unsigned long irq_soft_mask_return(void)
+{
+    return 0;
+}
+
 static inline unsigned long arch_local_save_flags(void)
 {
     return mfmsr();
@@ -327,22 +364,12 @@ static inline unsigned long arch_local_irq_save(void)
 
 static inline void arch_local_irq_disable(void)
 {
-    if (IS_ENABLED(CONFIG_BOOKE))
-        wrtee(0);
-    else if (IS_ENABLED(CONFIG_PPC_8xx))
-        wrtspr(SPRN_EID);
-    else
-        mtmsr(mfmsr() & ~MSR_EE);
+    __hard_irq_disable();
 }
 
 static inline void arch_local_irq_enable(void)
 {
-    if (IS_ENABLED(CONFIG_BOOKE))
-        wrtee(MSR_EE);
-    else if (IS_ENABLED(CONFIG_PPC_8xx))
-        wrtspr(SPRN_EIE);
-    else
-        mtmsr(mfmsr() | MSR_EE);
+    __hard_irq_enable();
 }
 
 static inline bool arch_irqs_disabled_flags(unsigned long flags)
@@ -364,6 +391,9 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
 
 static inline void may_hard_irq_enable(void) { }
 
+static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)
+{
+}
 #endif /* CONFIG_PPC64 */
 
 #define ARCH_IRQ_INIT_FLAGS	IRQ_NOREQUEST
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
new file mode 100644
index 000000000000..aedfba29e43a
--- /dev/null
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -0,0 +1,449 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_INTERRUPT_H
+#define _ASM_POWERPC_INTERRUPT_H
+
+#include <linux/context_tracking.h>
+#include <linux/hardirq.h>
+#include <asm/cputime.h>
+#include <asm/ftrace.h>
+#include <asm/kprobes.h>
+#include <asm/runlatch.h>
+
+struct interrupt_state {
+#ifdef CONFIG_PPC_BOOK3E_64
+    enum ctx_state ctx_state;
+#endif
+};
+
+static inline void booke_restore_dbcr0(void)
+{
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+    unsigned long dbcr0 = current->thread.debug.dbcr0;
+
+    if (IS_ENABLED(CONFIG_PPC32) && unlikely(dbcr0 & DBCR0_IDM)) {
+        mtspr(SPRN_DBSR, -1);
+        mtspr(SPRN_DBCR0, global_dbcr0[smp_processor_id()]);
+    }
+#endif
+}
+
+static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
+{
+    /*
+     * Book3E reconciles irq soft mask in asm
+     */
+#ifdef CONFIG_PPC_BOOK3S_64
+    if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED)
+        trace_hardirqs_off();
+    local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+    if (user_mode(regs)) {
+        CT_WARN_ON(ct_state() != CONTEXT_USER);
+        user_exit_irqoff();
+
+        account_cpu_user_entry();
+        account_stolen_time();
+    } else {
+        /*
+         * CT_WARN_ON comes here via program_check_exception,
+         * so avoid recursion.
+         */
+        if (TRAP(regs) != 0x700)
+            CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
+    }
+#endif
+
+#ifdef CONFIG_PPC_BOOK3E_64
+    state->ctx_state = exception_enter();
+    if (user_mode(regs))
+        account_cpu_user_entry();
+#endif
+}
+
+/*
+ * Care should be taken to note that interrupt_exit_prepare and
+ * interrupt_async_exit_prepare do not necessarily return immediately to
+ * regs context (e.g., if regs is usermode, we don't necessarily return to
+ * user mode). Other interrupts might be taken between here and return,
+ * context switch / preemption may occur in the exit path after this, or a
+ * signal may be delivered, etc.
+ *
+ * The real interrupt exit code is platform specific, e.g.,
+ * interrupt_exit_user_prepare / interrupt_exit_kernel_prepare for 64s.
+ *
+ * However interrupt_nmi_exit_prepare does return directly to regs, because
+ * NMIs do not do "exit work" or replay soft-masked interrupts.
+ */
+static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
+{
+#ifdef CONFIG_PPC_BOOK3E_64
+    exception_exit(state->ctx_state);
+#endif
+
+    /*
+     * Book3S exits to user via interrupt_exit_user_prepare(), which does
+     * context tracking, which is a cleaner way to handle PREEMPT=y
+     * and avoid context entry/exit in e.g., preempt_schedule_irq()),
+     * which is likely to be where the core code wants to end up.
+     *
+     * The above comment explains why we can't do the
+     *
+     * if (user_mode(regs))
+     *         user_exit_irqoff();
+     *
+     * sequence here.
+     */
+}
+
+static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+    if (cpu_has_feature(CPU_FTR_CTRL) &&
+        !test_thread_local_flags(_TLF_RUNLATCH))
+        __ppc64_runlatch_on();
+#endif
+
+    interrupt_enter_prepare(regs, state);
+    irq_enter();
+}
+
+static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
+{
+    irq_exit();
+    interrupt_exit_prepare(regs, state);
+}
+
+struct interrupt_nmi_state {
+#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
+    u8 irq_soft_mask;
+    u8 irq_happened;
+#endif
+    u8 ftrace_enabled;
+#endif
+};
+
+static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
+{
+#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
+    state->irq_soft_mask = local_paca->irq_soft_mask;
+    state->irq_happened = local_paca->irq_happened;
+
+    /*
+     * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does
+     * the right thing, and set IRQ_HARD_DIS. We do not want to reconcile
+     * because that goes through irq tracing which we don't want in NMI.
+     */
+    local_paca->irq_soft_mask = IRQS_ALL_DISABLED;
+    local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+    /* Don't do any per-CPU operations until interrupt state is fixed */
+#endif
+    /* Allow DEC and PMI to be traced when they are soft-NMI */
+    if (TRAP(regs) != 0x900 && TRAP(regs) != 0xf00 && TRAP(regs) != 0x260) {
+        state->ftrace_enabled = this_cpu_get_ftrace_enabled();
+        this_cpu_set_ftrace_enabled(0);
+    }
+#endif
+
+    /*
+     * Do not use nmi_enter() for pseries hash guest taking a real-mode
+     * NMI because not everything it touches is within the RMA limit.
+     */
+    if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) ||
+        !firmware_has_feature(FW_FEATURE_LPAR) ||
+        radix_enabled() || (mfmsr() & MSR_DR))
+        nmi_enter();
+}
+
+static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
+{
+    if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) ||
+        !firmware_has_feature(FW_FEATURE_LPAR) ||
+        radix_enabled() || (mfmsr() & MSR_DR))
+        nmi_exit();
+
+#ifdef CONFIG_PPC64
+    if (TRAP(regs) != 0x900 && TRAP(regs) != 0xf00 && TRAP(regs) != 0x260)
+        this_cpu_set_ftrace_enabled(state->ftrace_enabled);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+    /* Check we didn't change the pending interrupt mask. */
+    WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened);
+    local_paca->irq_happened = state->irq_happened;
+    local_paca->irq_soft_mask = state->irq_soft_mask;
+#endif
+#endif
+}
+
+/*
+ * Don't use noinstr here like x86, but rather add NOKPROBE_SYMBOL to each
+ * function definition. The reason for this is the noinstr section is placed
+ * after the main text section, i.e., very far away from the interrupt entry
+ * asm. That creates problems with fitting linker stubs when building large
+ * kernels.
+ */
+#define interrupt_handler __visible noinline notrace __no_kcsan __no_sanitize_address
+
+/**
+ * DECLARE_INTERRUPT_HANDLER_RAW - Declare raw interrupt handler function
+ * @func:	Function name of the entry point
+ * @returns:	Returns a value back to asm caller
+ */
+#define DECLARE_INTERRUPT_HANDLER_RAW(func)				\
+	__visible long func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER_RAW - Define raw interrupt handler function
+ * @func:	Function name of the entry point
+ * @returns:	Returns a value back to asm caller
+ *
+ * @func is called from ASM entry code.
+ *
+ * This is a plain function which does no tracing, reconciling, etc.
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ *
+ * raw interrupt handlers must not enable or disable interrupts, or
+ * schedule, tracing and instrumentation (ftrace, lockdep, etc) would
+ * not be advisable either, although may be possible in a pinch, the
+ * trace will look odd at least.
+ *
+ * A raw handler may call one of the other interrupt handler functions
+ * to be converted into that interrupt context without these restrictions.
+ *
+ * On PPC64, _RAW handlers may return with fast_interrupt_return.
+ *
+ * Specific handlers may have additional restrictions.
+ */
+#define DEFINE_INTERRUPT_HANDLER_RAW(func)				\
+static __always_inline long ____##func(struct pt_regs *regs);		\
+									\
+interrupt_handler long func(struct pt_regs *regs)			\
+{									\
+	long ret;							\
+									\
+	ret = ____##func (regs);					\
+									\
+	return ret;							\
+}									\
+NOKPROBE_SYMBOL(func);							\
+									\
+static __always_inline long ____##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_INTERRUPT_HANDLER - Declare synchronous interrupt handler function
+ * @func:	Function name of the entry point
+ */
+#define DECLARE_INTERRUPT_HANDLER(func)					\
+	__visible void func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER - Define synchronous interrupt handler function
+ * @func:	Function name of the entry point
+ *
+ * @func is called from ASM entry code.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER(func)					\
+static __always_inline void ____##func(struct pt_regs *regs);		\
+									\
+interrupt_handler void func(struct pt_regs *regs)			\
+{									\
+	struct interrupt_state state;					\
+									\
+	interrupt_enter_prepare(regs, &state);				\
+									\
+	____##func (regs);						\
+									\
+	interrupt_exit_prepare(regs, &state);				\
+}									\
+NOKPROBE_SYMBOL(func);							\
+									\
+static __always_inline void ____##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_INTERRUPT_HANDLER_RET - Declare synchronous interrupt handler function
+ * @func:	Function name of the entry point
+ * @returns:	Returns a value back to asm caller
+ */
+#define DECLARE_INTERRUPT_HANDLER_RET(func)				\
+	__visible long func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER_RET - Define synchronous interrupt handler function
+ * @func:	Function name of the entry point
+ * @returns:	Returns a value back to asm caller
+ *
+ * @func is called from ASM entry code.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER_RET(func)				\
+static __always_inline long ____##func(struct pt_regs *regs);		\
+									\
+interrupt_handler long func(struct pt_regs *regs)			\
+{									\
+	struct interrupt_state state;					\
+	long ret;							\
+									\
+	interrupt_enter_prepare(regs, &state);				\
+									\
+	ret = ____##func (regs);					\
+									\
+	interrupt_exit_prepare(regs, &state);				\
+									\
+	return ret;							\
+}									\
+NOKPROBE_SYMBOL(func);							\
+									\
+static __always_inline long ____##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_INTERRUPT_HANDLER_ASYNC - Declare asynchronous interrupt handler function
+ * @func:	Function name of the entry point
+ */
+#define DECLARE_INTERRUPT_HANDLER_ASYNC(func)				\
+	__visible void func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER_ASYNC - Define asynchronous interrupt handler function
+ * @func:	Function name of the entry point
+ *
+ * @func is called from ASM entry code.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER_ASYNC(func)				\
+static __always_inline void ____##func(struct pt_regs *regs);		\
+									\
+interrupt_handler void func(struct pt_regs *regs)			\
+{									\
+	struct interrupt_state state;					\
+									\
+	interrupt_async_enter_prepare(regs, &state);			\
+									\
+	____##func (regs);						\
+									\
+	interrupt_async_exit_prepare(regs, &state);			\
+}									\
+NOKPROBE_SYMBOL(func);							\
+									\
+static __always_inline void ____##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_INTERRUPT_HANDLER_NMI - Declare NMI interrupt handler function
+ * @func:	Function name of the entry point
+ * @returns:	Returns a value back to asm caller
+ */
+#define DECLARE_INTERRUPT_HANDLER_NMI(func)				\
+	__visible long func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER_NMI - Define NMI interrupt handler function
+ * @func:	Function name of the entry point
+ * @returns:	Returns a value back to asm caller
+ *
+ * @func is called from ASM entry code.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER_NMI(func)				\
+static __always_inline long ____##func(struct pt_regs *regs);		\
+									\
+interrupt_handler long func(struct pt_regs *regs)			\
+{									\
+	struct interrupt_nmi_state state;				\
+	long ret;							\
+									\
+	interrupt_nmi_enter_prepare(regs, &state);			\
+									\
+	ret = ____##func (regs);					\
+									\
+	interrupt_nmi_exit_prepare(regs, &state);			\
+									\
+	return ret;							\
+}									\
+NOKPROBE_SYMBOL(func);							\
+									\
+static __always_inline long ____##func(struct pt_regs *regs)
+
+
+/* Interrupt handlers */
+/* kernel/traps.c */
+DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception);
+#ifdef CONFIG_PPC_BOOK3S_64
+DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception);
+#else
+DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception);
+#endif
+DECLARE_INTERRUPT_HANDLER(SMIException);
+DECLARE_INTERRUPT_HANDLER(handle_hmi_exception);
+DECLARE_INTERRUPT_HANDLER(unknown_exception);
+DECLARE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception);
+DECLARE_INTERRUPT_HANDLER(instruction_breakpoint_exception);
+DECLARE_INTERRUPT_HANDLER(RunModeException);
+DECLARE_INTERRUPT_HANDLER(single_step_exception);
+DECLARE_INTERRUPT_HANDLER(program_check_exception);
+DECLARE_INTERRUPT_HANDLER(emulation_assist_interrupt);
+DECLARE_INTERRUPT_HANDLER(alignment_exception);
+DECLARE_INTERRUPT_HANDLER(StackOverflow);
+DECLARE_INTERRUPT_HANDLER(stack_overflow_exception);
+DECLARE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception);
+DECLARE_INTERRUPT_HANDLER(altivec_unavailable_exception);
+DECLARE_INTERRUPT_HANDLER(vsx_unavailable_exception);
+DECLARE_INTERRUPT_HANDLER(facility_unavailable_exception);
+DECLARE_INTERRUPT_HANDLER(fp_unavailable_tm);
+DECLARE_INTERRUPT_HANDLER(altivec_unavailable_tm);
+DECLARE_INTERRUPT_HANDLER(vsx_unavailable_tm);
+DECLARE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi);
+DECLARE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async);
+DECLARE_INTERRUPT_HANDLER_RAW(performance_monitor_exception);
+DECLARE_INTERRUPT_HANDLER(DebugException);
+DECLARE_INTERRUPT_HANDLER(altivec_assist_exception);
+DECLARE_INTERRUPT_HANDLER(CacheLockingException);
+DECLARE_INTERRUPT_HANDLER(SPEFloatingPointException);
+DECLARE_INTERRUPT_HANDLER(SPEFloatingPointRoundException);
+DECLARE_INTERRUPT_HANDLER(unrecoverable_exception);
+DECLARE_INTERRUPT_HANDLER(WatchdogException);
+DECLARE_INTERRUPT_HANDLER(kernel_bad_stack);
+
+/* slb.c */
+DECLARE_INTERRUPT_HANDLER_RAW(do_slb_fault);
+DECLARE_INTERRUPT_HANDLER(do_bad_slb_fault);
+
+/* hash_utils.c */
+DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault);
+
+/* fault.c */
+DECLARE_INTERRUPT_HANDLER_RET(do_page_fault);
+DECLARE_INTERRUPT_HANDLER(do_bad_page_fault_segv);
+
+/* process.c */
+DECLARE_INTERRUPT_HANDLER(do_break);
+
+/* time.c */
+DECLARE_INTERRUPT_HANDLER_ASYNC(timer_interrupt);
+
+/* mce.c */
+DECLARE_INTERRUPT_HANDLER_NMI(machine_check_early);
+DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode);
+
+DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException);
+
+void replay_system_reset(void);
+void replay_soft_interrupts(void);
+
+static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs)
+{
+    if (!arch_irq_disabled_regs(regs))
+        local_irq_enable();
+}
+
+#endif /* _ASM_POWERPC_INTERRUPT_H */
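Editor's note: with the macros above, a handler is written by appending a body in braces to DEFINE_INTERRUPT_HANDLER(), as their kernel-doc says. A minimal usage sketch (the body is illustrative, not the exact traps.c code; _exception() is declared in the bug.h hunk earlier in this diff):

DEFINE_INTERRUPT_HANDLER(unknown_exception)
{
	/* enter/exit reconciliation is done by the generated wrapper */
	printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
	       regs->nip, regs->msr, regs->trap);
	_exception(SIGTRAP, regs, TRAP_UNK, 0);
}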
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 55d6ede30c19..9ab344d29a54 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -136,6 +136,7 @@ int load_crashdump_segments_ppc64(struct kimage *image,
 int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
               const void *fdt, unsigned long kernel_load_addr,
               unsigned long fdt_load_addr);
+unsigned int kexec_fdt_totalsize_ppc64(struct kimage *image);
 int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
             unsigned long initrd_load_addr,
             unsigned long initrd_len, const char *cmdline);
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index bf221a2a523e..7ec21af49a45 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -91,6 +91,7 @@ static __always_inline void setup_kup(void)
 
 static inline void allow_read_from_user(const void __user *from, unsigned long size)
 {
+    barrier_nospec();
     allow_user_access(NULL, from, size, KUAP_READ);
 }
 
@@ -102,6 +103,7 @@ static inline void allow_write_to_user(void __user *to, unsigned long size)
 static inline void allow_read_write_user(void __user *to, const void __user *from,
                      unsigned long size)
 {
+    barrier_nospec();
     allow_user_access(to, from, size, KUAP_READ_WRITE);
 }
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index d32ec9ae73bd..2f5f919f6cd3 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -277,6 +277,13 @@ extern int kvmppc_hcall_impl_hv_realmode(unsigned long cmd);
 extern void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu);
 extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu);
 
+long kvmppc_read_intr(void);
+void kvmppc_bad_interrupt(struct pt_regs *regs);
+void kvmhv_p9_set_lpcr(struct kvm_split_mode *sip);
+void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip);
+void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr);
+void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags);
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu);
 void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index df4bda867bab..8aacd76bb702 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -629,9 +629,9 @@ extern int h_ipi_redirect;
 static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
                 struct kvm *kvm)
     { return NULL; }
-static inline void kvmppc_alloc_host_rm_ops(void) {};
-static inline void kvmppc_free_host_rm_ops(void) {};
-static inline void kvmppc_free_pimap(struct kvm *kvm) {};
+static inline void kvmppc_alloc_host_rm_ops(void) {}
+static inline void kvmppc_free_host_rm_ops(void) {}
+static inline void kvmppc_free_pimap(struct kvm *kvm) {}
 static inline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
     { return 0; }
 static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
@@ -883,9 +883,9 @@ static inline void kvmppc_mmu_flush_icache(kvm_pfn_t pfn)
 
     /* Clear i-cache for new pages */
     page = pfn_to_page(pfn);
-    if (!test_bit(PG_arch_1, &page->flags)) {
+    if (!test_bit(PG_dcache_clean, &page->flags)) {
         flush_dcache_icache_page(page);
-        set_bit(PG_arch_1, &page->flags);
+        set_bit(PG_dcache_clean, &page->flags);
     }
 }
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index cf6ebbc16cb4..764f2732a821 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -59,6 +59,9 @@ struct machdep_calls {
     int        (*pcibios_root_bridge_prepare)(struct pci_host_bridge
                 *bridge);
 
+    /* finds all the pci_controllers present at boot */
+    void        (*discover_phbs)(void);
+
     /* To setup PHBs when using automatic OF platform driver for PCI */
     int        (*pci_setup_phb)(struct pci_controller *host);
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index e6c27ae843dc..331d944280b8 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -204,7 +204,18 @@ struct mce_error_info {
     bool        ignore_event;
 };
 
-#define MAX_MC_EVT	100
+#define MAX_MC_EVT	10
+
+struct mce_info {
+    int mce_nest_count;
+    struct machine_check_event mce_event[MAX_MC_EVT];
+    /* Queue for delayed MCE events. */
+    int mce_queue_count;
+    struct machine_check_event mce_event_queue[MAX_MC_EVT];
+    /* Queue for delayed MCE UE events. */
+    int mce_ue_count;
+    struct machine_check_event mce_ue_event_queue[MAX_MC_EVT];
+};
 
 /* Release flags for get_mce_event() */
 #define MCE_EVENT_RELEASE	true
@@ -234,4 +245,11 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs);
 long __machine_check_early_realmode_p9(struct pt_regs *regs);
 long __machine_check_early_realmode_p10(struct pt_regs *regs);
 #endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+void mce_init(void);
+#else
+static inline void mce_init(void) { };
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
 #endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index d5821834dba9..652ce85f9410 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -282,9 +282,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
 }
 
 #define pkey_mm_init(mm)
-#define thread_pkey_regs_save(thread)
-#define thread_pkey_regs_restore(new_thread, old_thread)
-#define thread_pkey_regs_init(thread)
 #define arch_dup_pkeys(oldmm, mm)
 
 static inline u64 pte_to_hpte_pkey_bits(u64 pteflags, unsigned long flags)
diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
index 84b4cfe73edd..160abcb8e9fa 100644
--- a/arch/powerpc/include/asm/nmi.h
+++ b/arch/powerpc/include/asm/nmi.h
@@ -4,6 +4,7 @@
 
 #ifdef CONFIG_PPC_WATCHDOG
 extern void arch_touch_nmi_watchdog(void);
+long soft_nmi_interrupt(struct pt_regs *regs);
 #else
 static inline void arch_touch_nmi_watchdog(void) {}
 #endif
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 9454d29ff4b4..ec18ac818e3a 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -29,6 +29,7 @@
 #include <asm/hmi.h>
 #include <asm/cpuidle.h>
 #include <asm/atomic.h>
+#include <asm/mce.h>
 
 #include <asm-generic/mmiowb_types.h>
 
@@ -108,8 +109,7 @@ struct paca_struct {
      */
     /* used for most interrupts/exceptions */
     u64 exgen[EX_SIZE] __attribute__((aligned(0x80)));
-    u64 exslb[EX_SIZE];    /* used for SLB/segment table misses
-                 * on the linear mapping */
+
     /* SLB related definitions */
     u16 vmalloc_sllp;
     u8 slb_cache_ptr;
@@ -273,6 +273,9 @@ struct paca_struct {
 #ifdef CONFIG_MMIOWB
     struct mmiowb_state mmiowb_state;
 #endif
+#ifdef CONFIG_PPC_BOOK3S_64
+    struct mce_info *mce_info;
+#endif /* CONFIG_PPC_BOOK3S_64 */
 } ____cacheline_aligned;
 
 extern void copy_mm_to_paca(struct mm_struct *mm);
@@ -285,9 +288,9 @@ extern void free_unused_pacas(void);
 
 #else /* CONFIG_PPC64 */
 
-static inline void allocate_paca_ptrs(void) { };
-static inline void allocate_paca(int cpu) { };
-static inline void free_unused_pacas(void) { };
+static inline void allocate_paca_ptrs(void) { }
+static inline void allocate_paca(int cpu) { }
+static inline void free_unused_pacas(void) { }
 
 #endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
index edc08f04aef7..5d1726bb28e7 100644
--- a/arch/powerpc/include/asm/paravirt.h
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -10,6 +10,7 @@
 #endif
 
 #ifdef CONFIG_PPC_SPLPAR
+#include <linux/smp.h>
 #include <asm/kvm_guest.h>
 #include <asm/cputhreads.h>
diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
index daec64d41b44..164e910bf654 100644
--- a/arch/powerpc/include/asm/perf_event.h
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -14,6 +14,7 @@
 #include <asm/perf_event_server.h>
 #else
 static inline bool is_sier_available(void) { return false; }
+static inline unsigned long get_pmcs_ext_regs(int idx) { return 0; }
 #endif
 
 #ifdef CONFIG_FSL_EMB_PERF_EVENT
@@ -40,6 +41,7 @@ static inline bool is_sier_available(void) { return false; }
 
 /* To support perf_regs sier update */
 extern bool is_sier_available(void);
+extern unsigned long get_pmcs_ext_regs(int idx);
 /* To define perf extended regs mask value */
 extern u64 PERF_REG_EXTENDED_MASK;
 #define PERF_REG_EXTENDED_MASK	PERF_REG_EXTENDED_MASK
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 3b7baba01c92..00e7e671bb4b 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -36,9 +36,9 @@ struct power_pmu {
     unsigned long    test_adder;
     int        (*compute_mmcr)(u64 events[], int n_ev,
                 unsigned int hwc[], struct mmcr_regs *mmcr,
-                struct perf_event *pevents[]);
+                struct perf_event *pevents[], u32 flags);
     int        (*get_constraint)(u64 event_id, unsigned long *mskp,
-                unsigned long *valp);
+                unsigned long *valp, u64 event_config1);
     int        (*get_alternatives)(u64 event_id, unsigned int flags,
                 u64 alt[]);
     void        (*get_mem_data_src)(union perf_mem_data_src *dsrc,
@@ -83,6 +83,7 @@ struct power_pmu {
 #define PPMU_NO_SIAR		0x00000100 /* Do not use SIAR */
 #define PPMU_ARCH_31		0x00000200 /* Has MMCR3, SIER2 and SIER3 */
 #define PPMU_P10_DD1		0x00000400 /* Is power10 DD1 processor version */
+#define PPMU_HAS_ATTR_CONFIG1	0x00000800 /* Using config1 attribute */
 
 /*
  * Values for flags to get_alternatives()
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index f7613f43c9cf..4eed82172e33 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -162,6 +162,9 @@ static inline bool is_ioremap_addr(const void *x)
 
     return addr >= IOREMAP_BASE && addr < IOREMAP_END;
 }
+
+struct seq_file;
+void arch_report_meminfo(struct seq_file *m);
 #endif /* CONFIG_PPC64 */
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h
index a7951049e129..59a2c7dbc78f 100644
--- a/arch/powerpc/include/asm/pkeys.h
+++ b/arch/powerpc/include/asm/pkeys.h
@@ -169,10 +169,4 @@ static inline bool arch_pkeys_enabled(void)
 }
 
 extern void pkey_mm_init(struct mm_struct *mm);
-extern bool arch_supports_pkeys(int cap);
-extern unsigned int arch_usable_pkeys(void);
-extern void thread_pkey_regs_save(struct thread_struct *thread);
-extern void thread_pkey_regs_restore(struct thread_struct *new_thread,
-                     struct thread_struct *old_thread);
-extern void thread_pkey_regs_init(struct thread_struct *thread);
 #endif /*_ASM_POWERPC_KEYS_H */
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index 7f4be5a05eb3..2b9edbf6e929 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -13,10 +13,6 @@
 
 extern unsigned long isa_io_base;
 
-extern void pci_setup_phb_io(struct pci_controller *hose, int primary);
-extern void pci_setup_phb_io_dynamic(struct pci_controller *hose, int primary);
-
-
 extern struct list_head hose_list;
 
 extern struct pci_dev *isa_bridge_pcidev;	/* may be NULL if no ISA bus */
@@ -32,9 +28,6 @@ struct pci_dn;
 void *pci_traverse_device_nodes(struct device_node *start,
                 void *(*fn)(struct device_node *, void *),
                 void *data);
-void *traverse_pci_dn(struct pci_dn *root,
-              void *(*fn)(struct pci_dn *, void *),
-              void *data);
 extern void pci_devs_phb_init_dynamic(struct pci_controller *phb);
 
 /* From rtas_pci.h */
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index cc1bca571332..3dceb64fc9af 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -25,7 +25,6 @@
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 #define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb)
 #define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb)
-#define ACCOUNT_STOLEN_TIME
 #else
 #define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb)				\
 	MFTB(ra);			/* get timebase */		\
@@ -44,29 +43,6 @@
 	PPC_LL	ra, ACCOUNT_SYSTEM_TIME(ptr);				\
 	add	ra,ra,rb;		/* add on to system time */	\
 	PPC_STL	ra, ACCOUNT_SYSTEM_TIME(ptr)
-
-#ifdef CONFIG_PPC_SPLPAR
-#define ACCOUNT_STOLEN_TIME						\
-BEGIN_FW_FTR_SECTION;							\
-	beq	33f;							\
-	/* from user - see if there are any DTL entries to process */	\
-	ld	r10,PACALPPACAPTR(r13);	/* get ptr to VPA */		\
-	ld	r11,PACA_DTL_RIDX(r13);	/* get log read index */	\
-	addi	r10,r10,LPPACA_DTLIDX;					\
-	LDX_BE	r10,0,r10;		/* get log write index */	\
-	cmpd	cr1,r11,r10;						\
-	beq+	cr1,33f;						\
-	bl	accumulate_stolen_time;					\
-	ld	r12,_MSR(r1);						\
-	andi.	r10,r12,MSR_PR;		/* Restore cr0 (coming from user) */ \
-33:									\
-END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
-
-#else  /* CONFIG_PPC_SPLPAR */
-#define ACCOUNT_STOLEN_TIME
-
-#endif /* CONFIG_PPC_SPLPAR */
-
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 /*
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 58f9dc060a7b..975ba260006a 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -70,6 +70,9 @@ struct pt_regs
     };
 #endif
 
+
+#define STACK_FRAME_WITH_PT_REGS (STACK_FRAME_OVERHEAD + sizeof(struct pt_regs))
+
 #ifdef __powerpc64__
 
 /*
@@ -229,6 +232,11 @@ static inline bool trap_is_scv(struct pt_regs *regs)
     return (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x3000);
 }
 
+static inline bool trap_is_unsupported_scv(struct pt_regs *regs)
+{
+    return IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x7ff0;
+}
+
 static inline bool trap_is_syscall(struct pt_regs *regs)
 {
     return (trap_is_scv(regs) || TRAP(regs) == 0xc00);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index e40a921d78f9..da103e92c112 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1375,6 +1375,7 @@
 #define mtmsr(v)	asm volatile("mtmsr %0" : \
                      : "r" ((unsigned long)(v)) \
                      : "memory")
+#define __mtmsrd(v, l)	BUILD_BUG()
 #define __MTMSR		"mtmsr"
 #endif
 
@@ -1413,13 +1414,24 @@ static inline void msr_check_and_clear(unsigned long bits)
 }
 
 #ifdef CONFIG_PPC32
-#define mfsrin(v)	({unsigned int rval; \
-            asm volatile("mfsrin %0,%1" : "=r" (rval) : "r" (v)); \
-            rval;})
+static inline u32 mfsr(u32 idx)
+{
+    u32 val;
+
+    if (__builtin_constant_p(idx))
+        asm volatile("mfsr %0, %1" : "=r" (val) : "i" (idx >> 28));
+    else
+        asm volatile("mfsrin %0, %1" : "=r" (val) : "r" (idx));
 
-static inline void mtsrin(u32 val, u32 idx)
+    return val;
+}
+
+static inline void mtsr(u32 val, u32 idx)
 {
-    asm volatile("mtsrin %0, %1" : : "r" (val), "r" (idx));
+    if (__builtin_constant_p(idx))
+        asm volatile("mtsr %1, %0" : : "r" (val), "i" (idx >> 28));
+    else
+        asm volatile("mtsrin %0, %1" : : "r" (val), "r" (idx));
 }
 #endif
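Editor's note on the mfsr()/mtsr() helpers added to reg.h just above: when the segment index is a compile-time constant the immediate-form mfsr/mtsr instructions are emitted, otherwise the helpers fall back to mfsrin/mtsrin on the full address, selected via __builtin_constant_p(). This is what lets kuap_update_sr() in the book3s/32/kup.h hunk switch from mtsrin() to mtsr(). A small usage sketch (the function and variables here are hypothetical):

static inline void sketch_set_ks(u32 addr)
{
	u32 sr0 = mfsr(0);	/* constant index: compiles to "mfsr" */
	u32 sr = mfsr(addr);	/* runtime address: compiles to "mfsrin" */

	(void)sr0;
	mtsr(sr | SR_KS, addr);	/* set the Ks bit, as kuap_update_sr() does */
}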
#endif void do_uaccess_flush_fixups(enum l1d_flush_type types); void do_entry_flush_fixups(enum l1d_flush_type types); @@ -68,13 +68,13 @@ extern bool barrier_nospec_enabled; #ifdef CONFIG_PPC_BARRIER_NOSPEC void do_barrier_nospec_fixups_range(bool enable, void *start, void *end); #else -static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void *end) { }; +static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void *end) { } #endif #ifdef CONFIG_PPC_FSL_BOOK3E void setup_spectre_v2(void); #else -static inline void setup_spectre_v2(void) {}; +static inline void setup_spectre_v2(void) {} #endif void do_btb_flush_fixups(void); diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h index 9c3c30534333..5b862de29dff 100644 --- a/arch/powerpc/include/asm/simple_spinlock.h +++ b/arch/powerpc/include/asm/simple_spinlock.h @@ -90,8 +90,8 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) void splpar_spin_yield(arch_spinlock_t *lock); void splpar_rw_yield(arch_rwlock_t *lock); #else /* SPLPAR */ -static inline void splpar_spin_yield(arch_spinlock_t *lock) {}; -static inline void splpar_rw_yield(arch_rwlock_t *lock) {}; +static inline void splpar_spin_yield(arch_spinlock_t *lock) {} +static inline void splpar_rw_yield(arch_rwlock_t *lock) {} #endif static inline void spin_yield(arch_spinlock_t *lock) diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index c4e2d53acd2b..7a13bc20f0a0 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -236,7 +236,7 @@ static inline void set_hard_smp_processor_id(int cpu, int phys) #if defined(CONFIG_PPC64) && (defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)) extern void smp_release_cpus(void); #else -static inline void smp_release_cpus(void) { }; +static inline void smp_release_cpus(void) { } #endif extern int smt_enabled_at_boot; diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 3d8a47af7a25..386d576673a1 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -94,7 +94,6 @@ void arch_setup_new_exec(void); #define TIF_PATCH_PENDING 6 /* pending live patching update */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SINGLESTEP 8 /* singlestepping active */ -#define TIF_NOHZ 9 /* in adaptive nohz mode */ #define TIF_SECCOMP 10 /* secure computing */ #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */ #define TIF_NOERROR 12 /* Force successful syscall return */ @@ -128,11 +127,10 @@ void arch_setup_new_exec(void); #define _TIF_UPROBE (1<<TIF_UPROBE) #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE) -#define _TIF_NOHZ (1<<TIF_NOHZ) #define _TIF_SYSCALL_EMU (1<<TIF_SYSCALL_EMU) #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ - _TIF_NOHZ | _TIF_SYSCALL_EMU) + _TIF_SYSCALL_EMU) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 8f789b597bae..8dd3cdb25338 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -102,6 +102,8 @@ DECLARE_PER_CPU(u64, decrementers_next_tb); /* Convert timebase ticks to nanoseconds */ unsigned long long tb_to_ns(unsigned long long tb_ticks); +void 
timer_broadcast_interrupt(void); + /* SPLPAR */ void accumulate_stolen_time(void); diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 501c9a79038c..78e2a3990eab 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -52,8 +52,6 @@ static inline bool __access_ok(unsigned long addr, unsigned long size) __get_user_nocheck((x), (ptr), sizeof(*(ptr)), true) #define __put_user(x, ptr) \ __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) -#define __put_user_goto(x, ptr, label) \ - __put_user_nocheck_goto((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label) #define __get_user_allowed(x, ptr) \ __get_user_nocheck((x), (ptr), sizeof(*(ptr)), false) @@ -110,22 +108,18 @@ static inline bool __access_ok(unsigned long addr, unsigned long size) extern long __put_user_bad(void); -#define __put_user_size_allowed(x, ptr, size, retval) \ +#define __put_user_size(x, ptr, size, retval) \ do { \ __label__ __pu_failed; \ \ retval = 0; \ + allow_write_to_user(ptr, size); \ __put_user_size_goto(x, ptr, size, __pu_failed); \ + prevent_write_to_user(ptr, size); \ break; \ \ __pu_failed: \ retval = -EFAULT; \ -} while (0) - -#define __put_user_size(x, ptr, size, retval) \ -do { \ - allow_write_to_user(ptr, size); \ - __put_user_size_allowed(x, ptr, size, retval); \ prevent_write_to_user(ptr, size); \ } while (0) @@ -213,11 +207,9 @@ do { \ } \ } while (0) -#define __put_user_nocheck_goto(x, ptr, size, label) \ +#define __unsafe_put_user_goto(x, ptr, size, label) \ do { \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - if (!is_kernel_addr((unsigned long)__pu_addr)) \ - might_fault(); \ __chk_user_ptr(ptr); \ __put_user_size_goto((x), __pu_addr, (size), label); \ } while (0) @@ -313,9 +305,8 @@ do { \ __typeof__(size) __gu_size = (size); \ \ __chk_user_ptr(__gu_addr); \ - if (!is_kernel_addr((unsigned long)__gu_addr)) \ + if (do_allow && !is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ - barrier_nospec(); \ if (do_allow) \ __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ else \ @@ -333,10 +324,8 @@ do { \ __typeof__(size) __gu_size = (size); \ \ might_fault(); \ - if (access_ok(__gu_addr, __gu_size)) { \ - barrier_nospec(); \ + if (access_ok(__gu_addr, __gu_size)) \ __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ - } \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ \ __gu_err; \ @@ -350,7 +339,6 @@ do { \ __typeof__(size) __gu_size = (size); \ \ __chk_user_ptr(__gu_addr); \ - barrier_nospec(); \ __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ \ @@ -395,7 +383,6 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) { unsigned long ret; - barrier_nospec(); allow_read_write_user(to, from, n); ret = __copy_tofrom_user(to, from, n); prevent_read_write_user(to, from, n); @@ -407,32 +394,7 @@ static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long ret; - if (__builtin_constant_p(n) && (n <= 8)) { - ret = 1; - - switch (n) { - case 1: - barrier_nospec(); - __get_user_size(*(u8 *)to, from, 1, ret); - break; - case 2: - barrier_nospec(); - __get_user_size(*(u16 *)to, from, 2, ret); - break; - case 4: - barrier_nospec(); - __get_user_size(*(u32 *)to, from, 4, ret); - break; - case 8: - barrier_nospec(); - __get_user_size(*(u64 *)to, from, 8, ret); - break; - } - if (ret == 0) - return 0; - } - barrier_nospec(); allow_read_from_user(from, n); ret = 
__copy_tofrom_user((__force void __user *)to, from, n); prevent_read_from_user(from, n); @@ -440,39 +402,12 @@ static inline unsigned long raw_copy_from_user(void *to, } static inline unsigned long -raw_copy_to_user_allowed(void __user *to, const void *from, unsigned long n) -{ - if (__builtin_constant_p(n) && (n <= 8)) { - unsigned long ret = 1; - - switch (n) { - case 1: - __put_user_size_allowed(*(u8 *)from, (u8 __user *)to, 1, ret); - break; - case 2: - __put_user_size_allowed(*(u16 *)from, (u16 __user *)to, 2, ret); - break; - case 4: - __put_user_size_allowed(*(u32 *)from, (u32 __user *)to, 4, ret); - break; - case 8: - __put_user_size_allowed(*(u64 *)from, (u64 __user *)to, 8, ret); - break; - } - if (ret == 0) - return 0; - } - - return __copy_tofrom_user(to, (__force const void __user *)from, n); -} - -static inline unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { unsigned long ret; allow_write_to_user(to, n); - ret = raw_copy_to_user_allowed(to, from, n); + ret = __copy_tofrom_user(to, (__force const void __user *)from, n); prevent_write_to_user(to, n); return ret; } @@ -508,6 +443,9 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t { if (unlikely(!access_ok(ptr, len))) return false; + + might_fault(); + allow_read_write_user((void __user *)ptr, ptr, len); return true; } @@ -521,6 +459,9 @@ user_read_access_begin(const void __user *ptr, size_t len) { if (unlikely(!access_ok(ptr, len))) return false; + + might_fault(); + allow_read_from_user(ptr, len); return true; } @@ -532,6 +473,9 @@ user_write_access_begin(const void __user *ptr, size_t len) { if (unlikely(!access_ok(ptr, len))) return false; + + might_fault(); + allow_write_to_user((void __user *)ptr, len); return true; } @@ -540,7 +484,8 @@ user_write_access_begin(const void __user *ptr, size_t len) #define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0) #define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e) -#define unsafe_put_user(x, p, e) __put_user_goto(x, p, e) +#define unsafe_put_user(x, p, e) \ + __unsafe_put_user_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e) #define unsafe_copy_to_user(d, s, l, e) \ do { \ @@ -550,17 +495,17 @@ do { \ int _i; \ \ for (_i = 0; _i < (_len & ~(sizeof(long) - 1)); _i += sizeof(long)) \ - __put_user_goto(*(long*)(_src + _i), (long __user *)(_dst + _i), e);\ + unsafe_put_user(*(long*)(_src + _i), (long __user *)(_dst + _i), e); \ if (IS_ENABLED(CONFIG_PPC64) && (_len & 4)) { \ - __put_user_goto(*(u32*)(_src + _i), (u32 __user *)(_dst + _i), e); \ + unsafe_put_user(*(u32*)(_src + _i), (u32 __user *)(_dst + _i), e); \ _i += 4; \ } \ if (_len & 2) { \ - __put_user_goto(*(u16*)(_src + _i), (u16 __user *)(_dst + _i), e); \ + unsafe_put_user(*(u16*)(_src + _i), (u16 __user *)(_dst + _i), e); \ _i += 2; \ } \ if (_len & 1) \ - __put_user_goto(*(u8*)(_src + _i), (u8 __user *)(_dst + _i), e);\ + unsafe_put_user(*(u8*)(_src + _i), (u8 __user *)(_dst + _i), e); \ } while (0) #define HAVE_GET_KERNEL_NOFAULT diff --git a/arch/powerpc/include/asm/vdso/timebase.h b/arch/powerpc/include/asm/vdso/timebase.h index 881f655caa0a..891c9d5eaabe 100644 --- a/arch/powerpc/include/asm/vdso/timebase.h +++ b/arch/powerpc/include/asm/vdso/timebase.h @@ -43,12 +43,6 @@ #define mttbl(v) asm volatile("mttbl %0":: "r"(v)) #define mttbu(v) asm volatile("mttbu %0":: "r"(v)) -/* For compatibility, get_tbl() is defined as get_tb() on ppc64 */ -static inline unsigned long get_tbl(void) -{ - return mftb(); -} - 
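
With the uaccess changes above, each user_access_begin() variant now pairs the access_ok() check with a might_fault() annotation and with opening the hardware user-access window, so callers can issue a run of unsafe_get/put_user() operations without per-access overhead. A stand-alone sketch of the bracketing contract -- the allow/prevent helpers here are stubs, not the kernel's KUAP code:

	#include <stdbool.h>
	#include <stddef.h>
	#include <string.h>

	static void allow_write_window(void)   { /* e.g. relax KUAP protection */ }
	static void prevent_write_window(void) { /* e.g. restore KUAP protection */ }
	static bool access_ok_stub(const void *p, size_t len) { return p != NULL && len > 0; }

	static bool user_write_access_begin_sketch(void *ptr, size_t len)
	{
		if (!access_ok_stub(ptr, len))
			return false;
		/* the kernel version also does might_fault() here */
		allow_write_window();		/* open the window once */
		return true;
	}

	static void user_write_access_end_sketch(void)
	{
		prevent_write_window();		/* always close it again */
	}

	/* Usage: every successful _begin() must reach the matching _end(). */
	static int put_flag(void *uptr)
	{
		char flag = 1;

		if (!user_write_access_begin_sketch(uptr, sizeof(flag)))
			return -14;			/* -EFAULT */
		memcpy(uptr, &flag, sizeof(flag));	/* stands in for unsafe_put_user() */
		user_write_access_end_sketch();
		return 0;
	}

	int main(void)
	{
		char dst = 0;
		return put_flag(&dst);
	}
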
static __always_inline u64 get_tb(void) { unsigned int tbhi, tblo, tbhi2; diff --git a/arch/powerpc/include/asm/xmon.h b/arch/powerpc/include/asm/xmon.h index 454a7fc6113b..68bfb2361f03 100644 --- a/arch/powerpc/include/asm/xmon.h +++ b/arch/powerpc/include/asm/xmon.h @@ -17,8 +17,8 @@ struct pt_regs; extern int xmon(struct pt_regs *excp); extern irqreturn_t xmon_irq(int, void *); #else -static inline void xmon_setup(void) { }; -static inline void xmon_register_spus(struct list_head *list) { }; +static inline void xmon_setup(void) { } +static inline void xmon_register_spus(struct list_head *list) { } #endif #if defined(CONFIG_XMON) && defined(CONFIG_SMP) diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h b/arch/powerpc/include/uapi/asm/perf_regs.h index bdf5f10f8b9f..578b3ee86105 100644 --- a/arch/powerpc/include/uapi/asm/perf_regs.h +++ b/arch/powerpc/include/uapi/asm/perf_regs.h @@ -55,17 +55,33 @@ enum perf_event_powerpc_regs { PERF_REG_POWERPC_MMCR3, PERF_REG_POWERPC_SIER2, PERF_REG_POWERPC_SIER3, + PERF_REG_POWERPC_PMC1, + PERF_REG_POWERPC_PMC2, + PERF_REG_POWERPC_PMC3, + PERF_REG_POWERPC_PMC4, + PERF_REG_POWERPC_PMC5, + PERF_REG_POWERPC_PMC6, /* Max regs without the extended regs */ PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1, }; #define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) -/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */ -#define PERF_REG_PMU_MASK_300 (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK) -/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */ -#define PERF_REG_PMU_MASK_31 (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - PERF_REG_PMU_MASK) +/* Exclude MMCR3, SIER2, SIER3 for CPU_FTR_ARCH_300 */ +#define PERF_EXCLUDE_REG_EXT_300 (7ULL << PERF_REG_POWERPC_MMCR3) -#define PERF_REG_MAX_ISA_300 (PERF_REG_POWERPC_MMCR2 + 1) -#define PERF_REG_MAX_ISA_31 (PERF_REG_POWERPC_SIER3 + 1) +/* + * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 + * includes 9 SPRS from MMCR0 to PMC6 excluding the + * unsupported SPRS in PERF_EXCLUDE_REG_EXT_300. + */ +#define PERF_REG_PMU_MASK_300 ((0xfffULL << PERF_REG_POWERPC_MMCR0) - PERF_EXCLUDE_REG_EXT_300) + +/* + * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 + * includes 12 SPRs from MMCR0 to PMC6. 
+ */ +#define PERF_REG_PMU_MASK_31 (0xfffULL << PERF_REG_POWERPC_MMCR0) + +#define PERF_REG_EXTENDED_MAX (PERF_REG_POWERPC_PMC6 + 1) #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 79ee7750937d..6084fa499aa3 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -46,10 +46,10 @@ obj-y := cputable.o syscalls.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o misc_$(BITS).o \ of_platform.o prom_parse.o firmware.o \ - hw_breakpoint_constraints.o + hw_breakpoint_constraints.o interrupt.o obj-y += ptrace/ obj-$(CONFIG_PPC64) += setup_64.o \ - paca.o nvram_64.o note.o syscall_64.o + paca.o nvram_64.o note.o obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o obj-$(CONFIG_VDSO32) += vdso32_wrapper.o obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index b690c70f061c..f3a662201a9f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -255,7 +255,6 @@ int main(void) #endif /* CONFIG_PPC_MM_SLICES */ OFFSET(PACA_EXGEN, paca_struct, exgen); OFFSET(PACA_EXMC, paca_struct, exmc); - OFFSET(PACA_EXSLB, paca_struct, exslb); OFFSET(PACA_EXNMI, paca_struct, exnmi); #ifdef CONFIG_PPC_PSERIES OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr); @@ -309,7 +308,7 @@ int main(void) /* Interrupt register frame */ DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); - DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); + DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_WITH_PT_REGS); STACK_PT_REGS_OFFSET(GPR0, gpr[0]); STACK_PT_REGS_OFFSET(GPR1, gpr[1]); STACK_PT_REGS_OFFSET(GPR2, gpr[2]); diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 52680cf07c9d..5545c9cd17c1 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -12,17 +12,17 @@ #include <linux/hardirq.h> #include <asm/dbell.h> +#include <asm/interrupt.h> #include <asm/irq_regs.h> #include <asm/kvm_ppc.h> #include <asm/trace.h> #ifdef CONFIG_SMP -void doorbell_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception) { struct pt_regs *old_regs = set_irq_regs(regs); - irq_enter(); trace_doorbell_entry(regs); ppc_msgsync(); @@ -35,13 +35,12 @@ void doorbell_exception(struct pt_regs *regs) smp_ipi_demux_relaxed(); /* already performed the barrier */ trace_doorbell_exit(regs); - irq_exit(); + set_irq_regs(old_regs); } #else /* CONFIG_SMP */ -void doorbell_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception) { printk(KERN_WARNING "Received doorbell on non-smp system\n"); } #endif /* CONFIG_SMP */ - diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 813713c9120c..cd60bc1c8701 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1596,6 +1596,35 @@ static int proc_eeh_show(struct seq_file *m, void *v) } #ifdef CONFIG_DEBUG_FS + + +static struct pci_dev *eeh_debug_lookup_pdev(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + uint32_t domain, bus, dev, fn; + struct pci_dev *pdev; + char buf[20]; + int ret; + + memset(buf, 0, sizeof(buf)); + ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count); + if (!ret) + return ERR_PTR(-EFAULT); + + ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn); + if (ret != 4) { + pr_err("%s: expected 4 args, got %d\n", __func__, ret); + return ERR_PTR(-EINVAL); + } + + pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) 
| fn); + if (!pdev) + return ERR_PTR(-ENODEV); + + return pdev; +} + static int eeh_enable_dbgfs_set(void *data, u64 val) { if (val) @@ -1688,26 +1717,13 @@ static ssize_t eeh_dev_check_write(struct file *filp, const char __user *user_buf, size_t count, loff_t *ppos) { - uint32_t domain, bus, dev, fn; struct pci_dev *pdev; struct eeh_dev *edev; - char buf[20]; int ret; - memset(buf, 0, sizeof(buf)); - ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count); - if (!ret) - return -EFAULT; - - ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn); - if (ret != 4) { - pr_err("%s: expected 4 args, got %d\n", __func__, ret); - return -EINVAL; - } - - pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn); - if (!pdev) - return -ENODEV; + pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); edev = pci_dev_to_eeh_dev(pdev); if (!edev) { @@ -1717,8 +1733,8 @@ static ssize_t eeh_dev_check_write(struct file *filp, } ret = eeh_dev_check_failure(edev); - pci_info(pdev, "eeh_dev_check_failure(%04x:%02x:%02x.%01x) = %d\n", - domain, bus, dev, fn, ret); + pci_info(pdev, "eeh_dev_check_failure(%s) = %d\n", + pci_name(pdev), ret); pci_dev_put(pdev); @@ -1829,25 +1845,12 @@ static ssize_t eeh_dev_break_write(struct file *filp, const char __user *user_buf, size_t count, loff_t *ppos) { - uint32_t domain, bus, dev, fn; struct pci_dev *pdev; - char buf[20]; int ret; - memset(buf, 0, sizeof(buf)); - ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count); - if (!ret) - return -EFAULT; - - ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn); - if (ret != 4) { - pr_err("%s: expected 4 args, got %d\n", __func__, ret); - return -EINVAL; - } - - pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn); - if (!pdev) - return -ENODEV; + pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); ret = eeh_debugfs_break_device(pdev); pci_dev_put(pdev); @@ -1865,6 +1868,53 @@ static const struct file_operations eeh_dev_break_fops = { .read = eeh_debugfs_dev_usage, }; +static ssize_t eeh_dev_can_recover(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct pci_driver *drv; + struct pci_dev *pdev; + size_t ret; + + pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + + /* + * In order for error recovery to work the driver needs to implement + * .error_detected(), so it can quiesce IO to the device, and + * .slot_reset() so it can re-initialise the device after a reset. + * + * Ideally they'd implement .resume() too, but some drivers which + * we need to support (notably IPR) don't so I guess we can tolerate + * that. + * + * .mmio_enabled() is mostly there as a work-around for devices which + * take forever to re-init after a hot reset. Implementing that is + * strictly optional. 
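
For reference, the shape of driver support this check is looking for: a struct pci_error_handlers providing at least the two mandatory callbacks. The structure and callback signatures are the standard PCI error-recovery API; the driver and function names below are made up for illustration:

	#include <linux/pci.h>

	static pci_ers_result_t demo_error_detected(struct pci_dev *pdev,
						    pci_channel_state_t state)
	{
		/* quiesce I/O to the device */
		return PCI_ERS_RESULT_NEED_RESET;
	}

	static pci_ers_result_t demo_slot_reset(struct pci_dev *pdev)
	{
		/* re-initialise the device after the reset */
		return PCI_ERS_RESULT_RECOVERED;
	}

	static const struct pci_error_handlers demo_err_handlers = {
		.error_detected	= demo_error_detected,
		.slot_reset	= demo_slot_reset,
		/* .resume and .mmio_enabled are the optional ones noted above */
	};

A driver hangs this off the .err_handler field of its struct pci_driver; with both callbacks present, the new debugfs file reports the device as recoverable.
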
+ */ + drv = pci_dev_driver(pdev); + if (drv && + drv->err_handler && + drv->err_handler->error_detected && + drv->err_handler->slot_reset) { + ret = count; + } else { + ret = -EOPNOTSUPP; + } + + pci_dev_put(pdev); + + return ret; +} + +static const struct file_operations eeh_dev_can_recover_fops = { + .open = simple_open, + .llseek = no_llseek, + .write = eeh_dev_can_recover, + .read = eeh_debugfs_dev_usage, +}; + #endif static int __init eeh_init_proc(void) @@ -1889,6 +1939,9 @@ static int __init eeh_init_proc(void) debugfs_create_file_unsafe("eeh_force_recover", 0600, powerpc_debugfs_root, NULL, &eeh_force_recover_fops); + debugfs_create_file_unsafe("eeh_dev_can_recover", 0600, + powerpc_debugfs_root, NULL, + &eeh_dev_can_recover_fops); eeh_cache_debugfs_init(); #endif } diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 1c9b0ccc2172..78c430b7f9d9 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -175,14 +175,11 @@ transfer_to_handler: addi r11,r11,global_dbcr0@l #ifdef CONFIG_SMP lwz r9,TASK_CPU(r2) - slwi r9,r9,3 + slwi r9,r9,2 add r11,r11,r9 #endif lwz r12,0(r11) mtspr SPRN_DBCR0,r12 - lwz r12,4(r11) - addi r12,r12,-1 - stw r12,4(r11) #endif b 3f @@ -276,8 +273,7 @@ reenable_mmu: * We save a bunch of GPRs, * r3 can be different from GPR3(r1) at this point, r9 and r11 * contains the old MSR and handler address respectively, - * r4 & r5 can contain page fault arguments that need to be passed - * along as well. r0, r6-r8, r12, CCR, CTR, XER etc... are left + * r0, r4-r8, r12, CCR, CTR, XER etc... are left * clobbered as they aren't useful past this point. */ @@ -285,15 +281,11 @@ reenable_mmu: stw r9,8(r1) stw r11,12(r1) stw r3,16(r1) - stw r4,20(r1) - stw r5,24(r1) /* If we are disabling interrupts (normal case), simply log it with * lockdep */ 1: bl trace_hardirqs_off - lwz r5,24(r1) - lwz r4,20(r1) lwz r3,16(r1) lwz r11,12(r1) lwz r9,8(r1) @@ -334,132 +326,29 @@ stack_ovf: _ASM_NOKPROBE_SYMBOL(stack_ovf) #endif -#ifdef CONFIG_TRACE_IRQFLAGS -trace_syscall_entry_irq_off: - /* - * Syscall shouldn't happen while interrupts are disabled, - * so let's do a warning here. - */ -0: trap - EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING - bl trace_hardirqs_on - - /* Now enable for real */ - LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE) - mtmsr r10 - - REST_GPR(0, r1) - REST_4GPRS(3, r1) - REST_2GPRS(7, r1) - b DoSyscall -#endif /* CONFIG_TRACE_IRQFLAGS */ - .globl transfer_to_syscall transfer_to_syscall: -#ifdef CONFIG_TRACE_IRQFLAGS - andi. r12,r9,MSR_EE - beq- trace_syscall_entry_irq_off -#endif /* CONFIG_TRACE_IRQFLAGS */ + SAVE_NVGPRS(r1) +#ifdef CONFIG_PPC_BOOK3S_32 + kuep_lock r11, r12 +#endif -/* - * Handle a system call. - */ - .stabs "arch/powerpc/kernel/",N_SO,0,0,0f - .stabs "entry_32.S",N_SO,0,0,0f -0: - -_GLOBAL(DoSyscall) - stw r3,ORIG_GPR3(r1) - li r12,0 - stw r12,RESULT(r1) -#ifdef CONFIG_TRACE_IRQFLAGS - /* Make sure interrupts are enabled */ - mfmsr r11 - andi. r12,r11,MSR_EE - /* We came in with interrupts disabled, we WARN and mark them enabled - * for lockdep now */ -0: tweqi r12, 0 - EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING -#endif /* CONFIG_TRACE_IRQFLAGS */ - lwz r11,TI_FLAGS(r2) - andi. 
r11,r11,_TIF_SYSCALL_DOTRACE - bne- syscall_dotrace -syscall_dotrace_cont: - cmplwi 0,r0,NR_syscalls - lis r10,sys_call_table@h - ori r10,r10,sys_call_table@l - slwi r0,r0,2 - bge- 66f - - barrier_nospec_asm - /* - * Prevent the load of the handler below (based on the user-passed - * system call number) being speculatively executed until the test - * against NR_syscalls and branch to .66f above has - * committed. - */ + /* Calling convention has r9 = orig r0, r10 = regs */ + addi r10,r1,STACK_FRAME_OVERHEAD + mr r9,r0 + stw r10,THREAD+PT_REGS(r2) + bl system_call_exception - lwzx r10,r10,r0 /* Fetch system call handler [ptr] */ - mtlr r10 - addi r9,r1,STACK_FRAME_OVERHEAD - PPC440EP_ERR42 - blrl /* Call handler */ - .globl ret_from_syscall ret_from_syscall: -#ifdef CONFIG_DEBUG_RSEQ - /* Check whether the syscall is issued inside a restartable sequence */ - stw r3,GPR3(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl rseq_syscall - lwz r3,GPR3(r1) -#endif - mr r6,r3 - /* disable interrupts so current_thread_info()->flags can't change */ - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */ - /* Note: We don't bother telling lockdep about it */ - mtmsr r10 - lwz r9,TI_FLAGS(r2) - li r8,-MAX_ERRNO - andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) - bne- syscall_exit_work - cmplw 0,r3,r8 - blt+ syscall_exit_cont - lwz r11,_CCR(r1) /* Load CR */ - neg r3,r3 - oris r11,r11,0x1000 /* Set SO bit in CR */ - stw r11,_CCR(r1) -syscall_exit_cont: - lwz r8,_MSR(r1) -#ifdef CONFIG_TRACE_IRQFLAGS - /* If we are going to return from the syscall with interrupts - * off, we trace that here. It shouldn't normally happen. - */ - andi. r10,r8,MSR_EE - bne+ 1f - stw r3,GPR3(r1) - bl trace_hardirqs_off - lwz r3,GPR3(r1) -1: -#endif /* CONFIG_TRACE_IRQFLAGS */ -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - /* If the process has its own DBCR0 value, load it up. The internal - debug mode bit tells us that dbcr0 should be loaded. */ - lwz r0,THREAD+THREAD_DBCR0(r2) - andis. r10,r0,DBCR0_IDM@h - bnel- load_dbcr0 -#endif + addi r4,r1,STACK_FRAME_OVERHEAD + li r5,0 + bl syscall_exit_prepare #ifdef CONFIG_PPC_47x lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 bne- 2f #endif /* CONFIG_PPC_47x */ -1: -BEGIN_FTR_SECTION - lwarx r7,0,r1 -END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) - stwcx. r0,0,r1 /* to clear the reservation */ - ACCOUNT_CPU_USER_EXIT(r2, r5, r7) #ifdef CONFIG_PPC_BOOK3S_32 kuep_unlock r5, r7 #endif @@ -467,21 +356,36 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) lwz r4,_LINK(r1) lwz r5,_CCR(r1) mtlr r4 - mtcr r5 lwz r7,_NIP(r1) - lwz r2,GPR2(r1) - lwz r1,GPR1(r1) + lwz r8,_MSR(r1) + cmpwi r3,0 + lwz r3,GPR3(r1) syscall_exit_finish: -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) - mtspr SPRN_NRI, r0 -#endif mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 + + bne 3f + mtcr r5 + +1: lwz r2,GPR2(r1) + lwz r1,GPR1(r1) rfi #ifdef CONFIG_40x b . 
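
The 32-bit entry asm above is now just a thin shim into the shared C code in interrupt.c. A rough C-level view of the sequence it sets up, with argument order taken from the "r9 = orig r0, r10 = regs" comment; the two prototypes are paraphrased rather than copied from the tree, and the flow is compressed for illustration:

	struct pt_regs;	/* the real kernel structure */

	/* Dispatches the syscall after tracing/seccomp/audit checks. */
	long system_call_exception(long r3, long r4, long r5, long r6, long r7,
				   long r8, unsigned long r0, struct pt_regs *regs);

	/* Exit work: signals, _TIF_RESTOREALL/_TIF_NOERROR, trace/step. */
	unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs,
					   long scv);

	static unsigned long syscall_flow_sketch(struct pt_regs *regs,
						 long a0, long a1, long a2,
						 long a3, long a4, long a5,
						 unsigned long nr)
	{
		long ret;

		/* in the real entry path a0..a5 and nr are still live in r3-r8/r0 */
		ret = system_call_exception(a0, a1, a2, a3, a4, a5, nr, regs);

		/* scv == 0: the plain "sc" path, not an scv vectored call */
		return syscall_exit_prepare(ret, regs, 0);
	}
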
/* Prevent prefetch past rfi */ #endif -_ASM_NOKPROBE_SYMBOL(syscall_exit_finish) + +3: mtcr r5 + lwz r4,_CTR(r1) + lwz r5,_XER(r1) + REST_NVGPRS(r1) + mtctr r4 + mtxer r5 + lwz r0,GPR0(r1) + lwz r3,GPR3(r1) + REST_8GPRS(4,r1) + lwz r12,GPR12(r1) + b 1b + #ifdef CONFIG_44x 2: li r7,0 iccci r0,r0 @@ -489,9 +393,6 @@ _ASM_NOKPROBE_SYMBOL(syscall_exit_finish) b 1b #endif /* CONFIG_44x */ -66: li r3,-ENOSYS - b ret_from_syscall - .globl ret_from_fork ret_from_fork: REST_NVGPRS(r1) @@ -510,157 +411,6 @@ ret_from_kernel_thread: li r3,0 b ret_from_syscall -/* Traced system call support */ -syscall_dotrace: - SAVE_NVGPRS(r1) - li r0,0xc00 - stw r0,_TRAP(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_syscall_trace_enter - /* - * Restore argument registers possibly just changed. - * We use the return value of do_syscall_trace_enter - * for call number to look up in the table (r0). - */ - mr r0,r3 - lwz r3,GPR3(r1) - lwz r4,GPR4(r1) - lwz r5,GPR5(r1) - lwz r6,GPR6(r1) - lwz r7,GPR7(r1) - lwz r8,GPR8(r1) - REST_NVGPRS(r1) - - cmplwi r0,NR_syscalls - /* Return code is already in r3 thanks to do_syscall_trace_enter() */ - bge- ret_from_syscall - b syscall_dotrace_cont - -syscall_exit_work: - andi. r0,r9,_TIF_RESTOREALL - beq+ 0f - REST_NVGPRS(r1) - b 2f -0: cmplw 0,r3,r8 - blt+ 1f - andi. r0,r9,_TIF_NOERROR - bne- 1f - lwz r11,_CCR(r1) /* Load CR */ - neg r3,r3 - oris r11,r11,0x1000 /* Set SO bit in CR */ - stw r11,_CCR(r1) - -1: stw r6,RESULT(r1) /* Save result */ - stw r3,GPR3(r1) /* Update return value */ -2: andi. r0,r9,(_TIF_PERSYSCALL_MASK) - beq 4f - - /* Clear per-syscall TIF flags if any are set. */ - - li r11,_TIF_PERSYSCALL_MASK - addi r12,r2,TI_FLAGS -3: lwarx r8,0,r12 - andc r8,r8,r11 - stwcx. r8,0,r12 - bne- 3b - -4: /* Anything which requires enabling interrupts? */ - andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP) - beq ret_from_except - - /* Re-enable interrupts. There is no need to trace that with - * lockdep as we are supposed to have IRQs on at this point - */ - ori r10,r10,MSR_EE - mtmsr r10 - - /* Save NVGPRS if they're not saved already */ - lwz r4,_TRAP(r1) - andi. r4,r4,1 - beq 5f - SAVE_NVGPRS(r1) - li r4,0xc00 - stw r4,_TRAP(r1) -5: - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_syscall_trace_leave - b ret_from_except_full - - /* - * System call was called from kernel. We get here with SRR1 in r9. - * Mark the exception as recoverable once we have retrieved SRR0, - * trap a warning and return ENOSYS with CR[SO] set. - */ - .globl ret_from_kernel_syscall -ret_from_kernel_syscall: - mfspr r9, SPRN_SRR0 - mfspr r10, SPRN_SRR1 -#if !defined(CONFIG_4xx) && !defined(CONFIG_BOOKE) - LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_IR|MSR_DR)) - mtmsr r11 -#endif - -0: trap - EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING - - li r3, ENOSYS - crset so -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) - mtspr SPRN_NRI, r0 -#endif - mtspr SPRN_SRR0, r9 - mtspr SPRN_SRR1, r10 - rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif -_ASM_NOKPROBE_SYMBOL(ret_from_kernel_syscall) - -/* - * The fork/clone functions need to copy the full register set into - * the child process. Therefore we need to save all the nonvolatile - * registers (r13 - r31) before calling the C code. 
- */ - .globl ppc_fork -ppc_fork: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ - stw r0,_TRAP(r1) /* register set saved */ - b sys_fork - - .globl ppc_vfork -ppc_vfork: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ - stw r0,_TRAP(r1) /* register set saved */ - b sys_vfork - - .globl ppc_clone -ppc_clone: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ - stw r0,_TRAP(r1) /* register set saved */ - b sys_clone - - .globl ppc_clone3 -ppc_clone3: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ - stw r0,_TRAP(r1) /* register set saved */ - b sys_clone3 - - .globl ppc_swapcontext -ppc_swapcontext: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ - stw r0,_TRAP(r1) /* register set saved */ - b sys_swapcontext - /* * Top-level page fault handling. * This is in assembler because if do_page_fault tells us that @@ -670,10 +420,6 @@ ppc_swapcontext: .globl handle_page_fault handle_page_fault: addi r3,r1,STACK_FRAME_OVERHEAD -#ifdef CONFIG_PPC_BOOK3S_32 - andis. r0,r5,DSISR_DABRMATCH@h - bne- handle_dabr_fault -#endif bl do_page_fault cmpwi r3,0 beq+ ret_from_except @@ -681,23 +427,11 @@ handle_page_fault: lwz r0,_TRAP(r1) clrrwi r0,r0,1 stw r0,_TRAP(r1) - mr r5,r3 + mr r4,r3 /* err arg for bad_page_fault */ addi r3,r1,STACK_FRAME_OVERHEAD - lwz r4,_DAR(r1) bl __bad_page_fault b ret_from_except_full -#ifdef CONFIG_PPC_BOOK3S_32 - /* We have a data breakpoint exception - handle it */ -handle_dabr_fault: - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - clrrwi r0,r0,1 - stw r0,_TRAP(r1) - bl do_break - b ret_from_except_full -#endif - /* * This routine switches between two different tasks. The process * state of one is saved on its kernel stack. 
Then the state @@ -1237,14 +971,11 @@ load_dbcr0: addi r11,r11,global_dbcr0@l #ifdef CONFIG_SMP lwz r9,TASK_CPU(r2) - slwi r9,r9,3 + slwi r9,r9,2 add r11,r11,r9 #endif stw r10,0(r11) mtspr SPRN_DBCR0,r0 - lwz r10,4(r11) - addi r10,r10,1 - stw r10,4(r11) li r11,-1 mtspr SPRN_DBSR,r11 /* clear all pending debug events */ blr @@ -1253,7 +984,7 @@ load_dbcr0: .align 4 .global global_dbcr0 global_dbcr0: - .space 8*NR_CPUS + .space 4*NR_CPUS .previous #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 33ddfeef4fe9..6c4d9e276c4d 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -108,7 +108,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) li r11,\trapnr std r11,_TRAP(r1) std r12,_CCR(r1) - std r3,ORIG_GPR3(r1) addi r10,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) std r11,-16(r10) /* "regshere" marker */ @@ -226,6 +225,12 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate) #endif .balign IFETCH_ALIGN_BYTES + .globl system_call_common_real +system_call_common_real: + ld r10,PACAKMSR(r13) /* get MSR value for kernel */ + mtmsrd r10 + + .balign IFETCH_ALIGN_BYTES .globl system_call_common system_call_common: _ASM_NOKPROBE_SYMBOL(system_call_common) @@ -278,7 +283,6 @@ END_BTB_FLUSH_SECTION std r10,_LINK(r1) std r11,_TRAP(r1) std r12,_CCR(r1) - std r3,ORIG_GPR3(r1) addi r10,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) std r11,-16(r10) /* "regshere" marker */ diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 74d07dc0bb48..e8eb9992a270 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -398,7 +398,6 @@ exc_##n##_common: \ std r10,_NIP(r1); /* save SRR0 to stackframe */ \ std r11,_MSR(r1); /* save SRR1 to stackframe */ \ beq 2f; /* if from kernel mode */ \ - ACCOUNT_CPU_USER_ENTRY(r13,r10,r11);/* accounting (uses cr0+eq) */ \ 2: ld r3,excf+EX_R10(r13); /* get back r10 */ \ ld r4,excf+EX_R11(r13); /* get back r11 */ \ mfspr r5,scratch; /* get back r13 */ \ @@ -791,7 +790,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) EXCEPTION_COMMON_CRIT(0xd00) std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - mr r4,r14 ld r14,PACA_EXCRIT+EX_R14(r13) ld r15,PACA_EXCRIT+EX_R15(r13) bl save_nvgprs @@ -864,7 +862,6 @@ kernel_dbg_exc: INTS_DISABLE std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - mr r4,r14 ld r14,PACA_EXDBG+EX_R14(r13) ld r15,PACA_EXDBG+EX_R15(r13) bl save_nvgprs @@ -1011,8 +1008,6 @@ storage_fault_common: std r14,_DAR(r1) std r15,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - mr r4,r14 - mr r5,r15 ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) bl do_page_fault @@ -1020,9 +1015,8 @@ storage_fault_common: bne- 1f b ret_from_except_lite 1: bl save_nvgprs - mr r5,r3 + mr r4,r3 addi r3,r1,STACK_FRAME_OVERHEAD - ld r4,_DAR(r1) bl __bad_page_fault b ret_from_except diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6e53f7638737..60d3051a8bc8 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -139,7 +139,6 @@ name: #define IKVM_VIRT .L_IKVM_VIRT_\name\() /* Virt entry tests KVM */ #define ISTACK .L_ISTACK_\name\() /* Set regular kernel stack */ #define __ISTACK(name) .L_ISTACK_ ## name -#define IRECONCILE .L_IRECONCILE_\name\() /* Do RECONCILE_IRQ_STATE */ #define IKUAP .L_IKUAP_\name\() /* Do KUAP lock */ #define INT_DEFINE_BEGIN(n) \ @@ -203,9 +202,6 @@ do_define_int n .ifndef ISTACK ISTACK=1 .endif - 
.ifndef IRECONCILE - IRECONCILE=1 - .endif .ifndef IKUAP IKUAP=1 .endif @@ -581,7 +577,6 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) kuap_save_amr_and_lock r9, r10, cr1, cr0 .endif beq 101f /* if from kernel mode */ - ACCOUNT_CPU_USER_ENTRY(r13, r9, r10) BEGIN_FTR_SECTION ld r9,IAREA+EX_PPR(r13) /* Read PPR from paca */ std r9,_PPR(r1) @@ -649,14 +644,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r11,exception_marker@toc(r2) std r10,RESULT(r1) /* clear regs->result */ std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */ - - .if ISTACK - ACCOUNT_STOLEN_TIME - .endif - - .if IRECONCILE - RECONCILE_IRQ_STATE(r10, r11) - .endif .endm /* @@ -705,14 +692,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r1,GPR1(r1) .endm -#define RUNLATCH_ON \ -BEGIN_FTR_SECTION \ - ld r3, PACA_THREAD_INFO(r13); \ - ld r4,TI_LOCAL_FLAGS(r3); \ - andi. r0,r4,_TLF_RUNLATCH; \ - beql ppc64_runlatch_on_trampoline; \ -END_FTR_SECTION_IFSET(CPU_FTR_CTRL) - /* * When the idle code in power4_idle puts the CPU into NAP mode, * it has to do so in a loop, and relies on the external interrupt @@ -935,7 +914,6 @@ INT_DEFINE_BEGIN(system_reset) */ ISET_RI=0 ISTACK=0 - IRECONCILE=0 IKVM_REAL=1 INT_DEFINE_END(system_reset) @@ -1022,20 +1000,6 @@ EXC_COMMON_BEGIN(system_reset_common) ld r1,PACA_NMI_EMERG_SP(r13) subi r1,r1,INT_FRAME_SIZE __GEN_COMMON_BODY system_reset - /* - * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does - * the right thing. We do not want to reconcile because that goes - * through irq tracing which we don't want in NMI. - * - * Save PACAIRQHAPPENED to RESULT (otherwise unused), and set HARD_DIS - * as we are running with MSR[EE]=0. - */ - li r10,IRQS_ALL_DISABLED - stb r10,PACAIRQSOFTMASK(r13) - lbz r10,PACAIRQHAPPENED(r13) - std r10,RESULT(r1) - ori r10,r10,PACA_IRQ_HARD_DIS - stb r10,PACAIRQHAPPENED(r13) addi r3,r1,STACK_FRAME_OVERHEAD bl system_reset_exception @@ -1051,14 +1015,6 @@ EXC_COMMON_BEGIN(system_reset_common) subi r10,r10,1 sth r10,PACA_IN_NMI(r13) - /* - * Restore soft mask settings. - */ - ld r10,RESULT(r1) - stb r10,PACAIRQHAPPENED(r13) - ld r10,SOFTE(r1) - stb r10,PACAIRQSOFTMASK(r13) - kuap_kernel_restore r9, r10 EXCEPTION_RESTORE_REGS RFI_TO_USER_OR_KERNEL @@ -1123,7 +1079,6 @@ INT_DEFINE_BEGIN(machine_check_early) ISTACK=0 IDAR=1 IDSISR=1 - IRECONCILE=0 IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */ INT_DEFINE_END(machine_check_early) @@ -1205,30 +1160,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) li r10,MSR_RI mtmsrd r10,1 - /* - * Set IRQS_ALL_DISABLED and save PACAIRQHAPPENED (see - * system_reset_common) - */ - li r10,IRQS_ALL_DISABLED - stb r10,PACAIRQSOFTMASK(r13) - lbz r10,PACAIRQHAPPENED(r13) - std r10,RESULT(r1) - ori r10,r10,PACA_IRQ_HARD_DIS - stb r10,PACAIRQHAPPENED(r13) - addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_early std r3,RESULT(r1) /* Save result */ ld r12,_MSR(r1) - /* - * Restore soft mask settings. - */ - ld r10,RESULT(r1) - stb r10,PACAIRQHAPPENED(r13) - ld r10,SOFTE(r1) - stb r10,PACAIRQSOFTMASK(r13) - #ifdef CONFIG_PPC_P7_NAP /* * Check if thread was in power saving mode. We come here when any @@ -1401,14 +1337,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) * * Handling: * - Hash MMU - * Go to do_hash_page first to see if the HPT can be filled from an entry in - * the Linux page table. Hash faults can hit in kernel mode in a fairly + * Go to do_hash_fault, which attempts to fill the HPT from an entry in the + * Linux page table. 
Hash faults can hit in kernel mode in a fairly * arbitrary state (e.g., interrupts disabled, locks held) when accessing * "non-bolted" regions, e.g., vmalloc space. However these should always be - * backed by Linux page tables. + * backed by Linux page table entries. * - * If none is found, do a Linux page fault. Linux page faults can happen in - * kernel mode due to user copy operations of course. + * If no entry is found the Linux page fault handler is invoked (by + * do_hash_fault). Linux page faults can happen in kernel mode due to user + * copy operations of course. * * KVM: The KVM HDSI handler may perform a load with MSR[DR]=1 in guest * MMU context, which may cause a DSI in the host, which must go to the @@ -1437,15 +1374,24 @@ EXC_VIRT_BEGIN(data_access, 0x4300, 0x80) EXC_VIRT_END(data_access, 0x4300, 0x80) EXC_COMMON_BEGIN(data_access_common) GEN_COMMON data_access - ld r4,_DAR(r1) - ld r5,_DSISR(r1) + ld r4,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + andis. r0,r4,DSISR_DABRMATCH@h + bne- 1f BEGIN_MMU_FTR_SECTION - ld r6,_MSR(r1) - li r3,0x300 - b do_hash_page /* Try to handle as hpte fault */ + bl do_hash_fault MMU_FTR_SECTION_ELSE - b handle_page_fault + bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) + b interrupt_return + +1: bl do_break + /* + * do_break() may have changed the NV GPRS while handling a breakpoint. + * If so, we need to restore them with their updated values. + */ + REST_NVGPRS(r1) + b interrupt_return GEN_KVM data_access @@ -1466,14 +1412,9 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) * on user-handler data structures. * * KVM: Same as 0x300, DSLB must test for KVM guest. - * - * A dedicated save area EXSLB is used (XXX: but it actually need not be - * these days, we could use EXGEN). */ INT_DEFINE_BEGIN(data_access_slb) IVEC=0x380 - IAREA=PACA_EXSLB - IRECONCILE=0 IDAR=1 IKVM_SKIP=1 IKVM_REAL=1 @@ -1487,10 +1428,9 @@ EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) EXC_VIRT_END(data_access_slb, 0x4380, 0x80) EXC_COMMON_BEGIN(data_access_slb_common) GEN_COMMON data_access_slb - ld r4,_DAR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ + addi r3,r1,STACK_FRAME_OVERHEAD bl do_slb_fault cmpdi r3,0 bne- 1f @@ -1501,9 +1441,6 @@ MMU_FTR_SECTION_ELSE li r3,-EFAULT ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) - RECONCILE_IRQ_STATE(r10, r11) - ld r4,_DAR(r1) - ld r5,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault b interrupt_return @@ -1538,15 +1475,13 @@ EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80) EXC_VIRT_END(instruction_access, 0x4400, 0x80) EXC_COMMON_BEGIN(instruction_access_common) GEN_COMMON instruction_access - ld r4,_DAR(r1) - ld r5,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION - ld r6,_MSR(r1) - li r3,0x400 - b do_hash_page /* Try to handle as hpte fault */ + bl do_hash_fault MMU_FTR_SECTION_ELSE - b handle_page_fault + bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) + b interrupt_return GEN_KVM instruction_access @@ -1562,8 +1497,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) */ INT_DEFINE_BEGIN(instruction_access_slb) IVEC=0x480 - IAREA=PACA_EXSLB - IRECONCILE=0 IISIDE=1 IDAR=1 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE @@ -1579,10 +1512,9 @@ EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80) EXC_COMMON_BEGIN(instruction_access_slb_common) GEN_COMMON instruction_access_slb - ld r4,_DAR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION /* HPT 
case, do SLB fault */ + addi r3,r1,STACK_FRAME_OVERHEAD bl do_slb_fault cmpdi r3,0 bne- 1f @@ -1593,9 +1525,6 @@ MMU_FTR_SECTION_ELSE li r3,-EFAULT ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) - RECONCILE_IRQ_STATE(r10, r11) - ld r4,_DAR(r1) - ld r5,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault b interrupt_return @@ -1643,7 +1572,6 @@ EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) EXC_COMMON_BEGIN(hardware_interrupt_common) GEN_COMMON hardware_interrupt FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ b interrupt_return @@ -1697,6 +1625,51 @@ INT_DEFINE_BEGIN(program_check) INT_DEFINE_END(program_check) EXC_REAL_BEGIN(program_check, 0x700, 0x100) + +#ifdef CONFIG_CPU_LITTLE_ENDIAN + /* + * There's a short window during boot where although the kernel is + * running little endian, any exceptions will cause the CPU to switch + * back to big endian. For example a WARN() boils down to a trap + * instruction, which will cause a program check, and we end up here but + * with the CPU in big endian mode. The first instruction of the program + * check handler (in GEN_INT_ENTRY below) is an mtsprg, which when + * executed in the wrong endian is an lhzu with a ~3GB displacement from + * r3. The content of r3 is random, so that is a load from some random + * location, and depending on the system can easily lead to a checkstop, + * or an infinitely recursive page fault. + * + * So to handle that case we have a trampoline here that can detect we + * are in the wrong endian and flip us back to the correct endian. We + * can't flip MSR[LE] using mtmsr, so we have to use rfid. That requires + * backing up SRR0/1 as well as a GPR. To do that we use SPRG0/2/3, as + * SPRG1 is already used for the paca. SPRG3 is user readable, but this + * trampoline is only active very early in boot, and SPRG3 will be + * reinitialised in vdso_getcpu_init() before userspace starts. + */ +BEGIN_FTR_SECTION + tdi 0,0,0x48 // Trap never, or in reverse endian: b . 
+ 8 + b 1f // Skip trampoline if endian is correct + .long 0xa643707d // mtsprg 0, r11 Backup r11 + .long 0xa6027a7d // mfsrr0 r11 + .long 0xa643727d // mtsprg 2, r11 Backup SRR0 in SPRG2 + .long 0xa6027b7d // mfsrr1 r11 + .long 0xa643737d // mtsprg 3, r11 Backup SRR1 in SPRG3 + .long 0xa600607d // mfmsr r11 + .long 0x01006b69 // xori r11, r11, 1 Invert MSR[LE] + .long 0xa6037b7d // mtsrr1 r11 + .long 0x34076039 // li r11, 0x734 + .long 0xa6037a7d // mtsrr0 r11 + .long 0x2400004c // rfid + mfsprg r11, 3 + mtsrr1 r11 // Restore SRR1 + mfsprg r11, 2 + mtsrr0 r11 // Restore SRR0 + mfsprg r11, 0 // Restore r11 +1: +END_FTR_SECTION(0, 1) // nop out after boot +#endif /* CONFIG_CPU_LITTLE_ENDIAN */ + GEN_INT_ENTRY program_check, virt=0 EXC_REAL_END(program_check, 0x700, 0x100) EXC_VIRT_BEGIN(program_check, 0x4700, 0x100) @@ -1755,7 +1728,6 @@ EXC_COMMON_BEGIN(program_check_common) */ INT_DEFINE_BEGIN(fp_unavailable) IVEC=0x800 - IRECONCILE=0 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif @@ -1770,7 +1742,6 @@ EXC_VIRT_END(fp_unavailable, 0x4800, 0x100) EXC_COMMON_BEGIN(fp_unavailable_common) GEN_COMMON fp_unavailable bne 1f /* if from user, just load it up */ - RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl kernel_fp_unavailable_exception 0: trap @@ -1789,7 +1760,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) b fast_interrupt_return #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl fp_unavailable_tm b interrupt_return @@ -1832,7 +1802,6 @@ EXC_VIRT_END(decrementer, 0x4900, 0x80) EXC_COMMON_BEGIN(decrementer_common) GEN_COMMON decrementer FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl timer_interrupt b interrupt_return @@ -1854,7 +1823,6 @@ INT_DEFINE_BEGIN(hdecrementer) IVEC=0x980 IHSRR=1 ISTACK=0 - IRECONCILE=0 IKVM_REAL=1 IKVM_VIRT=1 INT_DEFINE_END(hdecrementer) @@ -1919,12 +1887,11 @@ EXC_VIRT_END(doorbell_super, 0x4a00, 0x100) EXC_COMMON_BEGIN(doorbell_super_common) GEN_COMMON doorbell_super FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_DOORBELL bl doorbell_exception #else - bl unknown_exception + bl unknown_async_exception #endif b interrupt_return @@ -2001,12 +1968,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) HMT_MEDIUM .if ! \virt - __LOAD_HANDLER(r10, system_call_common) - mtspr SPRN_SRR0,r10 - ld r10,PACAKMSR(r13) - mtspr SPRN_SRR1,r10 - RFI_TO_KERNEL - b . 
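
The raw .long values in the endian trampoline above are easy to sanity-check: each word is the intended instruction with its bytes reversed, so a CPU that has dropped back to big endian fetches it correctly. A small host-side check for two of the words, using the encodings exactly as given in the listing (the mtspr breakdown follows the ISA encoding of mtspr SPRG0,r11):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		/* ".long 0xa643707d" from the trampoline: byte-reversed this
		 * is 0x7d7043a6 == mtspr SPRG0, r11 (opcode 31, XO 467, SPR 272). */
		uint32_t stored = 0xa643707d;

		/* The guard "tdi 0,0,0x48" (0x08000048) reads as 0x48000008 in
		 * the other endianness, i.e. "b . + 8" -- as the comment says. */
		uint32_t guard = 0x08000048;

		printf("0x%08x -> 0x%08x\n", stored, __builtin_bswap32(stored));
		printf("0x%08x -> 0x%08x\n", guard, __builtin_bswap32(guard));

		return (__builtin_bswap32(stored) == 0x7d7043a6 &&
			__builtin_bswap32(guard) == 0x48000008) ? 0 : 1;
	}
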
/* prevent speculative execution */ + __LOAD_HANDLER(r10, system_call_common_real) + mtctr r10 + bctr .else li r10,MSR_RI mtmsrd r10,1 /* Set RI (EE=0) */ @@ -2137,9 +2101,7 @@ EXC_COMMON_BEGIN(h_data_storage_common) GEN_COMMON h_data_storage addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION - ld r4,_DAR(r1) - li r5,SIGSEGV - bl bad_page_fault + bl do_bad_page_fault_segv MMU_FTR_SECTION_ELSE bl unknown_exception ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) @@ -2230,7 +2192,6 @@ INT_DEFINE_BEGIN(hmi_exception_early) IHSRR=1 IREALMODE_COMMON=1 ISTACK=0 - IRECONCILE=0 IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */ IKVM_REAL=1 INT_DEFINE_END(hmi_exception_early) @@ -2277,7 +2238,6 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) EXC_COMMON_BEGIN(hmi_exception_common) GEN_COMMON hmi_exception FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl handle_hmi_exception b interrupt_return @@ -2307,12 +2267,11 @@ EXC_VIRT_END(h_doorbell, 0x4e80, 0x20) EXC_COMMON_BEGIN(h_doorbell_common) GEN_COMMON h_doorbell FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_DOORBELL bl doorbell_exception #else - bl unknown_exception + bl unknown_async_exception #endif b interrupt_return @@ -2341,7 +2300,6 @@ EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20) EXC_COMMON_BEGIN(h_virt_irq_common) GEN_COMMON h_virt_irq FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ b interrupt_return @@ -2388,7 +2346,6 @@ EXC_VIRT_END(performance_monitor, 0x4f00, 0x20) EXC_COMMON_BEGIN(performance_monitor_common) GEN_COMMON performance_monitor FINISH_NAP - RUNLATCH_ON addi r3,r1,STACK_FRAME_OVERHEAD bl performance_monitor_exception b interrupt_return @@ -2404,7 +2361,6 @@ EXC_COMMON_BEGIN(performance_monitor_common) */ INT_DEFINE_BEGIN(altivec_unavailable) IVEC=0xf20 - IRECONCILE=0 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif @@ -2434,7 +2390,6 @@ BEGIN_FTR_SECTION b fast_interrupt_return #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_tm b interrupt_return @@ -2442,7 +2397,6 @@ BEGIN_FTR_SECTION 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif - RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_exception b interrupt_return @@ -2458,7 +2412,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) */ INT_DEFINE_BEGIN(vsx_unavailable) IVEC=0xf40 - IRECONCILE=0 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif @@ -2487,7 +2440,6 @@ BEGIN_FTR_SECTION b load_up_vsx #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_tm b interrupt_return @@ -2495,7 +2447,6 @@ BEGIN_FTR_SECTION 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif - RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_exception b interrupt_return @@ -2830,7 +2781,6 @@ EXC_VIRT_NONE(0x5800, 0x100) INT_DEFINE_BEGIN(soft_nmi) IVEC=0x900 ISTACK=0 - IRECONCILE=0 /* Soft-NMI may fire under local_irq_disable */ INT_DEFINE_END(soft_nmi) /* @@ -2849,17 +2799,6 @@ EXC_COMMON_BEGIN(soft_nmi_common) subi r1,r1,INT_FRAME_SIZE __GEN_COMMON_BODY soft_nmi - /* - * Set IRQS_ALL_DISABLED and save PACAIRQHAPPENED (see - * system_reset_common) - */ - li r10,IRQS_ALL_DISABLED - stb r10,PACAIRQSOFTMASK(r13) - lbz r10,PACAIRQHAPPENED(r13) - std r10,RESULT(r1) - ori r10,r10,PACA_IRQ_HARD_DIS - stb r10,PACAIRQHAPPENED(r13) - addi 
r3,r1,STACK_FRAME_OVERHEAD bl soft_nmi_interrupt @@ -2867,14 +2806,6 @@ EXC_COMMON_BEGIN(soft_nmi_common) li r9,0 mtmsrd r9,1 - /* - * Restore soft mask settings. - */ - ld r10,RESULT(r1) - stb r10,PACAIRQHAPPENED(r13) - ld r10,SOFTE(r1) - stb r10,PACAIRQSOFTMASK(r13) - kuap_kernel_restore r9, r10 EXCEPTION_RESTORE_REGS hsrr=0 RFI_TO_KERNEL @@ -3148,9 +3079,6 @@ kvmppc_skip_Hinterrupt: * come here. */ -EXC_COMMON_BEGIN(ppc64_runlatch_on_trampoline) - b __ppc64_runlatch_on - USE_FIXED_SECTION(virt_trampolines) /* * All code below __end_interrupts is treated as soft-masked. If @@ -3221,99 +3149,3 @@ disable_machine_check: RFI_TO_KERNEL 1: mtlr r0 blr - -/* - * Hash table stuff - */ - .balign IFETCH_ALIGN_BYTES -do_hash_page: -#ifdef CONFIG_PPC_BOOK3S_64 - lis r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h - ori r0,r0,DSISR_BAD_FAULT_64S@l - and. r0,r5,r0 /* weird error? */ - bne- handle_page_fault /* if not, try to insert a HPTE */ - - /* - * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then - * don't call hash_page, just fail the fault. This is required to - * prevent re-entrancy problems in the hash code, namely perf - * interrupts hitting while something holds H_PAGE_BUSY, and taking a - * hash fault. See the comment in hash_preload(). - */ - ld r11, PACA_THREAD_INFO(r13) - lwz r0,TI_PREEMPT(r11) - andis. r0,r0,NMI_MASK@h - bne 77f - - /* - * r3 contains the trap number - * r4 contains the faulting address - * r5 contains dsisr - * r6 msr - * - * at return r3 = 0 for success, 1 for page fault, negative for error - */ - bl __hash_page /* build HPTE if possible */ - cmpdi r3,0 /* see if __hash_page succeeded */ - - /* Success */ - beq interrupt_return /* Return from exception on success */ - - /* Error */ - blt- 13f - - /* Reload DAR/DSISR into r4/r5 for the DABR check below */ - ld r4,_DAR(r1) - ld r5,_DSISR(r1) -#endif /* CONFIG_PPC_BOOK3S_64 */ - -/* Here we have a page fault that hash_page can't handle. */ -handle_page_fault: -11: andis. r0,r5,DSISR_DABRMATCH@h - bne- handle_dabr_fault - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_page_fault - cmpdi r3,0 - beq+ interrupt_return - mr r5,r3 - addi r3,r1,STACK_FRAME_OVERHEAD - ld r4,_DAR(r1) - bl __bad_page_fault - b interrupt_return - -/* We have a data breakpoint exception - handle it */ -handle_dabr_fault: - ld r4,_DAR(r1) - ld r5,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_break - /* - * do_break() may have changed the NV GPRS while handling a breakpoint. - * If so, we need to restore them with their updated values. - */ - REST_NVGPRS(r1) - b interrupt_return - - -#ifdef CONFIG_PPC_BOOK3S_64 -/* We have a page fault that hash_page could handle but HV refused - * the PTE insertion - */ -13: mr r5,r3 - addi r3,r1,STACK_FRAME_OVERHEAD - ld r4,_DAR(r1) - bl low_hash_fault - b interrupt_return -#endif - -/* - * We come here as a result of a DSI at a point where we don't want - * to call hash_page, such as when we are accessing memory (possibly - * user memory) inside a PMU interrupt that occurred while interrupts - * were soft-disabled. We want to invoke the exception handler for - * the access, or panic if there isn't a handler. 
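
The deleted do_hash_page block is the last of the hash-fault plumbing to leave assembly; its control flow survives in C via do_hash_fault. A condensed sketch of that flow, where every identifier is an illustrative stub rather than the kernel's:

	#include <stdbool.h>

	enum fault_result { HANDLED, TRY_PAGE_FAULT, BAD_FAULT };

	/* stand-in for the DSISR_BAD_FAULT_64S/KEYFAULT/DABRMATCH screen */
	static bool weird_error(unsigned long dsisr) { return dsisr & 1; }
	/* stand-in for the TI_PREEMPT & NMI_MASK re-entrancy check above */
	static bool in_nmi(void) { return false; }
	/* stand-in for __hash_page(); 0 means an HPTE was inserted */
	static int try_insert_hpte(unsigned long ea) { (void)ea; return 1; }

	static enum fault_result hash_fault_sketch(unsigned long ea,
						   unsigned long dsisr)
	{
		if (weird_error(dsisr))
			return TRY_PAGE_FAULT;	/* don't try to insert an HPTE */
		if (in_nmi())
			return BAD_FAULT;	/* never re-enter hash code from NMI */
		if (try_insert_hpte(ea) == 0)
			return HANDLED;		/* filled from the Linux page table */
		return TRY_PAGE_FAULT;		/* fall back to do_page_fault() */
	}

	int main(void)
	{
		return hash_fault_sketch(0, 0) == TRY_PAGE_FAULT ? 0 : 1;
	}
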
- */ -77: addi r3,r1,STACK_FRAME_OVERHEAD - li r5,SIGSEGV - bl bad_page_fault - b interrupt_return diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index a2f72c966baf..5d4706c14572 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -47,7 +47,7 @@ lwz r1,TASK_STACK-THREAD(r1) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE 1: - mtcrf 0x7f, r1 + mtcrf 0x3f, r1 bt 32 - THREAD_ALIGN_SHIFT, stack_overflow #else subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */ @@ -116,114 +116,44 @@ .endm .macro SYSCALL_ENTRY trapno - mfspr r12,SPRN_SPRG_THREAD mfspr r9, SPRN_SRR1 -#ifdef CONFIG_VMAP_STACK - mfspr r11, SPRN_SRR0 - mtctr r11 - andi. r11, r9, MSR_PR + mfspr r10, SPRN_SRR0 + LOAD_REG_IMMEDIATE(r11, MSR_KERNEL) /* can take exceptions */ + lis r12, 1f@h + ori r12, r12, 1f@l + mtspr SPRN_SRR1, r11 + mtspr SPRN_SRR0, r12 + mfspr r12,SPRN_SPRG_THREAD mr r11, r1 lwz r1,TASK_STACK-THREAD(r12) - beq- 99f - addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE - li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ - mtmsr r10 - isync tovirt(r12, r12) + addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE + rfi +1: stw r11,GPR1(r1) stw r11,0(r1) mr r11, r1 -#else - andi. r11, r9, MSR_PR - lwz r11,TASK_STACK-THREAD(r12) - beq- 99f - addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE - tophys(r11, r11) - stw r1,GPR1(r11) - stw r1,0(r11) - tovirt(r1, r11) /* set new kernel sp */ -#endif + stw r10,_NIP(r11) mflr r10 stw r10, _LINK(r11) -#ifdef CONFIG_VMAP_STACK - mfctr r10 -#else - mfspr r10,SPRN_SRR0 -#endif - stw r10,_NIP(r11) mfcr r10 rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */ stw r10,_CCR(r11) /* save registers */ #ifdef CONFIG_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ -#else -#ifdef CONFIG_VMAP_STACK - LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~MSR_IR) /* can take exceptions */ -#else - LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */ -#endif - mtmsr r10 /* (except for mach check in rtas) */ #endif lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ stw r2,GPR2(r11) addi r10,r10,STACK_FRAME_REGS_MARKER@l stw r9,_MSR(r11) - li r2, \trapno + 1 + li r2, \trapno stw r10,8(r11) stw r2,_TRAP(r11) SAVE_GPR(0, r11) SAVE_4GPRS(3, r11) SAVE_2GPRS(7, r11) - addi r11,r1,STACK_FRAME_OVERHEAD addi r2,r12,-THREAD - stw r11,PT_REGS(r12) -#if defined(CONFIG_40x) - /* Check to see if the dbcr0 register is set up to debug. Use the - internal debug mode bit to do this. */ - lwz r12,THREAD_DBCR0(r12) - andis. r12,r12,DBCR0_IDM@h -#endif - ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) -#if defined(CONFIG_40x) - beq+ 3f - /* From user and task is ptraced - load up global dbcr0 */ - li r12,-1 /* clear all pending debug events */ - mtspr SPRN_DBSR,r12 - lis r11,global_dbcr0@ha - tophys(r11,r11) - addi r11,r11,global_dbcr0@l - lwz r12,0(r11) - mtspr SPRN_DBCR0,r12 - lwz r12,4(r11) - addi r12,r12,-1 - stw r12,4(r11) -#endif - -3: - tovirt_novmstack r2, r2 /* set r2 to current */ - lis r11, transfer_to_syscall@h - ori r11, r11, transfer_to_syscall@l -#ifdef CONFIG_TRACE_IRQFLAGS - /* - * If MSR is changing we need to keep interrupts disabled at this point - * otherwise we might risk taking an interrupt before we tell lockdep - * they are enabled. 
- */ - LOAD_REG_IMMEDIATE(r10, MSR_KERNEL) - rlwimi r10, r9, 0, MSR_EE -#else - LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE) -#endif -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) - mtspr SPRN_NRI, r0 -#endif - mtspr SPRN_SRR1,r10 - mtspr SPRN_SRR0,r11 - rfi /* jump to handler, enable MMU */ -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif -99: b ret_from_kernel_syscall + b transfer_to_syscall /* jump to handler */ .endm .macro save_dar_dsisr_on_stack reg1, reg2, sp diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index a1ae00689e0f..24724a7dad49 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -179,9 +179,9 @@ _ENTRY(saved_ksp_limit) */ START_EXCEPTION(0x0300, DataStorage) EXCEPTION_PROLOG - mfspr r5, SPRN_ESR /* Grab the ESR, save it, pass arg3 */ + mfspr r5, SPRN_ESR /* Grab the ESR, save it */ stw r5, _ESR(r11) - mfspr r4, SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ + mfspr r4, SPRN_DEAR /* Grab the DEAR, save it */ stw r4, _DEAR(r11) EXC_XFER_LITE(0x300, handle_page_fault) @@ -191,9 +191,9 @@ _ENTRY(saved_ksp_limit) */ START_EXCEPTION(0x0400, InstructionAccess) EXCEPTION_PROLOG - mr r4,r12 /* Pass SRR0 as arg2 */ - stw r4, _DEAR(r11) - li r5,0 /* Pass zero as arg3 */ + li r5,0 + stw r5, _ESR(r11) /* Zero ESR */ + stw r12, _DEAR(r11) /* SRR0 as DEAR */ EXC_XFER_LITE(0x400, handle_page_fault) /* 0x0500 - External Interrupt Exception */ @@ -476,6 +476,7 @@ _ENTRY(saved_ksp_limit) /* continue normal handling for a critical exception... */ 2: mfspr r4,SPRN_DBSR + stw r4,_ESR(r11) /* DebugException takes DBSR in _ESR */ addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_TEMPLATE(DebugException, 0x2002, \ (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 8e36718f3167..813fa305c33b 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -376,7 +376,7 @@ interrupt_base: /* Load the next available TLB index */ lwz r13,tlb_44x_index@l(r10) - bne 2f /* Bail if permission mismach */ + bne 2f /* Bail if permission mismatch */ /* Increment, rollover, and store TLB index */ addi r13,r13,1 @@ -471,7 +471,7 @@ interrupt_base: /* Load the next available TLB index */ lwz r13,tlb_44x_index@l(r10) - bne 2f /* Bail if permission mismach */ + bne 2f /* Bail if permission mismatch */ /* Increment, rollover, and store TLB index */ addi r13,r13,1 diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 52702f3db6df..46dff3f9c31f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -165,7 +165,7 @@ SystemCall: /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. */ - EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD) + EXCEPTION(0x1000, SoftEmu, emulation_assist_interrupt, EXC_XFER_STD) . = 0x1100 /* @@ -312,14 +312,14 @@ DataStoreTLBMiss: . = 0x1300 InstructionTLBError: EXCEPTION_PROLOG - mr r4,r12 andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ andis. r10,r9,SRR1_ISI_NOPT@h beq+ .Litlbie - tlbie r4 + tlbie r12 /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ .Litlbie: - stw r4, _DAR(r11) + stw r12, _DAR(r11) + stw r5, _DSISR(r11) EXC_XFER_LITE(0x400, handle_page_fault) /* This is the data TLB error on the MPC8xx. 
This could be due to @@ -364,10 +364,9 @@ do_databreakpoint: addi r3,r1,STACK_FRAME_OVERHEAD mfspr r4,SPRN_BAR stw r4,_DAR(r11) -#ifdef CONFIG_VMAP_STACK - lwz r5,_DSISR(r11) -#else +#ifndef CONFIG_VMAP_STACK mfspr r5,SPRN_DSISR + stw r5,_DSISR(r11) #endif EXC_XFER_STD(0x1c00, do_break) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 858fbc8b19f3..727fdab557c9 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -238,8 +238,8 @@ __secondary_hold_acknowledge: /* System reset */ /* core99 pmac starts the seconary here by changing the vector, and - putting it back to what it was (unknown_exception) when done. */ - EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD) + putting it back to what it was (unknown_async_exception) when done. */ + EXCEPTION(0x100, Reset, unknown_async_exception, EXC_XFER_STD) /* Machine check */ /* @@ -278,12 +278,6 @@ MachineCheck: 7: EXCEPTION_PROLOG_2 addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_CHRP -#ifdef CONFIG_VMAP_STACK - mfspr r4, SPRN_SPRG_THREAD - tovirt(r4, r4) - lwz r4, RTAS_SP(r4) - cmpwi cr1, r4, 0 -#endif beq cr1, machine_check_tramp twi 31, 0, 0 #else @@ -295,6 +289,7 @@ MachineCheck: DO_KVM 0x300 DataAccess: #ifdef CONFIG_VMAP_STACK +#ifdef CONFIG_PPC_BOOK3S_604 BEGIN_MMU_FTR_SECTION mtspr SPRN_SPRG_SCRATCH2,r10 mfspr r10, SPRN_SPRG_THREAD @@ -311,12 +306,14 @@ BEGIN_MMU_FTR_SECTION MMU_FTR_SECTION_ELSE b 1f ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) +#endif 1: EXCEPTION_PROLOG_0 handle_dar_dsisr=1 EXCEPTION_PROLOG_1 b handle_page_fault_tramp_1 #else /* CONFIG_VMAP_STACK */ EXCEPTION_PROLOG handle_dar_dsisr=1 get_and_save_dar_dsisr_on_stack r4, r5, r11 +#ifdef CONFIG_PPC_BOOK3S_604 BEGIN_MMU_FTR_SECTION andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h bne handle_page_fault_tramp_2 /* if not, try to put a PTE */ @@ -324,8 +321,11 @@ BEGIN_MMU_FTR_SECTION bl hash_page b handle_page_fault_tramp_1 MMU_FTR_SECTION_ELSE +#endif b handle_page_fault_tramp_2 +#ifdef CONFIG_PPC_BOOK3S_604 ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) +#endif #endif /* CONFIG_VMAP_STACK */ /* Instruction access exception. */ @@ -341,12 +341,14 @@ InstructionAccess: mfspr r11, SPRN_SRR1 /* check whether user or kernel */ stw r11, SRR1(r10) mfcr r10 +#ifdef CONFIG_PPC_BOOK3S_604 BEGIN_MMU_FTR_SECTION andis. r11, r11, SRR1_ISI_NOPT@h /* no pte found? */ bne hash_page_isi .Lhash_page_isi_cont: mfspr r11, SPRN_SRR1 /* check whether user or kernel */ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) +#endif andi. r11, r11, MSR_PR EXCEPTION_PROLOG_1 @@ -357,13 +359,15 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) beq 1f /* if so, try to put a PTE */ li r3,0 /* into the hash table */ mr r4,r12 /* SRR0 is fault address */ +#ifdef CONFIG_PPC_BOOK3S_604 BEGIN_MMU_FTR_SECTION bl hash_page END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) +#endif #endif /* CONFIG_VMAP_STACK */ -1: mr r4,r12 andis. 
r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ - stw r4, _DAR(r11) + stw r5, _DSISR(r11) + stw r12, _DAR(r11) EXC_XFER_LITE(0x400, handle_page_fault) /* External interrupt */ @@ -640,7 +644,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) #endif #ifndef CONFIG_TAU_INT -#define TAUException unknown_exception +#define TAUException unknown_async_exception #endif EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD) @@ -685,13 +689,16 @@ handle_page_fault_tramp_1: #ifdef CONFIG_VMAP_STACK EXCEPTION_PROLOG_2 handle_dar_dsisr=1 #endif - lwz r4, _DAR(r11) lwz r5, _DSISR(r11) /* fall through */ handle_page_fault_tramp_2: + andis. r0, r5, DSISR_DABRMATCH@h + bne- 1f EXC_XFER_LITE(0x300, handle_page_fault) +1: EXC_XFER_STD(0x300, do_break) #ifdef CONFIG_VMAP_STACK +#ifdef CONFIG_PPC_BOOK3S_604 .macro save_regs_thread thread stw r0, THR0(\thread) stw r3, THR3(\thread) @@ -763,6 +770,7 @@ fast_hash_page_return: mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r10, SPRN_SPRG_SCRATCH0 rfi +#endif /* CONFIG_PPC_BOOK3S_604 */ stack_overflow: vmap_stack_overflow_exception diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 74e230c200fb..47857795f50a 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -106,10 +106,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #endif mfspr r9, SPRN_SRR1 BOOKE_CLEAR_BTB(r11) - andi. r11, r9, MSR_PR lwz r11, TASK_STACK - THREAD(r10) rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */ - beq- 99f ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE) stw r12, _CCR(r11) /* save various registers */ mflr r12 @@ -124,60 +122,15 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) stw r2,GPR2(r11) addi r12, r12, STACK_FRAME_REGS_MARKER@l stw r9,_MSR(r11) - li r2, \trapno + 1 + li r2, \trapno stw r12, 8(r11) stw r2,_TRAP(r11) SAVE_GPR(0, r11) SAVE_4GPRS(3, r11) SAVE_2GPRS(7, r11) - addi r11,r1,STACK_FRAME_OVERHEAD addi r2,r10,-THREAD - stw r11,PT_REGS(r10) - /* Check to see if the dbcr0 register is set up to debug. Use the - internal debug mode bit to do this. */ - lwz r12,THREAD_DBCR0(r10) - andis. r12,r12,DBCR0_IDM@h - ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) - beq+ 3f - /* From user and task is ptraced - load up global dbcr0 */ - li r12,-1 /* clear all pending debug events */ - mtspr SPRN_DBSR,r12 - lis r11,global_dbcr0@ha - tophys(r11,r11) - addi r11,r11,global_dbcr0@l -#ifdef CONFIG_SMP - lwz r10, TASK_CPU(r2) - slwi r10, r10, 3 - add r11, r11, r10 -#endif - lwz r12,0(r11) - mtspr SPRN_DBCR0,r12 - lwz r12,4(r11) - addi r12,r12,-1 - stw r12,4(r11) - -3: - tovirt(r2, r2) /* set r2 to current */ - lis r11, transfer_to_syscall@h - ori r11, r11, transfer_to_syscall@l -#ifdef CONFIG_TRACE_IRQFLAGS - /* - * If MSR is changing we need to keep interrupts disabled at this point - * otherwise we might risk taking an interrupt before we tell lockdep - * they are enabled. - */ - lis r10, MSR_KERNEL@h - ori r10, r10, MSR_KERNEL@l - rlwimi r10, r9, 0, MSR_EE -#else - lis r10, (MSR_KERNEL | MSR_EE)@h - ori r10, r10, (MSR_KERNEL | MSR_EE)@l -#endif - mtspr SPRN_SRR1,r10 - mtspr SPRN_SRR0,r11 - rfi /* jump to handler, enable MMU */ -99: b ret_from_kernel_syscall + b transfer_to_syscall /* jump to handler */ .endm /* To handle the additional exception priority levels on 40x and Book-E @@ -406,6 +359,7 @@ label: \ /* continue normal handling for a debug exception... 
*/ \ 2: mfspr r4,SPRN_DBSR; \ + stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\ addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), debug_transfer_to_handler, ret_from_debug_exc) @@ -459,6 +413,7 @@ label: \ /* continue normal handling for a critical exception... */ \ 2: mfspr r4,SPRN_DBSR; \ + stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\ addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc) @@ -476,9 +431,7 @@ label: NORMAL_EXCEPTION_PROLOG(INST_STORAGE); \ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ - mr r4,r12; /* Pass SRR0 as arg2 */ \ - stw r4, _DEAR(r11); \ - li r5,0; /* Pass zero as arg3 */ \ + stw r12, _DEAR(r11); /* Pass SRR0 as arg2 */ \ EXC_XFER_LITE(0x0400, handle_page_fault) #define ALIGNMENT_EXCEPTION \ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index fdd4d274c245..3f4a40cccef5 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -364,12 +364,12 @@ interrupt_base: /* Data Storage Interrupt */ START_EXCEPTION(DataStorage) NORMAL_EXCEPTION_PROLOG(DATA_STORAGE) - mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ + mfspr r5,SPRN_ESR /* Grab the ESR, save it */ stw r5,_ESR(r11) - mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ + mfspr r4,SPRN_DEAR /* Grab the DEAR, save it */ + stw r4, _DEAR(r11) andis. r10,r5,(ESR_ILK|ESR_DLK)@h bne 1f - stw r4, _DEAR(r11) EXC_XFER_LITE(0x0300, handle_page_fault) 1: addi r3,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 22f249b6f58d..f9e6d83e6720 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -52,28 +52,32 @@ _GLOBAL(isa300_idle_stop_mayloss) std r1,PACAR1(r13) mflr r4 mfcr r5 - /* use stack red zone rather than a new frame for saving regs */ - std r2,-8*0(r1) - std r14,-8*1(r1) - std r15,-8*2(r1) - std r16,-8*3(r1) - std r17,-8*4(r1) - std r18,-8*5(r1) - std r19,-8*6(r1) - std r20,-8*7(r1) - std r21,-8*8(r1) - std r22,-8*9(r1) - std r23,-8*10(r1) - std r24,-8*11(r1) - std r25,-8*12(r1) - std r26,-8*13(r1) - std r27,-8*14(r1) - std r28,-8*15(r1) - std r29,-8*16(r1) - std r30,-8*17(r1) - std r31,-8*18(r1) - std r4,-8*19(r1) - std r5,-8*20(r1) + /* + * Use the stack red zone rather than a new frame for saving regs since + * in the case of no GPR loss the wakeup code branches directly back to + * the caller without deallocating the stack frame first. + */ + std r2,-8*1(r1) + std r14,-8*2(r1) + std r15,-8*3(r1) + std r16,-8*4(r1) + std r17,-8*5(r1) + std r18,-8*6(r1) + std r19,-8*7(r1) + std r20,-8*8(r1) + std r21,-8*9(r1) + std r22,-8*10(r1) + std r23,-8*11(r1) + std r24,-8*12(r1) + std r25,-8*13(r1) + std r26,-8*14(r1) + std r27,-8*15(r1) + std r28,-8*16(r1) + std r29,-8*17(r1) + std r30,-8*18(r1) + std r31,-8*19(r1) + std r4,-8*20(r1) + std r5,-8*21(r1) /* 168 bytes */ PPC_STOP b . /* catch bugs */ @@ -89,8 +93,8 @@ _GLOBAL(isa300_idle_stop_mayloss) */ _GLOBAL(idle_return_gpr_loss) ld r1,PACAR1(r13) - ld r4,-8*19(r1) - ld r5,-8*20(r1) + ld r4,-8*20(r1) + ld r5,-8*21(r1) mtlr r4 mtcr r5 /* @@ -98,25 +102,25 @@ _GLOBAL(idle_return_gpr_loss) * from PACATOC. This could be avoided for that less common case * if KVM saved its r2. 
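The idle save renumbering above shifts every slot down by one doubleword. As I read the hunk, the point is that `0(r1)` is the caller's back-chain word, not red-zone storage; the first usable red-zone slot is `-8(r1)`, and the 21 saved doublewords (168 bytes, per the new comment) fit comfortably inside the 288-byte ELFv2 red zone. A compile-time sketch of that assumption:

```c
/* Sketch only: models the layout assumed by the renumbered saves.
 * 0(r1) holds the back chain; red-zone slots run from -8(r1) down. */
#define RED_ZONE_BYTES	288		/* ELFv2 ABI red zone below r1 */
#define SAVE_SLOT(n)	(-8 * (n))	/* n = 1..21 used above */

_Static_assert(-SAVE_SLOT(21) == 168, "21 doublewords = 168 bytes");
_Static_assert(-SAVE_SLOT(21) <= RED_ZONE_BYTES, "saves stay inside the red zone");
```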
*/ - ld r2,-8*0(r1) - ld r14,-8*1(r1) - ld r15,-8*2(r1) - ld r16,-8*3(r1) - ld r17,-8*4(r1) - ld r18,-8*5(r1) - ld r19,-8*6(r1) - ld r20,-8*7(r1) - ld r21,-8*8(r1) - ld r22,-8*9(r1) - ld r23,-8*10(r1) - ld r24,-8*11(r1) - ld r25,-8*12(r1) - ld r26,-8*13(r1) - ld r27,-8*14(r1) - ld r28,-8*15(r1) - ld r29,-8*16(r1) - ld r30,-8*17(r1) - ld r31,-8*18(r1) + ld r2,-8*1(r1) + ld r14,-8*2(r1) + ld r15,-8*3(r1) + ld r16,-8*4(r1) + ld r17,-8*5(r1) + ld r18,-8*6(r1) + ld r19,-8*7(r1) + ld r20,-8*8(r1) + ld r21,-8*9(r1) + ld r22,-8*10(r1) + ld r23,-8*11(r1) + ld r24,-8*12(r1) + ld r25,-8*13(r1) + ld r26,-8*14(r1) + ld r27,-8*15(r1) + ld r28,-8*16(r1) + ld r29,-8*17(r1) + ld r30,-8*18(r1) + ld r31,-8*19(r1) blr /* @@ -154,28 +158,32 @@ _GLOBAL(isa206_idle_insn_mayloss) std r1,PACAR1(r13) mflr r4 mfcr r5 - /* use stack red zone rather than a new frame for saving regs */ - std r2,-8*0(r1) - std r14,-8*1(r1) - std r15,-8*2(r1) - std r16,-8*3(r1) - std r17,-8*4(r1) - std r18,-8*5(r1) - std r19,-8*6(r1) - std r20,-8*7(r1) - std r21,-8*8(r1) - std r22,-8*9(r1) - std r23,-8*10(r1) - std r24,-8*11(r1) - std r25,-8*12(r1) - std r26,-8*13(r1) - std r27,-8*14(r1) - std r28,-8*15(r1) - std r29,-8*16(r1) - std r30,-8*17(r1) - std r31,-8*18(r1) - std r4,-8*19(r1) - std r5,-8*20(r1) + /* + * Use the stack red zone rather than a new frame for saving regs since + * in the case of no GPR loss the wakeup code branches directly back to + * the caller without deallocating the stack frame first. + */ + std r2,-8*1(r1) + std r14,-8*2(r1) + std r15,-8*3(r1) + std r16,-8*4(r1) + std r17,-8*5(r1) + std r18,-8*6(r1) + std r19,-8*7(r1) + std r20,-8*8(r1) + std r21,-8*9(r1) + std r22,-8*10(r1) + std r23,-8*11(r1) + std r24,-8*12(r1) + std r25,-8*13(r1) + std r26,-8*14(r1) + std r27,-8*15(r1) + std r28,-8*16(r1) + std r29,-8*17(r1) + std r30,-8*18(r1) + std r31,-8*19(r1) + std r4,-8*20(r1) + std r5,-8*21(r1) cmpwi r3,PNV_THREAD_NAP bne 1f IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/interrupt.c index 7c85ed04a164..398cd86b6ada 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/interrupt.c @@ -1,10 +1,15 @@ // SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/context_tracking.h> #include <linux/err.h> +#include <linux/compat.h> + #include <asm/asm-prototypes.h> #include <asm/kup.h> #include <asm/cputime.h> +#include <asm/interrupt.h> #include <asm/hw_irq.h> +#include <asm/interrupt.h> #include <asm/kprobes.h> #include <asm/paca.h> #include <asm/ptrace.h> @@ -24,16 +29,21 @@ notrace long system_call_exception(long r3, long r4, long r5, { syscall_fn f; + regs->orig_gpr3 = r3; + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); + CT_WARN_ON(ct_state() == CONTEXT_KERNEL); + user_exit_irqoff(); + trace_hardirqs_off(); /* finish reconciling */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S)) + if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) BUG_ON(!(regs->msr & MSR_RI)); BUG_ON(!(regs->msr & MSR_PR)); BUG_ON(!FULL_REGS(regs)); - BUG_ON(regs->softe != IRQS_ENABLED); + BUG_ON(arch_irq_disabled_regs(regs)); #ifdef CONFIG_PPC_PKEY if (mmu_has_feature(MMU_FTR_PKEY)) { @@ -59,19 +69,15 @@ notrace long system_call_exception(long r3, long r4, long r5, isync(); } else #endif +#ifdef CONFIG_PPC64 kuap_check_amr(); +#endif - account_cpu_user_entry(); + booke_restore_dbcr0(); -#ifdef CONFIG_PPC_SPLPAR - if (IS_ENABLED(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && - firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct lppaca 
*lp = local_paca->lppaca_ptr;
+	account_cpu_user_entry();
 
-		if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx)))
-			accumulate_stolen_time();
-	}
-#endif
+	account_stolen_time();
 
 	/*
 	 * This is not required for the syscall exit path, but makes the
 	 * frame, or if the unwinder was taught the first stack frame always
 	 * returns to user with IRQS_ENABLED, this store could be avoided!
 	 */
-	regs->softe = IRQS_ENABLED;
+	irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
 
 	local_irq_enable();
 
 	if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
-		if (unlikely(regs->trap == 0x7ff0)) {
+		if (unlikely(trap_is_unsupported_scv(regs))) {
 			/* Unsupported scv vector */
 			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
 			return regs->gpr[3];
@@ -107,7 +113,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
 		r8 = regs->gpr[8];
 	} else if (unlikely(r0 >= NR_syscalls)) {
-		if (unlikely(regs->trap == 0x7ff0)) {
+		if (unlikely(trap_is_unsupported_scv(regs))) {
 			/* Unsupported scv vector */
 			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
 			return regs->gpr[3];
@@ -118,7 +124,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
 	/* May be faster to do array_index_nospec? */
 	barrier_nospec();
 
-	if (unlikely(is_32bit_task())) {
+	if (unlikely(is_compat_task())) {
 		f = (void *)compat_sys_call_table[r0];
 		r3 &= 0x00000000ffffffffULL;
@@ -138,8 +144,12 @@ notrace long system_call_exception(long r3, long r4, long r5,
 /*
  * local irqs must be disabled. Returns false if the caller must re-enable
  * them, check for new work, and try again.
+ *
+ * This should be called with local irqs disabled, but if they were previously
+ * enabled when the interrupt handler returns (indicating a process-context /
+ * synchronous interrupt) then irqs_enabled should be true.
  */
-static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri)
+static notrace inline bool __prep_irq_for_enabled_exit(bool clear_ri)
 {
 	/* This must be done with RI=1 because tracing may touch vmaps */
 	trace_hardirqs_on();
@@ -149,6 +159,7 @@ static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri)
 		__hard_EE_RI_disable();
 	else
 		__hard_irq_disable();
+#ifdef CONFIG_PPC64
 	if (unlikely(lazy_irq_pending_nocheck())) {
 		/* Took an interrupt, may have more exit work to do. */
 		if (clear_ri)
@@ -160,10 +171,63 @@ static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri)
 	}
 	local_paca->irq_happened = 0;
 	irq_soft_mask_set(IRQS_ENABLED);
-
+#endif
 	return true;
 }
 
+static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled)
+{
+	if (__prep_irq_for_enabled_exit(clear_ri))
+		return true;
+
+	/*
+	 * Must replay pending soft-masked interrupts now. Don't just
+	 * local_irq_enable(); local_irq_disable(); because if we are
+	 * returning from an asynchronous interrupt here, another one
+	 * might hit after irqs are enabled, and it would exit via this
+	 * same path allowing another to fire, and so on unbounded.
+	 *
+	 * If interrupts were enabled when this interrupt exited,
+	 * indicating a process context (synchronous) interrupt,
+	 * local_irq_enable/disable can be used, which will enable
+	 * interrupts rather than keeping them masked (unclear how
+	 * much benefit this is over just replaying for all cases,
+	 * because we immediately disable again, so all we're really
+	 * doing is allowing hard interrupts to execute directly for
+	 * a very small time, rather than being masked and replayed).
+ */ + if (irqs_enabled) { + local_irq_enable(); + local_irq_disable(); + } else { + replay_soft_interrupts(); + } + + return false; +} + +static notrace void booke_load_dbcr0(void) +{ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + unsigned long dbcr0 = current->thread.debug.dbcr0; + + if (likely(!(dbcr0 & DBCR0_IDM))) + return; + + /* + * Check to see if the dbcr0 register is set up to debug. + * Use the internal debug mode bit to do this. + */ + mtmsr(mfmsr() & ~MSR_DE); + if (IS_ENABLED(CONFIG_PPC32)) { + isync(); + global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0); + } + mtspr(SPRN_DBCR0, dbcr0); + mtspr(SPRN_DBSR, -1); +#endif +} + /* * This should be called after a syscall returns, with r3 the return value * from the syscall. If this function returns non-zero, the system call @@ -177,20 +241,24 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv) { - unsigned long *ti_flagsp = ¤t_thread_info()->flags; unsigned long ti_flags; unsigned long ret = 0; + bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv; + CT_WARN_ON(ct_state() == CONTEXT_USER); + +#ifdef CONFIG_PPC64 kuap_check_amr(); +#endif regs->result = r3; /* Check whether the syscall is issued inside a restartable sequence */ rseq_syscall(regs); - ti_flags = *ti_flagsp; + ti_flags = current_thread_info()->flags; - if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && !scv) { + if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) { if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) { r3 = -r3; regs->ccr |= 0x10000000; /* Set SO bit in CR */ @@ -202,7 +270,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, ret = _TIF_RESTOREALL; else regs->gpr[3] = r3; - clear_bits(_TIF_PERSYSCALL_MASK, ti_flagsp); + clear_bits(_TIF_PERSYSCALL_MASK, ¤t_thread_info()->flags); } else { regs->gpr[3] = r3; } @@ -212,9 +280,10 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, ret |= _TIF_RESTOREALL; } -again: local_irq_disable(); - ti_flags = READ_ONCE(*ti_flagsp); + +again: + ti_flags = READ_ONCE(current_thread_info()->flags); while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); if (ti_flags & _TIF_NEED_RESCHED) { @@ -230,7 +299,7 @@ again: do_notify_resume(regs, ti_flags); } local_irq_disable(); - ti_flags = READ_ONCE(*ti_flagsp); + ti_flags = READ_ONCE(current_thread_info()->flags); } if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) { @@ -257,9 +326,13 @@ again: } } + user_enter_irqoff(); + /* scv need not set RI=0 because SRRs are not used */ - if (unlikely(!prep_irq_for_enabled_exit(!scv))) { + if (unlikely(!__prep_irq_for_enabled_exit(is_not_scv))) { + user_exit_irqoff(); local_irq_enable(); + local_irq_disable(); goto again; } @@ -267,9 +340,11 @@ again: local_paca->tm_scratch = regs->msr; #endif + booke_load_dbcr0(); + account_cpu_user_exit(); -#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */ +#ifdef CONFIG_PPC_BOOK3S_64 /* BOOK3E and ppc32 not using this */ /* * We do this at the end so that we do context switch with KERNEL AMR */ @@ -278,33 +353,32 @@ again: return ret; } -#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */ +#ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not yet using this */ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr) { -#ifdef CONFIG_PPC_BOOK3E - struct thread_struct *ts = ¤t->thread; -#endif - unsigned long *ti_flagsp = ¤t_thread_info()->flags; unsigned long ti_flags; unsigned long flags; unsigned long ret = 0; - if 
(IS_ENABLED(CONFIG_PPC_BOOK3S)) + if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) BUG_ON(!(regs->msr & MSR_RI)); BUG_ON(!(regs->msr & MSR_PR)); BUG_ON(!FULL_REGS(regs)); - BUG_ON(regs->softe != IRQS_ENABLED); + BUG_ON(arch_irq_disabled_regs(regs)); + CT_WARN_ON(ct_state() == CONTEXT_USER); /* * We don't need to restore AMR on the way back to userspace for KUAP. * AMR can only have been unlocked if we interrupted the kernel. */ +#ifdef CONFIG_PPC64 kuap_check_amr(); +#endif local_irq_save(flags); again: - ti_flags = READ_ONCE(*ti_flagsp); + ti_flags = READ_ONCE(current_thread_info()->flags); while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); /* returning to user: may enable */ if (ti_flags & _TIF_NEED_RESCHED) { @@ -315,7 +389,7 @@ again: do_notify_resume(regs, ti_flags); } local_irq_disable(); - ti_flags = READ_ONCE(*ti_flagsp); + ti_flags = READ_ONCE(current_thread_info()->flags); } if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) { @@ -336,23 +410,16 @@ again: } } - if (unlikely(!prep_irq_for_enabled_exit(true))) { + user_enter_irqoff(); + + if (unlikely(!__prep_irq_for_enabled_exit(true))) { + user_exit_irqoff(); local_irq_enable(); local_irq_disable(); goto again; } -#ifdef CONFIG_PPC_BOOK3E - if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) { - /* - * Check to see if the dbcr0 register is set up to debug. - * Use the internal debug mode bit to do this. - */ - mtmsr(mfmsr() & ~MSR_DE); - mtspr(SPRN_DBCR0, ts->debug.dbcr0); - mtspr(SPRN_DBSR, -1); - } -#endif + booke_load_dbcr0(); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM local_paca->tm_scratch = regs->msr; @@ -363,7 +430,9 @@ again: /* * We do this at the end so that we do context switch with KERNEL AMR */ +#ifdef CONFIG_PPC64 kuap_user_restore(regs); +#endif return ret; } @@ -372,56 +441,56 @@ void preempt_schedule_irq(void); notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr) { - unsigned long *ti_flagsp = ¤t_thread_info()->flags; unsigned long flags; unsigned long ret = 0; +#ifdef CONFIG_PPC64 unsigned long amr; +#endif - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && unlikely(!(regs->msr & MSR_RI))) + if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) && + unlikely(!(regs->msr & MSR_RI))) unrecoverable_exception(regs); BUG_ON(regs->msr & MSR_PR); BUG_ON(!FULL_REGS(regs)); + /* + * CT_WARN_ON comes here via program_check_exception, + * so avoid recursion. + */ + if (TRAP(regs) != 0x700) + CT_WARN_ON(ct_state() == CONTEXT_USER); +#ifdef CONFIG_PPC64 amr = kuap_get_and_check_amr(); +#endif - if (unlikely(*ti_flagsp & _TIF_EMULATE_STACK_STORE)) { - clear_bits(_TIF_EMULATE_STACK_STORE, ti_flagsp); + if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) { + clear_bits(_TIF_EMULATE_STACK_STORE, ¤t_thread_info()->flags); ret = 1; } local_irq_save(flags); - if (regs->softe == IRQS_ENABLED) { + if (!arch_irq_disabled_regs(regs)) { /* Returning to a kernel context with local irqs enabled. */ WARN_ON_ONCE(!(regs->msr & MSR_EE)); again: if (IS_ENABLED(CONFIG_PREEMPT)) { /* Return to preemptible kernel context */ - if (unlikely(*ti_flagsp & _TIF_NEED_RESCHED)) { + if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED)) { if (preempt_count() == 0) preempt_schedule_irq(); } } - if (unlikely(!prep_irq_for_enabled_exit(true))) { - /* - * Can't local_irq_restore to replay if we were in - * interrupt context. Must replay directly. 
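`BUG_ON(regs->softe != IRQS_ENABLED)` becoming `BUG_ON(arch_irq_disabled_regs(regs))` is part of what lets this file build on PPC32, where `pt_regs` has no `softe`. The helper presumably reduces to something like the following sketch: the 64-bit arm keys off the soft-mask state saved in the frame, the 32-bit arm off MSR[EE]:

```c
/* Approximate shape of the helper assumed by the checks above. */
static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
{
#ifdef CONFIG_PPC64
	return regs->softe & IRQS_DISABLED;	/* soft-mask state in the frame */
#else
	return !(regs->msr & MSR_EE);		/* no soft masking on 32-bit */
#endif
}
```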
- */ - if (irqs_disabled_flags(flags)) { - replay_soft_interrupts(); - } else { - local_irq_restore(flags); - local_irq_save(flags); - } - /* Took an interrupt, may have more exit work to do. */ + if (unlikely(!prep_irq_for_enabled_exit(true, !irqs_disabled_flags(flags)))) goto again; - } } else { /* Returning to a kernel context with local irqs disabled. */ __hard_EE_RI_disable(); +#ifdef CONFIG_PPC64 if (regs->msr & MSR_EE) local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; +#endif } @@ -434,7 +503,9 @@ again: * which would cause Read-After-Write stalls. Hence, we take the AMR * value from the check above. */ +#ifdef CONFIG_PPC64 kuap_kernel_restore(regs, amr); +#endif return ret; } diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 5b69a6a72a0e..c00214a4355c 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -25,6 +25,7 @@ #include <linux/pci.h> #include <linux/iommu.h> #include <linux/sched.h> +#include <linux/debugfs.h> #include <asm/io.h> #include <asm/prom.h> #include <asm/iommu.h> @@ -38,6 +39,47 @@ #define DBG(...) +#ifdef CONFIG_IOMMU_DEBUGFS +static int iommu_debugfs_weight_get(void *data, u64 *val) +{ + struct iommu_table *tbl = data; + *val = bitmap_weight(tbl->it_map, tbl->it_size); + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(iommu_debugfs_fops_weight, iommu_debugfs_weight_get, NULL, "%llu\n"); + +static void iommu_debugfs_add(struct iommu_table *tbl) +{ + char name[10]; + struct dentry *liobn_entry; + + sprintf(name, "%08lx", tbl->it_index); + liobn_entry = debugfs_create_dir(name, iommu_debugfs_dir); + + debugfs_create_file_unsafe("weight", 0400, liobn_entry, tbl, &iommu_debugfs_fops_weight); + debugfs_create_ulong("it_size", 0400, liobn_entry, &tbl->it_size); + debugfs_create_ulong("it_page_shift", 0400, liobn_entry, &tbl->it_page_shift); + debugfs_create_ulong("it_reserved_start", 0400, liobn_entry, &tbl->it_reserved_start); + debugfs_create_ulong("it_reserved_end", 0400, liobn_entry, &tbl->it_reserved_end); + debugfs_create_ulong("it_indirect_levels", 0400, liobn_entry, &tbl->it_indirect_levels); + debugfs_create_ulong("it_level_size", 0400, liobn_entry, &tbl->it_level_size); +} + +static void iommu_debugfs_del(struct iommu_table *tbl) +{ + char name[10]; + struct dentry *liobn_entry; + + sprintf(name, "%08lx", tbl->it_index); + liobn_entry = debugfs_lookup(name, iommu_debugfs_dir); + if (liobn_entry) + debugfs_remove(liobn_entry); +} +#else +static void iommu_debugfs_add(struct iommu_table *tbl){} +static void iommu_debugfs_del(struct iommu_table *tbl){} +#endif + static int novmerge; static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int); @@ -725,6 +767,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, welcomed = 1; } + iommu_debugfs_add(tbl); + return tbl; } @@ -744,6 +788,8 @@ static void iommu_table_free(struct kref *kref) return; } + iommu_debugfs_del(tbl); + iommu_table_release_pages(tbl); /* verify that table contains no entries */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index cc7a6271b6b4..086b0a743329 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -54,6 +54,7 @@ #include <linux/pgtable.h> #include <linux/uaccess.h> +#include <asm/interrupt.h> #include <asm/io.h> #include <asm/irq.h> #include <asm/cache.h> @@ -269,6 +270,31 @@ again: } } +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP) +static inline void replay_soft_interrupts_irqrestore(void) +{ + unsigned long kuap_state = get_kuap(); + + /* + * Check 
if anything calls local_irq_enable/restore() when KUAP is + * disabled (user access enabled). We handle that case here by saving + * and re-locking AMR but we shouldn't get here in the first place, + * hence the warning. + */ + kuap_check_amr(); + + if (kuap_state != AMR_KUAP_BLOCKED) + set_kuap(AMR_KUAP_BLOCKED); + + replay_soft_interrupts(); + + if (kuap_state != AMR_KUAP_BLOCKED) + set_kuap(kuap_state); +} +#else +#define replay_soft_interrupts_irqrestore() replay_soft_interrupts() +#endif + notrace void arch_local_irq_restore(unsigned long mask) { unsigned char irq_happened; @@ -332,7 +358,7 @@ notrace void arch_local_irq_restore(unsigned long mask) irq_soft_mask_set(IRQS_ALL_DISABLED); trace_hardirqs_off(); - replay_soft_interrupts(); + replay_soft_interrupts_irqrestore(); local_paca->irq_happened = 0; trace_hardirqs_on(); @@ -644,8 +670,6 @@ void __do_irq(struct pt_regs *regs) { unsigned int irq; - irq_enter(); - trace_irq_entry(regs); /* @@ -665,11 +689,9 @@ void __do_irq(struct pt_regs *regs) generic_handle_irq(irq); trace_irq_exit(regs); - - irq_exit(); } -void do_IRQ(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ) { struct pt_regs *old_regs = set_irq_regs(regs); void *cursp, *irqsp, *sirqsp; diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 9f3e133b57b7..11f0cae086ed 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -17,22 +17,15 @@ #include <linux/irq_work.h> #include <linux/extable.h> #include <linux/ftrace.h> +#include <linux/memblock.h> +#include <asm/interrupt.h> #include <asm/machdep.h> #include <asm/mce.h> #include <asm/nmi.h> +#include <asm/asm-prototypes.h> -static DEFINE_PER_CPU(int, mce_nest_count); -static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); - -/* Queue for delayed MCE events. */ -static DEFINE_PER_CPU(int, mce_queue_count); -static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue); - -/* Queue for delayed MCE UE events. */ -static DEFINE_PER_CPU(int, mce_ue_count); -static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], - mce_ue_event_queue); +#include "setup.h" static void machine_check_process_queued_event(struct irq_work *work); static void machine_check_ue_irq_work(struct irq_work *work); @@ -103,9 +96,10 @@ void save_mce_event(struct pt_regs *regs, long handled, struct mce_error_info *mce_err, uint64_t nip, uint64_t addr, uint64_t phys_addr) { - int index = __this_cpu_inc_return(mce_nest_count) - 1; - struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); + int index = local_paca->mce_info->mce_nest_count++; + struct machine_check_event *mce; + mce = &local_paca->mce_info->mce_event[index]; /* * Return if we don't have enough space to log mce event. * mce_nest_count may go beyond MAX_MC_EVT but that's ok, @@ -191,7 +185,7 @@ void save_mce_event(struct pt_regs *regs, long handled, */ int get_mce_event(struct machine_check_event *mce, bool release) { - int index = __this_cpu_read(mce_nest_count) - 1; + int index = local_paca->mce_info->mce_nest_count - 1; struct machine_check_event *mc_evt; int ret = 0; @@ -201,7 +195,7 @@ int get_mce_event(struct machine_check_event *mce, bool release) /* Check if we have MCE info to process. 
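The mce.c conversion that follows replaces per-CPU arrays with storage hung off the paca: r13 remains usable in real mode, where the early machine-check path runs, while per-CPU accessors are not reliably so. Judging by the fields dereferenced in these hunks, the new `mce_info` looks roughly like this (a sketch; the real definition would live in asm/mce.h):

```c
/* Reconstructed from the accesses in the hunks below; ordering is assumed. */
struct mce_info {
	int mce_nest_count;
	struct machine_check_event mce_event[MAX_MC_EVT];
	/* Queue for delayed MCE events. */
	int mce_queue_count;
	struct machine_check_event mce_event_queue[MAX_MC_EVT];
	/* Queue for delayed MCE UE events. */
	int mce_ue_count;
	struct machine_check_event mce_ue_event_queue[MAX_MC_EVT];
};
```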
*/ if (index < MAX_MC_EVT) { - mc_evt = this_cpu_ptr(&mce_event[index]); + mc_evt = &local_paca->mce_info->mce_event[index]; /* Copy the event structure and release the original */ if (mce) *mce = *mc_evt; @@ -211,7 +205,7 @@ int get_mce_event(struct machine_check_event *mce, bool release) } /* Decrement the count to free the slot. */ if (release) - __this_cpu_dec(mce_nest_count); + local_paca->mce_info->mce_nest_count--; return ret; } @@ -233,13 +227,14 @@ static void machine_check_ue_event(struct machine_check_event *evt) { int index; - index = __this_cpu_inc_return(mce_ue_count) - 1; + index = local_paca->mce_info->mce_ue_count++; /* If queue is full, just return for now. */ if (index >= MAX_MC_EVT) { - __this_cpu_dec(mce_ue_count); + local_paca->mce_info->mce_ue_count--; return; } - memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt)); + memcpy(&local_paca->mce_info->mce_ue_event_queue[index], + evt, sizeof(*evt)); /* Queue work to process this event later. */ irq_work_queue(&mce_ue_event_irq_work); @@ -256,13 +251,14 @@ void machine_check_queue_event(void) if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; - index = __this_cpu_inc_return(mce_queue_count) - 1; + index = local_paca->mce_info->mce_queue_count++; /* If queue is full, just return for now. */ if (index >= MAX_MC_EVT) { - __this_cpu_dec(mce_queue_count); + local_paca->mce_info->mce_queue_count--; return; } - memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt)); + memcpy(&local_paca->mce_info->mce_event_queue[index], + &evt, sizeof(evt)); /* Queue irq work to process this event later. */ irq_work_queue(&mce_event_process_work); @@ -289,9 +285,9 @@ static void machine_process_ue_event(struct work_struct *work) int index; struct machine_check_event *evt; - while (__this_cpu_read(mce_ue_count) > 0) { - index = __this_cpu_read(mce_ue_count) - 1; - evt = this_cpu_ptr(&mce_ue_event_queue[index]); + while (local_paca->mce_info->mce_ue_count > 0) { + index = local_paca->mce_info->mce_ue_count - 1; + evt = &local_paca->mce_info->mce_ue_event_queue[index]; blocking_notifier_call_chain(&mce_notifier_list, 0, evt); #ifdef CONFIG_MEMORY_FAILURE /* @@ -304,7 +300,7 @@ static void machine_process_ue_event(struct work_struct *work) */ if (evt->error_type == MCE_ERROR_TYPE_UE) { if (evt->u.ue_error.ignore_event) { - __this_cpu_dec(mce_ue_count); + local_paca->mce_info->mce_ue_count--; continue; } @@ -320,7 +316,7 @@ static void machine_process_ue_event(struct work_struct *work) "was generated\n"); } #endif - __this_cpu_dec(mce_ue_count); + local_paca->mce_info->mce_ue_count--; } } /* @@ -338,17 +334,17 @@ static void machine_check_process_queued_event(struct irq_work *work) * For now just print it to console. * TODO: log this error event to FSP or nvram. */ - while (__this_cpu_read(mce_queue_count) > 0) { - index = __this_cpu_read(mce_queue_count) - 1; - evt = this_cpu_ptr(&mce_event_queue[index]); + while (local_paca->mce_info->mce_queue_count > 0) { + index = local_paca->mce_info->mce_queue_count - 1; + evt = &local_paca->mce_info->mce_event_queue[index]; if (evt->error_type == MCE_ERROR_TYPE_UE && evt->u.ue_error.ignore_event) { - __this_cpu_dec(mce_queue_count); + local_paca->mce_info->mce_queue_count--; continue; } machine_check_print_event_info(evt, false, false); - __this_cpu_dec(mce_queue_count); + local_paca->mce_info->mce_queue_count--; } } @@ -588,15 +584,9 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info); * * regs->nip and regs->msr contains srr0 and ssr1. 
*/ -long notrace machine_check_early(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early) { long handled = 0; - u8 ftrace_enabled = this_cpu_get_ftrace_enabled(); - - this_cpu_set_ftrace_enabled(0); - /* Do not use nmi_enter/exit for pseries hpte guest */ - if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR)) - nmi_enter(); hv_nmi_check_nonrecoverable(regs); @@ -606,11 +596,6 @@ long notrace machine_check_early(struct pt_regs *regs) if (ppc_md.machine_check_early) handled = ppc_md.machine_check_early(regs); - if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR)) - nmi_exit(); - - this_cpu_set_ftrace_enabled(ftrace_enabled); - return handled; } @@ -722,7 +707,7 @@ long hmi_handle_debugtrig(struct pt_regs *regs) /* * Return values: */ -long hmi_exception_realmode(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode) { int ret; @@ -741,3 +726,24 @@ long hmi_exception_realmode(struct pt_regs *regs) return 1; } + +void __init mce_init(void) +{ + struct mce_info *mce_info; + u64 limit; + int i; + + limit = min(ppc64_bolted_size(), ppc64_rma_size); + for_each_possible_cpu(i) { + mce_info = memblock_alloc_try_nid(sizeof(*mce_info), + __alignof__(*mce_info), + MEMBLOCK_LOW_LIMIT, + limit, cpu_to_node(i)); + if (!mce_info) + goto err; + paca_ptrs[i]->mce_info = mce_info; + } + return; +err: + panic("Failed to allocate memory for MCE event data\n"); +} diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 69bfe96884e2..7f7cdbeacd1a 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -142,29 +142,10 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op) } /* - * emulate_step() requires insn to be emulated as - * second parameter. Load register 'r4' with the - * instruction. - */ -void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr) -{ - /* addis r4,0,(insn)@h */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ADDIS | ___PPC_RT(4) | - ((val >> 16) & 0xffff))); - addr++; - - /* ori r4,r4,(insn)@l */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ORI | ___PPC_RA(4) | - ___PPC_RS(4) | (val & 0xffff))); -} - -/* * Generate instructions to load provided immediate 64-bit value * to register 'reg' and patch these instructions at 'addr'. 
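`DEFINE_INTERRUPT_HANDLER_NMI` is where the deleted boilerplate went: the `nmi_enter()`/`nmi_exit()` calls, the ftrace enable/disable dance, and the pseries-HPTE-guest special case all move into common enter/exit helpers. The macro plausibly expands along these lines (a simplified sketch, not the exact asm/interrupt.h text):

```c
/* Sketch: the wrapper generates the externally visible handler;
 * the body written after the macro invocation becomes ____func(). */
#define DEFINE_INTERRUPT_HANDLER_NMI(func)				\
	static long ____##func(struct pt_regs *regs);			\
									\
	long func(struct pt_regs *regs)					\
	{								\
		struct interrupt_nmi_state state;			\
		long ret;						\
									\
		interrupt_nmi_enter_prepare(regs, &state);		\
		ret = ____##func(regs);					\
		interrupt_nmi_exit_prepare(regs, &state);		\
									\
		return ret;						\
	}								\
									\
	static long ____##func(struct pt_regs *regs)
```

So the `{ ... }` body following `DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early)` above runs with irq/nmi tracking already reconciled by the wrapper.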
*/ -void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) +static void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) { /* lis reg,(op)@highest */ patch_instruction((struct ppc_inst *)addr, diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 2b555997b295..001e90cd8948 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1699,3 +1699,13 @@ static void fixup_hide_host_resource_fsl(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_resource_fsl); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl); + + +static int __init discover_phbs(void) +{ + if (ppc_md.discover_phbs) + ppc_md.discover_phbs(); + + return 0; +} +core_initcall(discover_phbs); diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index e99b7c547d7e..61571ae23953 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -443,46 +443,6 @@ void *pci_traverse_device_nodes(struct device_node *start, } EXPORT_SYMBOL_GPL(pci_traverse_device_nodes); -static struct pci_dn *pci_dn_next_one(struct pci_dn *root, - struct pci_dn *pdn) -{ - struct list_head *next = pdn->child_list.next; - - if (next != &pdn->child_list) - return list_entry(next, struct pci_dn, list); - - while (1) { - if (pdn == root) - return NULL; - - next = pdn->list.next; - if (next != &pdn->parent->child_list) - break; - - pdn = pdn->parent; - } - - return list_entry(next, struct pci_dn, list); -} - -void *traverse_pci_dn(struct pci_dn *root, - void *(*fn)(struct pci_dn *, void *), - void *data) -{ - struct pci_dn *pdn = root; - void *ret; - - /* Only scan the child nodes */ - for (pdn = pci_dn_next_one(root, pdn); pdn; - pdn = pci_dn_next_one(root, pdn)) { - ret = fn(pdn, data); - if (ret) - return ret; - } - - return NULL; -} - static void *add_pdn(struct device_node *dn, void *data) { struct pci_controller *hose = data; @@ -521,28 +481,6 @@ void pci_devs_phb_init_dynamic(struct pci_controller *phb) pci_traverse_device_nodes(dn, add_pdn, phb); } -/** - * pci_devs_phb_init - Initialize phbs and pci devs under them. - * - * This routine walks over all phb's (pci-host bridges) on the - * system, and sets up assorted pci-related structures - * (including pci info in the device node structs) for each - * pci device found underneath. This routine runs once, - * early in the boot sequence. - */ -static int __init pci_devs_phb_init(void) -{ - struct pci_controller *phb, *tmp; - - /* This must be done first so the device nodes have valid pci info! 
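The new `discover_phbs()` initcall above is one half of the PHB-probing rework: rather than walking `hose_list` at a fixed early point (the `pci_devs_phb_init()` initcall deleted below), a platform opts in through `ppc_md` and is called back once generic infrastructure is available. Hypothetical wiring, with every `example_*` identifier made up for illustration:

```c
/* Illustrative only: the binding string and helpers are invented. */
static void __init example_discover_phbs(void)
{
	struct device_node *np;

	for_each_compatible_node(np, "pci", "example,pcie")
		example_setup_phb(np);		/* hypothetical per-PHB setup */
}

define_machine(example) {
	.name		= "example",
	.discover_phbs	= example_discover_phbs,
};
```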
*/ - list_for_each_entry_safe(phb, tmp, &hose_list, list_node) - pci_devs_phb_init_dynamic(phb); - - return 0; -} - -core_initcall(pci_devs_phb_init); - static void pci_dev_pdn_setup(struct pci_dev *pdev) { struct pci_dn *pdn; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index a66f435dabbf..924d023dad0a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -41,6 +41,7 @@ #include <linux/pkeys.h> #include <linux/seq_buf.h> +#include <asm/interrupt.h> #include <asm/io.h> #include <asm/processor.h> #include <asm/mmu.h> @@ -659,11 +660,10 @@ static void do_break_handler(struct pt_regs *regs) } } -void do_break (struct pt_regs *regs, unsigned long address, - unsigned long error_code) +DEFINE_INTERRUPT_HANDLER(do_break) { current->thread.trap_nr = TRAP_HWBKPT; - if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, + if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, regs->dsisr, 11, SIGSEGV) == NOTIFY_STOP) return; @@ -681,7 +681,7 @@ void do_break (struct pt_regs *regs, unsigned long address, do_break_handler(regs); /* Deliver the signal to userspace */ - force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address); + force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)regs->dar); } #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ @@ -2047,6 +2047,9 @@ static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, unsigned long stack_page; unsigned long cpu = task_cpu(p); + if (!paca_ptrs) + return 0; + stack_page = (unsigned long)paca_ptrs[cpu]->emergency_sp - THREAD_SIZE; if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) return 1; @@ -2176,7 +2179,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack, * See if this is an exception frame. * We look for the "regshere" marker in the current frame. 
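The `STACK_INT_FRAME_SIZE` to `STACK_FRAME_WITH_PT_REGS` switch in the hunk just below matters for the outermost entry of a stack trace: the first exception frame ever pushed sits at the very top of the stack, with room for pt_regs but not for the full interrupt-frame size, so the old check rejected it and the final frame never printed. The new constant is presumably just:

```c
/* Assumed definition: a minimal frame plus the saved register area. */
#define STACK_FRAME_WITH_PT_REGS	(STACK_FRAME_OVERHEAD + sizeof(struct pt_regs))
```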
*/ - if (validate_sp(sp, tsk, STACK_INT_FRAME_SIZE) + if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS) && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) (sp + STACK_FRAME_OVERHEAD); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index ae3c41730367..9a4797d1d40d 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -707,7 +707,7 @@ static void __init save_fscr_to_task(void) init_task.thread.fscr = mfspr(SPRN_FSCR); } #else -static inline void save_fscr_to_task(void) {}; +static inline void save_fscr_to_task(void) {} #endif diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index e9d4eb6144e1..ccf77b985c8f 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1331,14 +1331,10 @@ static void __init prom_check_platform_support(void) if (prop_len > sizeof(vec)) prom_printf("WARNING: ibm,arch-vec-5-platform-support longer than expected (len: %d)\n", prop_len); - prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", - &vec, sizeof(vec)); - for (i = 0; i < sizeof(vec); i += 2) { - prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2 - , vec[i] - , vec[i + 1]); - prom_parse_platform_support(vec[i], vec[i + 1], - &supported); + prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", &vec, sizeof(vec)); + for (i = 0; i < prop_len; i += 2) { + prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2, vec[i], vec[i + 1]); + prom_parse_platform_support(vec[i], vec[i + 1], &supported); } } diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 3d44b73adb83..4f3d4ff3728c 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -262,8 +262,6 @@ long do_syscall_trace_enter(struct pt_regs *regs) { u32 flags; - user_exit(); - flags = READ_ONCE(current_thread_info()->flags) & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE); @@ -340,8 +338,6 @@ void do_syscall_trace_leave(struct pt_regs *regs) step = test_thread_flag(TIF_SINGLESTEP); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, step); - - user_enter(); } void __init pt_regs_check(void); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 71f38e9248be..bee984b1887b 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -64,6 +64,7 @@ #include <asm/mmu_context.h> #include <asm/cpu_has_feature.h> #include <asm/kasan.h> +#include <asm/mce.h> #include "setup.h" @@ -237,18 +238,17 @@ static int show_cpuinfo(struct seq_file *m, void *v) maj = (pvr >> 8) & 0xFF; min = pvr & 0xFF; - seq_printf(m, "processor\t: %lu\n", cpu_id); - seq_printf(m, "cpu\t\t: "); + seq_printf(m, "processor\t: %lu\ncpu\t\t: ", cpu_id); if (cur_cpu_spec->pvr_mask && cur_cpu_spec->cpu_name) - seq_printf(m, "%s", cur_cpu_spec->cpu_name); + seq_puts(m, cur_cpu_spec->cpu_name); else seq_printf(m, "unknown (%08x)", pvr); if (cpu_has_feature(CPU_FTR_ALTIVEC)) - seq_printf(m, ", altivec supported"); + seq_puts(m, ", altivec supported"); - seq_printf(m, "\n"); + seq_putc(m, '\n'); #ifdef CONFIG_TAU if (cpu_has_feature(CPU_FTR_TAU)) { @@ -327,7 +327,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "bogomips\t: %lu.%02lu\n", loops_per_jiffy / (500000 / HZ), (loops_per_jiffy / (5000 / HZ)) % 100); - seq_printf(m, "\n"); + seq_putc(m, '\n'); /* If this is the last cpu, print the summary */ if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids) @@ 
-938,6 +938,7 @@ void __init setup_arch(char **cmdline_p) exc_lvl_early_init(); emergency_stack_init(); + mce_init(); smp_release_cpus(); initmem_init(); diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 2dd0d9cb5a20..84058bbc8fe9 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -14,31 +14,31 @@ void irqstack_early_init(void); #ifdef CONFIG_PPC32 void setup_power_save(void); #else -static inline void setup_power_save(void) { }; +static inline void setup_power_save(void) { } #endif #if defined(CONFIG_PPC64) && defined(CONFIG_SMP) void check_smt_enabled(void); #else -static inline void check_smt_enabled(void) { }; +static inline void check_smt_enabled(void) { } #endif #if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) void setup_tlb_core_data(void); #else -static inline void setup_tlb_core_data(void) { }; +static inline void setup_tlb_core_data(void) { } #endif #if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_BOOKE) || defined(CONFIG_40x) void exc_lvl_early_init(void); #else -static inline void exc_lvl_early_init(void) { }; +static inline void exc_lvl_early_init(void) { } #endif #if defined(CONFIG_PPC64) || defined(CONFIG_VMAP_STACK) void emergency_stack_init(void); #else -static inline void emergency_stack_init(void) { }; +static inline void emergency_stack_init(void) { } #endif #ifdef CONFIG_PPC64 @@ -55,7 +55,7 @@ extern unsigned long spr_default_dscr; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE void kvm_cma_reserve(void); #else -static inline void kvm_cma_reserve(void) { }; +static inline void kvm_cma_reserve(void) { } #endif #ifdef CONFIG_TAU diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index c28e949cc222..560ed8b975e7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -67,6 +67,7 @@ #include <asm/kup.h> #include <asm/early_ioremap.h> #include <asm/pgalloc.h> +#include <asm/asm-prototypes.h> #include "setup.h" @@ -258,7 +259,7 @@ static void cpu_ready_for_interrupts(void) unsigned long spr_default_dscr = 0; -void __init record_spr_defaults(void) +static void __init record_spr_defaults(void) { if (early_cpu_has_feature(CPU_FTR_DSCR)) spr_default_dscr = mfspr(SPRN_DSCR); @@ -1008,7 +1009,7 @@ void rfi_flush_enable(bool enable) rfi_flush = enable; } -void entry_flush_enable(bool enable) +static void entry_flush_enable(bool enable) { if (enable) { do_entry_flush_fixups(enabled_flush_types); @@ -1020,7 +1021,7 @@ void entry_flush_enable(bool enable) entry_flush = enable; } -void uaccess_flush_enable(bool enable) +static void uaccess_flush_enable(bool enable) { if (enable) { do_uaccess_flush_fixups(enabled_flush_types); diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 53782aa60ade..9ded046edb0e 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -282,8 +282,6 @@ static void do_signal(struct task_struct *tsk) void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) { - user_exit(); - if (thread_info_flags & _TIF_UPROBE) uprobe_notify_resume(regs); @@ -299,8 +297,6 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) tracehook_notify_resume(regs); rseq_handle_notify_resume(NULL, regs); } - - user_enter(); } static unsigned long get_tm_stackpointer(struct task_struct *tsk) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 934cbdf6dd10..75ee918a120a 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -929,8 
+929,9 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, regs->gpr[3] = ksig->sig; regs->gpr[4] = (unsigned long) sc; regs->nip = (unsigned long)ksig->ka.sa.sa_handler; - /* enter the signal handler in big-endian mode */ + /* enter the signal handler in native-endian mode */ regs->msr &= ~MSR_LE; + regs->msr |= (MSR_KERNEL & MSR_LE); return 0; failed: diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9e2246e80efd..5a4d59a1070d 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -34,6 +34,7 @@ #include <linux/random.h> #include <linux/stackprotector.h> #include <linux/pgtable.h> +#include <linux/clockchips.h> #include <asm/ptrace.h> #include <linux/atomic.h> @@ -576,7 +577,7 @@ void tick_broadcast(const struct cpumask *mask) #endif #ifdef CONFIG_DEBUGGER -void debugger_ipi_callback(struct pt_regs *regs) +static void debugger_ipi_callback(struct pt_regs *regs) { debugger_ipi(regs); } diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index d36c6391eaf5..16ff0399a257 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -59,57 +59,64 @@ unsigned long compat_sys_mmap2(unsigned long addr, size_t len, /* * long long munging: * The 32 bit ABI passes long longs in an odd even register pair. + * High and low parts are swapped depending on endian mode, + * so define a macro (similar to mips linux32) to handle that. */ +#ifdef __LITTLE_ENDIAN__ +#define merge_64(low, high) ((u64)high << 32) | low +#else +#define merge_64(high, low) ((u64)high << 32) | low +#endif compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, - u32 reg6, u32 poshi, u32 poslo) + u32 reg6, u32 pos1, u32 pos2) { - return ksys_pread64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo); + return ksys_pread64(fd, ubuf, count, merge_64(pos1, pos2)); } compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, - u32 reg6, u32 poshi, u32 poslo) + u32 reg6, u32 pos1, u32 pos2) { - return ksys_pwrite64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo); + return ksys_pwrite64(fd, ubuf, count, merge_64(pos1, pos2)); } -compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offhi, u32 offlo, u32 count) +compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, u32 count) { - return ksys_readahead(fd, ((loff_t)offhi << 32) | offlo, count); + return ksys_readahead(fd, merge_64(offset1, offset2), count); } asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4, - unsigned long high, unsigned long low) + unsigned long len1, unsigned long len2) { - return ksys_truncate(path, (high << 32) | low); + return ksys_truncate(path, merge_64(len1, len2)); } -asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo, - u32 lenhi, u32 lenlo) +asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2, + u32 len1, u32 len2) { - return ksys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo, - ((loff_t)lenhi << 32) | lenlo); + return ksys_fallocate(fd, mode, ((loff_t)offset1 << 32) | offset2, + merge_64(len1, len2)); } -asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long high, - unsigned long low) +asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1, + unsigned long len2) { - return ksys_ftruncate(fd, (high << 32) | low); + return ksys_ftruncate(fd, merge_64(len1, len2)); } -long ppc32_fadvise64(int fd, u32 unused, u32 offset_high, u32 
offset_low, +long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2, size_t len, int advice) { - return ksys_fadvise64_64(fd, (u64)offset_high << 32 | offset_low, len, + return ksys_fadvise64_64(fd, merge_64(offset1, offset2), len, advice); } asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags, - unsigned offset_hi, unsigned offset_lo, - unsigned nbytes_hi, unsigned nbytes_lo) + unsigned offset1, unsigned offset2, + unsigned nbytes1, unsigned nbytes2) { - loff_t offset = ((loff_t)offset_hi << 32) | offset_lo; - loff_t nbytes = ((loff_t)nbytes_hi << 32) | nbytes_lo; + loff_t offset = merge_64(offset1, offset2); + loff_t nbytes = merge_64(nbytes1, nbytes2); return ksys_sync_file_range(fd, offset, nbytes, flags); } diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index f744eb5cba88..96b2157f0371 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -9,9 +9,7 @@ # 0 nospu restart_syscall sys_restart_syscall 1 nospu exit sys_exit -2 32 fork ppc_fork sys_fork -2 64 fork sys_fork -2 spu fork sys_ni_syscall +2 nospu fork sys_fork 3 common read sys_read 4 common write sys_write 5 common open sys_open compat_sys_open @@ -160,9 +158,7 @@ 119 32 sigreturn sys_sigreturn compat_sys_sigreturn 119 64 sigreturn sys_ni_syscall 119 spu sigreturn sys_ni_syscall -120 32 clone ppc_clone sys_clone -120 64 clone sys_clone -120 spu clone sys_ni_syscall +120 nospu clone sys_clone 121 common setdomainname sys_setdomainname 122 common uname sys_newuname 123 common modify_ldt sys_ni_syscall @@ -244,9 +240,7 @@ 186 spu sendfile sys_sendfile64 187 common getpmsg sys_ni_syscall 188 common putpmsg sys_ni_syscall -189 32 vfork ppc_vfork sys_vfork -189 64 vfork sys_vfork -189 spu vfork sys_ni_syscall +189 nospu vfork sys_vfork 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 common readahead sys_readahead compat_sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 @@ -322,9 +316,7 @@ 248 32 clock_nanosleep sys_clock_nanosleep_time32 248 64 clock_nanosleep sys_clock_nanosleep 248 spu clock_nanosleep sys_clock_nanosleep -249 32 swapcontext ppc_swapcontext compat_sys_swapcontext -249 64 swapcontext sys_swapcontext -249 spu swapcontext sys_ni_syscall +249 nospu swapcontext sys_swapcontext compat_sys_swapcontext 250 common tgkill sys_tgkill 251 32 utimes sys_utimes_time32 251 64 utimes sys_utimes @@ -522,9 +514,7 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open -435 32 clone3 ppc_clone3 sys_clone3 -435 64 clone3 sys_clone3 -435 spu clone3 sys_ni_syscall +435 nospu clone3 sys_clone3 436 common close_range sys_close_range 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 0b4694b8d248..6c31af7f4fa8 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -22,6 +22,7 @@ #include <linux/delay.h> #include <linux/workqueue.h> +#include <asm/interrupt.h> #include <asm/io.h> #include <asm/reg.h> #include <asm/nvram.h> @@ -100,16 +101,13 @@ static void TAUupdate(int cpu) * with interrupts disabled */ -void TAUException(struct pt_regs * regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(TAUException) { int cpu = smp_processor_id(); - irq_enter(); tau[cpu].interrupts++; TAUupdate(cpu); - - irq_exit(); } #endif /* CONFIG_TAU_INT */ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 67feb3524460..b67d93a609a2 100644 --- 
a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -53,9 +53,11 @@ #include <linux/of_clk.h> #include <linux/suspend.h> #include <linux/sched/cputime.h> +#include <linux/sched/clock.h> #include <linux/processor.h> #include <asm/trace.h> +#include <asm/interrupt.h> #include <asm/io.h> #include <asm/nvram.h> #include <asm/cache.h> @@ -570,7 +572,7 @@ void arch_irq_work_raise(void) * timer_interrupt - gets called when the decrementer overflows, * with interrupts disabled. */ -void timer_interrupt(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) { struct clock_event_device *evt = this_cpu_ptr(&decrementers); u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); @@ -609,7 +611,7 @@ void timer_interrupt(struct pt_regs *regs) #endif old_regs = set_irq_regs(regs); - irq_enter(); + trace_timer_interrupt_entry(regs); if (test_irq_work_pending()) { @@ -634,7 +636,7 @@ void timer_interrupt(struct pt_regs *regs) } trace_timer_interrupt_exit(regs); - irq_exit(); + set_irq_regs(old_regs); } EXPORT_SYMBOL(timer_interrupt); @@ -1030,6 +1032,7 @@ void __init time_init(void) tick_setup_hrtimer_broadcast(); of_clk_init(NULL); + enable_sched_clock_irqtime(); } /* diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 3ec7b443fe6b..1583fd1c6010 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -41,6 +41,7 @@ #include <asm/emulated_ops.h> #include <linux/uaccess.h> #include <asm/debugfs.h> +#include <asm/interrupt.h> #include <asm/io.h> #include <asm/machdep.h> #include <asm/rtas.h> @@ -342,8 +343,8 @@ static bool exception_common(int signr, struct pt_regs *regs, int code, show_signal_msg(signr, regs, code, addr); - if (arch_irqs_disabled() && !arch_irq_disabled_regs(regs)) - local_irq_enable(); + if (arch_irqs_disabled()) + interrupt_cond_local_irq_enable(regs); current->thread.trap_nr = code; @@ -430,16 +431,10 @@ nonrecoverable: regs->msr &= ~MSR_RI; #endif } - -void system_reset_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception) { unsigned long hsrr0, hsrr1; bool saved_hsrrs = false; - u8 ftrace_enabled = this_cpu_get_ftrace_enabled(); - - this_cpu_set_ftrace_enabled(0); - - nmi_enter(); /* * System reset can interrupt code where HSRRs are live and MSR[RI]=1. @@ -503,19 +498,20 @@ out: die("Unrecoverable nested System Reset", regs, SIGABRT); #endif /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) + if (!(regs->msr & MSR_RI)) { + /* For the reason explained in die_mce, nmi_exit before die */ + nmi_exit(); die("Unrecoverable System Reset", regs, SIGABRT); + } if (saved_hsrrs) { mtspr(SPRN_HSRR0, hsrr0); mtspr(SPRN_HSRR1, hsrr1); } - nmi_exit(); - - this_cpu_set_ftrace_enabled(ftrace_enabled); - /* What should we do here? We could issue a shutdown or hard reset. */ + + return 0; } /* @@ -788,23 +784,33 @@ int machine_check_generic(struct pt_regs *regs) } #endif /* everything else */ -void machine_check_exception(struct pt_regs *regs) +void die_mce(const char *str, struct pt_regs *regs, long err) { - int recover = 0; - /* - * BOOK3S_64 does not call this handler as a non-maskable interrupt - * (it uses its own early real-mode handler to handle the MCE proper - * and then raises irq_work to call this handler when interrupts are - * enabled). - * - * This is silly. The BOOK3S_64 should just call a different function - * rather than expecting semantics to magically change. Something - * like 'non_nmi_machine_check_exception()', perhaps? 
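`DEFINE_INTERRUPT_HANDLER_ASYNC`, used for `timer_interrupt` above (and for `do_IRQ`, `TAUException`, and `handle_hmi_exception` elsewhere in this diff), is the asynchronous sibling of the NMI wrapper sketched earlier; the `irq_enter()`/`irq_exit()` pairs deleted from the handler bodies are assumed to live in its enter/exit helpers:

```c
/* Sketch of the async variant; helper names follow the series' pattern. */
#define DEFINE_INTERRUPT_HANDLER_ASYNC(func)				\
	static void ____##func(struct pt_regs *regs);			\
									\
	void func(struct pt_regs *regs)					\
	{								\
		struct interrupt_state state;				\
									\
		interrupt_async_enter_prepare(regs, &state);		\
		____##func(regs);					\
		interrupt_async_exit_prepare(regs, &state);		\
	}								\
									\
	static void ____##func(struct pt_regs *regs)
```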
+ * The machine check wants to kill the interrupted context, but + * do_exit() checks for in_interrupt() and panics in that case, so + * exit the irq/nmi before calling die. */ - const bool nmi = !IS_ENABLED(CONFIG_PPC_BOOK3S_64); + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + irq_exit(); + else + nmi_exit(); + die(str, regs, err); +} - if (nmi) nmi_enter(); +/* + * BOOK3S_64 does not call this handler as a non-maskable interrupt + * (it uses its own early real-mode handler to handle the MCE proper + * and then raises irq_work to call this handler when interrupts are + * enabled). + */ +#ifdef CONFIG_PPC_BOOK3S_64 +DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception) +#else +DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception) +#endif +{ + int recover = 0; __this_cpu_inc(irq_stat.mce_exceptions); @@ -830,21 +836,21 @@ void machine_check_exception(struct pt_regs *regs) if (check_io_access(regs)) goto bail; - if (nmi) nmi_exit(); - - die("Machine check", regs, SIGBUS); + die_mce("Machine check", regs, SIGBUS); +bail: /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) - die("Unrecoverable Machine check", regs, SIGBUS); + die_mce("Unrecoverable Machine check", regs, SIGBUS); +#ifdef CONFIG_PPC_BOOK3S_64 return; - -bail: - if (nmi) nmi_exit(); +#else + return 0; +#endif } -void SMIException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(SMIException) /* async? */ { die("System Management Interrupt", regs, SIGABRT); } @@ -1030,12 +1036,11 @@ static void p9_hmi_special_emu(struct pt_regs *regs) } #endif /* CONFIG_VSX */ -void handle_hmi_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception) { struct pt_regs *old_regs; old_regs = set_irq_regs(regs); - irq_enter(); #ifdef CONFIG_VSX /* Real mode flagged P9 special emu is needed */ @@ -1055,46 +1060,42 @@ void handle_hmi_exception(struct pt_regs *regs) if (ppc_md.handle_hmi_exception) ppc_md.handle_hmi_exception(regs); - irq_exit(); set_irq_regs(old_regs); } -void unknown_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(unknown_exception) { - enum ctx_state prev_state = exception_enter(); - printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", regs->nip, regs->msr, regs->trap); _exception(SIGTRAP, regs, TRAP_UNK, 0); - - exception_exit(prev_state); } -void instruction_breakpoint_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception) { - enum ctx_state prev_state = exception_enter(); + printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", + regs->nip, regs->msr, regs->trap); + _exception(SIGTRAP, regs, TRAP_UNK, 0); +} + +DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception) +{ if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) - goto bail; + return; if (debugger_iabr_match(regs)) - goto bail; + return; _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); - -bail: - exception_exit(prev_state); } -void RunModeException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(RunModeException) { _exception(SIGTRAP, regs, TRAP_UNK, 0); } -void single_step_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(single_step_exception) { - enum ctx_state prev_state = exception_enter(); - clear_single_step(regs); clear_br_trace(regs); @@ -1103,16 +1104,12 @@ void single_step_exception(struct pt_regs *regs) if (notify_die(DIE_SSTEP, "single_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) - goto bail; + return; if (debugger_sstep(regs)) - goto bail; + return; _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); - -bail: - 
exception_exit(prev_state); } -NOKPROBE_SYMBOL(single_step_exception); /* * After we have successfully emulated an instruction, we have to @@ -1436,9 +1433,8 @@ static int emulate_math(struct pt_regs *regs) static inline int emulate_math(struct pt_regs *regs) { return -1; } #endif -void program_check_exception(struct pt_regs *regs) +static void do_program_check(struct pt_regs *regs) { - enum ctx_state prev_state = exception_enter(); unsigned int reason = get_reason(regs); /* We can now get here via a FP Unavailable exception if the core @@ -1447,22 +1443,22 @@ void program_check_exception(struct pt_regs *regs) if (reason & REASON_FP) { /* IEEE FP exception */ parse_fpe(regs); - goto bail; + return; } if (reason & REASON_TRAP) { unsigned long bugaddr; /* Debugger is first in line to stop recursive faults in * rcu_lock, notify_die, or atomic_notifier_call_chain */ if (debugger_bpt(regs)) - goto bail; + return; if (kprobe_handler(regs)) - goto bail; + return; /* trap exception */ if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) - goto bail; + return; bugaddr = regs->nip; /* @@ -1474,10 +1470,10 @@ void program_check_exception(struct pt_regs *regs) if (!(regs->msr & MSR_PR) && /* not user-mode */ report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) { regs->nip += 4; - goto bail; + return; } _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); - goto bail; + return; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (reason & REASON_TM) { @@ -1498,7 +1494,7 @@ void program_check_exception(struct pt_regs *regs) */ if (user_mode(regs)) { _exception(SIGILL, regs, ILL_ILLOPN, regs->nip); - goto bail; + return; } else { printk(KERN_EMERG "Unexpected TM Bad Thing exception " "at %lx (msr 0x%lx) tm_scratch=%llx\n", @@ -1518,9 +1514,7 @@ void program_check_exception(struct pt_regs *regs) if (!user_mode(regs)) goto sigill; - /* We restore the interrupt state now */ - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); + interrupt_cond_local_irq_enable(regs); /* (reason & REASON_ILLEGAL) would be the obvious thing here, * but there seems to be a hardware bug on the 405GP (RevD) @@ -1531,7 +1525,7 @@ void program_check_exception(struct pt_regs *regs) * pattern to occurrences etc. -dgibson 31/Mar/2003 */ if (!emulate_math(regs)) - goto bail; + return; /* Try to emulate it if we should. */ if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) { @@ -1539,10 +1533,10 @@ void program_check_exception(struct pt_regs *regs) case 0: regs->nip += 4; emulate_single_step(regs); - goto bail; + return; case -EFAULT: _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - goto bail; + return; } } @@ -1552,34 +1546,31 @@ sigill: else _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); -bail: - exception_exit(prev_state); } -NOKPROBE_SYMBOL(program_check_exception); + +DEFINE_INTERRUPT_HANDLER(program_check_exception) +{ + do_program_check(regs); +} /* * This occurs when running in hypervisor mode on POWER6 or later * and an illegal instruction is encountered. 
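The hunk above splits program_check_exception() into a thin DEFINE_INTERRUPT_HANDLER wrapper around a static do_program_check(), so that emulation_assist_interrupt() just below can reuse the body without running the wrapper's entry/exit accounting a second time. As a rough, self-contained sketch of that pattern (the _demo names and the printf bookkeeping are stand-ins, not the real asm/interrupt.h macros):

#include <stdio.h>

struct demo_regs { unsigned long msr; };

/* a DEFINE_INTERRUPT_HANDLER-style macro might expand to roughly this:
 * a public entry point that brackets a static body with bookkeeping */
#define DEFINE_HANDLER_DEMO(name)					\
	static void __demo_##name(struct demo_regs *regs);		\
	void name(struct demo_regs *regs)				\
	{								\
		printf("entry accounting for %s\n", #name);		\
		__demo_##name(regs);					\
		printf("exit accounting for %s\n", #name);		\
	}								\
	static void __demo_##name(struct demo_regs *regs)

static void do_program_check_demo(struct demo_regs *regs)
{
	(void)regs;	/* shared body; bookkeeping has already run once */
}

DEFINE_HANDLER_DEMO(program_check_demo)
{
	do_program_check_demo(regs);
}

DEFINE_HANDLER_DEMO(emulation_assist_demo)
{
	regs->msr |= 0x1;		/* tag a reason, like REASON_ILLEGAL */
	do_program_check_demo(regs);	/* call the body, not the entry point */
}

int main(void)
{
	struct demo_regs regs = { 0 };

	program_check_demo(&regs);
	emulation_assist_demo(&regs);
	return 0;
}

Calling the shared body rather than the wrapped entry point is what keeps the irq/nmi accounting from nesting.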
*/ -void emulation_assist_interrupt(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt) { regs->msr |= REASON_ILLEGAL; - program_check_exception(regs); + do_program_check(regs); } -NOKPROBE_SYMBOL(emulation_assist_interrupt); -void alignment_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(alignment_exception) { - enum ctx_state prev_state = exception_enter(); int sig, code, fixed = 0; unsigned long reason; - /* We restore the interrupt state now */ - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); + interrupt_cond_local_irq_enable(regs); reason = get_reason(regs); - if (reason & REASON_BOUNDARY) { sig = SIGBUS; code = BUS_ADRALN; @@ -1587,7 +1578,7 @@ void alignment_exception(struct pt_regs *regs) } if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT)) - goto bail; + return; /* we don't implement logging of alignment exceptions */ if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS)) @@ -1597,7 +1588,7 @@ void alignment_exception(struct pt_regs *regs) /* skip over emulated instruction */ regs->nip += inst_length(reason); emulate_single_step(regs); - goto bail; + return; } /* Operand address was bad */ @@ -1612,13 +1603,10 @@ bad: if (user_mode(regs)) _exception(sig, regs, code, regs->dar); else - bad_page_fault(regs, regs->dar, sig); - -bail: - exception_exit(prev_state); + bad_page_fault(regs, sig); } -void StackOverflow(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(StackOverflow) { pr_crit("Kernel stack overflow in process %s[%d], r1=%lx\n", current->comm, task_pid_nr(current), regs->gpr[1]); @@ -1627,46 +1615,33 @@ void StackOverflow(struct pt_regs *regs) panic("kernel stack overflow"); } -void stack_overflow_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(stack_overflow_exception) { - enum ctx_state prev_state = exception_enter(); - die("Kernel stack overflow", regs, SIGSEGV); - - exception_exit(prev_state); } -void kernel_fp_unavailable_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception) { - enum ctx_state prev_state = exception_enter(); - printk(KERN_EMERG "Unrecoverable FP Unavailable Exception " "%lx at %lx\n", regs->trap, regs->nip); die("Unrecoverable FP Unavailable Exception", regs, SIGABRT); - - exception_exit(prev_state); } -void altivec_unavailable_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(altivec_unavailable_exception) { - enum ctx_state prev_state = exception_enter(); - if (user_mode(regs)) { /* A user program has executed an altivec instruction, but this kernel doesn't support altivec. 
*/ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - goto bail; + return; } printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception " "%lx at %lx\n", regs->trap, regs->nip); die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); - -bail: - exception_exit(prev_state); } -void vsx_unavailable_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception) { if (user_mode(regs)) { /* A user program has executed an vsx instruction, @@ -1697,7 +1672,7 @@ static void tm_unavailable(struct pt_regs *regs) die("Unrecoverable TM Unavailable Exception", regs, SIGABRT); } -void facility_unavailable_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception) { static char *facility_strings[] = { [FSCR_FP_LG] = "FPU", @@ -1737,9 +1712,7 @@ void facility_unavailable_exception(struct pt_regs *regs) die("Unexpected facility unavailable exception", regs, SIGABRT); } - /* We restore the interrupt state now */ - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); + interrupt_cond_local_irq_enable(regs); if (status == FSCR_DSCR_LG) { /* @@ -1817,7 +1790,7 @@ out: #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -void fp_unavailable_tm(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(fp_unavailable_tm) { /* Note: This does not handle any kind of FP laziness. */ @@ -1850,7 +1823,7 @@ void fp_unavailable_tm(struct pt_regs *regs) tm_recheckpoint(¤t->thread); } -void altivec_unavailable_tm(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(altivec_unavailable_tm) { /* See the comments in fp_unavailable_tm(). This function operates * the same way. @@ -1865,7 +1838,7 @@ void altivec_unavailable_tm(struct pt_regs *regs) current->thread.used_vr = 1; } -void vsx_unavailable_tm(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(vsx_unavailable_tm) { /* See the comments in fp_unavailable_tm(). This works similarly, * though we're loading both FP and VEC registers in here. @@ -1890,11 +1863,40 @@ void vsx_unavailable_tm(struct pt_regs *regs) } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -void performance_monitor_exception(struct pt_regs *regs) +#ifdef CONFIG_PPC64 +DECLARE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi); +DEFINE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi) { __this_cpu_inc(irq_stat.pmu_irqs); perf_irq(regs); + + return 0; +} +#endif + +DECLARE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async); +DEFINE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async) +{ + __this_cpu_inc(irq_stat.pmu_irqs); + + perf_irq(regs); +} + +DEFINE_INTERRUPT_HANDLER_RAW(performance_monitor_exception) +{ + /* + * On 64-bit, if perf interrupts hit in a local_irq_disable + * (soft-masked) region, we consider them as NMIs. This is required to + * prevent hash faults on user addresses when reading callchains (and + * looks better from an irq tracing perspective). 
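The RAW wrapper above is the point of this hunk: a raw entry performs no nmi/irq bookkeeping of its own, which is what lets it choose the NMI or async flavour at runtime in the dispatch that follows. A self-contained sketch of that idea (all _demo names are hypothetical; the real handlers are the two defined just above):

#include <stdbool.h>
#include <stdio.h>

struct demo_pmu_regs { bool soft_masked; };

static void demo_perf_irq_nmi(struct demo_pmu_regs *regs)
{
	(void)regs;
	printf("NMI flavour: nmi_enter()-style bracketing, no hash faults\n");
}

static void demo_perf_irq_async(struct demo_pmu_regs *regs)
{
	(void)regs;
	printf("async flavour: irq_enter()-style bracketing, irq-traced\n");
}

/* raw entry: since no accounting has run yet, it may still pick a side */
long demo_perf_raw_dispatch(struct demo_pmu_regs *regs)
{
	if (regs->soft_masked)
		demo_perf_irq_nmi(regs);	/* hit a soft-masked region */
	else
		demo_perf_irq_async(regs);
	return 0;
}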
+ */ + if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs))) + performance_monitor_exception_nmi(regs); + else + performance_monitor_exception_async(regs); + + return 0; } #ifdef CONFIG_PPC_ADV_DEBUG_REGS @@ -1957,8 +1959,10 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status) mtspr(SPRN_DBCR0, current->thread.debug.dbcr0); } -void DebugException(struct pt_regs *regs, unsigned long debug_status) +DEFINE_INTERRUPT_HANDLER(DebugException) { + unsigned long debug_status = regs->dsisr; + current->thread.debug.dbsr = debug_status; /* Hack alert: On BookE, Branch Taken stops on the branch itself, while @@ -2024,11 +2028,10 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status) } else handle_debug(regs, debug_status); } -NOKPROBE_SYMBOL(DebugException); #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ #ifdef CONFIG_ALTIVEC -void altivec_assist_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(altivec_assist_exception) { int err; @@ -2062,9 +2065,10 @@ void altivec_assist_exception(struct pt_regs *regs) #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_FSL_BOOKE -void CacheLockingException(struct pt_regs *regs, unsigned long address, - unsigned long error_code) +DEFINE_INTERRUPT_HANDLER(CacheLockingException) { + unsigned long error_code = regs->dsisr; + /* We treat cache locking instructions from the user * as priv ops, in the future we could try to do * something smarter @@ -2076,7 +2080,7 @@ void CacheLockingException(struct pt_regs *regs, unsigned long address, #endif /* CONFIG_FSL_BOOKE */ #ifdef CONFIG_SPE -void SPEFloatingPointException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException) { extern int do_spe_mathemu(struct pt_regs *regs); unsigned long spefscr; @@ -2084,9 +2088,7 @@ void SPEFloatingPointException(struct pt_regs *regs) int code = FPE_FLTUNK; int err; - /* We restore the interrupt state now */ - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); + interrupt_cond_local_irq_enable(regs); flush_spe_to_thread(current); @@ -2128,14 +2130,12 @@ void SPEFloatingPointException(struct pt_regs *regs) return; } -void SPEFloatingPointRoundException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException) { extern int speround_handler(struct pt_regs *regs); int err; - /* We restore the interrupt state now */ - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); + interrupt_cond_local_irq_enable(regs); preempt_disable(); if (regs->msr & MSR_SPE) @@ -2170,13 +2170,12 @@ void SPEFloatingPointRoundException(struct pt_regs *regs) * in the MSR is 0. This indicates that SRR0/1 are live, and that * we therefore lost state by taking this exception. */ -void unrecoverable_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(unrecoverable_exception) { pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n", regs->trap, regs->nip, regs->msr); die("Unrecoverable exception", regs, SIGABRT); } -NOKPROBE_SYMBOL(unrecoverable_exception); #if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x) /* @@ -2190,7 +2189,7 @@ void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs) return; } -void WatchdogException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(WatchdogException) /* XXX NMI? async? */ { printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n"); WatchdogHandler(regs); @@ -2201,13 +2200,12 @@ void WatchdogException(struct pt_regs *regs) * We enter here if we discover during exception entry that we are * running in supervisor mode with a userspace value in the stack pointer. 
*/ -void kernel_bad_stack(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(kernel_bad_stack) { printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n", regs->gpr[1], regs->nip); die("Bad kernel stack pointer", regs, SIGABRT); } -NOKPROBE_SYMBOL(kernel_bad_stack); void __init trap_init(void) { diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index af3c15a1d41e..c9a8f4781a10 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -26,7 +26,9 @@ #include <linux/delay.h> #include <linux/smp.h> +#include <asm/interrupt.h> #include <asm/paca.h> +#include <asm/nmi.h> /* * The powerpc watchdog ensures that each CPU is able to service timers. @@ -247,16 +249,17 @@ static void watchdog_timer_interrupt(int cpu) watchdog_smp_panic(cpu, tb); } -void soft_nmi_interrupt(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) { unsigned long flags; int cpu = raw_smp_processor_id(); u64 tb; - if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) - return; + /* should only arrive from kernel, with irqs disabled */ + WARN_ON_ONCE(!arch_irq_disabled_regs(regs)); - nmi_enter(); + if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) + return 0; __this_cpu_inc(irq_stat.soft_nmi_irqs); @@ -265,7 +268,7 @@ void soft_nmi_interrupt(struct pt_regs *regs) wd_smp_lock(&flags); if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) { wd_smp_unlock(&flags); - goto out; + return 0; } set_cpu_stuck(cpu, tb); @@ -289,8 +292,7 @@ void soft_nmi_interrupt(struct pt_regs *regs) if (wd_panic_timeout_tb < 0x7fffffff) mtspr(SPRN_DEC, wd_panic_timeout_tb); -out: - nmi_exit(); + return 0; } static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c index d0e459bb2f05..9842e33533df 100644 --- a/arch/powerpc/kexec/elf_64.c +++ b/arch/powerpc/kexec/elf_64.c @@ -102,7 +102,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr); } - fdt_size = fdt_totalsize(initial_boot_params) * 2; + fdt_size = kexec_fdt_totalsize_ppc64(image); fdt = kmalloc(fdt_size, GFP_KERNEL); if (!fdt) { pr_err("Not enough memory for the device tree.\n"); diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index c69bcf9b547a..02b9e4d0dc40 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -21,6 +21,7 @@ #include <linux/memblock.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <asm/setup.h> #include <asm/drmem.h> #include <asm/kexec_ranges.h> #include <asm/crashdump-ppc64.h> @@ -926,6 +927,40 @@ out: } /** + * kexec_fdt_totalsize_ppc64 - Return the estimated size needed to set up the + * FDT for the kexec/kdump kernel. + * @image: kexec image being loaded. + * + * Returns the estimated size needed for the kexec/kdump kernel FDT. + */ +unsigned int kexec_fdt_totalsize_ppc64(struct kimage *image) +{ + unsigned int fdt_size; + u64 usm_entries; + + /* + * The estimate below more than accounts for a typical kexec case where + * the additional space is to accommodate things like kexec cmdline, + * chosen node with properties for initrd start & end addresses and + * a property to indicate kexec boot. + */ + fdt_size = fdt_totalsize(initial_boot_params) + (2 * COMMAND_LINE_SIZE); + if (image->type != KEXEC_TYPE_CRASH) + return fdt_size; + + /* + * For the kdump kernel, also account for linux,usable-memory and + * linux,drconf-usable-memory properties.
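The estimate that this comment describes (its usm_entries arithmetic continues just below) is easy to sanity-check with concrete numbers. A stand-alone worked example, where the 16 GiB / 256 MiB / 512 MiB figures are invented for illustration:

#include <stdio.h>

int main(void)
{
	/* stand-ins for memblock_end_of_DRAM(), drmem_lmb_size() and
	 * resource_size(&crashk_res); all three values are made up */
	unsigned long long dram_end   = 16ULL << 30;	/* 16 GiB RAM */
	unsigned long long lmb_size   = 256ULL << 20;	/* 256 MiB LMBs */
	unsigned long long crash_size = 512ULL << 20;	/* 512 MiB crashkernel */

	unsigned long long usm_entries =
		dram_end / lmb_size + 2 * (crash_size / lmb_size);

	/* 64 + 2*2 = 68 entries, i.e. 544 extra bytes of u64 cells */
	printf("usm_entries=%llu, extra bytes=%llu\n", usm_entries,
	       usm_entries * (unsigned long long)sizeof(unsigned long long));
	return 0;
}

Compared with the old fdt_totalsize(initial_boot_params) * 2, the new estimate scales with what actually gets added to the FDT rather than with the size of the boot FDT.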
Get an approximation of the + * number of usable memory entries and use it for FDT size estimation. + */ + usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) + + (2 * (resource_size(&crashk_res) / drmem_lmb_size()))); + fdt_size += (unsigned int)(usm_entries * sizeof(u64)); + + return fdt_size; +} + +/** * setup_new_fdt_ppc64 - Update the flattend device-tree of the kernel * being loaded. * @image: kexec image being loaded. diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 549591d9aaa2..e45644657d49 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -54,6 +54,7 @@ config KVM_BOOK3S_32 select KVM select KVM_BOOK3S_32_HANDLER select KVM_BOOK3S_PR_POSSIBLE + select PPC_FPU help Support running unmodified book3s_32 guest kernels in virtual machines on book3s_32 host processors. diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 38ea396a23d6..c77f2d4f44ca 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -27,6 +27,7 @@ #include <asm/cputable.h> #include <asm/pte-walk.h> +#include "book3s.h" #include "trace_hv.h" //#define DEBUG_RESIZE_HPT 1 diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index b08cc15f31c7..fdb57be71aa6 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -61,10 +61,6 @@ #define SPRN_GQR6 918 #define SPRN_GQR7 919 -/* Book3S_32 defines mfsrin(v) - but that messes up our abstract * function pointers, so let's just disable the define. */ -#undef mfsrin enum priv_level { PRIV_PROBLEM = 0, PRIV_SUPER = 1, diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index f09708da216e..13bad6bf4c95 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -53,6 +53,7 @@ #include <asm/cputable.h> #include <asm/cacheflush.h> #include <linux/uaccess.h> +#include <asm/interrupt.h> #include <asm/io.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> @@ -3408,8 +3409,9 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) kvmppc_set_host_core(pcpu); + guest_exit_irqoff(); + local_irq_enable(); - guest_exit(); /* Let secondaries go back to the offline loop */ for (i = 0; i < controlled_threads; ++i) { @@ -4236,8 +4238,9 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, kvmppc_set_host_core(pcpu); + guest_exit_irqoff(); + local_irq_enable(); - guest_exit(); cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest); diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index f3d3183249fe..158d309b42a3 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -17,6 +17,7 @@ #include <asm/asm-prototypes.h> #include <asm/cputable.h> +#include <asm/interrupt.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> #include <asm/archrandom.h> diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 30dfeac731c6..e7219b6f5f9a 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1813,9 +1813,9 @@ int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, return -EINVAL; if ((level == 1 && state->lsi) || level == KVM_INTERRUPT_SET_LEVEL) - state->asserted = 1; + state->asserted = true; else if (level == 0 || level == KVM_INTERRUPT_UNSET) { - state->asserted = 0; + state->asserted = false; return 0; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index
f38ae3e54b37..7d5fe43f85c4 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -20,6 +20,7 @@ #include <asm/cputable.h> #include <linux/uaccess.h> +#include <asm/interrupt.h> #include <asm/kvm_ppc.h> #include <asm/cacheflush.h> #include <asm/dbell.h> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6c083a9b3545..a2a68a958fa0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1522,7 +1522,7 @@ int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu, return emulated; } -int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; @@ -1540,7 +1540,7 @@ int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) return result; } -int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; @@ -1558,7 +1558,7 @@ int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) return result; } -int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; @@ -1576,7 +1576,7 @@ int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) return result; } -int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c index 1550e0d2513a..eb2919ddf9b9 100644 --- a/arch/powerpc/lib/pmem.c +++ b/arch/powerpc/lib/pmem.c @@ -6,6 +6,7 @@ #include <linux/string.h> #include <linux/export.h> #include <linux/uaccess.h> +#include <linux/libnvdimm.h> #include <asm/cacheflush.h> diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index ede093e96234..bb5c20d4ca91 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1306,9 +1306,11 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, if ((word & 0xfe2) == 2) op->type = SYSCALL; else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && - (word & 0xfe3) == 1) + (word & 0xfe3) == 1) { /* scv */ op->type = SYSCALL_VECTORED_0; - else + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + } else op->type = UNKNOWN; return 0; #endif @@ -1412,7 +1414,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 1: if (!cpu_has_feature(CPU_FTR_ARCH_31)) - return -1; + goto unknown_opcode; prefix_r = GET_PREFIX_R(word); ra = GET_PREFIX_RA(suffix); @@ -1445,8 +1447,13 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 4: + /* + * There are very many instructions with this primary opcode + * introduced in the ISA as early as v2.03. However, the ones + * we currently emulate were all introduced with ISA 3.0 + */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; switch (word & 0x3f) { case 48: /* maddhd */ @@ -1472,7 +1479,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, * There are other instructions from ISA 3.0 with the same * primary opcode which do not have emulation support yet. 
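A pattern worth calling out in this sstep.c hunk: every return -1 on a feature-gated instruction becomes goto unknown_opcode, and (as a later hunk in this file shows) that label sets op->type = UNKNOWN and returns 0, so callers see "cannot emulate this instruction" rather than a bare analysis failure. A hypothetical stand-alone sketch of the gating pattern:

#include <stdbool.h>

enum demo_op_type { DEMO_UNKNOWN, DEMO_COMPUTE };
struct demo_op { enum demo_op_type type; };

/* gate each emulated instruction on the ISA level of the running CPU */
int demo_analyse_instr(struct demo_op *op, unsigned int extended_opcode,
		       bool cpu_has_isa_v3_0)
{
	switch (extended_opcode) {
	case 755:	/* darn: introduced in ISA v3.0 */
		if (!cpu_has_isa_v3_0)
			goto unknown_opcode;	/* was: return -1 */
		op->type = DEMO_COMPUTE;
		return 0;
	}
unknown_opcode:
	op->type = DEMO_UNKNOWN;	/* caller treats this as not emulable */
	return 0;
}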
*/ - return -1; + goto unknown_opcode; #endif case 7: /* mulli */ @@ -1532,6 +1539,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 19: if (((word >> 1) & 0x1f) == 2) { /* addpcis */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; imm = (short) (word & 0xffc1); /* d0 + d2 fields */ imm |= (word >> 15) & 0x3e; /* d1 field */ op->val = regs->nip + (imm << 16) + 4; @@ -1844,7 +1853,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 265: /* modud */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = regs->gpr[ra] % regs->gpr[rb]; goto compute_done; #endif @@ -1854,7 +1863,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 267: /* moduw */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = (unsigned int) regs->gpr[ra] % (unsigned int) regs->gpr[rb]; goto compute_done; @@ -1891,7 +1900,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #endif case 755: /* darn */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; switch (ra & 0x3) { case 0: /* 32-bit conditioned */ @@ -1909,18 +1918,18 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, goto compute_done; } - return -1; + goto unknown_opcode; #ifdef __powerpc64__ case 777: /* modsd */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = (long int) regs->gpr[ra] % (long int) regs->gpr[rb]; goto compute_done; #endif case 779: /* modsw */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = (int) regs->gpr[ra] % (int) regs->gpr[rb]; goto compute_done; @@ -1997,14 +2006,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #endif case 538: /* cnttzw */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; val = (unsigned int) regs->gpr[rd]; op->val = (val ? __builtin_ctz(val) : 32); goto logical_done; #ifdef __powerpc64__ case 570: /* cnttzd */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; val = regs->gpr[rd]; op->val = (val ? __builtin_ctzl(val) : 64); goto logical_done; @@ -2114,7 +2123,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 890: /* extswsli with sh_5 = 0 */ case 891: /* extswsli with sh_5 = 1 */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->type = COMPUTE + SETREG; sh = rb | ((word & 2) << 4); val = (signed int) regs->gpr[rd]; @@ -2441,6 +2450,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 268: /* lxvx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 16; @@ -2450,6 +2461,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 269: /* lxvl */ case 301: { /* lxvll */ int nb; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->ea = ra ? 
regs->gpr[ra] : 0; nb = regs->gpr[rb] & 0xff; @@ -2470,13 +2483,15 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 333: /* lxvpx */ if (!cpu_has_feature(CPU_FTR_ARCH_31)) - return -1; + goto unknown_opcode; op->reg = VSX_REGISTER_XTP(rd); op->type = MKOP(LOAD_VSX, 0, 32); op->element_size = 32; break; case 364: /* lxvwsx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 4; @@ -2484,6 +2499,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 396: /* stxvx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 16; @@ -2493,6 +2510,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 397: /* stxvl */ case 429: { /* stxvll */ int nb; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->ea = ra ? regs->gpr[ra] : 0; nb = regs->gpr[rb] & 0xff; @@ -2506,7 +2525,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, } case 461: /* stxvpx */ if (!cpu_has_feature(CPU_FTR_ARCH_31)) - return -1; + goto unknown_opcode; op->reg = VSX_REGISTER_XTP(rd); op->type = MKOP(STORE_VSX, 0, 32); op->element_size = 32; @@ -2544,6 +2563,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 781: /* lxsibzx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 1); op->element_size = 8; @@ -2551,6 +2572,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 812: /* lxvh8x */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 2; @@ -2558,6 +2581,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 813: /* lxsihzx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 2); op->element_size = 8; @@ -2571,6 +2596,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 876: /* lxvb16x */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 1; @@ -2584,6 +2611,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 909: /* stxsibx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 1); op->element_size = 8; @@ -2591,6 +2620,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 940: /* stxvh8x */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 2; @@ -2598,6 +2629,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 941: /* stxsihx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 2); op->element_size = 8; @@ -2611,6 +2644,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 1004: /* stxvb16x */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg 
= rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 1; @@ -2719,12 +2754,16 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->type = MKOP(LOAD_FP, 0, 16); break; case 2: /* lxsd */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd + 32; op->type = MKOP(LOAD_VSX, 0, 8); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 3: /* lxssp */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd + 32; op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 8; @@ -2754,7 +2793,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef CONFIG_VSX case 6: if (!cpu_has_feature(CPU_FTR_ARCH_31)) - return -1; + goto unknown_opcode; op->ea = dqform_ea(word, regs); op->reg = VSX_REGISTER_XTP(rd); op->element_size = 32; @@ -2777,6 +2816,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 1: /* lxv */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->ea = dqform_ea(word, regs); if (word & 8) op->reg = rd + 32; @@ -2787,6 +2828,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 2: /* stxsd with LSB of DS field = 0 */ case 6: /* stxsd with LSB of DS field = 1 */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->ea = dsform_ea(word, regs); op->reg = rd + 32; op->type = MKOP(STORE_VSX, 0, 8); @@ -2796,6 +2839,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 3: /* stxssp with LSB of DS field = 0 */ case 7: /* stxssp with LSB of DS field = 1 */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->ea = dsform_ea(word, regs); op->reg = rd + 32; op->type = MKOP(STORE_VSX, 0, 4); @@ -2804,6 +2849,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 5: /* stxv */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->ea = dqform_ea(word, regs); if (word & 8) op->reg = rd + 32; @@ -2833,7 +2880,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 1: /* Prefixed instructions */ if (!cpu_has_feature(CPU_FTR_ARCH_31)) - return -1; + goto unknown_opcode; prefix_r = GET_PREFIX_R(word); ra = GET_PREFIX_RA(suffix); @@ -2972,6 +3019,20 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, } + if (OP_IS_LOAD_STORE(op->type) && (op->type & UPDATE)) { + switch (GETTYPE(op->type)) { + case LOAD: + if (ra == rd) + goto unknown_opcode; + fallthrough; + case STORE: + case LOAD_FP: + case STORE_FP: + if (ra == 0) + goto unknown_opcode; + } + } + #ifdef CONFIG_VSX if ((GETTYPE(op->type) == LOAD_VSX || GETTYPE(op->type) == STORE_VSX) && @@ -2982,6 +3043,10 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 0; + unknown_opcode: + op->type = UNKNOWN; + return 0; + logical_done: if (word & 1) set_cr0(regs, op); diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile index 3f972db17761..446d9de88ce4 100644 --- a/arch/powerpc/mm/book3s32/Makefile +++ b/arch/powerpc/mm/book3s32/Makefile @@ -6,4 +6,6 @@ ifdef CONFIG_KASAN CFLAGS_mmu.o += -DDISABLE_BRANCH_PROFILING endif -obj-y += mmu.o hash_low.o mmu_context.o tlb.o nohash_low.o +obj-y += mmu.o mmu_context.o +obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o +obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 859e5bd603ac..d7eb266a3f7a 100644 --- 
a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -234,7 +234,7 @@ void mmu_mark_initmem_nx(void) if (is_module_segment(i << 28)) continue; - mtsrin(mfsrin(i << 28) | 0x10000000, i << 28); + mtsr(mfsr(i << 28) | 0x10000000, i << 28); } } diff --git a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c index b5e9fff8c217..a688e1324ae5 100644 --- a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c @@ -16,10 +16,6 @@ unsigned int hpage_shift; EXPORT_SYMBOL(hpage_shift); -extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn, - unsigned long pa, unsigned long rlags, - unsigned long vflags, int psize, int ssize); - int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, unsigned int shift, unsigned int mmu_psize) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 73b06adb6eeb..581b20a2feaf 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -38,6 +38,7 @@ #include <linux/pgtable.h> #include <asm/debugfs.h> +#include <asm/interrupt.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/mmu_context.h> @@ -1143,10 +1144,10 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) page = pte_page(pte); /* page is dirty */ - if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { + if (!test_bit(PG_dcache_clean, &page->flags) && !PageReserved(page)) { if (trap == 0x400) { flush_dcache_icache_page(page); - set_bit(PG_arch_1, &page->flags); + set_bit(PG_dcache_clean, &page->flags); } else pp |= HPTE_R_N; } @@ -1288,7 +1289,6 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long flags) { bool is_thp; - enum ctx_state prev_state = exception_enter(); pgd_t *pgdir; unsigned long vsid; pte_t *ptep; @@ -1490,7 +1490,6 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, DBG_LOW(" -> rc=%d\n", rc); bail: - exception_exit(prev_state); return rc; } EXPORT_SYMBOL_GPL(hash_page_mm); @@ -1512,16 +1511,22 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, } EXPORT_SYMBOL_GPL(hash_page); -int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr, - unsigned long msr) +DECLARE_INTERRUPT_HANDLER_RET(__do_hash_fault); +DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) { + unsigned long ea = regs->dar; + unsigned long dsisr = regs->dsisr; unsigned long access = _PAGE_PRESENT | _PAGE_READ; unsigned long flags = 0; - struct mm_struct *mm = current->mm; - unsigned int region_id = get_region_id(ea); + struct mm_struct *mm; + unsigned int region_id; + long err; + region_id = get_region_id(ea); if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID)) mm = &init_mm; + else + mm = current->mm; if (dsisr & DSISR_NOHPTE) flags |= HPTE_NOHPTE_UPDATE; @@ -1537,13 +1542,66 @@ int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr, * 2) user space access kernel space. 
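The access computation that follows encodes a default-deny rule for the hash fault path; restated as a tiny stand-alone predicate (simplified, with made-up flag values):

#include <stdbool.h>

#define DEMO_PAGE_PRESENT	0x1UL
#define DEMO_PAGE_READ		0x2UL
#define DEMO_PAGE_EXEC		0x4UL
#define DEMO_PAGE_PRIVILEGED	0x8UL

unsigned long demo_hash_access(bool user_mode, bool user_region,
			       bool instruction_fetch)
{
	unsigned long access = DEMO_PAGE_PRESENT | DEMO_PAGE_READ;

	/* assume a privileged (kernel) access by default... */
	access |= DEMO_PAGE_PRIVILEGED;
	/* ...and drop the requirement only for genuine user accesses */
	if (user_mode || user_region)
		access &= ~DEMO_PAGE_PRIVILEGED;
	if (instruction_fetch)		/* trap 0x400 */
		access |= DEMO_PAGE_EXEC;
	return access;
}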
*/ access |= _PAGE_PRIVILEGED; - if ((msr & MSR_PR) || (region_id == USER_REGION_ID)) + if (user_mode(regs) || (region_id == USER_REGION_ID)) access &= ~_PAGE_PRIVILEGED; - if (trap == 0x400) + if (regs->trap == 0x400) access |= _PAGE_EXEC; - return hash_page_mm(mm, ea, access, trap, flags); + err = hash_page_mm(mm, ea, access, regs->trap, flags); + if (unlikely(err < 0)) { + // failed to insert a hash PTE due to a hypervisor error + if (user_mode(regs)) { + if (IS_ENABLED(CONFIG_PPC_SUBPAGE_PROT) && err == -2) + _exception(SIGSEGV, regs, SEGV_ACCERR, ea); + else + _exception(SIGBUS, regs, BUS_ADRERR, ea); + } else { + bad_page_fault(regs, SIGBUS); + } + err = 0; + } + + return err; +} + +/* + * The _RAW interrupt entry checks for the in_nmi() case before + * running the full handler. + */ +DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) +{ + unsigned long dsisr = regs->dsisr; + long err; + + if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) + goto page_fault; + + /* + * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then + * don't call hash_page, just fail the fault. This is required to + * prevent re-entrancy problems in the hash code, namely perf + * interrupts hitting while something holds H_PAGE_BUSY, and taking a + * hash fault. See the comment in hash_preload(). + * + * We come here as a result of a DSI at a point where we don't want + * to call hash_page, such as when we are accessing memory (possibly + * user memory) inside a PMU interrupt that occurred while interrupts + * were soft-disabled. We want to invoke the exception handler for + * the access, or panic if there isn't a handler. + */ + if (unlikely(in_nmi())) { + do_bad_page_fault_segv(regs); + return 0; + } + + err = __do_hash_fault(regs); + if (err) { +page_fault: + err = hash__do_page_fault(regs); + } + + return err; } #ifdef CONFIG_PPC_MM_SLICES @@ -1843,27 +1901,6 @@ void flush_hash_range(unsigned long number, int local) } } -/* - * low_hash_fault is called when we the low level hash code failed - * to instert a PTE due to an hypervisor error - */ -void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc) -{ - enum ctx_state prev_state = exception_enter(); - - if (user_mode(regs)) { -#ifdef CONFIG_PPC_SUBPAGE_PROT - if (rc == -2) - _exception(SIGSEGV, regs, SEGV_ACCERR, address); - else -#endif - _exception(SIGBUS, regs, BUS_ADRERR, address); - } else - bad_page_fault(regs, address, SIGBUS); - - exception_exit(prev_state); -} - long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long pa, unsigned long rflags, unsigned long vflags, int psize, int ssize) diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h index c12d78ee42f5..5045048ce244 100644 --- a/arch/powerpc/mm/book3s64/internal.h +++ b/arch/powerpc/mm/book3s64/internal.h @@ -15,4 +15,6 @@ static inline bool stress_slb(void) void slb_setup_new_exec(void); +void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush); + #endif /* ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H */ diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c index 685d7bb3d26f..cd18e94d0843 100644 --- a/arch/powerpc/mm/book3s64/iommu_api.c +++ b/arch/powerpc/mm/book3s64/iommu_api.c @@ -129,7 +129,8 @@ good_exit: mutex_lock(&mem_list_mutex); - list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) { + list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next, + lockdep_is_held(&mem_list_mutex)) { /* Overlap?
*/ if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) && (ua < (mem2->ua + @@ -289,6 +290,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, { struct mm_iommu_table_group_mem_t *mem, *ret = NULL; + rcu_read_lock(); list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if ((mem->ua <= ua) && (ua + size <= mem->ua + @@ -297,6 +299,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, break; } } + rcu_read_unlock(); return ret; } @@ -327,7 +330,8 @@ struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm, mutex_lock(&mem_list_mutex); - list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next, + lockdep_is_held(&mem_list_mutex)) { if ((mem->ua == ua) && (mem->entries == entries)) { ret = mem; ++mem->used; @@ -421,6 +425,7 @@ bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa, struct mm_iommu_table_group_mem_t *mem; unsigned long end; + rcu_read_lock(); list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) continue; @@ -437,6 +442,7 @@ bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa, return true; } } + rcu_read_unlock(); return false; } diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 5b3a3bae21aa..9ffa65074cb0 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -20,6 +20,8 @@ #include <mm/mmu_decl.h> #include <trace/events/thp.h> +#include "internal.h" + unsigned long __pmd_frag_nr; EXPORT_SYMBOL(__pmd_frag_nr); unsigned long __pmd_frag_size_shift; @@ -79,10 +81,15 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); } -static void do_nothing(void *unused) +static void do_serialize(void *arg) { - + /* We've taken the IPI, so try to trim the mask while here */ + if (radix_enabled()) { + struct mm_struct *mm = arg; + exit_lazy_flush_tlb(mm, false); + } } + /* * Serialize against find_current_mm_pte which does lock-less * lookup in page tables with local interrupts disabled. 
For huge pages @@ -96,7 +103,7 @@ static void do_nothing(void *unused) void serialize_against_pte_lookup(struct mm_struct *mm) { smp_mb(); - smp_call_function_many(mm_cpumask(mm), do_nothing, NULL, 1); + smp_call_function_many(mm_cpumask(mm), do_serialize, mm, 1); } /* diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index fb66d154b26c..409e61210789 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -18,6 +18,8 @@ #include <asm/cputhreads.h> #include <asm/plpar_wrappers.h> +#include "internal.h" + #define RIC_FLUSH_TLB 0 #define RIC_FLUSH_PWC 1 #define RIC_FLUSH_ALL 2 @@ -627,15 +629,6 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd } EXPORT_SYMBOL(radix__local_flush_tlb_page); -static bool mm_is_singlethreaded(struct mm_struct *mm) -{ - if (atomic_read(&mm->context.copros) > 0) - return false; - if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) - return true; - return false; -} - static bool mm_needs_flush_escalation(struct mm_struct *mm) { /* @@ -648,21 +641,24 @@ static bool mm_needs_flush_escalation(struct mm_struct *mm) return false; } -#ifdef CONFIG_SMP -static void do_exit_flush_lazy_tlb(void *arg) +/* + * If always_flush is true, then flush even if this CPU can't be removed + * from mm_cpumask. + */ +void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush) { - struct mm_struct *mm = arg; unsigned long pid = mm->context.id; + int cpu = smp_processor_id(); /* * A kthread could have done a mmget_not_zero() after the flushing CPU - * checked mm_is_singlethreaded, and be in the process of - * kthread_use_mm when interrupted here. In that case, current->mm will - * be set to mm, because kthread_use_mm() setting ->mm and switching to - * the mm is done with interrupts off. + * checked mm_cpumask, and be in the process of kthread_use_mm when + * interrupted here. In that case, current->mm will be set to mm, + * because kthread_use_mm() setting ->mm and switching to the mm is + * done with interrupts off. */ if (current->mm == mm) - goto out_flush; + goto out; if (current->active_mm == mm) { WARN_ON_ONCE(current->mm != NULL); @@ -673,11 +669,30 @@ static void do_exit_flush_lazy_tlb(void *arg) mmdrop(mm); } - atomic_dec(&mm->context.active_cpus); - cpumask_clear_cpu(smp_processor_id(), mm_cpumask(mm)); + /* + * This IPI may be initiated from any source including those not + * running the mm, so there may be a racing IPI that comes after + * this one which finds the cpumask already clear. Check and avoid + * underflowing the active_cpus count in that case. The race should + * not otherwise be a problem, but the TLB must be flushed because + * that's what the caller expects. 
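The guard that follows is the fix for the underflow described above. A minimal illustration with a single-word cpumask (hypothetical; the real code runs on the target CPU, so a plain test-then-clear is race-free there):

#include <stdbool.h>

struct demo_mm { unsigned long cpumask; int active_cpus; };

static void demo_local_flush(struct demo_mm *mm)
{
	(void)mm;	/* stands in for _tlbiel_pid(pid, RIC_FLUSH_ALL) */
}

void demo_exit_lazy_flush(struct demo_mm *mm, int cpu, bool always_flush)
{
	/* of two racing IPIs aimed at this CPU, only the first still sees
	 * the bit set, so active_cpus is decremented at most once */
	if (mm->cpumask & (1UL << cpu)) {
		mm->cpumask &= ~(1UL << cpu);
		mm->active_cpus--;
		always_flush = true;
	}
	/* but a flush requested by the caller must happen either way */
	if (always_flush)
		demo_local_flush(mm);
}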
+ */ + if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { + atomic_dec(&mm->context.active_cpus); + cpumask_clear_cpu(cpu, mm_cpumask(mm)); + always_flush = true; + } -out_flush: - _tlbiel_pid(pid, RIC_FLUSH_ALL); +out: + if (always_flush) + _tlbiel_pid(pid, RIC_FLUSH_ALL); +} + +#ifdef CONFIG_SMP +static void do_exit_flush_lazy_tlb(void *arg) +{ + struct mm_struct *mm = arg; + exit_lazy_flush_tlb(mm, true); } static void exit_flush_lazy_tlbs(struct mm_struct *mm) @@ -693,9 +708,110 @@ static void exit_flush_lazy_tlbs(struct mm_struct *mm) (void *)mm, 1); } +#else /* CONFIG_SMP */ +static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { } +#endif /* CONFIG_SMP */ + +static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock); + +/* + * Interval between flushes at which we send out IPIs to check whether the + * mm_cpumask can be trimmed for the case where it's not a single-threaded + * process flushing its own mm. The intent is to reduce the cost of later + * flushes. Don't want this to be so low that it adds noticeable cost to TLB + * flushing, or so high that it doesn't help reduce global TLBIEs. + */ +static unsigned long tlb_mm_cpumask_trim_timer = 1073; + +static bool tick_and_test_trim_clock(void) +{ + if (__this_cpu_inc_return(mm_cpumask_trim_clock) == + tlb_mm_cpumask_trim_timer) { + __this_cpu_write(mm_cpumask_trim_clock, 0); + return true; + } + return false; +} + +enum tlb_flush_type { + FLUSH_TYPE_NONE, + FLUSH_TYPE_LOCAL, + FLUSH_TYPE_GLOBAL, +}; + +static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm) +{ + int active_cpus = atomic_read(&mm->context.active_cpus); + int cpu = smp_processor_id(); + + if (active_cpus == 0) + return FLUSH_TYPE_NONE; + if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) { + if (current->mm != mm) { + /* + * Asynchronous flush sources may trim down to nothing + * if the process is not running, so occasionally try + * to trim. + */ + if (tick_and_test_trim_clock()) { + exit_lazy_flush_tlb(mm, true); + return FLUSH_TYPE_NONE; + } + } + return FLUSH_TYPE_LOCAL; + } + + /* Coprocessors require TLBIE to invalidate nMMU. */ + if (atomic_read(&mm->context.copros) > 0) + return FLUSH_TYPE_GLOBAL; + + /* + * In the fullmm case there's no point doing the exit_flush_lazy_tlbs + * because the mm is being taken down anyway, and a TLBIE tends to + * be faster than an IPI+TLBIEL. + */ + if (fullmm) + return FLUSH_TYPE_GLOBAL; + + /* + * If we are running the only thread of a single-threaded process, + * then we should almost always be able to trim off the rest of the + * CPU mask (except in the case of use_mm() races), so always try + * trimming the mask. + */ + if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) { + exit_flush_lazy_tlbs(mm); + /* + * use_mm() race could prevent IPIs from being able to clear + * the cpumask here, however those users are established + * after our first check (and so after the PTEs are removed), + * and the TLB still gets flushed by the IPI, so this CPU + * will only require a local flush. + */ + return FLUSH_TYPE_LOCAL; + } + + /* + * Occasionally try to trim down the cpumask. It's possible this can + * bring the mask to zero, which results in no flush.
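tick_and_test_trim_clock() above is a simple "fire every Nth call" counter; the interval is deliberately not a round number, presumably so it does not line up with power-of-two-periodic flush patterns. A user-space analogue (a function-local static stands in for the per-CPU counter):

#include <stdbool.h>

#define DEMO_TRIM_INTERVAL 1073		/* same constant as the patch */

bool demo_tick_and_test_trim(void)
{
	static unsigned int clock;	/* per-CPU in the real code */

	if (++clock == DEMO_TRIM_INTERVAL) {
		clock = 0;
		return true;
	}
	return false;
}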
+ */ + if (tick_and_test_trim_clock()) { + exit_flush_lazy_tlbs(mm); + if (current->mm == mm) + return FLUSH_TYPE_LOCAL; + if (cpumask_test_cpu(cpu, mm_cpumask(mm))) + exit_lazy_flush_tlb(mm, true); + return FLUSH_TYPE_NONE; + } + + return FLUSH_TYPE_GLOBAL; +} + +#ifdef CONFIG_SMP void radix__flush_tlb_mm(struct mm_struct *mm) { unsigned long pid; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) @@ -703,16 +819,15 @@ void radix__flush_tlb_mm(struct mm_struct *mm) preempt_disable(); /* - * Order loads of mm_cpumask vs previous stores to clear ptes before - * the invalidate. See barrier in switch_mm_irqs_off + * Order loads of mm_cpumask (in flush_type_needed) vs previous + * stores to clear ptes before the invalidate. See barrier in + * switch_mm_irqs_off */ smp_mb(); - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - exit_flush_lazy_tlbs(mm); - goto local; - } - + type = flush_type_needed(mm, false); + if (type == FLUSH_TYPE_LOCAL) { + _tlbiel_pid(pid, RIC_FLUSH_TLB); + } else if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt = H_RPTI_TARGET_CMMU; @@ -728,9 +843,6 @@ void radix__flush_tlb_mm(struct mm_struct *mm) } else { _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB); } - } else { -local: - _tlbiel_pid(pid, RIC_FLUSH_TLB); } preempt_enable(); } @@ -739,6 +851,7 @@ EXPORT_SYMBOL(radix__flush_tlb_mm); static void __flush_all_mm(struct mm_struct *mm, bool fullmm) { unsigned long pid; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) @@ -746,13 +859,10 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm) preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - if (!fullmm) { - exit_flush_lazy_tlbs(mm); - goto local; - } - } + type = flush_type_needed(mm, fullmm); + if (type == FLUSH_TYPE_LOCAL) { + _tlbiel_pid(pid, RIC_FLUSH_ALL); + } else if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt = H_RPTI_TARGET_CMMU; unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | @@ -766,9 +876,6 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm) _tlbie_pid(pid, RIC_FLUSH_ALL); else _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); - } else { -local: - _tlbiel_pid(pid, RIC_FLUSH_ALL); } preempt_enable(); } @@ -783,6 +890,7 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize) { unsigned long pid; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) @@ -790,11 +898,10 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - exit_flush_lazy_tlbs(mm); - goto local; - } + type = flush_type_needed(mm, false); + if (type == FLUSH_TYPE_LOCAL) { + _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); + } else if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt, pg_sizes, size; @@ -811,9 +918,6 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB); else _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB); - } else { -local: - _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); } preempt_enable(); } @@ -828,8 +932,6 @@ void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) } 
EXPORT_SYMBOL(radix__flush_tlb_page); -#else /* CONFIG_SMP */ -static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { } #endif /* CONFIG_SMP */ static void do_tlbiel_kernel(void *info) @@ -893,7 +995,9 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift; unsigned long page_size = 1UL << page_shift; unsigned long nr_pages = (end - start) >> page_shift; - bool local, full; + bool fullmm = (end == TLB_FLUSH_ALL); + bool flush_pid; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) @@ -901,24 +1005,18 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - if (end != TLB_FLUSH_ALL) { - exit_flush_lazy_tlbs(mm); - goto is_local; - } - } - local = false; - full = (end == TLB_FLUSH_ALL || - nr_pages > tlb_single_page_flush_ceiling); - } else { -is_local: - local = true; - full = (end == TLB_FLUSH_ALL || - nr_pages > tlb_local_single_page_flush_ceiling); - } + type = flush_type_needed(mm, fullmm); + if (type == FLUSH_TYPE_NONE) + goto out; + + if (fullmm) + flush_pid = true; + else if (type == FLUSH_TYPE_GLOBAL) + flush_pid = nr_pages > tlb_single_page_flush_ceiling; + else + flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; - if (!mmu_has_feature(MMU_FTR_GTSE) && !local) { + if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) { unsigned long tgt = H_RPTI_TARGET_CMMU; unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); @@ -928,8 +1026,8 @@ is_local: tgt |= H_RPTI_TARGET_NMMU; pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes, start, end); - } else if (full) { - if (local) { + } else if (flush_pid) { + if (type == FLUSH_TYPE_LOCAL) { _tlbiel_pid(pid, RIC_FLUSH_TLB); } else { if (cputlb_use_tlbie()) { @@ -952,7 +1050,7 @@ is_local: hflush = true; } - if (local) { + if (type == FLUSH_TYPE_LOCAL) { asm volatile("ptesync": : :"memory"); __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize); if (hflush) @@ -974,6 +1072,7 @@ is_local: hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false); } } +out: preempt_enable(); } @@ -1085,32 +1184,30 @@ static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned int page_shift = mmu_psize_defs[psize].shift; unsigned long page_size = 1UL << page_shift; unsigned long nr_pages = (end - start) >> page_shift; - bool local, full; + bool fullmm = (end == TLB_FLUSH_ALL); + bool flush_pid; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) return; + fullmm = (end == TLB_FLUSH_ALL); + preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - if (end != TLB_FLUSH_ALL) { - exit_flush_lazy_tlbs(mm); - goto is_local; - } - } - local = false; - full = (end == TLB_FLUSH_ALL || - nr_pages > tlb_single_page_flush_ceiling); - } else { -is_local: - local = true; - full = (end == TLB_FLUSH_ALL || - nr_pages > tlb_local_single_page_flush_ceiling); - } + type = flush_type_needed(mm, fullmm); + if (type == FLUSH_TYPE_NONE) + goto out; + + if (fullmm) + flush_pid = true; + else if (type == FLUSH_TYPE_GLOBAL) + flush_pid = nr_pages > tlb_single_page_flush_ceiling; + else + flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; - if (!mmu_has_feature(MMU_FTR_GTSE) && !local) { + if (!mmu_has_feature(MMU_FTR_GTSE) && 
type == FLUSH_TYPE_GLOBAL) { unsigned long tgt = H_RPTI_TARGET_CMMU; unsigned long type = H_RPTI_TYPE_TLB; unsigned long pg_sizes = psize_to_rpti_pgsize(psize); @@ -1120,8 +1217,8 @@ is_local: if (atomic_read(&mm->context.copros) > 0) tgt |= H_RPTI_TARGET_NMMU; pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end); - } else if (full) { - if (local) { + } else if (flush_pid) { + if (type == FLUSH_TYPE_LOCAL) { _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); } else { if (cputlb_use_tlbie()) { @@ -1137,7 +1234,7 @@ is_local: } } else { - if (local) + if (type == FLUSH_TYPE_LOCAL) _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc); else if (cputlb_use_tlbie()) _tlbie_va_range(start, end, pid, page_size, psize, also_pwc); @@ -1145,6 +1242,7 @@ is_local: _tlbiel_va_range_multicast(mm, start, end, pid, page_size, psize, also_pwc); } +out: preempt_enable(); } @@ -1164,6 +1262,7 @@ static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) { unsigned long pid, end; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) @@ -1180,11 +1279,10 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) /* Otherwise first do the PWC, then iterate the pages. */ preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - exit_flush_lazy_tlbs(mm); - goto local; - } + type = flush_type_needed(mm, false); + if (type == FLUSH_TYPE_LOCAL) { + _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); + } else if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt, type, pg_sizes; @@ -1202,9 +1300,6 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) else _tlbiel_va_range_multicast(mm, addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); - } else { -local: - _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); } preempt_enable(); diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 584567970c11..c91bd85eb90e 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -10,6 +10,7 @@ */ #include <asm/asm-prototypes.h> +#include <asm/interrupt.h> #include <asm/mmu.h> #include <asm/mmu_context.h> #include <asm/paca.h> @@ -813,8 +814,9 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea) return slb_insert_entry(ea, context, flags, ssize, false); } -long do_slb_fault(struct pt_regs *regs, unsigned long ea) +DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault) { + unsigned long ea = regs->dar; unsigned long id = get_region_id(ea); /* IRQs are not reconciled here, so can't check irqs_disabled */ @@ -824,19 +826,21 @@ long do_slb_fault(struct pt_regs *regs, unsigned long ea) return -EINVAL; /* - * SLB kernel faults must be very careful not to touch anything - * that is not bolted. E.g., PACA and global variables are okay, - * mm->context stuff is not. - * - * SLB user faults can access all of kernel memory, but must be - * careful not to touch things like IRQ state because it is not - * "reconciled" here. The difficulty is that we must use - * fast_exception_return to return from kernel SLB faults without - * looking at possible non-bolted memory. 
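do_slb_fault above becomes a raw, fast handler whose failure code is consumed later by do_bad_slb_fault (via regs->result, as the next hunk shows): the fast path only stashes the error, and the fully-fledged interrupt turns it into a signal or die(). A hypothetical stand-alone sketch of that division of labour:

#include <stdio.h>

#define DEMO_EFAULT 14

struct demo_slb_regs { long result; unsigned long dar; };

static long demo_try_insert_slb(unsigned long ea)
{
	(void)ea;
	return -DEMO_EFAULT;	/* pretend the address was bogus */
}

/* fast, raw path: bolted data only, no tracing, no irq state changes */
long demo_slb_fault_fast(struct demo_slb_regs *regs)
{
	long err = demo_try_insert_slb(regs->dar);

	if (err)
		regs->result = err;	/* stash for the slow handler */
	return err;
}

/* slow, ordinary interrupt: free to signal, trace, or die() */
void demo_slb_fault_bad(struct demo_slb_regs *regs)
{
	if (regs->result == -DEMO_EFAULT)
		printf("SIGSEGV (SEGV_BNDERR) at %lx\n", regs->dar);
	else
		printf("unrecoverable SLB fault\n");
}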
We could test user vs - * kernel faults in the interrupt handler asm and do a full fault, - * reconcile, ret_from_except for user faults which would make them - * first class kernel code. But for performance it's probably nicer - * if they go via fast_exception_return too. + * SLB kernel faults must be very careful not to touch anything that is + * not bolted. E.g., PACA and global variables are okay, mm->context + * stuff is not. SLB user faults may access all of memory (and induce + * one recursive SLB kernel fault), so the kernel fault must not + * trample on the user fault state at those points. + */ + + /* + * This is a raw interrupt handler, for performance, so that + * fast_interrupt_return can be used. The handler must not touch local + * irq state, or schedule. We could test for usermode and upgrade to a + * normal process context (synchronous) interrupt for those, which + * would make them first-class kernel code and able to be traced and + * instrumented; although performance would suffer a bit, it would + * probably be a good tradeoff. */ if (id >= LINEAR_MAP_REGION_ID) { long err; @@ -865,13 +869,15 @@ long do_slb_fault(struct pt_regs *regs, unsigned long ea) } } -void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err) +DEFINE_INTERRUPT_HANDLER(do_bad_slb_fault) { + int err = regs->result; + if (err == -EFAULT) { if (user_mode(regs)) - _exception(SIGSEGV, regs, SEGV_BNDERR, ea); + _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar); else - bad_page_fault(regs, ea, SIGSEGV); + bad_page_fault(regs, SIGSEGV); } else if (err == -EINVAL) { unrecoverable_exception(regs); } else { diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 8961b44f350c..bb368257b55c 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -34,6 +34,7 @@ #include <linux/uaccess.h> #include <asm/firmware.h> +#include <asm/interrupt.h> #include <asm/page.h> #include <asm/mmu.h> #include <asm/mmu_context.h> @@ -377,18 +378,16 @@ static void sanity_check_fault(bool is_write, bool is_user, /* * For 600- and 800-family processors, the error_code parameter is DSISR - * for a data fault, SRR1 for an instruction fault. For 400-family processors - * the error_code parameter is ESR for a data fault, 0 for an instruction - * fault. - * For 64-bit processors, the error_code parameter is - * - DSISR for a non-SLB data access fault, - * - SRR1 & 0x08000000 for a non-SLB instruction access fault - * - 0 any SLB fault. + * for a data fault, SRR1 for an instruction fault. + * For 400-family processors the error_code parameter is ESR for a data fault, + * 0 for an instruction fault. + * For 64-bit processors, the error_code parameter is DSISR for a data access + * fault, SRR1 & 0x08000000 for an instruction access fault. * * The return value is 0 if the fault was handled, or the signal * number if this is a kernel fault that can't be handled here.
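[Illustrative aside — not part of the commit; all names are hypothetical. The DEFINE_INTERRUPT_HANDLER(do_bad_slb_fault) conversion above shows the pattern this merge applies tree-wide: argument unpacking (regs->dar, regs->result) and common entry/exit bookkeeping move out of asm and out of each handler into a generated wrapper. A minimal user-space sketch of that macro shape:

#include <stdio.h>

struct fake_pt_regs { unsigned long dar; long result; };

static void demo_interrupt_enter(struct fake_pt_regs *regs) { (void)regs; puts("enter"); }
static void demo_interrupt_exit(struct fake_pt_regs *regs) { (void)regs; puts("exit"); }

/* The macro emits the public entry point, wrapping a static body. */
#define DEFINE_DEMO_INTERRUPT_HANDLER(func)                        \
	static void ____##func(struct fake_pt_regs *regs);         \
	void func(struct fake_pt_regs *regs)                       \
	{                                                          \
		demo_interrupt_enter(regs);  /* shared entry */    \
		____##func(regs);            /* handler body */    \
		demo_interrupt_exit(regs);   /* shared exit */     \
	}                                                          \
	static void ____##func(struct fake_pt_regs *regs)

DEFINE_DEMO_INTERRUPT_HANDLER(demo_fault)
{
	/* Arguments come from regs rather than asm-passed parameters. */
	printf("fault address 0x%lx\n", regs->dar);
}

int main(void)
{
	struct fake_pt_regs regs = { .dar = 0x1000, .result = 0 };
	demo_fault(&regs);
	return 0;
}

End of aside.]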
*/ -static int __do_page_fault(struct pt_regs *regs, unsigned long address, +static int ___do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { struct vm_area_struct * vma; @@ -435,9 +434,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, return bad_area_nosemaphore(regs, address); } - /* We restore the interrupt state now */ - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); + interrupt_cond_local_irq_enable(regs); perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); @@ -540,34 +537,51 @@ retry: return 0; } -NOKPROBE_SYMBOL(__do_page_fault); +NOKPROBE_SYMBOL(___do_page_fault); -int do_page_fault(struct pt_regs *regs, unsigned long address, - unsigned long error_code) +static long __do_page_fault(struct pt_regs *regs) { const struct exception_table_entry *entry; - enum ctx_state prev_state = exception_enter(); - int rc = __do_page_fault(regs, address, error_code); - exception_exit(prev_state); - if (likely(!rc)) - return 0; + long err; + + err = ___do_page_fault(regs, regs->dar, regs->dsisr); + if (likely(!err)) + return err; entry = search_exception_tables(regs->nip); - if (unlikely(!entry)) - return rc; + if (likely(entry)) { + instruction_pointer_set(regs, extable_fixup(entry)); + return 0; + } else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { + __bad_page_fault(regs, err); + return 0; + } else { + /* 32 and 64e handle the bad page fault in asm */ + return err; + } +} +NOKPROBE_SYMBOL(__do_page_fault); - instruction_pointer_set(regs, extable_fixup(entry)); +DEFINE_INTERRUPT_HANDLER_RET(do_page_fault) +{ + return __do_page_fault(regs); +} - return 0; +#ifdef CONFIG_PPC_BOOK3S_64 +/* Same as do_page_fault but interrupt entry has already run in do_hash_fault */ +long hash__do_page_fault(struct pt_regs *regs) +{ + return __do_page_fault(regs); } -NOKPROBE_SYMBOL(do_page_fault); +NOKPROBE_SYMBOL(hash__do_page_fault); +#endif /* * bad_page_fault is called when we have a bad access from the kernel. * It is called from the DSI and ISI handlers in head.S and from some * of the procedures in traps.c. 
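[Illustrative aside — not part of the commit; all names are hypothetical. The reworked __do_page_fault() above resolves otherwise-fatal kernel faults through the exception table: look up the faulting NIP and, if a fixup entry exists, redirect execution there instead of oopsing. A stand-alone sketch of that lookup, with made-up addresses and a linear search (the kernel's real table is sorted and binary-searched):

#include <stdio.h>
#include <stddef.h>

struct demo_extable_entry { unsigned long insn, fixup; };

/* Each entry pairs a potentially-faulting instruction address with
 * its recovery address. */
static const struct demo_extable_entry demo_extable[] = {
	{ 0x1000, 0x2000 },
	{ 0x1010, 0x2040 },
};

static const struct demo_extable_entry *demo_search(unsigned long nip)
{
	for (size_t i = 0; i < sizeof(demo_extable) / sizeof(demo_extable[0]); i++)
		if (demo_extable[i].insn == nip)
			return &demo_extable[i];
	return NULL; /* no fixup: genuinely bad kernel access */
}

int main(void)
{
	unsigned long nip = 0x1010;
	const struct demo_extable_entry *e = demo_search(nip);

	if (e)
		printf("fixup: nip 0x%lx -> 0x%lx\n", nip, e->fixup);
	else
		puts("no entry: __bad_page_fault() would oops");
	return 0;
}

End of aside.]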
*/ -void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +void __bad_page_fault(struct pt_regs *regs, int sig) { int is_write = page_fault_is_write(regs->dsisr); @@ -605,7 +619,7 @@ void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) die("Kernel access of bad area", regs, sig); } -void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +void bad_page_fault(struct pt_regs *regs, int sig) { const struct exception_table_entry *entry; @@ -614,5 +628,12 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) if (entry) instruction_pointer_set(regs, extable_fixup(entry)); else - __bad_page_fault(regs, address, sig); + __bad_page_fault(regs, sig); } + +#ifdef CONFIG_PPC_BOOK3S_64 +DEFINE_INTERRUPT_HANDLER(do_bad_page_fault_segv) +{ + bad_page_fault(regs, SIGSEGV); +} +#endif diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 8b3cc4d688e8..d142b76d507d 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -217,7 +217,7 @@ void __init pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_p } } -int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate) +static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate) { struct huge_bootmem_page *m; if (nr_gpages == 0) @@ -663,24 +663,6 @@ static int __init hugetlbpage_init(void) arch_initcall(hugetlbpage_init); -void flush_dcache_icache_hugepage(struct page *page) -{ - int i; - void *start; - - BUG_ON(!PageCompound(page)); - - for (i = 0; i < compound_nr(page); i++) { - if (!PageHighMem(page)) { - __flush_dcache_icache(page_address(page+i)); - } else { - start = kmap_atomic(page+i); - __flush_dcache_icache(start); - kunmap_atomic(start); - } - } -} - void __init gigantic_hugetlb_cma_reserve(void) { unsigned long order = 0; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index afab328d0887..4e8ce6d85232 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -91,27 +91,6 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; } -#define FLUSH_CHUNK_SIZE SZ_1G -/** - * flush_dcache_range_chunked(): Write any modified data cache blocks out to - * memory and invalidate them, in chunks of up to FLUSH_CHUNK_SIZE - * Does not invalidate the corresponding instruction cache blocks. 
- * - * @start: the start address - * @stop: the stop address (exclusive) - * @chunk: the max size of the chunks - */ -static void flush_dcache_range_chunked(unsigned long start, unsigned long stop, - unsigned long chunk) -{ - unsigned long i; - - for (i = start; i < stop; i += chunk) { - flush_dcache_range(i, min(stop, i + chunk)); - cond_resched(); - } -} - int __ref arch_create_linear_mapping(int nid, u64 start, u64 size, struct mhp_params *params) { @@ -136,7 +115,6 @@ void __ref arch_remove_linear_mapping(u64 start, u64 size) /* Remove htab bolted mappings for this section of memory */ start = (unsigned long)__va(start); - flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE); mutex_lock(&linear_mapping_mutex); ret = remove_section_mapping(start, start + size); @@ -489,19 +467,35 @@ void flush_dcache_page(struct page *page) if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) return; /* avoid an atomic op if possible */ - if (test_bit(PG_arch_1, &page->flags)) - clear_bit(PG_arch_1, &page->flags); + if (test_bit(PG_dcache_clean, &page->flags)) + clear_bit(PG_dcache_clean, &page->flags); } EXPORT_SYMBOL(flush_dcache_page); -void flush_dcache_icache_page(struct page *page) +static void flush_dcache_icache_hugepage(struct page *page) { -#ifdef CONFIG_HUGETLB_PAGE - if (PageCompound(page)) { - flush_dcache_icache_hugepage(page); - return; + int i; + void *start; + + BUG_ON(!PageCompound(page)); + + for (i = 0; i < compound_nr(page); i++) { + if (!PageHighMem(page)) { + __flush_dcache_icache(page_address(page+i)); + } else { + start = kmap_atomic(page+i); + __flush_dcache_icache(start); + kunmap_atomic(start); + } } -#endif +} + +void flush_dcache_icache_page(struct page *page) +{ + + if (PageCompound(page)) + return flush_dcache_icache_hugepage(page); + #if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64) /* On 8xx there is no need to kmap since highmem is not supported */ __flush_dcache_icache(page_address(page)); diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 15555c95cebc..354611940118 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -26,6 +26,7 @@ #include <asm/tlbflush.h> #include <asm/tlb.h> #include <asm/hugetlb.h> +#include <asm/pte-walk.h> static inline int is_exec_fault(void) { @@ -81,9 +82,9 @@ static pte_t set_pte_filter_hash(pte_t pte) struct page *pg = maybe_pte_to_page(pte); if (!pg) return pte; - if (!test_bit(PG_arch_1, &pg->flags)) { + if (!test_bit(PG_dcache_clean, &pg->flags)) { flush_dcache_icache_page(pg); - set_bit(PG_arch_1, &pg->flags); + set_bit(PG_dcache_clean, &pg->flags); } } return pte; @@ -116,13 +117,13 @@ static inline pte_t set_pte_filter(pte_t pte) return pte; /* If the page clean, we move on */ - if (test_bit(PG_arch_1, &pg->flags)) + if (test_bit(PG_dcache_clean, &pg->flags)) return pte; /* If it's an exec fault, we flush the cache and make it clean */ if (is_exec_fault()) { flush_dcache_icache_page(pg); - set_bit(PG_arch_1, &pg->flags); + set_bit(PG_dcache_clean, &pg->flags); return pte; } @@ -161,12 +162,12 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, goto bail; /* If the page is already clean, we move on */ - if (test_bit(PG_arch_1, &pg->flags)) + if (test_bit(PG_dcache_clean, &pg->flags)) goto bail; - /* Clean the page and set PG_arch_1 */ + /* Clean the page and set PG_dcache_clean */ flush_dcache_icache_page(pg); - set_bit(PG_arch_1, &pg->flags); + set_bit(PG_dcache_clean, &pg->flags); bail: return pte_mkexec(pte); diff --git 
a/arch/powerpc/mm/ptdump/segment_regs.c b/arch/powerpc/mm/ptdump/segment_regs.c index dde2fe8de4b2..565048a0c9be 100644 --- a/arch/powerpc/mm/ptdump/segment_regs.c +++ b/arch/powerpc/mm/ptdump/segment_regs.c @@ -10,7 +10,7 @@ static void seg_show(struct seq_file *m, int i) { - u32 val = mfsrin(i << 28); + u32 val = mfsr(i << 28); seq_printf(m, "0x%01x0000000-0x%01xfffffff ", i, i); seq_printf(m, "Kern key %d ", (val >> 30) & 1); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 869d999a836e..6817331e22ff 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -54,6 +54,9 @@ struct cpu_hw_events { struct perf_branch_stack bhrb_stack; struct perf_branch_entry bhrb_entries[BHRB_MAX_ENTRIES]; u64 ic_init; + + /* Store the PMC values */ + unsigned long pmcs[MAX_HWEVENTS]; }; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); @@ -110,10 +113,6 @@ static inline void perf_read_regs(struct pt_regs *regs) { regs->result = 0; } -static inline int perf_intr_is_nmi(struct pt_regs *regs) -{ - return 0; -} static inline int siar_valid(struct pt_regs *regs) { @@ -147,6 +146,17 @@ bool is_sier_available(void) return false; } +/* + * Return PMC value corresponding to the + * index passed. + */ +unsigned long get_pmcs_ext_regs(int idx) +{ + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + + return cpuhw->pmcs[idx]; +} + static bool regs_use_siar(struct pt_regs *regs) { /* @@ -354,15 +364,6 @@ static inline void perf_read_regs(struct pt_regs *regs) } /* - * If interrupts were soft-disabled when a PMU interrupt occurs, treat - * it as an NMI. - */ -static inline int perf_intr_is_nmi(struct pt_regs *regs) -{ - return (regs->softe & IRQS_DISABLED); -} - -/* * On processors like P7+ that have the SIAR-Valid bit, marked instructions * must be sampled only if the SIAR-valid bit is set. 
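[Illustrative aside — not part of the commit; all names are hypothetical. The new cpuhw->pmcs[] array above caches the counter values read in the PMU interrupt handler (see the __perf_event_interrupt hunk below) so that get_pmcs_ext_regs() can later report them as extended perf registers without touching hardware again. A simplified runnable sketch of that flow, where demo_read_pmc stands in for the real mfspr-based read_pmc:

#include <stdio.h>

#define DEMO_MAX_HWEVENTS 8

/* Per-CPU in the kernel; one instance suffices for the sketch. */
static struct { unsigned long pmcs[DEMO_MAX_HWEVENTS]; } demo_cpuhw;

static unsigned long demo_read_pmc(int pmc)
{
	return 1000ul * pmc; /* stand-in for reading SPRN_PMCx */
}

static void demo_pmu_interrupt(int n_counter)
{
	/* Read every PMC once, up front, as the handler now does. */
	for (int i = 0; i < n_counter; i++)
		demo_cpuhw.pmcs[i] = demo_read_pmc(i + 1);
}

/* Later, perf_reg_value() can hand back a cached PMC by index. */
static unsigned long demo_get_pmcs_ext_regs(int idx)
{
	return demo_cpuhw.pmcs[idx];
}

int main(void)
{
	demo_pmu_interrupt(6);
	printf("PMC1 = %lu\n", demo_get_pmcs_ext_regs(0));
	return 0;
}

End of aside.]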
* @@ -915,7 +916,7 @@ void perf_event_print_debug(void) */ static int power_check_constraints(struct cpu_hw_events *cpuhw, u64 event_id[], unsigned int cflags[], - int n_ev) + int n_ev, struct perf_event **event) { unsigned long mask, value, nv; unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS]; @@ -938,7 +939,7 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw, event_id[i] = cpuhw->alternatives[i][0]; } if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0], - &cpuhw->avalues[i][0])) + &cpuhw->avalues[i][0], event[i]->attr.config1)) return -1; } value = mask = 0; @@ -973,7 +974,8 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw, for (j = 1; j < n_alt[i]; ++j) ppmu->get_constraint(cpuhw->alternatives[i][j], &cpuhw->amasks[i][j], - &cpuhw->avalues[i][j]); + &cpuhw->avalues[i][j], + event[i]->attr.config1); } /* enumerate all possibilities and see if any will work */ @@ -1391,7 +1393,7 @@ static void power_pmu_enable(struct pmu *pmu) memset(&cpuhw->mmcr, 0, sizeof(cpuhw->mmcr)); if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index, - &cpuhw->mmcr, cpuhw->event)) { + &cpuhw->mmcr, cpuhw->event, ppmu->flags)) { /* shouldn't ever get here */ printk(KERN_ERR "oops compute_mmcr failed\n"); goto out; @@ -1579,7 +1581,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags) if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) goto out; - if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) + if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1, cpuhw->event)) goto out; event->hw.config = cpuhw->events[n0]; @@ -1789,7 +1791,7 @@ static int power_pmu_commit_txn(struct pmu *pmu) n = cpuhw->n_events; if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) return -EAGAIN; - i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n); + i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n, cpuhw->event); if (i < 0) return -EAGAIN; @@ -2027,7 +2029,7 @@ static int power_pmu_event_init(struct perf_event *event) local_irq_save(irq_flags); cpuhw = this_cpu_ptr(&cpu_hw_events); - err = power_check_constraints(cpuhw, events, cflags, n + 1); + err = power_check_constraints(cpuhw, events, cflags, n + 1, ctrs); if (has_branch_stack(event)) { u64 bhrb_filter = -1; @@ -2149,7 +2151,17 @@ static void record_and_restart(struct perf_event *event, unsigned long val, left += period; if (left <= 0) left = period; - record = siar_valid(regs); + + /* + * If address is not requested in the sample via + * PERF_SAMPLE_IP, just record that sample irrespective + * of SIAR valid check. + */ + if (event->attr.sample_type & PERF_SAMPLE_IP) + record = siar_valid(regs); + else + record = 1; + event->hw.last_period = event->hw.sample_period; } if (left < 0x80000000LL) @@ -2167,9 +2179,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * MMCR2. Check attr.exclude_kernel and address to drop the sample in * these cases. */ - if (event->attr.exclude_kernel && record) - if (is_kernel_addr(mfspr(SPRN_SIAR))) - record = 0; + if (event->attr.exclude_kernel && + (event->attr.sample_type & PERF_SAMPLE_IP) && + is_kernel_addr(mfspr(SPRN_SIAR))) + record = 0; /* * Finally record data if requested. 
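[Illustrative aside — not part of the commit; all names are hypothetical. The record_and_restart() change above narrows the SIAR-validity gate to samples that actually request an instruction pointer. Pulled out as a stand-alone sketch, with a made-up flag value:

#include <stdbool.h>
#include <stdio.h>

#define DEMO_PERF_SAMPLE_IP 0x1 /* stand-in for the real flag */

static bool demo_should_record(unsigned long sample_type, bool siar_valid,
			       bool exclude_kernel, bool siar_in_kernel)
{
	bool record;

	/* Gate on SIAR validity only when the sample wants an address. */
	if (sample_type & DEMO_PERF_SAMPLE_IP)
		record = siar_valid;
	else
		record = true;

	/* Likewise, only drop kernel-address samples when an IP is
	 * actually being recorded for an exclude_kernel event. */
	if (exclude_kernel && (sample_type & DEMO_PERF_SAMPLE_IP) &&
	    siar_in_kernel)
		record = false;

	return record;
}

int main(void)
{
	/* No IP requested: recorded regardless of SIAR state (prints 1). */
	printf("%d\n", demo_should_record(0, false, true, true));
	/* IP requested, SIAR valid but in-kernel for an exclude_kernel
	 * event: dropped (prints 0). */
	printf("%d\n", demo_should_record(DEMO_PERF_SAMPLE_IP, true, true, true));
	return 0;
}

End of aside.]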
@@ -2277,9 +2290,7 @@ static void __perf_event_interrupt(struct pt_regs *regs) int i, j; struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; - unsigned long val[8]; int found, active; - int nmi; if (cpuhw->n_limited) freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), @@ -2287,26 +2298,14 @@ static void __perf_event_interrupt(struct pt_regs *regs) perf_read_regs(regs); - /* - * If perf interrupts hit in a local_irq_disable (soft-masked) region, - * we consider them as NMIs. This is required to prevent hash faults on - * user addresses when reading callchains. See the NMI test in - * do_hash_page. - */ - nmi = perf_intr_is_nmi(regs); - if (nmi) - nmi_enter(); - else - irq_enter(); - /* Read all the PMCs since we'll need them a bunch of times */ for (i = 0; i < ppmu->n_counter; ++i) - val[i] = read_pmc(i + 1); + cpuhw->pmcs[i] = read_pmc(i + 1); /* Try to find what caused the IRQ */ found = 0; for (i = 0; i < ppmu->n_counter; ++i) { - if (!pmc_overflow(val[i])) + if (!pmc_overflow(cpuhw->pmcs[i])) continue; if (is_limited_pmc(i + 1)) continue; /* these won't generate IRQs */ @@ -2321,7 +2320,7 @@ static void __perf_event_interrupt(struct pt_regs *regs) event = cpuhw->event[j]; if (event->hw.idx == (i + 1)) { active = 1; - record_and_restart(event, val[i], regs); + record_and_restart(event, cpuhw->pmcs[i], regs); break; } } @@ -2335,17 +2334,17 @@ static void __perf_event_interrupt(struct pt_regs *regs) event = cpuhw->event[i]; if (!event->hw.idx || is_limited_pmc(event->hw.idx)) continue; - if (pmc_overflow_power7(val[event->hw.idx - 1])) { + if (pmc_overflow_power7(cpuhw->pmcs[event->hw.idx - 1])) { /* event has overflowed in a buggy way*/ found = 1; record_and_restart(event, - val[event->hw.idx - 1], + cpuhw->pmcs[event->hw.idx - 1], regs); } } } - if (!found && !nmi && printk_ratelimit()) - printk(KERN_WARNING "Can't find PMC that caused IRQ\n"); + if (unlikely(!found) && !arch_irq_disabled_regs(regs)) + printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n"); /* * Reset MMCR0 to its normal value. This will set PMXE and @@ -2356,10 +2355,9 @@ static void __perf_event_interrupt(struct pt_regs *regs) */ write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0); - if (nmi) - nmi_exit(); - else - irq_exit(); + /* Clear the cpuhw->pmcs */ + memset(&cpuhw->pmcs, 0, sizeof(cpuhw->pmcs)); + } static void perf_event_interrupt(struct pt_regs *regs) diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c index e0e7e276bfd2..ee721f420a7b 100644 --- a/arch/powerpc/perf/core-fsl-emb.c +++ b/arch/powerpc/perf/core-fsl-emb.c @@ -31,19 +31,6 @@ static atomic_t num_events; /* Used to avoid races in calling reserve/release_pmc_hardware */ static DEFINE_MUTEX(pmc_reserve_mutex); -/* - * If interrupts were soft-disabled when a PMU interrupt occurs, treat - * it as an NMI. 
- */ -static inline int perf_intr_is_nmi(struct pt_regs *regs) -{ -#ifdef __powerpc64__ - return (regs->softe & IRQS_DISABLED); -#else - return 0; -#endif -} - static void perf_event_interrupt(struct pt_regs *regs); /* @@ -659,13 +646,6 @@ static void perf_event_interrupt(struct pt_regs *regs) struct perf_event *event; unsigned long val; int found = 0; - int nmi; - - nmi = perf_intr_is_nmi(regs); - if (nmi) - nmi_enter(); - else - irq_enter(); for (i = 0; i < ppmu->n_counter; ++i) { event = cpuhw->event[i]; @@ -690,11 +670,6 @@ static void perf_event_interrupt(struct pt_regs *regs) mtmsr(mfmsr() | MSR_PMM); mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE); isync(); - - if (nmi) - nmi_exit(); - else - irq_exit(); } void hw_perf_event_setup(int cpu) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 6e7e820508df..e5eb33255066 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -764,6 +764,14 @@ static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event, return ev_len; } +/* + * Return true in case of invalid or dummy events with names like RESERVED* + */ +static bool ignore_event(const char *name) +{ + return strncmp(name, "RESERVED", 8) == 0; +} + #define MAX_4K (SIZE_MAX / 4096) static int create_events_from_catalog(struct attribute ***events_, @@ -894,6 +902,10 @@ static int create_events_from_catalog(struct attribute ***events_, name = event_name(event, &nl); + if (ignore_event(name)) { + junk_events++; + continue; + } if (event->event_group_record_len == 0) { pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n", event_idx, nl, name); @@ -955,6 +967,9 @@ static int create_events_from_catalog(struct attribute ***events_, continue; name = event_name(event, &nl); + if (ignore_event(name)) + continue; + nonce = event_uniq_add(&ev_uniq, name, nl, event->domain); ct = event_data_to_attrs(event_idx, events + event_attr_ct, event, nonce); diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 6ab5b272090a..e4f577da33d8 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -108,12 +108,57 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) *mmcra |= MMCRA_SDAR_MODE_TLB; } +static u64 p10_thresh_cmp_val(u64 value) +{ + int exp = 0; + u64 result = value; + + if (!value) + return value; + + /* + * In case of P10, the thresh_cmp value is not part of the raw event code + * and is provided via the attr.config1 parameter. To program the threshold in MMCRA, + * take an 18 bit number N and shift right 2 places and increment + * the exponent E by 1 until the upper 10 bits of N are zero. + * Write E to the threshold exponent and write the lower 8 bits of N + * to the threshold mantissa. + * The max threshold that can be written is 261120. + */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (value > 261120) + value = 261120; + while ((64 - __builtin_clzl(value)) > 8) { + exp++; + value >>= 2; + } + + /* + * Note that it is invalid to write a mantissa with the + * upper 2 bits of mantissa being zero, unless the + * exponent is also zero. + */ + if (!(value & 0xC0) && exp) + result = 0; + else + result = (exp << 8) | value; + } + return result; + } + static u64 thresh_cmp_val(u64 value) { + if (cpu_has_feature(CPU_FTR_ARCH_31)) + value = p10_thresh_cmp_val(value); + + /* + * Since the location of the threshold compare bits in MMCRA + * is different for p8, using a different shift value.
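+ * As a worked example (editor's illustration, not part of the original + * comment): encoding N = 0x30000 (196608) takes five shift-right-by-2 + * steps before it fits in 8 bits, leaving mantissa 0xC0 and exponent + * E = 5, so the value programmed is (5 << 8) | 0xC0 = 0x5C0.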
+ */ if (cpu_has_feature(CPU_FTR_ARCH_300)) return value << p9_MMCRA_THR_CMP_SHIFT; - - return value << MMCRA_THR_CMP_SHIFT; + else + return value << MMCRA_THR_CMP_SHIFT; } static unsigned long combine_from_event(u64 event) @@ -141,13 +186,13 @@ static bool is_thresh_cmp_valid(u64 event) { unsigned int cmp, exp; + if (cpu_has_feature(CPU_FTR_ARCH_31)) + return p10_thresh_cmp_val(event) != 0; + /* * Check the mantissa upper two bits are not zero, unless the * exponent is also zero. See the THRESH_CMP_MANTISSA doc. - * Power10: thresh_cmp is replaced by l2_l3 event select. */ - if (cpu_has_feature(CPU_FTR_ARCH_31)) - return false; cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; exp = cmp >> 7; @@ -256,7 +301,7 @@ void isa207_get_mem_weight(u64 *weight) *weight = mantissa << (2 * exp); } -int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) +int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1) { unsigned int unit, pmc, cache, ebb; unsigned long mask, value; @@ -355,9 +400,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) } if (cpu_has_feature(CPU_FTR_ARCH_31)) { - if (event_is_threshold(event)) { + if (event_is_threshold(event) && is_thresh_cmp_valid(event_config1)) { mask |= CNST_THRESH_CTL_SEL_MASK; value |= CNST_THRESH_CTL_SEL_VAL(event >> EVENT_THRESH_SHIFT); + mask |= p10_CNST_THRESH_CMP_MASK; + value |= p10_CNST_THRESH_CMP_VAL(p10_thresh_cmp_val(event_config1)); } } else if (cpu_has_feature(CPU_FTR_ARCH_300)) { if (event_is_threshold(event) && is_thresh_cmp_valid(event)) { @@ -411,7 +458,7 @@ ebb_bhrb: int isa207_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]) + struct perf_event *pevents[], u32 flags) { unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val; unsigned long mmcr3; @@ -504,6 +551,10 @@ int isa207_compute_mmcr(u64 event[], int n_ev, val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; mmcra |= thresh_cmp_val(val); + } else if (flags & PPMU_HAS_ATTR_CONFIG1) { + val = (pevents[i]->attr.config1 >> p10_EVENT_THR_CMP_SHIFT) & + p10_EVENT_THR_CMP_MASK; + mmcra |= thresh_cmp_val(val); } } diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 454b32c31440..1af0e8c97ac7 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -105,6 +105,10 @@ #define p10_EVENT_RADIX_SCOPE_QUAL_MASK 0x1 #define p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT 45 +/* Event Threshold Compare bit constant for power10 in config1 attribute */ +#define p10_EVENT_THR_CMP_SHIFT 0 +#define p10_EVENT_THR_CMP_MASK 0x3FFFFull + #define p10_EVENT_VALID_MASK \ ((p10_SDAR_MODE_MASK << p10_SDAR_MODE_SHIFT | \ (p10_EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \ @@ -124,8 +128,8 @@ * 60 56 52 48 44 40 36 32 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | * [ fab_match ] [ thresh_cmp ] [ thresh_ctl ] [ ] - * | - * thresh_sel -* + * | | + * [ thresh_cmp bits for p10] thresh_sel -* * * 28 24 20 16 12 8 4 0 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | @@ -152,6 +156,9 @@ #define CNST_THRESH_CTL_SEL_VAL(v) (((v) & 0x7ffull) << 32) #define CNST_THRESH_CTL_SEL_MASK CNST_THRESH_CTL_SEL_VAL(0x7ff) +#define p10_CNST_THRESH_CMP_VAL(v) (((v) & 0x7ffull) << 43) +#define p10_CNST_THRESH_CMP_MASK p10_CNST_THRESH_CMP_VAL(0x7ff) + #define CNST_EBB_VAL(v) (((v) & EVENT_EBB_MASK) << 24) #define CNST_EBB_MASK 
CNST_EBB_VAL(EVENT_EBB_MASK) @@ -262,10 +269,10 @@ #define PH(a, b) (P(LVL, HIT) | P(a, b)) #define PM(a, b) (P(LVL, MISS) | P(a, b)) -int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp); +int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1); int isa207_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]); + struct perf_event *pevents[], u32 flags); void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr); int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags, const unsigned int ev_alt[][MAX_ALT]); diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c index 1919e9df9165..e39b15b79a83 100644 --- a/arch/powerpc/perf/mpc7450-pmu.c +++ b/arch/powerpc/perf/mpc7450-pmu.c @@ -148,7 +148,7 @@ static u32 classbits[N_CLASSES - 1][2] = { }; static int mpc7450_get_constraint(u64 event, unsigned long *maskp, - unsigned long *valp) + unsigned long *valp, u64 event_config1 __maybe_unused) { int pmc, class; u32 mask, value; @@ -258,7 +258,8 @@ static const u32 pmcsel_mask[N_COUNTER] = { */ static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]) + struct perf_event *pevents[], + u32 flags __maybe_unused) { u8 event_index[N_CLASSES][N_COUNTER]; int n_classevent[N_CLASSES]; diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index 6f681b105eec..b931eed482c9 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -75,6 +75,8 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = { static u64 get_ext_regs_value(int idx) { switch (idx) { + case PERF_REG_POWERPC_PMC1 ... 
PERF_REG_POWERPC_PMC6: + return get_pmcs_ext_regs(idx - PERF_REG_POWERPC_PMC1); case PERF_REG_POWERPC_MMCR0: return mfspr(SPRN_MMCR0); case PERF_REG_POWERPC_MMCR1: @@ -95,13 +97,6 @@ static u64 get_ext_regs_value(int idx) u64 perf_reg_value(struct pt_regs *regs, int idx) { - u64 perf_reg_extended_max = PERF_REG_POWERPC_MAX; - - if (cpu_has_feature(CPU_FTR_ARCH_31)) - perf_reg_extended_max = PERF_REG_MAX_ISA_31; - else if (cpu_has_feature(CPU_FTR_ARCH_300)) - perf_reg_extended_max = PERF_REG_MAX_ISA_300; - if (idx == PERF_REG_POWERPC_SIER && (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) || IS_ENABLED(CONFIG_PPC32) || @@ -113,14 +108,14 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) IS_ENABLED(CONFIG_PPC32))) return 0; - if (idx >= PERF_REG_POWERPC_MAX && idx < perf_reg_extended_max) + if (idx >= PERF_REG_POWERPC_MAX && idx < PERF_REG_EXTENDED_MAX) return get_ext_regs_value(idx); /* * If the idx is referring to value beyond the * supported registers, return 0 with a warning */ - if (WARN_ON_ONCE(idx >= perf_reg_extended_max)) + if (WARN_ON_ONCE(idx >= PERF_REG_EXTENDED_MAX)) return 0; return regs_get_register(regs, pt_regs_offset[idx]); diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index 79e0206ca454..a901c1348cad 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -216,6 +216,7 @@ PMU_FORMAT_ATTR(invert_bit, "config:47"); PMU_FORMAT_ATTR(src_mask, "config:48-53"); PMU_FORMAT_ATTR(src_match, "config:54-59"); PMU_FORMAT_ATTR(radix_scope, "config:9"); +PMU_FORMAT_ATTR(thresh_cmp, "config1:0-17"); static struct attribute *power10_pmu_format_attr[] = { &format_attr_event.attr, @@ -236,6 +237,7 @@ static struct attribute *power10_pmu_format_attr[] = { &format_attr_src_mask.attr, &format_attr_src_match.attr, &format_attr_radix_scope.attr, + &format_attr_thresh_cmp.attr, NULL, }; @@ -550,7 +552,7 @@ static struct power_pmu power10_pmu = { .get_mem_weight = isa207_get_mem_weight, .disable_pmc = isa207_disable_pmc, .flags = PPMU_HAS_SIER | PPMU_ARCH_207S | - PPMU_ARCH_31, + PPMU_ARCH_31 | PPMU_HAS_ATTR_CONFIG1, .n_generic = ARRAY_SIZE(power10_generic_events), .generic_events = power10_generic_events, .cache_events = &power10_cache_events, diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c index 3e64b4a1511f..18732267993a 100644 --- a/arch/powerpc/perf/power5+-pmu.c +++ b/arch/powerpc/perf/power5+-pmu.c @@ -132,7 +132,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = { }; static int power5p_get_constraint(u64 event, unsigned long *maskp, - unsigned long *valp) + unsigned long *valp, u64 event_config1 __maybe_unused) { int pmc, byte, unit, sh; int bit, fmask; @@ -451,7 +451,8 @@ static int power5p_marked_instr_event(u64 event) static int power5p_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]) + struct perf_event *pevents[], + u32 flags __maybe_unused) { unsigned long mmcr1 = 0; unsigned long mmcra = 0; diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c index 017bb19b73fb..cb611c1e7abe 100644 --- a/arch/powerpc/perf/power5-pmu.c +++ b/arch/powerpc/perf/power5-pmu.c @@ -136,7 +136,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = { }; static int power5_get_constraint(u64 event, unsigned long *maskp, - unsigned long *valp) + unsigned long *valp, u64 event_config1 __maybe_unused) { int pmc, byte, unit, sh; int bit, fmask; @@ -382,7 +382,8 @@ static int power5_marked_instr_event(u64 event) static int 
power5_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]) + struct perf_event *pevents[], + u32 flags __maybe_unused) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c index 189974478e9f..69ef38216418 100644 --- a/arch/powerpc/perf/power6-pmu.c +++ b/arch/powerpc/perf/power6-pmu.c @@ -173,7 +173,8 @@ static int power6_marked_instr_event(u64 event) * Assign PMC numbers and compute MMCR1 value for a set of events */ static int p6_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[], + u32 flags __maybe_unused) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; @@ -266,7 +267,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, * 32-34 select field: nest (subunit) event selector */ static int p6_get_constraint(u64 event, unsigned long *maskp, - unsigned long *valp) + unsigned long *valp, u64 event_config1 __maybe_unused) { int pmc, byte, sh, subunit; unsigned long mask = 0, value = 0; diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index bacfab104a1a..894c17f9a762 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -81,7 +81,7 @@ enum { */ static int power7_get_constraint(u64 event, unsigned long *maskp, - unsigned long *valp) + unsigned long *valp, u64 event_config1 __maybe_unused) { int pmc, sh, unit; unsigned long mask = 0, value = 0; @@ -245,7 +245,8 @@ static int power7_marked_instr_event(u64 event) static int power7_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]) + struct perf_event *pevents[], + u32 flags __maybe_unused) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c index 7d78df97f272..1f8263785286 100644 --- a/arch/powerpc/perf/ppc970-pmu.c +++ b/arch/powerpc/perf/ppc970-pmu.c @@ -190,7 +190,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = { }; static int p970_get_constraint(u64 event, unsigned long *maskp, - unsigned long *valp) + unsigned long *valp, u64 event_config1 __maybe_unused) { int pmc, byte, unit, sh, spcsel; unsigned long mask = 0, value = 0; @@ -256,7 +256,8 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[]) static int p970_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]) + struct perf_event *pevents[], + u32 flags __maybe_unused) { unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0; unsigned int pmc, unit, byte, psel; diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 78ac6d67a935..7d41e9264510 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -206,17 +206,10 @@ config AKEBONO select PPC4xx_HSTA_MSI select I2C select I2C_IBM_IIC - select NETDEVICES - select ETHERNET - select NET_VENDOR_IBM select IBM_EMAC_EMAC4 if IBM_EMAC select USB if USB_SUPPORT select USB_OHCI_HCD_PLATFORM if USB_OHCI_HCD select USB_EHCI_HCD_PLATFORM if USB_EHCI_HCD - select MMC_SDHCI - select MMC_SDHCI_PLTFM - select ATA - select SATA_AHCI_PLATFORM help This option enables support for the IBM Akebono (476gtr) evaluation 
board diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c index 6303fbfc4e4f..9d030c2e0004 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads.c @@ -24,21 +24,23 @@ static void __init mpc5121_ads_setup_arch(void) { -#ifdef CONFIG_PCI - struct device_node *np; -#endif printk(KERN_INFO "MPC5121 ADS board from Freescale Semiconductor\n"); /* * cpld regs are needed early */ mpc5121_ads_cpld_map(); + mpc512x_setup_arch(); +} + +static void __init mpc5121_ads_setup_pci(void) +{ #ifdef CONFIG_PCI + struct device_node *np; + for_each_compatible_node(np, "pci", "fsl,mpc5121-pci") mpc83xx_add_bridge(np); #endif - - mpc512x_setup_arch(); } static void __init mpc5121_ads_init_IRQ(void) @@ -64,6 +66,7 @@ define_machine(mpc5121_ads) { .name = "MPC5121 ADS", .probe = mpc5121_ads_probe, .setup_arch = mpc5121_ads_setup_arch, + .discover_phbs = mpc5121_ads_setup_pci, .init = mpc512x_init, .init_IRQ = mpc5121_ads_init_IRQ, .get_irq = ipic_get_irq, diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 4514a6f7458a..3b7d70d71692 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -185,8 +185,6 @@ static void __init efika_setup_arch(void) /* Map important registers from the internal memory map */ mpc52xx_map_common_devices(); - efika_pcisetup(); - #ifdef CONFIG_PM mpc52xx_suspend.board_suspend_prepare = efika_suspend_prepare; mpc52xx_pm_init(); @@ -218,6 +216,7 @@ define_machine(efika) .name = EFIKA_PLATFORM_NAME, .probe = efika_probe, .setup_arch = efika_setup_arch, + .discover_phbs = efika_pcisetup, .init = mpc52xx_declare_of_platform_devices, .show_cpuinfo = efika_show_cpuinfo, .init_IRQ = mpc52xx_init_irq, diff --git a/arch/powerpc/platforms/52xx/lite5200.c b/arch/powerpc/platforms/52xx/lite5200.c index 3181aac08225..04cc97397095 100644 --- a/arch/powerpc/platforms/52xx/lite5200.c +++ b/arch/powerpc/platforms/52xx/lite5200.c @@ -165,8 +165,6 @@ static void __init lite5200_setup_arch(void) mpc52xx_suspend.board_resume_finish = lite5200_resume_finish; lite5200_pm_init(); #endif - - mpc52xx_setup_pci(); } static const char * const board[] __initconst = { @@ -187,6 +185,7 @@ define_machine(lite5200) { .name = "lite5200", .probe = lite5200_probe, .setup_arch = lite5200_setup_arch, + .discover_phbs = mpc52xx_setup_pci, .init = mpc52xx_declare_of_platform_devices, .init_IRQ = mpc52xx_init_irq, .get_irq = mpc52xx_get_irq, diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c index 07c5bc4ed0b5..efb8bdecbcc7 100644 --- a/arch/powerpc/platforms/52xx/media5200.c +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -202,8 +202,6 @@ static void __init media5200_setup_arch(void) /* Some mpc5200 & mpc5200b related configuration */ mpc5200_setup_xlb_arbiter(); - mpc52xx_setup_pci(); - np = of_find_matching_node(NULL, mpc5200_gpio_ids); gpio = of_iomap(np, 0); of_node_put(np); @@ -244,6 +242,7 @@ define_machine(media5200_platform) { .name = "media5200-platform", .probe = media5200_probe, .setup_arch = media5200_setup_arch, + .discover_phbs = mpc52xx_setup_pci, .init = mpc52xx_declare_of_platform_devices, .init_IRQ = media5200_init_irq, .get_irq = mpc52xx_get_irq, diff --git a/arch/powerpc/platforms/52xx/mpc5200_simple.c b/arch/powerpc/platforms/52xx/mpc5200_simple.c index 2d01e9b2e779..b9f5675b0a1d 100644 --- a/arch/powerpc/platforms/52xx/mpc5200_simple.c +++ b/arch/powerpc/platforms/52xx/mpc5200_simple.c @@ -40,8 
+40,6 @@ static void __init mpc5200_simple_setup_arch(void) /* Some mpc5200 & mpc5200b related configuration */ mpc5200_setup_xlb_arbiter(); - - mpc52xx_setup_pci(); } /* list of the supported boards */ @@ -73,6 +71,7 @@ define_machine(mpc5200_simple_platform) { .name = "mpc5200-simple-platform", .probe = mpc5200_simple_probe, .setup_arch = mpc5200_simple_setup_arch, + .discover_phbs = mpc52xx_setup_pci, .init = mpc52xx_declare_of_platform_devices, .init_IRQ = mpc52xx_init_irq, .get_irq = mpc52xx_get_irq, diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c index 05e19470d523..b91ebebd9ff2 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c @@ -229,7 +229,7 @@ static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id) int dma, write, poll_dma; spin_lock_irqsave(&lpbfifo.lock, flags); - ts = get_tbl(); + ts = mftb(); req = lpbfifo.req; if (!req) { @@ -307,7 +307,7 @@ static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id) if (irq != 0) /* don't increment on polled case */ req->irq_count++; - req->irq_ticks += get_tbl() - ts; + req->irq_ticks += mftb() - ts; spin_unlock_irqrestore(&lpbfifo.lock, flags); /* Spinlock is released; it is now safe to call the callback */ @@ -330,7 +330,7 @@ static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id) u32 ts; spin_lock_irqsave(&lpbfifo.lock, flags); - ts = get_tbl(); + ts = mftb(); req = lpbfifo.req; if (!req || (req->flags & MPC52XX_LPBFIFO_FLAG_NO_DMA)) { @@ -361,7 +361,7 @@ static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id) lpbfifo.req = NULL; /* Release the lock before calling out to the callback. */ - req->irq_ticks += get_tbl() - ts; + req->irq_ticks += mftb() - ts; spin_unlock_irqrestore(&lpbfifo.lock, flags); if (req->callback) diff --git a/arch/powerpc/platforms/82xx/mpc8272_ads.c b/arch/powerpc/platforms/82xx/mpc8272_ads.c index 3fe1a6593280..0b5b9dec16d5 100644 --- a/arch/powerpc/platforms/82xx/mpc8272_ads.c +++ b/arch/powerpc/platforms/82xx/mpc8272_ads.c @@ -171,7 +171,6 @@ static void __init mpc8272_ads_setup_arch(void) iounmap(bcsr); init_ioports(); - pq2_init_pci(); if (ppc_md.progress) ppc_md.progress("mpc8272_ads_setup_arch(), finish", 0); @@ -205,6 +204,7 @@ define_machine(mpc8272_ads) .name = "Freescale MPC8272 ADS", .probe = mpc8272_ads_probe, .setup_arch = mpc8272_ads_setup_arch, + .discover_phbs = pq2_init_pci, .init_IRQ = mpc8272_ads_pic_init, .get_irq = cpm2_get_irq, .calibrate_decr = generic_calibrate_decr, diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c index 096cc0d59fd8..f82f75a6085c 100644 --- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c +++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c @@ -123,20 +123,17 @@ int __init pq2ads_pci_init_irq(void) np = of_find_compatible_node(NULL, NULL, "fsl,pq2ads-pci-pic"); if (!np) { printk(KERN_ERR "No pci pic node in device tree.\n"); - of_node_put(np); goto out; } irq = irq_of_parse_and_map(np, 0); if (!irq) { printk(KERN_ERR "No interrupt in pci pic node.\n"); - of_node_put(np); - goto out; + goto out_put_node; } priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) { - of_node_put(np); ret = -ENOMEM; goto out_unmap_irq; } @@ -161,17 +158,17 @@ int __init pq2ads_pci_init_irq(void) priv->host = host; irq_set_handler_data(irq, priv); irq_set_chained_handler(irq, pq2ads_pci_irq_demux); - - of_node_put(np); - return 0; + ret = 0; + goto out_put_node; out_unmap_regs: 
iounmap(priv->regs); out_free_kmalloc: kfree(priv); - of_node_put(np); out_unmap_irq: irq_dispose_mapping(irq); +out_put_node: + of_node_put(np); out: return ret; } diff --git a/arch/powerpc/platforms/82xx/pq2fads.c b/arch/powerpc/platforms/82xx/pq2fads.c index a74082140718..ac9113d524af 100644 --- a/arch/powerpc/platforms/82xx/pq2fads.c +++ b/arch/powerpc/platforms/82xx/pq2fads.c @@ -150,8 +150,6 @@ static void __init pq2fads_setup_arch(void) /* Enable external IRQs */ clrbits32(&cpm2_immr->im_siu_conf.siu_82xx.sc_siumcr, 0x0c000000); - pq2_init_pci(); - if (ppc_md.progress) ppc_md.progress("pq2fads_setup_arch(), finish", 0); } @@ -184,6 +182,7 @@ define_machine(pq2fads) .name = "Freescale PQ2FADS", .probe = pq2fads_probe, .setup_arch = pq2fads_setup_arch, + .discover_phbs = pq2_init_pci, .init_IRQ = pq2fads_pic_init, .get_irq = cpm2_get_irq, .calibrate_decr = generic_calibrate_decr, diff --git a/arch/powerpc/platforms/83xx/asp834x.c b/arch/powerpc/platforms/83xx/asp834x.c index 28474876f41b..68061c2a57c1 100644 --- a/arch/powerpc/platforms/83xx/asp834x.c +++ b/arch/powerpc/platforms/83xx/asp834x.c @@ -44,6 +44,7 @@ define_machine(asp834x) { .name = "ASP8347E", .probe = asp834x_probe, .setup_arch = asp834x_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c index bcdc2c203ec9..108e1e4d2683 100644 --- a/arch/powerpc/platforms/83xx/km83xx.c +++ b/arch/powerpc/platforms/83xx/km83xx.c @@ -180,6 +180,7 @@ define_machine(mpc83xx_km) { .name = "mpc83xx-km-platform", .probe = mpc83xx_km_probe, .setup_arch = mpc83xx_km_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c index a952e91db3ee..3285dabcf923 100644 --- a/arch/powerpc/platforms/83xx/misc.c +++ b/arch/powerpc/platforms/83xx/misc.c @@ -132,8 +132,6 @@ void __init mpc83xx_setup_arch(void) setbat(-1, va, immrbase, immrsize, PAGE_KERNEL_NCG); update_bats(); } - - mpc83xx_setup_pci(); } int machine_check_83xx(struct pt_regs *regs) diff --git a/arch/powerpc/platforms/83xx/mpc830x_rdb.c b/arch/powerpc/platforms/83xx/mpc830x_rdb.c index 51426e88ec67..956d4389effa 100644 --- a/arch/powerpc/platforms/83xx/mpc830x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc830x_rdb.c @@ -48,6 +48,7 @@ define_machine(mpc830x_rdb) { .name = "MPC830x RDB", .probe = mpc830x_rdb_probe, .setup_arch = mpc830x_rdb_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc831x_rdb.c b/arch/powerpc/platforms/83xx/mpc831x_rdb.c index 5ccd57a48492..3b578f080e3b 100644 --- a/arch/powerpc/platforms/83xx/mpc831x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc831x_rdb.c @@ -48,6 +48,7 @@ define_machine(mpc831x_rdb) { .name = "MPC831x RDB", .probe = mpc831x_rdb_probe, .setup_arch = mpc831x_rdb_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c index 6fa5402ebf20..850d566ef900 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc832x_mds.c @@ -101,6 +101,7 @@ define_machine(mpc832x_mds) { .name = "MPC832x 
MDS", .probe = mpc832x_sys_probe, .setup_arch = mpc832x_sys_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c index 622c625d5ce4..b6133a237a70 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -219,6 +219,7 @@ define_machine(mpc832x_rdb) { .name = "MPC832x RDB", .probe = mpc832x_rdb_probe, .setup_arch = mpc832x_rdb_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c index ebfd139bca20..9630f3aa4d9c 100644 --- a/arch/powerpc/platforms/83xx/mpc834x_itx.c +++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c @@ -70,6 +70,7 @@ define_machine(mpc834x_itx) { .name = "MPC834x ITX", .probe = mpc834x_itx_probe, .setup_arch = mpc834x_itx_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc834x_mds.c b/arch/powerpc/platforms/83xx/mpc834x_mds.c index 356228e35279..6d91bdce0a18 100644 --- a/arch/powerpc/platforms/83xx/mpc834x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc834x_mds.c @@ -91,6 +91,7 @@ define_machine(mpc834x_mds) { .name = "MPC834x MDS", .probe = mpc834x_mds_probe, .setup_arch = mpc834x_mds_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c index 90d9cbfae659..da4cf52cb55b 100644 --- a/arch/powerpc/platforms/83xx/mpc836x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c @@ -201,6 +201,7 @@ define_machine(mpc836x_mds) { .name = "MPC836x MDS", .probe = mpc836x_mds_probe, .setup_arch = mpc836x_mds_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc836x_rdk.c b/arch/powerpc/platforms/83xx/mpc836x_rdk.c index b4aac2cde849..3427ad0d9d38 100644 --- a/arch/powerpc/platforms/83xx/mpc836x_rdk.c +++ b/arch/powerpc/platforms/83xx/mpc836x_rdk.c @@ -41,6 +41,7 @@ define_machine(mpc836x_rdk) { .name = "MPC836x RDK", .probe = mpc836x_rdk_probe, .setup_arch = mpc836x_rdk_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc837x_mds.c b/arch/powerpc/platforms/83xx/mpc837x_mds.c index 9d3721c965be..f28d166ea7db 100644 --- a/arch/powerpc/platforms/83xx/mpc837x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc837x_mds.c @@ -93,6 +93,7 @@ define_machine(mpc837x_mds) { .name = "MPC837x MDS", .probe = mpc837x_mds_probe, .setup_arch = mpc837x_mds_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc837x_rdb.c b/arch/powerpc/platforms/83xx/mpc837x_rdb.c index 7c45f7ac2607..7fb7684c256b 100644 --- a/arch/powerpc/platforms/83xx/mpc837x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc837x_rdb.c @@ -73,6 +73,7 @@ define_machine(mpc837x_rdb) { .name = "MPC837x RDB/WLAN", .probe = mpc837x_rdb_probe, .setup_arch = 
mpc837x_rdb_setup_arch, + .discover_phbs = mpc83xx_setup_pci, .init_IRQ = mpc83xx_ipic_init_IRQ, .get_irq = ipic_get_irq, .restart = mpc83xx_restart, diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h index f37d04332fc7..a30d30588cf6 100644 --- a/arch/powerpc/platforms/83xx/mpc83xx.h +++ b/arch/powerpc/platforms/83xx/mpc83xx.h @@ -76,7 +76,7 @@ extern void mpc83xx_ipic_init_IRQ(void); #ifdef CONFIG_PCI extern void mpc83xx_setup_pci(void); #else -#define mpc83xx_setup_pci() do {} while (0) +#define mpc83xx_setup_pci NULL #endif extern int mpc83xx_declare_of_platform_devices(void); diff --git a/arch/powerpc/platforms/8xx/machine_check.c b/arch/powerpc/platforms/8xx/machine_check.c index 88dedf38eccd..656365975895 100644 --- a/arch/powerpc/platforms/8xx/machine_check.c +++ b/arch/powerpc/platforms/8xx/machine_check.c @@ -26,7 +26,7 @@ int machine_check_8xx(struct pt_regs *regs) * to deal with that than having a wart in the mcheck handler. * -- BenH */ - bad_page_fault(regs, regs->dar, SIGBUS); + bad_page_fault(regs, SIGBUS); return 1; #else return 0; diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c index f5d0bf999759..9d252c554f7f 100644 --- a/arch/powerpc/platforms/amigaone/setup.c +++ b/arch/powerpc/platforms/amigaone/setup.c @@ -66,6 +66,12 @@ static int __init amigaone_add_bridge(struct device_node *dev) void __init amigaone_setup_arch(void) { + if (ppc_md.progress) + ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0); +} + +static void __init amigaone_discover_phbs(void) +{ struct device_node *np; int phb = -ENODEV; @@ -74,9 +80,6 @@ void __init amigaone_setup_arch(void) phb = amigaone_add_bridge(np); BUG_ON(phb != 0); - - if (ppc_md.progress) - ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0); } void __init amigaone_init_IRQ(void) @@ -159,6 +162,7 @@ define_machine(amigaone) { .name = "AmigaOne", .probe = amigaone_probe, .setup_arch = amigaone_setup_arch, + .discover_phbs = amigaone_discover_phbs, .show_cpuinfo = amigaone_show_cpuinfo, .init_IRQ = amigaone_init_IRQ, .restart = amigaone_restart, diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c index 9068edef71f7..5b9a7e9f144b 100644 --- a/arch/powerpc/platforms/cell/pervasive.c +++ b/arch/powerpc/platforms/cell/pervasive.c @@ -25,6 +25,7 @@ #include <asm/cpu_has_feature.h> #include "pervasive.h" +#include "ras.h" static void cbe_power_save(void) { diff --git a/arch/powerpc/platforms/cell/pervasive.h b/arch/powerpc/platforms/cell/pervasive.h index c6fccad6caee..0da74ab10716 100644 --- a/arch/powerpc/platforms/cell/pervasive.h +++ b/arch/powerpc/platforms/cell/pervasive.h @@ -13,9 +13,6 @@ #define PERVASIVE_H extern void cbe_pervasive_init(void); -extern void cbe_system_error_exception(struct pt_regs *regs); -extern void cbe_maintenance_exception(struct pt_regs *regs); -extern void cbe_thermal_exception(struct pt_regs *regs); #ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON extern int cbe_sysreset_hack(void); diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c index 6ea480539419..4325c05bedd9 100644 --- a/arch/powerpc/platforms/cell/ras.c +++ b/arch/powerpc/platforms/cell/ras.c @@ -49,7 +49,7 @@ static void dump_fir(int cpu) } -void cbe_system_error_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(cbe_system_error_exception) { int cpu = smp_processor_id(); @@ -58,7 +58,7 @@ void cbe_system_error_exception(struct pt_regs *regs) dump_stack(); } -void cbe_maintenance_exception(struct 
pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(cbe_maintenance_exception) { int cpu = smp_processor_id(); @@ -70,7 +70,7 @@ void cbe_maintenance_exception(struct pt_regs *regs) dump_stack(); } -void cbe_thermal_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(cbe_thermal_exception) { int cpu = smp_processor_id(); diff --git a/arch/powerpc/platforms/cell/ras.h b/arch/powerpc/platforms/cell/ras.h index 6c2e6bc0062e..226dbd48efad 100644 --- a/arch/powerpc/platforms/cell/ras.h +++ b/arch/powerpc/platforms/cell/ras.h @@ -2,9 +2,12 @@ #ifndef RAS_H #define RAS_H -extern void cbe_system_error_exception(struct pt_regs *regs); -extern void cbe_maintenance_exception(struct pt_regs *regs); -extern void cbe_thermal_exception(struct pt_regs *regs); +#include <asm/interrupt.h> + +DECLARE_INTERRUPT_HANDLER(cbe_system_error_exception); +DECLARE_INTERRUPT_HANDLER(cbe_maintenance_exception); +DECLARE_INTERRUPT_HANDLER(cbe_thermal_exception); + extern void cbe_ras_init(void); #endif /* RAS_H */ diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c index b2c2bf35b76c..8c421dc78b28 100644 --- a/arch/powerpc/platforms/chrp/pci.c +++ b/arch/powerpc/platforms/chrp/pci.c @@ -314,6 +314,14 @@ chrp_find_bridges(void) } } of_node_put(root); + + /* + * "Temporary" fixes for PCI devices. + * -- Geert + */ + hydra_init(); /* Mac I/O */ + + pci_create_OF_bus_map(); } /* SL82C105 IDE Control/Status Register */ diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index c45435aa5e36..3cfc382841e5 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -334,22 +334,11 @@ static void __init chrp_setup_arch(void) /* On pegasos, enable the L2 cache if not already done by OF */ pegasos_set_l2cr(); - /* Lookup PCI host bridges */ - chrp_find_bridges(); - - /* - * Temporary fixes for PCI devices. - * -- Geert - */ - hydra_init(); /* Mac I/O */ - /* * Fix the Super I/O configuration */ sio_init(); - pci_create_OF_bus_map(); - /* * Print the banner, then scroll down so boot progress * can be printed. 
-- Cort @@ -582,6 +571,7 @@ define_machine(chrp) { .name = "CHRP", .probe = chrp_probe, .setup_arch = chrp_setup_arch, + .discover_phbs = chrp_find_bridges, .init = chrp_init2, .show_cpuinfo = chrp_show_cpuinfo, .init_IRQ = chrp_init_IRQ, diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index d8f2e2c737bb..53065d564161 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -108,15 +108,13 @@ static void holly_remap_bridge(void) tsi108_write_reg(TSI108_PCI_P2O_BAR2, 0x0); } -static void __init holly_setup_arch(void) +static void __init holly_init_pci(void) { struct device_node *np; if (ppc_md.progress) ppc_md.progress("holly_setup_arch():set_bridge", 0); - tsi108_csr_vir_base = get_vir_csrbase(); - /* setup PCI host bridge */ holly_remap_bridge(); @@ -127,6 +125,11 @@ static void __init holly_setup_arch(void) ppc_md.pci_exclude_device = holly_exclude_device; if (ppc_md.progress) ppc_md.progress("tsi108: resources set", 0x100); +} + +static void __init holly_setup_arch(void) +{ + tsi108_csr_vir_base = get_vir_csrbase(); printk(KERN_INFO "PPC750GX/CL Platform\n"); } @@ -259,6 +262,7 @@ define_machine(holly){ .name = "PPC750 GX/CL TSI", .probe = holly_probe, .setup_arch = holly_setup_arch, + .discover_phbs = holly_init_pci, .init_IRQ = holly_init_IRQ, .show_cpuinfo = holly_show_cpuinfo, .get_irq = mpic_get_irq, diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c index f514d5d28cd4..eb8342e7f84e 100644 --- a/arch/powerpc/platforms/embedded6xx/linkstation.c +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -64,14 +64,17 @@ static int __init linkstation_add_bridge(struct device_node *dev) static void __init linkstation_setup_arch(void) { + printk(KERN_INFO "BUFFALO Network Attached Storage Series\n"); + printk(KERN_INFO "(C) 2002-2005 BUFFALO INC.\n"); +} + +static void __init linkstation_setup_pci(void) +{ struct device_node *np; /* Lookup PCI host bridges */ for_each_compatible_node(np, "pci", "mpc10x-pci") linkstation_add_bridge(np); - - printk(KERN_INFO "BUFFALO Network Attached Storage Series\n"); - printk(KERN_INFO "(C) 2002-2005 BUFFALO INC.\n"); } /* @@ -153,6 +156,7 @@ define_machine(linkstation){ .name = "Buffalo Linkstation", .probe = linkstation_probe, .setup_arch = linkstation_setup_arch, + .discover_phbs = linkstation_setup_pci, .init_IRQ = linkstation_init_IRQ, .show_cpuinfo = linkstation_show_cpuinfo, .get_irq = mpic_get_irq, diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index b95c3380d2b5..5565647dc879 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -58,16 +58,14 @@ int mpc7448_hpc2_exclude_device(struct pci_controller *hose, return PCIBIOS_SUCCESSFUL; } -static void __init mpc7448_hpc2_setup_arch(void) +static void __init mpc7448_hpc2_setup_pci(void) { +#ifdef CONFIG_PCI struct device_node *np; if (ppc_md.progress) - ppc_md.progress("mpc7448_hpc2_setup_arch():set_bridge", 0); - - tsi108_csr_vir_base = get_vir_csrbase(); + ppc_md.progress("mpc7448_hpc2_setup_pci():set_bridge", 0); /* setup PCI host bridge */ -#ifdef CONFIG_PCI for_each_compatible_node(np, "pci", "tsi108-pci") tsi108_setup_pci(np, MPC7448HPC2_PCI_CFG_PHYS, 0); @@ -75,6 +73,11 @@ static void __init mpc7448_hpc2_setup_arch(void) if (ppc_md.progress) ppc_md.progress("tsi108: resources set", 0x100); #endif 
+} + +static void __init mpc7448_hpc2_setup_arch(void) +{ + tsi108_csr_vir_base = get_vir_csrbase(); printk(KERN_INFO "MPC7448HPC2 (TAIGA) Platform\n"); printk(KERN_INFO @@ -181,6 +184,7 @@ define_machine(mpc7448_hpc2){ .name = "MPC7448 HPC2", .probe = mpc7448_hpc2_probe, .setup_arch = mpc7448_hpc2_setup_arch, + .discover_phbs = mpc7448_hpc2_setup_pci, .init_IRQ = mpc7448_hpc2_init_IRQ, .show_cpuinfo = mpc7448_hpc2_show_cpuinfo, .get_irq = mpic_get_irq, diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c index 1cd488daa0bf..c06a0490d157 100644 --- a/arch/powerpc/platforms/embedded6xx/mvme5100.c +++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c @@ -154,17 +154,19 @@ static const struct of_device_id mvme5100_of_bus_ids[] __initconst = { */ static void __init mvme5100_setup_arch(void) { - struct device_node *np; - if (ppc_md.progress) ppc_md.progress("mvme5100_setup_arch()", 0); - for_each_compatible_node(np, "pci", "hawk-pci") - mvme5100_add_bridge(np); - restart = ioremap(BOARD_MODRST_REG, 4); } +static void __init mvme5100_setup_pci(void) +{ + struct device_node *np; + + for_each_compatible_node(np, "pci", "hawk-pci") + mvme5100_add_bridge(np); +} static void mvme5100_show_cpuinfo(struct seq_file *m) { @@ -205,6 +207,7 @@ define_machine(mvme5100) { .name = "MVME5100", .probe = mvme5100_probe, .setup_arch = mvme5100_setup_arch, + .discover_phbs = mvme5100_setup_pci, .init_IRQ = mvme5100_pic_init, .show_cpuinfo = mvme5100_show_cpuinfo, .get_irq = mpic_get_irq, diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c index e346ddcef45e..e188b90f7016 100644 --- a/arch/powerpc/platforms/embedded6xx/storcenter.c +++ b/arch/powerpc/platforms/embedded6xx/storcenter.c @@ -66,13 +66,16 @@ static int __init storcenter_add_bridge(struct device_node *dev) static void __init storcenter_setup_arch(void) { + printk(KERN_INFO "IOMEGA StorCenter\n"); +} + +static void __init storcenter_setup_pci(void) +{ struct device_node *np; /* Lookup PCI host bridges */ for_each_compatible_node(np, "pci", "mpc10x-pci") storcenter_add_bridge(np); - - printk(KERN_INFO "IOMEGA StorCenter\n"); } /* @@ -117,6 +120,7 @@ define_machine(storcenter){ .name = "IOMEGA StorCenter", .probe = storcenter_probe, .setup_arch = storcenter_setup_arch, + .discover_phbs = storcenter_setup_pci, .init_IRQ = storcenter_init_IRQ, .get_irq = mpic_get_irq, .restart = storcenter_restart, diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index c86a66d5e998..a20b9576de22 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -536,6 +536,9 @@ static int __init maple_add_bridge(struct device_node *dev) /* Check for legacy IOs */ isa_bridge_find_early(hose); + /* create pci_dn's for DT nodes under this PHB */ + pci_devs_phb_init_dynamic(hose); + return 0; } diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index f7e66a2005b4..4e9ad5bf3efb 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -179,9 +179,6 @@ static void __init maple_setup_arch(void) #ifdef CONFIG_SMP smp_ops = &maple_smp_ops; #endif - /* Lookup PCI hosts */ - maple_pci_init(); - maple_use_rtas_reboot_and_halt_if_present(); printk(KERN_DEBUG "Using native/NAP idle loop\n"); @@ -351,6 +348,7 @@ define_machine(maple) { .name = "Maple", .probe = maple_probe, .setup_arch = maple_setup_arch, + .discover_phbs = maple_pci_init, 
.init_IRQ = maple_init_IRQ, .pci_irq_fixup = maple_pci_irq_fixup, .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index b612474f8f8e..376797eb7894 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -144,8 +144,6 @@ static void __init pas_setup_arch(void) /* Setup SMP callback */ smp_ops = &pas_smp_ops; #endif - /* Lookup PCI hosts */ - pas_pci_init(); /* Remap SDC register for doing reset */ /* XXXOJN This should maybe come out of the device tree */ @@ -446,6 +444,7 @@ define_machine(pasemi) { .name = "PA Semi PWRficient", .probe = pas_probe, .setup_arch = pas_setup_arch, + .discover_phbs = pas_pci_init, .init_IRQ = pas_init_IRQ, .get_irq = mpic_get_irq, .restart = pas_restart, diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index e35eaa9cf938..e9abe0f2e7f0 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -850,6 +850,10 @@ static int __init pmac_add_bridge(struct device_node *dev) /* Fixup "bus-range" OF property */ fixup_bus_range(dev); + /* create pci_dn's for DT nodes under this PHB */ + if (IS_ENABLED(CONFIG_PPC64)) + pci_devs_phb_init_dynamic(hose); + return 0; } diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 2e2cc0c75d87..86aee3f2483f 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -298,9 +298,6 @@ static void __init pmac_setup_arch(void) of_node_put(ic); } - /* Lookup PCI hosts */ - pmac_pci_init(); - #ifdef CONFIG_PPC32 ohare_init(); l2cr_init(); @@ -600,6 +597,7 @@ define_machine(powermac) { .name = "PowerMac", .probe = pmac_probe, .setup_arch = pmac_setup_arch, + .discover_phbs = pmac_pci_init, .show_cpuinfo = pmac_show_cpuinfo, .init_IRQ = pmac_pic_init, .get_irq = NULL, /* changed later */ diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index e6f461812856..999997d9e9a9 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -14,6 +14,7 @@ #include <asm/asm-prototypes.h> #include <asm/firmware.h> +#include <asm/interrupt.h> #include <asm/machdep.h> #include <asm/opal.h> #include <asm/cputhreads.h> diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 5fc9408bb0b3..019669eb21d2 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -19,6 +19,7 @@ #include <linux/numa.h> #include <asm/machdep.h> #include <asm/debugfs.h> +#include <asm/cacheflush.h> /* This enables us to keep track of the memory removed from each node. */ struct memtrace_entry { @@ -51,6 +52,27 @@ static const struct file_operations memtrace_fops = { .open = simple_open, }; +#define FLUSH_CHUNK_SIZE SZ_1G +/** + * flush_dcache_range_chunked(): Write any modified data cache blocks out to + * memory and invalidate them, in chunks of up to FLUSH_CHUNK_SIZE + * Does not invalidate the corresponding instruction cache blocks. 
+ * + * @start: the start address + * @stop: the stop address (exclusive) + * @chunk: the max size of the chunks + */ +static void flush_dcache_range_chunked(unsigned long start, unsigned long stop, + unsigned long chunk) +{ + unsigned long i; + + for (i = start; i < stop; i += chunk) { + flush_dcache_range(i, min(stop, i + chunk)); + cond_resched(); + } +} + static void memtrace_clear_range(unsigned long start_pfn, unsigned long nr_pages) { @@ -62,6 +84,13 @@ static void memtrace_clear_range(unsigned long start_pfn, cond_resched(); clear_page(__va(PFN_PHYS(pfn))); } + /* + * Before we go ahead and use this range as cache inhibited range + * flush the cache. + */ + flush_dcache_range_chunked(PFN_PHYS(start_pfn), + PFN_PHYS(start_pfn + nr_pages), + FLUSH_CHUNK_SIZE); } static u64 memtrace_alloc_node(u32 nid, u64 size) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index c61c3b62c8c6..303d7c775740 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -624,7 +624,7 @@ static int opal_recover_mce(struct pt_regs *regs, */ recovered = 0; } else { - die("Machine check", regs, SIGBUS); + die_mce("Machine check", regs, SIGBUS); recovered = 1; } } diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c index 5218f5da2737..30551bbd7988 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c @@ -380,6 +380,8 @@ void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, /* Remove link to a group from table's list of attached groups */ found = false; + + rcu_read_lock(); list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { if (tgl->table_group == table_group) { list_del_rcu(&tgl->next); @@ -388,6 +390,8 @@ void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, break; } } + rcu_read_unlock(); + if (WARN_ON(!found)) return; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index c4f72cdc9b51..f0f901683a2f 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2402,9 +2402,6 @@ static void pnv_pci_ioda_create_dbgfs(void) list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { phb = hose->private_data; - /* Notify initialization of PHB done */ - phb->initialized = 1; - sprintf(name, "PCI%04x", hose->global_number); phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); @@ -2601,17 +2598,8 @@ static resource_size_t pnv_pci_default_alignment(void) */ static bool pnv_pci_enable_device_hook(struct pci_dev *dev) { - struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); struct pci_dn *pdn; - /* The function is probably called while the PEs have - * not be created yet. For example, resource reassignment - * during PCI probe period. We just skip the check if - * PEs isn't ready. 
- */ - if (!phb->initialized) - return true; - pdn = pci_get_pdn(dev); if (!pdn || pdn->pe_number == IODA_INVALID_PE) { pci_err(dev, "pci_enable_device() blocked, no PE assigned.\n"); @@ -2623,14 +2611,9 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev) static bool pnv_ocapi_enable_device_hook(struct pci_dev *dev) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; struct pci_dn *pdn; struct pnv_ioda_pe *pe; - if (!phb->initialized) - return true; - pdn = pci_get_pdn(dev); if (!pdn) return false; @@ -2938,7 +2921,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb_id = be64_to_cpup(prop64); pr_debug(" PHB-ID : 0x%016llx\n", phb_id); - phb = memblock_alloc(sizeof(*phb), SMP_CACHE_BYTES); + phb = kzalloc(sizeof(*phb), GFP_KERNEL); if (!phb) panic("%s: Failed to allocate %zu bytes\n", __func__, sizeof(*phb)); @@ -2987,7 +2970,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, else phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE; - phb->diag_data = memblock_alloc(phb->diag_data_size, SMP_CACHE_BYTES); + phb->diag_data = kzalloc(phb->diag_data_size, GFP_KERNEL); if (!phb->diag_data) panic("%s: Failed to allocate %u bytes\n", __func__, phb->diag_data_size); @@ -3049,9 +3032,10 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, } pemap_off = size; size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe); - aux = memblock_alloc(size, SMP_CACHE_BYTES); + aux = kzalloc(size, GFP_KERNEL); if (!aux) panic("%s: Failed to allocate %lu bytes\n", __func__, size); + phb->ioda.pe_alloc = aux; phb->ioda.m64_segmap = aux + m64map_off; phb->ioda.m32_segmap = aux + m32map_off; @@ -3178,6 +3162,9 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, /* Remove M64 resource if we can't configure it successfully */ if (!phb->init_m64 || phb->init_m64(phb)) hose->mem_resources[1].flags = 0; + + /* create pci_dn's for DT nodes under this PHB */ + pci_devs_phb_init_dynamic(hose); } void __init pnv_pci_init_ioda2_phb(struct device_node *np) diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 739a0b3b72e1..36d22920f5a3 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -119,7 +119,6 @@ struct pnv_phb { int flags; void __iomem *regs; u64 regs_phys; - int initialized; spinlock_t lock; #ifdef CONFIG_DEBUG_FS diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 4426a109ec2f..aadf932c4e61 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -180,9 +180,6 @@ static void __init pnv_setup_arch(void) /* Initialize SMP */ pnv_smp_init(); - /* Setup PCI */ - pnv_pci_init(); - /* Setup RTC and NVRAM callbacks */ if (firmware_has_feature(FW_FEATURE_OPAL)) opal_nvram_init(); @@ -547,6 +544,7 @@ define_machine(powernv) { .init_IRQ = pnv_init_IRQ, .show_cpuinfo = pnv_show_cpuinfo, .get_proc_freq = pnv_get_proc_freq, + .discover_phbs = pnv_pci_init, .progress = pnv_progress, .machine_shutdown = pnv_shutdown, .power_save = NULL, diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h index c8f574d1c04a..77feee8436d4 100644 --- a/arch/powerpc/platforms/powernv/subcore.h +++ b/arch/powerpc/platforms/powernv/subcore.h @@ -15,7 +15,7 @@ void split_core_secondary_loop(u8 *state); extern void update_subcore_sibling_mask(void); #else -static inline void update_subcore_sibling_mask(void) { }; 
+static inline void update_subcore_sibling_mask(void) { } #endif /* CONFIG_SMP */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c index 598e4cd563fb..b65256a63e87 100644 --- a/arch/powerpc/platforms/powernv/vas.c +++ b/arch/powerpc/platforms/powernv/vas.c @@ -28,12 +28,10 @@ static DEFINE_PER_CPU(int, cpu_vas_id); static int vas_irq_fault_window_setup(struct vas_instance *vinst) { - char devname[64]; int rc = 0; - snprintf(devname, sizeof(devname), "vas-%d", vinst->vas_id); rc = request_threaded_irq(vinst->virq, vas_fault_handler, - vas_fault_thread_fn, 0, devname, vinst); + vas_fault_thread_fn, 0, vinst->name, vinst); if (rc) { pr_err("VAS[%d]: Request IRQ(%d) failed with %d\n", @@ -80,6 +78,12 @@ static int init_vas_instance(struct platform_device *pdev) if (!vinst) return -ENOMEM; + vinst->name = kasprintf(GFP_KERNEL, "vas-%d", vasid); + if (!vinst->name) { + kfree(vinst); + return -ENOMEM; + } + INIT_LIST_HEAD(&vinst->node); ida_init(&vinst->ida); mutex_init(&vinst->mutex); @@ -162,6 +166,7 @@ static int init_vas_instance(struct platform_device *pdev) return 0; free_vinst: + kfree(vinst->name); kfree(vinst); return -ENODEV; diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index 70f793e8f6cc..c7db3190baca 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -340,6 +340,7 @@ struct vas_instance { struct vas_window *rxwin[VAS_COP_TYPE_MAX]; struct vas_window *windows[VAS_WINDOWS_PER_CHIP]; + char *name; char *dbgname; struct dentry *dbgdir; }; diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 16e86ba8aa20..233503fcf8f0 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -127,7 +127,6 @@ void dlpar_free_cc_nodes(struct device_node *dn) #define NEXT_PROPERTY 3 #define PREV_PARENT 4 #define MORE_MEMORY 5 -#define CALL_AGAIN -2 #define ERR_CFG_USE -9003 struct device_node *dlpar_configure_connector(__be32 drc_index, @@ -168,6 +167,9 @@ struct device_node *dlpar_configure_connector(__be32 drc_index, spin_unlock(&rtas_data_buf_lock); + if (rtas_busy_delay(rc)) + continue; + switch (rc) { case COMPLETE: break; @@ -216,9 +218,6 @@ struct device_node *dlpar_configure_connector(__be32 drc_index, last_dn = last_dn->parent; break; - case CALL_AGAIN: - break; - case MORE_MEMORY: case ERR_CFG_USE: default: @@ -521,11 +520,8 @@ static ssize_t dlpar_store(struct class *class, struct class_attribute *attr, int rc; args = argbuf = kstrdup(buf, GFP_KERNEL); - if (!argbuf) { - pr_info("Could not allocate resources for DLPAR operation\n"); - kfree(argbuf); + if (!argbuf) return -ENOMEM; - } /* * Parse out the request from the user, this will be in the form: diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index cf024fa37bda..bc15200852b7 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -43,7 +43,7 @@ static int ibm_get_config_addr_info; static int ibm_get_config_addr_info2; static int ibm_configure_pe; -void pseries_pcibios_bus_add_device(struct pci_dev *pdev) +static void pseries_pcibios_bus_add_device(struct pci_dev *pdev) { struct pci_dn *pdn = pci_get_pdn(pdev); @@ -694,8 +694,7 @@ static int pseries_eeh_write_config(struct eeh_dev *edev, int where, int size, u } #ifdef CONFIG_PCI_IOV -int pseries_send_allow_unfreeze(struct pci_dn *pdn, - u16 
*vf_pe_array, int cur_vfs) +static int pseries_send_allow_unfreeze(struct pci_dn *pdn, u16 *vf_pe_array, int cur_vfs) { int rc; int ibm_allow_unfreeze = rtas_token("ibm,open-sriov-allow-unfreeze"); diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 72a4d4167849..1bffbd1c9a94 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -55,9 +55,8 @@ struct pe_map_bar_entry { __be32 reserved; /* Reserved Space */ }; -int pseries_send_map_pe(struct pci_dev *pdev, - u16 num_vfs, - struct pe_map_bar_entry *vf_pe_array) +static int pseries_send_map_pe(struct pci_dev *pdev, u16 num_vfs, + struct pe_map_bar_entry *vf_pe_array) { struct pci_dn *pdn; int rc; @@ -88,7 +87,7 @@ int pseries_send_map_pe(struct pci_dev *pdev, return rc; } -void pseries_set_pe_num(struct pci_dev *pdev, u16 vf_index, __be16 pe_num) +static void pseries_set_pe_num(struct pci_dev *pdev, u16 vf_index, __be16 pe_num) { struct pci_dn *pdn; @@ -102,7 +101,7 @@ void pseries_set_pe_num(struct pci_dev *pdev, u16 vf_index, __be16 pe_num) pdn->pe_num_map[vf_index]); } -int pseries_associate_pes(struct pci_dev *pdev, u16 num_vfs) +static int pseries_associate_pes(struct pci_dev *pdev, u16 num_vfs) { struct pci_dn *pdn; int i, rc, vf_index; @@ -146,7 +145,7 @@ int pseries_associate_pes(struct pci_dev *pdev, u16 num_vfs) return rc; } -int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +static int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) { struct pci_dn *pdn; int rc; @@ -189,14 +188,14 @@ int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) return rc; } -int pseries_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +static int pseries_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) { /* Allocate PCI data */ add_sriov_vf_pdns(pdev); return pseries_pci_sriov_enable(pdev, num_vfs); } -int pseries_pcibios_sriov_disable(struct pci_dev *pdev) +static int pseries_pcibios_sriov_disable(struct pci_dev *pdev) { struct pci_dn *pdn; diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 593840847cd3..4fe48c04c6c2 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -33,7 +33,7 @@ int smp_query_cpu_stopped(unsigned int pcpu); #define QCSS_HARDWARE_ERROR -1 #define QCSS_HARDWARE_BUSY -2 #else -static inline void smp_init_pseries(void) { }; +static inline void smp_init_pseries(void) { } #endif extern void pseries_kexec_cpu_down(int crash_shutdown, int secondary); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 149cec2212e6..f8b390a9d9fb 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -122,7 +122,7 @@ static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) * devices or systems (e.g. hugepages) that have not been initialized at the * subsys stage. */ -int __init init_ras_hotplug_IRQ(void) +static int __init init_ras_hotplug_IRQ(void) { struct device_node *np; @@ -315,12 +315,10 @@ static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id) /* Handle environmental and power warning (EPOW) interrupts. 
*/ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) { - int status; int state; int critical; - status = rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, - &state); + rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state); if (state > 3) critical = 1; /* Time Critical */ @@ -329,12 +327,9 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) spin_lock(&ras_log_buf_lock); - status = rtas_call(ras_check_exception_token, 6, 1, NULL, - RTAS_VECTOR_EXTERNAL_INTERRUPT, - virq_to_hw(irq), - RTAS_EPOW_WARNING, - critical, __pa(&ras_log_buf), - rtas_get_error_log_max()); + rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT, + virq_to_hw(irq), RTAS_EPOW_WARNING, critical, __pa(&ras_log_buf), + rtas_get_error_log_max()); log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); @@ -722,6 +717,7 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) struct pseries_errorlog *pseries_log; struct pseries_mc_errorlog *mce_log = NULL; int disposition = rtas_error_disposition(errp); + unsigned long msr; u8 error_type; if (!rtas_error_extended(errp)) @@ -747,9 +743,21 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) * SLB multihit is done by now. */ out: - mtmsr(mfmsr() | MSR_IR | MSR_DR); + msr = mfmsr(); + mtmsr(msr | MSR_IR | MSR_DR); + disposition = mce_handle_err_virtmode(regs, errp, mce_log, disposition); + + /* + * Queue irq work to log this rtas event later. + * irq_work_queue uses per-cpu variables, so do this in virt + * mode as well. + */ + irq_work_queue(&mce_errlog_process_work); + + mtmsr(msr); + return disposition; } @@ -813,7 +821,7 @@ static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt) */ recovered = 0; } else { - die("Machine check", regs, SIGBUS); + die_mce("Machine check", regs, SIGBUS); recovered = 1; } } @@ -865,10 +873,8 @@ long pseries_machine_check_realmode(struct pt_regs *regs) * virtual mode. */ disposition = mce_handle_error(regs, errp); - fwnmi_release_errinfo(); - /* Queue irq work to log this rtas event later. 
*/ - irq_work_queue(&mce_errlog_process_work); + fwnmi_release_errinfo(); if (disposition == RTAS_DISP_FULLY_RECOVERED) return 1; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 090c13f6c881..46e1540abc22 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -463,7 +463,7 @@ void pseries_little_endian_exceptions(void) } #endif -static void __init find_and_init_phbs(void) +static void __init pSeries_discover_phbs(void) { struct device_node *node; struct pci_controller *phb; @@ -481,6 +481,9 @@ static void __init find_and_init_phbs(void) pci_process_bridge_OF_ranges(phb, node, 0); isa_bridge_find_early(phb); phb->controller_ops = pseries_pci_controller_ops; + + /* create pci_dn's for DT nodes under this PHB */ + pci_devs_phb_init_dynamic(phb); } of_node_put(root); @@ -607,8 +610,8 @@ enum get_iov_fw_value_index { WDW_SIZE = 3 /* Get Window Size */ }; -resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno, - enum get_iov_fw_value_index value) +static resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno, + enum get_iov_fw_value_index value) { const int *indexes; struct device_node *dn = pci_device_to_OF_node(dev); @@ -643,7 +646,7 @@ resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno, return ret; } -void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes) +static void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes) { struct resource *res; resource_size_t base, size; @@ -665,7 +668,7 @@ void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes) } } -void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes) +static void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes) { struct resource *res, *root, *conflict; resource_size_t base, size; @@ -786,7 +789,6 @@ static void __init pSeries_setup_arch(void) /* Find and initialize PCI host bridges */ init_pci_config_tokens(); - find_and_init_phbs(); of_reconfig_notifier_register(&pci_dn_reconfig_nb); pSeries_nvram_init(); @@ -1050,6 +1052,7 @@ define_machine(pseries) { .init_IRQ = pseries_init_irq, .show_cpuinfo = pSeries_show_cpuinfo, .log_error = pSeries_log_error, + .discover_phbs = pSeries_discover_phbs, .pcibios_fixup = pSeries_final_fixup, .restart = rtas_restart, .halt = rtas_halt, diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index dcd817ca2edf..3fe37495f63d 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1383,7 +1383,6 @@ static long check_bp_loc(unsigned long addr) return 1; } -#ifndef CONFIG_PPC_8xx static int find_free_data_bpt(void) { int i; @@ -1395,7 +1394,6 @@ static int find_free_data_bpt(void) printf("Couldn't find free breakpoint register\n"); return -1; } -#endif static void print_data_bpts(void) { @@ -1435,7 +1433,6 @@ bpt_cmds(void) cmd = inchar(); switch (cmd) { -#ifndef CONFIG_PPC_8xx static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n"; int mode; case 'd': /* bd - hardware data breakpoint */ @@ -1497,7 +1494,6 @@ bpt_cmds(void) force_enable_xmon(); } break; -#endif case 'c': if (!scanhex(&a)) { @@ -3723,7 +3719,7 @@ void dump_segments(void) printf("sr0-15 ="); for (i = 0; i < 16; ++i) - printf(" %x", mfsrin(i << 28)); + printf(" %x", mfsr(i << 28)); printf("\n"); } #endif diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c index 2a1783f32254..53b919856426 100644 --- a/drivers/misc/cxl/cxllib.c +++ b/drivers/misc/cxl/cxllib.c 
@@ -170,8 +170,6 @@ int cxllib_get_PE_attributes(struct task_struct *task, unsigned long translation_mode, struct cxllib_pe_attributes *attr) { - struct mm_struct *mm = NULL; - if (translation_mode != CXL_TRANSLATED_MODE && translation_mode != CXL_REAL_MODE) return -EINVAL; @@ -182,7 +180,7 @@ int cxllib_get_PE_attributes(struct task_struct *task, true); attr->lpid = mfspr(SPRN_LPID); if (task) { - mm = get_task_mm(task); + struct mm_struct *mm = get_task_mm(task); if (mm == NULL) return -EINVAL; /* diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c index 4d1b44de1492..e70525eedaae 100644 --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -15,7 +15,7 @@ static dev_t ocxl_dev; static struct class *ocxl_class; -static struct mutex minors_idr_lock; +static DEFINE_MUTEX(minors_idr_lock); static struct idr minors_idr; static struct ocxl_file_info *find_and_get_file_info(dev_t devno) @@ -588,7 +588,6 @@ int ocxl_file_init(void) { int rc; - mutex_init(&minors_idr_lock); idr_init(&minors_idr); rc = alloc_chrdev_region(&ocxl_dev, 0, OCXL_NUM_MINORS, "ocxl"); diff --git a/drivers/spi/spi-mpc52xx.c b/drivers/spi/spi-mpc52xx.c index 36f941500676..124cba7213f1 100644 --- a/drivers/spi/spi-mpc52xx.c +++ b/drivers/spi/spi-mpc52xx.c @@ -120,7 +120,7 @@ static void mpc52xx_spi_start_transfer(struct mpc52xx_spi *ms) ms->cs_change = ms->transfer->cs_change; /* Write out the first byte */ - ms->wcol_tx_timestamp = get_tbl(); + ms->wcol_tx_timestamp = mftb(); if (ms->tx_buf) out_8(ms->regs + SPI_DATA, *ms->tx_buf++); else @@ -221,8 +221,8 @@ static int mpc52xx_spi_fsmstate_transfer(int irq, struct mpc52xx_spi *ms, * but it can also be worked around simply by retrying the * transfer which is what we do here. */ ms->wcol_count++; - ms->wcol_ticks += get_tbl() - ms->wcol_tx_timestamp; - ms->wcol_tx_timestamp = get_tbl(); + ms->wcol_ticks += mftb() - ms->wcol_tx_timestamp; + ms->wcol_tx_timestamp = mftb(); data = 0; if (ms->tx_buf) data = *(ms->tx_buf - 1); @@ -247,7 +247,7 @@ static int mpc52xx_spi_fsmstate_transfer(int irq, struct mpc52xx_spi *ms, /* Is the transfer complete? */ ms->len--; if (ms->len == 0) { - ms->timestamp = get_tbl(); + ms->timestamp = mftb(); if (ms->transfer->delay.unit == SPI_DELAY_UNIT_USECS) ms->timestamp += ms->transfer->delay.value * tb_ticks_per_usec; @@ -256,7 +256,7 @@ static int mpc52xx_spi_fsmstate_transfer(int irq, struct mpc52xx_spi *ms, } /* Write out the next byte */ - ms->wcol_tx_timestamp = get_tbl(); + ms->wcol_tx_timestamp = mftb(); if (ms->tx_buf) out_8(ms->regs + SPI_DATA, *ms->tx_buf++); else @@ -278,7 +278,7 @@ mpc52xx_spi_fsmstate_wait(int irq, struct mpc52xx_spi *ms, u8 status, u8 data) dev_err(&ms->master->dev, "spurious irq, status=0x%.2x\n", status); - if (((int)get_tbl()) - ms->timestamp < 0) + if (((int)mftb()) - ms->timestamp < 0) return FSM_POLL; ms->message->actual_length += ms->transfer->len; diff --git a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh index 0d783e1065c8..442b666ccdb5 100755 --- a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh +++ b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh @@ -1,28 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0-only -KSELFTESTS_SKIP=4 - . ./eeh-functions.sh -if ! eeh_supported ; then - echo "EEH not supported on this system, skipping" - exit $KSELFTESTS_SKIP; -fi - -if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \ - [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then - echo "debugfs EEH testing files are missing. 
Is debugfs mounted?"
-	exit $KSELFTESTS_SKIP;
-fi
+eeh_test_prep # NB: may exit
 
 pre_lspci=`mktemp`
 lspci > $pre_lspci
 
-# Bump the max freeze count to something absurd so we don't
-# trip over it while breaking things.
-echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes
-
 # record the devices that we break in here. Assuming everything
 # goes to plan we should get them back once the recover process
 # is finished.
@@ -30,34 +15,16 @@ devices=""
 
 # Build up a list of candidate devices.
 for dev in `ls -1 /sys/bus/pci/devices/ | grep '\.0$'` ; do
-	# skip bridges since we can't recover them (yet...)
-	if [ -e "/sys/bus/pci/devices/$dev/pci_bus" ] ; then
-		echo "$dev, Skipped: bridge"
+	if ! eeh_can_break $dev ; then
 		continue;
 	fi
 
-	# Skip VFs for now since we don't have a reliable way
-	# to break them.
+	# Skip VFs for now since we don't have a reliable way to break them.
 	if [ -e "/sys/bus/pci/devices/$dev/physfn" ] ; then
 		echo "$dev, Skipped: virtfn"
 		continue;
 	fi
 
-	if [ "ahci" = "$(basename $(realpath /sys/bus/pci/devices/$dev/driver))" ] ; then
-		echo "$dev, Skipped: ahci doesn't support recovery"
-		continue
-	fi
-
-	# Don't inject errosr into an already-frozen PE. This happens with
-	# PEs that contain multiple PCI devices (e.g. multi-function cards)
-	# and injecting new errors during the recovery process will probably
-	# result in the recovery failing and the device being marked as
-	# failed.
-	if ! pe_ok $dev ; then
-		echo "$dev, Skipped: Bad initial PE state"
-		continue;
-	fi
-
 	echo "$dev, Added"
 
 	# Add to this list of device to check
@@ -86,5 +53,5 @@ echo "$failed devices failed to recover ($dev_count tested)"
 lspci | diff -u $pre_lspci -
 rm -f $pre_lspci
 
-test "$failed" == 0
+test "$failed" -eq 0
 exit $?
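Note: after this change, eeh-basic.sh delegates all of the per-device filtering, except the VF check, to eeh_can_break() in eeh-functions.sh (next diff). As a rough standalone sketch of the same candidate selection, illustrative only and not part of the patch:

    #!/bin/sh
    # Sketch: list function-0 PCI devices that are plausible EEH test
    # candidates, mirroring the filtering done in eeh-basic.sh above.
    for dev in $(ls -1 /sys/bus/pci/devices/ | grep '\.0$') ; do
        # bridges host other devices, so recovery can't be tested on them
        [ -e "/sys/bus/pci/devices/$dev/pci_bus" ] && continue
        # VFs are skipped since there's no reliable way to break them
        [ -e "/sys/bus/pci/devices/$dev/physfn" ] && continue
        echo "$dev, candidate"
    done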
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
index 00dc32c0ed75..70daa3925dcb 100755..100644
--- a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
+++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
@@ -1,6 +1,12 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0-only
 
+export KSELFTESTS_SKIP=4
+
+log() {
+	echo "$@" >&2
+}
+
 pe_ok() {
 	local dev="$1"
 	local path="/sys/bus/pci/devices/$dev/eeh_pe_state"
@@ -39,6 +45,52 @@ eeh_supported() {
 	grep -q 'EEH Subsystem is enabled' /proc/powerpc/eeh
 }
 
+eeh_test_prep() {
+	if ! eeh_supported ; then
+		echo "EEH not supported on this system, skipping"
+		exit $KSELFTESTS_SKIP;
+	fi
+
+	if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \
+	   [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
+		log "debugfs EEH testing files are missing. Is debugfs mounted?"
+		exit $KSELFTESTS_SKIP;
+	fi
+
+	# Bump the max freeze count to something absurd so we don't
+	# trip over it while breaking things.
+	echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes
+}
+
+eeh_can_break() {
+	# skip bridges since we can't recover them (yet...)
+	if [ -e "/sys/bus/pci/devices/$1/pci_bus" ] ; then
+		log "$1, Skipped: bridge"
+		return 1;
+	fi
+
+	# The ahci driver doesn't support error recovery. If the ahci device
+	# happens to be hosting the root filesystem, and then we go and break
+	# it the system will generally go down. We should probably fix that
+	# at some point
+	if [ "ahci" = "$(basename $(realpath /sys/bus/pci/devices/$1/driver))" ] ; then
+		log "$1, Skipped: ahci doesn't support recovery"
+		return 1;
+	fi
+
+	# Don't inject errors into an already-frozen PE. This happens with
+	# PEs that contain multiple PCI devices (e.g. multi-function cards)
+	# and injecting new errors during the recovery process will probably
+	# result in the recovery failing and the device being marked as
+	# failed.
+	if ! pe_ok $1 ; then
+		log "$1, Skipped: Bad initial PE state"
+		return 1;
+	fi
+
+	return 0
+}
+
 eeh_one_dev() {
 	local dev="$1"
 
@@ -46,7 +98,7 @@ eeh_one_dev() {
 	# testing so check that the argument is a well-formed sysfs device
 	# name.
 	if ! test -e /sys/bus/pci/devices/$dev/ ; then
-		echo "Error: '$dev' must be a sysfs device name (DDDD:BB:DD.F)"
+		log "Error: '$dev' must be a sysfs device name (DDDD:BB:DD.F)"
 		return 1;
 	fi
 
@@ -70,16 +122,124 @@ eeh_one_dev() {
 		if pe_ok $dev ; then
 			break;
 		fi
-		echo "$dev, waited $i/${max_wait}"
+		log "$dev, waited $i/${max_wait}"
 		sleep 1
 	done
 
 	if ! pe_ok $dev ; then
-		echo "$dev, Failed to recover!"
+		log "$dev, Failed to recover!"
 		return 1;
 	fi
 
-	echo "$dev, Recovered after $i seconds"
+	log "$dev, Recovered after $i seconds"
 	return 0;
 }
 
+eeh_has_driver() {
+	test -e /sys/bus/pci/devices/$1/driver;
+	return $?
+}
+
+eeh_can_recover() {
+	# we'll get an IO error if the device's current driver doesn't support
+	# error recovery
+	echo $1 > '/sys/kernel/debug/powerpc/eeh_dev_can_recover' 2>/dev/null
+
+	return $?
+}
+
+eeh_find_all_pfs() {
+	devices=""
+
+	# SR-IOV on pseries requires hypervisor support, so check for that
+	is_pseries=""
+	if grep -q pSeries /proc/cpuinfo ; then
+		if [ ! -f /proc/device-tree/rtas/ibm,open-sriov-allow-unfreeze ] ||
+		   [ ! -f /proc/device-tree/rtas/ibm,open-sriov-map-pe-number ] ; then
+			return 1;
+		fi
+
+		is_pseries="true"
+	fi
+
+	for dev in `ls -1 /sys/bus/pci/devices/` ; do
+		sysfs="/sys/bus/pci/devices/$dev"
+		if [ ! -e "$sysfs/sriov_numvfs" ] ; then
+			continue
+		fi
+
+		# skip unsupported PFs on pseries
+		if [ -n "$is_pseries" ] &&
+		   [ ! -f "$sysfs/of_node/ibm,is-open-sriov-pf" ] &&
+		   [ ! -f "$sysfs/of_node/ibm,open-sriov-vf-bar-info" ] ; then
+			continue;
+		fi
+
+		# no driver, no vfs
+		if ! eeh_has_driver $dev ; then
+			continue
+		fi
+
+		devices="$devices $dev"
+	done
+
+	if [ -z "$devices" ] ; then
+		return 1;
+	fi
+
+	echo $devices
+	return 0;
+}
+
+# attempts to enable one VF on each PF so we can do VF specific tests.
+# stdout: list of enabled VFs, one per line
+# return code: 0 if vfs are found, 1 otherwise
+eeh_enable_vfs() {
+	pf_list="$(eeh_find_all_pfs)"
+
+	vfs=0
+	for dev in $pf_list ; do
+		pf_sysfs="/sys/bus/pci/devices/$dev"
+
+		# make sure we have a single VF
+		echo 0 > "$pf_sysfs/sriov_numvfs"
+		echo 1 > "$pf_sysfs/sriov_numvfs"
+		if [ "$?" != 0 ] ; then
+			log "Unable to enable VFs on $dev, skipping"
+			continue;
+		fi
+
+		vf="$(basename $(realpath "$pf_sysfs/virtfn0"))"
+		if [ $? != 0 ] ; then
+			log "unable to find enabled vf on $dev"
+			echo 0 > "$pf_sysfs/sriov_numvfs"
+			continue;
+		fi
+
+		if ! eeh_can_break $vf ; then
+			log "$vf, Skipped: not breakable"
+
+			echo 0 > "$pf_sysfs/sriov_numvfs"
+			continue;
+		fi
+
+		vfs="$((vfs + 1))"
+		echo $vf
+	done
+
+	test "$vfs" != 0
+	return $?
+}
+
+eeh_disable_vfs() {
+	pf_list="$(eeh_find_all_pfs)"
+	if [ -z "$pf_list" ] ; then
+		return 1;
+	fi
+
+	for dev in $pf_list ; do
+		echo 0 > "/sys/bus/pci/devices/$dev/sriov_numvfs"
+	done
+
+	return 0;
+}
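eeh-functions.sh is now a pure helper library: it is meant to be sourced rather than executed (hence the mode change to 100644), results travel over stdout, and diagnostics go to stderr via log(). A minimal consumer might look like the sketch below, assuming it runs as root with the eeh selftest directory as the working directory; the two new tests that follow have this same shape:

    #!/bin/sh
    # Sketch of a test built on the shared helpers (assumes root and
    # that we run from tools/testing/selftests/powerpc/eeh).
    . ./eeh-functions.sh

    eeh_test_prep # exits with KSELFTESTS_SKIP if EEH or debugfs is missing

    vf_list="$(eeh_enable_vfs)"
    if [ $? -ne 0 ] ; then
        log "No usable VFs found"
        exit $KSELFTESTS_SKIP
    fi

    failed=0
    for vf in $vf_list ; do
        # break the VF's PE and wait for it to recover
        eeh_one_dev "$vf" || failed="$((failed + 1))"
    done

    eeh_disable_vfs
    test "$failed" -eq 0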
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-vf-aware.sh b/tools/testing/selftests/powerpc/eeh/eeh-vf-aware.sh
new file mode 100755
index 000000000000..874c11953bb6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/eeh/eeh-vf-aware.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+. ./eeh-functions.sh
+
+eeh_test_prep # NB: may exit
+
+vf_list="$(eeh_enable_vfs)";
+if [ $? -ne 0 ] ; then
+	log "No usable VFs found. Skipping EEH aware VF test"
+	exit $KSELFTESTS_SKIP;
+fi
+
+log "Enabled VFs: $vf_list"
+
+tested=0
+passed=0
+for vf in $vf_list ; do
+	log "Testing $vf"
+
+	if ! eeh_can_recover $vf ; then
+		log "Driver for $vf doesn't support error recovery, skipping"
+		continue;
+	fi
+
+	tested="$((tested + 1))"
+
+	log "Breaking $vf..."
+	if ! eeh_one_dev $vf ; then
+		log "$vf failed to recover"
+		continue;
+	fi
+
+	passed="$((passed + 1))"
+done
+
+eeh_disable_vfs
+
+if [ "$tested" -eq 0 ] ; then
+	echo "No VFs with EEH aware drivers found, skipping"
+	exit $KSELFTESTS_SKIP
+fi
+
+test "$passed" -eq "$tested"
+exit $?
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-vf-unaware.sh b/tools/testing/selftests/powerpc/eeh/eeh-vf-unaware.sh
new file mode 100755
index 000000000000..8a4c147b9d43
--- /dev/null
+++ b/tools/testing/selftests/powerpc/eeh/eeh-vf-unaware.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+. ./eeh-functions.sh
+
+eeh_test_prep # NB: may exit
+
+vf_list="$(eeh_enable_vfs)";
+if [ $? -ne 0 ] ; then
+	log "No usable VFs found. Skipping EEH unaware VF test"
+	exit $KSELFTESTS_SKIP;
+fi
+
+log "Enabled VFs: $vf_list"
+
+failed=0
+for vf in $vf_list ; do
+	log "Testing $vf"
+
+	if eeh_can_recover $vf ; then
+		log "Driver for $vf supports error recovery. Unbinding..."
+		echo "$vf" > /sys/bus/pci/devices/$vf/driver/unbind
+	fi
+
+	log "Breaking $vf..."
+	if ! eeh_one_dev $vf ; then
+		log "$vf failed to recover"
+		failed="$((failed + 1))"
+	fi
+done
+
+eeh_disable_vfs
+
+test "$failed" -eq 0
+exit $?
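A hypothetical invocation of the two new VF tests, assuming root and an SR-IOV capable adapter, with paths relative to a kernel source tree:

    # Run the VF recovery tests and report their exit codes.
    cd tools/testing/selftests/powerpc/eeh
    ./eeh-vf-aware.sh;   echo "aware:   $?"
    ./eeh-vf-unaware.sh; echo "unaware: $?"
    # 0 means every enabled VF recovered; 4 (KSELFTESTS_SKIP) means the
    # environment lacks EEH support, debugfs, or usable VFs.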