diff options
author | David S. Miller <davem@davemloft.net> | 2017-12-29 15:14:27 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-12-29 15:42:26 -0500 |
commit | 6bb8824732f69de0f233ae6b1a8158e149627b38 (patch) | |
tree | 78642311a28f42df9042da41eb98652c39d51327 | |
parent | d367341b25bd5aef3bf5524baa6f73e16ceced85 (diff) | |
parent | 2758b3e3e630ba304fc4aca434d591e70e528298 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
net/ipv6/ip6_gre.c is a case of parallel adds.
include/trace/events/tcp.h is a little bit more tricky. The removal
of in-trace-macro ifdefs in 'net' paralleled with moving
show_tcp_state_name and friends over to include/trace/events/sock.h
in 'net-next'.
Signed-off-by: David S. Miller <davem@davemloft.net>
212 files changed, 2378 insertions, 1204 deletions
diff --git a/Documentation/devicetree/bindings/sound/da7218.txt b/Documentation/devicetree/bindings/sound/da7218.txt index 5ca5a709b6aa..3ab9dfef38d1 100644 --- a/Documentation/devicetree/bindings/sound/da7218.txt +++ b/Documentation/devicetree/bindings/sound/da7218.txt @@ -73,7 +73,7 @@ Example: compatible = "dlg,da7218"; reg = <0x1a>; interrupt-parent = <&gpio6>; - interrupts = <11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <11 IRQ_TYPE_LEVEL_LOW>; wakeup-source; VDD-supply = <®_audio>; diff --git a/Documentation/devicetree/bindings/sound/da7219.txt b/Documentation/devicetree/bindings/sound/da7219.txt index cf61681826b6..5b54d2d045c3 100644 --- a/Documentation/devicetree/bindings/sound/da7219.txt +++ b/Documentation/devicetree/bindings/sound/da7219.txt @@ -77,7 +77,7 @@ Example: reg = <0x1a>; interrupt-parent = <&gpio6>; - interrupts = <11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <11 IRQ_TYPE_LEVEL_LOW>; VDD-supply = <®_audio>; VDDMIC-supply = <®_audio>; diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 3448e675b462..51101708a03a 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -1,6 +1,4 @@ -<previous description obsolete, deleted> - Virtual memory map with 4 level page tables: 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm @@ -14,13 +12,15 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) ... unused hole ... ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB) ... unused hole ... +fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space ... unused hole ... ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0 -ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable) -ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls +ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space (variable) +[fixmap start] - ffffffffff5fffff kernel-internal fixmap range +ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole Virtual memory map with 5 level page tables: @@ -36,19 +36,22 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB) ... unused hole ... ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB) ... unused hole ... +fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space ... unused hole ... ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0 -ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space -ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls +ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space +[fixmap start] - ffffffffff5fffff kernel-internal fixmap range +ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole Architecture defines a 64-bit virtual address. Implementations can support less. Currently supported are 48- and 57-bit virtual addresses. Bits 63 -through to the most-significant implemented bit are set to either all ones -or all zero. This causes hole between user space and kernel addresses. +through to the most-significant implemented bit are sign extended. +This causes hole between user space and kernel addresses if you interpret them +as unsigned. The direct mapping covers all memory in the system up to the highest memory address (this means in some cases it can also include PCI memory @@ -58,9 +61,6 @@ vmalloc space is lazily synchronized into the different PML4/PML5 pages of the processes using the page fault handler, with init_top_pgt as reference. -Current X86-64 implementations support up to 46 bits of address space (64 TB), -which is our current limit. This expands into MBZ space in the page tables. - We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual memory window (this size is arbitrary, it can be raised later if needed). The mappings are not part of any other kernel PGD and are only available @@ -72,5 +72,3 @@ following fixmap section. Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all physical memory, vmalloc/ioremap space and virtual memory map are randomized. Their order is preserved but their base will be offset early at boot time. - --Andi Kleen, Jul 2004 @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Fearless Coyote # *DOCUMENTATION* diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 6177d43f0ce8..e2a2b8400490 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -160,9 +160,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, #endif } -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) { + return 0; } #ifndef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 5acb5a176dbe..72be0c32e902 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs) printk("NIP: "REG" LR: "REG" CTR: "REG"\n", regs->nip, regs->link, regs->ctr); - printk("REGS: %p TRAP: %04lx %s (%s)\n", + printk("REGS: %px TRAP: %04lx %s (%s)\n", regs, regs->trap, print_tainted(), init_utsname()->release); printk("MSR: "REG" ", regs->msr); print_msr_bits(regs->msr); diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index bf457843e032..0d750d274c4e 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu) /* Return the per-cpu state for state saving/migration */ return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT | - (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT; + (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT | + (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT; } int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) @@ -1558,7 +1559,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) /* * Restore P and Q. If the interrupt was pending, we - * force both P and Q, which will trigger a resend. + * force Q and !P, which will trigger a resend. * * That means that a guest that had both an interrupt * pending (queued) and Q set will restore with only @@ -1566,7 +1567,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) * is perfectly fine as coalescing interrupts that haven't * been presented yet is always allowed. */ - if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING) + if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING)) state->old_p = true; if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING) state->old_q = true; diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 153812966365..fce545774d50 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr) int ret; __u64 target; - if (is_kernel_addr(addr)) - return branch_target((unsigned int *)addr); + if (is_kernel_addr(addr)) { + if (probe_kernel_read(&instr, (void *)addr, sizeof(instr))) + return 0; + + return branch_target(&instr); + } /* Userspace: need copy instruction here then translate it */ pagefault_disable(); diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 0ead3cd73caa..be4e7f84f70a 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -310,6 +310,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu) return 0; /* + * Check whether nest_imc is registered. We could end up here if the + * cpuhotplug callback registration fails. i.e, callback invokes the + * offline path for all successfully registered nodes. At this stage, + * nest_imc pmu will not be registered and we should return here. + * + * We return with a zero since this is not an offline failure. And + * cpuhp_setup_state() returns the actual failure reason to the caller, + * which in turn will call the cleanup routine. + */ + if (!nest_pmus) + return 0; + + /* * Now that this cpu is one of the designated, * find a next cpu a) which is online and b) in same chip. */ @@ -1171,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) if (nest_pmus == 1) { cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE); kfree(nest_imc_refc); + kfree(per_nest_pmu_arr); } if (nest_pmus > 0) @@ -1195,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); kfree(pmu_ptr); - kfree(per_nest_pmu_arr); return; } @@ -1309,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id ret = nest_pmu_cpumask_init(); if (ret) { mutex_unlock(&nest_init_lock); + kfree(nest_imc_refc); + kfree(per_nest_pmu_arr); goto err_free; } } diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h index b668e351fd6c..fca34b2177e2 100644 --- a/arch/um/include/asm/mmu_context.h +++ b/arch/um/include/asm/mmu_context.h @@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm); /* * Needed since we do not use the asm-generic/mm_hooks.h: */ -static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { uml_setup_stubs(mm); + return 0; } extern void arch_exit_mmap(struct mm_struct *mm); static inline void arch_unmap(struct mm_struct *mm, diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h index 59b06b48f27d..5c205a9cb5a6 100644 --- a/arch/unicore32/include/asm/mmu_context.h +++ b/arch/unicore32/include/asm/mmu_context.h @@ -81,9 +81,10 @@ do { \ } \ } while (0) -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) { + return 0; } static inline void arch_unmap(struct mm_struct *mm, diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 04d66e6fa447..45dc6233f2b9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -927,7 +927,8 @@ config MAXSMP config NR_CPUS int "Maximum number of CPUs" if SMP && !MAXSMP range 2 8 if SMP && X86_32 && !X86_BIGSMP - range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK + range 2 64 if SMP && X86_32 && X86_BIGSMP + range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64 range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64 default "1" if !SMP default "8192" if MAXSMP diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index bd8b57a5c874..ace8f321a5a1 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -942,9 +942,9 @@ ENTRY(debug) /* Are we currently on the SYSENTER stack? */ movl PER_CPU_VAR(cpu_entry_area), %ecx - addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx - subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ - cmpl $SIZEOF_SYSENTER_stack, %ecx + addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx + subl %eax, %ecx /* ecx = (end of entry_stack) - esp */ + cmpl $SIZEOF_entry_stack, %ecx jb .Ldebug_from_sysenter_stack TRACE_IRQS_OFF @@ -986,9 +986,9 @@ ENTRY(nmi) /* Are we currently on the SYSENTER stack? */ movl PER_CPU_VAR(cpu_entry_area), %ecx - addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx - subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ - cmpl $SIZEOF_SYSENTER_stack, %ecx + addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx + subl %eax, %ecx /* ecx = (end of entry_stack) - esp */ + cmpl $SIZEOF_entry_stack, %ecx jb .Lnmi_from_sysenter_stack /* Not on SYSENTER stack. */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 423885bee398..3d19c830e1b1 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -158,8 +158,8 @@ END(native_usergs_sysret64) _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip) /* The top word of the SYSENTER stack is hot and is usable as scratch space. */ -#define RSP_SCRATCH CPU_ENTRY_AREA_SYSENTER_stack + \ - SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA +#define RSP_SCRATCH CPU_ENTRY_AREA_entry_stack + \ + SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA ENTRY(entry_SYSCALL_64_trampoline) UNWIND_HINT_EMPTY diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index f279ba2643dc..1faf40f2dda9 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -37,6 +37,7 @@ #include <asm/unistd.h> #include <asm/fixmap.h> #include <asm/traps.h> +#include <asm/paravirt.h> #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" @@ -138,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) WARN_ON_ONCE(address != regs->ip); + /* This should be unreachable in NATIVE mode. */ + if (WARN_ON(vsyscall_mode == NATIVE)) + return false; + if (vsyscall_mode == NONE) { warn_bad_vsyscall(KERN_INFO, regs, "vsyscall attempted with vsyscall=none"); @@ -329,16 +334,47 @@ int in_gate_area_no_mm(unsigned long addr) return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR; } +/* + * The VSYSCALL page is the only user-accessible page in the kernel address + * range. Normally, the kernel page tables can have _PAGE_USER clear, but + * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls + * are enabled. + * + * Some day we may create a "minimal" vsyscall mode in which we emulate + * vsyscalls but leave the page not present. If so, we skip calling + * this. + */ +static void __init set_vsyscall_pgtable_user_bits(void) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset_k(VSYSCALL_ADDR); + set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER)); + p4d = p4d_offset(pgd, VSYSCALL_ADDR); +#if CONFIG_PGTABLE_LEVELS >= 5 + p4d->p4d |= _PAGE_USER; +#endif + pud = pud_offset(p4d, VSYSCALL_ADDR); + set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER)); + pmd = pmd_offset(pud, VSYSCALL_ADDR); + set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER)); +} + void __init map_vsyscall(void) { extern char __vsyscall_page; unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); - if (vsyscall_mode != NONE) + if (vsyscall_mode != NONE) { __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, vsyscall_mode == NATIVE ? PAGE_KERNEL_VSYSCALL : PAGE_KERNEL_VVAR); + set_vsyscall_pgtable_user_bits(); + } BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != (unsigned long)VSYSCALL_ADDR); diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h new file mode 100644 index 000000000000..2fbc69a0916e --- /dev/null +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef _ASM_X86_CPU_ENTRY_AREA_H +#define _ASM_X86_CPU_ENTRY_AREA_H + +#include <linux/percpu-defs.h> +#include <asm/processor.h> + +/* + * cpu_entry_area is a percpu region that contains things needed by the CPU + * and early entry/exit code. Real types aren't used for all fields here + * to avoid circular header dependencies. + * + * Every field is a virtual alias of some other allocated backing store. + * There is no direct allocation of a struct cpu_entry_area. + */ +struct cpu_entry_area { + char gdt[PAGE_SIZE]; + + /* + * The GDT is just below entry_stack and thus serves (on x86_64) as + * a a read-only guard page. + */ + struct entry_stack_page entry_stack_page; + + /* + * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because + * we need task switches to work, and task switches write to the TSS. + */ + struct tss_struct tss; + + char entry_trampoline[PAGE_SIZE]; + +#ifdef CONFIG_X86_64 + /* + * Exception stacks used for IST entries. + * + * In the future, this should have a separate slot for each stack + * with guard pages between them. + */ + char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; +#endif +}; + +#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) +#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) + +DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); + +extern void setup_cpu_entry_areas(void); +extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); + +#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE +#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) + +#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) + +#define CPU_ENTRY_AREA_MAP_SIZE \ + (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE) + +extern struct cpu_entry_area *get_cpu_entry_area(int cpu); + +static inline struct entry_stack *cpu_entry_stack(int cpu) +{ + return &get_cpu_entry_area(cpu)->entry_stack_page.stack; +} + +#endif diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index aab4fe9f49f8..ec8be07c0cda 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -7,6 +7,7 @@ #include <asm/mmu.h> #include <asm/fixmap.h> #include <asm/irq_vectors.h> +#include <asm/cpu_entry_area.h> #include <linux/smp.h> #include <linux/percpu.h> diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h index 0211029076ea..6777480d8a42 100644 --- a/arch/x86/include/asm/espfix.h +++ b/arch/x86/include/asm/espfix.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_ESPFIX_H #define _ASM_X86_ESPFIX_H -#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_ESPFIX64 #include <asm/percpu.h> @@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); extern void init_espfix_bsp(void); extern void init_espfix_ap(int cpu); - -#endif /* CONFIG_X86_64 */ +#else +static inline void init_espfix_ap(int cpu) { } +#endif #endif /* _ASM_X86_ESPFIX_H */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 94fc4fa14127..64c4a30e0d39 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -45,46 +45,6 @@ extern unsigned long __FIXADDR_TOP; #endif /* - * cpu_entry_area is a percpu region in the fixmap that contains things - * needed by the CPU and early entry/exit code. Real types aren't used - * for all fields here to avoid circular header dependencies. - * - * Every field is a virtual alias of some other allocated backing store. - * There is no direct allocation of a struct cpu_entry_area. - */ -struct cpu_entry_area { - char gdt[PAGE_SIZE]; - - /* - * The GDT is just below SYSENTER_stack and thus serves (on x86_64) as - * a a read-only guard page. - */ - struct SYSENTER_stack_page SYSENTER_stack_page; - - /* - * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because - * we need task switches to work, and task switches write to the TSS. - */ - struct tss_struct tss; - - char entry_trampoline[PAGE_SIZE]; - -#ifdef CONFIG_X86_64 - /* - * Exception stacks used for IST entries. - * - * In the future, this should have a separate slot for each stack - * with guard pages between them. - */ - char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; -#endif -}; - -#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE) - -extern void setup_cpu_entry_areas(void); - -/* * Here we define all the compile-time 'special' virtual * addresses. The point is to have a constant address at * compile time, but to set the physical address only @@ -123,7 +83,6 @@ enum fixed_addresses { FIX_IO_APIC_BASE_0, FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, #endif - FIX_RO_IDT, /* Virtual mapping for read-only IDT */ #ifdef CONFIG_X86_32 FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, @@ -139,9 +98,6 @@ enum fixed_addresses { #ifdef CONFIG_X86_INTEL_MID FIX_LNW_VRTC, #endif - /* Fixmap entries to remap the GDTs, one per processor. */ - FIX_CPU_ENTRY_AREA_TOP, - FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1, #ifdef CONFIG_ACPI_APEI_GHES /* Used for GHES mapping from assorted contexts */ @@ -182,7 +138,7 @@ enum fixed_addresses { extern void reserve_top_address(unsigned long reserve); #define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) extern int fixmaps_set; @@ -230,30 +186,5 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr, void __early_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags); -static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page) -{ - BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0); - - return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page; -} - -#define __get_cpu_entry_area_offset_index(cpu, offset) ({ \ - BUILD_BUG_ON(offset % PAGE_SIZE != 0); \ - __get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE); \ - }) - -#define get_cpu_entry_area_index(cpu, field) \ - __get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field)) - -static inline struct cpu_entry_area *get_cpu_entry_area(int cpu) -{ - return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0)); -} - -static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu) -{ - return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack; -} - #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_FIXMAP_H */ diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h new file mode 100644 index 000000000000..989cfa86de85 --- /dev/null +++ b/arch/x86/include/asm/invpcid.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_INVPCID +#define _ASM_X86_INVPCID + +static inline void __invpcid(unsigned long pcid, unsigned long addr, + unsigned long type) +{ + struct { u64 d[2]; } desc = { { pcid, addr } }; + + /* + * The memory clobber is because the whole point is to invalidate + * stale TLB entries and, especially if we're flushing global + * mappings, we don't want the compiler to reorder any subsequent + * memory accesses before the TLB flush. + * + * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and + * invpcid (%rcx), %rax in long mode. + */ + asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" + : : "m" (desc), "a" (type), "c" (&desc) : "memory"); +} + +#define INVPCID_TYPE_INDIV_ADDR 0 +#define INVPCID_TYPE_SINGLE_CTXT 1 +#define INVPCID_TYPE_ALL_INCL_GLOBAL 2 +#define INVPCID_TYPE_ALL_NON_GLOBAL 3 + +/* Flush all mappings for a given pcid and addr, not including globals. */ +static inline void invpcid_flush_one(unsigned long pcid, + unsigned long addr) +{ + __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR); +} + +/* Flush all mappings for a given PCID, not including globals. */ +static inline void invpcid_flush_single_context(unsigned long pcid) +{ + __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT); +} + +/* Flush all mappings, including globals, for all PCIDs. */ +static inline void invpcid_flush_all(void) +{ + __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL); +} + +/* Flush all mappings for all PCIDs except globals. */ +static inline void invpcid_flush_all_nonglobals(void) +{ + __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL); +} + +#endif /* _ASM_X86_INVPCID */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 9ea26f167497..5ff3e8af2c20 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -3,6 +3,7 @@ #define _ASM_X86_MMU_H #include <linux/spinlock.h> +#include <linux/rwsem.h> #include <linux/mutex.h> #include <linux/atomic.h> @@ -27,7 +28,8 @@ typedef struct { atomic64_t tlb_gen; #ifdef CONFIG_MODIFY_LDT_SYSCALL - struct ldt_struct *ldt; + struct rw_semaphore ldt_usr_sem; + struct ldt_struct *ldt; #endif #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 6d16d15d09a0..5ede7cae1d67 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -57,11 +57,17 @@ struct ldt_struct { /* * Used for LDT copy/destruction. */ -int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm); +static inline void init_new_context_ldt(struct mm_struct *mm) +{ + mm->context.ldt = NULL; + init_rwsem(&mm->context.ldt_usr_sem); +} +int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm); void destroy_context_ldt(struct mm_struct *mm); #else /* CONFIG_MODIFY_LDT_SYSCALL */ -static inline int init_new_context_ldt(struct task_struct *tsk, - struct mm_struct *mm) +static inline void init_new_context_ldt(struct mm_struct *mm) { } +static inline int ldt_dup_context(struct mm_struct *oldmm, + struct mm_struct *mm) { return 0; } @@ -132,18 +138,21 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + mutex_init(&mm->context.lock); + mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); atomic64_set(&mm->context.tlb_gen, 0); - #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { /* pkey 0 is the default and always allocated */ mm->context.pkey_allocation_map = 0x1; /* -1 means unallocated or invalid */ mm->context.execute_only_pkey = -1; } - #endif - return init_new_context_ldt(tsk, mm); +#endif + init_new_context_ldt(mm); + return 0; } static inline void destroy_context(struct mm_struct *mm) { @@ -176,10 +185,10 @@ do { \ } while (0) #endif -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { paravirt_arch_dup_mmap(oldmm, mm); + return ldt_dup_context(oldmm, mm); } static inline void arch_exit_mmap(struct mm_struct *mm) @@ -282,33 +291,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, } /* - * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID - * bits. This serves two purposes. It prevents a nasty situation in - * which PCID-unaware code saves CR3, loads some other value (with PCID - * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if - * the saved ASID was nonzero. It also means that any bugs involving - * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger - * deterministically. - */ - -static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid) -{ - if (static_cpu_has(X86_FEATURE_PCID)) { - VM_WARN_ON_ONCE(asid > 4094); - return __sme_pa(mm->pgd) | (asid + 1); - } else { - VM_WARN_ON_ONCE(asid != 0); - return __sme_pa(mm->pgd); - } -} - -static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid) -{ - VM_WARN_ON_ONCE(asid > 4094); - return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH; -} - -/* * This can be used from process context to figure out what the value of * CR3 is without needing to do a (slow) __read_cr3(). * @@ -317,7 +299,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid) */ static inline unsigned long __get_current_cr3_fast(void) { - unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm), + unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd, this_cpu_read(cpu_tlbstate.loaded_mm_asid)); /* For now, be very restrictive about when this can be called. */ diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index f2ca9b28fd68..ce245b0cdfca 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ #define LAST_PKMAP 1024 #endif -#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1)) \ - & PMD_MASK) +/* + * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c + * to avoid include recursion hell + */ +#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) + +#define CPU_ENTRY_AREA_BASE \ + ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK) + +#define PKMAP_BASE \ + ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) #ifdef CONFIG_HIGHMEM # define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) #else -# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) +# define VMALLOC_END (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE) #endif #define MODULES_VADDR VMALLOC_START diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 6d5f45dcd4a1..3d27831bc58d 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -76,32 +76,41 @@ typedef struct { pteval_t pte; } pte_t; #define PGDIR_MASK (~(PGDIR_SIZE - 1)) /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ -#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) +#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) + #ifdef CONFIG_X86_5LEVEL -#define VMALLOC_SIZE_TB _AC(16384, UL) -#define __VMALLOC_BASE _AC(0xff92000000000000, UL) -#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) +# define VMALLOC_SIZE_TB _AC(16384, UL) +# define __VMALLOC_BASE _AC(0xff92000000000000, UL) +# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) #else -#define VMALLOC_SIZE_TB _AC(32, UL) -#define __VMALLOC_BASE _AC(0xffffc90000000000, UL) -#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) +# define VMALLOC_SIZE_TB _AC(32, UL) +# define __VMALLOC_BASE _AC(0xffffc90000000000, UL) +# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) #endif + #ifdef CONFIG_RANDOMIZE_MEMORY -#define VMALLOC_START vmalloc_base -#define VMEMMAP_START vmemmap_base +# define VMALLOC_START vmalloc_base +# define VMEMMAP_START vmemmap_base #else -#define VMALLOC_START __VMALLOC_BASE -#define VMEMMAP_START __VMEMMAP_BASE +# define VMALLOC_START __VMALLOC_BASE +# define VMEMMAP_START __VMEMMAP_BASE #endif /* CONFIG_RANDOMIZE_MEMORY */ -#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) -#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) + +#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) + +#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) /* The module sections ends with the start of the fixmap */ -#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1) -#define MODULES_LEN (MODULES_END - MODULES_VADDR) -#define ESPFIX_PGD_ENTRY _AC(-2, UL) -#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT) -#define EFI_VA_START ( -4 * (_AC(1, UL) << 30)) -#define EFI_VA_END (-68 * (_AC(1, UL) << 30)) +#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1) +#define MODULES_LEN (MODULES_END - MODULES_VADDR) + +#define ESPFIX_PGD_ENTRY _AC(-2, UL) +#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT) + +#define CPU_ENTRY_AREA_PGD _AC(-3, UL) +#define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT) + +#define EFI_VA_START ( -4 * (_AC(1, UL) << 30)) +#define EFI_VA_END (-68 * (_AC(1, UL) << 30)) #define EARLY_DYNAMIC_PAGE_TABLES 64 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1f2434ee9f80..cad8dab266bc 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -337,12 +337,12 @@ struct x86_hw_tss { #define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss)) #define INVALID_IO_BITMAP_OFFSET 0x8000 -struct SYSENTER_stack { +struct entry_stack { unsigned long words[64]; }; -struct SYSENTER_stack_page { - struct SYSENTER_stack stack; +struct entry_stack_page { + struct entry_stack stack; } __aligned(PAGE_SIZE); struct tss_struct { diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index f8062bfd43a0..f73706878772 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -16,7 +16,7 @@ enum stack_type { STACK_TYPE_TASK, STACK_TYPE_IRQ, STACK_TYPE_SOFTIRQ, - STACK_TYPE_SYSENTER, + STACK_TYPE_ENTRY, STACK_TYPE_EXCEPTION, STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, }; @@ -29,7 +29,7 @@ struct stack_info { bool in_task_stack(unsigned long *stack, struct task_struct *task, struct stack_info *info); -bool in_sysenter_stack(unsigned long *stack, struct stack_info *info); +bool in_entry_stack(unsigned long *stack, struct stack_info *info); int get_stack_info(unsigned long *stack, struct task_struct *task, struct stack_info *info, unsigned long *visit_mask); diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 877b5c1a1b12..e1884cf35257 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -9,70 +9,66 @@ #include <asm/cpufeature.h> #include <asm/special_insns.h> #include <asm/smp.h> +#include <asm/invpcid.h> -static inline void __invpcid(unsigned long pcid, unsigned long addr, - unsigned long type) +static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) { - struct { u64 d[2]; } desc = { { pcid, addr } }; - /* - * The memory clobber is because the whole point is to invalidate - * stale TLB entries and, especially if we're flushing global - * mappings, we don't want the compiler to reorder any subsequent - * memory accesses before the TLB flush. - * - * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and - * invpcid (%rcx), %rax in long mode. + * Bump the generation count. This also serves as a full barrier + * that synchronizes with switch_mm(): callers are required to order + * their read of mm_cpumask after their writes to the paging + * structures. */ - asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" - : : "m" (desc), "a" (type), "c" (&desc) : "memory"); + return atomic64_inc_return(&mm->context.tlb_gen); } -#define INVPCID_TYPE_INDIV_ADDR 0 -#define INVPCID_TYPE_SINGLE_CTXT 1 -#define INVPCID_TYPE_ALL_INCL_GLOBAL 2 -#define INVPCID_TYPE_ALL_NON_GLOBAL 3 +/* There are 12 bits of space for ASIDS in CR3 */ +#define CR3_HW_ASID_BITS 12 +/* + * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for + * user/kernel switches + */ +#define PTI_CONSUMED_ASID_BITS 0 -/* Flush all mappings for a given pcid and addr, not including globals. */ -static inline void invpcid_flush_one(unsigned long pcid, - unsigned long addr) -{ - __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR); -} +#define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS) +/* + * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account + * for them being zero-based. Another -1 is because ASID 0 is reserved for + * use by non-PCID-aware users. + */ +#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2) -/* Flush all mappings for a given PCID, not including globals. */ -static inline void invpcid_flush_single_context(unsigned long pcid) +static inline u16 kern_pcid(u16 asid) { - __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT); + VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); + /* + * If PCID is on, ASID-aware code paths put the ASID+1 into the + * PCID bits. This serves two purposes. It prevents a nasty + * situation in which PCID-unaware code saves CR3, loads some other + * value (with PCID == 0), and then restores CR3, thus corrupting + * the TLB for ASID 0 if the saved ASID was nonzero. It also means + * that any bugs involving loading a PCID-enabled CR3 with + * CR4.PCIDE off will trigger deterministically. + */ + return asid + 1; } -/* Flush all mappings, including globals, for all PCIDs. */ -static inline void invpcid_flush_all(void) +struct pgd_t; +static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) { - __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL); + if (static_cpu_has(X86_FEATURE_PCID)) { + return __sme_pa(pgd) | kern_pcid(asid); + } else { + VM_WARN_ON_ONCE(asid != 0); + return __sme_pa(pgd); + } } -/* Flush all mappings for all PCIDs except globals. */ -static inline void invpcid_flush_all_nonglobals(void) +static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) { - __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL); -} - -static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) -{ - u64 new_tlb_gen; - - /* - * Bump the generation count. This also serves as a full barrier - * that synchronizes with switch_mm(): callers are required to order - * their read of mm_cpumask after their writes to the paging - * structures. - */ - smp_mb__before_atomic(); - new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen); - smp_mb__after_atomic(); - - return new_tlb_gen; + VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); + VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID)); + return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH; } #ifdef CONFIG_PARAVIRT @@ -237,6 +233,9 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) extern void initialize_tlbstate_and_flush(void); +/* + * flush the entire current user mapping + */ static inline void __native_flush_tlb(void) { /* @@ -249,20 +248,12 @@ static inline void __native_flush_tlb(void) preempt_enable(); } -static inline void __native_flush_tlb_global_irq_disabled(void) -{ - unsigned long cr4; - - cr4 = this_cpu_read(cpu_tlbstate.cr4); - /* clear PGE */ - native_write_cr4(cr4 & ~X86_CR4_PGE); - /* write old PGE again and flush TLBs */ - native_write_cr4(cr4); -} - +/* + * flush everything + */ static inline void __native_flush_tlb_global(void) { - unsigned long flags; + unsigned long cr4, flags; if (static_cpu_has(X86_FEATURE_INVPCID)) { /* @@ -280,22 +271,36 @@ static inline void __native_flush_tlb_global(void) */ raw_local_irq_save(flags); - __native_flush_tlb_global_irq_disabled(); + cr4 = this_cpu_read(cpu_tlbstate.cr4); + /* toggle PGE */ + native_write_cr4(cr4 ^ X86_CR4_PGE); + /* write old PGE again and flush TLBs */ + native_write_cr4(cr4); raw_local_irq_restore(flags); } +/* + * flush one page in the user mapping + */ static inline void __native_flush_tlb_single(unsigned long addr) { asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); } +/* + * flush everything + */ static inline void __flush_tlb_all(void) { - if (boot_cpu_has(X86_FEATURE_PGE)) + if (boot_cpu_has(X86_FEATURE_PGE)) { __flush_tlb_global(); - else + } else { + /* + * !PGE -> !PCID (setup_pcid()), thus every flush is total. + */ __flush_tlb(); + } /* * Note: if we somehow had PCID but not PGE, then this wouldn't work -- @@ -306,6 +311,9 @@ static inline void __flush_tlb_all(void) */ } +/* + * flush one page in the kernel mapping + */ static inline void __flush_tlb_one(unsigned long addr) { count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index cd360a5e0dca..676b7cf4b62b 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -97,6 +97,6 @@ void common(void) { /* Layout info for cpu_entry_area */ OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss); OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline); - OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page); - DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack)); + OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page); + DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack)); } diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 7d20d9c0b3d6..fa1261eefa16 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -48,7 +48,7 @@ void foo(void) /* Offset from the sysenter stack to tss.sp0 */ DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) - - offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack)); + offsetofend(struct cpu_entry_area, entry_stack_page.stack)); #ifdef CONFIG_CC_STACKPROTECTOR BLANK(); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7416da3ec4df..c9757f07d738 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -506,102 +506,8 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, [DEBUG_STACK - 1] = DEBUG_STKSZ }; - -static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); -#endif - -static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page, - SYSENTER_stack_storage); - -static void __init -set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot) -{ - for ( ; pages; pages--, idx--, ptr += PAGE_SIZE) - __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot); -} - -/* Setup the fixmap mappings only once per-processor */ -static void __init setup_cpu_entry_area(int cpu) -{ -#ifdef CONFIG_X86_64 - extern char _entry_trampoline[]; - - /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */ - pgprot_t gdt_prot = PAGE_KERNEL_RO; - pgprot_t tss_prot = PAGE_KERNEL_RO; -#else - /* - * On native 32-bit systems, the GDT cannot be read-only because - * our double fault handler uses a task gate, and entering through - * a task gate needs to change an available TSS to busy. If the - * GDT is read-only, that will triple fault. The TSS cannot be - * read-only because the CPU writes to it on task switches. - * - * On Xen PV, the GDT must be read-only because the hypervisor - * requires it. - */ - pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ? - PAGE_KERNEL_RO : PAGE_KERNEL; - pgprot_t tss_prot = PAGE_KERNEL; -#endif - - __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot); - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page), - per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1, - PAGE_KERNEL); - - /* - * The Intel SDM says (Volume 3, 7.2.1): - * - * Avoid placing a page boundary in the part of the TSS that the - * processor reads during a task switch (the first 104 bytes). The - * processor may not correctly perform address translations if a - * boundary occurs in this area. During a task switch, the processor - * reads and writes into the first 104 bytes of each TSS (using - * contiguous physical addresses beginning with the physical address - * of the first byte of the TSS). So, after TSS access begins, if - * part of the 104 bytes is not physically contiguous, the processor - * will access incorrect information without generating a page-fault - * exception. - * - * There are also a lot of errata involving the TSS spanning a page - * boundary. Assert that we're not doing that. - */ - BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ - offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); - BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss), - &per_cpu(cpu_tss_rw, cpu), - sizeof(struct tss_struct) / PAGE_SIZE, - tss_prot); - -#ifdef CONFIG_X86_32 - per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); #endif -#ifdef CONFIG_X86_64 - BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); - BUILD_BUG_ON(sizeof(exception_stacks) != - sizeof(((struct cpu_entry_area *)0)->exception_stacks)); - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks), - &per_cpu(exception_stacks, cpu), - sizeof(exception_stacks) / PAGE_SIZE, - PAGE_KERNEL); - - __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline), - __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); -#endif -} - -void __init setup_cpu_entry_areas(void) -{ - unsigned int cpu; - - for_each_possible_cpu(cpu) - setup_cpu_entry_area(cpu); -} - /* Load the original GDT from the per-cpu structure */ void load_direct_gdt(int cpu) { @@ -1348,7 +1254,7 @@ void enable_sep_cpu(void) tss->x86_tss.ss1 = __KERNEL_CS; wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); - wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0); + wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0); wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); put_cpu(); @@ -1465,7 +1371,7 @@ void syscall_init(void) * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). */ wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1)); + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1)); wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); #else wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); @@ -1680,7 +1586,7 @@ void cpu_init(void) */ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); load_TR_desc(); - load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1)); + load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1)); load_mm_ldt(&init_mm); diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 7dbcb7adf797..8ccdca6d3f9e 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu_info *uci) } #else -/* - * Flush global tlb. We only do this in x86_64 where paging has been enabled - * already and PGE should be enabled as well. - */ -static inline void flush_tlb_early(void) -{ - __native_flush_tlb_global_irq_disabled(); -} - static inline void print_ucode(struct ucode_cpu_info *uci) { struct microcode_intel *mc; @@ -602,10 +593,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) if (rev != mc->hdr.rev) return -1; -#ifdef CONFIG_X86_64 - /* Flush global tlb. This is precaution. */ - flush_tlb_early(); -#endif uci->cpu_sig.rev = rev; if (early) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index bbd6d986e2d0..36b17e0febe8 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -18,6 +18,7 @@ #include <linux/nmi.h> #include <linux/sysfs.h> +#include <asm/cpu_entry_area.h> #include <asm/stacktrace.h> #include <asm/unwind.h> @@ -43,9 +44,9 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task, return true; } -bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) +bool in_entry_stack(unsigned long *stack, struct stack_info *info) { - struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id()); + struct entry_stack *ss = cpu_entry_stack(smp_processor_id()); void *begin = ss; void *end = ss + 1; @@ -53,7 +54,7 @@ bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) if ((void *)stack < begin || (void *)stack >= end) return false; - info->type = STACK_TYPE_SYSENTER; + info->type = STACK_TYPE_ENTRY; info->begin = begin; info->end = end; info->next_sp = NULL; @@ -111,13 +112,13 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, * - task stack * - interrupt stack * - HW exception stacks (double fault, nmi, debug, mce) - * - SYSENTER stack + * - entry stack * * x86-32 can have up to four stacks: * - task stack * - softirq stack * - hardirq stack - * - SYSENTER stack + * - entry stack */ for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { const char *stack_name; diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 5ff13a6b3680..04170f63e3a1 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -26,8 +26,8 @@ const char *stack_type_name(enum stack_type type) if (type == STACK_TYPE_SOFTIRQ) return "SOFTIRQ"; - if (type == STACK_TYPE_SYSENTER) - return "SYSENTER"; + if (type == STACK_TYPE_ENTRY) + return "ENTRY_TRAMPOLINE"; return NULL; } @@ -96,7 +96,7 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, if (task != current) goto unknown; - if (in_sysenter_stack(stack, info)) + if (in_entry_stack(stack, info)) goto recursion_check; if (in_hardirq_stack(stack, info)) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index abc828f8c297..563e28d14f2c 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -37,8 +37,14 @@ const char *stack_type_name(enum stack_type type) if (type == STACK_TYPE_IRQ) return "IRQ"; - if (type == STACK_TYPE_SYSENTER) - return "SYSENTER"; + if (type == STACK_TYPE_ENTRY) { + /* + * On 64-bit, we have a generic entry stack that we + * use for all the kernel entry points, including + * SYSENTER. + */ + return "ENTRY_TRAMPOLINE"; + } if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST) return exception_stack_names[type - STACK_TYPE_EXCEPTION]; @@ -118,7 +124,7 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, if (in_irq_stack(stack, info)) goto recursion_check; - if (in_sysenter_stack(stack, info)) + if (in_entry_stack(stack, info)) goto recursion_check; goto unknown; diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 1c1eae961340..a6b5d62f45a7 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -5,6 +5,11 @@ * Copyright (C) 2002 Andi Kleen * * This handles calls from both 32bit and 64bit mode. + * + * Lock order: + * contex.ldt_usr_sem + * mmap_sem + * context.lock */ #include <linux/errno.h> @@ -42,7 +47,7 @@ static void refresh_ldt_segments(void) #endif } -/* context.lock is held for us, so we don't need any locking. */ +/* context.lock is held by the task which issued the smp function call */ static void flush_ldt(void *__mm) { struct mm_struct *mm = __mm; @@ -99,15 +104,17 @@ static void finalize_ldt_struct(struct ldt_struct *ldt) paravirt_alloc_ldt(ldt->entries, ldt->nr_entries); } -/* context.lock is held */ -static void install_ldt(struct mm_struct *current_mm, - struct ldt_struct *ldt) +static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt) { + mutex_lock(&mm->context.lock); + /* Synchronizes with READ_ONCE in load_mm_ldt. */ - smp_store_release(¤t_mm->context.ldt, ldt); + smp_store_release(&mm->context.ldt, ldt); - /* Activate the LDT for all CPUs using current_mm. */ - on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true); + /* Activate the LDT for all CPUs using currents mm. */ + on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true); + + mutex_unlock(&mm->context.lock); } static void free_ldt_struct(struct ldt_struct *ldt) @@ -124,27 +131,20 @@ static void free_ldt_struct(struct ldt_struct *ldt) } /* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. + * Called on fork from arch_dup_mmap(). Just copy the current LDT state, + * the new task is not running, so nothing can be installed. */ -int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm) +int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm) { struct ldt_struct *new_ldt; - struct mm_struct *old_mm; int retval = 0; - mutex_init(&mm->context.lock); - old_mm = current->mm; - if (!old_mm) { - mm->context.ldt = NULL; + if (!old_mm) return 0; - } mutex_lock(&old_mm->context.lock); - if (!old_mm->context.ldt) { - mm->context.ldt = NULL; + if (!old_mm->context.ldt) goto out_unlock; - } new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries); if (!new_ldt) { @@ -180,7 +180,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount) unsigned long entries_size; int retval; - mutex_lock(&mm->context.lock); + down_read(&mm->context.ldt_usr_sem); if (!mm->context.ldt) { retval = 0; @@ -209,7 +209,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount) retval = bytecount; out_unlock: - mutex_unlock(&mm->context.lock); + up_read(&mm->context.ldt_usr_sem); return retval; } @@ -269,7 +269,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) ldt.avl = 0; } - mutex_lock(&mm->context.lock); + if (down_write_killable(&mm->context.ldt_usr_sem)) + return -EINTR; old_ldt = mm->context.ldt; old_nr_entries = old_ldt ? old_ldt->nr_entries : 0; @@ -291,7 +292,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) error = 0; out_unlock: - mutex_unlock(&mm->context.lock); + up_write(&mm->context.ldt_usr_sem); out: return error; } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 35cb20994e32..c5970efa8557 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -932,12 +932,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, initial_code = (unsigned long)start_secondary; initial_stack = idle->thread.sp; - /* - * Enable the espfix hack for this CPU - */ -#ifdef CONFIG_X86_ESPFIX64 + /* Enable the espfix hack for this CPU */ init_espfix_ap(cpu); -#endif /* So we see what's up */ announce_cpu(cpu, apicid); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index e98f8b66a460..f69dbd47d733 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -51,6 +51,7 @@ #include <asm/traps.h> #include <asm/desc.h> #include <asm/fpu/internal.h> +#include <asm/cpu_entry_area.h> #include <asm/mce.h> #include <asm/fixmap.h> #include <asm/mach_traps.h> @@ -951,8 +952,9 @@ void __init trap_init(void) * "sidt" instruction will not leak the location of the kernel, and * to defend the IDT against arbitrary memory write vulnerabilities. * It will be reloaded in cpu_init() */ - __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO); - idt_descr.address = fix_to_virt(FIX_RO_IDT); + cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table), + PAGE_KERNEL_RO); + idt_descr.address = CPU_ENTRY_AREA_RO_IDT; /* * Should be a barrier for any external CPU state: diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 8e13b8cc6bed..52195ee3f6d5 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg endif obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o pgtable.o physaddr.o setup_nx.o tlb.o + pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o # Make sure __phys_addr has no stackprotector nostackp := $(call cc-option, -fno-stack-protector) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c new file mode 100644 index 000000000000..fe814fd5e014 --- /dev/null +++ b/arch/x86/mm/cpu_entry_area.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/spinlock.h> +#include <linux/percpu.h> + +#include <asm/cpu_entry_area.h> +#include <asm/pgtable.h> +#include <asm/fixmap.h> +#include <asm/desc.h> + +static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage); + +#ifdef CONFIG_X86_64 +static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); +#endif + +struct cpu_entry_area *get_cpu_entry_area(int cpu) +{ + unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE; + BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0); + + return (struct cpu_entry_area *) va; +} +EXPORT_SYMBOL(get_cpu_entry_area); + +void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags) +{ + unsigned long va = (unsigned long) cea_vaddr; + + set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags)); +} + +static void __init +cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) +{ + for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE) + cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); +} + +/* Setup the fixmap mappings only once per-processor */ +static void __init setup_cpu_entry_area(int cpu) +{ +#ifdef CONFIG_X86_64 + extern char _entry_trampoline[]; + + /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */ + pgprot_t gdt_prot = PAGE_KERNEL_RO; + pgprot_t tss_prot = PAGE_KERNEL_RO; +#else + /* + * On native 32-bit systems, the GDT cannot be read-only because + * our double fault handler uses a task gate, and entering through + * a task gate needs to change an available TSS to busy. If the + * GDT is read-only, that will triple fault. The TSS cannot be + * read-only because the CPU writes to it on task switches. + * + * On Xen PV, the GDT must be read-only because the hypervisor + * requires it. + */ + pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ? + PAGE_KERNEL_RO : PAGE_KERNEL; + pgprot_t tss_prot = PAGE_KERNEL; +#endif + + cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu), + gdt_prot); + + cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page, + per_cpu_ptr(&entry_stack_storage, cpu), 1, + PAGE_KERNEL); + + /* + * The Intel SDM says (Volume 3, 7.2.1): + * + * Avoid placing a page boundary in the part of the TSS that the + * processor reads during a task switch (the first 104 bytes). The + * processor may not correctly perform address translations if a + * boundary occurs in this area. During a task switch, the processor + * reads and writes into the first 104 bytes of each TSS (using + * contiguous physical addresses beginning with the physical address + * of the first byte of the TSS). So, after TSS access begins, if + * part of the 104 bytes is not physically contiguous, the processor + * will access incorrect information without generating a page-fault + * exception. + * + * There are also a lot of errata involving the TSS spanning a page + * boundary. Assert that we're not doing that. + */ + BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ + offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); + BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); + cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss, + &per_cpu(cpu_tss_rw, cpu), + sizeof(struct tss_struct) / PAGE_SIZE, tss_prot); + +#ifdef CONFIG_X86_32 + per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); +#endif + +#ifdef CONFIG_X86_64 + BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); + BUILD_BUG_ON(sizeof(exception_stacks) != + sizeof(((struct cpu_entry_area *)0)->exception_stacks)); + cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks, + &per_cpu(exception_stacks, cpu), + sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL); + + cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline, + __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); +#endif +} + +static __init void setup_cpu_entry_area_ptes(void) +{ +#ifdef CONFIG_X86_32 + unsigned long start, end; + + BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE); + BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK); + + start = CPU_ENTRY_AREA_BASE; + end = start + CPU_ENTRY_AREA_MAP_SIZE; + + /* Careful here: start + PMD_SIZE might wrap around */ + for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE) + populate_extra_pte(start); +#endif +} + +void __init setup_cpu_entry_areas(void) +{ + unsigned int cpu; + + setup_cpu_entry_area_ptes(); + + for_each_possible_cpu(cpu) + setup_cpu_entry_area(cpu); +} diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 5e3ac6fe6c9e..43dedbfb7257 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -44,10 +44,12 @@ struct addr_marker { unsigned long max_lines; }; -/* indices for address_markers; keep sync'd w/ address_markers below */ +/* Address space markers hints */ + +#ifdef CONFIG_X86_64 + enum address_markers_idx { USER_SPACE_NR = 0, -#ifdef CONFIG_X86_64 KERNEL_SPACE_NR, LOW_KERNEL_NR, VMALLOC_START_NR, @@ -56,56 +58,74 @@ enum address_markers_idx { KASAN_SHADOW_START_NR, KASAN_SHADOW_END_NR, #endif -# ifdef CONFIG_X86_ESPFIX64 + CPU_ENTRY_AREA_NR, +#ifdef CONFIG_X86_ESPFIX64 ESPFIX_START_NR, -# endif +#endif +#ifdef CONFIG_EFI + EFI_END_NR, +#endif HIGH_KERNEL_NR, MODULES_VADDR_NR, MODULES_END_NR, -#else + FIXADDR_START_NR, + END_OF_SPACE_NR, +}; + +static struct addr_marker address_markers[] = { + [USER_SPACE_NR] = { 0, "User Space" }, + [KERNEL_SPACE_NR] = { (1UL << 63), "Kernel Space" }, + [LOW_KERNEL_NR] = { 0UL, "Low Kernel Mapping" }, + [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" }, + [VMEMMAP_START_NR] = { 0UL, "Vmemmap" }, +#ifdef CONFIG_KASAN + [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" }, + [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" }, +#endif + [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" }, +#ifdef CONFIG_X86_ESPFIX64 + [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, +#endif +#ifdef CONFIG_EFI + [EFI_END_NR] = { EFI_VA_END, "EFI Runtime Services" }, +#endif + [HIGH_KERNEL_NR] = { __START_KERNEL_map, "High Kernel Mapping" }, + [MODULES_VADDR_NR] = { MODULES_VADDR, "Modules" }, + [MODULES_END_NR] = { MODULES_END, "End Modules" }, + [FIXADDR_START_NR] = { FIXADDR_START, "Fixmap Area" }, + [END_OF_SPACE_NR] = { -1, NULL } +}; + +#else /* CONFIG_X86_64 */ + +enum address_markers_idx { + USER_SPACE_NR = 0, KERNEL_SPACE_NR, VMALLOC_START_NR, VMALLOC_END_NR, -# ifdef CONFIG_HIGHMEM +#ifdef CONFIG_HIGHMEM PKMAP_BASE_NR, -# endif - FIXADDR_START_NR, #endif + CPU_ENTRY_AREA_NR, + FIXADDR_START_NR, + END_OF_SPACE_NR, }; -/* Address space markers hints */ static struct addr_marker address_markers[] = { - { 0, "User Space" }, -#ifdef CONFIG_X86_64 - { 0x8000000000000000UL, "Kernel Space" }, - { 0/* PAGE_OFFSET */, "Low Kernel Mapping" }, - { 0/* VMALLOC_START */, "vmalloc() Area" }, - { 0/* VMEMMAP_START */, "Vmemmap" }, -#ifdef CONFIG_KASAN - { KASAN_SHADOW_START, "KASAN shadow" }, - { KASAN_SHADOW_END, "KASAN shadow end" }, + [USER_SPACE_NR] = { 0, "User Space" }, + [KERNEL_SPACE_NR] = { PAGE_OFFSET, "Kernel Mapping" }, + [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" }, + [VMALLOC_END_NR] = { 0UL, "vmalloc() End" }, +#ifdef CONFIG_HIGHMEM + [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" }, #endif -# ifdef CONFIG_X86_ESPFIX64 - { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, -# endif -# ifdef CONFIG_EFI - { EFI_VA_END, "EFI Runtime Services" }, -# endif - { __START_KERNEL_map, "High Kernel Mapping" }, - { MODULES_VADDR, "Modules" }, - { MODULES_END, "End Modules" }, -#else - { PAGE_OFFSET, "Kernel Mapping" }, - { 0/* VMALLOC_START */, "vmalloc() Area" }, - { 0/*VMALLOC_END*/, "vmalloc() End" }, -# ifdef CONFIG_HIGHMEM - { 0/*PKMAP_BASE*/, "Persistent kmap() Area" }, -# endif - { 0/*FIXADDR_START*/, "Fixmap Area" }, -#endif - { -1, NULL } /* End of list */ + [CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" }, + [FIXADDR_START_NR] = { 0UL, "Fixmap area" }, + [END_OF_SPACE_NR] = { -1, NULL } }; +#endif /* !CONFIG_X86_64 */ + /* Multipliers for offsets within the PTEs */ #define PTE_LEVEL_MULT (PAGE_SIZE) #define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT) @@ -140,7 +160,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) static const char * const level_name[] = { "cr3", "pgd", "p4d", "pud", "pmd", "pte" }; - if (!pgprot_val(prot)) { + if (!(pr & _PAGE_PRESENT)) { /* Not present */ pt_dump_cont_printf(m, dmsg, " "); } else { @@ -525,8 +545,8 @@ static int __init pt_dump_init(void) address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; # endif address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; + address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE; #endif - return 0; } __initcall(pt_dump_init); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8a64a6f2848d..135c9a7898c7 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -50,6 +50,7 @@ #include <asm/setup.h> #include <asm/set_memory.h> #include <asm/page_types.h> +#include <asm/cpu_entry_area.h> #include <asm/init.h> #include "mm_internal.h" @@ -766,6 +767,7 @@ void __init mem_init(void) mem_init_print_info(NULL); printk(KERN_INFO "virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" + " cpu_entry : 0x%08lx - 0x%08lx (%4ld kB)\n" #ifdef CONFIG_HIGHMEM " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" #endif @@ -777,6 +779,10 @@ void __init mem_init(void) FIXADDR_START, FIXADDR_TOP, (FIXADDR_TOP - FIXADDR_START) >> 10, + CPU_ENTRY_AREA_BASE, + CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE, + CPU_ENTRY_AREA_MAP_SIZE >> 10, + #ifdef CONFIG_HIGHMEM PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, (LAST_PKMAP*PAGE_SIZE) >> 10, diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 9ec70d780f1f..47388f0c0e59 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -15,6 +15,7 @@ #include <asm/tlbflush.h> #include <asm/sections.h> #include <asm/pgtable.h> +#include <asm/cpu_entry_area.h> extern struct range pfn_mapped[E820_MAX_ENTRIES]; @@ -322,31 +323,33 @@ void __init kasan_init(void) map_range(&pfn_mapped[i]); } - kasan_populate_zero_shadow( - kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), - kasan_mem_to_shadow((void *)__START_KERNEL_map)); - - kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), - (unsigned long)kasan_mem_to_shadow(_end), - early_pfn_to_nid(__pa(_stext))); - - shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM); + shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE; shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin); shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin, PAGE_SIZE); - shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE); + shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE + + CPU_ENTRY_AREA_MAP_SIZE); shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end); shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end, PAGE_SIZE); - kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), - shadow_cpu_entry_begin); + kasan_populate_zero_shadow( + kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), + shadow_cpu_entry_begin); kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin, (unsigned long)shadow_cpu_entry_end, 0); - kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END); + kasan_populate_zero_shadow(shadow_cpu_entry_end, + kasan_mem_to_shadow((void *)__START_KERNEL_map)); + + kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), + (unsigned long)kasan_mem_to_shadow(_end), + early_pfn_to_nid(__pa(_stext))); + + kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), + (void *)KASAN_SHADOW_END); load_cr3(init_top_pgt); __flush_tlb_all(); diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 6b9bf023a700..c3c5274410a9 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -10,6 +10,7 @@ #include <linux/pagemap.h> #include <linux/spinlock.h> +#include <asm/cpu_entry_area.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/fixmap.h> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 3118392cdf75..0a1be3adc97e 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -128,7 +128,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * isn't free. */ #ifdef CONFIG_DEBUG_VM - if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) { + if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) { /* * If we were to BUG here, we'd be very likely to kill * the system so hard that we don't see the call trace. @@ -195,7 +195,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, if (need_flush) { this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); - write_cr3(build_cr3(next, new_asid)); + write_cr3(build_cr3(next->pgd, new_asid)); /* * NB: This gets called via leave_mm() in the idle path @@ -208,7 +208,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } else { /* The new ASID is already up to date. */ - write_cr3(build_cr3_noflush(next, new_asid)); + write_cr3(build_cr3_noflush(next->pgd, new_asid)); /* See above wrt _rcuidle. */ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); @@ -288,7 +288,7 @@ void initialize_tlbstate_and_flush(void) !(cr4_read_shadow() & X86_CR4_PCIDE)); /* Force ASID 0 and force a TLB flush. */ - write_cr3(build_cr3(mm, 0)); + write_cr3(build_cr3(mm->pgd, 0)); /* Reinitialize tlbstate. */ this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); @@ -551,7 +551,7 @@ static void do_kernel_range_flush(void *info) /* flush range by one by one 'invlpg' */ for (addr = f->start; addr < f->end; addr += PAGE_SIZE) - __flush_tlb_single(addr); + __flush_tlb_one(addr); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index f44c0bc95aa2..8538a6723171 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp, local_flush_tlb(); stat->d_alltlb++; } else { - __flush_tlb_one(msg->address); + __flush_tlb_single(msg->address); stat->d_onetlb++; } stat->d_requestee++; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index d669e9d89001..c9081c6671f0 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1,8 +1,12 @@ +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +#include <linux/bootmem.h> +#endif #include <linux/cpu.h> #include <linux/kexec.h> #include <xen/features.h> #include <xen/page.h> +#include <xen/interface/memory.h> #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> @@ -331,3 +335,80 @@ void xen_arch_unregister_cpu(int num) } EXPORT_SYMBOL(xen_arch_unregister_cpu); #endif + +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +void __init arch_xen_balloon_init(struct resource *hostmem_resource) +{ + struct xen_memory_map memmap; + int rc; + unsigned int i, last_guest_ram; + phys_addr_t max_addr = PFN_PHYS(max_pfn); + struct e820_table *xen_e820_table; + const struct e820_entry *entry; + struct resource *res; + + if (!xen_initial_domain()) + return; + + xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL); + if (!xen_e820_table) + return; + + memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries); + set_xen_guest_handle(memmap.buffer, xen_e820_table->entries); + rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap); + if (rc) { + pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc); + goto out; + } + + last_guest_ram = 0; + for (i = 0; i < memmap.nr_entries; i++) { + if (xen_e820_table->entries[i].addr >= max_addr) + break; + if (xen_e820_table->entries[i].type == E820_TYPE_RAM) + last_guest_ram = i; + } + + entry = &xen_e820_table->entries[last_guest_ram]; + if (max_addr >= entry->addr + entry->size) + goto out; /* No unallocated host RAM. */ + + hostmem_resource->start = max_addr; + hostmem_resource->end = entry->addr + entry->size; + + /* + * Mark non-RAM regions between the end of dom0 RAM and end of host RAM + * as unavailable. The rest of that region can be used for hotplug-based + * ballooning. + */ + for (; i < memmap.nr_entries; i++) { + entry = &xen_e820_table->entries[i]; + + if (entry->type == E820_TYPE_RAM) + continue; + + if (entry->addr >= hostmem_resource->end) + break; + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + goto out; + + res->name = "Unavailable host RAM"; + res->start = entry->addr; + res->end = (entry->addr + entry->size < hostmem_resource->end) ? + entry->addr + entry->size : hostmem_resource->end; + rc = insert_resource(hostmem_resource, res); + if (rc) { + pr_warn("%s: Can't insert [%llx - %llx) (%d)\n", + __func__, res->start, res->end, rc); + kfree(res); + goto out; + } + } + + out: + kfree(xen_e820_table); +} +#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 7beeee1443b3..c047f42552e1 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -88,6 +88,8 @@ #include "multicalls.h" #include "pmu.h" +#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */ + void *xen_initial_gdt; static int xen_cpu_up_prepare_pv(unsigned int cpu); @@ -1258,6 +1260,7 @@ asmlinkage __visible void __init xen_start_kernel(void) __userpte_alloc_gfp &= ~__GFP_HIGHMEM; /* Work out if we support NX */ + get_cpu_cap(&boot_cpu_data); x86_configure_nx(); /* Get mfn list */ diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 6cf801ca1142..4d62c071b166 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1902,6 +1902,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) /* Graft it onto L4[511][510] */ copy_page(level2_kernel_pgt, l2); + /* + * Zap execute permission from the ident map. Due to the sharing of + * L1 entries we need to do this in the L2. + */ + if (__supported_pte_mask & _PAGE_NX) { + for (i = 0; i < PTRS_PER_PMD; ++i) { + if (pmd_none(level2_ident_pgt[i])) + continue; + level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX); + } + } + /* Copy the initial P->M table mappings if necessary. */ i = pgd_index(xen_start_info->mfn_list); if (i && i < pgd_index(__START_KERNEL_map)) @@ -2261,7 +2273,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) switch (idx) { case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: - case FIX_RO_IDT: #ifdef CONFIG_X86_32 case FIX_WP_TEST: # ifdef CONFIG_HIGHMEM @@ -2272,7 +2283,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #endif case FIX_TEXT_POKE0: case FIX_TEXT_POKE1: - case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM: /* All local page mappings */ pte = pfn_pte(phys, prot); break; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index c114ca767b3b..6e0d2086eacb 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -808,7 +808,6 @@ char * __init xen_memory_setup(void) addr = xen_e820_table.entries[0].addr; size = xen_e820_table.entries[0].size; while (i < xen_e820_table.nr_entries) { - bool discard = false; chunk_size = size; type = xen_e820_table.entries[i].type; @@ -824,11 +823,10 @@ char * __init xen_memory_setup(void) xen_add_extra_mem(pfn_s, n_pfns); xen_max_p2m_pfn = pfn_s + n_pfns; } else - discard = true; + type = E820_TYPE_UNUSABLE; } - if (!discard) - xen_align_and_add_e820_region(addr, chunk_size, type); + xen_align_and_add_e820_region(addr, chunk_size, type); addr += chunk_size; size -= chunk_size; diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 415a54ced4d6..444a387df219 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -1138,12 +1138,6 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags, if (!af_alg_readable(sk)) break; - if (!ctx->used) { - err = af_alg_wait_for_data(sk, flags); - if (err) - return err; - } - seglen = min_t(size_t, (maxsize - len), msg_data_left(msg)); diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 48b34e9c6834..ddcc45f77edd 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -111,6 +111,12 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, size_t usedpages = 0; /* [in] RX bufs to be used from user */ size_t processed = 0; /* [in] TX bufs to be consumed */ + if (!ctx->used) { + err = af_alg_wait_for_data(sk, flags); + if (err) + return err; + } + /* * Data length provided by caller via sendmsg/sendpage that has not * yet been processed. @@ -285,6 +291,10 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, /* AIO operation */ sock_hold(sk); areq->iocb = msg->msg_iocb; + + /* Remember output size that will be generated. */ + areq->outlen = outlen; + aead_request_set_callback(&areq->cra_u.aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, af_alg_async_cb, areq); @@ -292,12 +302,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, crypto_aead_decrypt(&areq->cra_u.aead_req); /* AIO operation in progress */ - if (err == -EINPROGRESS || err == -EBUSY) { - /* Remember output size that will be generated. */ - areq->outlen = outlen; - + if (err == -EINPROGRESS || err == -EBUSY) return -EIOCBQUEUED; - } sock_put(sk); } else { diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 30cff827dd8f..baef9bfccdda 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -72,6 +72,12 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, int err = 0; size_t len = 0; + if (!ctx->used) { + err = af_alg_wait_for_data(sk, flags); + if (err) + return err; + } + /* Allocate cipher request for current operation. */ areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) + crypto_skcipher_reqsize(tfm)); @@ -119,6 +125,10 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, /* AIO operation */ sock_hold(sk); areq->iocb = msg->msg_iocb; + + /* Remember output size that will be generated. */ + areq->outlen = len; + skcipher_request_set_callback(&areq->cra_u.skcipher_req, CRYPTO_TFM_REQ_MAY_SLEEP, af_alg_async_cb, areq); @@ -127,12 +137,8 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, crypto_skcipher_decrypt(&areq->cra_u.skcipher_req); /* AIO operation in progress */ - if (err == -EINPROGRESS || err == -EBUSY) { - /* Remember output size that will be generated. */ - areq->outlen = len; - + if (err == -EINPROGRESS || err == -EBUSY) return -EIOCBQUEUED; - } sock_put(sk); } else { diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index 4e6472658852..eca04d3729b3 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -81,6 +81,7 @@ static int mcryptd_init_queue(struct mcryptd_queue *queue, pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue); crypto_init_queue(&cpu_queue->queue, max_cpu_qlen); INIT_WORK(&cpu_queue->work, mcryptd_queue_worker); + spin_lock_init(&cpu_queue->q_lock); } return 0; } @@ -104,15 +105,16 @@ static int mcryptd_enqueue_request(struct mcryptd_queue *queue, int cpu, err; struct mcryptd_cpu_queue *cpu_queue; - cpu = get_cpu(); - cpu_queue = this_cpu_ptr(queue->cpu_queue); - rctx->tag.cpu = cpu; + cpu_queue = raw_cpu_ptr(queue->cpu_queue); + spin_lock(&cpu_queue->q_lock); + cpu = smp_processor_id(); + rctx->tag.cpu = smp_processor_id(); err = crypto_enqueue_request(&cpu_queue->queue, request); pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n", cpu, cpu_queue, request); + spin_unlock(&cpu_queue->q_lock); queue_work_on(cpu, kcrypto_wq, &cpu_queue->work); - put_cpu(); return err; } @@ -161,16 +163,11 @@ static void mcryptd_queue_worker(struct work_struct *work) cpu_queue = container_of(work, struct mcryptd_cpu_queue, work); i = 0; while (i < MCRYPTD_BATCH || single_task_running()) { - /* - * preempt_disable/enable is used to prevent - * being preempted by mcryptd_enqueue_request() - */ - local_bh_disable(); - preempt_disable(); + + spin_lock_bh(&cpu_queue->q_lock); backlog = crypto_get_backlog(&cpu_queue->queue); req = crypto_dequeue_request(&cpu_queue->queue); - preempt_enable(); - local_bh_enable(); + spin_unlock_bh(&cpu_queue->q_lock); if (!req) { mcryptd_opportunistic_flush(); @@ -185,7 +182,7 @@ static void mcryptd_queue_worker(struct work_struct *work) ++i; } if (cpu_queue->queue.qlen) - queue_work(kcrypto_wq, &cpu_queue->work); + queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work); } void mcryptd_flusher(struct work_struct *__work) diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 778e0ff42bfa..11af5fd6a443 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -449,6 +449,8 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk, walk->total = req->cryptlen; walk->nbytes = 0; + walk->iv = req->iv; + walk->oiv = req->iv; if (unlikely(!walk->total)) return 0; @@ -456,9 +458,6 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk, scatterwalk_start(&walk->in, req->src); scatterwalk_start(&walk->out, req->dst); - walk->iv = req->iv; - walk->oiv = req->iv; - walk->flags &= ~SKCIPHER_WALK_SLEEP; walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? SKCIPHER_WALK_SLEEP : 0; @@ -510,6 +509,8 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk, int err; walk->nbytes = 0; + walk->iv = req->iv; + walk->oiv = req->iv; if (unlikely(!walk->total)) return 0; @@ -525,9 +526,6 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk, scatterwalk_done(&walk->in, 0, walk->total); scatterwalk_done(&walk->out, 0, walk->total); - walk->iv = req->iv; - walk->oiv = req->iv; - if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) walk->flags |= SKCIPHER_WALK_SLEEP; else diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index ff2580e7611d..abeb4df4f22e 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1670,6 +1670,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, dev_name(&adev_dimm->dev)); return -ENXIO; } + /* + * Record nfit_mem for the notification path to track back to + * the nfit sysfs attributes for this dimm device object. + */ + dev_set_drvdata(&adev_dimm->dev, nfit_mem); /* * Until standardization materializes we need to consider 4 @@ -1752,9 +1757,11 @@ static void shutdown_dimm_notify(void *data) sysfs_put(nfit_mem->flags_attr); nfit_mem->flags_attr = NULL; } - if (adev_dimm) + if (adev_dimm) { acpi_remove_notify_handler(adev_dimm->handle, ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify); + dev_set_drvdata(&adev_dimm->dev, NULL); + } } mutex_unlock(&acpi_desc->init_mutex); } diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 647d056df88c..b56c11f51baf 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -220,7 +220,8 @@ static bool clk_core_is_enabled(struct clk_core *core) ret = core->ops->is_enabled(core->hw); done: - clk_pm_runtime_put(core); + if (core->dev) + pm_runtime_put(core->dev); return ret; } @@ -1564,6 +1565,9 @@ static void clk_change_rate(struct clk_core *core) best_parent_rate = core->parent->rate; } + if (clk_pm_runtime_get(core)) + return; + if (core->flags & CLK_SET_RATE_UNGATE) { unsigned long flags; @@ -1634,6 +1638,8 @@ static void clk_change_rate(struct clk_core *core) /* handle the new child who might not be in core->children yet */ if (core->new_child) clk_change_rate(core->new_child); + + clk_pm_runtime_put(core); } static int clk_core_set_rate_nolock(struct clk_core *core, diff --git a/drivers/clk/sunxi/clk-sun9i-mmc.c b/drivers/clk/sunxi/clk-sun9i-mmc.c index a1a634253d6f..f00d8758ba24 100644 --- a/drivers/clk/sunxi/clk-sun9i-mmc.c +++ b/drivers/clk/sunxi/clk-sun9i-mmc.c @@ -16,6 +16,7 @@ #include <linux/clk.h> #include <linux/clk-provider.h> +#include <linux/delay.h> #include <linux/init.h> #include <linux/of.h> #include <linux/of_device.h> @@ -83,9 +84,20 @@ static int sun9i_mmc_reset_deassert(struct reset_controller_dev *rcdev, return 0; } +static int sun9i_mmc_reset_reset(struct reset_controller_dev *rcdev, + unsigned long id) +{ + sun9i_mmc_reset_assert(rcdev, id); + udelay(10); + sun9i_mmc_reset_deassert(rcdev, id); + + return 0; +} + static const struct reset_control_ops sun9i_mmc_reset_ops = { .assert = sun9i_mmc_reset_assert, .deassert = sun9i_mmc_reset_deassert, + .reset = sun9i_mmc_reset_reset, }; static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev) diff --git a/drivers/gpio/gpio-reg.c b/drivers/gpio/gpio-reg.c index 23e771dba4c1..e85903eddc68 100644 --- a/drivers/gpio/gpio-reg.c +++ b/drivers/gpio/gpio-reg.c @@ -103,8 +103,8 @@ static int gpio_reg_to_irq(struct gpio_chip *gc, unsigned offset) struct gpio_reg *r = to_gpio_reg(gc); int irq = r->irqs[offset]; - if (irq >= 0 && r->irq.domain) - irq = irq_find_mapping(r->irq.domain, irq); + if (irq >= 0 && r->irqdomain) + irq = irq_find_mapping(r->irqdomain, irq); return irq; } diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index eb4528c87c0b..d6f3d9ee1350 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1074,7 +1074,7 @@ void acpi_gpiochip_add(struct gpio_chip *chip) } if (!chip->names) - devprop_gpiochip_set_names(chip); + devprop_gpiochip_set_names(chip, dev_fwnode(chip->parent)); acpi_gpiochip_request_regions(acpi_gpio); acpi_gpiochip_scan_gpios(acpi_gpio); diff --git a/drivers/gpio/gpiolib-devprop.c b/drivers/gpio/gpiolib-devprop.c index 27f383bda7d9..f748aa3e77f7 100644 --- a/drivers/gpio/gpiolib-devprop.c +++ b/drivers/gpio/gpiolib-devprop.c @@ -19,30 +19,27 @@ /** * devprop_gpiochip_set_names - Set GPIO line names using device properties * @chip: GPIO chip whose lines should be named, if possible + * @fwnode: Property Node containing the gpio-line-names property * * Looks for device property "gpio-line-names" and if it exists assigns * GPIO line names for the chip. The memory allocated for the assigned * names belong to the underlying firmware node and should not be released * by the caller. */ -void devprop_gpiochip_set_names(struct gpio_chip *chip) +void devprop_gpiochip_set_names(struct gpio_chip *chip, + const struct fwnode_handle *fwnode) { struct gpio_device *gdev = chip->gpiodev; const char **names; int ret, i; - if (!chip->parent) { - dev_warn(&gdev->dev, "GPIO chip parent is NULL\n"); - return; - } - - ret = device_property_read_string_array(chip->parent, "gpio-line-names", + ret = fwnode_property_read_string_array(fwnode, "gpio-line-names", NULL, 0); if (ret < 0) return; if (ret != gdev->ngpio) { - dev_warn(chip->parent, + dev_warn(&gdev->dev, "names %d do not match number of GPIOs %d\n", ret, gdev->ngpio); return; @@ -52,10 +49,10 @@ void devprop_gpiochip_set_names(struct gpio_chip *chip) if (!names) return; - ret = device_property_read_string_array(chip->parent, "gpio-line-names", + ret = fwnode_property_read_string_array(fwnode, "gpio-line-names", names, gdev->ngpio); if (ret < 0) { - dev_warn(chip->parent, "failed to read GPIO line names\n"); + dev_warn(&gdev->dev, "failed to read GPIO line names\n"); kfree(names); return; } diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index e0d59e61b52f..72a0695d2ac3 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -493,7 +493,8 @@ int of_gpiochip_add(struct gpio_chip *chip) /* If the chip defines names itself, these take precedence */ if (!chip->names) - devprop_gpiochip_set_names(chip); + devprop_gpiochip_set_names(chip, + of_fwnode_handle(chip->of_node)); of_node_get(chip->of_node); diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index af48322839c3..6c44d1652139 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -228,7 +228,8 @@ static inline int gpio_chip_hwgpio(const struct gpio_desc *desc) return desc - &desc->gdev->descs[0]; } -void devprop_gpiochip_set_names(struct gpio_chip *chip); +void devprop_gpiochip_set_names(struct gpio_chip *chip, + const struct fwnode_handle *fwnode); /* With descriptor prefix */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index da43813d67a4..5aeb5f8816f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2467,7 +2467,7 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) PACKET3_MAP_QUEUES_PIPE(ring->pipe) | PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ - PACKET3_MAP_QUEUES_ALLOC_FORMAT(1) | /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f71fe6d2ddda..bb5fa895fb64 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2336,7 +2336,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, const struct dm_connector_state *dm_state) { struct drm_display_mode *preferred_mode = NULL; - const struct drm_connector *drm_connector; + struct drm_connector *drm_connector; struct dc_stream_state *stream = NULL; struct drm_display_mode mode = *drm_mode; bool native_mode_found = false; @@ -2355,11 +2355,13 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (!aconnector->dc_sink) { /* - * Exclude MST from creating fake_sink - * TODO: need to enable MST into fake_sink feature + * Create dc_sink when necessary to MST + * Don't apply fake_sink to MST */ - if (aconnector->mst_port) - goto stream_create_fail; + if (aconnector->mst_port) { + dm_dp_mst_dc_sink_create(drm_connector); + goto mst_dc_sink_create_done; + } if (create_fake_sink(aconnector)) goto stream_create_fail; @@ -2410,6 +2412,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, stream_create_fail: dm_state_null: drm_connector_null: +mst_dc_sink_create_done: return stream; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 117521c6a6ed..0230250a1164 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -189,6 +189,8 @@ struct amdgpu_dm_connector { struct mutex hpd_lock; bool fake_enable; + + bool mst_connected; }; #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index f8efb98b1fa7..638c2c2b5cd7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -185,6 +185,42 @@ static int dm_connector_update_modes(struct drm_connector *connector, return ret; } +void dm_dp_mst_dc_sink_create(struct drm_connector *connector) +{ + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct edid *edid; + struct dc_sink *dc_sink; + struct dc_sink_init_data init_params = { + .link = aconnector->dc_link, + .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST }; + + edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port); + + if (!edid) { + drm_mode_connector_update_edid_property( + &aconnector->base, + NULL); + return; + } + + aconnector->edid = edid; + + dc_sink = dc_link_add_remote_sink( + aconnector->dc_link, + (uint8_t *)aconnector->edid, + (aconnector->edid->extensions + 1) * EDID_LENGTH, + &init_params); + + dc_sink->priv = aconnector; + aconnector->dc_sink = dc_sink; + + amdgpu_dm_add_sink_to_freesync_module( + connector, aconnector->edid); + + drm_mode_connector_update_edid_property( + &aconnector->base, aconnector->edid); +} + static int dm_dp_mst_get_modes(struct drm_connector *connector) { struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); @@ -311,6 +347,7 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, drm_mode_connector_set_path_property(connector, pathprop); drm_connector_list_iter_end(&conn_iter); + aconnector->mst_connected = true; return &aconnector->base; } } @@ -363,6 +400,8 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, */ amdgpu_dm_connector_funcs_reset(connector); + aconnector->mst_connected = true; + DRM_INFO("DM_MST: added connector: %p [id: %d] [master: %p]\n", aconnector, connector->base.id, aconnector->mst_port); @@ -394,6 +433,8 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, drm_mode_connector_update_edid_property( &aconnector->base, NULL); + + aconnector->mst_connected = false; } static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) @@ -404,10 +445,18 @@ static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) drm_kms_helper_hotplug_event(dev); } +static void dm_dp_mst_link_status_reset(struct drm_connector *connector) +{ + mutex_lock(&connector->dev->mode_config.mutex); + drm_mode_connector_set_link_status_property(connector, DRM_MODE_LINK_STATUS_BAD); + mutex_unlock(&connector->dev->mode_config.mutex); +} + static void dm_dp_mst_register_connector(struct drm_connector *connector) { struct drm_device *dev = connector->dev; struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); if (adev->mode_info.rfbdev) drm_fb_helper_add_one_connector(&adev->mode_info.rfbdev->helper, connector); @@ -416,6 +465,8 @@ static void dm_dp_mst_register_connector(struct drm_connector *connector) drm_connector_register(connector); + if (aconnector->mst_connected) + dm_dp_mst_link_status_reset(connector); } static const struct drm_dp_mst_topology_cbs dm_mst_cbs = { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index 2da851b40042..8cf51da26657 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -31,5 +31,6 @@ struct amdgpu_dm_connector; void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, struct amdgpu_dm_connector *aconnector); +void dm_dp_mst_dc_sink_create(struct drm_connector *connector); #endif diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 3dce35e66b09..b142629a1058 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -900,6 +900,15 @@ bool dcn_validate_bandwidth( v->override_vta_ps[input_idx] = pipe->plane_res.scl_data.taps.v_taps; v->override_hta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.h_taps_c; v->override_vta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.v_taps_c; + /* + * Spreadsheet doesn't handle taps_c is one properly, + * need to force Chroma to always be scaled to pass + * bandwidth validation. + */ + if (v->override_hta_pschroma[input_idx] == 1) + v->override_hta_pschroma[input_idx] = 2; + if (v->override_vta_pschroma[input_idx] == 1) + v->override_vta_pschroma[input_idx] = 2; v->source_scan[input_idx] = (pipe->plane_state->rotation % 2) ? dcn_bw_vert : dcn_bw_hor; } if (v->is_line_buffer_bpp_fixed == dcn_bw_yes) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index e27ed4a45265..42a111b9505d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1801,7 +1801,7 @@ static void disable_link(struct dc_link *link, enum signal_type signal) link->link_enc->funcs->disable_output(link->link_enc, signal, link); } -bool dp_active_dongle_validate_timing( +static bool dp_active_dongle_validate_timing( const struct dc_crtc_timing *timing, const struct dc_dongle_caps *dongle_caps) { @@ -1833,6 +1833,8 @@ bool dp_active_dongle_validate_timing( /* Check Color Depth and Pixel Clock */ if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) required_pix_clk /= 2; + else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) + required_pix_clk = required_pix_clk * 2 / 3; switch (timing->display_color_depth) { case COLOR_DEPTH_666: diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 07ff8d2faf3f..d844fadcd56f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -2866,16 +2866,19 @@ static void dce110_apply_ctx_for_surface( int num_planes, struct dc_state *context) { - int i, be_idx; + int i; if (num_planes == 0) return; - be_idx = -1; for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (stream == context->res_ctx.pipe_ctx[i].stream) { - be_idx = context->res_ctx.pipe_ctx[i].stream_res.tg->inst; - break; + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + + if (stream == pipe_ctx->stream) { + if (!pipe_ctx->top_pipe && + (pipe_ctx->plane_state || old_pipe_ctx->plane_state)) + dc->hwss.pipe_control_lock(dc, pipe_ctx, true); } } @@ -2895,9 +2898,22 @@ static void dce110_apply_ctx_for_surface( context->stream_count); dce110_program_front_end_for_pipe(dc, pipe_ctx); + + dc->hwss.update_plane_addr(dc, pipe_ctx); + program_surface_visibility(dc, pipe_ctx); } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + + if ((stream == pipe_ctx->stream) && + (!pipe_ctx->top_pipe) && + (pipe_ctx->plane_state || old_pipe_ctx->plane_state)) + dc->hwss.pipe_control_lock(dc, pipe_ctx, false); + } } static void dce110_power_down_fe(struct dc *dc, int fe_idx) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index 74e7c82bdc76..a9d55d0dd69e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -159,11 +159,10 @@ bool dpp_get_optimal_number_of_taps( scl_data->taps.h_taps = 1; if (IDENTITY_RATIO(scl_data->ratios.vert)) scl_data->taps.v_taps = 1; - /* - * Spreadsheet doesn't handle taps_c is one properly, - * need to force Chroma to always be scaled to pass - * bandwidth validation. - */ + if (IDENTITY_RATIO(scl_data->ratios.horz_c)) + scl_data->taps.h_taps_c = 1; + if (IDENTITY_RATIO(scl_data->ratios.vert_c)) + scl_data->taps.v_taps_c = 1; } return true; diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c index 59849f02e2ad..1402c0e71b03 100644 --- a/drivers/gpu/drm/drm_lease.c +++ b/drivers/gpu/drm/drm_lease.c @@ -220,17 +220,6 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr mutex_lock(&dev->mode_config.idr_mutex); - /* Insert the new lessee into the tree */ - id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL); - if (id < 0) { - error = id; - goto out_lessee; - } - - lessee->lessee_id = id; - lessee->lessor = drm_master_get(lessor); - list_add_tail(&lessee->lessee_list, &lessor->lessees); - idr_for_each_entry(leases, entry, object) { error = 0; if (!idr_find(&dev->mode_config.crtc_idr, object)) @@ -246,6 +235,17 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr } } + /* Insert the new lessee into the tree */ + id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL); + if (id < 0) { + error = id; + goto out_lessee; + } + + lessee->lessee_id = id; + lessee->lessor = drm_master_get(lessor); + list_add_tail(&lessee->lessee_list, &lessor->lessees); + /* Move the leases over */ lessee->leases = *leases; DRM_DEBUG_LEASE("new lessee %d %p, lessor %d %p\n", lessee->lessee_id, lessee, lessor->lessee_id, lessor); diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 37a93cdffb4a..2c90519576a3 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -558,11 +558,10 @@ int drm_plane_check_pixel_format(const struct drm_plane *plane, u32 format) } /* - * setplane_internal - setplane handler for internal callers + * __setplane_internal - setplane handler for internal callers * - * Note that we assume an extra reference has already been taken on fb. If the - * update fails, this reference will be dropped before return; if it succeeds, - * the previous framebuffer (if any) will be unreferenced instead. + * This function will take a reference on the new fb for the plane + * on success. * * src_{x,y,w,h} are provided in 16.16 fixed point format */ @@ -630,14 +629,12 @@ static int __setplane_internal(struct drm_plane *plane, if (!ret) { plane->crtc = crtc; plane->fb = fb; - fb = NULL; + drm_framebuffer_get(plane->fb); } else { plane->old_fb = NULL; } out: - if (fb) - drm_framebuffer_put(fb); if (plane->old_fb) drm_framebuffer_put(plane->old_fb); plane->old_fb = NULL; @@ -685,6 +682,7 @@ int drm_mode_setplane(struct drm_device *dev, void *data, struct drm_plane *plane; struct drm_crtc *crtc = NULL; struct drm_framebuffer *fb = NULL; + int ret; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EINVAL; @@ -717,15 +715,16 @@ int drm_mode_setplane(struct drm_device *dev, void *data, } } - /* - * setplane_internal will take care of deref'ing either the old or new - * framebuffer depending on success. - */ - return setplane_internal(plane, crtc, fb, - plane_req->crtc_x, plane_req->crtc_y, - plane_req->crtc_w, plane_req->crtc_h, - plane_req->src_x, plane_req->src_y, - plane_req->src_w, plane_req->src_h); + ret = setplane_internal(plane, crtc, fb, + plane_req->crtc_x, plane_req->crtc_y, + plane_req->crtc_w, plane_req->crtc_h, + plane_req->src_x, plane_req->src_y, + plane_req->src_w, plane_req->src_h); + + if (fb) + drm_framebuffer_put(fb); + + return ret; } static int drm_mode_cursor_universal(struct drm_crtc *crtc, @@ -788,13 +787,12 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc, src_h = fb->height << 16; } - /* - * setplane_internal will take care of deref'ing either the old or new - * framebuffer depending on success. - */ ret = __setplane_internal(crtc->cursor, crtc, fb, - crtc_x, crtc_y, crtc_w, crtc_h, - 0, 0, src_w, src_h, ctx); + crtc_x, crtc_y, crtc_w, crtc_h, + 0, 0, src_w, src_h, ctx); + + if (fb) + drm_framebuffer_put(fb); /* Update successful; save new cursor position, if necessary */ if (ret == 0 && req->flags & DRM_MODE_CURSOR_MOVE) { diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index f776fc1cc543..cb4d09c70fd4 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -369,40 +369,26 @@ static const struct file_operations drm_syncobj_file_fops = { .release = drm_syncobj_file_release, }; -static int drm_syncobj_alloc_file(struct drm_syncobj *syncobj) -{ - struct file *file = anon_inode_getfile("syncobj_file", - &drm_syncobj_file_fops, - syncobj, 0); - if (IS_ERR(file)) - return PTR_ERR(file); - - drm_syncobj_get(syncobj); - if (cmpxchg(&syncobj->file, NULL, file)) { - /* lost the race */ - fput(file); - } - - return 0; -} - int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd) { - int ret; + struct file *file; int fd; fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) return fd; - if (!syncobj->file) { - ret = drm_syncobj_alloc_file(syncobj); - if (ret) { - put_unused_fd(fd); - return ret; - } + file = anon_inode_getfile("syncobj_file", + &drm_syncobj_file_fops, + syncobj, 0); + if (IS_ERR(file)) { + put_unused_fd(fd); + return PTR_ERR(file); } - fd_install(fd, syncobj->file); + + drm_syncobj_get(syncobj); + fd_install(fd, file); + *p_fd = fd; return 0; } @@ -422,31 +408,24 @@ static int drm_syncobj_handle_to_fd(struct drm_file *file_private, return ret; } -static struct drm_syncobj *drm_syncobj_fdget(int fd) -{ - struct file *file = fget(fd); - - if (!file) - return NULL; - if (file->f_op != &drm_syncobj_file_fops) - goto err; - - return file->private_data; -err: - fput(file); - return NULL; -}; - static int drm_syncobj_fd_to_handle(struct drm_file *file_private, int fd, u32 *handle) { - struct drm_syncobj *syncobj = drm_syncobj_fdget(fd); + struct drm_syncobj *syncobj; + struct file *file; int ret; - if (!syncobj) + file = fget(fd); + if (!file) return -EINVAL; + if (file->f_op != &drm_syncobj_file_fops) { + fput(file); + return -EINVAL; + } + /* take a reference to put in the idr */ + syncobj = file->private_data; drm_syncobj_get(syncobj); idr_preload(GFP_KERNEL); @@ -455,12 +434,14 @@ static int drm_syncobj_fd_to_handle(struct drm_file *file_private, spin_unlock(&file_private->syncobj_table_lock); idr_preload_end(); - if (ret < 0) { - fput(syncobj->file); - return ret; - } - *handle = ret; - return 0; + if (ret > 0) { + *handle = ret; + ret = 0; + } else + drm_syncobj_put(syncobj); + + fput(file); + return ret; } static int drm_syncobj_import_sync_file_fence(struct drm_file *file_private, diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 355120865efd..309f3fa6794a 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -266,6 +266,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) /* Clear host CRT status, so guest couldn't detect this host CRT. */ if (IS_BROADWELL(dev_priv)) vgpu_vreg(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK; + + vgpu_vreg(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE; } static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) @@ -282,7 +284,6 @@ static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, int type, unsigned int resolution) { - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); if (WARN_ON(resolution >= GVT_EDID_NUM)) @@ -308,7 +309,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, port->type = type; emulate_monitor_status_change(vgpu); - vgpu_vreg(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE; + return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ad4050f7ab3b..18de6569d04a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -330,17 +330,10 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) * must wait for all rendering to complete to the object (as unbinding * must anyway), and retire the requests. */ - ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED | - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT, - NULL); + ret = i915_gem_object_set_to_cpu_domain(obj, false); if (ret) return ret; - i915_gem_retire_requests(to_i915(obj->base.dev)); - while ((vma = list_first_entry_or_null(&obj->vma_list, struct i915_vma, obj_link))) { diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index e8ca67a129d2..ac236b88c99c 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -367,6 +367,7 @@ struct i915_sw_dma_fence_cb { struct dma_fence *dma; struct timer_list timer; struct irq_work work; + struct rcu_head rcu; }; static void timer_i915_sw_fence_wake(struct timer_list *t) @@ -406,7 +407,7 @@ static void irq_i915_sw_fence_work(struct irq_work *wrk) del_timer_sync(&cb->timer); dma_fence_put(cb->dma); - kfree(cb); + kfree_rcu(cb, rcu); } int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 5f8b9f1f40f1..bcbc7abe6693 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -186,7 +186,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) struct intel_wait *wait, *n, *first; if (!b->irq_armed) - return; + goto wakeup_signaler; /* We only disarm the irq when we are idle (all requests completed), * so if the bottom-half remains asleep, it missed the request @@ -208,6 +208,14 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) b->waiters = RB_ROOT; spin_unlock_irq(&b->rb_lock); + + /* + * The signaling thread may be asleep holding a reference to a request, + * that had its signaling cancelled prior to being preempted. We need + * to kick the signaler, just in case, to release any such reference. + */ +wakeup_signaler: + wake_up_process(b->signaler); } static bool use_fake_irq(const struct intel_breadcrumbs *b) @@ -651,23 +659,15 @@ static int intel_breadcrumbs_signaler(void *arg) } if (unlikely(do_schedule)) { - DEFINE_WAIT(exec); - if (kthread_should_park()) kthread_parkme(); - if (kthread_should_stop()) { - GEM_BUG_ON(request); + if (unlikely(kthread_should_stop())) { + i915_gem_request_put(request); break; } - if (request) - add_wait_queue(&request->execute, &exec); - schedule(); - - if (request) - remove_wait_queue(&request->execute, &exec); } i915_gem_request_put(request); } while (1); diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index e0843bb99169..58a3755544b2 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2128,6 +2128,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, if (WARN_ON(!pll)) return; + mutex_lock(&dev_priv->dpll_lock); + if (IS_CANNONLAKE(dev_priv)) { /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */ val = I915_READ(DPCLKA_CFGCR0); @@ -2157,6 +2159,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, } else if (INTEL_INFO(dev_priv)->gen < 9) { I915_WRITE(PORT_CLK_SEL(port), hsw_pll_to_ddi_pll_sel(pll)); } + + mutex_unlock(&dev_priv->dpll_lock); } static void intel_ddi_clk_disable(struct intel_encoder *encoder) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e8ccf89cb17b..30cf273d57aa 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9944,11 +9944,10 @@ found: } ret = intel_modeset_setup_plane_state(state, crtc, mode, fb, 0, 0); + drm_framebuffer_put(fb); if (ret) goto fail; - drm_framebuffer_put(fb); - ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode); if (ret) goto fail; @@ -13195,7 +13194,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->frontbuffer_bit = INTEL_FRONTBUFFER_PRIMARY(pipe); primary->check_plane = intel_check_primary_plane; - if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 10) { intel_primary_formats = skl_primary_formats; num_formats = ARRAY_SIZE(skl_primary_formats); modifiers = skl_format_modifiers_ccs; diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 3bf65288ffff..5809b29044fc 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -193,7 +193,7 @@ static bool lpe_audio_detect(struct drm_i915_private *dev_priv) }; if (!pci_dev_present(atom_hdaudio_ids)) { - DRM_INFO("%s\n", "HDaudio controller not detected, using LPE audio instead\n"); + DRM_INFO("HDaudio controller not detected, using LPE audio instead\n"); lpe_present = true; } } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 2615912430cc..435ff8662cfa 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -224,7 +224,7 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align, /* Determine if we can get a cache-coherent map, forcing * uncached mapping if we can't. */ - if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED) + if (!nouveau_drm_use_coherent_gpu_mapping(drm)) nvbo->force_coherent = true; } @@ -262,7 +262,8 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align, if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE && (flags & TTM_PL_FLAG_VRAM) && !vmm->page[i].vram) continue; - if ((flags & TTM_PL_FLAG_TT ) && !vmm->page[i].host) + if ((flags & TTM_PL_FLAG_TT) && + (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT)) continue; /* Select this page size if it's the first that supports diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 8d4a5be3b913..56fe261b6268 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -152,9 +152,9 @@ nouveau_cli_work_queue(struct nouveau_cli *cli, struct dma_fence *fence, work->cli = cli; mutex_lock(&cli->lock); list_add_tail(&work->head, &cli->worker); - mutex_unlock(&cli->lock); if (dma_fence_add_callback(fence, &work->cb, nouveau_cli_work_fence)) nouveau_cli_work_fence(fence, &work->cb); + mutex_unlock(&cli->lock); } static void diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 3331e82ae9e7..96f6bd8aee5d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -157,8 +157,8 @@ struct nouveau_drm { struct nvif_object copy; int mtrr; int type_vram; - int type_host; - int type_ncoh; + int type_host[2]; + int type_ncoh[2]; } ttm; /* GEM interface support */ @@ -217,6 +217,13 @@ nouveau_drm(struct drm_device *dev) return dev->dev_private; } +static inline bool +nouveau_drm_use_coherent_gpu_mapping(struct nouveau_drm *drm) +{ + struct nvif_mmu *mmu = &drm->client.mmu; + return !(mmu->type[drm->ttm.type_host[0]].type & NVIF_MEM_UNCACHED); +} + int nouveau_pmops_suspend(struct device *); int nouveau_pmops_resume(struct device *); bool nouveau_pmops_runtime(void); diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index c533d8e04afc..be7357bf2246 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -429,7 +429,7 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon) drm_fb_helper_unregister_fbi(&fbcon->helper); drm_fb_helper_fini(&fbcon->helper); - if (nouveau_fb->nvbo) { + if (nouveau_fb && nouveau_fb->nvbo) { nouveau_vma_del(&nouveau_fb->vma); nouveau_bo_unmap(nouveau_fb->nvbo); nouveau_bo_unpin(nouveau_fb->nvbo); diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c index 589a9621db76..c002f8968507 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -103,10 +103,10 @@ nouveau_mem_host(struct ttm_mem_reg *reg, struct ttm_dma_tt *tt) u8 type; int ret; - if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED) - type = drm->ttm.type_ncoh; + if (!nouveau_drm_use_coherent_gpu_mapping(drm)) + type = drm->ttm.type_ncoh[!!mem->kind]; else - type = drm->ttm.type_host; + type = drm->ttm.type_host[0]; if (mem->kind && !(mmu->type[type].type & NVIF_MEM_KIND)) mem->comp = mem->kind = 0; diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index 08b974b30482..dff51a0ee028 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -235,27 +235,46 @@ nouveau_ttm_global_release(struct nouveau_drm *drm) drm->ttm.mem_global_ref.release = NULL; } -int -nouveau_ttm_init(struct nouveau_drm *drm) +static int +nouveau_ttm_init_host(struct nouveau_drm *drm, u8 kind) { - struct nvkm_device *device = nvxx_device(&drm->client.device); - struct nvkm_pci *pci = device->pci; struct nvif_mmu *mmu = &drm->client.mmu; - struct drm_device *dev = drm->dev; - int typei, ret; + int typei; typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | - NVIF_MEM_COHERENT); + kind | NVIF_MEM_COHERENT); if (typei < 0) return -ENOSYS; - drm->ttm.type_host = typei; + drm->ttm.type_host[!!kind] = typei; - typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE); + typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | kind); if (typei < 0) return -ENOSYS; - drm->ttm.type_ncoh = typei; + drm->ttm.type_ncoh[!!kind] = typei; + return 0; +} + +int +nouveau_ttm_init(struct nouveau_drm *drm) +{ + struct nvkm_device *device = nvxx_device(&drm->client.device); + struct nvkm_pci *pci = device->pci; + struct nvif_mmu *mmu = &drm->client.mmu; + struct drm_device *dev = drm->dev; + int typei, ret; + + ret = nouveau_ttm_init_host(drm, 0); + if (ret) + return ret; + + if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA && + drm->client.device.info.chipset != 0x50) { + ret = nouveau_ttm_init_host(drm, NVIF_MEM_KIND); + if (ret) + return ret; + } if (drm->client.device.info.platform != NV_DEVICE_INFO_V0_SOC && drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) { diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c index 9e2628dd8e4d..f5371d96b003 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c @@ -67,8 +67,8 @@ nouveau_vma_del(struct nouveau_vma **pvma) nvif_vmm_put(&vma->vmm->vmm, &tmp); } list_del(&vma->head); - *pvma = NULL; kfree(*pvma); + *pvma = NULL; } } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index e14643615698..00eeaaffeae5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2369,7 +2369,7 @@ nv13b_chipset = { .imem = gk20a_instmem_new, .ltc = gp100_ltc_new, .mc = gp10b_mc_new, - .mmu = gf100_mmu_new, + .mmu = gp10b_mmu_new, .secboot = gp10b_secboot_new, .pmu = gm20b_pmu_new, .timer = gk20a_timer_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c index 972370ed36f0..7c7efa4ea0d0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c @@ -36,6 +36,7 @@ nvbios_dp_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len) if (data) { *ver = nvbios_rd08(bios, data + 0x00); switch (*ver) { + case 0x20: case 0x21: case 0x30: case 0x40: @@ -63,6 +64,7 @@ nvbios_dpout_entry(struct nvkm_bios *bios, u8 idx, if (data && idx < *cnt) { u16 outp = nvbios_rd16(bios, data + *hdr + idx * *len); switch (*ver * !!outp) { + case 0x20: case 0x21: case 0x30: *hdr = nvbios_rd08(bios, data + 0x04); @@ -96,12 +98,16 @@ nvbios_dpout_parse(struct nvkm_bios *bios, u8 idx, info->type = nvbios_rd16(bios, data + 0x00); info->mask = nvbios_rd16(bios, data + 0x02); switch (*ver) { + case 0x20: + info->mask |= 0x00c0; /* match any link */ + /* fall-through */ case 0x21: case 0x30: info->flags = nvbios_rd08(bios, data + 0x05); info->script[0] = nvbios_rd16(bios, data + 0x06); info->script[1] = nvbios_rd16(bios, data + 0x08); - info->lnkcmp = nvbios_rd16(bios, data + 0x0a); + if (*len >= 0x0c) + info->lnkcmp = nvbios_rd16(bios, data + 0x0a); if (*len >= 0x0f) { info->script[2] = nvbios_rd16(bios, data + 0x0c); info->script[3] = nvbios_rd16(bios, data + 0x0e); @@ -170,6 +176,7 @@ nvbios_dpcfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx, memset(info, 0x00, sizeof(*info)); if (data) { switch (*ver) { + case 0x20: case 0x21: info->dc = nvbios_rd08(bios, data + 0x02); info->pe = nvbios_rd08(bios, data + 0x03); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c index 1ba7289684aa..db48a1daca0c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c @@ -249,7 +249,7 @@ nv50_instobj_acquire(struct nvkm_memory *memory) iobj->base.memory.ptrs = &nv50_instobj_fast; else iobj->base.memory.ptrs = &nv50_instobj_slow; - refcount_inc(&iobj->maps); + refcount_set(&iobj->maps, 1); } mutex_unlock(&imem->subdev.mutex); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c index b1b1f3626b96..deb96de54b00 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c @@ -136,6 +136,13 @@ nvkm_pci_init(struct nvkm_subdev *subdev) return ret; pci->irq = pdev->irq; + + /* Ensure MSI interrupts are armed, for the case where there are + * already interrupts pending (for whatever reason) at load time. + */ + if (pci->msi) + pci->func->msi_rearm(pci); + return ret; } diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c index dda904ec0534..500b6fb3e028 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c @@ -175,11 +175,31 @@ static void sun4i_hdmi_mode_set(struct drm_encoder *encoder, writel(val, hdmi->base + SUN4I_HDMI_VID_TIMING_POL_REG); } +static enum drm_mode_status sun4i_hdmi_mode_valid(struct drm_encoder *encoder, + const struct drm_display_mode *mode) +{ + struct sun4i_hdmi *hdmi = drm_encoder_to_sun4i_hdmi(encoder); + unsigned long rate = mode->clock * 1000; + unsigned long diff = rate / 200; /* +-0.5% allowed by HDMI spec */ + long rounded_rate; + + /* 165 MHz is the typical max pixelclock frequency for HDMI <= 1.2 */ + if (rate > 165000000) + return MODE_CLOCK_HIGH; + rounded_rate = clk_round_rate(hdmi->tmds_clk, rate); + if (rounded_rate > 0 && + max_t(unsigned long, rounded_rate, rate) - + min_t(unsigned long, rounded_rate, rate) < diff) + return MODE_OK; + return MODE_NOCLOCK; +} + static const struct drm_encoder_helper_funcs sun4i_hdmi_helper_funcs = { .atomic_check = sun4i_hdmi_atomic_check, .disable = sun4i_hdmi_disable, .enable = sun4i_hdmi_enable, .mode_set = sun4i_hdmi_mode_set, + .mode_valid = sun4i_hdmi_mode_valid, }; static const struct drm_encoder_funcs sun4i_hdmi_funcs = { diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index e122f5b2a395..f4284b51bdca 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -724,12 +724,12 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master, if (IS_ERR(tcon->crtc)) { dev_err(dev, "Couldn't create our CRTC\n"); ret = PTR_ERR(tcon->crtc); - goto err_free_clocks; + goto err_free_dotclock; } ret = sun4i_rgb_init(drm, tcon); if (ret < 0) - goto err_free_clocks; + goto err_free_dotclock; if (tcon->quirks->needs_de_be_mux) { /* diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index 44343a2bf55c..b5ba6441489f 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -455,6 +455,7 @@ ttm_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) freed += (nr_free_pool - shrink_pages) << pool->order; if (freed >= sc->nr_to_scan) break; + shrink_pages <<= pool->order; } mutex_unlock(&lock); return freed; @@ -543,7 +544,7 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags, int r = 0; unsigned i, j, cpages; unsigned npages = 1 << order; - unsigned max_cpages = min(count, (unsigned)NUM_PAGES_TO_ALLOC); + unsigned max_cpages = min(count << order, (unsigned)NUM_PAGES_TO_ALLOC); /* allocate array for page caching change */ caching_array = kmalloc(max_cpages*sizeof(struct page *), GFP_KERNEL); diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index c9790e2c3440..af5123042990 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -143,6 +143,7 @@ static int hwmon_thermal_add_sensor(struct device *dev, struct hwmon_device *hwdev, int index) { struct hwmon_thermal_data *tdata; + struct thermal_zone_device *tzd; tdata = devm_kzalloc(dev, sizeof(*tdata), GFP_KERNEL); if (!tdata) @@ -151,8 +152,14 @@ static int hwmon_thermal_add_sensor(struct device *dev, tdata->hwdev = hwdev; tdata->index = index; - devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata, - &hwmon_thermal_ops); + tzd = devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata, + &hwmon_thermal_ops); + /* + * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV, + * so ignore that error but forward any other error. + */ + if (IS_ERR(tzd) && (PTR_ERR(tzd) != -ENODEV)) + return PTR_ERR(tzd); return 0; } @@ -621,14 +628,20 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, if (!chip->ops->is_visible(drvdata, hwmon_temp, hwmon_temp_input, j)) continue; - if (info[i]->config[j] & HWMON_T_INPUT) - hwmon_thermal_add_sensor(dev, hwdev, j); + if (info[i]->config[j] & HWMON_T_INPUT) { + err = hwmon_thermal_add_sensor(dev, + hwdev, j); + if (err) + goto free_device; + } } } } return hdev; +free_device: + device_unregister(hdev); free_hwmon: kfree(hwdev); ida_remove: diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index feafdb961c48..59b2f96d986a 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -386,6 +386,9 @@ int ib_open_shared_qp_security(struct ib_qp *qp, struct ib_device *dev) if (ret) return ret; + if (!qp->qp_sec) + return 0; + mutex_lock(&real_qp->qp_sec->mutex); ret = check_qp_port_pkey_settings(real_qp->qp_sec->ports_pkeys, qp->qp_sec); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index d0202bb176a4..840b24096690 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2074,8 +2074,8 @@ int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file, return -EOPNOTSUPP; if (ucore->inlen > sizeof(cmd)) { - if (ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) + if (!ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) return -EOPNOTSUPP; } diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 3fb8fb6cc824..e36d27ed4daa 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1438,7 +1438,8 @@ int ib_close_qp(struct ib_qp *qp) spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); atomic_dec(&real_qp->usecnt); - ib_close_shared_qp_security(qp->qp_sec); + if (qp->qp_sec) + ib_close_shared_qp_security(qp->qp_sec); kfree(qp); return 0; diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index b7bfc536e00f..6f2b26126c64 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -395,7 +395,7 @@ next_cqe: static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq) { - if (CQE_OPCODE(cqe) == C4IW_DRAIN_OPCODE) { + if (DRAIN_CQE(cqe)) { WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid); return 0; } @@ -494,7 +494,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, /* * Special cqe for drain WR completions... */ - if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) { + if (DRAIN_CQE(hw_cqe)) { *cookie = CQE_DRAIN_COOKIE(hw_cqe); *cqe = *hw_cqe; goto skip_cqe; @@ -571,10 +571,10 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, ret = -EAGAIN; goto skip_cqe; } - if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) { + if (unlikely(!CQE_STATUS(hw_cqe) && + CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) { t4_set_wq_in_error(wq); - hw_cqe->header |= htonl(CQE_STATUS_V(T4_ERR_MSN)); - goto proc_cqe; + hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN)); } goto proc_cqe; } @@ -748,9 +748,6 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) c4iw_invalidate_mr(qhp->rhp, CQE_WRID_FR_STAG(&cqe)); break; - case C4IW_DRAIN_OPCODE: - wc->opcode = IB_WC_SEND; - break; default: pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n", CQE_OPCODE(&cqe), CQE_QPID(&cqe)); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 470f97a79ebb..65dd3726ca02 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -693,8 +693,6 @@ static inline int to_ib_qp_state(int c4iw_qp_state) return IB_QPS_ERR; } -#define C4IW_DRAIN_OPCODE FW_RI_SGE_EC_CR_RETURN - static inline u32 c4iw_ib_to_tpt_access(int a) { return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) | diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 38bddd02a943..d5c92fc520d6 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -790,21 +790,57 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc) return 0; } -static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) +static int ib_to_fw_opcode(int ib_opcode) +{ + int opcode; + + switch (ib_opcode) { + case IB_WR_SEND_WITH_INV: + opcode = FW_RI_SEND_WITH_INV; + break; + case IB_WR_SEND: + opcode = FW_RI_SEND; + break; + case IB_WR_RDMA_WRITE: + opcode = FW_RI_RDMA_WRITE; + break; + case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: + opcode = FW_RI_READ_REQ; + break; + case IB_WR_REG_MR: + opcode = FW_RI_FAST_REGISTER; + break; + case IB_WR_LOCAL_INV: + opcode = FW_RI_LOCAL_INV; + break; + default: + opcode = -EINVAL; + } + return opcode; +} + +static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) { struct t4_cqe cqe = {}; struct c4iw_cq *schp; unsigned long flag; struct t4_cq *cq; + int opcode; schp = to_c4iw_cq(qhp->ibqp.send_cq); cq = &schp->cq; + opcode = ib_to_fw_opcode(wr->opcode); + if (opcode < 0) + return opcode; + cqe.u.drain_cookie = wr->wr_id; cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | - CQE_OPCODE_V(C4IW_DRAIN_OPCODE) | + CQE_OPCODE_V(opcode) | CQE_TYPE_V(1) | CQE_SWCQE_V(1) | + CQE_DRAIN_V(1) | CQE_QPID_V(qhp->wq.sq.qid)); spin_lock_irqsave(&schp->lock, flag); @@ -819,6 +855,23 @@ static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) schp->ibcq.cq_context); spin_unlock_irqrestore(&schp->comp_handler_lock, flag); } + return 0; +} + +static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + int ret = 0; + + while (wr) { + ret = complete_sq_drain_wr(qhp, wr); + if (ret) { + *bad_wr = wr; + break; + } + wr = wr->next; + } + return ret; } static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) @@ -833,9 +886,10 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) cqe.u.drain_cookie = wr->wr_id; cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | - CQE_OPCODE_V(C4IW_DRAIN_OPCODE) | + CQE_OPCODE_V(FW_RI_SEND) | CQE_TYPE_V(0) | CQE_SWCQE_V(1) | + CQE_DRAIN_V(1) | CQE_QPID_V(qhp->wq.sq.qid)); spin_lock_irqsave(&rchp->lock, flag); @@ -852,6 +906,14 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) } } +static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr) +{ + while (wr) { + complete_rq_drain_wr(qhp, wr); + wr = wr->next; + } +} + int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -875,7 +937,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, */ if (qhp->wq.flushed) { spin_unlock_irqrestore(&qhp->lock, flag); - complete_sq_drain_wr(qhp, wr); + err = complete_sq_drain_wrs(qhp, wr, bad_wr); return err; } num_wrs = t4_sq_avail(&qhp->wq); @@ -1023,7 +1085,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, */ if (qhp->wq.flushed) { spin_unlock_irqrestore(&qhp->lock, flag); - complete_rq_drain_wr(qhp, wr); + complete_rq_drain_wrs(qhp, wr); return err; } num_wrs = t4_rq_avail(&qhp->wq); diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index e9ea94268d51..79e8ee12c391 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -197,6 +197,11 @@ struct t4_cqe { #define CQE_SWCQE_G(x) ((((x) >> CQE_SWCQE_S)) & CQE_SWCQE_M) #define CQE_SWCQE_V(x) ((x)<<CQE_SWCQE_S) +#define CQE_DRAIN_S 10 +#define CQE_DRAIN_M 0x1 +#define CQE_DRAIN_G(x) ((((x) >> CQE_DRAIN_S)) & CQE_DRAIN_M) +#define CQE_DRAIN_V(x) ((x)<<CQE_DRAIN_S) + #define CQE_STATUS_S 5 #define CQE_STATUS_M 0x1F #define CQE_STATUS_G(x) ((((x) >> CQE_STATUS_S)) & CQE_STATUS_M) @@ -213,6 +218,7 @@ struct t4_cqe { #define CQE_OPCODE_V(x) ((x)<<CQE_OPCODE_S) #define SW_CQE(x) (CQE_SWCQE_G(be32_to_cpu((x)->header))) +#define DRAIN_CQE(x) (CQE_DRAIN_G(be32_to_cpu((x)->header))) #define CQE_QPID(x) (CQE_QPID_G(be32_to_cpu((x)->header))) #define CQE_TYPE(x) (CQE_TYPE_G(be32_to_cpu((x)->header))) #define SQ_TYPE(x) (CQE_TYPE((x))) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 4a9b4d7efe63..8ce9118d4a7f 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1131,7 +1131,6 @@ struct hfi1_devdata { u16 pcie_lnkctl; u16 pcie_devctl2; u32 pci_msix0; - u32 pci_lnkctl3; u32 pci_tph2; /* diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 09e50fd2a08f..8c7e7a60b715 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -411,15 +411,12 @@ int restore_pci_variables(struct hfi1_devdata *dd) if (ret) goto error; - ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, - dd->pci_lnkctl3); - if (ret) - goto error; - - ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2); - if (ret) - goto error; - + if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) { + ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, + dd->pci_tph2); + if (ret) + goto error; + } return 0; error: @@ -469,15 +466,12 @@ int save_pci_variables(struct hfi1_devdata *dd) if (ret) goto error; - ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, - &dd->pci_lnkctl3); - if (ret) - goto error; - - ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2); - if (ret) - goto error; - + if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) { + ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, + &dd->pci_tph2); + if (ret) + goto error; + } return 0; error: diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 470995fa38d2..6f6712f87a73 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -47,17 +47,6 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey) return err; } -int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, - bool reset, void *out, int out_size) -{ - u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { }; - - MLX5_SET(query_cong_statistics_in, in, opcode, - MLX5_CMD_OP_QUERY_CONG_STATISTICS); - MLX5_SET(query_cong_statistics_in, in, clear, reset); - return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); -} - int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, void *out, int out_size) { diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index af4c24596274..78ffded7cc2c 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -37,8 +37,6 @@ #include <linux/mlx5/driver.h> int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); -int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, - bool reset, void *out, int out_size); int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, void *out, int out_size); int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 543d0a4c8bf3..8ac50de2b242 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1463,6 +1463,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, } INIT_LIST_HEAD(&context->vma_private_list); + mutex_init(&context->vma_private_list_mutex); INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); @@ -1624,7 +1625,9 @@ static void mlx5_ib_vma_close(struct vm_area_struct *area) * mlx5_ib_disassociate_ucontext(). */ mlx5_ib_vma_priv_data->vma = NULL; + mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex); list_del(&mlx5_ib_vma_priv_data->list); + mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex); kfree(mlx5_ib_vma_priv_data); } @@ -1644,10 +1647,13 @@ static int mlx5_ib_set_vma_data(struct vm_area_struct *vma, return -ENOMEM; vma_prv->vma = vma; + vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex; vma->vm_private_data = vma_prv; vma->vm_ops = &mlx5_ib_vm_ops; + mutex_lock(&ctx->vma_private_list_mutex); list_add(&vma_prv->list, vma_head); + mutex_unlock(&ctx->vma_private_list_mutex); return 0; } @@ -1690,6 +1696,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) * mlx5_ib_vma_close. */ down_write(&owning_mm->mmap_sem); + mutex_lock(&context->vma_private_list_mutex); list_for_each_entry_safe(vma_private, n, &context->vma_private_list, list) { vma = vma_private->vma; @@ -1704,6 +1711,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) list_del(&vma_private->list); kfree(vma_private); } + mutex_unlock(&context->vma_private_list_mutex); up_write(&owning_mm->mmap_sem); mmput(owning_mm); put_task_struct(owning_process); @@ -3737,34 +3745,6 @@ free: return ret; } -static int mlx5_ib_query_cong_counters(struct mlx5_ib_dev *dev, - struct mlx5_ib_port *port, - struct rdma_hw_stats *stats) -{ - int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); - void *out; - int ret, i; - int offset = port->cnts.num_q_counters; - - out = kvzalloc(outlen, GFP_KERNEL); - if (!out) - return -ENOMEM; - - ret = mlx5_cmd_query_cong_counter(dev->mdev, false, out, outlen); - if (ret) - goto free; - - for (i = 0; i < port->cnts.num_cong_counters; i++) { - stats->value[i + offset] = - be64_to_cpup((__be64 *)(out + - port->cnts.offsets[i + offset])); - } - -free: - kvfree(out); - return ret; -} - static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, u8 port_num, int index) @@ -3782,7 +3762,12 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, num_counters = port->cnts.num_q_counters; if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { - ret = mlx5_ib_query_cong_counters(dev, port, stats); + ret = mlx5_lag_query_cong_counters(dev->mdev, + stats->value + + port->cnts.num_q_counters, + port->cnts.num_cong_counters, + port->cnts.offsets + + port->cnts.num_q_counters); if (ret) return ret; num_counters += port->cnts.num_cong_counters; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 6dd8cac78de2..2c5f3533bbc9 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -115,6 +115,8 @@ enum { struct mlx5_ib_vma_private_data { struct list_head list; struct vm_area_struct *vma; + /* protect vma_private_list add/del */ + struct mutex *vma_private_list_mutex; }; struct mlx5_ib_ucontext { @@ -129,6 +131,8 @@ struct mlx5_ib_ucontext { /* Transport Domain number */ u32 tdn; struct list_head vma_private_list; + /* protect vma_private_list add/del */ + struct mutex vma_private_list_mutex; unsigned long upd_xlt_page; /* protect ODP/KSM */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index ee0ee1f9994b..d109fe8290a7 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1637,6 +1637,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, MLX5_SET(mkc, mkc, access_mode, mr->access_mode); MLX5_SET(mkc, mkc, umr_en, 1); + mr->ibmr.device = pd->device; err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); if (err) goto err_destroy_psv; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index 63bc2efc34eb..4f7bd3b6a315 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -94,7 +94,7 @@ struct pvrdma_cq { u32 cq_handle; bool is_kernel; atomic_t refcnt; - wait_queue_head_t wait; + struct completion free; }; struct pvrdma_id_table { @@ -175,7 +175,7 @@ struct pvrdma_srq { u32 srq_handle; int npages; refcount_t refcnt; - wait_queue_head_t wait; + struct completion free; }; struct pvrdma_qp { @@ -197,7 +197,7 @@ struct pvrdma_qp { bool is_kernel; struct mutex mutex; /* QP state mutex. */ atomic_t refcnt; - wait_queue_head_t wait; + struct completion free; }; struct pvrdma_dev { diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 3562c0c30492..e529622cefad 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -179,7 +179,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0); atomic_set(&cq->refcnt, 1); - init_waitqueue_head(&cq->wait); + init_completion(&cq->free); spin_lock_init(&cq->cq_lock); memset(cmd, 0, sizeof(*cmd)); @@ -230,8 +230,9 @@ err_cq: static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq) { - atomic_dec(&cq->refcnt); - wait_event(cq->wait, !atomic_read(&cq->refcnt)); + if (atomic_dec_and_test(&cq->refcnt)) + complete(&cq->free); + wait_for_completion(&cq->free); if (!cq->is_kernel) ib_umem_release(cq->umem); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 1f4e18717a00..e92681878c93 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -346,9 +346,8 @@ static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type) ibqp->event_handler(&e, ibqp->qp_context); } if (qp) { - atomic_dec(&qp->refcnt); - if (atomic_read(&qp->refcnt) == 0) - wake_up(&qp->wait); + if (atomic_dec_and_test(&qp->refcnt)) + complete(&qp->free); } } @@ -373,9 +372,8 @@ static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type) ibcq->event_handler(&e, ibcq->cq_context); } if (cq) { - atomic_dec(&cq->refcnt); - if (atomic_read(&cq->refcnt) == 0) - wake_up(&cq->wait); + if (atomic_dec_and_test(&cq->refcnt)) + complete(&cq->free); } } @@ -404,7 +402,7 @@ static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type) } if (srq) { if (refcount_dec_and_test(&srq->refcnt)) - wake_up(&srq->wait); + complete(&srq->free); } } @@ -539,9 +537,8 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id) if (cq && cq->ibcq.comp_handler) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); if (cq) { - atomic_dec(&cq->refcnt); - if (atomic_read(&cq->refcnt)) - wake_up(&cq->wait); + if (atomic_dec_and_test(&cq->refcnt)) + complete(&cq->free); } pvrdma_idx_ring_inc(&ring->cons_head, ring_slots); } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 10420a18d02f..4059308e1454 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -246,7 +246,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, spin_lock_init(&qp->rq.lock); mutex_init(&qp->mutex); atomic_set(&qp->refcnt, 1); - init_waitqueue_head(&qp->wait); + init_completion(&qp->free); qp->state = IB_QPS_RESET; @@ -428,8 +428,16 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp) pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); - atomic_dec(&qp->refcnt); - wait_event(qp->wait, !atomic_read(&qp->refcnt)); + if (atomic_dec_and_test(&qp->refcnt)) + complete(&qp->free); + wait_for_completion(&qp->free); + + if (!qp->is_kernel) { + if (qp->rumem) + ib_umem_release(qp->rumem); + if (qp->sumem) + ib_umem_release(qp->sumem); + } pvrdma_page_dir_cleanup(dev, &qp->pdir); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 826ccb864596..5acebb1ef631 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -149,7 +149,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, spin_lock_init(&srq->lock); refcount_set(&srq->refcnt, 1); - init_waitqueue_head(&srq->wait); + init_completion(&srq->free); dev_dbg(&dev->pdev->dev, "create shared receive queue from user space\n"); @@ -236,8 +236,9 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) dev->srq_tbl[srq->srq_handle] = NULL; spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); - refcount_dec(&srq->refcnt); - wait_event(srq->wait, !refcount_read(&srq->refcnt)); + if (refcount_dec_and_test(&srq->refcnt)) + complete(&srq->free); + wait_for_completion(&srq->free); /* There is no support for kernel clients, so this is safe. */ ib_umem_release(srq->umem); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 3b96cdaf9a83..e6151a29c412 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1236,13 +1236,10 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ipoib_ib_dev_down(dev); if (level == IPOIB_FLUSH_HEAVY) { - rtnl_lock(); if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) ipoib_ib_dev_stop(dev); - result = ipoib_ib_dev_open(dev); - rtnl_unlock(); - if (result) + if (ipoib_ib_dev_open(dev)) return; if (netif_queue_stopped(dev)) @@ -1282,7 +1279,9 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_heavy); + rtnl_lock(); __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0); + rtnl_unlock(); } void ipoib_ib_dev_cleanup(struct net_device *dev) diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index fd83c7f77a95..f3654fd2eaf3 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -186,7 +186,7 @@ void led_blink_set(struct led_classdev *led_cdev, unsigned long *delay_on, unsigned long *delay_off) { - del_timer_sync(&led_cdev->blink_timer); + led_stop_software_blink(led_cdev); clear_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags); clear_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 01b7f2fc249c..57eb26dfc059 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3029,7 +3029,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) del_timer_sync(&bp->timer); - if (IS_PF(bp)) { + if (IS_PF(bp) && !BP_NOMCP(bp)) { /* Set ALWAYS_ALIVE bit in shmem */ bp->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE; bnx2x_drv_pulse(bp); @@ -3115,7 +3115,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) bp->cnic_loaded = false; /* Clear driver version indication in shmem */ - if (IS_PF(bp)) + if (IS_PF(bp) && !BP_NOMCP(bp)) bnx2x_update_mng_version(bp); /* Check if there are pending parity attentions. If there are - set diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 4d0654813de1..7b08323e3f3d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -9578,6 +9578,15 @@ static int bnx2x_init_shmem(struct bnx2x *bp) do { bp->common.shmem_base = REG_RD(bp, MISC_REG_SHARED_MEM_ADDR); + + /* If we read all 0xFFs, means we are in PCI error state and + * should bail out to avoid crashes on adapter's FW reads. + */ + if (bp->common.shmem_base == 0xFFFFFFFF) { + bp->flags |= NO_MCP_FLAG; + return -ENODEV; + } + if (bp->common.shmem_base) { val = SHMEM_RD(bp, validity_map[BP_PORT(bp)]); if (val & SHR_MEM_VALIDITY_MB) @@ -14322,7 +14331,10 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev) BNX2X_ERR("IO slot reset --> driver unload\n"); /* MCP should have been reset; Need to wait for validity */ - bnx2x_init_shmem(bp); + if (bnx2x_init_shmem(bp)) { + rtnl_unlock(); + return PCI_ERS_RESULT_DISCONNECT; + } if (IS_PF(bp) && SHMEM2_HAS(bp, drv_capabilities_flag)) { u32 v; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index d09c5a9c53b5..8995cfefbfcf 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -4,11 +4,13 @@ * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com) * Copyright (C) 2001, 2002, 2003 Jeff Garzik (jgarzik@pobox.com) * Copyright (C) 2004 Sun Microsystems Inc. - * Copyright (C) 2005-2014 Broadcom Corporation. + * Copyright (C) 2005-2016 Broadcom Corporation. + * Copyright (C) 2016-2017 Broadcom Limited. * * Firmware is: * Derived from proprietary unpublished source code, - * Copyright (C) 2000-2003 Broadcom Corporation. + * Copyright (C) 2000-2016 Broadcom Corporation. + * Copyright (C) 2016-2017 Broadcom Ltd. * * Permission is hereby granted for the distribution of this firmware * data in hexadecimal or equivalent format, provided this copyright @@ -10052,6 +10054,16 @@ static int tg3_reset_hw(struct tg3 *tp, bool reset_phy) tw32(GRC_MODE, tp->grc_mode | val); + /* On one of the AMD platform, MRRS is restricted to 4000 because of + * south bridge limitation. As a workaround, Driver is setting MRRS + * to 2048 instead of default 4096. + */ + if (tp->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL && + tp->pdev->subsystem_device == TG3PCI_SUBDEVICE_ID_DELL_5762) { + val = tr32(TG3PCI_DEV_STATUS_CTRL) & ~MAX_READ_REQ_MASK; + tw32(TG3PCI_DEV_STATUS_CTRL, val | MAX_READ_REQ_SIZE_2048); + } + /* Setup the timer prescalar register. Clock is always 66Mhz. */ val = tr32(GRC_MISC_CFG); val &= ~0xff; @@ -14227,7 +14239,8 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu) */ if (tg3_asic_rev(tp) == ASIC_REV_57766 || tg3_asic_rev(tp) == ASIC_REV_5717 || - tg3_asic_rev(tp) == ASIC_REV_5719) + tg3_asic_rev(tp) == ASIC_REV_5719 || + tg3_asic_rev(tp) == ASIC_REV_5720) reset_phy = true; err = tg3_restart_hw(tp, reset_phy); diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index c2d02d02d1e6..1f0271fa7c74 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -5,7 +5,8 @@ * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com) * Copyright (C) 2001 Jeff Garzik (jgarzik@pobox.com) * Copyright (C) 2004 Sun Microsystems Inc. - * Copyright (C) 2007-2014 Broadcom Corporation. + * Copyright (C) 2007-2016 Broadcom Corporation. + * Copyright (C) 2016-2017 Broadcom Limited. */ #ifndef _T3_H @@ -96,6 +97,7 @@ #define TG3PCI_SUBDEVICE_ID_DELL_JAGUAR 0x0106 #define TG3PCI_SUBDEVICE_ID_DELL_MERLOT 0x0109 #define TG3PCI_SUBDEVICE_ID_DELL_SLIM_MERLOT 0x010a +#define TG3PCI_SUBDEVICE_ID_DELL_5762 0x07f0 #define TG3PCI_SUBVENDOR_ID_COMPAQ PCI_VENDOR_ID_COMPAQ #define TG3PCI_SUBDEVICE_ID_COMPAQ_BANSHEE 0x007c #define TG3PCI_SUBDEVICE_ID_COMPAQ_BANSHEE_2 0x009a @@ -281,6 +283,9 @@ #define TG3PCI_STD_RING_PROD_IDX 0x00000098 /* 64-bit */ #define TG3PCI_RCV_RET_RING_CON_IDX 0x000000a0 /* 64-bit */ /* 0xa8 --> 0xb8 unused */ +#define TG3PCI_DEV_STATUS_CTRL 0x000000b4 +#define MAX_READ_REQ_SIZE_2048 0x00004000 +#define MAX_READ_REQ_MASK 0x00007000 #define TG3PCI_DUAL_MAC_CTRL 0x000000b8 #define DUAL_MAC_CTRL_CH_MASK 0x00000003 #define DUAL_MAC_CTRL_ID 0x00000004 diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 2d1b06579c1a..e17d10b8b041 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -818,6 +818,12 @@ static void fec_enet_bd_init(struct net_device *dev) for (i = 0; i < txq->bd.ring_size; i++) { /* Initialize the BD for every fragment in the page. */ bdp->cbd_sc = cpu_to_fec16(0); + if (bdp->cbd_bufaddr && + !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) + dma_unmap_single(&fep->pdev->dev, + fec32_to_cpu(bdp->cbd_bufaddr), + fec16_to_cpu(bdp->cbd_datlen), + DMA_TO_DEVICE); if (txq->tx_skbuff[i]) { dev_kfree_skb_any(txq->tx_skbuff[i]); txq->tx_skbuff[i] = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index f26f97fe4666..582b2f18010a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -137,6 +137,17 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag); +static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, + bool reset, void *out, int out_size) +{ + u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { }; + + MLX5_SET(query_cong_statistics_in, in, opcode, + MLX5_CMD_OP_QUERY_CONG_STATISTICS); + MLX5_SET(query_cong_statistics_in, in, clear, reset); + return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); +} + static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev) { return dev->priv.lag; @@ -633,3 +644,48 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv) /* If bonded, we do not add an IB device for PF1. */ return false; } + +int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, + u64 *values, + int num_counters, + size_t *offsets) +{ + int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); + struct mlx5_core_dev *mdev[MLX5_MAX_PORTS]; + struct mlx5_lag *ldev; + int num_ports; + int ret, i, j; + void *out; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + memset(values, 0, sizeof(*values) * num_counters); + + mutex_lock(&lag_mutex); + ldev = mlx5_lag_dev_get(dev); + if (ldev && mlx5_lag_is_bonded(ldev)) { + num_ports = MLX5_MAX_PORTS; + mdev[0] = ldev->pf[0].dev; + mdev[1] = ldev->pf[1].dev; + } else { + num_ports = 1; + mdev[0] = dev; + } + + for (i = 0; i < num_ports; ++i) { + ret = mlx5_cmd_query_cong_counter(mdev[i], false, out, outlen); + if (ret) + goto unlock; + + for (j = 0; j < num_counters; ++j) + values[j] += be64_to_cpup((__be64 *)(out + offsets[j])); + } + +unlock: + mutex_unlock(&lag_mutex); + kvfree(out); + return ret; +} +EXPORT_SYMBOL(mlx5_lag_query_cong_counters); diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index fd500b18e77f..0f45310300f6 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -624,6 +624,7 @@ static int ksz9031_read_status(struct phy_device *phydev) phydev->link = 0; if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev)) phydev->drv->config_intr(phydev); + return genphy_config_aneg(phydev); } return 0; diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 2ec140ec7923..82166a26f5c6 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -567,6 +567,7 @@ struct phylink *phylink_create(struct net_device *ndev, pl->link_config.pause = MLO_PAUSE_AN; pl->link_config.speed = SPEED_UNKNOWN; pl->link_config.duplex = DUPLEX_UNKNOWN; + pl->link_config.an_enabled = true; pl->ops = ops; __set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state); @@ -1139,6 +1140,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, mutex_lock(&pl->state_mutex); /* Configure the MAC to match the new settings */ linkmode_copy(pl->link_config.advertising, our_kset.link_modes.advertising); + pl->link_config.interface = config.interface; pl->link_config.speed = our_kset.base.speed; pl->link_config.duplex = our_kset.base.duplex; pl->link_config.an_enabled = our_kset.base.autoneg != AUTONEG_DISABLE; diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index e949e3302af4..c586bcdb5190 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -211,12 +211,12 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping, return ret; } -static int btt_log_read_pair(struct arena_info *arena, u32 lane, - struct log_entry *ent) +static int btt_log_group_read(struct arena_info *arena, u32 lane, + struct log_group *log) { return arena_read_bytes(arena, - arena->logoff + (2 * lane * LOG_ENT_SIZE), ent, - 2 * LOG_ENT_SIZE, 0); + arena->logoff + (lane * LOG_GRP_SIZE), log, + LOG_GRP_SIZE, 0); } static struct dentry *debugfs_root; @@ -256,6 +256,8 @@ static void arena_debugfs_init(struct arena_info *a, struct dentry *parent, debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff); debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off); debugfs_create_x32("flags", S_IRUGO, d, &a->flags); + debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]); + debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]); } static void btt_debugfs_init(struct btt *btt) @@ -274,6 +276,11 @@ static void btt_debugfs_init(struct btt *btt) } } +static u32 log_seq(struct log_group *log, int log_idx) +{ + return le32_to_cpu(log->ent[log_idx].seq); +} + /* * This function accepts two log entries, and uses the * sequence number to find the 'older' entry. @@ -283,8 +290,10 @@ static void btt_debugfs_init(struct btt *btt) * * TODO The logic feels a bit kludge-y. make it better.. */ -static int btt_log_get_old(struct log_entry *ent) +static int btt_log_get_old(struct arena_info *a, struct log_group *log) { + int idx0 = a->log_index[0]; + int idx1 = a->log_index[1]; int old; /* @@ -292,23 +301,23 @@ static int btt_log_get_old(struct log_entry *ent) * the next time, the following logic works out to put this * (next) entry into [1] */ - if (ent[0].seq == 0) { - ent[0].seq = cpu_to_le32(1); + if (log_seq(log, idx0) == 0) { + log->ent[idx0].seq = cpu_to_le32(1); return 0; } - if (ent[0].seq == ent[1].seq) + if (log_seq(log, idx0) == log_seq(log, idx1)) return -EINVAL; - if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5) + if (log_seq(log, idx0) + log_seq(log, idx1) > 5) return -EINVAL; - if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) { - if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1) + if (log_seq(log, idx0) < log_seq(log, idx1)) { + if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1) old = 0; else old = 1; } else { - if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1) + if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1) old = 1; else old = 0; @@ -328,17 +337,18 @@ static int btt_log_read(struct arena_info *arena, u32 lane, { int ret; int old_ent, ret_ent; - struct log_entry log[2]; + struct log_group log; - ret = btt_log_read_pair(arena, lane, log); + ret = btt_log_group_read(arena, lane, &log); if (ret) return -EIO; - old_ent = btt_log_get_old(log); + old_ent = btt_log_get_old(arena, &log); if (old_ent < 0 || old_ent > 1) { dev_err(to_dev(arena), "log corruption (%d): lane %d seq [%d, %d]\n", - old_ent, lane, log[0].seq, log[1].seq); + old_ent, lane, log.ent[arena->log_index[0]].seq, + log.ent[arena->log_index[1]].seq); /* TODO set error state? */ return -EIO; } @@ -346,7 +356,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane, ret_ent = (old_flag ? old_ent : (1 - old_ent)); if (ent != NULL) - memcpy(ent, &log[ret_ent], LOG_ENT_SIZE); + memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE); return ret_ent; } @@ -360,17 +370,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane, u32 sub, struct log_entry *ent, unsigned long flags) { int ret; - /* - * Ignore the padding in log_entry for calculating log_half. - * The entry is 'committed' when we write the sequence number, - * and we want to ensure that that is the last thing written. - * We don't bother writing the padding as that would be extra - * media wear and write amplification - */ - unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2; - u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE); + u32 group_slot = arena->log_index[sub]; + unsigned int log_half = LOG_ENT_SIZE / 2; void *src = ent; + u64 ns_off; + ns_off = arena->logoff + (lane * LOG_GRP_SIZE) + + (group_slot * LOG_ENT_SIZE); /* split the 16B write into atomic, durable halves */ ret = arena_write_bytes(arena, ns_off, src, log_half, flags); if (ret) @@ -453,7 +459,7 @@ static int btt_log_init(struct arena_info *arena) { size_t logsize = arena->info2off - arena->logoff; size_t chunk_size = SZ_4K, offset = 0; - struct log_entry log; + struct log_entry ent; void *zerobuf; int ret; u32 i; @@ -485,11 +491,11 @@ static int btt_log_init(struct arena_info *arena) } for (i = 0; i < arena->nfree; i++) { - log.lba = cpu_to_le32(i); - log.old_map = cpu_to_le32(arena->external_nlba + i); - log.new_map = cpu_to_le32(arena->external_nlba + i); - log.seq = cpu_to_le32(LOG_SEQ_INIT); - ret = __btt_log_write(arena, i, 0, &log, 0); + ent.lba = cpu_to_le32(i); + ent.old_map = cpu_to_le32(arena->external_nlba + i); + ent.new_map = cpu_to_le32(arena->external_nlba + i); + ent.seq = cpu_to_le32(LOG_SEQ_INIT); + ret = __btt_log_write(arena, i, 0, &ent, 0); if (ret) goto free; } @@ -594,6 +600,123 @@ static int btt_freelist_init(struct arena_info *arena) return 0; } +static bool ent_is_padding(struct log_entry *ent) +{ + return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0) + && (ent->seq == 0); +} + +/* + * Detecting valid log indices: We read a log group (see the comments in btt.h + * for a description of a 'log_group' and its 'slots'), and iterate over its + * four slots. We expect that a padding slot will be all-zeroes, and use this + * to detect a padding slot vs. an actual entry. + * + * If a log_group is in the initial state, i.e. hasn't been used since the + * creation of this BTT layout, it will have three of the four slots with + * zeroes. We skip over these log_groups for the detection of log_index. If + * all log_groups are in the initial state (i.e. the BTT has never been + * written to), it is safe to assume the 'new format' of log entries in slots + * (0, 1). + */ +static int log_set_indices(struct arena_info *arena) +{ + bool idx_set = false, initial_state = true; + int ret, log_index[2] = {-1, -1}; + u32 i, j, next_idx = 0; + struct log_group log; + u32 pad_count = 0; + + for (i = 0; i < arena->nfree; i++) { + ret = btt_log_group_read(arena, i, &log); + if (ret < 0) + return ret; + + for (j = 0; j < 4; j++) { + if (!idx_set) { + if (ent_is_padding(&log.ent[j])) { + pad_count++; + continue; + } else { + /* Skip if index has been recorded */ + if ((next_idx == 1) && + (j == log_index[0])) + continue; + /* valid entry, record index */ + log_index[next_idx] = j; + next_idx++; + } + if (next_idx == 2) { + /* two valid entries found */ + idx_set = true; + } else if (next_idx > 2) { + /* too many valid indices */ + return -ENXIO; + } + } else { + /* + * once the indices have been set, just verify + * that all subsequent log groups are either in + * their initial state or follow the same + * indices. + */ + if (j == log_index[0]) { + /* entry must be 'valid' */ + if (ent_is_padding(&log.ent[j])) + return -ENXIO; + } else if (j == log_index[1]) { + ; + /* + * log_index[1] can be padding if the + * lane never got used and it is still + * in the initial state (three 'padding' + * entries) + */ + } else { + /* entry must be invalid (padding) */ + if (!ent_is_padding(&log.ent[j])) + return -ENXIO; + } + } + } + /* + * If any of the log_groups have more than one valid, + * non-padding entry, then the we are no longer in the + * initial_state + */ + if (pad_count < 3) + initial_state = false; + pad_count = 0; + } + + if (!initial_state && !idx_set) + return -ENXIO; + + /* + * If all the entries in the log were in the initial state, + * assume new padding scheme + */ + if (initial_state) + log_index[1] = 1; + + /* + * Only allow the known permutations of log/padding indices, + * i.e. (0, 1), and (0, 2) + */ + if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2))) + ; /* known index possibilities */ + else { + dev_err(to_dev(arena), "Found an unknown padding scheme\n"); + return -ENXIO; + } + + arena->log_index[0] = log_index[0]; + arena->log_index[1] = log_index[1]; + dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]); + dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]); + return 0; +} + static int btt_rtt_init(struct arena_info *arena) { arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL); @@ -650,8 +773,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, available -= 2 * BTT_PG_SIZE; /* The log takes a fixed amount of space based on nfree */ - logsize = roundup(2 * arena->nfree * sizeof(struct log_entry), - BTT_PG_SIZE); + logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE); available -= logsize; /* Calculate optimal split between map and data area */ @@ -668,6 +790,10 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, arena->mapoff = arena->dataoff + datasize; arena->logoff = arena->mapoff + mapsize; arena->info2off = arena->logoff + logsize; + + /* Default log indices are (0,1) */ + arena->log_index[0] = 0; + arena->log_index[1] = 1; return arena; } @@ -758,6 +884,13 @@ static int discover_arenas(struct btt *btt) arena->external_lba_start = cur_nlba; parse_arena_meta(arena, super, cur_off); + ret = log_set_indices(arena); + if (ret) { + dev_err(to_dev(arena), + "Unable to deduce log/padding indices\n"); + goto out; + } + mutex_init(&arena->err_lock); ret = btt_freelist_init(arena); if (ret) diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h index 578c2057524d..db3cb6d4d0d4 100644 --- a/drivers/nvdimm/btt.h +++ b/drivers/nvdimm/btt.h @@ -27,6 +27,7 @@ #define MAP_ERR_MASK (1 << MAP_ERR_SHIFT) #define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT))) #define MAP_ENT_NORMAL 0xC0000000 +#define LOG_GRP_SIZE sizeof(struct log_group) #define LOG_ENT_SIZE sizeof(struct log_entry) #define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */ #define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */ @@ -50,12 +51,52 @@ enum btt_init_state { INIT_READY }; +/* + * A log group represents one log 'lane', and consists of four log entries. + * Two of the four entries are valid entries, and the remaining two are + * padding. Due to an old bug in the padding location, we need to perform a + * test to determine the padding scheme being used, and use that scheme + * thereafter. + * + * In kernels prior to 4.15, 'log group' would have actual log entries at + * indices (0, 2) and padding at indices (1, 3), where as the correct/updated + * format has log entries at indices (0, 1) and padding at indices (2, 3). + * + * Old (pre 4.15) format: + * +-----------------+-----------------+ + * | ent[0] | ent[1] | + * | 16B | 16B | + * | lba/old/new/seq | pad | + * +-----------------------------------+ + * | ent[2] | ent[3] | + * | 16B | 16B | + * | lba/old/new/seq | pad | + * +-----------------+-----------------+ + * + * New format: + * +-----------------+-----------------+ + * | ent[0] | ent[1] | + * | 16B | 16B | + * | lba/old/new/seq | lba/old/new/seq | + * +-----------------------------------+ + * | ent[2] | ent[3] | + * | 16B | 16B | + * | pad | pad | + * +-----------------+-----------------+ + * + * We detect during start-up which format is in use, and set + * arena->log_index[(0, 1)] with the detected format. + */ + struct log_entry { __le32 lba; __le32 old_map; __le32 new_map; __le32 seq; - __le64 padding[2]; +}; + +struct log_group { + struct log_entry ent[4]; }; struct btt_sb { @@ -125,6 +166,8 @@ struct aligned_lock { * @list: List head for list of arenas * @debugfs_dir: Debugfs dentry * @flags: Arena flags - may signify error states. + * @err_lock: Mutex for synchronizing error clearing. + * @log_index: Indices of the valid log entries in a log_group * * arena_info is a per-arena handle. Once an arena is narrowed down for an * IO, this struct is passed around for the duration of the IO. @@ -157,6 +200,7 @@ struct arena_info { /* Arena flags */ u32 flags; struct mutex err_lock; + int log_index[2]; }; /** @@ -176,6 +220,7 @@ struct arena_info { * @init_lock: Mutex used for the BTT initialization * @init_state: Flag describing the initialization state for the BTT * @num_arenas: Number of arenas in the BTT instance + * @phys_bb: Pointer to the namespace's badblocks structure */ struct btt { struct gendisk *btt_disk; diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 65cc171c721d..2adada1a5855 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -364,9 +364,9 @@ struct device *nd_pfn_create(struct nd_region *nd_region) int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) { u64 checksum, offset; - unsigned long align; enum nd_pfn_mode mode; struct nd_namespace_io *nsio; + unsigned long align, start_pad; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; struct nd_namespace_common *ndns = nd_pfn->ndns; const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev); @@ -410,6 +410,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) align = le32_to_cpu(pfn_sb->align); offset = le64_to_cpu(pfn_sb->dataoff); + start_pad = le32_to_cpu(pfn_sb->start_pad); if (align == 0) align = 1UL << ilog2(offset); mode = le32_to_cpu(pfn_sb->mode); @@ -468,7 +469,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) return -EBUSY; } - if ((align && !IS_ALIGNED(offset, align)) + if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align)) || !IS_ALIGNED(offset, PAGE_SIZE)) { dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled align: %#lx\n", @@ -582,6 +583,12 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, return altmap; } +static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys) +{ + return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys), + ALIGN_DOWN(phys, nd_pfn->align)); +} + static int nd_pfn_init(struct nd_pfn *nd_pfn) { u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0; @@ -637,13 +644,16 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) start = nsio->res.start; size = PHYS_SECTION_ALIGN_UP(start + size) - start; if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, - IORES_DESC_NONE) == REGION_MIXED) { + IORES_DESC_NONE) == REGION_MIXED + || !IS_ALIGNED(start + resource_size(&nsio->res), + nd_pfn->align)) { size = resource_size(&nsio->res); - end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size); + end_trunc = start + size - phys_pmem_align_down(nd_pfn, + start + size); } if (start_pad + end_trunc) - dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n", + dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n", dev_name(&ndns->dev), start_pad + end_trunc); /* diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index bdedb6325c72..4471fd94e1fe 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1620,6 +1620,22 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) clear_bit(i, chip->irq.valid_mask); } + /* + * The same set of machines in chv_no_valid_mask[] have incorrectly + * configured GPIOs that generate spurious interrupts so we use + * this same list to apply another quirk for them. + * + * See also https://bugzilla.kernel.org/show_bug.cgi?id=197953. + */ + if (!need_valid_mask) { + /* + * Mask all interrupts the community is able to generate + * but leave the ones that can only generate GPEs unmasked. + */ + chv_writel(GENMASK(31, pctrl->community->nirqs), + pctrl->regs + CHV_INTMASK); + } + /* Clear all interrupts */ chv_writel(0xffff, pctrl->regs + CHV_INTSTAT); diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index f77e499afddd..065f0b607373 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -257,10 +257,25 @@ static void release_memory_resource(struct resource *resource) kfree(resource); } +/* + * Host memory not allocated to dom0. We can use this range for hotplug-based + * ballooning. + * + * It's a type-less resource. Setting IORESOURCE_MEM will make resource + * management algorithms (arch_remove_reservations()) look into guest e820, + * which we don't want. + */ +static struct resource hostmem_resource = { + .name = "Host RAM", +}; + +void __attribute__((weak)) __init arch_xen_balloon_init(struct resource *res) +{} + static struct resource *additional_memory_resource(phys_addr_t size) { - struct resource *res; - int ret; + struct resource *res, *res_hostmem; + int ret = -ENOMEM; res = kzalloc(sizeof(*res), GFP_KERNEL); if (!res) @@ -269,13 +284,42 @@ static struct resource *additional_memory_resource(phys_addr_t size) res->name = "System RAM"; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - ret = allocate_resource(&iomem_resource, res, - size, 0, -1, - PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); - if (ret < 0) { - pr_err("Cannot allocate new System RAM resource\n"); - kfree(res); - return NULL; + res_hostmem = kzalloc(sizeof(*res), GFP_KERNEL); + if (res_hostmem) { + /* Try to grab a range from hostmem */ + res_hostmem->name = "Host memory"; + ret = allocate_resource(&hostmem_resource, res_hostmem, + size, 0, -1, + PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); + } + + if (!ret) { + /* + * Insert this resource into iomem. Because hostmem_resource + * tracks portion of guest e820 marked as UNUSABLE noone else + * should try to use it. + */ + res->start = res_hostmem->start; + res->end = res_hostmem->end; + ret = insert_resource(&iomem_resource, res); + if (ret < 0) { + pr_err("Can't insert iomem_resource [%llx - %llx]\n", + res->start, res->end); + release_memory_resource(res_hostmem); + res_hostmem = NULL; + res->start = res->end = 0; + } + } + + if (ret) { + ret = allocate_resource(&iomem_resource, res, + size, 0, -1, + PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); + if (ret < 0) { + pr_err("Cannot allocate new System RAM resource\n"); + kfree(res); + return NULL; + } } #ifdef CONFIG_SPARSEMEM @@ -287,6 +331,7 @@ static struct resource *additional_memory_resource(phys_addr_t size) pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n", pfn, limit); release_memory_resource(res); + release_memory_resource(res_hostmem); return NULL; } } @@ -765,6 +810,8 @@ static int __init balloon_init(void) set_online_page_callback(&xen_online_page); register_memory_notifier(&xen_memory_nb); register_sysctl_table(xen_root); + + arch_xen_balloon_init(&hostmem_resource); #endif #ifdef CONFIG_XEN_PV diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 0da80019a917..83ed7715f856 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -702,7 +702,7 @@ xfs_alloc_ag_vextent( ASSERT(args->agbno % args->alignment == 0); /* if not file data, insert new block into the reverse map btree */ - if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) { + if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) { error = xfs_rmap_alloc(args->tp, args->agbp, args->agno, args->agbno, args->len, &args->oinfo); if (error) @@ -1682,7 +1682,7 @@ xfs_free_ag_extent( bno_cur = cnt_cur = NULL; mp = tp->t_mountp; - if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) { + if (!xfs_rmap_should_skip_owner_update(oinfo)) { error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo); if (error) goto error0; diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 6249c92671de..a76914db72ef 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -212,6 +212,7 @@ xfs_attr_set( int flags) { struct xfs_mount *mp = dp->i_mount; + struct xfs_buf *leaf_bp = NULL; struct xfs_da_args args; struct xfs_defer_ops dfops; struct xfs_trans_res tres; @@ -327,9 +328,16 @@ xfs_attr_set( * GROT: another possible req'mt for a double-split btree op. */ xfs_defer_init(args.dfops, args.firstblock); - error = xfs_attr_shortform_to_leaf(&args); + error = xfs_attr_shortform_to_leaf(&args, &leaf_bp); if (error) goto out_defer_cancel; + /* + * Prevent the leaf buffer from being unlocked so that a + * concurrent AIL push cannot grab the half-baked leaf + * buffer and run into problems with the write verifier. + */ + xfs_trans_bhold(args.trans, leaf_bp); + xfs_defer_bjoin(args.dfops, leaf_bp); xfs_defer_ijoin(args.dfops, dp); error = xfs_defer_finish(&args.trans, args.dfops); if (error) @@ -337,13 +345,14 @@ xfs_attr_set( /* * Commit the leaf transformation. We'll need another (linked) - * transaction to add the new attribute to the leaf. + * transaction to add the new attribute to the leaf, which + * means that we have to hold & join the leaf buffer here too. */ - error = xfs_trans_roll_inode(&args.trans, dp); if (error) goto out; - + xfs_trans_bjoin(args.trans, leaf_bp); + leaf_bp = NULL; } if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) @@ -374,8 +383,9 @@ xfs_attr_set( out_defer_cancel: xfs_defer_cancel(&dfops); - args.trans = NULL; out: + if (leaf_bp) + xfs_trans_brelse(args.trans, leaf_bp); if (args.trans) xfs_trans_cancel(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 53cc8b986eac..601eaa36f1ad 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -735,10 +735,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args) } /* - * Convert from using the shortform to the leaf. + * Convert from using the shortform to the leaf. On success, return the + * buffer so that we can keep it locked until we're totally done with it. */ int -xfs_attr_shortform_to_leaf(xfs_da_args_t *args) +xfs_attr_shortform_to_leaf( + struct xfs_da_args *args, + struct xfs_buf **leaf_bp) { xfs_inode_t *dp; xfs_attr_shortform_t *sf; @@ -818,7 +821,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) sfe = XFS_ATTR_SF_NEXTENTRY(sfe); } error = 0; - + *leaf_bp = bp; out: kmem_free(tmpbuffer); return error; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index f7dda0c237b0..894124efb421 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -48,7 +48,8 @@ void xfs_attr_shortform_create(struct xfs_da_args *args); void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff); int xfs_attr_shortform_lookup(struct xfs_da_args *args); int xfs_attr_shortform_getvalue(struct xfs_da_args *args); -int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); +int xfs_attr_shortform_to_leaf(struct xfs_da_args *args, + struct xfs_buf **leaf_bp); int xfs_attr_shortform_remove(struct xfs_da_args *args); int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 1210f684d3c2..1bddbba6b80c 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5136,7 +5136,7 @@ __xfs_bunmapi( * blowing out the transaction with a mix of EFIs and reflink * adjustments. */ - if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) + if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res)); else max_len = len; diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 072ebfe1d6ae..087fea02c389 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -249,6 +249,10 @@ xfs_defer_trans_roll( for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE); + /* Hold the (previously bjoin'd) buffer locked across the roll. */ + for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) + xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]); + trace_xfs_defer_trans_roll((*tp)->t_mountp, dop); /* Roll the transaction. */ @@ -264,6 +268,12 @@ xfs_defer_trans_roll( for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0); + /* Rejoin the buffers and dirty them so the log moves forward. */ + for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) { + xfs_trans_bjoin(*tp, dop->dop_bufs[i]); + xfs_trans_bhold(*tp, dop->dop_bufs[i]); + } + return error; } @@ -295,6 +305,31 @@ xfs_defer_ijoin( } } + ASSERT(0); + return -EFSCORRUPTED; +} + +/* + * Add this buffer to the deferred op. Each joined buffer is relogged + * each time we roll the transaction. + */ +int +xfs_defer_bjoin( + struct xfs_defer_ops *dop, + struct xfs_buf *bp) +{ + int i; + + for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) { + if (dop->dop_bufs[i] == bp) + return 0; + else if (dop->dop_bufs[i] == NULL) { + dop->dop_bufs[i] = bp; + return 0; + } + } + + ASSERT(0); return -EFSCORRUPTED; } @@ -493,9 +528,7 @@ xfs_defer_init( struct xfs_defer_ops *dop, xfs_fsblock_t *fbp) { - dop->dop_committed = false; - dop->dop_low = false; - memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes)); + memset(dop, 0, sizeof(struct xfs_defer_ops)); *fbp = NULLFSBLOCK; INIT_LIST_HEAD(&dop->dop_intake); INIT_LIST_HEAD(&dop->dop_pending); diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index d4f046dd44bd..045beacdd37d 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -59,6 +59,7 @@ enum xfs_defer_ops_type { }; #define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ +#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */ struct xfs_defer_ops { bool dop_committed; /* did any trans commit? */ @@ -66,8 +67,9 @@ struct xfs_defer_ops { struct list_head dop_intake; /* unlogged pending work */ struct list_head dop_pending; /* logged pending work */ - /* relog these inodes with each roll */ + /* relog these with each roll */ struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES]; + struct xfs_buf *dop_bufs[XFS_DEFER_OPS_NR_BUFS]; }; void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type, @@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_ops *dop); void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp); bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop); int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip); +int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp); /* Description of a deferred type. */ struct xfs_defer_op_type { diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c index 89bf16b4d937..b0f31791c7e6 100644 --- a/fs/xfs/libxfs/xfs_iext_tree.c +++ b/fs/xfs/libxfs/xfs_iext_tree.c @@ -632,8 +632,6 @@ xfs_iext_insert( struct xfs_iext_leaf *new = NULL; int nr_entries, i; - trace_xfs_iext_insert(ip, cur, state, _RET_IP_); - if (ifp->if_height == 0) xfs_iext_alloc_root(ifp, cur); else if (ifp->if_height == 1) @@ -661,6 +659,8 @@ xfs_iext_insert( xfs_iext_set(cur_rec(cur), irec); ifp->if_bytes += sizeof(struct xfs_iext_rec); + trace_xfs_iext_insert(ip, cur, state, _RET_IP_); + if (new) xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2); } diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 585b35d34142..c40d26763075 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1488,27 +1488,12 @@ __xfs_refcount_cow_alloc( xfs_extlen_t aglen, struct xfs_defer_ops *dfops) { - int error; - trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno, agbno, aglen); /* Add refcount btree reservation */ - error = xfs_refcount_adjust_cow(rcur, agbno, aglen, + return xfs_refcount_adjust_cow(rcur, agbno, aglen, XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops); - if (error) - return error; - - /* Add rmap entry */ - if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) { - error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops, - rcur->bc_private.a.agno, - agbno, aglen, XFS_RMAP_OWN_COW); - if (error) - return error; - } - - return error; } /* @@ -1521,27 +1506,12 @@ __xfs_refcount_cow_free( xfs_extlen_t aglen, struct xfs_defer_ops *dfops) { - int error; - trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno, agbno, aglen); /* Remove refcount btree reservation */ - error = xfs_refcount_adjust_cow(rcur, agbno, aglen, + return xfs_refcount_adjust_cow(rcur, agbno, aglen, XFS_REFCOUNT_ADJUST_COW_FREE, dfops); - if (error) - return error; - - /* Remove rmap entry */ - if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) { - error = xfs_rmap_free_extent(rcur->bc_mp, dfops, - rcur->bc_private.a.agno, - agbno, aglen, XFS_RMAP_OWN_COW); - if (error) - return error; - } - - return error; } /* Record a CoW staging extent in the refcount btree. */ @@ -1552,11 +1522,19 @@ xfs_refcount_alloc_cow_extent( xfs_fsblock_t fsb, xfs_extlen_t len) { + int error; + if (!xfs_sb_version_hasreflink(&mp->m_sb)) return 0; - return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, + error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, fsb, len); + if (error) + return error; + + /* Add rmap entry */ + return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb), + XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); } /* Forget a CoW staging event in the refcount btree. */ @@ -1567,9 +1545,17 @@ xfs_refcount_free_cow_extent( xfs_fsblock_t fsb, xfs_extlen_t len) { + int error; + if (!xfs_sb_version_hasreflink(&mp->m_sb)) return 0; + /* Remove rmap entry */ + error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb), + XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); + if (error) + return error; + return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW, fsb, len); } diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index dd019cee1b3b..50db920ceeeb 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -368,6 +368,51 @@ xfs_rmap_lookup_le_range( } /* + * Perform all the relevant owner checks for a removal op. If we're doing an + * unknown-owner removal then we have no owner information to check. + */ +static int +xfs_rmap_free_check_owner( + struct xfs_mount *mp, + uint64_t ltoff, + struct xfs_rmap_irec *rec, + xfs_fsblock_t bno, + xfs_filblks_t len, + uint64_t owner, + uint64_t offset, + unsigned int flags) +{ + int error = 0; + + if (owner == XFS_RMAP_OWN_UNKNOWN) + return 0; + + /* Make sure the unwritten flag matches. */ + XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == + (rec->rm_flags & XFS_RMAP_UNWRITTEN), out); + + /* Make sure the owner matches what we expect to find in the tree. */ + XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out); + + /* Check the offset, if necessary. */ + if (XFS_RMAP_NON_INODE_OWNER(owner)) + goto out; + + if (flags & XFS_RMAP_BMBT_BLOCK) { + XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK, + out); + } else { + XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out); + XFS_WANT_CORRUPTED_GOTO(mp, + ltoff + rec->rm_blockcount >= offset + len, + out); + } + +out: + return error; +} + +/* * Find the extent in the rmap btree and remove it. * * The record we find should always be an exact match for the extent that we're @@ -444,33 +489,40 @@ xfs_rmap_unmap( goto out_done; } - /* Make sure the unwritten flag matches. */ - XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == - (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error); + /* + * If we're doing an unknown-owner removal for EFI recovery, we expect + * to find the full range in the rmapbt or nothing at all. If we + * don't find any rmaps overlapping either end of the range, we're + * done. Hopefully this means that the EFI creator already queued + * (and finished) a RUI to remove the rmap. + */ + if (owner == XFS_RMAP_OWN_UNKNOWN && + ltrec.rm_startblock + ltrec.rm_blockcount <= bno) { + struct xfs_rmap_irec rtrec; + + error = xfs_btree_increment(cur, 0, &i); + if (error) + goto out_error; + if (i == 0) + goto out_done; + error = xfs_rmap_get_rec(cur, &rtrec, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + if (rtrec.rm_startblock >= bno + len) + goto out_done; + } /* Make sure the extent we found covers the entire freeing range. */ XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno && - ltrec.rm_startblock + ltrec.rm_blockcount >= - bno + len, out_error); + ltrec.rm_startblock + ltrec.rm_blockcount >= + bno + len, out_error); - /* Make sure the owner matches what we expect to find in the tree. */ - XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner || - XFS_RMAP_NON_INODE_OWNER(owner), out_error); - - /* Check the offset, if necessary. */ - if (!XFS_RMAP_NON_INODE_OWNER(owner)) { - if (flags & XFS_RMAP_BMBT_BLOCK) { - XFS_WANT_CORRUPTED_GOTO(mp, - ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK, - out_error); - } else { - XFS_WANT_CORRUPTED_GOTO(mp, - ltrec.rm_offset <= offset, out_error); - XFS_WANT_CORRUPTED_GOTO(mp, - ltoff + ltrec.rm_blockcount >= offset + len, - out_error); - } - } + /* Check owner information. */ + error = xfs_rmap_free_check_owner(mp, ltoff, <rec, bno, len, owner, + offset, flags); + if (error) + goto out_error; if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) { /* exact match, simply remove the record from rmap tree */ @@ -664,6 +716,7 @@ xfs_rmap_map( flags |= XFS_RMAP_UNWRITTEN; trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len, unwritten, oinfo); + ASSERT(!xfs_rmap_should_skip_owner_update(oinfo)); /* * For the initial lookup, look for an exact match or the left-adjacent diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index 466ede637080..0fcd5b1ba729 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -61,7 +61,21 @@ static inline void xfs_rmap_skip_owner_update( struct xfs_owner_info *oi) { - oi->oi_owner = XFS_RMAP_OWN_UNKNOWN; + xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL); +} + +static inline bool +xfs_rmap_should_skip_owner_update( + struct xfs_owner_info *oi) +{ + return oi->oi_owner == XFS_RMAP_OWN_NULL; +} + +static inline void +xfs_rmap_any_owner_update( + struct xfs_owner_info *oi) +{ + xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN); } /* Reverse mapping functions. */ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 44f8c5451210..64da90655e95 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -538,7 +538,7 @@ xfs_efi_recover( return error; efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); - xfs_rmap_skip_owner_update(&oinfo); + xfs_rmap_any_owner_update(&oinfo); for (i = 0; i < efip->efi_format.efi_nextents; i++) { extp = &efip->efi_format.efi_extents[i]; error = xfs_trans_free_extent(tp, efdp, extp->ext_start, diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 8f22fc579dbb..60a2e128cb6a 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -571,6 +571,11 @@ xfs_growfs_data_private( * this doesn't actually exist in the rmap btree. */ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL); + error = xfs_rmap_free(tp, bp, agno, + be32_to_cpu(agf->agf_length) - new, + new, &oinfo); + if (error) + goto error0; error = xfs_free_extent(tp, XFS_AGB_TO_FSB(mp, agno, be32_to_cpu(agf->agf_length) - new), diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 43005fbe8b1e..3861d61fb265 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -870,7 +870,7 @@ xfs_eofblocks_worker( * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default). * (We'll just piggyback on the post-EOF prealloc space workqueue.) */ -STATIC void +void xfs_queue_cowblocks( struct xfs_mount *mp) { @@ -1536,8 +1536,23 @@ xfs_inode_free_quota_eofblocks( return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks); } +static inline unsigned long +xfs_iflag_for_tag( + int tag) +{ + switch (tag) { + case XFS_ICI_EOFBLOCKS_TAG: + return XFS_IEOFBLOCKS; + case XFS_ICI_COWBLOCKS_TAG: + return XFS_ICOWBLOCKS; + default: + ASSERT(0); + return 0; + } +} + static void -__xfs_inode_set_eofblocks_tag( +__xfs_inode_set_blocks_tag( xfs_inode_t *ip, void (*execute)(struct xfs_mount *mp), void (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, @@ -1552,10 +1567,10 @@ __xfs_inode_set_eofblocks_tag( * Don't bother locking the AG and looking up in the radix trees * if we already know that we have the tag set. */ - if (ip->i_flags & XFS_IEOFBLOCKS) + if (ip->i_flags & xfs_iflag_for_tag(tag)) return; spin_lock(&ip->i_flags_lock); - ip->i_flags |= XFS_IEOFBLOCKS; + ip->i_flags |= xfs_iflag_for_tag(tag); spin_unlock(&ip->i_flags_lock); pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); @@ -1587,13 +1602,13 @@ xfs_inode_set_eofblocks_tag( xfs_inode_t *ip) { trace_xfs_inode_set_eofblocks_tag(ip); - return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks, + return __xfs_inode_set_blocks_tag(ip, xfs_queue_eofblocks, trace_xfs_perag_set_eofblocks, XFS_ICI_EOFBLOCKS_TAG); } static void -__xfs_inode_clear_eofblocks_tag( +__xfs_inode_clear_blocks_tag( xfs_inode_t *ip, void (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, int error, unsigned long caller_ip), @@ -1603,7 +1618,7 @@ __xfs_inode_clear_eofblocks_tag( struct xfs_perag *pag; spin_lock(&ip->i_flags_lock); - ip->i_flags &= ~XFS_IEOFBLOCKS; + ip->i_flags &= ~xfs_iflag_for_tag(tag); spin_unlock(&ip->i_flags_lock); pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); @@ -1630,7 +1645,7 @@ xfs_inode_clear_eofblocks_tag( xfs_inode_t *ip) { trace_xfs_inode_clear_eofblocks_tag(ip); - return __xfs_inode_clear_eofblocks_tag(ip, + return __xfs_inode_clear_blocks_tag(ip, trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG); } @@ -1724,7 +1739,7 @@ xfs_inode_set_cowblocks_tag( xfs_inode_t *ip) { trace_xfs_inode_set_cowblocks_tag(ip); - return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks, + return __xfs_inode_set_blocks_tag(ip, xfs_queue_cowblocks, trace_xfs_perag_set_cowblocks, XFS_ICI_COWBLOCKS_TAG); } @@ -1734,6 +1749,6 @@ xfs_inode_clear_cowblocks_tag( xfs_inode_t *ip) { trace_xfs_inode_clear_cowblocks_tag(ip); - return __xfs_inode_clear_eofblocks_tag(ip, + return __xfs_inode_clear_blocks_tag(ip, trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG); } diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index bff4d85e5498..d4a77588eca1 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -81,6 +81,7 @@ void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip); int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *); int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip); void xfs_cowblocks_worker(struct work_struct *); +void xfs_queue_cowblocks(struct xfs_mount *); int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b41952a4ddd8..6f95bdb408ce 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1487,6 +1487,24 @@ xfs_link( return error; } +/* Clear the reflink flag and the cowblocks tag if possible. */ +static void +xfs_itruncate_clear_reflink_flags( + struct xfs_inode *ip) +{ + struct xfs_ifork *dfork; + struct xfs_ifork *cfork; + + if (!xfs_is_reflink_inode(ip)) + return; + dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK); + if (dfork->if_bytes == 0 && cfork->if_bytes == 0) + ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; + if (cfork->if_bytes == 0) + xfs_inode_clear_cowblocks_tag(ip); +} + /* * Free up the underlying blocks past new_size. The new size must be smaller * than the current size. This routine can be used both for the attribute and @@ -1583,15 +1601,7 @@ xfs_itruncate_extents( if (error) goto out; - /* - * Clear the reflink flag if there are no data fork blocks and - * there are no extents staged in the cow fork. - */ - if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) { - if (ip->i_d.di_nblocks == 0) - ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; - xfs_inode_clear_cowblocks_tag(ip); - } + xfs_itruncate_clear_reflink_flags(ip); /* * Always re-log the inode so that our permanent transaction can keep diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index b2136af9289f..d383e392ec9d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -232,6 +232,7 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip) * log recovery to replay a bmap operation on the inode. */ #define XFS_IRECOVERY (1 << 11) +#define XFS_ICOWBLOCKS (1 << 12)/* has the cowblocks tag set */ /* * Per-lifetime flags need to be reset when re-using a reclaimable inode during diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index cf7c8f81bebb..47aea2e82c26 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -454,6 +454,8 @@ retry: if (error) goto out_bmap_cancel; + xfs_inode_set_cowblocks_tag(ip); + /* Finish up. */ error = xfs_defer_finish(&tp, &dfops); if (error) @@ -490,8 +492,9 @@ xfs_reflink_find_cow_mapping( struct xfs_iext_cursor icur; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); - ASSERT(xfs_is_reflink_inode(ip)); + if (!xfs_is_reflink_inode(ip)) + return false; offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got)) return false; @@ -610,6 +613,9 @@ xfs_reflink_cancel_cow_blocks( /* Remove the mapping from the CoW fork. */ xfs_bmap_del_extent_cow(ip, &icur, &got, &del); + } else { + /* Didn't do anything, push cursor back. */ + xfs_iext_prev(ifp, &icur); } next_extent: if (!xfs_iext_get_extent(ifp, &icur, &got)) @@ -725,7 +731,7 @@ xfs_reflink_end_cow( (unsigned int)(end_fsb - offset_fsb), XFS_DATA_FORK); error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, - resblks, 0, 0, &tp); + resblks, 0, XFS_TRANS_RESERVE, &tp); if (error) goto out; @@ -1291,6 +1297,17 @@ xfs_reflink_remap_range( trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); + /* + * Clear out post-eof preallocations because we don't have page cache + * backing the delayed allocations and they'll never get freed on + * their own. + */ + if (xfs_can_free_eofblocks(dest, true)) { + ret = xfs_free_eofblocks(dest); + if (ret) + goto out_unlock; + } + /* Set flags and remap blocks. */ ret = xfs_reflink_set_inode_flag(src, dest); if (ret) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 5122d3021117..1dacccc367f8 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1360,6 +1360,7 @@ xfs_fs_remount( xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); return error; } + xfs_queue_cowblocks(mp); /* Create the per-AG metadata reservation pool .*/ error = xfs_fs_reserve_ag_blocks(mp); @@ -1369,6 +1370,14 @@ xfs_fs_remount( /* rw -> ro */ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) { + /* Get rid of any leftover CoW reservations... */ + cancel_delayed_work_sync(&mp->m_cowblocks_work); + error = xfs_icache_free_cowblocks(mp, NULL); + if (error) { + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return error; + } + /* Free the per-AG metadata reservation pool. */ error = xfs_fs_unreserve_ag_blocks(mp); if (error) { diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h index ea189d88a3cc..8ac4e68a12f0 100644 --- a/include/asm-generic/mm_hooks.h +++ b/include/asm-generic/mm_hooks.h @@ -7,9 +7,10 @@ #ifndef _ASM_GENERIC_MM_HOOKS_H #define _ASM_GENERIC_MM_HOOKS_H -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) { + return 0; } static inline void arch_exit_mmap(struct mm_struct *mm) diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index b234d54f2cb6..868e68561f91 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1025,6 +1025,11 @@ static inline int pmd_clear_huge(pmd_t *pmd) struct file; int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot); + +#ifndef CONFIG_X86_ESPFIX64 +static inline void init_espfix_bsp(void) { } +#endif + #endif /* !__ASSEMBLY__ */ #ifndef io_remap_pfn_range diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h index cceafa01f907..b67404fc4b34 100644 --- a/include/crypto/mcryptd.h +++ b/include/crypto/mcryptd.h @@ -27,6 +27,7 @@ static inline struct mcryptd_ahash *__mcryptd_ahash_cast( struct mcryptd_cpu_queue { struct crypto_queue queue; + spinlock_t q_lock; struct work_struct work; }; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 57b109c6e422..1f509d072026 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1165,6 +1165,10 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev); int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); +int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, + u64 *values, + int num_counters, + size_t *offsets); struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev); void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up); diff --git a/include/net/sock.h b/include/net/sock.h index 6c1db823f8b9..66fd3951e6f3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1515,6 +1515,11 @@ static inline bool sock_owned_by_user(const struct sock *sk) return sk->sk_lock.owned; } +static inline bool sock_owned_by_user_nocheck(const struct sock *sk) +{ + return sk->sk_lock.owned; +} + /* no reclassification while locks are held */ static inline bool sock_allow_reclassification(const struct sock *csk) { diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 079ea9455bcd..2e6d4fe6b0ba 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1601,6 +1601,9 @@ int xfrm_init_state(struct xfrm_state *x); int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm_input_resume(struct sk_buff *skb, int nexthdr); +int xfrm_trans_queue(struct sk_buff *skb, + int (*finish)(struct net *, struct sock *, + struct sk_buff *)); int xfrm_output_resume(struct sk_buff *skb, int err); int xfrm_output(struct sock *sk, struct sk_buff *skb); int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); diff --git a/include/trace/events/clk.h b/include/trace/events/clk.h index 758607226bfd..2cd449328aee 100644 --- a/include/trace/events/clk.h +++ b/include/trace/events/clk.h @@ -134,12 +134,12 @@ DECLARE_EVENT_CLASS(clk_parent, TP_STRUCT__entry( __string( name, core->name ) - __string( pname, parent->name ) + __string( pname, parent ? parent->name : "none" ) ), TP_fast_assign( __assign_str(name, core->name); - __assign_str(pname, parent->name); + __assign_str(pname, parent ? parent->name : "none"); ), TP_printk("%s %s", __get_str(name), __get_str(pname)) diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 8e88a1671538..bb00459d2d4d 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -9,6 +9,35 @@ #include <linux/tracepoint.h> #include <net/ipv6.h> +#define TP_STORE_V4MAPPED(__entry, saddr, daddr) \ + do { \ + struct in6_addr *pin6; \ + \ + pin6 = (struct in6_addr *)__entry->saddr_v6; \ + ipv6_addr_set_v4mapped(saddr, pin6); \ + pin6 = (struct in6_addr *)__entry->daddr_v6; \ + ipv6_addr_set_v4mapped(daddr, pin6); \ + } while (0) + +#if IS_ENABLED(CONFIG_IPV6) +#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6) \ + do { \ + if (sk->sk_family == AF_INET6) { \ + struct in6_addr *pin6; \ + \ + pin6 = (struct in6_addr *)__entry->saddr_v6; \ + *pin6 = saddr6; \ + pin6 = (struct in6_addr *)__entry->daddr_v6; \ + *pin6 = daddr6; \ + } else { \ + TP_STORE_V4MAPPED(__entry, saddr, daddr); \ + } \ + } while (0) +#else +#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6) \ + TP_STORE_V4MAPPED(__entry, saddr, daddr) +#endif + /* * tcp event with arguments sk and skb * @@ -34,7 +63,6 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb, TP_fast_assign( struct inet_sock *inet = inet_sk(sk); - struct in6_addr *pin6; __be32 *p32; __entry->skbaddr = skb; @@ -49,20 +77,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb, p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - pin6 = (struct in6_addr *)__entry->saddr_v6; - *pin6 = sk->sk_v6_rcv_saddr; - pin6 = (struct in6_addr *)__entry->daddr_v6; - *pin6 = sk->sk_v6_daddr; - } else -#endif - { - pin6 = (struct in6_addr *)__entry->saddr_v6; - ipv6_addr_set_v4mapped(inet->inet_saddr, pin6); - pin6 = (struct in6_addr *)__entry->daddr_v6; - ipv6_addr_set_v4mapped(inet->inet_daddr, pin6); - } + TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, + sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); ), TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", @@ -111,7 +127,6 @@ DECLARE_EVENT_CLASS(tcp_event_sk, TP_fast_assign( struct inet_sock *inet = inet_sk(sk); - struct in6_addr *pin6; __be32 *p32; __entry->skaddr = sk; @@ -125,20 +140,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk, p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - pin6 = (struct in6_addr *)__entry->saddr_v6; - *pin6 = sk->sk_v6_rcv_saddr; - pin6 = (struct in6_addr *)__entry->daddr_v6; - *pin6 = sk->sk_v6_daddr; - } else -#endif - { - pin6 = (struct in6_addr *)__entry->saddr_v6; - ipv6_addr_set_v4mapped(inet->inet_saddr, pin6); - pin6 = (struct in6_addr *)__entry->daddr_v6; - ipv6_addr_set_v4mapped(inet->inet_daddr, pin6); - } + TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, + sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); ), TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", @@ -181,7 +184,6 @@ TRACE_EVENT(tcp_set_state, TP_fast_assign( struct inet_sock *inet = inet_sk(sk); - struct in6_addr *pin6; __be32 *p32; __entry->skaddr = sk; @@ -197,20 +199,8 @@ TRACE_EVENT(tcp_set_state, p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - pin6 = (struct in6_addr *)__entry->saddr_v6; - *pin6 = sk->sk_v6_rcv_saddr; - pin6 = (struct in6_addr *)__entry->daddr_v6; - *pin6 = sk->sk_v6_daddr; - } else -#endif - { - pin6 = (struct in6_addr *)__entry->saddr_v6; - ipv6_addr_set_v4mapped(inet->inet_saddr, pin6); - pin6 = (struct in6_addr *)__entry->daddr_v6; - ipv6_addr_set_v4mapped(inet->inet_daddr, pin6); - } + TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, + sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); ), TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s", @@ -240,7 +230,6 @@ TRACE_EVENT(tcp_retransmit_synack, TP_fast_assign( struct inet_request_sock *ireq = inet_rsk(req); - struct in6_addr *pin6; __be32 *p32; __entry->skaddr = sk; @@ -255,20 +244,8 @@ TRACE_EVENT(tcp_retransmit_synack, p32 = (__be32 *) __entry->daddr; *p32 = ireq->ir_rmt_addr; -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - pin6 = (struct in6_addr *)__entry->saddr_v6; - *pin6 = ireq->ir_v6_loc_addr; - pin6 = (struct in6_addr *)__entry->daddr_v6; - *pin6 = ireq->ir_v6_rmt_addr; - } else -#endif - { - pin6 = (struct in6_addr *)__entry->saddr_v6; - ipv6_addr_set_v4mapped(ireq->ir_loc_addr, pin6); - pin6 = (struct in6_addr *)__entry->daddr_v6; - ipv6_addr_set_v4mapped(ireq->ir_rmt_addr, pin6); - } + TP_STORE_ADDRS(__entry, ireq->ir_loc_addr, ireq->ir_rmt_addr, + ireq->ir_v6_loc_addr, ireq->ir_v6_rmt_addr); ), TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", diff --git a/include/xen/balloon.h b/include/xen/balloon.h index 4914b93a23f2..61f410fd74e4 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -44,3 +44,8 @@ static inline void xen_balloon_init(void) { } #endif + +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +struct resource; +void arch_xen_balloon_init(struct resource *hostmem_resource); +#endif diff --git a/init/main.c b/init/main.c index e96e3a14533c..7b606fc48482 100644 --- a/init/main.c +++ b/init/main.c @@ -504,6 +504,8 @@ static void __init mm_init(void) pgtable_init(); vmalloc_init(); ioremap_huge_init(); + /* Should be run before the first non-init thread is created */ + init_espfix_bsp(); } asmlinkage __visible void __init start_kernel(void) @@ -679,10 +681,6 @@ asmlinkage __visible void __init start_kernel(void) if (efi_enabled(EFI_RUNTIME_SERVICES)) efi_enter_virtual_mode(); #endif -#ifdef CONFIG_X86_ESPFIX64 - /* Should be run before the first non-init thread is created */ - init_espfix_bsp(); -#endif thread_stack_cache_init(); cred_init(); fork_init(); diff --git a/kernel/fork.c b/kernel/fork.c index 432eadf6b58c..2295fc69717f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -721,8 +721,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, goto out; } /* a new mm has just been created */ - arch_dup_mmap(oldmm, mm); - retval = 0; + retval = arch_dup_mmap(oldmm, mm); out: up_write(&mm->mmap_sem); flush_tlb_mm(oldmm); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c87766c1c204..9ab18995ff1e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -280,6 +280,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); /* Missed count stored at end */ #define RB_MISSED_STORED (1 << 30) +#define RB_MISSED_FLAGS (RB_MISSED_EVENTS|RB_MISSED_STORED) + struct buffer_data_page { u64 time_stamp; /* page time stamp */ local_t commit; /* write committed index */ @@ -331,7 +333,9 @@ static void rb_init_page(struct buffer_data_page *bpage) */ size_t ring_buffer_page_len(void *page) { - return local_read(&((struct buffer_data_page *)page)->commit) + struct buffer_data_page *bpage = page; + + return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS) + BUF_PAGE_HDR_SIZE; } @@ -4400,8 +4404,13 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; struct buffer_data_page *bpage = data; + struct page *page = virt_to_page(bpage); unsigned long flags; + /* If the page is still in use someplace else, we can't reuse it */ + if (page_ref_count(page) > 1) + goto out; + local_irq_save(flags); arch_spin_lock(&cpu_buffer->lock); @@ -4413,6 +4422,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data) arch_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); + out: free_page((unsigned long)bpage); } EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 59518b8126d0..2a8d8a294345 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6769,7 +6769,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, .spd_release = buffer_spd_release, }; struct buffer_ref *ref; - int entries, size, i; + int entries, i; ssize_t ret = 0; #ifdef CONFIG_TRACER_MAX_TRACE @@ -6823,14 +6823,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, break; } - /* - * zero out any left over data, this is going to - * user land. - */ - size = ring_buffer_page_len(ref->page); - if (size < PAGE_SIZE) - memset(ref->page + size, 0, PAGE_SIZE - size); - page = virt_to_page(ref->page); spd.pages[i] = page; @@ -7588,6 +7580,7 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size buf->data = alloc_percpu(struct trace_array_cpu); if (!buf->data) { ring_buffer_free(buf->buffer); + buf->buffer = NULL; return -ENOMEM; } @@ -7611,7 +7604,9 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) allocate_snapshot ? size : 1); if (WARN_ON(ret)) { ring_buffer_free(tr->trace_buffer.buffer); + tr->trace_buffer.buffer = NULL; free_percpu(tr->trace_buffer.data); + tr->trace_buffer.data = NULL; return -ENOMEM; } tr->allocated_snapshot = allocate_snapshot; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 00b0757830e2..01e8285aea73 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1177,12 +1177,12 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) int i, new_frags; u32 d_off; - if (!num_frags) - goto release; - if (skb_shared(skb) || skb_unclone(skb, gfp_mask)) return -EINVAL; + if (!num_frags) + goto release; + new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < new_frags; i++) { page = alloc_page(gfp_mask); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index e50b7fea57ee..bcfc00e88756 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -23,6 +23,12 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb) return xfrm4_extract_header(skb); } +static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + return dst_input(skb); +} + static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -33,7 +39,11 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk, iph->tos, skb->dev)) goto drop; } - return dst_input(skb); + + if (xfrm_trans_queue(skb, xfrm4_rcv_encap_finish2)) + goto drop; + + return 0; drop: kfree_skb(skb); return NET_RX_DROP; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index b345b7e484c5..db99446e0276 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1333,6 +1333,36 @@ static void ip6gre_tunnel_setup(struct net_device *dev) eth_random_addr(dev->perm_addr); } +#define GRE6_FEATURES (NETIF_F_SG | \ + NETIF_F_FRAGLIST | \ + NETIF_F_HIGHDMA | \ + NETIF_F_HW_CSUM) + +static void ip6gre_tnl_init_features(struct net_device *dev) +{ + struct ip6_tnl *nt = netdev_priv(dev); + + dev->features |= GRE6_FEATURES; + dev->hw_features |= GRE6_FEATURES; + + if (!(nt->parms.o_flags & TUNNEL_SEQ)) { + /* TCP offload with GRE SEQ is not supported, nor + * can we support 2 levels of outer headers requiring + * an update. + */ + if (!(nt->parms.o_flags & TUNNEL_CSUM) || + nt->encap.type == TUNNEL_ENCAP_NONE) { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + } + + /* Can use a lockless transmit, unless we generate + * output sequences + */ + dev->features |= NETIF_F_LLTX; + } +} + static int ip6gre_tunnel_init_common(struct net_device *dev) { struct ip6_tnl *tunnel; @@ -1371,6 +1401,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; netif_keep_dst(dev); } + ip6gre_tnl_init_features(dev); return 0; } @@ -1705,11 +1736,6 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = { .ndo_get_iflink = ip6_tnl_get_iflink, }; -#define GRE6_FEATURES (NETIF_F_SG | \ - NETIF_F_FRAGLIST | \ - NETIF_F_HIGHDMA | \ - NETIF_F_HW_CSUM) - static int ip6erspan_tap_init(struct net_device *dev) { struct ip6_tnl *tunnel; @@ -1848,26 +1874,6 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, nt->net = dev_net(dev); ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); - dev->features |= GRE6_FEATURES; - dev->hw_features |= GRE6_FEATURES; - - if (!(nt->parms.o_flags & TUNNEL_SEQ)) { - /* TCP offload with GRE SEQ is not supported, nor - * can we support 2 levels of outer headers requiring - * an update. - */ - if (!(nt->parms.o_flags & TUNNEL_CSUM) || - (nt->encap.type == TUNNEL_ENCAP_NONE)) { - dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_GSO_SOFTWARE; - } - - /* Can use a lockless transmit, unless we generate - * output sequences - */ - dev->features |= NETIF_F_LLTX; - } - err = register_netdevice(dev); if (err) goto out; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index fe04e23af986..841f4a07438e 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -32,6 +32,14 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, } EXPORT_SYMBOL(xfrm6_rcv_spi); +static int xfrm6_transport_finish2(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + if (xfrm_trans_queue(skb, ip6_rcv_finish)) + __kfree_skb(skb); + return -1; +} + int xfrm6_transport_finish(struct sk_buff *skb, int async) { struct xfrm_offload *xo = xfrm_offload(skb); @@ -56,7 +64,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, dev_net(skb->dev), NULL, skb, skb->dev, NULL, - ip6_rcv_finish); + xfrm6_transport_finish2); return -1; } diff --git a/net/rds/send.c b/net/rds/send.c index b52cdc8ae428..f72466c63f0c 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1009,6 +1009,9 @@ static int rds_rdma_bytes(struct msghdr *msg, size_t *rdma_bytes) continue; if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) { + if (cmsg->cmsg_len < + CMSG_LEN(sizeof(struct rds_rdma_args))) + return -EINVAL; args = CMSG_DATA(cmsg); *rdma_bytes += args->remote_vec.bytes; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 4591b87eaab5..6708b6953bfa 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -373,6 +373,8 @@ void tcf_block_put(struct tcf_block *block) { struct tcf_block_ext_info ei = {0, }; + if (!block) + return; tcf_block_put_ext(block, block->q, &ei); } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 28b2a7964133..cc069b2acf0e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1261,6 +1261,8 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, if (!tp_head) { RCU_INIT_POINTER(*miniqp->p_miniq, NULL); + /* Wait for flying RCU callback before it is freed. */ + rcu_barrier_bh(); return; } @@ -1276,7 +1278,7 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, rcu_assign_pointer(*miniqp->p_miniq, miniq); if (miniq_old) - /* This is counterpart of the rcu barrier above. We need to + /* This is counterpart of the rcu barriers above. We need to * block potential new user of miniq_old until all readers * are not seeing it. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index aadcd4244d9b..a5e2150ab013 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4569,7 +4569,7 @@ static int sctp_init_sock(struct sock *sk) SCTP_DBG_OBJCNT_INC(sock); local_bh_disable(); - percpu_counter_inc(&sctp_sockets_allocated); + sk_sockets_allocated_inc(sk); sock_prot_inuse_add(net, sk->sk_prot, 1); /* Nothing can fail after this block, otherwise @@ -4613,7 +4613,7 @@ static void sctp_destroy_sock(struct sock *sk) } sctp_endpoint_free(sp->ep); local_bh_disable(); - percpu_counter_dec(&sctp_sockets_allocated); + sk_sockets_allocated_dec(sk); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); local_bh_enable(); } diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index c5fda15ba319..1fdab5c4eda8 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -401,7 +401,7 @@ void strp_data_ready(struct strparser *strp) * allows a thread in BH context to safely check if the process * lock is held. In this case, if the lock is held, queue work. */ - if (sock_owned_by_user(strp->sk)) { + if (sock_owned_by_user_nocheck(strp->sk)) { queue_work(strp_wq, &strp->work); return; } diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 47ec121574ce..c8001471da6c 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -324,6 +324,7 @@ restart: if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); + kfree(b); return -EINVAL; } @@ -347,8 +348,10 @@ restart: if (skb) tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); - if (tipc_mon_create(net, bearer_id)) + if (tipc_mon_create(net, bearer_id)) { + bearer_disable(net, b); return -ENOMEM; + } pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, diff --git a/net/tipc/group.c b/net/tipc/group.c index 7ebbdeb2a90e..8e12ab55346b 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -368,18 +368,20 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack) u16 prev = grp->bc_snd_nxt - 1; struct tipc_member *m; struct rb_node *n; + u16 ackers = 0; for (n = rb_first(&grp->members); n; n = rb_next(n)) { m = container_of(n, struct tipc_member, tree_node); if (tipc_group_is_enabled(m)) { tipc_group_update_member(m, len); m->bc_acked = prev; + ackers++; } } /* Mark number of acknowledges to expect, if any */ if (ack) - grp->bc_ackers = grp->member_cnt; + grp->bc_ackers = ackers; grp->bc_snd_nxt++; } @@ -848,17 +850,26 @@ void tipc_group_member_evt(struct tipc_group *grp, *usr_wakeup = true; m->usr_pending = false; node_up = tipc_node_is_up(net, node); - - /* Hold back event if more messages might be expected */ - if (m->state != MBR_LEAVING && node_up) { - m->event_msg = skb; - tipc_group_decr_active(grp, m); - m->state = MBR_LEAVING; - } else { - if (node_up) + m->event_msg = NULL; + + if (node_up) { + /* Hold back event if a LEAVE msg should be expected */ + if (m->state != MBR_LEAVING) { + m->event_msg = skb; + tipc_group_decr_active(grp, m); + m->state = MBR_LEAVING; + } else { msg_set_grp_bc_seqno(hdr, m->bc_syncpt); - else + __skb_queue_tail(inputq, skb); + } + } else { + if (m->state != MBR_LEAVING) { + tipc_group_decr_active(grp, m); + m->state = MBR_LEAVING; msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt); + } else { + msg_set_grp_bc_seqno(hdr, m->bc_syncpt); + } __skb_queue_tail(inputq, skb); } list_del_init(&m->list); diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 8e884ed06d4b..32dc33a94bc7 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -642,9 +642,13 @@ void tipc_mon_delete(struct net *net, int bearer_id) { struct tipc_net *tn = tipc_net(net); struct tipc_monitor *mon = tipc_monitor(net, bearer_id); - struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *self; struct tipc_peer *peer, *tmp; + if (!mon) + return; + + self = get_self(net, bearer_id); write_lock_bh(&mon->lock); tn->monitors[bearer_id] = NULL; list_for_each_entry_safe(peer, tmp, &self->list, list) { diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 0cdf5c2ad881..b51d5cba5094 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -727,11 +727,11 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, switch (sk->sk_state) { case TIPC_ESTABLISHED: + case TIPC_CONNECTING: if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) revents |= POLLOUT; /* fall thru' */ case TIPC_LISTEN: - case TIPC_CONNECTING: if (!skb_queue_empty(&sk->sk_receive_queue)) revents |= POLLIN | POLLRDNORM; break; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index ac277b97e0d7..26b10eb7a206 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -8,15 +8,29 @@ * */ +#include <linux/bottom_half.h> +#include <linux/interrupt.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/netdevice.h> +#include <linux/percpu.h> #include <net/dst.h> #include <net/ip.h> #include <net/xfrm.h> #include <net/ip_tunnels.h> #include <net/ip6_tunnel.h> +struct xfrm_trans_tasklet { + struct tasklet_struct tasklet; + struct sk_buff_head queue; +}; + +struct xfrm_trans_cb { + int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb); +}; + +#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0])) + static struct kmem_cache *secpath_cachep __read_mostly; static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); @@ -25,6 +39,8 @@ static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1]; static struct gro_cells gro_cells; static struct net_device xfrm_napi_dev; +static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet); + int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo) { int err = 0; @@ -207,7 +223,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) xfrm_address_t *daddr; struct xfrm_mode *inner_mode; u32 mark = skb->mark; - unsigned int family; + unsigned int family = AF_UNSPEC; int decaps = 0; int async = 0; bool xfrm_gro = false; @@ -216,6 +232,16 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) if (encap_type < 0) { x = xfrm_input_state(skb); + + if (unlikely(x->km.state != XFRM_STATE_VALID)) { + if (x->km.state == XFRM_STATE_ACQ) + XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR); + else + XFRM_INC_STATS(net, + LINUX_MIB_XFRMINSTATEINVALID); + goto drop; + } + family = x->outer_mode->afinfo->family; /* An encap_type of -1 indicates async resumption. */ @@ -466,9 +492,41 @@ int xfrm_input_resume(struct sk_buff *skb, int nexthdr) } EXPORT_SYMBOL(xfrm_input_resume); +static void xfrm_trans_reinject(unsigned long data) +{ + struct xfrm_trans_tasklet *trans = (void *)data; + struct sk_buff_head queue; + struct sk_buff *skb; + + __skb_queue_head_init(&queue); + skb_queue_splice_init(&trans->queue, &queue); + + while ((skb = __skb_dequeue(&queue))) + XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb); +} + +int xfrm_trans_queue(struct sk_buff *skb, + int (*finish)(struct net *, struct sock *, + struct sk_buff *)) +{ + struct xfrm_trans_tasklet *trans; + + trans = this_cpu_ptr(&xfrm_trans_tasklet); + + if (skb_queue_len(&trans->queue) >= netdev_max_backlog) + return -ENOBUFS; + + XFRM_TRANS_SKB_CB(skb)->finish = finish; + skb_queue_tail(&trans->queue, skb); + tasklet_schedule(&trans->tasklet); + return 0; +} +EXPORT_SYMBOL(xfrm_trans_queue); + void __init xfrm_input_init(void) { int err; + int i; init_dummy_netdev(&xfrm_napi_dev); err = gro_cells_init(&gro_cells, &xfrm_napi_dev); @@ -479,4 +537,13 @@ void __init xfrm_input_init(void) sizeof(struct sec_path), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + + for_each_possible_cpu(i) { + struct xfrm_trans_tasklet *trans; + + trans = &per_cpu(xfrm_trans_tasklet, i); + __skb_queue_head_init(&trans->queue); + tasklet_init(&trans->tasklet, xfrm_trans_reinject, + (unsigned long)trans); + } } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index e3a5aca9cdda..d8a8129b9232 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1168,9 +1168,15 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, again: pol = rcu_dereference(sk->sk_policy[dir]); if (pol != NULL) { - bool match = xfrm_selector_match(&pol->selector, fl, family); + bool match; int err = 0; + if (pol->family != family) { + pol = NULL; + goto out; + } + + match = xfrm_selector_match(&pol->selector, fl, family); if (match) { if ((sk->sk_mark & pol->mark.m) != pol->mark.v) { pol = NULL; @@ -1835,6 +1841,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, sizeof(struct xfrm_policy *) * num_pols) == 0 && xfrm_xdst_can_reuse(xdst, xfrm, err)) { dst_hold(&xdst->u.dst); + xfrm_pols_put(pols, num_pols); while (err > 0) xfrm_state_put(xfrm[--err]); return xdst; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1b7856be3eeb..cc4c519cad76 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1343,6 +1343,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, if (orig->aead) { x->aead = xfrm_algo_aead_clone(orig->aead); + x->geniv = orig->geniv; if (!x->aead) goto error; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 983b0233767b..bdb48e5dba04 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1419,11 +1419,14 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) { + u16 prev_family; int i; if (nr > XFRM_MAX_DEPTH) return -EINVAL; + prev_family = family; + for (i = 0; i < nr; i++) { /* We never validated the ut->family value, so many * applications simply leave it at zero. The check was @@ -1435,6 +1438,12 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) if (!ut[i].family) ut[i].family = family; + if ((ut[i].mode == XFRM_MODE_TRANSPORT) && + (ut[i].family != prev_family)) + return -EINVAL; + + prev_family = ut[i].family; + switch (ut[i].family) { case AF_INET: break; @@ -1445,6 +1454,21 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) default: return -EINVAL; } + + switch (ut[i].id.proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_COMP: +#if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: +#endif + case IPSEC_PROTO_ANY: + break; + default: + return -EINVAL; + } + } return 0; @@ -2470,7 +2494,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_PROTO] = { .type = NLA_U8 }, [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, [XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) }, - [XFRMA_OUTPUT_MARK] = { .len = NLA_U32 }, + [XFRMA_OUTPUT_MARK] = { .type = NLA_U32 }, }; static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c index 038a180d3f81..cbe818eda336 100644 --- a/sound/hda/hdac_i915.c +++ b/sound/hda/hdac_i915.c @@ -325,7 +325,7 @@ static int hdac_component_master_match(struct device *dev, void *data) */ int snd_hdac_i915_register_notifier(const struct i915_audio_component_audio_ops *aops) { - if (WARN_ON(!hdac_acomp)) + if (!hdac_acomp) return -ENODEV; hdac_acomp->audio_ops = aops; diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index a81aacf684b2..37e1cf8218ff 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -271,6 +271,8 @@ enum { CXT_FIXUP_HP_SPECTRE, CXT_FIXUP_HP_GATE_MIC, CXT_FIXUP_MUTE_LED_GPIO, + CXT_FIXUP_HEADSET_MIC, + CXT_FIXUP_HP_MIC_NO_PRESENCE, }; /* for hda_fixup_thinkpad_acpi() */ @@ -350,6 +352,18 @@ static void cxt_fixup_headphone_mic(struct hda_codec *codec, } } +static void cxt_fixup_headset_mic(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct conexant_spec *spec = codec->spec; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + spec->parse_flags |= HDA_PINCFG_HEADSET_MIC; + break; + } +} + /* OPLC XO 1.5 fixup */ /* OLPC XO-1.5 supports DC input mode (e.g. for use with analog sensors) @@ -880,6 +894,19 @@ static const struct hda_fixup cxt_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = cxt_fixup_mute_led_gpio, }, + [CXT_FIXUP_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = cxt_fixup_headset_mic, + }, + [CXT_FIXUP_HP_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1a, 0x02a1113c }, + { } + }, + .chained = true, + .chain_id = CXT_FIXUP_HEADSET_MIC, + }, }; static const struct snd_pci_quirk cxt5045_fixups[] = { @@ -934,6 +961,8 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC), SND_PCI_QUIRK(0x103c, 0x814f, "HP ZBook 15u G3", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x822e, "HP ProBook 440 G4", CXT_FIXUP_MUTE_LED_GPIO), + SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO), SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6a4db00511ab..8fd2d9c62c96 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -324,8 +324,12 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0292: alc_update_coef_idx(codec, 0x4, 1<<15, 0); break; - case 0x10ec0215: case 0x10ec0225: + case 0x10ec0295: + case 0x10ec0299: + alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000); + /* fallthrough */ + case 0x10ec0215: case 0x10ec0233: case 0x10ec0236: case 0x10ec0255: @@ -336,10 +340,8 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0286: case 0x10ec0288: case 0x10ec0285: - case 0x10ec0295: case 0x10ec0298: case 0x10ec0289: - case 0x10ec0299: alc_update_coef_idx(codec, 0x10, 1<<9, 0); break; case 0x10ec0275: @@ -6328,6 +6330,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), + SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), @@ -6586,6 +6589,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x1b, 0x01011020}, {0x21, 0x02211010}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x90a60130}, + {0x14, 0x90170110}, + {0x1b, 0x01011020}, + {0x21, 0x0221101f}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x12, 0x90a60160}, {0x14, 0x90170120}, {0x21, 0x02211030}), diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c index 9f521a55d610..b5e41df6bb3a 100644 --- a/sound/soc/amd/acp-pcm-dma.c +++ b/sound/soc/amd/acp-pcm-dma.c @@ -1051,6 +1051,11 @@ static int acp_audio_probe(struct platform_device *pdev) struct resource *res; const u32 *pdata = pdev->dev.platform_data; + if (!pdata) { + dev_err(&pdev->dev, "Missing platform data\n"); + return -ENODEV; + } + audio_drv_data = devm_kzalloc(&pdev->dev, sizeof(struct audio_drv_data), GFP_KERNEL); if (audio_drv_data == NULL) @@ -1058,6 +1063,8 @@ static int acp_audio_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); audio_drv_data->acp_mmio = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(audio_drv_data->acp_mmio)) + return PTR_ERR(audio_drv_data->acp_mmio); /* The following members gets populated in device 'open' * function. Till then interrupts are disabled in 'acp_init' diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig index 4a56f3dfba51..dcee145dd179 100644 --- a/sound/soc/atmel/Kconfig +++ b/sound/soc/atmel/Kconfig @@ -64,7 +64,7 @@ config SND_AT91_SOC_SAM9X5_WM8731 config SND_ATMEL_SOC_CLASSD tristate "Atmel ASoC driver for boards using CLASSD" depends on ARCH_AT91 || COMPILE_TEST - select SND_ATMEL_SOC_DMA + select SND_SOC_GENERIC_DMAENGINE_PCM select REGMAP_MMIO help Say Y if you want to add support for Atmel ASoC driver for boards using diff --git a/sound/soc/codecs/da7218.c b/sound/soc/codecs/da7218.c index b2d42ec1dcd9..56564ce90cb6 100644 --- a/sound/soc/codecs/da7218.c +++ b/sound/soc/codecs/da7218.c @@ -2520,7 +2520,7 @@ static struct da7218_pdata *da7218_of_to_pdata(struct snd_soc_codec *codec) } if (da7218->dev_id == DA7218_DEV_ID) { - hpldet_np = of_find_node_by_name(np, "da7218_hpldet"); + hpldet_np = of_get_child_by_name(np, "da7218_hpldet"); if (!hpldet_np) return pdata; diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c index 5f3c42c4f74a..066ea2f4ce7b 100644 --- a/sound/soc/codecs/msm8916-wcd-analog.c +++ b/sound/soc/codecs/msm8916-wcd-analog.c @@ -267,7 +267,7 @@ #define MSM8916_WCD_ANALOG_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |\ SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_48000) #define MSM8916_WCD_ANALOG_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\ - SNDRV_PCM_FMTBIT_S24_LE) + SNDRV_PCM_FMTBIT_S32_LE) static int btn_mask = SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2 | SND_JACK_BTN_3 | SND_JACK_BTN_4; diff --git a/sound/soc/codecs/msm8916-wcd-digital.c b/sound/soc/codecs/msm8916-wcd-digital.c index a10a724eb448..13354d6304a8 100644 --- a/sound/soc/codecs/msm8916-wcd-digital.c +++ b/sound/soc/codecs/msm8916-wcd-digital.c @@ -194,7 +194,7 @@ SNDRV_PCM_RATE_32000 | \ SNDRV_PCM_RATE_48000) #define MSM8916_WCD_DIGITAL_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\ - SNDRV_PCM_FMTBIT_S24_LE) + SNDRV_PCM_FMTBIT_S32_LE) struct msm8916_wcd_digital_priv { struct clk *ahbclk, *mclk; @@ -645,7 +645,7 @@ static int msm8916_wcd_digital_hw_params(struct snd_pcm_substream *substream, RX_I2S_CTL_RX_I2S_MODE_MASK, RX_I2S_CTL_RX_I2S_MODE_16); break; - case SNDRV_PCM_FORMAT_S24_LE: + case SNDRV_PCM_FORMAT_S32_LE: snd_soc_update_bits(dai->codec, LPASS_CDC_CLK_TX_I2S_CTL, TX_I2S_CTL_TX_I2S_MODE_MASK, TX_I2S_CTL_TX_I2S_MODE_32); diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c index 714ce17da717..e853a6dfd33b 100644 --- a/sound/soc/codecs/nau8825.c +++ b/sound/soc/codecs/nau8825.c @@ -905,6 +905,7 @@ static int nau8825_adc_event(struct snd_soc_dapm_widget *w, switch (event) { case SND_SOC_DAPM_POST_PMU: + msleep(125); regmap_update_bits(nau8825->regmap, NAU8825_REG_ENA_CTRL, NAU8825_ENABLE_ADC, NAU8825_ENABLE_ADC); break; diff --git a/sound/soc/codecs/rt5514-spi.c b/sound/soc/codecs/rt5514-spi.c index 2df91db765ac..64bf26cec20d 100644 --- a/sound/soc/codecs/rt5514-spi.c +++ b/sound/soc/codecs/rt5514-spi.c @@ -289,6 +289,8 @@ static int rt5514_spi_pcm_probe(struct snd_soc_platform *platform) dev_err(&rt5514_spi->dev, "%s Failed to reguest IRQ: %d\n", __func__, ret); + else + device_init_wakeup(rt5514_dsp->dev, true); } return 0; @@ -456,8 +458,6 @@ static int rt5514_spi_probe(struct spi_device *spi) return ret; } - device_init_wakeup(&spi->dev, true); - return 0; } @@ -482,10 +482,13 @@ static int __maybe_unused rt5514_resume(struct device *dev) if (device_may_wakeup(dev)) disable_irq_wake(irq); - if (rt5514_dsp->substream) { - rt5514_spi_burst_read(RT5514_IRQ_CTRL, (u8 *)&buf, sizeof(buf)); - if (buf[0] & RT5514_IRQ_STATUS_BIT) - rt5514_schedule_copy(rt5514_dsp); + if (rt5514_dsp) { + if (rt5514_dsp->substream) { + rt5514_spi_burst_read(RT5514_IRQ_CTRL, (u8 *)&buf, + sizeof(buf)); + if (buf[0] & RT5514_IRQ_STATUS_BIT) + rt5514_schedule_copy(rt5514_dsp); + } } return 0; diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c index 2a5b5d74e697..2dd6e9f990a4 100644 --- a/sound/soc/codecs/rt5514.c +++ b/sound/soc/codecs/rt5514.c @@ -496,7 +496,7 @@ static const struct snd_soc_dapm_widget rt5514_dapm_widgets[] = { SND_SOC_DAPM_PGA("DMIC1", SND_SOC_NOPM, 0, 0, NULL, 0), SND_SOC_DAPM_PGA("DMIC2", SND_SOC_NOPM, 0, 0, NULL, 0), - SND_SOC_DAPM_SUPPLY("DMIC CLK", SND_SOC_NOPM, 0, 0, + SND_SOC_DAPM_SUPPLY_S("DMIC CLK", 1, SND_SOC_NOPM, 0, 0, rt5514_set_dmic_clk, SND_SOC_DAPM_PRE_PMU), SND_SOC_DAPM_SUPPLY("ADC CLK", RT5514_CLK_CTRL1, diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index f020d2d1eef4..edc152c8a1fe 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3823,6 +3823,8 @@ static int rt5645_i2c_probe(struct i2c_client *i2c, regmap_read(regmap, RT5645_VENDOR_ID, &val); rt5645->v_id = val & 0xff; + regmap_write(rt5645->regmap, RT5645_AD_DA_MIXER, 0x8080); + ret = regmap_register_patch(rt5645->regmap, init_list, ARRAY_SIZE(init_list)); if (ret != 0) diff --git a/sound/soc/codecs/rt5663.c b/sound/soc/codecs/rt5663.c index b036c9dc0c8c..d329bf719d80 100644 --- a/sound/soc/codecs/rt5663.c +++ b/sound/soc/codecs/rt5663.c @@ -1560,6 +1560,10 @@ static int rt5663_jack_detect(struct snd_soc_codec *codec, int jack_insert) RT5663_IRQ_POW_SAV_MASK, RT5663_IRQ_POW_SAV_EN); snd_soc_update_bits(codec, RT5663_IRQ_1, RT5663_EN_IRQ_JD1_MASK, RT5663_EN_IRQ_JD1_EN); + snd_soc_update_bits(codec, RT5663_EM_JACK_TYPE_1, + RT5663_EM_JD_MASK, RT5663_EM_JD_RST); + snd_soc_update_bits(codec, RT5663_EM_JACK_TYPE_1, + RT5663_EM_JD_MASK, RT5663_EM_JD_NOR); while (true) { regmap_read(rt5663->regmap, RT5663_INT_ST_2, &val); diff --git a/sound/soc/codecs/rt5663.h b/sound/soc/codecs/rt5663.h index c5a9b69579ad..03adc8004ba9 100644 --- a/sound/soc/codecs/rt5663.h +++ b/sound/soc/codecs/rt5663.h @@ -1029,6 +1029,10 @@ #define RT5663_POL_EXT_JD_SHIFT 10 #define RT5663_POL_EXT_JD_EN (0x1 << 10) #define RT5663_POL_EXT_JD_DIS (0x0 << 10) +#define RT5663_EM_JD_MASK (0x1 << 7) +#define RT5663_EM_JD_SHIFT 7 +#define RT5663_EM_JD_NOR (0x1 << 7) +#define RT5663_EM_JD_RST (0x0 << 7) /* DACREF LDO Control (0x0112)*/ #define RT5663_PWR_LDO_DACREFL_MASK (0x1 << 9) diff --git a/sound/soc/codecs/tlv320aic31xx.h b/sound/soc/codecs/tlv320aic31xx.h index 730fb2058869..1ff3edb7bbb6 100644 --- a/sound/soc/codecs/tlv320aic31xx.h +++ b/sound/soc/codecs/tlv320aic31xx.h @@ -116,7 +116,7 @@ struct aic31xx_pdata { /* INT2 interrupt control */ #define AIC31XX_INT2CTRL AIC31XX_REG(0, 49) /* GPIO1 control */ -#define AIC31XX_GPIO1 AIC31XX_REG(0, 50) +#define AIC31XX_GPIO1 AIC31XX_REG(0, 51) #define AIC31XX_DACPRB AIC31XX_REG(0, 60) /* ADC Instruction Set Register */ diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c index c482b2e7a7d2..cfe72b9d4356 100644 --- a/sound/soc/codecs/twl4030.c +++ b/sound/soc/codecs/twl4030.c @@ -232,7 +232,7 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec) struct twl4030_codec_data *pdata = dev_get_platdata(codec->dev); struct device_node *twl4030_codec_node = NULL; - twl4030_codec_node = of_find_node_by_name(codec->dev->parent->of_node, + twl4030_codec_node = of_get_child_by_name(codec->dev->parent->of_node, "codec"); if (!pdata && twl4030_codec_node) { @@ -241,9 +241,11 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec) GFP_KERNEL); if (!pdata) { dev_err(codec->dev, "Can not allocate memory\n"); + of_node_put(twl4030_codec_node); return NULL; } twl4030_setup_pdata_of(pdata, twl4030_codec_node); + of_node_put(twl4030_codec_node); } return pdata; diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 65c059b5ffd7..66e32f5d2917 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1733,7 +1733,7 @@ static int wm_adsp_load(struct wm_adsp *dsp) le64_to_cpu(footer->timestamp)); while (pos < firmware->size && - pos - firmware->size > sizeof(*region)) { + sizeof(*region) < firmware->size - pos) { region = (void *)&(firmware->data[pos]); region_name = "Unknown"; reg = 0; @@ -1782,8 +1782,8 @@ static int wm_adsp_load(struct wm_adsp *dsp) regions, le32_to_cpu(region->len), offset, region_name); - if ((pos + le32_to_cpu(region->len) + sizeof(*region)) > - firmware->size) { + if (le32_to_cpu(region->len) > + firmware->size - pos - sizeof(*region)) { adsp_err(dsp, "%s.%d: %s region len %d bytes exceeds file length %zu\n", file, regions, region_name, @@ -2253,7 +2253,7 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp) blocks = 0; while (pos < firmware->size && - pos - firmware->size > sizeof(*blk)) { + sizeof(*blk) < firmware->size - pos) { blk = (void *)(&firmware->data[pos]); type = le16_to_cpu(blk->type); @@ -2327,8 +2327,8 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp) } if (reg) { - if ((pos + le32_to_cpu(blk->len) + sizeof(*blk)) > - firmware->size) { + if (le32_to_cpu(blk->len) > + firmware->size - pos - sizeof(*blk)) { adsp_err(dsp, "%s.%d: %s region len %d bytes exceeds file length %zu\n", file, blocks, region_name, diff --git a/sound/soc/fsl/fsl_asrc.h b/sound/soc/fsl/fsl_asrc.h index 0f163abe4ba3..52c27a358933 100644 --- a/sound/soc/fsl/fsl_asrc.h +++ b/sound/soc/fsl/fsl_asrc.h @@ -260,8 +260,8 @@ #define ASRFSTi_OUTPUT_FIFO_SHIFT 12 #define ASRFSTi_OUTPUT_FIFO_MASK (((1 << ASRFSTi_OUTPUT_FIFO_WIDTH) - 1) << ASRFSTi_OUTPUT_FIFO_SHIFT) #define ASRFSTi_IAEi_SHIFT 11 -#define ASRFSTi_IAEi_MASK (1 << ASRFSTi_OAFi_SHIFT) -#define ASRFSTi_IAEi (1 << ASRFSTi_OAFi_SHIFT) +#define ASRFSTi_IAEi_MASK (1 << ASRFSTi_IAEi_SHIFT) +#define ASRFSTi_IAEi (1 << ASRFSTi_IAEi_SHIFT) #define ASRFSTi_INPUT_FIFO_WIDTH 7 #define ASRFSTi_INPUT_FIFO_SHIFT 0 #define ASRFSTi_INPUT_FIFO_MASK ((1 << ASRFSTi_INPUT_FIFO_WIDTH) - 1) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index f2f51e06e22c..424bafaf51ef 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -38,6 +38,7 @@ #include <linux/ctype.h> #include <linux/device.h> #include <linux/delay.h> +#include <linux/mutex.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/of.h> @@ -265,6 +266,8 @@ struct fsl_ssi_private { u32 fifo_watermark; u32 dma_maxburst; + + struct mutex ac97_reg_lock; }; /* @@ -1260,11 +1263,13 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg, if (reg > 0x7f) return; + mutex_lock(&fsl_ac97_data->ac97_reg_lock); + ret = clk_prepare_enable(fsl_ac97_data->clk); if (ret) { pr_err("ac97 write clk_prepare_enable failed: %d\n", ret); - return; + goto ret_unlock; } lreg = reg << 12; @@ -1278,6 +1283,9 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg, udelay(100); clk_disable_unprepare(fsl_ac97_data->clk); + +ret_unlock: + mutex_unlock(&fsl_ac97_data->ac97_reg_lock); } static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97, @@ -1285,16 +1293,18 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97, { struct regmap *regs = fsl_ac97_data->regs; - unsigned short val = -1; + unsigned short val = 0; u32 reg_val; unsigned int lreg; int ret; + mutex_lock(&fsl_ac97_data->ac97_reg_lock); + ret = clk_prepare_enable(fsl_ac97_data->clk); if (ret) { pr_err("ac97 read clk_prepare_enable failed: %d\n", ret); - return -1; + goto ret_unlock; } lreg = (reg & 0x7f) << 12; @@ -1309,6 +1319,8 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97, clk_disable_unprepare(fsl_ac97_data->clk); +ret_unlock: + mutex_unlock(&fsl_ac97_data->ac97_reg_lock); return val; } @@ -1458,12 +1470,6 @@ static int fsl_ssi_probe(struct platform_device *pdev) sizeof(fsl_ssi_ac97_dai)); fsl_ac97_data = ssi_private; - - ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev); - if (ret) { - dev_err(&pdev->dev, "could not set AC'97 ops\n"); - return ret; - } } else { /* Initialize this copy of the CPU DAI driver structure */ memcpy(&ssi_private->cpu_dai_drv, &fsl_ssi_dai_template, @@ -1574,6 +1580,15 @@ static int fsl_ssi_probe(struct platform_device *pdev) return ret; } + if (fsl_ssi_is_ac97(ssi_private)) { + mutex_init(&ssi_private->ac97_reg_lock); + ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev); + if (ret) { + dev_err(&pdev->dev, "could not set AC'97 ops\n"); + goto error_ac97_ops; + } + } + ret = devm_snd_soc_register_component(&pdev->dev, &fsl_ssi_component, &ssi_private->cpu_dai_drv, 1); if (ret) { @@ -1657,6 +1672,13 @@ error_sound_card: fsl_ssi_debugfs_remove(&ssi_private->dbg_stats); error_asoc_register: + if (fsl_ssi_is_ac97(ssi_private)) + snd_soc_set_ac97_ops(NULL); + +error_ac97_ops: + if (fsl_ssi_is_ac97(ssi_private)) + mutex_destroy(&ssi_private->ac97_reg_lock); + if (ssi_private->soc->imx) fsl_ssi_imx_clean(pdev, ssi_private); @@ -1675,8 +1697,10 @@ static int fsl_ssi_remove(struct platform_device *pdev) if (ssi_private->soc->imx) fsl_ssi_imx_clean(pdev, ssi_private); - if (fsl_ssi_is_ac97(ssi_private)) + if (fsl_ssi_is_ac97(ssi_private)) { snd_soc_set_ac97_ops(NULL); + mutex_destroy(&ssi_private->ac97_reg_lock); + } return 0; } diff --git a/sound/soc/intel/boards/kbl_rt5663_max98927.c b/sound/soc/intel/boards/kbl_rt5663_max98927.c index 6f9a8bcf20f3..6dcad0a8a0d0 100644 --- a/sound/soc/intel/boards/kbl_rt5663_max98927.c +++ b/sound/soc/intel/boards/kbl_rt5663_max98927.c @@ -101,7 +101,7 @@ static const struct snd_soc_dapm_route kabylake_map[] = { { "ssp0 Tx", NULL, "spk_out" }, { "AIF Playback", NULL, "ssp1 Tx" }, - { "ssp1 Tx", NULL, "hs_out" }, + { "ssp1 Tx", NULL, "codec1_out" }, { "hs_in", NULL, "ssp1 Rx" }, { "ssp1 Rx", NULL, "AIF Capture" }, diff --git a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c index 6072164f2d43..271ae3c2c535 100644 --- a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c +++ b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c @@ -109,7 +109,7 @@ static const struct snd_soc_dapm_route kabylake_map[] = { { "ssp0 Tx", NULL, "spk_out" }, { "AIF Playback", NULL, "ssp1 Tx" }, - { "ssp1 Tx", NULL, "hs_out" }, + { "ssp1 Tx", NULL, "codec1_out" }, { "hs_in", NULL, "ssp1 Rx" }, { "ssp1 Rx", NULL, "AIF Capture" }, diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index d14c50a60289..3eaac41090ca 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -119,11 +119,16 @@ static bool skl_check_ep_match(struct device *dev, struct nhlt_endpoint *epnt, if ((epnt->virtual_bus_id == instance_id) && (epnt->linktype == link_type) && - (epnt->direction == dirn) && - (epnt->device_type == dev_type)) - return true; - else - return false; + (epnt->direction == dirn)) { + /* do not check dev_type for DMIC link type */ + if (epnt->linktype == NHLT_LINK_DMIC) + return true; + + if (epnt->device_type == dev_type) + return true; + } + + return false; } struct nhlt_specific_cfg diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c index a072bcf209d2..81923da18ac2 100644 --- a/sound/soc/intel/skylake/skl-topology.c +++ b/sound/soc/intel/skylake/skl-topology.c @@ -2908,7 +2908,7 @@ static int skl_tplg_control_load(struct snd_soc_component *cmpnt, break; default: - dev_warn(bus->dev, "Control load not supported %d:%d:%d\n", + dev_dbg(bus->dev, "Control load not supported %d:%d:%d\n", hdr->ops.get, hdr->ops.put, hdr->ops.info); break; } diff --git a/sound/soc/rockchip/rockchip_spdif.c b/sound/soc/rockchip/rockchip_spdif.c index ee5055d47d13..a89fe9b6463b 100644 --- a/sound/soc/rockchip/rockchip_spdif.c +++ b/sound/soc/rockchip/rockchip_spdif.c @@ -322,26 +322,30 @@ static int rk_spdif_probe(struct platform_device *pdev) spdif->mclk = devm_clk_get(&pdev->dev, "mclk"); if (IS_ERR(spdif->mclk)) { dev_err(&pdev->dev, "Can't retrieve rk_spdif master clock\n"); - return PTR_ERR(spdif->mclk); + ret = PTR_ERR(spdif->mclk); + goto err_disable_hclk; } ret = clk_prepare_enable(spdif->mclk); if (ret) { dev_err(spdif->dev, "clock enable failed %d\n", ret); - return ret; + goto err_disable_clocks; } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); regs = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(regs)) - return PTR_ERR(regs); + if (IS_ERR(regs)) { + ret = PTR_ERR(regs); + goto err_disable_clocks; + } spdif->regmap = devm_regmap_init_mmio_clk(&pdev->dev, "hclk", regs, &rk_spdif_regmap_config); if (IS_ERR(spdif->regmap)) { dev_err(&pdev->dev, "Failed to initialise managed register map\n"); - return PTR_ERR(spdif->regmap); + ret = PTR_ERR(spdif->regmap); + goto err_disable_clocks; } spdif->playback_dma_data.addr = res->start + SPDIF_SMPDR; @@ -373,6 +377,10 @@ static int rk_spdif_probe(struct platform_device *pdev) err_pm_runtime: pm_runtime_disable(&pdev->dev); +err_disable_clocks: + clk_disable_unprepare(spdif->mclk); +err_disable_hclk: + clk_disable_unprepare(spdif->hclk); return ret; } diff --git a/sound/soc/sh/rcar/adg.c b/sound/soc/sh/rcar/adg.c index 8ddb08714faa..4672688cac32 100644 --- a/sound/soc/sh/rcar/adg.c +++ b/sound/soc/sh/rcar/adg.c @@ -222,7 +222,7 @@ int rsnd_adg_set_cmd_timsel_gen2(struct rsnd_mod *cmd_mod, NULL, &val, NULL); val = val << shift; - mask = 0xffff << shift; + mask = 0x0f1f << shift; rsnd_mod_bset(adg_mod, CMDOUT_TIMSEL, mask, val); @@ -250,7 +250,7 @@ int rsnd_adg_set_src_timesel_gen2(struct rsnd_mod *src_mod, in = in << shift; out = out << shift; - mask = 0xffff << shift; + mask = 0x0f1f << shift; switch (id / 2) { case 0: @@ -380,7 +380,7 @@ int rsnd_adg_ssi_clk_try_start(struct rsnd_mod *ssi_mod, unsigned int rate) ckr = 0x80000000; } - rsnd_mod_bset(adg_mod, BRGCKR, 0x80FF0000, adg->ckr | ckr); + rsnd_mod_bset(adg_mod, BRGCKR, 0x80770000, adg->ckr | ckr); rsnd_mod_write(adg_mod, BRRA, adg->rbga); rsnd_mod_write(adg_mod, BRRB, adg->rbgb); diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index c70eb2097816..f12a88a21dfa 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -1332,8 +1332,8 @@ static int rsnd_pcm_new(struct snd_soc_pcm_runtime *rtd) return snd_pcm_lib_preallocate_pages_for_all( rtd->pcm, - SNDRV_DMA_TYPE_CONTINUOUS, - snd_dma_continuous_data(GFP_KERNEL), + SNDRV_DMA_TYPE_DEV, + rtd->card->snd_card->dev, PREALLOC_BUFFER, PREALLOC_BUFFER_MAX); } diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c index fd557abfe390..4d750bdf8e24 100644 --- a/sound/soc/sh/rcar/dma.c +++ b/sound/soc/sh/rcar/dma.c @@ -26,10 +26,7 @@ struct rsnd_dmaen { struct dma_chan *chan; dma_cookie_t cookie; - dma_addr_t dma_buf; unsigned int dma_len; - unsigned int dma_period; - unsigned int dma_cnt; }; struct rsnd_dmapp { @@ -71,38 +68,10 @@ static struct rsnd_mod mem = { /* * Audio DMAC */ -#define rsnd_dmaen_sync(dmaen, io, i) __rsnd_dmaen_sync(dmaen, io, i, 1) -#define rsnd_dmaen_unsync(dmaen, io, i) __rsnd_dmaen_sync(dmaen, io, i, 0) -static void __rsnd_dmaen_sync(struct rsnd_dmaen *dmaen, struct rsnd_dai_stream *io, - int i, int sync) -{ - struct device *dev = dmaen->chan->device->dev; - enum dma_data_direction dir; - int is_play = rsnd_io_is_play(io); - dma_addr_t buf; - int len, max; - size_t period; - - len = dmaen->dma_len; - period = dmaen->dma_period; - max = len / period; - i = i % max; - buf = dmaen->dma_buf + (period * i); - - dir = is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE; - - if (sync) - dma_sync_single_for_device(dev, buf, period, dir); - else - dma_sync_single_for_cpu(dev, buf, period, dir); -} - static void __rsnd_dmaen_complete(struct rsnd_mod *mod, struct rsnd_dai_stream *io) { struct rsnd_priv *priv = rsnd_mod_to_priv(mod); - struct rsnd_dma *dma = rsnd_mod_to_dma(mod); - struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma); bool elapsed = false; unsigned long flags; @@ -115,22 +84,9 @@ static void __rsnd_dmaen_complete(struct rsnd_mod *mod, */ spin_lock_irqsave(&priv->lock, flags); - if (rsnd_io_is_working(io)) { - rsnd_dmaen_unsync(dmaen, io, dmaen->dma_cnt); - - /* - * Next period is already started. - * Let's sync Next Next period - * see - * rsnd_dmaen_start() - */ - rsnd_dmaen_sync(dmaen, io, dmaen->dma_cnt + 2); - + if (rsnd_io_is_working(io)) elapsed = true; - dmaen->dma_cnt++; - } - spin_unlock_irqrestore(&priv->lock, flags); if (elapsed) @@ -165,14 +121,8 @@ static int rsnd_dmaen_stop(struct rsnd_mod *mod, struct rsnd_dma *dma = rsnd_mod_to_dma(mod); struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma); - if (dmaen->chan) { - int is_play = rsnd_io_is_play(io); - + if (dmaen->chan) dmaengine_terminate_all(dmaen->chan); - dma_unmap_single(dmaen->chan->device->dev, - dmaen->dma_buf, dmaen->dma_len, - is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - } return 0; } @@ -237,11 +187,7 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod, struct device *dev = rsnd_priv_to_dev(priv); struct dma_async_tx_descriptor *desc; struct dma_slave_config cfg = {}; - dma_addr_t buf; - size_t len; - size_t period; int is_play = rsnd_io_is_play(io); - int i; int ret; cfg.direction = is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM; @@ -258,19 +204,10 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod, if (ret < 0) return ret; - len = snd_pcm_lib_buffer_bytes(substream); - period = snd_pcm_lib_period_bytes(substream); - buf = dma_map_single(dmaen->chan->device->dev, - substream->runtime->dma_area, - len, - is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (dma_mapping_error(dmaen->chan->device->dev, buf)) { - dev_err(dev, "dma map failed\n"); - return -EIO; - } - desc = dmaengine_prep_dma_cyclic(dmaen->chan, - buf, len, period, + substream->runtime->dma_addr, + snd_pcm_lib_buffer_bytes(substream), + snd_pcm_lib_period_bytes(substream), is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); @@ -282,18 +219,7 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod, desc->callback = rsnd_dmaen_complete; desc->callback_param = rsnd_mod_get(dma); - dmaen->dma_buf = buf; - dmaen->dma_len = len; - dmaen->dma_period = period; - dmaen->dma_cnt = 0; - - /* - * synchronize this and next period - * see - * __rsnd_dmaen_complete() - */ - for (i = 0; i < 2; i++) - rsnd_dmaen_sync(dmaen, io, i); + dmaen->dma_len = snd_pcm_lib_buffer_bytes(substream); dmaen->cookie = dmaengine_submit(desc); if (dmaen->cookie < 0) { diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index fece1e5f582f..cbf3bf312d23 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -446,25 +446,29 @@ static bool rsnd_ssi_pointer_update(struct rsnd_mod *mod, int byte) { struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); + bool ret = false; + int byte_pos; - ssi->byte_pos += byte; + byte_pos = ssi->byte_pos + byte; - if (ssi->byte_pos >= ssi->next_period_byte) { + if (byte_pos >= ssi->next_period_byte) { struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); ssi->period_pos++; ssi->next_period_byte += ssi->byte_per_period; if (ssi->period_pos >= runtime->periods) { - ssi->byte_pos = 0; + byte_pos = 0; ssi->period_pos = 0; ssi->next_period_byte = ssi->byte_per_period; } - return true; + ret = true; } - return false; + WRITE_ONCE(ssi->byte_pos, byte_pos); + + return ret; } /* @@ -838,7 +842,7 @@ static int rsnd_ssi_pointer(struct rsnd_mod *mod, struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); - *pointer = bytes_to_frames(runtime, ssi->byte_pos); + *pointer = bytes_to_frames(runtime, READ_ONCE(ssi->byte_pos)); return 0; } diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c index 4d948757d300..6ff8a36c2c82 100644 --- a/sound/soc/sh/rcar/ssiu.c +++ b/sound/soc/sh/rcar/ssiu.c @@ -125,6 +125,7 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod, { int hdmi = rsnd_ssi_hdmi_port(io); int ret; + u32 mode = 0; ret = rsnd_ssiu_init(mod, io, priv); if (ret < 0) @@ -136,9 +137,11 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod, * see * rsnd_ssi_config_init() */ - rsnd_mod_write(mod, SSI_MODE, 0x1); + mode = 0x1; } + rsnd_mod_write(mod, SSI_MODE, mode); + if (rsnd_ssi_use_busif(io)) { rsnd_mod_write(mod, SSI_BUSIF_ADINR, rsnd_get_adinr_bit(mod, io) | diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index e2450c8e88e6..a8c3a33dd185 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -523,21 +523,23 @@ static int do_show(int argc, char **argv) break; p_err("can't get next map: %s%s", strerror(errno), errno == EINVAL ? " -- kernel too old?" : ""); - return -1; + break; } fd = bpf_map_get_fd_by_id(id); if (fd < 0) { + if (errno == ENOENT) + continue; p_err("can't get map by id (%u): %s", id, strerror(errno)); - return -1; + break; } err = bpf_obj_get_info_by_fd(fd, &info, &len); if (err) { p_err("can't get map info: %s", strerror(errno)); close(fd); - return -1; + break; } if (json_output) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 42ee8892549c..fd0873178503 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -383,6 +383,8 @@ static int do_show(int argc, char **argv) fd = bpf_prog_get_fd_by_id(id); if (fd < 0) { + if (errno == ENOENT) + continue; p_err("can't get prog by id (%u): %s", id, strerror(errno)); err = -1; diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f1fdb36269f2..1304753d29ea 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -41,7 +41,7 @@ $(BPFOBJ): force CLANG ?= clang LLC ?= llc -PROBE := $(shell llc -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) +PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) # Let newer LLVM versions transparently probe the kernel for availability # of full BPF instruction set. diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 66e5ce5b91f0..0304ffb714f2 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -627,13 +627,10 @@ static void do_multicpu_tests(void) static int finish_exec_test(void) { /* - * In a sensible world, this would be check_invalid_segment(0, 1); - * For better or for worse, though, the LDT is inherited across exec. - * We can probably change this safely, but for now we test it. + * Older kernel versions did inherit the LDT on exec() which is + * wrong because exec() starts from a clean state. */ - check_valid_segment(0, 1, - AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB, - 42, true); + check_invalid_segment(0, 1); return nerrs ? 1 : 0; } |