diff options
author | Jakub Kicinski <kuba@kernel.org> | 2024-07-11 12:57:57 -0700 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2024-07-11 12:58:13 -0700 |
commit | 7c8267275de6989a9b682a07d75e89395457ee01 (patch) | |
tree | db28c44520d9f786a4142871b42eb80f0d205ddd | |
parent | a6a9fcb10836105e525ccb8bc1a6af4b20a113be (diff) | |
parent | 51df8e0cbaefd432f7029dde94e6c7e4e5b19465 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR.
Conflicts:
net/sched/act_ct.c
26488172b029 ("net/sched: Fix UAF when resolving a clash")
3abbd7ed8b76 ("act_ct: prepare for stolen verdict coming from conntrack and nat engine")
No adjacent changes.
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
195 files changed, 2210 insertions, 1334 deletions
@@ -384,6 +384,7 @@ Li Yang <leoyang.li@nxp.com> <leoli@freescale.com> Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org> Lior David <quic_liord@quicinc.com> <liord@codeaurora.org> Lorenzo Pieralisi <lpieralisi@kernel.org> <lorenzo.pieralisi@arm.com> +Lorenzo Stoakes <lorenzo.stoakes@oracle.com> <lstoakes@gmail.com> Luca Ceresoli <luca.ceresoli@bootlin.com> <luca@lucaceresoli.net> Lukasz Luba <lukasz.luba@arm.com> <l.luba@partner.samsung.com> Luo Jie <quic_luoj@quicinc.com> <luoj@codeaurora.org> @@ -3150,9 +3150,11 @@ S: Triftstra=DFe 55 S: 13353 Berlin S: Germany -N: Gustavo Pimental +N: Gustavo Pimentel E: gustavo.pimentel@synopsys.com D: PCI driver for Synopsys DesignWare +D: Synopsys DesignWare eDMA driver +D: Synopsys DesignWare xData traffic generator N: Emanuel Pirker E: epirker@edu.uni-klu.ac.at diff --git a/Documentation/arch/riscv/cmodx.rst b/Documentation/arch/riscv/cmodx.rst index 1c0ca06b6c97..8c48bcff3df9 100644 --- a/Documentation/arch/riscv/cmodx.rst +++ b/Documentation/arch/riscv/cmodx.rst @@ -62,10 +62,10 @@ cmodx.c:: printf("Value before cmodx: %d\n", value); // Call prctl before first fence.i is called inside modify_instruction - prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_ON, PR_RISCV_CTX_SW_FENCEI, PR_RISCV_SCOPE_PER_PROCESS); + prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON, PR_RISCV_SCOPE_PER_PROCESS); modify_instruction(); // Call prctl after final fence.i is called in process - prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_OFF, PR_RISCV_CTX_SW_FENCEI, PR_RISCV_SCOPE_PER_PROCESS); + prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_OFF, PR_RISCV_SCOPE_PER_PROCESS); value = get_value(); printf("Value after cmodx: %d\n", value); diff --git a/Documentation/networking/devlink/devlink-region.rst b/Documentation/networking/devlink/devlink-region.rst index 9232cd7da301..5d0b68f752c0 100644 --- a/Documentation/networking/devlink/devlink-region.rst +++ b/Documentation/networking/devlink/devlink-region.rst @@ -49,7 +49,7 @@ example usage $ devlink region show [ DEV/REGION ] $ devlink region del DEV/REGION snapshot SNAPSHOT_ID $ devlink region dump DEV/REGION [ snapshot SNAPSHOT_ID ] - $ devlink region read DEV/REGION [ snapshot SNAPSHOT_ID ] address ADDRESS length length + $ devlink region read DEV/REGION [ snapshot SNAPSHOT_ID ] address ADDRESS length LENGTH # Show all of the exposed regions with region sizes: $ devlink region show diff --git a/MAINTAINERS b/MAINTAINERS index e0f28278e504..9540f9290f56 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6239,9 +6239,8 @@ S: Maintained F: drivers/usb/dwc3/ DESIGNWARE XDATA IP DRIVER -M: Gustavo Pimentel <gustavo.pimentel@synopsys.com> L: linux-pci@vger.kernel.org -S: Maintained +S: Orphan F: Documentation/misc-devices/dw-xdata-pcie.rst F: drivers/misc/dw-xdata-pcie.c @@ -14475,7 +14474,7 @@ MEMORY MAPPING M: Andrew Morton <akpm@linux-foundation.org> R: Liam R. Howlett <Liam.Howlett@oracle.com> R: Vlastimil Babka <vbabka@suse.cz> -R: Lorenzo Stoakes <lstoakes@gmail.com> +R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> L: linux-mm@kvack.org S: Maintained W: http://www.linux-mm.org @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 10 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Baby Opossum Posse # *DOCUMENTATION* diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index d1030bc52564..d283d281d28e 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -849,6 +849,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) { struct eeh_dev *edev; struct pci_dev *pdev; + struct pci_bus *bus = NULL; if (pe->type & EEH_PE_PHB) return pe->phb->bus; @@ -859,9 +860,11 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) /* Retrieve the parent PCI bus of first (top) PCI device */ edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry); + pci_lock_rescan_remove(); pdev = eeh_dev_to_pci_dev(edev); if (pdev) - return pdev->bus; + bus = pdev->bus; + pci_unlock_rescan_remove(); - return NULL; + return bus; } diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 4690c219bfa4..63432a33ec49 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -647,8 +647,9 @@ __after_prom_start: * Note: This process overwrites the OF exception vectors. */ LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET) - mr. r4,r26 /* In some cases the loader may */ - beq 9f /* have already put us at zero */ + mr r4,r26 /* Load the virtual source address into r4 */ + cmpld r3,r4 /* Check if source == dest */ + beq 9f /* If so skip the copy */ li r6,0x100 /* Start offset, the first 0x100 */ /* bytes were copied earlier. */ diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index 85050be08a23..72b12bc10f90 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -27,6 +27,7 @@ #include <asm/paca.h> #include <asm/mmu.h> #include <asm/sections.h> /* _end */ +#include <asm/setup.h> #include <asm/smp.h> #include <asm/hw_breakpoint.h> #include <asm/svm.h> @@ -317,6 +318,16 @@ void default_machine_kexec(struct kimage *image) if (!kdump_in_progress()) kexec_prepare_cpus(); +#ifdef CONFIG_PPC_PSERIES + /* + * This must be done after other CPUs have shut down, otherwise they + * could execute the 'scv' instruction, which is not supported with + * reloc disabled (see configure_exceptions()). + */ + if (firmware_has_feature(FW_FEATURE_SET_MODE)) + pseries_disable_reloc_on_exc(); +#endif + printk("kexec: Starting switchover sequence.\n"); /* switch to a staticly allocated stack. Based on irq stack code. diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c index 096d09ed89f6..431be156ca9b 100644 --- a/arch/powerpc/platforms/pseries/kexec.c +++ b/arch/powerpc/platforms/pseries/kexec.c @@ -61,11 +61,3 @@ void pseries_kexec_cpu_down(int crash_shutdown, int secondary) } else xics_kexec_teardown_cpu(secondary); } - -void pseries_machine_kexec(struct kimage *image) -{ - if (firmware_has_feature(FW_FEATURE_SET_MODE)) - pseries_disable_reloc_on_exc(); - - default_machine_kexec(image); -} diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index bba4ad192b0f..3968a6970fa8 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -38,7 +38,6 @@ static inline void smp_init_pseries(void) { } #endif extern void pseries_kexec_cpu_down(int crash_shutdown, int secondary); -void pseries_machine_kexec(struct kimage *image); extern void pSeries_final_fixup(void); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 284a6fa04b0c..b10a25325238 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -343,8 +343,8 @@ static int alloc_dispatch_log_kmem_cache(void) { void (*ctor)(void *) = get_dtl_cache_ctor(); - dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES, - DISPATCH_LOG_BYTES, 0, ctor); + dtl_cache = kmem_cache_create_usercopy("dtl", DISPATCH_LOG_BYTES, + DISPATCH_LOG_BYTES, 0, 0, DISPATCH_LOG_BYTES, ctor); if (!dtl_cache) { pr_warn("Failed to create dispatch trace log buffer cache\n"); pr_warn("Stolen time statistics will be unreliable\n"); @@ -1159,7 +1159,6 @@ define_machine(pseries) { .machine_check_exception = pSeries_machine_check_exception, .machine_check_log_err = pSeries_machine_check_log_err, #ifdef CONFIG_KEXEC_CORE - .machine_kexec = pseries_machine_kexec, .kexec_cpu_down = pseries_kexec_cpu_down, #endif #ifdef CONFIG_MEMORY_HOTPLUG diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index ed9cad20c039..3c830a6f7ef4 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -121,20 +121,12 @@ static void machine_kexec_mask_interrupts(void) for_each_irq_desc(i, desc) { struct irq_chip *chip; - int ret; chip = irq_desc_get_chip(desc); if (!chip) continue; - /* - * First try to remove the active state. If this - * fails, try to EOI the interrupt. - */ - ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); - - if (ret && irqd_irq_inprogress(&desc->irq_data) && - chip->irq_eoi) + if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) chip->irq_eoi(&desc->irq_data); if (chip->irq_mask) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 0d3f00eb0bae..10e311b2759d 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -32,6 +32,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) { unsigned long fp, sp, pc; + int graph_idx = 0; int level = 0; if (regs) { @@ -68,7 +69,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, pc = regs->ra; } else { fp = frame->fp; - pc = ftrace_graph_ret_addr(current, NULL, frame->ra, + pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra, &frame->ra); if (pc == (unsigned long)ret_from_exception) { if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index 04db1f993c47..bcf41d6e0df0 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -327,7 +327,7 @@ static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_att event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc); if (IS_ERR(event)) { - pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); + pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); return PTR_ERR(event); } diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 95990461888f..9281063636a7 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -427,6 +427,7 @@ struct kvm_vcpu_stat { u64 instruction_io_other; u64 instruction_lpsw; u64 instruction_lpswe; + u64 instruction_lpswey; u64 instruction_pfmf; u64 instruction_ptff; u64 instruction_sck; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 82e9631cd9ef..54b5b2565df8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -132,6 +132,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, instruction_io_other), STATS_DESC_COUNTER(VCPU, instruction_lpsw), STATS_DESC_COUNTER(VCPU, instruction_lpswe), + STATS_DESC_COUNTER(VCPU, instruction_lpswey), STATS_DESC_COUNTER(VCPU, instruction_pfmf), STATS_DESC_COUNTER(VCPU, instruction_ptff), STATS_DESC_COUNTER(VCPU, instruction_sck), diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 111eb5c74784..bf8534218af3 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -138,6 +138,21 @@ static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, u8 *ar) return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; } +static inline u64 kvm_s390_get_base_disp_siy(struct kvm_vcpu *vcpu, u8 *ar) +{ + u32 base1 = vcpu->arch.sie_block->ipb >> 28; + s64 disp1; + + /* The displacement is a 20bit _SIGNED_ value */ + disp1 = sign_extend64(((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + + ((vcpu->arch.sie_block->ipb & 0xff00) << 4), 19); + + if (ar) + *ar = base1; + + return (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1; +} + static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, u64 *address1, u64 *address2, u8 *ar_b1, u8 *ar_b2) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 1be19cc9d73c..1a49b89706f8 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -797,6 +797,36 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) return 0; } +static int handle_lpswey(struct kvm_vcpu *vcpu) +{ + psw_t new_psw; + u64 addr; + int rc; + u8 ar; + + vcpu->stat.instruction_lpswey++; + + if (!test_kvm_facility(vcpu->kvm, 193)) + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + addr = kvm_s390_get_base_disp_siy(vcpu, &ar); + if (addr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + + vcpu->arch.sie_block->gpsw = new_psw; + if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + return 0; +} + static int handle_stidp(struct kvm_vcpu *vcpu) { u64 stidp_data = vcpu->kvm->arch.model.cpuid; @@ -1462,6 +1492,8 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) case 0x61: case 0x62: return handle_ri(vcpu); + case 0x71: + return handle_lpswey(vcpu); default: return -EOPNOTSUPP; } diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index abb629d7e131..7e3e767ab87d 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -55,6 +55,8 @@ unsigned long *crst_table_alloc(struct mm_struct *mm) void crst_table_free(struct mm_struct *mm, unsigned long *table) { + if (!table) + return; pagetable_free(virt_to_ptdesc(table)); } @@ -262,6 +264,8 @@ static unsigned long *base_crst_alloc(unsigned long val) static void base_crst_free(unsigned long *table) { + if (!table) + return; pagetable_free(virt_to_ptdesc(table)); } diff --git a/arch/xtensa/include/asm/current.h b/arch/xtensa/include/asm/current.h index 08010dbf5e09..df275d554788 100644 --- a/arch/xtensa/include/asm/current.h +++ b/arch/xtensa/include/asm/current.h @@ -19,7 +19,7 @@ struct task_struct; -static inline struct task_struct *get_current(void) +static __always_inline struct task_struct *get_current(void) { return current_thread_info()->task; } diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h index 326db1c1d5d8..e0dffcc43b9e 100644 --- a/arch/xtensa/include/asm/thread_info.h +++ b/arch/xtensa/include/asm/thread_info.h @@ -91,7 +91,7 @@ struct thread_info { } /* how to get the thread information struct from C */ -static inline struct thread_info *current_thread_info(void) +static __always_inline struct thread_info *current_thread_info(void) { struct thread_info *ti; __asm__("extui %0, a1, 0, "__stringify(CURRENT_SHIFT)"\n\t" diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index bd6a7857ce05..831fa4a12159 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -16,7 +16,6 @@ #include <linux/acpi.h> #include <linux/dmi.h> #include <linux/sched.h> /* need_resched() */ -#include <linux/sort.h> #include <linux/tick.h> #include <linux/cpuidle.h> #include <linux/cpu.h> @@ -386,25 +385,24 @@ static void acpi_processor_power_verify_c3(struct acpi_processor *pr, acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 1); } -static int acpi_cst_latency_cmp(const void *a, const void *b) +static void acpi_cst_latency_sort(struct acpi_processor_cx *states, size_t length) { - const struct acpi_processor_cx *x = a, *y = b; + int i, j, k; - if (!(x->valid && y->valid)) - return 0; - if (x->latency > y->latency) - return 1; - if (x->latency < y->latency) - return -1; - return 0; -} -static void acpi_cst_latency_swap(void *a, void *b, int n) -{ - struct acpi_processor_cx *x = a, *y = b; + for (i = 1; i < length; i++) { + if (!states[i].valid) + continue; - if (!(x->valid && y->valid)) - return; - swap(x->latency, y->latency); + for (j = i - 1, k = i; j >= 0; j--) { + if (!states[j].valid) + continue; + + if (states[j].latency > states[k].latency) + swap(states[j].latency, states[k].latency); + + k = j; + } + } } static int acpi_processor_power_verify(struct acpi_processor *pr) @@ -449,10 +447,7 @@ static int acpi_processor_power_verify(struct acpi_processor *pr) if (buggy_latency) { pr_notice("FW issue: working around C-state latencies out of order\n"); - sort(&pr->power.states[1], max_cstate, - sizeof(struct acpi_processor_cx), - acpi_cst_latency_cmp, - acpi_cst_latency_swap); + acpi_cst_latency_sort(&pr->power.states[1], max_cstate); } lapic_timer_propagate_broadcast(pr); diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile index 4c695b0388f3..9bb142c75243 100644 --- a/drivers/char/tpm/Makefile +++ b/drivers/char/tpm/Makefile @@ -16,8 +16,8 @@ tpm-y += eventlog/common.o tpm-y += eventlog/tpm1.o tpm-y += eventlog/tpm2.o tpm-y += tpm-buf.o +tpm-y += tpm2-sessions.o -tpm-$(CONFIG_TCG_TPM2_HMAC) += tpm2-sessions.o tpm-$(CONFIG_ACPI) += tpm_ppi.o eventlog/acpi.o tpm-$(CONFIG_EFI) += eventlog/efi.o tpm-$(CONFIG_OF) += eventlog/of.o diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c index 907ac9956a78..2281d55df545 100644 --- a/drivers/char/tpm/tpm2-sessions.c +++ b/drivers/char/tpm/tpm2-sessions.c @@ -83,9 +83,6 @@ #define AES_KEY_BYTES AES_KEYSIZE_128 #define AES_KEY_BITS (AES_KEY_BYTES*8) -static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy, - u32 *handle, u8 *name); - /* * This is the structure that carries all the auth information (like * session handle, nonces, session key and auth) from use to use it is @@ -148,6 +145,7 @@ struct tpm2_auth { u8 name[AUTH_MAX_NAMES][2 + SHA512_DIGEST_SIZE]; }; +#ifdef CONFIG_TCG_TPM2_HMAC /* * Name Size based on TPM algorithm (assumes no hash bigger than 255) */ @@ -163,6 +161,226 @@ static u8 name_size(const u8 *name) return size_map[alg] + 2; } +static int tpm2_parse_read_public(char *name, struct tpm_buf *buf) +{ + struct tpm_header *head = (struct tpm_header *)buf->data; + off_t offset = TPM_HEADER_SIZE; + u32 tot_len = be32_to_cpu(head->length); + u32 val; + + /* we're starting after the header so adjust the length */ + tot_len -= TPM_HEADER_SIZE; + + /* skip public */ + val = tpm_buf_read_u16(buf, &offset); + if (val > tot_len) + return -EINVAL; + offset += val; + /* name */ + val = tpm_buf_read_u16(buf, &offset); + if (val != name_size(&buf->data[offset])) + return -EINVAL; + memcpy(name, &buf->data[offset], val); + /* forget the rest */ + return 0; +} + +static int tpm2_read_public(struct tpm_chip *chip, u32 handle, char *name) +{ + struct tpm_buf buf; + int rc; + + rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_READ_PUBLIC); + if (rc) + return rc; + + tpm_buf_append_u32(&buf, handle); + rc = tpm_transmit_cmd(chip, &buf, 0, "read public"); + if (rc == TPM2_RC_SUCCESS) + rc = tpm2_parse_read_public(name, &buf); + + tpm_buf_destroy(&buf); + + return rc; +} +#endif /* CONFIG_TCG_TPM2_HMAC */ + +/** + * tpm_buf_append_name() - add a handle area to the buffer + * @chip: the TPM chip structure + * @buf: The buffer to be appended + * @handle: The handle to be appended + * @name: The name of the handle (may be NULL) + * + * In order to compute session HMACs, we need to know the names of the + * objects pointed to by the handles. For most objects, this is simply + * the actual 4 byte handle or an empty buf (in these cases @name + * should be NULL) but for volatile objects, permanent objects and NV + * areas, the name is defined as the hash (according to the name + * algorithm which should be set to sha256) of the public area to + * which the two byte algorithm id has been appended. For these + * objects, the @name pointer should point to this. If a name is + * required but @name is NULL, then TPM2_ReadPublic() will be called + * on the handle to obtain the name. + * + * As with most tpm_buf operations, success is assumed because failure + * will be caused by an incorrect programming model and indicated by a + * kernel message. + */ +void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, + u32 handle, u8 *name) +{ +#ifdef CONFIG_TCG_TPM2_HMAC + enum tpm2_mso_type mso = tpm2_handle_mso(handle); + struct tpm2_auth *auth; + int slot; +#endif + + if (!tpm2_chip_auth(chip)) { + tpm_buf_append_u32(buf, handle); + /* count the number of handles in the upper bits of flags */ + buf->handles++; + return; + } + +#ifdef CONFIG_TCG_TPM2_HMAC + slot = (tpm_buf_length(buf) - TPM_HEADER_SIZE) / 4; + if (slot >= AUTH_MAX_NAMES) { + dev_err(&chip->dev, "TPM: too many handles\n"); + return; + } + auth = chip->auth; + WARN(auth->session != tpm_buf_length(buf), + "name added in wrong place\n"); + tpm_buf_append_u32(buf, handle); + auth->session += 4; + + if (mso == TPM2_MSO_PERSISTENT || + mso == TPM2_MSO_VOLATILE || + mso == TPM2_MSO_NVRAM) { + if (!name) + tpm2_read_public(chip, handle, auth->name[slot]); + } else { + if (name) + dev_err(&chip->dev, "TPM: Handle does not require name but one is specified\n"); + } + + auth->name_h[slot] = handle; + if (name) + memcpy(auth->name[slot], name, name_size(name)); +#endif +} +EXPORT_SYMBOL_GPL(tpm_buf_append_name); + +/** + * tpm_buf_append_hmac_session() - Append a TPM session element + * @chip: the TPM chip structure + * @buf: The buffer to be appended + * @attributes: The session attributes + * @passphrase: The session authority (NULL if none) + * @passphrase_len: The length of the session authority (0 if none) + * + * This fills in a session structure in the TPM command buffer, except + * for the HMAC which cannot be computed until the command buffer is + * complete. The type of session is controlled by the @attributes, + * the main ones of which are TPM2_SA_CONTINUE_SESSION which means the + * session won't terminate after tpm_buf_check_hmac_response(), + * TPM2_SA_DECRYPT which means this buffers first parameter should be + * encrypted with a session key and TPM2_SA_ENCRYPT, which means the + * response buffer's first parameter needs to be decrypted (confusing, + * but the defines are written from the point of view of the TPM). + * + * Any session appended by this command must be finalized by calling + * tpm_buf_fill_hmac_session() otherwise the HMAC will be incorrect + * and the TPM will reject the command. + * + * As with most tpm_buf operations, success is assumed because failure + * will be caused by an incorrect programming model and indicated by a + * kernel message. + */ +void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, + u8 attributes, u8 *passphrase, + int passphrase_len) +{ +#ifdef CONFIG_TCG_TPM2_HMAC + u8 nonce[SHA256_DIGEST_SIZE]; + struct tpm2_auth *auth; + u32 len; +#endif + + if (!tpm2_chip_auth(chip)) { + /* offset tells us where the sessions area begins */ + int offset = buf->handles * 4 + TPM_HEADER_SIZE; + u32 len = 9 + passphrase_len; + + if (tpm_buf_length(buf) != offset) { + /* not the first session so update the existing length */ + len += get_unaligned_be32(&buf->data[offset]); + put_unaligned_be32(len, &buf->data[offset]); + } else { + tpm_buf_append_u32(buf, len); + } + /* auth handle */ + tpm_buf_append_u32(buf, TPM2_RS_PW); + /* nonce */ + tpm_buf_append_u16(buf, 0); + /* attributes */ + tpm_buf_append_u8(buf, 0); + /* passphrase */ + tpm_buf_append_u16(buf, passphrase_len); + tpm_buf_append(buf, passphrase, passphrase_len); + return; + } + +#ifdef CONFIG_TCG_TPM2_HMAC + /* + * The Architecture Guide requires us to strip trailing zeros + * before computing the HMAC + */ + while (passphrase && passphrase_len > 0 && passphrase[passphrase_len - 1] == '\0') + passphrase_len--; + + auth = chip->auth; + auth->attrs = attributes; + auth->passphrase_len = passphrase_len; + if (passphrase_len) + memcpy(auth->passphrase, passphrase, passphrase_len); + + if (auth->session != tpm_buf_length(buf)) { + /* we're not the first session */ + len = get_unaligned_be32(&buf->data[auth->session]); + if (4 + len + auth->session != tpm_buf_length(buf)) { + WARN(1, "session length mismatch, cannot append"); + return; + } + + /* add our new session */ + len += 9 + 2 * SHA256_DIGEST_SIZE; + put_unaligned_be32(len, &buf->data[auth->session]); + } else { + tpm_buf_append_u32(buf, 9 + 2 * SHA256_DIGEST_SIZE); + } + + /* random number for our nonce */ + get_random_bytes(nonce, sizeof(nonce)); + memcpy(auth->our_nonce, nonce, sizeof(nonce)); + tpm_buf_append_u32(buf, auth->handle); + /* our new nonce */ + tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE); + tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE); + tpm_buf_append_u8(buf, auth->attrs); + /* and put a placeholder for the hmac */ + tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE); + tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE); +#endif +} +EXPORT_SYMBOL_GPL(tpm_buf_append_hmac_session); + +#ifdef CONFIG_TCG_TPM2_HMAC + +static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy, + u32 *handle, u8 *name); + /* * It turns out the crypto hmac(sha256) is hard for us to consume * because it assumes a fixed key and the TPM seems to change the key @@ -344,82 +562,6 @@ static void tpm_buf_append_salt(struct tpm_buf *buf, struct tpm_chip *chip) } /** - * tpm_buf_append_hmac_session() - Append a TPM session element - * @chip: the TPM chip structure - * @buf: The buffer to be appended - * @attributes: The session attributes - * @passphrase: The session authority (NULL if none) - * @passphrase_len: The length of the session authority (0 if none) - * - * This fills in a session structure in the TPM command buffer, except - * for the HMAC which cannot be computed until the command buffer is - * complete. The type of session is controlled by the @attributes, - * the main ones of which are TPM2_SA_CONTINUE_SESSION which means the - * session won't terminate after tpm_buf_check_hmac_response(), - * TPM2_SA_DECRYPT which means this buffers first parameter should be - * encrypted with a session key and TPM2_SA_ENCRYPT, which means the - * response buffer's first parameter needs to be decrypted (confusing, - * but the defines are written from the point of view of the TPM). - * - * Any session appended by this command must be finalized by calling - * tpm_buf_fill_hmac_session() otherwise the HMAC will be incorrect - * and the TPM will reject the command. - * - * As with most tpm_buf operations, success is assumed because failure - * will be caused by an incorrect programming model and indicated by a - * kernel message. - */ -void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, - u8 attributes, u8 *passphrase, - int passphrase_len) -{ - u8 nonce[SHA256_DIGEST_SIZE]; - u32 len; - struct tpm2_auth *auth = chip->auth; - - /* - * The Architecture Guide requires us to strip trailing zeros - * before computing the HMAC - */ - while (passphrase && passphrase_len > 0 - && passphrase[passphrase_len - 1] == '\0') - passphrase_len--; - - auth->attrs = attributes; - auth->passphrase_len = passphrase_len; - if (passphrase_len) - memcpy(auth->passphrase, passphrase, passphrase_len); - - if (auth->session != tpm_buf_length(buf)) { - /* we're not the first session */ - len = get_unaligned_be32(&buf->data[auth->session]); - if (4 + len + auth->session != tpm_buf_length(buf)) { - WARN(1, "session length mismatch, cannot append"); - return; - } - - /* add our new session */ - len += 9 + 2 * SHA256_DIGEST_SIZE; - put_unaligned_be32(len, &buf->data[auth->session]); - } else { - tpm_buf_append_u32(buf, 9 + 2 * SHA256_DIGEST_SIZE); - } - - /* random number for our nonce */ - get_random_bytes(nonce, sizeof(nonce)); - memcpy(auth->our_nonce, nonce, sizeof(nonce)); - tpm_buf_append_u32(buf, auth->handle); - /* our new nonce */ - tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE); - tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE); - tpm_buf_append_u8(buf, auth->attrs); - /* and put a placeholder for the hmac */ - tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE); - tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE); -} -EXPORT_SYMBOL(tpm_buf_append_hmac_session); - -/** * tpm_buf_fill_hmac_session() - finalize the session HMAC * @chip: the TPM chip structure * @buf: The buffer to be appended @@ -449,6 +591,9 @@ void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf) u8 cphash[SHA256_DIGEST_SIZE]; struct sha256_state sctx; + if (!auth) + return; + /* save the command code in BE format */ auth->ordinal = head->ordinal; @@ -567,104 +712,6 @@ void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf) } EXPORT_SYMBOL(tpm_buf_fill_hmac_session); -static int tpm2_parse_read_public(char *name, struct tpm_buf *buf) -{ - struct tpm_header *head = (struct tpm_header *)buf->data; - off_t offset = TPM_HEADER_SIZE; - u32 tot_len = be32_to_cpu(head->length); - u32 val; - - /* we're starting after the header so adjust the length */ - tot_len -= TPM_HEADER_SIZE; - - /* skip public */ - val = tpm_buf_read_u16(buf, &offset); - if (val > tot_len) - return -EINVAL; - offset += val; - /* name */ - val = tpm_buf_read_u16(buf, &offset); - if (val != name_size(&buf->data[offset])) - return -EINVAL; - memcpy(name, &buf->data[offset], val); - /* forget the rest */ - return 0; -} - -static int tpm2_read_public(struct tpm_chip *chip, u32 handle, char *name) -{ - struct tpm_buf buf; - int rc; - - rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_READ_PUBLIC); - if (rc) - return rc; - - tpm_buf_append_u32(&buf, handle); - rc = tpm_transmit_cmd(chip, &buf, 0, "read public"); - if (rc == TPM2_RC_SUCCESS) - rc = tpm2_parse_read_public(name, &buf); - - tpm_buf_destroy(&buf); - - return rc; -} - -/** - * tpm_buf_append_name() - add a handle area to the buffer - * @chip: the TPM chip structure - * @buf: The buffer to be appended - * @handle: The handle to be appended - * @name: The name of the handle (may be NULL) - * - * In order to compute session HMACs, we need to know the names of the - * objects pointed to by the handles. For most objects, this is simply - * the actual 4 byte handle or an empty buf (in these cases @name - * should be NULL) but for volatile objects, permanent objects and NV - * areas, the name is defined as the hash (according to the name - * algorithm which should be set to sha256) of the public area to - * which the two byte algorithm id has been appended. For these - * objects, the @name pointer should point to this. If a name is - * required but @name is NULL, then TPM2_ReadPublic() will be called - * on the handle to obtain the name. - * - * As with most tpm_buf operations, success is assumed because failure - * will be caused by an incorrect programming model and indicated by a - * kernel message. - */ -void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, - u32 handle, u8 *name) -{ - enum tpm2_mso_type mso = tpm2_handle_mso(handle); - struct tpm2_auth *auth = chip->auth; - int slot; - - slot = (tpm_buf_length(buf) - TPM_HEADER_SIZE)/4; - if (slot >= AUTH_MAX_NAMES) { - dev_err(&chip->dev, "TPM: too many handles\n"); - return; - } - WARN(auth->session != tpm_buf_length(buf), - "name added in wrong place\n"); - tpm_buf_append_u32(buf, handle); - auth->session += 4; - - if (mso == TPM2_MSO_PERSISTENT || - mso == TPM2_MSO_VOLATILE || - mso == TPM2_MSO_NVRAM) { - if (!name) - tpm2_read_public(chip, handle, auth->name[slot]); - } else { - if (name) - dev_err(&chip->dev, "TPM: Handle does not require name but one is specified\n"); - } - - auth->name_h[slot] = handle; - if (name) - memcpy(auth->name[slot], name, name_size(name)); -} -EXPORT_SYMBOL(tpm_buf_append_name); - /** * tpm_buf_check_hmac_response() - check the TPM return HMAC for correctness * @chip: the TPM chip structure @@ -705,6 +752,9 @@ int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf, u32 cc = be32_to_cpu(auth->ordinal); int parm_len, len, i, handles; + if (!auth) + return rc; + if (auth->session >= TPM_HEADER_SIZE) { WARN(1, "tpm session not filled correctly\n"); goto out; @@ -824,8 +874,13 @@ EXPORT_SYMBOL(tpm_buf_check_hmac_response); */ void tpm2_end_auth_session(struct tpm_chip *chip) { - tpm2_flush_context(chip, chip->auth->handle); - memzero_explicit(chip->auth, sizeof(*chip->auth)); + struct tpm2_auth *auth = chip->auth; + + if (!auth) + return; + + tpm2_flush_context(chip, auth->handle); + memzero_explicit(auth, sizeof(*auth)); } EXPORT_SYMBOL(tpm2_end_auth_session); @@ -907,6 +962,11 @@ int tpm2_start_auth_session(struct tpm_chip *chip) int rc; u32 null_key; + if (!auth) { + dev_warn_once(&chip->dev, "auth session is not active\n"); + return 0; + } + rc = tpm2_load_null(chip, &null_key); if (rc) goto out; @@ -1301,3 +1361,4 @@ int tpm2_sessions_init(struct tpm_chip *chip) return rc; } +#endif /* CONFIG_TCG_TPM2_HMAC */ diff --git a/drivers/clk/mediatek/clk-mt8183-mfgcfg.c b/drivers/clk/mediatek/clk-mt8183-mfgcfg.c index ba504e19d420..62d876e150e1 100644 --- a/drivers/clk/mediatek/clk-mt8183-mfgcfg.c +++ b/drivers/clk/mediatek/clk-mt8183-mfgcfg.c @@ -29,6 +29,7 @@ static const struct mtk_gate mfg_clks[] = { static const struct mtk_clk_desc mfg_desc = { .clks = mfg_clks, .num_clks = ARRAY_SIZE(mfg_clks), + .need_runtime_pm = true, }; static const struct of_device_id of_match_clk_mt8183_mfg[] = { diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c index bd37ab4d1a9b..ba1d1c495bc2 100644 --- a/drivers/clk/mediatek/clk-mtk.c +++ b/drivers/clk/mediatek/clk-mtk.c @@ -496,14 +496,16 @@ static int __mtk_clk_simple_probe(struct platform_device *pdev, } - devm_pm_runtime_enable(&pdev->dev); - /* - * Do a pm_runtime_resume_and_get() to workaround a possible - * deadlock between clk_register() and the genpd framework. - */ - r = pm_runtime_resume_and_get(&pdev->dev); - if (r) - return r; + if (mcd->need_runtime_pm) { + devm_pm_runtime_enable(&pdev->dev); + /* + * Do a pm_runtime_resume_and_get() to workaround a possible + * deadlock between clk_register() and the genpd framework. + */ + r = pm_runtime_resume_and_get(&pdev->dev); + if (r) + return r; + } /* Calculate how many clk_hw_onecell_data entries to allocate */ num_clks = mcd->num_clks + mcd->num_composite_clks; @@ -585,7 +587,8 @@ static int __mtk_clk_simple_probe(struct platform_device *pdev, goto unregister_clks; } - pm_runtime_put(&pdev->dev); + if (mcd->need_runtime_pm) + pm_runtime_put(&pdev->dev); return r; @@ -618,7 +621,8 @@ free_base: if (mcd->shared_io && base) iounmap(base); - pm_runtime_put(&pdev->dev); + if (mcd->need_runtime_pm) + pm_runtime_put(&pdev->dev); return r; } diff --git a/drivers/clk/mediatek/clk-mtk.h b/drivers/clk/mediatek/clk-mtk.h index 22096501a60a..c17fe1c2d732 100644 --- a/drivers/clk/mediatek/clk-mtk.h +++ b/drivers/clk/mediatek/clk-mtk.h @@ -237,6 +237,8 @@ struct mtk_clk_desc { int (*clk_notifier_func)(struct device *dev, struct clk *clk); unsigned int mfg_clk_idx; + + bool need_runtime_pm; }; int mtk_clk_pdev_probe(struct platform_device *pdev); diff --git a/drivers/clk/qcom/apss-ipq-pll.c b/drivers/clk/qcom/apss-ipq-pll.c index 5f7f537e4ecb..e8632db2c542 100644 --- a/drivers/clk/qcom/apss-ipq-pll.c +++ b/drivers/clk/qcom/apss-ipq-pll.c @@ -70,7 +70,6 @@ static struct clk_alpha_pll ipq_pll_stromer_plus = { static const struct alpha_pll_config ipq5018_pll_config = { .l = 0x2a, .config_ctl_val = 0x4001075b, - .config_ctl_hi_val = 0x304, .main_output_mask = BIT(0), .aux_output_mask = BIT(1), .early_output_mask = BIT(3), @@ -84,7 +83,6 @@ static const struct alpha_pll_config ipq5018_pll_config = { static const struct alpha_pll_config ipq5332_pll_config = { .l = 0x2d, .config_ctl_val = 0x4001075b, - .config_ctl_hi_val = 0x304, .main_output_mask = BIT(0), .aux_output_mask = BIT(1), .early_output_mask = BIT(3), diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c index d4227909d1fe..c51647e37df8 100644 --- a/drivers/clk/qcom/clk-alpha-pll.c +++ b/drivers/clk/qcom/clk-alpha-pll.c @@ -2574,6 +2574,9 @@ static int clk_alpha_pll_stromer_plus_set_rate(struct clk_hw *hw, regmap_write(pll->clkr.regmap, PLL_ALPHA_VAL_U(pll), a >> ALPHA_BITWIDTH); + regmap_update_bits(pll->clkr.regmap, PLL_USER_CTL(pll), + PLL_ALPHA_EN, PLL_ALPHA_EN); + regmap_write(pll->clkr.regmap, PLL_MODE(pll), PLL_BYPASSNL); /* Wait five micro seconds or more */ diff --git a/drivers/clk/qcom/gcc-ipq9574.c b/drivers/clk/qcom/gcc-ipq9574.c index 0a3f846695b8..f8b9a1e93bef 100644 --- a/drivers/clk/qcom/gcc-ipq9574.c +++ b/drivers/clk/qcom/gcc-ipq9574.c @@ -2140,9 +2140,10 @@ static struct clk_rcg2 pcnoc_bfdcd_clk_src = { static struct clk_branch gcc_crypto_axi_clk = { .halt_reg = 0x16010, + .halt_check = BRANCH_HALT_VOTED, .clkr = { - .enable_reg = 0x16010, - .enable_mask = BIT(0), + .enable_reg = 0xb004, + .enable_mask = BIT(15), .hw.init = &(const struct clk_init_data) { .name = "gcc_crypto_axi_clk", .parent_hws = (const struct clk_hw *[]) { @@ -2156,9 +2157,10 @@ static struct clk_branch gcc_crypto_axi_clk = { static struct clk_branch gcc_crypto_ahb_clk = { .halt_reg = 0x16014, + .halt_check = BRANCH_HALT_VOTED, .clkr = { - .enable_reg = 0x16014, - .enable_mask = BIT(0), + .enable_reg = 0xb004, + .enable_mask = BIT(16), .hw.init = &(const struct clk_init_data) { .name = "gcc_crypto_ahb_clk", .parent_hws = (const struct clk_hw *[]) { diff --git a/drivers/clk/qcom/gcc-sm6350.c b/drivers/clk/qcom/gcc-sm6350.c index cf4a7b6e0b23..0559a33faf00 100644 --- a/drivers/clk/qcom/gcc-sm6350.c +++ b/drivers/clk/qcom/gcc-sm6350.c @@ -100,8 +100,8 @@ static struct clk_alpha_pll gpll6 = { .enable_mask = BIT(6), .hw.init = &(struct clk_init_data){ .name = "gpll6", - .parent_hws = (const struct clk_hw*[]){ - &gpll0.clkr.hw, + .parent_data = &(const struct clk_parent_data){ + .fw_name = "bi_tcxo", }, .num_parents = 1, .ops = &clk_alpha_pll_fixed_fabia_ops, @@ -124,7 +124,7 @@ static struct clk_alpha_pll_postdiv gpll6_out_even = { .clkr.hw.init = &(struct clk_init_data){ .name = "gpll6_out_even", .parent_hws = (const struct clk_hw*[]){ - &gpll0.clkr.hw, + &gpll6.clkr.hw, }, .num_parents = 1, .ops = &clk_alpha_pll_postdiv_fabia_ops, @@ -139,8 +139,8 @@ static struct clk_alpha_pll gpll7 = { .enable_mask = BIT(7), .hw.init = &(struct clk_init_data){ .name = "gpll7", - .parent_hws = (const struct clk_hw*[]){ - &gpll0.clkr.hw, + .parent_data = &(const struct clk_parent_data){ + .fw_name = "bi_tcxo", }, .num_parents = 1, .ops = &clk_alpha_pll_fixed_fabia_ops, diff --git a/drivers/clk/sunxi-ng/ccu_common.c b/drivers/clk/sunxi-ng/ccu_common.c index ac0091b4ce24..be375ce0149c 100644 --- a/drivers/clk/sunxi-ng/ccu_common.c +++ b/drivers/clk/sunxi-ng/ccu_common.c @@ -132,7 +132,6 @@ static int sunxi_ccu_probe(struct sunxi_ccu *ccu, struct device *dev, for (i = 0; i < desc->hw_clks->num ; i++) { struct clk_hw *hw = desc->hw_clks->hws[i]; - struct ccu_common *common = hw_to_ccu_common(hw); const char *name; if (!hw) @@ -147,14 +146,21 @@ static int sunxi_ccu_probe(struct sunxi_ccu *ccu, struct device *dev, pr_err("Couldn't register clock %d - %s\n", i, name); goto err_clk_unreg; } + } + + for (i = 0; i < desc->num_ccu_clks; i++) { + struct ccu_common *cclk = desc->ccu_clks[i]; + + if (!cclk) + continue; - if (common->max_rate) - clk_hw_set_rate_range(hw, common->min_rate, - common->max_rate); + if (cclk->max_rate) + clk_hw_set_rate_range(&cclk->hw, cclk->min_rate, + cclk->max_rate); else - WARN(common->min_rate, + WARN(cclk->min_rate, "No max_rate, ignoring min_rate of clock %d - %s\n", - i, name); + i, clk_hw_get_name(&cclk->hw)); } ret = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 37f1cdf46d29..4ac3a35dcd98 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -890,8 +890,10 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency) pr_warn(FW_WARN "P-state 0 is not max freq\n"); - if (acpi_cpufreq_driver.set_boost) + if (acpi_cpufreq_driver.set_boost) { set_boost(policy, acpi_cpufreq_driver.boost_enabled); + policy->boost_enabled = acpi_cpufreq_driver.boost_enabled; + } return result; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a45aac17c20f..9e5060b27864 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1431,7 +1431,8 @@ static int cpufreq_online(unsigned int cpu) } /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ - policy->boost_enabled = cpufreq_boost_enabled() && policy_has_boost_freq(policy); + if (cpufreq_boost_enabled() && policy_has_boost_freq(policy)) + policy->boost_enabled = true; /* * The initialization has succeeded and the policy is online. diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c index 880ffcb50088..921f61507ae8 100644 --- a/drivers/firmware/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -101,8 +101,10 @@ static __init struct device *sysfb_parent_dev(const struct screen_info *si) if (IS_ERR(pdev)) { return ERR_CAST(pdev); } else if (pdev) { - if (!sysfb_pci_dev_is_enabled(pdev)) + if (!sysfb_pci_dev_is_enabled(pdev)) { + pci_dev_put(pdev); return ERR_PTR(-ENODEV); + } return &pdev->dev; } @@ -137,7 +139,7 @@ static __init int sysfb_init(void) if (compatible) { pd = sysfb_create_simplefb(si, &mode, parent); if (!IS_ERR(pd)) - goto unlock_mutex; + goto put_device; } /* if the FB is incompatible, create a legacy framebuffer device */ @@ -155,7 +157,7 @@ static __init int sysfb_init(void) pd = platform_device_alloc(name, 0); if (!pd) { ret = -ENOMEM; - goto unlock_mutex; + goto put_device; } pd->dev.parent = parent; @@ -170,9 +172,11 @@ static __init int sysfb_init(void) if (ret) goto err; - goto unlock_mutex; + goto put_device; err: platform_device_put(pd); +put_device: + put_device(parent); unlock_mutex: mutex_unlock(&disable_lock); return ret; diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c index 71e1af7c2184..d89e78f0ead3 100644 --- a/drivers/gpio/gpio-mmio.c +++ b/drivers/gpio/gpio-mmio.c @@ -619,8 +619,6 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev, ret = gpiochip_get_ngpios(gc, dev); if (ret) gc->ngpio = gc->bgpio_bits; - else - gc->bgpio_bits = roundup_pow_of_two(round_up(gc->ngpio, 8)); ret = bgpio_setup_io(gc, dat, set, clr, flags); if (ret) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index d75f6ee37028..89d5e64cf68b 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -203,6 +203,24 @@ static void of_gpio_try_fixup_polarity(const struct device_node *np, */ { "qi,lb60", "rb-gpios", true }, #endif +#if IS_ENABLED(CONFIG_PCI_LANTIQ) + /* + * According to the PCI specification, the RST# pin is an + * active-low signal. However, most of the device trees that + * have been widely used for a long time incorrectly describe + * reset GPIO as active-high, and were also using wrong name + * for the property. + */ + { "lantiq,pci-xway", "gpio-reset", false }, +#endif +#if IS_ENABLED(CONFIG_TOUCHSCREEN_TSC2005) + /* + * DTS for Nokia N900 incorrectly specified "active high" + * polarity for the reset line, while the chip actually + * treats it as "active low". + */ + { "ti,tsc2005", "reset-gpios", false }, +#endif }; unsigned int i; @@ -504,9 +522,9 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np, { "reset", "reset-n-io", "marvell,nfc-uart" }, { "reset", "reset-n-io", "mrvl,nfc-uart" }, #endif -#if !IS_ENABLED(CONFIG_PCI_LANTIQ) +#if IS_ENABLED(CONFIG_PCI_LANTIQ) /* MIPS Lantiq PCI */ - { "reset", "gpios-reset", "lantiq,pci-xway" }, + { "reset", "gpio-reset", "lantiq,pci-xway" }, #endif /* diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e9ac20bed0f2..3cdcadd41be1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -10048,6 +10048,7 @@ skip_modeset: } /* Update Freesync settings. */ + reset_freesync_config_for_crtc(dm_new_crtc_state); get_freesync_config_for_crtc(dm_new_crtc_state, dm_new_conn_state); @@ -11181,6 +11182,49 @@ static bool parse_edid_cea(struct amdgpu_dm_connector *aconnector, return ret; } +static void parse_edid_displayid_vrr(struct drm_connector *connector, + struct edid *edid) +{ + u8 *edid_ext = NULL; + int i; + int j = 0; + u16 min_vfreq; + u16 max_vfreq; + + if (edid == NULL || edid->extensions == 0) + return; + + /* Find DisplayID extension */ + for (i = 0; i < edid->extensions; i++) { + edid_ext = (void *)(edid + (i + 1)); + if (edid_ext[0] == DISPLAYID_EXT) + break; + } + + if (edid_ext == NULL) + return; + + while (j < EDID_LENGTH) { + /* Get dynamic video timing range from DisplayID if available */ + if (EDID_LENGTH - j > 13 && edid_ext[j] == 0x25 && + (edid_ext[j+1] & 0xFE) == 0 && (edid_ext[j+2] == 9)) { + min_vfreq = edid_ext[j+9]; + if (edid_ext[j+1] & 7) + max_vfreq = edid_ext[j+10] + ((edid_ext[j+11] & 3) << 8); + else + max_vfreq = edid_ext[j+10]; + + if (max_vfreq && min_vfreq) { + connector->display_info.monitor_range.max_vfreq = max_vfreq; + connector->display_info.monitor_range.min_vfreq = min_vfreq; + + return; + } + } + j++; + } +} + static int parse_amd_vsdb(struct amdgpu_dm_connector *aconnector, struct edid *edid, struct amdgpu_hdmi_vsdb_info *vsdb_info) { @@ -11302,6 +11346,11 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (!adev->dm.freesync_module) goto update; + /* Some eDP panels only have the refresh rate range info in DisplayID */ + if ((connector->display_info.monitor_range.min_vfreq == 0 || + connector->display_info.monitor_range.max_vfreq == 0)) + parse_edid_displayid_vrr(connector, edid); + if (edid && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT || sink->sink_signal == SIGNAL_TYPE_EDP)) { bool edid_check_required = false; @@ -11309,9 +11358,11 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (is_dp_capable_without_timing_msa(adev->dm.dc, amdgpu_dm_connector)) { if (edid->features & DRM_EDID_FEATURE_CONTINUOUS_FREQ) { - freesync_capable = true; amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq; amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq; + if (amdgpu_dm_connector->max_vfreq - + amdgpu_dm_connector->min_vfreq > 10) + freesync_capable = true; } else { edid_check_required = edid->version > 1 || (edid->version == 1 && diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 6c84b0fa40f4..0782a34689a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -3364,6 +3364,9 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &mode_lib->vba.UrgentBurstFactorLumaPre[k], &mode_lib->vba.UrgentBurstFactorChromaPre[k], &mode_lib->vba.NotUrgentLatencyHidingPre[k]); + + v->cursor_bw_pre[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] / + 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * v->VRatioPreY[i][j][k]; } { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index a41812598ce8..8ecc972dbffd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -234,6 +234,7 @@ void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, s out->round_trip_ping_latency_dcfclk_cycles = 106; out->smn_latency_us = 2; out->dispclk_dppclk_vco_speed_mhz = 3600; + out->pct_ideal_dram_bw_after_urgent_pixel_only = 65.0; break; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 0f8b3336e26d..cbd1c1f26b7a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -294,7 +294,7 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = (unsigned int)in_ctx->v20.dml_core_ctx.mp.DCFCLKDeepSleep * 1000; context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; - if (in_ctx->v20.dml_core_ctx.ms.support.FCLKChangeSupport[in_ctx->v20.scratch.mode_support_params.out_lowest_state_idx] == dml_fclock_change_unsupported) + if (in_ctx->v20.dml_core_ctx.ms.support.FCLKChangeSupport[0] == dml_fclock_change_unsupported) context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = false; else context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true; diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index 571691837200..09cbc3afd6d8 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -734,7 +734,7 @@ struct atom_gpio_pin_lut_v2_1 { struct atom_common_table_header table_header; /*the real number of this included in the structure is calcualted by using the (whole structure size - the header size)/size of atom_gpio_pin_lut */ - struct atom_gpio_pin_assignment gpio_pin[8]; + struct atom_gpio_pin_assignment gpio_pin[]; }; diff --git a/drivers/gpu/drm/drm_fbdev_generic.c b/drivers/gpu/drm/drm_fbdev_generic.c index 97e579c33d84..1e200d815e1a 100644 --- a/drivers/gpu/drm/drm_fbdev_generic.c +++ b/drivers/gpu/drm/drm_fbdev_generic.c @@ -84,7 +84,8 @@ static int drm_fbdev_generic_helper_fb_probe(struct drm_fb_helper *fb_helper, sizes->surface_width, sizes->surface_height, sizes->surface_bpp); - format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth); + format = drm_driver_legacy_fb_format(dev, sizes->surface_bpp, + sizes->surface_depth); buffer = drm_client_framebuffer_create(client, sizes->surface_width, sizes->surface_height, format); if (IS_ERR(buffer)) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 2166208a961d..3860a8ce1e2d 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -420,13 +420,20 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Galaxy Book 10.6"), }, .driver_data = (void *)&lcd1280x1920_rightside_up, - }, { /* Valve Steam Deck */ + }, { /* Valve Steam Deck (Jupiter) */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Valve"), DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jupiter"), DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "1"), }, .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* Valve Steam Deck (Galileo) */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Valve"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Galileo"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "1"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* VIOS LTH17 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "VIOS"), diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 3c3fc53376ce..6bff169fa8d4 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -2088,6 +2088,9 @@ icl_program_mg_dp_mode(struct intel_digital_port *dig_port, u32 ln0, ln1, pin_assignment; u8 width; + if (DISPLAY_VER(dev_priv) >= 14) + return; + if (!intel_encoder_is_tc(&dig_port->base) || intel_tc_port_in_tbt_alt_mode(dig_port)) return; diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 856b3ef5edb8..0c71d761d378 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -1001,6 +1001,9 @@ nouveau_connector_get_modes(struct drm_connector *connector) struct drm_display_mode *mode; mode = drm_mode_duplicate(dev, nv_connector->native_mode); + if (!mode) + return 0; + drm_mode_probed_add(connector, mode); ret = 1; } diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index b8a84f26b3ef..b5e7b919f241 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -86,15 +86,15 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride, int ret = 0; void *out_alloc; + if (!in->count) + return NULL; + /* User stride must be at least the minimum object size, otherwise it might * lack useful information. */ if (in->stride < min_stride) return ERR_PTR(-EINVAL); - if (!in->count) - return NULL; - out_alloc = kvmalloc_array(in->count, obj_size, GFP_KERNEL); if (!out_alloc) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 79ffcbc41d78..9a0ff48f7061 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -459,6 +459,16 @@ struct panthor_queue { atomic64_t seqno; /** + * @last_fence: Fence of the last submitted job. + * + * We return this fence when we get an empty command stream. + * This way, we are guaranteed that all earlier jobs have completed + * when drm_sched_job::s_fence::finished without having to feed + * the CS ring buffer with a dummy job that only signals the fence. + */ + struct dma_fence *last_fence; + + /** * @in_flight_jobs: List containing all in-flight jobs. * * Used to keep track and signal panthor_job::done_fence when the @@ -829,6 +839,9 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue * panthor_kernel_bo_destroy(queue->ringbuf); panthor_kernel_bo_destroy(queue->iface.mem); + /* Release the last_fence we were holding, if any. */ + dma_fence_put(queue->fence_ctx.last_fence); + kfree(queue); } @@ -2784,9 +2797,6 @@ static void group_sync_upd_work(struct work_struct *work) spin_lock(&queue->fence_ctx.lock); list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) { - if (!job->call_info.size) - continue; - if (syncobj->seqno < job->done_fence->seqno) break; @@ -2865,11 +2875,14 @@ queue_run_job(struct drm_sched_job *sched_job) static_assert(sizeof(call_instrs) % 64 == 0, "call_instrs is not aligned on a cacheline"); - /* Stream size is zero, nothing to do => return a NULL fence and let - * drm_sched signal the parent. + /* Stream size is zero, nothing to do except making sure all previously + * submitted jobs are done before we signal the + * drm_sched_job::s_fence::finished fence. */ - if (!job->call_info.size) - return NULL; + if (!job->call_info.size) { + job->done_fence = dma_fence_get(queue->fence_ctx.last_fence); + return dma_fence_get(job->done_fence); + } ret = pm_runtime_resume_and_get(ptdev->base.dev); if (drm_WARN_ON(&ptdev->base, ret)) @@ -2928,6 +2941,10 @@ queue_run_job(struct drm_sched_job *sched_job) } } + /* Update the last fence. */ + dma_fence_put(queue->fence_ctx.last_fence); + queue->fence_ctx.last_fence = dma_fence_get(job->done_fence); + done_fence = dma_fence_get(job->done_fence); out_unlock: @@ -3378,10 +3395,15 @@ panthor_job_create(struct panthor_file *pfile, goto err_put_job; } - job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); - if (!job->done_fence) { - ret = -ENOMEM; - goto err_put_job; + /* Empty command streams don't need a fence, they'll pick the one from + * the previously submitted job. + */ + if (job->call_info.size) { + job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); + if (!job->done_fence) { + ret = -ENOMEM; + goto err_put_job; + } } ret = drm_sched_job_init(&job->base, diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 2ef201a072f1..e66a230331ee 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -642,7 +642,7 @@ static void radeon_gem_va_update_vm(struct radeon_device *rdev, if (r) goto error_unlock; - if (bo_va->it.start) + if (bo_va->it.start && bo_va->bo) r = radeon_vm_bo_update(rdev, bo_va, bo_va->bo->tbo.resource); error_unlock: diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 6396dece0db1..2427be8bc97f 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -346,6 +346,7 @@ static void ttm_bo_release(struct kref *kref) if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP) || (want_init_on_free() && (bo->ttm != NULL)) || + bo->type == ttm_bo_type_sg || !dma_resv_trylock(bo->base.resv)) { /* The BO is not idle, resurrect it for delayed destroy */ ttm_bo_flush_all_fences(bo); diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 577bd7043740..0443e07880a0 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -342,7 +342,7 @@ static void init_steering_oaddrm(struct xe_gt *gt) else gt->steering[OADDRM].group_target = 1; - gt->steering[DSS].instance_target = 0; /* unused */ + gt->steering[OADDRM].instance_target = 0; /* unused */ } static void init_steering_sqidi_psmi(struct xe_gt *gt) @@ -357,8 +357,8 @@ static void init_steering_sqidi_psmi(struct xe_gt *gt) static void init_steering_inst0(struct xe_gt *gt) { - gt->steering[DSS].group_target = 0; /* unused */ - gt->steering[DSS].instance_target = 0; /* unused */ + gt->steering[INSTANCE0].group_target = 0; /* unused */ + gt->steering[INSTANCE0].instance_target = 0; /* unused */ } static const struct { diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 65e5a3f4c340..198f5c2189cb 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1334,7 +1334,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, GFP_KERNEL, true, 0); if (IS_ERR(sa_bo)) { err = PTR_ERR(sa_bo); - goto err; + goto err_bb; } ppgtt_ofs = NUM_KERNEL_PDE + @@ -1385,7 +1385,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, update_idx); if (IS_ERR(job)) { err = PTR_ERR(job); - goto err_bb; + goto err_sa; } /* Wait on BO move */ @@ -1434,12 +1434,12 @@ xe_migrate_update_pgtables(struct xe_migrate *m, err_job: xe_sched_job_put(job); +err_sa: + drm_suballoc_free(sa_bo, NULL); err_bb: if (!q) mutex_unlock(&m->job_mutex); xe_bb_free(bb, NULL); -err: - drm_suballoc_free(sa_bo, NULL); return ERR_PTR(err); } diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c index a12525b3186b..f448505d5468 100644 --- a/drivers/i2c/busses/i2c-pnx.c +++ b/drivers/i2c/busses/i2c-pnx.c @@ -15,7 +15,6 @@ #include <linux/ioport.h> #include <linux/delay.h> #include <linux/i2c.h> -#include <linux/timer.h> #include <linux/completion.h> #include <linux/platform_device.h> #include <linux/io.h> @@ -32,7 +31,6 @@ struct i2c_pnx_mif { int ret; /* Return value */ int mode; /* Interface mode */ struct completion complete; /* I/O completion */ - struct timer_list timer; /* Timeout */ u8 * buf; /* Data buffer */ int len; /* Length of data buffer */ int order; /* RX Bytes to order via TX */ @@ -117,24 +115,6 @@ static inline int wait_reset(struct i2c_pnx_algo_data *data) return (timeout <= 0); } -static inline void i2c_pnx_arm_timer(struct i2c_pnx_algo_data *alg_data) -{ - struct timer_list *timer = &alg_data->mif.timer; - unsigned long expires = msecs_to_jiffies(alg_data->timeout); - - if (expires <= 1) - expires = 2; - - del_timer_sync(timer); - - dev_dbg(&alg_data->adapter.dev, "Timer armed at %lu plus %lu jiffies.\n", - jiffies, expires); - - timer->expires = jiffies + expires; - - add_timer(timer); -} - /** * i2c_pnx_start - start a device * @slave_addr: slave address @@ -259,8 +239,6 @@ static int i2c_pnx_master_xmit(struct i2c_pnx_algo_data *alg_data) ~(mcntrl_afie | mcntrl_naie | mcntrl_drmie), I2C_REG_CTL(alg_data)); - del_timer_sync(&alg_data->mif.timer); - dev_dbg(&alg_data->adapter.dev, "%s(): Waking up xfer routine.\n", __func__); @@ -276,8 +254,6 @@ static int i2c_pnx_master_xmit(struct i2c_pnx_algo_data *alg_data) ~(mcntrl_afie | mcntrl_naie | mcntrl_drmie), I2C_REG_CTL(alg_data)); - /* Stop timer. */ - del_timer_sync(&alg_data->mif.timer); dev_dbg(&alg_data->adapter.dev, "%s(): Waking up xfer routine after zero-xfer.\n", __func__); @@ -364,8 +340,6 @@ static int i2c_pnx_master_rcv(struct i2c_pnx_algo_data *alg_data) mcntrl_drmie | mcntrl_daie); iowrite32(ctl, I2C_REG_CTL(alg_data)); - /* Kill timer. */ - del_timer_sync(&alg_data->mif.timer); complete(&alg_data->mif.complete); } } @@ -400,8 +374,6 @@ static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id) mcntrl_drmie); iowrite32(ctl, I2C_REG_CTL(alg_data)); - /* Stop timer, to prevent timeout. */ - del_timer_sync(&alg_data->mif.timer); complete(&alg_data->mif.complete); } else if (stat & mstatus_nai) { /* Slave did not acknowledge, generate a STOP */ @@ -419,8 +391,6 @@ static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id) /* Our return value. */ alg_data->mif.ret = -EIO; - /* Stop timer, to prevent timeout. */ - del_timer_sync(&alg_data->mif.timer); complete(&alg_data->mif.complete); } else { /* @@ -453,9 +423,8 @@ static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static void i2c_pnx_timeout(struct timer_list *t) +static void i2c_pnx_timeout(struct i2c_pnx_algo_data *alg_data) { - struct i2c_pnx_algo_data *alg_data = from_timer(alg_data, t, mif.timer); u32 ctl; dev_err(&alg_data->adapter.dev, @@ -472,7 +441,6 @@ static void i2c_pnx_timeout(struct timer_list *t) iowrite32(ctl, I2C_REG_CTL(alg_data)); wait_reset(alg_data); alg_data->mif.ret = -EIO; - complete(&alg_data->mif.complete); } static inline void bus_reset_if_active(struct i2c_pnx_algo_data *alg_data) @@ -514,6 +482,7 @@ i2c_pnx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) struct i2c_msg *pmsg; int rc = 0, completed = 0, i; struct i2c_pnx_algo_data *alg_data = adap->algo_data; + unsigned long time_left; u32 stat; dev_dbg(&alg_data->adapter.dev, @@ -548,7 +517,6 @@ i2c_pnx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) dev_dbg(&alg_data->adapter.dev, "%s(): mode %d, %d bytes\n", __func__, alg_data->mif.mode, alg_data->mif.len); - i2c_pnx_arm_timer(alg_data); /* initialize the completion var */ init_completion(&alg_data->mif.complete); @@ -564,7 +532,10 @@ i2c_pnx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) break; /* Wait for completion */ - wait_for_completion(&alg_data->mif.complete); + time_left = wait_for_completion_timeout(&alg_data->mif.complete, + alg_data->timeout); + if (time_left == 0) + i2c_pnx_timeout(alg_data); if (!(rc = alg_data->mif.ret)) completed++; @@ -653,7 +624,10 @@ static int i2c_pnx_probe(struct platform_device *pdev) alg_data->adapter.algo_data = alg_data; alg_data->adapter.nr = pdev->id; - alg_data->timeout = I2C_PNX_TIMEOUT_DEFAULT; + alg_data->timeout = msecs_to_jiffies(I2C_PNX_TIMEOUT_DEFAULT); + if (alg_data->timeout <= 1) + alg_data->timeout = 2; + #ifdef CONFIG_OF alg_data->adapter.dev.of_node = of_node_get(pdev->dev.of_node); if (pdev->dev.of_node) { @@ -673,8 +647,6 @@ static int i2c_pnx_probe(struct platform_device *pdev) if (IS_ERR(alg_data->clk)) return PTR_ERR(alg_data->clk); - timer_setup(&alg_data->mif.timer, i2c_pnx_timeout, 0); - snprintf(alg_data->adapter.name, sizeof(alg_data->adapter.name), "%s", pdev->name); diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 02f07b870f10..268949939636 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -1047,31 +1047,31 @@ static int lan9303_get_sset_count(struct dsa_switch *ds, int port, int sset) return ARRAY_SIZE(lan9303_mib); } -static int lan9303_phy_read(struct dsa_switch *ds, int phy, int regnum) +static int lan9303_phy_read(struct dsa_switch *ds, int port, int regnum) { struct lan9303 *chip = ds->priv; int phy_base = chip->phy_addr_base; - if (phy == phy_base) + if (port == 0) return lan9303_virt_phy_reg_read(chip, regnum); - if (phy > phy_base + 2) + if (port > 2) return -ENODEV; - return chip->ops->phy_read(chip, phy, regnum); + return chip->ops->phy_read(chip, phy_base + port, regnum); } -static int lan9303_phy_write(struct dsa_switch *ds, int phy, int regnum, +static int lan9303_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val) { struct lan9303 *chip = ds->priv; int phy_base = chip->phy_addr_base; - if (phy == phy_base) + if (port == 0) return lan9303_virt_phy_reg_write(chip, regnum, val); - if (phy > phy_base + 2) + if (port > 2) return -ENODEV; - return chip->ops->phy_write(chip, phy, regnum, val); + return chip->ops->phy_write(chip, phy_base + port, regnum, val); } static int lan9303_port_enable(struct dsa_switch *ds, int port, @@ -1099,7 +1099,7 @@ static void lan9303_port_disable(struct dsa_switch *ds, int port) vlan_vid_del(dsa_port_to_conduit(dp), htons(ETH_P_8021Q), port); lan9303_disable_processing_port(chip, port); - lan9303_phy_write(ds, chip->phy_addr_base + port, MII_BMCR, BMCR_PDOWN); + lan9303_phy_write(ds, port, MII_BMCR, BMCR_PDOWN); } static int lan9303_port_bridge_join(struct dsa_switch *ds, int port, @@ -1374,8 +1374,6 @@ static const struct dsa_switch_ops lan9303_switch_ops = { static int lan9303_register_switch(struct lan9303 *chip) { - int base; - chip->ds = devm_kzalloc(chip->dev, sizeof(*chip->ds), GFP_KERNEL); if (!chip->ds) return -ENOMEM; @@ -1385,8 +1383,7 @@ static int lan9303_register_switch(struct lan9303 *chip) chip->ds->priv = chip; chip->ds->ops = &lan9303_switch_ops; chip->ds->phylink_mac_ops = &lan9303_phylink_mac_ops; - base = chip->phy_addr_base; - chip->ds->phys_mii_mask = GENMASK(LAN9303_NUM_PORTS - 1 + base, base); + chip->ds->phys_mii_mask = GENMASK(LAN9303_NUM_PORTS - 1, 0); return dsa_register_switch(chip->ds); } diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c index a806dadc4196..20c6529ec135 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c @@ -1380,6 +1380,7 @@ static int bcmasp_probe(struct platform_device *pdev) dev_err(dev, "Cannot create eth interface %d\n", i); bcmasp_remove_intfs(priv); of_node_put(intf_node); + ret = -ENOMEM; goto of_put_exit; } list_add_tail(&intf->list, &priv->intfs); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 220d05e2f6fa..80fce0aaad66 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6282,6 +6282,21 @@ static u16 bnxt_get_max_rss_ring(struct bnxt *bp) return max_ring; } +u16 bnxt_get_max_rss_ctx_ring(struct bnxt *bp) +{ + u16 i, tbl_size, max_ring = 0; + struct bnxt_rss_ctx *rss_ctx; + + tbl_size = bnxt_get_rxfh_indir_size(bp->dev); + + list_for_each_entry(rss_ctx, &bp->rss_ctx_list, list) { + for (i = 0; i < tbl_size; i++) + max_ring = max(max_ring, rss_ctx->rss_indir_tbl[i]); + } + + return max_ring; +} + int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings) { if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index e46bd11e52b0..3c8826875ceb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2814,6 +2814,7 @@ int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, struct bnxt_vnic_info *vnic, void bnxt_fill_ipv6_mask(__be32 mask[4]); int bnxt_alloc_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx); void bnxt_set_dflt_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx); +u16 bnxt_get_max_rss_ctx_ring(struct bnxt *bp); int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings); int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic); int bnxt_hwrm_vnic_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index bf157f6cc042..4d53ec7adc61 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -961,6 +961,12 @@ static int bnxt_set_channels(struct net_device *dev, return rc; } + if (req_rx_rings < bp->rx_nr_rings && + req_rx_rings <= bnxt_get_max_rss_ctx_ring(bp)) { + netdev_warn(dev, "Can't deactivate rings used by RSS contexts\n"); + return -EINVAL; + } + if (bnxt_get_nr_rss_ctxs(bp, req_rx_rings) != bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) && netif_is_rxfh_configured(dev)) { diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 2e98a2a0bead..ce227b56cf72 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1109,6 +1109,46 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) } /** + * e1000e_force_smbus - Force interfaces to transition to SMBUS mode. + * @hw: pointer to the HW structure + * + * Force the MAC and the PHY to SMBUS mode. Assumes semaphore already + * acquired. + * + * Return: 0 on success, negative errno on failure. + **/ +static s32 e1000e_force_smbus(struct e1000_hw *hw) +{ + u16 smb_ctrl = 0; + u32 ctrl_ext; + s32 ret_val; + + /* Switching PHY interface always returns MDI error + * so disable retry mechanism to avoid wasting time + */ + e1000e_disable_phy_retry(hw); + + /* Force SMBus mode in the PHY */ + ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &smb_ctrl); + if (ret_val) { + e1000e_enable_phy_retry(hw); + return ret_val; + } + + smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS; + e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, smb_ctrl); + + e1000e_enable_phy_retry(hw); + + /* Force SMBus mode in the MAC */ + ctrl_ext = er32(CTRL_EXT); + ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS; + ew32(CTRL_EXT, ctrl_ext); + + return 0; +} + +/** * e1000_enable_ulp_lpt_lp - configure Ultra Low Power mode for LynxPoint-LP * @hw: pointer to the HW structure * @to_sx: boolean indicating a system power state transition to Sx @@ -1165,6 +1205,14 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) if (ret_val) goto out; + if (hw->mac.type != e1000_pch_mtp) { + ret_val = e1000e_force_smbus(hw); + if (ret_val) { + e_dbg("Failed to force SMBUS: %d\n", ret_val); + goto release; + } + } + /* Si workaround for ULP entry flow on i127/rev6 h/w. Enable * LPLU and disable Gig speed when entering ULP */ @@ -1225,27 +1273,12 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) } release: - /* Switching PHY interface always returns MDI error - * so disable retry mechanism to avoid wasting time - */ - e1000e_disable_phy_retry(hw); - - /* Force SMBus mode in PHY */ - ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); - if (ret_val) { - e1000e_enable_phy_retry(hw); - hw->phy.ops.release(hw); - goto out; + if (hw->mac.type == e1000_pch_mtp) { + ret_val = e1000e_force_smbus(hw); + if (ret_val) + e_dbg("Failed to force SMBUS over MTL system: %d\n", + ret_val); } - phy_reg |= CV_SMB_CTRL_FORCE_SMBUS; - e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); - - e1000e_enable_phy_retry(hw); - - /* Force SMBus mode in MAC */ - mac_reg = er32(CTRL_EXT); - mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS; - ew32(CTRL_EXT, mac_reg); hw->phy.ops.release(hw); out: diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 284c3fad5a6e..310513d9321b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13293,6 +13293,10 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, bool need_reset; int i; + /* VSI shall be deleted in a moment, block loading new programs */ + if (prog && test_bit(__I40E_IN_REMOVE, pf->state)) + return -EINVAL; + /* Don't allow frames that span over multiple buffers */ if (vsi->netdev->mtu > frame_size - I40E_PACKET_HDR_PAD) { NL_SET_ERR_MSG_MOD(extack, "MTU too large for linear frames and XDP prog does not support frags"); @@ -13301,14 +13305,9 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, /* When turning XDP on->off/off->on we reset and rebuild the rings. */ need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog); - if (need_reset) i40e_prep_for_reset(pf); - /* VSI shall be deleted in a moment, just return EINVAL */ - if (test_bit(__I40E_IN_REMOVE, pf->state)) - return -EINVAL; - old_prog = xchg(&vsi->xdp_prog, prog); if (need_reset) { diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 5352fee62d2b..0b9982804370 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -217,9 +217,9 @@ ltq_etop_free_channel(struct net_device *dev, struct ltq_etop_chan *ch) if (ch->dma.irq) free_irq(ch->dma.irq, priv); if (IS_RX(ch->idx)) { - int desc; + struct ltq_dma_channel *dma = &ch->dma; - for (desc = 0; desc < LTQ_DESC_NUM; desc++) + for (dma->desc = 0; dma->desc < LTQ_DESC_NUM; dma->desc++) dev_kfree_skb_any(ch->skb[ch->dma.desc]); } } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 5a4f774aed64..ac7ee3f3598c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -1643,7 +1643,7 @@ static int rvu_check_rsrc_availability(struct rvu *rvu, if (req->ssow > block->lf.max) { dev_err(&rvu->pdev->dev, "Func 0x%x: Invalid SSOW req, %d > max %d\n", - pcifunc, req->sso, block->lf.max); + pcifunc, req->ssow, block->lf.max); return -EINVAL; } mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->addr); diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index 31aebeb2e285..25989c79c92e 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1524,6 +1524,7 @@ static int mtk_star_probe(struct platform_device *pdev) { struct device_node *of_node; struct mtk_star_priv *priv; + struct phy_device *phydev; struct net_device *ndev; struct device *dev; void __iomem *base; @@ -1649,6 +1650,12 @@ static int mtk_star_probe(struct platform_device *pdev) netif_napi_add(ndev, &priv->rx_napi, mtk_star_rx_poll); netif_napi_add_tx(ndev, &priv->tx_napi, mtk_star_tx_poll); + phydev = of_phy_find_device(priv->phy_node); + if (phydev) { + phydev->mac_managed_pm = true; + put_device(&phydev->mdio.dev); + } + return devm_register_netdev(dev, ndev); } diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index 6453c92f0fa7..7fa1820db9cc 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -352,11 +352,11 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) netif_dbg(ks, intr, ks->netdev, "%s: txspace %d\n", __func__, tx_space); - spin_lock(&ks->statelock); + spin_lock_bh(&ks->statelock); ks->tx_space = tx_space; if (netif_queue_stopped(ks->netdev)) netif_wake_queue(ks->netdev); - spin_unlock(&ks->statelock); + spin_unlock_bh(&ks->statelock); } if (status & IRQ_SPIBEI) { @@ -482,6 +482,7 @@ static int ks8851_net_open(struct net_device *dev) ks8851_wrreg16(ks, KS_IER, ks->rc_ier); ks->queued_len = 0; + ks->tx_space = ks8851_rdreg16(ks, KS_TXMIR); netif_start_queue(ks->netdev); netif_dbg(ks, ifup, ks->netdev, "network device up\n"); @@ -635,14 +636,14 @@ static void ks8851_set_rx_mode(struct net_device *dev) /* schedule work to do the actual set of the data if needed */ - spin_lock(&ks->statelock); + spin_lock_bh(&ks->statelock); if (memcmp(&rxctrl, &ks->rxctrl, sizeof(rxctrl)) != 0) { memcpy(&ks->rxctrl, &rxctrl, sizeof(ks->rxctrl)); schedule_work(&ks->rxctrl_work); } - spin_unlock(&ks->statelock); + spin_unlock_bh(&ks->statelock); } static int ks8851_set_mac_address(struct net_device *dev, void *addr) @@ -1101,7 +1102,6 @@ int ks8851_probe_common(struct net_device *netdev, struct device *dev, int ret; ks->netdev = netdev; - ks->tx_space = 6144; ks->gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); ret = PTR_ERR_OR_ZERO(ks->gpio); diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c index 670c1de966db..3062cc0f9199 100644 --- a/drivers/net/ethernet/micrel/ks8851_spi.c +++ b/drivers/net/ethernet/micrel/ks8851_spi.c @@ -340,10 +340,10 @@ static void ks8851_tx_work(struct work_struct *work) tx_space = ks8851_rdreg16_spi(ks, KS_TXMIR); - spin_lock(&ks->statelock); + spin_lock_bh(&ks->statelock); ks->queued_len -= dequeued_len; ks->tx_space = tx_space; - spin_unlock(&ks->statelock); + spin_unlock_bh(&ks->statelock); ks8851_unlock_spi(ks, &flags); } diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c index a838b61cd844..a35528497a57 100644 --- a/drivers/net/phy/microchip_t1.c +++ b/drivers/net/phy/microchip_t1.c @@ -748,7 +748,7 @@ static int lan87xx_cable_test_report(struct phy_device *phydev) ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A, lan87xx_cable_test_report_trans(detect)); - return 0; + return phy_init_hw(phydev); } static int lan87xx_cable_test_get_status(struct phy_device *phydev, diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 0a65b6d690fe..eb9acfcaeb09 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -70,6 +70,7 @@ #define MPHDRLEN_SSN 4 /* ditto with short sequence numbers */ #define PPP_PROTO_LEN 2 +#define PPP_LCP_HDRLEN 4 /* * An instance of /dev/ppp can be associated with either a ppp @@ -493,6 +494,15 @@ static ssize_t ppp_read(struct file *file, char __user *buf, return ret; } +static bool ppp_check_packet(struct sk_buff *skb, size_t count) +{ + /* LCP packets must include LCP header which 4 bytes long: + * 1-byte code, 1-byte identifier, and 2-byte length. + */ + return get_unaligned_be16(skb->data) != PPP_LCP || + count >= PPP_PROTO_LEN + PPP_LCP_HDRLEN; +} + static ssize_t ppp_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -515,6 +525,11 @@ static ssize_t ppp_write(struct file *file, const char __user *buf, kfree_skb(skb); goto out; } + ret = -EINVAL; + if (unlikely(!ppp_check_packet(skb, count))) { + kfree_skb(skb); + goto out; + } switch (pf->kind) { case INTERFACE: diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c index 0ba714ca5185..4b8528206cc8 100644 --- a/drivers/net/wireguard/allowedips.c +++ b/drivers/net/wireguard/allowedips.c @@ -15,8 +15,8 @@ static void swap_endian(u8 *dst, const u8 *src, u8 bits) if (bits == 32) { *(u32 *)dst = be32_to_cpu(*(const __be32 *)src); } else if (bits == 128) { - ((u64 *)dst)[0] = be64_to_cpu(((const __be64 *)src)[0]); - ((u64 *)dst)[1] = be64_to_cpu(((const __be64 *)src)[1]); + ((u64 *)dst)[0] = get_unaligned_be64(src); + ((u64 *)dst)[1] = get_unaligned_be64(src + 8); } } diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h index 1ea4f874e367..7eb76724b3ed 100644 --- a/drivers/net/wireguard/queueing.h +++ b/drivers/net/wireguard/queueing.h @@ -124,10 +124,10 @@ static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id) */ static inline int wg_cpumask_next_online(int *last_cpu) { - int cpu = cpumask_next(*last_cpu, cpu_online_mask); + int cpu = cpumask_next(READ_ONCE(*last_cpu), cpu_online_mask); if (cpu >= nr_cpu_ids) cpu = cpumask_first(cpu_online_mask); - *last_cpu = cpu; + WRITE_ONCE(*last_cpu, cpu); return cpu; } diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c index 0d48e0f4a1ba..26e09c30d596 100644 --- a/drivers/net/wireguard/send.c +++ b/drivers/net/wireguard/send.c @@ -222,7 +222,7 @@ void wg_packet_send_keepalive(struct wg_peer *peer) { struct sk_buff *skb; - if (skb_queue_empty(&peer->staged_packet_queue)) { + if (skb_queue_empty_lockless(&peer->staged_packet_queue)) { skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH, GFP_ATOMIC); if (unlikely(!skb)) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 462375b293e4..c94203ce65bb 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -81,7 +81,8 @@ EXPORT_SYMBOL_GPL(of_irq_find_parent); /* * These interrupt controllers abuse interrupt-map for unspeakable * reasons and rely on the core code to *ignore* it (the drivers do - * their own parsing of the property). + * their own parsing of the property). The PAsemi entry covers a + * non-sensical interrupt-map that is better left ignored. * * If you think of adding to the list for something *new*, think * again. There is a high chance that you will be sent back to the @@ -95,6 +96,7 @@ static const char * const of_irq_imap_abusers[] = { "fsl,ls1043a-extirq", "fsl,ls1088a-extirq", "renesas,rza1-irqc", + "pasemi,rootbus", NULL, }; @@ -293,20 +295,8 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq) imaplen -= imap - oldimap; pr_debug(" -> imaplen=%d\n", imaplen); } - if (!match) { - if (intc) { - /* - * The PASEMI Nemo is a known offender, so - * let's only warn for anyone else. - */ - WARN(!IS_ENABLED(CONFIG_PPC_PASEMI), - "%pOF interrupt-map failed, using interrupt-controller\n", - ipar); - return 0; - } - + if (!match) goto fail; - } /* * Successfully parsed an interrupt-map translation; copy new diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index 78c490e0505a..0a02e85a8951 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -167,7 +167,7 @@ u64 riscv_pmu_event_update(struct perf_event *event) unsigned long cmask; u64 oldval, delta; - if (!rvpmu->ctr_read) + if (!rvpmu->ctr_read || (hwc->state & PERF_HES_UPTODATE)) return 0; cmask = riscv_pmu_ctr_get_width_mask(event); diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index a2e4005e1fd0..4e842dcedfba 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -20,6 +20,7 @@ #include <linux/cpu_pm.h> #include <linux/sched/clock.h> #include <linux/soc/andes/irq.h> +#include <linux/workqueue.h> #include <asm/errata_list.h> #include <asm/sbi.h> @@ -114,7 +115,7 @@ struct sbi_pmu_event_data { }; }; -static const struct sbi_pmu_event_data pmu_hw_event_map[] = { +static struct sbi_pmu_event_data pmu_hw_event_map[] = { [PERF_COUNT_HW_CPU_CYCLES] = {.hw_gen_event = { SBI_PMU_HW_CPU_CYCLES, SBI_PMU_EVENT_TYPE_HW, 0}}, @@ -148,7 +149,7 @@ static const struct sbi_pmu_event_data pmu_hw_event_map[] = { }; #define C(x) PERF_COUNT_HW_CACHE_##x -static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX] +static struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { [C(L1D)] = { @@ -293,6 +294,34 @@ static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_M }, }; +static void pmu_sbi_check_event(struct sbi_pmu_event_data *edata) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, + 0, cmask, 0, edata->event_idx, 0, 0); + if (!ret.error) { + sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, + ret.value, 0x1, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0); + } else if (ret.error == SBI_ERR_NOT_SUPPORTED) { + /* This event cannot be monitored by any counter */ + edata->event_idx = -EINVAL; + } +} + +static void pmu_sbi_check_std_events(struct work_struct *work) +{ + for (int i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++) + pmu_sbi_check_event(&pmu_hw_event_map[i]); + + for (int i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++) + for (int j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++) + for (int k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++) + pmu_sbi_check_event(&pmu_cache_event_map[i][j][k]); +} + +static DECLARE_WORK(check_std_events_work, pmu_sbi_check_std_events); + static int pmu_sbi_ctr_get_width(int idx) { return pmu_ctr_list[idx].width; @@ -478,6 +507,12 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig) u64 raw_config_val; int ret; + /* + * Ensure we are finished checking standard hardware events for + * validity before allowing userspace to configure any events. + */ + flush_work(&check_std_events_work); + switch (type) { case PERF_TYPE_HARDWARE: if (config >= PERF_COUNT_HW_MAX) @@ -762,7 +797,7 @@ static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu) * which may include counters that are not enabled yet. */ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, - 0, pmu->cmask, 0, 0, 0, 0); + 0, pmu->cmask, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0); } static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu) @@ -1359,6 +1394,9 @@ static int pmu_sbi_device_probe(struct platform_device *pdev) if (ret) goto out_unregister; + /* Asynchronously check which standard events are available */ + schedule_work(&check_std_events_work); + return 0; out_unregister: diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 10d0ce6c8342..78a5aac2dcfd 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -3299,6 +3299,7 @@ static const struct dmi_system_id toshiba_dmi_quirks[] __initconst = { }, .driver_data = (void *)(QUIRK_TURN_ON_PANEL_ON_RESUME | QUIRK_HCI_HOTKEY_QUICKSTART), }, + { } }; static int toshiba_acpi_add(struct acpi_device *acpi_dev) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 6b64af7d4927..1b7561abe05d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -4119,8 +4119,6 @@ static int sd_resume(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); - sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); - if (opal_unlock_from_suspend(sdkp->opal_dev)) { sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n"); return -EIO; @@ -4137,12 +4135,13 @@ static int sd_resume_common(struct device *dev, bool runtime) if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ return 0; + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); + if (!sd_do_start_stop(sdkp->device, runtime)) { sdkp->suspended = false; return 0; } - sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); ret = sd_start_stop_device(sdkp, 1); if (!ret) { sd_resume(dev); diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c index 45f04a25255a..1b2345a697c5 100644 --- a/drivers/thermal/gov_power_allocator.c +++ b/drivers/thermal/gov_power_allocator.c @@ -759,6 +759,9 @@ static void power_allocator_manage(struct thermal_zone_device *tz) return; } + if (!params->trip_max) + return; + allocate_power(tz, params->trip_max->temperature); params->update_cdevs = true; } diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 1b0ab2790860..ecc748d15eb7 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -300,6 +300,8 @@ static void monitor_thermal_zone(struct thermal_zone_device *tz) thermal_zone_device_set_polling(tz, tz->passive_delay_jiffies); else if (tz->polling_delay_jiffies) thermal_zone_device_set_polling(tz, tz->polling_delay_jiffies); + else if (tz->temperature == THERMAL_TEMP_INVALID) + thermal_zone_device_set_polling(tz, msecs_to_jiffies(THERMAL_RECHECK_DELAY_MS)); } static struct thermal_governor *thermal_get_tz_governor(struct thermal_zone_device *tz) @@ -482,16 +484,14 @@ static void thermal_trip_crossed(struct thermal_zone_device *tz, thermal_governor_trip_crossed(governor, tz, trip, crossed_up); } -static int thermal_trip_notify_cmp(void *ascending, const struct list_head *a, +static int thermal_trip_notify_cmp(void *not_used, const struct list_head *a, const struct list_head *b) { struct thermal_trip_desc *tda = container_of(a, struct thermal_trip_desc, notify_list_node); struct thermal_trip_desc *tdb = container_of(b, struct thermal_trip_desc, notify_list_node); - int ret = tdb->notify_temp - tda->notify_temp; - - return ascending ? ret : -ret; + return tda->notify_temp - tdb->notify_temp; } void __thermal_zone_device_update(struct thermal_zone_device *tz, @@ -511,7 +511,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, update_temperature(tz); if (tz->temperature == THERMAL_TEMP_INVALID) - return; + goto monitor; __thermal_zone_set_trips(tz); @@ -520,12 +520,12 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, for_each_trip_desc(tz, td) handle_thermal_trip(tz, td, &way_up_list, &way_down_list); - list_sort(&way_up_list, &way_up_list, thermal_trip_notify_cmp); + list_sort(NULL, &way_up_list, thermal_trip_notify_cmp); list_for_each_entry(td, &way_up_list, notify_list_node) thermal_trip_crossed(tz, &td->trip, governor, true); list_sort(NULL, &way_down_list, thermal_trip_notify_cmp); - list_for_each_entry(td, &way_down_list, notify_list_node) + list_for_each_entry_reverse(td, &way_down_list, notify_list_node) thermal_trip_crossed(tz, &td->trip, governor, false); if (governor->manage) @@ -533,6 +533,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, thermal_debug_update_trip_stats(tz); +monitor: monitor_thermal_zone(tz); } diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 66f67e54e0c8..94eeb4011a48 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -133,6 +133,12 @@ struct thermal_zone_device { struct thermal_trip_desc trips[] __counted_by(num_trips); }; +/* + * Default delay after a failing thermal zone temperature check before + * attempting to check it again. + */ +#define THERMAL_RECHECK_DELAY_MS 250 + /* Default Thermal Governor */ #if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE) #define DEFAULT_THERMAL_GOVERNOR "step_wise" diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index 8944548c30fa..c532416aec22 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -105,16 +105,15 @@ EXPORT_SYMBOL_GPL(ufshcd_mcq_config_mac); * @hba: per adapter instance * @req: pointer to the request to be issued * - * Return: the hardware queue instance on which the request would - * be queued. + * Return: the hardware queue instance on which the request will be or has + * been queued. %NULL if the request has already been freed. */ struct ufs_hw_queue *ufshcd_mcq_req_to_hwq(struct ufs_hba *hba, struct request *req) { - u32 utag = blk_mq_unique_tag(req); - u32 hwq = blk_mq_unique_tag_to_hwq(utag); + struct blk_mq_hw_ctx *hctx = READ_ONCE(req->mq_hctx); - return &hba->uhq[hwq]; + return hctx ? &hba->uhq[hctx->queue_num] : NULL; } /** @@ -515,6 +514,8 @@ int ufshcd_mcq_sq_cleanup(struct ufs_hba *hba, int task_tag) if (!cmd) return -EINVAL; hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd)); + if (!hwq) + return 0; } else { hwq = hba->dev_cmd_queue; } diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 1b65e6ae4137..46433ecf0c4d 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6456,6 +6456,8 @@ static bool ufshcd_abort_one(struct request *rq, void *priv) /* Release cmd in MCQ mode if abort succeeds */ if (is_mcq_enabled(hba) && (*ret == 0)) { hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd)); + if (!hwq) + return 0; spin_lock_irqsave(&hwq->cq_lock, flags); if (ufshcd_cmd_inflight(lrbp->cmd)) ufshcd_release_scsi_cmd(hba, lrbp); diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 987c7921affa..ba0ce0075b2f 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1260,7 +1260,7 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info( struct vfio_pci_hot_reset_info hdr; struct vfio_pci_fill_info fill = {}; bool slot = false; - int ret, count; + int ret, count = 0; if (copy_from_user(&hdr, arg, minsz)) return -EFAULT; diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 1de9fac3bcf4..658f11aebda1 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -3,6 +3,7 @@ #include "alloc_background.h" #include "alloc_foreground.h" #include "backpointers.h" +#include "bkey_buf.h" #include "btree_cache.h" #include "btree_io.h" #include "btree_key_cache.h" @@ -1553,13 +1554,13 @@ err: } static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, - struct btree_iter *alloc_iter) + struct btree_iter *alloc_iter, + struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; - struct btree_iter lru_iter; struct bch_alloc_v4 a_convert; const struct bch_alloc_v4 *a; - struct bkey_s_c alloc_k, lru_k; + struct bkey_s_c alloc_k; struct printbuf buf = PRINTBUF; int ret; @@ -1573,6 +1574,14 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, a = bch2_alloc_to_v4(alloc_k, &a_convert); + if (a->fragmentation_lru) { + ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START, + a->fragmentation_lru, + alloc_k, last_flushed); + if (ret) + return ret; + } + if (a->data_type != BCH_DATA_cached) return 0; @@ -1597,41 +1606,30 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, a = &a_mut->v; } - lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru, - lru_pos(alloc_k.k->p.inode, - bucket_to_u64(alloc_k.k->p), - a->io_time[READ]), 0); - ret = bkey_err(lru_k); + ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, a->io_time[READ], + alloc_k, last_flushed); if (ret) - return ret; - - if (fsck_err_on(lru_k.k->type != KEY_TYPE_set, c, - alloc_key_to_missing_lru_entry, - "missing lru entry\n" - " %s", - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { - ret = bch2_lru_set(trans, - alloc_k.k->p.inode, - bucket_to_u64(alloc_k.k->p), - a->io_time[READ]); - if (ret) - goto err; - } + goto err; err: fsck_err: - bch2_trans_iter_exit(trans, &lru_iter); printbuf_exit(&buf); return ret; } int bch2_check_alloc_to_lru_refs(struct bch_fs *c) { + struct bkey_buf last_flushed; + + bch2_bkey_buf_init(&last_flushed); + bkey_init(&last_flushed.k->k); + int ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_check_alloc_to_lru_ref(trans, &iter))); + bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed))); + + bch2_bkey_buf_exit(&last_flushed, c); bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 9d3d64746a5b..27d97c22ae27 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1703,6 +1703,7 @@ void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c) for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++) nr[c->open_buckets[i].data_type]++; + printbuf_tabstops_reset(out); printbuf_tabstop_push(out, 24); percpu_down_read(&c->mark_lock); @@ -1736,6 +1737,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++) nr[c->open_buckets[i].data_type]++; + printbuf_tabstops_reset(out); printbuf_tabstop_push(out, 12); printbuf_tabstop_push(out, 16); printbuf_tabstop_push(out, 16); diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 4321f9fb73bd..6d8b1bc90be0 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -434,13 +434,6 @@ int bch2_check_btree_backpointers(struct bch_fs *c) return ret; } -static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r) -{ - return bpos_eq(l.k->p, r.k->p) && - bkey_bytes(l.k) == bkey_bytes(r.k) && - !memcmp(l.v, r.v, bkey_val_bytes(l.k)); -} - struct extents_to_bp_state { struct bpos bucket_start; struct bpos bucket_end; @@ -536,11 +529,8 @@ static int check_bp_exists(struct btree_trans *trans, struct btree_iter other_extent_iter = {}; struct printbuf buf = PRINTBUF; struct bkey_s_c bp_k; - struct bkey_buf tmp; int ret = 0; - bch2_bkey_buf_init(&tmp); - struct bch_dev *ca = bch2_dev_bucket_tryget(c, bucket); if (!ca) { prt_str(&buf, "extent for nonexistent device:bucket "); @@ -565,22 +555,9 @@ static int check_bp_exists(struct btree_trans *trans, if (bp_k.k->type != KEY_TYPE_backpointer || memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { - bch2_bkey_buf_reassemble(&tmp, c, orig_k); - - if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) { - if (bp.level) { - bch2_trans_unlock(trans); - bch2_btree_interior_updates_flush(c); - } - - ret = bch2_btree_write_buffer_flush_sync(trans); - if (ret) - goto err; - - bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k); - ret = -BCH_ERR_transaction_restart_write_buffer_flush; - goto out; - } + ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed); + if (ret) + goto err; goto check_existing_bp; } @@ -589,7 +566,6 @@ err: fsck_err: bch2_trans_iter_exit(trans, &other_extent_iter); bch2_trans_iter_exit(trans, &bp_iter); - bch2_bkey_buf_exit(&tmp, c); bch2_dev_put(ca); printbuf_exit(&buf); return ret; @@ -794,6 +770,8 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, !((1U << btree) & btree_interior_mask)) continue; + bch2_trans_begin(trans); + __for_each_btree_node(trans, iter, btree, btree == start.btree ? start.pos : POS_MIN, 0, depth, BTREE_ITER_prefetch, b, ret) { @@ -905,7 +883,7 @@ static int check_one_backpointer(struct btree_trans *trans, struct bbpos start, struct bbpos end, struct bkey_s_c_backpointer bp, - struct bpos *last_flushed_pos) + struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; struct btree_iter iter; @@ -925,20 +903,18 @@ static int check_one_backpointer(struct btree_trans *trans, if (ret) return ret; - if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) { - *last_flushed_pos = bp.k->p; - ret = bch2_btree_write_buffer_flush_sync(trans) ?: - -BCH_ERR_transaction_restart_write_buffer_flush; - goto out; - } + if (!k.k) { + ret = bch2_btree_write_buffer_maybe_flush(trans, bp.s_c, last_flushed); + if (ret) + goto out; - if (fsck_err_on(!k.k, c, - backpointer_to_missing_ptr, - "backpointer for missing %s\n %s", - bp.v->level ? "btree node" : "extent", - (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { - ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); - goto out; + if (fsck_err(c, backpointer_to_missing_ptr, + "backpointer for missing %s\n %s", + bp.v->level ? "btree node" : "extent", + (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { + ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); + goto out; + } } out: fsck_err: @@ -951,14 +927,20 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, struct bbpos start, struct bbpos end) { - struct bpos last_flushed_pos = SPOS_MAX; + struct bkey_buf last_flushed; - return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, + bch2_bkey_buf_init(&last_flushed); + bkey_init(&last_flushed.k->k); + + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_one_backpointer(trans, start, end, bkey_s_c_to_backpointer(k), - &last_flushed_pos)); + &last_flushed)); + + bch2_bkey_buf_exit(&last_flushed, trans->c); + return ret; } int bch2_check_backpointers_to_extents(struct bch_fs *c) diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c index 94a1d1982fa8..587d7318a2e8 100644 --- a/fs/bcachefs/bkey.c +++ b/fs/bcachefs/bkey.c @@ -660,8 +660,9 @@ int bch2_bkey_format_invalid(struct bch_fs *c, bch2_bkey_format_field_overflows(f, i)) { unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i]; u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1)); - u64 packed_max = f->bits_per_field[i] - ? ~((~0ULL << 1) << (f->bits_per_field[i] - 1)) + unsigned packed_bits = min(64, f->bits_per_field[i]); + u64 packed_max = packed_bits + ? ~((~0ULL << 1) << (packed_bits - 1)) : 0; prt_printf(err, "field %u too large: %llu + %llu > %llu", diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index fcd43915df07..936357149cf0 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -194,6 +194,13 @@ static inline struct bpos bkey_max(struct bpos l, struct bpos r) return bkey_gt(l, r) ? l : r; } +static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r) +{ + return bpos_eq(l.k->p, r.k->p) && + bkey_bytes(l.k) == bkey_bytes(r.k) && + !memcmp(l.v, r.v, bkey_val_bytes(l.k)); +} + void bch2_bpos_swab(struct bpos *); void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 0e477a926579..7c72a9e6f511 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -903,6 +903,8 @@ static int bch2_alloc_write_key(struct btree_trans *trans, bch2_dev_usage_update(c, ca, &old_gc, &gc, 0, true); percpu_up_read(&c->mark_lock); + gc.fragmentation_lru = alloc_lru_idx_fragmentation(gc, ca); + if (fsck_err_on(new.data_type != gc.data_type, c, alloc_key_data_type_wrong, "bucket %llu:%llu gen %u has wrong data_type" @@ -916,23 +918,19 @@ static int bch2_alloc_write_key(struct btree_trans *trans, #define copy_bucket_field(_errtype, _f) \ if (fsck_err_on(new._f != gc._f, c, _errtype, \ "bucket %llu:%llu gen %u data type %s has wrong " #_f \ - ": got %u, should be %u", \ + ": got %llu, should be %llu", \ iter->pos.inode, iter->pos.offset, \ gc.gen, \ bch2_data_type_str(gc.data_type), \ - new._f, gc._f)) \ + (u64) new._f, (u64) gc._f)) \ new._f = gc._f; \ - copy_bucket_field(alloc_key_gen_wrong, - gen); - copy_bucket_field(alloc_key_dirty_sectors_wrong, - dirty_sectors); - copy_bucket_field(alloc_key_cached_sectors_wrong, - cached_sectors); - copy_bucket_field(alloc_key_stripe_wrong, - stripe); - copy_bucket_field(alloc_key_stripe_redundancy_wrong, - stripe_redundancy); + copy_bucket_field(alloc_key_gen_wrong, gen); + copy_bucket_field(alloc_key_dirty_sectors_wrong, dirty_sectors); + copy_bucket_field(alloc_key_cached_sectors_wrong, cached_sectors); + copy_bucket_field(alloc_key_stripe_wrong, stripe); + copy_bucket_field(alloc_key_stripe_redundancy_wrong, stripe_redundancy); + copy_bucket_field(alloc_key_fragmentation_lru_wrong, fragmentation_lru); #undef copy_bucket_field if (!bch2_alloc_v4_cmp(*old, new)) @@ -946,7 +944,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans, a->v = new; /* - * The trigger normally makes sure this is set, but we're not running + * The trigger normally makes sure these are set, but we're not running * triggers: */ if (a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ]) diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 75c8a196b3f6..d0e92d948002 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -1,11 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "bkey_buf.h" #include "btree_locking.h" #include "btree_update.h" #include "btree_update_interior.h" #include "btree_write_buffer.h" #include "error.h" +#include "extents.h" #include "journal.h" #include "journal_io.h" #include "journal_reclaim.h" @@ -492,6 +494,41 @@ int bch2_btree_write_buffer_tryflush(struct btree_trans *trans) return ret; } +/** + * In check and repair code, when checking references to write buffer btrees we + * need to issue a flush before we have a definitive error: this issues a flush + * if this is a key we haven't yet checked. + */ +int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans, + struct bkey_s_c referring_k, + struct bkey_buf *last_flushed) +{ + struct bch_fs *c = trans->c; + struct bkey_buf tmp; + int ret = 0; + + bch2_bkey_buf_init(&tmp); + + if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) { + bch2_bkey_buf_reassemble(&tmp, c, referring_k); + + if (bkey_is_btree_ptr(referring_k.k)) { + bch2_trans_unlock(trans); + bch2_btree_interior_updates_flush(c); + } + + ret = bch2_btree_write_buffer_flush_sync(trans); + if (ret) + goto err; + + bch2_bkey_buf_copy(last_flushed, c, tmp.k); + ret = -BCH_ERR_transaction_restart_write_buffer_flush; + } +err: + bch2_bkey_buf_exit(&tmp, c); + return ret; +} + static void bch2_btree_write_buffer_flush_work(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, btree_write_buffer.flush_work); diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h index eebcd2b15249..dd5e64218b50 100644 --- a/fs/bcachefs/btree_write_buffer.h +++ b/fs/bcachefs/btree_write_buffer.h @@ -23,6 +23,9 @@ int bch2_btree_write_buffer_flush_sync(struct btree_trans *); int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *); int bch2_btree_write_buffer_tryflush(struct btree_trans *); +struct bkey_buf; +int bch2_btree_write_buffer_maybe_flush(struct btree_trans *, struct bkey_s_c, struct bkey_buf *); + struct journal_keys_to_wb { struct btree_write_buffer_keys *wb; size_t room; diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c index 363644451106..0f40b585ce2b 100644 --- a/fs/bcachefs/clock.c +++ b/fs/bcachefs/clock.c @@ -132,14 +132,9 @@ static struct io_timer *get_expired_timer(struct io_clock *clock, { struct io_timer *ret = NULL; - spin_lock(&clock->timer_lock); - if (clock->timers.used && time_after_eq(now, clock->timers.data[0]->expire)) heap_pop(&clock->timers, ret, io_timer_cmp, NULL); - - spin_unlock(&clock->timer_lock); - return ret; } @@ -148,8 +143,10 @@ void __bch2_increment_clock(struct io_clock *clock, unsigned sectors) struct io_timer *timer; unsigned long now = atomic64_add_return(sectors, &clock->now); + spin_lock(&clock->timer_lock); while ((timer = get_expired_timer(clock, now))) timer->fn(timer); + spin_unlock(&clock->timer_lock); } void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 1a0072eef109..0087b8555ead 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -5,7 +5,9 @@ #include "bkey_buf.h" #include "btree_update.h" #include "buckets.h" +#include "compress.h" #include "data_update.h" +#include "disk_groups.h" #include "ec.h" #include "error.h" #include "extents.h" @@ -454,6 +456,38 @@ static void bch2_update_unwritten_extent(struct btree_trans *trans, } } +void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) +{ + printbuf_tabstop_push(out, 20); + prt_str(out, "rewrite ptrs:\t"); + bch2_prt_u64_base2(out, data_opts->rewrite_ptrs); + prt_newline(out); + + prt_str(out, "kill ptrs:\t"); + bch2_prt_u64_base2(out, data_opts->kill_ptrs); + prt_newline(out); + + prt_str(out, "target:\t"); + bch2_target_to_text(out, c, data_opts->target); + prt_newline(out); + + prt_str(out, "compression:\t"); + bch2_compression_opt_to_text(out, background_compression(*io_opts)); + prt_newline(out); + + prt_str(out, "extra replicas:\t"); + prt_u64(out, data_opts->extra_replicas); +} + +void bch2_data_update_to_text(struct printbuf *out, struct data_update *m) +{ + bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k)); + prt_newline(out); + bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts); +} + int bch2_extent_drop_ptrs(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, @@ -643,6 +677,16 @@ int bch2_data_update_init(struct btree_trans *trans, if (!(durability_have + durability_removing)) m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1); + if (!m->op.nr_replicas) { + struct printbuf buf = PRINTBUF; + + bch2_data_update_to_text(&buf, m); + WARN(1, "trying to move an extent, but nr_replicas=0\n%s", buf.buf); + printbuf_exit(&buf); + ret = -BCH_ERR_data_update_done; + goto done; + } + m->op.nr_replicas_required = m->op.nr_replicas; if (reserve_sectors) { diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h index 991095bbd469..8d36365bdea8 100644 --- a/fs/bcachefs/data_update.h +++ b/fs/bcachefs/data_update.h @@ -17,6 +17,9 @@ struct data_update_opts { unsigned write_flags; }; +void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *, + struct bch_io_opts *, struct data_update_opts *); + struct data_update { /* extent being updated: */ enum btree_id btree_id; @@ -27,6 +30,8 @@ struct data_update { struct bch_write_op op; }; +void bch2_data_update_to_text(struct printbuf *, struct data_update *); + int bch2_data_update_index_update(struct bch_write_op *); void bch2_data_update_read_done(struct data_update *, diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index f0d4727c4dc2..ebabab171fe5 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -610,7 +610,7 @@ restart: list_sort(&c->btree_trans_list, list_ptr_order_cmp); list_for_each_entry(trans, &c->btree_trans_list, list) { - if ((ulong) trans < i->iter) + if ((ulong) trans <= i->iter) continue; i->iter = (ulong) trans; @@ -832,16 +832,16 @@ static const struct file_operations btree_transaction_stats_op = { static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c) { struct btree_trans *trans; - pid_t iter = 0; + ulong iter = 0; restart: seqmutex_lock(&c->btree_trans_lock); - list_for_each_entry(trans, &c->btree_trans_list, list) { - struct task_struct *task = READ_ONCE(trans->locking_wait.task); + list_sort(&c->btree_trans_list, list_ptr_order_cmp); - if (!task || task->pid <= iter) + list_for_each_entry(trans, &c->btree_trans_list, list) { + if ((ulong) trans <= iter) continue; - iter = task->pid; + iter = (ulong) trans; if (!closure_get_not_zero(&trans->ref)) continue; diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h index 24840aee335c..795f4fc0bab1 100644 --- a/fs/bcachefs/eytzinger.h +++ b/fs/bcachefs/eytzinger.h @@ -48,7 +48,7 @@ static inline unsigned eytzinger1_right_child(unsigned i) static inline unsigned eytzinger1_first(unsigned size) { - return rounddown_pow_of_two(size); + return size ? rounddown_pow_of_two(size) : 0; } static inline unsigned eytzinger1_last(unsigned size) @@ -101,7 +101,9 @@ static inline unsigned eytzinger1_prev(unsigned i, unsigned size) static inline unsigned eytzinger1_extra(unsigned size) { - return (size + 1 - rounddown_pow_of_two(size)) << 1; + return size + ? (size + 1 - rounddown_pow_of_two(size)) << 1 + : 0; } static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size, diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index f9c9a95d7d4c..74a1e12a7a14 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -194,6 +194,12 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino * discard_new_inode() expects it to be set... */ inode->v.i_flags |= I_NEW; + /* + * We don't want bch2_evict_inode() to delete the inode on disk, + * we just raced and had another inode in cache. Normally new + * inodes don't have nlink == 0 - except tmpfiles do... + */ + set_nlink(&inode->v, 1); discard_new_inode(&inode->v); inode = old; } else { @@ -2026,6 +2032,8 @@ err_put_super: __bch2_fs_stop(c); deactivate_locked_super(sb); err: + if (ret) + pr_err("error: %s", bch2_err_str(ret)); /* * On an inconsistency error in recovery we might see an -EROFS derived * errorcode (from the journal), but we don't want to return that to @@ -2065,7 +2073,8 @@ int __init bch2_vfs_init(void) { int ret = -ENOMEM; - bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT); + bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT | + SLAB_ACCOUNT); if (!bch2_inode_cache) goto err; diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index c97fa7002b06..ebf39ef72fb2 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -389,7 +389,6 @@ retry: bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); - bch2_trans_unlock(trans); if (!bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, @@ -1004,6 +1003,9 @@ get_bio: rbio->promote = promote; INIT_WORK(&rbio->work, NULL); + if (flags & BCH_READ_NODECODE) + orig->pick = pick; + rbio->bio.bi_opf = orig->bio.bi_opf; rbio->bio.bi_iter.bi_sector = pick.ptr.offset; rbio->bio.bi_end_io = bch2_read_endio; diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 13669dd0e375..10b19791ec98 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1095,7 +1095,7 @@ unlock: return ret; } -int bch2_dev_journal_alloc(struct bch_dev *ca) +int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs) { unsigned nr; int ret; @@ -1117,7 +1117,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca) min(1 << 13, (1 << 24) / ca->mi.bucket_size)); - ret = __bch2_set_nr_journal_buckets(ca, nr, true, NULL); + ret = __bch2_set_nr_journal_buckets(ca, nr, new_fs, NULL); err: bch_err_fn(ca, ret); return ret; @@ -1129,7 +1129,7 @@ int bch2_fs_journal_alloc(struct bch_fs *c) if (ca->journal.nr) continue; - int ret = bch2_dev_journal_alloc(ca); + int ret = bch2_dev_journal_alloc(ca, true); if (ret) { percpu_ref_put(&ca->io_ref); return ret; @@ -1184,9 +1184,11 @@ void bch2_fs_journal_stop(struct journal *j) journal_quiesce(j); cancel_delayed_work_sync(&j->write_work); - BUG_ON(!bch2_journal_error(j) && - test_bit(JOURNAL_replay_done, &j->flags) && - j->last_empty_seq != journal_cur_seq(j)); + WARN(!bch2_journal_error(j) && + test_bit(JOURNAL_replay_done, &j->flags) && + j->last_empty_seq != journal_cur_seq(j), + "journal shutdown error: cur seq %llu but last empty seq %llu", + journal_cur_seq(j), j->last_empty_seq); if (!bch2_journal_error(j)) clear_bit(JOURNAL_running, &j->flags); @@ -1418,8 +1420,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) unsigned long now = jiffies; u64 nr_writes = j->nr_flush_writes + j->nr_noflush_writes; - if (!out->nr_tabstops) - printbuf_tabstop_push(out, 28); + printbuf_tabstops_reset(out); + printbuf_tabstop_push(out, 28); out->atomic++; rcu_read_lock(); diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index fd1f7cdaa8bc..bc6b9c39dcb4 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -433,7 +433,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *); int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *, unsigned nr); -int bch2_dev_journal_alloc(struct bch_dev *); +int bch2_dev_journal_alloc(struct bch_dev *, bool); int bch2_fs_journal_alloc(struct bch_fs *); void bch2_dev_journal_stop(struct journal *, struct bch_dev *); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index db24ce21b2ac..2326e2cb9cd2 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -415,6 +415,8 @@ static int journal_entry_btree_keys_validate(struct bch_fs *c, flags|BCH_VALIDATE_journal); if (ret == FSCK_DELETED_KEY) continue; + else if (ret) + return ret; k = bkey_next(k); } @@ -1762,11 +1764,13 @@ static CLOSURE_CALLBACK(journal_write_preflush) if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) { spin_lock(&j->lock); - closure_wait(&j->async_wait, cl); + if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) { + closure_wait(&j->async_wait, cl); + spin_unlock(&j->lock); + continue_at(cl, journal_write_preflush, j->wq); + return; + } spin_unlock(&j->lock); - - continue_at(cl, journal_write_preflush, j->wq); - return; } if (w->separate_flush) { diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index a40d116224ed..b12894ef44f3 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -77,6 +77,45 @@ static const char * const bch2_lru_types[] = { NULL }; +int bch2_lru_check_set(struct btree_trans *trans, + u16 lru_id, u64 time, + struct bkey_s_c referring_k, + struct bkey_buf *last_flushed) +{ + struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; + struct btree_iter lru_iter; + struct bkey_s_c lru_k = + bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru, + lru_pos(lru_id, + bucket_to_u64(referring_k.k->p), + time), 0); + int ret = bkey_err(lru_k); + if (ret) + return ret; + + if (lru_k.k->type != KEY_TYPE_set) { + ret = bch2_btree_write_buffer_maybe_flush(trans, referring_k, last_flushed); + if (ret) + goto err; + + if (fsck_err(c, alloc_key_to_missing_lru_entry, + "missing %s lru entry\n" + " %s", + bch2_lru_types[lru_type(lru_k)], + (bch2_bkey_val_to_text(&buf, c, referring_k), buf.buf))) { + ret = bch2_lru_set(trans, lru_id, bucket_to_u64(referring_k.k->p), time); + if (ret) + goto err; + } + } +err: +fsck_err: + bch2_trans_iter_exit(trans, &lru_iter); + printbuf_exit(&buf); + return ret; +} + static int bch2_check_lru_key(struct btree_trans *trans, struct btree_iter *lru_iter, struct bkey_s_c lru_k, diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h index bd71ba77de07..ed75bcf59d47 100644 --- a/fs/bcachefs/lru.h +++ b/fs/bcachefs/lru.h @@ -61,6 +61,9 @@ int bch2_lru_del(struct btree_trans *, u16, u64, u64); int bch2_lru_set(struct btree_trans *, u16, u64, u64); int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64); +struct bkey_buf; +int bch2_lru_check_set(struct btree_trans *, u16, u64, struct bkey_s_c, struct bkey_buf *); + int bch2_check_lrus(struct bch_fs *); #endif /* _BCACHEFS_LRU_H */ diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 6e477fadaa2a..e714e3bd5bbb 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -36,31 +36,6 @@ const char * const bch2_data_ops_strs[] = { NULL }; -static void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, - struct bch_io_opts *io_opts, - struct data_update_opts *data_opts) -{ - printbuf_tabstop_push(out, 20); - prt_str(out, "rewrite ptrs:\t"); - bch2_prt_u64_base2(out, data_opts->rewrite_ptrs); - prt_newline(out); - - prt_str(out, "kill ptrs:\t"); - bch2_prt_u64_base2(out, data_opts->kill_ptrs); - prt_newline(out); - - prt_str(out, "target:\t"); - bch2_target_to_text(out, c, data_opts->target); - prt_newline(out); - - prt_str(out, "compression:\t"); - bch2_compression_opt_to_text(out, background_compression(*io_opts)); - prt_newline(out); - - prt_str(out, "extra replicas:\t"); - prt_u64(out, data_opts->extra_replicas); -} - static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index d6f35a99c429..d54121ec093f 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -286,7 +286,8 @@ enum bch_fsck_flags { x(accounting_mismatch, 272, 0) \ x(accounting_replicas_not_marked, 273, 0) \ x(invalid_btree_id, 274, 0) \ - x(alloc_key_io_time_bad, 275, 0) + x(alloc_key_io_time_bad, 275, 0) \ + x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index fb906467201e..da735608d47c 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -563,8 +563,11 @@ static void __bch2_fs_free(struct bch_fs *c) BUG_ON(atomic_read(&c->journal_keys.ref)); bch2_fs_btree_write_buffer_exit(c); percpu_free_rwsem(&c->mark_lock); - EBUG_ON(c->online_reserved && percpu_u64_get(c->online_reserved)); - free_percpu(c->online_reserved); + if (c->online_reserved) { + u64 v = percpu_u64_get(c->online_reserved); + WARN(v, "online_reserved not 0 at shutdown: %lli", v); + free_percpu(c->online_reserved); + } darray_exit(&c->btree_roots_extra); free_percpu(c->pcpu); @@ -1769,7 +1772,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) if (ret) goto err; - ret = bch2_dev_journal_alloc(ca); + ret = bch2_dev_journal_alloc(ca, true); bch_err_msg(c, ret, "allocating journal"); if (ret) goto err; @@ -1929,7 +1932,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path) } if (!ca->journal.nr) { - ret = bch2_dev_journal_alloc(ca); + ret = bch2_dev_journal_alloc(ca, false); bch_err_msg(ca, ret, "allocating journal"); if (ret) goto err; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f688fab55251..958155cc43a8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3553,7 +3553,7 @@ err: for (int i = 0; i < num_folios; i++) { if (eb->folios[i]) { detach_extent_buffer_folio(eb, eb->folios[i]); - __folio_put(eb->folios[i]); + folio_put(eb->folios[i]); } } __free_extent_buffer(eb); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 753db965f7c0..3a2b902b2d1f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10385,7 +10385,7 @@ out_unlock: out_folios: for (i = 0; i < nr_folios; i++) { if (folios[i]) - __folio_put(folios[i]); + folio_put(folios[i]); } kvfree(folios); out: diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index bf0f81d59b6b..39a15cca58ca 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3062,8 +3062,6 @@ int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info, struct btrfs_qgroup_inherit *inherit, size_t size) { - if (!btrfs_qgroup_enabled(fs_info)) - return 0; if (inherit->flags & ~BTRFS_QGROUP_INHERIT_FLAGS_SUPP) return -EOPNOTSUPP; if (size < sizeof(*inherit) || size > PAGE_SIZE) @@ -3085,6 +3083,14 @@ int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info, return -EINVAL; /* + * Skip the inherit source qgroups check if qgroup is not enabled. + * Qgroup can still be later enabled causing problems, but in that case + * btrfs_qgroup_inherit() would just ignore those invalid ones. + */ + if (!btrfs_qgroup_enabled(fs_info)) + return 0; + + /* * Now check all the remaining qgroups, they should all: * * - Exist diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index cf531255ab76..9522a8b79d22 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -441,7 +441,8 @@ static int process_extent_item(struct btrfs_fs_info *fs_info, u32 item_size = btrfs_item_size(leaf, slot); unsigned long end, ptr; u64 offset, flags, count; - int type, ret; + int type; + int ret = 0; ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); flags = btrfs_extent_flags(leaf, ei); @@ -486,7 +487,11 @@ static int process_extent_item(struct btrfs_fs_info *fs_info, key->objectid, key->offset); break; case BTRFS_EXTENT_OWNER_REF_KEY: - WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)); + if (!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)) { + btrfs_err(fs_info, + "found extent owner ref without simple quotas enabled"); + ret = -EINVAL; + } break; default: btrfs_err(fs_info, "invalid key type in iref"); diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index d620323d08ea..ae8c56442549 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -373,11 +373,18 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info, * "optimal" chunk size based on the fs size. However when we actually * allocate the chunk we will strip this down further, making it no more * than 10% of the disk or 1G, whichever is smaller. + * + * On the zoned mode, we need to use zone_size (= + * data_sinfo->chunk_size) as it is. */ data_sinfo = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); - data_chunk_size = min(data_sinfo->chunk_size, - mult_perc(fs_info->fs_devices->total_rw_bytes, 10)); - data_chunk_size = min_t(u64, data_chunk_size, SZ_1G); + if (!btrfs_is_zoned(fs_info)) { + data_chunk_size = min(data_sinfo->chunk_size, + mult_perc(fs_info->fs_devices->total_rw_bytes, 10)); + data_chunk_size = min_t(u64, data_chunk_size, SZ_1G); + } else { + data_chunk_size = data_sinfo->chunk_size; + } /* * Since data allocations immediately use block groups as part of the @@ -405,6 +412,17 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info, avail >>= 3; else avail >>= 1; + + /* + * On the zoned mode, we always allocate one zone as one chunk. + * Returning non-zone size alingned bytes here will result in + * less pressure for the async metadata reclaim process, and it + * will over-commit too much leading to ENOSPC. Align down to the + * zone size to avoid that. + */ + if (btrfs_is_zoned(fs_info)) + avail = ALIGN_DOWN(avail, fs_info->zone_size); + return avail; } diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c index f449f7340aad..9fb06dc16520 100644 --- a/fs/cachefiles/cache.c +++ b/fs/cachefiles/cache.c @@ -8,6 +8,7 @@ #include <linux/slab.h> #include <linux/statfs.h> #include <linux/namei.h> +#include <trace/events/fscache.h> #include "internal.h" /* @@ -312,19 +313,59 @@ static void cachefiles_withdraw_objects(struct cachefiles_cache *cache) } /* - * Withdraw volumes. + * Withdraw fscache volumes. + */ +static void cachefiles_withdraw_fscache_volumes(struct cachefiles_cache *cache) +{ + struct list_head *cur; + struct cachefiles_volume *volume; + struct fscache_volume *vcookie; + + _enter(""); +retry: + spin_lock(&cache->object_list_lock); + list_for_each(cur, &cache->volumes) { + volume = list_entry(cur, struct cachefiles_volume, cache_link); + + if (atomic_read(&volume->vcookie->n_accesses) == 0) + continue; + + vcookie = fscache_try_get_volume(volume->vcookie, + fscache_volume_get_withdraw); + if (vcookie) { + spin_unlock(&cache->object_list_lock); + fscache_withdraw_volume(vcookie); + fscache_put_volume(vcookie, fscache_volume_put_withdraw); + goto retry; + } + } + spin_unlock(&cache->object_list_lock); + + _leave(""); +} + +/* + * Withdraw cachefiles volumes. */ static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache) { _enter(""); for (;;) { + struct fscache_volume *vcookie = NULL; struct cachefiles_volume *volume = NULL; spin_lock(&cache->object_list_lock); if (!list_empty(&cache->volumes)) { volume = list_first_entry(&cache->volumes, struct cachefiles_volume, cache_link); + vcookie = fscache_try_get_volume(volume->vcookie, + fscache_volume_get_withdraw); + if (!vcookie) { + spin_unlock(&cache->object_list_lock); + cpu_relax(); + continue; + } list_del_init(&volume->cache_link); } spin_unlock(&cache->object_list_lock); @@ -332,6 +373,7 @@ static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache) break; cachefiles_withdraw_volume(volume); + fscache_put_volume(vcookie, fscache_volume_put_withdraw); } _leave(""); @@ -371,6 +413,7 @@ void cachefiles_withdraw_cache(struct cachefiles_cache *cache) pr_info("File cache on %s unregistering\n", fscache->name); fscache_withdraw_cache(fscache); + cachefiles_withdraw_fscache_volumes(cache); /* we now have to destroy all the active objects pertaining to this * cache - which we do by passing them off to thread pool to be diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index 06cdf1a8a16f..89b11336a836 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -366,14 +366,14 @@ static __poll_t cachefiles_daemon_poll(struct file *file, if (cachefiles_in_ondemand_mode(cache)) { if (!xa_empty(&cache->reqs)) { - rcu_read_lock(); + xas_lock(&xas); xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) { if (!cachefiles_ondemand_is_reopening_read(req)) { mask |= EPOLLIN; break; } } - rcu_read_unlock(); + xas_unlock(&xas); } } else { if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags)) diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 6845a90cdfcc..7b99bd98de75 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -48,6 +48,7 @@ enum cachefiles_object_state { CACHEFILES_ONDEMAND_OBJSTATE_CLOSE, /* Anonymous fd closed by daemon or initial state */ CACHEFILES_ONDEMAND_OBJSTATE_OPEN, /* Anonymous fd associated with object is available */ CACHEFILES_ONDEMAND_OBJSTATE_REOPENING, /* Object that was closed and is being reopened. */ + CACHEFILES_ONDEMAND_OBJSTATE_DROPPING, /* Object is being dropped. */ }; struct cachefiles_ondemand_info { @@ -128,6 +129,7 @@ struct cachefiles_cache { unsigned long req_id_next; struct xarray ondemand_ids; /* xarray for ondemand_id allocation */ u32 ondemand_id_next; + u32 msg_id_next; }; static inline bool cachefiles_in_ondemand_mode(struct cachefiles_cache *cache) @@ -335,6 +337,7 @@ cachefiles_ondemand_set_object_##_state(struct cachefiles_object *object) \ CACHEFILES_OBJECT_STATE_FUNCS(open, OPEN); CACHEFILES_OBJECT_STATE_FUNCS(close, CLOSE); CACHEFILES_OBJECT_STATE_FUNCS(reopening, REOPENING); +CACHEFILES_OBJECT_STATE_FUNCS(dropping, DROPPING); static inline bool cachefiles_ondemand_is_reopening_read(struct cachefiles_req *req) { diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index bce005f2b456..470c96658385 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -517,7 +517,8 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, */ xas_lock(&xas); - if (test_bit(CACHEFILES_DEAD, &cache->flags)) { + if (test_bit(CACHEFILES_DEAD, &cache->flags) || + cachefiles_ondemand_object_is_dropping(object)) { xas_unlock(&xas); ret = -EIO; goto out; @@ -527,20 +528,32 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, smp_mb(); if (opcode == CACHEFILES_OP_CLOSE && - !cachefiles_ondemand_object_is_open(object)) { + !cachefiles_ondemand_object_is_open(object)) { WARN_ON_ONCE(object->ondemand->ondemand_id == 0); xas_unlock(&xas); ret = -EIO; goto out; } - xas.xa_index = 0; + /* + * Cyclically find a free xas to avoid msg_id reuse that would + * cause the daemon to successfully copen a stale msg_id. + */ + xas.xa_index = cache->msg_id_next; xas_find_marked(&xas, UINT_MAX, XA_FREE_MARK); + if (xas.xa_node == XAS_RESTART) { + xas.xa_index = 0; + xas_find_marked(&xas, cache->msg_id_next - 1, XA_FREE_MARK); + } if (xas.xa_node == XAS_RESTART) xas_set_err(&xas, -EBUSY); + xas_store(&xas, req); - xas_clear_mark(&xas, XA_FREE_MARK); - xas_set_mark(&xas, CACHEFILES_REQ_NEW); + if (xas_valid(&xas)) { + cache->msg_id_next = xas.xa_index + 1; + xas_clear_mark(&xas, XA_FREE_MARK); + xas_set_mark(&xas, CACHEFILES_REQ_NEW); + } xas_unlock(&xas); } while (xas_nomem(&xas, GFP_KERNEL)); @@ -568,7 +581,8 @@ out: * If error occurs after creating the anonymous fd, * cachefiles_ondemand_fd_release() will set object to close. */ - if (opcode == CACHEFILES_OP_OPEN) + if (opcode == CACHEFILES_OP_OPEN && + !cachefiles_ondemand_object_is_dropping(object)) cachefiles_ondemand_set_object_close(object); kfree(req); return ret; @@ -667,8 +681,34 @@ int cachefiles_ondemand_init_object(struct cachefiles_object *object) void cachefiles_ondemand_clean_object(struct cachefiles_object *object) { + unsigned long index; + struct cachefiles_req *req; + struct cachefiles_cache *cache; + + if (!object->ondemand) + return; + cachefiles_ondemand_send_req(object, CACHEFILES_OP_CLOSE, 0, cachefiles_ondemand_init_close_req, NULL); + + if (!object->ondemand->ondemand_id) + return; + + /* Cancel all requests for the object that is being dropped. */ + cache = object->volume->cache; + xa_lock(&cache->reqs); + cachefiles_ondemand_set_object_dropping(object); + xa_for_each(&cache->reqs, index, req) { + if (req->object == object) { + req->error = -EIO; + complete(&req->done); + __xa_erase(&cache->reqs, index); + } + } + xa_unlock(&cache->reqs); + + /* Wait for ondemand_object_worker() to finish to avoid UAF. */ + cancel_work_sync(&object->ondemand->ondemand_work); } int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object, diff --git a/fs/cachefiles/volume.c b/fs/cachefiles/volume.c index 89df0ba8ba5e..781aac4ef274 100644 --- a/fs/cachefiles/volume.c +++ b/fs/cachefiles/volume.c @@ -133,7 +133,6 @@ void cachefiles_free_volume(struct fscache_volume *vcookie) void cachefiles_withdraw_volume(struct cachefiles_volume *volume) { - fscache_withdraw_volume(volume->vcookie); cachefiles_set_volume_xattr(volume); __cachefiles_free_volume(volume); } diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index bcb6173943ee..4dd8a993c60a 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -110,9 +110,11 @@ int cachefiles_check_auxdata(struct cachefiles_object *object, struct file *file if (xlen == 0) xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, tlen); if (xlen != tlen) { - if (xlen < 0) + if (xlen < 0) { + ret = xlen; trace_cachefiles_vfs_error(object, file_inode(file), xlen, cachefiles_trace_getxattr_error); + } if (xlen == -EIO) cachefiles_io_error_obj( object, @@ -252,6 +254,7 @@ int cachefiles_check_volume_xattr(struct cachefiles_volume *volume) xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, len); if (xlen != len) { if (xlen < 0) { + ret = xlen; trace_cachefiles_vfs_error(NULL, d_inode(dentry), xlen, cachefiles_trace_getxattr_error); if (xlen == -EIO) diff --git a/fs/dcache.c b/fs/dcache.c index d58dc9e58f3b..4c144519aa70 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -355,7 +355,11 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry) flags &= ~DCACHE_ENTRY_TYPE; WRITE_ONCE(dentry->d_flags, flags); dentry->d_inode = NULL; - if (flags & DCACHE_LRU_LIST) + /* + * The negative counter only tracks dentries on the LRU. Don't inc if + * d_lru is on another list. + */ + if ((flags & (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST) this_cpu_inc(nr_dentry_negative); } @@ -1844,9 +1848,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) spin_lock(&dentry->d_lock); /* - * Decrement negative dentry count if it was in the LRU list. + * The negative counter only tracks dentries on the LRU. Don't dec if + * d_lru is on another list. */ - if (dentry->d_flags & DCACHE_LRU_LIST) + if ((dentry->d_flags & + (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST) this_cpu_dec(nr_dentry_negative); hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); raw_write_seqcount_begin(&dentry->d_seq); diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index 5a400259ae74..9a1a93e3888b 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -696,7 +696,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) return err; } - strbuf = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + + strbuf = kzalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL); if (!strbuf) { res = -ENOMEM; diff --git a/fs/locks.c b/fs/locks.c index c360d1992d21..bdd94c32256f 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1367,9 +1367,9 @@ retry: locks_wake_up_blocks(&left->c); } out: + trace_posix_lock_inode(inode, request, error); spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); - trace_posix_lock_inode(inode, request, error); /* * Free any unused locks. */ diff --git a/fs/minix/namei.c b/fs/minix/namei.c index d6031acc34f0..a944a0f17b53 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -213,8 +213,7 @@ static int minix_rename(struct mnt_idmap *idmap, if (!new_de) goto out_dir; err = minix_set_link(new_de, new_page, old_inode); - kunmap(new_page); - put_page(new_page); + unmap_and_put_page(new_page, new_de); if (err) goto out_dir; inode_set_ctime_current(new_inode); diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index a6bb03bea920..4c0401dbbfcf 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -117,7 +117,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { if (folio->index == rreq->no_unlock_folio && test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) - _debug("no unlock"); + kdebug("no unlock"); else folio_unlock(folio); } @@ -204,7 +204,7 @@ void netfs_readahead(struct readahead_control *ractl) struct netfs_inode *ctx = netfs_inode(ractl->mapping->host); int ret; - _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl)); + kenter("%lx,%x", readahead_index(ractl), readahead_count(ractl)); if (readahead_count(ractl) == 0) return; @@ -268,7 +268,7 @@ int netfs_read_folio(struct file *file, struct folio *folio) struct folio *sink = NULL; int ret; - _enter("%lx", folio->index); + kenter("%lx", folio->index); rreq = netfs_alloc_request(mapping, file, folio_file_pos(folio), folio_size(folio), @@ -508,7 +508,7 @@ retry: have_folio: *_folio = folio; - _leave(" = 0"); + kleave(" = 0"); return 0; error_put: @@ -518,7 +518,7 @@ error: folio_unlock(folio); folio_put(folio); } - _leave(" = %d", ret); + kleave(" = %d", ret); return ret; } EXPORT_SYMBOL(netfs_write_begin); @@ -536,7 +536,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, size_t flen = folio_size(folio); int ret; - _enter("%zx @%llx", flen, start); + kenter("%zx @%llx", flen, start); ret = -ENOMEM; @@ -567,7 +567,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, error_put: netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); error: - _leave(" = %d", ret); + kleave(" = %d", ret); return ret; } diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index d583af7a2209..ecbc99ec7d36 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -56,7 +56,7 @@ static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx, struct netfs_group *group = netfs_folio_group(folio); loff_t pos = folio_file_pos(folio); - _enter(""); + kenter(""); if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) return NETFS_FLUSH_CONTENT; @@ -272,12 +272,12 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, */ howto = netfs_how_to_modify(ctx, file, folio, netfs_group, flen, offset, part, maybe_trouble); - _debug("howto %u", howto); + kdebug("howto %u", howto); switch (howto) { case NETFS_JUST_PREFETCH: ret = netfs_prefetch_for_write(file, folio, offset, part); if (ret < 0) { - _debug("prefetch = %zd", ret); + kdebug("prefetch = %zd", ret); goto error_folio_unlock; } break; @@ -418,7 +418,7 @@ out: } iocb->ki_pos += written; - _leave(" = %zd [%zd]", written, ret); + kleave(" = %zd [%zd]", written, ret); return written ? written : ret; error_folio_unlock: @@ -491,7 +491,7 @@ ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct netfs_inode *ictx = netfs_inode(inode); ssize_t ret; - _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); + kenter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); if (!iov_iter_count(from)) return 0; @@ -529,7 +529,7 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr vm_fault_t ret = VM_FAULT_RETRY; int err; - _enter("%lx", folio->index); + kenter("%lx", folio->index); sb_start_pagefault(inode->i_sb); diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c index 10a1e4da6bda..b6debac6205f 100644 --- a/fs/netfs/direct_read.c +++ b/fs/netfs/direct_read.c @@ -33,7 +33,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i size_t orig_count = iov_iter_count(iter); bool async = !is_sync_kiocb(iocb); - _enter(""); + kenter(""); if (!orig_count) return 0; /* Don't update atime */ diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index 88f2adfab75e..792ef17bae21 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -37,7 +37,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * size_t len = iov_iter_count(iter); bool async = !is_sync_kiocb(iocb); - _enter(""); + kenter(""); /* We're going to need a bounce buffer if what we transmit is going to * be different in some way to the source buffer, e.g. because it gets @@ -45,7 +45,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * */ // TODO - _debug("uw %llx-%llx", start, end); + kdebug("uw %llx-%llx", start, end); wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp, start, iocb->ki_flags & IOCB_DIRECT ? @@ -96,7 +96,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * wreq->cleanup = netfs_cleanup_dio_write; ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len); if (ret < 0) { - _debug("begin = %zd", ret); + kdebug("begin = %zd", ret); goto out; } @@ -143,7 +143,7 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from) loff_t pos = iocb->ki_pos; unsigned long long end = pos + iov_iter_count(from) - 1; - _enter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode)); + kenter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode)); if (!iov_iter_count(from)) return 0; diff --git a/fs/netfs/fscache_cache.c b/fs/netfs/fscache_cache.c index 9397ed39b0b4..288a73c3072d 100644 --- a/fs/netfs/fscache_cache.c +++ b/fs/netfs/fscache_cache.c @@ -237,7 +237,7 @@ int fscache_add_cache(struct fscache_cache *cache, { int n_accesses; - _enter("{%s,%s}", ops->name, cache->name); + kenter("{%s,%s}", ops->name, cache->name); BUG_ON(fscache_cache_state(cache) != FSCACHE_CACHE_IS_PREPARING); @@ -257,7 +257,7 @@ int fscache_add_cache(struct fscache_cache *cache, up_write(&fscache_addremove_sem); pr_notice("Cache \"%s\" added (type %s)\n", cache->name, ops->name); - _leave(" = 0 [%s]", cache->name); + kleave(" = 0 [%s]", cache->name); return 0; } EXPORT_SYMBOL(fscache_add_cache); diff --git a/fs/netfs/fscache_cookie.c b/fs/netfs/fscache_cookie.c index bce2492186d0..4d1e8bf4c615 100644 --- a/fs/netfs/fscache_cookie.c +++ b/fs/netfs/fscache_cookie.c @@ -456,7 +456,7 @@ struct fscache_cookie *__fscache_acquire_cookie( { struct fscache_cookie *cookie; - _enter("V=%x", volume->debug_id); + kenter("V=%x", volume->debug_id); if (!index_key || !index_key_len || index_key_len > 255 || aux_data_len > 255) return NULL; @@ -484,7 +484,7 @@ struct fscache_cookie *__fscache_acquire_cookie( trace_fscache_acquire(cookie); fscache_stat(&fscache_n_acquires_ok); - _leave(" = c=%08x", cookie->debug_id); + kleave(" = c=%08x", cookie->debug_id); return cookie; } EXPORT_SYMBOL(__fscache_acquire_cookie); @@ -505,7 +505,7 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie) enum fscache_access_trace trace = fscache_access_lookup_cookie_end_failed; bool need_withdraw = false; - _enter(""); + kenter(""); if (!cookie->volume->cache_priv) { fscache_create_volume(cookie->volume, true); @@ -519,7 +519,7 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie) if (cookie->state != FSCACHE_COOKIE_STATE_FAILED) fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_QUIESCENT); need_withdraw = true; - _leave(" [fail]"); + kleave(" [fail]"); goto out; } @@ -572,7 +572,7 @@ void __fscache_use_cookie(struct fscache_cookie *cookie, bool will_modify) bool queue = false; int n_active; - _enter("c=%08x", cookie->debug_id); + kenter("c=%08x", cookie->debug_id); if (WARN(test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags), "Trying to use relinquished cookie\n")) @@ -636,7 +636,7 @@ again: spin_unlock(&cookie->lock); if (queue) fscache_queue_cookie(cookie, fscache_cookie_get_use_work); - _leave(""); + kleave(""); } EXPORT_SYMBOL(__fscache_use_cookie); @@ -702,7 +702,7 @@ static void fscache_cookie_state_machine(struct fscache_cookie *cookie) enum fscache_cookie_state state; bool wake = false; - _enter("c=%x", cookie->debug_id); + kenter("c=%x", cookie->debug_id); again: spin_lock(&cookie->lock); @@ -820,7 +820,7 @@ out: spin_unlock(&cookie->lock); if (wake) wake_up_cookie_state(cookie); - _leave(""); + kleave(""); } static void fscache_cookie_worker(struct work_struct *work) @@ -867,7 +867,7 @@ static void fscache_cookie_lru_do_one(struct fscache_cookie *cookie) set_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags); spin_unlock(&cookie->lock); fscache_stat(&fscache_n_cookies_lru_expired); - _debug("lru c=%x", cookie->debug_id); + kdebug("lru c=%x", cookie->debug_id); __fscache_withdraw_cookie(cookie); } @@ -971,7 +971,7 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire) if (retire) fscache_stat(&fscache_n_relinquishes_retire); - _enter("c=%08x{%d},%d", + kenter("c=%08x{%d},%d", cookie->debug_id, atomic_read(&cookie->n_active), retire); if (WARN(test_and_set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags), @@ -1050,7 +1050,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie, { bool is_caching; - _enter("c=%x", cookie->debug_id); + kenter("c=%x", cookie->debug_id); fscache_stat(&fscache_n_invalidates); @@ -1072,7 +1072,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie, case FSCACHE_COOKIE_STATE_INVALIDATING: /* is_still_valid will catch it */ default: spin_unlock(&cookie->lock); - _leave(" [no %u]", cookie->state); + kleave(" [no %u]", cookie->state); return; case FSCACHE_COOKIE_STATE_LOOKING_UP: @@ -1081,7 +1081,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie, fallthrough; case FSCACHE_COOKIE_STATE_CREATING: spin_unlock(&cookie->lock); - _leave(" [look %x]", cookie->inval_counter); + kleave(" [look %x]", cookie->inval_counter); return; case FSCACHE_COOKIE_STATE_ACTIVE: @@ -1094,7 +1094,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie, if (is_caching) fscache_queue_cookie(cookie, fscache_cookie_get_inval_work); - _leave(" [inv]"); + kleave(" [inv]"); return; } } diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c index 38637e5c9b57..bf4eaeec44fb 100644 --- a/fs/netfs/fscache_io.c +++ b/fs/netfs/fscache_io.c @@ -28,12 +28,12 @@ bool fscache_wait_for_operation(struct netfs_cache_resources *cres, again: if (!fscache_cache_is_live(cookie->volume->cache)) { - _leave(" [broken]"); + kleave(" [broken]"); return false; } state = fscache_cookie_state(cookie); - _enter("c=%08x{%u},%x", cookie->debug_id, state, want_state); + kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state); switch (state) { case FSCACHE_COOKIE_STATE_CREATING: @@ -52,7 +52,7 @@ again: case FSCACHE_COOKIE_STATE_DROPPED: case FSCACHE_COOKIE_STATE_RELINQUISHING: default: - _leave(" [not live]"); + kleave(" [not live]"); return false; } @@ -92,7 +92,7 @@ again: spin_lock(&cookie->lock); state = fscache_cookie_state(cookie); - _enter("c=%08x{%u},%x", cookie->debug_id, state, want_state); + kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state); switch (state) { case FSCACHE_COOKIE_STATE_LOOKING_UP: @@ -140,7 +140,7 @@ failed: cres->cache_priv = NULL; cres->ops = NULL; fscache_end_cookie_access(cookie, fscache_access_io_not_live); - _leave(" = -ENOBUFS"); + kleave(" = -ENOBUFS"); return -ENOBUFS; } @@ -224,7 +224,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie, if (len == 0) goto abandon; - _enter("%llx,%zx", start, len); + kenter("%llx,%zx", start, len); wreq = kzalloc(sizeof(struct fscache_write_request), GFP_NOFS); if (!wreq) diff --git a/fs/netfs/fscache_main.c b/fs/netfs/fscache_main.c index 42e98bb523e3..bf9b33d26e31 100644 --- a/fs/netfs/fscache_main.c +++ b/fs/netfs/fscache_main.c @@ -99,7 +99,7 @@ error_wq: */ void __exit fscache_exit(void) { - _enter(""); + kenter(""); kmem_cache_destroy(fscache_cookie_jar); fscache_proc_cleanup(); diff --git a/fs/netfs/fscache_volume.c b/fs/netfs/fscache_volume.c index cdf991bdd9de..2e2a405ca9b0 100644 --- a/fs/netfs/fscache_volume.c +++ b/fs/netfs/fscache_volume.c @@ -27,6 +27,19 @@ struct fscache_volume *fscache_get_volume(struct fscache_volume *volume, return volume; } +struct fscache_volume *fscache_try_get_volume(struct fscache_volume *volume, + enum fscache_volume_trace where) +{ + int ref; + + if (!__refcount_inc_not_zero(&volume->ref, &ref)) + return NULL; + + trace_fscache_volume(volume->debug_id, ref + 1, where); + return volume; +} +EXPORT_SYMBOL(fscache_try_get_volume); + static void fscache_see_volume(struct fscache_volume *volume, enum fscache_volume_trace where) { @@ -251,7 +264,7 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key, fscache_see_volume(volume, fscache_volume_new_acquire); fscache_stat(&fscache_n_volumes); up_write(&fscache_addremove_sem); - _leave(" = v=%x", volume->debug_id); + kleave(" = v=%x", volume->debug_id); return volume; err_vol: @@ -420,6 +433,7 @@ void fscache_put_volume(struct fscache_volume *volume, fscache_free_volume(volume); } } +EXPORT_SYMBOL(fscache_put_volume); /* * Relinquish a volume representation cookie. @@ -452,7 +466,7 @@ void fscache_withdraw_volume(struct fscache_volume *volume) { int n_accesses; - _debug("withdraw V=%x", volume->debug_id); + kdebug("withdraw V=%x", volume->debug_id); /* Allow wakeups on dec-to-0 */ n_accesses = atomic_dec_return(&volume->n_accesses); diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index acd9ca14e264..21e46bc9aa49 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -34,7 +34,6 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync); /* * main.c */ -extern unsigned int netfs_debug; extern struct list_head netfs_io_requests; extern spinlock_t netfs_proc_lock; extern mempool_t netfs_request_pool; @@ -344,8 +343,6 @@ extern const struct seq_operations fscache_volumes_seq_ops; struct fscache_volume *fscache_get_volume(struct fscache_volume *volume, enum fscache_volume_trace where); -void fscache_put_volume(struct fscache_volume *volume, - enum fscache_volume_trace where); bool fscache_begin_volume_access(struct fscache_volume *volume, struct fscache_cookie *cookie, enum fscache_access_trace why); @@ -356,42 +353,12 @@ void fscache_create_volume(struct fscache_volume *volume, bool wait); * debug tracing */ #define dbgprintk(FMT, ...) \ - printk("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) + pr_debug("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) #define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) #define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) #define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) -#ifdef __KDEBUG -#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__) -#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__) -#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__) - -#elif defined(CONFIG_NETFS_DEBUG) -#define _enter(FMT, ...) \ -do { \ - if (netfs_debug) \ - kenter(FMT, ##__VA_ARGS__); \ -} while (0) - -#define _leave(FMT, ...) \ -do { \ - if (netfs_debug) \ - kleave(FMT, ##__VA_ARGS__); \ -} while (0) - -#define _debug(FMT, ...) \ -do { \ - if (netfs_debug) \ - kdebug(FMT, ##__VA_ARGS__); \ -} while (0) - -#else -#define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__) -#define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__) -#define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__) -#endif - /* * assertions */ diff --git a/fs/netfs/io.c b/fs/netfs/io.c index c93851b98368..c7576481c321 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -130,7 +130,7 @@ static void netfs_reset_subreq_iter(struct netfs_io_request *rreq, if (count == remaining) return; - _debug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n", + kdebug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n", rreq->debug_id, subreq->debug_index, iov_iter_count(&subreq->io_iter), subreq->transferred, subreq->len, rreq->i_size, @@ -326,7 +326,7 @@ void netfs_subreq_terminated(struct netfs_io_subrequest *subreq, struct netfs_io_request *rreq = subreq->rreq; int u; - _enter("R=%x[%x]{%llx,%lx},%zd", + kenter("R=%x[%x]{%llx,%lx},%zd", rreq->debug_id, subreq->debug_index, subreq->start, subreq->flags, transferred_or_error); @@ -435,7 +435,7 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq, struct netfs_inode *ictx = netfs_inode(rreq->inode); size_t lsize; - _enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size); + kenter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size); if (rreq->origin != NETFS_DIO_READ) { source = netfs_cache_prepare_read(subreq, rreq->i_size); @@ -518,7 +518,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq, subreq->start = rreq->start + rreq->submitted; subreq->len = io_iter->count; - _debug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted); + kdebug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted); list_add_tail(&subreq->rreq_link, &rreq->subrequests); /* Call out to the cache to find out what it can do with the remaining @@ -570,7 +570,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync) struct iov_iter io_iter; int ret; - _enter("R=%x %llx-%llx", + kenter("R=%x %llx-%llx", rreq->debug_id, rreq->start, rreq->start + rreq->len - 1); if (rreq->len == 0) { @@ -593,7 +593,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync) atomic_set(&rreq->nr_outstanding, 1); io_iter = rreq->io_iter; do { - _debug("submit %llx + %llx >= %llx", + kdebug("submit %llx + %llx >= %llx", rreq->start, rreq->submitted, rreq->i_size); if (rreq->origin == NETFS_DIO_READ && rreq->start + rreq->submitted >= rreq->i_size) diff --git a/fs/netfs/main.c b/fs/netfs/main.c index 5f0f438e5d21..db824c372842 100644 --- a/fs/netfs/main.c +++ b/fs/netfs/main.c @@ -20,10 +20,6 @@ MODULE_LICENSE("GPL"); EXPORT_TRACEPOINT_SYMBOL(netfs_sreq); -unsigned netfs_debug; -module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask"); - static struct kmem_cache *netfs_request_slab; static struct kmem_cache *netfs_subrequest_slab; mempool_t netfs_request_pool; diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 83e644bd518f..172808e83ca8 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -26,7 +26,7 @@ bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio) struct fscache_cookie *cookie = netfs_i_cookie(ictx); bool need_use = false; - _enter(""); + kenter(""); if (!filemap_dirty_folio(mapping, folio)) return false; @@ -99,7 +99,7 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) struct netfs_folio *finfo; size_t flen = folio_size(folio); - _enter("{%lx},%zx,%zx", folio->index, offset, length); + kenter("{%lx},%zx,%zx", folio->index, offset, length); if (!folio_test_private(folio)) return; diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 426cf87aaf2e..488147439fe0 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -161,7 +161,7 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq, { struct list_head *next; - _enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr); + kenter("R=%x[%x:]", wreq->debug_id, stream->stream_nr); if (list_empty(&stream->subrequests)) return; @@ -374,7 +374,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) unsigned int notes; int s; - _enter("%llx-%llx", wreq->start, wreq->start + wreq->len); + kenter("%llx-%llx", wreq->start, wreq->start + wreq->len); trace_netfs_collect(wreq); trace_netfs_rreq(wreq, netfs_rreq_trace_collect); @@ -409,7 +409,7 @@ reassess_streams: front = stream->front; while (front) { trace_netfs_collect_sreq(wreq, front); - //_debug("sreq [%x] %llx %zx/%zx", + //kdebug("sreq [%x] %llx %zx/%zx", // front->debug_index, front->start, front->transferred, front->len); /* Stall if there may be a discontinuity. */ @@ -598,7 +598,7 @@ reassess_streams: out: netfs_put_group_many(wreq->group, wreq->nr_group_rel); wreq->nr_group_rel = 0; - _leave(" = %x", notes); + kleave(" = %x", notes); return; need_retry: @@ -606,7 +606,7 @@ need_retry: * that any partially completed op will have had any wholly transferred * folios removed from it. */ - _debug("retry"); + kdebug("retry"); netfs_retry_writes(wreq); goto out; } @@ -621,7 +621,7 @@ void netfs_write_collection_worker(struct work_struct *work) size_t transferred; int s; - _enter("R=%x", wreq->debug_id); + kenter("R=%x", wreq->debug_id); netfs_see_request(wreq, netfs_rreq_trace_see_work); if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) { @@ -684,7 +684,7 @@ void netfs_write_collection_worker(struct work_struct *work) if (wreq->origin == NETFS_DIO_WRITE) inode_dio_end(wreq->inode); - _debug("finished"); + kdebug("finished"); trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip); clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags); wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS); @@ -744,7 +744,7 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, struct netfs_io_request *wreq = subreq->rreq; struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr]; - _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error); + kenter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error); switch (subreq->source) { case NETFS_UPLOAD_TO_SERVER: diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index ec6cf8707fb0..d7c971df8866 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -99,7 +99,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, if (IS_ERR(wreq)) return wreq; - _enter("R=%x", wreq->debug_id); + kenter("R=%x", wreq->debug_id); ictx = netfs_inode(wreq->inode); if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags)) @@ -159,7 +159,7 @@ static void netfs_prepare_write(struct netfs_io_request *wreq, subreq->max_nr_segs = INT_MAX; subreq->stream_nr = stream->stream_nr; - _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index); + kenter("R=%x[%x]", wreq->debug_id, subreq->debug_index); trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index, refcount_read(&subreq->ref), @@ -215,7 +215,7 @@ static void netfs_do_issue_write(struct netfs_io_stream *stream, { struct netfs_io_request *wreq = subreq->rreq; - _enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len); + kenter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len); if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) return netfs_write_subrequest_terminated(subreq, subreq->error, false); @@ -272,11 +272,11 @@ int netfs_advance_write(struct netfs_io_request *wreq, size_t part; if (!stream->avail) { - _leave("no write"); + kleave("no write"); return len; } - _enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0); + kenter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0); if (subreq && start != subreq->start + subreq->len) { netfs_issue_write(wreq, stream); @@ -288,7 +288,7 @@ int netfs_advance_write(struct netfs_io_request *wreq, subreq = stream->construct; part = min(subreq->max_len - subreq->len, len); - _debug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len); + kdebug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len); subreq->len += part; subreq->nr_segs++; @@ -319,7 +319,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq, bool to_eof = false, streamw = false; bool debug = false; - _enter(""); + kenter(""); /* netfs_perform_write() may shift i_size around the page or from out * of the page to beyond it, but cannot move i_size into or through the @@ -329,7 +329,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq, if (fpos >= i_size) { /* mmap beyond eof. */ - _debug("beyond eof"); + kdebug("beyond eof"); folio_start_writeback(folio); folio_unlock(folio); wreq->nr_group_rel += netfs_folio_written_back(folio); @@ -363,7 +363,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq, } flen -= foff; - _debug("folio %zx %zx %zx", foff, flen, fsize); + kdebug("folio %zx %zx %zx", foff, flen, fsize); /* Deal with discontinuities in the stream of dirty pages. These can * arise from a number of sources: @@ -487,7 +487,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq, for (int s = 0; s < NR_IO_STREAMS; s++) netfs_issue_write(wreq, &wreq->io_streams[s]); - _leave(" = 0"); + kleave(" = 0"); return 0; } @@ -522,7 +522,7 @@ int netfs_writepages(struct address_space *mapping, netfs_stat(&netfs_n_wh_writepages); do { - _debug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted); + kdebug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted); /* It appears we don't have to handle cyclic writeback wrapping. */ WARN_ON_ONCE(wreq && folio_pos(folio) < wreq->start + wreq->submitted); @@ -546,14 +546,14 @@ int netfs_writepages(struct address_space *mapping, mutex_unlock(&ictx->wb_lock); netfs_put_request(wreq, false, netfs_rreq_trace_put_return); - _leave(" = %d", error); + kleave(" = %d", error); return error; couldnt_start: netfs_kill_dirty_pages(mapping, wbc, folio); out: mutex_unlock(&ictx->wb_lock); - _leave(" = %d", error); + kleave(" = %d", error); return error; } EXPORT_SYMBOL(netfs_writepages); @@ -590,7 +590,7 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c struct folio *folio, size_t copied, bool to_page_end, struct folio **writethrough_cache) { - _enter("R=%x ic=%zu ws=%u cp=%zu tp=%u", + kenter("R=%x ic=%zu ws=%u cp=%zu tp=%u", wreq->debug_id, wreq->iter.count, wreq->wsize, copied, to_page_end); if (!*writethrough_cache) { @@ -624,7 +624,7 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr struct netfs_inode *ictx = netfs_inode(wreq->inode); int ret; - _enter("R=%x", wreq->debug_id); + kenter("R=%x", wreq->debug_id); if (writethrough_cache) netfs_write_folio(wreq, wbc, writethrough_cache); @@ -657,7 +657,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t loff_t start = wreq->start; int error = 0; - _enter("%zx", len); + kenter("%zx", len); if (wreq->origin == NETFS_DIO_WRITE) inode_dio_begin(wreq->inode); @@ -665,7 +665,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t while (len) { // TODO: Prepare content encryption - _debug("unbuffered %zx", len); + kdebug("unbuffered %zx", len); part = netfs_advance_write(wreq, upload, start, len, false); start += part; len -= part; @@ -684,6 +684,6 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t if (list_empty(&upload->subrequests)) netfs_wake_write_collector(wreq, false); - _leave(" = %d", error); + kleave(" = %d", error); return error; } diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index dddfa604491a..4a29b0138d75 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -383,11 +383,39 @@ found: struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct folio **foliop) { - struct nilfs_dir_entry *de = nilfs_get_folio(dir, 0, foliop); + struct folio *folio; + struct nilfs_dir_entry *de, *next_de; + size_t limit; + char *msg; + de = nilfs_get_folio(dir, 0, &folio); if (IS_ERR(de)) return NULL; - return nilfs_next_entry(de); + + limit = nilfs_last_byte(dir, 0); /* is a multiple of chunk size */ + if (unlikely(!limit || le64_to_cpu(de->inode) != dir->i_ino || + !nilfs_match(1, ".", de))) { + msg = "missing '.'"; + goto fail; + } + + next_de = nilfs_next_entry(de); + /* + * If "next_de" has not reached the end of the chunk, there is + * at least one more record. Check whether it matches "..". + */ + if (unlikely((char *)next_de == (char *)de + nilfs_chunk_size(dir) || + !nilfs_match(2, "..", next_de))) { + msg = "missing '..'"; + goto fail; + } + *foliop = folio; + return next_de; + +fail: + nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg); + folio_release_kmap(folio, de); + return NULL; } ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index f1f2573bb18d..1374635e89fa 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -246,35 +246,6 @@ static int cifs_init_request(struct netfs_io_request *rreq, struct file *file) } /* - * Expand the size of a readahead to the size of the rsize, if at least as - * large as a page, allowing for the possibility that rsize is not pow-2 - * aligned. - */ -static void cifs_expand_readahead(struct netfs_io_request *rreq) -{ - unsigned int rsize = rreq->rsize; - loff_t misalignment, i_size = i_size_read(rreq->inode); - - if (rsize < PAGE_SIZE) - return; - - if (rsize < INT_MAX) - rsize = roundup_pow_of_two(rsize); - else - rsize = ((unsigned int)INT_MAX + 1) / 2; - - misalignment = rreq->start & (rsize - 1); - if (misalignment) { - rreq->start -= misalignment; - rreq->len += misalignment; - } - - rreq->len = round_up(rreq->len, rsize); - if (rreq->start < i_size && rreq->len > i_size - rreq->start) - rreq->len = i_size - rreq->start; -} - -/* * Completion of a request operation. */ static void cifs_rreq_done(struct netfs_io_request *rreq) @@ -329,7 +300,6 @@ const struct netfs_request_ops cifs_req_ops = { .init_request = cifs_init_request, .free_request = cifs_free_request, .free_subrequest = cifs_free_subrequest, - .expand_readahead = cifs_expand_readahead, .clamp_length = cifs_clamp_length, .issue_read = cifs_req_issue_read, .done = cifs_rreq_done, diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index 8d10be1fe18a..c3ee42188d25 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -917,6 +917,40 @@ struct smb2_query_directory_rsp { __u8 Buffer[]; } __packed; +/* DeviceType Flags */ +#define FILE_DEVICE_CD_ROM 0x00000002 +#define FILE_DEVICE_CD_ROM_FILE_SYSTEM 0x00000003 +#define FILE_DEVICE_DFS 0x00000006 +#define FILE_DEVICE_DISK 0x00000007 +#define FILE_DEVICE_DISK_FILE_SYSTEM 0x00000008 +#define FILE_DEVICE_FILE_SYSTEM 0x00000009 +#define FILE_DEVICE_NAMED_PIPE 0x00000011 +#define FILE_DEVICE_NETWORK 0x00000012 +#define FILE_DEVICE_NETWORK_FILE_SYSTEM 0x00000014 +#define FILE_DEVICE_NULL 0x00000015 +#define FILE_DEVICE_PARALLEL_PORT 0x00000016 +#define FILE_DEVICE_PRINTER 0x00000018 +#define FILE_DEVICE_SERIAL_PORT 0x0000001b +#define FILE_DEVICE_STREAMS 0x0000001e +#define FILE_DEVICE_TAPE 0x0000001f +#define FILE_DEVICE_TAPE_FILE_SYSTEM 0x00000020 +#define FILE_DEVICE_VIRTUAL_DISK 0x00000024 +#define FILE_DEVICE_NETWORK_REDIRECTOR 0x00000028 + +/* Device Characteristics */ +#define FILE_REMOVABLE_MEDIA 0x00000001 +#define FILE_READ_ONLY_DEVICE 0x00000002 +#define FILE_FLOPPY_DISKETTE 0x00000004 +#define FILE_WRITE_ONCE_MEDIA 0x00000008 +#define FILE_REMOTE_DEVICE 0x00000010 +#define FILE_DEVICE_IS_MOUNTED 0x00000020 +#define FILE_VIRTUAL_VOLUME 0x00000040 +#define FILE_DEVICE_SECURE_OPEN 0x00000100 +#define FILE_CHARACTERISTIC_TS_DEVICE 0x00001000 +#define FILE_CHARACTERISTIC_WEBDAV_DEVICE 0x00002000 +#define FILE_PORTABLE_DEVICE 0x00004000 +#define FILE_DEVICE_ALLOW_APPCONTAINER_TRAVERSAL 0x00020000 + /* * Maximum number of iovs we need for a set-info request. * The largest one is rename/hardlink diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index e7e07891781b..840c71c66b30 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2051,15 +2051,22 @@ out_err1: * @access: file access flags * @disposition: file disposition flags * @may_flags: set with MAY_ flags + * @is_dir: is creating open flags for directory * * Return: file open flags */ static int smb2_create_open_flags(bool file_present, __le32 access, __le32 disposition, - int *may_flags) + int *may_flags, + bool is_dir) { int oflags = O_NONBLOCK | O_LARGEFILE; + if (is_dir) { + access &= ~FILE_WRITE_DESIRE_ACCESS_LE; + ksmbd_debug(SMB, "Discard write access to a directory\n"); + } + if (access & FILE_READ_DESIRED_ACCESS_LE && access & FILE_WRITE_DESIRE_ACCESS_LE) { oflags |= O_RDWR; @@ -3167,7 +3174,9 @@ int smb2_open(struct ksmbd_work *work) open_flags = smb2_create_open_flags(file_present, daccess, req->CreateDisposition, - &may_flags); + &may_flags, + req->CreateOptions & FILE_DIRECTORY_FILE_LE || + (file_present && S_ISDIR(d_inode(path.dentry)->i_mode))); if (!test_tree_conn_flag(tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) { if (open_flags & (O_CREAT | O_TRUNC)) { @@ -5314,8 +5323,13 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, info = (struct filesystem_device_info *)rsp->Buffer; - info->DeviceType = cpu_to_le32(stfs.f_type); - info->DeviceCharacteristics = cpu_to_le32(0x00000020); + info->DeviceType = cpu_to_le32(FILE_DEVICE_DISK); + info->DeviceCharacteristics = + cpu_to_le32(FILE_DEVICE_IS_MOUNTED); + if (!test_tree_conn_flag(work->tcon, + KSMBD_TREE_CONN_FLAG_WRITABLE)) + info->DeviceCharacteristics |= + cpu_to_le32(FILE_READ_ONLY_DEVICE); rsp->OutputBufferLength = cpu_to_le32(8); break; } diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index eee7320ab0b0..17e409ceaa33 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -2057,7 +2057,7 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, goto out; features = uffdio_api.features; ret = -EINVAL; - if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES)) + if (uffdio_api.api != UFFD_API) goto err_out; ret = -EPERM; if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE)) @@ -2081,6 +2081,11 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED; uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC; #endif + + ret = -EINVAL; + if (features & ~uffdio_api.features) + goto err_out; + uffdio_api.ioctls = UFFD_API_IOCTLS; ret = -EFAULT; if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api))) diff --git a/include/linux/closure.h b/include/linux/closure.h index 59b8c06b11ff..2af44427107d 100644 --- a/include/linux/closure.h +++ b/include/linux/closure.h @@ -159,6 +159,7 @@ struct closure { #ifdef CONFIG_DEBUG_CLOSURES #define CLOSURE_MAGIC_DEAD 0xc054dead #define CLOSURE_MAGIC_ALIVE 0xc054a11e +#define CLOSURE_MAGIC_STACK 0xc05451cc unsigned int magic; struct list_head all; @@ -323,12 +324,18 @@ static inline void closure_init_stack(struct closure *cl) { memset(cl, 0, sizeof(struct closure)); atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); +#ifdef CONFIG_DEBUG_CLOSURES + cl->magic = CLOSURE_MAGIC_STACK; +#endif } static inline void closure_init_stack_release(struct closure *cl) { memset(cl, 0, sizeof(struct closure)); atomic_set_release(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); +#ifdef CONFIG_DEBUG_CLOSURES + cl->magic = CLOSURE_MAGIC_STACK; +#endif } /** diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index bdf7f3eddf0a..4c91a019972b 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -19,6 +19,7 @@ enum fscache_cache_trace; enum fscache_cookie_trace; enum fscache_access_trace; +enum fscache_volume_trace; enum fscache_cache_state { FSCACHE_CACHE_IS_NOT_PRESENT, /* No cache is present for this name */ @@ -97,6 +98,11 @@ extern void fscache_withdraw_cookie(struct fscache_cookie *cookie); extern void fscache_io_error(struct fscache_cache *cache); +extern struct fscache_volume * +fscache_try_get_volume(struct fscache_volume *volume, + enum fscache_volume_trace where); +extern void fscache_put_volume(struct fscache_volume *volume, + enum fscache_volume_trace where); extern void fscache_end_volume_access(struct fscache_volume *volume, struct fscache_cookie *cookie, enum fscache_access_trace why); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 586a8f0104d7..1dc6248feb83 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1979,8 +1979,9 @@ static inline int subsection_map_index(unsigned long pfn) static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { int idx = subsection_map_index(pfn); + struct mem_section_usage *usage = READ_ONCE(ms->usage); - return test_bit(idx, READ_ONCE(ms->usage)->subsection_map); + return usage ? test_bit(idx, usage->subsection_map) : 0; } #else static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 1acf5bac7f50..8c236c651d1d 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -230,7 +230,13 @@ static inline int folio_ref_dec_return(struct folio *folio) static inline bool page_ref_add_unless(struct page *page, int nr, int u) { - bool ret = atomic_add_unless(&page->_refcount, nr, u); + bool ret = false; + + rcu_read_lock(); + /* avoid writing to the vmemmap area being remapped */ + if (!page_is_fake_head(page) && page_ref_count(page) != u) + ret = atomic_add_unless(&page->_refcount, nr, u); + rcu_read_unlock(); if (page_ref_tracepoint_active(page_ref_mod_unless)) __page_ref_mod_unless(page, nr, ret); @@ -258,54 +264,9 @@ static inline bool folio_try_get(struct folio *folio) return folio_ref_add_unless(folio, 1, 0); } -static inline bool folio_ref_try_add_rcu(struct folio *folio, int count) -{ -#ifdef CONFIG_TINY_RCU - /* - * The caller guarantees the folio will not be freed from interrupt - * context, so (on !SMP) we only need preemption to be disabled - * and TINY_RCU does that for us. - */ -# ifdef CONFIG_PREEMPT_COUNT - VM_BUG_ON(!in_atomic() && !irqs_disabled()); -# endif - VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio); - folio_ref_add(folio, count); -#else - if (unlikely(!folio_ref_add_unless(folio, count, 0))) { - /* Either the folio has been freed, or will be freed. */ - return false; - } -#endif - return true; -} - -/** - * folio_try_get_rcu - Attempt to increase the refcount on a folio. - * @folio: The folio. - * - * This is a version of folio_try_get() optimised for non-SMP kernels. - * If you are still holding the rcu_read_lock() after looking up the - * page and know that the page cannot have its refcount decreased to - * zero in interrupt context, you can use this instead of folio_try_get(). - * - * Example users include get_user_pages_fast() (as pages are not unmapped - * from interrupt context) and the page cache lookups (as pages are not - * truncated from interrupt context). We also know that pages are not - * frozen in interrupt context for the purposes of splitting or migration. - * - * You can also use this function if you're holding a lock that prevents - * pages being frozen & removed; eg the i_pages lock for the page cache - * or the mmap_lock or page table lock for page tables. In this case, - * it will always succeed, and you could have used a plain folio_get(), - * but it's sometimes more convenient to have a common function called - * from both locked and RCU-protected contexts. - * - * Return: True if the reference count was successfully incremented. - */ -static inline bool folio_try_get_rcu(struct folio *folio) +static inline bool folio_ref_try_add(struct folio *folio, int count) { - return folio_ref_try_add_rcu(folio, 1); + return folio_ref_add_unless(folio, count, 0); } static inline int page_ref_freeze(struct page *page, int count) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 59f1df0cde5a..a0a026d2d244 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -354,11 +354,18 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) * a good order (that's 1MB if you're using 4kB pages) */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER +#define PREFERRED_MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER #else -#define MAX_PAGECACHE_ORDER 8 +#define PREFERRED_MAX_PAGECACHE_ORDER 8 #endif +/* + * xas_split_alloc() does not support arbitrary orders. This implies no + * 512MB THP on ARM64 with 64KB base page size. + */ +#define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1) +#define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER) + /** * mapping_set_large_folios() - Indicate the file supports large folios. * @mapping: The file. diff --git a/include/linux/sched.h b/include/linux/sched.h index 5ff5e65a4627..d07d3645d2d5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2198,13 +2198,13 @@ static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); } extern void sched_set_stop_task(int cpu, struct task_struct *stop); #ifdef CONFIG_MEM_ALLOC_PROFILING -static inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag) +static __always_inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag) { swap(current->alloc_tag, tag); return tag; } -static inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old) +static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old) { #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n"); diff --git a/include/linux/swap.h b/include/linux/swap.h index bd450023b9a4..e685e93ba354 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -354,7 +354,8 @@ static inline swp_entry_t page_swap_entry(struct page *page) } /* linux/mm/workingset.c */ -bool workingset_test_recent(void *shadow, bool file, bool *workingset); +bool workingset_test_recent(void *shadow, bool file, bool *workingset, + bool flush); void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg); void workingset_refault(struct folio *folio, void *shadow); diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 21a67dc9efe8..e93ee8d936a9 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -490,9 +490,16 @@ static inline void tpm_buf_append_empty_auth(struct tpm_buf *buf, u32 handle) { } #endif + +static inline struct tpm2_auth *tpm2_chip_auth(struct tpm_chip *chip) +{ #ifdef CONFIG_TCG_TPM2_HMAC + return chip->auth; +#else + return NULL; +#endif +} -int tpm2_start_auth_session(struct tpm_chip *chip); void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, u32 handle, u8 *name); void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, @@ -504,9 +511,27 @@ static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip, u8 *passphrase, int passphraselen) { - tpm_buf_append_hmac_session(chip, buf, attributes, passphrase, - passphraselen); + struct tpm_header *head; + int offset; + + if (tpm2_chip_auth(chip)) { + tpm_buf_append_hmac_session(chip, buf, attributes, passphrase, passphraselen); + } else { + offset = buf->handles * 4 + TPM_HEADER_SIZE; + head = (struct tpm_header *)buf->data; + + /* + * If the only sessions are optional, the command tag must change to + * TPM2_ST_NO_SESSIONS. + */ + if (tpm_buf_length(buf) == offset) + head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS); + } } + +#ifdef CONFIG_TCG_TPM2_HMAC + +int tpm2_start_auth_session(struct tpm_chip *chip); void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf); int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf, int rc); @@ -521,56 +546,6 @@ static inline int tpm2_start_auth_session(struct tpm_chip *chip) static inline void tpm2_end_auth_session(struct tpm_chip *chip) { } -static inline void tpm_buf_append_name(struct tpm_chip *chip, - struct tpm_buf *buf, - u32 handle, u8 *name) -{ - tpm_buf_append_u32(buf, handle); - /* count the number of handles in the upper bits of flags */ - buf->handles++; -} -static inline void tpm_buf_append_hmac_session(struct tpm_chip *chip, - struct tpm_buf *buf, - u8 attributes, u8 *passphrase, - int passphraselen) -{ - /* offset tells us where the sessions area begins */ - int offset = buf->handles * 4 + TPM_HEADER_SIZE; - u32 len = 9 + passphraselen; - - if (tpm_buf_length(buf) != offset) { - /* not the first session so update the existing length */ - len += get_unaligned_be32(&buf->data[offset]); - put_unaligned_be32(len, &buf->data[offset]); - } else { - tpm_buf_append_u32(buf, len); - } - /* auth handle */ - tpm_buf_append_u32(buf, TPM2_RS_PW); - /* nonce */ - tpm_buf_append_u16(buf, 0); - /* attributes */ - tpm_buf_append_u8(buf, 0); - /* passphrase */ - tpm_buf_append_u16(buf, passphraselen); - tpm_buf_append(buf, passphrase, passphraselen); -} -static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip, - struct tpm_buf *buf, - u8 attributes, - u8 *passphrase, - int passphraselen) -{ - int offset = buf->handles * 4 + TPM_HEADER_SIZE; - struct tpm_header *head = (struct tpm_header *) buf->data; - - /* - * if the only sessions are optional, the command tag - * must change to TPM2_ST_NO_SESSIONS - */ - if (tpm_buf_length(buf) == offset) - head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS); -} static inline void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf) { diff --git a/include/net/tcx.h b/include/net/tcx.h index 72a3e75e539f..5ce0ce9e0c02 100644 --- a/include/net/tcx.h +++ b/include/net/tcx.h @@ -13,7 +13,7 @@ struct mini_Qdisc; struct tcx_entry { struct mini_Qdisc __rcu *miniq; struct bpf_mprog_bundle bundle; - bool miniq_active; + u32 miniq_active; struct rcu_head rcu; }; @@ -125,11 +125,16 @@ static inline void tcx_skeys_dec(bool ingress) tcx_dec(); } -static inline void tcx_miniq_set_active(struct bpf_mprog_entry *entry, - const bool active) +static inline void tcx_miniq_inc(struct bpf_mprog_entry *entry) { ASSERT_RTNL(); - tcx_entry(entry)->miniq_active = active; + tcx_entry(entry)->miniq_active++; +} + +static inline void tcx_miniq_dec(struct bpf_mprog_entry *entry) +{ + ASSERT_RTNL(); + tcx_entry(entry)->miniq_active--; } static inline bool tcx_entry_is_active(struct bpf_mprog_entry *entry) diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index a6190aa1b406..f1a73aa83fbb 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -35,12 +35,14 @@ enum fscache_volume_trace { fscache_volume_get_cookie, fscache_volume_get_create_work, fscache_volume_get_hash_collision, + fscache_volume_get_withdraw, fscache_volume_free, fscache_volume_new_acquire, fscache_volume_put_cookie, fscache_volume_put_create_work, fscache_volume_put_hash_collision, fscache_volume_put_relinquish, + fscache_volume_put_withdraw, fscache_volume_see_create_work, fscache_volume_see_hash_wake, fscache_volume_wait_create_work, @@ -120,12 +122,14 @@ enum fscache_access_trace { EM(fscache_volume_get_cookie, "GET cook ") \ EM(fscache_volume_get_create_work, "GET creat") \ EM(fscache_volume_get_hash_collision, "GET hcoll") \ + EM(fscache_volume_get_withdraw, "GET withd") \ EM(fscache_volume_free, "FREE ") \ EM(fscache_volume_new_acquire, "NEW acq ") \ EM(fscache_volume_put_cookie, "PUT cook ") \ EM(fscache_volume_put_create_work, "PUT creat") \ EM(fscache_volume_put_hash_collision, "PUT hcoll") \ EM(fscache_volume_put_relinquish, "PUT relnq") \ + EM(fscache_volume_put_withdraw, "PUT withd") \ EM(fscache_volume_see_create_work, "SEE creat") \ EM(fscache_volume_see_hash_wake, "SEE hwake") \ E_(fscache_volume_wait_create_work, "WAIT crea") diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index aaed8e12ad0b..926b1deb1116 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -802,6 +802,9 @@ struct drm_panthor_queue_submit { * Must be 64-bit/8-byte aligned (the size of a CS instruction) * * Can be zero if stream_addr is zero too. + * + * When the stream size is zero, the queue submit serves as a + * synchronization point. */ __u32 stream_size; @@ -822,6 +825,8 @@ struct drm_panthor_queue_submit { * ensure the GPU doesn't get garbage when reading the indirect command * stream buffers. If you want the cache flush to happen * unconditionally, pass a zero here. + * + * Ignored when stream_size is zero. */ __u32 latest_flush; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 5241ba671c5a..b5f0adae8293 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1084,7 +1084,10 @@ struct bpf_async_cb { struct bpf_prog *prog; void __rcu *callback_fn; void *value; - struct rcu_head rcu; + union { + struct rcu_head rcu; + struct work_struct delete_work; + }; u64 flags; }; @@ -1107,6 +1110,7 @@ struct bpf_async_cb { struct bpf_hrtimer { struct bpf_async_cb cb; struct hrtimer timer; + atomic_t cancelling; }; struct bpf_work { @@ -1219,6 +1223,21 @@ static void bpf_wq_delete_work(struct work_struct *work) kfree_rcu(w, cb.rcu); } +static void bpf_timer_delete_work(struct work_struct *work) +{ + struct bpf_hrtimer *t = container_of(work, struct bpf_hrtimer, cb.delete_work); + + /* Cancel the timer and wait for callback to complete if it was running. + * If hrtimer_cancel() can be safely called it's safe to call + * kfree_rcu(t) right after for both preallocated and non-preallocated + * maps. The async->cb = NULL was already done and no code path can see + * address 't' anymore. Timer if armed for existing bpf_hrtimer before + * bpf_timer_cancel_and_free will have been cancelled. + */ + hrtimer_cancel(&t->timer); + kfree_rcu(t, cb.rcu); +} + static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags, enum bpf_async_type type) { @@ -1262,6 +1281,8 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u clockid = flags & (MAX_CLOCKS - 1); t = (struct bpf_hrtimer *)cb; + atomic_set(&t->cancelling, 0); + INIT_WORK(&t->cb.delete_work, bpf_timer_delete_work); hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT); t->timer.function = bpf_timer_cb; cb->value = (void *)async - map->record->timer_off; @@ -1440,7 +1461,8 @@ static void drop_prog_refcnt(struct bpf_async_cb *async) BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer) { - struct bpf_hrtimer *t; + struct bpf_hrtimer *t, *cur_t; + bool inc = false; int ret = 0; if (in_nmi()) @@ -1452,14 +1474,41 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer) ret = -EINVAL; goto out; } - if (this_cpu_read(hrtimer_running) == t) { + + cur_t = this_cpu_read(hrtimer_running); + if (cur_t == t) { /* If bpf callback_fn is trying to bpf_timer_cancel() * its own timer the hrtimer_cancel() will deadlock - * since it waits for callback_fn to finish + * since it waits for callback_fn to finish. */ ret = -EDEADLK; goto out; } + + /* Only account in-flight cancellations when invoked from a timer + * callback, since we want to avoid waiting only if other _callbacks_ + * are waiting on us, to avoid introducing lockups. Non-callback paths + * are ok, since nobody would synchronously wait for their completion. + */ + if (!cur_t) + goto drop; + atomic_inc(&t->cancelling); + /* Need full barrier after relaxed atomic_inc */ + smp_mb__after_atomic(); + inc = true; + if (atomic_read(&cur_t->cancelling)) { + /* We're cancelling timer t, while some other timer callback is + * attempting to cancel us. In such a case, it might be possible + * that timer t belongs to the other callback, or some other + * callback waiting upon it (creating transitive dependencies + * upon us), and we will enter a deadlock if we continue + * cancelling and waiting for it synchronously, since it might + * do the same. Bail! + */ + ret = -EDEADLK; + goto out; + } +drop: drop_prog_refcnt(&t->cb); out: __bpf_spin_unlock_irqrestore(&timer->lock); @@ -1467,6 +1516,8 @@ out: * if it was running. */ ret = ret ?: hrtimer_cancel(&t->timer); + if (inc) + atomic_dec(&t->cancelling); rcu_read_unlock(); return ret; } @@ -1512,25 +1563,39 @@ void bpf_timer_cancel_and_free(void *val) if (!t) return; - /* Cancel the timer and wait for callback to complete if it was running. - * If hrtimer_cancel() can be safely called it's safe to call kfree(t) - * right after for both preallocated and non-preallocated maps. - * The async->cb = NULL was already done and no code path can - * see address 't' anymore. - * - * Check that bpf_map_delete/update_elem() wasn't called from timer - * callback_fn. In such case don't call hrtimer_cancel() (since it will - * deadlock) and don't call hrtimer_try_to_cancel() (since it will just - * return -1). Though callback_fn is still running on this cpu it's + /* We check that bpf_map_delete/update_elem() was called from timer + * callback_fn. In such case we don't call hrtimer_cancel() (since it + * will deadlock) and don't call hrtimer_try_to_cancel() (since it will + * just return -1). Though callback_fn is still running on this cpu it's * safe to do kfree(t) because bpf_timer_cb() read everything it needed * from 't'. The bpf subprog callback_fn won't be able to access 't', * since async->cb = NULL was already done. The timer will be * effectively cancelled because bpf_timer_cb() will return * HRTIMER_NORESTART. + * + * However, it is possible the timer callback_fn calling us armed the + * timer _before_ calling us, such that failing to cancel it here will + * cause it to possibly use struct hrtimer after freeing bpf_hrtimer. + * Therefore, we _need_ to cancel any outstanding timers before we do + * kfree_rcu, even though no more timers can be armed. + * + * Moreover, we need to schedule work even if timer does not belong to + * the calling callback_fn, as on two different CPUs, we can end up in a + * situation where both sides run in parallel, try to cancel one + * another, and we end up waiting on both sides in hrtimer_cancel + * without making forward progress, since timer1 depends on time2 + * callback to finish, and vice versa. + * + * CPU 1 (timer1_cb) CPU 2 (timer2_cb) + * bpf_timer_cancel_and_free(timer2) bpf_timer_cancel_and_free(timer1) + * + * To avoid these issues, punt to workqueue context when we are in a + * timer callback. */ - if (this_cpu_read(hrtimer_running) != t) - hrtimer_cancel(&t->timer); - kfree_rcu(t, cb.rcu); + if (this_cpu_read(hrtimer_running)) + queue_work(system_unbound_wq, &t->cb.delete_work); + else + bpf_timer_delete_work(&t->cb.delete_work); } /* This function is called by map_delete/update_elem for individual element and diff --git a/lib/build_OID_registry b/lib/build_OID_registry index 56d8bafeb848..8267e8d71338 100755 --- a/lib/build_OID_registry +++ b/lib/build_OID_registry @@ -38,7 +38,9 @@ close IN_FILE || die; # open C_FILE, ">$ARGV[1]" or die; print C_FILE "/*\n"; -print C_FILE " * Automatically generated by ", $0 =~ s#^\Q$abs_srctree/\E##r, ". Do not edit\n"; +my $scriptname = $0; +$scriptname =~ s#^\Q$abs_srctree/\E##; +print C_FILE " * Automatically generated by ", $scriptname, ". Do not edit\n"; print C_FILE " */\n"; # diff --git a/lib/closure.c b/lib/closure.c index c971216d9d77..116afae2eed9 100644 --- a/lib/closure.c +++ b/lib/closure.c @@ -244,6 +244,9 @@ void closure_debug_destroy(struct closure *cl) { unsigned long flags; + if (cl->magic == CLOSURE_MAGIC_STACK) + return; + BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE); cl->magic = CLOSURE_MAGIC_DEAD; diff --git a/mm/damon/core.c b/mm/damon/core.c index 6392f1cc97a3..e66823d6b10b 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1358,14 +1358,31 @@ static void damon_merge_regions_of(struct damon_target *t, unsigned int thres, * access frequencies are similar. This is for minimizing the monitoring * overhead under the dynamically changeable access pattern. If a merge was * unnecessarily made, later 'kdamond_split_regions()' will revert it. + * + * The total number of regions could be higher than the user-defined limit, + * max_nr_regions for some cases. For example, the user can update + * max_nr_regions to a number that lower than the current number of regions + * while DAMON is running. For such a case, repeat merging until the limit is + * met while increasing @threshold up to possible maximum level. */ static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold, unsigned long sz_limit) { struct damon_target *t; - - damon_for_each_target(t, c) - damon_merge_regions_of(t, threshold, sz_limit); + unsigned int nr_regions; + unsigned int max_thres; + + max_thres = c->attrs.aggr_interval / + (c->attrs.sample_interval ? c->attrs.sample_interval : 1); + do { + nr_regions = 0; + damon_for_each_target(t, c) { + damon_merge_regions_of(t, threshold, sz_limit); + nr_regions += damon_nr_regions(t); + } + threshold = max(1, threshold * 2); + } while (nr_regions > c->attrs.max_nr_regions && + threshold / 2 < max_thres); } /* diff --git a/mm/filemap.c b/mm/filemap.c index 876cc64aadd7..657bcd887fdb 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1847,7 +1847,7 @@ repeat: if (!folio || xa_is_value(folio)) goto out; - if (!folio_try_get_rcu(folio)) + if (!folio_try_get(folio)) goto repeat; if (unlikely(folio != xas_reload(&xas))) { @@ -2001,7 +2001,7 @@ retry: if (!folio || xa_is_value(folio)) return folio; - if (!folio_try_get_rcu(folio)) + if (!folio_try_get(folio)) goto reset; if (unlikely(folio != xas_reload(xas))) { @@ -2181,7 +2181,7 @@ unsigned filemap_get_folios_contig(struct address_space *mapping, if (xa_is_value(folio)) goto update_start; - if (!folio_try_get_rcu(folio)) + if (!folio_try_get(folio)) goto retry; if (unlikely(folio != xas_reload(&xas))) @@ -2313,7 +2313,7 @@ static void filemap_get_read_batch(struct address_space *mapping, break; if (xa_is_sibling(folio)) break; - if (!folio_try_get_rcu(folio)) + if (!folio_try_get(folio)) goto retry; if (unlikely(folio != xas_reload(&xas))) @@ -3124,7 +3124,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* Use the readahead code, even if readahead is disabled */ - if (vm_flags & VM_HUGEPAGE) { + if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) { fpin = maybe_unlock_mmap_for_io(vmf, fpin); ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1); ra->size = HPAGE_PMD_NR; @@ -3231,7 +3231,8 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf) if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID)) return 0; - ptep = pte_offset_map(vmf->pmd, vmf->address); + ptep = pte_offset_map_nolock(vma->vm_mm, vmf->pmd, vmf->address, + &vmf->ptl); if (unlikely(!ptep)) return VM_FAULT_NOPAGE; @@ -3472,7 +3473,7 @@ static struct folio *next_uptodate_folio(struct xa_state *xas, continue; if (folio_test_locked(folio)) continue; - if (!folio_try_get_rcu(folio)) + if (!folio_try_get(folio)) continue; /* Has the page moved or been split? */ if (unlikely(folio != xas_reload(xas))) @@ -4248,6 +4249,9 @@ static void filemap_cachestat(struct address_space *mapping, XA_STATE(xas, &mapping->i_pages, first_index); struct folio *folio; + /* Flush stats (and potentially sleep) outside the RCU read section. */ + mem_cgroup_flush_stats_ratelimited(NULL); + rcu_read_lock(); xas_for_each(&xas, folio, last_index) { int order; @@ -4311,7 +4315,7 @@ static void filemap_cachestat(struct address_space *mapping, goto resched; } #endif - if (workingset_test_recent(shadow, true, &workingset)) + if (workingset_test_recent(shadow, true, &workingset, false)) cs->nr_recently_evicted += nr_pages; goto resched; @@ -76,7 +76,7 @@ retry: folio = page_folio(page); if (WARN_ON_ONCE(folio_ref_count(folio) < 0)) return NULL; - if (unlikely(!folio_ref_try_add_rcu(folio, refs))) + if (unlikely(!folio_ref_try_add(folio, refs))) return NULL; /* @@ -97,95 +97,6 @@ retry: return folio; } -/** - * try_grab_folio() - Attempt to get or pin a folio. - * @page: pointer to page to be grabbed - * @refs: the value to (effectively) add to the folio's refcount - * @flags: gup flags: these are the FOLL_* flag values. - * - * "grab" names in this file mean, "look at flags to decide whether to use - * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. - * - * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the - * same time. (That's true throughout the get_user_pages*() and - * pin_user_pages*() APIs.) Cases: - * - * FOLL_GET: folio's refcount will be incremented by @refs. - * - * FOLL_PIN on large folios: folio's refcount will be incremented by - * @refs, and its pincount will be incremented by @refs. - * - * FOLL_PIN on single-page folios: folio's refcount will be incremented by - * @refs * GUP_PIN_COUNTING_BIAS. - * - * Return: The folio containing @page (with refcount appropriately - * incremented) for success, or NULL upon failure. If neither FOLL_GET - * nor FOLL_PIN was set, that's considered failure, and furthermore, - * a likely bug in the caller, so a warning is also emitted. - */ -struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) -{ - struct folio *folio; - - if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0)) - return NULL; - - if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) - return NULL; - - if (flags & FOLL_GET) - return try_get_folio(page, refs); - - /* FOLL_PIN is set */ - - /* - * Don't take a pin on the zero page - it's not going anywhere - * and it is used in a *lot* of places. - */ - if (is_zero_page(page)) - return page_folio(page); - - folio = try_get_folio(page, refs); - if (!folio) - return NULL; - - /* - * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a - * right zone, so fail and let the caller fall back to the slow - * path. - */ - if (unlikely((flags & FOLL_LONGTERM) && - !folio_is_longterm_pinnable(folio))) { - if (!put_devmap_managed_folio_refs(folio, refs)) - folio_put_refs(folio, refs); - return NULL; - } - - /* - * When pinning a large folio, use an exact count to track it. - * - * However, be sure to *also* increment the normal folio - * refcount field at least once, so that the folio really - * is pinned. That's why the refcount from the earlier - * try_get_folio() is left intact. - */ - if (folio_test_large(folio)) - atomic_add(refs, &folio->_pincount); - else - folio_ref_add(folio, - refs * (GUP_PIN_COUNTING_BIAS - 1)); - /* - * Adjust the pincount before re-checking the PTE for changes. - * This is essentially a smp_mb() and is paired with a memory - * barrier in folio_try_share_anon_rmap_*(). - */ - smp_mb__after_atomic(); - - node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); - - return folio; -} - static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) { if (flags & FOLL_PIN) { @@ -203,58 +114,59 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) } /** - * try_grab_page() - elevate a page's refcount by a flag-dependent amount - * @page: pointer to page to be grabbed - * @flags: gup flags: these are the FOLL_* flag values. + * try_grab_folio() - add a folio's refcount by a flag-dependent amount + * @folio: pointer to folio to be grabbed + * @refs: the value to (effectively) add to the folio's refcount + * @flags: gup flags: these are the FOLL_* flag values * * This might not do anything at all, depending on the flags argument. * * "grab" names in this file mean, "look at flags to decide whether to use - * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount. + * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. * * Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same - * time. Cases: please see the try_grab_folio() documentation, with - * "refs=1". + * time. * * Return: 0 for success, or if no action was required (if neither FOLL_PIN * nor FOLL_GET was set, nothing is done). A negative error code for failure: * - * -ENOMEM FOLL_GET or FOLL_PIN was set, but the page could not + * -ENOMEM FOLL_GET or FOLL_PIN was set, but the folio could not * be grabbed. + * + * It is called when we have a stable reference for the folio, typically in + * GUP slow path. */ -int __must_check try_grab_page(struct page *page, unsigned int flags) +int __must_check try_grab_folio(struct folio *folio, int refs, + unsigned int flags) { - struct folio *folio = page_folio(page); - if (WARN_ON_ONCE(folio_ref_count(folio) <= 0)) return -ENOMEM; - if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) + if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(&folio->page))) return -EREMOTEIO; if (flags & FOLL_GET) - folio_ref_inc(folio); + folio_ref_add(folio, refs); else if (flags & FOLL_PIN) { /* * Don't take a pin on the zero page - it's not going anywhere * and it is used in a *lot* of places. */ - if (is_zero_page(page)) + if (is_zero_folio(folio)) return 0; /* - * Similar to try_grab_folio(): be sure to *also* - * increment the normal page refcount field at least once, + * Increment the normal page refcount field at least once, * so that the page really is pinned. */ if (folio_test_large(folio)) { - folio_ref_add(folio, 1); - atomic_add(1, &folio->_pincount); + folio_ref_add(folio, refs); + atomic_add(refs, &folio->_pincount); } else { - folio_ref_add(folio, GUP_PIN_COUNTING_BIAS); + folio_ref_add(folio, refs * GUP_PIN_COUNTING_BIAS); } - node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1); + node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); } return 0; @@ -515,6 +427,102 @@ static int record_subpages(struct page *page, unsigned long sz, return nr; } + +/** + * try_grab_folio_fast() - Attempt to get or pin a folio in fast path. + * @page: pointer to page to be grabbed + * @refs: the value to (effectively) add to the folio's refcount + * @flags: gup flags: these are the FOLL_* flag values. + * + * "grab" names in this file mean, "look at flags to decide whether to use + * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. + * + * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the + * same time. (That's true throughout the get_user_pages*() and + * pin_user_pages*() APIs.) Cases: + * + * FOLL_GET: folio's refcount will be incremented by @refs. + * + * FOLL_PIN on large folios: folio's refcount will be incremented by + * @refs, and its pincount will be incremented by @refs. + * + * FOLL_PIN on single-page folios: folio's refcount will be incremented by + * @refs * GUP_PIN_COUNTING_BIAS. + * + * Return: The folio containing @page (with refcount appropriately + * incremented) for success, or NULL upon failure. If neither FOLL_GET + * nor FOLL_PIN was set, that's considered failure, and furthermore, + * a likely bug in the caller, so a warning is also emitted. + * + * It uses add ref unless zero to elevate the folio refcount and must be called + * in fast path only. + */ +static struct folio *try_grab_folio_fast(struct page *page, int refs, + unsigned int flags) +{ + struct folio *folio; + + /* Raise warn if it is not called in fast GUP */ + VM_WARN_ON_ONCE(!irqs_disabled()); + + if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0)) + return NULL; + + if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) + return NULL; + + if (flags & FOLL_GET) + return try_get_folio(page, refs); + + /* FOLL_PIN is set */ + + /* + * Don't take a pin on the zero page - it's not going anywhere + * and it is used in a *lot* of places. + */ + if (is_zero_page(page)) + return page_folio(page); + + folio = try_get_folio(page, refs); + if (!folio) + return NULL; + + /* + * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a + * right zone, so fail and let the caller fall back to the slow + * path. + */ + if (unlikely((flags & FOLL_LONGTERM) && + !folio_is_longterm_pinnable(folio))) { + if (!put_devmap_managed_folio_refs(folio, refs)) + folio_put_refs(folio, refs); + return NULL; + } + + /* + * When pinning a large folio, use an exact count to track it. + * + * However, be sure to *also* increment the normal folio + * refcount field at least once, so that the folio really + * is pinned. That's why the refcount from the earlier + * try_get_folio() is left intact. + */ + if (folio_test_large(folio)) + atomic_add(refs, &folio->_pincount); + else + folio_ref_add(folio, + refs * (GUP_PIN_COUNTING_BIAS - 1)); + /* + * Adjust the pincount before re-checking the PTE for changes. + * This is essentially a smp_mb() and is paired with a memory + * barrier in folio_try_share_anon_rmap_*(). + */ + smp_mb__after_atomic(); + + node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); + + return folio; +} #endif /* CONFIG_ARCH_HAS_HUGEPD || CONFIG_HAVE_GUP_FAST */ #ifdef CONFIG_ARCH_HAS_HUGEPD @@ -535,7 +543,7 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, */ static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz, unsigned long addr, unsigned long end, unsigned int flags, - struct page **pages, int *nr) + struct page **pages, int *nr, bool fast) { unsigned long pte_end; struct page *page; @@ -558,9 +566,15 @@ static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz page = pte_page(pte); refs = record_subpages(page, sz, addr, end, pages + *nr); - folio = try_grab_folio(page, refs, flags); - if (!folio) - return 0; + if (fast) { + folio = try_grab_folio_fast(page, refs, flags); + if (!folio) + return 0; + } else { + folio = page_folio(page); + if (try_grab_folio(folio, refs, flags)) + return 0; + } if (unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) { gup_put_folio(folio, refs, flags); @@ -588,7 +602,7 @@ static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz static int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd, unsigned long addr, unsigned int pdshift, unsigned long end, unsigned int flags, - struct page **pages, int *nr) + struct page **pages, int *nr, bool fast) { pte_t *ptep; unsigned long sz = 1UL << hugepd_shift(hugepd); @@ -598,7 +612,8 @@ static int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd, ptep = hugepte_offset(hugepd, addr, pdshift); do { next = hugepte_addr_end(addr, end, sz); - ret = gup_hugepte(vma, ptep, sz, addr, end, flags, pages, nr); + ret = gup_hugepte(vma, ptep, sz, addr, end, flags, pages, nr, + fast); if (ret != 1) return ret; } while (ptep++, addr = next, addr != end); @@ -625,7 +640,7 @@ static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd, ptep = hugepte_offset(hugepd, addr, pdshift); ptl = huge_pte_lock(h, vma->vm_mm, ptep); ret = gup_hugepd(vma, hugepd, addr, pdshift, addr + PAGE_SIZE, - flags, &page, &nr); + flags, &page, &nr, false); spin_unlock(ptl); if (ret == 1) { @@ -642,7 +657,7 @@ static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd, static inline int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd, unsigned long addr, unsigned int pdshift, unsigned long end, unsigned int flags, - struct page **pages, int *nr) + struct page **pages, int *nr, bool fast) { return 0; } @@ -729,7 +744,7 @@ static struct page *follow_huge_pud(struct vm_area_struct *vma, gup_must_unshare(vma, flags, page)) return ERR_PTR(-EMLINK); - ret = try_grab_page(page, flags); + ret = try_grab_folio(page_folio(page), 1, flags); if (ret) page = ERR_PTR(ret); else @@ -806,7 +821,7 @@ static struct page *follow_huge_pmd(struct vm_area_struct *vma, VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) && !PageAnonExclusive(page), page); - ret = try_grab_page(page, flags); + ret = try_grab_folio(page_folio(page), 1, flags); if (ret) return ERR_PTR(ret); @@ -968,8 +983,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma, VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) && !PageAnonExclusive(page), page); - /* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */ - ret = try_grab_page(page, flags); + /* try_grab_folio() does nothing unless FOLL_GET or FOLL_PIN is set. */ + ret = try_grab_folio(page_folio(page), 1, flags); if (unlikely(ret)) { page = ERR_PTR(ret); goto out; @@ -1233,7 +1248,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, goto unmap; *page = pte_page(entry); } - ret = try_grab_page(*page, gup_flags); + ret = try_grab_folio(page_folio(*page), 1, gup_flags); if (unlikely(ret)) goto unmap; out: @@ -1636,20 +1651,19 @@ next_page: * pages. */ if (page_increm > 1) { - struct folio *folio; + struct folio *folio = page_folio(page); /* * Since we already hold refcount on the * large folio, this should never fail. */ - folio = try_grab_folio(page, page_increm - 1, - foll_flags); - if (WARN_ON_ONCE(!folio)) { + if (try_grab_folio(folio, page_increm - 1, + foll_flags)) { /* * Release the 1st page ref if the * folio is problematic, fail hard. */ - gup_put_folio(page_folio(page), 1, + gup_put_folio(folio, 1, foll_flags); ret = -EFAULT; goto out; @@ -2797,7 +2811,6 @@ EXPORT_SYMBOL(get_user_pages_unlocked); * This code is based heavily on the PowerPC implementation by Nick Piggin. */ #ifdef CONFIG_HAVE_GUP_FAST - /* * Used in the GUP-fast path to determine whether GUP is permitted to work on * a specific folio. @@ -2962,7 +2975,7 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); - folio = try_grab_folio(page, 1, flags); + folio = try_grab_folio_fast(page, 1, flags); if (!folio) goto pte_unmap; @@ -3049,7 +3062,7 @@ static int gup_fast_devmap_leaf(unsigned long pfn, unsigned long addr, break; } - folio = try_grab_folio(page, 1, flags); + folio = try_grab_folio_fast(page, 1, flags); if (!folio) { gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages); break; @@ -3138,7 +3151,7 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, page = pmd_page(orig); refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr); - folio = try_grab_folio(page, refs, flags); + folio = try_grab_folio_fast(page, refs, flags); if (!folio) return 0; @@ -3182,7 +3195,7 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, page = pud_page(orig); refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr); - folio = try_grab_folio(page, refs, flags); + folio = try_grab_folio_fast(page, refs, flags); if (!folio) return 0; @@ -3222,7 +3235,7 @@ static int gup_fast_pgd_leaf(pgd_t orig, pgd_t *pgdp, unsigned long addr, page = pgd_page(orig); refs = record_subpages(page, PGDIR_SIZE, addr, end, pages + *nr); - folio = try_grab_folio(page, refs, flags); + folio = try_grab_folio_fast(page, refs, flags); if (!folio) return 0; @@ -3276,7 +3289,8 @@ static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, * pmd format and THP pmd format */ if (gup_hugepd(NULL, __hugepd(pmd_val(pmd)), addr, - PMD_SHIFT, next, flags, pages, nr) != 1) + PMD_SHIFT, next, flags, pages, nr, + true) != 1) return 0; } else if (!gup_fast_pte_range(pmd, pmdp, addr, next, flags, pages, nr)) @@ -3306,7 +3320,8 @@ static int gup_fast_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, return 0; } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) { if (gup_hugepd(NULL, __hugepd(pud_val(pud)), addr, - PUD_SHIFT, next, flags, pages, nr) != 1) + PUD_SHIFT, next, flags, pages, nr, + true) != 1) return 0; } else if (!gup_fast_pmd_range(pudp, pud, addr, next, flags, pages, nr)) @@ -3333,7 +3348,8 @@ static int gup_fast_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, BUILD_BUG_ON(p4d_leaf(p4d)); if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) { if (gup_hugepd(NULL, __hugepd(p4d_val(p4d)), addr, - P4D_SHIFT, next, flags, pages, nr) != 1) + P4D_SHIFT, next, flags, pages, nr, + true) != 1) return 0; } else if (!gup_fast_pud_range(p4dp, p4d, addr, next, flags, pages, nr)) @@ -3362,7 +3378,8 @@ static void gup_fast_pgd_range(unsigned long addr, unsigned long end, return; } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) { if (gup_hugepd(NULL, __hugepd(pgd_val(pgd)), addr, - PGDIR_SHIFT, next, flags, pages, nr) != 1) + PGDIR_SHIFT, next, flags, pages, nr, + true) != 1) return; } else if (!gup_fast_p4d_range(pgdp, pgd, addr, next, flags, pages, nr)) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index db7946a0a28c..2120f7478e55 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1331,7 +1331,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, if (!*pgmap) return ERR_PTR(-EFAULT); page = pfn_to_page(pfn); - ret = try_grab_page(page, flags); + ret = try_grab_folio(page_folio(page), 1, flags); if (ret) page = ERR_PTR(ret); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f35abff8be60..43e1af868cfd 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1625,13 +1625,10 @@ static inline void destroy_compound_gigantic_folio(struct folio *folio, * folio appears as just a compound page. Otherwise, wait until after * allocating vmemmap to clear the flag. * - * A reference is held on the folio, except in the case of demote. - * * Must be called with hugetlb lock held. */ -static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio, - bool adjust_surplus, - bool demote) +static void remove_hugetlb_folio(struct hstate *h, struct folio *folio, + bool adjust_surplus) { int nid = folio_nid(folio); @@ -1645,6 +1642,7 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio, list_del(&folio->lru); if (folio_test_hugetlb_freed(folio)) { + folio_clear_hugetlb_freed(folio); h->free_huge_pages--; h->free_huge_pages_node[nid]--; } @@ -1661,33 +1659,13 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio, if (!folio_test_hugetlb_vmemmap_optimized(folio)) __folio_clear_hugetlb(folio); - /* - * In the case of demote we do not ref count the page as it will soon - * be turned into a page of smaller size. - */ - if (!demote) - folio_ref_unfreeze(folio, 1); - h->nr_huge_pages--; h->nr_huge_pages_node[nid]--; } -static void remove_hugetlb_folio(struct hstate *h, struct folio *folio, - bool adjust_surplus) -{ - __remove_hugetlb_folio(h, folio, adjust_surplus, false); -} - -static void remove_hugetlb_folio_for_demote(struct hstate *h, struct folio *folio, - bool adjust_surplus) -{ - __remove_hugetlb_folio(h, folio, adjust_surplus, true); -} - static void add_hugetlb_folio(struct hstate *h, struct folio *folio, bool adjust_surplus) { - int zeroed; int nid = folio_nid(folio); VM_BUG_ON_FOLIO(!folio_test_hugetlb_vmemmap_optimized(folio), folio); @@ -1711,21 +1689,6 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio, */ folio_set_hugetlb_vmemmap_optimized(folio); - /* - * This folio is about to be managed by the hugetlb allocator and - * should have no users. Drop our reference, and check for others - * just in case. - */ - zeroed = folio_put_testzero(folio); - if (unlikely(!zeroed)) - /* - * It is VERY unlikely soneone else has taken a ref - * on the folio. In this case, we simply return as - * free_huge_folio() will be called when this other ref - * is dropped. - */ - return; - arch_clear_hugetlb_flags(folio); enqueue_hugetlb_folio(h, folio); } @@ -1763,13 +1726,6 @@ static void __update_and_free_hugetlb_folio(struct hstate *h, } /* - * Move PageHWPoison flag from head page to the raw error pages, - * which makes any healthy subpages reusable. - */ - if (unlikely(folio_test_hwpoison(folio))) - folio_clear_hugetlb_hwpoison(folio); - - /* * If vmemmap pages were allocated above, then we need to clear the * hugetlb flag under the hugetlb lock. */ @@ -1780,6 +1736,15 @@ static void __update_and_free_hugetlb_folio(struct hstate *h, } /* + * Move PageHWPoison flag from head page to the raw error pages, + * which makes any healthy subpages reusable. + */ + if (unlikely(folio_test_hwpoison(folio))) + folio_clear_hugetlb_hwpoison(folio); + + folio_ref_unfreeze(folio, 1); + + /* * Non-gigantic pages demoted from CMA allocated gigantic pages * need to be given back to CMA in free_gigantic_folio. */ @@ -2197,6 +2162,9 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h, nid = numa_mem_id(); retry: folio = __folio_alloc(gfp_mask, order, nid, nmask); + /* Ensure hugetlb folio won't have large_rmappable flag set. */ + if (folio) + folio_clear_large_rmappable(folio); if (folio && !folio_ref_freeze(folio, 1)) { folio_put(folio); @@ -3079,11 +3047,8 @@ retry: free_new: spin_unlock_irq(&hugetlb_lock); - if (new_folio) { - /* Folio has a zero ref count, but needs a ref to be freed */ - folio_ref_unfreeze(new_folio, 1); + if (new_folio) update_and_free_hugetlb_folio(h, new_folio, false); - } return ret; } @@ -3938,7 +3903,7 @@ static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio) target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order); - remove_hugetlb_folio_for_demote(h, folio, false); + remove_hugetlb_folio(h, folio, false); spin_unlock_irq(&hugetlb_lock); /* @@ -3952,7 +3917,6 @@ static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio) if (rc) { /* Allocation of vmemmmap failed, we can not demote folio */ spin_lock_irq(&hugetlb_lock); - folio_ref_unfreeze(folio, 1); add_hugetlb_folio(h, folio, false); return rc; } diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index b9a55322e52c..8193906515c6 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -446,6 +446,8 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h, unsigned long vmemmap_reuse; VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio); + VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio); + if (!folio_test_hugetlb_vmemmap_optimized(folio)) return 0; @@ -481,6 +483,9 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h, */ int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio) { + /* avoid writes from page_ref_add_unless() while unfolding vmemmap */ + synchronize_rcu(); + return __hugetlb_vmemmap_restore_folio(h, folio, 0); } @@ -505,6 +510,9 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h, long restored = 0; long ret = 0; + /* avoid writes from page_ref_add_unless() while unfolding vmemmap */ + synchronize_rcu(); + list_for_each_entry_safe(folio, t_folio, folio_list, lru) { if (folio_test_hugetlb_vmemmap_optimized(folio)) { ret = __hugetlb_vmemmap_restore_folio(h, folio, @@ -550,6 +558,8 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h, unsigned long vmemmap_reuse; VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio); + VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio); + if (!vmemmap_should_optimize_folio(h, folio)) return ret; @@ -601,6 +611,9 @@ void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio) { LIST_HEAD(vmemmap_pages); + /* avoid writes from page_ref_add_unless() while folding vmemmap */ + synchronize_rcu(); + __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, 0); free_vmemmap_page_list(&vmemmap_pages); } @@ -644,6 +657,9 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l flush_tlb_all(); + /* avoid writes from page_ref_add_unless() while folding vmemmap */ + synchronize_rcu(); + list_for_each_entry(folio, folio_list, lru) { int ret; diff --git a/mm/internal.h b/mm/internal.h index 6902b7dd8509..cc2c5e07fad3 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1182,8 +1182,8 @@ int migrate_device_coherent_page(struct page *page); /* * mm/gup.c */ -struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags); -int __must_check try_grab_page(struct page *page, unsigned int flags); +int __must_check try_grab_folio(struct folio *folio, int refs, + unsigned int flags); /* * mm/huge_memory.c diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 71fe2a95b8bd..8f2f1bb18c9c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7823,17 +7823,6 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new) /* Transfer the charge and the css ref */ commit_charge(new, memcg); - /* - * If the old folio is a large folio and is in the split queue, it needs - * to be removed from the split queue now, in case getting an incorrect - * split queue in destroy_large_folio() after the memcg of the old folio - * is cleared. - * - * In addition, the old folio is about to be freed after migration, so - * removing from the split queue a bit earlier seems reasonable. - */ - if (folio_test_large(old) && folio_test_large_rmappable(old)) - folio_undo_large_rmappable(old); old->memcg_data = 0; } diff --git a/mm/migrate.c b/mm/migrate.c index 20cb9f5f7446..a8c6f466e33a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -415,6 +415,15 @@ int folio_migrate_mapping(struct address_space *mapping, if (folio_ref_count(folio) != expected_count) return -EAGAIN; + /* Take off deferred split queue while frozen and memcg set */ + if (folio_test_large(folio) && + folio_test_large_rmappable(folio)) { + if (!folio_ref_freeze(folio, expected_count)) + return -EAGAIN; + folio_undo_large_rmappable(folio); + folio_ref_unfreeze(folio, expected_count); + } + /* No turning back from here */ newfolio->index = folio->index; newfolio->mapping = folio->mapping; @@ -433,6 +442,10 @@ int folio_migrate_mapping(struct address_space *mapping, return -EAGAIN; } + /* Take off deferred split queue while frozen and memcg set */ + if (folio_test_large(folio) && folio_test_large_rmappable(folio)) + folio_undo_large_rmappable(folio); + /* * Now we know that no one else is looking at the folio: * no turning back from here. diff --git a/mm/readahead.c b/mm/readahead.c index c1b23989d9ca..817b2a352d78 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -503,11 +503,11 @@ void page_cache_ra_order(struct readahead_control *ractl, limit = min(limit, index + ra->size - 1); - if (new_order < MAX_PAGECACHE_ORDER) { + if (new_order < MAX_PAGECACHE_ORDER) new_order += 2; - new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order); - new_order = min_t(unsigned int, new_order, ilog2(ra->size)); - } + + new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order); + new_order = min_t(unsigned int, new_order, ilog2(ra->size)); /* See comment in page_cache_ra_unbounded() */ nofs = memalloc_nofs_save(); diff --git a/mm/shmem.c b/mm/shmem.c index a8b181a63402..c1befe046c7e 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -541,8 +541,9 @@ static bool shmem_confirm_swap(struct address_space *mapping, static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER; -bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, - struct mm_struct *mm, unsigned long vm_flags) +static bool __shmem_is_huge(struct inode *inode, pgoff_t index, + bool shmem_huge_force, struct mm_struct *mm, + unsigned long vm_flags) { loff_t i_size; @@ -573,6 +574,16 @@ bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, } } +bool shmem_is_huge(struct inode *inode, pgoff_t index, + bool shmem_huge_force, struct mm_struct *mm, + unsigned long vm_flags) +{ + if (HPAGE_PMD_ORDER > MAX_PAGECACHE_ORDER) + return false; + + return __shmem_is_huge(inode, index, shmem_huge_force, mm, vm_flags); +} + #if defined(CONFIG_SYSFS) static int shmem_parse_huge(const char *str) { diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d0cbdd7c1e5b..e34ea860153f 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2543,7 +2543,15 @@ static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue); static struct xarray * addr_to_vb_xa(unsigned long addr) { - int index = (addr / VMAP_BLOCK_SIZE) % num_possible_cpus(); + int index = (addr / VMAP_BLOCK_SIZE) % nr_cpu_ids; + + /* + * Please note, nr_cpu_ids points on a highest set + * possible bit, i.e. we never invoke cpumask_next() + * if an index points on it which is nr_cpu_ids - 1. + */ + if (!cpu_possible(index)) + index = cpumask_next(index, cpu_possible_mask); return &per_cpu(vmap_block_queue, index).vmap_blocks; } diff --git a/mm/workingset.c b/mm/workingset.c index c22adb93622a..a2b28e356e68 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -412,10 +412,12 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg) * @file: whether the corresponding folio is from the file lru. * @workingset: where the workingset value unpacked from shadow should * be stored. + * @flush: whether to flush cgroup rstat. * * Return: true if the shadow is for a recently evicted folio; false otherwise. */ -bool workingset_test_recent(void *shadow, bool file, bool *workingset) +bool workingset_test_recent(void *shadow, bool file, bool *workingset, + bool flush) { struct mem_cgroup *eviction_memcg; struct lruvec *eviction_lruvec; @@ -467,10 +469,16 @@ bool workingset_test_recent(void *shadow, bool file, bool *workingset) /* * Flush stats (and potentially sleep) outside the RCU read section. + * + * Note that workingset_test_recent() itself might be called in RCU read + * section (for e.g, in cachestat) - these callers need to skip flushing + * stats (via the flush argument). + * * XXX: With per-memcg flushing and thresholding, is ratelimiting * still needed here? */ - mem_cgroup_flush_stats_ratelimited(eviction_memcg); + if (flush) + mem_cgroup_flush_stats_ratelimited(eviction_memcg); eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat); refault = atomic_long_read(&eviction_lruvec->nonresident_age); @@ -558,7 +566,7 @@ void workingset_refault(struct folio *folio, void *shadow) mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr); - if (!workingset_test_recent(shadow, file, &workingset)) + if (!workingset_test_recent(shadow, file, &workingset, true)) return; folio_set_active(folio); diff --git a/net/core/datagram.c b/net/core/datagram.c index b2f391edb9e8..a40f733b37d7 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -423,11 +423,12 @@ static int __skb_datagram_iter(const struct sk_buff *skb, int offset, if (copy > len) copy = len; + n = 0; skb_frag_foreach_page(frag, skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_local_page(p); - n = INDIRECT_CALL_1(cb, simple_copy_to_iter, + n += INDIRECT_CALL_1(cb, simple_copy_to_iter, vaddr + p_off, p_len, data, to); kunmap_local(vaddr); } diff --git a/net/core/skmsg.c b/net/core/skmsg.c index fd20aae30be2..bbf40b999713 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -434,7 +434,8 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, page = sg_page(sge); if (copied + copy > len) copy = len - copied; - copy = copy_page_to_iter(page, sge->offset, copy, iter); + if (copy) + copy = copy_page_to_iter(page, sge->offset, copy, iter); if (!copy) { copied = copied ? copied : -EFAULT; goto out; diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c index b2de2108b356..34d76e87847d 100644 --- a/net/ethtool/linkstate.c +++ b/net/ethtool/linkstate.c @@ -37,6 +37,8 @@ static int linkstate_get_sqi(struct net_device *dev) mutex_lock(&phydev->lock); if (!phydev->drv || !phydev->drv->get_sqi) ret = -EOPNOTSUPP; + else if (!phydev->link) + ret = -ENETDOWN; else ret = phydev->drv->get_sqi(phydev); mutex_unlock(&phydev->lock); @@ -55,6 +57,8 @@ static int linkstate_get_sqi_max(struct net_device *dev) mutex_lock(&phydev->lock); if (!phydev->drv || !phydev->drv->get_sqi_max) ret = -EOPNOTSUPP; + else if (!phydev->link) + ret = -ENETDOWN; else ret = phydev->drv->get_sqi_max(phydev); mutex_unlock(&phydev->lock); @@ -62,6 +66,17 @@ static int linkstate_get_sqi_max(struct net_device *dev) return ret; }; +static bool linkstate_sqi_critical_error(int sqi) +{ + return sqi < 0 && sqi != -EOPNOTSUPP && sqi != -ENETDOWN; +} + +static bool linkstate_sqi_valid(struct linkstate_reply_data *data) +{ + return data->sqi >= 0 && data->sqi_max >= 0 && + data->sqi <= data->sqi_max; +} + static int linkstate_get_link_ext_state(struct net_device *dev, struct linkstate_reply_data *data) { @@ -93,12 +108,12 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base, data->link = __ethtool_get_link(dev); ret = linkstate_get_sqi(dev); - if (ret < 0 && ret != -EOPNOTSUPP) + if (linkstate_sqi_critical_error(ret)) goto out; data->sqi = ret; ret = linkstate_get_sqi_max(dev); - if (ret < 0 && ret != -EOPNOTSUPP) + if (linkstate_sqi_critical_error(ret)) goto out; data->sqi_max = ret; @@ -136,11 +151,10 @@ static int linkstate_reply_size(const struct ethnl_req_info *req_base, len = nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */ + 0; - if (data->sqi != -EOPNOTSUPP) - len += nla_total_size(sizeof(u32)); - - if (data->sqi_max != -EOPNOTSUPP) - len += nla_total_size(sizeof(u32)); + if (linkstate_sqi_valid(data)) { + len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI */ + len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI_MAX */ + } if (data->link_ext_state_provided) len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_STATE */ @@ -164,13 +178,14 @@ static int linkstate_fill_reply(struct sk_buff *skb, nla_put_u8(skb, ETHTOOL_A_LINKSTATE_LINK, !!data->link)) return -EMSGSIZE; - if (data->sqi != -EOPNOTSUPP && - nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi)) - return -EMSGSIZE; + if (linkstate_sqi_valid(data)) { + if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi)) + return -EMSGSIZE; - if (data->sqi_max != -EOPNOTSUPP && - nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max)) - return -EMSGSIZE; + if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, + data->sqi_max)) + return -EMSGSIZE; + } if (data->link_ext_state_provided) { if (nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_STATE, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 47dacb575f74..e0f54b9be850 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2129,8 +2129,16 @@ void tcp_clear_retrans(struct tcp_sock *tp) static inline void tcp_init_undo(struct tcp_sock *tp) { tp->undo_marker = tp->snd_una; + /* Retransmission still in flight may cause DSACKs later. */ - tp->undo_retrans = tp->retrans_out ? : -1; + /* First, account for regular retransmits in flight: */ + tp->undo_retrans = tp->retrans_out; + /* Next, account for TLP retransmits in flight: */ + if (tp->tlp_high_seq && tp->tlp_retrans) + tp->undo_retrans++; + /* Finally, avoid 0, because undo_retrans==0 means "can undo now": */ + if (!tp->undo_retrans) + tp->undo_retrans = -1; } static bool tcp_is_rack(const struct sock *sk) @@ -2209,6 +2217,7 @@ void tcp_enter_loss(struct sock *sk) tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; + tp->tlp_high_seq = 0; tcp_ecn_queue_cwr(tp); /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index ab4b6de0e069..4d40615dc8fc 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -479,15 +479,26 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk, const struct sk_buff *skb, u32 rtx_delta) { + const struct inet_connection_sock *icsk = inet_csk(sk); + u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout); const struct tcp_sock *tp = tcp_sk(sk); - const int timeout = TCP_RTO_MAX * 2; + int timeout = TCP_RTO_MAX * 2; s32 rcv_delta; + if (user_timeout) { + /* If user application specified a TCP_USER_TIMEOUT, + * it does not want win 0 packets to 'reset the timer' + * while retransmits are not making progress. + */ + if (rtx_delta > user_timeout) + return true; + timeout = min_t(u32, timeout, msecs_to_jiffies(user_timeout)); + } /* Note: timer interrupt might have been delayed by at least one jiffy, * and tp->rcv_tstamp might very well have been written recently. * rcv_delta can thus be negative. */ - rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp; + rcv_delta = icsk->icsk_timeout - tp->rcv_tstamp; if (rcv_delta <= timeout) return false; @@ -532,8 +543,6 @@ void tcp_retransmit_timer(struct sock *sk) if (WARN_ON_ONCE(!skb)) return; - tp->tlp_high_seq = 0; - if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { /* Receiver dastardly shrinks window. Our retransmits diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ed97df6af14d..49c622e743e8 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -326,6 +326,8 @@ found: goto fail_unlock; } + sock_set_flag(sk, SOCK_RCU_FREE); + sk_add_node_rcu(sk, &hslot->head); hslot->count++; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -342,7 +344,7 @@ found: hslot2->count++; spin_unlock(&hslot2->lock); } - sock_set_flag(sk, SOCK_RCU_FREE); + error = 0; fail_unlock: spin_unlock_bh(&hslot->lock); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4a07834809bb..481ee78e77bc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3866,6 +3866,15 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r nf_tables_rule_destroy(ctx, rule); } +/** nft_chain_validate - loop detection and hook validation + * + * @ctx: context containing call depth and base chain + * @chain: chain to validate + * + * Walk through the rules of the given chain and chase all jumps/gotos + * and set lookups until either the jump limit is hit or all reachable + * chains have been validated. + */ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) { struct nft_expr *expr, *last; @@ -3887,6 +3896,9 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) if (!expr->ops->validate) continue; + /* This may call nft_chain_validate() recursively, + * callers that do so must increment ctx->level. + */ err = expr->ops->validate(ctx, expr, &data); if (err < 0) return err; @@ -10879,150 +10891,6 @@ int nft_chain_validate_hooks(const struct nft_chain *chain, } EXPORT_SYMBOL_GPL(nft_chain_validate_hooks); -/* - * Loop detection - walk through the ruleset beginning at the destination chain - * of a new jump until either the source chain is reached (loop) or all - * reachable chains have been traversed. - * - * The loop check is performed whenever a new jump verdict is added to an - * expression or verdict map or a verdict map is bound to a new chain. - */ - -static int nf_tables_check_loops(const struct nft_ctx *ctx, - const struct nft_chain *chain); - -static int nft_check_loops(const struct nft_ctx *ctx, - const struct nft_set_ext *ext) -{ - const struct nft_data *data; - int ret; - - data = nft_set_ext_data(ext); - switch (data->verdict.code) { - case NFT_JUMP: - case NFT_GOTO: - ret = nf_tables_check_loops(ctx, data->verdict.chain); - break; - default: - ret = 0; - break; - } - - return ret; -} - -static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, - struct nft_set *set, - const struct nft_set_iter *iter, - struct nft_elem_priv *elem_priv) -{ - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); - - if (!nft_set_elem_active(ext, iter->genmask)) - return 0; - - if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && - *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) - return 0; - - return nft_check_loops(ctx, ext); -} - -static int nft_set_catchall_loops(const struct nft_ctx *ctx, - struct nft_set *set) -{ - u8 genmask = nft_genmask_next(ctx->net); - struct nft_set_elem_catchall *catchall; - struct nft_set_ext *ext; - int ret = 0; - - list_for_each_entry_rcu(catchall, &set->catchall_list, list) { - ext = nft_set_elem_ext(set, catchall->elem); - if (!nft_set_elem_active(ext, genmask)) - continue; - - ret = nft_check_loops(ctx, ext); - if (ret < 0) - return ret; - } - - return ret; -} - -static int nf_tables_check_loops(const struct nft_ctx *ctx, - const struct nft_chain *chain) -{ - const struct nft_rule *rule; - const struct nft_expr *expr, *last; - struct nft_set *set; - struct nft_set_binding *binding; - struct nft_set_iter iter; - - if (ctx->chain == chain) - return -ELOOP; - - if (fatal_signal_pending(current)) - return -EINTR; - - list_for_each_entry(rule, &chain->rules, list) { - nft_rule_for_each_expr(expr, last, rule) { - struct nft_immediate_expr *priv; - const struct nft_data *data; - int err; - - if (strcmp(expr->ops->type->name, "immediate")) - continue; - - priv = nft_expr_priv(expr); - if (priv->dreg != NFT_REG_VERDICT) - continue; - - data = &priv->data; - switch (data->verdict.code) { - case NFT_JUMP: - case NFT_GOTO: - err = nf_tables_check_loops(ctx, - data->verdict.chain); - if (err < 0) - return err; - break; - default: - break; - } - } - } - - list_for_each_entry(set, &ctx->table->sets, list) { - if (!nft_is_active_next(ctx->net, set)) - continue; - if (!(set->flags & NFT_SET_MAP) || - set->dtype != NFT_DATA_VERDICT) - continue; - - list_for_each_entry(binding, &set->bindings, list) { - if (!(binding->flags & NFT_SET_MAP) || - binding->chain != chain) - continue; - - iter.genmask = nft_genmask_next(ctx->net); - iter.type = NFT_ITER_UPDATE; - iter.skip = 0; - iter.count = 0; - iter.err = 0; - iter.fn = nf_tables_loop_check_setelem; - - set->ops->walk(ctx, set, &iter); - if (!iter.err) - iter.err = nft_set_catchall_loops(ctx, set); - - if (iter.err < 0) - return iter.err; - } - } - - return 0; -} - /** * nft_parse_u32_check - fetch u32 attribute and check for maximum value * @@ -11135,7 +11003,7 @@ static int nft_validate_register_store(const struct nft_ctx *ctx, if (data != NULL && (data->verdict.code == NFT_GOTO || data->verdict.code == NFT_JUMP)) { - err = nf_tables_check_loops(ctx, data->verdict.chain); + err = nft_chain_validate(ctx, data->verdict.chain); if (err < 0) return err; } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index f1c31757e496..55e28e1da66e 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -325,7 +325,7 @@ static void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) hooks = nf_hook_entries_head(net, pf, entry->state.hook); i = entry->hook_index; - if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) { + if (!hooks || i >= hooks->num_hook_entries) { kfree_skb_reason(skb, SKB_DROP_REASON_NETFILTER_DROP); nf_queue_entry_free(entry); return; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index a6b7c514a181..113b907da0f7 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1081,6 +1081,14 @@ do_nat: err = nf_conntrack_confirm(skb); if (err != NF_ACCEPT) goto nf_error; + + /* The ct may be dropped if a clash has been resolved, + * so it's necessary to retrieve it from skb again to + * prevent UAF. + */ + ct = nf_ct_get(skb, &ctinfo); + if (!ct) + skip_add = true; } if (!skip_add) diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index c2ef9dcf91d2..cc6051d4f2ef 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -91,7 +91,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt, entry = tcx_entry_fetch_or_create(dev, true, &created); if (!entry) return -ENOMEM; - tcx_miniq_set_active(entry, true); + tcx_miniq_inc(entry); mini_qdisc_pair_init(&q->miniqp, sch, &tcx_entry(entry)->miniq); if (created) tcx_entry_update(dev, entry, true); @@ -121,7 +121,7 @@ static void ingress_destroy(struct Qdisc *sch) tcf_block_put_ext(q->block, sch, &q->block_info); if (entry) { - tcx_miniq_set_active(entry, false); + tcx_miniq_dec(entry); if (!tcx_entry_is_active(entry)) { tcx_entry_update(dev, NULL, true); tcx_entry_free(entry); @@ -257,7 +257,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, entry = tcx_entry_fetch_or_create(dev, true, &created); if (!entry) return -ENOMEM; - tcx_miniq_set_active(entry, true); + tcx_miniq_inc(entry); mini_qdisc_pair_init(&q->miniqp_ingress, sch, &tcx_entry(entry)->miniq); if (created) tcx_entry_update(dev, entry, true); @@ -276,7 +276,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, entry = tcx_entry_fetch_or_create(dev, false, &created); if (!entry) return -ENOMEM; - tcx_miniq_set_active(entry, true); + tcx_miniq_inc(entry); mini_qdisc_pair_init(&q->miniqp_egress, sch, &tcx_entry(entry)->miniq); if (created) tcx_entry_update(dev, entry, false); @@ -302,7 +302,7 @@ static void clsact_destroy(struct Qdisc *sch) tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info); if (ingress_entry) { - tcx_miniq_set_active(ingress_entry, false); + tcx_miniq_dec(ingress_entry); if (!tcx_entry_is_active(ingress_entry)) { tcx_entry_update(dev, NULL, true); tcx_entry_free(ingress_entry); @@ -310,7 +310,7 @@ static void clsact_destroy(struct Qdisc *sch) } if (egress_entry) { - tcx_miniq_set_active(egress_entry, false); + tcx_miniq_dec(egress_entry); if (!tcx_entry_is_active(egress_entry)) { tcx_entry_update(dev, NULL, false); tcx_entry_free(egress_entry); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index dfc353eea8ed..0e1691316f42 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2441,6 +2441,13 @@ static void xs_tcp_setup_socket(struct work_struct *work) transport->srcport = 0; status = -EAGAIN; break; + case -EPERM: + /* Happens, for instance, if a BPF program is preventing + * the connect. Remap the error so upper layers can better + * deal with it. + */ + status = -ECONNREFUSED; + fallthrough; case -EINVAL: /* Happens, for instance, if the user specified a link * local IPv6 address without a scope-id. diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index abdd22007ed8..e4a79a9b2d58 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -427,8 +427,6 @@ static void __init remove_securityfs_measurement_lists(struct dentry **lists) kfree(lists); } - - securityfs_measurement_list_count = 0; } static int __init create_securityfs_measurement_lists(void) @@ -625,6 +623,7 @@ out: securityfs_remove(binary_runtime_measurements); remove_securityfs_measurement_lists(ascii_securityfs_measurement_lists); remove_securityfs_measurement_lists(binary_securityfs_measurement_lists); + securityfs_measurement_list_count = 0; securityfs_remove(ima_symlink); securityfs_remove(ima_dir); diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 233f2b6edf52..49b79cf0c5cc 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -86,14 +86,6 @@ static struct comm_str *comm_str__new(const char *str) return result; } -static int comm_str__cmp(const void *_lhs, const void *_rhs) -{ - const struct comm_str *lhs = *(const struct comm_str * const *)_lhs; - const struct comm_str *rhs = *(const struct comm_str * const *)_rhs; - - return strcmp(comm_str__str(lhs), comm_str__str(rhs)); -} - static int comm_str__search(const void *_key, const void *_member) { const char *key = _key; @@ -169,9 +161,24 @@ static struct comm_str *comm_strs__findnew(const char *str) } result = comm_str__new(str); if (result) { - comm_strs->strs[comm_strs->num_strs++] = result; - qsort(comm_strs->strs, comm_strs->num_strs, sizeof(struct comm_str *), - comm_str__cmp); + int low = 0, high = comm_strs->num_strs - 1; + int insert = comm_strs->num_strs; /* Default to inserting at the end. */ + + while (low <= high) { + int mid = low + (high - low) / 2; + int cmp = strcmp(comm_str__str(comm_strs->strs[mid]), str); + + if (cmp < 0) { + low = mid + 1; + } else { + high = mid - 1; + insert = mid; + } + } + memmove(&comm_strs->strs[insert + 1], &comm_strs->strs[insert], + (comm_strs->num_strs - insert) * sizeof(struct comm_str *)); + comm_strs->num_strs++; + comm_strs->strs[insert] = result; } } up_write(&comm_strs->lock); diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index ab3d0c01dd63..a69a9c661200 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -203,11 +203,27 @@ int __dsos__add(struct dsos *dsos, struct dso *dso) dsos->dsos = temp; dsos->allocated = to_allocate; } - dsos->dsos[dsos->cnt++] = dso__get(dso); - if (dsos->cnt >= 2 && dsos->sorted) { - dsos->sorted = dsos__cmp_long_name_id_short_name(&dsos->dsos[dsos->cnt - 2], - &dsos->dsos[dsos->cnt - 1]) - <= 0; + if (!dsos->sorted) { + dsos->dsos[dsos->cnt++] = dso__get(dso); + } else { + int low = 0, high = dsos->cnt - 1; + int insert = dsos->cnt; /* Default to inserting at the end. */ + + while (low <= high) { + int mid = low + (high - low) / 2; + int cmp = dsos__cmp_long_name_id_short_name(&dsos->dsos[mid], &dso); + + if (cmp < 0) { + low = mid + 1; + } else { + high = mid - 1; + insert = mid; + } + } + memmove(&dsos->dsos[insert + 1], &dsos->dsos[insert], + (dsos->cnt - insert) * sizeof(struct dso *)); + dsos->cnt++; + dsos->dsos[insert] = dso__get(dso); } dso__set_dsos(dso, dsos); return 0; diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 5291e97df749..4ca84c8d9116 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -58,9 +58,12 @@ CONFIG_MPLS=y CONFIG_MPLS_IPTUNNEL=y CONFIG_MPLS_ROUTING=y CONFIG_MPTCP=y +CONFIG_NET_ACT_SKBMOD=y +CONFIG_NET_CLS=y CONFIG_NET_CLS_ACT=y CONFIG_NET_CLS_BPF=y CONFIG_NET_CLS_FLOWER=y +CONFIG_NET_CLS_MATCHALL=y CONFIG_NET_FOU=y CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_NET_IPGRE=y diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c index bc9841144685..1af9ec1149aa 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_links.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c @@ -9,6 +9,8 @@ #define ping_cmd "ping -q -c1 -w1 127.0.0.1 > /dev/null" #include "test_tc_link.skel.h" + +#include "netlink_helpers.h" #include "tc_helpers.h" void serial_test_tc_links_basic(void) @@ -1787,6 +1789,65 @@ void serial_test_tc_links_ingress(void) test_tc_links_ingress(BPF_TCX_INGRESS, false, false); } +struct qdisc_req { + struct nlmsghdr n; + struct tcmsg t; + char buf[1024]; +}; + +static int qdisc_replace(int ifindex, const char *kind, bool block) +{ + struct rtnl_handle rth = { .fd = -1 }; + struct qdisc_req req; + int err; + + err = rtnl_open(&rth, 0); + if (!ASSERT_OK(err, "open_rtnetlink")) + return err; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_REQUEST; + req.n.nlmsg_type = RTM_NEWQDISC; + req.t.tcm_family = AF_UNSPEC; + req.t.tcm_ifindex = ifindex; + req.t.tcm_parent = 0xfffffff1; + + addattr_l(&req.n, sizeof(req), TCA_KIND, kind, strlen(kind) + 1); + if (block) + addattr32(&req.n, sizeof(req), TCA_INGRESS_BLOCK, 1); + + err = rtnl_talk(&rth, &req.n, NULL); + ASSERT_OK(err, "talk_rtnetlink"); + rtnl_close(&rth); + return err; +} + +void serial_test_tc_links_dev_chain0(void) +{ + int err, ifindex; + + ASSERT_OK(system("ip link add dev foo type veth peer name bar"), "add veth"); + ifindex = if_nametoindex("foo"); + ASSERT_NEQ(ifindex, 0, "non_zero_ifindex"); + err = qdisc_replace(ifindex, "ingress", true); + if (!ASSERT_OK(err, "attaching ingress")) + goto cleanup; + ASSERT_OK(system("tc filter add block 1 matchall action skbmod swap mac"), "add block"); + err = qdisc_replace(ifindex, "clsact", false); + if (!ASSERT_OK(err, "attaching clsact")) + goto cleanup; + /* Heuristic: kern_sync_rcu() alone does not work; a wait-time of ~5s + * triggered the issue without the fix reliably 100% of the time. + */ + sleep(5); + ASSERT_OK(system("tc filter add dev foo ingress matchall action skbmod swap mac"), "add filter"); +cleanup: + ASSERT_OK(system("ip link del dev foo"), "del veth"); + ASSERT_EQ(if_nametoindex("foo"), 0, "foo removed"); + ASSERT_EQ(if_nametoindex("bar"), 0, "bar removed"); +} + static void test_tc_links_dev_mixed(int target) { LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c new file mode 100644 index 000000000000..871d16cb95cf --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <sched.h> +#include <test_progs.h> +#include <pthread.h> +#include <network_helpers.h> + +#include "timer_lockup.skel.h" + +static long cpu; +static int *timer1_err; +static int *timer2_err; +static bool skip; + +volatile int k = 0; + +static void *timer_lockup_thread(void *arg) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1000, + ); + int i, prog_fd = *(int *)arg; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset); + ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset), + &cpuset), + "cpu affinity"); + + for (i = 0; !READ_ONCE(*timer1_err) && !READ_ONCE(*timer2_err); i++) { + bpf_prog_test_run_opts(prog_fd, &opts); + /* Skip the test if we can't reproduce the race in a reasonable + * amount of time. + */ + if (i > 50) { + WRITE_ONCE(skip, true); + break; + } + } + + return NULL; +} + +void test_timer_lockup(void) +{ + int timer1_prog, timer2_prog; + struct timer_lockup *skel; + pthread_t thrds[2]; + void *ret; + + skel = timer_lockup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load")) + return; + + timer1_prog = bpf_program__fd(skel->progs.timer1_prog); + timer2_prog = bpf_program__fd(skel->progs.timer2_prog); + + timer1_err = &skel->bss->timer1_err; + timer2_err = &skel->bss->timer2_err; + + if (!ASSERT_OK(pthread_create(&thrds[0], NULL, timer_lockup_thread, + &timer1_prog), + "pthread_create thread1")) + goto out; + if (!ASSERT_OK(pthread_create(&thrds[1], NULL, timer_lockup_thread, + &timer2_prog), + "pthread_create thread2")) { + pthread_exit(&thrds[0]); + goto out; + } + + pthread_join(thrds[1], &ret); + pthread_join(thrds[0], &ret); + + if (skip) { + test__skip(); + goto out; + } + + if (*timer1_err != -EDEADLK && *timer1_err != 0) + ASSERT_FAIL("timer1_err bad value"); + if (*timer2_err != -EDEADLK && *timer2_err != 0) + ASSERT_FAIL("timer2_err bad value"); +out: + timer_lockup__destroy(skel); + return; +} diff --git a/tools/testing/selftests/bpf/progs/timer_lockup.c b/tools/testing/selftests/bpf/progs/timer_lockup.c new file mode 100644 index 000000000000..3e520133281e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/timer_lockup.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <time.h> +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct elem { + struct bpf_timer t; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} timer1_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} timer2_map SEC(".maps"); + +int timer1_err; +int timer2_err; + +static int timer_cb1(void *map, int *k, struct elem *v) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer2_map, &key); + if (timer) + timer2_err = bpf_timer_cancel(timer); + + return 0; +} + +static int timer_cb2(void *map, int *k, struct elem *v) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer1_map, &key); + if (timer) + timer1_err = bpf_timer_cancel(timer); + + return 0; +} + +SEC("tc") +int timer1_prog(void *ctx) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer1_map, &key); + if (timer) { + bpf_timer_init(timer, &timer1_map, CLOCK_BOOTTIME); + bpf_timer_set_callback(timer, timer_cb1); + bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN); + } + + return 0; +} + +SEC("tc") +int timer2_prog(void *ctx) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer2_map, &key); + if (timer) { + bpf_timer_init(timer, &timer2_map, CLOCK_BOOTTIME); + bpf_timer_set_callback(timer, timer_cb2); + bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN); + } + + return 0; +} diff --git a/tools/testing/selftests/powerpc/flags.mk b/tools/testing/selftests/powerpc/flags.mk index b909bee3cb2a..abb9e58d95c4 100644 --- a/tools/testing/selftests/powerpc/flags.mk +++ b/tools/testing/selftests/powerpc/flags.mk @@ -5,8 +5,5 @@ GIT_VERSION := $(shell git describe --always --long --dirty || echo "unknown") export GIT_VERSION endif -ifeq ($(CFLAGS),) -CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(CFLAGS) +CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(USERCFLAGS) export CFLAGS -endif - diff --git a/tools/testing/selftests/riscv/sigreturn/sigreturn.c b/tools/testing/selftests/riscv/sigreturn/sigreturn.c index 62397d5934f1..ed351a1cb917 100644 --- a/tools/testing/selftests/riscv/sigreturn/sigreturn.c +++ b/tools/testing/selftests/riscv/sigreturn/sigreturn.c @@ -51,7 +51,7 @@ static int vector_sigreturn(int data, void (*handler)(int, siginfo_t *, void *)) asm(".option push \n\ .option arch, +v \n\ - vsetivli x0, 1, e32, ta, ma \n\ + vsetivli x0, 1, e32, m1, ta, ma \n\ vmv.s.x v0, %1 \n\ # Generate SIGSEGV \n\ lw a0, 0(x0) \n\ diff --git a/tools/testing/selftests/timens/exec.c b/tools/testing/selftests/timens/exec.c index e40dc5be2f66..d12ff955de0d 100644 --- a/tools/testing/selftests/timens/exec.c +++ b/tools/testing/selftests/timens/exec.c @@ -30,7 +30,7 @@ int main(int argc, char *argv[]) for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now.tv_sec) > 5) + if (labs(tst.tv_sec - now.tv_sec) > 5) return pr_fail("%ld %ld\n", now.tv_sec, tst.tv_sec); } return 0; @@ -50,7 +50,7 @@ int main(int argc, char *argv[]) for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now.tv_sec) > 5) + if (labs(tst.tv_sec - now.tv_sec) > 5) return pr_fail("%ld %ld\n", now.tv_sec, tst.tv_sec); } @@ -70,7 +70,7 @@ int main(int argc, char *argv[]) /* Check that a child process is in the new timens. */ for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now.tv_sec - OFFSET) > 5) + if (labs(tst.tv_sec - now.tv_sec - OFFSET) > 5) return pr_fail("%ld %ld\n", now.tv_sec + OFFSET, tst.tv_sec); } diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c index 5e7f0051bd7b..5b939f59dfa4 100644 --- a/tools/testing/selftests/timens/timer.c +++ b/tools/testing/selftests/timens/timer.c @@ -56,7 +56,7 @@ int run_test(int clockid, struct timespec now) return pr_perror("timerfd_gettime"); elapsed = new_value.it_value.tv_sec; - if (abs(elapsed - 3600) > 60) { + if (llabs(elapsed - 3600) > 60) { ksft_test_result_fail("clockid: %d elapsed: %lld\n", clockid, elapsed); return 1; diff --git a/tools/testing/selftests/timens/timerfd.c b/tools/testing/selftests/timens/timerfd.c index 9edd43d6b2c1..a4196bbd6e33 100644 --- a/tools/testing/selftests/timens/timerfd.c +++ b/tools/testing/selftests/timens/timerfd.c @@ -61,7 +61,7 @@ int run_test(int clockid, struct timespec now) return pr_perror("timerfd_gettime(%d)", clockid); elapsed = new_value.it_value.tv_sec; - if (abs(elapsed - 3600) > 60) { + if (llabs(elapsed - 3600) > 60) { ksft_test_result_fail("clockid: %d elapsed: %lld\n", clockid, elapsed); return 1; diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c index beb7614941fb..5b8907bf451d 100644 --- a/tools/testing/selftests/timens/vfork_exec.c +++ b/tools/testing/selftests/timens/vfork_exec.c @@ -32,7 +32,7 @@ static void *tcheck(void *_args) for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now->tv_sec) > 5) { + if (labs(tst.tv_sec - now->tv_sec) > 5) { pr_fail("%s: in-thread: unexpected value: %ld (%ld)\n", args->tst_name, tst.tv_sec, now->tv_sec); return (void *)1UL; @@ -64,7 +64,7 @@ static int check(char *tst_name, struct timespec *now) for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now->tv_sec) > 5) + if (labs(tst.tv_sec - now->tv_sec) > 5) return pr_fail("%s: unexpected value: %ld (%ld)\n", tst_name, tst.tv_sec, now->tv_sec); } diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index d53a4d8008f9..98d8ba2afa00 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -1,35 +1,30 @@ # SPDX-License-Identifier: GPL-2.0 -include ../lib.mk - uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) -TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu -TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi -TEST_GEN_PROGS += $(OUTPUT)/vdso_test_clock_getres +TEST_GEN_PROGS := vdso_test_gettimeofday +TEST_GEN_PROGS += vdso_test_getcpu +TEST_GEN_PROGS += vdso_test_abi +TEST_GEN_PROGS += vdso_test_clock_getres ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) -TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86 +TEST_GEN_PROGS += vdso_standalone_test_x86 endif -TEST_GEN_PROGS += $(OUTPUT)/vdso_test_correctness +TEST_GEN_PROGS += vdso_test_correctness CFLAGS := -std=gnu99 -CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector -LDFLAGS_vdso_test_correctness := -ldl + ifeq ($(CONFIG_X86_32),y) LDLIBS += -lgcc_s endif -all: $(TEST_GEN_PROGS) +include ../lib.mk $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c $(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c $(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c + $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c - $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \ - vdso_standalone_test_x86.c parse_vdso.c \ - -o $@ +$(OUTPUT)/vdso_standalone_test_x86: CFLAGS +=-nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector + $(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c - $(CC) $(CFLAGS) \ - vdso_test_correctness.c \ - -o $@ \ - $(LDFLAGS_vdso_test_correctness) +$(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 413f75620a35..4ae417372e9e 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -55,14 +55,20 @@ static struct vdso_info ELF(Verdef) *verdef; } vdso_info; -/* Straight from the ELF specification. */ -static unsigned long elf_hash(const unsigned char *name) +/* + * Straight from the ELF specification...and then tweaked slightly, in order to + * avoid a few clang warnings. + */ +static unsigned long elf_hash(const char *name) { unsigned long h = 0, g; - while (*name) + const unsigned char *uch_name = (const unsigned char *)name; + + while (*uch_name) { - h = (h << 4) + *name++; - if (g = h & 0xf0000000) + h = (h << 4) + *uch_name++; + g = h & 0xf0000000; + if (g) h ^= g >> 24; h &= ~g; } diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c index 8a44ff973ee1..27f6fdf11969 100644 --- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c +++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c @@ -18,7 +18,7 @@ #include "parse_vdso.h" -/* We need a libc functions... */ +/* We need some libc functions... */ int strcmp(const char *a, const char *b) { /* This implementation is buggy: it never returns -1. */ @@ -34,6 +34,20 @@ int strcmp(const char *a, const char *b) return 0; } +/* + * The clang build needs this, although gcc does not. + * Stolen from lib/string.c. + */ +void *memcpy(void *dest, const void *src, size_t count) +{ + char *tmp = dest; + const char *s = src; + + while (count--) + *tmp++ = *s++; + return dest; +} + /* ...and two syscalls. This is x86-specific. */ static inline long x86_syscall3(long nr, long a0, long a1, long a2) { @@ -70,7 +84,7 @@ void to_base10(char *lastdig, time_t n) } } -__attribute__((externally_visible)) void c_main(void **stack) +void c_main(void **stack) { /* Parse the stack */ long argc = (long)*stack; diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index e95bd56b332f..35856b11c143 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -109,9 +109,9 @@ KERNEL_ARCH := x86_64 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(HOST_ARCH),$(ARCH)) -QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi +QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off,acpi=off else -QEMU_MACHINE := -cpu max -machine microvm -no-acpi +QEMU_MACHINE := -cpu max -machine microvm,acpi=off endif else ifeq ($(ARCH),i686) CHOST := i686-linux-musl @@ -120,9 +120,9 @@ KERNEL_ARCH := x86 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH)) -QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi +QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off,acpi=off else -QEMU_MACHINE := -cpu coreduo -machine microvm -no-acpi +QEMU_MACHINE := -cpu coreduo -machine microvm,acpi=off endif else ifeq ($(ARCH),mips64) CHOST := mips64-linux-musl |