120 files changed, 3429 insertions, 786 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index d8fabfe37d5e..c5e7bb4babf0 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -429,6 +429,9 @@ arm64.nosme [ARM64] Unconditionally disable Scalable Matrix Extension support + arm64.nomops [ARM64] Unconditionally disable Memory Copy and Memory + Set instructions support + ataflop= [HW,M68k] atarimouse= [HW,MOUSE] Atari Mouse diff --git a/Documentation/admin-guide/perf/hisi-pmu.rst b/Documentation/admin-guide/perf/hisi-pmu.rst index 546979360513..e0174d20809a 100644 --- a/Documentation/admin-guide/perf/hisi-pmu.rst +++ b/Documentation/admin-guide/perf/hisi-pmu.rst @@ -56,14 +56,14 @@ Example usage of perf:: For HiSilicon uncore PMU v2 whose identifier is 0x30, the topology is the same as PMU v1, but some new functions are added to the hardware. -(a) L3C PMU supports filtering by core/thread within the cluster which can be +1. L3C PMU supports filtering by core/thread within the cluster which can be specified as a bitmap:: $# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_core=0x3/ sleep 5 This will only count the operations from core/thread 0 and 1 in this cluster. -(b) Tracetag allow the user to chose to count only read, write or atomic +2. Tracetag allows the user to choose to count only read, write or atomic operations via the tt_req parameter in perf. The default value counts all operations. tt_req is 3bits, 3'b100 represents read operations, 3'b101 represents write operations, 3'b110 represents atomic store operations and @@ -73,14 +73,16 @@ represents write operations, 3'b110 represents atomic store operations and This will only count the read operations in this cluster. -(c) Datasrc allows the user to check where the data comes from. It is 5 bits. +3. Datasrc allows the user to check where the data comes from. It is 5 bits. Some important codes are as follows: -5'b00001: comes from L3C in this die; -5'b01000: comes from L3C in the cross-die; -5'b01001: comes from L3C which is in another socket; -5'b01110: comes from the local DDR; -5'b01111: comes from the cross-die DDR; -5'b10000: comes from cross-socket DDR; + +- 5'b00001: comes from L3C in this die; +- 5'b01000: comes from L3C in the cross-die; +- 5'b01001: comes from L3C which is in another socket; +- 5'b01110: comes from the local DDR; +- 5'b01111: comes from the cross-die DDR; +- 5'b10000: comes from cross-socket DDR; + etc, it is mainly helpful to find that the data source is nearest from the CPU cores. If datasrc_cfg is used in the multi-chips, the datasrc_skt shall be configured in perf command:: @@ -88,15 +90,25 @@ configured in perf command:: $# perf stat -a -e hisi_sccl3_l3c0/config=0xb9,datasrc_cfg=0xE/, hisi_sccl3_l3c0/config=0xb9,datasrc_cfg=0xF/ sleep 5 -(d)Some HiSilicon SoCs encapsulate multiple CPU and IO dies. Each CPU die +4. Some HiSilicon SoCs encapsulate multiple CPU and IO dies. Each CPU die contains several Compute Clusters (CCLs). The I/O dies are called Super I/O clusters (SICL) containing multiple I/O clusters (ICLs). Each CCL/ICL in the SoC has a unique ID. Each ID is 11bits, include a 6-bit SCCL-ID and 5-bit CCL/ICL-ID. For I/O die, the ICL-ID is followed by: -5'b00000: I/O_MGMT_ICL; -5'b00001: Network_ICL; -5'b00011: HAC_ICL; -5'b10000: PCIe_ICL; + +- 5'b00000: I/O_MGMT_ICL; +- 5'b00001: Network_ICL; +- 5'b00011: HAC_ICL; +- 5'b10000: PCIe_ICL; + +5. 
uring_channel: UC PMU events 0x47~0x59 support filtering by tx request +uring channel. It is 2 bits. Some important codes are as follows: + +- 2'b11: count the events sent to the uring_ext (MATA) channel; +- 2'b01: is the same as 2'b11; +- 2'b10: count the events sent to the uring (non-MATA) channel; +- 2'b00: default value, count the events sent to both the uring and + uring_ext channels; Users could configure IDs to count data coming from specific CCL/ICL, by setting srcid_cmd & srcid_msk, and data destined for specific CCL/ICL by setting diff --git a/Documentation/arm64/acpi_object_usage.rst b/Documentation/arm64/acpi_object_usage.rst index 484ef9676653..1da22200fdf8 100644 --- a/Documentation/arm64/acpi_object_usage.rst +++ b/Documentation/arm64/acpi_object_usage.rst @@ -17,16 +17,37 @@ For ACPI on arm64, tables also fall into the following categories: - Recommended: BERT, EINJ, ERST, HEST, PCCT, SSDT - - Optional: BGRT, CPEP, CSRT, DBG2, DRTM, ECDT, FACS, FPDT, IBFT, - IORT, MCHI, MPST, MSCT, NFIT, PMTT, RASF, SBST, SLIT, SPMI, SRAT, - STAO, TCPA, TPM2, UEFI, XENV + - Optional: AGDI, BGRT, CEDT, CPEP, CSRT, DBG2, DRTM, ECDT, FACS, FPDT, + HMAT, IBFT, IORT, MCHI, MPAM, MPST, MSCT, NFIT, PMTT, PPTT, RASF, SBST, + SDEI, SLIT, SPMI, SRAT, STAO, TCPA, TPM2, UEFI, XENV - - Not supported: BOOT, DBGP, DMAR, ETDT, HPET, IVRS, LPIT, MSDM, OEMx, - PSDT, RSDT, SLIC, WAET, WDAT, WDRT, WPBT + - Not supported: AEST, APMT, BOOT, DBGP, DMAR, ETDT, HPET, IVRS, LPIT, + MSDM, OEMx, PDTT, PSDT, RAS2, RSDT, SLIC, WAET, WDAT, WDRT, WPBT ====== ======================================================================== Table Usage for ARMv8 Linux ====== ======================================================================== +AEST Signature Reserved (signature == "AEST") + + **Arm Error Source Table** + + This table informs the OS of any error nodes in the system that are + compliant with the Arm RAS architecture. + +AGDI Signature Reserved (signature == "AGDI") + + **Arm Generic diagnostic Dump and Reset Device Interface Table** + + This table describes a non-maskable event that is used by the platform + firmware to request the OS to generate a diagnostic dump and reset the device. + +APMT Signature Reserved (signature == "APMT") + + **Arm Performance Monitoring Table** + + This table describes the properties of PMU support implemented by + components in the system. + BERT Section 18.3 (signature == "BERT") **Boot Error Record Table** @@ -47,6 +68,13 @@ BGRT Section 5.2.22 (signature == "BGRT") Optional, not currently supported, with no real use-case for an ARM server. +CEDT Signature Reserved (signature == "CEDT") + + **CXL Early Discovery Table** + + This table allows the OS to discover any CXL Host Bridges and the Host + Bridge registers. + CPEP Section 5.2.18 (signature == "CPEP") **Corrected Platform Error Polling table** @@ -184,6 +212,15 @@ HEST Section 18.3.2 (signature == "HEST") Must be supplied if RAS support is provided by the platform. It is recommended this table be supplied. +HMAT Section 5.2.28 (signature == "HMAT") + + **Heterogeneous Memory Attribute Table** + + This table describes the memory attributes, such as memory side cache + attributes and bandwidth and latency details, related to Memory Proximity + Domains. The OS uses this information to optimize the system memory + configuration. + HPET Signature Reserved (signature == "HPET") **High Precision Event timer Table** @@ -241,6 +278,13 @@ MCHI Signature Reserved (signature == "MCHI") Optional, not currently supported. 
+MPAM Signature Reserved (signature == "MPAM") + + **Memory Partitioning And Monitoring table** + + This table allows the OS to discover the MPAM controls implemented by + the subsystems. + MPST Section 5.2.21 (signature == "MPST") **Memory Power State Table** @@ -281,18 +325,39 @@ PCCT Section 14.1 (signature == "PCCT) Recommend for use on arm64; use of PCC is recommended when using CPPC to control performance and power for platform processors. +PDTT Section 5.2.29 (signature == "PDTT") + + **Platform Debug Trigger Table** + + This table describes PCC channels used to gather debug logs of + non-architectural features. + + PMTT Section 5.2.21.12 (signature == "PMTT") **Platform Memory Topology Table** Optional, not currently supported. +PPTT Section 5.2.30 (signature == "PPTT") + + **Processor Properties Topology Table** + + This table provides the processor and cache topology. + PSDT Section 5.2.11.3 (signature == "PSDT") **Persistent System Description Table** Obsolete table, will not be supported. +RAS2 Section 5.2.21 (signature == "RAS2") + + **RAS Features 2 table** + + This table provides interfaces for the RAS capabilities implemented in + the platform. + RASF Section 5.2.20 (signature == "RASF") **RAS Feature table** @@ -318,6 +383,12 @@ SBST Section 5.2.14 (signature == "SBST") Optional, not currently supported. +SDEI Signature Reserved (signature == "SDEI") + + **Software Delegated Exception Interface table** + + This table advertises the presence of the SDEI interface. + SLIC Signature Reserved (signature == "SLIC") **Software LIcensing table** diff --git a/Documentation/arm64/arm-acpi.rst b/Documentation/arm64/arm-acpi.rst index 47ecb9930dde..37ec5e9b1575 100644 --- a/Documentation/arm64/arm-acpi.rst +++ b/Documentation/arm64/arm-acpi.rst @@ -1,40 +1,41 @@ -===================== -ACPI on ARMv8 Servers -===================== - -ACPI can be used for ARMv8 general purpose servers designed to follow -the ARM SBSA (Server Base System Architecture) [0] and SBBR (Server -Base Boot Requirements) [1] specifications. Please note that the SBBR -can be retrieved simply by visiting [1], but the SBSA is currently only -available to those with an ARM login due to ARM IP licensing concerns. - -The ARMv8 kernel implements the reduced hardware model of ACPI version +=================== +ACPI on Arm systems +=================== + +ACPI can be used for Armv8 and Armv9 systems designed to follow +the BSA (Arm Base System Architecture) [0] and BBR (Arm +Base Boot Requirements) [1] specifications. Both BSA and BBR are publicly +accessible documents. +Arm Servers, in addition to being BSA compliant, comply with a set +of rules defined in SBSA (Server Base System Architecture) [2]. + +The Arm kernel implements the reduced hardware model of ACPI version 5.1 or later. Links to the specification and all external documents it refers to are managed by the UEFI Forum. The specification is available at http://www.uefi.org/specifications and documents referenced by the specification can be found via http://www.uefi.org/acpi. -If an ARMv8 system does not meet the requirements of the SBSA and SBBR, +If an Arm system does not meet the requirements of the BSA and BBR, or cannot be described using the mechanisms defined in the required ACPI specifications, then ACPI may not be a good fit for the hardware. 
While the documents mentioned above set out the requirements for building -industry-standard ARMv8 servers, they also apply to more than one operating +industry-standard Arm systems, they also apply to more than one operating system. The purpose of this document is to describe the interaction between -ACPI and Linux only, on an ARMv8 system -- that is, what Linux expects of +ACPI and Linux only, on an Arm system -- that is, what Linux expects of ACPI and what ACPI can expect of Linux. -Why ACPI on ARM? +Why ACPI on Arm? ---------------- Before examining the details of the interface between ACPI and Linux, it is useful to understand why ACPI is being used. Several technologies already exist in Linux for describing non-enumerable hardware, after all. In this -section we summarize a blog post [2] from Grant Likely that outlines the -reasoning behind ACPI on ARMv8 servers. Actually, we snitch a good portion +section we summarize a blog post [3] from Grant Likely that outlines the +reasoning behind ACPI on Arm systems. Actually, we snitch a good portion of the summary text almost directly, to be honest. -The short form of the rationale for ACPI on ARM is: +The short form of the rationale for ACPI on Arm is: - ACPI’s byte code (AML) allows the platform to encode hardware behavior, while DT explicitly does not support this. For hardware vendors, being @@ -47,7 +48,7 @@ The short form of the rationale for ACPI on ARM is: - In the enterprise server environment, ACPI has established bindings (such as for RAS) which are currently used in production systems. DT does not. - Such bindings could be defined in DT at some point, but doing so means ARM + Such bindings could be defined in DT at some point, but doing so means Arm and x86 would end up using completely different code paths in both firmware and the kernel. @@ -108,7 +109,7 @@ recent version of the kernel. Relationship with Device Tree ----------------------------- -ACPI support in drivers and subsystems for ARMv8 should never be mutually +ACPI support in drivers and subsystems for Arm should never be mutually exclusive with DT support at compile time. At boot time the kernel will only use one description method depending on @@ -121,11 +122,11 @@ time). Booting using ACPI tables ------------------------- -The only defined method for passing ACPI tables to the kernel on ARMv8 +The only defined method for passing ACPI tables to the kernel on Arm is via the UEFI system configuration table. Just so it is explicit, this means that ACPI is only supported on platforms that boot via UEFI. -When an ARMv8 system boots, it can either have DT information, ACPI tables, +When an Arm system boots, it can either have DT information, ACPI tables, or in some very unusual cases, both. If no command line parameters are used, the kernel will try to use DT for device enumeration; if there is no DT present, the kernel will try to use ACPI tables, but only if they are present. @@ -169,7 +170,7 @@ hardware reduced mode must be set to zero. 
For the ACPI core to operate properly, and in turn provide the information the kernel needs to configure devices, it expects to find the following -tables (all section numbers refer to the ACPI 6.1 specification): +tables (all section numbers refer to the ACPI 6.5 specification): - RSDP (Root System Description Pointer), section 5.2.5 @@ -184,20 +185,76 @@ tables (all section numbers refer to the ACPI 6.1 specification): - GTDT (Generic Timer Description Table), section 5.2.24 + - PPTT (Processor Properties Topology Table), section 5.2.30 + + - DBG2 (DeBuG port table 2), section 5.2.6, specifically Table 5-6. + + - APMT (Arm Performance Monitoring unit Table), section 5.2.6, specifically Table 5-6. + + - AGDI (Arm Generic diagnostic Dump and Reset Device Interface Table), section 5.2.6, specifically Table 5-6. + - If PCI is supported, the MCFG (Memory mapped ConFiGuration - Table), section 5.2.6, specifically Table 5-31. + Table), section 5.2.6, specifically Table 5-6. - If booting without a console=<device> kernel parameter is supported, the SPCR (Serial Port Console Redirection table), - section 5.2.6, specifically Table 5-31. + section 5.2.6, specifically Table 5-6. - If necessary to describe the I/O topology, SMMUs and GIC ITSs, the IORT (Input Output Remapping Table, section 5.2.6, specifically - Table 5-31). + Table 5-6). + + - If NUMA is supported, the following tables are required: + + - SRAT (System Resource Affinity Table), section 5.2.16 + + - SLIT (System Locality distance Information Table), section 5.2.17 + + - If NUMA is supported, and the system contains heterogeneous memory, + the HMAT (Heterogeneous Memory Attribute Table), section 5.2.28. + + - If the ACPI Platform Error Interfaces are required, the following + tables are conditionally required: + + - BERT (Boot Error Record Table, section 18.3.1) + + - EINJ (Error INJection table, section 18.6.1) + + - ERST (Error Record Serialization Table, section 18.5) + + - HEST (Hardware Error Source Table, section 18.3.2) + + - SDEI (Software Delegated Exception Interface table, section 5.2.6, + specifically Table 5-6) + + - AEST (Arm Error Source Table, section 5.2.6, + specifically Table 5-6) + + - RAS2 (ACPI RAS2 feature table, section 5.2.21) + + - If the system contains controllers using PCC channel, the + PCCT (Platform Communications Channel Table), section 14.1 + + - If the system contains a controller to capture board-level system state, + and communicates with the host via PCC, the PDTT (Platform Debug Trigger + Table), section 5.2.29. + + - If NVDIMM is supported, the NFIT (NVDIMM Firmware Interface Table), section 5.2.26 + + - If video framebuffer is present, the BGRT (Boot Graphics Resource Table), section 5.2.23 + + - If IPMI is implemented, the SPMI (Server Platform Management Interface), + section 5.2.6, specifically Table 5-6. + + - If the system contains a CXL Host Bridge, the CEDT (CXL Early Discovery + Table), section 5.2.6, specifically Table 5-6. + + - If the system supports MPAM, the MPAM (Memory Partitioning And Monitoring table), section 5.2.6, + specifically Table 5-6. + + - If the system lacks persistent storage, the IBFT (ISCSI Boot Firmware + Table), section 5.2.6, specifically Table 5-6. - - If NUMA is supported, the SRAT (System Resource Affinity Table) - and SLIT (System Locality distance Information Table), sections - 5.2.16 and 5.2.17, respectively. 
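As a rough illustration of how kernel code can probe for one of the conditionally required tables listed above, the generic accessor acpi_get_table() can be used. This is a sketch only, not part of the patch; the SRAT check and the function name are invented for the example::

	/*
	 * Sketch: probe for the SRAT, which is required when NUMA is
	 * supported. acpi_get_table() takes a table signature and an
	 * instance number, and returns a mapped header on success.
	 */
	#include <linux/acpi.h>
	#include <linux/init.h>
	#include <linux/printk.h>

	static void __init srat_presence_check(void)
	{
		struct acpi_table_header *table;
		acpi_status status;

		status = acpi_get_table(ACPI_SIG_SRAT, 0, &table);
		if (ACPI_FAILURE(status)) {
			pr_info("SRAT absent: no NUMA description from firmware\n");
			return;
		}

		pr_info("SRAT present: revision %u, length %u bytes\n",
			table->revision, table->length);

		/* Drop the reference taken by acpi_get_table(). */
		acpi_put_table(table);
	}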
If the above tables are not all present, the kernel may or may not be able to boot properly since it may not be able to configure all of the @@ -269,16 +326,14 @@ Drivers should look for device properties in the _DSD object ONLY; the _DSD object is described in the ACPI specification section 6.2.5, but this only describes how to define the structure of an object returned via _DSD, and how specific data structures are defined by specific UUIDs. Linux should -only use the _DSD Device Properties UUID [5]: +only use the _DSD Device Properties UUID [4]: - UUID: daffd814-6eba-4d8c-8a91-bc9bbf4aa301 - - https://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf - -The UEFI Forum provides a mechanism for registering device properties [4] -so that they may be used across all operating systems supporting ACPI. -Device properties that have not been registered with the UEFI Forum should -not be used. +Common device properties can be registered by creating a pull request to [4] so +that they may be used across all operating systems supporting ACPI. +Device properties that have not been registered with the UEFI Forum can be used +but not as "uefi-" common properties. Before creating new device properties, check to be sure that they have not been defined before and either registered in the Linux kernel documentation @@ -306,7 +361,7 @@ process. Once registration and review have been completed, the kernel provides an interface for looking up device properties in a manner independent of -whether DT or ACPI is being used. This API should be used [6]; it can +whether DT or ACPI is being used. This API should be used [5]; it can eliminate some duplication of code paths in driver probing functions and discourage divergence between DT bindings and ACPI device properties. @@ -448,15 +503,15 @@ ASWG ---- The ACPI specification changes regularly. During the year 2014, for instance, version 5.1 was released and version 6.0 substantially completed, with most of -the changes being driven by ARM-specific requirements. Proposed changes are +the changes being driven by Arm-specific requirements. Proposed changes are presented and discussed in the ASWG (ACPI Specification Working Group) which is a part of the UEFI Forum. The current version of the ACPI specification -is 6.1 release in January 2016. +is 6.5, released in August 2022. Participation in this group is open to all UEFI members. Please see http://www.uefi.org/workinggroup for details on group membership. -It is the intent of the ARMv8 ACPI kernel code to follow the ACPI specification +It is the intent of the Arm ACPI kernel code to follow the ACPI specification as closely as possible, and to only implement functionality that complies with the released standards from UEFI ASWG. As a practical matter, there will be vendors that provide bad ACPI tables or violate the standards in some way. @@ -470,12 +525,12 @@ likely be willing to assist in submitting ECRs. Linux Code ---------- -Individual items specific to Linux on ARM, contained in the Linux +Individual items specific to Linux on Arm, contained in the Linux source code, are in the list that follows: ACPI_OS_NAME This macro defines the string to be returned when - an ACPI method invokes the _OS method. On ARM64 + an ACPI method invokes the _OS method. On Arm systems, this macro will be "Linux" by default. The command line parameter acpi_os=<string> can be used to set it to some other value. The @@ -490,31 +545,23 @@ Documentation/arm64/acpi_object_usage.rst. 
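To make the unified property interface referenced as [5] below concrete, here is a minimal sketch of a lookup that works identically whether the property came from a DT binding or an ACPI _DSD package. The "vendor,line-count" property name and the helper are invented for illustration; only device_property_read_u32() is the real interface::

	#include <linux/device.h>
	#include <linux/property.h>

	/* One code path for both DT- and ACPI-described devices. */
	static int example_get_line_count(struct device *dev, u32 *lines)
	{
		int ret;

		/* "vendor,line-count" is a hypothetical property name. */
		ret = device_property_read_u32(dev, "vendor,line-count", lines);
		if (ret)
			return ret;	/* property missing or malformed */

		dev_info(dev, "firmware describes %u lines\n", *lines);
		return 0;
	}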
References ---------- -[0] http://silver.arm.com - document ARM-DEN-0029, or newer: - "Server Base System Architecture", version 2.3, dated 27 Mar 2014 +[0] https://developer.arm.com/documentation/den0094/latest + document Arm-DEN-0094: "Arm Base System Architecture", version 1.0C, dated 6 Oct 2022 + +[1] https://developer.arm.com/documentation/den0044/latest + Document Arm-DEN-0044: "Arm Base Boot Requirements", version 2.0G, dated 15 Apr 2022 -[1] http://infocenter.arm.com/help/topic/com.arm.doc.den0044a/Server_Base_Boot_Requirements.pdf - Document ARM-DEN-0044A, or newer: "Server Base Boot Requirements, System - Software on ARM Platforms", dated 16 Aug 2014 +[2] https://developer.arm.com/documentation/den0029/latest + Document Arm-DEN-0029: "Arm Server Base System Architecture", version 7.1, dated 06 Oct 2022 -[2] http://www.secretlab.ca/archives/151, +[3] http://www.secretlab.ca/archives/151, 10 Jan 2015, Copyright (c) 2015, Linaro Ltd., written by Grant Likely. -[3] AMD ACPI for Seattle platform documentation - http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Seattle_ACPI_Guide.pdf - - -[4] http://www.uefi.org/acpi - please see the link for the "ACPI _DSD Device - Property Registry Instructions" - -[5] http://www.uefi.org/acpi - please see the link for the "_DSD (Device - Specific Data) Implementation Guide" +[4] _DSD (Device Specific Data) Implementation Guide + https://github.com/UEFI/DSD-Guide/blob/main/dsd-guide.pdf -[6] Kernel code for the unified device +[5] Kernel code for the unified device property interface can be found in include/linux/property.h and drivers/base/property.c. diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst index ffeccdd6bdac..b57776a68f15 100644 --- a/Documentation/arm64/booting.rst +++ b/Documentation/arm64/booting.rst @@ -379,6 +379,38 @@ Before jumping into the kernel, the following conditions must be met: - SMCR_EL2.EZT0 (bit 30) must be initialised to 0b1. + For CPUs with Memory Copy and Memory Set instructions (FEAT_MOPS): + + - If the kernel is entered at EL1 and EL2 is present: + + - HCRX_EL2.MSCEn (bit 11) must be initialised to 0b1. + + For CPUs with the Extended Translation Control Register feature (FEAT_TCR2): + + - If EL3 is present: + + - SCR_EL3.TCR2En (bit 43) must be initialised to 0b1. + + - If the kernel is entered at EL1 and EL2 is present: + + - HCRX_EL2.TCR2En (bit 14) must be initialised to 0b1. + + For CPUs with the Stage 1 Permission Indirection Extension feature (FEAT_S1PIE): + + - If EL3 is present: + + - SCR_EL3.PIEn (bit 45) must be initialised to 0b1. + + - If the kernel is entered at EL1 and EL2 is present: + + - HFGRTR_EL2.nPIR_EL1 (bit 58) must be initialised to 0b1. + + - HFGWTR_EL2.nPIR_EL1 (bit 58) must be initialised to 0b1. + + - HFGRTR_EL2.nPIRE0_EL1 (bit 57) must be initialised to 0b1. + + - HFGWTR_EL2.nPIRE0_EL1 (bit 57) must be initialised to 0b1. + The requirements described above for CPU mode, caches, MMUs, architected timers, coherency and system registers apply to all CPUs. All CPUs must enter the kernel in the same exception level. 
Where the values documented diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst index c7adc7897df6..4e4625f2455f 100644 --- a/Documentation/arm64/cpu-feature-registers.rst +++ b/Documentation/arm64/cpu-feature-registers.rst @@ -288,6 +288,8 @@ infrastructure: +------------------------------+---------+---------+ | Name | bits | visible | +------------------------------+---------+---------+ + | MOPS | [19-16] | y | + +------------------------------+---------+---------+ | RPRES | [7-4] | y | +------------------------------+---------+---------+ | WFXT | [3-0] | y | diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index 83e57e4d38e2..8f847d0dcf57 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -302,6 +302,9 @@ HWCAP2_SMEB16B16 HWCAP2_SMEF16F16 Functionality implied by ID_AA64SMFR0_EL1.F16F16 == 0b1 +HWCAP2_MOPS + Functionality implied by ID_AA64ISAR2_EL1.MOPS == 0b0001. + 4. Unused AT_HWCAP bits ----------------------- diff --git a/Documentation/arm64/index.rst b/Documentation/arm64/index.rst index ae21f8118830..d08e924204bf 100644 --- a/Documentation/arm64/index.rst +++ b/Documentation/arm64/index.rst @@ -15,11 +15,13 @@ ARM64 Architecture cpu-feature-registers elf_hwcaps hugetlbpage + kdump legacy_instructions memory memory-tagging-extension perf pointer-authentication + ptdump silicon-errata sme sve diff --git a/Documentation/arm64/kdump.rst b/Documentation/arm64/kdump.rst new file mode 100644 index 000000000000..56a89f45df28 --- /dev/null +++ b/Documentation/arm64/kdump.rst @@ -0,0 +1,92 @@ +======================================= +crashkernel memory reservation on arm64 +======================================= + +Author: Baoquan He <bhe@redhat.com> + +The kdump mechanism is used to capture a corrupted kernel's vmcore so that +it can be subsequently analyzed. In order to do this, memory is reserved +in advance to pre-load the kdump kernel and to boot that +kernel if corruption happens. + +The memory reserved for kdump is sized to minimally +accommodate the kdump kernel and the user space programs needed for +vmcore collection. + +Kernel parameter +================ + +Through the kernel parameters below, memory can be reserved accordingly +during the early stage of the first kernel booting so that a continuous +large chunk of memory can be found. The low memory reservation needs to +be considered if the crashkernel is reserved from the high memory area. + +- crashkernel=size@offset +- crashkernel=size +- crashkernel=size,high crashkernel=size,low + +Low memory and high memory +========================== + +For kdump reservations, low memory is the memory area under a specific +limit, usually decided by the accessible address bits of the DMA-capable +devices needed by the kdump kernel to run. Those devices not related to +vmcore dumping can be ignored. On arm64, the low memory upper bound is +not fixed: it is 1G on the RPi4 platform but 4G on most other systems. +On special kernels built with CONFIG_ZONE_(DMA|DMA32) disabled, the +whole system RAM is low memory. Outside of the low memory described +above, the rest of system RAM is considered high memory. + +Implementation +============== + +1) crashkernel=size@offset +-------------------------- + +The crashkernel memory must be reserved at the user-specified region or +fail if already occupied. 
+ + +2) crashkernel=size +------------------- + +The crashkernel memory region will be reserved in any available position +according to the search order: + +Firstly, the kernel searches the low memory area for an available region +with the specified size. + +If searching for low memory fails, the kernel falls back to searching +the high memory area for an available region of the specified size. If +the reservation in high memory succeeds, a default size reservation in +the low memory will be done. Currently the default size is 128M, +sufficient for the low memory needs of the kdump kernel. + +Note: crashkernel=size is the recommended option for crashkernel +reservations. The user does not need to know the system memory layout +for a specific platform. + +3) crashkernel=size,high crashkernel=size,low +--------------------------------------------- + +crashkernel=size,(high|low) are an important supplement to +crashkernel=size. They allow the user to specify how much memory needs +to be allocated from the high memory and low memory respectively. On +many systems the low memory is precious and crashkernel reservations +from this area should be kept to a minimum. + +To reserve memory for crashkernel=size,high, searching is first +attempted from the high memory region. If the reservation succeeds, the +low memory reservation will be done subsequently. + +If reservation from the high memory fails, the kernel falls back to +searching the low memory with the specified size in crashkernel=,high. +If it succeeds, no further reservation for low memory is needed. + +Notes: + +- If crashkernel=,low is not specified, the default low memory + reservation will be done automatically. + +- If crashkernel=0,low is specified, it means that the low memory + reservation is omitted intentionally. 
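For illustration, the three reservation styles described in this file correspond to kernel command-line settings such as the following; the sizes and the offset are example values only, not recommendations::

	crashkernel=512M@0x80000000
	crashkernel=512M
	crashkernel=512M,high crashkernel=256M,low

The first form fails if the fixed region is already occupied, the second lets the kernel pick a region (with the automatic 128M low-memory fallback described above), and the third controls the high and low reservations explicitly.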
diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst index 2a641ba7be3b..55a55f30eed8 100644 --- a/Documentation/arm64/memory.rst +++ b/Documentation/arm64/memory.rst @@ -33,8 +33,8 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit):: 0000000000000000 0000ffffffffffff 256TB user ffff000000000000 ffff7fffffffffff 128TB kernel logical memory map [ffff600000000000 ffff7fffffffffff] 32TB [kasan shadow region] - ffff800000000000 ffff800007ffffff 128MB modules - ffff800008000000 fffffbffefffffff 124TB vmalloc + ffff800000000000 ffff80007fffffff 2GB modules + ffff800080000000 fffffbffefffffff 124TB vmalloc fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down) fffffbfffe000000 fffffbfffe7fffff 8MB [guard region] fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space @@ -50,8 +50,8 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support): 0000000000000000 000fffffffffffff 4PB user fff0000000000000 ffff7fffffffffff ~4PB kernel logical memory map [fffd800000000000 ffff7fffffffffff] 512TB [kasan shadow region] - ffff800000000000 ffff800007ffffff 128MB modules - ffff800008000000 fffffbffefffffff 124TB vmalloc + ffff800000000000 ffff80007fffffff 2GB modules + ffff800080000000 fffffbffefffffff 124TB vmalloc fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down) fffffbfffe000000 fffffbfffe7fffff 8MB [guard region] fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space diff --git a/Documentation/arm64/ptdump.rst b/Documentation/arm64/ptdump.rst new file mode 100644 index 000000000000..5dcfc5d7cddf --- /dev/null +++ b/Documentation/arm64/ptdump.rst @@ -0,0 +1,96 @@ +====================== +Kernel page table dump +====================== + +ptdump is a debugfs interface that provides a detailed dump of the +kernel page tables. It offers a comprehensive overview of the kernel +virtual memory layout as well as the attributes associated with the +various regions in a human-readable format. It is useful to dump the +kernel page tables to verify permissions and memory types. Examining the +page table entries and permissions helps identify potential security +vulnerabilities such as mappings with overly permissive access rights or +improper memory protections. + +Memory hotplug allows dynamic expansion or contraction of available +memory without requiring a system reboot. To maintain the consistency +and integrity of the memory management data structures, arm64 makes use +of the ``mem_hotplug_lock`` semaphore in write mode. Additionally, in +read mode, ``mem_hotplug_lock`` supports an efficient implementation of +``get_online_mems()`` and ``put_online_mems()``. These protect the +offlining of memory being accessed by the ptdump code. + +In order to dump the kernel page tables, enable the following +configurations and mount debugfs:: + + CONFIG_GENERIC_PTDUMP=y + CONFIG_PTDUMP_CORE=y + CONFIG_PTDUMP_DEBUGFS=y + + mount -t debugfs nodev /sys/kernel/debug + cat /sys/kernel/debug/kernel_page_tables + +On analysing the output of ``cat /sys/kernel/debug/kernel_page_tables`` +one can derive information about the virtual address range of the entry, +followed by size of the memory region covered by this entry, the +hierarchical structure of the page tables and finally the attributes +associated with each page. The page attributes provide information about +access permissions, execution capability, type of mapping such as leaf +level PTE or block level PGD, PMD and PUD, and access status of a page +within the kernel memory. 
Assessing these attributes can assist in +understanding the memory layout, access patterns and security +characteristics of the kernel pages. + +Kernel virtual memory layout example:: + + start address end address size attributes + +---------------------------------------------------------------------------------------+ + | ---[ Linear Mapping start ]---------------------------------------------------------- | + | .................. | + | 0xfff0000000000000-0xfff0000000210000 2112K PTE RW NX SHD AF UXN MEM/NORMAL-TAGGED | + | 0xfff0000000210000-0xfff0000001c00000 26560K PTE ro NX SHD AF UXN MEM/NORMAL | + | .................. | + | ---[ Linear Mapping end ]------------------------------------------------------------ | + +---------------------------------------------------------------------------------------+ + | ---[ Modules start ]----------------------------------------------------------------- | + | .................. | + | 0xffff800000000000-0xffff800008000000 128M PTE | + | .................. | + | ---[ Modules end ]------------------------------------------------------------------- | + +---------------------------------------------------------------------------------------+ + | ---[ vmalloc() area ]---------------------------------------------------------------- | + | .................. | + | 0xffff800008010000-0xffff800008200000 1984K PTE ro x SHD AF UXN MEM/NORMAL | + | 0xffff800008200000-0xffff800008e00000 12M PTE ro x SHD AF CON UXN MEM/NORMAL | + | .................. | + | ---[ vmalloc() end ]----------------------------------------------------------------- | + +---------------------------------------------------------------------------------------+ + | ---[ Fixmap start ]------------------------------------------------------------------ | + | .................. | + | 0xfffffbfffdb80000-0xfffffbfffdb90000 64K PTE ro x SHD AF UXN MEM/NORMAL | + | 0xfffffbfffdb90000-0xfffffbfffdba0000 64K PTE ro NX SHD AF UXN MEM/NORMAL | + | .................. | + | ---[ Fixmap end ]-------------------------------------------------------------------- | + +---------------------------------------------------------------------------------------+ + | ---[ PCI I/O start ]----------------------------------------------------------------- | + | .................. | + | 0xfffffbfffe800000-0xfffffbffff800000 16M PTE | + | .................. | + | ---[ PCI I/O end ]------------------------------------------------------------------- | + +---------------------------------------------------------------------------------------+ + | ---[ vmemmap start ]----------------------------------------------------------------- | + | .................. | + | 0xfffffc0002000000-0xfffffc0002200000 2M PTE RW NX SHD AF UXN MEM/NORMAL | + | 0xfffffc0002200000-0xfffffc0020000000 478M PTE | + | .................. 
| + | ---[ vmemmap end ]------------------------------------------------------------------- | + +---------------------------------------------------------------------------------------+ + +``cat /sys/kernel/debug/kernel_page_tables`` output:: + + 0xfff0000001c00000-0xfff0000080000000 2020M PTE RW NX SHD AF UXN MEM/NORMAL-TAGGED + 0xfff0000080000000-0xfff0000800000000 30G PMD + 0xfff0000800000000-0xfff0000800700000 7M PTE RW NX SHD AF UXN MEM/NORMAL-TAGGED + 0xfff0000800700000-0xfff0000800710000 64K PTE ro NX SHD AF UXN MEM/NORMAL-TAGGED + 0xfff0000800710000-0xfff0000880000000 2089920K PTE RW NX SHD AF UXN MEM/NORMAL-TAGGED + 0xfff0000880000000-0xfff0040000000000 4062G PMD + 0xfff0040000000000-0xffff800000000000 3964T PGD diff --git a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml index 80a92385367e..e9fad4b3de68 100644 --- a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml +++ b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/perf/fsl-imx-ddr.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Freescale(NXP) IMX8 DDR performance monitor +title: Freescale(NXP) IMX8/9 DDR performance monitor maintainers: - Frank Li <frank.li@nxp.com> @@ -19,6 +19,7 @@ properties: - fsl,imx8mm-ddr-pmu - fsl,imx8mn-ddr-pmu - fsl,imx8mp-ddr-pmu + - fsl,imx93-ddr-pmu - items: - enum: - fsl,imx8mm-ddr-pmu diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 44911bce5389..d260ca10e8c1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -207,6 +207,7 @@ config ARM64 select HAVE_IOREMAP_PROT select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KVM + select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_PERF_EVENTS select HAVE_PERF_REGS @@ -578,7 +579,6 @@ config ARM64_ERRATUM_845719 config ARM64_ERRATUM_843419 bool "Cortex-A53: 843419: A load or store might access an incorrect address" default y - select ARM64_MODULE_PLTS if MODULES help This option links the kernel with '--fix-cortex-a53-843419' and enables PLT support to replace certain ADRP instructions, which can @@ -2108,26 +2108,6 @@ config ARM64_SME register state capable of holding two dimensional matrix tiles to enable various matrix operations. -config ARM64_MODULE_PLTS - bool "Use PLTs to allow module memory to spill over into vmalloc area" - depends on MODULES - select HAVE_MOD_ARCH_SPECIFIC - help - Allocate PLTs when loading modules so that jumps and calls whose - targets are too far away for their relative offsets to be encoded - in the instructions themselves can be bounced via veneers in the - module's PLT. This allows modules to be allocated in the generic - vmalloc area after the dedicated module memory area has been - exhausted. - - When running with address space randomization (KASLR), the module - region itself may be too far away for ordinary relative jumps and - calls, and so in that case, module PLTs are required and cannot be - disabled. - - Specific errata workaround(s) might also force module PLTs to be - enabled (ARM64_ERRATUM_843419). 
- config ARM64_PSEUDO_NMI bool "Support for NMI-like interrupts" select ARM_GIC_V3 @@ -2168,7 +2148,6 @@ config RELOCATABLE config RANDOMIZE_BASE bool "Randomize the address of the kernel image" - select ARM64_MODULE_PLTS if MODULES select RELOCATABLE help Randomizes the virtual address at which the kernel image is @@ -2199,9 +2178,8 @@ config RANDOMIZE_MODULE_REGION_FULL When this option is not set, the module region will be randomized over a limited range that contains the [_stext, _etext] interval of the core kernel, so branch relocations are almost always in range unless - ARM64_MODULE_PLTS is enabled and the region is exhausted. In this - particular case of region exhaustion, modules might be able to fall - back to a larger 2GB area. + the region is exhausted. In this particular case of region + exhaustion, modules might be able to fall back to a larger 2GB area. config CC_HAVE_STACKPROTECTOR_SYSREG def_bool $(cc-option,-mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=0) diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index bdf1f6bcd010..94b486192e1f 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -23,17 +23,17 @@ #include <linux/stringify.h> -#define ALTINSTR_ENTRY(feature) \ +#define ALTINSTR_ENTRY(cpucap) \ " .word 661b - .\n" /* label */ \ " .word 663f - .\n" /* new instruction */ \ - " .hword " __stringify(feature) "\n" /* feature bit */ \ + " .hword " __stringify(cpucap) "\n" /* cpucap */ \ " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ -#define ALTINSTR_ENTRY_CB(feature, cb) \ +#define ALTINSTR_ENTRY_CB(cpucap, cb) \ " .word 661b - .\n" /* label */ \ - " .word " __stringify(cb) "- .\n" /* callback */ \ - " .hword " __stringify(feature) "\n" /* feature bit */ \ + " .word " __stringify(cb) "- .\n" /* callback */ \ + " .hword " __stringify(cpucap) "\n" /* cpucap */ \ " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ @@ -53,13 +53,13 @@ * * Alternatives with callbacks do not generate replacement instructions. */ -#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ +#define __ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, cfg_enabled) \ ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature) \ + ALTINSTR_ENTRY(cpucap) \ ".popsection\n" \ ".subsection 1\n" \ "663:\n\t" \ @@ -70,31 +70,31 @@ ".previous\n" \ ".endif\n" -#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \ +#define __ALTERNATIVE_CFG_CB(oldinstr, cpucap, cfg_enabled, cb) \ ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY_CB(feature, cb) \ + ALTINSTR_ENTRY_CB(cpucap, cb) \ ".popsection\n" \ "663:\n\t" \ "664:\n\t" \ ".endif\n" -#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \ - __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) +#define _ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, cfg, ...) 
\ + __ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, IS_ENABLED(cfg)) -#define ALTERNATIVE_CB(oldinstr, feature, cb) \ - __ALTERNATIVE_CFG_CB(oldinstr, (1 << ARM64_CB_SHIFT) | (feature), 1, cb) +#define ALTERNATIVE_CB(oldinstr, cpucap, cb) \ + __ALTERNATIVE_CFG_CB(oldinstr, (1 << ARM64_CB_SHIFT) | (cpucap), 1, cb) #else #include <asm/assembler.h> -.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len +.macro altinstruction_entry orig_offset alt_offset cpucap orig_len alt_len .word \orig_offset - . .word \alt_offset - . - .hword (\feature) + .hword (\cpucap) .byte \orig_len .byte \alt_len .endm @@ -210,9 +210,9 @@ alternative_endif #endif /* __ASSEMBLY__ */ /* - * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature)); + * Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap)); * - * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO)); + * Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap, CONFIG_FOO)); * N.B. If CONFIG_FOO is specified, but not selected, the whole block * will be omitted, including oldinstr. */ @@ -224,15 +224,15 @@ alternative_endif #include <linux/types.h> static __always_inline bool -alternative_has_feature_likely(const unsigned long feature) +alternative_has_cap_likely(const unsigned long cpucap) { - compiletime_assert(feature < ARM64_NCAPS, - "feature must be < ARM64_NCAPS"); + compiletime_assert(cpucap < ARM64_NCAPS, + "cpucap must be < ARM64_NCAPS"); asm_volatile_goto( - ALTERNATIVE_CB("b %l[l_no]", %[feature], alt_cb_patch_nops) + ALTERNATIVE_CB("b %l[l_no]", %[cpucap], alt_cb_patch_nops) : - : [feature] "i" (feature) + : [cpucap] "i" (cpucap) : : l_no); @@ -242,15 +242,15 @@ l_no: } static __always_inline bool -alternative_has_feature_unlikely(const unsigned long feature) +alternative_has_cap_unlikely(const unsigned long cpucap) { - compiletime_assert(feature < ARM64_NCAPS, - "feature must be < ARM64_NCAPS"); + compiletime_assert(cpucap < ARM64_NCAPS, + "cpucap must be < ARM64_NCAPS"); asm_volatile_goto( - ALTERNATIVE("nop", "b %l[l_yes]", %[feature]) + ALTERNATIVE("nop", "b %l[l_yes]", %[cpucap]) : - : [feature] "i" (feature) + : [cpucap] "i" (cpucap) : : l_yes); diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index a38b92e11811..00d97b8a757f 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -13,7 +13,7 @@ struct alt_instr { s32 orig_offset; /* offset to original instruction */ s32 alt_offset; /* offset to replacement instruction */ - u16 cpufeature; /* cpufeature bit set for replacement */ + u16 cpucap; /* cpucap bit set for replacement */ u8 orig_len; /* size of original instruction(s) */ u8 alt_len; /* size of new instruction(s), <= orig_len */ }; @@ -23,7 +23,7 @@ typedef void (*alternative_cb_t)(struct alt_instr *alt, void __init apply_boot_alternatives(void); void __init apply_alternatives_all(void); -bool alternative_is_applied(u16 cpufeature); +bool alternative_is_applied(u16 cpucap); #ifdef CONFIG_MODULES void apply_alternatives_module(void *start, size_t length); @@ -31,5 +31,8 @@ void apply_alternatives_module(void *start, size_t length); static inline void apply_alternatives_module(void *start, size_t length) { } #endif +void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst); + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ALTERNATIVE_H */ diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h index 2f5f3da34782..b0abc64f86b0 100644 --- a/arch/arm64/include/asm/archrandom.h 
+++ b/arch/arm64/include/asm/archrandom.h @@ -129,4 +129,6 @@ static inline bool __init __early_cpu_has_rndr(void) return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf; } +u64 kaslr_early_init(void *fdt); + #endif /* _ASM_ARCHRANDOM_H */ diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index 75b211c98dea..5b6efe8abeeb 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -18,7 +18,6 @@ bic \tmp1, \tmp1, #TTBR_ASID_MASK sub \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET // reserved_pg_dir msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 - isb add \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET msr ttbr1_el1, \tmp1 // set reserved ASID isb @@ -31,7 +30,6 @@ extr \tmp2, \tmp2, \tmp1, #48 ror \tmp2, \tmp2, #16 msr ttbr1_el1, \tmp2 // set the active ASID - isb msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1 isb .endm diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 74575c3d6987..ae904a1ad529 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -96,6 +96,8 @@ static inline int is_compat_thread(struct thread_info *thread) return test_ti_thread_flag(thread, TIF_32BIT); } +long compat_arm_syscall(struct pt_regs *regs, int scno); + #else /* !CONFIG_COMPAT */ static inline int is_compat_thread(struct thread_info *thread) diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index fd7a92219eea..e749838b9c5d 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -56,6 +56,7 @@ struct cpuinfo_arm64 { u64 reg_id_aa64mmfr0; u64 reg_id_aa64mmfr1; u64 reg_id_aa64mmfr2; + u64 reg_id_aa64mmfr3; u64 reg_id_aa64pfr0; u64 reg_id_aa64pfr1; u64 reg_id_aa64zfr0; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 6bf013fb110d..7a95c324e52a 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -107,7 +107,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; * CPU capabilities: * * We use arm64_cpu_capabilities to represent system features, errata work - * arounds (both used internally by kernel and tracked in cpu_hwcaps) and + * arounds (both used internally by kernel and tracked in system_cpucaps) and * ELF HWCAPs (which are exposed to user). 
* * To support systems with heterogeneous CPUs, we need to make sure that we @@ -419,12 +419,12 @@ static __always_inline bool is_hyp_code(void) return is_vhe_hyp_code() || is_nvhe_hyp_code(); } -extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); +extern DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS); -extern DECLARE_BITMAP(boot_capabilities, ARM64_NCAPS); +extern DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS); #define for_each_available_cap(cap) \ - for_each_set_bit(cap, cpu_hwcaps, ARM64_NCAPS) + for_each_set_bit(cap, system_cpucaps, ARM64_NCAPS) bool this_cpu_has_cap(unsigned int cap); void cpu_set_feature(unsigned int num); @@ -437,7 +437,7 @@ unsigned long cpu_get_elf_hwcap2(void); static __always_inline bool system_capabilities_finalized(void) { - return alternative_has_feature_likely(ARM64_ALWAYS_SYSTEM); + return alternative_has_cap_likely(ARM64_ALWAYS_SYSTEM); } /* @@ -449,7 +449,7 @@ static __always_inline bool cpus_have_cap(unsigned int num) { if (num >= ARM64_NCAPS) return false; - return arch_test_bit(num, cpu_hwcaps); + return arch_test_bit(num, system_cpucaps); } /* @@ -464,7 +464,7 @@ static __always_inline bool __cpus_have_const_cap(int num) { if (num >= ARM64_NCAPS) return false; - return alternative_has_feature_unlikely(num); + return alternative_has_cap_unlikely(num); } /* @@ -504,16 +504,6 @@ static __always_inline bool cpus_have_const_cap(int num) return cpus_have_cap(num); } -static inline void cpus_set_cap(unsigned int num) -{ - if (num >= ARM64_NCAPS) { - pr_warn("Attempt to set an illegal CPU capability (%d >= %d)\n", - num, ARM64_NCAPS); - } else { - __set_bit(num, cpu_hwcaps); - } -} - static inline int __attribute_const__ cpuid_feature_extract_signed_field_width(u64 features, int field, int width) { diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index f86b157a5da3..ef46f2daca62 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -166,4 +166,6 @@ static inline void efi_capsule_flush_cache_range(void *addr, int size) dcache_clean_inval_poc((unsigned long)addr, (unsigned long)addr + size); } +efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f); + #endif /* _ASM_EFI_H */ diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 037724b19c5c..f4c3d30bf746 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -22,6 +22,15 @@ isb .endm +.macro __init_el2_hcrx + mrs x0, id_aa64mmfr1_el1 + ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 + cbz x0, .Lskip_hcrx_\@ + mov_q x0, HCRX_HOST_FLAGS + msr_s SYS_HCRX_EL2, x0 +.Lskip_hcrx_\@: +.endm + /* * Allow Non-secure EL1 and EL0 to access physical timer and counter. 
* This is not necessary for VHE, since the host kernel runs in EL2, @@ -69,7 +78,7 @@ cbz x0, .Lskip_trace_\@ // Skip if TraceBuffer is not present mrs_s x0, SYS_TRBIDR_EL1 - and x0, x0, TRBIDR_PROG + and x0, x0, TRBIDR_EL1_P cbnz x0, .Lskip_trace_\@ // If TRBE is available at EL2 mov x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT) @@ -150,12 +159,21 @@ mov x0, xzr mrs x1, id_aa64pfr1_el1 ubfx x1, x1, #ID_AA64PFR1_EL1_SME_SHIFT, #4 - cbz x1, .Lset_fgt_\@ + cbz x1, .Lset_pie_fgt_\@ /* Disable nVHE traps of TPIDR2 and SMPRI */ orr x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK orr x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK +.Lset_pie_fgt_\@: + mrs_s x1, SYS_ID_AA64MMFR3_EL1 + ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4 + cbz x1, .Lset_fgt_\@ + + /* Disable trapping of PIR_EL1 / PIRE0_EL1 */ + orr x0, x0, #HFGxTR_EL2_nPIR_EL1 + orr x0, x0, #HFGxTR_EL2_nPIRE0_EL1 + .Lset_fgt_\@: msr_s SYS_HFGRTR_EL2, x0 msr_s SYS_HFGWTR_EL2, x0 @@ -184,6 +202,7 @@ */ .macro init_el2_state __init_el2_sctlr + __init_el2_hcrx __init_el2_timers __init_el2_debug __init_el2_lor @@ -284,14 +303,6 @@ cbz x1, .Lskip_sme_\@ msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal - - mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present? - ubfx x1, x1, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 - cbz x1, .Lskip_sme_\@ - - mrs_s x1, SYS_HCRX_EL2 - orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping - msr_s SYS_HCRX_EL2, x1 .Lskip_sme_\@: .endm diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 8487aec9b658..ae35939f395b 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -47,7 +47,7 @@ #define ESR_ELx_EC_DABT_LOW (0x24) #define ESR_ELx_EC_DABT_CUR (0x25) #define ESR_ELx_EC_SP_ALIGN (0x26) -/* Unallocated EC: 0x27 */ +#define ESR_ELx_EC_MOPS (0x27) #define ESR_ELx_EC_FP_EXC32 (0x28) /* Unallocated EC: 0x29 - 0x2B */ #define ESR_ELx_EC_FP_EXC64 (0x2C) @@ -75,8 +75,11 @@ #define ESR_ELx_IL_SHIFT (25) #define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT) -#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) +#define ESR_ELx_ISS_MASK (GENMASK(24, 0)) #define ESR_ELx_ISS(esr) ((esr) & ESR_ELx_ISS_MASK) +#define ESR_ELx_ISS2_SHIFT (32) +#define ESR_ELx_ISS2_MASK (GENMASK_ULL(55, 32)) +#define ESR_ELx_ISS2(esr) (((esr) & ESR_ELx_ISS2_MASK) >> ESR_ELx_ISS2_SHIFT) /* ISS field definitions shared by different classes */ #define ESR_ELx_WNR_SHIFT (6) @@ -140,6 +143,20 @@ #define ESR_ELx_CM_SHIFT (8) #define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT) +/* ISS2 field definitions for Data Aborts */ +#define ESR_ELx_TnD_SHIFT (10) +#define ESR_ELx_TnD (UL(1) << ESR_ELx_TnD_SHIFT) +#define ESR_ELx_TagAccess_SHIFT (9) +#define ESR_ELx_TagAccess (UL(1) << ESR_ELx_TagAccess_SHIFT) +#define ESR_ELx_GCS_SHIFT (8) +#define ESR_ELx_GCS (UL(1) << ESR_ELx_GCS_SHIFT) +#define ESR_ELx_Overlay_SHIFT (6) +#define ESR_ELx_Overlay (UL(1) << ESR_ELx_Overlay_SHIFT) +#define ESR_ELx_DirtyBit_SHIFT (5) +#define ESR_ELx_DirtyBit (UL(1) << ESR_ELx_DirtyBit_SHIFT) +#define ESR_ELx_Xs_SHIFT (0) +#define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0)) + /* ISS field definitions for exceptions taken in to Hyp */ #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) @@ -356,6 +373,15 @@ #define ESR_ELx_SME_ISS_ZA_DISABLED 3 #define ESR_ELx_SME_ISS_ZT_DISABLED 4 +/* ISS field definitions for MOPS exceptions */ +#define ESR_ELx_MOPS_ISS_MEM_INST (UL(1) << 24) +#define ESR_ELx_MOPS_ISS_FROM_EPILOGUE (UL(1) << 18) +#define ESR_ELx_MOPS_ISS_WRONG_OPTION (UL(1) << 17) +#define ESR_ELx_MOPS_ISS_OPTION_A (UL(1) << 16) +#define ESR_ELx_MOPS_ISS_DESTREG(esr) (((esr) 
& (UL(0x1f) << 10)) >> 10) +#define ESR_ELx_MOPS_ISS_SRCREG(esr) (((esr) & (UL(0x1f) << 5)) >> 5) +#define ESR_ELx_MOPS_ISS_SIZEREG(esr) (((esr) & (UL(0x1f) << 0)) >> 0) + #ifndef __ASSEMBLY__ #include <asm/types.h> diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index e73af709cb7a..ad688e157c9b 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -8,16 +8,11 @@ #define __ASM_EXCEPTION_H #include <asm/esr.h> -#include <asm/kprobes.h> #include <asm/ptrace.h> #include <linux/interrupt.h> -#ifdef CONFIG_FUNCTION_GRAPH_TRACER #define __exception_irq_entry __irq_entry -#else -#define __exception_irq_entry __kprobes -#endif static inline unsigned long disr_to_esr(u64 disr) { @@ -77,6 +72,7 @@ void do_el0_svc(struct pt_regs *regs); void do_el0_svc_compat(struct pt_regs *regs); void do_el0_fpac(struct pt_regs *regs, unsigned long esr); void do_el1_fpac(struct pt_regs *regs, unsigned long esr); +void do_el0_mops(struct pt_regs *regs, unsigned long esr); void do_serror(struct pt_regs *regs, unsigned long esr); void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags); diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index fa4c6ff3aa9b..84055329cd8b 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -154,4 +154,12 @@ static inline int get_num_wrps(void) ID_AA64DFR0_EL1_WRPs_SHIFT); } +#ifdef CONFIG_CPU_PM +extern void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)); +#else +static inline void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)) +{ +} +#endif + #endif /* __ASM_BREAKPOINT_H */ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 5d45f19fda7f..692b1ec663b2 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -137,6 +137,7 @@ #define KERNEL_HWCAP_SME_BI32I32 __khwcap2_feature(SME_BI32I32) #define KERNEL_HWCAP_SME_B16B16 __khwcap2_feature(SME_B16B16) #define KERNEL_HWCAP_SME_F16F16 __khwcap2_feature(SME_F16F16) +#define KERNEL_HWCAP_MOPS __khwcap2_feature(MOPS) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index e0f5f6b73edd..1f31ec146d16 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -24,7 +24,7 @@ static __always_inline bool __irqflags_uses_pmr(void) { return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && - alternative_has_feature_unlikely(ARM64_HAS_GIC_PRIO_MASKING); + alternative_has_cap_unlikely(ARM64_HAS_GIC_PRIO_MASKING); } static __always_inline void __daif_local_irq_enable(void) diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 186dd7f85b14..577773870b66 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -107,14 +107,14 @@ /* * Initial memory map attributes. 
*/ -#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) +#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_UXN) +#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PTE_UXN) #ifdef CONFIG_ARM64_4K_PAGES -#define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) +#define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS | PTE_WRITE) #define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY) #else -#define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) +#define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS | PTE_WRITE) #define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PTE_RDONLY) #endif diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index baef29fcbeee..c6e12e8f2751 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -9,6 +9,7 @@ #include <asm/esr.h> #include <asm/memory.h> +#include <asm/sysreg.h> #include <asm/types.h> /* Hyp Configuration Register (HCR) bits */ @@ -92,6 +93,9 @@ #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) +#define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME | HCRX_EL2_TCR2En) +#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En) + /* TCR_EL2 Registers bits */ #define TCR_EL2_RES1 ((1U << 31) | (1 << 23)) #define TCR_EL2_TBI (1 << 20) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 43c3bc0f9544..86042afa86c3 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -267,6 +267,24 @@ extern u64 __kvm_get_mdcr_el2(void); __kvm_at_err; \ } ) +void __noreturn hyp_panic(void); +asmlinkage void kvm_unexpected_el2_exception(void); +asmlinkage void __noreturn hyp_panic(void); +asmlinkage void __noreturn hyp_panic_bad_stack(void); +asmlinkage void kvm_unexpected_el2_exception(void); +struct kvm_cpu_context; +void handle_trap(struct kvm_cpu_context *host_ctxt); +asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on); +void __noreturn __pkvm_init_finalise(void); +void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc); +void kvm_patch_vector_branch(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void kvm_get_kimage_voffset(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void kvm_compute_final_ctr_el0(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt, + u64 elr_phys, u64 par, uintptr_t vcpu, u64 far, u64 hpfar); #else /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 9787503ff43f..d48609d95423 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -279,6 +279,7 @@ enum vcpu_sysreg { TTBR0_EL1, /* Translation Table Base Register 0 */ TTBR1_EL1, /* Translation Table Base Register 1 */ TCR_EL1, /* Translation Control Register */ + TCR2_EL1, /* Extended Translation Control Register */ ESR_EL1, /* Exception Syndrome Register */ AFSR0_EL1, /* Auxiliary Fault Status Register 0 */ AFSR1_EL1, /* Auxiliary Fault Status Register 1 */ @@ -339,6 +340,10 @@ enum vcpu_sysreg { TFSR_EL1, /* Tag Fault Status Register (EL1) */ TFSRE0_EL1, /* Tag Fault Status Register (EL0) */ + /* Permission Indirection Extension registers */ + 
PIR_EL1, /* Permission Indirection Register 1 (EL1) */ + PIRE0_EL1, /* Permission Indirection Register 0 (EL1) */ + /* 32bit specific registers. */ DACR32_EL2, /* Domain Access Control Register */ IFSR32_EL2, /* Instruction Fault Status Register */ @@ -1033,7 +1038,7 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); #define kvm_vcpu_os_lock_enabled(vcpu) \ - (!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & SYS_OSLSR_OSLK)) + (!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & OSLSR_EL1_OSLK)) int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index f99d74826a7e..cbbcdc35c4cd 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -18,7 +18,7 @@ static __always_inline bool system_uses_lse_atomics(void) { - return alternative_has_feature_likely(ARM64_HAS_LSE_ATOMICS); + return alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS); } #define __lse_ll_sc_body(op, ...) \ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index c735afdf639b..6e0e5722f229 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -46,7 +46,7 @@ #define KIMAGE_VADDR (MODULES_END) #define MODULES_END (MODULES_VADDR + MODULES_VSIZE) #define MODULES_VADDR (_PAGE_END(VA_BITS_MIN)) -#define MODULES_VSIZE (SZ_128M) +#define MODULES_VSIZE (SZ_2G) #define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT))) #define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE) #define PCI_IO_END (VMEMMAP_START - SZ_8M) @@ -204,15 +204,17 @@ static inline unsigned long kaslr_offset(void) return kimage_vaddr - KIMAGE_VADDR; } +#ifdef CONFIG_RANDOMIZE_BASE +void kaslr_init(void); static inline bool kaslr_enabled(void) { - /* - * The KASLR offset modulo MIN_KIMG_ALIGN is taken from the physical - * placement of the image rather than from the seed, so a displacement - * of less than MIN_KIMG_ALIGN means that no seed was provided. - */ - return kaslr_offset() >= MIN_KIMG_ALIGN; + extern bool __kaslr_is_enabled; + return __kaslr_is_enabled; } +#else +static inline void kaslr_init(void) { } +static inline bool kaslr_enabled(void) { return false; } +#endif /* * Allow all memory at the discovery stage. We will clip it later. diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 56911691bef0..a6fb325424e7 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -39,11 +39,16 @@ static inline void contextidr_thread_switch(struct task_struct *next) /* * Set TTBR0 to reserved_pg_dir. No translations will be possible via TTBR0. */ -static inline void cpu_set_reserved_ttbr0(void) +static inline void cpu_set_reserved_ttbr0_nosync(void) { unsigned long ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir)); write_sysreg(ttbr, ttbr0_el1); +} + +static inline void cpu_set_reserved_ttbr0(void) +{ + cpu_set_reserved_ttbr0_nosync(); isb(); } @@ -52,7 +57,6 @@ void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm) { BUG_ON(pgd == swapper_pg_dir); - cpu_set_reserved_ttbr0(); cpu_do_switch_mm(virt_to_phys(pgd),mm); } @@ -164,7 +168,7 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap) * up (i.e. cpufeature framework is not up yet) and * latter only when we enable CNP via cpufeature's * enable() callback. 
- * Also we rely on the cpu_hwcap bit being set before + * Also we rely on the system_cpucaps bit being set before * calling the enable() function. */ ttbr1 |= TTBR_CNP_BIT; diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 18734fed3bdd..bfa6638b4c93 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -7,7 +7,6 @@ #include <asm-generic/module.h> -#ifdef CONFIG_ARM64_MODULE_PLTS struct mod_plt_sec { int plt_shndx; int plt_num_entries; @@ -21,7 +20,6 @@ struct mod_arch_specific { /* for CONFIG_DYNAMIC_FTRACE */ struct plt_entry *ftrace_trampolines; }; -#endif u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs, void *loc, const Elf64_Rela *rela, @@ -30,12 +28,6 @@ u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs, u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs, void *loc, u64 val); -#ifdef CONFIG_RANDOMIZE_BASE -extern u64 module_alloc_base; -#else -#define module_alloc_base ((u64)_etext - MODULES_VSIZE) -#endif - struct plt_entry { /* * A program that conforms to the AArch64 Procedure Call Standard diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h index dbba4b7559aa..b9ae8349e35d 100644 --- a/arch/arm64/include/asm/module.lds.h +++ b/arch/arm64/include/asm/module.lds.h @@ -1,9 +1,7 @@ SECTIONS { -#ifdef CONFIG_ARM64_MODULE_PLTS .plt 0 : { BYTE(0) } .init.plt 0 : { BYTE(0) } .text.ftrace_trampoline 0 : { BYTE(0) } -#endif #ifdef CONFIG_KASAN_SW_TAGS /* diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index f658aafc47df..e4944d517c99 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -171,6 +171,14 @@ #define PTE_ATTRINDX_MASK (_AT(pteval_t, 7) << 2) /* + * PIIndex[3:0] encoding (Permission Indirection Extension) + */ +#define PTE_PI_IDX_0 6 /* AP[1], USER */ +#define PTE_PI_IDX_1 51 /* DBM */ +#define PTE_PI_IDX_2 53 /* PXN */ +#define PTE_PI_IDX_3 54 /* UXN */ + +/* * Memory Attribute override for Stage-2 (MemAttr[3:0]) */ #define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 9b165117a454..eed814b00a38 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -27,6 +27,40 @@ */ #define PMD_PRESENT_INVALID (_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */ +#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) + +#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) +#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) + +#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) +#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) +#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) +#define PROT_NORMAL_TAGGED (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED)) + +#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PTE_WRITE | PMD_ATTRINDX(MT_NORMAL)) +#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | 
PMD_ATTRINDX(MT_NORMAL)) + +#define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) + +#define _PAGE_KERNEL (PROT_NORMAL) +#define _PAGE_KERNEL_RO ((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) +#define _PAGE_KERNEL_ROX ((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY) +#define _PAGE_KERNEL_EXEC (PROT_NORMAL & ~PTE_PXN) +#define _PAGE_KERNEL_EXEC_CONT ((PROT_NORMAL & ~PTE_PXN) | PTE_CONT) + +#define _PAGE_SHARED (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define _PAGE_SHARED_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) +#define _PAGE_READONLY (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) +#define _PAGE_READONLY_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) +#define _PAGE_EXECONLY (_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) + +#ifdef __ASSEMBLY__ +#define PTE_MAYBE_NG 0 +#endif + #ifndef __ASSEMBLY__ #include <asm/cpufeature.h> @@ -34,9 +68,6 @@ extern bool arm64_use_ng_mappings; -#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) - #define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0) #define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0) @@ -50,26 +81,11 @@ extern bool arm64_use_ng_mappings; #define PTE_MAYBE_GP 0 #endif -#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) -#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) - -#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) -#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) -#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) -#define PROT_NORMAL_TAGGED (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED)) - -#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) -#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) - -#define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) - -#define PAGE_KERNEL __pgprot(PROT_NORMAL) -#define PAGE_KERNEL_RO __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) -#define PAGE_KERNEL_ROX __pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY) -#define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN) -#define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT) +#define PAGE_KERNEL __pgprot(_PAGE_KERNEL) +#define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL_RO) +#define PAGE_KERNEL_ROX __pgprot(_PAGE_KERNEL_ROX) +#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL_EXEC) +#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_KERNEL_EXEC_CONT) #define PAGE_S2_MEMATTR(attr, has_fwb) \ ({ \ @@ -83,12 +99,62 @@ extern bool arm64_use_ng_mappings; #define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) /* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */ -#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) -#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) -#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | 
PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_READONLY_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
-#define PAGE_EXECONLY		__pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
+#define PAGE_SHARED		__pgprot(_PAGE_SHARED)
+#define PAGE_SHARED_EXEC	__pgprot(_PAGE_SHARED_EXEC)
+#define PAGE_READONLY		__pgprot(_PAGE_READONLY)
+#define PAGE_READONLY_EXEC	__pgprot(_PAGE_READONLY_EXEC)
+#define PAGE_EXECONLY		__pgprot(_PAGE_EXECONLY)

 #endif /* __ASSEMBLY__ */

+#define pte_pi_index(pte) ( \
+	((pte & BIT(PTE_PI_IDX_3)) >> (PTE_PI_IDX_3 - 3)) | \
+	((pte & BIT(PTE_PI_IDX_2)) >> (PTE_PI_IDX_2 - 2)) | \
+	((pte & BIT(PTE_PI_IDX_1)) >> (PTE_PI_IDX_1 - 1)) | \
+	((pte & BIT(PTE_PI_IDX_0)) >> (PTE_PI_IDX_0 - 0)))
+
+/*
+ * Page types used via Permission Indirection Extension (PIE). PIE uses
+ * the USER, DBM, PXN and UXN bits to generate an index which is used
+ * to look up the actual permission in PIR_ELx and PIRE0_EL1. We define
+ * combinations we use on non-PIE systems with the same encoding; for
+ * convenience these are listed here as comments, as are the unallocated
+ * encodings.
+ */
+
+/* 0: PAGE_DEFAULT */
+/* 1: PTE_USER */
+/* 2: PTE_WRITE */
+/* 3: PTE_WRITE | PTE_USER */
+/* 4: PAGE_EXECONLY PTE_PXN */
+/* 5: PAGE_READONLY_EXEC PTE_PXN | PTE_USER */
+/* 6: PTE_PXN | PTE_WRITE */
+/* 7: PAGE_SHARED_EXEC PTE_PXN | PTE_WRITE | PTE_USER */
+/* 8: PAGE_KERNEL_ROX PTE_UXN */
+/* 9: PTE_UXN | PTE_USER */
+/* a: PAGE_KERNEL_EXEC PTE_UXN | PTE_WRITE */
+/* b: PTE_UXN | PTE_WRITE | PTE_USER */
+/* c: PAGE_KERNEL_RO PTE_UXN | PTE_PXN */
+/* d: PAGE_READONLY PTE_UXN | PTE_PXN | PTE_USER */
+/* e: PAGE_KERNEL PTE_UXN | PTE_PXN | PTE_WRITE */
+/* f: PAGE_SHARED PTE_UXN | PTE_PXN | PTE_WRITE | PTE_USER */
+
+#define PIE_E0 ( \
+	PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \
+	PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX) | \
+	PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX) | \
+	PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R) | \
+	PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW))
+
+#define PIE_E1 ( \
+	PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \
+	PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_R) | \
+	PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RW) | \
+	PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R) | \
+	PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW) | \
+	PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_ROX), PIE_RX) | \
+	PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_EXEC), PIE_RWX) | \
+	PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_RO), PIE_R) | \
+	PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL), PIE_RW))
+
 #endif /* __ASM_PGTABLE_PROT_H */
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
index 13df982a0808..3fdae5fe3142 100644
--- a/arch/arm64/include/asm/scs.h
+++ b/arch/arm64/include/asm/scs.h
@@ -73,6 +73,7 @@ static inline void dynamic_scs_init(void) {}
 #endif

 int scs_patch(const u8 eh_frame[], int size);
+asmlinkage void scs_patch_vmlinux(void);

 #endif /* __ASSEMBLY __ */
diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
index db7b371b367c..9cc501450486 100644
--- a/arch/arm64/include/asm/spectre.h
+++ b/arch/arm64/include/asm/spectre.h
@@ -100,5 +100,21 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int sco
 u8 spectre_bhb_loop_affected(int scope);
 void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
 bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr);
+
+void
spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst); +void smccc_patch_fw_mitigation_conduit(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst); +void spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst); +void spectre_bhb_patch_fw_mitigation_enabled(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst); +void spectre_bhb_patch_loop_iter(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void spectre_bhb_patch_wa3(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void spectre_bhb_patch_clearbhb(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); + #endif /* __ASSEMBLY__ */ #endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index d30217c21eff..17f687510c48 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -38,6 +38,7 @@ asmlinkage long __arm64_compat_sys_##sname(const struct pt_regs *__unused) #define COND_SYSCALL_COMPAT(name) \ + asmlinkage long __arm64_compat_sys_##name(const struct pt_regs *regs); \ asmlinkage long __weak __arm64_compat_sys_##name(const struct pt_regs *regs) \ { \ return sys_ni_syscall(); \ @@ -53,6 +54,7 @@ ALLOW_ERROR_INJECTION(__arm64_sys##name, ERRNO); \ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + asmlinkage long __arm64_sys##name(const struct pt_regs *regs); \ asmlinkage long __arm64_sys##name(const struct pt_regs *regs) \ { \ return __se_sys##name(SC_ARM64_REGS_TO_ARGS(x,__VA_ARGS__)); \ @@ -73,11 +75,13 @@ asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused) #define COND_SYSCALL(name) \ + asmlinkage long __arm64_sys_##name(const struct pt_regs *regs); \ asmlinkage long __weak __arm64_sys_##name(const struct pt_regs *regs) \ { \ return sys_ni_syscall(); \ } +asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused); #define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers); #endif /* __ASM_SYSCALL_WRAPPER_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index eefd712f2430..7a1e62631814 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -140,25 +140,17 @@ #define SYS_SVCR_SMSTART_SM_EL0 sys_reg(0, 3, 4, 3, 3) #define SYS_SVCR_SMSTOP_SMZA_EL0 sys_reg(0, 3, 4, 6, 3) -#define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2) -#define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0) -#define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2) -#define SYS_OSDTRTX_EL1 sys_reg(2, 0, 0, 3, 2) -#define SYS_OSECCR_EL1 sys_reg(2, 0, 0, 6, 2) #define SYS_DBGBVRn_EL1(n) sys_reg(2, 0, 0, n, 4) #define SYS_DBGBCRn_EL1(n) sys_reg(2, 0, 0, n, 5) #define SYS_DBGWVRn_EL1(n) sys_reg(2, 0, 0, n, 6) #define SYS_DBGWCRn_EL1(n) sys_reg(2, 0, 0, n, 7) #define SYS_MDRAR_EL1 sys_reg(2, 0, 1, 0, 0) -#define SYS_OSLAR_EL1 sys_reg(2, 0, 1, 0, 4) -#define SYS_OSLAR_OSLK BIT(0) - #define SYS_OSLSR_EL1 sys_reg(2, 0, 1, 1, 4) -#define SYS_OSLSR_OSLM_MASK (BIT(3) | BIT(0)) -#define SYS_OSLSR_OSLM_NI 0 -#define SYS_OSLSR_OSLM_IMPLEMENTED BIT(3) -#define SYS_OSLSR_OSLK BIT(1) +#define OSLSR_EL1_OSLM_MASK (BIT(3) | BIT(0)) +#define OSLSR_EL1_OSLM_NI 0 +#define OSLSR_EL1_OSLM_IMPLEMENTED BIT(3) +#define OSLSR_EL1_OSLK BIT(1) #define SYS_OSDLR_EL1 sys_reg(2, 0, 1, 3, 4) #define SYS_DBGPRCR_EL1 
sys_reg(2, 0, 1, 4, 4) @@ -241,54 +233,8 @@ /*** End of Statistical Profiling Extension ***/ -/* - * TRBE Registers - */ -#define SYS_TRBLIMITR_EL1 sys_reg(3, 0, 9, 11, 0) -#define SYS_TRBPTR_EL1 sys_reg(3, 0, 9, 11, 1) -#define SYS_TRBBASER_EL1 sys_reg(3, 0, 9, 11, 2) -#define SYS_TRBSR_EL1 sys_reg(3, 0, 9, 11, 3) -#define SYS_TRBMAR_EL1 sys_reg(3, 0, 9, 11, 4) -#define SYS_TRBTRG_EL1 sys_reg(3, 0, 9, 11, 6) -#define SYS_TRBIDR_EL1 sys_reg(3, 0, 9, 11, 7) - -#define TRBLIMITR_LIMIT_MASK GENMASK_ULL(51, 0) -#define TRBLIMITR_LIMIT_SHIFT 12 -#define TRBLIMITR_NVM BIT(5) -#define TRBLIMITR_TRIG_MODE_MASK GENMASK(1, 0) -#define TRBLIMITR_TRIG_MODE_SHIFT 3 -#define TRBLIMITR_FILL_MODE_MASK GENMASK(1, 0) -#define TRBLIMITR_FILL_MODE_SHIFT 1 -#define TRBLIMITR_ENABLE BIT(0) -#define TRBPTR_PTR_MASK GENMASK_ULL(63, 0) -#define TRBPTR_PTR_SHIFT 0 -#define TRBBASER_BASE_MASK GENMASK_ULL(51, 0) -#define TRBBASER_BASE_SHIFT 12 -#define TRBSR_EC_MASK GENMASK(5, 0) -#define TRBSR_EC_SHIFT 26 -#define TRBSR_IRQ BIT(22) -#define TRBSR_TRG BIT(21) -#define TRBSR_WRAP BIT(20) -#define TRBSR_ABORT BIT(18) -#define TRBSR_STOP BIT(17) -#define TRBSR_MSS_MASK GENMASK(15, 0) -#define TRBSR_MSS_SHIFT 0 -#define TRBSR_BSC_MASK GENMASK(5, 0) -#define TRBSR_BSC_SHIFT 0 -#define TRBSR_FSC_MASK GENMASK(5, 0) -#define TRBSR_FSC_SHIFT 0 -#define TRBMAR_SHARE_MASK GENMASK(1, 0) -#define TRBMAR_SHARE_SHIFT 8 -#define TRBMAR_OUTER_MASK GENMASK(3, 0) -#define TRBMAR_OUTER_SHIFT 4 -#define TRBMAR_INNER_MASK GENMASK(3, 0) -#define TRBMAR_INNER_SHIFT 0 -#define TRBTRG_TRG_MASK GENMASK(31, 0) -#define TRBTRG_TRG_SHIFT 0 -#define TRBIDR_FLAG BIT(5) -#define TRBIDR_PROG BIT(4) -#define TRBIDR_ALIGN_MASK GENMASK(3, 0) -#define TRBIDR_ALIGN_SHIFT 0 +#define TRBSR_EL1_BSC_MASK GENMASK(5, 0) +#define TRBSR_EL1_BSC_SHIFT 0 #define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1) #define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2) @@ -764,6 +710,25 @@ #define ICH_VTR_TDS_SHIFT 19 #define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT) +/* + * Permission Indirection Extension (PIE) permission encodings. + * Encodings with the _O suffix, have overlays applied (Permission Overlay Extension). + */ +#define PIE_NONE_O 0x0 +#define PIE_R_O 0x1 +#define PIE_X_O 0x2 +#define PIE_RX_O 0x3 +#define PIE_RW_O 0x5 +#define PIE_RWnX_O 0x6 +#define PIE_RWX_O 0x7 +#define PIE_R 0x8 +#define PIE_GCS 0x9 +#define PIE_RX 0xa +#define PIE_RW 0xc +#define PIE_RWX 0xe + +#define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4)) + #define ARM64_FEATURE_FIELD_BITS 4 /* Defined for compatibility only, do not add new users. */ diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 1f361e2da516..d66dfb3a72dd 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -29,6 +29,8 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *s void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str); void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str); +int early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs); + /* * Move regs->pc to next instruction and do necessary setup before it * is executed. 
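An aside on the PIE permission encodings added to sysreg.h above: PIRx_ELx_PERM() packs one 4-bit permission per page-table index into a 64-bit PIR register image, and the series uses it to build the PIE_E0/PIE_E1 initialisers in pgtable-prot.h. A minimal sketch of what one term encodes (the local variable is illustrative, not part of the patch; 64-bit arithmetic is forced explicitly since PIE_RW is a plain int):

	/* Index 0xe is PTE_UXN | PTE_PXN | PTE_WRITE, i.e. PAGE_KERNEL. */
	u64 pir = (u64)PIE_RW << (0xe * 4);	/* what PIRx_ELx_PERM(0xe, PIE_RW) encodes */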
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 05f4fc265428..14be5000c5a0 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -65,7 +65,6 @@ static inline void __uaccess_ttbr0_disable(void) ttbr &= ~TTBR_ASID_MASK; /* reserved_pg_dir placed before swapper_pg_dir */ write_sysreg(ttbr - RESERVED_SWAPPER_OFFSET, ttbr0_el1); - isb(); /* Set reserved ASID */ write_sysreg(ttbr, ttbr1_el1); isb(); @@ -89,7 +88,6 @@ static inline void __uaccess_ttbr0_enable(void) ttbr1 &= ~TTBR_ASID_MASK; /* safety measure */ ttbr1 |= ttbr0 & TTBR_ASID_MASK; write_sysreg(ttbr1, ttbr1_el1); - isb(); /* Restore user page table */ write_sysreg(ttbr0, ttbr0_el1); diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 69a4fb749c65..a2cac4305b1e 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -102,5 +102,6 @@ #define HWCAP2_SME_BI32I32 (1UL << 40) #define HWCAP2_SME_B16B16 (1UL << 41) #define HWCAP2_SME_F16F16 (1UL << 42) +#define HWCAP2_MOPS (1UL << 43) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 7c2bb4e72476..3864a64e2b2b 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -42,8 +42,7 @@ obj-$(CONFIG_COMPAT) += sigreturn32.o obj-$(CONFIG_COMPAT_ALIGNMENT_FIXUPS) += compat_alignment.o obj-$(CONFIG_KUSER_HELPERS) += kuser32.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o -obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o +obj-$(CONFIG_MODULES) += module.o module-plts.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_PM) += sleep.o suspend.o diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index d32d4ed5519b..8ff6610af496 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -24,8 +24,8 @@ #define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) #define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) -#define ALT_CAP(a) ((a)->cpufeature & ~ARM64_CB_BIT) -#define ALT_HAS_CB(a) ((a)->cpufeature & ARM64_CB_BIT) +#define ALT_CAP(a) ((a)->cpucap & ~ARM64_CB_BIT) +#define ALT_HAS_CB(a) ((a)->cpucap & ARM64_CB_BIT) /* Volatile, as we may be patching the guts of READ_ONCE() */ static volatile int all_alternatives_applied; @@ -37,12 +37,12 @@ struct alt_region { struct alt_instr *end; }; -bool alternative_is_applied(u16 cpufeature) +bool alternative_is_applied(u16 cpucap) { - if (WARN_ON(cpufeature >= ARM64_NCAPS)) + if (WARN_ON(cpucap >= ARM64_NCAPS)) return false; - return test_bit(cpufeature, applied_alternatives); + return test_bit(cpucap, applied_alternatives); } /* @@ -121,11 +121,11 @@ static noinstr void patch_alternative(struct alt_instr *alt, * accidentally call into the cache.S code, which is patched by us at * runtime. 
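 *
 * (Presumably this is also why, below, the CTR_EL0 value is now read
 * directly from arm64_ftr_reg_ctrel0.sys_val: a noinstr function must
 * not call out to instrumentable helpers such as
 * read_sanitised_ftr_reg().)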
*/ -static void clean_dcache_range_nopatch(u64 start, u64 end) +static noinstr void clean_dcache_range_nopatch(u64 start, u64 end) { u64 cur, d_size, ctr_el0; - ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0); + ctr_el0 = arm64_ftr_reg_ctrel0.sys_val; d_size = 4 << cpuid_feature_extract_unsigned_field(ctr_el0, CTR_EL0_DminLine_SHIFT); cur = start & ~(d_size - 1); @@ -141,7 +141,7 @@ static void clean_dcache_range_nopatch(u64 start, u64 end) static void __apply_alternatives(const struct alt_region *region, bool is_module, - unsigned long *feature_mask) + unsigned long *cpucap_mask) { struct alt_instr *alt; __le32 *origptr, *updptr; @@ -151,7 +151,7 @@ static void __apply_alternatives(const struct alt_region *region, int nr_inst; int cap = ALT_CAP(alt); - if (!test_bit(cap, feature_mask)) + if (!test_bit(cap, cpucap_mask)) continue; if (!cpus_have_cap(cap)) @@ -188,11 +188,10 @@ static void __apply_alternatives(const struct alt_region *region, icache_inval_all_pou(); isb(); - /* Ignore ARM64_CB bit from feature mask */ bitmap_or(applied_alternatives, applied_alternatives, - feature_mask, ARM64_NCAPS); + cpucap_mask, ARM64_NCAPS); bitmap_and(applied_alternatives, applied_alternatives, - cpu_hwcaps, ARM64_NCAPS); + system_cpucaps, ARM64_NCAPS); } } @@ -239,7 +238,7 @@ static int __init __apply_alternatives_multi_stop(void *unused) } else { DECLARE_BITMAP(remaining_capabilities, ARM64_NCAPS); - bitmap_complement(remaining_capabilities, boot_capabilities, + bitmap_complement(remaining_capabilities, boot_cpucaps, ARM64_NCAPS); BUG_ON(all_alternatives_applied); @@ -274,7 +273,7 @@ void __init apply_boot_alternatives(void) pr_info("applying boot alternatives\n"); __apply_alternatives(&kernel_alternatives, false, - &boot_capabilities[0]); + &boot_cpucaps[0]); } #ifdef CONFIG_MODULES diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 7d7128c65161..6ea7f23b1287 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -105,11 +105,11 @@ unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; #endif -DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); -EXPORT_SYMBOL(cpu_hwcaps); -static struct arm64_cpu_capabilities const __ro_after_init *cpu_hwcaps_ptrs[ARM64_NCAPS]; +DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS); +EXPORT_SYMBOL(system_cpucaps); +static struct arm64_cpu_capabilities const __ro_after_init *cpucap_ptrs[ARM64_NCAPS]; -DECLARE_BITMAP(boot_capabilities, ARM64_NCAPS); +DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS); bool arm64_use_ng_mappings = false; EXPORT_SYMBOL(arm64_use_ng_mappings); @@ -137,7 +137,7 @@ static cpumask_var_t cpu_32bit_el0_mask __cpumask_var_read_mostly; void dump_cpu_features(void) { /* file-wide pr_fmt adds "CPU features: " prefix */ - pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps); + pr_emerg("0x%*pb\n", ARM64_NCAPS, &system_cpucaps); } #define ARM64_CPUID_FIELDS(reg, field, min_value) \ @@ -223,6 +223,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_MOPS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0), 
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), @@ -364,6 +365,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HCX_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ETS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TWED_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_XNX_SHIFT, 4, 0), @@ -396,6 +398,12 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1PIE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_TCRX_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DIC_SHIFT, 1, 1), @@ -722,6 +730,7 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1, &id_aa64mmfr1_override), ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2), + ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3), /* Op1 = 0, CRn = 1, CRm = 2 */ ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr), @@ -954,24 +963,24 @@ extern const struct arm64_cpu_capabilities arm64_errata[]; static const struct arm64_cpu_capabilities arm64_features[]; static void __init -init_cpu_hwcaps_indirect_list_from_array(const struct arm64_cpu_capabilities *caps) +init_cpucap_indirect_list_from_array(const struct arm64_cpu_capabilities *caps) { for (; caps->matches; caps++) { if (WARN(caps->capability >= ARM64_NCAPS, "Invalid capability %d\n", caps->capability)) continue; - if (WARN(cpu_hwcaps_ptrs[caps->capability], + if (WARN(cpucap_ptrs[caps->capability], "Duplicate entry for capability %d\n", caps->capability)) continue; - cpu_hwcaps_ptrs[caps->capability] = caps; + cpucap_ptrs[caps->capability] = caps; } } -static void __init init_cpu_hwcaps_indirect_list(void) +static void __init init_cpucap_indirect_list(void) { - init_cpu_hwcaps_indirect_list_from_array(arm64_features); - init_cpu_hwcaps_indirect_list_from_array(arm64_errata); + init_cpucap_indirect_list_from_array(arm64_features); + init_cpucap_indirect_list_from_array(arm64_errata); } static void __init setup_boot_cpu_capabilities(void); @@ -1017,6 +1026,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0); init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1); init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2); + init_cpu_ftr_reg(SYS_ID_AA64MMFR3_EL1, info->reg_id_aa64mmfr3); init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0); @@ -1049,10 +1059,10 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid); /* - * Initialize the indirect array of CPU hwcaps capabilities pointers - * before we handle the boot CPU below. 
+ * Initialize the indirect array of CPU capabilities pointers before we + * handle the boot CPU below. */ - init_cpu_hwcaps_indirect_list(); + init_cpucap_indirect_list(); /* * Detect and enable early CPU capabilities based on the boot CPU, @@ -1262,6 +1272,8 @@ void update_cpu_features(int cpu, info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1); taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu, info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2); + taint |= check_update_ftr_reg(SYS_ID_AA64MMFR3_EL1, cpu, + info->reg_id_aa64mmfr3, boot->reg_id_aa64mmfr3); taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu, info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0); @@ -1391,6 +1403,7 @@ u64 __read_sysreg_by_encoding(u32 sys_id) read_sysreg_case(SYS_ID_AA64MMFR0_EL1); read_sysreg_case(SYS_ID_AA64MMFR1_EL1); read_sysreg_case(SYS_ID_AA64MMFR2_EL1); + read_sysreg_case(SYS_ID_AA64MMFR3_EL1); read_sysreg_case(SYS_ID_AA64ISAR0_EL1); read_sysreg_case(SYS_ID_AA64ISAR1_EL1); read_sysreg_case(SYS_ID_AA64ISAR2_EL1); @@ -2048,9 +2061,9 @@ static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry, static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry, int scope) { - bool api = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope); - bool apa = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope); - bool apa3 = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3], scope); + bool api = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope); + bool apa = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope); + bool apa3 = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3], scope); return apa || apa3 || api; } @@ -2186,6 +2199,11 @@ static void cpu_enable_dit(const struct arm64_cpu_capabilities *__unused) set_pstate_dit(1); } +static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused) +{ + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn); +} + /* Internal helper functions to match cpu capability type */ static bool cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) @@ -2235,11 +2253,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_ECV_CNTPOFF, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, - .sys_reg = SYS_ID_AA64MMFR0_EL1, - .field_pos = ID_AA64MMFR0_EL1_ECV_SHIFT, - .field_width = 4, - .sign = FTR_UNSIGNED, - .min_field_value = ID_AA64MMFR0_EL1_ECV_CNTPOFF, + ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, ECV, CNTPOFF) }, #ifdef CONFIG_ARM64_PAN { @@ -2309,6 +2323,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = is_kvm_protected_mode, }, + { + .desc = "HCRX_EL2 register", + .capability = ARM64_HAS_HCX, + .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HCX, IMP) + }, #endif { .desc = "Kernel page table isolation (KPTI)", @@ -2641,6 +2662,27 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_dit, ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, DIT, IMP) }, + { + .desc = "Memory Copy and Memory Set instructions", + .capability = ARM64_HAS_MOPS, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .cpu_enable = cpu_enable_mops, + ARM64_CPUID_FIELDS(ID_AA64ISAR2_EL1, MOPS, IMP) + }, + { + .capability = ARM64_HAS_TCR2, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + 
.matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, TCRX, IMP) + }, + { + .desc = "Stage-1 Permission Indirection Extension (S1PIE)", + .capability = ARM64_HAS_S1PIE, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, S1PIE, IMP) + }, {}, }; @@ -2769,6 +2811,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR2_EL1, RPRFM, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRFM), HWCAP_CAP(ID_AA64ISAR2_EL1, RPRES, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRES), HWCAP_CAP(ID_AA64ISAR2_EL1, WFxT, IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT), + HWCAP_CAP(ID_AA64ISAR2_EL1, MOPS, IMP, CAP_HWCAP, KERNEL_HWCAP_MOPS), #ifdef CONFIG_ARM64_SME HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME), HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), @@ -2895,7 +2938,7 @@ static void update_cpu_capabilities(u16 scope_mask) scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (i = 0; i < ARM64_NCAPS; i++) { - caps = cpu_hwcaps_ptrs[i]; + caps = cpucap_ptrs[i]; if (!caps || !(caps->type & scope_mask) || cpus_have_cap(caps->capability) || !caps->matches(caps, cpucap_default_scope(caps))) @@ -2903,10 +2946,11 @@ static void update_cpu_capabilities(u16 scope_mask) if (caps->desc) pr_info("detected: %s\n", caps->desc); - cpus_set_cap(caps->capability); + + __set_bit(caps->capability, system_cpucaps); if ((scope_mask & SCOPE_BOOT_CPU) && (caps->type & SCOPE_BOOT_CPU)) - set_bit(caps->capability, boot_capabilities); + set_bit(caps->capability, boot_cpucaps); } } @@ -2920,7 +2964,7 @@ static int cpu_enable_non_boot_scope_capabilities(void *__unused) u16 non_boot_scope = SCOPE_ALL & ~SCOPE_BOOT_CPU; for_each_available_cap(i) { - const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[i]; + const struct arm64_cpu_capabilities *cap = cpucap_ptrs[i]; if (WARN_ON(!cap)) continue; @@ -2950,7 +2994,7 @@ static void __init enable_cpu_capabilities(u16 scope_mask) for (i = 0; i < ARM64_NCAPS; i++) { unsigned int num; - caps = cpu_hwcaps_ptrs[i]; + caps = cpucap_ptrs[i]; if (!caps || !(caps->type & scope_mask)) continue; num = caps->capability; @@ -2995,7 +3039,7 @@ static void verify_local_cpu_caps(u16 scope_mask) scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (i = 0; i < ARM64_NCAPS; i++) { - caps = cpu_hwcaps_ptrs[i]; + caps = cpucap_ptrs[i]; if (!caps || !(caps->type & scope_mask)) continue; @@ -3194,7 +3238,7 @@ static void __init setup_boot_cpu_capabilities(void) bool this_cpu_has_cap(unsigned int n) { if (!WARN_ON(preemptible()) && n < ARM64_NCAPS) { - const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[n]; + const struct arm64_cpu_capabilities *cap = cpucap_ptrs[n]; if (cap) return cap->matches(cap, SCOPE_LOCAL_CPU); @@ -3207,13 +3251,13 @@ EXPORT_SYMBOL_GPL(this_cpu_has_cap); /* * This helper function is used in a narrow window when, * - The system wide safe registers are set with all the SMP CPUs and, - * - The SYSTEM_FEATURE cpu_hwcaps may not have been set. + * - The SYSTEM_FEATURE system_cpucaps may not have been set. * In all other cases cpus_have_{const_}cap() should be used. 
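 * (Hence the implementation below re-runs the capability's matches()
 * callback with SCOPE_SYSTEM rather than testing the system_cpucaps
 * bitmap.)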
*/ static bool __maybe_unused __system_matches_cap(unsigned int n) { if (n < ARM64_NCAPS) { - const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[n]; + const struct arm64_cpu_capabilities *cap = cpucap_ptrs[n]; if (cap) return cap->matches(cap, SCOPE_SYSTEM); diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 42e19fff40ee..d1f68599c29f 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -13,7 +13,7 @@ #include <linux/of_device.h> #include <linux/psci.h> -#ifdef CONFIG_ACPI +#ifdef CONFIG_ACPI_PROCESSOR_IDLE #include <acpi/processor.h> diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index eb4378c23b3c..58622dc85917 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -125,6 +125,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_SME_BI32I32] = "smebi32i32", [KERNEL_HWCAP_SME_B16B16] = "smeb16b16", [KERNEL_HWCAP_SME_F16F16] = "smef16f16", + [KERNEL_HWCAP_MOPS] = "mops", }; #ifdef CONFIG_COMPAT @@ -446,6 +447,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1); + info->reg_id_aa64mmfr3 = read_cpuid(ID_AA64MMFR3_EL1); info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 3af3c01c93a6..6b2e0c367702 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -126,7 +126,7 @@ static __always_inline void __exit_to_user_mode(void) lockdep_hardirqs_on(CALLER_ADDR0); } -static __always_inline void prepare_exit_to_user_mode(struct pt_regs *regs) +static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs) { unsigned long flags; @@ -135,11 +135,13 @@ static __always_inline void prepare_exit_to_user_mode(struct pt_regs *regs) flags = read_thread_flags(); if (unlikely(flags & _TIF_WORK_MASK)) do_notify_resume(regs, flags); + + lockdep_sys_exit(); } static __always_inline void exit_to_user_mode(struct pt_regs *regs) { - prepare_exit_to_user_mode(regs); + exit_to_user_mode_prepare(regs); mte_check_tfsr_exit(); __exit_to_user_mode(); } @@ -611,6 +613,14 @@ static void noinstr el0_bti(struct pt_regs *regs) exit_to_user_mode(regs); } +static void noinstr el0_mops(struct pt_regs *regs, unsigned long esr) +{ + enter_from_user_mode(regs); + local_daif_restore(DAIF_PROCCTX); + do_el0_mops(regs, esr); + exit_to_user_mode(regs); +} + static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr) { enter_from_user_mode(regs); @@ -688,6 +698,9 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_BTI: el0_bti(regs); break; + case ESR_ELx_EC_MOPS: + el0_mops(regs, esr); + break; case ESR_ELx_EC_BREAKPT_LOW: case ESR_ELx_EC_SOFTSTP_LOW: case ESR_ELx_EC_WATCHPT_LOW: diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index ab2a6e33c052..a40e5e50fa55 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -101,12 +101,11 @@ .org .Lventry_start\@ + 128 // Did we overflow the ventry slot? 
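	// (Illustrative note: the .org above makes the build fail if the
	// generated vector body ever exceeds its 128-byte slot.)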
.endm - .macro tramp_alias, dst, sym, tmp - mov_q \dst, TRAMP_VALIAS - adr_l \tmp, \sym - add \dst, \dst, \tmp - adr_l \tmp, .entry.tramp.text - sub \dst, \dst, \tmp + .macro tramp_alias, dst, sym + .set .Lalias\@, TRAMP_VALIAS + \sym - .entry.tramp.text + movz \dst, :abs_g2_s:.Lalias\@ + movk \dst, :abs_g1_nc:.Lalias\@ + movk \dst, :abs_g0_nc:.Lalias\@ .endm /* @@ -435,13 +434,14 @@ alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 eret alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - bne 4f msr far_el1, x29 - tramp_alias x30, tramp_exit_native, x29 - br x30 -4: - tramp_alias x30, tramp_exit_compat, x29 - br x30 + + ldr_this_cpu x30, this_cpu_vector, x29 + tramp_alias x29, tramp_exit + msr vbar_el1, x30 // install vector table + ldr lr, [sp, #S_LR] // restore x30 + add sp, sp, #PT_REGS_SIZE // restore sp + br x29 #endif .else ldr lr, [sp, #S_LR] @@ -732,22 +732,6 @@ alternative_else_nop_endif .org 1b + 128 // Did we overflow the ventry slot? .endm - .macro tramp_exit, regsize = 64 - tramp_data_read_var x30, this_cpu_vector - get_this_cpu_offset x29 - ldr x30, [x30, x29] - - msr vbar_el1, x30 - ldr lr, [sp, #S_LR] - tramp_unmap_kernel x29 - .if \regsize == 64 - mrs x29, far_el1 - .endif - add sp, sp, #PT_REGS_SIZE // restore sp - eret - sb - .endm - .macro generate_tramp_vector, kpti, bhb .Lvector_start\@: .space 0x400 @@ -768,7 +752,7 @@ alternative_else_nop_endif */ .pushsection ".entry.tramp.text", "ax" .align 11 -SYM_CODE_START_NOALIGN(tramp_vectors) +SYM_CODE_START_LOCAL_NOALIGN(tramp_vectors) #ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_LOOP generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_FW @@ -777,13 +761,12 @@ SYM_CODE_START_NOALIGN(tramp_vectors) generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_NONE SYM_CODE_END(tramp_vectors) -SYM_CODE_START(tramp_exit_native) - tramp_exit -SYM_CODE_END(tramp_exit_native) - -SYM_CODE_START(tramp_exit_compat) - tramp_exit 32 -SYM_CODE_END(tramp_exit_compat) +SYM_CODE_START_LOCAL(tramp_exit) + tramp_unmap_kernel x29 + mrs x29, far_el1 // restore x29 + eret + sb +SYM_CODE_END(tramp_exit) .popsection // .entry.tramp.text #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ @@ -1077,7 +1060,7 @@ alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline, tmp=x3 + tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline br x5 #endif SYM_CODE_END(__sdei_asm_handler) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 2fbafa5cc7ac..7a1aeb95d7c3 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1649,6 +1649,7 @@ void fpsimd_flush_thread(void) fpsimd_flush_thread_vl(ARM64_VEC_SME); current->thread.svcr = 0; + sme_smstop(); } current->thread.fp_type = FP_STATE_FPSIMD; diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 432626c866a8..a650f5e11fc5 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -197,7 +197,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) static struct plt_entry *get_ftrace_plt(struct module *mod) { -#ifdef CONFIG_ARM64_MODULE_PLTS +#ifdef CONFIG_MODULES struct plt_entry *plt = mod->arch.ftrace_trampolines; return &plt[FTRACE_PLT_IDX]; @@ -249,7 +249,7 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, * must use a PLT to reach it. We can only place PLTs for modules, and * only when module PLT support is built-in. 
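	 *
	 * (With module-plts.o now built whenever CONFIG_MODULES is enabled,
	 * the check below reduces to whether modules are supported at all.)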
*/ - if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) + if (!IS_ENABLED(CONFIG_MODULES)) return false; /* @@ -431,10 +431,8 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, * * Note: 'mod' is only set at module load time. */ - if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) && - IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && mod) { + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) && mod) return aarch64_insn_patch_text_nosync((void *)pc, new); - } if (!ftrace_find_callable_addr(rec, mod, &addr)) return -EINVAL; diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index e92caebff46a..0f5a30f109d9 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -382,7 +382,7 @@ SYM_FUNC_START_LOCAL(create_idmap) adrp x0, init_idmap_pg_dir adrp x3, _text adrp x6, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE - mov x7, SWAPPER_RX_MMUFLAGS + mov_q x7, SWAPPER_RX_MMUFLAGS map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14, EXTRA_SHIFT @@ -391,7 +391,7 @@ SYM_FUNC_START_LOCAL(create_idmap) adrp x2, init_pg_dir adrp x3, init_pg_end bic x4, x2, #SWAPPER_BLOCK_SIZE - 1 - mov x5, SWAPPER_RW_MMUFLAGS + mov_q x5, SWAPPER_RW_MMUFLAGS mov x6, #SWAPPER_BLOCK_SHIFT bl remap_region @@ -402,7 +402,7 @@ SYM_FUNC_START_LOCAL(create_idmap) bfi x22, x21, #0, #SWAPPER_BLOCK_SHIFT // remapped FDT address add x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE bic x4, x21, #SWAPPER_BLOCK_SIZE - 1 - mov x5, SWAPPER_RW_MMUFLAGS + mov_q x5, SWAPPER_RW_MMUFLAGS mov x6, #SWAPPER_BLOCK_SHIFT bl remap_region @@ -430,7 +430,7 @@ SYM_FUNC_START_LOCAL(create_kernel_mapping) adrp x3, _text // runtime __pa(_text) sub x6, x6, x3 // _end - _text add x6, x6, x5 // runtime __va(_end) - mov x7, SWAPPER_RW_MMUFLAGS + mov_q x7, SWAPPER_RW_MMUFLAGS map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14 diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 788597a6b6a2..02870beb271e 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -99,7 +99,6 @@ int pfn_is_nosave(unsigned long pfn) void notrace save_processor_state(void) { - WARN_ON(num_online_cpus() != 1); } void notrace restore_processor_state(void) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index b29a311bb055..db2a1861bb97 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -973,14 +973,6 @@ static int hw_breakpoint_reset(unsigned int cpu) return 0; } -#ifdef CONFIG_CPU_PM -extern void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)); -#else -static inline void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)) -{ -} -#endif - /* * One-time initialisation. 
 */
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 9439240c3fcf..d63de1973ddb 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -119,6 +119,24 @@ SYM_CODE_START_LOCAL(__finalise_el2)
 	msr	ttbr1_el1, x0
 	mrs_s	x0, SYS_MAIR_EL12
 	msr	mair_el1, x0
+	mrs_s	x1, SYS_ID_AA64MMFR3_EL1
+	ubfx	x1, x1, #ID_AA64MMFR3_EL1_TCRX_SHIFT, #4
+	cbz	x1, .Lskip_tcr2
+	mrs_s	x0, SYS_TCR2_EL12
+	msr_s	SYS_TCR2_EL1, x0
+
+	// Transfer permission indirection state
+	mrs_s	x1, SYS_ID_AA64MMFR3_EL1
+	ubfx	x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4
+	cbz	x1, .Lskip_indirection
+	mrs_s	x0, SYS_PIRE0_EL12
+	msr_s	SYS_PIRE0_EL1, x0
+	mrs_s	x0, SYS_PIR_EL12
+	msr_s	SYS_PIR_EL1, x0
+
+.Lskip_indirection:
+.Lskip_tcr2:
+	isb

 	// Hack the exception return to stay at EL2
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 370ab84fd06e..8439248c21d3 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -123,6 +123,7 @@ static const struct ftr_set_desc isar2 __initconst = {
 	.fields		= {
 		FIELD("gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT, NULL),
 		FIELD("apa3", ID_AA64ISAR2_EL1_APA3_SHIFT, NULL),
+		FIELD("mops", ID_AA64ISAR2_EL1_MOPS_SHIFT, NULL),
 		{}
 	},
 };
@@ -174,6 +175,7 @@ static const struct {
 	  "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
 	  "id_aa64isar1.api=0 id_aa64isar1.apa=0 "
 	  "id_aa64isar2.gpa3=0 id_aa64isar2.apa3=0" },
+	{ "arm64.nomops",		"id_aa64isar2.mops=0" },
 	{ "arm64.nomte",		"id_aa64pfr1.mte=0" },
 	{ "nokaslr",			"kaslr.disabled=1" },
 };
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index e7477f21a4c9..17f96a19781d 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -4,90 +4,35 @@
  */
 #include <linux/cache.h>
-#include <linux/crc32.h>
 #include <linux/init.h>
-#include <linux/libfdt.h>
-#include <linux/mm_types.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/pgtable.h>
-#include <linux/random.h>
+#include <linux/printk.h>

-#include <asm/fixmap.h>
-#include <asm/kernel-pgtable.h>
+#include <asm/cpufeature.h>
 #include <asm/memory.h>
-#include <asm/mmu.h>
-#include <asm/sections.h>
-#include <asm/setup.h>

-u64 __ro_after_init module_alloc_base;
 u16 __initdata memstart_offset_seed;

 struct arm64_ftr_override kaslr_feature_override __initdata;

-static int __init kaslr_init(void)
-{
-	u64 module_range;
-	u32 seed;
-
-	/*
-	 * Set a reasonable default for module_alloc_base in case
-	 * we end up running with module randomization disabled.
-	 */
-	module_alloc_base = (u64)_etext - MODULES_VSIZE;
+bool __ro_after_init __kaslr_is_enabled = false;

+void __init kaslr_init(void)
+{
 	if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) {
 		pr_info("KASLR disabled on command line\n");
-		return 0;
-	}
-
-	if (!kaslr_enabled()) {
-		pr_warn("KASLR disabled due to lack of seed\n");
-		return 0;
+		return;
 	}

-	pr_info("KASLR enabled\n");
-
 	/*
-	 * KASAN without KASAN_VMALLOC does not expect the module region to
-	 * intersect the vmalloc region, since shadow memory is allocated for
-	 * each module at load time, whereas the vmalloc region will already be
-	 * shadowed by KASAN zero pages.
+	 * The KASLR offset modulo MIN_KIMG_ALIGN is taken from the physical
+	 * placement of the image rather than from the seed, so a displacement
+	 * of less than MIN_KIMG_ALIGN means that no seed was provided.
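+	 *
+	 * (Worked example, assuming MIN_KIMG_ALIGN is 2 MiB: an offset of,
+	 * say, 0x150000 can only come from the image's physical placement,
+	 * so the check below concludes that no seed was provided.)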
*/ - BUILD_BUG_ON((IS_ENABLED(CONFIG_KASAN_GENERIC) || - IS_ENABLED(CONFIG_KASAN_SW_TAGS)) && - !IS_ENABLED(CONFIG_KASAN_VMALLOC)); - - seed = get_random_u32(); - - if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { - /* - * Randomize the module region over a 2 GB window covering the - * kernel. This reduces the risk of modules leaking information - * about the address of the kernel itself, but results in - * branches between modules and the core kernel that are - * resolved via PLTs. (Branches between modules will be - * resolved normally.) - */ - module_range = SZ_2G - (u64)(_end - _stext); - module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR); - } else { - /* - * Randomize the module region by setting module_alloc_base to - * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE, - * _stext) . This guarantees that the resulting region still - * covers [_stext, _etext], and that all relative branches can - * be resolved without veneers unless this region is exhausted - * and we fall back to a larger 2GB window in module_alloc() - * when ARM64_MODULE_PLTS is enabled. - */ - module_range = MODULES_VSIZE - (u64)(_etext - _stext); + if (kaslr_offset() < MIN_KIMG_ALIGN) { + pr_warn("KASLR disabled due to lack of seed\n"); + return; } - /* use the lower 21 bits to randomize the base of the module region */ - module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; - module_alloc_base &= PAGE_MASK; - - return 0; + pr_info("KASLR enabled\n"); + __kaslr_is_enabled = true; } -subsys_initcall(kaslr_init) diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index 543493bf924d..ad02058756b5 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -7,6 +7,7 @@ #include <linux/ftrace.h> #include <linux/kernel.h> #include <linux/module.h> +#include <linux/moduleloader.h> #include <linux/sort.h> static struct plt_entry __get_adrp_add_pair(u64 dst, u64 pc, diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 5af4975caeb5..dd851297596e 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -7,6 +7,8 @@ * Author: Will Deacon <will.deacon@arm.com> */ +#define pr_fmt(fmt) "Modules: " fmt + #include <linux/bitops.h> #include <linux/elf.h> #include <linux/ftrace.h> @@ -15,52 +17,131 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/moduleloader.h> +#include <linux/random.h> #include <linux/scs.h> #include <linux/vmalloc.h> + #include <asm/alternative.h> #include <asm/insn.h> #include <asm/scs.h> #include <asm/sections.h> +static u64 module_direct_base __ro_after_init = 0; +static u64 module_plt_base __ro_after_init = 0; + +/* + * Choose a random page-aligned base address for a window of 'size' bytes which + * entirely contains the interval [start, end - 1]. + */ +static u64 __init random_bounding_box(u64 size, u64 start, u64 end) +{ + u64 max_pgoff, pgoff; + + if ((end - start) >= size) + return 0; + + max_pgoff = (size - (end - start)) / PAGE_SIZE; + pgoff = get_random_u32_inclusive(0, max_pgoff); + + return start - pgoff * PAGE_SIZE; +} + +/* + * Modules may directly reference data and text anywhere within the kernel + * image and other modules. References using PREL32 relocations have a +/-2G + * range, and so we need to ensure that the entire kernel image and all modules + * fall within a 2G window such that these are always within range. 
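+ *
+ * (Worked example of random_bounding_box() above: with size == SZ_128M
+ * and end - start == 48M, max_pgoff is (128M - 48M) / PAGE_SIZE, and the
+ * returned base start - pgoff * PAGE_SIZE always keeps [start, end)
+ * inside the chosen 128M window.)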
+ * + * Modules may directly branch to functions and code within the kernel text, + * and to functions and code within other modules. These branches will use + * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure + * that the entire kernel text and all module text falls within a 128M window + * such that these are always within range. With PLTs, we can expand this to a + * 2G window. + * + * We chose the 128M region to surround the entire kernel image (rather than + * just the text) as using the same bounds for the 128M and 2G regions ensures + * by construction that we never select a 128M region that is not a subset of + * the 2G region. For very large and unusual kernel configurations this means + * we may fall back to PLTs where they could have been avoided, but this keeps + * the logic significantly simpler. + */ +static int __init module_init_limits(void) +{ + u64 kernel_end = (u64)_end; + u64 kernel_start = (u64)_text; + u64 kernel_size = kernel_end - kernel_start; + + /* + * The default modules region is placed immediately below the kernel + * image, and is large enough to use the full 2G relocation range. + */ + BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END); + BUILD_BUG_ON(MODULES_VSIZE < SZ_2G); + + if (!kaslr_enabled()) { + if (kernel_size < SZ_128M) + module_direct_base = kernel_end - SZ_128M; + if (kernel_size < SZ_2G) + module_plt_base = kernel_end - SZ_2G; + } else { + u64 min = kernel_start; + u64 max = kernel_end; + + if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { + pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n"); + } else { + module_direct_base = random_bounding_box(SZ_128M, min, max); + if (module_direct_base) { + min = module_direct_base; + max = module_direct_base + SZ_128M; + } + } + + module_plt_base = random_bounding_box(SZ_2G, min, max); + } + + pr_info("%llu pages in range for non-PLT usage", + module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0); + pr_info("%llu pages in range for PLT usage", + module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0); + + return 0; +} +subsys_initcall(module_init_limits); + void *module_alloc(unsigned long size) { - u64 module_alloc_end = module_alloc_base + MODULES_VSIZE; - gfp_t gfp_mask = GFP_KERNEL; - void *p; - - /* Silence the initial allocation */ - if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) - gfp_mask |= __GFP_NOWARN; - - if (IS_ENABLED(CONFIG_KASAN_GENERIC) || - IS_ENABLED(CONFIG_KASAN_SW_TAGS)) - /* don't exceed the static module region - see below */ - module_alloc_end = MODULES_END; - - p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, - module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK, - NUMA_NO_NODE, __builtin_return_address(0)); - - if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && - (IS_ENABLED(CONFIG_KASAN_VMALLOC) || - (!IS_ENABLED(CONFIG_KASAN_GENERIC) && - !IS_ENABLED(CONFIG_KASAN_SW_TAGS)))) - /* - * KASAN without KASAN_VMALLOC can only deal with module - * allocations being served from the reserved module region, - * since the remainder of the vmalloc region is already - * backed by zero shadow pages, and punching holes into it - * is non-trivial. Since the module region is not randomized - * when KASAN is enabled without KASAN_VMALLOC, it is even - * less likely that the module region gets exhausted, so we - * can simply omit this fallback in that case. 
- */ - p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, - module_alloc_base + SZ_2G, GFP_KERNEL, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); + void *p = NULL; + + /* + * Where possible, prefer to allocate within direct branch range of the + * kernel such that no PLTs are necessary. + */ + if (module_direct_base) { + p = __vmalloc_node_range(size, MODULE_ALIGN, + module_direct_base, + module_direct_base + SZ_128M, + GFP_KERNEL | __GFP_NOWARN, + PAGE_KERNEL, 0, NUMA_NO_NODE, + __builtin_return_address(0)); + } - if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { + if (!p && module_plt_base) { + p = __vmalloc_node_range(size, MODULE_ALIGN, + module_plt_base, + module_plt_base + SZ_2G, + GFP_KERNEL | __GFP_NOWARN, + PAGE_KERNEL, 0, NUMA_NO_NODE, + __builtin_return_address(0)); + } + + if (!p) { + pr_warn_ratelimited("%s: unable to allocate memory\n", + __func__); + } + + if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) { vfree(p); return NULL; } @@ -448,9 +529,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_CALL26: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, AARCH64_INSN_IMM_26); - - if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && - ovf == -ERANGE) { + if (ovf == -ERANGE) { val = module_emit_plt_entry(me, sechdrs, loc, &rel[i], sym); if (!val) return -ENOEXEC; @@ -487,7 +566,7 @@ static int module_init_ftrace_plt(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { -#if defined(CONFIG_ARM64_MODULE_PLTS) && defined(CONFIG_DYNAMIC_FTRACE) +#if defined(CONFIG_DYNAMIC_FTRACE) const Elf_Shdr *s; struct plt_entry *plts; diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index b8ec7b3ac9cb..417a8a86b2db 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -296,6 +296,8 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; + kaslr_init(); + /* * If know now we are going to need KPTI then use non-global * mappings from the start, avoiding the cost of rewriting diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 2cfc810d0a5b..e304f7ebec2a 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -23,6 +23,7 @@ #include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/elf.h> +#include <asm/exception.h> #include <asm/cacheflush.h> #include <asm/ucontext.h> #include <asm/unistd.h> @@ -398,7 +399,7 @@ static int restore_tpidr2_context(struct user_ctxs *user) __get_user_error(tpidr2_el0, &user->tpidr2->tpidr2, err); if (!err) - current->thread.tpidr2_el0 = tpidr2_el0; + write_sysreg_s(tpidr2_el0, SYS_TPIDR2_EL0); return err; } diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index da84cf855c44..5a668d7f3c1f 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -147,11 +147,9 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, * exit regardless, as the old entry assembly did. 
*/ if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) { - local_daif_mask(); flags = read_thread_flags(); if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) return; - local_daif_restore(DAIF_PROCCTX); } trace_exit: diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 4bb1b8f47298..794a2dd3659a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -514,6 +514,63 @@ void do_el1_fpac(struct pt_regs *regs, unsigned long esr) die("Oops - FPAC", regs, esr); } +void do_el0_mops(struct pt_regs *regs, unsigned long esr) +{ + bool wrong_option = esr & ESR_ELx_MOPS_ISS_WRONG_OPTION; + bool option_a = esr & ESR_ELx_MOPS_ISS_OPTION_A; + int dstreg = ESR_ELx_MOPS_ISS_DESTREG(esr); + int srcreg = ESR_ELx_MOPS_ISS_SRCREG(esr); + int sizereg = ESR_ELx_MOPS_ISS_SIZEREG(esr); + unsigned long dst, src, size; + + dst = pt_regs_read_reg(regs, dstreg); + src = pt_regs_read_reg(regs, srcreg); + size = pt_regs_read_reg(regs, sizereg); + + /* + * Put the registers back in the original format suitable for a + * prologue instruction, using the generic return routine from the + * Arm ARM (DDI 0487I.a) rules CNTMJ and MWFQH. + */ + if (esr & ESR_ELx_MOPS_ISS_MEM_INST) { + /* SET* instruction */ + if (option_a ^ wrong_option) { + /* Format is from Option A; forward set */ + pt_regs_write_reg(regs, dstreg, dst + size); + pt_regs_write_reg(regs, sizereg, -size); + } + } else { + /* CPY* instruction */ + if (!(option_a ^ wrong_option)) { + /* Format is from Option B */ + if (regs->pstate & PSR_N_BIT) { + /* Backward copy */ + pt_regs_write_reg(regs, dstreg, dst - size); + pt_regs_write_reg(regs, srcreg, src - size); + } + } else { + /* Format is from Option A */ + if (size & BIT(63)) { + /* Forward copy */ + pt_regs_write_reg(regs, dstreg, dst + size); + pt_regs_write_reg(regs, srcreg, src + size); + pt_regs_write_reg(regs, sizereg, -size); + } + } + } + + if (esr & ESR_ELx_MOPS_ISS_FROM_EPILOGUE) + regs->pc -= 8; + else + regs->pc -= 4; + + /* + * If single stepping then finish the step before executing the + * prologue instruction. 
+ */ + user_fastforward_single_step(current); +} + #define __user_cache_maint(insn, address, res) \ if (address >= TASK_SIZE_MAX) { \ res = -EFAULT; \ @@ -824,6 +881,7 @@ static const char *esr_class_str[] = { [ESR_ELx_EC_DABT_LOW] = "DABT (lower EL)", [ESR_ELx_EC_DABT_CUR] = "DABT (current EL)", [ESR_ELx_EC_SP_ALIGN] = "SP Alignment", + [ESR_ELx_EC_MOPS] = "MOPS", [ESR_ELx_EC_FP_EXC32] = "FP (AArch32)", [ESR_ELx_EC_FP_EXC64] = "FP (AArch64)", [ESR_ELx_EC_SERROR] = "SError", @@ -947,7 +1005,7 @@ void do_serror(struct pt_regs *regs, unsigned long esr) } /* GENERIC_BUG traps */ - +#ifdef CONFIG_GENERIC_BUG int is_valid_bugaddr(unsigned long addr) { /* @@ -959,6 +1017,7 @@ int is_valid_bugaddr(unsigned long addr) */ return 1; } +#endif static int bug_handler(struct pt_regs *regs, unsigned long esr) { diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 55f80fb93925..8725291cb00a 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -333,7 +333,7 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu) /* Check if we have TRBE implemented and available at the host */ if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) && - !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_PROG)) + !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_P)) vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_TRBE); } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 4fe217efa218..2f6e0b3e4a75 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -141,6 +141,9 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu) if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); + + if (cpus_have_final_cap(ARM64_HAS_HCX)) + write_sysreg_s(HCRX_GUEST_FLAGS, SYS_HCRX_EL2); } static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) @@ -155,6 +158,9 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 &= ~HCR_VSE; vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; } + + if (cpus_have_final_cap(ARM64_HAS_HCX)) + write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2); } static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 699ea1f8d409..bb6b571ec627 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -44,6 +44,8 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0); ctxt_sys_reg(ctxt, TTBR1_EL1) = read_sysreg_el1(SYS_TTBR1); ctxt_sys_reg(ctxt, TCR_EL1) = read_sysreg_el1(SYS_TCR); + if (cpus_have_final_cap(ARM64_HAS_TCR2)) + ctxt_sys_reg(ctxt, TCR2_EL1) = read_sysreg_el1(SYS_TCR2); ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR); ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0); ctxt_sys_reg(ctxt, AFSR1_EL1) = read_sysreg_el1(SYS_AFSR1); @@ -53,6 +55,10 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR); ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR); ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL); + if (cpus_have_final_cap(ARM64_HAS_S1PIE)) { + ctxt_sys_reg(ctxt, PIR_EL1) = read_sysreg_el1(SYS_PIR); + ctxt_sys_reg(ctxt, PIRE0_EL1) = read_sysreg_el1(SYS_PIRE0); + } ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par(); 
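 	/*
 	 * PAR_EL1 above is read via read_sysreg_par() so that any barriers
 	 * required by CPU erratum workarounds are applied around the read.
 	 */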
ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); @@ -114,6 +120,8 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg_el1(ctxt_sys_reg(ctxt, CPACR_EL1), SYS_CPACR); write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR0_EL1), SYS_TTBR0); write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR1_EL1), SYS_TTBR1); + if (cpus_have_final_cap(ARM64_HAS_TCR2)) + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR2_EL1), SYS_TCR2); write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR); write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0); write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR1_EL1), SYS_AFSR1); @@ -123,6 +131,10 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg_el1(ctxt_sys_reg(ctxt, CONTEXTIDR_EL1), SYS_CONTEXTIDR); write_sysreg_el1(ctxt_sys_reg(ctxt, AMAIR_EL1), SYS_AMAIR); write_sysreg_el1(ctxt_sys_reg(ctxt, CNTKCTL_EL1), SYS_CNTKCTL); + if (cpus_have_final_cap(ARM64_HAS_S1PIE)) { + write_sysreg_el1(ctxt_sys_reg(ctxt, PIR_EL1), SYS_PIR); + write_sysreg_el1(ctxt_sys_reg(ctxt, PIRE0_EL1), SYS_PIRE0); + } write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1); write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1); diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index d756b939f296..4558c02eb352 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -56,7 +56,7 @@ static void __debug_save_trace(u64 *trfcr_el1) *trfcr_el1 = 0; /* Check if the TRBE is enabled */ - if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_ENABLE)) + if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E)) return; /* * Prohibit trace generation while we are in guest. diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 753aa7418149..5b5d5e5449dc 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -401,9 +401,9 @@ static bool trap_oslar_el1(struct kvm_vcpu *vcpu, return read_from_write_only(vcpu, p, r); /* Forward the OSLK bit to OSLSR */ - oslsr = __vcpu_sys_reg(vcpu, OSLSR_EL1) & ~SYS_OSLSR_OSLK; - if (p->regval & SYS_OSLAR_OSLK) - oslsr |= SYS_OSLSR_OSLK; + oslsr = __vcpu_sys_reg(vcpu, OSLSR_EL1) & ~OSLSR_EL1_OSLK; + if (p->regval & OSLAR_EL1_OSLK) + oslsr |= OSLSR_EL1_OSLK; __vcpu_sys_reg(vcpu, OSLSR_EL1) = oslsr; return true; @@ -427,7 +427,7 @@ static int set_oslsr_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, * The only modifiable bit is the OSLK bit. Refuse the write if * userspace attempts to change any other bit in the register. 
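	 * (OSLK mirrors the OS Lock state set through OSLAR_EL1, which
	 * trap_oslar_el1() above forwards; the OSLM bits describing the lock
	 * model stay at their OSLSR_EL1_OSLM_IMPLEMENTED reset value.)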
*/ - if ((val ^ rd->val) & ~SYS_OSLSR_OSLK) + if ((val ^ rd->val) & ~OSLSR_EL1_OSLK) return -EINVAL; __vcpu_sys_reg(vcpu, rd->reg) = val; @@ -1265,6 +1265,7 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3)); if (!cpus_have_final_cap(ARM64_HAS_WFXT)) val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT); + val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_MOPS); break; case SYS_ID_AA64DFR0_EL1: /* Limit debug to ARMv8.0 */ @@ -1800,7 +1801,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_MDRAR_EL1), trap_raz_wi }, { SYS_DESC(SYS_OSLAR_EL1), trap_oslar_el1 }, { SYS_DESC(SYS_OSLSR_EL1), trap_oslsr_el1, reset_val, OSLSR_EL1, - SYS_OSLSR_OSLM_IMPLEMENTED, .set_user = set_oslsr_el1, }, + OSLSR_EL1_OSLM_IMPLEMENTED, .set_user = set_oslsr_el1, }, { SYS_DESC(SYS_OSDLR_EL1), trap_raz_wi }, { SYS_DESC(SYS_DBGPRCR_EL1), trap_raz_wi }, { SYS_DESC(SYS_DBGCLAIMSET_EL1), trap_raz_wi }, @@ -1891,7 +1892,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_SANITISED(ID_AA64MMFR0_EL1), ID_SANITISED(ID_AA64MMFR1_EL1), ID_SANITISED(ID_AA64MMFR2_EL1), - ID_UNALLOCATED(7,3), + ID_SANITISED(ID_AA64MMFR3_EL1), ID_UNALLOCATED(7,4), ID_UNALLOCATED(7,5), ID_UNALLOCATED(7,6), @@ -1911,6 +1912,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 }, { SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 }, { SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 }, + { SYS_DESC(SYS_TCR2_EL1), access_vm_reg, reset_val, TCR2_EL1, 0 }, PTRAUTH_KEY(APIA), PTRAUTH_KEY(APIB), @@ -1960,6 +1962,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi }, { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, + { SYS_DESC(SYS_PIRE0_EL1), access_vm_reg, reset_unknown, PIRE0_EL1 }, + { SYS_DESC(SYS_PIR_EL1), access_vm_reg, reset_unknown, PIR_EL1 }, { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, { SYS_DESC(SYS_LORSA_EL1), trap_loregion }, diff --git a/arch/arm64/lib/xor-neon.c b/arch/arm64/lib/xor-neon.c index 96b171995d19..f9a53b7f9842 100644 --- a/arch/arm64/lib/xor-neon.c +++ b/arch/arm64/lib/xor-neon.c @@ -10,7 +10,7 @@ #include <linux/module.h> #include <asm/neon-intrinsics.h> -void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1, +static void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2) { uint64_t *dp1 = (uint64_t *)p1; @@ -37,7 +37,7 @@ void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } -void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1, +static void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2, const unsigned long * __restrict p3) { @@ -73,7 +73,7 @@ void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } -void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1, +static void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2, const unsigned long * __restrict p3, const unsigned long * __restrict p4) @@ -118,7 +118,7 @@ void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } -void xor_arm64_neon_5(unsigned long bytes, unsigned long * __restrict p1, +static void xor_arm64_neon_5(unsigned long bytes, 
unsigned long * __restrict p1, const unsigned long * __restrict p2, const unsigned long * __restrict p3, const unsigned long * __restrict p4, diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index e1e0dca01839..188197590fc9 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -364,8 +364,8 @@ void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm) ttbr1 &= ~TTBR_ASID_MASK; ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid); + cpu_set_reserved_ttbr0_nosync(); write_sysreg(ttbr1, ttbr1_el1); - isb(); write_sysreg(ttbr0, ttbr0_el1); isb(); post_ttbr_update_workaround(); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 6045a5117ac1..c85b6d70b222 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -66,6 +66,8 @@ static inline const struct fault_info *esr_to_debug_fault_info(unsigned long esr static void data_abort_decode(unsigned long esr) { + unsigned long iss2 = ESR_ELx_ISS2(esr); + pr_alert("Data abort info:\n"); if (esr & ESR_ELx_ISV) { @@ -78,12 +80,21 @@ static void data_abort_decode(unsigned long esr) (esr & ESR_ELx_SF) >> ESR_ELx_SF_SHIFT, (esr & ESR_ELx_AR) >> ESR_ELx_AR_SHIFT); } else { - pr_alert(" ISV = 0, ISS = 0x%08lx\n", esr & ESR_ELx_ISS_MASK); + pr_alert(" ISV = 0, ISS = 0x%08lx, ISS2 = 0x%08lx\n", + esr & ESR_ELx_ISS_MASK, iss2); } - pr_alert(" CM = %lu, WnR = %lu\n", + pr_alert(" CM = %lu, WnR = %lu, TnD = %lu, TagAccess = %lu\n", (esr & ESR_ELx_CM) >> ESR_ELx_CM_SHIFT, - (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT); + (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT, + (iss2 & ESR_ELx_TnD) >> ESR_ELx_TnD_SHIFT, + (iss2 & ESR_ELx_TagAccess) >> ESR_ELx_TagAccess_SHIFT); + + pr_alert(" GCS = %ld, Overlay = %lu, DirtyBit = %lu, Xs = %llu\n", + (iss2 & ESR_ELx_GCS) >> ESR_ELx_GCS_SHIFT, + (iss2 & ESR_ELx_Overlay) >> ESR_ELx_Overlay_SHIFT, + (iss2 & ESR_ELx_DirtyBit) >> ESR_ELx_DirtyBit_SHIFT, + (iss2 & ESR_ELx_Xs_MASK) >> ESR_ELx_Xs_SHIFT); } static void mem_abort_decode(unsigned long esr) @@ -885,9 +896,6 @@ void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs) } NOKPROBE_SYMBOL(do_sp_pc_abort); -int __init early_brk64(unsigned long addr, unsigned long esr, - struct pt_regs *regs); - /* * __refdata because early_brk64 is __init, but the reference to it is * clobbered at arch_initcall time. 
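/*
 * Aside (not part of the patch): the open-coded mask-and-shift pairs in
 * data_abort_decode() above are equivalent to FIELD_GET() on the same
 * masks. A minimal sketch, assuming the ESR_ELx_* definitions from
 * <asm/esr.h> that the hunk references:
 *
 *	#include <linux/bitfield.h>
 *
 *	unsigned long iss2 = ESR_ELx_ISS2(esr);
 *	u64 xs = FIELD_GET(ESR_ELx_Xs_MASK, iss2);
 *
 * FIELD_GET() extracts (reg & mask) shifted down by the mask's least
 * significant set bit, so it matches the explicit shifts used in the
 * pr_alert() calls above.
 */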
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 5f9379b3c8c8..4e6476094952 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -8,6 +8,7 @@ #include <linux/export.h> #include <linux/mm.h> +#include <linux/libnvdimm.h> #include <linux/pagemap.h> #include <asm/cacheflush.h> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 66e70ca47680..c28c2c8483cc 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -69,6 +69,7 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit; #define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit #define CRASH_ADDR_HIGH_MAX (PHYS_MASK + 1) +#define CRASH_HIGH_SEARCH_BASE SZ_4G #define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) @@ -101,12 +102,13 @@ static int __init reserve_crashkernel_low(unsigned long long low_size) */ static void __init reserve_crashkernel(void) { - unsigned long long crash_base, crash_size; - unsigned long long crash_low_size = 0; + unsigned long long crash_low_size = 0, search_base = 0; unsigned long long crash_max = CRASH_ADDR_LOW_MAX; + unsigned long long crash_base, crash_size; char *cmdline = boot_command_line; - int ret; bool fixed_base = false; + bool high = false; + int ret; if (!IS_ENABLED(CONFIG_KEXEC_CORE)) return; @@ -129,7 +131,9 @@ static void __init reserve_crashkernel(void) else if (ret) return; + search_base = CRASH_HIGH_SEARCH_BASE; crash_max = CRASH_ADDR_HIGH_MAX; + high = true; } else if (ret || !crash_size) { /* The specified value is invalid */ return; @@ -140,31 +144,51 @@ static void __init reserve_crashkernel(void) /* User specifies base address explicitly. */ if (crash_base) { fixed_base = true; + search_base = crash_base; crash_max = crash_base + crash_size; } retry: crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, - crash_base, crash_max); + search_base, crash_max); if (!crash_base) { /* - * If the first attempt was for low memory, fall back to - * high memory, the minimum required low memory will be - * reserved later. + * For crashkernel=size[KMG]@offset[KMG], print out failure + * message if can't reserve the specified region. */ - if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) { + if (fixed_base) { + pr_warn("crashkernel reservation failed - memory is in use.\n"); + return; + } + + /* + * For crashkernel=size[KMG], if the first attempt was for + * low memory, fall back to high memory, the minimum required + * low memory will be reserved later. + */ + if (!high && crash_max == CRASH_ADDR_LOW_MAX) { crash_max = CRASH_ADDR_HIGH_MAX; + search_base = CRASH_ADDR_LOW_MAX; crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; goto retry; } + /* + * For crashkernel=size[KMG],high, if the first attempt was + * for high memory, fall back to low memory. 
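+	 * e.g. "crashkernel=512M,high" is first tried in
+	 * [CRASH_HIGH_SEARCH_BASE, CRASH_ADDR_HIGH_MAX) and, failing that,
+	 * retried here with search_base = 0 and crash_max = CRASH_ADDR_LOW_MAX.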
+ */ + if (high && crash_max == CRASH_ADDR_HIGH_MAX) { + crash_max = CRASH_ADDR_LOW_MAX; + search_base = 0; + goto retry; + } pr_warn("cannot allocate crashkernel (size:0x%llx)\n", crash_size); return; } - if ((crash_base > CRASH_ADDR_LOW_MAX - crash_low_size) && - crash_low_size && reserve_crashkernel_low(crash_low_size)) { + if ((crash_base >= CRASH_ADDR_LOW_MAX) && crash_low_size && + reserve_crashkernel_low(crash_low_size)) { memblock_phys_free(crash_base, crash_size); return; } diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index e969e68de005..f17d066e85eb 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -214,7 +214,7 @@ static void __init clear_pgds(unsigned long start, static void __init kasan_init_shadow(void) { u64 kimg_shadow_start, kimg_shadow_end; - u64 mod_shadow_start, mod_shadow_end; + u64 mod_shadow_start; u64 vmalloc_shadow_end; phys_addr_t pa_start, pa_end; u64 i; @@ -223,7 +223,6 @@ static void __init kasan_init_shadow(void) kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(KERNEL_END)); mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR); - mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END); vmalloc_shadow_end = (u64)kasan_mem_to_shadow((void *)VMALLOC_END); @@ -246,17 +245,9 @@ static void __init kasan_init_shadow(void) kasan_populate_early_shadow(kasan_mem_to_shadow((void *)PAGE_END), (void *)mod_shadow_start); - if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { - BUILD_BUG_ON(VMALLOC_START != MODULES_END); - kasan_populate_early_shadow((void *)vmalloc_shadow_end, - (void *)KASAN_SHADOW_END); - } else { - kasan_populate_early_shadow((void *)kimg_shadow_end, - (void *)KASAN_SHADOW_END); - if (kimg_shadow_start > mod_shadow_end) - kasan_populate_early_shadow((void *)mod_shadow_end, - (void *)kimg_shadow_start); - } + BUILD_BUG_ON(VMALLOC_START != MODULES_END); + kasan_populate_early_shadow((void *)vmalloc_shadow_end, + (void *)KASAN_SHADOW_END); for_each_mem_range(i, &pa_start, &pa_end) { void *start = (void *)__phys_to_virt(pa_start); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index af6bc8403ee4..95d360805f8a 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -451,7 +451,7 @@ static phys_addr_t pgd_pgtable_alloc(int shift) void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { + if (virt < PAGE_OFFSET) { pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; @@ -478,7 +478,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, static void update_mapping_prot(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { + if (virt < PAGE_OFFSET) { pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; @@ -663,12 +663,17 @@ static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end, vm_area_add_early(vma); } +static pgprot_t kernel_exec_prot(void) +{ + return rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; +} + #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 static int __init map_entry_trampoline(void) { int i; - pgprot_t prot = rodata_enabled ? 
PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
+	pgprot_t prot = kernel_exec_prot();
 	phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);
 
 	/* The trampoline is always mapped and can therefore be global */
@@ -723,7 +728,7 @@ static void __init map_kernel(pgd_t *pgdp)
 	 * mapping to install SW breakpoints. Allow this (only) when
 	 * explicitly requested with rodata=off.
 	 */
-	pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
+	pgprot_t text_prot = kernel_exec_prot();
 
 	/*
 	 * If we have a CPU that supports BTI and a kernel built for
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index c2cb437821ca..2baeec419f62 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -199,7 +199,7 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1)
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 
-#define KPTI_NG_PTE_FLAGS	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
+#define KPTI_NG_PTE_FLAGS	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS | PTE_WRITE)
 
 	.pushsection ".idmap.text", "a"
 
@@ -290,7 +290,7 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
 	isb
 
 	mov	temp_pte, x5
-	mov	pte_flags, #KPTI_NG_PTE_FLAGS
+	mov_q	pte_flags, KPTI_NG_PTE_FLAGS
 
 	/* Everybody is enjoying the idmap, so we can rewrite swapper. */
 	/* PGD */
@@ -454,6 +454,21 @@ SYM_FUNC_START(__cpu_setup)
 #endif	/* CONFIG_ARM64_HW_AFDBM */
 	msr	mair_el1, mair
 	msr	tcr_el1, tcr
+
+	mrs_s	x1, SYS_ID_AA64MMFR3_EL1
+	ubfx	x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4
+	cbz	x1, .Lskip_indirection
+
+	mov_q	x0, PIE_E0
+	msr	REG_PIRE0_EL1, x0
+	mov_q	x0, PIE_E1
+	msr	REG_PIR_EL1, x0
+
+	mov	x0, TCR2_EL1x_PIE
+	msr	REG_TCR2_EL1, x0
+
+.Lskip_indirection:
+
 	/*
 	 * Prepare SCTLR
 	 */
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 40ba95472594..19c23c4fa2da 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -32,16 +32,20 @@ HAS_GENERIC_AUTH_IMP_DEF
 HAS_GIC_CPUIF_SYSREGS
 HAS_GIC_PRIO_MASKING
 HAS_GIC_PRIO_RELAXED_SYNC
+HAS_HCX
 HAS_LDAPR
 HAS_LSE_ATOMICS
+HAS_MOPS
 HAS_NESTED_VIRT
 HAS_NO_FPSIMD
 HAS_NO_HW_PREFETCH
 HAS_PAN
+HAS_S1PIE
 HAS_RAS_EXTN
 HAS_RNG
 HAS_SB
 HAS_STAGE2_FWB
+HAS_TCR2
 HAS_TIDCP1
 HAS_TLB_RANGE
 HAS_VIRT_HOST_EXTN
diff --git a/arch/arm64/tools/gen-cpucaps.awk b/arch/arm64/tools/gen-cpucaps.awk
index 00c9e72a200a..8525980379d7 100755
--- a/arch/arm64/tools/gen-cpucaps.awk
+++ b/arch/arm64/tools/gen-cpucaps.awk
@@ -24,12 +24,12 @@ BEGIN {
 }
 
 /^[vA-Z0-9_]+$/ {
-	printf("#define ARM64_%-30s\t%d\n", $0, cap_num++)
+	printf("#define ARM64_%-40s\t%d\n", $0, cap_num++)
 	next
 }
 
 END {
-	printf("#define ARM64_NCAPS\t\t\t\t%d\n", cap_num)
+	printf("#define ARM64_NCAPS\t\t\t\t\t%d\n", cap_num)
 	print ""
 	print "#endif /* __ASM_CPUCAPS_H */"
 }
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index c9a0d1fa3209..1ea4a3dc68f8 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -48,6 +48,61 @@
 # feature that introduces them (e.g., FEAT_LS64_ACCDATA introduces enumeration
 # item ACCDATA) though it may be more tasteful to do something else.
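 #
 # For example, the "Field 0 OSLK" line in the OSLAR_EL1 block added below is
 # what the companion script gen-sysreg.awk expands into the OSLAR_EL1_OSLK
 # mask definition that the KVM changes earlier in this diff use in place of
 # the old SYS_OSLAR_OSLK macro.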
+Sysreg OSDTRRX_EL1 2 0 0 0 2 +Res0 63:32 +Field 31:0 DTRRX +EndSysreg + +Sysreg MDCCINT_EL1 2 0 0 2 0 +Res0 63:31 +Field 30 RX +Field 29 TX +Res0 28:0 +EndSysreg + +Sysreg MDSCR_EL1 2 0 0 2 2 +Res0 63:36 +Field 35 EHBWE +Field 34 EnSPM +Field 33 TTA +Field 32 EMBWE +Field 31 TFO +Field 30 RXfull +Field 29 TXfull +Res0 28 +Field 27 RXO +Field 26 TXU +Res0 25:24 +Field 23:22 INTdis +Field 21 TDA +Res0 20 +Field 19 SC2 +Res0 18:16 +Field 15 MDE +Field 14 HDE +Field 13 KDE +Field 12 TDCC +Res0 11:7 +Field 6 ERR +Res0 5:1 +Field 0 SS +EndSysreg + +Sysreg OSDTRTX_EL1 2 0 0 3 2 +Res0 63:32 +Field 31:0 DTRTX +EndSysreg + +Sysreg OSECCR_EL1 2 0 0 6 2 +Res0 63:32 +Field 31:0 EDECCR +EndSysreg + +Sysreg OSLAR_EL1 2 0 1 0 4 +Res0 63:1 +Field 0 OSLK +EndSysreg + Sysreg ID_PFR0_EL1 3 0 0 1 0 Res0 63:32 UnsignedEnum 31:28 RAS @@ -1538,6 +1593,78 @@ UnsignedEnum 3:0 CnP EndEnum EndSysreg +Sysreg ID_AA64MMFR3_EL1 3 0 0 7 3 +UnsignedEnum 63:60 Spec_FPACC + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 59:56 ADERR + 0b0000 NI + 0b0001 DEV_ASYNC + 0b0010 FEAT_ADERR + 0b0011 FEAT_ADERR_IND +EndEnum +UnsignedEnum 55:52 SDERR + 0b0000 NI + 0b0001 DEV_SYNC + 0b0010 FEAT_ADERR + 0b0011 FEAT_ADERR_IND +EndEnum +Res0 51:48 +UnsignedEnum 47:44 ANERR + 0b0000 NI + 0b0001 ASYNC + 0b0010 FEAT_ANERR + 0b0011 FEAT_ANERR_IND +EndEnum +UnsignedEnum 43:40 SNERR + 0b0000 NI + 0b0001 SYNC + 0b0010 FEAT_ANERR + 0b0011 FEAT_ANERR_IND +EndEnum +UnsignedEnum 39:36 D128_2 + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 35:32 D128 + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 31:28 MEC + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 27:24 AIE + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 23:20 S2POE + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 19:16 S1POE + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 15:12 S2PIE + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 11:8 S1PIE + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 7:4 SCTLRX + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 3:0 TCRX + 0b0000 NI + 0b0001 IMP +EndEnum +EndSysreg + Sysreg SCTLR_EL1 3 0 1 0 0 Field 63 TIDCP Field 62 SPINTMASK @@ -2034,7 +2161,17 @@ Fields ZCR_ELx EndSysreg Sysreg HCRX_EL2 3 4 1 2 2 -Res0 63:12 +Res0 63:23 +Field 22 GCSEn +Field 21 EnIDCP128 +Field 20 EnSDERR +Field 19 TMEA +Field 18 EnSNERR +Field 17 D128En +Field 16 PTTWI +Field 15 SCTLR2En +Field 14 TCR2En +Res0 13:12 Field 11 MSCEn Field 10 MCE2 Field 9 CMOW @@ -2153,6 +2290,87 @@ Sysreg TTBR1_EL1 3 0 2 0 1 Fields TTBRx_EL1 EndSysreg +SysregFields TCR2_EL1x +Res0 63:16 +Field 15 DisCH1 +Field 14 DisCH0 +Res0 13:12 +Field 11 HAFT +Field 10 PTTWI +Res0 9:6 +Field 5 D128 +Field 4 AIE +Field 3 POE +Field 2 E0POE +Field 1 PIE +Field 0 PnCH +EndSysregFields + +Sysreg TCR2_EL1 3 0 2 0 3 +Fields TCR2_EL1x +EndSysreg + +Sysreg TCR2_EL12 3 5 2 0 3 +Fields TCR2_EL1x +EndSysreg + +Sysreg TCR2_EL2 3 4 2 0 3 +Res0 63:16 +Field 15 DisCH1 +Field 14 DisCH0 +Field 13 AMEC1 +Field 12 AMEC0 +Field 11 HAFT +Field 10 PTTWI +Field 9:8 SKL1 +Field 7:6 SKL0 +Field 5 D128 +Field 4 AIE +Field 3 POE +Field 2 E0POE +Field 1 PIE +Field 0 PnCH +EndSysreg + +SysregFields PIRx_ELx +Field 63:60 Perm15 +Field 59:56 Perm14 +Field 55:52 Perm13 +Field 51:48 Perm12 +Field 47:44 Perm11 +Field 43:40 Perm10 +Field 39:36 Perm9 +Field 35:32 Perm8 +Field 31:28 Perm7 +Field 27:24 Perm6 +Field 23:20 Perm5 +Field 19:16 Perm4 +Field 15:12 Perm3 +Field 11:8 Perm2 +Field 7:4 Perm1 +Field 3:0 Perm0 +EndSysregFields + +Sysreg PIRE0_EL1 3 0 10 2 2 +Fields PIRx_ELx +EndSysreg + +Sysreg PIRE0_EL12 3 5 10 2 2 +Fields PIRx_ELx +EndSysreg + 
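+# (Each 4-bit Permn field above selects the access rights for translations
+# whose page-table descriptor carries permission index n under the FEAT_S1PIE
+# permission-indirection scheme.)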
+Sysreg PIR_EL1 3 0 10 2 3 +Fields PIRx_ELx +EndSysreg + +Sysreg PIR_EL12 3 5 10 2 3 +Fields PIRx_ELx +EndSysreg + +Sysreg PIR_EL2 3 4 10 2 3 +Fields PIRx_ELx +EndSysreg + Sysreg LORSA_EL1 3 0 10 4 0 Res0 63:52 Field 51:16 SA @@ -2200,3 +2418,80 @@ Sysreg ICC_NMIAR1_EL1 3 0 12 9 5 Res0 63:24 Field 23:0 INTID EndSysreg + +Sysreg TRBLIMITR_EL1 3 0 9 11 0 +Field 63:12 LIMIT +Res0 11:7 +Field 6 XE +Field 5 nVM +Enum 4:3 TM + 0b00 STOP + 0b01 IRQ + 0b11 IGNR +EndEnum +Enum 2:1 FM + 0b00 FILL + 0b01 WRAP + 0b11 CBUF +EndEnum +Field 0 E +EndSysreg + +Sysreg TRBPTR_EL1 3 0 9 11 1 +Field 63:0 PTR +EndSysreg + +Sysreg TRBBASER_EL1 3 0 9 11 2 +Field 63:12 BASE +Res0 11:0 +EndSysreg + +Sysreg TRBSR_EL1 3 0 9 11 3 +Res0 63:56 +Field 55:32 MSS2 +Field 31:26 EC +Res0 25:24 +Field 23 DAT +Field 22 IRQ +Field 21 TRG +Field 20 WRAP +Res0 19 +Field 18 EA +Field 17 S +Res0 16 +Field 15:0 MSS +EndSysreg + +Sysreg TRBMAR_EL1 3 0 9 11 4 +Res0 63:12 +Enum 11:10 PAS + 0b00 SECURE + 0b01 NON_SECURE + 0b10 ROOT + 0b11 REALM +EndEnum +Enum 9:8 SH + 0b00 NON_SHAREABLE + 0b10 OUTER_SHAREABLE + 0b11 INNER_SHAREABLE +EndEnum +Field 7:0 Attr +EndSysreg + +Sysreg TRBTRG_EL1 3 0 9 11 6 +Res0 63:32 +Field 31:0 TRG +EndSysreg + +Sysreg TRBIDR_EL1 3 0 9 11 7 +Res0 63:12 +Enum 11:8 EA + 0b0000 NON_DESC + 0b0001 IGNORE + 0b0010 SERROR +EndEnum +Res0 7:6 +Field 5 F +Field 4 P +Field 3:0 Align +EndSysreg diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile index e21a9e84e394..f81fe24894b2 100644 --- a/drivers/acpi/arm64/Makefile +++ b/drivers/acpi/arm64/Makefile @@ -3,4 +3,4 @@ obj-$(CONFIG_ACPI_AGDI) += agdi.o obj-$(CONFIG_ACPI_IORT) += iort.o obj-$(CONFIG_ACPI_GTDT) += gtdt.o obj-$(CONFIG_ACPI_APMT) += apmt.o -obj-y += dma.o +obj-y += dma.o init.o diff --git a/drivers/acpi/arm64/agdi.c b/drivers/acpi/arm64/agdi.c index f605302395c3..8b3c7d42b41b 100644 --- a/drivers/acpi/arm64/agdi.c +++ b/drivers/acpi/arm64/agdi.c @@ -9,11 +9,11 @@ #define pr_fmt(fmt) "ACPI: AGDI: " fmt #include <linux/acpi.h> -#include <linux/acpi_agdi.h> #include <linux/arm_sdei.h> #include <linux/io.h> #include <linux/kernel.h> #include <linux/platform_device.h> +#include "init.h" struct agdi_data { int sdei_event; diff --git a/drivers/acpi/arm64/apmt.c b/drivers/acpi/arm64/apmt.c index 8cab69fa5d59..bb010f6164e5 100644 --- a/drivers/acpi/arm64/apmt.c +++ b/drivers/acpi/arm64/apmt.c @@ -10,10 +10,10 @@ #define pr_fmt(fmt) "ACPI: APMT: " fmt #include <linux/acpi.h> -#include <linux/acpi_apmt.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/platform_device.h> +#include "init.h" #define DEV_NAME "arm-cs-arch-pmu" @@ -35,11 +35,13 @@ static int __init apmt_init_resources(struct resource *res, num_res++; - res[num_res].start = node->base_address1; - res[num_res].end = node->base_address1 + SZ_4K - 1; - res[num_res].flags = IORESOURCE_MEM; + if (node->flags & ACPI_APMT_FLAGS_DUAL_PAGE) { + res[num_res].start = node->base_address1; + res[num_res].end = node->base_address1 + SZ_4K - 1; + res[num_res].flags = IORESOURCE_MEM; - num_res++; + num_res++; + } if (node->ovflw_irq != 0) { trigger = (node->ovflw_irq_flags & ACPI_APMT_OVFLW_IRQ_FLAGS_MODE); diff --git a/drivers/acpi/arm64/init.c b/drivers/acpi/arm64/init.c new file mode 100644 index 000000000000..d3ce53dda122 --- /dev/null +++ b/drivers/acpi/arm64/init.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/acpi.h> +#include "init.h" + +void __init acpi_arm_init(void) +{ + if (IS_ENABLED(CONFIG_ACPI_AGDI)) + acpi_agdi_init(); + if 
(IS_ENABLED(CONFIG_ACPI_APMT)) + acpi_apmt_init(); + if (IS_ENABLED(CONFIG_ACPI_IORT)) + acpi_iort_init(); +} diff --git a/drivers/acpi/arm64/init.h b/drivers/acpi/arm64/init.h new file mode 100644 index 000000000000..a1715a2a34e9 --- /dev/null +++ b/drivers/acpi/arm64/init.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include <linux/init.h> + +void __init acpi_agdi_init(void); +void __init acpi_apmt_init(void); +void __init acpi_iort_init(void); diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 38fb84974f35..3631230a61c8 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -19,6 +19,7 @@ #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/dma-map-ops.h> +#include "init.h" #define IORT_TYPE_MASK(type) (1 << (type)) #define IORT_MSI_TYPE (1 << ACPI_IORT_NODE_ITS_GROUP) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index d161ff707de4..7a1eaf8c7bde 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -26,9 +26,6 @@ #include <asm/mpspec.h> #include <linux/dmi.h> #endif -#include <linux/acpi_agdi.h> -#include <linux/acpi_apmt.h> -#include <linux/acpi_iort.h> #include <linux/acpi_viot.h> #include <linux/pci.h> #include <acpi/apei.h> @@ -1408,7 +1405,7 @@ static int __init acpi_init(void) acpi_init_ffh(); pci_mmcfg_late_init(); - acpi_iort_init(); + acpi_arm_init(); acpi_viot_early_init(); acpi_hest_init(); acpi_ghes_init(); @@ -1420,8 +1417,6 @@ static int __init acpi_init(void) acpi_debugger_init(); acpi_setup_sb_notify_handler(); acpi_viot_init(); - acpi_agdi_init(); - acpi_apmt_init(); return 0; } diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c index 1fc4fd79a1c6..1bab91ce8e95 100644 --- a/drivers/hwtracing/coresight/coresight-trbe.c +++ b/drivers/hwtracing/coresight/coresight-trbe.c @@ -218,7 +218,7 @@ static inline void set_trbe_enabled(struct trbe_cpudata *cpudata, u64 trblimitr) * Enable the TRBE without clearing LIMITPTR which * might be required for fetching the buffer limits. */ - trblimitr |= TRBLIMITR_ENABLE; + trblimitr |= TRBLIMITR_EL1_E; write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1); /* Synchronize the TRBE enable event */ @@ -236,7 +236,7 @@ static inline void set_trbe_disabled(struct trbe_cpudata *cpudata) * Disable the TRBE without clearing LIMITPTR which * might be required for fetching the buffer limits. 
*/ - trblimitr &= ~TRBLIMITR_ENABLE; + trblimitr &= ~TRBLIMITR_EL1_E; write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1); if (trbe_needs_drain_after_disable(cpudata)) @@ -582,12 +582,12 @@ static void clr_trbe_status(void) u64 trbsr = read_sysreg_s(SYS_TRBSR_EL1); WARN_ON(is_trbe_enabled()); - trbsr &= ~TRBSR_IRQ; - trbsr &= ~TRBSR_TRG; - trbsr &= ~TRBSR_WRAP; - trbsr &= ~(TRBSR_EC_MASK << TRBSR_EC_SHIFT); - trbsr &= ~(TRBSR_BSC_MASK << TRBSR_BSC_SHIFT); - trbsr &= ~TRBSR_STOP; + trbsr &= ~TRBSR_EL1_IRQ; + trbsr &= ~TRBSR_EL1_TRG; + trbsr &= ~TRBSR_EL1_WRAP; + trbsr &= ~TRBSR_EL1_EC_MASK; + trbsr &= ~TRBSR_EL1_BSC_MASK; + trbsr &= ~TRBSR_EL1_S; write_sysreg_s(trbsr, SYS_TRBSR_EL1); } @@ -596,13 +596,13 @@ static void set_trbe_limit_pointer_enabled(struct trbe_buf *buf) u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1); unsigned long addr = buf->trbe_limit; - WARN_ON(!IS_ALIGNED(addr, (1UL << TRBLIMITR_LIMIT_SHIFT))); + WARN_ON(!IS_ALIGNED(addr, (1UL << TRBLIMITR_EL1_LIMIT_SHIFT))); WARN_ON(!IS_ALIGNED(addr, PAGE_SIZE)); - trblimitr &= ~TRBLIMITR_NVM; - trblimitr &= ~(TRBLIMITR_FILL_MODE_MASK << TRBLIMITR_FILL_MODE_SHIFT); - trblimitr &= ~(TRBLIMITR_TRIG_MODE_MASK << TRBLIMITR_TRIG_MODE_SHIFT); - trblimitr &= ~(TRBLIMITR_LIMIT_MASK << TRBLIMITR_LIMIT_SHIFT); + trblimitr &= ~TRBLIMITR_EL1_nVM; + trblimitr &= ~TRBLIMITR_EL1_FM_MASK; + trblimitr &= ~TRBLIMITR_EL1_TM_MASK; + trblimitr &= ~TRBLIMITR_EL1_LIMIT_MASK; /* * Fill trace buffer mode is used here while configuring the @@ -613,14 +613,15 @@ static void set_trbe_limit_pointer_enabled(struct trbe_buf *buf) * trace data in the interrupt handler, before reconfiguring * the TRBE. */ - trblimitr |= (TRBE_FILL_MODE_FILL & TRBLIMITR_FILL_MODE_MASK) << TRBLIMITR_FILL_MODE_SHIFT; + trblimitr |= (TRBLIMITR_EL1_FM_FILL << TRBLIMITR_EL1_FM_SHIFT) & + TRBLIMITR_EL1_FM_MASK; /* * Trigger mode is not used here while configuring the TRBE for * the trace capture. Hence just keep this in the ignore mode. 
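	 * (TM == 0b11 is the IGNR encoding in the TRBLIMITR_EL1 layout added
	 * to arch/arm64/tools/sysreg by this series, hence the
	 * TRBLIMITR_EL1_TM_IGNR name below.)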
*/ - trblimitr |= (TRBE_TRIG_MODE_IGNORE & TRBLIMITR_TRIG_MODE_MASK) << - TRBLIMITR_TRIG_MODE_SHIFT; + trblimitr |= (TRBLIMITR_EL1_TM_IGNR << TRBLIMITR_EL1_TM_SHIFT) & + TRBLIMITR_EL1_TM_MASK; trblimitr |= (addr & PAGE_MASK); set_trbe_enabled(buf->cpudata, trblimitr); } diff --git a/drivers/hwtracing/coresight/coresight-trbe.h b/drivers/hwtracing/coresight/coresight-trbe.h index 98ff1b17ad07..77cbb5c63878 100644 --- a/drivers/hwtracing/coresight/coresight-trbe.h +++ b/drivers/hwtracing/coresight/coresight-trbe.h @@ -30,7 +30,7 @@ static inline bool is_trbe_enabled(void) { u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1); - return trblimitr & TRBLIMITR_ENABLE; + return trblimitr & TRBLIMITR_EL1_E; } #define TRBE_EC_OTHERS 0 @@ -39,7 +39,7 @@ static inline bool is_trbe_enabled(void) static inline int get_trbe_ec(u64 trbsr) { - return (trbsr >> TRBSR_EC_SHIFT) & TRBSR_EC_MASK; + return (trbsr & TRBSR_EL1_EC_MASK) >> TRBSR_EL1_EC_SHIFT; } #define TRBE_BSC_NOT_STOPPED 0 @@ -48,63 +48,55 @@ static inline int get_trbe_ec(u64 trbsr) static inline int get_trbe_bsc(u64 trbsr) { - return (trbsr >> TRBSR_BSC_SHIFT) & TRBSR_BSC_MASK; + return (trbsr & TRBSR_EL1_BSC_MASK) >> TRBSR_EL1_BSC_SHIFT; } static inline void clr_trbe_irq(void) { u64 trbsr = read_sysreg_s(SYS_TRBSR_EL1); - trbsr &= ~TRBSR_IRQ; + trbsr &= ~TRBSR_EL1_IRQ; write_sysreg_s(trbsr, SYS_TRBSR_EL1); } static inline bool is_trbe_irq(u64 trbsr) { - return trbsr & TRBSR_IRQ; + return trbsr & TRBSR_EL1_IRQ; } static inline bool is_trbe_trg(u64 trbsr) { - return trbsr & TRBSR_TRG; + return trbsr & TRBSR_EL1_TRG; } static inline bool is_trbe_wrap(u64 trbsr) { - return trbsr & TRBSR_WRAP; + return trbsr & TRBSR_EL1_WRAP; } static inline bool is_trbe_abort(u64 trbsr) { - return trbsr & TRBSR_ABORT; + return trbsr & TRBSR_EL1_EA; } static inline bool is_trbe_running(u64 trbsr) { - return !(trbsr & TRBSR_STOP); + return !(trbsr & TRBSR_EL1_S); } -#define TRBE_TRIG_MODE_STOP 0 -#define TRBE_TRIG_MODE_IRQ 1 -#define TRBE_TRIG_MODE_IGNORE 3 - -#define TRBE_FILL_MODE_FILL 0 -#define TRBE_FILL_MODE_WRAP 1 -#define TRBE_FILL_MODE_CIRCULAR_BUFFER 3 - static inline bool get_trbe_flag_update(u64 trbidr) { - return trbidr & TRBIDR_FLAG; + return trbidr & TRBIDR_EL1_F; } static inline bool is_trbe_programmable(u64 trbidr) { - return !(trbidr & TRBIDR_PROG); + return !(trbidr & TRBIDR_EL1_P); } static inline int get_trbe_address_align(u64 trbidr) { - return (trbidr >> TRBIDR_ALIGN_SHIFT) & TRBIDR_ALIGN_MASK; + return (trbidr & TRBIDR_EL1_Align_MASK) >> TRBIDR_EL1_Align_SHIFT; } static inline unsigned long get_trbe_write_pointer(void) @@ -121,7 +113,7 @@ static inline void set_trbe_write_pointer(unsigned long addr) static inline unsigned long get_trbe_limit_pointer(void) { u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1); - unsigned long addr = trblimitr & (TRBLIMITR_LIMIT_MASK << TRBLIMITR_LIMIT_SHIFT); + unsigned long addr = trblimitr & TRBLIMITR_EL1_LIMIT_MASK; WARN_ON(!IS_ALIGNED(addr, PAGE_SIZE)); return addr; @@ -130,7 +122,7 @@ static inline unsigned long get_trbe_limit_pointer(void) static inline unsigned long get_trbe_base_pointer(void) { u64 trbbaser = read_sysreg_s(SYS_TRBBASER_EL1); - unsigned long addr = trbbaser & (TRBBASER_BASE_MASK << TRBBASER_BASE_SHIFT); + unsigned long addr = trbbaser & TRBBASER_EL1_BASE_MASK; WARN_ON(!IS_ALIGNED(addr, PAGE_SIZE)); return addr; @@ -139,7 +131,7 @@ static inline unsigned long get_trbe_base_pointer(void) static inline void set_trbe_base_pointer(unsigned long addr) { WARN_ON(is_trbe_enabled()); - 
WARN_ON(!IS_ALIGNED(addr, (1UL << TRBBASER_BASE_SHIFT))); + WARN_ON(!IS_ALIGNED(addr, (1UL << TRBBASER_EL1_BASE_SHIFT))); WARN_ON(!IS_ALIGNED(addr, PAGE_SIZE)); write_sysreg_s(addr, SYS_TRBBASER_EL1); } diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 711f82400086..4c07d71cab0b 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -127,6 +127,14 @@ config FSL_IMX8_DDR_PMU can give information about memory throughput and other related events. +config FSL_IMX9_DDR_PMU + tristate "Freescale i.MX9 DDR perf monitor" + depends on ARCH_MXC + help + Provides support for the DDR performance monitor in i.MX9, which + can give information about memory throughput and other related + events. + config QCOM_L2_PMU bool "Qualcomm Technologies L2-cache PMU" depends on ARCH_QCOM && ARM64 && ACPI diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index dabc859540ce..5cfe8954c250 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o obj-$(CONFIG_ARM_PMUV3) += arm_pmuv3.o obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o +obj-$(CONFIG_FSL_IMX9_DDR_PMU) += fsl_imx9_ddr_perf.o obj-$(CONFIG_HISI_PMU) += hisilicon/ obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c index 8574c6e58c83..cd2de44b61b9 100644 --- a/drivers/perf/apple_m1_cpu_pmu.c +++ b/drivers/perf/apple_m1_cpu_pmu.c @@ -493,6 +493,17 @@ static int m1_pmu_map_event(struct perf_event *event) return armpmu_map_event(event, &m1_pmu_perf_map, NULL, M1_PMU_CFG_EVENT); } +static int m2_pmu_map_event(struct perf_event *event) +{ + /* + * Same deal as the above, except that M2 has 64bit counters. + * Which, as far as we're concerned, actually means 63 bits. + * Yes, this is getting awkward. 
+ */ + event->hw.flags |= ARMPMU_EVT_63BIT; + return armpmu_map_event(event, &m1_pmu_perf_map, NULL, M1_PMU_CFG_EVENT); +} + static void m1_pmu_reset(void *info) { int i; @@ -525,7 +536,7 @@ static int m1_pmu_set_event_filter(struct hw_perf_event *event, return 0; } -static int m1_pmu_init(struct arm_pmu *cpu_pmu) +static int m1_pmu_init(struct arm_pmu *cpu_pmu, u32 flags) { cpu_pmu->handle_irq = m1_pmu_handle_irq; cpu_pmu->enable = m1_pmu_enable_event; @@ -536,7 +547,14 @@ static int m1_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->clear_event_idx = m1_pmu_clear_event_idx; cpu_pmu->start = m1_pmu_start; cpu_pmu->stop = m1_pmu_stop; - cpu_pmu->map_event = m1_pmu_map_event; + + if (flags & ARMPMU_EVT_47BIT) + cpu_pmu->map_event = m1_pmu_map_event; + else if (flags & ARMPMU_EVT_63BIT) + cpu_pmu->map_event = m2_pmu_map_event; + else + return WARN_ON(-EINVAL); + cpu_pmu->reset = m1_pmu_reset; cpu_pmu->set_event_filter = m1_pmu_set_event_filter; @@ -550,25 +568,25 @@ static int m1_pmu_init(struct arm_pmu *cpu_pmu) static int m1_pmu_ice_init(struct arm_pmu *cpu_pmu) { cpu_pmu->name = "apple_icestorm_pmu"; - return m1_pmu_init(cpu_pmu); + return m1_pmu_init(cpu_pmu, ARMPMU_EVT_47BIT); } static int m1_pmu_fire_init(struct arm_pmu *cpu_pmu) { cpu_pmu->name = "apple_firestorm_pmu"; - return m1_pmu_init(cpu_pmu); + return m1_pmu_init(cpu_pmu, ARMPMU_EVT_47BIT); } static int m2_pmu_avalanche_init(struct arm_pmu *cpu_pmu) { cpu_pmu->name = "apple_avalanche_pmu"; - return m1_pmu_init(cpu_pmu); + return m1_pmu_init(cpu_pmu, ARMPMU_EVT_63BIT); } static int m2_pmu_blizzard_init(struct arm_pmu *cpu_pmu) { cpu_pmu->name = "apple_blizzard_pmu"; - return m1_pmu_init(cpu_pmu); + return m1_pmu_init(cpu_pmu, ARMPMU_EVT_63BIT); } static const struct of_device_id m1_pmu_of_device_ids[] = { diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 03b1309875ae..998259f1d973 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -645,7 +645,7 @@ static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu) struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events; DECLARE_BITMAP(mask, HW_CNTRS_MAX); - bitmap_zero(mask, cci_pmu->num_cntrs); + bitmap_zero(mask, HW_CNTRS_MAX); for_each_set_bit(i, cci_pmu->hw_events.used_mask, cci_pmu->num_cntrs) { struct perf_event *event = cci_hw->events[i]; @@ -656,7 +656,7 @@ static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu) if (event->hw.state & PERF_HES_STOPPED) continue; if (event->hw.state & PERF_HES_ARCH) { - set_bit(i, mask); + __set_bit(i, mask); event->hw.state &= ~PERF_HES_ARCH; } } diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 47d359f72957..b8c15878bc86 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -44,8 +44,11 @@ #define CMN_MAX_DTMS (CMN_MAX_XPS + (CMN_MAX_DIMENSION - 1) * 4) /* The CFG node has various info besides the discovery tree */ -#define CMN_CFGM_PERIPH_ID_2 0x0010 -#define CMN_CFGM_PID2_REVISION GENMASK(7, 4) +#define CMN_CFGM_PERIPH_ID_01 0x0008 +#define CMN_CFGM_PID0_PART_0 GENMASK_ULL(7, 0) +#define CMN_CFGM_PID1_PART_1 GENMASK_ULL(35, 32) +#define CMN_CFGM_PERIPH_ID_23 0x0010 +#define CMN_CFGM_PID2_REVISION GENMASK_ULL(7, 4) #define CMN_CFGM_INFO_GLOBAL 0x900 #define CMN_INFO_MULTIPLE_DTM_EN BIT_ULL(63) @@ -186,6 +189,7 @@ #define CMN_WP_DOWN 2 +/* Internal values for encoding event support */ enum cmn_model { CMN600 = 1, CMN650 = 2, @@ -197,26 +201,34 @@ enum cmn_model { CMN_650ON = CMN650 | CMN700, }; +/* Actual part numbers and revision IDs defined by the hardware */ +enum cmn_part { + 
PART_CMN600 = 0x434, + PART_CMN650 = 0x436, + PART_CMN700 = 0x43c, + PART_CI700 = 0x43a, +}; + /* CMN-600 r0px shouldn't exist in silicon, thankfully */ enum cmn_revision { - CMN600_R1P0, - CMN600_R1P1, - CMN600_R1P2, - CMN600_R1P3, - CMN600_R2P0, - CMN600_R3P0, - CMN600_R3P1, - CMN650_R0P0 = 0, - CMN650_R1P0, - CMN650_R1P1, - CMN650_R2P0, - CMN650_R1P2, - CMN700_R0P0 = 0, - CMN700_R1P0, - CMN700_R2P0, - CI700_R0P0 = 0, - CI700_R1P0, - CI700_R2P0, + REV_CMN600_R1P0, + REV_CMN600_R1P1, + REV_CMN600_R1P2, + REV_CMN600_R1P3, + REV_CMN600_R2P0, + REV_CMN600_R3P0, + REV_CMN600_R3P1, + REV_CMN650_R0P0 = 0, + REV_CMN650_R1P0, + REV_CMN650_R1P1, + REV_CMN650_R2P0, + REV_CMN650_R1P2, + REV_CMN700_R0P0 = 0, + REV_CMN700_R1P0, + REV_CMN700_R2P0, + REV_CI700_R0P0 = 0, + REV_CI700_R1P0, + REV_CI700_R2P0, }; enum cmn_node_type { @@ -306,7 +318,7 @@ struct arm_cmn { unsigned int state; enum cmn_revision rev; - enum cmn_model model; + enum cmn_part part; u8 mesh_x; u8 mesh_y; u16 num_xps; @@ -394,19 +406,35 @@ static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn, return NULL; } +static enum cmn_model arm_cmn_model(const struct arm_cmn *cmn) +{ + switch (cmn->part) { + case PART_CMN600: + return CMN600; + case PART_CMN650: + return CMN650; + case PART_CMN700: + return CMN700; + case PART_CI700: + return CI700; + default: + return 0; + }; +} + static u32 arm_cmn_device_connect_info(const struct arm_cmn *cmn, const struct arm_cmn_node *xp, int port) { int offset = CMN_MXP__CONNECT_INFO(port); if (port >= 2) { - if (cmn->model & (CMN600 | CMN650)) + if (cmn->part == PART_CMN600 || cmn->part == PART_CMN650) return 0; /* * CI-700 may have extra ports, but still has the * mesh_port_connect_info registers in the way. */ - if (cmn->model == CI700) + if (cmn->part == PART_CI700) offset += CI700_CONNECT_INFO_P2_5_OFFSET; } @@ -640,7 +668,7 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, eattr = container_of(attr, typeof(*eattr), attr.attr); - if (!(eattr->model & cmn->model)) + if (!(eattr->model & arm_cmn_model(cmn))) return 0; type = eattr->type; @@ -658,7 +686,7 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, if ((intf & 4) && !(cmn->ports_used & BIT(intf & 3))) return 0; - if (chan == 4 && cmn->model == CMN600) + if (chan == 4 && cmn->part == PART_CMN600) return 0; if ((chan == 5 && cmn->rsp_vc_num < 2) || @@ -669,19 +697,19 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, } /* Revision-specific differences */ - if (cmn->model == CMN600) { - if (cmn->rev < CMN600_R1P3) { + if (cmn->part == PART_CMN600) { + if (cmn->rev < REV_CMN600_R1P3) { if (type == CMN_TYPE_CXRA && eventid > 0x10) return 0; } - if (cmn->rev < CMN600_R1P2) { + if (cmn->rev < REV_CMN600_R1P2) { if (type == CMN_TYPE_HNF && eventid == 0x1b) return 0; if (type == CMN_TYPE_CXRA || type == CMN_TYPE_CXHA) return 0; } - } else if (cmn->model == CMN650) { - if (cmn->rev < CMN650_R2P0 || cmn->rev == CMN650_R1P2) { + } else if (cmn->part == PART_CMN650) { + if (cmn->rev < REV_CMN650_R2P0 || cmn->rev == REV_CMN650_R1P2) { if (type == CMN_TYPE_HNF && eventid > 0x22) return 0; if (type == CMN_TYPE_SBSX && eventid == 0x17) @@ -689,8 +717,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, if (type == CMN_TYPE_RNI && eventid > 0x10) return 0; } - } else if (cmn->model == CMN700) { - if (cmn->rev < CMN700_R2P0) { + } else if (cmn->part == PART_CMN700) { + if (cmn->rev < REV_CMN700_R2P0) { if (type == CMN_TYPE_HNF && eventid > 0x2c) return 0; if (type == 
CMN_TYPE_CCHA && eventid > 0x74) @@ -698,7 +726,7 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, if (type == CMN_TYPE_CCLA && eventid > 0x27) return 0; } - if (cmn->rev < CMN700_R1P0) { + if (cmn->rev < REV_CMN700_R1P0) { if (type == CMN_TYPE_HNF && eventid > 0x2b) return 0; } @@ -1171,19 +1199,31 @@ static ssize_t arm_cmn_cpumask_show(struct device *dev, static struct device_attribute arm_cmn_cpumask_attr = __ATTR(cpumask, 0444, arm_cmn_cpumask_show, NULL); -static struct attribute *arm_cmn_cpumask_attrs[] = { +static ssize_t arm_cmn_identifier_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%03x%02x\n", cmn->part, cmn->rev); +} + +static struct device_attribute arm_cmn_identifier_attr = + __ATTR(identifier, 0444, arm_cmn_identifier_show, NULL); + +static struct attribute *arm_cmn_other_attrs[] = { &arm_cmn_cpumask_attr.attr, + &arm_cmn_identifier_attr.attr, NULL, }; -static const struct attribute_group arm_cmn_cpumask_attr_group = { - .attrs = arm_cmn_cpumask_attrs, +static const struct attribute_group arm_cmn_other_attrs_group = { + .attrs = arm_cmn_other_attrs, }; static const struct attribute_group *arm_cmn_attr_groups[] = { &arm_cmn_event_attrs_group, &arm_cmn_format_attrs_group, - &arm_cmn_cpumask_attr_group, + &arm_cmn_other_attrs_group, NULL }; @@ -1200,7 +1240,7 @@ static u32 arm_cmn_wp_config(struct perf_event *event) u32 grp = CMN_EVENT_WP_GRP(event); u32 exc = CMN_EVENT_WP_EXCLUSIVE(event); u32 combine = CMN_EVENT_WP_COMBINE(event); - bool is_cmn600 = to_cmn(event->pmu)->model == CMN600; + bool is_cmn600 = to_cmn(event->pmu)->part == PART_CMN600; config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) | FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) | @@ -1520,14 +1560,14 @@ done: return ret; } -static enum cmn_filter_select arm_cmn_filter_sel(enum cmn_model model, +static enum cmn_filter_select arm_cmn_filter_sel(const struct arm_cmn *cmn, enum cmn_node_type type, unsigned int eventid) { struct arm_cmn_event_attr *e; - int i; + enum cmn_model model = arm_cmn_model(cmn); - for (i = 0; i < ARRAY_SIZE(arm_cmn_event_attrs) - 1; i++) { + for (int i = 0; i < ARRAY_SIZE(arm_cmn_event_attrs) - 1; i++) { e = container_of(arm_cmn_event_attrs[i], typeof(*e), attr.attr); if (e->model & model && e->type == type && e->eventid == eventid) return e->fsel; @@ -1570,12 +1610,12 @@ static int arm_cmn_event_init(struct perf_event *event) /* ...but the DTM may depend on which port we're watching */ if (cmn->multi_dtm) hw->dtm_offset = CMN_EVENT_WP_DEV_SEL(event) / 2; - } else if (type == CMN_TYPE_XP && cmn->model == CMN700) { + } else if (type == CMN_TYPE_XP && cmn->part == PART_CMN700) { hw->wide_sel = true; } /* This is sufficiently annoying to recalculate, so cache it */ - hw->filter_sel = arm_cmn_filter_sel(cmn->model, type, eventid); + hw->filter_sel = arm_cmn_filter_sel(cmn, type, eventid); bynodeid = CMN_EVENT_BYNODEID(event); nodeid = CMN_EVENT_NODEID(event); @@ -1899,9 +1939,10 @@ static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int id if (dtc->irq < 0) return dtc->irq; - writel_relaxed(0, dtc->base + CMN_DT_PMCR); + writel_relaxed(CMN_DT_DTC_CTL_DT_EN, dtc->base + CMN_DT_DTC_CTL); + writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR); + writeq_relaxed(0, dtc->base + CMN_DT_PMCCNTR); writel_relaxed(0x1ff, dtc->base + CMN_DT_PMOVSR_CLR); - writel_relaxed(CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + 
CMN_DT_PMCR); return 0; } @@ -1961,7 +2002,7 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn) dn->type = CMN_TYPE_CCLA; } - writel_relaxed(CMN_DT_DTC_CTL_DT_EN, cmn->dtc[0].base + CMN_DT_DTC_CTL); + arm_cmn_set_state(cmn, CMN_STATE_DISABLED); return 0; } @@ -2006,6 +2047,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) void __iomem *cfg_region; struct arm_cmn_node cfg, *dn; struct arm_cmn_dtm *dtm; + enum cmn_part part; u16 child_count, child_poff; u32 xp_offset[CMN_MAX_XPS]; u64 reg; @@ -2017,7 +2059,19 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) return -ENODEV; cfg_region = cmn->base + rgn_offset; - reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_2); + + reg = readq_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_01); + part = FIELD_GET(CMN_CFGM_PID0_PART_0, reg); + part |= FIELD_GET(CMN_CFGM_PID1_PART_1, reg) << 8; + if (cmn->part && cmn->part != part) + dev_warn(cmn->dev, + "Firmware binding mismatch: expected part number 0x%x, found 0x%x\n", + cmn->part, part); + cmn->part = part; + if (!arm_cmn_model(cmn)) + dev_warn(cmn->dev, "Unknown part number: 0x%x\n", part); + + reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_23); cmn->rev = FIELD_GET(CMN_CFGM_PID2_REVISION, reg); reg = readq_relaxed(cfg_region + CMN_CFGM_INFO_GLOBAL); @@ -2081,7 +2135,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) if (xp->id == (1 << 3)) cmn->mesh_x = xp->logid; - if (cmn->model == CMN600) + if (cmn->part == PART_CMN600) xp->dtc = 0xf; else xp->dtc = 1 << readl_relaxed(xp_region + CMN_DTM_UNIT_INFO); @@ -2201,7 +2255,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) if (cmn->num_xps == 1) dev_warn(cmn->dev, "1x1 config not fully supported, translate XP events manually\n"); - dev_dbg(cmn->dev, "model %d, periph_id_2 revision %d\n", cmn->model, cmn->rev); + dev_dbg(cmn->dev, "periph_id part 0x%03x revision %d\n", cmn->part, cmn->rev); reg = cmn->ports_used; dev_dbg(cmn->dev, "mesh %dx%d, ID width %d, ports %6pbl%s\n", cmn->mesh_x, cmn->mesh_y, arm_cmn_xyidbits(cmn), ®, @@ -2256,17 +2310,17 @@ static int arm_cmn_probe(struct platform_device *pdev) return -ENOMEM; cmn->dev = &pdev->dev; - cmn->model = (unsigned long)device_get_match_data(cmn->dev); + cmn->part = (unsigned long)device_get_match_data(cmn->dev); platform_set_drvdata(pdev, cmn); - if (cmn->model == CMN600 && has_acpi_companion(cmn->dev)) { + if (cmn->part == PART_CMN600 && has_acpi_companion(cmn->dev)) { rootnode = arm_cmn600_acpi_probe(pdev, cmn); } else { rootnode = 0; cmn->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(cmn->base)) return PTR_ERR(cmn->base); - if (cmn->model == CMN600) + if (cmn->part == PART_CMN600) rootnode = arm_cmn600_of_probe(pdev->dev.of_node); } if (rootnode < 0) @@ -2335,10 +2389,10 @@ static int arm_cmn_remove(struct platform_device *pdev) #ifdef CONFIG_OF static const struct of_device_id arm_cmn_of_match[] = { - { .compatible = "arm,cmn-600", .data = (void *)CMN600 }, - { .compatible = "arm,cmn-650", .data = (void *)CMN650 }, - { .compatible = "arm,cmn-700", .data = (void *)CMN700 }, - { .compatible = "arm,ci-700", .data = (void *)CI700 }, + { .compatible = "arm,cmn-600", .data = (void *)PART_CMN600 }, + { .compatible = "arm,cmn-650" }, + { .compatible = "arm,cmn-700" }, + { .compatible = "arm,ci-700" }, {} }; MODULE_DEVICE_TABLE(of, arm_cmn_of_match); @@ -2346,9 +2400,9 @@ MODULE_DEVICE_TABLE(of, arm_cmn_of_match); #ifdef CONFIG_ACPI static const struct acpi_device_id 
arm_cmn_acpi_match[] = { - { "ARMHC600", CMN600 }, - { "ARMHC650", CMN650 }, - { "ARMHC700", CMN700 }, + { "ARMHC600", PART_CMN600 }, + { "ARMHC650" }, + { "ARMHC700" }, {} }; MODULE_DEVICE_TABLE(acpi, arm_cmn_acpi_match); diff --git a/drivers/perf/arm_cspmu/Kconfig b/drivers/perf/arm_cspmu/Kconfig index 0b316fe69a45..25d25ded0983 100644 --- a/drivers/perf/arm_cspmu/Kconfig +++ b/drivers/perf/arm_cspmu/Kconfig @@ -4,8 +4,7 @@ config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU tristate "ARM Coresight Architecture PMU" - depends on ARM64 && ACPI - depends on ACPI_APMT || COMPILE_TEST + depends on ARM64 || COMPILE_TEST help Provides support for performance monitoring unit (PMU) devices based on ARM CoreSight PMU architecture. Note that this PMU diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index a3f1c410b417..e2b7827c4563 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -28,7 +28,6 @@ #include <linux/module.h> #include <linux/perf_event.h> #include <linux/platform_device.h> -#include <acpi/processor.h> #include "arm_cspmu.h" #include "nvidia_cspmu.h" @@ -101,10 +100,6 @@ #define ARM_CSPMU_ACTIVE_CPU_MASK 0x0 #define ARM_CSPMU_ASSOCIATED_CPU_MASK 0x1 -/* Check if field f in flags is set with value v */ -#define CHECK_APMT_FLAG(flags, f, v) \ - ((flags & (ACPI_APMT_FLAGS_ ## f)) == (ACPI_APMT_FLAGS_ ## f ## _ ## v)) - /* Check and use default if implementer doesn't provide attribute callback */ #define CHECK_DEFAULT_IMPL_OPS(ops, callback) \ do { \ @@ -122,6 +117,11 @@ static unsigned long arm_cspmu_cpuhp_state; +static struct acpi_apmt_node *arm_cspmu_apmt_node(struct device *dev) +{ + return *(struct acpi_apmt_node **)dev_get_platdata(dev); +} + /* * In CoreSight PMU architecture, all of the MMIO registers are 32-bit except * counter register. The counter register can be implemented as 32-bit or 64-bit @@ -156,12 +156,6 @@ static u64 read_reg64_hilohi(const void __iomem *addr, u32 max_poll_count) return val; } -/* Check if PMU supports 64-bit single copy atomic. */ -static inline bool supports_64bit_atomics(const struct arm_cspmu *cspmu) -{ - return CHECK_APMT_FLAG(cspmu->apmt_node->flags, ATOMIC, SUPP); -} - /* Check if cycle counter is supported. 
*/ static inline bool supports_cycle_counter(const struct arm_cspmu *cspmu) { @@ -189,10 +183,10 @@ static inline bool use_64b_counter_reg(const struct arm_cspmu *cspmu) ssize_t arm_cspmu_sysfs_event_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct dev_ext_attribute *eattr = - container_of(attr, struct dev_ext_attribute, attr); - return sysfs_emit(buf, "event=0x%llx\n", - (unsigned long long)eattr->var); + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, typeof(*pmu_attr), attr); + return sysfs_emit(buf, "event=0x%llx\n", pmu_attr->id); } EXPORT_SYMBOL_GPL(arm_cspmu_sysfs_event_show); @@ -320,7 +314,7 @@ static const char *arm_cspmu_get_name(const struct arm_cspmu *cspmu) static atomic_t pmu_idx[ACPI_APMT_NODE_TYPE_COUNT] = { 0 }; dev = cspmu->dev; - apmt_node = cspmu->apmt_node; + apmt_node = arm_cspmu_apmt_node(dev); pmu_type = apmt_node->type; if (pmu_type >= ACPI_APMT_NODE_TYPE_COUNT) { @@ -397,8 +391,8 @@ static const struct impl_match impl_match[] = { static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu) { int ret; - struct acpi_apmt_node *apmt_node = cspmu->apmt_node; struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops; + struct acpi_apmt_node *apmt_node = arm_cspmu_apmt_node(cspmu->dev); const struct impl_match *match = impl_match; /* @@ -720,7 +714,7 @@ static u64 arm_cspmu_read_counter(struct perf_event *event) offset = counter_offset(sizeof(u64), event->hw.idx); counter_addr = cspmu->base1 + offset; - return supports_64bit_atomics(cspmu) ? + return cspmu->has_atomic_dword ? readq(counter_addr) : read_reg64_hilohi(counter_addr, HILOHI_MAX_POLL); } @@ -911,24 +905,18 @@ static struct arm_cspmu *arm_cspmu_alloc(struct platform_device *pdev) { struct acpi_apmt_node *apmt_node; struct arm_cspmu *cspmu; - struct device *dev; - - dev = &pdev->dev; - apmt_node = *(struct acpi_apmt_node **)dev_get_platdata(dev); - if (!apmt_node) { - dev_err(dev, "failed to get APMT node\n"); - return NULL; - } + struct device *dev = &pdev->dev; cspmu = devm_kzalloc(dev, sizeof(*cspmu), GFP_KERNEL); if (!cspmu) return NULL; cspmu->dev = dev; - cspmu->apmt_node = apmt_node; - platform_set_drvdata(pdev, cspmu); + apmt_node = arm_cspmu_apmt_node(dev); + cspmu->has_atomic_dword = apmt_node->flags & ACPI_APMT_FLAGS_ATOMIC; + return cspmu; } @@ -936,11 +924,9 @@ static int arm_cspmu_init_mmio(struct arm_cspmu *cspmu) { struct device *dev; struct platform_device *pdev; - struct acpi_apmt_node *apmt_node; dev = cspmu->dev; pdev = to_platform_device(dev); - apmt_node = cspmu->apmt_node; /* Base address for page 0. */ cspmu->base0 = devm_platform_ioremap_resource(pdev, 0); @@ -951,7 +937,7 @@ static int arm_cspmu_init_mmio(struct arm_cspmu *cspmu) /* Base address for page 1 if supported. Otherwise point to page 0. */ cspmu->base1 = cspmu->base0; - if (CHECK_APMT_FLAG(apmt_node->flags, DUAL_PAGE, SUPP)) { + if (platform_get_resource(pdev, IORESOURCE_MEM, 1)) { cspmu->base1 = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(cspmu->base1)) { dev_err(dev, "ioremap failed for page-1 resource\n"); @@ -1048,19 +1034,14 @@ static int arm_cspmu_request_irq(struct arm_cspmu *cspmu) int irq, ret; struct device *dev; struct platform_device *pdev; - struct acpi_apmt_node *apmt_node; dev = cspmu->dev; pdev = to_platform_device(dev); - apmt_node = cspmu->apmt_node; /* Skip IRQ request if the PMU does not support overflow interrupt. 
*/ - if (apmt_node->ovflw_irq == 0) - return 0; - - irq = platform_get_irq(pdev, 0); + irq = platform_get_irq_optional(pdev, 0); if (irq < 0) - return irq; + return irq == -ENXIO ? 0 : irq; ret = devm_request_irq(dev, irq, arm_cspmu_handle_irq, IRQF_NOBALANCING | IRQF_NO_THREAD, dev_name(dev), @@ -1075,6 +1056,9 @@ static int arm_cspmu_request_irq(struct arm_cspmu *cspmu) return 0; } +#if defined(CONFIG_ACPI) && defined(CONFIG_ARM64) +#include <acpi/processor.h> + static inline int arm_cspmu_find_cpu_container(int cpu, u32 container_uid) { u32 acpi_uid; @@ -1099,15 +1083,13 @@ static inline int arm_cspmu_find_cpu_container(int cpu, u32 container_uid) return -ENODEV; } -static int arm_cspmu_get_cpus(struct arm_cspmu *cspmu) +static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu) { - struct device *dev; struct acpi_apmt_node *apmt_node; int affinity_flag; int cpu; - dev = cspmu->pmu.dev; - apmt_node = cspmu->apmt_node; + apmt_node = arm_cspmu_apmt_node(cspmu->dev); affinity_flag = apmt_node->flags & ACPI_APMT_FLAGS_AFFINITY; if (affinity_flag == ACPI_APMT_FLAGS_AFFINITY_PROC) { @@ -1129,12 +1111,23 @@ static int arm_cspmu_get_cpus(struct arm_cspmu *cspmu) } if (cpumask_empty(&cspmu->associated_cpus)) { - dev_dbg(dev, "No cpu associated with the PMU\n"); + dev_dbg(cspmu->dev, "No cpu associated with the PMU\n"); return -ENODEV; } return 0; } +#else +static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu) +{ + return -ENODEV; +} +#endif + +static int arm_cspmu_get_cpus(struct arm_cspmu *cspmu) +{ + return arm_cspmu_acpi_get_cpus(cspmu); +} static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu) { @@ -1220,6 +1213,12 @@ static int arm_cspmu_device_remove(struct platform_device *pdev) return 0; } +static const struct platform_device_id arm_cspmu_id[] = { + {DRVNAME, 0}, + { }, +}; +MODULE_DEVICE_TABLE(platform, arm_cspmu_id); + static struct platform_driver arm_cspmu_driver = { .driver = { .name = DRVNAME, @@ -1227,12 +1226,14 @@ static struct platform_driver arm_cspmu_driver = { }, .probe = arm_cspmu_device_probe, .remove = arm_cspmu_device_remove, + .id_table = arm_cspmu_id, }; static void arm_cspmu_set_active_cpu(int cpu, struct arm_cspmu *cspmu) { cpumask_set_cpu(cpu, &cspmu->active_cpu); - WARN_ON(irq_set_affinity(cspmu->irq, &cspmu->active_cpu)); + if (cspmu->irq) + WARN_ON(irq_set_affinity(cspmu->irq, &cspmu->active_cpu)); } static int arm_cspmu_cpu_online(unsigned int cpu, struct hlist_node *node) diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h index 51323b175a4a..83df53d1c132 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.h +++ b/drivers/perf/arm_cspmu/arm_cspmu.h @@ -8,7 +8,6 @@ #ifndef __ARM_CSPMU_H__ #define __ARM_CSPMU_H__ -#include <linux/acpi.h> #include <linux/bitfield.h> #include <linux/cpumask.h> #include <linux/device.h> @@ -118,16 +117,16 @@ struct arm_cspmu_impl { struct arm_cspmu { struct pmu pmu; struct device *dev; - struct acpi_apmt_node *apmt_node; const char *name; const char *identifier; void __iomem *base0; void __iomem *base1; - int irq; cpumask_t associated_cpus; cpumask_t active_cpu; struct hlist_node cpuhp_node; + int irq; + bool has_atomic_dword; u32 pmcfgr; u32 num_logical_ctrs; u32 num_set_clr_reg; diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c index 5de06f9a4dd3..9d0f01c4455a 100644 --- a/drivers/perf/arm_dmc620_pmu.c +++ b/drivers/perf/arm_dmc620_pmu.c @@ -227,9 +227,31 @@ static const struct attribute_group dmc620_pmu_format_attr_group = { .attrs = dmc620_pmu_formats_attrs, }; 
+static ssize_t dmc620_pmu_cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dmc620_pmu *dmc620_pmu = to_dmc620_pmu(dev_get_drvdata(dev)); + + return cpumap_print_to_pagebuf(true, buf, + cpumask_of(dmc620_pmu->irq->cpu)); +} + +static struct device_attribute dmc620_pmu_cpumask_attr = + __ATTR(cpumask, 0444, dmc620_pmu_cpumask_show, NULL); + +static struct attribute *dmc620_pmu_cpumask_attrs[] = { + &dmc620_pmu_cpumask_attr.attr, + NULL, +}; + +static const struct attribute_group dmc620_pmu_cpumask_attr_group = { + .attrs = dmc620_pmu_cpumask_attrs, +}; + static const struct attribute_group *dmc620_pmu_attr_groups[] = { &dmc620_pmu_events_attr_group, &dmc620_pmu_format_attr_group, + &dmc620_pmu_cpumask_attr_group, NULL, }; diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 15bd1e34a88e..277e29fbd504 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -109,6 +109,8 @@ static inline u64 arm_pmu_event_max_period(struct perf_event *event) { if (event->hw.flags & ARMPMU_EVT_64BIT) return GENMASK_ULL(63, 0); + else if (event->hw.flags & ARMPMU_EVT_63BIT) + return GENMASK_ULL(62, 0); else if (event->hw.flags & ARMPMU_EVT_47BIT) return GENMASK_ULL(46, 0); else diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c new file mode 100644 index 000000000000..71d5b07e3aff --- /dev/null +++ b/drivers/perf/fsl_imx9_ddr_perf.c @@ -0,0 +1,711 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright 2023 NXP + +#include <linux/bitfield.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/of_irq.h> +#include <linux/perf_event.h> + +/* Performance monitor configuration */ +#define PMCFG1 0x00 +#define PMCFG1_RD_TRANS_FILT_EN BIT(31) +#define PMCFG1_WR_TRANS_FILT_EN BIT(30) +#define PMCFG1_RD_BT_FILT_EN BIT(29) +#define PMCFG1_ID_MASK GENMASK(17, 0) + +#define PMCFG2 0x04 +#define PMCFG2_ID GENMASK(17, 0) + +/* Global control register affects all counters and takes priority over local control registers */ +#define PMGC0 0x40 +/* Global control register bits */ +#define PMGC0_FAC BIT(31) +#define PMGC0_PMIE BIT(30) +#define PMGC0_FCECE BIT(29) + +/* + * 64bit counter0 exclusively dedicated to counting cycles + * 32bit counters monitor counter-specific events in addition to counting reference events + */ +#define PMLCA(n) (0x40 + 0x10 + (0x10 * n)) +#define PMLCB(n) (0x40 + 0x14 + (0x10 * n)) +#define PMC(n) (0x40 + 0x18 + (0x10 * n)) +/* Local control register bits */ +#define PMLCA_FC BIT(31) +#define PMLCA_CE BIT(26) +#define PMLCA_EVENT GENMASK(22, 16) + +#define NUM_COUNTERS 11 +#define CYCLES_COUNTER 0 + +#define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu) + +#define DDR_PERF_DEV_NAME "imx9_ddr" +#define DDR_CPUHP_CB_NAME DDR_PERF_DEV_NAME "_perf_pmu" + +static DEFINE_IDA(ddr_ida); + +struct imx_ddr_devtype_data { + const char *identifier; /* system PMU identifier for userspace */ +}; + +struct ddr_pmu { + struct pmu pmu; + void __iomem *base; + unsigned int cpu; + struct hlist_node node; + struct device *dev; + struct perf_event *events[NUM_COUNTERS]; + int active_events; + enum cpuhp_state cpuhp_state; + const struct imx_ddr_devtype_data *devtype_data; + int irq; + int id; +}; + +static const struct imx_ddr_devtype_data imx93_devtype_data = { + .identifier = "imx93", +}; + +static const struct of_device_id imx_ddr_pmu_dt_ids[] = { + {.compatible = 
"fsl,imx93-ddr-pmu", .data = &imx93_devtype_data}, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids); + +static ssize_t ddr_perf_identifier_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct ddr_pmu *pmu = dev_get_drvdata(dev); + + return sysfs_emit(page, "%s\n", pmu->devtype_data->identifier); +} + +static struct device_attribute ddr_perf_identifier_attr = + __ATTR(identifier, 0444, ddr_perf_identifier_show, NULL); + +static struct attribute *ddr_perf_identifier_attrs[] = { + &ddr_perf_identifier_attr.attr, + NULL, +}; + +static struct attribute_group ddr_perf_identifier_attr_group = { + .attrs = ddr_perf_identifier_attrs, +}; + +static ssize_t ddr_perf_cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ddr_pmu *pmu = dev_get_drvdata(dev); + + return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu)); +} + +static struct device_attribute ddr_perf_cpumask_attr = + __ATTR(cpumask, 0444, ddr_perf_cpumask_show, NULL); + +static struct attribute *ddr_perf_cpumask_attrs[] = { + &ddr_perf_cpumask_attr.attr, + NULL, +}; + +static const struct attribute_group ddr_perf_cpumask_attr_group = { + .attrs = ddr_perf_cpumask_attrs, +}; + +static ssize_t ddr_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id); +} + +#define IMX9_DDR_PMU_EVENT_ATTR(_name, _id) \ + (&((struct perf_pmu_events_attr[]) { \ + { .attr = __ATTR(_name, 0444, ddr_pmu_event_show, NULL),\ + .id = _id, } \ + })[0].attr.attr) + +static struct attribute *ddr_perf_events_attrs[] = { + /* counter0 cycles event */ + IMX9_DDR_PMU_EVENT_ATTR(cycles, 0), + + /* reference events for all normal counters, need assert DEBUG19[21] bit */ + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ddrc1_rmw_for_ecc, 12), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_rreorder, 13), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_wreorder, 14), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_0, 15), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_1, 16), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_2, 17), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_3, 18), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_4, 19), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_5, 22), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_6, 23), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_7, 24), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_8, 25), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_9, 26), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_10, 27), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_11, 28), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_12, 31), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_13, 59), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_15, 61), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_pm_29, 63), + + /* counter1 specific events */ + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_0, 64), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_1, 65), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_2, 66), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_3, 67), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_4, 68), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_5, 69), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_6, 70), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_riq_7, 71), + + /* counter2 specific events */ + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_0, 64), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_1, 65), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_2, 66), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_3, 67), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_4, 68), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_5, 69), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_6, 70), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_ld_wiq_7, 71), + 
IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_empty, 72), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_trans_filt, 73), + + /* counter3 specific events */ + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_0, 64), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_1, 65), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_2, 66), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_3, 67), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_4, 68), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_5, 69), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_6, 70), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_collision_7, 71), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_full, 72), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_wr_trans_filt, 73), + + /* counter4 specific events */ + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_0, 64), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_1, 65), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_2, 66), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_3, 67), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_4, 68), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_5, 69), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_6, 70), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_row_open_7, 71), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2_rmw, 72), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pm_rd_beat_filt, 73), + + /* counter5 specific events */ + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_0, 64), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_1, 65), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_2, 66), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_3, 67), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_4, 68), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_5, 69), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_6, 70), + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_start_7, 71), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq1, 72), + + /* counter6 specific events */ + IMX9_DDR_PMU_EVENT_ATTR(ddrc_qx_valid_end_0, 64), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq2, 72), + + /* counter7 specific events */ + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_2_full, 64), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq0, 65), + + /* counter8 specific events */ + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_bias_switched, 64), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_1_4_full, 65), + + /* counter9 specific events */ + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_wrq1, 65), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_3_4_full, 66), + + /* counter10 specific events */ + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_misc_mrk, 65), + IMX9_DDR_PMU_EVENT_ATTR(eddrtq_pmon_ld_rdq0, 66), + NULL, +}; + +static const struct attribute_group ddr_perf_events_attr_group = { + .name = "events", + .attrs = ddr_perf_events_attrs, +}; + +PMU_FORMAT_ATTR(event, "config:0-7"); +PMU_FORMAT_ATTR(counter, "config:8-15"); +PMU_FORMAT_ATTR(axi_id, "config1:0-17"); +PMU_FORMAT_ATTR(axi_mask, "config2:0-17"); + +static struct attribute *ddr_perf_format_attrs[] = { + &format_attr_event.attr, + &format_attr_counter.attr, + &format_attr_axi_id.attr, + &format_attr_axi_mask.attr, + NULL, +}; + +static const struct attribute_group ddr_perf_format_attr_group = { + .name = "format", + .attrs = ddr_perf_format_attrs, +}; + +static const struct attribute_group *attr_groups[] = { + &ddr_perf_identifier_attr_group, + &ddr_perf_cpumask_attr_group, + &ddr_perf_events_attr_group, + &ddr_perf_format_attr_group, + NULL, +}; + +static void ddr_perf_clear_counter(struct ddr_pmu *pmu, int counter) +{ + if (counter == CYCLES_COUNTER) { + writel(0, pmu->base + PMC(counter) + 0x4); + writel(0, pmu->base + PMC(counter)); + } else { + writel(0, pmu->base + PMC(counter)); + } +} + +static u64 
ddr_perf_read_counter(struct ddr_pmu *pmu, int counter) +{ + u32 val_lower, val_upper; + u64 val; + + if (counter != CYCLES_COUNTER) { + val = readl_relaxed(pmu->base + PMC(counter)); + goto out; + } + + /* special handling for reading 64bit cycle counter */ + do { + val_upper = readl_relaxed(pmu->base + PMC(counter) + 0x4); + val_lower = readl_relaxed(pmu->base + PMC(counter)); + } while (val_upper != readl_relaxed(pmu->base + PMC(counter) + 0x4)); + + val = val_upper; + val = (val << 32); + val |= val_lower; +out: + return val; +} + +static void ddr_perf_counter_global_config(struct ddr_pmu *pmu, bool enable) +{ + u32 ctrl; + + ctrl = readl_relaxed(pmu->base + PMGC0); + + if (enable) { + /* + * The performance monitor must be reset before event counting + * sequences. The performance monitor can be reset by first freezing + * one or more counters and then clearing the freeze condition to + * allow the counters to count according to the settings in the + * performance monitor registers. Counters can be frozen individually + * by setting PMLCAn[FC] bits, or simultaneously by setting PMGC0[FAC]. + * Simply clearing these freeze bits will then allow the performance + * monitor to begin counting based on the register settings. + */ + ctrl |= PMGC0_FAC; + writel(ctrl, pmu->base + PMGC0); + + /* + * Freeze all counters disabled, interrupt enabled, and freeze + * counters on condition enabled. + */ + ctrl &= ~PMGC0_FAC; + ctrl |= PMGC0_PMIE | PMGC0_FCECE; + writel(ctrl, pmu->base + PMGC0); + } else { + ctrl |= PMGC0_FAC; + ctrl &= ~(PMGC0_PMIE | PMGC0_FCECE); + writel(ctrl, pmu->base + PMGC0); + } +} + +static void ddr_perf_counter_local_config(struct ddr_pmu *pmu, int config, + int counter, bool enable) +{ + u32 ctrl_a; + + ctrl_a = readl_relaxed(pmu->base + PMLCA(counter)); + + if (enable) { + ctrl_a |= PMLCA_FC; + writel(ctrl_a, pmu->base + PMLCA(counter)); + + ddr_perf_clear_counter(pmu, counter); + + /* Freeze counter disabled, condition enabled, and program event.*/ + ctrl_a &= ~PMLCA_FC; + ctrl_a |= PMLCA_CE; + ctrl_a &= ~FIELD_PREP(PMLCA_EVENT, 0x7F); + ctrl_a |= FIELD_PREP(PMLCA_EVENT, (config & 0x000000FF)); + writel(ctrl_a, pmu->base + PMLCA(counter)); + } else { + /* Freeze counter. 
*/ + ctrl_a |= PMLCA_FC; + writel(ctrl_a, pmu->base + PMLCA(counter)); + } +} + +static void ddr_perf_monitor_config(struct ddr_pmu *pmu, int cfg, int cfg1, int cfg2) +{ + u32 pmcfg1, pmcfg2; + int event, counter; + + event = cfg & 0x000000FF; + counter = (cfg & 0x0000FF00) >> 8; + + pmcfg1 = readl_relaxed(pmu->base + PMCFG1); + + if (counter == 2 && event == 73) + pmcfg1 |= PMCFG1_RD_TRANS_FILT_EN; + else if (counter == 2 && event != 73) + pmcfg1 &= ~PMCFG1_RD_TRANS_FILT_EN; + + if (counter == 3 && event == 73) + pmcfg1 |= PMCFG1_WR_TRANS_FILT_EN; + else if (counter == 3 && event != 73) + pmcfg1 &= ~PMCFG1_WR_TRANS_FILT_EN; + + if (counter == 4 && event == 73) + pmcfg1 |= PMCFG1_RD_BT_FILT_EN; + else if (counter == 4 && event != 73) + pmcfg1 &= ~PMCFG1_RD_BT_FILT_EN; + + pmcfg1 &= ~FIELD_PREP(PMCFG1_ID_MASK, 0x3FFFF); + pmcfg1 |= FIELD_PREP(PMCFG1_ID_MASK, cfg2); + writel(pmcfg1, pmu->base + PMCFG1); + + pmcfg2 = readl_relaxed(pmu->base + PMCFG2); + pmcfg2 &= ~FIELD_PREP(PMCFG2_ID, 0x3FFFF); + pmcfg2 |= FIELD_PREP(PMCFG2_ID, cfg1); + writel(pmcfg2, pmu->base + PMCFG2); +} + +static void ddr_perf_event_update(struct perf_event *event) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + u64 new_raw_count; + + new_raw_count = ddr_perf_read_counter(pmu, counter); + local64_add(new_raw_count, &event->count); + + /* clear counter's value every time */ + ddr_perf_clear_counter(pmu, counter); +} + +static int ddr_perf_event_init(struct perf_event *event) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + struct perf_event *sibling; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EOPNOTSUPP; + + if (event->cpu < 0) { + dev_warn(pmu->dev, "Can't provide per-task data!\n"); + return -EOPNOTSUPP; + } + + /* + * We must NOT create groups containing mixed PMUs, although software + * events are acceptable (for example to create a CCN group + * periodically read when a hrtimer aka cpu-clock leader triggers). 
+ */ + if (event->group_leader->pmu != event->pmu && + !is_software_event(event->group_leader)) + return -EINVAL; + + for_each_sibling_event(sibling, event->group_leader) { + if (sibling->pmu != event->pmu && + !is_software_event(sibling)) + return -EINVAL; + } + + event->cpu = pmu->cpu; + hwc->idx = -1; + + return 0; +} + +static void ddr_perf_event_start(struct perf_event *event, int flags) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + + local64_set(&hwc->prev_count, 0); + + ddr_perf_counter_local_config(pmu, event->attr.config, counter, true); + hwc->state = 0; +} + +static int ddr_perf_event_add(struct perf_event *event, int flags) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int cfg = event->attr.config; + int cfg1 = event->attr.config1; + int cfg2 = event->attr.config2; + int counter; + + counter = (cfg & 0x0000FF00) >> 8; + + pmu->events[counter] = event; + pmu->active_events++; + hwc->idx = counter; + hwc->state |= PERF_HES_STOPPED; + + if (flags & PERF_EF_START) + ddr_perf_event_start(event, flags); + + /* read trans, write trans, read beat */ + ddr_perf_monitor_config(pmu, cfg, cfg1, cfg2); + + return 0; +} + +static void ddr_perf_event_stop(struct perf_event *event, int flags) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + + ddr_perf_counter_local_config(pmu, event->attr.config, counter, false); + ddr_perf_event_update(event); + + hwc->state |= PERF_HES_STOPPED; +} + +static void ddr_perf_event_del(struct perf_event *event, int flags) +{ + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + ddr_perf_event_stop(event, PERF_EF_UPDATE); + + pmu->active_events--; + hwc->idx = -1; +} + +static void ddr_perf_pmu_enable(struct pmu *pmu) +{ + struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu); + + ddr_perf_counter_global_config(ddr_pmu, true); +} + +static void ddr_perf_pmu_disable(struct pmu *pmu) +{ + struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu); + + ddr_perf_counter_global_config(ddr_pmu, false); +} + +static void ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base, + struct device *dev) +{ + *pmu = (struct ddr_pmu) { + .pmu = (struct pmu) { + .module = THIS_MODULE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .task_ctx_nr = perf_invalid_context, + .attr_groups = attr_groups, + .event_init = ddr_perf_event_init, + .add = ddr_perf_event_add, + .del = ddr_perf_event_del, + .start = ddr_perf_event_start, + .stop = ddr_perf_event_stop, + .read = ddr_perf_event_update, + .pmu_enable = ddr_perf_pmu_enable, + .pmu_disable = ddr_perf_pmu_disable, + }, + .base = base, + .dev = dev, + }; +} + +static irqreturn_t ddr_perf_irq_handler(int irq, void *p) +{ + struct ddr_pmu *pmu = (struct ddr_pmu *)p; + struct perf_event *event; + int i; + + /* + * Counters can generate an interrupt on an overflow when the MSB of a + * counter changes from 0 to 1. For the interrupt to be signalled, the + * following conditions must be satisfied: + * PMGC0[PMIE] = 1, PMGC0[FCECE] = 1, PMLCAn[CE] = 1 + * When an interrupt is signalled, PMGC0[FAC] is set by hardware and + * all of the registers are frozen. + * Software can clear the interrupt condition by resetting the performance + * monitor and clearing the most significant bit of the counter that + * generated the overflow. 
+ */ + for (i = 0; i < NUM_COUNTERS; i++) { + if (!pmu->events[i]) + continue; + + event = pmu->events[i]; + + ddr_perf_event_update(event); + } + + ddr_perf_counter_global_config(pmu, true); + + return IRQ_HANDLED; +} + +static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct ddr_pmu *pmu = hlist_entry_safe(node, struct ddr_pmu, node); + int target; + + if (cpu != pmu->cpu) + return 0; + + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + + perf_pmu_migrate_context(&pmu->pmu, cpu, target); + pmu->cpu = target; + + WARN_ON(irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu))); + + return 0; +} + +static int ddr_perf_probe(struct platform_device *pdev) +{ + struct ddr_pmu *pmu; + void __iomem *base; + int ret, irq; + char *name; + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) + return PTR_ERR(base); + + pmu = devm_kzalloc(&pdev->dev, sizeof(*pmu), GFP_KERNEL); + if (!pmu) + return -ENOMEM; + + ddr_perf_init(pmu, base, &pdev->dev); + + pmu->devtype_data = of_device_get_match_data(&pdev->dev); + + platform_set_drvdata(pdev, pmu); + + pmu->id = ida_simple_get(&ddr_ida, 0, 0, GFP_KERNEL); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d", pmu->id); + if (!name) { + ret = -ENOMEM; + goto format_string_err; + } + + pmu->cpu = raw_smp_processor_id(); + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DDR_CPUHP_CB_NAME, + NULL, ddr_perf_offline_cpu); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to add callbacks for multi state\n"); + goto cpuhp_state_err; + } + pmu->cpuhp_state = ret; + + /* Register the pmu instance for cpu hotplug */ + ret = cpuhp_state_add_instance_nocalls(pmu->cpuhp_state, &pmu->node); + if (ret) { + dev_err(&pdev->dev, "Error %d registering hotplug\n", ret); + goto cpuhp_instance_err; + } + + /* Request irq */ + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + ret = irq; + goto ddr_perf_err; + } + + ret = devm_request_irq(&pdev->dev, irq, ddr_perf_irq_handler, + IRQF_NOBALANCING | IRQF_NO_THREAD, + DDR_CPUHP_CB_NAME, pmu); + if (ret < 0) { + dev_err(&pdev->dev, "Request irq failed: %d", ret); + goto ddr_perf_err; + } + + pmu->irq = irq; + ret = irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu)); + if (ret) { + dev_err(pmu->dev, "Failed to set interrupt affinity\n"); + goto ddr_perf_err; + } + + ret = perf_pmu_register(&pmu->pmu, name, -1); + if (ret) + goto ddr_perf_err; + + return 0; + +ddr_perf_err: + cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node); +cpuhp_instance_err: + cpuhp_remove_multi_state(pmu->cpuhp_state); +cpuhp_state_err: +format_string_err: + ida_simple_remove(&ddr_ida, pmu->id); + dev_warn(&pdev->dev, "i.MX9 DDR Perf PMU failed (%d), disabled\n", ret); + return ret; +} + +static int ddr_perf_remove(struct platform_device *pdev) +{ + struct ddr_pmu *pmu = platform_get_drvdata(pdev); + + cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node); + cpuhp_remove_multi_state(pmu->cpuhp_state); + + perf_pmu_unregister(&pmu->pmu); + + ida_simple_remove(&ddr_ida, pmu->id); + + return 0; +} + +static struct platform_driver imx_ddr_pmu_driver = { + .driver = { + .name = "imx9-ddr-pmu", + .of_match_table = imx_ddr_pmu_dt_ids, + .suppress_bind_attrs = true, + }, + .probe = ddr_perf_probe, + .remove = ddr_perf_remove, +}; +module_platform_driver(imx_ddr_pmu_driver); + +MODULE_AUTHOR("Xu Yang <xu.yang_2@nxp.com>"); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("DDRC PerfMon for i.MX9 SoCs"); diff --git 
a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index 4d2c9abe3372..48dcc8381ea7 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \ - hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o + hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 6fee0b6e163b..e10fc7cb9493 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -683,7 +683,7 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) pcie_pmu->on_cpu = -1; /* Choose a new CPU from all online cpus. */ - target = cpumask_first(cpu_online_mask); + target = cpumask_any_but(cpu_online_mask, cpu); if (target >= nr_cpu_ids) { pci_err(pcie_pmu->pdev, "There is no CPU to set\n"); return 0; diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index 71b6687d6696..d941e746b424 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -22,9 +22,15 @@ #define PA_TT_CTRL 0x1c08 #define PA_TGTID_CTRL 0x1c14 #define PA_SRCID_CTRL 0x1c18 + +/* H32 PA interrupt registers */ #define PA_INT_MASK 0x1c70 #define PA_INT_STATUS 0x1c78 #define PA_INT_CLEAR 0x1c7c + +#define H60PA_INT_STATUS 0x1c70 +#define H60PA_INT_MASK 0x1c74 + #define PA_EVENT_TYPE0 0x1c80 #define PA_PMU_VERSION 0x1cf0 #define PA_EVENT_CNT0_L 0x1d00 @@ -46,6 +52,12 @@ HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 32, 22); HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 43, 33); HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44); +struct hisi_pa_pmu_int_regs { + u32 mask_offset; + u32 clear_offset; + u32 status_offset; +}; + static void hisi_pa_pmu_enable_tracetag(struct perf_event *event) { struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu); @@ -219,40 +231,40 @@ static void hisi_pa_pmu_disable_counter(struct hisi_pmu *pa_pmu, static void hisi_pa_pmu_enable_counter_int(struct hisi_pmu *pa_pmu, struct hw_perf_event *hwc) { + struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private; u32 val; /* Write 0 to enable interrupt */ - val = readl(pa_pmu->base + PA_INT_MASK); + val = readl(pa_pmu->base + regs->mask_offset); val &= ~(1 << hwc->idx); - writel(val, pa_pmu->base + PA_INT_MASK); + writel(val, pa_pmu->base + regs->mask_offset); } static void hisi_pa_pmu_disable_counter_int(struct hisi_pmu *pa_pmu, struct hw_perf_event *hwc) { + struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private; u32 val; /* Write 1 to mask interrupt */ - val = readl(pa_pmu->base + PA_INT_MASK); + val = readl(pa_pmu->base + regs->mask_offset); val |= 1 << hwc->idx; - writel(val, pa_pmu->base + PA_INT_MASK); + writel(val, pa_pmu->base + regs->mask_offset); } static u32 hisi_pa_pmu_get_int_status(struct hisi_pmu *pa_pmu) { - return readl(pa_pmu->base + PA_INT_STATUS); + struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private; + + return readl(pa_pmu->base + regs->status_offset); } static void hisi_pa_pmu_clear_int_status(struct hisi_pmu *pa_pmu, int idx) { - writel(1 << idx, pa_pmu->base + PA_INT_CLEAR); -} + struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private; -static const struct 
acpi_device_id hisi_pa_pmu_acpi_match[] = { - { "HISI0273", }, - {} -}; -MODULE_DEVICE_TABLE(acpi, hisi_pa_pmu_acpi_match); + writel(1 << idx, pa_pmu->base + regs->clear_offset); +} static int hisi_pa_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *pa_pmu) @@ -276,6 +288,10 @@ static int hisi_pa_pmu_init_data(struct platform_device *pdev, pa_pmu->ccl_id = -1; pa_pmu->sccl_id = -1; + pa_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!pa_pmu->dev_info) + return -ENODEV; + pa_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(pa_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for pa_pmu resource.\n"); @@ -314,6 +330,32 @@ static const struct attribute_group hisi_pa_pmu_v2_events_group = { .attrs = hisi_pa_pmu_v2_events_attr, }; +static struct attribute *hisi_pa_pmu_v3_events_attr[] = { + HISI_PMU_EVENT_ATTR(tx_req, 0x0), + HISI_PMU_EVENT_ATTR(tx_dat, 0x1), + HISI_PMU_EVENT_ATTR(tx_snp, 0x2), + HISI_PMU_EVENT_ATTR(rx_req, 0x7), + HISI_PMU_EVENT_ATTR(rx_dat, 0x8), + HISI_PMU_EVENT_ATTR(rx_snp, 0x9), + NULL +}; + +static const struct attribute_group hisi_pa_pmu_v3_events_group = { + .name = "events", + .attrs = hisi_pa_pmu_v3_events_attr, +}; + +static struct attribute *hisi_h60pa_pmu_events_attr[] = { + HISI_PMU_EVENT_ATTR(rx_flit, 0x50), + HISI_PMU_EVENT_ATTR(tx_flit, 0x65), + NULL +}; + +static const struct attribute_group hisi_h60pa_pmu_events_group = { + .name = "events", + .attrs = hisi_h60pa_pmu_events_attr, +}; + static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); static struct attribute *hisi_pa_pmu_cpumask_attrs[] = { @@ -337,6 +379,12 @@ static const struct attribute_group hisi_pa_pmu_identifier_group = { .attrs = hisi_pa_pmu_identifier_attrs, }; +static struct hisi_pa_pmu_int_regs hisi_pa_pmu_regs = { + .mask_offset = PA_INT_MASK, + .clear_offset = PA_INT_CLEAR, + .status_offset = PA_INT_STATUS, +}; + static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = { &hisi_pa_pmu_v2_format_group, &hisi_pa_pmu_v2_events_group, @@ -345,6 +393,46 @@ static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = { NULL }; +static const struct hisi_pmu_dev_info hisi_h32pa_v2 = { + .name = "pa", + .attr_groups = hisi_pa_pmu_v2_attr_groups, + .private = &hisi_pa_pmu_regs, +}; + +static const struct attribute_group *hisi_pa_pmu_v3_attr_groups[] = { + &hisi_pa_pmu_v2_format_group, + &hisi_pa_pmu_v3_events_group, + &hisi_pa_pmu_cpumask_attr_group, + &hisi_pa_pmu_identifier_group, + NULL +}; + +static const struct hisi_pmu_dev_info hisi_h32pa_v3 = { + .name = "pa", + .attr_groups = hisi_pa_pmu_v3_attr_groups, + .private = &hisi_pa_pmu_regs, +}; + +static struct hisi_pa_pmu_int_regs hisi_h60pa_pmu_regs = { + .mask_offset = H60PA_INT_MASK, + .clear_offset = H60PA_INT_STATUS, /* Clear on write */ + .status_offset = H60PA_INT_STATUS, +}; + +static const struct attribute_group *hisi_h60pa_pmu_attr_groups[] = { + &hisi_pa_pmu_v2_format_group, + &hisi_h60pa_pmu_events_group, + &hisi_pa_pmu_cpumask_attr_group, + &hisi_pa_pmu_identifier_group, + NULL +}; + +static const struct hisi_pmu_dev_info hisi_h60pa = { + .name = "h60pa", + .attr_groups = hisi_h60pa_pmu_attr_groups, + .private = &hisi_h60pa_pmu_regs, +}; + static const struct hisi_uncore_ops hisi_uncore_pa_ops = { .write_evtype = hisi_pa_pmu_write_evtype, .get_event_idx = hisi_uncore_pmu_get_event_idx, @@ -375,7 +463,7 @@ static int hisi_pa_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - pa_pmu->pmu_events.attr_groups = hisi_pa_pmu_v2_attr_groups; + 
pa_pmu->pmu_events.attr_groups = pa_pmu->dev_info->attr_groups; pa_pmu->num_counters = PA_NR_COUNTERS; pa_pmu->ops = &hisi_uncore_pa_ops; pa_pmu->check_event = 0xB0; @@ -400,8 +488,9 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%u_pa%u", - pa_pmu->sicl_id, pa_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_%s%u", + pa_pmu->sicl_id, pa_pmu->dev_info->name, + pa_pmu->index_id); if (!name) return -ENOMEM; @@ -435,6 +524,14 @@ static int hisi_pa_pmu_remove(struct platform_device *pdev) return 0; } +static const struct acpi_device_id hisi_pa_pmu_acpi_match[] = { + { "HISI0273", (kernel_ulong_t)&hisi_h32pa_v2 }, + { "HISI0275", (kernel_ulong_t)&hisi_h32pa_v3 }, + { "HISI0274", (kernel_ulong_t)&hisi_h60pa }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_pa_pmu_acpi_match); + static struct platform_driver hisi_pa_pmu_driver = { .driver = { .name = "hisi_pa_pmu", diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 2823f381930d..04031450d5fe 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -20,7 +20,6 @@ #include "hisi_uncore_pmu.h" -#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff) #define HISI_MAX_PERIOD(nr) (GENMASK_ULL((nr) - 1, 0)) /* @@ -226,6 +225,9 @@ int hisi_uncore_pmu_event_init(struct perf_event *event) hwc->idx = -1; hwc->config_base = event->attr.config; + if (hisi_pmu->ops->check_filter && hisi_pmu->ops->check_filter(event)) + return -EINVAL; + /* Enforce to use the same CPU for all events in this PMU */ event->cpu = hisi_pmu->on_cpu; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 07890a8e96ca..92402aa69d70 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -43,9 +43,15 @@ return FIELD_GET(GENMASK_ULL(hi, lo), event->attr.config); \ } +#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff) + +#define HISI_PMU_EVTYPE_BITS 8 +#define HISI_PMU_EVTYPE_SHIFT(idx) ((idx) % 4 * HISI_PMU_EVTYPE_BITS) + struct hisi_pmu; struct hisi_uncore_ops { + int (*check_filter)(struct perf_event *event); void (*write_evtype)(struct hisi_pmu *, int, u32); int (*get_event_idx)(struct perf_event *); u64 (*read_counter)(struct hisi_pmu *, struct hw_perf_event *); @@ -62,6 +68,13 @@ struct hisi_uncore_ops { void (*disable_filter)(struct perf_event *event); }; +/* Describes the HISI PMU chip features information */ +struct hisi_pmu_dev_info { + const char *name; + const struct attribute_group **attr_groups; + void *private; +}; + struct hisi_pmu_hwevents { struct perf_event *hw_events[HISI_MAX_COUNTERS]; DECLARE_BITMAP(used_mask, HISI_MAX_COUNTERS); @@ -72,6 +85,7 @@ struct hisi_pmu_hwevents { struct hisi_pmu { struct pmu pmu; const struct hisi_uncore_ops *ops; + const struct hisi_pmu_dev_info *dev_info; struct hisi_pmu_hwevents pmu_events; /* associated_cpus: All CPUs associated with the PMU */ cpumask_t associated_cpus; diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c new file mode 100644 index 000000000000..63da05e5831c --- /dev/null +++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c @@ -0,0 +1,578 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HiSilicon SoC UC (unified cache) uncore Hardware event counters support + * + * Copyright (C) 2023 HiSilicon Limited + * + * This code is based on the uncore PMUs like 
hisi_uncore_l3c_pmu. + */ +#include <linux/cpuhotplug.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/list.h> +#include <linux/mod_devicetable.h> +#include <linux/property.h> + +#include "hisi_uncore_pmu.h" + +/* Dynamic CPU hotplug state used by UC PMU */ +static enum cpuhp_state hisi_uc_pmu_online; + +/* UC register definition */ +#define HISI_UC_INT_MASK_REG 0x0800 +#define HISI_UC_INT_STS_REG 0x0808 +#define HISI_UC_INT_CLEAR_REG 0x080c +#define HISI_UC_TRACETAG_CTRL_REG 0x1b2c +#define HISI_UC_TRACETAG_REQ_MSK GENMASK(9, 7) +#define HISI_UC_TRACETAG_MARK_EN BIT(0) +#define HISI_UC_TRACETAG_REQ_EN (HISI_UC_TRACETAG_MARK_EN | BIT(2)) +#define HISI_UC_TRACETAG_SRCID_EN BIT(3) +#define HISI_UC_SRCID_CTRL_REG 0x1b40 +#define HISI_UC_SRCID_MSK GENMASK(14, 1) +#define HISI_UC_EVENT_CTRL_REG 0x1c00 +#define HISI_UC_EVENT_TRACETAG_EN BIT(29) +#define HISI_UC_EVENT_URING_MSK GENMASK(28, 27) +#define HISI_UC_EVENT_GLB_EN BIT(26) +#define HISI_UC_VERSION_REG 0x1cf0 +#define HISI_UC_EVTYPE_REGn(n) (0x1d00 + (n) * 4) +#define HISI_UC_EVTYPE_MASK GENMASK(7, 0) +#define HISI_UC_CNTR_REGn(n) (0x1e00 + (n) * 8) + +#define HISI_UC_NR_COUNTERS 0x8 +#define HISI_UC_V2_NR_EVENTS 0xFF +#define HISI_UC_CNTR_REG_BITS 64 + +#define HISI_UC_RD_REQ_TRACETAG 0x4 +#define HISI_UC_URING_EVENT_MIN 0x47 +#define HISI_UC_URING_EVENT_MAX 0x59 + +HISI_PMU_EVENT_ATTR_EXTRACTOR(rd_req_en, config1, 0, 0); +HISI_PMU_EVENT_ATTR_EXTRACTOR(uring_channel, config1, 5, 4); +HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid, config1, 19, 6); +HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_en, config1, 20, 20); + +static int hisi_uc_pmu_check_filter(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + + if (hisi_get_srcid_en(event) && !hisi_get_rd_req_en(event)) { + dev_err(uc_pmu->dev, + "srcid_en depends on rd_req_en being enabled!\n"); + return -EINVAL; + } + + if (!hisi_get_uring_channel(event)) + return 0; + + if ((HISI_GET_EVENTID(event) < HISI_UC_URING_EVENT_MIN) || + (HISI_GET_EVENTID(event) > HISI_UC_URING_EVENT_MAX)) + dev_warn(uc_pmu->dev, + "Only events [%#x ~ %#x] support channel filtering!", + HISI_UC_URING_EVENT_MIN, HISI_UC_URING_EVENT_MAX); + + return 0; +} + +static void hisi_uc_pmu_config_req_tracetag(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 val; + + if (!hisi_get_rd_req_en(event)) + return; + + val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + /* The request-type has been configured */ + if (FIELD_GET(HISI_UC_TRACETAG_REQ_MSK, val) == HISI_UC_RD_REQ_TRACETAG) + return; + + /* Set request-type for tracetag; only read requests are supported! 
*/ + val &= ~HISI_UC_TRACETAG_REQ_MSK; + val |= FIELD_PREP(HISI_UC_TRACETAG_REQ_MSK, HISI_UC_RD_REQ_TRACETAG); + val |= HISI_UC_TRACETAG_REQ_EN; + writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); +} + +static void hisi_uc_pmu_clear_req_tracetag(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 val; + + if (!hisi_get_rd_req_en(event)) + return; + + val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + /* Do nothing, the request-type tracetag has been cleaned up */ + if (FIELD_GET(HISI_UC_TRACETAG_REQ_MSK, val) == 0) + return; + + /* Clear request-type */ + val &= ~HISI_UC_TRACETAG_REQ_MSK; + val &= ~HISI_UC_TRACETAG_REQ_EN; + writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); +} + +static void hisi_uc_pmu_config_srcid_tracetag(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 val; + + if (!hisi_get_srcid_en(event)) + return; + + val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + /* Do nothing, the source id has been configured */ + if (FIELD_GET(HISI_UC_TRACETAG_SRCID_EN, val)) + return; + + /* Enable source id tracetag */ + val |= HISI_UC_TRACETAG_SRCID_EN; + writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + val = readl(uc_pmu->base + HISI_UC_SRCID_CTRL_REG); + val &= ~HISI_UC_SRCID_MSK; + val |= FIELD_PREP(HISI_UC_SRCID_MSK, hisi_get_srcid(event)); + writel(val, uc_pmu->base + HISI_UC_SRCID_CTRL_REG); + + /* Depend on request-type tracetag enabled */ + hisi_uc_pmu_config_req_tracetag(event); +} + +static void hisi_uc_pmu_clear_srcid_tracetag(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 val; + + if (!hisi_get_srcid_en(event)) + return; + + val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + /* Do nothing, the source id has been cleaned up */ + if (FIELD_GET(HISI_UC_TRACETAG_SRCID_EN, val) == 0) + return; + + hisi_uc_pmu_clear_req_tracetag(event); + + /* Disable source id tracetag */ + val &= ~HISI_UC_TRACETAG_SRCID_EN; + writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + val = readl(uc_pmu->base + HISI_UC_SRCID_CTRL_REG); + val &= ~HISI_UC_SRCID_MSK; + writel(val, uc_pmu->base + HISI_UC_SRCID_CTRL_REG); +} + +static void hisi_uc_pmu_config_uring_channel(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 uring_channel = hisi_get_uring_channel(event); + u32 val; + + /* Do nothing if not being set or is set explicitly to zero (default) */ + if (uring_channel == 0) + return; + + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + + /* Do nothing, the uring_channel has been configured */ + if (uring_channel == FIELD_GET(HISI_UC_EVENT_URING_MSK, val)) + return; + + val &= ~HISI_UC_EVENT_URING_MSK; + val |= FIELD_PREP(HISI_UC_EVENT_URING_MSK, uring_channel); + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static void hisi_uc_pmu_clear_uring_channel(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 val; + + /* Do nothing if not being set or is set explicitly to zero (default) */ + if (hisi_get_uring_channel(event) == 0) + return; + + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + + /* Do nothing, the uring_channel has been cleaned up */ + if (FIELD_GET(HISI_UC_EVENT_URING_MSK, val) == 0) + return; + + val &= ~HISI_UC_EVENT_URING_MSK; + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static void hisi_uc_pmu_enable_filter(struct perf_event *event) +{ + if (event->attr.config1 == 0) + return; + + 
hisi_uc_pmu_config_uring_channel(event); + hisi_uc_pmu_config_req_tracetag(event); + hisi_uc_pmu_config_srcid_tracetag(event); +} + +static void hisi_uc_pmu_disable_filter(struct perf_event *event) +{ + if (event->attr.config1 == 0) + return; + + hisi_uc_pmu_clear_srcid_tracetag(event); + hisi_uc_pmu_clear_req_tracetag(event); + hisi_uc_pmu_clear_uring_channel(event); +} + +static void hisi_uc_pmu_write_evtype(struct hisi_pmu *uc_pmu, int idx, u32 type) +{ + u32 val; + + /* + * Select the appropriate event select register. + * There are two 32-bit event select registers for the + * eight hardware counters; each event code is 8 bits wide. + */ + val = readl(uc_pmu->base + HISI_UC_EVTYPE_REGn(idx / 4)); + val &= ~(HISI_UC_EVTYPE_MASK << HISI_PMU_EVTYPE_SHIFT(idx)); + val |= (type << HISI_PMU_EVTYPE_SHIFT(idx)); + writel(val, uc_pmu->base + HISI_UC_EVTYPE_REGn(idx / 4)); +} + +static void hisi_uc_pmu_start_counters(struct hisi_pmu *uc_pmu) +{ + u32 val; + + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + val |= HISI_UC_EVENT_GLB_EN; + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static void hisi_uc_pmu_stop_counters(struct hisi_pmu *uc_pmu) +{ + u32 val; + + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + val &= ~HISI_UC_EVENT_GLB_EN; + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static void hisi_uc_pmu_enable_counter(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Enable counter index */ + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + val |= (1 << hwc->idx); + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static void hisi_uc_pmu_disable_counter(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Clear counter index */ + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + val &= ~(1 << hwc->idx); + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static u64 hisi_uc_pmu_read_counter(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc) +{ + return readq(uc_pmu->base + HISI_UC_CNTR_REGn(hwc->idx)); +} + +static void hisi_uc_pmu_write_counter(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc, u64 val) +{ + writeq(val, uc_pmu->base + HISI_UC_CNTR_REGn(hwc->idx)); +} + +static void hisi_uc_pmu_enable_counter_int(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(uc_pmu->base + HISI_UC_INT_MASK_REG); + val &= ~(1 << hwc->idx); + writel(val, uc_pmu->base + HISI_UC_INT_MASK_REG); +} + +static void hisi_uc_pmu_disable_counter_int(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(uc_pmu->base + HISI_UC_INT_MASK_REG); + val |= (1 << hwc->idx); + writel(val, uc_pmu->base + HISI_UC_INT_MASK_REG); +} + +static u32 hisi_uc_pmu_get_int_status(struct hisi_pmu *uc_pmu) +{ + return readl(uc_pmu->base + HISI_UC_INT_STS_REG); +} + +static void hisi_uc_pmu_clear_int_status(struct hisi_pmu *uc_pmu, int idx) +{ + writel(1 << idx, uc_pmu->base + HISI_UC_INT_CLEAR_REG); +} + +static int hisi_uc_pmu_init_data(struct platform_device *pdev, + struct hisi_pmu *uc_pmu) +{ + /* + * Use SCCL (Super CPU Cluster) ID and CCL (CPU Cluster) ID to + * identify the topology information of UC PMU devices in the chip. + * Each SCCL contains several CCLs, and each CCL has four UC PMUs. 
+ */ + if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", + &uc_pmu->sccl_id)) { + dev_err(&pdev->dev, "Can not read uc sccl-id!\n"); + return -EINVAL; + } + + if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", + &uc_pmu->ccl_id)) { + dev_err(&pdev->dev, "Can not read uc ccl-id!\n"); + return -EINVAL; + } + + if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", + &uc_pmu->sub_id)) { + dev_err(&pdev->dev, "Can not read sub-id!\n"); + return -EINVAL; + } + + uc_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(uc_pmu->base)) { + dev_err(&pdev->dev, "ioremap failed for uc_pmu resource\n"); + return PTR_ERR(uc_pmu->base); + } + + uc_pmu->identifier = readl(uc_pmu->base + HISI_UC_VERSION_REG); + + return 0; +} + +static struct attribute *hisi_uc_pmu_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + HISI_PMU_FORMAT_ATTR(rd_req_en, "config1:0-0"), + HISI_PMU_FORMAT_ATTR(uring_channel, "config1:4-5"), + HISI_PMU_FORMAT_ATTR(srcid, "config1:6-19"), + HISI_PMU_FORMAT_ATTR(srcid_en, "config1:20-20"), + NULL +}; + +static const struct attribute_group hisi_uc_pmu_format_group = { + .name = "format", + .attrs = hisi_uc_pmu_format_attr, +}; + +static struct attribute *hisi_uc_pmu_events_attr[] = { + HISI_PMU_EVENT_ATTR(sq_time, 0x00), + HISI_PMU_EVENT_ATTR(pq_time, 0x01), + HISI_PMU_EVENT_ATTR(hbm_time, 0x02), + HISI_PMU_EVENT_ATTR(iq_comp_time_cring, 0x03), + HISI_PMU_EVENT_ATTR(iq_comp_time_uring, 0x05), + HISI_PMU_EVENT_ATTR(cpu_rd, 0x10), + HISI_PMU_EVENT_ATTR(cpu_rd64, 0x17), + HISI_PMU_EVENT_ATTR(cpu_rs64, 0x19), + HISI_PMU_EVENT_ATTR(cpu_mru, 0x1a), + HISI_PMU_EVENT_ATTR(cycles, 0x9c), + HISI_PMU_EVENT_ATTR(spipe_hit, 0xb3), + HISI_PMU_EVENT_ATTR(hpipe_hit, 0xdb), + HISI_PMU_EVENT_ATTR(cring_rxdat_cnt, 0xfa), + HISI_PMU_EVENT_ATTR(cring_txdat_cnt, 0xfb), + HISI_PMU_EVENT_ATTR(uring_rxdat_cnt, 0xfc), + HISI_PMU_EVENT_ATTR(uring_txdat_cnt, 0xfd), + NULL +}; + +static const struct attribute_group hisi_uc_pmu_events_group = { + .name = "events", + .attrs = hisi_uc_pmu_events_attr, +}; + +static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); + +static struct attribute *hisi_uc_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static const struct attribute_group hisi_uc_pmu_cpumask_attr_group = { + .attrs = hisi_uc_pmu_cpumask_attrs, +}; + +static struct device_attribute hisi_uc_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_uc_pmu_identifier_attrs[] = { + &hisi_uc_pmu_identifier_attr.attr, + NULL +}; + +static const struct attribute_group hisi_uc_pmu_identifier_group = { + .attrs = hisi_uc_pmu_identifier_attrs, +}; + +static const struct attribute_group *hisi_uc_pmu_attr_groups[] = { + &hisi_uc_pmu_format_group, + &hisi_uc_pmu_events_group, + &hisi_uc_pmu_cpumask_attr_group, + &hisi_uc_pmu_identifier_group, + NULL +}; + +static const struct hisi_uncore_ops hisi_uncore_uc_pmu_ops = { + .check_filter = hisi_uc_pmu_check_filter, + .write_evtype = hisi_uc_pmu_write_evtype, + .get_event_idx = hisi_uncore_pmu_get_event_idx, + .start_counters = hisi_uc_pmu_start_counters, + .stop_counters = hisi_uc_pmu_stop_counters, + .enable_counter = hisi_uc_pmu_enable_counter, + .disable_counter = hisi_uc_pmu_disable_counter, + .enable_counter_int = hisi_uc_pmu_enable_counter_int, + .disable_counter_int = hisi_uc_pmu_disable_counter_int, + .write_counter = hisi_uc_pmu_write_counter, + .read_counter = hisi_uc_pmu_read_counter, + .get_int_status = 
hisi_uc_pmu_get_int_status, + .clear_int_status = hisi_uc_pmu_clear_int_status, + .enable_filter = hisi_uc_pmu_enable_filter, + .disable_filter = hisi_uc_pmu_disable_filter, +}; + +static int hisi_uc_pmu_dev_probe(struct platform_device *pdev, + struct hisi_pmu *uc_pmu) +{ + int ret; + + ret = hisi_uc_pmu_init_data(pdev, uc_pmu); + if (ret) + return ret; + + ret = hisi_uncore_pmu_init_irq(uc_pmu, pdev); + if (ret) + return ret; + + uc_pmu->pmu_events.attr_groups = hisi_uc_pmu_attr_groups; + uc_pmu->check_event = HISI_UC_EVTYPE_MASK; + uc_pmu->ops = &hisi_uncore_uc_pmu_ops; + uc_pmu->counter_bits = HISI_UC_CNTR_REG_BITS; + uc_pmu->num_counters = HISI_UC_NR_COUNTERS; + uc_pmu->dev = &pdev->dev; + uc_pmu->on_cpu = -1; + + return 0; +} + +static void hisi_uc_pmu_remove_cpuhp_instance(void *hotplug_node) +{ + cpuhp_state_remove_instance_nocalls(hisi_uc_pmu_online, hotplug_node); +} + +static void hisi_uc_pmu_unregister_pmu(void *pmu) +{ + perf_pmu_unregister(pmu); +} + +static int hisi_uc_pmu_probe(struct platform_device *pdev) +{ + struct hisi_pmu *uc_pmu; + char *name; + int ret; + + uc_pmu = devm_kzalloc(&pdev->dev, sizeof(*uc_pmu), GFP_KERNEL); + if (!uc_pmu) + return -ENOMEM; + + platform_set_drvdata(pdev, uc_pmu); + + ret = hisi_uc_pmu_dev_probe(pdev, uc_pmu); + if (ret) + return ret; + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_uc%d_%u", + uc_pmu->sccl_id, uc_pmu->ccl_id, uc_pmu->sub_id); + if (!name) + return -ENOMEM; + + ret = cpuhp_state_add_instance(hisi_uc_pmu_online, &uc_pmu->node); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Error registering hotplug\n"); + + ret = devm_add_action_or_reset(&pdev->dev, + hisi_uc_pmu_remove_cpuhp_instance, + &uc_pmu->node); + if (ret) + return ret; + + hisi_pmu_init(uc_pmu, THIS_MODULE); + + ret = perf_pmu_register(&uc_pmu->pmu, name, -1); + if (ret) + return ret; + + return devm_add_action_or_reset(&pdev->dev, + hisi_uc_pmu_unregister_pmu, + &uc_pmu->pmu); +} + +static const struct acpi_device_id hisi_uc_pmu_acpi_match[] = { + { "HISI0291", }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_uc_pmu_acpi_match); + +static struct platform_driver hisi_uc_pmu_driver = { + .driver = { + .name = "hisi_uc_pmu", + .acpi_match_table = hisi_uc_pmu_acpi_match, + /* + * We have not worked out a safe bind/unbind process; + * forcefully unbinding during sampling will lead to a + * kernel panic, so this is not supported yet. 
+		 */
+		.suppress_bind_attrs = true,
+	},
+	.probe = hisi_uc_pmu_probe,
+};
+
+static int __init hisi_uc_pmu_module_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				      "perf/hisi/uc:online",
+				      hisi_uncore_pmu_online_cpu,
+				      hisi_uncore_pmu_offline_cpu);
+	if (ret < 0) {
+		pr_err("UC PMU: Error setting up hotplug, ret = %d\n", ret);
+		return ret;
+	}
+	hisi_uc_pmu_online = ret;
+
+	ret = platform_driver_register(&hisi_uc_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(hisi_uc_pmu_online);
+
+	return ret;
+}
+module_init(hisi_uc_pmu_module_init);
+
+static void __exit hisi_uc_pmu_module_exit(void)
+{
+	platform_driver_unregister(&hisi_uc_pmu_driver);
+	cpuhp_remove_multi_state(hisi_uc_pmu_online);
+}
+module_exit(hisi_uc_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon SoC UC uncore PMU driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Junhao He <hejunhao3@huawei.com>");
diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
index aaca6db7d8f6..3f9a98c17a89 100644
--- a/drivers/perf/qcom_l2_pmu.c
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -857,7 +857,6 @@ static int l2_cache_pmu_probe_cluster(struct device *dev, void *data)
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&cluster->next);
-	list_add(&cluster->next, &l2cache_pmu->clusters);
 	cluster->cluster_id = fw_cluster_id;
 
 	irq = platform_get_irq(sdev, 0);
@@ -883,6 +882,7 @@ static int l2_cache_pmu_probe_cluster(struct device *dev, void *data)
 
 	spin_lock_init(&cluster->pmu_lock);
 
+	list_add(&cluster->next, &l2cache_pmu->clusters);
 	l2cache_pmu->num_pmus++;
 
 	return 0;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 7b71dd74baeb..5ef126a0a50f 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1507,6 +1507,12 @@ static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu)
 }
 #endif
 
+#ifdef CONFIG_ARM64
+void acpi_arm_init(void);
+#else
+static inline void acpi_arm_init(void) { }
+#endif
+
 #ifdef CONFIG_ACPI_PCC
 void acpi_init_pcc(void);
 #else
diff --git a/include/linux/acpi_agdi.h b/include/linux/acpi_agdi.h
deleted file mode 100644
index f477f0b452fa..000000000000
--- a/include/linux/acpi_agdi.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-
-#ifndef __ACPI_AGDI_H__
-#define __ACPI_AGDI_H__
-
-#include <linux/acpi.h>
-
-#ifdef CONFIG_ACPI_AGDI
-void __init acpi_agdi_init(void);
-#else
-static inline void acpi_agdi_init(void) {}
-#endif
-#endif /* __ACPI_AGDI_H__ */
diff --git a/include/linux/acpi_apmt.h b/include/linux/acpi_apmt.h
deleted file mode 100644
index 40bd634d082f..000000000000
--- a/include/linux/acpi_apmt.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * ARM CoreSight PMU driver.
- * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES.
- *
- */
-
-#ifndef __ACPI_APMT_H__
-#define __ACPI_APMT_H__
-
-#include <linux/acpi.h>
-
-#ifdef CONFIG_ACPI_APMT
-void acpi_apmt_init(void);
-#else
-static inline void acpi_apmt_init(void) { }
-#endif /* CONFIG_ACPI_APMT */
-
-#endif /* __ACPI_APMT_H__ */
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 6b70d02bc5f9..ee7cb6aaff71 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -29,7 +29,6 @@ struct fwnode_handle *iort_find_domain_token(int trans_id);
 int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id);
 
 #ifdef CONFIG_ACPI_IORT
-void acpi_iort_init(void);
 u32 iort_msi_map_id(struct device *dev, u32 id);
 struct irq_domain *iort_get_device_domain(struct device *dev, u32 id,
 					  enum irq_domain_bus_token bus_token);
@@ -44,7 +43,6 @@ int iort_iommu_configure_id(struct device *dev, const u32 *id_in);
 void iort_iommu_get_resv_regions(struct device *dev, struct list_head *head);
 phys_addr_t acpi_iort_dma_get_max_cpu_address(void);
 #else
-static inline void acpi_iort_init(void) { }
 static inline u32 iort_msi_map_id(struct device *dev, u32 id)
 { return id; }
 static inline struct irq_domain *iort_get_device_domain(
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 525b5d64e394..c0e4baf940dc 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -26,9 +26,11 @@
  */
 #define ARMPMU_EVT_64BIT	0x00001 /* Event uses a 64bit counter */
 #define ARMPMU_EVT_47BIT	0x00002 /* Event uses a 47bit counter */
+#define ARMPMU_EVT_63BIT	0x00004 /* Event uses a 63bit counter */
 
 static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_64BIT) == ARMPMU_EVT_64BIT);
 static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_47BIT) == ARMPMU_EVT_47BIT);
+static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_63BIT) == ARMPMU_EVT_63BIT);
 
 #define HW_OP_UNSUPPORTED	0xFFFF
 #define C(_x)			PERF_COUNT_HW_CACHE_##_x
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 93333a90bf3a..d4ad813fed10 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -39,6 +39,20 @@ static void cssc_sigill(void)
 	asm volatile(".inst 0xdac01c00" : : : "x0");
 }
 
+static void mops_sigill(void)
+{
+	char dst[1], src[1];
+	register char *dstp asm ("x0") = dst;
+	register char *srcp asm ("x1") = src;
+	register long size asm ("x2") = 1;
+
+	/* CPYP [x0]!, [x1]!, x2! */
+	asm volatile(".inst 0x1d010440"
+		     : "+r" (dstp), "+r" (srcp), "+r" (size)
+		     :
+		     : "cc", "memory");
+}
+
 static void rng_sigill(void)
 {
 	asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0");
@@ -210,6 +224,14 @@ static const struct hwcap_data {
 		.sigill_fn = cssc_sigill,
 	},
 	{
+		.name = "MOPS",
+		.at_hwcap = AT_HWCAP2,
+		.hwcap_bit = HWCAP2_MOPS,
+		.cpuinfo = "mops",
+		.sigill_fn = mops_sigill,
+		.sigill_reliable = true,
+	},
+	{
 		.name = "RNG",
 		.at_hwcap = AT_HWCAP2,
 		.hwcap_bit = HWCAP2_RNG,
diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c
index be952511af22..abe4d58d731d 100644
--- a/tools/testing/selftests/arm64/abi/ptrace.c
+++ b/tools/testing/selftests/arm64/abi/ptrace.c
@@ -20,7 +20,7 @@
 
 #include "../../kselftest.h"
 
-#define EXPECTED_TESTS 7
+#define EXPECTED_TESTS 11
 
 #define MAX_TPIDRS 2
 
@@ -132,6 +132,34 @@ static void test_tpidr(pid_t child)
 	}
 }
 
+static void test_hw_debug(pid_t child, int type, const char *type_name)
+{
+	struct user_hwdebug_state state;
+	struct iovec iov;
+	int slots, arch, ret;
+
+	iov.iov_len = sizeof(state);
+	iov.iov_base = &state;
+
+	/* Should be able to read the values */
+	ret = ptrace(PTRACE_GETREGSET, child, type, &iov);
+	ksft_test_result(ret == 0, "read_%s\n", type_name);
+
+	if (ret == 0) {
+		/* The low 8 bits hold the number of slots, the next 4 the arch */
+		slots = state.dbg_info & 0xff;
+		arch = (state.dbg_info >> 8) & 0xf;
+
+		ksft_print_msg("%s version %d with %d slots\n", type_name,
+			       arch, slots);
+
+		/* Zero is not currently architecturally valid */
+		ksft_test_result(arch, "%s_arch_set\n", type_name);
+	} else {
+		ksft_test_result_skip("%s_arch_set\n", type_name);
+	}
+}
+
 static int do_child(void)
 {
 	if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
@@ -207,6 +235,8 @@ static int do_parent(pid_t child)
 	ksft_print_msg("Parent is %d, child is %d\n", getpid(), child);
 
 	test_tpidr(child);
+	test_hw_debug(child, NT_ARM_HW_WATCH, "NT_ARM_HW_WATCH");
+	test_hw_debug(child, NT_ARM_HW_BREAK, "NT_ARM_HW_BREAK");
 
 	ret = EXIT_SUCCESS;
diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore
index 8ab4c86837fd..839e3a252629 100644
--- a/tools/testing/selftests/arm64/signal/.gitignore
+++ b/tools/testing/selftests/arm64/signal/.gitignore
@@ -4,7 +4,7 @@ fake_sigreturn_*
 sme_*
 ssve_*
 sve_*
-tpidr2_siginfo
+tpidr2_*
 za_*
 zt_*
 !*.[ch]
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
index 40be8443949d..0dc948db3a4a 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -249,7 +249,8 @@ static void default_handler(int signum, siginfo_t *si, void *uc)
 			fprintf(stderr, "-- Timeout !\n");
 		} else {
 			fprintf(stderr,
-				"-- RX UNEXPECTED SIGNAL: %d\n", signum);
+				"-- RX UNEXPECTED SIGNAL: %d code %d address %p\n",
+				signum, si->si_code, si->si_addr);
 		}
 		default_result(current, 1);
 	}
diff --git a/tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c b/tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c
new file mode 100644
index 000000000000..f9a86c00c28c
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 ARM Limited
+ *
+ * Verify that the TPIDR2 register context in signal frames is restored.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+#include <asm/sigcontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+#define SYS_TPIDR2 "S3_3_C13_C0_5"
+
+static uint64_t get_tpidr2(void)
+{
+	uint64_t val;
+
+	asm volatile (
+		"mrs	%0, " SYS_TPIDR2 "\n"
+		: "=r"(val)
+		:
+		: "cc");
+
+	return val;
+}
+
+static void set_tpidr2(uint64_t val)
+{
+	asm volatile (
+		"msr	" SYS_TPIDR2 ", %0\n"
+		:
+		: "r"(val)
+		: "cc");
+}
+
+
+static uint64_t initial_tpidr2;
+
+static bool save_tpidr2(struct tdescr *td)
+{
+	initial_tpidr2 = get_tpidr2();
+	fprintf(stderr, "Initial TPIDR2: %lx\n", initial_tpidr2);
+
+	return true;
+}
+
+static int modify_tpidr2(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+	uint64_t my_tpidr2 = get_tpidr2();
+
+	my_tpidr2++;
+	fprintf(stderr, "Setting TPIDR2 to %lx\n", my_tpidr2);
+	set_tpidr2(my_tpidr2);
+
+	return 0;
+}
+
+static void check_tpidr2(struct tdescr *td)
+{
+	uint64_t tpidr2 = get_tpidr2();
+
+	td->pass = tpidr2 == initial_tpidr2;
+
+	if (td->pass)
+		fprintf(stderr, "TPIDR2 restored\n");
+	else
+		fprintf(stderr, "TPIDR2 was %lx but is now %lx\n",
+			initial_tpidr2, tpidr2);
+}
+
+struct tdescr tde = {
+	.name = "TPIDR2 restore",
+	.descr = "Validate that TPIDR2 is restored from the sigframe",
+	.feats_required = FEAT_SME,
+	.timeout = 3,
+	.sig_trig = SIGUSR1,
+	.init = save_tpidr2,
+	.run = modify_tpidr2,
+	.check_result = check_tpidr2,
+};
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index d4e1f4af29d6..4f10055af2aa 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -48,6 +48,34 @@ struct reg_sublist {
 	__u64 rejects_set_n;
 };
 
+struct feature_id_reg {
+	__u64 reg;
+	__u64 id_reg;
+	__u64 feat_shift;
+	__u64 feat_min;
+};
+
+static struct feature_id_reg feat_id_regs[] = {
+	{
+		ARM64_SYS_REG(3, 0, 2, 0, 3),	/* TCR2_EL1 */
+		ARM64_SYS_REG(3, 0, 0, 7, 3),	/* ID_AA64MMFR3_EL1 */
+		0,
+		1
+	},
+	{
+		ARM64_SYS_REG(3, 0, 10, 2, 2),	/* PIRE0_EL1 */
+		ARM64_SYS_REG(3, 0, 0, 7, 3),	/* ID_AA64MMFR3_EL1 */
+		4,
+		1
+	},
+	{
+		ARM64_SYS_REG(3, 0, 10, 2, 3),	/* PIR_EL1 */
+		ARM64_SYS_REG(3, 0, 0, 7, 3),	/* ID_AA64MMFR3_EL1 */
+		4,
+		1
+	}
+};
+
 struct vcpu_config {
 	char *name;
 	struct reg_sublist sublists[];
@@ -68,7 +96,8 @@ static int vcpu_configs_n;
 
 #define for_each_missing_reg(i)						\
 	for ((i) = 0; (i) < blessed_n; ++(i))				\
-		if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i]))
+		if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i]))	\
+			if (check_supported_feat_reg(vcpu, blessed_reg[i]))
 
 #define for_each_new_reg(i)						\
 	for_each_reg_filtered(i)					\
@@ -132,6 +161,25 @@ static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg)
 	return false;
 }
 
+static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg)
+{
+	int i, ret;
+	__u64 data, feat_val;
+
+	for (i = 0; i < ARRAY_SIZE(feat_id_regs); i++) {
+		if (feat_id_regs[i].reg == reg) {
+			ret = __vcpu_get_reg(vcpu, feat_id_regs[i].id_reg, &data);
+			if (ret < 0)
+				return false;
+
+			feat_val = ((data >> feat_id_regs[i].feat_shift) & 0xf);
+			return feat_val >= feat_id_regs[i].feat_min;
+		}
+	}
+
+	return true;
+}
+
 static const char *str_with_index(const char *template, __u64 index)
 {
 	char *str, *p;
@@ -843,12 +891,15 @@ static __u64 base_regs[] = {
 	ARM64_SYS_REG(3, 0, 2, 0, 0),	/* TTBR0_EL1 */
 	ARM64_SYS_REG(3, 0, 2, 0, 1),	/* TTBR1_EL1 */
 	ARM64_SYS_REG(3, 0, 2, 0, 2),	/* TCR_EL1 */
+	ARM64_SYS_REG(3, 0, 2, 0, 3),	/* TCR2_EL1 */
 	ARM64_SYS_REG(3, 0, 5, 1, 0),	/* AFSR0_EL1 */
 	ARM64_SYS_REG(3, 0, 5, 1, 1),	/* AFSR1_EL1 */
 	ARM64_SYS_REG(3, 0, 5, 2, 0),	/* ESR_EL1 */
 	ARM64_SYS_REG(3, 0, 6, 0, 0),	/* FAR_EL1 */
 	ARM64_SYS_REG(3, 0, 7, 4, 0),	/* PAR_EL1 */
 	ARM64_SYS_REG(3, 0, 10, 2, 0),	/* MAIR_EL1 */
+	ARM64_SYS_REG(3, 0, 10, 2, 2),	/* PIRE0_EL1 */
+	ARM64_SYS_REG(3, 0, 10, 2, 3),	/* PIR_EL1 */
 	ARM64_SYS_REG(3, 0, 10, 3, 0),	/* AMAIR_EL1 */
 	ARM64_SYS_REG(3, 0, 12, 0, 0),	/* VBAR_EL1 */
 	ARM64_SYS_REG(3, 0, 12, 1, 1),	/* DISR_EL1 */